[abi] Improved ujit integration

* Do not mark UniCompiler emit functions noexcept to make the
    signatures compatible with BackendCompiler
  * Added more functions that wrap BackendCompiler functionality
  * Simplified the way of creating functions to make it more
    compatible with how BackendCompiler creates and finishes
    functions
This commit is contained in:
kobalicek
2025-10-12 21:16:52 +02:00
parent 212832c329
commit 5134d396bd
16 changed files with 1269 additions and 833 deletions

View File

@@ -1050,16 +1050,16 @@ Error Assembler::_emit(InstId inst_id, const Operand_& o0, const Operand_& o1, c
o3.as<Imm>().value_as<uint64_t>() >= Support::bit_mask<uint32_t>(op_data.b_imm_size + op_data.b_imm_discard_lsb))
goto InvalidImmediate;
uint32_t aImm = o2.as<Imm>().value_as<uint32_t>() >> op_data.a_imm_discard_lsb;
uint32_t bImm = o3.as<Imm>().value_as<uint32_t>() >> op_data.b_imm_discard_lsb;
uint32_t a_imm = o2.as<Imm>().value_as<uint32_t>() >> op_data.a_imm_discard_lsb;
uint32_t b_imm = o3.as<Imm>().value_as<uint32_t>() >> op_data.b_imm_discard_lsb;
if ((aImm << op_data.a_imm_discard_lsb) != o2.as<Imm>().value_as<uint32_t>() ||
(bImm << op_data.b_imm_discard_lsb) != o3.as<Imm>().value_as<uint32_t>())
if ((a_imm << op_data.a_imm_discard_lsb) != o2.as<Imm>().value_as<uint32_t>() ||
(b_imm << op_data.b_imm_discard_lsb) != o3.as<Imm>().value_as<uint32_t>())
goto InvalidImmediate;
opcode.reset(op_data.opcode());
opcode.add_imm(aImm, op_data.a_imm_offset);
opcode.add_imm(bImm, op_data.b_imm_offset);
opcode.add_imm(a_imm, op_data.a_imm_offset);
opcode.add_imm(b_imm, op_data.b_imm_offset);
opcode.add_reg(o1, 5);
opcode.add_reg(o0, 0);
goto EmitOp;

View File

@@ -215,11 +215,19 @@ public:
//! \overload
ASMJIT_INLINE_NODEBUG Error invoke(Out<InvokeNode*> out, uint64_t target, const FuncSignature& signature) { return invoke_(out, Imm(int64_t(target)), signature); }
//! Return.
//! Return from function.
//!
//! \note This doesn't end the function - it just emits a return.
ASMJIT_INLINE_NODEBUG Error ret() { return add_ret(Operand(), Operand()); }
//! \overload
//! Return from function - one value.
//!
//! \note This doesn't end the function - it just emits a return.
ASMJIT_INLINE_NODEBUG Error ret(const Reg& o0) { return add_ret(o0, Operand()); }
//! \overload
//! Return from function - two values / register pair.
//!
//! \note This doesn't end the function - it just emits a return.
ASMJIT_INLINE_NODEBUG Error ret(const Reg& o0, const Reg& o1) { return add_ret(o0, o1); }
//! \}

View File

@@ -2118,7 +2118,8 @@ namespace asmjit {
//! its own IR. Instead, it translates user calls into target-dependent instructions (or instruction sequences)
//! and allows users to switch to target-specific assembly only where required for extra performance.
//!
//! \warning UJIT is still in an experimental phase, expect minor API breaks in the future.
//! \warning UJIT is still in an experimental phase, expect minor API breaks in the future, especially as the API
//! moves towards stabilization.
//!
//! API Overview
//!
@@ -2155,6 +2156,123 @@ namespace asmjit {
//! - \ref ujit::UniOpVVV - instruction with `[vec, vec, vec]` operands.
//! - \ref ujit::UniOpVVVI - instruction with `[vec, vec, vec, imm]` operands.
//! - \ref ujit::UniOpVVVV - instruction with `[vec, vec, vec, vec]` operands.
//!
//! ### UniCompiler Example
//!
//! Using UniCompiler is similar to using a regular platform-dependent AsmJit Compiler - UniCompiler wraps its API
//! and delegates most non-emit calls to the Compiler. However, it abstracts how instructions are emitted so it
//! can offer a universal API for both general-purpose and SIMD instructions. The following example demonstrates
//! how to use it:
//!
//! ```
//! #include <asmjit/ujit.h>
//! #include <stdio.h>
//!
//! using namespace asmjit;
//!
//! int main() {
//! // Signature of the generated function.
//! using Func = void (*)(uint32_t* dst, const uint32_t* src1, const uint32_t* src2);
//!
//! JitRuntime rt; // Creates a JIT runtime that holds executable code.
//! FileLogger logger(stdout); // Creates a logger that prints to stdout.
//! CodeHolder code; // Creates a CodeHolder - holds code and other information.
//!
//! code.init(rt.environment(), // Initializes CodeHolder to match the JIT environment.
//! rt.cpu_features());
//! code.set_logger(&logger); // Initializes CodeHolder's logger.
//!
//! ujit::BackendCompiler backend_cc(&code); // Creates a regular backend compiler instance.
//! ujit::UniCompiler uc(&backend_cc, // Creates UniCompiler with attached backend compiler.
//! rt.cpu_features(), // CPU features must be passed explicitly.
//! rt.cpu_hints()); // CPU hints must be passed explicitly.
//!
//! // Begin a function of the required signature (this exactly matches the Compiler use).
//! FuncNode* func = uc.add_func(FuncSignature::build<void, uint32_t*, const uint32_t*, const uint32_t*>());
//!
//! ujit::Gp d_ptr = uc.new_gp_ptr(); // Creates a destination pointer.
//! ujit::Gp a_ptr = uc.new_gp_ptr(); // Creates a first source pointer.
//! ujit::Gp b_ptr = uc.new_gp_ptr(); // Creates a second source pointer.
//!
//! func->set_arg(0, d_ptr); // Assigns 1st argument.
//! func->set_arg(1, a_ptr); // Assigns 2nd argument.
//! func->set_arg(2, b_ptr); // Assigns 3rd argument.
//!
//! ujit::Vec v0 = uc.new_vec128(); // Creates a 128-bit vector register.
//! ujit::Vec v1 = uc.new_vec128(); // Creates a 128-bit vector register.
//!
//! uc.v_loadu128(v0, ujit::mem_ptr(a_ptr)); // Unaligned load of 128 bits from [a_ptr] into v0.
//! uc.v_loadu128(v1, ujit::mem_ptr(b_ptr)); // Unaligned load of 128 bits from [b_ptr] into v1.
//! uc.v_add_i32(v0, v0, v1); // Vector addition of 4 32-bit integers.
//! uc.v_storeu128(ujit::mem_ptr(d_ptr), v0);// Unaligned store of 128 bits from v0 to [d_ptr].
//!
//! uc.end_func(); // End of the function body.
//! Error err1 = uc.finalize(); // Translates and assembles the whole 'backend_cc' content.
//!
//! if (err1 != Error::kOk) {
//! // Handle a possible error returned by AsmJit as finalize can fail. One reason could be wrong operands
//! // to some instruction or other platform constraints. Usually UniCompiler handles most of platform
//! // constraints by itself, but this error code must be checked regardless.
//! return 1;
//! }
//! // ----> Both BackendCompiler and UniCompiler are no longer needed from here and can be destroyed <----
//!
//! Func fn;
//! Error err2 = rt.add(&fn, &code); // Add the generated code to JIT runtime (executable memory).
//!
//! if (err2 != Error::kOk) {
//! // Handle a possible error returned by AsmJit. This would be either out of executable memory or failure
//! // to allocate it (for example excessive user-space hardening or making the allocation of executable
//! // memory forbidden).
//! return 1;
//! }
//! // ----> CodeHolder is no longer needed from here and can be destroyed <----
//!
//! // Input data.
//! static constexpr uint32_t a_data[4] = {1u,2u,4u,8u};
//! static constexpr uint32_t b_data[4] = {6u,4u,3u,1u};
//!
//! // Output data.
//! uint32_t d_data[4] {};
//!
//! // Calls the generated function.
//! fn(d_data, a_data, b_data);
//!
//! // Prints both inputs and the output.
//! printf("a_data={%u,%u,%u,%u}\n", a_data[0], a_data[1], a_data[2], a_data[3]);
//! printf("b_data={%u,%u,%u,%u}\n", b_data[0], b_data[1], b_data[2], b_data[3]);
//! printf("d_data={%u,%u,%u,%u}\n", d_data[0], d_data[1], d_data[2], d_data[3]);
//!
//! // Explicitly removes the function from JIT runtime.
//! rt.release(fn);
//!
//! return 0;
//! }
//! ```
//!
//! ### Emitting Backend-Specific Code
//!
//! In cases where backend-specific code is required for performance reasons, it's possible to use the
//! underlying backend-specific Compiler, which is provided as a `cc` member of `UniCompiler`. The next example
//! demonstrates how to use an AArch64-specific code path during code generation:
//!
//! ```
//! #include <asmjit/ujit.h>
//!
//! using namespace asmjit;
//!
//! void emit_backend_specific_code(ujit::UniCompiler& uc, const ujit::Gp& a, const ujit::Gp& b, const ujit::Gp& c) {
//! #if defined(ASMJIT_UJIT_AARCH64)
//! // Emit aarch64 specific code via `uc.cc`:
//! uc.cc->orn(a, b, c);
//! #else
//! // Generic code.
//! ujit::Gp tmp = uc.new_similar_reg(a);
//! uc.not_(tmp, c);
//! uc.or_(a, b, tmp);
//! #endif
//! }
//! ```
//!
//! \cond INTERNAL
//! \defgroup asmjit_ra RA

View File

@@ -16,7 +16,7 @@
#define ASMJIT_LIBRARY_MAKE_VERSION(major, minor, patch) ((major << 16) | (minor << 8) | (patch))
//! AsmJit library version, see \ref ASMJIT_LIBRARY_MAKE_VERSION for a version format reference.
#define ASMJIT_LIBRARY_VERSION ASMJIT_LIBRARY_MAKE_VERSION(1, 19, 0)
#define ASMJIT_LIBRARY_VERSION ASMJIT_LIBRARY_MAKE_VERSION(1, 20, 0)
//! \def ASMJIT_ABI_NAMESPACE
//!
@@ -27,7 +27,7 @@
//! AsmJit default, which makes it possible to use multiple AsmJit libraries within a single project, totally
//! controlled by users. This is especially useful in cases in which some of these libraries come from a third party.
#if !defined(ASMJIT_ABI_NAMESPACE)
#define ASMJIT_ABI_NAMESPACE v1_19
#define ASMJIT_ABI_NAMESPACE v1_20
#endif // !ASMJIT_ABI_NAMESPACE
//! \}

View File

@@ -126,6 +126,8 @@ public:
ASMJIT_API Error comment(const char* data, size_t size = SIZE_MAX) override;
//! Emits a comment passed via a `data` span by forwarding `data.data()` and `data.size()` to the pointer/size
//! overload of `comment()`.
ASMJIT_INLINE Error comment(Span<const char> data) { return comment(data.data(), data.size()); }
//! \}
//! \name Events

View File

@@ -403,6 +403,11 @@ public:
[[nodiscard]]
ASMJIT_API Label new_named_label(const char* name, size_t name_size = SIZE_MAX, LabelType type = LabelType::kGlobal, uint32_t parent_id = Globals::kInvalidId) override;
//! Creates a new named label from a `name` span.
//!
//! Forwards `name.data()` and `name.size()` together with `type` and `parent_id` to the pointer/size overload of
//! `new_named_label()`.
[[nodiscard]]
ASMJIT_INLINE Label new_named_label(Span<const char> name, LabelType type = LabelType::kGlobal, uint32_t parent_id = Globals::kInvalidId) {
return new_named_label(name.data(), name.size(), type, parent_id);
}
ASMJIT_API Error bind(const Label& label) override;
//! \}
@@ -473,6 +478,8 @@ public:
ASMJIT_API Error comment(const char* data, size_t size = SIZE_MAX) override;
//! Emits a comment passed via a `data` span by forwarding `data.data()` and `data.size()` to the pointer/size
//! overload of `comment()`.
ASMJIT_INLINE Error comment(Span<const char> data) { return comment(data.data(), data.size()); }
//! \}
//! \name Serialization

View File

@@ -96,6 +96,10 @@ public:
//! \name Function Management
//! \{
//! Returns the function being generated.
[[nodiscard]]
ASMJIT_INLINE_NODEBUG FuncNode* func() const noexcept { return _func; }
//! Creates a new \ref FuncNode.
ASMJIT_API Error new_func_node(Out<FuncNode*> out, const FuncSignature& signature);
//! Creates a new \ref FuncNode and adds it to the instruction stream.
@@ -106,10 +110,6 @@ public:
//! Creates a new \ref FuncRetNode and adds it to the instruction stream.
ASMJIT_API Error add_func_ret_node(Out<FuncRetNode*> out, const Operand_& o0, const Operand_& o1);
//! Returns the current function.
[[nodiscard]]
ASMJIT_INLINE_NODEBUG FuncNode* func() const noexcept { return _func; }
//! Creates a new \ref FuncNode with the given `signature` and returns it.
ASMJIT_INLINE FuncNode* new_func(const FuncSignature& signature) {
FuncNode* node;
@@ -127,7 +127,12 @@ public:
//! Adds a function `node` to the instruction stream.
ASMJIT_API FuncNode* add_func(FuncNode* ASMJIT_NONNULL(func));
//! Emits a sentinel that marks the end of the current function.
//! Ends the current function by emitting a sentinel that marks the end of it.
//!
//! This would close the context for generating the current function. After calling \ref end_func() the active
//! function node is reset and \ref func() would return `nullptr` unless another function is being started via
//! \ref add_func().
ASMJIT_API Error end_func();
ASMJIT_INLINE Error add_ret(const Operand_& o0, const Operand_& o1) {

View File

@@ -678,7 +678,7 @@ public:
//! \name Labels
//! \{
//! Creates a new label.
//! Creates a new anonymous label.
[[nodiscard]]
ASMJIT_API virtual Label new_label();
@@ -891,6 +891,11 @@ public:
//! Emits a comment stored in `data` with an optional `size` parameter.
ASMJIT_API virtual Error comment(const char* data, size_t size = SIZE_MAX);
//! Emits a comment passed via a `data` span.
ASMJIT_INLINE Error comment(Span<const char> data) {
return comment(data.data(), data.size());
}
//! Emits a formatted comment specified by `fmt` and variable number of arguments.
ASMJIT_API Error commentf(const char* fmt, ...);
//! Emits a formatted comment specified by `fmt` and `ap`.

View File

@@ -874,7 +874,10 @@ enum class FuncAttributes : uint32_t {
kX86_MMXCleanup = 0x00040000u,
//! This flag instructs the epilog writer to emit VZEROUPPER instruction before RET (X86|X86_64).
kX86_AVXCleanup = 0x00080000u
kX86_AVXCleanup = 0x00080000u,
//! This flag instructs the epilog writer to emit VZEROUPPER only if there are dirty vector registers (X86|X86_64).
kX86_AVXAutoCleanup = 0x00100000u
};
ASMJIT_DEFINE_ENUM_FLAGS(FuncAttributes)
@@ -1341,6 +1344,19 @@ public:
//! Disables AVX cleanup.
ASMJIT_INLINE_NODEBUG void reset_avx_cleanup() noexcept { clear_attributes(FuncAttributes::kX86_AVXCleanup); }
//! Tests whether the function has automatic AVX cleanup - 'vzeroupper' instruction in epilog when vector registers are
//! used.
//!
//! \note Automatic cleanup is currently determined via dirty registers, which are provided by \ref FuncFrame.
[[nodiscard]]
ASMJIT_INLINE_NODEBUG bool has_avx_auto_cleanup() const noexcept { return has_attribute(FuncAttributes::kX86_AVXAutoCleanup); }
//! Enables AVX automatic cleanup.
ASMJIT_INLINE_NODEBUG void set_avx_auto_cleanup() noexcept { add_attributes(FuncAttributes::kX86_AVXAutoCleanup); }
//! Disables AVX automatic cleanup.
ASMJIT_INLINE_NODEBUG void reset_avx_auto_cleanup() noexcept { clear_attributes(FuncAttributes::kX86_AVXAutoCleanup); }
//! Tests whether the function uses call stack.
[[nodiscard]]
ASMJIT_INLINE_NODEBUG bool has_call_stack() const noexcept { return _call_stack_size != 0; }

File diff suppressed because it is too large Load Diff

View File

@@ -105,47 +105,64 @@ bool UniCompiler::has_masked_access_of(uint32_t data_size) const noexcept {
}
}
// ujit::UniCompiler - Embed
// =========================
// Embeds a jump table through the backend compiler (`cc`), producing one entry per label in `jump_table`.
//
// Each valid label is emitted via `embed_label_delta()` with `jump_table_base` and `entry_size`; each invalid
// label is emitted as `entry_size` bytes taken from a zero-filled buffer, so every input label still yields an
// entry. The values returned by the `cc` embed calls are not inspected here.
void UniCompiler::embed_jump_table(Span<const Label> jump_table, const Label& jump_table_base, uint32_t entry_size) {
// Zero-filled source buffer used for entries that have no valid label.
// NOTE(review): the buffer holds 8 bytes, so this presumably expects `entry_size <= 8` - confirm with callers.
static const uint8_t zeros[8] {};
for (const Label& label : jump_table) {
if (label.is_valid()) {
cc->embed_label_delta(label, jump_table_base, entry_size);
}
else {
cc->embed(zeros, entry_size);
}
}
}
// ujit::UniCompiler - Function
// ============================
void UniCompiler::init_function(FuncNode* func_node) noexcept {
cc->add_func(func_node);
void UniCompiler::hook_func() noexcept {
FuncNode* func = cc->func();
_func_init_hook = func;
}
_func_node = func_node;
_func_init = cc->cursor();
_func_end = func_node->end_node()->prev();
void UniCompiler::unhook_func() noexcept {
_func_init_hook = nullptr;
}
// ujit::UniCompiler - Constants
// =============================
void UniCompiler::_init_vec_const_table_ptr() noexcept {
void UniCompiler::_init_vec_const_table_ptr() {
const void* global = ct_ptr<void>();
if (!_common_table_ptr.is_valid()) {
ScopedInjector injector(cc, &_func_init);
ScopedInjector injector(cc, &_func_init_hook);
_common_table_ptr = new_gpz("common_table_ptr");
cc->mov(_common_table_ptr, (int64_t)global + _common_table_offset);
}
}
Operand UniCompiler::simd_const(const void* c, Bcst bcst_width, VecWidth const_width) noexcept {
Operand UniCompiler::simd_const(const void* c, Bcst bcst_width, VecWidth const_width) {
return simd_vec_const(c, bcst_width, const_width);
}
Operand UniCompiler::simd_const(const void* c, Bcst bcst_width, const Vec& similar_to) noexcept {
Operand UniCompiler::simd_const(const void* c, Bcst bcst_width, const Vec& similar_to) {
Support::maybe_unused(similar_to);
return simd_vec_const(c, bcst_width, VecWidth::k128);
}
Operand UniCompiler::simd_const(const void* c, Bcst bcst_width, const VecArray& similar_to) noexcept {
Operand UniCompiler::simd_const(const void* c, Bcst bcst_width, const VecArray& similar_to) {
ASMJIT_ASSERT(!similar_to.is_empty());
Support::maybe_unused(bcst_width, similar_to);
return simd_vec_const(c, bcst_width, VecWidth::k128);
}
Vec UniCompiler::simd_vec_const(const void* c, Bcst bcst_width, VecWidth const_width) noexcept {
Vec UniCompiler::simd_vec_const(const void* c, Bcst bcst_width, VecWidth const_width) {
Support::maybe_unused(bcst_width);
Support::maybe_unused(const_width);
@@ -159,32 +176,32 @@ Vec UniCompiler::simd_vec_const(const void* c, Bcst bcst_width, VecWidth const_w
return Vec(OperandSignature{RegTraits<RegType::kVec128>::kSignature}, _new_vec_const(c, true).id());
}
Vec UniCompiler::simd_vec_const(const void* c, Bcst bcst_width, const Vec& similar_to) noexcept {
Vec UniCompiler::simd_vec_const(const void* c, Bcst bcst_width, const Vec& similar_to) {
Support::maybe_unused(similar_to);
return simd_vec_const(c, bcst_width, VecWidth::k128);
}
Vec UniCompiler::simd_vec_const(const void* c, Bcst bcst_width, const VecArray& similar_to) noexcept {
Vec UniCompiler::simd_vec_const(const void* c, Bcst bcst_width, const VecArray& similar_to) {
Support::maybe_unused(similar_to);
return simd_vec_const(c, bcst_width, VecWidth::k128);
}
Mem UniCompiler::simd_mem_const(const void* c, Bcst bcst_width, VecWidth const_width) noexcept {
Mem UniCompiler::simd_mem_const(const void* c, Bcst bcst_width, VecWidth const_width) {
Support::maybe_unused(bcst_width, const_width);
return _get_mem_const(c);
}
Mem UniCompiler::simd_mem_const(const void* c, Bcst bcst_width, const Vec& similar_to) noexcept {
Mem UniCompiler::simd_mem_const(const void* c, Bcst bcst_width, const Vec& similar_to) {
Support::maybe_unused(bcst_width, similar_to);
return _get_mem_const(c);
}
Mem UniCompiler::simd_mem_const(const void* c, Bcst bcst_width, const VecArray& similar_to) noexcept {
Mem UniCompiler::simd_mem_const(const void* c, Bcst bcst_width, const VecArray& similar_to) {
Support::maybe_unused(bcst_width, similar_to);
return _get_mem_const(c);
}
Mem UniCompiler::_get_mem_const(const void* c) noexcept {
Mem UniCompiler::_get_mem_const(const void* c) {
// Make sure we are addressing a constant from the `ct` constant pool.
const void* ct_addr = ct_ptr<void>();
ASMJIT_ASSERT((uintptr_t)c >= (uintptr_t)ct_addr &&
@@ -197,7 +214,7 @@ Mem UniCompiler::_get_mem_const(const void* c) noexcept {
return mem_ptr(_common_table_ptr, disp - _common_table_offset);
}
Vec UniCompiler::_new_vec_const(const void* c, bool is_unique_const) noexcept {
Vec UniCompiler::_new_vec_const(const void* c, bool is_unique_const) {
Support::maybe_unused(is_unique_const);
Vec vec;
@@ -226,21 +243,21 @@ Vec UniCompiler::_new_vec_const(const void* c, bool is_unique_const) noexcept {
_vec_consts.append(arena(), const_data);
if (c == &ct().p_0000000000000000) {
ScopedInjector inject(cc, &_func_init);
ScopedInjector inject(cc, &_func_init_hook);
v_zero_i(vec.v128());
}
else {
// NOTE: _get_mem_const() must be outside of injected code as it uses injection too.
Mem m = _get_mem_const(c);
ScopedInjector inject(cc, &_func_init);
ScopedInjector inject(cc, &_func_init_hook);
v_loadavec(vec, m);
}
return vec;
}
Vec UniCompiler::simd_const_16b(const void* data16) noexcept {
Vec UniCompiler::simd_const_16b(const void* data16) {
size_t n = _vec_consts_ex.size();
for (size_t i = 0; i < n; i++) {
@@ -258,7 +275,7 @@ Vec UniCompiler::simd_const_16b(const void* data16) noexcept {
Mem mem = cc->new_const(ConstPoolScope::kLocal, data16, 16);
{
ScopedInjector inject(cc, &_func_init);
ScopedInjector inject(cc, &_func_init_hook);
v_loadavec(vec, mem);
}
@@ -268,7 +285,7 @@ Vec UniCompiler::simd_const_16b(const void* data16) noexcept {
// ujit::UniCompiler - Stack
// =========================
Mem UniCompiler::tmp_stack(StackId id, uint32_t size) noexcept {
Mem UniCompiler::tmp_stack(StackId id, uint32_t size) {
ASMJIT_ASSERT(Support::is_power_of_2(size));
ASMJIT_ASSERT(size <= 32);
@@ -282,22 +299,6 @@ Mem UniCompiler::tmp_stack(StackId id, uint32_t size) noexcept {
return stack;
}
// ujit::UniCompiler - Utilities
// =============================
void UniCompiler::embed_jump_table(const Label* jump_table, size_t jump_table_size, const Label& jump_table_base, uint32_t entry_size) noexcept {
static const uint8_t zeros[8] {};
for (size_t i = 0; i < jump_table_size; i++) {
if (jump_table[i].is_valid()) {
cc->embed_label_delta(jump_table[i], jump_table_base, entry_size);
}
else {
cc->embed(zeros, entry_size);
}
}
}
// ujit::UniCompiler - General Purpose Instructions - Utilities
// ============================================================
@@ -306,7 +307,7 @@ struct MemInst {
uint16_t mem_size;
};
static ASMJIT_NOINLINE void gp_emit_mem_op(UniCompiler& uc, Gp r, Mem m, MemInst ii) noexcept {
static ASMJIT_NOINLINE void gp_emit_mem_op(UniCompiler& uc, Gp r, Mem m, MemInst ii) {
BackendCompiler* cc = uc.cc;
InstId inst_id = ii.inst_id;
@@ -332,7 +333,7 @@ static ASMJIT_INLINE const Gp& gp_zero_as(const Gp& ref) noexcept {
return gp_zero_regs[size_t(ref.is_gp64())];
}
static ASMJIT_NOINLINE Gp gp_force_reg(UniCompiler& uc, const Operand_& op, const Gp& ref) noexcept {
static ASMJIT_NOINLINE Gp gp_force_reg(UniCompiler& uc, const Operand_& op, const Gp& ref) {
ASMJIT_ASSERT(op.is_gp() || op.is_mem() || op.is_imm());
Gp reg;
@@ -416,7 +417,7 @@ public:
cond = a64::reverse_cond(cond);
}
ASMJIT_NOINLINE void emit(UniCompiler& uc) noexcept {
ASMJIT_NOINLINE void emit(UniCompiler& uc) {
BackendCompiler* cc = uc.cc;
ConditionOpInfo info = condition_op_info[size_t(op)];
@@ -514,7 +515,7 @@ public:
// ujit::UniCompiler - General Purpose Instructions - Emit
// =======================================================
void UniCompiler::emit_mov(const Gp& dst, const Operand_& src) noexcept {
void UniCompiler::emit_mov(const Gp& dst, const Operand_& src) {
if (src.is_mem()) {
gp_emit_mem_op(*this, dst, src.as<Mem>(), MemInst{uint16_t(Inst::kIdLdr), uint16_t(dst.size())});
}
@@ -523,8 +524,9 @@ void UniCompiler::emit_mov(const Gp& dst, const Operand_& src) noexcept {
}
}
void UniCompiler::emit_m(UniOpM op, const Mem& m_) noexcept {
void UniCompiler::emit_m(UniOpM op, const Mem& m_) {
static constexpr MemInst st_inst[] = {
{ Inst::kIdNone, 0 }, // kPrefetch
{ Inst::kIdStr , 0 }, // kStoreZeroReg
{ Inst::kIdStrb, 1 }, // kStoreZeroU8
{ Inst::kIdStrh, 2 }, // kStoreZeroU16
@@ -535,10 +537,14 @@ void UniCompiler::emit_m(UniOpM op, const Mem& m_) noexcept {
Gp zero = gp_zero_regs[size_t(op == UniOpM::kStoreZeroReg || op == UniOpM::kStoreZeroU64)];
MemInst ii = st_inst[size_t(op)];
if (!ii.inst_id) {
return;
}
gp_emit_mem_op(*this, zero, m_, ii);
}
void UniCompiler::emit_rm(UniOpRM op, const Gp& dst, const Mem& src) noexcept {
void UniCompiler::emit_rm(UniOpRM op, const Gp& dst, const Mem& src) {
static constexpr MemInst ld_inst[] = {
{ Inst::kIdLdr , 0 }, // kLoadReg
{ Inst::kIdLdrsb, 1 }, // kLoadI8
@@ -614,7 +620,7 @@ struct UniOpMRInfo {
uint32_t size : 4;
};
void UniCompiler::emit_mr(UniOpMR op, const Mem& dst, const Gp& src) noexcept {
void UniCompiler::emit_mr(UniOpMR op, const Mem& dst, const Gp& src) {
static constexpr UniOpMRInfo op_info_table[] = {
{ Inst::kIdNone, Inst::kIdStr , 0 }, // kStoreReg
{ Inst::kIdNone, Inst::kIdStrb, 1 }, // kStoreU8
@@ -653,14 +659,14 @@ void UniCompiler::emit_mr(UniOpMR op, const Mem& dst, const Gp& src) noexcept {
}
}
void UniCompiler::emit_cmov(const Gp& dst, const Operand_& sel, const UniCondition& condition) noexcept {
void UniCompiler::emit_cmov(const Gp& dst, const Operand_& sel, const UniCondition& condition) {
ConditionApplier ca(condition);
ca.optimize(*this);
ca.emit(*this);
cc->csel(dst, gp_force_reg(*this, sel, dst), dst, condition.cond);
}
void UniCompiler::emit_select(const Gp& dst, const Operand_& sel1_, const Operand_& sel2_, const UniCondition& condition) noexcept {
void UniCompiler::emit_select(const Gp& dst, const Operand_& sel1_, const Operand_& sel2_, const UniCondition& condition) {
ConditionApplier ca(condition);
ca.optimize(*this);
ca.emit(*this);
@@ -670,7 +676,7 @@ void UniCompiler::emit_select(const Gp& dst, const Operand_& sel1_, const Operan
cc->csel(dst, sel1, sel2, condition.cond);
}
void UniCompiler::emit_2i(UniOpRR op, const Gp& dst, const Operand_& src_) noexcept {
void UniCompiler::emit_2i(UniOpRR op, const Gp& dst, const Operand_& src_) {
// ArithOp Reg, Any
// ----------------
@@ -749,7 +755,7 @@ static ASMJIT_INLINE_NODEBUG bool is_op_3i_commutative(UniOpRRR op) noexcept {
return (kOp3ICommutativeMask & (uint64_t(1) << unsigned(op))) != 0;
}
void UniCompiler::emit_3i(UniOpRRR op, const Gp& dst, const Operand_& src1_, const Operand_& src2_) noexcept {
void UniCompiler::emit_3i(UniOpRRR op, const Gp& dst, const Operand_& src1_, const Operand_& src2_) {
Operand src1(src1_);
Operand src2(src2_);
@@ -1020,18 +1026,18 @@ void UniCompiler::emit_3i(UniOpRRR op, const Gp& dst, const Operand_& src1_, con
}
}
void UniCompiler::emit_j(const Operand_& target) noexcept {
void UniCompiler::emit_j(const Operand_& target) {
cc->emit(Inst::kIdB, target);
}
void UniCompiler::emit_j_if(const Label& target, const UniCondition& condition) noexcept {
void UniCompiler::emit_j_if(const Label& target, const UniCondition& condition) {
ConditionApplier ca(condition);
ca.optimize(*this);
ca.emit(*this);
cc->b(ca.cond, target);
}
void UniCompiler::adds_u8(const Gp& dst, const Gp& src1, const Gp& src2) noexcept {
void UniCompiler::adds_u8(const Gp& dst, const Gp& src1, const Gp& src2) {
ASMJIT_ASSERT(dst.size() == src1.size());
ASMJIT_ASSERT(dst.size() == src2.size());
@@ -1047,11 +1053,11 @@ void UniCompiler::adds_u8(const Gp& dst, const Gp& src1, const Gp& src2) noexcep
cc->csel(dst, dst, tmp, CondCode::kLO);
}
void UniCompiler::inv_u8(const Gp& dst, const Gp& src) noexcept {
void UniCompiler::inv_u8(const Gp& dst, const Gp& src) {
cc->eor(dst, src, 0xFF);
}
void UniCompiler::div_255_u32(const Gp& dst, const Gp& src) noexcept {
void UniCompiler::div_255_u32(const Gp& dst, const Gp& src) {
ASMJIT_ASSERT(dst.size() == src.size());
// dst = src + 128;
@@ -1061,13 +1067,13 @@ void UniCompiler::div_255_u32(const Gp& dst, const Gp& src) noexcept {
cc->lsr(dst, dst, 8);
}
void UniCompiler::mul_257_hu16(const Gp& dst, const Gp& src) noexcept {
void UniCompiler::mul_257_hu16(const Gp& dst, const Gp& src) {
ASMJIT_ASSERT(dst.size() == src.size());
cc->add(dst, src, src, a64::lsl(8));
cc->lsr(dst, dst, 16);
}
void UniCompiler::add_scaled(const Gp& dst, const Gp& a_, int b) noexcept {
void UniCompiler::add_scaled(const Gp& dst, const Gp& a_, int b) {
Gp a = a_.clone_as(dst);
if (Support::is_power_of_2(b)) {
@@ -1084,7 +1090,7 @@ void UniCompiler::add_scaled(const Gp& dst, const Gp& a_, int b) noexcept {
}
}
void UniCompiler::add_ext(const Gp& dst, const Gp& src_, const Gp& idx_, uint32_t scale, int32_t disp) noexcept {
void UniCompiler::add_ext(const Gp& dst, const Gp& src_, const Gp& idx_, uint32_t scale, int32_t disp) {
ASMJIT_ASSERT(scale != 0);
Gp src = src_.clone_as(dst);
@@ -1115,7 +1121,7 @@ void UniCompiler::add_ext(const Gp& dst, const Gp& src_, const Gp& idx_, uint32_
}
}
void UniCompiler::lea(const Gp& dst, const Mem& src) noexcept {
void UniCompiler::lea(const Gp& dst, const Mem& src) {
Gp base = src.base_reg().as<Gp>();
if (src.has_index()) {
@@ -1829,7 +1835,7 @@ static ASMJIT_INLINE void vec_set_type_and_index(Vec& vec, ElementSize sz, uint3
vec.set_element_index(idx);
}
static ASMJIT_NOINLINE void vec_load_mem(UniCompiler& uc, const Vec& dst, Mem src, uint32_t mem_size) noexcept {
static ASMJIT_NOINLINE void vec_load_mem(UniCompiler& uc, const Vec& dst, Mem src, uint32_t mem_size) {
BackendCompiler* cc = uc.cc;
if (src.has_index() && src.has_shift()) {
@@ -1863,7 +1869,7 @@ static ASMJIT_NOINLINE void vec_load_mem(UniCompiler& uc, const Vec& dst, Mem sr
}
}
static ASMJIT_NOINLINE Vec vec_from_mem(UniCompiler& uc, const Mem& op, const Vec& ref, uint32_t mem_size = 0) noexcept {
static ASMJIT_NOINLINE Vec vec_from_mem(UniCompiler& uc, const Mem& op, const Vec& ref, uint32_t mem_size = 0) {
Vec vec = uc.new_vec128("@tmp");
if (mem_size == 0)
mem_size = ref.size();
@@ -1871,21 +1877,21 @@ static ASMJIT_NOINLINE Vec vec_from_mem(UniCompiler& uc, const Mem& op, const Ve
return vec.clone_as(ref);
}
static ASMJIT_INLINE Vec as_vec(UniCompiler& uc, const Operand_& op, const Vec& ref, uint32_t mem_size = 0) noexcept {
static ASMJIT_INLINE Vec as_vec(UniCompiler& uc, const Operand_& op, const Vec& ref, uint32_t mem_size = 0) {
if (op.is_vec())
return op.as<Vec>().clone_as(ref);
else
return vec_from_mem(uc, op.as<Mem>(), ref, mem_size);
}
static ASMJIT_INLINE Vec as_vec(UniCompiler& uc, const Operand_& op, const Vec& ref, FloatMode fm) noexcept {
static ASMJIT_INLINE Vec as_vec(UniCompiler& uc, const Operand_& op, const Vec& ref, FloatMode fm) {
if (op.is_vec())
return op.as<Vec>().clone_as(ref);
else
return vec_from_mem(uc, op.as<Mem>(), ref, float_mode_mem_size_table[size_t(fm)]);
}
static ASMJIT_NOINLINE Vec vec_mov(UniCompiler& uc, const Vec& dst_, const Operand_& src_) noexcept {
static ASMJIT_NOINLINE Vec vec_mov(UniCompiler& uc, const Vec& dst_, const Operand_& src_) {
BackendCompiler* cc = uc.cc;
Vec dst(dst_);
@@ -1908,7 +1914,7 @@ static ASMJIT_NOINLINE Vec vec_mov(UniCompiler& uc, const Vec& dst_, const Opera
ASMJIT_NOT_REACHED();
}
static ASMJIT_NOINLINE void vec_neg(UniCompiler& uc, const Vec& dst, const Vec& src, FloatMode fm) noexcept {
static ASMJIT_NOINLINE void vec_neg(UniCompiler& uc, const Vec& dst, const Vec& src, FloatMode fm) {
BackendCompiler* cc = uc.cc;
if (fm == FloatMode::kF32S)
@@ -2129,7 +2135,7 @@ static constexpr Swizzle32Data swizzle_32_data[256] = {
#undef OP
static void emit_swizzle32_impl(UniCompiler& uc, const Vec& dst, const Vec& src, uint32_t imm) noexcept {
static void emit_swizzle32_impl(UniCompiler& uc, const Vec& dst, const Vec& src, uint32_t imm) {
ASMJIT_ASSERT((imm & 0xFCFCFCFC) == 0);
BackendCompiler* cc = uc.cc;
@@ -2528,7 +2534,7 @@ static constexpr InterleavedShuffle32Ops interleaved_shuffle32_ops_dst_same_as_b
#undef OP
static void emit_interleaved_shuffle32_impl(UniCompiler& uc, const Vec& dst, const Vec& src1, const Vec& src2, uint32_t imm) noexcept {
static void emit_interleaved_shuffle32_impl(UniCompiler& uc, const Vec& dst, const Vec& src1, const Vec& src2, uint32_t imm) {
ASMJIT_ASSERT((imm & 0xFCFCFCFC) == 0);
if (src1.id() == src2.id())
@@ -2710,7 +2716,7 @@ public:
};
template<typename Src>
static ASMJIT_INLINE void emit_2v_t(UniCompiler& uc, UniOpVV op, const OpArray& dst_, const Src& src_) noexcept {
static ASMJIT_INLINE void emit_2v_t(UniCompiler& uc, UniOpVV op, const OpArray& dst_, const Src& src_) {
size_t n = dst_.size();
OpArrayIter<Src> src(src_);
@@ -2721,7 +2727,7 @@ static ASMJIT_INLINE void emit_2v_t(UniCompiler& uc, UniOpVV op, const OpArray&
}
template<typename Src>
static ASMJIT_INLINE void emit_2vi_t(UniCompiler& uc, UniOpVVI op, const OpArray& dst_, const Src& src_, uint32_t imm) noexcept {
static ASMJIT_INLINE void emit_2vi_t(UniCompiler& uc, UniOpVVI op, const OpArray& dst_, const Src& src_, uint32_t imm) {
size_t n = dst_.size();
OpArrayIter<Src> src(src_);
@@ -2732,7 +2738,7 @@ static ASMJIT_INLINE void emit_2vi_t(UniCompiler& uc, UniOpVVI op, const OpArray
}
template<typename Src1, typename Src2>
static ASMJIT_INLINE void emit_3v_t(UniCompiler& uc, UniOpVVV op, const OpArray& dst_, const Src1& src1_, const Src2& src2_) noexcept {
static ASMJIT_INLINE void emit_3v_t(UniCompiler& uc, UniOpVVV op, const OpArray& dst_, const Src1& src1_, const Src2& src2_) {
size_t n = dst_.size();
OpArrayIter<Src1> src1(src1_);
OpArrayIter<Src2> src2(src2_);
@@ -2745,7 +2751,7 @@ static ASMJIT_INLINE void emit_3v_t(UniCompiler& uc, UniOpVVV op, const OpArray&
}
template<typename Src1, typename Src2>
static ASMJIT_INLINE void emit_3vi_t(UniCompiler& uc, UniOpVVVI op, const OpArray& dst_, const Src1& src1_, const Src2& src2_, uint32_t imm) noexcept {
static ASMJIT_INLINE void emit_3vi_t(UniCompiler& uc, UniOpVVVI op, const OpArray& dst_, const Src1& src1_, const Src2& src2_, uint32_t imm) {
size_t n = dst_.size();
OpArrayIter<Src1> src1(src1_);
OpArrayIter<Src2> src2(src2_);
@@ -2758,7 +2764,7 @@ static ASMJIT_INLINE void emit_3vi_t(UniCompiler& uc, UniOpVVVI op, const OpArra
}
template<typename Src1, typename Src2, typename Src3>
static ASMJIT_INLINE void emit_4v_t(UniCompiler& uc, UniOpVVVV op, const OpArray& dst_, const Src1& src1_, const Src2& src2_, const Src3& src3_) noexcept {
static ASMJIT_INLINE void emit_4v_t(UniCompiler& uc, UniOpVVVV op, const OpArray& dst_, const Src1& src1_, const Src2& src2_, const Src3& src3_) {
size_t n = dst_.size();
OpArrayIter<Src1> src1(src1_);
OpArrayIter<Src2> src2(src2_);
@@ -2775,7 +2781,7 @@ static ASMJIT_INLINE void emit_4v_t(UniCompiler& uc, UniOpVVVV op, const OpArray
// ujit::UniCompiler - Vector Instructions - Emit 2V
// =================================================
void UniCompiler::emit_2v(UniOpVV op, const Operand_& dst_, const Operand_& src_) noexcept {
void UniCompiler::emit_2v(UniOpVV op, const Operand_& dst_, const Operand_& src_) {
ASMJIT_ASSERT(dst_.is_vec());
Vec dst(dst_.as<Vec>());
@@ -3146,13 +3152,13 @@ void UniCompiler::emit_2v(UniOpVV op, const Operand_& dst_, const Operand_& src_
}
}
// OpArray overloads of emit_2v(): each one forwards its arguments unchanged to emit_2v_t(*this, ...).
// NOTE(review): diff residue - the two `noexcept` lines are the pre-change signatures, the two lines that follow
// are the post-change signatures without `noexcept`.
void UniCompiler::emit_2v(UniOpVV op, const OpArray& dst_, const Operand_& src_) noexcept { emit_2v_t(*this, op, dst_, src_); }
void UniCompiler::emit_2v(UniOpVV op, const OpArray& dst_, const OpArray& src_) noexcept { emit_2v_t(*this, op, dst_, src_); }
void UniCompiler::emit_2v(UniOpVV op, const OpArray& dst_, const Operand_& src_) { emit_2v_t(*this, op, dst_, src_); }
void UniCompiler::emit_2v(UniOpVV op, const OpArray& dst_, const OpArray& src_) { emit_2v_t(*this, op, dst_, src_); }
// ujit::UniCompiler - Vector Instructions - Emit 2VI
// ==================================================
void UniCompiler::emit_2vi(UniOpVVI op, const Operand_& dst_, const Operand_& src_, uint32_t imm) noexcept {
void UniCompiler::emit_2vi(UniOpVVI op, const Operand_& dst_, const Operand_& src_, uint32_t imm) {
ASMJIT_ASSERT(dst_.is_vec());
Vec dst(dst_.as<Vec>());
@@ -3292,13 +3298,13 @@ void UniCompiler::emit_2vi(UniOpVVI op, const Operand_& dst_, const Operand_& sr
}
}
// OpArray overloads of emit_2vi(): each one forwards its arguments (including `imm`) unchanged to
// emit_2vi_t(*this, ...).
// NOTE(review): diff residue - the two `noexcept` lines are the pre-change signatures, the two lines that follow
// are the post-change signatures without `noexcept`.
void UniCompiler::emit_2vi(UniOpVVI op, const OpArray& dst_, const Operand_& src_, uint32_t imm) noexcept { emit_2vi_t(*this, op, dst_, src_, imm); }
void UniCompiler::emit_2vi(UniOpVVI op, const OpArray& dst_, const OpArray& src_, uint32_t imm) noexcept { emit_2vi_t(*this, op, dst_, src_, imm); }
void UniCompiler::emit_2vi(UniOpVVI op, const OpArray& dst_, const Operand_& src_, uint32_t imm) { emit_2vi_t(*this, op, dst_, src_, imm); }
void UniCompiler::emit_2vi(UniOpVVI op, const OpArray& dst_, const OpArray& src_, uint32_t imm) { emit_2vi_t(*this, op, dst_, src_, imm); }
// ujit::UniCompiler - Vector Instructions - Emit 2VS
// ==================================================
void UniCompiler::emit_2vs(UniOpVR op, const Operand_& dst_, const Operand_& src_, uint32_t idx) noexcept {
void UniCompiler::emit_2vs(UniOpVR op, const Operand_& dst_, const Operand_& src_, uint32_t idx) {
UniOpVInfo op_info = opcode_info_2vs[size_t(op)];
switch (op) {
@@ -3415,7 +3421,7 @@ void UniCompiler::emit_2vs(UniOpVR op, const Operand_& dst_, const Operand_& src
// ujit::UniCompiler - Vector Instructions - Emit 2VM
// ==================================================
void UniCompiler::emit_vm(UniOpVM op, const Vec& dst_, const Mem& src_, Alignment alignment, uint32_t idx) noexcept {
void UniCompiler::emit_vm(UniOpVM op, const Vec& dst_, const Mem& src_, Alignment alignment, uint32_t idx) {
ASMJIT_ASSERT(dst_.is_vec());
ASMJIT_ASSERT(src_.is_mem());
@@ -3548,7 +3554,7 @@ void UniCompiler::emit_vm(UniOpVM op, const Vec& dst_, const Mem& src_, Alignmen
}
}
void UniCompiler::emit_vm(UniOpVM op, const OpArray& dst_, const Mem& src_, Alignment alignment, uint32_t idx) noexcept {
void UniCompiler::emit_vm(UniOpVM op, const OpArray& dst_, const Mem& src_, Alignment alignment, uint32_t idx) {
Support::maybe_unused(alignment);
size_t i = 0;
@@ -3594,7 +3600,7 @@ void UniCompiler::emit_vm(UniOpVM op, const OpArray& dst_, const Mem& src_, Alig
}
}
void UniCompiler::emit_mv(UniOpMV op, const Mem& dst_, const Vec& src_, Alignment alignment, uint32_t idx) noexcept {
void UniCompiler::emit_mv(UniOpMV op, const Mem& dst_, const Vec& src_, Alignment alignment, uint32_t idx) {
ASMJIT_ASSERT(dst_.is_mem());
ASMJIT_ASSERT(src_.is_vec());
@@ -3737,7 +3743,7 @@ void UniCompiler::emit_mv(UniOpMV op, const Mem& dst_, const Vec& src_, Alignmen
}
}
void UniCompiler::emit_mv(UniOpMV op, const Mem& dst_, const OpArray& src_, Alignment alignment, uint32_t idx) noexcept {
void UniCompiler::emit_mv(UniOpMV op, const Mem& dst_, const OpArray& src_, Alignment alignment, uint32_t idx) {
Support::maybe_unused(alignment);
size_t i = 0;
@@ -3794,7 +3800,7 @@ static void emit_3v_op(
FloatMode float_mode,
ElementSize dst_element, VecPart dst_part,
ElementSize src_element, VecPart src_part,
uint32_t reversed) noexcept {
uint32_t reversed) {
Vec src2;
@@ -3839,7 +3845,7 @@ static void emit_3v_op(
cc->emit(inst_id, dst, src1, src2);
}
void UniCompiler::emit_3v(UniOpVVV op, const Operand_& dst_, const Operand_& src1_, const Operand_& src2_) noexcept {
void UniCompiler::emit_3v(UniOpVVV op, const Operand_& dst_, const Operand_& src1_, const Operand_& src2_) {
ASMJIT_ASSERT(dst_.is_vec());
ASMJIT_ASSERT(src1_.is_vec());
@@ -4148,14 +4154,14 @@ void UniCompiler::emit_3v(UniOpVVV op, const Operand_& dst_, const Operand_& src
}
}
// OpArray overloads of emit_3v(): the three combinations of Operand_/OpArray sources all forward unchanged to
// emit_3v_t(*this, ...).
// NOTE(review): diff residue - the three `noexcept` lines are the pre-change signatures, the three lines that
// follow are the post-change signatures without `noexcept`.
void UniCompiler::emit_3v(UniOpVVV op, const OpArray& dst_, const Operand_& src1_, const OpArray& src2_) noexcept { emit_3v_t(*this, op, dst_, src1_, src2_); }
void UniCompiler::emit_3v(UniOpVVV op, const OpArray& dst_, const OpArray& src1_, const Operand_& src2_) noexcept { emit_3v_t(*this, op, dst_, src1_, src2_); }
void UniCompiler::emit_3v(UniOpVVV op, const OpArray& dst_, const OpArray& src1_, const OpArray& src2_) noexcept { emit_3v_t(*this, op, dst_, src1_, src2_); }
void UniCompiler::emit_3v(UniOpVVV op, const OpArray& dst_, const Operand_& src1_, const OpArray& src2_) { emit_3v_t(*this, op, dst_, src1_, src2_); }
void UniCompiler::emit_3v(UniOpVVV op, const OpArray& dst_, const OpArray& src1_, const Operand_& src2_) { emit_3v_t(*this, op, dst_, src1_, src2_); }
void UniCompiler::emit_3v(UniOpVVV op, const OpArray& dst_, const OpArray& src1_, const OpArray& src2_) { emit_3v_t(*this, op, dst_, src1_, src2_); }
// ujit::UniCompiler - Vector Instructions - Emit 3VI
// ==================================================
void UniCompiler::emit_3vi(UniOpVVVI op, const Operand_& dst_, const Operand_& src1_, const Operand_& src2_, uint32_t imm) noexcept {
void UniCompiler::emit_3vi(UniOpVVVI op, const Operand_& dst_, const Operand_& src1_, const Operand_& src2_, uint32_t imm) {
ASMJIT_ASSERT(dst_.is_vec());
ASMJIT_ASSERT(src1_.is_vec());
@@ -4231,14 +4237,14 @@ void UniCompiler::emit_3vi(UniOpVVVI op, const Operand_& dst_, const Operand_& s
}
}
// OpArray overloads of emit_3vi(): the three combinations of Operand_/OpArray sources all forward unchanged
// (including `imm`) to emit_3vi_t(*this, ...).
// NOTE(review): diff residue - the three `noexcept` lines are the pre-change signatures, the three lines that
// follow are the post-change signatures without `noexcept`.
void UniCompiler::emit_3vi(UniOpVVVI op, const OpArray& dst_, const Operand_& src1_, const OpArray& src2_, uint32_t imm) noexcept { emit_3vi_t(*this, op, dst_, src1_, src2_, imm); }
void UniCompiler::emit_3vi(UniOpVVVI op, const OpArray& dst_, const OpArray& src1_, const Operand_& src2_, uint32_t imm) noexcept { emit_3vi_t(*this, op, dst_, src1_, src2_, imm); }
void UniCompiler::emit_3vi(UniOpVVVI op, const OpArray& dst_, const OpArray& src1_, const OpArray& src2_, uint32_t imm) noexcept { emit_3vi_t(*this, op, dst_, src1_, src2_, imm); }
void UniCompiler::emit_3vi(UniOpVVVI op, const OpArray& dst_, const Operand_& src1_, const OpArray& src2_, uint32_t imm) { emit_3vi_t(*this, op, dst_, src1_, src2_, imm); }
void UniCompiler::emit_3vi(UniOpVVVI op, const OpArray& dst_, const OpArray& src1_, const Operand_& src2_, uint32_t imm) { emit_3vi_t(*this, op, dst_, src1_, src2_, imm); }
void UniCompiler::emit_3vi(UniOpVVVI op, const OpArray& dst_, const OpArray& src1_, const OpArray& src2_, uint32_t imm) { emit_3vi_t(*this, op, dst_, src1_, src2_, imm); }
// ujit::UniCompiler - Vector Instructions - Emit 4V
// =================================================
void UniCompiler::emit_4v(UniOpVVVV op, const Operand_& dst_, const Operand_& src1_, const Operand_& src2_, const Operand_& src3_) noexcept {
void UniCompiler::emit_4v(UniOpVVVV op, const Operand_& dst_, const Operand_& src1_, const Operand_& src2_, const Operand_& src3_) {
ASMJIT_ASSERT(dst_.is_vec());
ASMJIT_ASSERT(src1_.is_vec());
@@ -4372,13 +4378,13 @@ void UniCompiler::emit_4v(UniOpVVVV op, const Operand_& dst_, const Operand_& sr
}
}
// OpArray overloads of emit_4v(): seven combinations of single-operand/OpArray sources, all forwarding unchanged
// to emit_4v_t(*this, ...).
// NOTE(review): diff residue - the first seven lines (`noexcept`) are the pre-change signatures, the last seven
// are the post-change signatures without `noexcept`.
// NOTE(review): three overloads take `src3_` as `const Operand&` while every other single-operand parameter here
// is `const Operand_&` - confirm this matches the declarations in the header before touching it.
void UniCompiler::emit_4v(UniOpVVVV op, const OpArray& dst_, const Operand_& src1_, const Operand_& src2_, const OpArray& src3_) noexcept { emit_4v_t(*this, op, dst_, src1_, src2_, src3_); }
void UniCompiler::emit_4v(UniOpVVVV op, const OpArray& dst_, const Operand_& src1_, const OpArray& src2_, const Operand& src3_) noexcept { emit_4v_t(*this, op, dst_, src1_, src2_, src3_); }
void UniCompiler::emit_4v(UniOpVVVV op, const OpArray& dst_, const Operand_& src1_, const OpArray& src2_, const OpArray& src3_) noexcept { emit_4v_t(*this, op, dst_, src1_, src2_, src3_); }
void UniCompiler::emit_4v(UniOpVVVV op, const OpArray& dst_, const OpArray& src1_, const Operand_& src2_, const Operand& src3_) noexcept { emit_4v_t(*this, op, dst_, src1_, src2_, src3_); }
void UniCompiler::emit_4v(UniOpVVVV op, const OpArray& dst_, const OpArray& src1_, const Operand_& src2_, const OpArray& src3_) noexcept { emit_4v_t(*this, op, dst_, src1_, src2_, src3_); }
void UniCompiler::emit_4v(UniOpVVVV op, const OpArray& dst_, const OpArray& src1_, const OpArray& src2_, const Operand& src3_) noexcept { emit_4v_t(*this, op, dst_, src1_, src2_, src3_); }
void UniCompiler::emit_4v(UniOpVVVV op, const OpArray& dst_, const OpArray& src1_, const OpArray& src2_, const OpArray& src3_) noexcept { emit_4v_t(*this, op, dst_, src1_, src2_, src3_); }
void UniCompiler::emit_4v(UniOpVVVV op, const OpArray& dst_, const Operand_& src1_, const Operand_& src2_, const OpArray& src3_) { emit_4v_t(*this, op, dst_, src1_, src2_, src3_); }
void UniCompiler::emit_4v(UniOpVVVV op, const OpArray& dst_, const Operand_& src1_, const OpArray& src2_, const Operand& src3_) { emit_4v_t(*this, op, dst_, src1_, src2_, src3_); }
void UniCompiler::emit_4v(UniOpVVVV op, const OpArray& dst_, const Operand_& src1_, const OpArray& src2_, const OpArray& src3_) { emit_4v_t(*this, op, dst_, src1_, src2_, src3_); }
void UniCompiler::emit_4v(UniOpVVVV op, const OpArray& dst_, const OpArray& src1_, const Operand_& src2_, const Operand& src3_) { emit_4v_t(*this, op, dst_, src1_, src2_, src3_); }
void UniCompiler::emit_4v(UniOpVVVV op, const OpArray& dst_, const OpArray& src1_, const Operand_& src2_, const OpArray& src3_) { emit_4v_t(*this, op, dst_, src1_, src2_, src3_); }
void UniCompiler::emit_4v(UniOpVVVV op, const OpArray& dst_, const OpArray& src1_, const OpArray& src2_, const Operand& src3_) { emit_4v_t(*this, op, dst_, src1_, src2_, src3_); }
void UniCompiler::emit_4v(UniOpVVVV op, const OpArray& dst_, const OpArray& src1_, const OpArray& src2_, const OpArray& src3_) { emit_4v_t(*this, op, dst_, src1_, src2_, src3_); }
ASMJIT_END_SUB_NAMESPACE

View File

@@ -147,51 +147,68 @@ bool UniCompiler::has_masked_access_of(uint32_t data_size) const noexcept {
}
}
// ujit::UniCompiler - Embed
// =========================
// Embeds one entry of `entry_size` bytes per element of `jump_table`, in iteration order.
//
// A valid label is embedded via embed_label_delta(label, jump_table_base, entry_size); an invalid label is
// embedded as `entry_size` bytes taken from a zero-filled buffer.
void UniCompiler::embed_jump_table(Span<const Label> jump_table, const Label& jump_table_base, uint32_t entry_size) {
// NOTE(review): `zeros` holds 8 bytes, so this presumably relies on `entry_size <= 8` - nothing here checks it.
static const uint8_t zeros[8] {};
for (const Label& label : jump_table) {
if (label.is_valid()) {
cc->embed_label_delta(label, jump_table_base, entry_size);
}
else {
cc->embed(zeros, entry_size);
}
}
}
// ujit::UniCompiler - Function
// ============================
// NOTE(review): diff residue - the removed init_function() body and the added hook_func()/unhook_func() bodies
// are interleaved below. Lines that use `func_node`, `_func_node`, `_func_init` and `_func_end` appear to belong
// to the removed function; lines that use `func` and `_func_init_hook` appear to belong to the added ones.
//
// init_function() (removed side): adds `func_node` to the compiler, records it together with the current cursor
// and the node before the function's end node, then sets AVX / AVX-512 frame flags based on has_avx() /
// has_avx512().
void UniCompiler::init_function(FuncNode* func_node) noexcept {
cc->add_func(func_node);
// hook_func() (added side): takes the function reported by cc->func() and stores it in `_func_init_hook`.
void UniCompiler::hook_func() noexcept {
FuncNode* func = cc->func();
_func_init_hook = func;
_func_node = func_node;
_func_init = cc->cursor();
_func_end = func_node->end_node()->prev();
// Added side: frame flags are only touched when `func` is non-null and has_avx() is true; the AVX-512 flag is
// nested inside that check. Note the call is set_avx_auto_cleanup() here versus set_avx_cleanup() on the
// removed side.
if (func && has_avx()) {
func->frame().set_avx_enabled();
func->frame().set_avx_auto_cleanup();
if (has_avx()) {
func_node->frame().set_avx_enabled();
func_node->frame().set_avx_cleanup();
if (has_avx512()) {
func->frame().set_avx512_enabled();
}
}
}
if (has_avx512()) {
func_node->frame().set_avx512_enabled();
}
// unhook_func() (added side): resets `_func_init_hook` to nullptr.
void UniCompiler::unhook_func() noexcept {
_func_init_hook = nullptr;
}
// ujit::UniCompiler - Constants
// =============================
// Creates `_common_table_ptr` the first time it is needed: when the register is not yet valid, a new register is
// obtained from new_gpz() and a `mov` of the address `ct_addr + _common_table_offset` is emitted while a
// ScopedInjector is active.
// NOTE(review): diff residue - both signatures and both ScopedInjector lines are present; the first of each pair
// (`noexcept`, `_func_init`) is the pre-change form, the second (`_func_init_hook`) the post-change form.
void UniCompiler::_init_vec_const_table_ptr() noexcept {
void UniCompiler::_init_vec_const_table_ptr() {
const void* ct_addr = ct_ptr<void>();
// Nothing is emitted when the pointer register already exists.
if (!_common_table_ptr.is_valid()) {
ScopedInjector injector(cc, &_func_init);
ScopedInjector injector(cc, &_func_init_hook);
_common_table_ptr = new_gpz("common_table_ptr");
cc->mov(_common_table_ptr, (int64_t)ct_addr + _common_table_offset);
}
}
x86::KReg UniCompiler::k_const(uint64_t value) noexcept {
x86::KReg UniCompiler::k_const(uint64_t value) {
uint32_t slot;
for (slot = 0; slot < kMaxKRegConstCount; slot++)
if (_k_reg[slot].is_valid() && _k_imm[slot] == value)
return _k_reg[slot];
BaseNode* prevNode = nullptr;
BaseNode* prev_node = nullptr;
Gp tmp;
x86::KReg kReg;
if (slot < kMaxKRegConstCount) {
prevNode = cc->set_cursor(_func_init);
prev_node = cc->set_cursor(_func_init_hook);
}
if (value & 0xFFFFFFFF00000000u) {
@@ -209,13 +226,13 @@ x86::KReg UniCompiler::k_const(uint64_t value) noexcept {
if (slot < kMaxKRegConstCount) {
_k_reg[slot] = kReg;
_func_init = cc->set_cursor(prevNode);
_func_init_hook = cc->set_cursor(prev_node);
}
return kReg;
}
Operand UniCompiler::simd_const(const void* c, Bcst bcst_width, VecWidth const_width) noexcept {
Operand UniCompiler::simd_const(const void* c, Bcst bcst_width, VecWidth const_width) {
size_t const_count = _vec_consts.size();
for (size_t i = 0; i < const_count; i++) {
@@ -237,19 +254,19 @@ Operand UniCompiler::simd_const(const void* c, Bcst bcst_width, VecWidth const_w
return Vec(signature_of_xmm_ymm_zmm[size_t(const_width)], _new_vec_const(c, bcst_width == Bcst::kNA_Unique).id());
}
// Convenience overload: derives the constant width from `similar_to` and delegates to the VecWidth overload.
// The first signature line is the pre-change `noexcept` form (diff residue).
Operand UniCompiler::simd_const(const void* c, Bcst bcst_width, const Vec& similar_to) noexcept {
Operand UniCompiler::simd_const(const void* c, Bcst bcst_width, const Vec& similar_to) {
// Width is the register type's offset from RegType::kVec128, reinterpreted as a VecWidth.
VecWidth const_width = VecWidth(uint32_t(similar_to.reg_type()) - uint32_t(RegType::kVec128));
return simd_const(c, bcst_width, const_width);
}
// Convenience overload: derives the constant width from the first register of `similar_to` and delegates to the
// VecWidth overload. `similar_to` must not be empty (asserted).
// The first signature line is the pre-change `noexcept` form (diff residue).
Operand UniCompiler::simd_const(const void* c, Bcst bcst_width, const VecArray& similar_to) noexcept {
Operand UniCompiler::simd_const(const void* c, Bcst bcst_width, const VecArray& similar_to) {
ASMJIT_ASSERT(!similar_to.is_empty());
// Only element [0] is inspected; its register type's offset from RegType::kVec128 becomes the VecWidth.
VecWidth const_width = VecWidth(uint32_t(similar_to[0].reg_type()) - uint32_t(RegType::kVec128));
return simd_const(c, bcst_width, const_width);
}
Vec UniCompiler::simd_vec_const(const void* c, Bcst bcst_width, VecWidth const_width) noexcept {
Vec UniCompiler::simd_vec_const(const void* c, Bcst bcst_width, VecWidth const_width) {
size_t const_count = _vec_consts.size();
for (size_t i = 0; i < const_count; i++)
@@ -259,19 +276,19 @@ Vec UniCompiler::simd_vec_const(const void* c, Bcst bcst_width, VecWidth const_w
return Vec(signature_of_xmm_ymm_zmm[size_t(const_width)], _new_vec_const(c, bcst_width == Bcst::kNA_Unique).id());
}
// Convenience overload: derives the constant width from `similar_to` and delegates to the VecWidth overload of
// simd_vec_const().
// The first signature line is the pre-change `noexcept` form (diff residue).
Vec UniCompiler::simd_vec_const(const void* c, Bcst bcst_width, const Vec& similar_to) noexcept {
Vec UniCompiler::simd_vec_const(const void* c, Bcst bcst_width, const Vec& similar_to) {
// Width is the register type's offset from RegType::kVec128, reinterpreted as a VecWidth.
VecWidth const_width = VecWidth(uint32_t(similar_to.reg_type()) - uint32_t(RegType::kVec128));
return simd_vec_const(c, bcst_width, const_width);
}
// Convenience overload: derives the constant width from the first register of `similar_to` and delegates to the
// VecWidth overload of simd_vec_const(). `similar_to` must not be empty (asserted).
// The first signature line is the pre-change `noexcept` form (diff residue).
Vec UniCompiler::simd_vec_const(const void* c, Bcst bcst_width, const VecArray& similar_to) noexcept {
Vec UniCompiler::simd_vec_const(const void* c, Bcst bcst_width, const VecArray& similar_to) {
ASMJIT_ASSERT(!similar_to.is_empty());
// Only element [0] is inspected; its register type's offset from RegType::kVec128 becomes the VecWidth.
VecWidth const_width = VecWidth(uint32_t(similar_to[0].reg_type()) - uint32_t(RegType::kVec128));
return simd_vec_const(c, bcst_width, const_width);
}
x86::Mem UniCompiler::simd_mem_const(const void* c, Bcst bcst_width, VecWidth const_width) noexcept {
x86::Mem UniCompiler::simd_mem_const(const void* c, Bcst bcst_width, VecWidth const_width) {
x86::Mem m = _get_mem_const(c);
if (const_width != VecWidth::k512)
return m;
@@ -289,19 +306,19 @@ x86::Mem UniCompiler::simd_mem_const(const void* c, Bcst bcst_width, VecWidth co
return m;
}
// Convenience overload: derives the constant width from `similar_to` and delegates to the VecWidth overload of
// simd_mem_const().
// The first signature line is the pre-change `noexcept` form (diff residue).
x86::Mem UniCompiler::simd_mem_const(const void* c, Bcst bcst_width, const Vec& similar_to) noexcept {
x86::Mem UniCompiler::simd_mem_const(const void* c, Bcst bcst_width, const Vec& similar_to) {
// Width is the register type's offset from RegType::kVec128, reinterpreted as a VecWidth.
VecWidth const_width = VecWidth(uint32_t(similar_to.reg_type()) - uint32_t(RegType::kVec128));
return simd_mem_const(c, bcst_width, const_width);
}
// Convenience overload: derives the constant width from the first register of `similar_to` and delegates to the
// VecWidth overload of simd_mem_const(). `similar_to` must not be empty (asserted).
// The first signature line is the pre-change `noexcept` form (diff residue).
x86::Mem UniCompiler::simd_mem_const(const void* c, Bcst bcst_width, const VecArray& similar_to) noexcept {
x86::Mem UniCompiler::simd_mem_const(const void* c, Bcst bcst_width, const VecArray& similar_to) {
ASMJIT_ASSERT(!similar_to.is_empty());
// Only element [0] is inspected; its register type's offset from RegType::kVec128 becomes the VecWidth.
VecWidth const_width = VecWidth(uint32_t(similar_to[0].reg_type()) - uint32_t(RegType::kVec128));
return simd_mem_const(c, bcst_width, const_width);
}
x86::Mem UniCompiler::_get_mem_const(const void* c) noexcept {
x86::Mem UniCompiler::_get_mem_const(const void* c) {
// Make sure we are addressing a constant from the `commonTable` constant pool.
const void* ct_addr = ct_ptr<void>();
ASMJIT_ASSERT((uintptr_t)c >= (uintptr_t)ct_addr &&
@@ -322,7 +339,7 @@ x86::Mem UniCompiler::_get_mem_const(const void* c) noexcept {
}
}
Vec UniCompiler::_new_vec_const(const void* c, bool is_unique_const) noexcept {
Vec UniCompiler::_new_vec_const(const void* c, bool is_unique_const) {
Vec vec;
const char* special_const_name = nullptr;
@@ -349,14 +366,14 @@ Vec UniCompiler::_new_vec_const(const void* c, bool is_unique_const) noexcept {
_vec_consts.append(arena(), const_data);
if (c == &ct().p_0000000000000000) {
ScopedInjector inject(cc, &_func_init);
ScopedInjector inject(cc, &_func_init_hook);
v_zero_i(vec.xmm());
}
else {
// NOTE: _get_mem_const() must be outside of injected code as it uses injection too.
Mem m = _get_mem_const(c);
ScopedInjector inject(cc, &_func_init);
ScopedInjector inject(cc, &_func_init_hook);
if (has_avx512() && !vec.is_vec128() && !is_unique_const)
cc->vbroadcasti32x4(vec, m);
else if (has_avx2() && vec.is_vec256() && !is_unique_const)
@@ -372,7 +389,7 @@ Vec UniCompiler::_new_vec_const(const void* c, bool is_unique_const) noexcept {
// ujit::UniCompiler - Stack
// =========================
x86::Mem UniCompiler::tmp_stack(StackId id, uint32_t size) noexcept {
x86::Mem UniCompiler::tmp_stack(StackId id, uint32_t size) {
ASMJIT_ASSERT(Support::is_power_of_2(size));
ASMJIT_ASSERT(size <= 64);
@@ -385,20 +402,6 @@ x86::Mem UniCompiler::tmp_stack(StackId id, uint32_t size) noexcept {
return stack;
}
// ujit::UniCompiler - Utilities
// =============================
// Pointer + size form of embed_jump_table(): embeds one entry of `entry_size` bytes for each of the first
// `jump_table_size` labels in `jump_table`.
//
// A valid label is embedded via embed_label_delta(label, jump_table_base, entry_size); an invalid label is
// embedded as `entry_size` bytes taken from a zero-filled buffer.
// NOTE(review): per the hunk header this span is listed as removed (20 old lines -> 6 new lines); confirm no
// callers of this signature remain.
void UniCompiler::embed_jump_table(const Label* jump_table, size_t jump_table_size, const Label& jump_table_base, uint32_t entry_size) noexcept {
// NOTE(review): `zeros` holds 8 bytes, so this presumably relies on `entry_size <= 8` - nothing here checks it.
static const uint8_t zeros[8] {};
for (size_t i = 0; i < jump_table_size; i++) {
if (jump_table[i].is_valid())
cc->embed_label_delta(jump_table[i], jump_table_base, entry_size);
else
cc->embed(zeros, entry_size);
}
}
// ujit::UniCompiler - General Purpose Instructions - Conditions
// =============================================================
@@ -472,7 +475,7 @@ public:
cond = x86::reverse_cond(cond);
}
ASMJIT_NOINLINE void emit(UniCompiler& uc) noexcept {
ASMJIT_NOINLINE void emit(UniCompiler& uc) {
BackendCompiler* cc = uc.cc;
InstId inst_id = condition_to_inst_id[size_t(op)];
@@ -503,7 +506,7 @@ public:
// ujit::UniCompiler - General Purpose Instructions - Emit
// =======================================================
void UniCompiler::emit_mov(const Gp& dst, const Operand_& src) noexcept {
void UniCompiler::emit_mov(const Gp& dst, const Operand_& src) {
if (src.is_imm() && src.as<Imm>().value() == 0) {
Gp r(dst);
if (r.is_gp64())
@@ -515,8 +518,9 @@ void UniCompiler::emit_mov(const Gp& dst, const Operand_& src) noexcept {
}
}
void UniCompiler::emit_m(UniOpM op, const Mem& m_) noexcept {
void UniCompiler::emit_m(UniOpM op, const Mem& m_) {
static constexpr uint8_t size_table[] = {
1, // Prefetch
0, // kStoreZeroReg
1, // kStoreZeroU8
2, // kStoreZeroU16
@@ -524,16 +528,21 @@ void UniCompiler::emit_m(UniOpM op, const Mem& m_) noexcept {
8 // kStoreZeroU64
};
Mem m(m_);
uint32_t size = size_table[size_t(op)];
if (size == 0)
size = cc->register_size();
if (op == UniOpM::kPrefetch) {
cc->prefetcht0(m_);
}
else {
Mem m(m_);
uint32_t size = size_table[size_t(op)];
if (size == 0)
size = cc->register_size();
m.set_size(size);
cc->mov(m, 0);
m.set_size(size);
cc->mov(m, 0);
}
}
void UniCompiler::emit_rm(UniOpRM op, const Gp& dst, const Mem& src) noexcept {
void UniCompiler::emit_rm(UniOpRM op, const Gp& dst, const Mem& src) {
static constexpr uint8_t size_table[] = {
0, // kLoadReg
1, // kLoadI8
@@ -613,7 +622,7 @@ struct UniOpMRInfo {
uint16_t size;
};
void UniCompiler::emit_mr(UniOpMR op, const Mem& dst, const Gp& src) noexcept {
void UniCompiler::emit_mr(UniOpMR op, const Mem& dst, const Gp& src) {
static constexpr UniOpMRInfo op_info_table[] = {
{ Inst::kIdMov, 0 }, // kStoreReg
{ Inst::kIdMov, 1 }, // kStoreU8
@@ -648,14 +657,14 @@ void UniCompiler::emit_mr(UniOpMR op, const Mem& dst, const Gp& src) noexcept {
cc->emit(op_info.inst_id, m, r);
}
// Emits a conditional move of `sel` into `dst` controlled by `condition`.
//
// The condition is wrapped in a ConditionApplier, which is optimized and emitted first; the cmovcc instruction is
// then selected from the applier's resulting `cond` (read after optimize()/emit() have run).
// The first signature line is the pre-change `noexcept` form (diff residue).
void UniCompiler::emit_cmov(const Gp& dst, const Operand_& sel, const UniCondition& condition) noexcept {
void UniCompiler::emit_cmov(const Gp& dst, const Operand_& sel, const UniCondition& condition) {
ConditionApplier ca(condition);
ca.optimize(*this);
ca.emit(*this);
cc->emit(Inst::cmovcc_from_cond(ca.cond), dst, sel);
}
void UniCompiler::emit_select(const Gp& dst, const Operand_& sel1_, const Operand_& sel2_, const UniCondition& condition) noexcept {
void UniCompiler::emit_select(const Gp& dst, const Operand_& sel1_, const Operand_& sel2_, const UniCondition& condition) {
ConditionApplier ca(condition);
ca.optimize(*this);
@@ -697,7 +706,7 @@ void UniCompiler::emit_select(const Gp& dst, const Operand_& sel1_, const Operan
cc->emit(Inst::cmovcc_from_cond(x86::negate_cond(ca.cond)), dst, sel2);
}
void UniCompiler::emit_2i(UniOpRR op, const Gp& dst, const Operand_& src_) noexcept {
void UniCompiler::emit_2i(UniOpRR op, const Gp& dst, const Operand_& src_) {
Operand src(src_);
// Notes
@@ -835,13 +844,13 @@ static constexpr uint64_t kOp3ICommutativeMask =
(uint64_t(1) << unsigned(UniOpRRR::kUMin)) |
(uint64_t(1) << unsigned(UniOpRRR::kUMax)) ;
// Returns true when the bit at index `unsigned(op)` is set in kOp3ICommutativeMask.
// The first signature line is the pre-change `noexcept` form (diff residue).
static ASMJIT_INLINE_NODEBUG bool is_op3i_commutative(UniOpRRR op) noexcept {
static ASMJIT_INLINE_NODEBUG bool is_op3i_commutative(UniOpRRR op) {
return (kOp3ICommutativeMask & (uint64_t(1) << unsigned(op))) != 0;
}
// Pair of instruction ids `a` and `b`.
// NOTE(review): the name suggests the two cmov instructions used by min/max lowering - its use is not visible
// here, confirm against emit_3i().
struct UniOpRRRMinMaxCMovInst { InstId a, b; };
void UniCompiler::emit_3i(UniOpRRR op, const Gp& dst, const Operand_& src1_, const Operand_& src2_) noexcept {
void UniCompiler::emit_3i(UniOpRRR op, const Gp& dst, const Operand_& src1_, const Operand_& src2_) {
Operand src1(src1_);
Operand src2(src2_);
@@ -1481,18 +1490,18 @@ void UniCompiler::emit_3i(UniOpRRR op, const Gp& dst, const Operand_& src1_, con
ASMJIT_NOT_REACHED();
}
// Emits an unconditional jump (Inst::kIdJmp) to `target`.
// The first signature line is the pre-change `noexcept` form (diff residue).
void UniCompiler::emit_j(const Operand_& target) noexcept {
void UniCompiler::emit_j(const Operand_& target) {
cc->emit(Inst::kIdJmp, target);
}
// Emits a conditional jump to `target` controlled by `condition`.
//
// The condition is wrapped in a ConditionApplier, which is optimized and emitted first; the jump then uses the
// applier's resulting `cond` (read after optimize()/emit() have run).
// The first signature line is the pre-change `noexcept` form (diff residue).
void UniCompiler::emit_j_if(const Label& target, const UniCondition& condition) noexcept {
void UniCompiler::emit_j_if(const Label& target, const UniCondition& condition) {
ConditionApplier ca(condition);
ca.optimize(*this);
ca.emit(*this);
cc->j(ca.cond, target);
}
void UniCompiler::adds_u8(const Gp& dst, const Gp& src1, const Gp& src2) noexcept {
void UniCompiler::adds_u8(const Gp& dst, const Gp& src1, const Gp& src2) {
ASMJIT_ASSERT(dst.size() == src1.size());
ASMJIT_ASSERT(dst.size() == src2.size());
@@ -1512,13 +1521,13 @@ void UniCompiler::adds_u8(const Gp& dst, const Gp& src1, const Gp& src2) noexcep
cc->or_(dst.r8(), u8_msk.r8());
}
// Emits code that copies `src` into `dst` (skipped when both have the same register id) and then XORs the
// `r8()` view of `dst` with 0xFF.
// The first signature line is the pre-change `noexcept` form (diff residue).
void UniCompiler::inv_u8(const Gp& dst, const Gp& src) noexcept {
void UniCompiler::inv_u8(const Gp& dst, const Gp& src) {
if (dst.id() != src.id())
cc->mov(dst, src);
cc->xor_(dst.r8(), 0xFF);
}
void UniCompiler::div_255_u32(const Gp& dst, const Gp& src) noexcept {
void UniCompiler::div_255_u32(const Gp& dst, const Gp& src) {
ASMJIT_ASSERT(dst.size() == src.size());
if (dst.id() == src.id()) {
@@ -1540,13 +1549,13 @@ void UniCompiler::div_255_u32(const Gp& dst, const Gp& src) noexcept {
}
}
// Emits `imul dst, src, 257` followed by `shr dst, 16`, i.e. dst = (src * 257) >> 16.
// `dst` and `src` must have the same size (asserted).
// The first signature line is the pre-change `noexcept` form (diff residue).
void UniCompiler::mul_257_hu16(const Gp& dst, const Gp& src) noexcept {
void UniCompiler::mul_257_hu16(const Gp& dst, const Gp& src) {
ASMJIT_ASSERT(dst.size() == src.size());
cc->imul(dst, src, 257);
cc->shr(dst, 16);
}
void UniCompiler::add_scaled(const Gp& dst, const Gp& a, int b) noexcept {
void UniCompiler::add_scaled(const Gp& dst, const Gp& a, int b) {
switch (b) {
case 1:
cc->add(dst, a);
@@ -1570,7 +1579,7 @@ void UniCompiler::add_scaled(const Gp& dst, const Gp& a, int b) noexcept {
}
}
void UniCompiler::add_ext(const Gp& dst, const Gp& src_, const Gp& idx_, uint32_t scale, int32_t disp) noexcept {
void UniCompiler::add_ext(const Gp& dst, const Gp& src_, const Gp& idx_, uint32_t scale, int32_t disp) {
ASMJIT_ASSERT(scale != 0u);
Gp src = src_.clone_as(dst);
@@ -1610,7 +1619,7 @@ void UniCompiler::add_ext(const Gp& dst, const Gp& src_, const Gp& idx_, uint32_
cc->lea(dst, x86::ptr(src, tmp));
}
void UniCompiler::lea(const Gp& dst, const Mem& src) noexcept {
void UniCompiler::lea(const Gp& dst, const Mem& src) {
Mem m(src);
if (is_64bit() && dst.size() == 4) {
@@ -2589,7 +2598,7 @@ static constexpr UniOpVMInfo opcode_info_2mv[size_t(UniOpMV::kMaxValue) + 1] = {
// ujit::UniCompiler - Vector Instructions - Utility Functions
// ===========================================================
static ASMJIT_NOINLINE void UniCompiler_load_into(UniCompiler& uc, const Vec& vec, const Mem& mem, uint32_t broadcast_size = 0) noexcept {
static ASMJIT_NOINLINE void UniCompiler_load_into(UniCompiler& uc, const Vec& vec, const Mem& mem, uint32_t broadcast_size = 0) {
BackendCompiler* cc = uc.cc;
Mem m(mem);
@@ -2617,7 +2626,7 @@ static ASMJIT_NOINLINE void UniCompiler_load_into(UniCompiler& uc, const Vec& ve
// TODO: Unused for now...
[[maybe_unused]]
static ASMJIT_NOINLINE void UniCompiler_move_to_dst(UniCompiler& uc, const Vec& dst, const Operand_& src, uint32_t broadcast_size = 0) noexcept {
static ASMJIT_NOINLINE void UniCompiler_move_to_dst(UniCompiler& uc, const Vec& dst, const Operand_& src, uint32_t broadcast_size = 0) {
if (src.is_reg()) {
ASMJIT_ASSERT(src.is_vec());
if (dst.id() != src.as<Reg>().id()) {
@@ -2632,7 +2641,7 @@ static ASMJIT_NOINLINE void UniCompiler_move_to_dst(UniCompiler& uc, const Vec&
}
}
static ASMJIT_NOINLINE Vec UniCompiler_load_new(UniCompiler& uc, const Vec& ref, const Mem& mem, uint32_t broadcast_size = 0) noexcept {
static ASMJIT_NOINLINE Vec UniCompiler_load_new(UniCompiler& uc, const Vec& ref, const Mem& mem, uint32_t broadcast_size = 0) {
Vec vec = uc.new_similar_reg(ref, "@vec_m");
UniCompiler_load_into(uc, vec, mem, broadcast_size);
return vec;
@@ -2642,7 +2651,7 @@ static ASMJIT_INLINE bool is_same_vec(const Vec& a, const Operand_& b) noexcept
return b.is_reg() && a.id() == b.as<Reg>().id();
}
static ASMJIT_INLINE Operand get_fop_one(UniCompiler& uc, const Vec& dst, FloatMode fm) noexcept {
static ASMJIT_INLINE Operand get_fop_one(UniCompiler& uc, const Vec& dst, FloatMode fm) {
Operand op;
if (is_f32_op(fm))
op = uc.simd_const(&uc.ct().f32_1, Bcst::k32, dst);
@@ -2651,7 +2660,7 @@ static ASMJIT_INLINE Operand get_fop_one(UniCompiler& uc, const Vec& dst, FloatM
return op;
}
static ASMJIT_INLINE Operand get_fop_half_minus_1ulp(UniCompiler& uc, const Vec& dst, FloatMode fm) noexcept {
static ASMJIT_INLINE Operand get_fop_half_minus_1ulp(UniCompiler& uc, const Vec& dst, FloatMode fm) {
Operand op;
if (is_f32_op(fm))
op = uc.simd_const(&uc.ct().f32_0_5_minus_1ulp, Bcst::k32, dst);
@@ -2660,7 +2669,7 @@ static ASMJIT_INLINE Operand get_fop_half_minus_1ulp(UniCompiler& uc, const Vec&
return op;
}
static ASMJIT_INLINE Operand get_fop_round_magic(UniCompiler& uc, const Vec& dst, FloatMode fm) noexcept {
static ASMJIT_INLINE Operand get_fop_round_magic(UniCompiler& uc, const Vec& dst, FloatMode fm) {
Operand op;
if (is_f32_op(fm))
op = uc.simd_const(&uc.ct().f32_round_magic, Bcst::k32, dst);
@@ -2669,7 +2678,7 @@ static ASMJIT_INLINE Operand get_fop_round_magic(UniCompiler& uc, const Vec& dst
return op;
}
static ASMJIT_INLINE Operand get_fop_msb_bit(UniCompiler& uc, const Vec& dst, FloatMode fm) noexcept {
static ASMJIT_INLINE Operand get_fop_msb_bit(UniCompiler& uc, const Vec& dst, FloatMode fm) {
Operand op;
if (is_f32_op(fm))
op = uc.simd_const(&uc.ct().p_8000000080000000, Bcst::k32, dst);
@@ -2678,7 +2687,7 @@ static ASMJIT_INLINE Operand get_fop_msb_bit(UniCompiler& uc, const Vec& dst, Fl
return op;
}
static ASMJIT_NOINLINE void sse_mov(UniCompiler& uc, const Vec& dst, const Operand_& src) noexcept {
static ASMJIT_NOINLINE void sse_mov(UniCompiler& uc, const Vec& dst, const Operand_& src) {
BackendCompiler* cc = uc.cc;
if (src.is_mem())
cc->emit(Inst::kIdMovups, dst, src);
@@ -2686,7 +2695,7 @@ static ASMJIT_NOINLINE void sse_mov(UniCompiler& uc, const Vec& dst, const Opera
cc->emit(Inst::kIdMovaps, dst, src);
}
static ASMJIT_NOINLINE void sse_fmov(UniCompiler& uc, const Vec& dst, const Operand_& src, FloatMode fm) noexcept {
static ASMJIT_NOINLINE void sse_fmov(UniCompiler& uc, const Vec& dst, const Operand_& src, FloatMode fm) {
BackendCompiler* cc = uc.cc;
if (src.is_reg()) {
if (dst.id() != src.id()) {
@@ -2701,13 +2710,13 @@ static ASMJIT_NOINLINE void sse_fmov(UniCompiler& uc, const Vec& dst, const Oper
}
}
// Creates a new register via uc.new_similar_reg(vec, name), emits a MOVAPS from `vec` into it and returns the
// new register.
// The first signature line is the pre-change `noexcept` form (diff residue).
static ASMJIT_NOINLINE Vec sse_copy(UniCompiler& uc, const Vec& vec, const char* name) noexcept {
static ASMJIT_NOINLINE Vec sse_copy(UniCompiler& uc, const Vec& vec, const char* name) {
Vec copy = uc.new_similar_reg(vec, name);
uc.cc->emit(Inst::kIdMovaps, copy, vec);
return copy;
}
static ASMJIT_NOINLINE void sse_make_vec(UniCompiler& uc, Operand_& op, const char* name) noexcept {
static ASMJIT_NOINLINE void sse_make_vec(UniCompiler& uc, Operand_& op, const char* name) {
if (op.is_mem()) {
Vec tmp = uc.new_vec128(name);
sse_mov(uc, tmp, op);
@@ -2734,7 +2743,7 @@ static ASMJIT_INLINE uint32_t shuf_imm4_from_swizzle(Swizzle2 s) noexcept {
return x86::shuffle_imm(imm1 * 2u + 1u, imm1 * 2u, imm0 * 2u + 1u, imm0 * 2u);
}
static ASMJIT_NOINLINE void sse_bit_not(UniCompiler& uc, const Vec& dst, const Operand_& src) noexcept {
static ASMJIT_NOINLINE void sse_bit_not(UniCompiler& uc, const Vec& dst, const Operand_& src) {
BackendCompiler* cc = uc.cc;
sse_mov(uc, dst, src);
@@ -2742,7 +2751,7 @@ static ASMJIT_NOINLINE void sse_bit_not(UniCompiler& uc, const Vec& dst, const O
cc->emit(Inst::kIdPxor, dst, ones);
}
static ASMJIT_NOINLINE void sse_msb_flip(UniCompiler& uc, const Vec& dst, const Operand_& src, ElementSize sz) noexcept {
static ASMJIT_NOINLINE void sse_msb_flip(UniCompiler& uc, const Vec& dst, const Operand_& src, ElementSize sz) {
BackendCompiler* cc = uc.cc;
const void* msk_data {};
@@ -2761,7 +2770,7 @@ static ASMJIT_NOINLINE void sse_msb_flip(UniCompiler& uc, const Vec& dst, const
cc->emit(Inst::kIdPxor, dst, msk);
}
static ASMJIT_NOINLINE void sse_fsign_flip(UniCompiler& uc, const Vec& dst, const Operand_& src, FloatMode fm) noexcept {
static ASMJIT_NOINLINE void sse_fsign_flip(UniCompiler& uc, const Vec& dst, const Operand_& src, FloatMode fm) {
BackendCompiler* cc = uc.cc;
const FloatInst& fi = sse_float_inst[size_t(fm)];
@@ -2783,7 +2792,7 @@ static ASMJIT_NOINLINE void sse_fsign_flip(UniCompiler& uc, const Vec& dst, cons
// Possibly the best solution:
// https://stackoverflow.com/questions/65166174/how-to-simulate-pcmpgtq-on-sse2
static ASMJIT_NOINLINE void sse_cmp_gt_i64(UniCompiler& uc, const Vec& dst, const Operand_& a, const Operand_& b) noexcept {
static ASMJIT_NOINLINE void sse_cmp_gt_i64(UniCompiler& uc, const Vec& dst, const Operand_& a, const Operand_& b) {
BackendCompiler* cc = uc.cc;
if (uc.has_sse4_2()) {
@@ -2827,7 +2836,7 @@ static ASMJIT_NOINLINE void sse_cmp_gt_i64(UniCompiler& uc, const Vec& dst, cons
// Possibly the best solution:
// https://stackoverflow.com/questions/65441496/what-is-the-most-efficient-way-to-do-unsigned-64-bit-comparison-on-sse2
static ASMJIT_NOINLINE void sse_cmp_gt_u64(UniCompiler& uc, const Vec& dst, const Operand_& a, const Operand_& b) noexcept {
static ASMJIT_NOINLINE void sse_cmp_gt_u64(UniCompiler& uc, const Vec& dst, const Operand_& a, const Operand_& b) {
BackendCompiler* cc = uc.cc;
if (uc.has_sse4_2()) {
@@ -2866,7 +2875,7 @@ static ASMJIT_NOINLINE void sse_cmp_gt_u64(UniCompiler& uc, const Vec& dst, cons
}
}
static ASMJIT_NOINLINE void sse_select(UniCompiler& uc, const Vec& dst, const Vec& a, const Operand_& b, const Vec& msk) noexcept {
static ASMJIT_NOINLINE void sse_select(UniCompiler& uc, const Vec& dst, const Vec& a, const Operand_& b, const Vec& msk) {
BackendCompiler* cc = uc.cc;
sse_mov(uc, dst, a);
cc->emit(Inst::kIdPand, dst, msk);
@@ -2874,7 +2883,7 @@ static ASMJIT_NOINLINE void sse_select(UniCompiler& uc, const Vec& dst, const Ve
cc->emit(Inst::kIdPor, dst, msk);
}
static ASMJIT_NOINLINE void sse_int_widen(UniCompiler& uc, const Vec& dst, const Vec& src, WideningOp cvt) noexcept {
static ASMJIT_NOINLINE void sse_int_widen(UniCompiler& uc, const Vec& dst, const Vec& src, WideningOp cvt) {
BackendCompiler* cc = uc.cc;
WideningOpInfo cvt_info = sse_int_widening_op_info[size_t(cvt)];
@@ -2943,7 +2952,7 @@ static ASMJIT_NOINLINE void sse_int_widen(UniCompiler& uc, const Vec& dst, const
}
}
static ASMJIT_NOINLINE void sse_round(UniCompiler& uc, const Vec& dst, const Operand& src, FloatMode fm, x86::RoundImm round_mode) noexcept {
static ASMJIT_NOINLINE void sse_round(UniCompiler& uc, const Vec& dst, const Operand& src, FloatMode fm, x86::RoundImm round_mode) {
BackendCompiler* cc = uc.cc;
uint32_t is_f32 = fm == FloatMode::kF32S || fm == FloatMode::kF32V;
@@ -3116,7 +3125,7 @@ static ASMJIT_NOINLINE void sse_round(UniCompiler& uc, const Vec& dst, const Ope
ASMJIT_NOT_REACHED();
}
static ASMJIT_NOINLINE void avx_mov(UniCompiler& uc, const Vec& dst, const Operand_& src) noexcept {
static ASMJIT_NOINLINE void avx_mov(UniCompiler& uc, const Vec& dst, const Operand_& src) {
BackendCompiler* cc = uc.cc;
InstId inst_id = 0;
@@ -3130,7 +3139,7 @@ static ASMJIT_NOINLINE void avx_mov(UniCompiler& uc, const Vec& dst, const Opera
cc->emit(inst_id, dst, src);
}
static ASMJIT_NOINLINE void avx_fmov(UniCompiler& uc, const Vec& dst, const Operand_& src, FloatMode fm) noexcept {
static ASMJIT_NOINLINE void avx_fmov(UniCompiler& uc, const Vec& dst, const Operand_& src, FloatMode fm) {
BackendCompiler* cc = uc.cc;
if (src.is_reg()) {
if (dst.id() != src.id()) {
@@ -3148,7 +3157,7 @@ static ASMJIT_NOINLINE void avx_fmov(UniCompiler& uc, const Vec& dst, const Oper
}
}
static ASMJIT_NOINLINE void avx_make_vec(UniCompiler& uc, Operand_& op, const Vec& ref, const char* name) noexcept {
static ASMJIT_NOINLINE void avx_make_vec(UniCompiler& uc, Operand_& op, const Vec& ref, const char* name) {
if (op.is_mem()) {
Vec tmp = uc.new_similar_reg(ref, name);
avx_mov(uc, tmp, op);
@@ -3156,14 +3165,14 @@ static ASMJIT_NOINLINE void avx_make_vec(UniCompiler& uc, Operand_& op, const Ve
}
}
// Zeroes `dst` by emitting VPXOR on its XMM view (`dst.xmm()`), using that register as the
// destination and as both sources.
// NOTE(review): presumably relies on the 128-bit VEX form clearing the upper part of a wider
// `dst` register - confirm against the instruction reference.
static ASMJIT_NOINLINE void avx_zero(UniCompiler& uc, const Vec& dst) noexcept {
static ASMJIT_NOINLINE void avx_zero(UniCompiler& uc, const Vec& dst) {
BackendCompiler* cc = uc.cc;
Vec x = dst.xmm();
// x ^ x == 0, so the result does not depend on the previous content of the register.
cc->vpxor(x, x, x);
return;
}
static ASMJIT_NOINLINE void avx_ones(UniCompiler& uc, const Vec& dst) noexcept {
static ASMJIT_NOINLINE void avx_ones(UniCompiler& uc, const Vec& dst) {
BackendCompiler* cc = uc.cc;
if (uc.has_avx512())
cc->emit(Inst::kIdVpternlogd, dst, dst, dst, 0xFF);
@@ -3171,7 +3180,7 @@ static ASMJIT_NOINLINE void avx_ones(UniCompiler& uc, const Vec& dst) noexcept {
cc->emit(Inst::kIdVpcmpeqb, dst, dst, dst);
}
static ASMJIT_NOINLINE void avx_bit_not(UniCompiler& uc, const Vec& dst, const Operand_& src) noexcept {
static ASMJIT_NOINLINE void avx_bit_not(UniCompiler& uc, const Vec& dst, const Operand_& src) {
BackendCompiler* cc = uc.cc;
if (uc.has_avx512()) {
@@ -3197,7 +3206,7 @@ static ASMJIT_NOINLINE void avx_bit_not(UniCompiler& uc, const Vec& dst, const O
}
}
static ASMJIT_NOINLINE void avx_isign_flip(UniCompiler& uc, const Vec& dst, const Operand_& src, ElementSize sz) noexcept {
static ASMJIT_NOINLINE void avx_isign_flip(UniCompiler& uc, const Vec& dst, const Operand_& src, ElementSize sz) {
BackendCompiler* cc = uc.cc;
Operand msk;
@@ -3222,7 +3231,7 @@ static ASMJIT_NOINLINE void avx_isign_flip(UniCompiler& uc, const Vec& dst, cons
}
}
static ASMJIT_NOINLINE void avx_fsign_flip(UniCompiler& uc, const Vec& dst, const Operand_& src, FloatMode fm) noexcept {
static ASMJIT_NOINLINE void avx_fsign_flip(UniCompiler& uc, const Vec& dst, const Operand_& src, FloatMode fm) {
BackendCompiler* cc = uc.cc;
const FloatInst& fi = avx_float_inst[size_t(fm)];
@@ -3276,7 +3285,7 @@ public:
};
template<typename Src>
static ASMJIT_INLINE void emit_2v_t(UniCompiler& uc, UniOpVV op, const OpArray& dst_, const Src& src_) noexcept {
static ASMJIT_INLINE void emit_2v_t(UniCompiler& uc, UniOpVV op, const OpArray& dst_, const Src& src_) {
size_t n = dst_.size();
OpArrayIter<Src> src(src_);
@@ -3287,7 +3296,7 @@ static ASMJIT_INLINE void emit_2v_t(UniCompiler& uc, UniOpVV op, const OpArray&
}
template<typename Src>
static ASMJIT_INLINE void emit_2vi_t(UniCompiler& uc, UniOpVVI op, const OpArray& dst_, const Src& src_, uint32_t imm) noexcept {
static ASMJIT_INLINE void emit_2vi_t(UniCompiler& uc, UniOpVVI op, const OpArray& dst_, const Src& src_, uint32_t imm) {
size_t n = dst_.size();
OpArrayIter<Src> src(src_);
@@ -3298,7 +3307,7 @@ static ASMJIT_INLINE void emit_2vi_t(UniCompiler& uc, UniOpVVI op, const OpArray
}
template<typename Src1, typename Src2>
static ASMJIT_INLINE void emit_3v_t(UniCompiler& uc, UniOpVVV op, const OpArray& dst_, const Src1& src1_, const Src2& src2_) noexcept {
static ASMJIT_INLINE void emit_3v_t(UniCompiler& uc, UniOpVVV op, const OpArray& dst_, const Src1& src1_, const Src2& src2_) {
size_t n = dst_.size();
OpArrayIter<Src1> src1(src1_);
OpArrayIter<Src2> src2(src2_);
@@ -3311,7 +3320,7 @@ static ASMJIT_INLINE void emit_3v_t(UniCompiler& uc, UniOpVVV op, const OpArray&
}
template<typename Src1, typename Src2>
static ASMJIT_INLINE void emit_3vi_t(UniCompiler& uc, UniOpVVVI op, const OpArray& dst_, const Src1& src1_, const Src2& src2_, uint32_t imm) noexcept {
static ASMJIT_INLINE void emit_3vi_t(UniCompiler& uc, UniOpVVVI op, const OpArray& dst_, const Src1& src1_, const Src2& src2_, uint32_t imm) {
size_t n = dst_.size();
OpArrayIter<Src1> src1(src1_);
OpArrayIter<Src2> src2(src2_);
@@ -3324,7 +3333,7 @@ static ASMJIT_INLINE void emit_3vi_t(UniCompiler& uc, UniOpVVVI op, const OpArra
}
template<typename Src1, typename Src2, typename Src3>
static ASMJIT_INLINE void emit_4v_t(UniCompiler& uc, UniOpVVVV op, const OpArray& dst_, const Src1& src1_, const Src2& src2_, const Src3& src3_) noexcept {
static ASMJIT_INLINE void emit_4v_t(UniCompiler& uc, UniOpVVVV op, const OpArray& dst_, const Src1& src1_, const Src2& src2_, const Src3& src3_) {
size_t n = dst_.size();
OpArrayIter<Src1> src1(src1_);
OpArrayIter<Src2> src2(src2_);
@@ -3341,7 +3350,7 @@ static ASMJIT_INLINE void emit_4v_t(UniCompiler& uc, UniOpVVVV op, const OpArray
// ujit::UniCompiler - Vector Instructions - Emit 2V
// =================================================
void UniCompiler::emit_2v(UniOpVV op, const Operand_& dst_, const Operand_& src_) noexcept {
void UniCompiler::emit_2v(UniOpVV op, const Operand_& dst_, const Operand_& src_) {
ASMJIT_ASSERT(dst_.is_vec());
Vec dst(dst_.as<Vec>());
@@ -4425,13 +4434,13 @@ void UniCompiler::emit_2v(UniOpVV op, const Operand_& dst_, const Operand_& src_
}
}
// OpArray overloads of emit_2v: thin forwarders to the emit_2v_t template helper. They differ only
// in whether the source is a single operand (Operand_) or an operand array (OpArray).
void UniCompiler::emit_2v(UniOpVV op, const OpArray& dst_, const Operand_& src_) noexcept { emit_2v_t(*this, op, dst_, src_); }
void UniCompiler::emit_2v(UniOpVV op, const OpArray& dst_, const OpArray& src_) noexcept { emit_2v_t(*this, op, dst_, src_); }
void UniCompiler::emit_2v(UniOpVV op, const OpArray& dst_, const Operand_& src_) { emit_2v_t(*this, op, dst_, src_); }
void UniCompiler::emit_2v(UniOpVV op, const OpArray& dst_, const OpArray& src_) { emit_2v_t(*this, op, dst_, src_); }
// ujit::UniCompiler - Vector Instructions - Emit 2VI
// ==================================================
void UniCompiler::emit_2vi(UniOpVVI op, const Operand_& dst_, const Operand_& src_, uint32_t imm) noexcept {
void UniCompiler::emit_2vi(UniOpVVI op, const Operand_& dst_, const Operand_& src_, uint32_t imm) {
ASMJIT_ASSERT(dst_.is_vec());
Vec dst(dst_.as<Vec>());
@@ -4790,13 +4799,13 @@ void UniCompiler::emit_2vi(UniOpVVI op, const Operand_& dst_, const Operand_& sr
}
}
// OpArray overloads of emit_2vi: thin forwarders to the emit_2vi_t template helper, passing the
// immediate `imm` through unchanged. The source is either a single operand or an operand array.
void UniCompiler::emit_2vi(UniOpVVI op, const OpArray& dst_, const Operand_& src_, uint32_t imm) noexcept { emit_2vi_t(*this, op, dst_, src_, imm); }
void UniCompiler::emit_2vi(UniOpVVI op, const OpArray& dst_, const OpArray& src_, uint32_t imm) noexcept { emit_2vi_t(*this, op, dst_, src_, imm); }
void UniCompiler::emit_2vi(UniOpVVI op, const OpArray& dst_, const Operand_& src_, uint32_t imm) { emit_2vi_t(*this, op, dst_, src_, imm); }
void UniCompiler::emit_2vi(UniOpVVI op, const OpArray& dst_, const OpArray& src_, uint32_t imm) { emit_2vi_t(*this, op, dst_, src_, imm); }
// ujit::UniCompiler - Vector Instructions - Emit 2VS
// ==================================================
void UniCompiler::emit_2vs(UniOpVR op, const Operand_& dst_, const Operand_& src_, uint32_t idx) noexcept {
void UniCompiler::emit_2vs(UniOpVR op, const Operand_& dst_, const Operand_& src_, uint32_t idx) {
UniOpVInfo op_info = opcode_info_2vs[size_t(op)];
Operand src(src_);
@@ -5085,7 +5094,7 @@ void UniCompiler::emit_2vs(UniOpVR op, const Operand_& dst_, const Operand_& src
// ujit::UniCompiler - Vector Instructions - Emit 2VM
// ==================================================
void UniCompiler::emit_vm(UniOpVM op, const Vec& dst_, const Mem& src_, Alignment alignment, uint32_t idx) noexcept {
void UniCompiler::emit_vm(UniOpVM op, const Vec& dst_, const Mem& src_, Alignment alignment, uint32_t idx) {
ASMJIT_ASSERT(dst_.is_vec());
ASMJIT_ASSERT(src_.is_mem());
@@ -5492,7 +5501,7 @@ void UniCompiler::emit_vm(UniOpVM op, const Vec& dst_, const Mem& src_, Alignmen
}
}
void UniCompiler::emit_vm(UniOpVM op, const OpArray& dst_, const Mem& src_, Alignment alignment, uint32_t idx) noexcept {
void UniCompiler::emit_vm(UniOpVM op, const OpArray& dst_, const Mem& src_, Alignment alignment, uint32_t idx) {
Mem src(src_);
UniOpVMInfo op_info = opcode_info_2vm[size_t(op)];
@@ -5525,7 +5534,7 @@ void UniCompiler::emit_vm(UniOpVM op, const OpArray& dst_, const Mem& src_, Alig
}
}
void UniCompiler::emit_mv(UniOpMV op, const Mem& dst_, const Vec& src_, Alignment alignment, uint32_t idx) noexcept {
void UniCompiler::emit_mv(UniOpMV op, const Mem& dst_, const Vec& src_, Alignment alignment, uint32_t idx) {
ASMJIT_ASSERT(dst_.is_mem());
ASMJIT_ASSERT(src_.is_reg() && src_.is_vec());
@@ -5832,7 +5841,7 @@ void UniCompiler::emit_mv(UniOpMV op, const Mem& dst_, const Vec& src_, Alignmen
}
}
void UniCompiler::emit_mv(UniOpMV op, const Mem& dst_, const OpArray& src_, Alignment alignment, uint32_t idx) noexcept {
void UniCompiler::emit_mv(UniOpMV op, const Mem& dst_, const OpArray& src_, Alignment alignment, uint32_t idx) {
Support::maybe_unused(idx);
Mem dst(dst_);
@@ -5869,7 +5878,7 @@ void UniCompiler::emit_mv(UniOpMV op, const Mem& dst_, const OpArray& src_, Alig
// ujit::UniCompiler - Vector Instructions - Emit 3V
// =================================================
void UniCompiler::emit_3v(UniOpVVV op, const Operand_& dst_, const Operand_& src1_, const Operand_& src2_) noexcept {
void UniCompiler::emit_3v(UniOpVVV op, const Operand_& dst_, const Operand_& src1_, const Operand_& src2_) {
ASMJIT_ASSERT(dst_.is_vec());
ASMJIT_ASSERT(src1_.is_vec());
@@ -7012,14 +7021,14 @@ void UniCompiler::emit_3v(UniOpVVV op, const Operand_& dst_, const Operand_& src
}
}
// OpArray overloads of emit_3v: thin forwarders to the emit_3v_t template helper. The three
// overloads cover the combinations where at least one of the two sources is an operand array.
void UniCompiler::emit_3v(UniOpVVV op, const OpArray& dst_, const Operand_& src1_, const OpArray& src2_) noexcept { emit_3v_t(*this, op, dst_, src1_, src2_); }
void UniCompiler::emit_3v(UniOpVVV op, const OpArray& dst_, const OpArray& src1_, const Operand_& src2_) noexcept { emit_3v_t(*this, op, dst_, src1_, src2_); }
void UniCompiler::emit_3v(UniOpVVV op, const OpArray& dst_, const OpArray& src1_, const OpArray& src2_) noexcept { emit_3v_t(*this, op, dst_, src1_, src2_); }
void UniCompiler::emit_3v(UniOpVVV op, const OpArray& dst_, const Operand_& src1_, const OpArray& src2_) { emit_3v_t(*this, op, dst_, src1_, src2_); }
void UniCompiler::emit_3v(UniOpVVV op, const OpArray& dst_, const OpArray& src1_, const Operand_& src2_) { emit_3v_t(*this, op, dst_, src1_, src2_); }
void UniCompiler::emit_3v(UniOpVVV op, const OpArray& dst_, const OpArray& src1_, const OpArray& src2_) { emit_3v_t(*this, op, dst_, src1_, src2_); }
// ujit::UniCompiler - Vector Instructions - Emit 3VI
// ==================================================
void UniCompiler::emit_3vi(UniOpVVVI op, const Operand_& dst_, const Operand_& src1_, const Operand_& src2_, uint32_t imm) noexcept {
void UniCompiler::emit_3vi(UniOpVVVI op, const Operand_& dst_, const Operand_& src1_, const Operand_& src2_, uint32_t imm) {
ASMJIT_ASSERT(dst_.is_vec());
ASMJIT_ASSERT(src1_.is_vec());
@@ -7252,14 +7261,14 @@ void UniCompiler::emit_3vi(UniOpVVVI op, const Operand_& dst_, const Operand_& s
}
}
// OpArray overloads of emit_3vi: thin forwarders to the emit_3vi_t template helper, passing the
// immediate `imm` through unchanged. The three overloads cover the combinations where at least one
// of the two sources is an operand array.
void UniCompiler::emit_3vi(UniOpVVVI op, const OpArray& dst_, const Operand_& src1_, const OpArray& src2_, uint32_t imm) noexcept { emit_3vi_t(*this, op, dst_, src1_, src2_, imm); }
void UniCompiler::emit_3vi(UniOpVVVI op, const OpArray& dst_, const OpArray& src1_, const Operand_& src2_, uint32_t imm) noexcept { emit_3vi_t(*this, op, dst_, src1_, src2_, imm); }
void UniCompiler::emit_3vi(UniOpVVVI op, const OpArray& dst_, const OpArray& src1_, const OpArray& src2_, uint32_t imm) noexcept { emit_3vi_t(*this, op, dst_, src1_, src2_, imm); }
void UniCompiler::emit_3vi(UniOpVVVI op, const OpArray& dst_, const Operand_& src1_, const OpArray& src2_, uint32_t imm) { emit_3vi_t(*this, op, dst_, src1_, src2_, imm); }
void UniCompiler::emit_3vi(UniOpVVVI op, const OpArray& dst_, const OpArray& src1_, const Operand_& src2_, uint32_t imm) { emit_3vi_t(*this, op, dst_, src1_, src2_, imm); }
void UniCompiler::emit_3vi(UniOpVVVI op, const OpArray& dst_, const OpArray& src1_, const OpArray& src2_, uint32_t imm) { emit_3vi_t(*this, op, dst_, src1_, src2_, imm); }
// ujit::UniCompiler - Vector Instructions - Emit 4V
// =================================================
void UniCompiler::emit_4v(UniOpVVVV op, const Operand_& dst_, const Operand_& src1_, const Operand_& src2_, const Operand_& src3_) noexcept {
void UniCompiler::emit_4v(UniOpVVVV op, const Operand_& dst_, const Operand_& src1_, const Operand_& src2_, const Operand_& src3_) {
ASMJIT_ASSERT(dst_.is_vec());
ASMJIT_ASSERT(src1_.is_vec());
@@ -7569,13 +7578,13 @@ void UniCompiler::emit_4v(UniOpVVVV op, const Operand_& dst_, const Operand_& sr
}
}
// OpArray overloads of emit_4v: thin forwarders to the emit_4v_t template helper. The seven
// overloads cover every combination of the three sources in which at least one is an operand array.
// NOTE(review): overloads with a single-operand `src3_` declare it as `const Operand&`, whereas
// single-operand `src1_`/`src2_` use `const Operand_&` - confirm the difference is intentional.
void UniCompiler::emit_4v(UniOpVVVV op, const OpArray& dst_, const Operand_& src1_, const Operand_& src2_, const OpArray& src3_) noexcept { emit_4v_t(*this, op, dst_, src1_, src2_, src3_); }
void UniCompiler::emit_4v(UniOpVVVV op, const OpArray& dst_, const Operand_& src1_, const OpArray& src2_, const Operand& src3_) noexcept { emit_4v_t(*this, op, dst_, src1_, src2_, src3_); }
void UniCompiler::emit_4v(UniOpVVVV op, const OpArray& dst_, const Operand_& src1_, const OpArray& src2_, const OpArray& src3_) noexcept { emit_4v_t(*this, op, dst_, src1_, src2_, src3_); }
void UniCompiler::emit_4v(UniOpVVVV op, const OpArray& dst_, const OpArray& src1_, const Operand_& src2_, const Operand& src3_) noexcept { emit_4v_t(*this, op, dst_, src1_, src2_, src3_); }
void UniCompiler::emit_4v(UniOpVVVV op, const OpArray& dst_, const OpArray& src1_, const Operand_& src2_, const OpArray& src3_) noexcept { emit_4v_t(*this, op, dst_, src1_, src2_, src3_); }
void UniCompiler::emit_4v(UniOpVVVV op, const OpArray& dst_, const OpArray& src1_, const OpArray& src2_, const Operand& src3_) noexcept { emit_4v_t(*this, op, dst_, src1_, src2_, src3_); }
void UniCompiler::emit_4v(UniOpVVVV op, const OpArray& dst_, const OpArray& src1_, const OpArray& src2_, const OpArray& src3_) noexcept { emit_4v_t(*this, op, dst_, src1_, src2_, src3_); }
void UniCompiler::emit_4v(UniOpVVVV op, const OpArray& dst_, const Operand_& src1_, const Operand_& src2_, const OpArray& src3_) { emit_4v_t(*this, op, dst_, src1_, src2_, src3_); }
void UniCompiler::emit_4v(UniOpVVVV op, const OpArray& dst_, const Operand_& src1_, const OpArray& src2_, const Operand& src3_) { emit_4v_t(*this, op, dst_, src1_, src2_, src3_); }
void UniCompiler::emit_4v(UniOpVVVV op, const OpArray& dst_, const Operand_& src1_, const OpArray& src2_, const OpArray& src3_) { emit_4v_t(*this, op, dst_, src1_, src2_, src3_); }
void UniCompiler::emit_4v(UniOpVVVV op, const OpArray& dst_, const OpArray& src1_, const Operand_& src2_, const Operand& src3_) { emit_4v_t(*this, op, dst_, src1_, src2_, src3_); }
void UniCompiler::emit_4v(UniOpVVVV op, const OpArray& dst_, const OpArray& src1_, const Operand_& src2_, const OpArray& src3_) { emit_4v_t(*this, op, dst_, src1_, src2_, src3_); }
void UniCompiler::emit_4v(UniOpVVVV op, const OpArray& dst_, const OpArray& src1_, const OpArray& src2_, const Operand& src3_) { emit_4v_t(*this, op, dst_, src1_, src2_, src3_); }
void UniCompiler::emit_4v(UniOpVVVV op, const OpArray& dst_, const OpArray& src1_, const OpArray& src2_, const OpArray& src3_) { emit_4v_t(*this, op, dst_, src1_, src2_, src3_); }
ASMJIT_END_SUB_NAMESPACE

View File

@@ -32,6 +32,7 @@ enum class UniOpCond : uint32_t {
//! Instruction with a single memory operand.
enum class UniOpM : uint32_t {
kPrefetch, //!< Explicitly prefetch memory for reading (can be implemented as NOP).
kStoreZeroReg, //!< Store zero (data-width depends on register size).
kStoreZeroU8, //!< Store zero (8-bit).
kStoreZeroU16, //!< Store zero (16-bit).

View File

@@ -775,10 +775,18 @@ public:
ASMJIT_INLINE_NODEBUG Error invoke(Out<InvokeNode*> out, uint64_t target, const FuncSignature& signature) { return invoke_(out, Imm(int64_t(target)), signature); }
//! Return from function.
//!
//! Forwards to `add_ret()` with two default-constructed operands and returns its `Error` result.
//!
//! \note This doesn't end the function - it just emits a return.
ASMJIT_INLINE_NODEBUG Error ret() { return add_ret(Operand(), Operand()); }
//! \overload
//! Return from function - one value.
//!
//! Forwards to `add_ret()` with `o0` as the first operand and a default-constructed second operand,
//! and returns its `Error` result.
//!
//! \note This doesn't end the function - it just emits a return.
ASMJIT_INLINE_NODEBUG Error ret(const Reg& o0) { return add_ret(o0, Operand()); }
//! \overload
//! Return from function - two values / register pair.
//!
//! Forwards to `add_ret()` with `o0` and `o1` as its two operands and returns its `Error` result.
//!
//! \note This doesn't end the function - it just emits a return.
ASMJIT_INLINE_NODEBUG Error ret(const Reg& o0, const Reg& o1) { return add_ret(o0, o1); }
//! \}

View File

@@ -575,14 +575,11 @@ ASMJIT_FAVOR_SIZE Error EmitHelper::emit_prolog(const FuncFrame& frame) {
ASMJIT_FAVOR_SIZE Error EmitHelper::emit_epilog(const FuncFrame& frame) {
Emitter* emitter = _emitter->as<Emitter>();
uint32_t i;
uint32_t reg_id;
uint32_t register_size = emitter->register_size();
uint32_t gp_saved = frame.saved_regs(RegGroup::kGp);
Gp zsp = emitter->zsp(); // ESP|RSP register.
Gp zbp = emitter->zbp(); // EBP|RBP register.
Gp zsp = emitter->zsp(); // ESP|RSP register.
Gp zbp = emitter->zbp(); // EBP|RBP register.
Gp gp_reg = emitter->zsp(); // General purpose register (temporary).
// Don't emit 'pop zbp' in the pop sequence, this case is handled separately.
@@ -610,12 +607,21 @@ ASMJIT_FAVOR_SIZE Error EmitHelper::emit_epilog(const FuncFrame& frame) {
}
}
// Emit 'emms' and/or 'vzeroupper'.
if (frame.has_mmx_cleanup()) {
bool do_mmx_cleanup = frame.has_mmx_cleanup();
bool do_avx_cleanup = frame.has_avx_cleanup();
// Perform automatic AVX cleanup (VZEROUPPER) if there are dirty vector registers.
if (frame.has_avx_auto_cleanup() && frame.dirty_regs(RegGroup::kVec) != 0u) {
do_avx_cleanup = true;
}
// Emit 'EMMS' if MMX cleanup is enabled.
if (do_mmx_cleanup) {
ASMJIT_PROPAGATE(emitter->emms());
}
if (frame.has_avx_cleanup()) {
// Emit 'VZEROUPPER' if AVX cleanup is enabled.
if (do_avx_cleanup) {
ASMJIT_PROPAGATE(emitter->vzeroupper());
}
@@ -643,8 +649,8 @@ ASMJIT_FAVOR_SIZE Error EmitHelper::emit_epilog(const FuncFrame& frame) {
// Emit 'pop gp' sequence.
if (gp_saved) {
i = gp_saved;
reg_id = 16;
uint32_t i = gp_saved;
uint32_t reg_id = 16;
do {
reg_id--;

File diff suppressed because it is too large Load Diff