mirror of
https://github.com/asmjit/asmjit.git
synced 2025-12-16 20:17:05 +03:00
[abi] Improved ujit integration
* Do not mark UniCompiler emit functions noexcept to make the
signatures compatible with BackendCompiler
* Added more functions that wrap BackendCompiler functionality
* Simplified the way of creating functions to make it more
compatible with how BackendCompiler creates and finishes
functions
This commit is contained in:
@@ -1050,16 +1050,16 @@ Error Assembler::_emit(InstId inst_id, const Operand_& o0, const Operand_& o1, c
|
||||
o3.as<Imm>().value_as<uint64_t>() >= Support::bit_mask<uint32_t>(op_data.b_imm_size + op_data.b_imm_discard_lsb))
|
||||
goto InvalidImmediate;
|
||||
|
||||
uint32_t aImm = o2.as<Imm>().value_as<uint32_t>() >> op_data.a_imm_discard_lsb;
|
||||
uint32_t bImm = o3.as<Imm>().value_as<uint32_t>() >> op_data.b_imm_discard_lsb;
|
||||
uint32_t a_imm = o2.as<Imm>().value_as<uint32_t>() >> op_data.a_imm_discard_lsb;
|
||||
uint32_t b_imm = o3.as<Imm>().value_as<uint32_t>() >> op_data.b_imm_discard_lsb;
|
||||
|
||||
if ((aImm << op_data.a_imm_discard_lsb) != o2.as<Imm>().value_as<uint32_t>() ||
|
||||
(bImm << op_data.b_imm_discard_lsb) != o3.as<Imm>().value_as<uint32_t>())
|
||||
if ((a_imm << op_data.a_imm_discard_lsb) != o2.as<Imm>().value_as<uint32_t>() ||
|
||||
(b_imm << op_data.b_imm_discard_lsb) != o3.as<Imm>().value_as<uint32_t>())
|
||||
goto InvalidImmediate;
|
||||
|
||||
opcode.reset(op_data.opcode());
|
||||
opcode.add_imm(aImm, op_data.a_imm_offset);
|
||||
opcode.add_imm(bImm, op_data.b_imm_offset);
|
||||
opcode.add_imm(a_imm, op_data.a_imm_offset);
|
||||
opcode.add_imm(b_imm, op_data.b_imm_offset);
|
||||
opcode.add_reg(o1, 5);
|
||||
opcode.add_reg(o0, 0);
|
||||
goto EmitOp;
|
||||
|
||||
@@ -215,11 +215,19 @@ public:
|
||||
//! \overload
|
||||
ASMJIT_INLINE_NODEBUG Error invoke(Out<InvokeNode*> out, uint64_t target, const FuncSignature& signature) { return invoke_(out, Imm(int64_t(target)), signature); }
|
||||
|
||||
//! Return.
|
||||
//! Return from function.
|
||||
//!
|
||||
//! \note This doesn't end the function - it just emits a return.
|
||||
ASMJIT_INLINE_NODEBUG Error ret() { return add_ret(Operand(), Operand()); }
|
||||
//! \overload
|
||||
|
||||
//! Return from function - one value.
|
||||
//!
|
||||
//! \note This doesn't end the function - it just emits a return.
|
||||
ASMJIT_INLINE_NODEBUG Error ret(const Reg& o0) { return add_ret(o0, Operand()); }
|
||||
//! \overload
|
||||
|
||||
//! Return from function - two values / register pair.
|
||||
//!
|
||||
//! \note This doesn't end the function - it just emits a return.
|
||||
ASMJIT_INLINE_NODEBUG Error ret(const Reg& o0, const Reg& o1) { return add_ret(o0, o1); }
|
||||
|
||||
//! \}
|
||||
|
||||
@@ -2118,7 +2118,8 @@ namespace asmjit {
|
||||
//! its own IR. Instead, it translates user calls into target-dependent instructions (or instruction sequences)
|
||||
//! and allows users to switch to target-specific assembly only where required for extra performance.
|
||||
//!
|
||||
//! \warning UJIT is still in an experimental phase, expect minor API breaks in the future.
|
||||
//! \warning UJIT is still in an experimental phase, expect minor API breaks in the future, especially towards API
|
||||
//! stabilization.
|
||||
//!
|
||||
//! API Overview
|
||||
//!
|
||||
@@ -2155,6 +2156,123 @@ namespace asmjit {
|
||||
//! - \ref ujit::UniOpVVV - instruction with `[vec, vec, vec]` operands.
|
||||
//! - \ref ujit::UniOpVVVI - instruction with `[vec, vec, vec, imm]` operands.
|
||||
//! - \ref ujit::UniOpVVVV - instruction with `[vec, vec, vec, vec]` operands.
|
||||
//!
|
||||
//! ### UniCompiler Example
|
||||
//!
|
||||
//! Using UniCompiler is like using AsmJit's regular platform-dependent Compiler - UniCompiler wraps its API
|
||||
//! and delegates most non-emit calls to Compiler, however, it abstracts how instructions are emitted so it
|
||||
//! can offer a universal API for both general-purpose and SIMD instructions. The following example demonstrates
|
||||
//! how to use it:
|
||||
//!
|
||||
//! ```
|
||||
//! #include <asmjit/ujit.h>
|
||||
//! #include <stdio.h>
|
||||
//!
|
||||
//! using namespace asmjit;
|
||||
//!
|
||||
//! int main() {
|
||||
//! // Signature of the generated function.
|
||||
//! using Func = void (*)(uint32_t* dst, const uint32_t* src1, const uint32_t* src2);
|
||||
//!
|
||||
//! JitRuntime rt; // Creates a JIT runtime that holds executable code.
|
||||
//! FileLogger logger(stdout); // Creates a logger that prints to stdout.
|
||||
//! CodeHolder code; // Creates a CodeHolder - holds code and other information.
|
||||
//!
|
||||
//! code.init(rt.environment(), // Initializes CodeHolder to match the JIT environment.
|
||||
//! rt.cpu_features());
|
||||
//! code.set_logger(&logger); // Initializes CodeHolder's logger.
|
||||
//!
|
||||
//! ujit::BackendCompiler backend_cc(&code); // Creates a regular backend compiler instance.
|
||||
//! ujit::UniCompiler uc(&backend_cc, // Creates UniCompiler with attached backend compiler.
|
||||
//! rt.cpu_features(), // CPU features must be passed explicitly.
|
||||
//! rt.cpu_hints()); // CPU hints must be passed explicitly.
|
||||
//!
|
||||
//! // Begin a function of the required signature (this exactly matches the Compiler use).
|
||||
//! FuncNode* func = uc.add_func(FuncSignature::build<void, uint32_t*, const uint32_t*, const uint32_t*>());
|
||||
//!
|
||||
//! ujit::Gp d_ptr = uc.new_gp_ptr(); // Creates a destination pointer.
|
||||
//! ujit::Gp a_ptr = uc.new_gp_ptr(); // Creates a first source pointer.
|
||||
//! ujit::Gp b_ptr = uc.new_gp_ptr(); // Creates a second source pointer.
|
||||
//!
|
||||
//! func->set_arg(0, d_ptr); // Assigns 1st argument.
|
||||
//! func->set_arg(1, a_ptr); // Assigns 2nd argument.
|
||||
//! func->set_arg(2, b_ptr); // Assigns 3rd argument.
|
||||
//!
|
||||
//! ujit::Vec v0 = uc.new_vec128(); // Creates a 128-bit vector register.
|
||||
//! ujit::Vec v1 = uc.new_vec128(); // Creates a 128-bit vector register.
|
||||
//!
|
||||
//! uc.v_loadu128(v0, ujit::mem_ptr(a_ptr)); // Unaligned load of 128 bits from [a_ptr] into v0.
|
||||
//! uc.v_loadu128(v1, ujit::mem_ptr(b_ptr)); // Unaligned load of 128 bits from [b_ptr] into v1.
|
||||
//! uc.v_add_i32(v0, v0, v1); // Vector addition of 4 32-bit integers.
|
||||
//! uc.v_storeu128(ujit::mem_ptr(d_ptr), v0);// Unaligned store of 128 bits from v0 to [d_ptr].
|
||||
//!
|
||||
//! uc.end_func(); // End of the function body.
|
||||
//! Error err1 = uc.finalize(); // Translates and assembles the whole 'backend_cc' content.
|
||||
//!
|
||||
//! if (err1 != Error::kOk) {
|
||||
//! // Handle a possible error returned by AsmJit as finalize can fail. One reason could be wrong operands
|
||||
//! // to some instruction or other platform constraints. Usually UniCompiler handles most of platform
|
||||
//! // constraints by itself, but this error code must be checked regardless.
|
||||
//! return 1;
|
||||
//! }
|
||||
//! // ----> Both BackendCompiler and UniCompiler are no longer needed from here and can be destroyed <----
|
||||
//!
|
||||
//! Func fn;
|
||||
//! Error err2 = rt.add(&fn, &code); // Add the generated code to JIT runtime (executable memory).
|
||||
//!
|
||||
//! if (err2 != Error::kOk) {
|
||||
//! // Handle a possible error returned by AsmJit. This would be either out of executable memory or failure
|
||||
//! // to allocate it (for example excessive user-space hardening or making the allocation of executable
|
||||
//! // memory forbidden).
|
||||
//! return 1;
|
||||
//! }
|
||||
//! // ----> CodeHolder is no longer needed from here and can be destroyed <----
|
||||
//!
|
||||
//! // Input data.
|
||||
//! static constexpr uint32_t a_data[4] = {1u,2u,4u,8u};
|
||||
//! static constexpr uint32_t b_data[4] = {6u,4u,3u,1u};
|
||||
//!
|
||||
//! // Output data.
|
||||
//! uint32_t d_data[4] {};
|
||||
//!
|
||||
//! // Calls the generated function.
|
||||
//! fn(d_data, a_data, b_data);
|
||||
//!
|
||||
//! // Prints both inputs and the output.
|
||||
//! printf("a_data={%u,%u,%u,%u}\n", a_data[0], a_data[1], a_data[2], a_data[3]);
|
||||
//! printf("b_data={%u,%u,%u,%u}\n", b_data[0], b_data[1], b_data[2], b_data[3]);
|
||||
//! printf("d_data={%u,%u,%u,%u}\n", d_data[0], d_data[1], d_data[2], d_data[3]);
|
||||
//!
|
||||
//! // Explicitly removes the function from JIT runtime.
|
||||
//! rt.release(fn);
|
||||
//!
|
||||
//! return 0;
|
||||
//! }
|
||||
//! ```
|
||||
//!
|
||||
//! ### Emitting Backend-Specific Code
|
||||
//!
|
||||
//! In cases where backend-specific code is required for performance reasons, it's possible to use the
|
||||
//! underlying backend-specific Compiler, which is provided as a `cc` member of `UniCompiler`. The next example
|
||||
//! demonstrates how to use AArch64-specific code path during code generation:
|
||||
//!
|
||||
//! ```
|
||||
//! #include <asmjit/ujit.h>
|
||||
//!
|
||||
//! using namespace asmjit;
|
||||
//!
|
||||
//! void emit_backend_specific_code(ujit::UniCompiler& uc, const ujit::Gp& a, const ujit::Gp& b, const ujit::Gp& c) {
|
||||
//! #if defined(ASMJIT_UJIT_AARCH64)
|
||||
//! // Emit aarch64 specific code via `uc.cc`:
|
||||
//! uc.cc->orn(a, b, c);
|
||||
//! #else
|
||||
//! // Generic code.
|
||||
//! ujit::Gp tmp = uc.new_similar_reg(a);
|
||||
//! uc.not_(tmp, c);
|
||||
//! uc.or_(a, b, tmp);
|
||||
//! #endif
//! }
|
||||
//! ```
|
||||
//!
|
||||
|
||||
//! \cond INTERNAL
|
||||
//! \defgroup asmjit_ra RA
|
||||
|
||||
@@ -16,7 +16,7 @@
|
||||
#define ASMJIT_LIBRARY_MAKE_VERSION(major, minor, patch) ((major << 16) | (minor << 8) | (patch))
|
||||
|
||||
//! AsmJit library version, see \ref ASMJIT_LIBRARY_MAKE_VERSION for a version format reference.
|
||||
#define ASMJIT_LIBRARY_VERSION ASMJIT_LIBRARY_MAKE_VERSION(1, 19, 0)
|
||||
#define ASMJIT_LIBRARY_VERSION ASMJIT_LIBRARY_MAKE_VERSION(1, 20, 0)
|
||||
|
||||
//! \def ASMJIT_ABI_NAMESPACE
|
||||
//!
|
||||
@@ -27,7 +27,7 @@
|
||||
//! AsmJit default, which makes it possible to use multiple AsmJit libraries within a single project, totally
|
||||
//! controlled by users. This is especially useful in cases in which one of those libraries comes from a third party.
|
||||
#if !defined(ASMJIT_ABI_NAMESPACE)
|
||||
#define ASMJIT_ABI_NAMESPACE v1_19
|
||||
#define ASMJIT_ABI_NAMESPACE v1_20
|
||||
#endif // !ASMJIT_ABI_NAMESPACE
|
||||
|
||||
//! \}
|
||||
|
||||
@@ -126,6 +126,8 @@ public:
|
||||
|
||||
ASMJIT_API Error comment(const char* data, size_t size = SIZE_MAX) override;
|
||||
|
||||
ASMJIT_INLINE Error comment(Span<const char> data) { return comment(data.data(), data.size()); }
|
||||
|
||||
//! \}
|
||||
|
||||
//! \name Events
|
||||
|
||||
@@ -403,6 +403,11 @@ public:
|
||||
[[nodiscard]]
|
||||
ASMJIT_API Label new_named_label(const char* name, size_t name_size = SIZE_MAX, LabelType type = LabelType::kGlobal, uint32_t parent_id = Globals::kInvalidId) override;
|
||||
|
||||
[[nodiscard]]
|
||||
ASMJIT_INLINE Label new_named_label(Span<const char> name, LabelType type = LabelType::kGlobal, uint32_t parent_id = Globals::kInvalidId) {
|
||||
return new_named_label(name.data(), name.size(), type, parent_id);
|
||||
}
|
||||
|
||||
ASMJIT_API Error bind(const Label& label) override;
|
||||
|
||||
//! \}
|
||||
@@ -473,6 +478,8 @@ public:
|
||||
|
||||
ASMJIT_API Error comment(const char* data, size_t size = SIZE_MAX) override;
|
||||
|
||||
ASMJIT_INLINE Error comment(Span<const char> data) { return comment(data.data(), data.size()); }
|
||||
|
||||
//! \}
|
||||
|
||||
//! \name Serialization
|
||||
|
||||
@@ -96,6 +96,10 @@ public:
|
||||
//! \name Function Management
|
||||
//! \{
|
||||
|
||||
//! Returns the function being generated.
|
||||
[[nodiscard]]
|
||||
ASMJIT_INLINE_NODEBUG FuncNode* func() const noexcept { return _func; }
|
||||
|
||||
//! Creates a new \ref FuncNode.
|
||||
ASMJIT_API Error new_func_node(Out<FuncNode*> out, const FuncSignature& signature);
|
||||
//! Creates a new \ref FuncNode and adds it to the instruction stream.
|
||||
@@ -106,10 +110,6 @@ public:
|
||||
//! Creates a new \ref FuncRetNode and adds it to the instruction stream.
|
||||
ASMJIT_API Error add_func_ret_node(Out<FuncRetNode*> out, const Operand_& o0, const Operand_& o1);
|
||||
|
||||
//! Returns the current function.
|
||||
[[nodiscard]]
|
||||
ASMJIT_INLINE_NODEBUG FuncNode* func() const noexcept { return _func; }
|
||||
|
||||
//! Creates a new \ref FuncNode with the given `signature` and returns it.
|
||||
ASMJIT_INLINE FuncNode* new_func(const FuncSignature& signature) {
|
||||
FuncNode* node;
|
||||
@@ -127,7 +127,12 @@ public:
|
||||
|
||||
//! Adds the function node `func` to the instruction stream.
|
||||
ASMJIT_API FuncNode* add_func(FuncNode* ASMJIT_NONNULL(func));
|
||||
//! Emits a sentinel that marks the end of the current function.
|
||||
|
||||
//! Ends the current function by emitting a sentinel that marks the end of it.
|
||||
//!
|
||||
//! This closes the context for generating the current function. After calling \ref end_func() the active
|
||||
//! function node is reset and \ref func() returns `nullptr` until another function is started via
|
||||
//! \ref add_func().
|
||||
ASMJIT_API Error end_func();
|
||||
|
||||
ASMJIT_INLINE Error add_ret(const Operand_& o0, const Operand_& o1) {
|
||||
|
||||
@@ -678,7 +678,7 @@ public:
|
||||
//! \name Labels
|
||||
//! \{
|
||||
|
||||
//! Creates a new label.
|
||||
//! Creates a new anonymous label.
|
||||
[[nodiscard]]
|
||||
ASMJIT_API virtual Label new_label();
|
||||
|
||||
@@ -891,6 +891,11 @@ public:
|
||||
//! Emits a comment stored in `data` with an optional `size` parameter.
|
||||
ASMJIT_API virtual Error comment(const char* data, size_t size = SIZE_MAX);
|
||||
|
||||
//! Emits a comment passed via a `data` span.
|
||||
ASMJIT_INLINE Error comment(Span<const char> data) {
|
||||
return comment(data.data(), data.size());
|
||||
}
|
||||
|
||||
//! Emits a formatted comment specified by `fmt` and variable number of arguments.
|
||||
ASMJIT_API Error commentf(const char* fmt, ...);
|
||||
//! Emits a formatted comment specified by `fmt` and `ap`.
|
||||
|
||||
@@ -874,7 +874,10 @@ enum class FuncAttributes : uint32_t {
|
||||
kX86_MMXCleanup = 0x00040000u,
|
||||
|
||||
//! This flag instructs the epilog writer to emit VZEROUPPER instruction before RET (X86|X86_64).
|
||||
kX86_AVXCleanup = 0x00080000u
|
||||
kX86_AVXCleanup = 0x00080000u,
|
||||
|
||||
//! This flag instructs the epilog writer to emit VZEROUPPER only if there are dirty vector registers (X86|X86_64).
|
||||
kX86_AVXAutoCleanup = 0x00100000u
|
||||
};
|
||||
ASMJIT_DEFINE_ENUM_FLAGS(FuncAttributes)
|
||||
|
||||
@@ -1341,6 +1344,19 @@ public:
|
||||
//! Disables AVX cleanup.
|
||||
ASMJIT_INLINE_NODEBUG void reset_avx_cleanup() noexcept { clear_attributes(FuncAttributes::kX86_AVXCleanup); }
|
||||
|
||||
//! Tests whether the function has automatic AVX cleanup - 'vzeroupper' instruction in epilog when vector registers are
|
||||
//! used.
|
||||
//!
|
||||
//! \note Automatic cleanup is currently determined via dirty registers, which are provided by \ref FuncFrame.
|
||||
[[nodiscard]]
|
||||
ASMJIT_INLINE_NODEBUG bool has_avx_auto_cleanup() const noexcept { return has_attribute(FuncAttributes::kX86_AVXAutoCleanup); }
|
||||
|
||||
//! Enables AVX automatic cleanup.
|
||||
ASMJIT_INLINE_NODEBUG void set_avx_auto_cleanup() noexcept { add_attributes(FuncAttributes::kX86_AVXAutoCleanup); }
|
||||
|
||||
//! Disables AVX automatic cleanup.
|
||||
ASMJIT_INLINE_NODEBUG void reset_avx_auto_cleanup() noexcept { clear_attributes(FuncAttributes::kX86_AVXAutoCleanup); }
|
||||
|
||||
//! Tests whether the function uses call stack.
|
||||
[[nodiscard]]
|
||||
ASMJIT_INLINE_NODEBUG bool has_call_stack() const noexcept { return _call_stack_size != 0; }
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -105,47 +105,64 @@ bool UniCompiler::has_masked_access_of(uint32_t data_size) const noexcept {
|
||||
}
|
||||
}
|
||||
|
||||
// ujit::UniCompiler - Embed
|
||||
// =========================
|
||||
|
||||
// Embeds a jump table through the backend compiler `cc`.
//
// One entry of `entry_size` bytes is emitted per element of `jump_table`: a valid label is emitted via
// `embed_label_delta()` relative to `jump_table_base`, an invalid label is emitted as `entry_size` zero bytes.
//
// NOTE(review): `zeros` holds only 8 bytes, so `entry_size` presumably must not exceed 8 - confirm with callers.
void UniCompiler::embed_jump_table(Span<const Label> jump_table, const Label& jump_table_base, uint32_t entry_size) {
|
||||
static const uint8_t zeros[8] {};
|
||||
|
||||
for (const Label& label : jump_table) {
|
||||
if (label.is_valid()) {
|
||||
cc->embed_label_delta(label, jump_table_base, entry_size);
|
||||
}
|
||||
else {
|
||||
// Invalid label - keep the slot, but fill it with zeros.
cc->embed(zeros, entry_size);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ujit::UniCompiler - Function
|
||||
// ============================
|
||||
|
||||
void UniCompiler::init_function(FuncNode* func_node) noexcept {
|
||||
cc->add_func(func_node);
|
||||
void UniCompiler::hook_func() noexcept {
|
||||
FuncNode* func = cc->func();
|
||||
_func_init_hook = func;
|
||||
}
|
||||
|
||||
_func_node = func_node;
|
||||
_func_init = cc->cursor();
|
||||
_func_end = func_node->end_node()->prev();
|
||||
void UniCompiler::unhook_func() noexcept {
|
||||
_func_init_hook = nullptr;
|
||||
}
|
||||
|
||||
// ujit::UniCompiler - Constants
|
||||
// =============================
|
||||
|
||||
void UniCompiler::_init_vec_const_table_ptr() noexcept {
|
||||
void UniCompiler::_init_vec_const_table_ptr() {
|
||||
const void* global = ct_ptr<void>();
|
||||
|
||||
if (!_common_table_ptr.is_valid()) {
|
||||
ScopedInjector injector(cc, &_func_init);
|
||||
ScopedInjector injector(cc, &_func_init_hook);
|
||||
_common_table_ptr = new_gpz("common_table_ptr");
|
||||
cc->mov(_common_table_ptr, (int64_t)global + _common_table_offset);
|
||||
}
|
||||
}
|
||||
|
||||
Operand UniCompiler::simd_const(const void* c, Bcst bcst_width, VecWidth const_width) noexcept {
|
||||
Operand UniCompiler::simd_const(const void* c, Bcst bcst_width, VecWidth const_width) {
|
||||
return simd_vec_const(c, bcst_width, const_width);
|
||||
}
|
||||
|
||||
Operand UniCompiler::simd_const(const void* c, Bcst bcst_width, const Vec& similar_to) noexcept {
|
||||
Operand UniCompiler::simd_const(const void* c, Bcst bcst_width, const Vec& similar_to) {
|
||||
Support::maybe_unused(similar_to);
|
||||
return simd_vec_const(c, bcst_width, VecWidth::k128);
|
||||
}
|
||||
|
||||
Operand UniCompiler::simd_const(const void* c, Bcst bcst_width, const VecArray& similar_to) noexcept {
|
||||
Operand UniCompiler::simd_const(const void* c, Bcst bcst_width, const VecArray& similar_to) {
|
||||
ASMJIT_ASSERT(!similar_to.is_empty());
|
||||
Support::maybe_unused(bcst_width, similar_to);
|
||||
|
||||
return simd_vec_const(c, bcst_width, VecWidth::k128);
|
||||
}
|
||||
|
||||
Vec UniCompiler::simd_vec_const(const void* c, Bcst bcst_width, VecWidth const_width) noexcept {
|
||||
Vec UniCompiler::simd_vec_const(const void* c, Bcst bcst_width, VecWidth const_width) {
|
||||
Support::maybe_unused(bcst_width);
|
||||
Support::maybe_unused(const_width);
|
||||
|
||||
@@ -159,32 +176,32 @@ Vec UniCompiler::simd_vec_const(const void* c, Bcst bcst_width, VecWidth const_w
|
||||
return Vec(OperandSignature{RegTraits<RegType::kVec128>::kSignature}, _new_vec_const(c, true).id());
|
||||
}
|
||||
|
||||
Vec UniCompiler::simd_vec_const(const void* c, Bcst bcst_width, const Vec& similar_to) noexcept {
|
||||
Vec UniCompiler::simd_vec_const(const void* c, Bcst bcst_width, const Vec& similar_to) {
|
||||
Support::maybe_unused(similar_to);
|
||||
return simd_vec_const(c, bcst_width, VecWidth::k128);
|
||||
}
|
||||
|
||||
Vec UniCompiler::simd_vec_const(const void* c, Bcst bcst_width, const VecArray& similar_to) noexcept {
|
||||
Vec UniCompiler::simd_vec_const(const void* c, Bcst bcst_width, const VecArray& similar_to) {
|
||||
Support::maybe_unused(similar_to);
|
||||
return simd_vec_const(c, bcst_width, VecWidth::k128);
|
||||
}
|
||||
|
||||
Mem UniCompiler::simd_mem_const(const void* c, Bcst bcst_width, VecWidth const_width) noexcept {
|
||||
Mem UniCompiler::simd_mem_const(const void* c, Bcst bcst_width, VecWidth const_width) {
|
||||
Support::maybe_unused(bcst_width, const_width);
|
||||
return _get_mem_const(c);
|
||||
}
|
||||
|
||||
Mem UniCompiler::simd_mem_const(const void* c, Bcst bcst_width, const Vec& similar_to) noexcept {
|
||||
Mem UniCompiler::simd_mem_const(const void* c, Bcst bcst_width, const Vec& similar_to) {
|
||||
Support::maybe_unused(bcst_width, similar_to);
|
||||
return _get_mem_const(c);
|
||||
}
|
||||
|
||||
Mem UniCompiler::simd_mem_const(const void* c, Bcst bcst_width, const VecArray& similar_to) noexcept {
|
||||
Mem UniCompiler::simd_mem_const(const void* c, Bcst bcst_width, const VecArray& similar_to) {
|
||||
Support::maybe_unused(bcst_width, similar_to);
|
||||
return _get_mem_const(c);
|
||||
}
|
||||
|
||||
Mem UniCompiler::_get_mem_const(const void* c) noexcept {
|
||||
Mem UniCompiler::_get_mem_const(const void* c) {
|
||||
// Make sure we are addressing a constant from the `ct` constant pool.
|
||||
const void* ct_addr = ct_ptr<void>();
|
||||
ASMJIT_ASSERT((uintptr_t)c >= (uintptr_t)ct_addr &&
|
||||
@@ -197,7 +214,7 @@ Mem UniCompiler::_get_mem_const(const void* c) noexcept {
|
||||
return mem_ptr(_common_table_ptr, disp - _common_table_offset);
|
||||
}
|
||||
|
||||
Vec UniCompiler::_new_vec_const(const void* c, bool is_unique_const) noexcept {
|
||||
Vec UniCompiler::_new_vec_const(const void* c, bool is_unique_const) {
|
||||
Support::maybe_unused(is_unique_const);
|
||||
|
||||
Vec vec;
|
||||
@@ -226,21 +243,21 @@ Vec UniCompiler::_new_vec_const(const void* c, bool is_unique_const) noexcept {
|
||||
_vec_consts.append(arena(), const_data);
|
||||
|
||||
if (c == &ct().p_0000000000000000) {
|
||||
ScopedInjector inject(cc, &_func_init);
|
||||
ScopedInjector inject(cc, &_func_init_hook);
|
||||
v_zero_i(vec.v128());
|
||||
}
|
||||
else {
|
||||
// NOTE: _get_mem_const() must be outside of injected code as it uses injection too.
|
||||
Mem m = _get_mem_const(c);
|
||||
|
||||
ScopedInjector inject(cc, &_func_init);
|
||||
ScopedInjector inject(cc, &_func_init_hook);
|
||||
v_loadavec(vec, m);
|
||||
}
|
||||
|
||||
return vec;
|
||||
}
|
||||
|
||||
Vec UniCompiler::simd_const_16b(const void* data16) noexcept {
|
||||
Vec UniCompiler::simd_const_16b(const void* data16) {
|
||||
size_t n = _vec_consts_ex.size();
|
||||
|
||||
for (size_t i = 0; i < n; i++) {
|
||||
@@ -258,7 +275,7 @@ Vec UniCompiler::simd_const_16b(const void* data16) noexcept {
|
||||
|
||||
Mem mem = cc->new_const(ConstPoolScope::kLocal, data16, 16);
|
||||
{
|
||||
ScopedInjector inject(cc, &_func_init);
|
||||
ScopedInjector inject(cc, &_func_init_hook);
|
||||
v_loadavec(vec, mem);
|
||||
}
|
||||
|
||||
@@ -268,7 +285,7 @@ Vec UniCompiler::simd_const_16b(const void* data16) noexcept {
|
||||
// ujit::UniCompiler - Stack
|
||||
// =========================
|
||||
|
||||
Mem UniCompiler::tmp_stack(StackId id, uint32_t size) noexcept {
|
||||
Mem UniCompiler::tmp_stack(StackId id, uint32_t size) {
|
||||
ASMJIT_ASSERT(Support::is_power_of_2(size));
|
||||
ASMJIT_ASSERT(size <= 32);
|
||||
|
||||
@@ -282,22 +299,6 @@ Mem UniCompiler::tmp_stack(StackId id, uint32_t size) noexcept {
|
||||
return stack;
|
||||
}
|
||||
|
||||
// ujit::UniCompiler - Utilities
|
||||
// =============================
|
||||
|
||||
void UniCompiler::embed_jump_table(const Label* jump_table, size_t jump_table_size, const Label& jump_table_base, uint32_t entry_size) noexcept {
|
||||
static const uint8_t zeros[8] {};
|
||||
|
||||
for (size_t i = 0; i < jump_table_size; i++) {
|
||||
if (jump_table[i].is_valid()) {
|
||||
cc->embed_label_delta(jump_table[i], jump_table_base, entry_size);
|
||||
}
|
||||
else {
|
||||
cc->embed(zeros, entry_size);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ujit::UniCompiler - General Purpose Instructions - Utilities
|
||||
// ============================================================
|
||||
|
||||
@@ -306,7 +307,7 @@ struct MemInst {
|
||||
uint16_t mem_size;
|
||||
};
|
||||
|
||||
static ASMJIT_NOINLINE void gp_emit_mem_op(UniCompiler& uc, Gp r, Mem m, MemInst ii) noexcept {
|
||||
static ASMJIT_NOINLINE void gp_emit_mem_op(UniCompiler& uc, Gp r, Mem m, MemInst ii) {
|
||||
BackendCompiler* cc = uc.cc;
|
||||
InstId inst_id = ii.inst_id;
|
||||
|
||||
@@ -332,7 +333,7 @@ static ASMJIT_INLINE const Gp& gp_zero_as(const Gp& ref) noexcept {
|
||||
return gp_zero_regs[size_t(ref.is_gp64())];
|
||||
}
|
||||
|
||||
static ASMJIT_NOINLINE Gp gp_force_reg(UniCompiler& uc, const Operand_& op, const Gp& ref) noexcept {
|
||||
static ASMJIT_NOINLINE Gp gp_force_reg(UniCompiler& uc, const Operand_& op, const Gp& ref) {
|
||||
ASMJIT_ASSERT(op.is_gp() || op.is_mem() || op.is_imm());
|
||||
|
||||
Gp reg;
|
||||
@@ -416,7 +417,7 @@ public:
|
||||
cond = a64::reverse_cond(cond);
|
||||
}
|
||||
|
||||
ASMJIT_NOINLINE void emit(UniCompiler& uc) noexcept {
|
||||
ASMJIT_NOINLINE void emit(UniCompiler& uc) {
|
||||
BackendCompiler* cc = uc.cc;
|
||||
ConditionOpInfo info = condition_op_info[size_t(op)];
|
||||
|
||||
@@ -514,7 +515,7 @@ public:
|
||||
// ujit::UniCompiler - General Purpose Instructions - Emit
|
||||
// =======================================================
|
||||
|
||||
void UniCompiler::emit_mov(const Gp& dst, const Operand_& src) noexcept {
|
||||
void UniCompiler::emit_mov(const Gp& dst, const Operand_& src) {
|
||||
if (src.is_mem()) {
|
||||
gp_emit_mem_op(*this, dst, src.as<Mem>(), MemInst{uint16_t(Inst::kIdLdr), uint16_t(dst.size())});
|
||||
}
|
||||
@@ -523,8 +524,9 @@ void UniCompiler::emit_mov(const Gp& dst, const Operand_& src) noexcept {
|
||||
}
|
||||
}
|
||||
|
||||
void UniCompiler::emit_m(UniOpM op, const Mem& m_) noexcept {
|
||||
void UniCompiler::emit_m(UniOpM op, const Mem& m_) {
|
||||
static constexpr MemInst st_inst[] = {
|
||||
{ Inst::kIdNone, 0 }, // kPrefetch
|
||||
{ Inst::kIdStr , 0 }, // kStoreZeroReg
|
||||
{ Inst::kIdStrb, 1 }, // kStoreZeroU8
|
||||
{ Inst::kIdStrh, 2 }, // kStoreZeroU16
|
||||
@@ -535,10 +537,14 @@ void UniCompiler::emit_m(UniOpM op, const Mem& m_) noexcept {
|
||||
Gp zero = gp_zero_regs[size_t(op == UniOpM::kStoreZeroReg || op == UniOpM::kStoreZeroU64)];
|
||||
MemInst ii = st_inst[size_t(op)];
|
||||
|
||||
if (!ii.inst_id) {
|
||||
return;
|
||||
}
|
||||
|
||||
gp_emit_mem_op(*this, zero, m_, ii);
|
||||
}
|
||||
|
||||
void UniCompiler::emit_rm(UniOpRM op, const Gp& dst, const Mem& src) noexcept {
|
||||
void UniCompiler::emit_rm(UniOpRM op, const Gp& dst, const Mem& src) {
|
||||
static constexpr MemInst ld_inst[] = {
|
||||
{ Inst::kIdLdr , 0 }, // kLoadReg
|
||||
{ Inst::kIdLdrsb, 1 }, // kLoadI8
|
||||
@@ -614,7 +620,7 @@ struct UniOpMRInfo {
|
||||
uint32_t size : 4;
|
||||
};
|
||||
|
||||
void UniCompiler::emit_mr(UniOpMR op, const Mem& dst, const Gp& src) noexcept {
|
||||
void UniCompiler::emit_mr(UniOpMR op, const Mem& dst, const Gp& src) {
|
||||
static constexpr UniOpMRInfo op_info_table[] = {
|
||||
{ Inst::kIdNone, Inst::kIdStr , 0 }, // kStoreReg
|
||||
{ Inst::kIdNone, Inst::kIdStrb, 1 }, // kStoreU8
|
||||
@@ -653,14 +659,14 @@ void UniCompiler::emit_mr(UniOpMR op, const Mem& dst, const Gp& src) noexcept {
|
||||
}
|
||||
}
|
||||
|
||||
void UniCompiler::emit_cmov(const Gp& dst, const Operand_& sel, const UniCondition& condition) noexcept {
|
||||
void UniCompiler::emit_cmov(const Gp& dst, const Operand_& sel, const UniCondition& condition) {
|
||||
ConditionApplier ca(condition);
|
||||
ca.optimize(*this);
|
||||
ca.emit(*this);
|
||||
cc->csel(dst, gp_force_reg(*this, sel, dst), dst, condition.cond);
|
||||
}
|
||||
|
||||
void UniCompiler::emit_select(const Gp& dst, const Operand_& sel1_, const Operand_& sel2_, const UniCondition& condition) noexcept {
|
||||
void UniCompiler::emit_select(const Gp& dst, const Operand_& sel1_, const Operand_& sel2_, const UniCondition& condition) {
|
||||
ConditionApplier ca(condition);
|
||||
ca.optimize(*this);
|
||||
ca.emit(*this);
|
||||
@@ -670,7 +676,7 @@ void UniCompiler::emit_select(const Gp& dst, const Operand_& sel1_, const Operan
|
||||
cc->csel(dst, sel1, sel2, condition.cond);
|
||||
}
|
||||
|
||||
void UniCompiler::emit_2i(UniOpRR op, const Gp& dst, const Operand_& src_) noexcept {
|
||||
void UniCompiler::emit_2i(UniOpRR op, const Gp& dst, const Operand_& src_) {
|
||||
// ArithOp Reg, Any
|
||||
// ----------------
|
||||
|
||||
@@ -749,7 +755,7 @@ static ASMJIT_INLINE_NODEBUG bool is_op_3i_commutative(UniOpRRR op) noexcept {
|
||||
return (kOp3ICommutativeMask & (uint64_t(1) << unsigned(op))) != 0;
|
||||
}
|
||||
|
||||
void UniCompiler::emit_3i(UniOpRRR op, const Gp& dst, const Operand_& src1_, const Operand_& src2_) noexcept {
|
||||
void UniCompiler::emit_3i(UniOpRRR op, const Gp& dst, const Operand_& src1_, const Operand_& src2_) {
|
||||
Operand src1(src1_);
|
||||
Operand src2(src2_);
|
||||
|
||||
@@ -1020,18 +1026,18 @@ void UniCompiler::emit_3i(UniOpRRR op, const Gp& dst, const Operand_& src1_, con
|
||||
}
|
||||
}
|
||||
|
||||
void UniCompiler::emit_j(const Operand_& target) noexcept {
|
||||
void UniCompiler::emit_j(const Operand_& target) {
|
||||
cc->emit(Inst::kIdB, target);
|
||||
}
|
||||
|
||||
void UniCompiler::emit_j_if(const Label& target, const UniCondition& condition) noexcept {
|
||||
void UniCompiler::emit_j_if(const Label& target, const UniCondition& condition) {
|
||||
ConditionApplier ca(condition);
|
||||
ca.optimize(*this);
|
||||
ca.emit(*this);
|
||||
cc->b(ca.cond, target);
|
||||
}
|
||||
|
||||
void UniCompiler::adds_u8(const Gp& dst, const Gp& src1, const Gp& src2) noexcept {
|
||||
void UniCompiler::adds_u8(const Gp& dst, const Gp& src1, const Gp& src2) {
|
||||
ASMJIT_ASSERT(dst.size() == src1.size());
|
||||
ASMJIT_ASSERT(dst.size() == src2.size());
|
||||
|
||||
@@ -1047,11 +1053,11 @@ void UniCompiler::adds_u8(const Gp& dst, const Gp& src1, const Gp& src2) noexcep
|
||||
cc->csel(dst, dst, tmp, CondCode::kLO);
|
||||
}
|
||||
|
||||
void UniCompiler::inv_u8(const Gp& dst, const Gp& src) noexcept {
|
||||
void UniCompiler::inv_u8(const Gp& dst, const Gp& src) {
|
||||
cc->eor(dst, src, 0xFF);
|
||||
}
|
||||
|
||||
void UniCompiler::div_255_u32(const Gp& dst, const Gp& src) noexcept {
|
||||
void UniCompiler::div_255_u32(const Gp& dst, const Gp& src) {
|
||||
ASMJIT_ASSERT(dst.size() == src.size());
|
||||
|
||||
// dst = src + 128;
|
||||
@@ -1061,13 +1067,13 @@ void UniCompiler::div_255_u32(const Gp& dst, const Gp& src) noexcept {
|
||||
cc->lsr(dst, dst, 8);
|
||||
}
|
||||
|
||||
void UniCompiler::mul_257_hu16(const Gp& dst, const Gp& src) noexcept {
|
||||
void UniCompiler::mul_257_hu16(const Gp& dst, const Gp& src) {
|
||||
ASMJIT_ASSERT(dst.size() == src.size());
|
||||
cc->add(dst, src, src, a64::lsl(8));
|
||||
cc->lsr(dst, dst, 16);
|
||||
}
|
||||
|
||||
void UniCompiler::add_scaled(const Gp& dst, const Gp& a_, int b) noexcept {
|
||||
void UniCompiler::add_scaled(const Gp& dst, const Gp& a_, int b) {
|
||||
Gp a = a_.clone_as(dst);
|
||||
|
||||
if (Support::is_power_of_2(b)) {
|
||||
@@ -1084,7 +1090,7 @@ void UniCompiler::add_scaled(const Gp& dst, const Gp& a_, int b) noexcept {
|
||||
}
|
||||
}
|
||||
|
||||
void UniCompiler::add_ext(const Gp& dst, const Gp& src_, const Gp& idx_, uint32_t scale, int32_t disp) noexcept {
|
||||
void UniCompiler::add_ext(const Gp& dst, const Gp& src_, const Gp& idx_, uint32_t scale, int32_t disp) {
|
||||
ASMJIT_ASSERT(scale != 0);
|
||||
|
||||
Gp src = src_.clone_as(dst);
|
||||
@@ -1115,7 +1121,7 @@ void UniCompiler::add_ext(const Gp& dst, const Gp& src_, const Gp& idx_, uint32_
|
||||
}
|
||||
}
|
||||
|
||||
void UniCompiler::lea(const Gp& dst, const Mem& src) noexcept {
|
||||
void UniCompiler::lea(const Gp& dst, const Mem& src) {
|
||||
Gp base = src.base_reg().as<Gp>();
|
||||
|
||||
if (src.has_index()) {
|
||||
@@ -1829,7 +1835,7 @@ static ASMJIT_INLINE void vec_set_type_and_index(Vec& vec, ElementSize sz, uint3
|
||||
vec.set_element_index(idx);
|
||||
}
|
||||
|
||||
static ASMJIT_NOINLINE void vec_load_mem(UniCompiler& uc, const Vec& dst, Mem src, uint32_t mem_size) noexcept {
|
||||
static ASMJIT_NOINLINE void vec_load_mem(UniCompiler& uc, const Vec& dst, Mem src, uint32_t mem_size) {
|
||||
BackendCompiler* cc = uc.cc;
|
||||
|
||||
if (src.has_index() && src.has_shift()) {
|
||||
@@ -1863,7 +1869,7 @@ static ASMJIT_NOINLINE void vec_load_mem(UniCompiler& uc, const Vec& dst, Mem sr
|
||||
}
|
||||
}
|
||||
|
||||
static ASMJIT_NOINLINE Vec vec_from_mem(UniCompiler& uc, const Mem& op, const Vec& ref, uint32_t mem_size = 0) noexcept {
|
||||
static ASMJIT_NOINLINE Vec vec_from_mem(UniCompiler& uc, const Mem& op, const Vec& ref, uint32_t mem_size = 0) {
|
||||
Vec vec = uc.new_vec128("@tmp");
|
||||
if (mem_size == 0)
|
||||
mem_size = ref.size();
|
||||
@@ -1871,21 +1877,21 @@ static ASMJIT_NOINLINE Vec vec_from_mem(UniCompiler& uc, const Mem& op, const Ve
|
||||
return vec.clone_as(ref);
|
||||
}
|
||||
|
||||
// Returns `op` as a vector register shaped like `ref`.
// A vector operand is returned via clone_as(ref); any other operand is treated
// as memory and forwarded to vec_from_mem() together with `mem_size`.
// Diff view: the `noexcept` signature is the removed line, the one after it its replacement.
static ASMJIT_INLINE Vec as_vec(UniCompiler& uc, const Operand_& op, const Vec& ref, uint32_t mem_size = 0) noexcept {
|
||||
static ASMJIT_INLINE Vec as_vec(UniCompiler& uc, const Operand_& op, const Vec& ref, uint32_t mem_size = 0) {
|
||||
if (op.is_vec())
|
||||
return op.as<Vec>().clone_as(ref);
|
||||
else
|
||||
return vec_from_mem(uc, op.as<Mem>(), ref, mem_size);
|
||||
}
|
||||
|
||||
// FloatMode overload: returns `op` as a vector register shaped like `ref`.
// A vector operand is returned via clone_as(ref); any other operand is treated
// as memory and forwarded to vec_from_mem() with the memory size looked up in
// float_mode_mem_size_table[fm].
// Diff view: the `noexcept` signature is the removed line, the one after it its replacement.
static ASMJIT_INLINE Vec as_vec(UniCompiler& uc, const Operand_& op, const Vec& ref, FloatMode fm) noexcept {
|
||||
static ASMJIT_INLINE Vec as_vec(UniCompiler& uc, const Operand_& op, const Vec& ref, FloatMode fm) {
|
||||
if (op.is_vec())
|
||||
return op.as<Vec>().clone_as(ref);
|
||||
else
|
||||
return vec_from_mem(uc, op.as<Mem>(), ref, float_mode_mem_size_table[size_t(fm)]);
|
||||
}
|
||||
|
||||
static ASMJIT_NOINLINE Vec vec_mov(UniCompiler& uc, const Vec& dst_, const Operand_& src_) noexcept {
|
||||
static ASMJIT_NOINLINE Vec vec_mov(UniCompiler& uc, const Vec& dst_, const Operand_& src_) {
|
||||
BackendCompiler* cc = uc.cc;
|
||||
|
||||
Vec dst(dst_);
|
||||
@@ -1908,7 +1914,7 @@ static ASMJIT_NOINLINE Vec vec_mov(UniCompiler& uc, const Vec& dst_, const Opera
|
||||
ASMJIT_NOT_REACHED();
|
||||
}
|
||||
|
||||
static ASMJIT_NOINLINE void vec_neg(UniCompiler& uc, const Vec& dst, const Vec& src, FloatMode fm) noexcept {
|
||||
static ASMJIT_NOINLINE void vec_neg(UniCompiler& uc, const Vec& dst, const Vec& src, FloatMode fm) {
|
||||
BackendCompiler* cc = uc.cc;
|
||||
|
||||
if (fm == FloatMode::kF32S)
|
||||
@@ -2129,7 +2135,7 @@ static constexpr Swizzle32Data swizzle_32_data[256] = {
|
||||
|
||||
#undef OP
|
||||
|
||||
static void emit_swizzle32_impl(UniCompiler& uc, const Vec& dst, const Vec& src, uint32_t imm) noexcept {
|
||||
static void emit_swizzle32_impl(UniCompiler& uc, const Vec& dst, const Vec& src, uint32_t imm) {
|
||||
ASMJIT_ASSERT((imm & 0xFCFCFCFC) == 0);
|
||||
|
||||
BackendCompiler* cc = uc.cc;
|
||||
@@ -2528,7 +2534,7 @@ static constexpr InterleavedShuffle32Ops interleaved_shuffle32_ops_dst_same_as_b
|
||||
|
||||
#undef OP
|
||||
|
||||
static void emit_interleaved_shuffle32_impl(UniCompiler& uc, const Vec& dst, const Vec& src1, const Vec& src2, uint32_t imm) noexcept {
|
||||
static void emit_interleaved_shuffle32_impl(UniCompiler& uc, const Vec& dst, const Vec& src1, const Vec& src2, uint32_t imm) {
|
||||
ASMJIT_ASSERT((imm & 0xFCFCFCFC) == 0);
|
||||
|
||||
if (src1.id() == src2.id())
|
||||
@@ -2710,7 +2716,7 @@ public:
|
||||
};
|
||||
|
||||
template<typename Src>
|
||||
static ASMJIT_INLINE void emit_2v_t(UniCompiler& uc, UniOpVV op, const OpArray& dst_, const Src& src_) noexcept {
|
||||
static ASMJIT_INLINE void emit_2v_t(UniCompiler& uc, UniOpVV op, const OpArray& dst_, const Src& src_) {
|
||||
size_t n = dst_.size();
|
||||
OpArrayIter<Src> src(src_);
|
||||
|
||||
@@ -2721,7 +2727,7 @@ static ASMJIT_INLINE void emit_2v_t(UniCompiler& uc, UniOpVV op, const OpArray&
|
||||
}
|
||||
|
||||
template<typename Src>
|
||||
static ASMJIT_INLINE void emit_2vi_t(UniCompiler& uc, UniOpVVI op, const OpArray& dst_, const Src& src_, uint32_t imm) noexcept {
|
||||
static ASMJIT_INLINE void emit_2vi_t(UniCompiler& uc, UniOpVVI op, const OpArray& dst_, const Src& src_, uint32_t imm) {
|
||||
size_t n = dst_.size();
|
||||
OpArrayIter<Src> src(src_);
|
||||
|
||||
@@ -2732,7 +2738,7 @@ static ASMJIT_INLINE void emit_2vi_t(UniCompiler& uc, UniOpVVI op, const OpArray
|
||||
}
|
||||
|
||||
template<typename Src1, typename Src2>
|
||||
static ASMJIT_INLINE void emit_3v_t(UniCompiler& uc, UniOpVVV op, const OpArray& dst_, const Src1& src1_, const Src2& src2_) noexcept {
|
||||
static ASMJIT_INLINE void emit_3v_t(UniCompiler& uc, UniOpVVV op, const OpArray& dst_, const Src1& src1_, const Src2& src2_) {
|
||||
size_t n = dst_.size();
|
||||
OpArrayIter<Src1> src1(src1_);
|
||||
OpArrayIter<Src2> src2(src2_);
|
||||
@@ -2745,7 +2751,7 @@ static ASMJIT_INLINE void emit_3v_t(UniCompiler& uc, UniOpVVV op, const OpArray&
|
||||
}
|
||||
|
||||
template<typename Src1, typename Src2>
|
||||
static ASMJIT_INLINE void emit_3vi_t(UniCompiler& uc, UniOpVVVI op, const OpArray& dst_, const Src1& src1_, const Src2& src2_, uint32_t imm) noexcept {
|
||||
static ASMJIT_INLINE void emit_3vi_t(UniCompiler& uc, UniOpVVVI op, const OpArray& dst_, const Src1& src1_, const Src2& src2_, uint32_t imm) {
|
||||
size_t n = dst_.size();
|
||||
OpArrayIter<Src1> src1(src1_);
|
||||
OpArrayIter<Src2> src2(src2_);
|
||||
@@ -2758,7 +2764,7 @@ static ASMJIT_INLINE void emit_3vi_t(UniCompiler& uc, UniOpVVVI op, const OpArra
|
||||
}
|
||||
|
||||
template<typename Src1, typename Src2, typename Src3>
|
||||
static ASMJIT_INLINE void emit_4v_t(UniCompiler& uc, UniOpVVVV op, const OpArray& dst_, const Src1& src1_, const Src2& src2_, const Src3& src3_) noexcept {
|
||||
static ASMJIT_INLINE void emit_4v_t(UniCompiler& uc, UniOpVVVV op, const OpArray& dst_, const Src1& src1_, const Src2& src2_, const Src3& src3_) {
|
||||
size_t n = dst_.size();
|
||||
OpArrayIter<Src1> src1(src1_);
|
||||
OpArrayIter<Src2> src2(src2_);
|
||||
@@ -2775,7 +2781,7 @@ static ASMJIT_INLINE void emit_4v_t(UniCompiler& uc, UniOpVVVV op, const OpArray
|
||||
// ujit::UniCompiler - Vector Instructions - Emit 2V
|
||||
// =================================================
|
||||
|
||||
void UniCompiler::emit_2v(UniOpVV op, const Operand_& dst_, const Operand_& src_) noexcept {
|
||||
void UniCompiler::emit_2v(UniOpVV op, const Operand_& dst_, const Operand_& src_) {
|
||||
ASMJIT_ASSERT(dst_.is_vec());
|
||||
|
||||
Vec dst(dst_.as<Vec>());
|
||||
@@ -3146,13 +3152,13 @@ void UniCompiler::emit_2v(UniOpVV op, const Operand_& dst_, const Operand_& src_
|
||||
}
|
||||
}
|
||||
|
||||
// OpArray overloads of emit_2v(): each one forwards unchanged to the emit_2v_t() template.
// Diff view: the first two lines (with `noexcept`) are removed, the last two are their replacements.
void UniCompiler::emit_2v(UniOpVV op, const OpArray& dst_, const Operand_& src_) noexcept { emit_2v_t(*this, op, dst_, src_); }
|
||||
void UniCompiler::emit_2v(UniOpVV op, const OpArray& dst_, const OpArray& src_) noexcept { emit_2v_t(*this, op, dst_, src_); }
|
||||
void UniCompiler::emit_2v(UniOpVV op, const OpArray& dst_, const Operand_& src_) { emit_2v_t(*this, op, dst_, src_); }
|
||||
void UniCompiler::emit_2v(UniOpVV op, const OpArray& dst_, const OpArray& src_) { emit_2v_t(*this, op, dst_, src_); }
|
||||
|
||||
// ujit::UniCompiler - Vector Instructions - Emit 2VI
|
||||
// ==================================================
|
||||
|
||||
void UniCompiler::emit_2vi(UniOpVVI op, const Operand_& dst_, const Operand_& src_, uint32_t imm) noexcept {
|
||||
void UniCompiler::emit_2vi(UniOpVVI op, const Operand_& dst_, const Operand_& src_, uint32_t imm) {
|
||||
ASMJIT_ASSERT(dst_.is_vec());
|
||||
|
||||
Vec dst(dst_.as<Vec>());
|
||||
@@ -3292,13 +3298,13 @@ void UniCompiler::emit_2vi(UniOpVVI op, const Operand_& dst_, const Operand_& sr
|
||||
}
|
||||
}
|
||||
|
||||
// OpArray overloads of emit_2vi(): each one forwards unchanged (including `imm`) to emit_2vi_t().
// Diff view: the first two lines (with `noexcept`) are removed, the last two are their replacements.
void UniCompiler::emit_2vi(UniOpVVI op, const OpArray& dst_, const Operand_& src_, uint32_t imm) noexcept { emit_2vi_t(*this, op, dst_, src_, imm); }
|
||||
void UniCompiler::emit_2vi(UniOpVVI op, const OpArray& dst_, const OpArray& src_, uint32_t imm) noexcept { emit_2vi_t(*this, op, dst_, src_, imm); }
|
||||
void UniCompiler::emit_2vi(UniOpVVI op, const OpArray& dst_, const Operand_& src_, uint32_t imm) { emit_2vi_t(*this, op, dst_, src_, imm); }
|
||||
void UniCompiler::emit_2vi(UniOpVVI op, const OpArray& dst_, const OpArray& src_, uint32_t imm) { emit_2vi_t(*this, op, dst_, src_, imm); }
|
||||
|
||||
// ujit::UniCompiler - Vector Instructions - Emit 2VS
|
||||
// ==================================================
|
||||
|
||||
void UniCompiler::emit_2vs(UniOpVR op, const Operand_& dst_, const Operand_& src_, uint32_t idx) noexcept {
|
||||
void UniCompiler::emit_2vs(UniOpVR op, const Operand_& dst_, const Operand_& src_, uint32_t idx) {
|
||||
UniOpVInfo op_info = opcode_info_2vs[size_t(op)];
|
||||
|
||||
switch (op) {
|
||||
@@ -3415,7 +3421,7 @@ void UniCompiler::emit_2vs(UniOpVR op, const Operand_& dst_, const Operand_& src
|
||||
// ujit::UniCompiler - Vector Instructions - Emit 2VM
|
||||
// ==================================================
|
||||
|
||||
void UniCompiler::emit_vm(UniOpVM op, const Vec& dst_, const Mem& src_, Alignment alignment, uint32_t idx) noexcept {
|
||||
void UniCompiler::emit_vm(UniOpVM op, const Vec& dst_, const Mem& src_, Alignment alignment, uint32_t idx) {
|
||||
ASMJIT_ASSERT(dst_.is_vec());
|
||||
ASMJIT_ASSERT(src_.is_mem());
|
||||
|
||||
@@ -3548,7 +3554,7 @@ void UniCompiler::emit_vm(UniOpVM op, const Vec& dst_, const Mem& src_, Alignmen
|
||||
}
|
||||
}
|
||||
|
||||
void UniCompiler::emit_vm(UniOpVM op, const OpArray& dst_, const Mem& src_, Alignment alignment, uint32_t idx) noexcept {
|
||||
void UniCompiler::emit_vm(UniOpVM op, const OpArray& dst_, const Mem& src_, Alignment alignment, uint32_t idx) {
|
||||
Support::maybe_unused(alignment);
|
||||
|
||||
size_t i = 0;
|
||||
@@ -3594,7 +3600,7 @@ void UniCompiler::emit_vm(UniOpVM op, const OpArray& dst_, const Mem& src_, Alig
|
||||
}
|
||||
}
|
||||
|
||||
void UniCompiler::emit_mv(UniOpMV op, const Mem& dst_, const Vec& src_, Alignment alignment, uint32_t idx) noexcept {
|
||||
void UniCompiler::emit_mv(UniOpMV op, const Mem& dst_, const Vec& src_, Alignment alignment, uint32_t idx) {
|
||||
ASMJIT_ASSERT(dst_.is_mem());
|
||||
ASMJIT_ASSERT(src_.is_vec());
|
||||
|
||||
@@ -3737,7 +3743,7 @@ void UniCompiler::emit_mv(UniOpMV op, const Mem& dst_, const Vec& src_, Alignmen
|
||||
}
|
||||
}
|
||||
|
||||
void UniCompiler::emit_mv(UniOpMV op, const Mem& dst_, const OpArray& src_, Alignment alignment, uint32_t idx) noexcept {
|
||||
void UniCompiler::emit_mv(UniOpMV op, const Mem& dst_, const OpArray& src_, Alignment alignment, uint32_t idx) {
|
||||
Support::maybe_unused(alignment);
|
||||
|
||||
size_t i = 0;
|
||||
@@ -3794,7 +3800,7 @@ static void emit_3v_op(
|
||||
FloatMode float_mode,
|
||||
ElementSize dst_element, VecPart dst_part,
|
||||
ElementSize src_element, VecPart src_part,
|
||||
uint32_t reversed) noexcept {
|
||||
uint32_t reversed) {
|
||||
|
||||
Vec src2;
|
||||
|
||||
@@ -3839,7 +3845,7 @@ static void emit_3v_op(
|
||||
cc->emit(inst_id, dst, src1, src2);
|
||||
}
|
||||
|
||||
void UniCompiler::emit_3v(UniOpVVV op, const Operand_& dst_, const Operand_& src1_, const Operand_& src2_) noexcept {
|
||||
void UniCompiler::emit_3v(UniOpVVV op, const Operand_& dst_, const Operand_& src1_, const Operand_& src2_) {
|
||||
ASMJIT_ASSERT(dst_.is_vec());
|
||||
ASMJIT_ASSERT(src1_.is_vec());
|
||||
|
||||
@@ -4148,14 +4154,14 @@ void UniCompiler::emit_3v(UniOpVVV op, const Operand_& dst_, const Operand_& src
|
||||
}
|
||||
}
|
||||
|
||||
// OpArray overloads of emit_3v(): one per combination of Operand_/OpArray sources shown here,
// each forwarding unchanged to the emit_3v_t() template.
// Diff view: the first three lines (with `noexcept`) are removed, the last three are their replacements.
void UniCompiler::emit_3v(UniOpVVV op, const OpArray& dst_, const Operand_& src1_, const OpArray& src2_) noexcept { emit_3v_t(*this, op, dst_, src1_, src2_); }
|
||||
void UniCompiler::emit_3v(UniOpVVV op, const OpArray& dst_, const OpArray& src1_, const Operand_& src2_) noexcept { emit_3v_t(*this, op, dst_, src1_, src2_); }
|
||||
void UniCompiler::emit_3v(UniOpVVV op, const OpArray& dst_, const OpArray& src1_, const OpArray& src2_) noexcept { emit_3v_t(*this, op, dst_, src1_, src2_); }
|
||||
void UniCompiler::emit_3v(UniOpVVV op, const OpArray& dst_, const Operand_& src1_, const OpArray& src2_) { emit_3v_t(*this, op, dst_, src1_, src2_); }
|
||||
void UniCompiler::emit_3v(UniOpVVV op, const OpArray& dst_, const OpArray& src1_, const Operand_& src2_) { emit_3v_t(*this, op, dst_, src1_, src2_); }
|
||||
void UniCompiler::emit_3v(UniOpVVV op, const OpArray& dst_, const OpArray& src1_, const OpArray& src2_) { emit_3v_t(*this, op, dst_, src1_, src2_); }
|
||||
|
||||
// ujit::UniCompiler - Vector Instructions - Emit 3VI
|
||||
// ==================================================
|
||||
|
||||
void UniCompiler::emit_3vi(UniOpVVVI op, const Operand_& dst_, const Operand_& src1_, const Operand_& src2_, uint32_t imm) noexcept {
|
||||
void UniCompiler::emit_3vi(UniOpVVVI op, const Operand_& dst_, const Operand_& src1_, const Operand_& src2_, uint32_t imm) {
|
||||
ASMJIT_ASSERT(dst_.is_vec());
|
||||
ASMJIT_ASSERT(src1_.is_vec());
|
||||
|
||||
@@ -4231,14 +4237,14 @@ void UniCompiler::emit_3vi(UniOpVVVI op, const Operand_& dst_, const Operand_& s
|
||||
}
|
||||
}
|
||||
|
||||
// OpArray overloads of emit_3vi(): each one forwards unchanged (including `imm`) to emit_3vi_t().
// Diff view: the first three lines (with `noexcept`) are removed, the last three are their replacements.
void UniCompiler::emit_3vi(UniOpVVVI op, const OpArray& dst_, const Operand_& src1_, const OpArray& src2_, uint32_t imm) noexcept { emit_3vi_t(*this, op, dst_, src1_, src2_, imm); }
|
||||
void UniCompiler::emit_3vi(UniOpVVVI op, const OpArray& dst_, const OpArray& src1_, const Operand_& src2_, uint32_t imm) noexcept { emit_3vi_t(*this, op, dst_, src1_, src2_, imm); }
|
||||
void UniCompiler::emit_3vi(UniOpVVVI op, const OpArray& dst_, const OpArray& src1_, const OpArray& src2_, uint32_t imm) noexcept { emit_3vi_t(*this, op, dst_, src1_, src2_, imm); }
|
||||
void UniCompiler::emit_3vi(UniOpVVVI op, const OpArray& dst_, const Operand_& src1_, const OpArray& src2_, uint32_t imm) { emit_3vi_t(*this, op, dst_, src1_, src2_, imm); }
|
||||
void UniCompiler::emit_3vi(UniOpVVVI op, const OpArray& dst_, const OpArray& src1_, const Operand_& src2_, uint32_t imm) { emit_3vi_t(*this, op, dst_, src1_, src2_, imm); }
|
||||
void UniCompiler::emit_3vi(UniOpVVVI op, const OpArray& dst_, const OpArray& src1_, const OpArray& src2_, uint32_t imm) { emit_3vi_t(*this, op, dst_, src1_, src2_, imm); }
|
||||
|
||||
// ujit::UniCompiler - Vector Instructions - Emit 4V
|
||||
// =================================================
|
||||
|
||||
void UniCompiler::emit_4v(UniOpVVVV op, const Operand_& dst_, const Operand_& src1_, const Operand_& src2_, const Operand_& src3_) noexcept {
|
||||
void UniCompiler::emit_4v(UniOpVVVV op, const Operand_& dst_, const Operand_& src1_, const Operand_& src2_, const Operand_& src3_) {
|
||||
ASMJIT_ASSERT(dst_.is_vec());
|
||||
ASMJIT_ASSERT(src1_.is_vec());
|
||||
|
||||
@@ -4372,13 +4378,13 @@ void UniCompiler::emit_4v(UniOpVVVV op, const Operand_& dst_, const Operand_& sr
|
||||
}
|
||||
}
|
||||
|
||||
// OpArray overloads of emit_4v(): one per combination of single-operand/OpArray sources shown
// here, each forwarding unchanged to the emit_4v_t() template.
// Diff view: the first seven lines (with `noexcept`) are removed, the last seven are their replacements.
void UniCompiler::emit_4v(UniOpVVVV op, const OpArray& dst_, const Operand_& src1_, const Operand_& src2_, const OpArray& src3_) noexcept { emit_4v_t(*this, op, dst_, src1_, src2_, src3_); }
|
||||
void UniCompiler::emit_4v(UniOpVVVV op, const OpArray& dst_, const Operand_& src1_, const OpArray& src2_, const Operand& src3_) noexcept { emit_4v_t(*this, op, dst_, src1_, src2_, src3_); }
|
||||
void UniCompiler::emit_4v(UniOpVVVV op, const OpArray& dst_, const Operand_& src1_, const OpArray& src2_, const OpArray& src3_) noexcept { emit_4v_t(*this, op, dst_, src1_, src2_, src3_); }
|
||||
void UniCompiler::emit_4v(UniOpVVVV op, const OpArray& dst_, const OpArray& src1_, const Operand_& src2_, const Operand& src3_) noexcept { emit_4v_t(*this, op, dst_, src1_, src2_, src3_); }
|
||||
void UniCompiler::emit_4v(UniOpVVVV op, const OpArray& dst_, const OpArray& src1_, const Operand_& src2_, const OpArray& src3_) noexcept { emit_4v_t(*this, op, dst_, src1_, src2_, src3_); }
|
||||
void UniCompiler::emit_4v(UniOpVVVV op, const OpArray& dst_, const OpArray& src1_, const OpArray& src2_, const Operand& src3_) noexcept { emit_4v_t(*this, op, dst_, src1_, src2_, src3_); }
|
||||
void UniCompiler::emit_4v(UniOpVVVV op, const OpArray& dst_, const OpArray& src1_, const OpArray& src2_, const OpArray& src3_) noexcept { emit_4v_t(*this, op, dst_, src1_, src2_, src3_); }
|
||||
void UniCompiler::emit_4v(UniOpVVVV op, const OpArray& dst_, const Operand_& src1_, const Operand_& src2_, const OpArray& src3_) { emit_4v_t(*this, op, dst_, src1_, src2_, src3_); }
|
||||
void UniCompiler::emit_4v(UniOpVVVV op, const OpArray& dst_, const Operand_& src1_, const OpArray& src2_, const Operand& src3_) { emit_4v_t(*this, op, dst_, src1_, src2_, src3_); }
|
||||
void UniCompiler::emit_4v(UniOpVVVV op, const OpArray& dst_, const Operand_& src1_, const OpArray& src2_, const OpArray& src3_) { emit_4v_t(*this, op, dst_, src1_, src2_, src3_); }
|
||||
void UniCompiler::emit_4v(UniOpVVVV op, const OpArray& dst_, const OpArray& src1_, const Operand_& src2_, const Operand& src3_) { emit_4v_t(*this, op, dst_, src1_, src2_, src3_); }
|
||||
void UniCompiler::emit_4v(UniOpVVVV op, const OpArray& dst_, const OpArray& src1_, const Operand_& src2_, const OpArray& src3_) { emit_4v_t(*this, op, dst_, src1_, src2_, src3_); }
|
||||
void UniCompiler::emit_4v(UniOpVVVV op, const OpArray& dst_, const OpArray& src1_, const OpArray& src2_, const Operand& src3_) { emit_4v_t(*this, op, dst_, src1_, src2_, src3_); }
|
||||
void UniCompiler::emit_4v(UniOpVVVV op, const OpArray& dst_, const OpArray& src1_, const OpArray& src2_, const OpArray& src3_) { emit_4v_t(*this, op, dst_, src1_, src2_, src3_); }
|
||||
|
||||
ASMJIT_END_SUB_NAMESPACE
|
||||
|
||||
|
||||
@@ -147,51 +147,68 @@ bool UniCompiler::has_masked_access_of(uint32_t data_size) const noexcept {
|
||||
}
|
||||
}
|
||||
|
||||
// ujit::UniCompiler - Embed
|
||||
// =========================
|
||||
|
||||
// Embeds one entry of `entry_size` bytes per label in `jump_table`.
// A valid label is embedded through embed_label_delta(label, jump_table_base, entry_size);
// an invalid label is embedded as `entry_size` zero bytes taken from `zeros`.
// NOTE(review): `zeros` holds 8 bytes, so this presumably expects entry_size <= 8 - confirm.
void UniCompiler::embed_jump_table(Span<const Label> jump_table, const Label& jump_table_base, uint32_t entry_size) {
|
||||
static const uint8_t zeros[8] {};
|
||||
|
||||
for (const Label& label : jump_table) {
|
||||
if (label.is_valid()) {
|
||||
cc->embed_label_delta(label, jump_table_base, entry_size);
|
||||
}
|
||||
else {
|
||||
cc->embed(zeros, entry_size);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ujit::UniCompiler - Function
|
||||
// ============================
|
||||
|
||||
// Diff view of the function-setup code: removed and added lines are interleaved here
// without markers, so this region is not a single compilable function.
// - Lines that use `func_node`, `_func_node`, `_func_init` and `_func_end` appear to belong
//   to the removed init_function(FuncNode*), which added the node to `cc` and enabled the
//   AVX / AVX-512 frame flags.
// - Lines that use `func` and `_func_init_hook` appear to belong to the added hook_func(),
//   which takes the current function from cc->func(), stores it in `_func_init_hook`, and
//   enables the AVX / AVX-512 frame flags only when `func` is non-null and has_avx() is true.
// - unhook_func() resets `_func_init_hook` to nullptr.
// NOTE(review): the old/new attribution above is inferred from identifier names - confirm
// against the real file.
void UniCompiler::init_function(FuncNode* func_node) noexcept {
|
||||
cc->add_func(func_node);
|
||||
void UniCompiler::hook_func() noexcept {
|
||||
FuncNode* func = cc->func();
|
||||
_func_init_hook = func;
|
||||
|
||||
_func_node = func_node;
|
||||
_func_init = cc->cursor();
|
||||
_func_end = func_node->end_node()->prev();
|
||||
if (func && has_avx()) {
|
||||
func->frame().set_avx_enabled();
|
||||
func->frame().set_avx_auto_cleanup();
|
||||
|
||||
if (has_avx()) {
|
||||
func_node->frame().set_avx_enabled();
|
||||
func_node->frame().set_avx_cleanup();
|
||||
if (has_avx512()) {
|
||||
func->frame().set_avx512_enabled();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (has_avx512()) {
|
||||
func_node->frame().set_avx512_enabled();
|
||||
}
|
||||
void UniCompiler::unhook_func() noexcept {
|
||||
_func_init_hook = nullptr;
|
||||
}
|
||||
|
||||
// ujit::UniCompiler - Constants
|
||||
// =============================
|
||||
|
||||
// Lazily creates `_common_table_ptr`: when it is not yet valid, a new register named
// "common_table_ptr" is created and loaded with the constant-table address
// (ct_ptr<void>() + _common_table_offset) while a ScopedInjector is active on the
// function-init cursor.
// Diff view: the `noexcept` signature and the `&_func_init` injector line are the removed
// lines; the line following each one is its replacement.
void UniCompiler::_init_vec_const_table_ptr() noexcept {
|
||||
void UniCompiler::_init_vec_const_table_ptr() {
|
||||
const void* ct_addr = ct_ptr<void>();
|
||||
|
||||
if (!_common_table_ptr.is_valid()) {
|
||||
ScopedInjector injector(cc, &_func_init);
|
||||
ScopedInjector injector(cc, &_func_init_hook);
|
||||
_common_table_ptr = new_gpz("common_table_ptr");
|
||||
cc->mov(_common_table_ptr, (int64_t)ct_addr + _common_table_offset);
|
||||
}
|
||||
}
|
||||
|
||||
x86::KReg UniCompiler::k_const(uint64_t value) noexcept {
|
||||
x86::KReg UniCompiler::k_const(uint64_t value) {
|
||||
uint32_t slot;
|
||||
for (slot = 0; slot < kMaxKRegConstCount; slot++)
|
||||
if (_k_reg[slot].is_valid() && _k_imm[slot] == value)
|
||||
return _k_reg[slot];
|
||||
|
||||
BaseNode* prevNode = nullptr;
|
||||
BaseNode* prev_node = nullptr;
|
||||
Gp tmp;
|
||||
x86::KReg kReg;
|
||||
|
||||
if (slot < kMaxKRegConstCount) {
|
||||
prevNode = cc->set_cursor(_func_init);
|
||||
prev_node = cc->set_cursor(_func_init_hook);
|
||||
}
|
||||
|
||||
if (value & 0xFFFFFFFF00000000u) {
|
||||
@@ -209,13 +226,13 @@ x86::KReg UniCompiler::k_const(uint64_t value) noexcept {
|
||||
|
||||
if (slot < kMaxKRegConstCount) {
|
||||
_k_reg[slot] = kReg;
|
||||
_func_init = cc->set_cursor(prevNode);
|
||||
_func_init_hook = cc->set_cursor(prev_node);
|
||||
}
|
||||
|
||||
return kReg;
|
||||
}
|
||||
|
||||
Operand UniCompiler::simd_const(const void* c, Bcst bcst_width, VecWidth const_width) noexcept {
|
||||
Operand UniCompiler::simd_const(const void* c, Bcst bcst_width, VecWidth const_width) {
|
||||
size_t const_count = _vec_consts.size();
|
||||
|
||||
for (size_t i = 0; i < const_count; i++) {
|
||||
@@ -237,19 +254,19 @@ Operand UniCompiler::simd_const(const void* c, Bcst bcst_width, VecWidth const_w
|
||||
return Vec(signature_of_xmm_ymm_zmm[size_t(const_width)], _new_vec_const(c, bcst_width == Bcst::kNA_Unique).id());
|
||||
}
|
||||
|
||||
// Convenience overload: derives the constant width from `similar_to` as
// reg_type() - RegType::kVec128 and forwards to the VecWidth overload.
// Diff view: the `noexcept` signature is the removed line, the one after it its replacement.
Operand UniCompiler::simd_const(const void* c, Bcst bcst_width, const Vec& similar_to) noexcept {
|
||||
Operand UniCompiler::simd_const(const void* c, Bcst bcst_width, const Vec& similar_to) {
|
||||
VecWidth const_width = VecWidth(uint32_t(similar_to.reg_type()) - uint32_t(RegType::kVec128));
|
||||
return simd_const(c, bcst_width, const_width);
|
||||
}
|
||||
|
||||
// Convenience overload for a register array: asserts the array is not empty, derives the
// constant width from its first element (reg_type() - RegType::kVec128), and forwards to
// the VecWidth overload.
// Diff view: the `noexcept` signature is the removed line, the one after it its replacement.
Operand UniCompiler::simd_const(const void* c, Bcst bcst_width, const VecArray& similar_to) noexcept {
|
||||
Operand UniCompiler::simd_const(const void* c, Bcst bcst_width, const VecArray& similar_to) {
|
||||
ASMJIT_ASSERT(!similar_to.is_empty());
|
||||
|
||||
VecWidth const_width = VecWidth(uint32_t(similar_to[0].reg_type()) - uint32_t(RegType::kVec128));
|
||||
return simd_const(c, bcst_width, const_width);
|
||||
}
|
||||
|
||||
Vec UniCompiler::simd_vec_const(const void* c, Bcst bcst_width, VecWidth const_width) noexcept {
|
||||
Vec UniCompiler::simd_vec_const(const void* c, Bcst bcst_width, VecWidth const_width) {
|
||||
size_t const_count = _vec_consts.size();
|
||||
|
||||
for (size_t i = 0; i < const_count; i++)
|
||||
@@ -259,19 +276,19 @@ Vec UniCompiler::simd_vec_const(const void* c, Bcst bcst_width, VecWidth const_w
|
||||
return Vec(signature_of_xmm_ymm_zmm[size_t(const_width)], _new_vec_const(c, bcst_width == Bcst::kNA_Unique).id());
|
||||
}
|
||||
|
||||
// Convenience overload: derives the constant width from `similar_to` as
// reg_type() - RegType::kVec128 and forwards to the VecWidth overload.
// Diff view: the `noexcept` signature is the removed line, the one after it its replacement.
Vec UniCompiler::simd_vec_const(const void* c, Bcst bcst_width, const Vec& similar_to) noexcept {
|
||||
Vec UniCompiler::simd_vec_const(const void* c, Bcst bcst_width, const Vec& similar_to) {
|
||||
VecWidth const_width = VecWidth(uint32_t(similar_to.reg_type()) - uint32_t(RegType::kVec128));
|
||||
return simd_vec_const(c, bcst_width, const_width);
|
||||
}
|
||||
|
||||
// Convenience overload for a register array: asserts the array is not empty, derives the
// constant width from its first element (reg_type() - RegType::kVec128), and forwards to
// the VecWidth overload.
// Diff view: the `noexcept` signature is the removed line, the one after it its replacement.
Vec UniCompiler::simd_vec_const(const void* c, Bcst bcst_width, const VecArray& similar_to) noexcept {
|
||||
Vec UniCompiler::simd_vec_const(const void* c, Bcst bcst_width, const VecArray& similar_to) {
|
||||
ASMJIT_ASSERT(!similar_to.is_empty());
|
||||
|
||||
VecWidth const_width = VecWidth(uint32_t(similar_to[0].reg_type()) - uint32_t(RegType::kVec128));
|
||||
return simd_vec_const(c, bcst_width, const_width);
|
||||
}
|
||||
|
||||
x86::Mem UniCompiler::simd_mem_const(const void* c, Bcst bcst_width, VecWidth const_width) noexcept {
|
||||
x86::Mem UniCompiler::simd_mem_const(const void* c, Bcst bcst_width, VecWidth const_width) {
|
||||
x86::Mem m = _get_mem_const(c);
|
||||
if (const_width != VecWidth::k512)
|
||||
return m;
|
||||
@@ -289,19 +306,19 @@ x86::Mem UniCompiler::simd_mem_const(const void* c, Bcst bcst_width, VecWidth co
|
||||
return m;
|
||||
}
|
||||
|
||||
// Convenience overload: derives the constant width from `similar_to` as
// reg_type() - RegType::kVec128 and forwards to the VecWidth overload.
// Diff view: the `noexcept` signature is the removed line, the one after it its replacement.
x86::Mem UniCompiler::simd_mem_const(const void* c, Bcst bcst_width, const Vec& similar_to) noexcept {
|
||||
x86::Mem UniCompiler::simd_mem_const(const void* c, Bcst bcst_width, const Vec& similar_to) {
|
||||
VecWidth const_width = VecWidth(uint32_t(similar_to.reg_type()) - uint32_t(RegType::kVec128));
|
||||
return simd_mem_const(c, bcst_width, const_width);
|
||||
}
|
||||
|
||||
// Convenience overload for a register array: asserts the array is not empty, derives the
// constant width from its first element (reg_type() - RegType::kVec128), and forwards to
// the VecWidth overload.
// Diff view: the `noexcept` signature is the removed line, the one after it its replacement.
x86::Mem UniCompiler::simd_mem_const(const void* c, Bcst bcst_width, const VecArray& similar_to) noexcept {
|
||||
x86::Mem UniCompiler::simd_mem_const(const void* c, Bcst bcst_width, const VecArray& similar_to) {
|
||||
ASMJIT_ASSERT(!similar_to.is_empty());
|
||||
|
||||
VecWidth const_width = VecWidth(uint32_t(similar_to[0].reg_type()) - uint32_t(RegType::kVec128));
|
||||
return simd_mem_const(c, bcst_width, const_width);
|
||||
}
|
||||
|
||||
x86::Mem UniCompiler::_get_mem_const(const void* c) noexcept {
|
||||
x86::Mem UniCompiler::_get_mem_const(const void* c) {
|
||||
// Make sure we are addressing a constant from the `commonTable` constant pool.
|
||||
const void* ct_addr = ct_ptr<void>();
|
||||
ASMJIT_ASSERT((uintptr_t)c >= (uintptr_t)ct_addr &&
|
||||
@@ -322,7 +339,7 @@ x86::Mem UniCompiler::_get_mem_const(const void* c) noexcept {
|
||||
}
|
||||
}
|
||||
|
||||
Vec UniCompiler::_new_vec_const(const void* c, bool is_unique_const) noexcept {
|
||||
Vec UniCompiler::_new_vec_const(const void* c, bool is_unique_const) {
|
||||
Vec vec;
|
||||
const char* special_const_name = nullptr;
|
||||
|
||||
@@ -349,14 +366,14 @@ Vec UniCompiler::_new_vec_const(const void* c, bool is_unique_const) noexcept {
|
||||
_vec_consts.append(arena(), const_data);
|
||||
|
||||
if (c == &ct().p_0000000000000000) {
|
||||
ScopedInjector inject(cc, &_func_init);
|
||||
ScopedInjector inject(cc, &_func_init_hook);
|
||||
v_zero_i(vec.xmm());
|
||||
}
|
||||
else {
|
||||
// NOTE: _get_mem_const() must be outside of injected code as it uses injection too.
|
||||
Mem m = _get_mem_const(c);
|
||||
|
||||
ScopedInjector inject(cc, &_func_init);
|
||||
ScopedInjector inject(cc, &_func_init_hook);
|
||||
if (has_avx512() && !vec.is_vec128() && !is_unique_const)
|
||||
cc->vbroadcasti32x4(vec, m);
|
||||
else if (has_avx2() && vec.is_vec256() && !is_unique_const)
|
||||
@@ -372,7 +389,7 @@ Vec UniCompiler::_new_vec_const(const void* c, bool is_unique_const) noexcept {
|
||||
// ujit::UniCompiler - Stack
|
||||
// =========================
|
||||
|
||||
x86::Mem UniCompiler::tmp_stack(StackId id, uint32_t size) noexcept {
|
||||
x86::Mem UniCompiler::tmp_stack(StackId id, uint32_t size) {
|
||||
ASMJIT_ASSERT(Support::is_power_of_2(size));
|
||||
ASMJIT_ASSERT(size <= 64);
|
||||
|
||||
@@ -385,20 +402,6 @@ x86::Mem UniCompiler::tmp_stack(StackId id, uint32_t size) noexcept {
|
||||
return stack;
|
||||
}
|
||||
|
||||
// ujit::UniCompiler - Utilities
|
||||
// =============================
|
||||
|
||||
// Pointer + size form of embed_jump_table(): embeds one entry of `entry_size` bytes for each
// of the `jump_table_size` labels. A valid label goes through
// embed_label_delta(label, jump_table_base, entry_size); an invalid one is embedded as
// `entry_size` zero bytes taken from `zeros`.
// NOTE(review): the commit text says function creation/signatures were reworked; this block
// looks like the removed variant of the function in this diff view - confirm.
void UniCompiler::embed_jump_table(const Label* jump_table, size_t jump_table_size, const Label& jump_table_base, uint32_t entry_size) noexcept {
|
||||
static const uint8_t zeros[8] {};
|
||||
|
||||
for (size_t i = 0; i < jump_table_size; i++) {
|
||||
if (jump_table[i].is_valid())
|
||||
cc->embed_label_delta(jump_table[i], jump_table_base, entry_size);
|
||||
else
|
||||
cc->embed(zeros, entry_size);
|
||||
}
|
||||
}
|
||||
|
||||
// ujit::UniCompiler - General Purpose Instructions - Conditions
|
||||
// =============================================================
|
||||
|
||||
@@ -472,7 +475,7 @@ public:
|
||||
cond = x86::reverse_cond(cond);
|
||||
}
|
||||
|
||||
ASMJIT_NOINLINE void emit(UniCompiler& uc) noexcept {
|
||||
ASMJIT_NOINLINE void emit(UniCompiler& uc) {
|
||||
BackendCompiler* cc = uc.cc;
|
||||
InstId inst_id = condition_to_inst_id[size_t(op)];
|
||||
|
||||
@@ -503,7 +506,7 @@ public:
|
||||
// ujit::UniCompiler - General Purpose Instructions - Emit
|
||||
// =======================================================
|
||||
|
||||
void UniCompiler::emit_mov(const Gp& dst, const Operand_& src) noexcept {
|
||||
void UniCompiler::emit_mov(const Gp& dst, const Operand_& src) {
|
||||
if (src.is_imm() && src.as<Imm>().value() == 0) {
|
||||
Gp r(dst);
|
||||
if (r.is_gp64())
|
||||
@@ -515,8 +518,9 @@ void UniCompiler::emit_mov(const Gp& dst, const Operand_& src) noexcept {
|
||||
}
|
||||
}
|
||||
|
||||
void UniCompiler::emit_m(UniOpM op, const Mem& m_) noexcept {
|
||||
void UniCompiler::emit_m(UniOpM op, const Mem& m_) {
|
||||
static constexpr uint8_t size_table[] = {
|
||||
1, // Prefetch
|
||||
0, // kStoreZeroReg
|
||||
1, // kStoreZeroU8
|
||||
2, // kStoreZeroU16
|
||||
@@ -524,16 +528,21 @@ void UniCompiler::emit_m(UniOpM op, const Mem& m_) noexcept {
|
||||
8 // kStoreZeroU64
|
||||
};
|
||||
|
||||
Mem m(m_);
|
||||
uint32_t size = size_table[size_t(op)];
|
||||
if (size == 0)
|
||||
size = cc->register_size();
|
||||
if (op == UniOpM::kPrefetch) {
|
||||
cc->prefetcht0(m_);
|
||||
}
|
||||
else {
|
||||
Mem m(m_);
|
||||
uint32_t size = size_table[size_t(op)];
|
||||
if (size == 0)
|
||||
size = cc->register_size();
|
||||
|
||||
m.set_size(size);
|
||||
cc->mov(m, 0);
|
||||
m.set_size(size);
|
||||
cc->mov(m, 0);
|
||||
}
|
||||
}
|
||||
|
||||
void UniCompiler::emit_rm(UniOpRM op, const Gp& dst, const Mem& src) noexcept {
|
||||
void UniCompiler::emit_rm(UniOpRM op, const Gp& dst, const Mem& src) {
|
||||
static constexpr uint8_t size_table[] = {
|
||||
0, // kLoadReg
|
||||
1, // kLoadI8
|
||||
@@ -613,7 +622,7 @@ struct UniOpMRInfo {
|
||||
uint16_t size;
|
||||
};
|
||||
|
||||
void UniCompiler::emit_mr(UniOpMR op, const Mem& dst, const Gp& src) noexcept {
|
||||
void UniCompiler::emit_mr(UniOpMR op, const Mem& dst, const Gp& src) {
|
||||
static constexpr UniOpMRInfo op_info_table[] = {
|
||||
{ Inst::kIdMov, 0 }, // kStoreReg
|
||||
{ Inst::kIdMov, 1 }, // kStoreU8
|
||||
@@ -648,14 +657,14 @@ void UniCompiler::emit_mr(UniOpMR op, const Mem& dst, const Gp& src) noexcept {
|
||||
cc->emit(op_info.inst_id, m, r);
|
||||
}
|
||||
|
||||
// Conditional move: builds a ConditionApplier from `condition`, runs optimize() and emit()
// on it, then emits the cmovcc instruction selected by the applier's resulting condition
// code with operands (dst, sel).
// Diff view: the `noexcept` signature is the removed line, the one after it its replacement.
void UniCompiler::emit_cmov(const Gp& dst, const Operand_& sel, const UniCondition& condition) noexcept {
|
||||
void UniCompiler::emit_cmov(const Gp& dst, const Operand_& sel, const UniCondition& condition) {
|
||||
ConditionApplier ca(condition);
|
||||
ca.optimize(*this);
|
||||
ca.emit(*this);
|
||||
cc->emit(Inst::cmovcc_from_cond(ca.cond), dst, sel);
|
||||
}
|
||||
|
||||
void UniCompiler::emit_select(const Gp& dst, const Operand_& sel1_, const Operand_& sel2_, const UniCondition& condition) noexcept {
|
||||
void UniCompiler::emit_select(const Gp& dst, const Operand_& sel1_, const Operand_& sel2_, const UniCondition& condition) {
|
||||
ConditionApplier ca(condition);
|
||||
ca.optimize(*this);
|
||||
|
||||
@@ -697,7 +706,7 @@ void UniCompiler::emit_select(const Gp& dst, const Operand_& sel1_, const Operan
|
||||
cc->emit(Inst::cmovcc_from_cond(x86::negate_cond(ca.cond)), dst, sel2);
|
||||
}
|
||||
|
||||
void UniCompiler::emit_2i(UniOpRR op, const Gp& dst, const Operand_& src_) noexcept {
|
||||
void UniCompiler::emit_2i(UniOpRR op, const Gp& dst, const Operand_& src_) {
|
||||
Operand src(src_);
|
||||
|
||||
// Notes
|
||||
@@ -835,13 +844,13 @@ static constexpr uint64_t kOp3ICommutativeMask =
|
||||
(uint64_t(1) << unsigned(UniOpRRR::kUMin)) |
|
||||
(uint64_t(1) << unsigned(UniOpRRR::kUMax)) ;
|
||||
|
||||
// Returns true when the bit indexed by `op` is set in kOp3ICommutativeMask.
// Diff view: the `noexcept` signature is the removed line, the one after it its replacement.
static ASMJIT_INLINE_NODEBUG bool is_op3i_commutative(UniOpRRR op) noexcept {
|
||||
static ASMJIT_INLINE_NODEBUG bool is_op3i_commutative(UniOpRRR op) {
|
||||
return (kOp3ICommutativeMask & (uint64_t(1) << unsigned(op))) != 0;
|
||||
}
|
||||
|
||||
// Pair of instruction ids. NOTE(review): presumably the two cmov variants used when lowering
// min/max in emit_3i() - confirm at the use site, which is not visible here.
struct UniOpRRRMinMaxCMovInst { InstId a, b; };
|
||||
|
||||
void UniCompiler::emit_3i(UniOpRRR op, const Gp& dst, const Operand_& src1_, const Operand_& src2_) noexcept {
|
||||
void UniCompiler::emit_3i(UniOpRRR op, const Gp& dst, const Operand_& src1_, const Operand_& src2_) {
|
||||
Operand src1(src1_);
|
||||
Operand src2(src2_);
|
||||
|
||||
@@ -1481,18 +1490,18 @@ void UniCompiler::emit_3i(UniOpRRR op, const Gp& dst, const Operand_& src1_, con
|
||||
ASMJIT_NOT_REACHED();
|
||||
}
|
||||
|
||||
void UniCompiler::emit_j(const Operand_& target) noexcept {
|
||||
void UniCompiler::emit_j(const Operand_& target) {
|
||||
cc->emit(Inst::kIdJmp, target);
|
||||
}
|
||||
|
||||
void UniCompiler::emit_j_if(const Label& target, const UniCondition& condition) noexcept {
|
||||
void UniCompiler::emit_j_if(const Label& target, const UniCondition& condition) {
|
||||
ConditionApplier ca(condition);
|
||||
ca.optimize(*this);
|
||||
ca.emit(*this);
|
||||
cc->j(ca.cond, target);
|
||||
}
|
||||
|
||||
void UniCompiler::adds_u8(const Gp& dst, const Gp& src1, const Gp& src2) noexcept {
|
||||
void UniCompiler::adds_u8(const Gp& dst, const Gp& src1, const Gp& src2) {
|
||||
ASMJIT_ASSERT(dst.size() == src1.size());
|
||||
ASMJIT_ASSERT(dst.size() == src2.size());
|
||||
|
||||
@@ -1512,13 +1521,13 @@ void UniCompiler::adds_u8(const Gp& dst, const Gp& src1, const Gp& src2) noexcep
|
||||
cc->or_(dst.r8(), u8_msk.r8());
|
||||
}
|
||||
|
||||
void UniCompiler::inv_u8(const Gp& dst, const Gp& src) noexcept {
|
||||
void UniCompiler::inv_u8(const Gp& dst, const Gp& src) {
|
||||
if (dst.id() != src.id())
|
||||
cc->mov(dst, src);
|
||||
cc->xor_(dst.r8(), 0xFF);
|
||||
}
|
||||
|
||||
void UniCompiler::div_255_u32(const Gp& dst, const Gp& src) noexcept {
|
||||
void UniCompiler::div_255_u32(const Gp& dst, const Gp& src) {
|
||||
ASMJIT_ASSERT(dst.size() == src.size());
|
||||
|
||||
if (dst.id() == src.id()) {
|
||||
@@ -1540,13 +1549,13 @@ void UniCompiler::div_255_u32(const Gp& dst, const Gp& src) noexcept {
|
||||
}
|
||||
}
|
||||
|
||||
void UniCompiler::mul_257_hu16(const Gp& dst, const Gp& src) noexcept {
|
||||
void UniCompiler::mul_257_hu16(const Gp& dst, const Gp& src) {
|
||||
ASMJIT_ASSERT(dst.size() == src.size());
|
||||
cc->imul(dst, src, 257);
|
||||
cc->shr(dst, 16);
|
||||
}
|
||||
|
||||
void UniCompiler::add_scaled(const Gp& dst, const Gp& a, int b) noexcept {
|
||||
void UniCompiler::add_scaled(const Gp& dst, const Gp& a, int b) {
|
||||
switch (b) {
|
||||
case 1:
|
||||
cc->add(dst, a);
|
||||
@@ -1570,7 +1579,7 @@ void UniCompiler::add_scaled(const Gp& dst, const Gp& a, int b) noexcept {
|
||||
}
|
||||
}
|
||||
|
||||
void UniCompiler::add_ext(const Gp& dst, const Gp& src_, const Gp& idx_, uint32_t scale, int32_t disp) noexcept {
|
||||
void UniCompiler::add_ext(const Gp& dst, const Gp& src_, const Gp& idx_, uint32_t scale, int32_t disp) {
|
||||
ASMJIT_ASSERT(scale != 0u);
|
||||
|
||||
Gp src = src_.clone_as(dst);
|
||||
@@ -1610,7 +1619,7 @@ void UniCompiler::add_ext(const Gp& dst, const Gp& src_, const Gp& idx_, uint32_
|
||||
cc->lea(dst, x86::ptr(src, tmp));
|
||||
}
|
||||
|
||||
void UniCompiler::lea(const Gp& dst, const Mem& src) noexcept {
|
||||
void UniCompiler::lea(const Gp& dst, const Mem& src) {
|
||||
Mem m(src);
|
||||
|
||||
if (is_64bit() && dst.size() == 4) {
|
||||
@@ -2589,7 +2598,7 @@ static constexpr UniOpVMInfo opcode_info_2mv[size_t(UniOpMV::kMaxValue) + 1] = {
|
||||
// ujit::UniCompiler - Vector Instructions - Utility Functions
|
||||
// ===========================================================
|
||||
|
||||
static ASMJIT_NOINLINE void UniCompiler_load_into(UniCompiler& uc, const Vec& vec, const Mem& mem, uint32_t broadcast_size = 0) noexcept {
|
||||
static ASMJIT_NOINLINE void UniCompiler_load_into(UniCompiler& uc, const Vec& vec, const Mem& mem, uint32_t broadcast_size = 0) {
|
||||
BackendCompiler* cc = uc.cc;
|
||||
Mem m(mem);
|
||||
|
||||
@@ -2617,7 +2626,7 @@ static ASMJIT_NOINLINE void UniCompiler_load_into(UniCompiler& uc, const Vec& ve
|
||||
|
||||
// TODO: Unused for now...
|
||||
[[maybe_unused]]
|
||||
static ASMJIT_NOINLINE void UniCompiler_move_to_dst(UniCompiler& uc, const Vec& dst, const Operand_& src, uint32_t broadcast_size = 0) noexcept {
|
||||
static ASMJIT_NOINLINE void UniCompiler_move_to_dst(UniCompiler& uc, const Vec& dst, const Operand_& src, uint32_t broadcast_size = 0) {
|
||||
if (src.is_reg()) {
|
||||
ASMJIT_ASSERT(src.is_vec());
|
||||
if (dst.id() != src.as<Reg>().id()) {
|
||||
@@ -2632,7 +2641,7 @@ static ASMJIT_NOINLINE void UniCompiler_move_to_dst(UniCompiler& uc, const Vec&
|
||||
}
|
||||
}
|
||||
|
||||
static ASMJIT_NOINLINE Vec UniCompiler_load_new(UniCompiler& uc, const Vec& ref, const Mem& mem, uint32_t broadcast_size = 0) noexcept {
|
||||
static ASMJIT_NOINLINE Vec UniCompiler_load_new(UniCompiler& uc, const Vec& ref, const Mem& mem, uint32_t broadcast_size = 0) {
|
||||
Vec vec = uc.new_similar_reg(ref, "@vec_m");
|
||||
UniCompiler_load_into(uc, vec, mem, broadcast_size);
|
||||
return vec;
|
||||
@@ -2642,7 +2651,7 @@ static ASMJIT_INLINE bool is_same_vec(const Vec& a, const Operand_& b) noexcept
|
||||
return b.is_reg() && a.id() == b.as<Reg>().id();
|
||||
}
|
||||
|
||||
static ASMJIT_INLINE Operand get_fop_one(UniCompiler& uc, const Vec& dst, FloatMode fm) noexcept {
|
||||
static ASMJIT_INLINE Operand get_fop_one(UniCompiler& uc, const Vec& dst, FloatMode fm) {
|
||||
Operand op;
|
||||
if (is_f32_op(fm))
|
||||
op = uc.simd_const(&uc.ct().f32_1, Bcst::k32, dst);
|
||||
@@ -2651,7 +2660,7 @@ static ASMJIT_INLINE Operand get_fop_one(UniCompiler& uc, const Vec& dst, FloatM
|
||||
return op;
|
||||
}
|
||||
|
||||
static ASMJIT_INLINE Operand get_fop_half_minus_1ulp(UniCompiler& uc, const Vec& dst, FloatMode fm) noexcept {
|
||||
static ASMJIT_INLINE Operand get_fop_half_minus_1ulp(UniCompiler& uc, const Vec& dst, FloatMode fm) {
|
||||
Operand op;
|
||||
if (is_f32_op(fm))
|
||||
op = uc.simd_const(&uc.ct().f32_0_5_minus_1ulp, Bcst::k32, dst);
|
||||
@@ -2660,7 +2669,7 @@ static ASMJIT_INLINE Operand get_fop_half_minus_1ulp(UniCompiler& uc, const Vec&
|
||||
return op;
|
||||
}
|
||||
|
||||
static ASMJIT_INLINE Operand get_fop_round_magic(UniCompiler& uc, const Vec& dst, FloatMode fm) noexcept {
|
||||
static ASMJIT_INLINE Operand get_fop_round_magic(UniCompiler& uc, const Vec& dst, FloatMode fm) {
|
||||
Operand op;
|
||||
if (is_f32_op(fm))
|
||||
op = uc.simd_const(&uc.ct().f32_round_magic, Bcst::k32, dst);
|
||||
@@ -2669,7 +2678,7 @@ static ASMJIT_INLINE Operand get_fop_round_magic(UniCompiler& uc, const Vec& dst
|
||||
return op;
|
||||
}
|
||||
|
||||
static ASMJIT_INLINE Operand get_fop_msb_bit(UniCompiler& uc, const Vec& dst, FloatMode fm) noexcept {
|
||||
static ASMJIT_INLINE Operand get_fop_msb_bit(UniCompiler& uc, const Vec& dst, FloatMode fm) {
|
||||
Operand op;
|
||||
if (is_f32_op(fm))
|
||||
op = uc.simd_const(&uc.ct().p_8000000080000000, Bcst::k32, dst);
|
||||
@@ -2678,7 +2687,7 @@ static ASMJIT_INLINE Operand get_fop_msb_bit(UniCompiler& uc, const Vec& dst, Fl
|
||||
return op;
|
||||
}
|
||||
|
||||
static ASMJIT_NOINLINE void sse_mov(UniCompiler& uc, const Vec& dst, const Operand_& src) noexcept {
|
||||
static ASMJIT_NOINLINE void sse_mov(UniCompiler& uc, const Vec& dst, const Operand_& src) {
|
||||
BackendCompiler* cc = uc.cc;
|
||||
if (src.is_mem())
|
||||
cc->emit(Inst::kIdMovups, dst, src);
|
||||
@@ -2686,7 +2695,7 @@ static ASMJIT_NOINLINE void sse_mov(UniCompiler& uc, const Vec& dst, const Opera
|
||||
cc->emit(Inst::kIdMovaps, dst, src);
|
||||
}
|
||||
|
||||
static ASMJIT_NOINLINE void sse_fmov(UniCompiler& uc, const Vec& dst, const Operand_& src, FloatMode fm) noexcept {
|
||||
static ASMJIT_NOINLINE void sse_fmov(UniCompiler& uc, const Vec& dst, const Operand_& src, FloatMode fm) {
|
||||
BackendCompiler* cc = uc.cc;
|
||||
if (src.is_reg()) {
|
||||
if (dst.id() != src.id()) {
|
||||
@@ -2701,13 +2710,13 @@ static ASMJIT_NOINLINE void sse_fmov(UniCompiler& uc, const Vec& dst, const Oper
|
||||
}
|
||||
}
|
||||
|
||||
static ASMJIT_NOINLINE Vec sse_copy(UniCompiler& uc, const Vec& vec, const char* name) noexcept {
|
||||
static ASMJIT_NOINLINE Vec sse_copy(UniCompiler& uc, const Vec& vec, const char* name) {
|
||||
Vec copy = uc.new_similar_reg(vec, name);
|
||||
uc.cc->emit(Inst::kIdMovaps, copy, vec);
|
||||
return copy;
|
||||
}
|
||||
|
||||
static ASMJIT_NOINLINE void sse_make_vec(UniCompiler& uc, Operand_& op, const char* name) noexcept {
|
||||
static ASMJIT_NOINLINE void sse_make_vec(UniCompiler& uc, Operand_& op, const char* name) {
|
||||
if (op.is_mem()) {
|
||||
Vec tmp = uc.new_vec128(name);
|
||||
sse_mov(uc, tmp, op);
|
||||
@@ -2734,7 +2743,7 @@ static ASMJIT_INLINE uint32_t shuf_imm4_from_swizzle(Swizzle2 s) noexcept {
|
||||
return x86::shuffle_imm(imm1 * 2u + 1u, imm1 * 2u, imm0 * 2u + 1u, imm0 * 2u);
|
||||
}
|
||||
|
||||
static ASMJIT_NOINLINE void sse_bit_not(UniCompiler& uc, const Vec& dst, const Operand_& src) noexcept {
|
||||
static ASMJIT_NOINLINE void sse_bit_not(UniCompiler& uc, const Vec& dst, const Operand_& src) {
|
||||
BackendCompiler* cc = uc.cc;
|
||||
|
||||
sse_mov(uc, dst, src);
|
||||
@@ -2742,7 +2751,7 @@ static ASMJIT_NOINLINE void sse_bit_not(UniCompiler& uc, const Vec& dst, const O
|
||||
cc->emit(Inst::kIdPxor, dst, ones);
|
||||
}
|
||||
|
||||
static ASMJIT_NOINLINE void sse_msb_flip(UniCompiler& uc, const Vec& dst, const Operand_& src, ElementSize sz) noexcept {
|
||||
static ASMJIT_NOINLINE void sse_msb_flip(UniCompiler& uc, const Vec& dst, const Operand_& src, ElementSize sz) {
|
||||
BackendCompiler* cc = uc.cc;
|
||||
const void* msk_data {};
|
||||
|
||||
@@ -2761,7 +2770,7 @@ static ASMJIT_NOINLINE void sse_msb_flip(UniCompiler& uc, const Vec& dst, const
|
||||
cc->emit(Inst::kIdPxor, dst, msk);
|
||||
}
|
||||
|
||||
static ASMJIT_NOINLINE void sse_fsign_flip(UniCompiler& uc, const Vec& dst, const Operand_& src, FloatMode fm) noexcept {
|
||||
static ASMJIT_NOINLINE void sse_fsign_flip(UniCompiler& uc, const Vec& dst, const Operand_& src, FloatMode fm) {
|
||||
BackendCompiler* cc = uc.cc;
|
||||
|
||||
const FloatInst& fi = sse_float_inst[size_t(fm)];
|
||||
@@ -2783,7 +2792,7 @@ static ASMJIT_NOINLINE void sse_fsign_flip(UniCompiler& uc, const Vec& dst, cons
|
||||
|
||||
// Possibly the best solution:
|
||||
// https://stackoverflow.com/questions/65166174/how-to-simulate-pcmpgtq-on-sse2
|
||||
static ASMJIT_NOINLINE void sse_cmp_gt_i64(UniCompiler& uc, const Vec& dst, const Operand_& a, const Operand_& b) noexcept {
|
||||
static ASMJIT_NOINLINE void sse_cmp_gt_i64(UniCompiler& uc, const Vec& dst, const Operand_& a, const Operand_& b) {
|
||||
BackendCompiler* cc = uc.cc;
|
||||
|
||||
if (uc.has_sse4_2()) {
|
||||
@@ -2827,7 +2836,7 @@ static ASMJIT_NOINLINE void sse_cmp_gt_i64(UniCompiler& uc, const Vec& dst, cons
|
||||
|
||||
// Possibly the best solution:
|
||||
// https://stackoverflow.com/questions/65441496/what-is-the-most-efficient-way-to-do-unsigned-64-bit-comparison-on-sse2
|
||||
static ASMJIT_NOINLINE void sse_cmp_gt_u64(UniCompiler& uc, const Vec& dst, const Operand_& a, const Operand_& b) noexcept {
|
||||
static ASMJIT_NOINLINE void sse_cmp_gt_u64(UniCompiler& uc, const Vec& dst, const Operand_& a, const Operand_& b) {
|
||||
BackendCompiler* cc = uc.cc;
|
||||
|
||||
if (uc.has_sse4_2()) {
|
||||
@@ -2866,7 +2875,7 @@ static ASMJIT_NOINLINE void sse_cmp_gt_u64(UniCompiler& uc, const Vec& dst, cons
|
||||
}
|
||||
}
|
||||
|
||||
static ASMJIT_NOINLINE void sse_select(UniCompiler& uc, const Vec& dst, const Vec& a, const Operand_& b, const Vec& msk) noexcept {
|
||||
static ASMJIT_NOINLINE void sse_select(UniCompiler& uc, const Vec& dst, const Vec& a, const Operand_& b, const Vec& msk) {
|
||||
BackendCompiler* cc = uc.cc;
|
||||
sse_mov(uc, dst, a);
|
||||
cc->emit(Inst::kIdPand, dst, msk);
|
||||
@@ -2874,7 +2883,7 @@ static ASMJIT_NOINLINE void sse_select(UniCompiler& uc, const Vec& dst, const Ve
|
||||
cc->emit(Inst::kIdPor, dst, msk);
|
||||
}
|
||||
|
||||
static ASMJIT_NOINLINE void sse_int_widen(UniCompiler& uc, const Vec& dst, const Vec& src, WideningOp cvt) noexcept {
|
||||
static ASMJIT_NOINLINE void sse_int_widen(UniCompiler& uc, const Vec& dst, const Vec& src, WideningOp cvt) {
|
||||
BackendCompiler* cc = uc.cc;
|
||||
WideningOpInfo cvt_info = sse_int_widening_op_info[size_t(cvt)];
|
||||
|
||||
@@ -2943,7 +2952,7 @@ static ASMJIT_NOINLINE void sse_int_widen(UniCompiler& uc, const Vec& dst, const
|
||||
}
|
||||
}
|
||||
|
||||
static ASMJIT_NOINLINE void sse_round(UniCompiler& uc, const Vec& dst, const Operand& src, FloatMode fm, x86::RoundImm round_mode) noexcept {
|
||||
static ASMJIT_NOINLINE void sse_round(UniCompiler& uc, const Vec& dst, const Operand& src, FloatMode fm, x86::RoundImm round_mode) {
|
||||
BackendCompiler* cc = uc.cc;
|
||||
|
||||
uint32_t is_f32 = fm == FloatMode::kF32S || fm == FloatMode::kF32V;
|
||||
@@ -3116,7 +3125,7 @@ static ASMJIT_NOINLINE void sse_round(UniCompiler& uc, const Vec& dst, const Ope
|
||||
ASMJIT_NOT_REACHED();
|
||||
}
|
||||
|
||||
static ASMJIT_NOINLINE void avx_mov(UniCompiler& uc, const Vec& dst, const Operand_& src) noexcept {
|
||||
static ASMJIT_NOINLINE void avx_mov(UniCompiler& uc, const Vec& dst, const Operand_& src) {
|
||||
BackendCompiler* cc = uc.cc;
|
||||
InstId inst_id = 0;
|
||||
|
||||
@@ -3130,7 +3139,7 @@ static ASMJIT_NOINLINE void avx_mov(UniCompiler& uc, const Vec& dst, const Opera
|
||||
cc->emit(inst_id, dst, src);
|
||||
}
|
||||
|
||||
static ASMJIT_NOINLINE void avx_fmov(UniCompiler& uc, const Vec& dst, const Operand_& src, FloatMode fm) noexcept {
|
||||
static ASMJIT_NOINLINE void avx_fmov(UniCompiler& uc, const Vec& dst, const Operand_& src, FloatMode fm) {
|
||||
BackendCompiler* cc = uc.cc;
|
||||
if (src.is_reg()) {
|
||||
if (dst.id() != src.id()) {
|
||||
@@ -3148,7 +3157,7 @@ static ASMJIT_NOINLINE void avx_fmov(UniCompiler& uc, const Vec& dst, const Oper
|
||||
}
|
||||
}
|
||||
|
||||
static ASMJIT_NOINLINE void avx_make_vec(UniCompiler& uc, Operand_& op, const Vec& ref, const char* name) noexcept {
|
||||
static ASMJIT_NOINLINE void avx_make_vec(UniCompiler& uc, Operand_& op, const Vec& ref, const char* name) {
|
||||
if (op.is_mem()) {
|
||||
Vec tmp = uc.new_similar_reg(ref, name);
|
||||
avx_mov(uc, tmp, op);
|
||||
@@ -3156,14 +3165,14 @@ static ASMJIT_NOINLINE void avx_make_vec(UniCompiler& uc, Operand_& op, const Ve
|
||||
}
|
||||
}
|
||||
|
||||
static ASMJIT_NOINLINE void avx_zero(UniCompiler& uc, const Vec& dst) noexcept {
|
||||
static ASMJIT_NOINLINE void avx_zero(UniCompiler& uc, const Vec& dst) {
|
||||
BackendCompiler* cc = uc.cc;
|
||||
Vec x = dst.xmm();
|
||||
cc->vpxor(x, x, x);
|
||||
return;
|
||||
}
|
||||
|
||||
static ASMJIT_NOINLINE void avx_ones(UniCompiler& uc, const Vec& dst) noexcept {
|
||||
static ASMJIT_NOINLINE void avx_ones(UniCompiler& uc, const Vec& dst) {
|
||||
BackendCompiler* cc = uc.cc;
|
||||
if (uc.has_avx512())
|
||||
cc->emit(Inst::kIdVpternlogd, dst, dst, dst, 0xFF);
|
||||
@@ -3171,7 +3180,7 @@ static ASMJIT_NOINLINE void avx_ones(UniCompiler& uc, const Vec& dst) noexcept {
|
||||
cc->emit(Inst::kIdVpcmpeqb, dst, dst, dst);
|
||||
}
|
||||
|
||||
static ASMJIT_NOINLINE void avx_bit_not(UniCompiler& uc, const Vec& dst, const Operand_& src) noexcept {
|
||||
static ASMJIT_NOINLINE void avx_bit_not(UniCompiler& uc, const Vec& dst, const Operand_& src) {
|
||||
BackendCompiler* cc = uc.cc;
|
||||
|
||||
if (uc.has_avx512()) {
|
||||
@@ -3197,7 +3206,7 @@ static ASMJIT_NOINLINE void avx_bit_not(UniCompiler& uc, const Vec& dst, const O
|
||||
}
|
||||
}
|
||||
|
||||
static ASMJIT_NOINLINE void avx_isign_flip(UniCompiler& uc, const Vec& dst, const Operand_& src, ElementSize sz) noexcept {
|
||||
static ASMJIT_NOINLINE void avx_isign_flip(UniCompiler& uc, const Vec& dst, const Operand_& src, ElementSize sz) {
|
||||
BackendCompiler* cc = uc.cc;
|
||||
Operand msk;
|
||||
|
||||
@@ -3222,7 +3231,7 @@ static ASMJIT_NOINLINE void avx_isign_flip(UniCompiler& uc, const Vec& dst, cons
|
||||
}
|
||||
}
|
||||
|
||||
static ASMJIT_NOINLINE void avx_fsign_flip(UniCompiler& uc, const Vec& dst, const Operand_& src, FloatMode fm) noexcept {
|
||||
static ASMJIT_NOINLINE void avx_fsign_flip(UniCompiler& uc, const Vec& dst, const Operand_& src, FloatMode fm) {
|
||||
BackendCompiler* cc = uc.cc;
|
||||
|
||||
const FloatInst& fi = avx_float_inst[size_t(fm)];
|
||||
@@ -3276,7 +3285,7 @@ public:
|
||||
};
|
||||
|
||||
template<typename Src>
|
||||
static ASMJIT_INLINE void emit_2v_t(UniCompiler& uc, UniOpVV op, const OpArray& dst_, const Src& src_) noexcept {
|
||||
static ASMJIT_INLINE void emit_2v_t(UniCompiler& uc, UniOpVV op, const OpArray& dst_, const Src& src_) {
|
||||
size_t n = dst_.size();
|
||||
OpArrayIter<Src> src(src_);
|
||||
|
||||
@@ -3287,7 +3296,7 @@ static ASMJIT_INLINE void emit_2v_t(UniCompiler& uc, UniOpVV op, const OpArray&
|
||||
}
|
||||
|
||||
template<typename Src>
|
||||
static ASMJIT_INLINE void emit_2vi_t(UniCompiler& uc, UniOpVVI op, const OpArray& dst_, const Src& src_, uint32_t imm) noexcept {
|
||||
static ASMJIT_INLINE void emit_2vi_t(UniCompiler& uc, UniOpVVI op, const OpArray& dst_, const Src& src_, uint32_t imm) {
|
||||
size_t n = dst_.size();
|
||||
OpArrayIter<Src> src(src_);
|
||||
|
||||
@@ -3298,7 +3307,7 @@ static ASMJIT_INLINE void emit_2vi_t(UniCompiler& uc, UniOpVVI op, const OpArray
|
||||
}
|
||||
|
||||
template<typename Src1, typename Src2>
|
||||
static ASMJIT_INLINE void emit_3v_t(UniCompiler& uc, UniOpVVV op, const OpArray& dst_, const Src1& src1_, const Src2& src2_) noexcept {
|
||||
static ASMJIT_INLINE void emit_3v_t(UniCompiler& uc, UniOpVVV op, const OpArray& dst_, const Src1& src1_, const Src2& src2_) {
|
||||
size_t n = dst_.size();
|
||||
OpArrayIter<Src1> src1(src1_);
|
||||
OpArrayIter<Src2> src2(src2_);
|
||||
@@ -3311,7 +3320,7 @@ static ASMJIT_INLINE void emit_3v_t(UniCompiler& uc, UniOpVVV op, const OpArray&
|
||||
}
|
||||
|
||||
template<typename Src1, typename Src2>
|
||||
static ASMJIT_INLINE void emit_3vi_t(UniCompiler& uc, UniOpVVVI op, const OpArray& dst_, const Src1& src1_, const Src2& src2_, uint32_t imm) noexcept {
|
||||
static ASMJIT_INLINE void emit_3vi_t(UniCompiler& uc, UniOpVVVI op, const OpArray& dst_, const Src1& src1_, const Src2& src2_, uint32_t imm) {
|
||||
size_t n = dst_.size();
|
||||
OpArrayIter<Src1> src1(src1_);
|
||||
OpArrayIter<Src2> src2(src2_);
|
||||
@@ -3324,7 +3333,7 @@ static ASMJIT_INLINE void emit_3vi_t(UniCompiler& uc, UniOpVVVI op, const OpArra
|
||||
}
|
||||
|
||||
template<typename Src1, typename Src2, typename Src3>
|
||||
static ASMJIT_INLINE void emit_4v_t(UniCompiler& uc, UniOpVVVV op, const OpArray& dst_, const Src1& src1_, const Src2& src2_, const Src3& src3_) noexcept {
|
||||
static ASMJIT_INLINE void emit_4v_t(UniCompiler& uc, UniOpVVVV op, const OpArray& dst_, const Src1& src1_, const Src2& src2_, const Src3& src3_) {
|
||||
size_t n = dst_.size();
|
||||
OpArrayIter<Src1> src1(src1_);
|
||||
OpArrayIter<Src2> src2(src2_);
|
||||
@@ -3341,7 +3350,7 @@ static ASMJIT_INLINE void emit_4v_t(UniCompiler& uc, UniOpVVVV op, const OpArray
|
||||
// ujit::UniCompiler - Vector Instructions - Emit 2V
|
||||
// =================================================
|
||||
|
||||
void UniCompiler::emit_2v(UniOpVV op, const Operand_& dst_, const Operand_& src_) noexcept {
|
||||
void UniCompiler::emit_2v(UniOpVV op, const Operand_& dst_, const Operand_& src_) {
|
||||
ASMJIT_ASSERT(dst_.is_vec());
|
||||
|
||||
Vec dst(dst_.as<Vec>());
|
||||
@@ -4425,13 +4434,13 @@ void UniCompiler::emit_2v(UniOpVV op, const Operand_& dst_, const Operand_& src_
|
||||
}
|
||||
}
|
||||
|
||||
void UniCompiler::emit_2v(UniOpVV op, const OpArray& dst_, const Operand_& src_) noexcept { emit_2v_t(*this, op, dst_, src_); }
|
||||
void UniCompiler::emit_2v(UniOpVV op, const OpArray& dst_, const OpArray& src_) noexcept { emit_2v_t(*this, op, dst_, src_); }
|
||||
void UniCompiler::emit_2v(UniOpVV op, const OpArray& dst_, const Operand_& src_) { emit_2v_t(*this, op, dst_, src_); }
|
||||
void UniCompiler::emit_2v(UniOpVV op, const OpArray& dst_, const OpArray& src_) { emit_2v_t(*this, op, dst_, src_); }
|
||||
|
||||
// ujit::UniCompiler - Vector Instructions - Emit 2VI
|
||||
// ==================================================
|
||||
|
||||
void UniCompiler::emit_2vi(UniOpVVI op, const Operand_& dst_, const Operand_& src_, uint32_t imm) noexcept {
|
||||
void UniCompiler::emit_2vi(UniOpVVI op, const Operand_& dst_, const Operand_& src_, uint32_t imm) {
|
||||
ASMJIT_ASSERT(dst_.is_vec());
|
||||
|
||||
Vec dst(dst_.as<Vec>());
|
||||
@@ -4790,13 +4799,13 @@ void UniCompiler::emit_2vi(UniOpVVI op, const Operand_& dst_, const Operand_& sr
|
||||
}
|
||||
}
|
||||
|
||||
void UniCompiler::emit_2vi(UniOpVVI op, const OpArray& dst_, const Operand_& src_, uint32_t imm) noexcept { emit_2vi_t(*this, op, dst_, src_, imm); }
|
||||
void UniCompiler::emit_2vi(UniOpVVI op, const OpArray& dst_, const OpArray& src_, uint32_t imm) noexcept { emit_2vi_t(*this, op, dst_, src_, imm); }
|
||||
void UniCompiler::emit_2vi(UniOpVVI op, const OpArray& dst_, const Operand_& src_, uint32_t imm) { emit_2vi_t(*this, op, dst_, src_, imm); }
|
||||
void UniCompiler::emit_2vi(UniOpVVI op, const OpArray& dst_, const OpArray& src_, uint32_t imm) { emit_2vi_t(*this, op, dst_, src_, imm); }
|
||||
|
||||
// ujit::UniCompiler - Vector Instructions - Emit 2VS
|
||||
// ==================================================
|
||||
|
||||
void UniCompiler::emit_2vs(UniOpVR op, const Operand_& dst_, const Operand_& src_, uint32_t idx) noexcept {
|
||||
void UniCompiler::emit_2vs(UniOpVR op, const Operand_& dst_, const Operand_& src_, uint32_t idx) {
|
||||
UniOpVInfo op_info = opcode_info_2vs[size_t(op)];
|
||||
|
||||
Operand src(src_);
|
||||
@@ -5085,7 +5094,7 @@ void UniCompiler::emit_2vs(UniOpVR op, const Operand_& dst_, const Operand_& src
|
||||
// ujit::UniCompiler - Vector Instructions - Emit 2VM
|
||||
// ==================================================
|
||||
|
||||
void UniCompiler::emit_vm(UniOpVM op, const Vec& dst_, const Mem& src_, Alignment alignment, uint32_t idx) noexcept {
|
||||
void UniCompiler::emit_vm(UniOpVM op, const Vec& dst_, const Mem& src_, Alignment alignment, uint32_t idx) {
|
||||
ASMJIT_ASSERT(dst_.is_vec());
|
||||
ASMJIT_ASSERT(src_.is_mem());
|
||||
|
||||
@@ -5492,7 +5501,7 @@ void UniCompiler::emit_vm(UniOpVM op, const Vec& dst_, const Mem& src_, Alignmen
|
||||
}
|
||||
}
|
||||
|
||||
void UniCompiler::emit_vm(UniOpVM op, const OpArray& dst_, const Mem& src_, Alignment alignment, uint32_t idx) noexcept {
|
||||
void UniCompiler::emit_vm(UniOpVM op, const OpArray& dst_, const Mem& src_, Alignment alignment, uint32_t idx) {
|
||||
Mem src(src_);
|
||||
|
||||
UniOpVMInfo op_info = opcode_info_2vm[size_t(op)];
|
||||
@@ -5525,7 +5534,7 @@ void UniCompiler::emit_vm(UniOpVM op, const OpArray& dst_, const Mem& src_, Alig
|
||||
}
|
||||
}
|
||||
|
||||
void UniCompiler::emit_mv(UniOpMV op, const Mem& dst_, const Vec& src_, Alignment alignment, uint32_t idx) noexcept {
|
||||
void UniCompiler::emit_mv(UniOpMV op, const Mem& dst_, const Vec& src_, Alignment alignment, uint32_t idx) {
|
||||
ASMJIT_ASSERT(dst_.is_mem());
|
||||
ASMJIT_ASSERT(src_.is_reg() && src_.is_vec());
|
||||
|
||||
@@ -5832,7 +5841,7 @@ void UniCompiler::emit_mv(UniOpMV op, const Mem& dst_, const Vec& src_, Alignmen
|
||||
}
|
||||
}
|
||||
|
||||
void UniCompiler::emit_mv(UniOpMV op, const Mem& dst_, const OpArray& src_, Alignment alignment, uint32_t idx) noexcept {
|
||||
void UniCompiler::emit_mv(UniOpMV op, const Mem& dst_, const OpArray& src_, Alignment alignment, uint32_t idx) {
|
||||
Support::maybe_unused(idx);
|
||||
|
||||
Mem dst(dst_);
|
||||
@@ -5869,7 +5878,7 @@ void UniCompiler::emit_mv(UniOpMV op, const Mem& dst_, const OpArray& src_, Alig
|
||||
// ujit::UniCompiler - Vector Instructions - Emit 3V
|
||||
// =================================================
|
||||
|
||||
void UniCompiler::emit_3v(UniOpVVV op, const Operand_& dst_, const Operand_& src1_, const Operand_& src2_) noexcept {
|
||||
void UniCompiler::emit_3v(UniOpVVV op, const Operand_& dst_, const Operand_& src1_, const Operand_& src2_) {
|
||||
ASMJIT_ASSERT(dst_.is_vec());
|
||||
ASMJIT_ASSERT(src1_.is_vec());
|
||||
|
||||
@@ -7012,14 +7021,14 @@ void UniCompiler::emit_3v(UniOpVVV op, const Operand_& dst_, const Operand_& src
|
||||
}
|
||||
}
|
||||
|
||||
void UniCompiler::emit_3v(UniOpVVV op, const OpArray& dst_, const Operand_& src1_, const OpArray& src2_) noexcept { emit_3v_t(*this, op, dst_, src1_, src2_); }
|
||||
void UniCompiler::emit_3v(UniOpVVV op, const OpArray& dst_, const OpArray& src1_, const Operand_& src2_) noexcept { emit_3v_t(*this, op, dst_, src1_, src2_); }
|
||||
void UniCompiler::emit_3v(UniOpVVV op, const OpArray& dst_, const OpArray& src1_, const OpArray& src2_) noexcept { emit_3v_t(*this, op, dst_, src1_, src2_); }
|
||||
void UniCompiler::emit_3v(UniOpVVV op, const OpArray& dst_, const Operand_& src1_, const OpArray& src2_) { emit_3v_t(*this, op, dst_, src1_, src2_); }
|
||||
void UniCompiler::emit_3v(UniOpVVV op, const OpArray& dst_, const OpArray& src1_, const Operand_& src2_) { emit_3v_t(*this, op, dst_, src1_, src2_); }
|
||||
void UniCompiler::emit_3v(UniOpVVV op, const OpArray& dst_, const OpArray& src1_, const OpArray& src2_) { emit_3v_t(*this, op, dst_, src1_, src2_); }
|
||||
|
||||
// ujit::UniCompiler - Vector Instructions - Emit 3VI
|
||||
// ==================================================
|
||||
|
||||
void UniCompiler::emit_3vi(UniOpVVVI op, const Operand_& dst_, const Operand_& src1_, const Operand_& src2_, uint32_t imm) noexcept {
|
||||
void UniCompiler::emit_3vi(UniOpVVVI op, const Operand_& dst_, const Operand_& src1_, const Operand_& src2_, uint32_t imm) {
|
||||
ASMJIT_ASSERT(dst_.is_vec());
|
||||
ASMJIT_ASSERT(src1_.is_vec());
|
||||
|
||||
@@ -7252,14 +7261,14 @@ void UniCompiler::emit_3vi(UniOpVVVI op, const Operand_& dst_, const Operand_& s
|
||||
}
|
||||
}
|
||||
|
||||
void UniCompiler::emit_3vi(UniOpVVVI op, const OpArray& dst_, const Operand_& src1_, const OpArray& src2_, uint32_t imm) noexcept { emit_3vi_t(*this, op, dst_, src1_, src2_, imm); }
|
||||
void UniCompiler::emit_3vi(UniOpVVVI op, const OpArray& dst_, const OpArray& src1_, const Operand_& src2_, uint32_t imm) noexcept { emit_3vi_t(*this, op, dst_, src1_, src2_, imm); }
|
||||
void UniCompiler::emit_3vi(UniOpVVVI op, const OpArray& dst_, const OpArray& src1_, const OpArray& src2_, uint32_t imm) noexcept { emit_3vi_t(*this, op, dst_, src1_, src2_, imm); }
|
||||
void UniCompiler::emit_3vi(UniOpVVVI op, const OpArray& dst_, const Operand_& src1_, const OpArray& src2_, uint32_t imm) { emit_3vi_t(*this, op, dst_, src1_, src2_, imm); }
|
||||
void UniCompiler::emit_3vi(UniOpVVVI op, const OpArray& dst_, const OpArray& src1_, const Operand_& src2_, uint32_t imm) { emit_3vi_t(*this, op, dst_, src1_, src2_, imm); }
|
||||
void UniCompiler::emit_3vi(UniOpVVVI op, const OpArray& dst_, const OpArray& src1_, const OpArray& src2_, uint32_t imm) { emit_3vi_t(*this, op, dst_, src1_, src2_, imm); }
|
||||
|
||||
// ujit::UniCompiler - Vector Instructions - Emit 4V
|
||||
// =================================================
|
||||
|
||||
void UniCompiler::emit_4v(UniOpVVVV op, const Operand_& dst_, const Operand_& src1_, const Operand_& src2_, const Operand_& src3_) noexcept {
|
||||
void UniCompiler::emit_4v(UniOpVVVV op, const Operand_& dst_, const Operand_& src1_, const Operand_& src2_, const Operand_& src3_) {
|
||||
ASMJIT_ASSERT(dst_.is_vec());
|
||||
ASMJIT_ASSERT(src1_.is_vec());
|
||||
|
||||
@@ -7569,13 +7578,13 @@ void UniCompiler::emit_4v(UniOpVVVV op, const Operand_& dst_, const Operand_& sr
|
||||
}
|
||||
}
|
||||
|
||||
void UniCompiler::emit_4v(UniOpVVVV op, const OpArray& dst_, const Operand_& src1_, const Operand_& src2_, const OpArray& src3_) noexcept { emit_4v_t(*this, op, dst_, src1_, src2_, src3_); }
|
||||
void UniCompiler::emit_4v(UniOpVVVV op, const OpArray& dst_, const Operand_& src1_, const OpArray& src2_, const Operand& src3_) noexcept { emit_4v_t(*this, op, dst_, src1_, src2_, src3_); }
|
||||
void UniCompiler::emit_4v(UniOpVVVV op, const OpArray& dst_, const Operand_& src1_, const OpArray& src2_, const OpArray& src3_) noexcept { emit_4v_t(*this, op, dst_, src1_, src2_, src3_); }
|
||||
void UniCompiler::emit_4v(UniOpVVVV op, const OpArray& dst_, const OpArray& src1_, const Operand_& src2_, const Operand& src3_) noexcept { emit_4v_t(*this, op, dst_, src1_, src2_, src3_); }
|
||||
void UniCompiler::emit_4v(UniOpVVVV op, const OpArray& dst_, const OpArray& src1_, const Operand_& src2_, const OpArray& src3_) noexcept { emit_4v_t(*this, op, dst_, src1_, src2_, src3_); }
|
||||
void UniCompiler::emit_4v(UniOpVVVV op, const OpArray& dst_, const OpArray& src1_, const OpArray& src2_, const Operand& src3_) noexcept { emit_4v_t(*this, op, dst_, src1_, src2_, src3_); }
|
||||
void UniCompiler::emit_4v(UniOpVVVV op, const OpArray& dst_, const OpArray& src1_, const OpArray& src2_, const OpArray& src3_) noexcept { emit_4v_t(*this, op, dst_, src1_, src2_, src3_); }
|
||||
void UniCompiler::emit_4v(UniOpVVVV op, const OpArray& dst_, const Operand_& src1_, const Operand_& src2_, const OpArray& src3_) { emit_4v_t(*this, op, dst_, src1_, src2_, src3_); }
|
||||
void UniCompiler::emit_4v(UniOpVVVV op, const OpArray& dst_, const Operand_& src1_, const OpArray& src2_, const Operand& src3_) { emit_4v_t(*this, op, dst_, src1_, src2_, src3_); }
|
||||
void UniCompiler::emit_4v(UniOpVVVV op, const OpArray& dst_, const Operand_& src1_, const OpArray& src2_, const OpArray& src3_) { emit_4v_t(*this, op, dst_, src1_, src2_, src3_); }
|
||||
void UniCompiler::emit_4v(UniOpVVVV op, const OpArray& dst_, const OpArray& src1_, const Operand_& src2_, const Operand& src3_) { emit_4v_t(*this, op, dst_, src1_, src2_, src3_); }
|
||||
void UniCompiler::emit_4v(UniOpVVVV op, const OpArray& dst_, const OpArray& src1_, const Operand_& src2_, const OpArray& src3_) { emit_4v_t(*this, op, dst_, src1_, src2_, src3_); }
|
||||
void UniCompiler::emit_4v(UniOpVVVV op, const OpArray& dst_, const OpArray& src1_, const OpArray& src2_, const Operand& src3_) { emit_4v_t(*this, op, dst_, src1_, src2_, src3_); }
|
||||
void UniCompiler::emit_4v(UniOpVVVV op, const OpArray& dst_, const OpArray& src1_, const OpArray& src2_, const OpArray& src3_) { emit_4v_t(*this, op, dst_, src1_, src2_, src3_); }
|
||||
|
||||
ASMJIT_END_SUB_NAMESPACE
|
||||
|
||||
|
||||
@@ -32,6 +32,7 @@ enum class UniOpCond : uint32_t {
|
||||
|
||||
//! Instruction with a single memory operand.
|
||||
enum class UniOpM : uint32_t {
|
||||
kPrefetch, //!< Explicitly prefetch memory for reading (can be implemented as NOP).
|
||||
kStoreZeroReg, //!< Store zero (data-width depends on register size).
|
||||
kStoreZeroU8, //!< Store zero (8-bit).
|
||||
kStoreZeroU16, //!< Store zero (16-bit).
|
||||
|
||||
@@ -775,10 +775,18 @@ public:
|
||||
ASMJIT_INLINE_NODEBUG Error invoke(Out<InvokeNode*> out, uint64_t target, const FuncSignature& signature) { return invoke_(out, Imm(int64_t(target)), signature); }
|
||||
|
||||
//! Return from function.
|
||||
//!
|
||||
//! \note This doesn't end the function - it just emits a return.
|
||||
ASMJIT_INLINE_NODEBUG Error ret() { return add_ret(Operand(), Operand()); }
|
||||
//! \overload
|
||||
|
||||
//! Return from function - one value.
|
||||
//!
|
||||
//! \note This doesn't end the function - it just emits a return.
|
||||
ASMJIT_INLINE_NODEBUG Error ret(const Reg& o0) { return add_ret(o0, Operand()); }
|
||||
//! \overload
|
||||
|
||||
//! Return from function - two values / register pair.
|
||||
//!
|
||||
//! \note This doesn't end the function - it just emits a return.
|
||||
ASMJIT_INLINE_NODEBUG Error ret(const Reg& o0, const Reg& o1) { return add_ret(o0, o1); }
|
||||
|
||||
//! \}
|
||||
|
||||
@@ -575,14 +575,11 @@ ASMJIT_FAVOR_SIZE Error EmitHelper::emit_prolog(const FuncFrame& frame) {
|
||||
ASMJIT_FAVOR_SIZE Error EmitHelper::emit_epilog(const FuncFrame& frame) {
|
||||
Emitter* emitter = _emitter->as<Emitter>();
|
||||
|
||||
uint32_t i;
|
||||
uint32_t reg_id;
|
||||
|
||||
uint32_t register_size = emitter->register_size();
|
||||
uint32_t gp_saved = frame.saved_regs(RegGroup::kGp);
|
||||
|
||||
Gp zsp = emitter->zsp(); // ESP|RSP register.
|
||||
Gp zbp = emitter->zbp(); // EBP|RBP register.
|
||||
Gp zsp = emitter->zsp(); // ESP|RSP register.
|
||||
Gp zbp = emitter->zbp(); // EBP|RBP register.
|
||||
Gp gp_reg = emitter->zsp(); // General purpose register (temporary).
|
||||
|
||||
// Don't emit 'pop zbp' in the pop sequence, this case is handled separately.
|
||||
@@ -610,12 +607,21 @@ ASMJIT_FAVOR_SIZE Error EmitHelper::emit_epilog(const FuncFrame& frame) {
|
||||
}
|
||||
}
|
||||
|
||||
// Emit 'emms' and/or 'vzeroupper'.
|
||||
if (frame.has_mmx_cleanup()) {
|
||||
bool do_mmx_cleanup = frame.has_mmx_cleanup();
|
||||
bool do_avx_cleanup = frame.has_avx_cleanup();
|
||||
|
||||
// Perform automatic AVX cleanup (VZEROUPPER) if there are dirty vector registers.
|
||||
if (frame.has_avx_auto_cleanup() && frame.dirty_regs(RegGroup::kVec) != 0u) {
|
||||
do_avx_cleanup = true;
|
||||
}
|
||||
|
||||
// Emit 'EMMS' if MMX cleanup is enabled.
|
||||
if (do_mmx_cleanup) {
|
||||
ASMJIT_PROPAGATE(emitter->emms());
|
||||
}
|
||||
|
||||
if (frame.has_avx_cleanup()) {
|
||||
// Emit 'VZEROUPPER' if AVX cleanup is enabled.
|
||||
if (do_avx_cleanup) {
|
||||
ASMJIT_PROPAGATE(emitter->vzeroupper());
|
||||
}
|
||||
|
||||
@@ -643,8 +649,8 @@ ASMJIT_FAVOR_SIZE Error EmitHelper::emit_epilog(const FuncFrame& frame) {
|
||||
|
||||
// Emit 'pop gp' sequence.
|
||||
if (gp_saved) {
|
||||
i = gp_saved;
|
||||
reg_id = 16;
|
||||
uint32_t i = gp_saved;
|
||||
uint32_t reg_id = 16;
|
||||
|
||||
do {
|
||||
reg_id--;
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user