// This file is part of AsmJit project <https://asmjit.com>
//
// See <asmjit/core.h> or LICENSE.md for license and copyright information
// SPDX-License-Identifier: Zlib
#include <asmjit/ujit.h>

#include <algorithm>
#include <cmath>
#include <cstdio>
#include <cstring>
#include <limits>

#include "../commons/asmjitutils.h"
#include "../commons/random.h"
static void print_app_info() noexcept {
  printf("AsmJit UniCompiler Test Suite v%u.%u.%u [Arch=%s] [Mode=%s]\n\n",
         unsigned((ASMJIT_LIBRARY_VERSION >> 16)       ),
         unsigned((ASMJIT_LIBRARY_VERSION >>  8) & 0xFF),
         unsigned((ASMJIT_LIBRARY_VERSION      ) & 0xFF),
         asmjit_arch_as_string(asmjit::Arch::kHost),
         asmjit_build_type());
}
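// For example, an ASMJIT_LIBRARY_VERSION of 0x00010D00 would print "1.13.0":
// the major version lives in bits 16 and up, the minor in bits 8-15, and the
// patch in bits 0-7 (this reading follows directly from the shifts above).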
#if !defined(ASMJIT_NO_UJIT) && !defined(ASMJIT_NO_JIT)
#include "broken.h"
namespace UniCompilerTests {
using namespace asmjit;
using namespace asmjit::ujit;
// ujit::UniCompiler - Tests - Constants
// =====================================
static constexpr uint64_t kRandomSeed = 0x1234u;
static constexpr uint32_t kTestIterCount = 1000u;
static ASMJIT_INLINE_CONSTEXPR uint32_t byte_width_from_vec_width(VecWidth vw) noexcept {
  return 16u << uint32_t(vw);
}
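// Added sanity check: the shift above relies on VecWidth::k128 being the
// zero enumerator, so it must map to a 16-byte (128-bit) vector; wider
// widths, if present in the enum, follow as 16 << n.
static_assert(byte_width_from_vec_width(VecWidth::k128) == 16u,
              "VecWidth::k128 is expected to map to 16 bytes");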
// ujit::UniCompiler - Tests - MulAdd
// ==================================
#if defined(ASMJIT_UJIT_X86)
float fadd(float a, float b) noexcept;
float fsub(float a, float b) noexcept;
float fmul(float a, float b) noexcept;
float fdiv(float a, float b) noexcept;
float fsqrt(float a) noexcept;
float fmadd_nofma_ref(float a, float b, float c) noexcept;
float fmadd_fma_ref(float a, float b, float c) noexcept;
double fadd(double a, double b) noexcept;
double fsub(double a, double b) noexcept;
double fmul(double a, double b) noexcept;
double fdiv(double a, double b) noexcept;
double fsqrt(double a) noexcept;
double fmadd_nofma_ref(double a, double b, double c) noexcept;
double fmadd_fma_ref(double a, double b, double c) noexcept;
void madd_fma_check_valgrind_bug(const float a[4], const float b[4], const float c[4], float dst[4]) noexcept;
#else
static ASMJIT_NOINLINE float fadd(float a, float b) noexcept { return a + b; }
static ASMJIT_NOINLINE float fsub(float a, float b) noexcept { return a - b; }
static ASMJIT_NOINLINE float fmul(float a, float b) noexcept { return a * b; }
static ASMJIT_NOINLINE float fdiv(float a, float b) noexcept { return a / b; }
static ASMJIT_NOINLINE float fsqrt(float a) noexcept { return std::sqrt(a); }
// Route through the noinline helpers so the compiler cannot contract the
// expression into a single FMA, which would defeat this non-FMA reference
// (the double overload below already does the same).
static ASMJIT_NOINLINE float fmadd_nofma_ref(float a, float b, float c) noexcept { return fadd(fmul(a, b), c); }
static ASMJIT_NOINLINE float fmadd_fma_ref(float a, float b, float c) noexcept { return std::fma(a, b, c); }
static ASMJIT_NOINLINE double fadd(double a, double b) noexcept { return a + b; }
static ASMJIT_NOINLINE double fsub(double a, double b) noexcept { return a - b; }
static ASMJIT_NOINLINE double fmul(double a, double b) noexcept { return a * b; }
static ASMJIT_NOINLINE double fdiv(double a, double b) noexcept { return a / b; }
static ASMJIT_NOINLINE double fsqrt(double a) noexcept { return std::sqrt(a); }
static ASMJIT_NOINLINE double fmadd_nofma_ref(double a, double b, double c) noexcept { return fadd(fmul(a, b), c); }
static ASMJIT_NOINLINE double fmadd_fma_ref(double a, double b, double c) noexcept { return std::fma(a, b, c); }
#endif
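// Added illustrative sketch (not used by the tests): why both reference
// flavors exist. `a * b + c` rounds twice (after the multiply and after the
// add) while std::fma() rounds once, so the two can differ. With
// e = FLT_EPSILON = 2^-23, (1 + e) * (1 - e) = 1 - 2^-46, which rounds to
// 1.0f in single precision, so the non-FMA path yields exactly 0.0f while
// the FMA path yields -2^-46.
[[maybe_unused]] static void fmadd_reference_example() noexcept {
  float a = 1.0f + std::numeric_limits<float>::epsilon();
  float b = 1.0f - std::numeric_limits<float>::epsilon();
  float c = -1.0f;

  float twice_rounded = fmadd_nofma_ref(a, b, c); // 0.0f
  float once_rounded = fmadd_fma_ref(a, b, c);    // -2^-46 (~ -1.42e-14)

  (void)twice_rounded;
  (void)once_rounded;
}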
static ASMJIT_INLINE float fsign(float a) noexcept { return Support::bit_cast<float>(Support::bit_cast<uint32_t>(a) & (uint32_t(1) << 31)); }
static ASMJIT_INLINE double fsign(double a) noexcept { return Support::bit_cast<double>(Support::bit_cast<uint64_t>(a) & (uint64_t(1) << 63)); }
static ASMJIT_INLINE float fxor(float a, float b) noexcept { return Support::bit_cast<float>(Support::bit_cast<uint32_t>(a) ^ Support::bit_cast<uint32_t>(b)); }
static ASMJIT_INLINE double fxor(double a, double b) noexcept { return Support::bit_cast<double>(Support::bit_cast<uint64_t>(a) ^ Support::bit_cast<uint64_t>(b)); }
// ujit::UniCompiler - Tests - Types
// =================================
struct Variation {
  uint32_t value;

  [[nodiscard]] ASMJIT_INLINE_CONSTEXPR bool operator==(uint32_t v) const noexcept { return value == v; }
  [[nodiscard]] ASMJIT_INLINE_CONSTEXPR bool operator!=(uint32_t v) const noexcept { return value != v; }
  [[nodiscard]] ASMJIT_INLINE_CONSTEXPR bool operator<=(uint32_t v) const noexcept { return value <= v; }
  [[nodiscard]] ASMJIT_INLINE_CONSTEXPR bool operator< (uint32_t v) const noexcept { return value <  v; }
  [[nodiscard]] ASMJIT_INLINE_CONSTEXPR bool operator>=(uint32_t v) const noexcept { return value >= v; }
  [[nodiscard]] ASMJIT_INLINE_CONSTEXPR bool operator> (uint32_t v) const noexcept { return value >  v; }
};
// ujit::UniCompiler - Tests - JIT Function Prototypes
// ===================================================
typedef uint32_t (*TestCondRRFunc)(int32_t a, int32_t b);
typedef uint32_t (*TestCondRIFunc)(int32_t a);
typedef void (*TestMFunc)(void* ptr);
typedef uintptr_t (*TestRMFunc)(uintptr_t reg, void* ptr);
typedef void (*TestMRFunc)(void* ptr, uintptr_t reg);
typedef uint32_t (*TestRRFunc)(uint32_t a);
typedef uint32_t (*TestRRRFunc)(uint32_t a, uint32_t b);
typedef uint32_t (*TestRRIFunc)(uint32_t a);
typedef void (*TestVVFunc)(void* dst, const void* src);
typedef void (*TestVVVFunc)(void* dst, const void* src1, const void* src2);
typedef void (*TestVVVVFunc)(void* dst, const void* src1, const void* src2, const void* src3);
// ujit::UniCompiler - Tests - JIT Context Error Handler
// =====================================================
class TestErrorHandler : public ErrorHandler {
public:
  TestErrorHandler() noexcept {}
  ~TestErrorHandler() noexcept override {}

  void handle_error(Error err, const char* message, BaseEmitter* origin) override {
    Support::maybe_unused(origin);
    EXPECT_EQ(err, Error::kOk)
      .message("AsmJit Error: %s", message);
  }
};
// ujit::UniCompiler - Tests - JIT Context for Testing
// ===================================================
class JitContext {
public:
  JitRuntime rt;
  CpuFeatures features {};
  CpuHints cpu_hints {};

#if !defined(ASMJIT_NO_LOGGING)
  StringLogger logger;
#endif // !ASMJIT_NO_LOGGING

  TestErrorHandler eh;
  CodeHolder code;
  BackendCompiler cc;

  void prepare() noexcept {
    code.reset();
    code.init(rt.environment());
    code.set_error_handler(&eh);

#if !defined(ASMJIT_NO_LOGGING)
    logger.clear();
    code.set_logger(&logger);
#endif // !ASMJIT_NO_LOGGING

    code.attach(&cc);
    cc.add_diagnostic_options(DiagnosticOptions::kRAAnnotate);
    cc.add_diagnostic_options(DiagnosticOptions::kValidateAssembler);
    cc.add_diagnostic_options(DiagnosticOptions::kValidateIntermediate);
  }

  template<typename Fn>
  Fn finish() noexcept {
    Fn fn;
    EXPECT_EQ(cc.finalize(), Error::kOk);
    EXPECT_EQ(rt.add(&fn, &code), Error::kOk);
    code.reset();
    return fn;
  }

#if !defined(ASMJIT_NO_LOGGING)
  const char* logger_content() const noexcept { return logger.data(); }
#else
  const char* logger_content() const noexcept { return "<ASMJIT_NO_LOGGING>"; }
#endif
};
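// Added usage sketch (a hypothetical helper, not used by the tests below):
// every factory in this file follows the same lifecycle. prepare() resets and
// wires up the CodeHolder, the function is then built through UniCompiler,
// and finish<Fn>() finalizes the code and maps it into executable memory.
[[maybe_unused]] static TestRRFunc create_identity_func_example(JitContext& ctx) noexcept {
  ctx.prepare();

  UniCompiler pc(&ctx.cc, ctx.features, ctx.cpu_hints);
  FuncNode* node = ctx.cc.new_func(FuncSignature::build<uint32_t, uint32_t>());
  EXPECT_NOT_NULL(node);

  pc.init_vec_width(VecWidth::k128);
  pc.init_function(node);

  Gp r = pc.new_gp32("r");
  node->set_arg(0, r);

  ctx.cc.ret(r);
  ctx.cc.end_func();
  return ctx.finish<TestRRFunc>();
}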
// ujit::UniCompiler - Tests - Conditional Operations - Functions
// ==============================================================
static TestCondRRFunc create_func_cond_rr(JitContext& ctx, UniOpCond op, CondCode cond_code, uint32_t variation) noexcept {
  ctx.prepare();

  UniCompiler pc(&ctx.cc, ctx.features, ctx.cpu_hints);
  FuncNode* node = ctx.cc.new_func(FuncSignature::build<uint32_t, int32_t, int32_t>());
  EXPECT_NOT_NULL(node);

  pc.init_vec_width(VecWidth::k128);
  pc.init_function(node);

  Gp a = pc.new_gp32("a");
  Gp b = pc.new_gp32("b");
  Gp result = pc.new_gp32("result");

  node->set_arg(0, a);
  node->set_arg(1, b);

  switch (variation) {
    case 0: {
      // Test a conditional branch based on the given condition.
      Label done = pc.new_label();
      pc.mov(result, 1);
      pc.j(done, UniCondition(op, cond_code, a, b));
      pc.mov(result, 0);
      pc.bind(done);
      break;
    }

    case 1: {
      // Test the cmov functionality.
      Gp true_value = pc.new_gp32("true_value");
      pc.mov(result, 0);
      pc.mov(true_value, 1);
      pc.cmov(result, true_value, UniCondition(op, cond_code, a, b));
      break;
    }

    case 2: {
      // Test the select functionality.
      Gp false_value = pc.new_gp32("false_value");
      Gp true_value = pc.new_gp32("true_value");
      pc.mov(false_value, 0);
      pc.mov(true_value, 1);
      pc.select(result, true_value, false_value, UniCondition(op, cond_code, a, b));
      break;
    }
  }

  ctx.cc.ret(result);
  ctx.cc.end_func();
  return ctx.finish<TestCondRRFunc>();
}
static TestCondRIFunc create_func_cond_ri(JitContext& ctx, UniOpCond op, CondCode cond_code, Imm bImm) noexcept {
  ctx.prepare();

  UniCompiler pc(&ctx.cc, ctx.features, ctx.cpu_hints);
  FuncNode* node = ctx.cc.new_func(FuncSignature::build<uint32_t, int32_t>());
  EXPECT_NOT_NULL(node);

  pc.init_vec_width(VecWidth::k128);
  pc.init_function(node);

  Gp a = pc.new_gp32("a");
  Gp result = pc.new_gp32("result");
  Label done = pc.new_label();

  node->set_arg(0, a);

  pc.mov(result, 1);
  pc.j(done, UniCondition(op, cond_code, a, bImm));
  pc.mov(result, 0);
  pc.bind(done);

  ctx.cc.ret(result);
  ctx.cc.end_func();
  return ctx.finish<TestCondRIFunc>();
}
// ujit::UniCompiler - Tests - Conditional Operations - Runner
// ===========================================================
static ASMJIT_NOINLINE void test_conditional_op(JitContext& ctx, UniOpCond op, CondCode cond_code, int32_t a, int32_t b, bool expected) noexcept {
  for (uint32_t variation = 0; variation < 3; variation++) {
    TestCondRRFunc fn_rr = create_func_cond_rr(ctx, op, cond_code, variation);
    TestCondRIFunc fn_ri = create_func_cond_ri(ctx, op, cond_code, b);

    uint32_t observed_rr = fn_rr(a, b);
    EXPECT_EQ(observed_rr, uint32_t(expected))
      .message("Operation failed (RR):\n"
               "  Input #1: %d\n"
               "  Input #2: %d\n"
               "  Expected: %d\n"
               "  Observed: %d\n"
               "Assembly:\n%s",
               a,
               b,
               uint32_t(expected),
               observed_rr,
               ctx.logger_content());

    uint32_t observed_ri = fn_ri(a);
    EXPECT_EQ(observed_ri, uint32_t(expected))
      .message("Operation failed (RI):\n"
               "  Input #1: %d\n"
               "  Input #2: %d\n"
               "  Expected: %d\n"
               "  Observed: %d\n"
               "Assembly:\n%s",
               a,
               b,
               uint32_t(expected),
               observed_ri,
               ctx.logger_content());

    ctx.rt.reset();
  }
}
static ASMJIT_NOINLINE void test_cond_ops(JitContext& ctx) noexcept {
  test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kEqual, 0, 0, true);
  test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kEqual, 1, 1, true);
  test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kEqual, 1, 2, false);
  test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kEqual, 100, 31, false);

  test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kNotEqual, 0, 0, false);
  test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kNotEqual, 1, 1, false);
  test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kNotEqual, 1, 2, true);
  test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kNotEqual, 100, 31, true);

  test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kUnsignedGT, 0, 0, false);
  test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kUnsignedGT, 1, 0, true);
  test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kUnsignedGT, 111111, 0, true);
  test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kUnsignedGT, 111111, 222, true);
  test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kUnsignedGT, 222, 111111, false);
  test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kUnsignedGT, 222, 111, true);

  test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kUnsignedGE, 0, 0, true);
  test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kUnsignedGE, 1, 0, true);
  test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kUnsignedGE, 111111, 0, true);
  test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kUnsignedGE, 111111, 111111, true);
  test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kUnsignedGE, 111111, 222, true);
  test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kUnsignedGE, 222, 111111, false);

  test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kUnsignedLT, 0, 0, false);
  test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kUnsignedLT, 1, 0, false);
  test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kUnsignedLT, 0, 1, true);
  test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kUnsignedLT, 111111, 0, false);
  test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kUnsignedLT, 111111, 222, false);
  test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kUnsignedLT, 222, 111111, true);
  test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kUnsignedLT, 222, 111, false);

  test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kUnsignedLE, 0, 0, true);
  test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kUnsignedLE, 1, 0, false);
  test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kUnsignedLE, 0, 1, true);
  test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kUnsignedLE, 111111, 0, false);
  test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kUnsignedLE, 111111, 222, false);
  test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kUnsignedLE, 222, 111111, true);
  test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kUnsignedLE, 22222, 22222, true);

  test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kSignedGT, 0, 0, false);
  test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kSignedGT, 1, 0, true);
  test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kSignedGT, 111111, 0, true);
  test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kSignedGT, 111111, -222, true);
  test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kSignedGT, -222, 111111, false);
  test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kSignedGT, -222, -111, false);
  test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kSignedGT, -111, -1, false);

  test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kSignedGE, 0, 0, true);
  test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kSignedGE, 1, 0, true);
  test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kSignedGE, 111111, 0, true);
  test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kSignedGE, 111111, 111111, true);
  test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kSignedGE, 111111, -222, true);
  test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kSignedGE, -222, 111111, false);
  test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kSignedGE, -111, -1, false);
  test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kSignedGE, -111, -111, true);

  test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kSignedLT, 0, 0, false);
  test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kSignedLT, 1, 0, false);
  test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kSignedLT, 111111, 0, false);
  test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kSignedLT, 111111, -222, false);
  test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kSignedLT, -222, 111111, true);
  test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kSignedLT, -222, -111, true);
  test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kSignedLT, -111, -1, true);
  test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kSignedLT, -1, -1, false);

  test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kSignedLE, 0, 0, true);
  test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kSignedLE, 1, 0, false);
  test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kSignedLE, 111111, 0, false);
  test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kSignedLE, 111111, -222, false);
  test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kSignedLE, -222, 111111, true);
  test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kSignedLE, -222, -111, true);
  test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kSignedLE, -111, -1, true);
  test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kSignedLE, -1, -1, true);

  test_conditional_op(ctx, UniOpCond::kTest, CondCode::kZero, 0, 0, true);
  test_conditional_op(ctx, UniOpCond::kTest, CondCode::kZero, 1, 0, true);
  test_conditional_op(ctx, UniOpCond::kTest, CondCode::kZero, 111111, 0, true);
  test_conditional_op(ctx, UniOpCond::kTest, CondCode::kZero, 111111, -222, false);
  test_conditional_op(ctx, UniOpCond::kTest, CondCode::kZero, -222, 111111, false);

  test_conditional_op(ctx, UniOpCond::kTest, CondCode::kNotZero, 0, 0, false);
  test_conditional_op(ctx, UniOpCond::kTest, CondCode::kNotZero, 1, 0, false);
  test_conditional_op(ctx, UniOpCond::kTest, CondCode::kNotZero, 111111, 0, false);
  test_conditional_op(ctx, UniOpCond::kTest, CondCode::kNotZero, 111111, -222, true);
  test_conditional_op(ctx, UniOpCond::kTest, CondCode::kNotZero, -222, 111111, true);

  test_conditional_op(ctx, UniOpCond::kBitTest, CondCode::kBTZero, int32_t(0x0), 0, true);
  test_conditional_op(ctx, UniOpCond::kBitTest, CondCode::kBTZero, int32_t(0x1), 0, false);
  test_conditional_op(ctx, UniOpCond::kBitTest, CondCode::kBTZero, int32_t(0xFF), 7, false);
  test_conditional_op(ctx, UniOpCond::kBitTest, CondCode::kBTZero, int32_t(0xFF), 9, true);
  test_conditional_op(ctx, UniOpCond::kBitTest, CondCode::kBTZero, int32_t(0xFFFFFFFF), 31, false);
  test_conditional_op(ctx, UniOpCond::kBitTest, CondCode::kBTZero, int32_t(0x7FFFFFFF), 31, true);

  test_conditional_op(ctx, UniOpCond::kBitTest, CondCode::kBTNotZero, int32_t(0x0), 0, false);
  test_conditional_op(ctx, UniOpCond::kBitTest, CondCode::kBTNotZero, int32_t(0x1), 0, true);
  test_conditional_op(ctx, UniOpCond::kBitTest, CondCode::kBTNotZero, int32_t(0xFF), 7, true);
  test_conditional_op(ctx, UniOpCond::kBitTest, CondCode::kBTNotZero, int32_t(0xFF), 9, false);
  test_conditional_op(ctx, UniOpCond::kBitTest, CondCode::kBTNotZero, int32_t(0xFFFFFFFF), 31, true);
  test_conditional_op(ctx, UniOpCond::kBitTest, CondCode::kBTNotZero, int32_t(0x7FFFFFFF), 31, false);

  test_conditional_op(ctx, UniOpCond::kAssignAnd, CondCode::kZero, int32_t(0x00000000), int32_t(0x00000000), true);
  test_conditional_op(ctx, UniOpCond::kAssignAnd, CondCode::kZero, int32_t(0x00000001), int32_t(0x00000000), true);
  test_conditional_op(ctx, UniOpCond::kAssignAnd, CondCode::kZero, int32_t(0x000000FF), int32_t(0x00000000), true);
  test_conditional_op(ctx, UniOpCond::kAssignAnd, CondCode::kZero, int32_t(0x000000FF), int32_t(0x000000FF), false);
  test_conditional_op(ctx, UniOpCond::kAssignAnd, CondCode::kZero, int32_t(0xFFFFFFFF), int32_t(0xFF000000), false);
  test_conditional_op(ctx, UniOpCond::kAssignAnd, CondCode::kZero, int32_t(0x7FFFFFFF), int32_t(0x80000000), true);

  test_conditional_op(ctx, UniOpCond::kAssignAnd, CondCode::kNotZero, int32_t(0x00000000), int32_t(0x00000000), false);
  test_conditional_op(ctx, UniOpCond::kAssignAnd, CondCode::kNotZero, int32_t(0x00000001), int32_t(0x00000000), false);
  test_conditional_op(ctx, UniOpCond::kAssignAnd, CondCode::kNotZero, int32_t(0x000000FF), int32_t(0x00000000), false);
  test_conditional_op(ctx, UniOpCond::kAssignAnd, CondCode::kNotZero, int32_t(0x000000FF), int32_t(0x000000FF), true);
  test_conditional_op(ctx, UniOpCond::kAssignAnd, CondCode::kNotZero, int32_t(0xFFFFFFFF), int32_t(0xFF000000), true);
  test_conditional_op(ctx, UniOpCond::kAssignAnd, CondCode::kNotZero, int32_t(0x7FFFFFFF), int32_t(0x80000000), false);

  test_conditional_op(ctx, UniOpCond::kAssignOr, CondCode::kZero, int32_t(0x00000000), int32_t(0x00000000), true);
  test_conditional_op(ctx, UniOpCond::kAssignOr, CondCode::kZero, int32_t(0x00000001), int32_t(0x00000000), false);
  test_conditional_op(ctx, UniOpCond::kAssignOr, CondCode::kZero, int32_t(0x000000FF), int32_t(0x00000000), false);
  test_conditional_op(ctx, UniOpCond::kAssignOr, CondCode::kZero, int32_t(0x000000FF), int32_t(0x000000FF), false);
  test_conditional_op(ctx, UniOpCond::kAssignOr, CondCode::kZero, int32_t(0xFFFFFFFF), int32_t(0xFF000000), false);
  test_conditional_op(ctx, UniOpCond::kAssignOr, CondCode::kZero, int32_t(0x7FFFFFFF), int32_t(0x80000000), false);

  test_conditional_op(ctx, UniOpCond::kAssignOr, CondCode::kNotZero, int32_t(0x00000000), int32_t(0x00000000), false);
  test_conditional_op(ctx, UniOpCond::kAssignOr, CondCode::kNotZero, int32_t(0x00000001), int32_t(0x00000000), true);
  test_conditional_op(ctx, UniOpCond::kAssignOr, CondCode::kNotZero, int32_t(0x000000FF), int32_t(0x00000000), true);
  test_conditional_op(ctx, UniOpCond::kAssignOr, CondCode::kNotZero, int32_t(0x000000FF), int32_t(0x000000FF), true);
  test_conditional_op(ctx, UniOpCond::kAssignOr, CondCode::kNotZero, int32_t(0xFFFFFFFF), int32_t(0xFF000000), true);
  test_conditional_op(ctx, UniOpCond::kAssignOr, CondCode::kNotZero, int32_t(0x7FFFFFFF), int32_t(0x80000000), true);

  test_conditional_op(ctx, UniOpCond::kAssignXor, CondCode::kZero, int32_t(0x00000000), int32_t(0x00000000), true);
  test_conditional_op(ctx, UniOpCond::kAssignXor, CondCode::kZero, int32_t(0x00000001), int32_t(0x00000000), false);
  test_conditional_op(ctx, UniOpCond::kAssignXor, CondCode::kZero, int32_t(0x000000FF), int32_t(0x00000000), false);
  test_conditional_op(ctx, UniOpCond::kAssignXor, CondCode::kZero, int32_t(0x000000FF), int32_t(0x000000FF), true);
  test_conditional_op(ctx, UniOpCond::kAssignXor, CondCode::kZero, int32_t(0xFFFFFFFF), int32_t(0xFF000000), false);
  test_conditional_op(ctx, UniOpCond::kAssignXor, CondCode::kZero, int32_t(0x7FFFFFFF), int32_t(0x80000000), false);

  test_conditional_op(ctx, UniOpCond::kAssignXor, CondCode::kNotZero, int32_t(0x00000000), int32_t(0x00000000), false);
  test_conditional_op(ctx, UniOpCond::kAssignXor, CondCode::kNotZero, int32_t(0x00000001), int32_t(0x00000000), true);
  test_conditional_op(ctx, UniOpCond::kAssignXor, CondCode::kNotZero, int32_t(0x000000FF), int32_t(0x00000000), true);
  test_conditional_op(ctx, UniOpCond::kAssignXor, CondCode::kNotZero, int32_t(0x000000FF), int32_t(0x000000FF), false);
  test_conditional_op(ctx, UniOpCond::kAssignXor, CondCode::kNotZero, int32_t(0xFFFFFFFF), int32_t(0xFF000000), true);
  test_conditional_op(ctx, UniOpCond::kAssignXor, CondCode::kNotZero, int32_t(0x7FFFFFFF), int32_t(0x80000000), true);

  test_conditional_op(ctx, UniOpCond::kAssignAdd, CondCode::kZero, int32_t(0x00000000), int32_t(0x00000000), true);
  test_conditional_op(ctx, UniOpCond::kAssignAdd, CondCode::kZero, int32_t(0xFF000000), int32_t(0x01000000), true);
  test_conditional_op(ctx, UniOpCond::kAssignAdd, CondCode::kZero, int32_t(0x000000FF), int32_t(0x00000000), false);
  test_conditional_op(ctx, UniOpCond::kAssignAdd, CondCode::kZero, int32_t(0x000000FF), int32_t(0x000000FF), false);
  test_conditional_op(ctx, UniOpCond::kAssignAdd, CondCode::kZero, int32_t(0xFFFFFFFF), int32_t(0xFF000000), false);
  test_conditional_op(ctx, UniOpCond::kAssignAdd, CondCode::kZero, int32_t(0x7FFFFFFF), int32_t(0x80000000), false);

  test_conditional_op(ctx, UniOpCond::kAssignAdd, CondCode::kNotZero, int32_t(0x00000000), int32_t(0x00000000), false);
  test_conditional_op(ctx, UniOpCond::kAssignAdd, CondCode::kNotZero, int32_t(0xFF000000), int32_t(0x01000000), false);
  test_conditional_op(ctx, UniOpCond::kAssignAdd, CondCode::kNotZero, int32_t(0x000000FF), int32_t(0x00000000), true);
  test_conditional_op(ctx, UniOpCond::kAssignAdd, CondCode::kNotZero, int32_t(0x000000FF), int32_t(0x000000FF), true);
  test_conditional_op(ctx, UniOpCond::kAssignAdd, CondCode::kNotZero, int32_t(0xFFFFFFFF), int32_t(0xFF000000), true);
  test_conditional_op(ctx, UniOpCond::kAssignAdd, CondCode::kNotZero, int32_t(0x7FFFFFFF), int32_t(0x80000000), true);

  test_conditional_op(ctx, UniOpCond::kAssignAdd, CondCode::kCarry, int32_t(0x00000000), int32_t(0x00000000), false);
  test_conditional_op(ctx, UniOpCond::kAssignAdd, CondCode::kCarry, int32_t(0xFF000000), int32_t(0x01000000), true);
  test_conditional_op(ctx, UniOpCond::kAssignAdd, CondCode::kCarry, int32_t(0x000000FF), int32_t(0x00000000), false);
  test_conditional_op(ctx, UniOpCond::kAssignAdd, CondCode::kCarry, int32_t(0x000000FF), int32_t(0x000000FF), false);
  test_conditional_op(ctx, UniOpCond::kAssignAdd, CondCode::kCarry, int32_t(0xFFFFFFFF), int32_t(0xFF000000), true);
  test_conditional_op(ctx, UniOpCond::kAssignAdd, CondCode::kCarry, int32_t(0x7FFFFFFF), int32_t(0x80000000), false);

  test_conditional_op(ctx, UniOpCond::kAssignAdd, CondCode::kNotCarry, int32_t(0x00000000), int32_t(0x00000000), true);
  test_conditional_op(ctx, UniOpCond::kAssignAdd, CondCode::kNotCarry, int32_t(0xFF000000), int32_t(0x01000000), false);
  test_conditional_op(ctx, UniOpCond::kAssignAdd, CondCode::kNotCarry, int32_t(0x000000FF), int32_t(0x00000000), true);
  test_conditional_op(ctx, UniOpCond::kAssignAdd, CondCode::kNotCarry, int32_t(0x000000FF), int32_t(0x000000FF), true);
  test_conditional_op(ctx, UniOpCond::kAssignAdd, CondCode::kNotCarry, int32_t(0xFFFFFFFF), int32_t(0xFF000000), false);
  test_conditional_op(ctx, UniOpCond::kAssignAdd, CondCode::kNotCarry, int32_t(0x7FFFFFFF), int32_t(0x80000000), true);

  test_conditional_op(ctx, UniOpCond::kAssignAdd, CondCode::kSign, int32_t(0x00000000), int32_t(0x00000000), false);
  test_conditional_op(ctx, UniOpCond::kAssignAdd, CondCode::kSign, int32_t(0xFF000000), int32_t(0x01000000), false);
  test_conditional_op(ctx, UniOpCond::kAssignAdd, CondCode::kSign, int32_t(0x000000FF), int32_t(0x80000000), true);
  test_conditional_op(ctx, UniOpCond::kAssignAdd, CondCode::kSign, int32_t(0x000000FF), int32_t(0x800000FF), true);
  test_conditional_op(ctx, UniOpCond::kAssignAdd, CondCode::kSign, int32_t(0xFFFFFFFF), int32_t(0xFF000000), true);
  test_conditional_op(ctx, UniOpCond::kAssignAdd, CondCode::kSign, int32_t(0x7FFFFFFF), int32_t(0x80000000), true);

  test_conditional_op(ctx, UniOpCond::kAssignAdd, CondCode::kNotSign, int32_t(0x00000000), int32_t(0x00000000), true);
  test_conditional_op(ctx, UniOpCond::kAssignAdd, CondCode::kNotSign, int32_t(0xFF000000), int32_t(0x01000000), true);
  test_conditional_op(ctx, UniOpCond::kAssignAdd, CondCode::kNotSign, int32_t(0x000000FF), int32_t(0x80000000), false);
  test_conditional_op(ctx, UniOpCond::kAssignAdd, CondCode::kNotSign, int32_t(0x000000FF), int32_t(0x800000FF), false);
  test_conditional_op(ctx, UniOpCond::kAssignAdd, CondCode::kNotSign, int32_t(0xFFFFFFFF), int32_t(0xFF000000), false);
  test_conditional_op(ctx, UniOpCond::kAssignAdd, CondCode::kNotSign, int32_t(0x7FFFFFFF), int32_t(0x80000000), false);

  test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kZero, int32_t(0x00000000), int32_t(0x00000000), true);
  test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kZero, int32_t(0xFF000000), int32_t(0x01000000), false);
  test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kZero, int32_t(0x000000FF), int32_t(0x00000000), false);
  test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kZero, int32_t(0x000000FF), int32_t(0x000000FF), true);
  test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kZero, int32_t(0xFFFFFFFF), int32_t(0xFF000000), false);
  test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kZero, int32_t(0x7FFFFFFF), int32_t(0x80000000), false);

  test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kNotZero, int32_t(0x00000000), int32_t(0x00000000), false);
  test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kNotZero, int32_t(0xFF000000), int32_t(0x01000000), true);
  test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kNotZero, int32_t(0x000000FF), int32_t(0x00000000), true);
  test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kNotZero, int32_t(0x000000FF), int32_t(0x000000FF), false);
  test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kNotZero, int32_t(0xFFFFFFFF), int32_t(0xFF000000), true);
  test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kNotZero, int32_t(0x7FFFFFFF), int32_t(0x80000000), true);

  test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kUnsignedLT, int32_t(0x00000000), int32_t(0x00000000), false);
  test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kUnsignedLT, int32_t(0xFF000000), int32_t(0x01000000), false);
  test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kUnsignedLT, int32_t(0x000000FF), int32_t(0x00000000), false);
  test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kUnsignedLT, int32_t(0x000000FF), int32_t(0x000000FF), false);
  test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kUnsignedLT, int32_t(0xFFFFFFFF), int32_t(0xFF000000), false);
  test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kUnsignedLT, int32_t(0x7FFFFFFF), int32_t(0x80000000), true);
  test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kUnsignedLT, int32_t(0x00000111), int32_t(0x0000F0FF), true);

  test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kUnsignedGE, int32_t(0x00000000), int32_t(0x00000000), true);
  test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kUnsignedGE, int32_t(0xFF000000), int32_t(0x01000000), true);
  test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kUnsignedGE, int32_t(0x000000FF), int32_t(0x00000000), true);
  test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kUnsignedGE, int32_t(0x000000FF), int32_t(0x000000FF), true);
  test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kUnsignedGE, int32_t(0xFFFFFFFF), int32_t(0xFF000000), true);
  test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kUnsignedGE, int32_t(0x7FFFFFFF), int32_t(0x80000000), false);

  test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kSign, int32_t(0x00000000), int32_t(0x00000000), false);
  test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kSign, int32_t(0x00000000), int32_t(0xFFFFFFFF), false);
  test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kSign, int32_t(0x00000000), int32_t(0x00000001), true);
  test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kSign, int32_t(0x00000001), int32_t(0x00000010), true);
  test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kSign, int32_t(0xFFFFFFFF), int32_t(0xFF000000), false);
  test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kSign, int32_t(0x7FFFFFFF), int32_t(0x80000000), true);

  test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kNotSign, int32_t(0x00000000), int32_t(0x00000000), true);
  test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kNotSign, int32_t(0x00000000), int32_t(0xFFFFFFFF), true);
  test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kNotSign, int32_t(0x00000000), int32_t(0x00000001), false);
  test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kNotSign, int32_t(0x00000001), int32_t(0x00000010), false);
  test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kNotSign, int32_t(0xFFFFFFFF), int32_t(0xFF000000), true);
  test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kNotSign, int32_t(0x7FFFFFFF), int32_t(0x80000000), false);

  test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kUnsignedGT, int32_t(0x00000000), int32_t(0x00000000), false);
  test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kUnsignedGT, int32_t(0xFF000000), int32_t(0x01000000), true);
  test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kUnsignedGT, int32_t(0x000000FF), int32_t(0x00000000), true);
  test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kUnsignedGT, int32_t(0x000000FF), int32_t(0x000000FF), false);
  test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kUnsignedGT, int32_t(0xFFFFFFFF), int32_t(0xFF000000), true);
  test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kUnsignedGT, int32_t(0x7FFFFFFF), int32_t(0x80000000), false);
  test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kUnsignedGT, int32_t(0x00000111), int32_t(0x0000F0FF), false);

  test_conditional_op(ctx, UniOpCond::kAssignShr, CondCode::kZero, int32_t(0x00000000), 1, true);
  test_conditional_op(ctx, UniOpCond::kAssignShr, CondCode::kZero, int32_t(0x000000FF), 8, true);
  test_conditional_op(ctx, UniOpCond::kAssignShr, CondCode::kZero, int32_t(0x000000FF), 7, false);
  test_conditional_op(ctx, UniOpCond::kAssignShr, CondCode::kZero, int32_t(0xFFFFFFFF), 31, false);
  test_conditional_op(ctx, UniOpCond::kAssignShr, CondCode::kZero, int32_t(0x7FFFFFFF), 31, true);

  test_conditional_op(ctx, UniOpCond::kAssignShr, CondCode::kNotZero, int32_t(0x00000000), 1, false);
  test_conditional_op(ctx, UniOpCond::kAssignShr, CondCode::kNotZero, int32_t(0x000000FF), 8, false);
  test_conditional_op(ctx, UniOpCond::kAssignShr, CondCode::kNotZero, int32_t(0x000000FF), 7, true);
  test_conditional_op(ctx, UniOpCond::kAssignShr, CondCode::kNotZero, int32_t(0xFFFFFFFF), 31, true);
  test_conditional_op(ctx, UniOpCond::kAssignShr, CondCode::kNotZero, int32_t(0x7FFFFFFF), 31, false);
}
// ujit::UniCompiler - Tests - M Operations - Functions
// ====================================================
static TestMFunc create_func_m(JitContext& ctx, UniOpM op) noexcept {
  ctx.prepare();

  UniCompiler pc(&ctx.cc, ctx.features, ctx.cpu_hints);
  FuncNode* node = ctx.cc.new_func(FuncSignature::build<void, void*>());
  EXPECT_NOT_NULL(node);

  pc.init_vec_width(VecWidth::k128);
  pc.init_function(node);

  Gp ptr = pc.new_gpz("ptr");
  node->set_arg(0, ptr);

  pc.emit_m(op, mem_ptr(ptr));

  ctx.cc.end_func();
  return ctx.finish<TestMFunc>();
}
// ujit::UniCompiler - Tests - M Operations - Runner
// =================================================
static ASMJIT_NOINLINE void test_m_ops(JitContext& ctx) noexcept {
  uint8_t buffer[8];

  TestMFunc fn_zero_u8 = create_func_m(ctx, UniOpM::kStoreZeroU8);
  memcpy(buffer, "ABCDEFGH", 8);
  fn_zero_u8(buffer + 0);
  EXPECT_EQ(memcmp(buffer, "\0BCDEFGH", 8), 0);
  fn_zero_u8(buffer + 5);
  EXPECT_EQ(memcmp(buffer, "\0BCDE\0GH", 8), 0);

  TestMFunc fn_zero_u16 = create_func_m(ctx, UniOpM::kStoreZeroU16);
  memcpy(buffer, "ABCDEFGH", 8);
  fn_zero_u16(buffer + 0);
  EXPECT_EQ(memcmp(buffer, "\0\0CDEFGH", 8), 0);
  fn_zero_u16(buffer + 4);
  EXPECT_EQ(memcmp(buffer, "\0\0CD\0\0GH", 8), 0);

  TestMFunc fn_zero_u32 = create_func_m(ctx, UniOpM::kStoreZeroU32);
  memcpy(buffer, "ABCDEFGH", 8);
  fn_zero_u32(buffer + 0);
  EXPECT_EQ(memcmp(buffer, "\0\0\0\0EFGH", 8), 0);
  fn_zero_u32(buffer + 4);
  EXPECT_EQ(memcmp(buffer, "\0\0\0\0\0\0\0\0", 8), 0);

#if ASMJIT_ARCH_BITS >= 64
  TestMFunc fn_zero_u64 = create_func_m(ctx, UniOpM::kStoreZeroU64);
  memcpy(buffer, "ABCDEFGH", 8);
  fn_zero_u64(buffer + 0);
  EXPECT_EQ(memcmp(buffer, "\0\0\0\0\0\0\0\0", 8), 0);
#endif

  TestMFunc fn_zero_reg = create_func_m(ctx, UniOpM::kStoreZeroReg);
  memcpy(buffer, "ABCDEFGH", 8);
  fn_zero_reg(buffer + 0);
#if ASMJIT_ARCH_BITS >= 64
  EXPECT_EQ(memcmp(buffer, "\0\0\0\0\0\0\0\0", 8), 0);
#else
  EXPECT_EQ(memcmp(buffer, "\0\0\0\0EFGH", 8), 0);
#endif

  ctx.rt.reset();
}
// ujit::UniCompiler - Tests - RM Operations - Functions
// =====================================================
static TestRMFunc create_func_rm(JitContext& ctx, UniOpRM op) noexcept {
  ctx.prepare();

  UniCompiler pc(&ctx.cc, ctx.features, ctx.cpu_hints);
  FuncNode* node = ctx.cc.new_func(FuncSignature::build<uintptr_t, uintptr_t, void*>());
  EXPECT_NOT_NULL(node);

  pc.init_vec_width(VecWidth::k128);
  pc.init_function(node);

  Gp reg = pc.new_gpz("reg");
  Gp ptr = pc.new_gpz("ptr");
  node->set_arg(0, reg);
  node->set_arg(1, ptr);

  pc.emit_rm(op, reg, mem_ptr(ptr));

  ctx.cc.ret(reg);
  ctx.cc.end_func();
  return ctx.finish<TestRMFunc>();
}
// ujit::UniCompiler - Tests - RM Operations - Runner
// ==================================================
static ASMJIT_NOINLINE void test_rm_ops(JitContext& ctx) noexcept {
  union Mem {
    uint8_t buffer[8];
    uint8_t u8;
    uint16_t u16;
    uint32_t u32;
    uint64_t u64;
  };

  Mem mem{};
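  // Added note: all union members alias the first 1/2/4/8 bytes of `buffer`;
  // the expected values below assume a little-endian target.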
  TestRMFunc fn_load_i8 = create_func_rm(ctx, UniOpRM::kLoadI8);
  mem.u8 = uint8_t(int8_t(6));
  EXPECT_EQ(fn_load_i8(0, mem.buffer), uintptr_t(intptr_t(6)));
  mem.u8 = uint8_t(int8_t(-6));
  EXPECT_EQ(fn_load_i8(0, mem.buffer), uintptr_t(intptr_t(-6)));

  TestRMFunc fn_load_u8 = create_func_rm(ctx, UniOpRM::kLoadU8);
  mem.u8 = uint8_t(0x80);
  EXPECT_EQ(fn_load_u8(0, mem.buffer), 0x80u);
  mem.u8 = uint8_t(0xFF);
  EXPECT_EQ(fn_load_u8(0, mem.buffer), 0xFFu);

  TestRMFunc fn_load_i16 = create_func_rm(ctx, UniOpRM::kLoadI16);
  mem.u16 = uint16_t(int16_t(666));
  EXPECT_EQ(fn_load_i16(0, mem.buffer), uintptr_t(intptr_t(666)));
  mem.u16 = uint16_t(int16_t(-666));
  EXPECT_EQ(fn_load_i16(0, mem.buffer), uintptr_t(intptr_t(-666)));

  TestRMFunc fn_load_u16 = create_func_rm(ctx, UniOpRM::kLoadU16);
  mem.u16 = uint16_t(0x8000);
  EXPECT_EQ(fn_load_u16(0, mem.buffer), 0x8000u);
  mem.u16 = uint16_t(0xFEED);
  EXPECT_EQ(fn_load_u16(0, mem.buffer), 0xFEEDu);

  TestRMFunc fn_load_i32 = create_func_rm(ctx, UniOpRM::kLoadI32);
  mem.u32 = uint32_t(int32_t(666666));
  EXPECT_EQ(fn_load_i32(0, mem.buffer), uintptr_t(intptr_t(666666)));
  mem.u32 = uint32_t(int32_t(-666666));
  EXPECT_EQ(fn_load_i32(0, mem.buffer), uintptr_t(intptr_t(-666666)));

  TestRMFunc fn_load_u32 = create_func_rm(ctx, UniOpRM::kLoadU32);
  mem.u32 = 0x12345678;
  EXPECT_EQ(fn_load_u32(0, mem.buffer), uint32_t(0x12345678));

#if ASMJIT_ARCH_BITS >= 64
  TestRMFunc fn_load_i64 = create_func_rm(ctx, UniOpRM::kLoadI64);
  mem.u64 = 0xF123456789ABCDEFu;
  EXPECT_EQ(fn_load_i64(0, mem.buffer), 0xF123456789ABCDEFu);

  TestRMFunc fn_load_u64 = create_func_rm(ctx, UniOpRM::kLoadU64);
  mem.u64 = 0xF123456789ABCDEFu;
  EXPECT_EQ(fn_load_u64(0, mem.buffer), 0xF123456789ABCDEFu);
#endif

  TestRMFunc fn_load_reg = create_func_rm(ctx, UniOpRM::kLoadReg);
  mem.u64 = 0xF123456789ABCDEFu;
#if ASMJIT_ARCH_BITS >= 64
  EXPECT_EQ(fn_load_reg(0, mem.buffer), 0xF123456789ABCDEFu);
#else
  EXPECT_EQ(fn_load_reg(0, mem.buffer), 0x89ABCDEFu);
#endif
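  // Added note: the merge/shift flavors combine the loaded value with the
  // incoming register argument. Judging by the expected values below,
  // kLoadMergeU8/U16 OR the loaded 8/16 bits into the register, while
  // kLoadShiftU8/U16 first shift the register left by 8/16 bits and then OR
  // the loaded value in.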
  TestRMFunc fn_load_merge_u8 = create_func_rm(ctx, UniOpRM::kLoadMergeU8);
  mem.u8 = uint8_t(0xAA);
  EXPECT_EQ(fn_load_merge_u8(0x1F2FFF00, mem.buffer), 0x1F2FFFAAu);

  TestRMFunc fn_load_shift_u8 = create_func_rm(ctx, UniOpRM::kLoadShiftU8);
  mem.u8 = uint8_t(0xAA);
  EXPECT_EQ(fn_load_shift_u8(0x002FFF00, mem.buffer), 0x2FFF00AAu);

  TestRMFunc fn_load_merge_u16 = create_func_rm(ctx, UniOpRM::kLoadMergeU16);
  mem.u16 = uint16_t(0xAABB);
  EXPECT_EQ(fn_load_merge_u16(0x1F2F0000, mem.buffer), 0x1F2FAABBu);

  TestRMFunc fn_load_shift_u16 = create_func_rm(ctx, UniOpRM::kLoadShiftU16);
  mem.u16 = uint16_t(0xAABB);
  EXPECT_EQ(fn_load_shift_u16(0x00001F2F, mem.buffer), 0x1F2FAABBu);

  ctx.rt.reset();
}
// ujit::UniCompiler - Tests - MR Operations - Functions
// =====================================================
static TestMRFunc create_func_mr(JitContext& ctx, UniOpMR op) noexcept {
  ctx.prepare();

  UniCompiler pc(&ctx.cc, ctx.features, ctx.cpu_hints);
  FuncNode* node = ctx.cc.new_func(FuncSignature::build<void, void*, uintptr_t>());
  EXPECT_NOT_NULL(node);

  pc.init_vec_width(VecWidth::k128);
  pc.init_function(node);

  Gp ptr = pc.new_gpz("ptr");
  Gp reg = pc.new_gpz("reg");
  node->set_arg(0, ptr);
  node->set_arg(1, reg);

  pc.emit_mr(op, mem_ptr(ptr), reg);

  ctx.cc.end_func();
  return ctx.finish<TestMRFunc>();
}
// ujit::UniCompiler - Tests - MR Operations - Runner
// ==================================================
static ASMJIT_NOINLINE void test_mr_ops(JitContext& ctx) noexcept {
  union Mem {
    uint8_t buffer[8];
    uint8_t u8;
    uint16_t u16;
    uint32_t u32;
    uint64_t u64;
  };

  Mem mem{};

  TestMRFunc fn_store_u8 = create_func_mr(ctx, UniOpMR::kStoreU8);
  memcpy(mem.buffer, "ABCDEFGH", 8);
  fn_store_u8(mem.buffer, 0x7A);
  EXPECT_EQ(memcmp(mem.buffer, "zBCDEFGH", 8), 0);

  TestMRFunc fn_store_u16 = create_func_mr(ctx, UniOpMR::kStoreU16);
  memcpy(mem.buffer, "ABCDEFGH", 8);
  fn_store_u16(mem.buffer, 0x7A7A);
  EXPECT_EQ(memcmp(mem.buffer, "zzCDEFGH", 8), 0);

  TestMRFunc fn_store_u32 = create_func_mr(ctx, UniOpMR::kStoreU32);
  memcpy(mem.buffer, "ABCDEFGH", 8);
  fn_store_u32(mem.buffer, 0x7A7A7A7A);
  EXPECT_EQ(memcmp(mem.buffer, "zzzzEFGH", 8), 0);

#if ASMJIT_ARCH_BITS >= 64
  TestMRFunc fn_store_u64 = create_func_mr(ctx, UniOpMR::kStoreU64);
  memcpy(mem.buffer, "ABCDEFGH", 8);
  fn_store_u64(mem.buffer, 0x7A7A7A7A7A7A7A7Au);
  EXPECT_EQ(memcmp(mem.buffer, "zzzzzzzz", 8), 0);
#endif

  TestMRFunc fn_store_reg = create_func_mr(ctx, UniOpMR::kStoreReg);
  memcpy(mem.buffer, "ABCDEFGH", 8);
#if ASMJIT_ARCH_BITS >= 64
  fn_store_reg(mem.buffer, 0x7A7A7A7A7A7A7A7Au);
  EXPECT_EQ(memcmp(mem.buffer, "zzzzzzzz", 8), 0);
#else
  fn_store_reg(mem.buffer, 0x7A7A7A7A);
  EXPECT_EQ(memcmp(mem.buffer, "zzzzEFGH", 8), 0);
#endif

  TestMRFunc fn_add_u8 = create_func_mr(ctx, UniOpMR::kAddU8);
  mem.u64 = 0;
  mem.u8 = 42;
  fn_add_u8(mem.buffer, 13);
  EXPECT_EQ(mem.u8, 55u);
  EXPECT_EQ(memcmp(mem.buffer + 1, "\0\0\0\0\0\0\0", 7), 0);

  TestMRFunc fn_add_u16 = create_func_mr(ctx, UniOpMR::kAddU16);
  mem.u64 = 0;
  mem.u16 = 442;
  fn_add_u16(mem.buffer, 335);
  EXPECT_EQ(mem.u16, 777u);
  EXPECT_EQ(memcmp(mem.buffer + 2, "\0\0\0\0\0\0", 6), 0);

  TestMRFunc fn_add_u32 = create_func_mr(ctx, UniOpMR::kAddU32);
  mem.u64 = 0;
  mem.u32 = 442332;
  fn_add_u32(mem.buffer, 335223);
  EXPECT_EQ(mem.u32, 777555u);
  EXPECT_EQ(memcmp(mem.buffer + 4, "\0\0\0\0", 4), 0);

#if ASMJIT_ARCH_BITS >= 64
  TestMRFunc fn_add_u64 = create_func_mr(ctx, UniOpMR::kAddU64);
  mem.u64 = 0xF123456789ABCDEFu;
  fn_add_u64(mem.buffer, 0x0102030405060708u);
  EXPECT_EQ(mem.u64, 0xF225486B8EB1D4F7u);
#endif

  TestMRFunc fn_add_reg = create_func_mr(ctx, UniOpMR::kAddReg);
  mem.u64 = 0xFFFFFFFFFFFFFFFFu;
#if ASMJIT_ARCH_BITS >= 64
  fn_add_reg(mem.buffer, 1);
  EXPECT_EQ(mem.u64, 0u);
#else
  mem.u32 = 0x01020304;
  fn_add_reg(mem.buffer, 0x02030405);
  EXPECT_EQ(mem.u32, 0x03050709u);
  EXPECT_EQ(memcmp(mem.buffer + 4, "\xFF\xFF\xFF\xFF", 4), 0);
#endif

  ctx.rt.reset();
}
// ujit::UniCompiler - Tests - RR Operations - Functions
// =====================================================
static TestRRFunc create_func_rr(JitContext& ctx, UniOpRR op) noexcept {
  ctx.prepare();

  UniCompiler pc(&ctx.cc, ctx.features, ctx.cpu_hints);
  FuncNode* node = ctx.cc.new_func(FuncSignature::build<uint32_t, uint32_t>());
  EXPECT_NOT_NULL(node);

  pc.init_vec_width(VecWidth::k128);
  pc.init_function(node);

  Gp r = pc.new_gp32("r");
  node->set_arg(0, r);

  pc.emit_2i(op, r, r);

  ctx.cc.ret(r);
  ctx.cc.end_func();
  return ctx.finish<TestRRFunc>();
}
// ujit::UniCompiler - Tests - RR Operations - Runner
// ==================================================
static ASMJIT_NOINLINE void test_rr_ops(JitContext& ctx) noexcept {
  TestRRFunc fn_abs = create_func_rr(ctx, UniOpRR::kAbs);
  EXPECT_EQ(fn_abs(0u), 0u);
  EXPECT_EQ(fn_abs(1u), 1u);
  EXPECT_EQ(fn_abs(uint32_t(-1)), 1u);
  EXPECT_EQ(fn_abs(uint32_t(-333)), 333u);
  EXPECT_EQ(fn_abs(0x80000000u), 0x80000000u);

  TestRRFunc fn_neg = create_func_rr(ctx, UniOpRR::kNeg);
  EXPECT_EQ(fn_neg(0u), 0u);
  EXPECT_EQ(fn_neg(1u), uint32_t(-1));
  EXPECT_EQ(fn_neg(uint32_t(-1)), 1u);
  EXPECT_EQ(fn_neg(uint32_t(-333)), 333u);
  EXPECT_EQ(fn_neg(333u), uint32_t(-333));
  EXPECT_EQ(fn_neg(0x80000000u), 0x80000000u);

  TestRRFunc fn_not = create_func_rr(ctx, UniOpRR::kNot);
  EXPECT_EQ(fn_not(0u), 0xFFFFFFFFu);
  EXPECT_EQ(fn_not(1u), 0xFFFFFFFEu);
  EXPECT_EQ(fn_not(0xFFFFFFFFu), 0u);
  EXPECT_EQ(fn_not(0x12333245u), ~0x12333245u);
  EXPECT_EQ(fn_not(0x80000000u), 0x7FFFFFFFu);

  TestRRFunc fn_bswap32 = create_func_rr(ctx, UniOpRR::kBSwap);
  EXPECT_EQ(fn_bswap32(0x11223344u), 0x44332211u);
  EXPECT_EQ(fn_bswap32(0xFFFF0000u), 0x0000FFFFu);
  EXPECT_EQ(fn_bswap32(0x00000000u), 0x00000000u);

  TestRRFunc fn_clz32 = create_func_rr(ctx, UniOpRR::kCLZ);
  EXPECT_EQ(fn_clz32(0x80000000u), 0u);
  EXPECT_EQ(fn_clz32(0x40000000u), 1u);
  EXPECT_EQ(fn_clz32(0x00800000u), 8u);
  EXPECT_EQ(fn_clz32(0x00008000u), 16u);
  EXPECT_EQ(fn_clz32(0x00000080u), 24u);
  EXPECT_EQ(fn_clz32(0x00000001u), 31u);

  TestRRFunc fn_ctz32 = create_func_rr(ctx, UniOpRR::kCTZ);
  EXPECT_EQ(fn_ctz32(0x80000000u), 31u);
  EXPECT_EQ(fn_ctz32(0x40000000u), 30u);
  EXPECT_EQ(fn_ctz32(0x00800000u), 23u);
  EXPECT_EQ(fn_ctz32(0x00008000u), 15u);
  EXPECT_EQ(fn_ctz32(0x00000080u), 7u);
  EXPECT_EQ(fn_ctz32(0x00000001u), 0u);
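  // Added note: judging by the expected values below, kReflect implements
  // x ^ (int32_t(x) >> 31), i.e. a bitwise NOT applied only to negative
  // inputs (non-negative inputs pass through unchanged).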
  TestRRFunc fn_reflect = create_func_rr(ctx, UniOpRR::kReflect);
  EXPECT_EQ(fn_reflect(0x00000000u), 0x00000000u);
  EXPECT_EQ(fn_reflect(0x00FF0000u), 0x00FF0000u);
  EXPECT_EQ(fn_reflect(0x000000FFu), 0x000000FFu);
  EXPECT_EQ(fn_reflect(0x80000000u), 0x7FFFFFFFu);
  EXPECT_EQ(fn_reflect(0xFFFFFFFFu), 0x00000000u);
  EXPECT_EQ(fn_reflect(0x88FF0000u), 0x7700FFFFu);

  ctx.rt.reset();
}
// ujit::UniCompiler - Tests - RRR Operations - Functions
// ======================================================
static TestRRRFunc create_func_rrr(JitContext& ctx, UniOpRRR op) noexcept {
  ctx.prepare();

  UniCompiler pc(&ctx.cc, ctx.features, ctx.cpu_hints);
  FuncNode* node = ctx.cc.new_func(FuncSignature::build<uint32_t, uint32_t, uint32_t>());
  EXPECT_NOT_NULL(node);

  pc.init_vec_width(VecWidth::k128);
  pc.init_function(node);

  Gp a = pc.new_gp32("a");
  Gp b = pc.new_gp32("b");
  Gp result = pc.new_gp32("result");
  node->set_arg(0, a);
  node->set_arg(1, b);

  pc.emit_3i(op, result, a, b);

  ctx.cc.ret(result);
  ctx.cc.end_func();
  return ctx.finish<TestRRRFunc>();
}
static TestRRIFunc create_func_rri(JitContext& ctx, UniOpRRR op, Imm bImm) noexcept {
  ctx.prepare();

  UniCompiler pc(&ctx.cc, ctx.features, ctx.cpu_hints);
  FuncNode* node = ctx.cc.new_func(FuncSignature::build<uint32_t, uint32_t>());
  EXPECT_NOT_NULL(node);

  pc.init_vec_width(VecWidth::k128);
  pc.init_function(node);

  Gp a = pc.new_gp32("a");
  Gp result = pc.new_gp32("result");
  node->set_arg(0, a);

  pc.emit_3i(op, result, a, bImm);

  ctx.cc.ret(result);
  ctx.cc.end_func();
  return ctx.finish<TestRRIFunc>();
}
// ujit::UniCompiler - Tests - RRR Operations - Runner
// ===================================================
static ASMJIT_NOINLINE void test_rrr_op(JitContext& ctx, UniOpRRR op, uint32_t a, uint32_t b, uint32_t expected) noexcept {
  TestRRRFunc fn_rrr = create_func_rrr(ctx, op);
  uint32_t observed_rrr = fn_rrr(a, b);
  EXPECT_EQ(observed_rrr, expected)
    .message("Operation failed (RRR):\n"
             "  Input #1: %d\n"
             "  Input #2: %d\n"
             "  Expected: %d\n"
             "  Observed: %d\n"
             "Assembly:\n%s",
             a,
             b,
             uint32_t(expected),
             observed_rrr,
             ctx.logger_content());

  TestRRIFunc fn_rri = create_func_rri(ctx, op, Imm(b));
  uint32_t observed_rri = fn_rri(a);
  EXPECT_EQ(observed_rri, expected)
    .message("Operation failed (RRI):\n"
             "  Input #1: %d\n"
             "  Input #2: %d\n"
             "  Expected: %d\n"
             "  Observed: %d\n"
             "Assembly:\n%s",
             a,
             b,
             uint32_t(expected),
             observed_rri,
             ctx.logger_content());

  ctx.rt.reset();
}
static ASMJIT_NOINLINE void test_rrr_ops(JitContext& ctx) noexcept {
  test_rrr_op(ctx, UniOpRRR::kAnd, 0u, 0u, 0u);
  test_rrr_op(ctx, UniOpRRR::kAnd, 0xFFu, 0x11u, 0x11u);
  test_rrr_op(ctx, UniOpRRR::kAnd, 0x11u, 0xFFu, 0x11u);
  test_rrr_op(ctx, UniOpRRR::kAnd, 0xFF11u, 0x1111u, 0x1111u);
  test_rrr_op(ctx, UniOpRRR::kAnd, 0x1111u, 0xFF11u, 0x1111u);
  test_rrr_op(ctx, UniOpRRR::kAnd, 0x0000FFFFu, 0xFFFF0000u, 0u);
  test_rrr_op(ctx, UniOpRRR::kAnd, 0xFFFFFFFFu, 0xFFFF0000u, 0xFFFF0000u);
  test_rrr_op(ctx, UniOpRRR::kAnd, 0x11111111u, 0x11223344u, 0x11001100u);

  test_rrr_op(ctx, UniOpRRR::kOr, 0u, 0u, 0u);
  test_rrr_op(ctx, UniOpRRR::kOr, 0xFFu, 0x11u, 0xFFu);
  test_rrr_op(ctx, UniOpRRR::kOr, 0x11u, 0xFFu, 0xFFu);
  test_rrr_op(ctx, UniOpRRR::kOr, 0xFF11u, 0x1111u, 0xFF11u);
  test_rrr_op(ctx, UniOpRRR::kOr, 0x1111u, 0xFF11u, 0xFF11u);
  test_rrr_op(ctx, UniOpRRR::kOr, 0x0000FFFFu, 0xFFFF0001u, 0xFFFFFFFFu);
  test_rrr_op(ctx, UniOpRRR::kOr, 0xFFFFFFFFu, 0xFF000000u, 0xFFFFFFFFu);
  test_rrr_op(ctx, UniOpRRR::kOr, 0x11111111u, 0x00223344u, 0x11333355u);

  test_rrr_op(ctx, UniOpRRR::kXor, 0u, 0u, 0u);
  test_rrr_op(ctx, UniOpRRR::kXor, 0xFFu, 0x11u, 0xEEu);
  test_rrr_op(ctx, UniOpRRR::kXor, 0x11u, 0xFFu, 0xEEu);
  test_rrr_op(ctx, UniOpRRR::kXor, 0xFF11u, 0x1111u, 0xEE00u);
  test_rrr_op(ctx, UniOpRRR::kXor, 0x1111u, 0xFF11u, 0xEE00u);
  test_rrr_op(ctx, UniOpRRR::kXor, 0x0000FFFFu, 0xFFFF0001u, 0xFFFFFFFEu);
  test_rrr_op(ctx, UniOpRRR::kXor, 0xFFFFFFFFu, 0xFF000000u, 0x00FFFFFFu);
  test_rrr_op(ctx, UniOpRRR::kXor, 0x11111111u, 0x00223344u, 0x11332255u);
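  // Added note: kBic is "bit clear"; the expected values below correspond to
  // dst = a & ~b.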
  test_rrr_op(ctx, UniOpRRR::kBic, 0u, 0u, 0u);
  test_rrr_op(ctx, UniOpRRR::kBic, 0xFFu, 0x11u, 0xEEu);
  test_rrr_op(ctx, UniOpRRR::kBic, 0x11u, 0xFFu, 0x00u);
  test_rrr_op(ctx, UniOpRRR::kBic, 0xFF11u, 0x1111u, 0xEE00u);
  test_rrr_op(ctx, UniOpRRR::kBic, 0x1111u, 0xFF11u, 0x0000u);
  test_rrr_op(ctx, UniOpRRR::kBic, 0x0000FFFFu, 0xFFFF0000u, 0x0000FFFFu);
  test_rrr_op(ctx, UniOpRRR::kBic, 0xFFFFFFFFu, 0xFFFF0000u, 0x0000FFFFu);
  test_rrr_op(ctx, UniOpRRR::kBic, 0x11111111u, 0x11223344u, 0x00110011u);

  test_rrr_op(ctx, UniOpRRR::kAdd, 0u, 0u, 0u);
  test_rrr_op(ctx, UniOpRRR::kAdd, 1u, 2u, 3u);
  test_rrr_op(ctx, UniOpRRR::kAdd, 0xFF000000u, 0x00FFFFFFu, 0xFFFFFFFFu);
  test_rrr_op(ctx, UniOpRRR::kAdd, 1u, 0xFFFu, 0x1000u);
  test_rrr_op(ctx, UniOpRRR::kAdd, 1u, 0xFFF000u, 0xFFF001u);

  test_rrr_op(ctx, UniOpRRR::kSub, 1u, 2u, 0xFFFFFFFFu);

  test_rrr_op(ctx, UniOpRRR::kMul, 1000u, 999u, 999000u);
  test_rrr_op(ctx, UniOpRRR::kMul, 0xFFFFu, 0x00010001u, 0xFFFFFFFFu);

  test_rrr_op(ctx, UniOpRRR::kUDiv, 100000u, 1000u, 100u);
  test_rrr_op(ctx, UniOpRRR::kUMod, 1999u, 1000u, 999u);

  test_rrr_op(ctx, UniOpRRR::kSMin, uint32_t(1111), uint32_t(0), uint32_t(0));
  test_rrr_op(ctx, UniOpRRR::kSMin, uint32_t(-1111), uint32_t(0), uint32_t(-1111));
  test_rrr_op(ctx, UniOpRRR::kSMin, uint32_t(1), uint32_t(22), uint32_t(1));
  test_rrr_op(ctx, UniOpRRR::kSMin, uint32_t(1), uint32_t(0), uint32_t(0));
  test_rrr_op(ctx, UniOpRRR::kSMin, uint32_t(100101033), uint32_t(999), uint32_t(999));
  test_rrr_op(ctx, UniOpRRR::kSMin, uint32_t(100101033), uint32_t(112), uint32_t(112));
  test_rrr_op(ctx, UniOpRRR::kSMin, uint32_t(112), uint32_t(1125532), uint32_t(112));
  test_rrr_op(ctx, UniOpRRR::kSMin, uint32_t(1111), uint32_t(-1), uint32_t(-1));
  test_rrr_op(ctx, UniOpRRR::kSMin, uint32_t(-1111), uint32_t(-1), uint32_t(-1111));
  test_rrr_op(ctx, UniOpRRR::kSMin, uint32_t(-1), uint32_t(-22), uint32_t(-22));
  test_rrr_op(ctx, UniOpRRR::kSMin, uint32_t(-1), uint32_t(-128), uint32_t(-128));
  test_rrr_op(ctx, UniOpRRR::kSMin, uint32_t(-128), uint32_t(-1), uint32_t(-128));
  test_rrr_op(ctx, UniOpRRR::kSMin, uint32_t(-128), uint32_t(9), uint32_t(-128));
  test_rrr_op(ctx, UniOpRRR::kSMin, uint32_t(12444), uint32_t(-1), uint32_t(-1));

  test_rrr_op(ctx, UniOpRRR::kSMax, uint32_t(1), uint32_t(22), uint32_t(22));
  test_rrr_op(ctx, UniOpRRR::kSMax, uint32_t(1), uint32_t(0), uint32_t(1));
  test_rrr_op(ctx, UniOpRRR::kSMax, uint32_t(100101033), uint32_t(999), uint32_t(100101033));
  test_rrr_op(ctx, UniOpRRR::kSMax, uint32_t(100101033), uint32_t(112), uint32_t(100101033));
  test_rrr_op(ctx, UniOpRRR::kSMax, uint32_t(112), uint32_t(1125532), uint32_t(1125532));
  test_rrr_op(ctx, UniOpRRR::kSMax, uint32_t(1111), uint32_t(-1), uint32_t(1111));
  test_rrr_op(ctx, UniOpRRR::kSMax, uint32_t(-1111), uint32_t(-1), uint32_t(-1));
  test_rrr_op(ctx, UniOpRRR::kSMax, uint32_t(-1), uint32_t(-22), uint32_t(-1));
  test_rrr_op(ctx, UniOpRRR::kSMax, uint32_t(-1), uint32_t(-128), uint32_t(-1));
  test_rrr_op(ctx, UniOpRRR::kSMax, uint32_t(-128), uint32_t(-1), uint32_t(-1));
  test_rrr_op(ctx, UniOpRRR::kSMax, uint32_t(-128), uint32_t(9), uint32_t(9));
  test_rrr_op(ctx, UniOpRRR::kSMax, uint32_t(12444), uint32_t(-1), uint32_t(12444));

  test_rrr_op(ctx, UniOpRRR::kUMin, 1, 22, 1);
  test_rrr_op(ctx, UniOpRRR::kUMin, 22, 1, 1);
  test_rrr_op(ctx, UniOpRRR::kUMin, 1, 255, 1);
  test_rrr_op(ctx, UniOpRRR::kUMin, 255, 1, 1);
  test_rrr_op(ctx, UniOpRRR::kUMin, 1023, 255, 255);
  test_rrr_op(ctx, UniOpRRR::kUMin, 255, 1023, 255);
  test_rrr_op(ctx, UniOpRRR::kUMin, 0xFFFFFFFFu, 255, 255);
  test_rrr_op(ctx, UniOpRRR::kUMin, 255, 0xFFFFFFFFu, 255);
  test_rrr_op(ctx, UniOpRRR::kUMin, 0xFFFFFFFFu, 0xFFFFFF00u, 0xFFFFFF00u);
  test_rrr_op(ctx, UniOpRRR::kUMin, 0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu);

  test_rrr_op(ctx, UniOpRRR::kUMax, 1, 22, 22);
  test_rrr_op(ctx, UniOpRRR::kUMax, 22, 1, 22);
  test_rrr_op(ctx, UniOpRRR::kUMax, 1, 255, 255);
  test_rrr_op(ctx, UniOpRRR::kUMax, 255, 1, 255);
  test_rrr_op(ctx, UniOpRRR::kUMax, 1023, 255, 1023);
  test_rrr_op(ctx, UniOpRRR::kUMax, 255, 1023, 1023);
  test_rrr_op(ctx, UniOpRRR::kUMax, 0xFFFFFFFFu, 255, 0xFFFFFFFFu);
  test_rrr_op(ctx, UniOpRRR::kUMax, 255, 0xFFFFFFFFu, 0xFFFFFFFFu);
  test_rrr_op(ctx, UniOpRRR::kUMax, 0xFFFFFFFFu, 0xFFFFFF00u, 0xFFFFFFFFu);
  test_rrr_op(ctx, UniOpRRR::kUMax, 0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu);

  test_rrr_op(ctx, UniOpRRR::kSll, 1u, 1u, 1u << 1);
  test_rrr_op(ctx, UniOpRRR::kSll, 1u, 22u, 1u << 22);
  test_rrr_op(ctx, UniOpRRR::kSll, 1u, 31u, 1u << 31);
  test_rrr_op(ctx, UniOpRRR::kSll, 0x7FFFFFFFu, 1u, 0xFFFFFFFEu);

  test_rrr_op(ctx, UniOpRRR::kSrl, 1u, 1u, 1u >> 1);
  test_rrr_op(ctx, UniOpRRR::kSrl, 1u, 22u, 1u >> 22);
  test_rrr_op(ctx, UniOpRRR::kSrl, 1u, 31u, 1u >> 31);
  test_rrr_op(ctx, UniOpRRR::kSrl, 0x7FFFFFFFu, 1u, 0x7FFFFFFFu >> 1);

  test_rrr_op(ctx, UniOpRRR::kSra, 1u, 1u, 1u >> 1);
  test_rrr_op(ctx, UniOpRRR::kSra, 1u, 22u, 1u >> 22);
  test_rrr_op(ctx, UniOpRRR::kSra, 1u, 31u, 1u >> 31);
  test_rrr_op(ctx, UniOpRRR::kSra, 0x7FFFFFFFu, 1u, 0x7FFFFFFFu >> 1);
  test_rrr_op(ctx, UniOpRRR::kSra, 0xF0000000u, 4u, 0xFF000000u);
  test_rrr_op(ctx, UniOpRRR::kSra, 0x80000000u, 31u, 0xFFFFFFFFu);

  test_rrr_op(ctx, UniOpRRR::kRol, 0x11223344u, 8u, 0x22334411u);
  test_rrr_op(ctx, UniOpRRR::kRol, 0x11223344u, 16u, 0x33441122u);
  test_rrr_op(ctx, UniOpRRR::kRol, 0xFCFFDABBu, 1u, 0xF9FFB577u);

  test_rrr_op(ctx, UniOpRRR::kRor, 0x11223344u, 8u, 0x44112233u);
  test_rrr_op(ctx, UniOpRRR::kRor, 0x11223344u, 16u, 0x33441122u);
  test_rrr_op(ctx, UniOpRRR::kRor, 0xF0000000u, 1u, 0x78000000u);
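  // Added note: from the expected values below, kSBound clamps a signed
  // input into the inclusive range [0, b].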
  test_rrr_op(ctx, UniOpRRR::kSBound, 0, 244u, 0);
  test_rrr_op(ctx, UniOpRRR::kSBound, 42, 244u, 42u);
  test_rrr_op(ctx, UniOpRRR::kSBound, 1111, 244u, 244u);
  test_rrr_op(ctx, UniOpRRR::kSBound, 9999999, 111244u, 111244u);
  test_rrr_op(ctx, UniOpRRR::kSBound, uint32_t(int32_t(-1)), 1000u, 0u);
  test_rrr_op(ctx, UniOpRRR::kSBound, uint32_t(INT32_MIN), 100000u, 0u);
  test_rrr_op(ctx, UniOpRRR::kSBound, uint32_t(INT32_MAX), 0u, 0u);
  test_rrr_op(ctx, UniOpRRR::kSBound, uint32_t(INT32_MAX), 100000u, 100000u);
  test_rrr_op(ctx, UniOpRRR::kSBound, uint32_t(INT32_MAX), uint32_t(INT32_MAX), uint32_t(INT32_MAX));
}
// ujit::UniCompiler - Tests - SIMD - Functions
// ============================================
// The following variations are supported:
// - 0 - separate destination & source registers
// - 1 - destination register is a source register as well
// - 2 - source is a memory operand
// - 3 - source register is a GP register (only for broadcasts from a GP register, otherwise maps to 0)
static constexpr uint32_t kNumVariationsVV = 3;
static constexpr uint32_t kNumVariationsVV_Broadcast = 4;
static TestVVFunc create_func_vv(JitContext& ctx, VecWidth vw, UniOpVV op, Variation variation = Variation{0}) noexcept {
  ctx.prepare();

  UniCompiler pc(&ctx.cc, ctx.features, ctx.cpu_hints);
  FuncNode* node = ctx.cc.new_func(FuncSignature::build<void, void*, const void*>());
  EXPECT_NOT_NULL(node);

  pc.init_vec_width(vw);
  pc.init_function(node);

  Gp dst_ptr = pc.new_gpz("dst_ptr");
  Gp src_ptr = pc.new_gpz("src_ptr");
  node->set_arg(0, dst_ptr);
  node->set_arg(1, src_ptr);

  Vec dst_vec = pc.new_vec_with_width(vw, "dst_vec");

  // Some instructions fill only part of the destination register, so zero it
  // first; this way the test can verify that the remaining part is actually
  // zeroed and doesn't contain garbage.
  pc.v_zero_i(dst_vec);
  if (variation == 3u && (op == UniOpVV::kBroadcastU8 ||
                          op == UniOpVV::kBroadcastU8Z ||
                          op == UniOpVV::kBroadcastU16 ||
                          op == UniOpVV::kBroadcastU16Z ||
                          op == UniOpVV::kBroadcastU32 ||
                          op == UniOpVV::kBroadcastU64 ||
                          op == UniOpVV::kBroadcastF32 ||
                          op == UniOpVV::kBroadcastF64)) {
    // This is used to test broadcasts from a GP register to a vector register
    // (the U16 variants are included so the matching cases below are reachable).
    Gp src_gp = pc.new_gpz("src_gp");
    switch (op) {
      case UniOpVV::kBroadcastU8:
      case UniOpVV::kBroadcastU8Z:
        pc.load_u8(src_gp, mem_ptr(src_ptr));
        pc.emit_2v(op, dst_vec, src_gp);
        break;

      case UniOpVV::kBroadcastU16:
      case UniOpVV::kBroadcastU16Z:
        pc.load_u16(src_gp, mem_ptr(src_ptr));
        pc.emit_2v(op, dst_vec, src_gp);
        break;

      case UniOpVV::kBroadcastU32:
      case UniOpVV::kBroadcastF32:
        pc.load_u32(src_gp, mem_ptr(src_ptr));
        pc.emit_2v(op, dst_vec, src_gp);
        break;

      case UniOpVV::kBroadcastU64:
      case UniOpVV::kBroadcastF64:
        // Prevent using 64-bit registers on 32-bit architectures (that would fail).
        if (pc.is_64bit()) {
          pc.load_u64(src_gp, mem_ptr(src_ptr));
          pc.emit_2v(op, dst_vec, src_gp);
        }
        else {
          pc.emit_2v(op, dst_vec, mem_ptr(src_ptr));
        }
        break;

      default:
        ASMJIT_NOT_REACHED();
    }
  }
  else if (variation == 2u) {
    pc.emit_2v(op, dst_vec, mem_ptr(src_ptr));
  }
  else if (variation == 1u) {
    pc.v_loaduvec(dst_vec, mem_ptr(src_ptr));
    pc.emit_2v(op, dst_vec, dst_vec);
  }
  else {
    Vec src_vec = pc.new_vec_with_width(vw, "src_vec");
    pc.v_loaduvec(src_vec, mem_ptr(src_ptr));
    pc.emit_2v(op, dst_vec, src_vec);
  }

  pc.v_storeuvec(mem_ptr(dst_ptr), dst_vec);

  ctx.cc.end_func();
  return ctx.finish<TestVVFunc>();
}
// The following variations are supported:
// - 0 - separate destination & source registers
// - 1 - destination register is a source register as well
// - 2 - source is a memory operand
static constexpr uint32_t kNumVariationsVVI = 3;
static TestVVFunc create_func_vvi(JitContext& ctx, VecWidth vw, UniOpVVI op, uint32_t imm, Variation variation = Variation{0}) noexcept {
ctx.prepare();
UniCompiler pc(&ctx.cc, ctx.features, ctx.cpu_hints);
FuncNode* node = ctx.cc.new_func(FuncSignature::build<void, void*, const void*>());
EXPECT_NOT_NULL(node);
pc.init_vec_width(vw);
pc.init_function(node);
Gp dst_ptr = pc.new_gpz("dst_ptr");
Gp src_ptr = pc.new_gpz("src_ptr");
node->set_arg(0, dst_ptr);
node->set_arg(1, src_ptr);
Vec src_vec = pc.new_vec_with_width(vw, "src_vec");
switch (variation.value) {
default:
case 0: {
// Some instructions fill only the high part of the register, so zero the destination first to
// make sure the test is deterministic (the untouched low part must be zeroed, not garbage).
Vec dst_vec = pc.new_vec_with_width(vw, "dst_vec");
pc.v_zero_i(dst_vec);
pc.v_loaduvec(src_vec, mem_ptr(src_ptr));
pc.emit_2vi(op, dst_vec, src_vec, imm);
pc.v_storeuvec(mem_ptr(dst_ptr), dst_vec);
break;
}
case 1: {
pc.v_loaduvec(src_vec, mem_ptr(src_ptr));
pc.emit_2vi(op, src_vec, src_vec, imm);
pc.v_storeuvec(mem_ptr(dst_ptr), src_vec);
break;
}
case 2: {
Vec dst_vec = pc.new_vec_with_width(vw, "dst_vec");
pc.emit_2vi(op, dst_vec, mem_ptr(src_ptr), imm);
pc.v_storeuvec(mem_ptr(dst_ptr), dst_vec);
break;
}
}
ctx.cc.end_func();
return ctx.finish<TestVVFunc>();
}
// The following variations are supported:
// - 0 - separate destination & source registers
// - 1 - destination register is the same as the first source register
// - 2 - destination register is the same as the second source register
// - 3 - separate destination & source registers, the second source is a memory operand
// - 4 - destination register is the same as the first source register, second source is a memory operand
static constexpr uint32_t kNumVariationsVVV = 5;
static TestVVVFunc create_func_vvv(JitContext& ctx, VecWidth vw, UniOpVVV op, Variation variation = Variation{0}) noexcept {
ctx.prepare();
UniCompiler pc(&ctx.cc, ctx.features, ctx.cpu_hints);
FuncNode* node = ctx.cc.new_func(FuncSignature::build<void, void*, const void*, const void*>());
EXPECT_NOT_NULL(node);
pc.init_vec_width(vw);
pc.init_function(node);
Gp dst_ptr = pc.new_gpz("dst_ptr");
Gp src1_ptr = pc.new_gpz("src1_ptr");
Gp src2_ptr = pc.new_gpz("src2_ptr");
node->set_arg(0, dst_ptr);
node->set_arg(1, src1_ptr);
node->set_arg(2, src2_ptr);
Vec src1_vec = pc.new_vec_with_width(vw, "src1_vec");
Vec src2_vec = pc.new_vec_with_width(vw, "src2_vec");
switch (variation.value) {
default:
case 0: {
// Some instructions fill only the high part of the register, so zero the destination first to
// make sure the test is deterministic (the untouched low part must be zeroed, not garbage).
Vec dst_vec = pc.new_vec_with_width(vw, "dst_vec");
pc.v_zero_i(dst_vec);
pc.v_loaduvec(src1_vec, mem_ptr(src1_ptr));
pc.v_loaduvec(src2_vec, mem_ptr(src2_ptr));
pc.emit_3v(op, dst_vec, src1_vec, src2_vec);
pc.v_storeuvec(mem_ptr(dst_ptr), dst_vec);
break;
}
case 1: {
pc.v_loaduvec(src1_vec, mem_ptr(src1_ptr));
pc.v_loaduvec(src2_vec, mem_ptr(src2_ptr));
pc.emit_3v(op, src1_vec, src1_vec, src2_vec);
pc.v_storeuvec(mem_ptr(dst_ptr), src1_vec);
break;
}
case 2: {
pc.v_loaduvec(src1_vec, mem_ptr(src1_ptr));
pc.v_loaduvec(src2_vec, mem_ptr(src2_ptr));
pc.emit_3v(op, src2_vec, src1_vec, src2_vec);
pc.v_storeuvec(mem_ptr(dst_ptr), src2_vec);
break;
}
case 3: {
Vec dst_vec = pc.new_vec_with_width(vw, "dst_vec");
pc.v_zero_i(dst_vec);
pc.v_loaduvec(src1_vec, mem_ptr(src1_ptr));
pc.emit_3v(op, dst_vec, src1_vec, mem_ptr(src2_ptr));
pc.v_storeuvec(mem_ptr(dst_ptr), dst_vec);
break;
}
case 4: {
pc.v_loaduvec(src1_vec, mem_ptr(src1_ptr));
pc.emit_3v(op, src1_vec, src1_vec, mem_ptr(src2_ptr));
pc.v_storeuvec(mem_ptr(dst_ptr), src1_vec);
break;
}
}
ctx.cc.end_func();
return ctx.finish<TestVVVFunc>();
}
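// The same variations as `create_func_vvv` are supported here (0..4); the only
// difference is the extra immediate operand, which is passed through unchanged.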
static constexpr uint32_t kNumVariationsVVVI = 5;
static TestVVVFunc create_func_vvvi(JitContext& ctx, VecWidth vw, UniOpVVVI op, uint32_t imm, Variation variation = Variation{0}) noexcept {
ctx.prepare();
UniCompiler pc(&ctx.cc, ctx.features, ctx.cpu_hints);
FuncNode* node = ctx.cc.new_func(FuncSignature::build<void, void*, const void*, const void*>());
EXPECT_NOT_NULL(node);
pc.init_vec_width(vw);
pc.init_function(node);
Gp dst_ptr = pc.new_gpz("dst_ptr");
Gp src1_ptr = pc.new_gpz("src1_ptr");
Gp src2_ptr = pc.new_gpz("src2_ptr");
node->set_arg(0, dst_ptr);
node->set_arg(1, src1_ptr);
node->set_arg(2, src2_ptr);
Vec src1_vec = pc.new_vec_with_width(vw, "src1_vec");
Vec src2_vec = pc.new_vec_with_width(vw, "src2_vec");
switch (variation.value) {
default:
case 0: {
// Some instructions fill only the high part of the register, so zero the destination first to
// make sure the test is deterministic (the untouched low part must be zeroed, not garbage).
Vec dst_vec = pc.new_vec_with_width(vw, "dst_vec");
pc.v_zero_i(dst_vec);
pc.v_loaduvec(src1_vec, mem_ptr(src1_ptr));
pc.v_loaduvec(src2_vec, mem_ptr(src2_ptr));
pc.emit_3vi(op, dst_vec, src1_vec, src2_vec, imm);
pc.v_storeuvec(mem_ptr(dst_ptr), dst_vec);
break;
}
case 1: {
pc.v_loaduvec(src1_vec, mem_ptr(src1_ptr));
pc.v_loaduvec(src2_vec, mem_ptr(src2_ptr));
pc.emit_3vi(op, src1_vec, src1_vec, src2_vec, imm);
pc.v_storeuvec(mem_ptr(dst_ptr), src1_vec);
break;
}
case 2: {
pc.v_loaduvec(src1_vec, mem_ptr(src1_ptr));
pc.v_loaduvec(src2_vec, mem_ptr(src2_ptr));
pc.emit_3vi(op, src2_vec, src1_vec, src2_vec, imm);
pc.v_storeuvec(mem_ptr(dst_ptr), src2_vec);
break;
}
case 3: {
Vec dst_vec = pc.new_vec_with_width(vw, "dst_vec");
pc.v_zero_i(dst_vec);
pc.v_loaduvec(src1_vec, mem_ptr(src1_ptr));
pc.emit_3vi(op, dst_vec, src1_vec, mem_ptr(src2_ptr), imm);
pc.v_storeuvec(mem_ptr(dst_ptr), dst_vec);
break;
}
case 4: {
pc.v_loaduvec(src1_vec, mem_ptr(src1_ptr));
pc.emit_3vi(op, src1_vec, src1_vec, mem_ptr(src2_ptr), imm);
pc.v_storeuvec(mem_ptr(dst_ptr), src1_vec);
break;
}
}
ctx.cc.end_func();
return ctx.finish<TestVVVFunc>();
}
// The following variations are supported:
// - 0 - separate destination & source registers
// - 1 - destination register is the first source register
// - 2 - destination register is the second source register
// - 3 - destination register is the third source register
static constexpr uint32_t kNumVariationsVVVV = 4;
static TestVVVVFunc create_func_vvvv(JitContext& ctx, VecWidth vw, UniOpVVVV op, Variation variation = Variation{0}) noexcept {
ctx.prepare();
UniCompiler pc(&ctx.cc, ctx.features, ctx.cpu_hints);
FuncNode* node = ctx.cc.new_func(FuncSignature::build<void, void*, const void*, const void*, const void*>());
EXPECT_NOT_NULL(node);
pc.init_vec_width(vw);
pc.init_function(node);
Gp dst_ptr = pc.new_gpz("dst_ptr");
Gp src1_ptr = pc.new_gpz("src1_ptr");
Gp src2_ptr = pc.new_gpz("src2_ptr");
Gp src3_ptr = pc.new_gpz("src3_ptr");
node->set_arg(0, dst_ptr);
node->set_arg(1, src1_ptr);
node->set_arg(2, src2_ptr);
node->set_arg(3, src3_ptr);
Vec src1_vec = pc.new_vec_with_width(vw, "src1_vec");
Vec src2_vec = pc.new_vec_with_width(vw, "src2_vec");
Vec src3_vec = pc.new_vec_with_width(vw, "src3_vec");
pc.v_loaduvec(src1_vec, mem_ptr(src1_ptr));
pc.v_loaduvec(src2_vec, mem_ptr(src2_ptr));
pc.v_loaduvec(src3_vec, mem_ptr(src3_ptr));
switch (variation.value) {
default:
case 0: {
// Some instructions fill only the high part of the register, so zero the destination first to
// make sure the test is deterministic (the untouched low part must be zeroed, not garbage).
Vec dst_vec = pc.new_vec_with_width(vw, "dst_vec");
pc.v_zero_i(dst_vec);
pc.emit_4v(op, dst_vec, src1_vec, src2_vec, src3_vec);
pc.v_storeuvec(mem_ptr(dst_ptr), dst_vec);
break;
}
case 1: {
pc.emit_4v(op, src1_vec, src1_vec, src2_vec, src3_vec);
pc.v_storeuvec(mem_ptr(dst_ptr), src1_vec);
break;
}
case 2: {
pc.emit_4v(op, src2_vec, src1_vec, src2_vec, src3_vec);
pc.v_storeuvec(mem_ptr(dst_ptr), src2_vec);
break;
}
case 3: {
pc.emit_4v(op, src3_vec, src1_vec, src2_vec, src3_vec);
pc.v_storeuvec(mem_ptr(dst_ptr), src3_vec);
break;
}
}
ctx.cc.end_func();
return ctx.finish<TestVVVVFunc>();
}

// ujit::UniCompiler - Tests - SIMD - Vector Overlay
// =================================================

enum class VecElementType : uint8_t {
kInt8,
kInt16,
kInt32,
kInt64,
kUInt8,
kUInt16,
kUInt32,
kUInt64,
kFloat32,
kFloat64
};
struct VecOpInfo {
uint32_t _data;
ASMJIT_INLINE_NODEBUG uint32_t count() const noexcept { return _data >> 28; }
ASMJIT_INLINE_NODEBUG VecElementType ret() const noexcept { return VecElementType((_data >> 24) & 0xFu); }
ASMJIT_INLINE_NODEBUG VecElementType arg(uint32_t i) const noexcept { return VecElementType((_data >> (i * 4)) & 0xFu); }
static ASMJIT_INLINE_NODEBUG VecOpInfo make(VecElementType ret, VecElementType arg0) noexcept {
return VecOpInfo{(1u << 28) | (uint32_t(ret) << 24) | uint32_t(arg0)};
}
static ASMJIT_INLINE_NODEBUG VecOpInfo make(VecElementType ret, VecElementType arg0, VecElementType arg1) noexcept {
return VecOpInfo{(2u << 28) | (uint32_t(ret) << 24) | uint32_t(arg0) | (uint32_t(arg1) << 4)};
}
static ASMJIT_INLINE_NODEBUG VecOpInfo make(VecElementType ret, VecElementType arg0, VecElementType arg1, VecElementType arg2) noexcept {
return VecOpInfo{(3u << 28) | (uint32_t(ret) << 24) | uint32_t(arg0) | (uint32_t(arg1) << 4) | (uint32_t(arg2) << 8)};
}
static ASMJIT_INLINE_NODEBUG VecOpInfo make(VecElementType ret, VecElementType arg0, VecElementType arg1, VecElementType arg2, VecElementType arg3) noexcept {
return VecOpInfo{(4u << 28) | (uint32_t(ret) << 24) | uint32_t(arg0) | (uint32_t(arg1) << 4) | (uint32_t(arg2) << 8) | (uint32_t(arg3) << 12)};
}
};
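// Layout of `_data` (derived from the accessors above):
//
//   [31:28] argument count (1..4)
//   [27:24] return element type
//   [15: 0] argument element types, 4 bits each (arg0 in [3:0], arg1 in [7:4], ...)
//
// For example, VecOpInfo::make(VecElementType::kInt32, VecElementType::kFloat32,
// VecElementType::kFloat32) yields count() == 2, ret() == kInt32, and
// arg(0) == arg(1) == kFloat32.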
template<uint32_t kW>
struct alignas(16) VecOverlay {
union {
int8_t data_i8[kW];
uint8_t data_u8[kW];
int16_t data_i16[kW / 2u];
uint16_t data_u16[kW / 2u];
int32_t data_i32[kW / 4u];
uint32_t data_u32[kW / 4u];
int64_t data_i64[kW / 8u];
uint64_t data_u64[kW / 8u];
float data_f32[kW / 4u];
double data_f64[kW / 8u];
};
template<typename T>
ASMJIT_INLINE_NODEBUG T* data() noexcept;
template<typename T>
ASMJIT_INLINE_NODEBUG const T* data() const noexcept;
template<typename T>
ASMJIT_INLINE_NODEBUG T get(size_t index) const noexcept;
template<typename T>
ASMJIT_INLINE_NODEBUG void set(size_t index, const T& value) noexcept;
template<uint32_t kOtherW>
ASMJIT_INLINE_NODEBUG void copy_16b_from(const VecOverlay<kOtherW>& other) noexcept {
data_u64[0] = other.data_u64[0];
data_u64[1] = other.data_u64[1];
}
};
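// Example (illustrative): the union makes it possible to reinterpret the same
// bytes as different element types, which the checks below rely on:
//
//   VecOverlay<16> v {};
//   v.set<uint32_t>(0, 0xAABBCCDDu);
//   uint8_t b = v.get<uint8_t>(0); // 0xDD on little-endian targets.
//
// VecAccess maps an element type to the matching VecOverlay array so the
// templated data()/get()/set() accessors defined below can be type-driven.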
template<typename T>
struct VecAccess;
template<>
struct VecAccess<int8_t> {
template<uint32_t kW> static ASMJIT_INLINE_NODEBUG int8_t* data(VecOverlay<kW>& vec) noexcept { return vec.data_i8; }
template<uint32_t kW> static ASMJIT_INLINE_NODEBUG const int8_t* data(const VecOverlay<kW>& vec) noexcept { return vec.data_i8; }
template<uint32_t kW> static ASMJIT_INLINE_NODEBUG int8_t get(const VecOverlay<kW>& vec, size_t index) noexcept { return vec.data_i8[index]; }
template<uint32_t kW> static ASMJIT_INLINE_NODEBUG void set(VecOverlay<kW>& vec, size_t index, int8_t value) noexcept { vec.data_i8[index] = value; }
};
template<>
struct VecAccess<int16_t> {
template<uint32_t kW> static ASMJIT_INLINE_NODEBUG int16_t* data(VecOverlay<kW>& vec) noexcept { return vec.data_i16; }
template<uint32_t kW> static ASMJIT_INLINE_NODEBUG const int16_t* data(const VecOverlay<kW>& vec) noexcept { return vec.data_i16; }
template<uint32_t kW> static ASMJIT_INLINE_NODEBUG int16_t get(const VecOverlay<kW>& vec, size_t index) noexcept { return vec.data_i16[index]; }
template<uint32_t kW> static ASMJIT_INLINE_NODEBUG void set(VecOverlay<kW>& vec, size_t index, int16_t value) noexcept { vec.data_i16[index] = value; }
};
template<>
struct VecAccess<int32_t> {
template<uint32_t kW> static ASMJIT_INLINE_NODEBUG int32_t* data(VecOverlay<kW>& vec) noexcept { return vec.data_i32; }
template<uint32_t kW> static ASMJIT_INLINE_NODEBUG const int32_t* data(const VecOverlay<kW>& vec) noexcept { return vec.data_i32; }
template<uint32_t kW> static ASMJIT_INLINE_NODEBUG int32_t get(const VecOverlay<kW>& vec, size_t index) noexcept { return vec.data_i32[index]; }
template<uint32_t kW> static ASMJIT_INLINE_NODEBUG void set(VecOverlay<kW>& vec, size_t index, int32_t value) noexcept { vec.data_i32[index] = value; }
};
template<>
struct VecAccess<int64_t> {
template<uint32_t kW> static ASMJIT_INLINE_NODEBUG int64_t* data(VecOverlay<kW>& vec) noexcept { return vec.data_i64; }
template<uint32_t kW> static ASMJIT_INLINE_NODEBUG const int64_t* data(const VecOverlay<kW>& vec) noexcept { return vec.data_i64; }
template<uint32_t kW> static ASMJIT_INLINE_NODEBUG int64_t get(const VecOverlay<kW>& vec, size_t index) noexcept { return vec.data_i64[index]; }
template<uint32_t kW> static ASMJIT_INLINE_NODEBUG void set(VecOverlay<kW>& vec, size_t index, int64_t value) noexcept { vec.data_i64[index] = value; }
};
template<>
struct VecAccess<uint8_t> {
template<uint32_t kW> static ASMJIT_INLINE_NODEBUG uint8_t* data(VecOverlay<kW>& vec) noexcept { return vec.data_u8; }
template<uint32_t kW> static ASMJIT_INLINE_NODEBUG const uint8_t* data(const VecOverlay<kW>& vec) noexcept { return vec.data_u8; }
template<uint32_t kW> static ASMJIT_INLINE_NODEBUG uint8_t get(const VecOverlay<kW>& vec, size_t index) noexcept { return vec.data_u8[index]; }
template<uint32_t kW> static ASMJIT_INLINE_NODEBUG void set(VecOverlay<kW>& vec, size_t index, uint8_t value) noexcept { vec.data_u8[index] = value; }
};
template<>
struct VecAccess<uint16_t> {
template<uint32_t kW> static ASMJIT_INLINE_NODEBUG uint16_t* data(VecOverlay<kW>& vec) noexcept { return vec.data_u16; }
template<uint32_t kW> static ASMJIT_INLINE_NODEBUG const uint16_t* data(const VecOverlay<kW>& vec) noexcept { return vec.data_u16; }
template<uint32_t kW> static ASMJIT_INLINE_NODEBUG uint16_t get(const VecOverlay<kW>& vec, size_t index) noexcept { return vec.data_u16[index]; }
template<uint32_t kW> static ASMJIT_INLINE_NODEBUG void set(VecOverlay<kW>& vec, size_t index, uint16_t value) noexcept { vec.data_u16[index] = value; }
};
template<>
struct VecAccess<uint32_t> {
template<uint32_t kW> static ASMJIT_INLINE_NODEBUG uint32_t* data(VecOverlay<kW>& vec) noexcept { return vec.data_u32; }
template<uint32_t kW> static ASMJIT_INLINE_NODEBUG const uint32_t* data(const VecOverlay<kW>& vec) noexcept { return vec.data_u32; }
template<uint32_t kW> static ASMJIT_INLINE_NODEBUG uint32_t get(const VecOverlay<kW>& vec, size_t index) noexcept { return vec.data_u32[index]; }
template<uint32_t kW> static ASMJIT_INLINE_NODEBUG void set(VecOverlay<kW>& vec, size_t index, uint32_t value) noexcept { vec.data_u32[index] = value; }
};
template<>
struct VecAccess<uint64_t> {
template<uint32_t kW> static ASMJIT_INLINE_NODEBUG uint64_t* data(VecOverlay<kW>& vec) noexcept { return vec.data_u64; }
template<uint32_t kW> static ASMJIT_INLINE_NODEBUG const uint64_t* data(const VecOverlay<kW>& vec) noexcept { return vec.data_u64; }
template<uint32_t kW> static ASMJIT_INLINE_NODEBUG uint64_t get(const VecOverlay<kW>& vec, size_t index) noexcept { return vec.data_u64[index]; }
template<uint32_t kW> static ASMJIT_INLINE_NODEBUG void set(VecOverlay<kW>& vec, size_t index, uint64_t value) noexcept { vec.data_u64[index] = value; }
};
template<>
struct VecAccess<float> {
template<uint32_t kW> static ASMJIT_INLINE_NODEBUG float* data(VecOverlay<kW>& vec) noexcept { return vec.data_f32; }
template<uint32_t kW> static ASMJIT_INLINE_NODEBUG const float* data(const VecOverlay<kW>& vec) noexcept { return vec.data_f32; }
template<uint32_t kW> static ASMJIT_INLINE_NODEBUG float get(const VecOverlay<kW>& vec, size_t index) noexcept { return vec.data_f32[index]; }
template<uint32_t kW> static ASMJIT_INLINE_NODEBUG void set(VecOverlay<kW>& vec, size_t index, float value) noexcept { vec.data_f32[index] = value; }
};
template<>
struct VecAccess<double> {
template<uint32_t kW> static ASMJIT_INLINE_NODEBUG double* data(VecOverlay<kW>& vec) noexcept { return vec.data_f64; }
template<uint32_t kW> static ASMJIT_INLINE_NODEBUG const double* data(const VecOverlay<kW>& vec) noexcept { return vec.data_f64; }
template<uint32_t kW> static ASMJIT_INLINE_NODEBUG double get(const VecOverlay<kW>& vec, size_t index) noexcept { return vec.data_f64[index]; }
template<uint32_t kW> static ASMJIT_INLINE_NODEBUG void set(VecOverlay<kW>& vec, size_t index, double value) noexcept { vec.data_f64[index] = value; }
};
template<uint32_t kW>
template<typename T>
ASMJIT_INLINE_NODEBUG T* VecOverlay<kW>::data() noexcept { return VecAccess<T>::data(*this); }
template<uint32_t kW>
template<typename T>
ASMJIT_INLINE_NODEBUG const T* VecOverlay<kW>::data() const noexcept { return VecAccess<T>::data(*this); }
template<uint32_t kW>
template<typename T>
ASMJIT_INLINE_NODEBUG T VecOverlay<kW>::get(size_t index) const noexcept { return VecAccess<T>::get(*this, index); }
template<uint32_t kW>
template<typename T>
ASMJIT_INLINE_NODEBUG void VecOverlay<kW>::set(size_t index, const T& value) noexcept { return VecAccess<T>::set(*this, index, value); }
template<typename T> struct TypeNameToString {};
template<> struct TypeNameToString<int8_t > { static ASMJIT_INLINE_NODEBUG const char* get() noexcept { return "int8"; } };
template<> struct TypeNameToString<int16_t > { static ASMJIT_INLINE_NODEBUG const char* get() noexcept { return "int16"; } };
template<> struct TypeNameToString<int32_t > { static ASMJIT_INLINE_NODEBUG const char* get() noexcept { return "int32"; } };
template<> struct TypeNameToString<int64_t > { static ASMJIT_INLINE_NODEBUG const char* get() noexcept { return "int64"; } };
template<> struct TypeNameToString<uint8_t > { static ASMJIT_INLINE_NODEBUG const char* get() noexcept { return "uint8"; } };
template<> struct TypeNameToString<uint16_t> { static ASMJIT_INLINE_NODEBUG const char* get() noexcept { return "uint16"; } };
template<> struct TypeNameToString<uint32_t> { static ASMJIT_INLINE_NODEBUG const char* get() noexcept { return "uint32"; } };
template<> struct TypeNameToString<uint64_t> { static ASMJIT_INLINE_NODEBUG const char* get() noexcept { return "uint64"; } };
template<> struct TypeNameToString<float > { static ASMJIT_INLINE_NODEBUG const char* get() noexcept { return "float32"; } };
template<> struct TypeNameToString<double > { static ASMJIT_INLINE_NODEBUG const char* get() noexcept { return "float64"; } };
template<uint32_t kW>
static bool vec_eq(const VecOverlay<kW>& a, const VecOverlay<kW>& b) noexcept {
return memcmp(a.data_u8, b.data_u8, kW) == 0;
}
template<typename T>
static bool float_eq(const T& a, const T& b) noexcept {
return a == b || (std::isnan(a) && std::isnan(b));
}
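// Unlike the IEEE `==` operator (for which NaN != NaN), float_eq treats any two
// NaNs as equal, so a JIT-produced NaN matches the reference even when payloads
// differ. The element-typed vec_eq below uses it for float lanes and falls back
// to a bitwise comparison for everything else.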
template<uint32_t kW>
static bool vec_eq(const VecOverlay<kW>& a, const VecOverlay<kW>& b, VecElementType element_type) noexcept {
if (element_type == VecElementType::kFloat32) {
size_t count = kW / sizeof(float);
for (size_t i = 0; i < count; i++) {
if (!float_eq(a.data_f32[i], b.data_f32[i])) {
return false;
}
}
return true;
}
else if (element_type == VecElementType::kFloat64) {
size_t count = kW / sizeof(double);
for (size_t i = 0; i < count; i++) {
if (!float_eq(a.data_f64[i], b.data_f64[i])) {
return false;
}
}
return true;
}
else {
return vec_eq(a, b);
}
}
template<uint32_t kW>
static ASMJIT_NOINLINE String vec_stringify(const VecOverlay<kW>& vec, VecElementType element_type) noexcept {
String s;
s.append('{');
switch (element_type) {
case VecElementType::kInt8 : { for (uint32_t i = 0; i < kW ; i++) s.append_format("%s%d" , i == 0 ? "" : ", ", vec.data_i8[i]); break; }
case VecElementType::kInt16 : { for (uint32_t i = 0; i < kW / 2; i++) s.append_format("%s%d" , i == 0 ? "" : ", ", vec.data_i16[i]); break; }
case VecElementType::kInt32 : { for (uint32_t i = 0; i < kW / 4; i++) s.append_format("%s%d" , i == 0 ? "" : ", ", vec.data_i32[i]); break; }
case VecElementType::kInt64 : { for (uint32_t i = 0; i < kW / 8; i++) s.append_format("%s%lld", i == 0 ? "" : ", ", (long long)vec.data_i64[i]); break; }
case VecElementType::kUInt8 : { for (uint32_t i = 0; i < kW ; i++) s.append_format("%s%u" , i == 0 ? "" : ", ", vec.data_u8[i]); break; }
case VecElementType::kUInt16 : { for (uint32_t i = 0; i < kW / 2; i++) s.append_format("%s%u" , i == 0 ? "" : ", ", vec.data_u16[i]); break; }
case VecElementType::kUInt32 : { for (uint32_t i = 0; i < kW / 4; i++) s.append_format("%s%u" , i == 0 ? "" : ", ", vec.data_u32[i]); break; }
case VecElementType::kUInt64 : { for (uint32_t i = 0; i < kW / 8; i++) s.append_format("%s%llu", i == 0 ? "" : ", ", (unsigned long long)vec.data_u64[i]); break; }
case VecElementType::kFloat32: { for (uint32_t i = 0; i < kW / 4; i++) s.append_format("%s%.20f" , i == 0 ? "" : ", ", double(vec.data_f32[i])); break; }
case VecElementType::kFloat64: { for (uint32_t i = 0; i < kW / 8; i++) s.append_format("%s%.20f" , i == 0 ? "" : ", ", double(vec.data_f64[i])); break; }
default:
ASMJIT_NOT_REACHED();
}
s.append('}');
return s;
}

// ujit::UniCompiler - Tests - SIMD - Metadata
// ===========================================

static const char* vec_op_name_vv(UniOpVV op) noexcept {
switch (op) {
case UniOpVV::kMov : return "v_mov";
case UniOpVV::kMovU64 : return "v_mov_u64";
case UniOpVV::kBroadcastU8Z : return "v_broadcast_u8z";
case UniOpVV::kBroadcastU16Z : return "v_broadcast_u16z";
case UniOpVV::kBroadcastU8 : return "v_broadcast_u8";
case UniOpVV::kBroadcastU16 : return "v_broadcast_u16";
case UniOpVV::kBroadcastU32 : return "v_broadcast_u32";
case UniOpVV::kBroadcastU64 : return "v_broadcast_u64";
case UniOpVV::kBroadcastF32 : return "v_broadcast_f32";
case UniOpVV::kBroadcastF64 : return "v_broadcast_f64";
case UniOpVV::kBroadcastV128_U32 : return "v_broadcast_v128_u32";
case UniOpVV::kBroadcastV128_U64 : return "v_broadcast_v128_u64";
case UniOpVV::kBroadcastV128_F32 : return "v_broadcast_v128_f32";
case UniOpVV::kBroadcastV128_F64 : return "v_broadcast_v128_f64";
case UniOpVV::kBroadcastV256_U32 : return "v_broadcast_v256_u32";
case UniOpVV::kBroadcastV256_U64 : return "v_broadcast_v256_u64";
case UniOpVV::kBroadcastV256_F32 : return "v_broadcast_v256_f32";
case UniOpVV::kBroadcastV256_F64 : return "v_broadcast_v256_f64";
case UniOpVV::kAbsI8 : return "v_abs_i8";
case UniOpVV::kAbsI16 : return "v_abs_i16";
case UniOpVV::kAbsI32 : return "v_abs_i32";
case UniOpVV::kAbsI64 : return "v_abs_i64";
case UniOpVV::kNotU32 : return "v_not_u32";
case UniOpVV::kNotU64 : return "v_not_u64";
case UniOpVV::kCvtI8LoToI16 : return "v_cvt_i8_lo_to_i16";
case UniOpVV::kCvtI8HiToI16 : return "v_cvt_i8_hi_to_i16";
case UniOpVV::kCvtU8LoToU16 : return "v_cvt_u8_lo_to_u16";
case UniOpVV::kCvtU8HiToU16 : return "v_cvt_u8_hi_to_u16";
case UniOpVV::kCvtI8ToI32 : return "v_cvt_i8_to_i32";
case UniOpVV::kCvtU8ToU32 : return "v_cvt_u8_to_u32";
case UniOpVV::kCvtI16LoToI32 : return "v_cvt_i16_lo_to_i32";
case UniOpVV::kCvtI16HiToI32 : return "v_cvt_i16_hi_to_i32";
case UniOpVV::kCvtU16LoToU32 : return "v_cvt_u16_lo_to_u32";
case UniOpVV::kCvtU16HiToU32 : return "v_cvt_u16_hi_to_u32";
case UniOpVV::kCvtI32LoToI64 : return "v_cvt_i32_lo_to_i64";
case UniOpVV::kCvtI32HiToI64 : return "v_cvt_i32_hi_to_i64";
case UniOpVV::kCvtU32LoToU64 : return "v_cvt_u32_lo_to_u64";
case UniOpVV::kCvtU32HiToU64 : return "v_cvt_u32_hi_to_u64";
case UniOpVV::kAbsF32S : return "s_abs_f32";
case UniOpVV::kAbsF64S : return "s_abs_f64";
case UniOpVV::kAbsF32 : return "v_abs_f32";
case UniOpVV::kAbsF64 : return "v_abs_f64";
case UniOpVV::kNegF32S : return "s_neg_f32";
case UniOpVV::kNegF64S : return "s_neg_f64";
case UniOpVV::kNegF32 : return "v_neg_f32";
case UniOpVV::kNegF64 : return "v_neg_f64";
case UniOpVV::kNotF32 : return "v_not_f32";
case UniOpVV::kNotF64 : return "v_not_f64";
case UniOpVV::kTruncF32S : return "v_trunc_f32s";
case UniOpVV::kTruncF64S : return "v_trunc_f64s";
case UniOpVV::kTruncF32 : return "v_trunc_f32";
case UniOpVV::kTruncF64 : return "v_trunc_f64";
case UniOpVV::kFloorF32S : return "v_floor_f32s";
case UniOpVV::kFloorF64S : return "v_floor_f64s";
case UniOpVV::kFloorF32 : return "v_floor_f32";
case UniOpVV::kFloorF64 : return "v_floor_f64";
case UniOpVV::kCeilF32S : return "v_ceil_f32s";
case UniOpVV::kCeilF64S : return "v_ceil_f64s";
case UniOpVV::kCeilF32 : return "v_ceil_f32";
case UniOpVV::kCeilF64 : return "v_ceil_f64";
case UniOpVV::kRoundEvenF32S : return "v_round_even_f32s";
case UniOpVV::kRoundEvenF64S : return "v_round_even_f64s";
case UniOpVV::kRoundEvenF32 : return "v_round_even_f32";
case UniOpVV::kRoundEvenF64 : return "v_round_even_f64";
case UniOpVV::kRoundHalfAwayF32S : return "v_round_half_away_f32s";
case UniOpVV::kRoundHalfAwayF64S : return "v_round_half_away_f64s";
case UniOpVV::kRoundHalfAwayF32 : return "v_round_half_away_f32";
case UniOpVV::kRoundHalfAwayF64 : return "v_round_half_away_f64";
case UniOpVV::kRoundHalfUpF32S : return "v_round_half_up_f32s";
case UniOpVV::kRoundHalfUpF64S : return "v_round_half_up_f64s";
case UniOpVV::kRoundHalfUpF32 : return "v_round_half_up_f32";
case UniOpVV::kRoundHalfUpF64 : return "v_round_half_up_f64";
case UniOpVV::kRcpF32 : return "v_rcp_f32";
case UniOpVV::kRcpF64 : return "v_rcp_f64";
case UniOpVV::kSqrtF32S : return "v_sqrt_f32s";
case UniOpVV::kSqrtF64S : return "v_sqrt_f64s";
case UniOpVV::kSqrtF32 : return "v_sqrt_f32";
case UniOpVV::kSqrtF64 : return "v_sqrt_f64";
case UniOpVV::kCvtF32ToF64S : return "v_cvt_f32_to_f64s";
case UniOpVV::kCvtF64ToF32S : return "v_cvt_f64_to_f32s";
case UniOpVV::kCvtI32ToF32 : return "v_cvt_i32_to_f32";
case UniOpVV::kCvtF32LoToF64 : return "v_cvt_f32_lo_to_f64";
case UniOpVV::kCvtF32HiToF64 : return "v_cvt_f32_hi_to_f64";
case UniOpVV::kCvtF64ToF32Lo : return "v_cvt_f64_to_f32_lo";
case UniOpVV::kCvtF64ToF32Hi : return "v_cvt_f64_to_f32_hi";
case UniOpVV::kCvtI32LoToF64 : return "v_cvt_i32_lo_to_f64";
case UniOpVV::kCvtI32HiToF64 : return "v_cvt_i32_hi_to_f64";
case UniOpVV::kCvtTruncF32ToI32 : return "v_cvt_trunc_f32_to_i32";
case UniOpVV::kCvtTruncF64ToI32Lo: return "v_cvt_trunc_f64_to_i32_lo";
case UniOpVV::kCvtTruncF64ToI32Hi: return "v_cvt_trunc_f64_to_i32_hi";
case UniOpVV::kCvtRoundF32ToI32 : return "v_cvt_round_f32_to_i32";
case UniOpVV::kCvtRoundF64ToI32Lo: return "v_cvt_round_f64_to_i32_lo";
case UniOpVV::kCvtRoundF64ToI32Hi: return "v_cvt_round_f64_to_i32_hi";
}
ASMJIT_NOT_REACHED();
}
static VecOpInfo vec_op_info_vv(UniOpVV op) noexcept {
using VE = VecElementType;
switch (op) {
case UniOpVV::kMov : return VecOpInfo::make(VE::kUInt8, VE::kUInt8);
case UniOpVV::kMovU64 : return VecOpInfo::make(VE::kUInt8, VE::kUInt8);
case UniOpVV::kBroadcastU8Z : return VecOpInfo::make(VE::kUInt8, VE::kUInt8);
case UniOpVV::kBroadcastU16Z : return VecOpInfo::make(VE::kUInt16, VE::kUInt16);
case UniOpVV::kBroadcastU8 : return VecOpInfo::make(VE::kUInt8, VE::kUInt8);
case UniOpVV::kBroadcastU16 : return VecOpInfo::make(VE::kUInt16, VE::kUInt16);
case UniOpVV::kBroadcastU32 : return VecOpInfo::make(VE::kUInt32, VE::kUInt32);
case UniOpVV::kBroadcastU64 : return VecOpInfo::make(VE::kUInt64, VE::kUInt64);
case UniOpVV::kBroadcastF32 : return VecOpInfo::make(VE::kFloat32, VE::kFloat32);
case UniOpVV::kBroadcastF64 : return VecOpInfo::make(VE::kFloat64, VE::kFloat64);
case UniOpVV::kBroadcastV128_U32 : return VecOpInfo::make(VE::kUInt32, VE::kUInt32);
case UniOpVV::kBroadcastV128_U64 : return VecOpInfo::make(VE::kUInt64, VE::kUInt64);
case UniOpVV::kBroadcastV128_F32 : return VecOpInfo::make(VE::kFloat32, VE::kFloat32);
case UniOpVV::kBroadcastV128_F64 : return VecOpInfo::make(VE::kFloat64, VE::kFloat64);
case UniOpVV::kBroadcastV256_U32 : return VecOpInfo::make(VE::kUInt32, VE::kUInt32);
case UniOpVV::kBroadcastV256_U64 : return VecOpInfo::make(VE::kUInt64, VE::kUInt64);
case UniOpVV::kBroadcastV256_F32 : return VecOpInfo::make(VE::kFloat32, VE::kFloat32);
case UniOpVV::kBroadcastV256_F64 : return VecOpInfo::make(VE::kFloat64, VE::kFloat64);
case UniOpVV::kAbsI8 : return VecOpInfo::make(VE::kUInt8, VE::kInt8);
case UniOpVV::kAbsI16 : return VecOpInfo::make(VE::kUInt16, VE::kInt16);
case UniOpVV::kAbsI32 : return VecOpInfo::make(VE::kUInt32, VE::kInt32);
case UniOpVV::kAbsI64 : return VecOpInfo::make(VE::kUInt64, VE::kInt64);
case UniOpVV::kNotU32            : return VecOpInfo::make(VE::kUInt32, VE::kUInt32);
case UniOpVV::kNotU64            : return VecOpInfo::make(VE::kUInt64, VE::kUInt64);
case UniOpVV::kCvtI8LoToI16 : return VecOpInfo::make(VE::kInt16, VE::kInt8);
case UniOpVV::kCvtI8HiToI16 : return VecOpInfo::make(VE::kInt16, VE::kInt8);
case UniOpVV::kCvtU8LoToU16 : return VecOpInfo::make(VE::kUInt16, VE::kUInt8);
case UniOpVV::kCvtU8HiToU16 : return VecOpInfo::make(VE::kUInt16, VE::kUInt8);
case UniOpVV::kCvtI8ToI32 : return VecOpInfo::make(VE::kInt32, VE::kInt8);
case UniOpVV::kCvtU8ToU32 : return VecOpInfo::make(VE::kUInt32, VE::kUInt8);
case UniOpVV::kCvtI16LoToI32 : return VecOpInfo::make(VE::kInt32, VE::kInt16);
case UniOpVV::kCvtI16HiToI32 : return VecOpInfo::make(VE::kInt32, VE::kInt16);
case UniOpVV::kCvtU16LoToU32 : return VecOpInfo::make(VE::kUInt32, VE::kUInt16);
case UniOpVV::kCvtU16HiToU32 : return VecOpInfo::make(VE::kUInt32, VE::kUInt16);
case UniOpVV::kCvtI32LoToI64 : return VecOpInfo::make(VE::kInt64, VE::kInt32);
case UniOpVV::kCvtI32HiToI64 : return VecOpInfo::make(VE::kInt64, VE::kInt32);
case UniOpVV::kCvtU32LoToU64 : return VecOpInfo::make(VE::kUInt64, VE::kUInt32);
case UniOpVV::kCvtU32HiToU64 : return VecOpInfo::make(VE::kUInt64, VE::kUInt32);
case UniOpVV::kAbsF32S : return VecOpInfo::make(VE::kFloat32, VE::kFloat32);
case UniOpVV::kAbsF64S : return VecOpInfo::make(VE::kFloat64, VE::kFloat64);
case UniOpVV::kAbsF32 : return VecOpInfo::make(VE::kFloat32, VE::kFloat32);
case UniOpVV::kAbsF64 : return VecOpInfo::make(VE::kFloat64, VE::kFloat64);
case UniOpVV::kNegF32S : return VecOpInfo::make(VE::kFloat32, VE::kFloat32);
case UniOpVV::kNegF64S : return VecOpInfo::make(VE::kFloat64, VE::kFloat64);
case UniOpVV::kNegF32 : return VecOpInfo::make(VE::kFloat32, VE::kFloat32);
case UniOpVV::kNegF64 : return VecOpInfo::make(VE::kFloat64, VE::kFloat64);
case UniOpVV::kNotF32 : return VecOpInfo::make(VE::kUInt32, VE::kUInt32);
case UniOpVV::kNotF64 : return VecOpInfo::make(VE::kUInt64, VE::kUInt64);
case UniOpVV::kTruncF32S : return VecOpInfo::make(VE::kFloat32, VE::kFloat32);
case UniOpVV::kTruncF64S : return VecOpInfo::make(VE::kFloat64, VE::kFloat64);
case UniOpVV::kTruncF32 : return VecOpInfo::make(VE::kFloat32, VE::kFloat32);
case UniOpVV::kTruncF64 : return VecOpInfo::make(VE::kFloat64, VE::kFloat64);
case UniOpVV::kFloorF32S : return VecOpInfo::make(VE::kFloat32, VE::kFloat32);
case UniOpVV::kFloorF64S : return VecOpInfo::make(VE::kFloat64, VE::kFloat64);
case UniOpVV::kFloorF32 : return VecOpInfo::make(VE::kFloat32, VE::kFloat32);
case UniOpVV::kFloorF64 : return VecOpInfo::make(VE::kFloat64, VE::kFloat64);
case UniOpVV::kCeilF32S : return VecOpInfo::make(VE::kFloat32, VE::kFloat32);
case UniOpVV::kCeilF64S : return VecOpInfo::make(VE::kFloat64, VE::kFloat64);
case UniOpVV::kCeilF32 : return VecOpInfo::make(VE::kFloat32, VE::kFloat32);
case UniOpVV::kCeilF64 : return VecOpInfo::make(VE::kFloat64, VE::kFloat64);
case UniOpVV::kRoundEvenF32S : return VecOpInfo::make(VE::kFloat32, VE::kFloat32);
case UniOpVV::kRoundEvenF64S : return VecOpInfo::make(VE::kFloat64, VE::kFloat64);
case UniOpVV::kRoundEvenF32 : return VecOpInfo::make(VE::kFloat32, VE::kFloat32);
case UniOpVV::kRoundEvenF64 : return VecOpInfo::make(VE::kFloat64, VE::kFloat64);
case UniOpVV::kRoundHalfAwayF32S : return VecOpInfo::make(VE::kFloat32, VE::kFloat32);
case UniOpVV::kRoundHalfAwayF64S : return VecOpInfo::make(VE::kFloat64, VE::kFloat64);
case UniOpVV::kRoundHalfAwayF32 : return VecOpInfo::make(VE::kFloat32, VE::kFloat32);
case UniOpVV::kRoundHalfAwayF64 : return VecOpInfo::make(VE::kFloat64, VE::kFloat64);
case UniOpVV::kRoundHalfUpF32S : return VecOpInfo::make(VE::kFloat32, VE::kFloat32);
case UniOpVV::kRoundHalfUpF64S : return VecOpInfo::make(VE::kFloat64, VE::kFloat64);
case UniOpVV::kRoundHalfUpF32 : return VecOpInfo::make(VE::kFloat32, VE::kFloat32);
case UniOpVV::kRoundHalfUpF64 : return VecOpInfo::make(VE::kFloat64, VE::kFloat64);
case UniOpVV::kRcpF32 : return VecOpInfo::make(VE::kFloat32, VE::kFloat32);
case UniOpVV::kRcpF64 : return VecOpInfo::make(VE::kFloat64, VE::kFloat64);
case UniOpVV::kSqrtF32S : return VecOpInfo::make(VE::kFloat32, VE::kFloat32);
case UniOpVV::kSqrtF64S : return VecOpInfo::make(VE::kFloat64, VE::kFloat64);
case UniOpVV::kSqrtF32 : return VecOpInfo::make(VE::kFloat32, VE::kFloat32);
case UniOpVV::kSqrtF64 : return VecOpInfo::make(VE::kFloat64, VE::kFloat64);
case UniOpVV::kCvtF32ToF64S : return VecOpInfo::make(VE::kFloat64, VE::kFloat32);
case UniOpVV::kCvtF64ToF32S : return VecOpInfo::make(VE::kFloat32, VE::kFloat64);
case UniOpVV::kCvtI32ToF32 : return VecOpInfo::make(VE::kFloat32, VE::kInt32);
case UniOpVV::kCvtF32LoToF64 : return VecOpInfo::make(VE::kFloat64, VE::kFloat32);
case UniOpVV::kCvtF32HiToF64 : return VecOpInfo::make(VE::kFloat64, VE::kFloat32);
case UniOpVV::kCvtF64ToF32Lo : return VecOpInfo::make(VE::kFloat32, VE::kFloat64);
case UniOpVV::kCvtF64ToF32Hi : return VecOpInfo::make(VE::kFloat32, VE::kFloat64);
case UniOpVV::kCvtI32LoToF64 : return VecOpInfo::make(VE::kFloat64, VE::kInt32);
case UniOpVV::kCvtI32HiToF64 : return VecOpInfo::make(VE::kFloat64, VE::kInt32);
case UniOpVV::kCvtTruncF32ToI32 : return VecOpInfo::make(VE::kInt32, VE::kFloat32);
case UniOpVV::kCvtTruncF64ToI32Lo: return VecOpInfo::make(VE::kInt32, VE::kFloat64);
case UniOpVV::kCvtTruncF64ToI32Hi: return VecOpInfo::make(VE::kInt32, VE::kFloat64);
case UniOpVV::kCvtRoundF32ToI32 : return VecOpInfo::make(VE::kInt32, VE::kFloat32);
case UniOpVV::kCvtRoundF64ToI32Lo: return VecOpInfo::make(VE::kInt32, VE::kFloat64);
case UniOpVV::kCvtRoundF64ToI32Hi: return VecOpInfo::make(VE::kInt32, VE::kFloat64);
}
ASMJIT_NOT_REACHED();
}
static const char* vec_op_name_vvi(UniOpVVI op) noexcept {
switch (op) {
case UniOpVVI::kSllU16 : return "v_sll_u16";
case UniOpVVI::kSllU32 : return "v_sll_u32";
case UniOpVVI::kSllU64 : return "v_sll_u64";
case UniOpVVI::kSrlU16 : return "v_srl_u16";
case UniOpVVI::kSrlU32 : return "v_srl_u32";
case UniOpVVI::kSrlU64 : return "v_srl_u64";
case UniOpVVI::kSraI16 : return "v_sra_i16";
case UniOpVVI::kSraI32 : return "v_sra_i32";
case UniOpVVI::kSraI64 : return "v_sra_i64";
case UniOpVVI::kSllbU128 : return "v_sllb_u128";
case UniOpVVI::kSrlbU128 : return "v_srlb_u128";
case UniOpVVI::kSwizzleU16x4 : return "v_swizzle_u16x4";
case UniOpVVI::kSwizzleLoU16x4 : return "v_swizzle_lo_u16x4";
case UniOpVVI::kSwizzleHiU16x4 : return "v_swizzle_hi_u16x4";
case UniOpVVI::kSwizzleU32x4 : return "v_swizzle_u32x4";
case UniOpVVI::kSwizzleU64x2 : return "v_swizzle_u64x2";
case UniOpVVI::kSwizzleF32x4 : return "v_swizzle_f32x4";
case UniOpVVI::kSwizzleF64x2 : return "v_swizzle_f64x2";
case UniOpVVI::kSwizzleU64x4 : return "v_swizzle_u64x4";
case UniOpVVI::kSwizzleF64x4 : return "v_swizzle_f64x4";
case UniOpVVI::kExtractV128_I32: return "v_extract_v128_i32";
case UniOpVVI::kExtractV128_I64: return "v_extract_v128_i64";
case UniOpVVI::kExtractV128_F32: return "v_extract_v128_f32";
case UniOpVVI::kExtractV128_F64: return "v_extract_v128_f64";
case UniOpVVI::kExtractV256_I32: return "v_extract_v256_i32";
case UniOpVVI::kExtractV256_I64: return "v_extract_v256_i64";
case UniOpVVI::kExtractV256_F32: return "v_extract_v256_f32";
case UniOpVVI::kExtractV256_F64: return "v_extract_v256_f64";
#if defined(ASMJIT_UJIT_AARCH64)
case UniOpVVI::kSrlRndU16 : return "v_srl_rnd_u16";
case UniOpVVI::kSrlRndU32 : return "v_srl_rnd_u32";
case UniOpVVI::kSrlRndU64 : return "v_srl_rnd_u64";
case UniOpVVI::kSrlAccU16 : return "v_srl_acc_u16";
case UniOpVVI::kSrlAccU32 : return "v_srl_acc_u32";
case UniOpVVI::kSrlAccU64 : return "v_srl_acc_u64";
case UniOpVVI::kSrlRndAccU16 : return "v_srl_rnd_acc_u16";
case UniOpVVI::kSrlRndAccU32 : return "v_srl_rnd_acc_u32";
case UniOpVVI::kSrlRndAccU64 : return "v_srl_rnd_acc_u64";
case UniOpVVI::kSrlnLoU16 : return "v_srln_lo_u16";
case UniOpVVI::kSrlnHiU16 : return "v_srln_hi_u16";
case UniOpVVI::kSrlnLoU32 : return "v_srln_lo_u32";
case UniOpVVI::kSrlnHiU32 : return "v_srln_hi_u32";
case UniOpVVI::kSrlnLoU64 : return "v_srln_lo_u64";
case UniOpVVI::kSrlnHiU64 : return "v_srln_hi_u64";
case UniOpVVI::kSrlnRndLoU16 : return "v_srln_rnd_lo_u16";
case UniOpVVI::kSrlnRndHiU16 : return "v_srln_rnd_hi_u16";
case UniOpVVI::kSrlnRndLoU32 : return "v_srln_rnd_lo_u32";
case UniOpVVI::kSrlnRndHiU32 : return "v_srln_rnd_hi_u32";
case UniOpVVI::kSrlnRndLoU64 : return "v_srln_rnd_lo_u64";
case UniOpVVI::kSrlnRndHiU64 : return "v_srln_rnd_hi_u64";
#endif // ASMJIT_UJIT_AARCH64
}
ASMJIT_NOT_REACHED();
}
static VecOpInfo vec_op_info_vvi(UniOpVVI op) noexcept {
using VE = VecElementType;
switch (op) {
case UniOpVVI::kSllU16 : return VecOpInfo::make(VE::kUInt16 , VE::kUInt16);
case UniOpVVI::kSllU32 : return VecOpInfo::make(VE::kUInt32 , VE::kUInt32);
case UniOpVVI::kSllU64 : return VecOpInfo::make(VE::kUInt64 , VE::kUInt64);
case UniOpVVI::kSrlU16 : return VecOpInfo::make(VE::kUInt16 , VE::kUInt16);
case UniOpVVI::kSrlU32 : return VecOpInfo::make(VE::kUInt32 , VE::kUInt32);
case UniOpVVI::kSrlU64 : return VecOpInfo::make(VE::kUInt64 , VE::kUInt64);
case UniOpVVI::kSraI16 : return VecOpInfo::make(VE::kInt16 , VE::kInt16);
case UniOpVVI::kSraI32 : return VecOpInfo::make(VE::kInt32 , VE::kInt32);
case UniOpVVI::kSraI64 : return VecOpInfo::make(VE::kInt64 , VE::kInt64);
case UniOpVVI::kSllbU128 : return VecOpInfo::make(VE::kUInt8 , VE::kUInt8);
case UniOpVVI::kSrlbU128 : return VecOpInfo::make(VE::kUInt8 , VE::kUInt8);
case UniOpVVI::kSwizzleU16x4 : return VecOpInfo::make(VE::kUInt16 , VE::kUInt16);
case UniOpVVI::kSwizzleLoU16x4 : return VecOpInfo::make(VE::kUInt16 , VE::kUInt16);
case UniOpVVI::kSwizzleHiU16x4 : return VecOpInfo::make(VE::kUInt16 , VE::kUInt16);
case UniOpVVI::kSwizzleU32x4 : return VecOpInfo::make(VE::kUInt32 , VE::kUInt32);
case UniOpVVI::kSwizzleU64x2 : return VecOpInfo::make(VE::kUInt64 , VE::kUInt64);
case UniOpVVI::kSwizzleF32x4 : return VecOpInfo::make(VE::kFloat32, VE::kFloat32);
case UniOpVVI::kSwizzleF64x2 : return VecOpInfo::make(VE::kFloat64, VE::kFloat64);
case UniOpVVI::kSwizzleU64x4 : return VecOpInfo::make(VE::kUInt64 , VE::kUInt64);
case UniOpVVI::kSwizzleF64x4 : return VecOpInfo::make(VE::kFloat64, VE::kFloat64);
case UniOpVVI::kExtractV128_I32: return VecOpInfo::make(VE::kInt32 , VE::kInt32);
case UniOpVVI::kExtractV128_I64: return VecOpInfo::make(VE::kInt64 , VE::kInt64);
case UniOpVVI::kExtractV128_F32: return VecOpInfo::make(VE::kFloat32, VE::kFloat32);
case UniOpVVI::kExtractV128_F64: return VecOpInfo::make(VE::kFloat64, VE::kFloat64);
case UniOpVVI::kExtractV256_I32: return VecOpInfo::make(VE::kInt32 , VE::kInt32);
case UniOpVVI::kExtractV256_I64: return VecOpInfo::make(VE::kInt64 , VE::kInt64);
case UniOpVVI::kExtractV256_F32: return VecOpInfo::make(VE::kFloat32, VE::kFloat32);
case UniOpVVI::kExtractV256_F64: return VecOpInfo::make(VE::kFloat64, VE::kFloat64);
#if defined(ASMJIT_UJIT_AARCH64)
case UniOpVVI::kSrlRndU16 : return VecOpInfo::make(VE::kUInt16, VE::kUInt16);
case UniOpVVI::kSrlRndU32 : return VecOpInfo::make(VE::kUInt32, VE::kUInt32);
case UniOpVVI::kSrlRndU64 : return VecOpInfo::make(VE::kUInt64, VE::kUInt64);
case UniOpVVI::kSrlAccU16 : return VecOpInfo::make(VE::kUInt16, VE::kUInt16);
case UniOpVVI::kSrlAccU32 : return VecOpInfo::make(VE::kUInt32, VE::kUInt32);
case UniOpVVI::kSrlAccU64 : return VecOpInfo::make(VE::kUInt64, VE::kUInt64);
case UniOpVVI::kSrlRndAccU16 : return VecOpInfo::make(VE::kUInt16, VE::kUInt16);
case UniOpVVI::kSrlRndAccU32 : return VecOpInfo::make(VE::kUInt32, VE::kUInt32);
case UniOpVVI::kSrlRndAccU64 : return VecOpInfo::make(VE::kUInt64, VE::kUInt64);
case UniOpVVI::kSrlnLoU16    : return VecOpInfo::make(VE::kUInt8 , VE::kUInt16);
case UniOpVVI::kSrlnHiU16    : return VecOpInfo::make(VE::kUInt8 , VE::kUInt16);
case UniOpVVI::kSrlnLoU32    : return VecOpInfo::make(VE::kUInt16, VE::kUInt32);
case UniOpVVI::kSrlnHiU32    : return VecOpInfo::make(VE::kUInt16, VE::kUInt32);
case UniOpVVI::kSrlnLoU64    : return VecOpInfo::make(VE::kUInt32, VE::kUInt64);
case UniOpVVI::kSrlnHiU64    : return VecOpInfo::make(VE::kUInt32, VE::kUInt64);
case UniOpVVI::kSrlnRndLoU16 : return VecOpInfo::make(VE::kUInt8 , VE::kUInt16);
case UniOpVVI::kSrlnRndHiU16 : return VecOpInfo::make(VE::kUInt8 , VE::kUInt16);
case UniOpVVI::kSrlnRndLoU32 : return VecOpInfo::make(VE::kUInt16, VE::kUInt32);
case UniOpVVI::kSrlnRndHiU32 : return VecOpInfo::make(VE::kUInt16, VE::kUInt32);
case UniOpVVI::kSrlnRndLoU64 : return VecOpInfo::make(VE::kUInt32, VE::kUInt64);
case UniOpVVI::kSrlnRndHiU64 : return VecOpInfo::make(VE::kUInt32, VE::kUInt64);
#endif // ASMJIT_UJIT_AARCH64
}
ASMJIT_NOT_REACHED();
}
static const char* vec_op_name_vvv(UniOpVVV op) noexcept {
switch (op) {
case UniOpVVV::kAndU32 : return "v_and_u32";
case UniOpVVV::kAndU64 : return "v_and_u64";
case UniOpVVV::kOrU32 : return "v_or_u32";
case UniOpVVV::kOrU64 : return "v_or_u64";
case UniOpVVV::kXorU32 : return "v_xor_u32";
case UniOpVVV::kXorU64 : return "v_xor_u64";
case UniOpVVV::kAndnU32 : return "v_andn_u32";
case UniOpVVV::kAndnU64 : return "v_andn_u64";
case UniOpVVV::kBicU32 : return "v_bic_u32";
case UniOpVVV::kBicU64 : return "v_bic_u64";
case UniOpVVV::kAvgrU8 : return "v_avgr_u8";
case UniOpVVV::kAvgrU16 : return "v_avgr_u16";
case UniOpVVV::kAddU8 : return "v_add_u8";
case UniOpVVV::kAddU16 : return "v_add_u16";
case UniOpVVV::kAddU32 : return "v_add_u32";
case UniOpVVV::kAddU64 : return "v_add_u64";
case UniOpVVV::kSubU8 : return "v_sub_u8";
case UniOpVVV::kSubU16 : return "v_sub_u16";
case UniOpVVV::kSubU32 : return "v_sub_u32";
case UniOpVVV::kSubU64 : return "v_sub_u64";
case UniOpVVV::kAddsI8 : return "v_adds_i8";
case UniOpVVV::kAddsU8 : return "v_adds_u8";
case UniOpVVV::kAddsI16 : return "v_adds_i16";
case UniOpVVV::kAddsU16 : return "v_adds_u16";
case UniOpVVV::kSubsI8 : return "v_subs_i8";
case UniOpVVV::kSubsU8 : return "v_subs_u8";
case UniOpVVV::kSubsI16 : return "v_subs_i16";
case UniOpVVV::kSubsU16 : return "v_subs_u16";
case UniOpVVV::kMulU16 : return "v_mul_u16";
case UniOpVVV::kMulU32 : return "v_mul_u32";
case UniOpVVV::kMulU64 : return "v_mul_u64";
case UniOpVVV::kMulhI16 : return "v_mulh_i16";
case UniOpVVV::kMulhU16 : return "v_mulh_u16";
case UniOpVVV::kMulU64_LoU32 : return "v_mul_u64_lo_u32";
case UniOpVVV::kMHAddI16_I32 : return "v_mhadd_i16_i32";
case UniOpVVV::kMinI8 : return "v_min_i8";
case UniOpVVV::kMinU8 : return "v_min_u8";
case UniOpVVV::kMinI16 : return "v_min_i16";
case UniOpVVV::kMinU16 : return "v_min_u16";
case UniOpVVV::kMinI32 : return "v_min_i32";
case UniOpVVV::kMinU32 : return "v_min_u32";
case UniOpVVV::kMinI64 : return "v_min_i64";
case UniOpVVV::kMinU64 : return "v_min_u64";
case UniOpVVV::kMaxI8 : return "v_max_i8";
case UniOpVVV::kMaxU8 : return "v_max_u8";
case UniOpVVV::kMaxI16 : return "v_max_i16";
case UniOpVVV::kMaxU16 : return "v_max_u16";
case UniOpVVV::kMaxI32 : return "v_max_i32";
case UniOpVVV::kMaxU32 : return "v_max_u32";
case UniOpVVV::kMaxI64 : return "v_max_i64";
case UniOpVVV::kMaxU64 : return "v_max_u64";
case UniOpVVV::kCmpEqU8 : return "v_cmp_eq_u8";
case UniOpVVV::kCmpEqU16 : return "v_cmp_eq_u16";
case UniOpVVV::kCmpEqU32 : return "v_cmp_eq_u32";
case UniOpVVV::kCmpEqU64 : return "v_cmp_eq_u64";
case UniOpVVV::kCmpGtI8 : return "v_cmp_gt_i8";
case UniOpVVV::kCmpGtU8 : return "v_cmp_gt_u8";
case UniOpVVV::kCmpGtI16 : return "v_cmp_gt_i16";
case UniOpVVV::kCmpGtU16 : return "v_cmp_gt_u16";
case UniOpVVV::kCmpGtI32 : return "v_cmp_gt_i32";
case UniOpVVV::kCmpGtU32 : return "v_cmp_gt_u32";
case UniOpVVV::kCmpGtI64 : return "v_cmp_gt_i64";
case UniOpVVV::kCmpGtU64 : return "v_cmp_gt_u64";
case UniOpVVV::kCmpGeI8 : return "v_cmp_ge_i8";
case UniOpVVV::kCmpGeU8 : return "v_cmp_ge_u8";
case UniOpVVV::kCmpGeI16 : return "v_cmp_ge_i16";
case UniOpVVV::kCmpGeU16 : return "v_cmp_ge_u16";
case UniOpVVV::kCmpGeI32 : return "v_cmp_ge_i32";
case UniOpVVV::kCmpGeU32 : return "v_cmp_ge_u32";
case UniOpVVV::kCmpGeI64 : return "v_cmp_ge_i64";
case UniOpVVV::kCmpGeU64 : return "v_cmp_ge_u64";
case UniOpVVV::kCmpLtI8 : return "v_cmp_lt_i8";
case UniOpVVV::kCmpLtU8 : return "v_cmp_lt_u8";
case UniOpVVV::kCmpLtI16 : return "v_cmp_lt_i16";
case UniOpVVV::kCmpLtU16 : return "v_cmp_lt_u16";
case UniOpVVV::kCmpLtI32 : return "v_cmp_lt_i32";
case UniOpVVV::kCmpLtU32 : return "v_cmp_lt_u32";
case UniOpVVV::kCmpLtI64 : return "v_cmp_lt_i64";
case UniOpVVV::kCmpLtU64 : return "v_cmp_lt_u64";
case UniOpVVV::kCmpLeI8 : return "v_cmp_le_i8";
case UniOpVVV::kCmpLeU8 : return "v_cmp_le_u8";
case UniOpVVV::kCmpLeI16 : return "v_cmp_le_i16";
case UniOpVVV::kCmpLeU16 : return "v_cmp_le_u16";
case UniOpVVV::kCmpLeI32 : return "v_cmp_le_i32";
case UniOpVVV::kCmpLeU32 : return "v_cmp_le_u32";
case UniOpVVV::kCmpLeI64 : return "v_cmp_le_i64";
case UniOpVVV::kCmpLeU64 : return "v_cmp_le_u64";
case UniOpVVV::kAndF32 : return "v_and_f32";
case UniOpVVV::kAndF64 : return "v_and_f64";
case UniOpVVV::kOrF32 : return "v_or_f32";
case UniOpVVV::kOrF64 : return "v_or_f64";
case UniOpVVV::kXorF32 : return "v_xor_f32";
case UniOpVVV::kXorF64 : return "v_xor_f64";
case UniOpVVV::kAndnF32 : return "v_andn_f32";
case UniOpVVV::kAndnF64 : return "v_andn_f64";
case UniOpVVV::kBicF32 : return "v_bic_f32";
case UniOpVVV::kBicF64 : return "v_bic_f64";
case UniOpVVV::kAddF32S : return "v_add_f32s";
case UniOpVVV::kAddF64S : return "v_add_f64s";
case UniOpVVV::kAddF32 : return "v_add_f32";
case UniOpVVV::kAddF64 : return "v_add_f64";
case UniOpVVV::kSubF32S : return "v_sub_f32s";
case UniOpVVV::kSubF64S : return "v_sub_f64s";
case UniOpVVV::kSubF32 : return "v_sub_f32";
case UniOpVVV::kSubF64 : return "v_sub_f64";
case UniOpVVV::kMulF32S : return "v_mul_f32s";
case UniOpVVV::kMulF64S : return "v_mul_f64s";
case UniOpVVV::kMulF32 : return "v_mul_f32";
case UniOpVVV::kMulF64 : return "v_mul_f64";
case UniOpVVV::kDivF32S : return "v_div_f32s";
case UniOpVVV::kDivF64S : return "v_div_f64s";
case UniOpVVV::kDivF32 : return "v_div_f32";
case UniOpVVV::kDivF64 : return "v_div_f64";
case UniOpVVV::kModF32S : return "v_mod_f32s";
case UniOpVVV::kModF64S : return "v_mod_f64s";
case UniOpVVV::kModF32 : return "v_mod_f32";
case UniOpVVV::kModF64 : return "v_mod_f64";
case UniOpVVV::kMinF32S : return "v_min_f32s";
case UniOpVVV::kMinF64S : return "v_min_f64s";
case UniOpVVV::kMinF32 : return "v_min_f32";
case UniOpVVV::kMinF64 : return "v_min_f64";
case UniOpVVV::kMaxF32S : return "v_max_f32s";
case UniOpVVV::kMaxF64S : return "v_max_f64s";
case UniOpVVV::kMaxF32 : return "v_max_f32";
case UniOpVVV::kMaxF64 : return "v_max_f64";
case UniOpVVV::kCmpEqF32S : return "v_cmp_eq_f32s";
case UniOpVVV::kCmpEqF64S : return "v_cmp_eq_f64s";
case UniOpVVV::kCmpEqF32 : return "v_cmp_eq_f32";
case UniOpVVV::kCmpEqF64 : return "v_cmp_eq_f64";
case UniOpVVV::kCmpNeF32S : return "v_cmp_ne_f32s";
case UniOpVVV::kCmpNeF64S : return "v_cmp_ne_f64s";
case UniOpVVV::kCmpNeF32 : return "v_cmp_ne_f32";
case UniOpVVV::kCmpNeF64 : return "v_cmp_ne_f64";
case UniOpVVV::kCmpGtF32S : return "v_cmp_gt_f32s";
case UniOpVVV::kCmpGtF64S : return "v_cmp_gt_f64s";
case UniOpVVV::kCmpGtF32 : return "v_cmp_gt_f32";
case UniOpVVV::kCmpGtF64 : return "v_cmp_gt_f64";
case UniOpVVV::kCmpGeF32S : return "v_cmp_ge_f32s";
case UniOpVVV::kCmpGeF64S : return "v_cmp_ge_f64s";
case UniOpVVV::kCmpGeF32 : return "v_cmp_ge_f32";
case UniOpVVV::kCmpGeF64 : return "v_cmp_ge_f64";
case UniOpVVV::kCmpLtF32S : return "v_cmp_lt_f32s";
case UniOpVVV::kCmpLtF64S : return "v_cmp_lt_f64s";
case UniOpVVV::kCmpLtF32 : return "v_cmp_lt_f32";
case UniOpVVV::kCmpLtF64 : return "v_cmp_lt_f64";
case UniOpVVV::kCmpLeF32S : return "v_cmp_le_f32s";
case UniOpVVV::kCmpLeF64S : return "v_cmp_le_f64s";
case UniOpVVV::kCmpLeF32 : return "v_cmp_le_f32";
case UniOpVVV::kCmpLeF64 : return "v_cmp_le_f64";
case UniOpVVV::kCmpOrdF32S : return "v_cmp_ord_f32s";
case UniOpVVV::kCmpOrdF64S : return "v_cmp_ord_f64s";
case UniOpVVV::kCmpOrdF32 : return "v_cmp_ord_f32";
case UniOpVVV::kCmpOrdF64 : return "v_cmp_ord_f64";
case UniOpVVV::kCmpUnordF32S : return "v_cmp_unord_f32s";
case UniOpVVV::kCmpUnordF64S : return "v_cmp_unord_f64s";
case UniOpVVV::kCmpUnordF32 : return "v_cmp_unord_f32";
case UniOpVVV::kCmpUnordF64 : return "v_cmp_unord_f64";
case UniOpVVV::kHAddF64 : return "v_hadd_f64";
case UniOpVVV::kCombineLoHiU64 : return "v_combine_lo_hi_u64";
case UniOpVVV::kCombineLoHiF64 : return "v_combine_lo_hi_f64";
case UniOpVVV::kCombineHiLoU64 : return "v_combine_hi_lo_u64";
case UniOpVVV::kCombineHiLoF64 : return "v_combine_hi_lo_f64";
case UniOpVVV::kInterleaveLoU8 : return "v_interleave_lo_u8";
case UniOpVVV::kInterleaveHiU8 : return "v_interleave_hi_u8";
case UniOpVVV::kInterleaveLoU16: return "v_interleave_lo_u16";
case UniOpVVV::kInterleaveHiU16: return "v_interleave_hi_u16";
case UniOpVVV::kInterleaveLoU32: return "v_interleave_lo_u32";
case UniOpVVV::kInterleaveHiU32: return "v_interleave_hi_u32";
case UniOpVVV::kInterleaveLoU64: return "v_interleave_lo_u64";
case UniOpVVV::kInterleaveHiU64: return "v_interleave_hi_u64";
case UniOpVVV::kInterleaveLoF32: return "v_interleave_lo_f32";
case UniOpVVV::kInterleaveHiF32: return "v_interleave_hi_f32";
case UniOpVVV::kInterleaveLoF64: return "v_interleave_lo_f64";
case UniOpVVV::kInterleaveHiF64: return "v_interleave_hi_f64";
case UniOpVVV::kPacksI16_I8 : return "v_packs_i16_i8";
case UniOpVVV::kPacksI16_U8 : return "v_packs_i16_u8";
case UniOpVVV::kPacksI32_I16 : return "v_packs_i32_i16";
case UniOpVVV::kPacksI32_U16 : return "v_packs_i32_u16";
case UniOpVVV::kSwizzlev_U8 : return "v_swizzlev_u8";
#if defined(ASMJIT_UJIT_AARCH64)
case UniOpVVV::kMulwLoI8 : return "v_mulw_lo_i8";
case UniOpVVV::kMulwLoU8 : return "v_mulw_lo_u8";
case UniOpVVV::kMulwHiI8 : return "v_mulw_hi_i8";
case UniOpVVV::kMulwHiU8 : return "v_mulw_hi_u8";
case UniOpVVV::kMulwLoI16 : return "v_mulw_lo_i16";
case UniOpVVV::kMulwLoU16 : return "v_mulw_lo_u16";
case UniOpVVV::kMulwHiI16 : return "v_mulw_hi_i16";
case UniOpVVV::kMulwHiU16 : return "v_mulw_hi_u16";
case UniOpVVV::kMulwLoI32 : return "v_mulw_lo_i32";
case UniOpVVV::kMulwLoU32 : return "v_mulw_lo_u32";
case UniOpVVV::kMulwHiI32 : return "v_mulw_hi_i32";
case UniOpVVV::kMulwHiU32 : return "v_mulw_hi_u32";
case UniOpVVV::kMAddwLoI8 : return "v_maddw_lo_i8";
case UniOpVVV::kMAddwLoU8 : return "v_maddw_lo_u8";
case UniOpVVV::kMAddwHiI8 : return "v_maddw_hi_i8";
case UniOpVVV::kMAddwHiU8 : return "v_maddw_hi_u8";
case UniOpVVV::kMAddwLoI16 : return "v_maddw_lo_i16";
case UniOpVVV::kMAddwLoU16 : return "v_maddw_lo_u16";
case UniOpVVV::kMAddwHiI16 : return "v_maddw_hi_i16";
case UniOpVVV::kMAddwHiU16 : return "v_maddw_hi_u16";
case UniOpVVV::kMAddwLoI32 : return "v_maddw_lo_i32";
case UniOpVVV::kMAddwLoU32 : return "v_maddw_lo_u32";
case UniOpVVV::kMAddwHiI32 : return "v_maddw_hi_i32";
case UniOpVVV::kMAddwHiU32 : return "v_maddw_hi_u32";
#endif // ASMJIT_UJIT_AARCH64
#if defined(ASMJIT_UJIT_X86)
case UniOpVVV::kPermuteU8 : return "v_permute_u8";
case UniOpVVV::kPermuteU16 : return "v_permute_u16";
case UniOpVVV::kPermuteU32 : return "v_permute_u32";
case UniOpVVV::kPermuteU64 : return "v_permute_u64";
#endif // ASMJIT_UJIT_X86
}
ASMJIT_NOT_REACHED();
}
static VecOpInfo vec_op_info_vvv(UniOpVVV op) noexcept {
using VE = VecElementType;
switch (op) {
case UniOpVVV::kAndU32 : return VecOpInfo::make(VE::kUInt32, VE::kUInt32, VE::kUInt32);
case UniOpVVV::kAndU64 : return VecOpInfo::make(VE::kUInt64, VE::kUInt64, VE::kUInt64);
case UniOpVVV::kOrU32 : return VecOpInfo::make(VE::kUInt32, VE::kUInt32, VE::kUInt32);
case UniOpVVV::kOrU64 : return VecOpInfo::make(VE::kUInt64, VE::kUInt64, VE::kUInt64);
case UniOpVVV::kXorU32 : return VecOpInfo::make(VE::kUInt32, VE::kUInt32, VE::kUInt32);
case UniOpVVV::kXorU64 : return VecOpInfo::make(VE::kUInt64, VE::kUInt64, VE::kUInt64);
case UniOpVVV::kAndnU32 : return VecOpInfo::make(VE::kUInt32, VE::kUInt32, VE::kUInt32);
case UniOpVVV::kAndnU64 : return VecOpInfo::make(VE::kUInt64, VE::kUInt64, VE::kUInt64);
case UniOpVVV::kBicU32 : return VecOpInfo::make(VE::kUInt32, VE::kUInt32, VE::kUInt32);
case UniOpVVV::kBicU64 : return VecOpInfo::make(VE::kUInt64, VE::kUInt64, VE::kUInt64);
case UniOpVVV::kAvgrU8 : return VecOpInfo::make(VE::kUInt8, VE::kUInt8, VE::kUInt8);
case UniOpVVV::kAvgrU16 : return VecOpInfo::make(VE::kUInt16, VE::kUInt16, VE::kUInt16);
case UniOpVVV::kAddU8 : return VecOpInfo::make(VE::kUInt8, VE::kUInt8, VE::kUInt8);
case UniOpVVV::kAddU16 : return VecOpInfo::make(VE::kUInt16, VE::kUInt16, VE::kUInt16);
case UniOpVVV::kAddU32 : return VecOpInfo::make(VE::kUInt32, VE::kUInt32, VE::kUInt32);
case UniOpVVV::kAddU64 : return VecOpInfo::make(VE::kUInt64, VE::kUInt64, VE::kUInt64);
case UniOpVVV::kSubU8 : return VecOpInfo::make(VE::kUInt8, VE::kUInt8, VE::kUInt8);
case UniOpVVV::kSubU16 : return VecOpInfo::make(VE::kUInt16, VE::kUInt16, VE::kUInt16);
case UniOpVVV::kSubU32 : return VecOpInfo::make(VE::kUInt32, VE::kUInt32, VE::kUInt32);
case UniOpVVV::kSubU64 : return VecOpInfo::make(VE::kUInt64, VE::kUInt64, VE::kUInt64);
case UniOpVVV::kAddsI8 : return VecOpInfo::make(VE::kInt8, VE::kInt8, VE::kInt8);
case UniOpVVV::kAddsU8 : return VecOpInfo::make(VE::kUInt8, VE::kUInt8, VE::kUInt8);
case UniOpVVV::kAddsI16 : return VecOpInfo::make(VE::kInt16, VE::kInt16, VE::kInt16);
case UniOpVVV::kAddsU16 : return VecOpInfo::make(VE::kUInt16, VE::kUInt16, VE::kUInt16);
case UniOpVVV::kSubsI8 : return VecOpInfo::make(VE::kInt8, VE::kInt8, VE::kInt8);
case UniOpVVV::kSubsU8 : return VecOpInfo::make(VE::kUInt8, VE::kUInt8, VE::kUInt8);
case UniOpVVV::kSubsI16 : return VecOpInfo::make(VE::kInt16, VE::kInt16, VE::kInt16);
case UniOpVVV::kSubsU16 : return VecOpInfo::make(VE::kUInt16, VE::kUInt16, VE::kUInt16);
case UniOpVVV::kMulU16 : return VecOpInfo::make(VE::kUInt16, VE::kUInt16, VE::kUInt16);
case UniOpVVV::kMulU32 : return VecOpInfo::make(VE::kUInt32, VE::kUInt32, VE::kUInt32);
case UniOpVVV::kMulU64 : return VecOpInfo::make(VE::kUInt64, VE::kUInt64, VE::kUInt64);
case UniOpVVV::kMulhI16 : return VecOpInfo::make(VE::kInt16, VE::kInt16, VE::kInt16);
case UniOpVVV::kMulhU16 : return VecOpInfo::make(VE::kUInt16, VE::kUInt16, VE::kUInt16);
case UniOpVVV::kMulU64_LoU32 : return VecOpInfo::make(VE::kUInt64, VE::kUInt64, VE::kUInt32);
case UniOpVVV::kMHAddI16_I32 : return VecOpInfo::make(VE::kInt32, VE::kInt16, VE::kInt16);
case UniOpVVV::kMinI8 : return VecOpInfo::make(VE::kInt8, VE::kInt8, VE::kInt8);
case UniOpVVV::kMinU8 : return VecOpInfo::make(VE::kUInt8, VE::kUInt8, VE::kUInt8);
case UniOpVVV::kMinI16 : return VecOpInfo::make(VE::kInt16, VE::kInt16, VE::kInt16);
case UniOpVVV::kMinU16 : return VecOpInfo::make(VE::kUInt16, VE::kUInt16, VE::kUInt16);
case UniOpVVV::kMinI32 : return VecOpInfo::make(VE::kInt32, VE::kInt32, VE::kInt32);
case UniOpVVV::kMinU32 : return VecOpInfo::make(VE::kUInt32, VE::kUInt32, VE::kUInt32);
case UniOpVVV::kMinI64 : return VecOpInfo::make(VE::kInt64, VE::kInt64, VE::kInt64);
case UniOpVVV::kMinU64 : return VecOpInfo::make(VE::kUInt64, VE::kUInt64, VE::kUInt64);
case UniOpVVV::kMaxI8 : return VecOpInfo::make(VE::kInt8, VE::kInt8, VE::kInt8);
case UniOpVVV::kMaxU8 : return VecOpInfo::make(VE::kUInt8, VE::kUInt8, VE::kUInt8);
case UniOpVVV::kMaxI16 : return VecOpInfo::make(VE::kInt16, VE::kInt16, VE::kInt16);
case UniOpVVV::kMaxU16 : return VecOpInfo::make(VE::kUInt16, VE::kUInt16, VE::kUInt16);
case UniOpVVV::kMaxI32 : return VecOpInfo::make(VE::kInt32, VE::kInt32, VE::kInt32);
case UniOpVVV::kMaxU32 : return VecOpInfo::make(VE::kUInt32, VE::kUInt32, VE::kUInt32);
case UniOpVVV::kMaxI64 : return VecOpInfo::make(VE::kInt64, VE::kInt64, VE::kInt64);
case UniOpVVV::kMaxU64 : return VecOpInfo::make(VE::kUInt64, VE::kUInt64, VE::kUInt64);
case UniOpVVV::kCmpEqU8 : return VecOpInfo::make(VE::kUInt8, VE::kUInt8, VE::kUInt8);
case UniOpVVV::kCmpEqU16 : return VecOpInfo::make(VE::kUInt16, VE::kUInt16, VE::kUInt16);
case UniOpVVV::kCmpEqU32 : return VecOpInfo::make(VE::kUInt32, VE::kUInt32, VE::kUInt32);
case UniOpVVV::kCmpEqU64 : return VecOpInfo::make(VE::kUInt64, VE::kUInt64, VE::kUInt64);
case UniOpVVV::kCmpGtI8 : return VecOpInfo::make(VE::kInt8, VE::kInt8, VE::kInt8);
case UniOpVVV::kCmpGtU8 : return VecOpInfo::make(VE::kUInt8, VE::kUInt8, VE::kUInt8);
case UniOpVVV::kCmpGtI16 : return VecOpInfo::make(VE::kInt16, VE::kInt16, VE::kInt16);
case UniOpVVV::kCmpGtU16 : return VecOpInfo::make(VE::kUInt16, VE::kUInt16, VE::kUInt16);
case UniOpVVV::kCmpGtI32 : return VecOpInfo::make(VE::kInt32, VE::kInt32, VE::kInt32);
case UniOpVVV::kCmpGtU32 : return VecOpInfo::make(VE::kUInt32, VE::kUInt32, VE::kUInt32);
case UniOpVVV::kCmpGtI64 : return VecOpInfo::make(VE::kInt64, VE::kInt64, VE::kInt64);
case UniOpVVV::kCmpGtU64 : return VecOpInfo::make(VE::kUInt64, VE::kUInt64, VE::kUInt64);
case UniOpVVV::kCmpGeI8 : return VecOpInfo::make(VE::kInt8, VE::kInt8, VE::kInt8);
case UniOpVVV::kCmpGeU8 : return VecOpInfo::make(VE::kUInt8, VE::kUInt8, VE::kUInt8);
case UniOpVVV::kCmpGeI16 : return VecOpInfo::make(VE::kInt16, VE::kInt16, VE::kInt16);
case UniOpVVV::kCmpGeU16 : return VecOpInfo::make(VE::kUInt16, VE::kUInt16, VE::kUInt16);
case UniOpVVV::kCmpGeI32 : return VecOpInfo::make(VE::kInt32, VE::kInt32, VE::kInt32);
case UniOpVVV::kCmpGeU32 : return VecOpInfo::make(VE::kUInt32, VE::kUInt32, VE::kUInt32);
case UniOpVVV::kCmpGeI64 : return VecOpInfo::make(VE::kInt64, VE::kInt64, VE::kInt64);
case UniOpVVV::kCmpGeU64 : return VecOpInfo::make(VE::kUInt64, VE::kUInt64, VE::kUInt64);
case UniOpVVV::kCmpLtI8 : return VecOpInfo::make(VE::kInt8, VE::kInt8, VE::kInt8);
case UniOpVVV::kCmpLtU8 : return VecOpInfo::make(VE::kUInt8, VE::kUInt8, VE::kUInt8);
case UniOpVVV::kCmpLtI16 : return VecOpInfo::make(VE::kInt16, VE::kInt16, VE::kInt16);
case UniOpVVV::kCmpLtU16 : return VecOpInfo::make(VE::kUInt16, VE::kUInt16, VE::kUInt16);
case UniOpVVV::kCmpLtI32 : return VecOpInfo::make(VE::kInt32, VE::kInt32, VE::kInt32);
case UniOpVVV::kCmpLtU32 : return VecOpInfo::make(VE::kUInt32, VE::kUInt32, VE::kUInt32);
case UniOpVVV::kCmpLtI64 : return VecOpInfo::make(VE::kInt64, VE::kInt64, VE::kInt64);
case UniOpVVV::kCmpLtU64 : return VecOpInfo::make(VE::kUInt64, VE::kUInt64, VE::kUInt64);
case UniOpVVV::kCmpLeI8 : return VecOpInfo::make(VE::kInt8, VE::kInt8, VE::kInt8);
case UniOpVVV::kCmpLeU8 : return VecOpInfo::make(VE::kUInt8, VE::kUInt8, VE::kUInt8);
case UniOpVVV::kCmpLeI16 : return VecOpInfo::make(VE::kInt16, VE::kInt16, VE::kInt16);
case UniOpVVV::kCmpLeU16 : return VecOpInfo::make(VE::kUInt16, VE::kUInt16, VE::kUInt16);
case UniOpVVV::kCmpLeI32 : return VecOpInfo::make(VE::kInt32, VE::kInt32, VE::kInt32);
case UniOpVVV::kCmpLeU32 : return VecOpInfo::make(VE::kUInt32, VE::kUInt32, VE::kUInt32);
case UniOpVVV::kCmpLeI64 : return VecOpInfo::make(VE::kInt64, VE::kInt64, VE::kInt64);
case UniOpVVV::kCmpLeU64 : return VecOpInfo::make(VE::kUInt64, VE::kUInt64, VE::kUInt64);
case UniOpVVV::kAndF32 : return VecOpInfo::make(VE::kUInt32, VE::kUInt32, VE::kUInt32);
case UniOpVVV::kAndF64 : return VecOpInfo::make(VE::kUInt64, VE::kUInt64, VE::kUInt64);
case UniOpVVV::kOrF32 : return VecOpInfo::make(VE::kUInt32, VE::kUInt32, VE::kUInt32);
case UniOpVVV::kOrF64 : return VecOpInfo::make(VE::kUInt64, VE::kUInt64, VE::kUInt64);
case UniOpVVV::kXorF32 : return VecOpInfo::make(VE::kUInt32, VE::kUInt32, VE::kUInt32);
case UniOpVVV::kXorF64 : return VecOpInfo::make(VE::kUInt64, VE::kUInt64, VE::kUInt64);
case UniOpVVV::kAndnF32 : return VecOpInfo::make(VE::kUInt32, VE::kUInt32, VE::kUInt32);
case UniOpVVV::kAndnF64 : return VecOpInfo::make(VE::kUInt64, VE::kUInt64, VE::kUInt64);
case UniOpVVV::kBicF32 : return VecOpInfo::make(VE::kUInt32, VE::kUInt32, VE::kUInt32);
case UniOpVVV::kBicF64 : return VecOpInfo::make(VE::kUInt64, VE::kUInt64, VE::kUInt64);
case UniOpVVV::kAddF32S : return VecOpInfo::make(VE::kFloat32, VE::kFloat32, VE::kFloat32);
case UniOpVVV::kAddF64S : return VecOpInfo::make(VE::kFloat64, VE::kFloat64, VE::kFloat64);
case UniOpVVV::kAddF32 : return VecOpInfo::make(VE::kFloat32, VE::kFloat32, VE::kFloat32);
case UniOpVVV::kAddF64 : return VecOpInfo::make(VE::kFloat64, VE::kFloat64, VE::kFloat64);
case UniOpVVV::kSubF32S : return VecOpInfo::make(VE::kFloat32, VE::kFloat32, VE::kFloat32);
case UniOpVVV::kSubF64S : return VecOpInfo::make(VE::kFloat64, VE::kFloat64, VE::kFloat64);
case UniOpVVV::kSubF32 : return VecOpInfo::make(VE::kFloat32, VE::kFloat32, VE::kFloat32);
case UniOpVVV::kSubF64 : return VecOpInfo::make(VE::kFloat64, VE::kFloat64, VE::kFloat64);
case UniOpVVV::kMulF32S : return VecOpInfo::make(VE::kFloat32, VE::kFloat32, VE::kFloat32);
case UniOpVVV::kMulF64S : return VecOpInfo::make(VE::kFloat64, VE::kFloat64, VE::kFloat64);
case UniOpVVV::kMulF32 : return VecOpInfo::make(VE::kFloat32, VE::kFloat32, VE::kFloat32);
case UniOpVVV::kMulF64 : return VecOpInfo::make(VE::kFloat64, VE::kFloat64, VE::kFloat64);
case UniOpVVV::kDivF32S : return VecOpInfo::make(VE::kFloat32, VE::kFloat32, VE::kFloat32);
case UniOpVVV::kDivF64S : return VecOpInfo::make(VE::kFloat64, VE::kFloat64, VE::kFloat64);
case UniOpVVV::kDivF32 : return VecOpInfo::make(VE::kFloat32, VE::kFloat32, VE::kFloat32);
case UniOpVVV::kDivF64 : return VecOpInfo::make(VE::kFloat64, VE::kFloat64, VE::kFloat64);
case UniOpVVV::kModF32S : return VecOpInfo::make(VE::kFloat32, VE::kFloat32, VE::kFloat32);
case UniOpVVV::kModF64S : return VecOpInfo::make(VE::kFloat64, VE::kFloat64, VE::kFloat64);
case UniOpVVV::kModF32 : return VecOpInfo::make(VE::kFloat32, VE::kFloat32, VE::kFloat32);
case UniOpVVV::kModF64 : return VecOpInfo::make(VE::kFloat64, VE::kFloat64, VE::kFloat64);
case UniOpVVV::kMinF32S : return VecOpInfo::make(VE::kFloat32, VE::kFloat32, VE::kFloat32);
case UniOpVVV::kMinF64S : return VecOpInfo::make(VE::kFloat64, VE::kFloat64, VE::kFloat64);
case UniOpVVV::kMinF32 : return VecOpInfo::make(VE::kFloat32, VE::kFloat32, VE::kFloat32);
case UniOpVVV::kMinF64 : return VecOpInfo::make(VE::kFloat64, VE::kFloat64, VE::kFloat64);
case UniOpVVV::kMaxF32S : return VecOpInfo::make(VE::kFloat32, VE::kFloat32, VE::kFloat32);
case UniOpVVV::kMaxF64S : return VecOpInfo::make(VE::kFloat64, VE::kFloat64, VE::kFloat64);
case UniOpVVV::kMaxF32 : return VecOpInfo::make(VE::kFloat32, VE::kFloat32, VE::kFloat32);
case UniOpVVV::kMaxF64 : return VecOpInfo::make(VE::kFloat64, VE::kFloat64, VE::kFloat64);
case UniOpVVV::kCmpEqF32S : return VecOpInfo::make(VE::kUInt32, VE::kFloat32, VE::kFloat32);
case UniOpVVV::kCmpEqF64S : return VecOpInfo::make(VE::kUInt64, VE::kFloat64, VE::kFloat64);
case UniOpVVV::kCmpEqF32 : return VecOpInfo::make(VE::kUInt32, VE::kFloat32, VE::kFloat32);
case UniOpVVV::kCmpEqF64 : return VecOpInfo::make(VE::kUInt64, VE::kFloat64, VE::kFloat64);
case UniOpVVV::kCmpNeF32S : return VecOpInfo::make(VE::kUInt32, VE::kFloat32, VE::kFloat32);
case UniOpVVV::kCmpNeF64S : return VecOpInfo::make(VE::kUInt64, VE::kFloat64, VE::kFloat64);
case UniOpVVV::kCmpNeF32 : return VecOpInfo::make(VE::kUInt32, VE::kFloat32, VE::kFloat32);
case UniOpVVV::kCmpNeF64 : return VecOpInfo::make(VE::kUInt64, VE::kFloat64, VE::kFloat64);
case UniOpVVV::kCmpGtF32S : return VecOpInfo::make(VE::kUInt32, VE::kFloat32, VE::kFloat32);
case UniOpVVV::kCmpGtF64S : return VecOpInfo::make(VE::kUInt64, VE::kFloat64, VE::kFloat64);
case UniOpVVV::kCmpGtF32 : return VecOpInfo::make(VE::kUInt32, VE::kFloat32, VE::kFloat32);
case UniOpVVV::kCmpGtF64 : return VecOpInfo::make(VE::kUInt64, VE::kFloat64, VE::kFloat64);
case UniOpVVV::kCmpGeF32S : return VecOpInfo::make(VE::kUInt32, VE::kFloat32, VE::kFloat32);
case UniOpVVV::kCmpGeF64S : return VecOpInfo::make(VE::kUInt64, VE::kFloat64, VE::kFloat64);
case UniOpVVV::kCmpGeF32 : return VecOpInfo::make(VE::kUInt32, VE::kFloat32, VE::kFloat32);
case UniOpVVV::kCmpGeF64 : return VecOpInfo::make(VE::kUInt64, VE::kFloat64, VE::kFloat64);
case UniOpVVV::kCmpLtF32S : return VecOpInfo::make(VE::kUInt32, VE::kFloat32, VE::kFloat32);
case UniOpVVV::kCmpLtF64S : return VecOpInfo::make(VE::kUInt64, VE::kFloat64, VE::kFloat64);
case UniOpVVV::kCmpLtF32 : return VecOpInfo::make(VE::kUInt32, VE::kFloat32, VE::kFloat32);
case UniOpVVV::kCmpLtF64 : return VecOpInfo::make(VE::kUInt64, VE::kFloat64, VE::kFloat64);
case UniOpVVV::kCmpLeF32S : return VecOpInfo::make(VE::kUInt32, VE::kFloat32, VE::kFloat32);
case UniOpVVV::kCmpLeF64S : return VecOpInfo::make(VE::kUInt64, VE::kFloat64, VE::kFloat64);
case UniOpVVV::kCmpLeF32 : return VecOpInfo::make(VE::kUInt32, VE::kFloat32, VE::kFloat32);
case UniOpVVV::kCmpLeF64 : return VecOpInfo::make(VE::kUInt64, VE::kFloat64, VE::kFloat64);
case UniOpVVV::kCmpOrdF32S : return VecOpInfo::make(VE::kUInt32, VE::kFloat32, VE::kFloat32);
case UniOpVVV::kCmpOrdF64S : return VecOpInfo::make(VE::kUInt64, VE::kFloat64, VE::kFloat64);
case UniOpVVV::kCmpOrdF32 : return VecOpInfo::make(VE::kUInt32, VE::kFloat32, VE::kFloat32);
case UniOpVVV::kCmpOrdF64 : return VecOpInfo::make(VE::kUInt64, VE::kFloat64, VE::kFloat64);
case UniOpVVV::kCmpUnordF32S : return VecOpInfo::make(VE::kUInt32, VE::kFloat32, VE::kFloat32);
case UniOpVVV::kCmpUnordF64S : return VecOpInfo::make(VE::kUInt64, VE::kFloat64, VE::kFloat64);
case UniOpVVV::kCmpUnordF32 : return VecOpInfo::make(VE::kUInt32, VE::kFloat32, VE::kFloat32);
case UniOpVVV::kCmpUnordF64 : return VecOpInfo::make(VE::kUInt64, VE::kFloat64, VE::kFloat64);
case UniOpVVV::kHAddF64 : return VecOpInfo::make(VE::kFloat64, VE::kFloat64, VE::kFloat64);
case UniOpVVV::kCombineLoHiU64 : return VecOpInfo::make(VE::kUInt64, VE::kUInt64, VE::kUInt64);
case UniOpVVV::kCombineLoHiF64 : return VecOpInfo::make(VE::kFloat64, VE::kFloat64, VE::kFloat64);
case UniOpVVV::kCombineHiLoU64 : return VecOpInfo::make(VE::kUInt64, VE::kUInt64, VE::kUInt64);
case UniOpVVV::kCombineHiLoF64 : return VecOpInfo::make(VE::kFloat64, VE::kFloat64, VE::kFloat64);
case UniOpVVV::kInterleaveLoU8 : return VecOpInfo::make(VE::kUInt8, VE::kUInt8, VE::kUInt8);
case UniOpVVV::kInterleaveHiU8 : return VecOpInfo::make(VE::kUInt8, VE::kUInt8, VE::kUInt8);
case UniOpVVV::kInterleaveLoU16: return VecOpInfo::make(VE::kUInt16, VE::kUInt16, VE::kUInt16);
case UniOpVVV::kInterleaveHiU16: return VecOpInfo::make(VE::kUInt16, VE::kUInt16, VE::kUInt16);
case UniOpVVV::kInterleaveLoU32: return VecOpInfo::make(VE::kUInt32, VE::kUInt32, VE::kUInt32);
case UniOpVVV::kInterleaveHiU32: return VecOpInfo::make(VE::kUInt32, VE::kUInt32, VE::kUInt32);
case UniOpVVV::kInterleaveLoU64: return VecOpInfo::make(VE::kUInt64, VE::kUInt64, VE::kUInt64);
case UniOpVVV::kInterleaveHiU64: return VecOpInfo::make(VE::kUInt64, VE::kUInt64, VE::kUInt64);
case UniOpVVV::kInterleaveLoF32: return VecOpInfo::make(VE::kFloat32, VE::kFloat32, VE::kFloat32);
case UniOpVVV::kInterleaveHiF32: return VecOpInfo::make(VE::kFloat32, VE::kFloat32, VE::kFloat32);
case UniOpVVV::kInterleaveLoF64: return VecOpInfo::make(VE::kFloat64, VE::kFloat64, VE::kFloat64);
case UniOpVVV::kInterleaveHiF64: return VecOpInfo::make(VE::kFloat64, VE::kFloat64, VE::kFloat64);
case UniOpVVV::kPacksI16_I8 : return VecOpInfo::make(VE::kInt8, VE::kInt16, VE::kInt16);
case UniOpVVV::kPacksI16_U8 : return VecOpInfo::make(VE::kUInt8, VE::kInt16, VE::kInt16);
case UniOpVVV::kPacksI32_I16 : return VecOpInfo::make(VE::kInt16, VE::kInt32, VE::kInt32);
case UniOpVVV::kPacksI32_U16 : return VecOpInfo::make(VE::kUInt16, VE::kInt32, VE::kInt32);
case UniOpVVV::kSwizzlev_U8 : return VecOpInfo::make(VE::kUInt8, VE::kUInt8, VE::kUInt8);
#if defined(ASMJIT_UJIT_AARCH64)
case UniOpVVV::kMulwLoI8 : return VecOpInfo::make(VE::kInt16, VE::kInt8, VE::kInt8);
case UniOpVVV::kMulwLoU8 : return VecOpInfo::make(VE::kUInt16, VE::kUInt8, VE::kUInt8);
case UniOpVVV::kMulwHiI8 : return VecOpInfo::make(VE::kInt16, VE::kInt8, VE::kInt8);
case UniOpVVV::kMulwHiU8 : return VecOpInfo::make(VE::kUInt16, VE::kUInt8, VE::kUInt8);
case UniOpVVV::kMulwLoI16 : return VecOpInfo::make(VE::kInt32, VE::kInt16, VE::kInt16);
case UniOpVVV::kMulwLoU16 : return VecOpInfo::make(VE::kUInt32, VE::kUInt16, VE::kUInt16);
case UniOpVVV::kMulwHiI16 : return VecOpInfo::make(VE::kInt32, VE::kInt16, VE::kInt16);
case UniOpVVV::kMulwHiU16 : return VecOpInfo::make(VE::kUInt32, VE::kUInt16, VE::kUInt16);
case UniOpVVV::kMulwLoI32 : return VecOpInfo::make(VE::kInt64, VE::kInt32, VE::kInt32);
case UniOpVVV::kMulwLoU32 : return VecOpInfo::make(VE::kUInt64, VE::kUInt32, VE::kUInt32);
case UniOpVVV::kMulwHiI32 : return VecOpInfo::make(VE::kInt64, VE::kInt32, VE::kInt32);
case UniOpVVV::kMulwHiU32 : return VecOpInfo::make(VE::kUInt64, VE::kUInt32, VE::kUInt32);
case UniOpVVV::kMAddwLoI8 : return VecOpInfo::make(VE::kInt16, VE::kInt8, VE::kInt8);
case UniOpVVV::kMAddwLoU8 : return VecOpInfo::make(VE::kUInt16, VE::kUInt8, VE::kUInt8);
case UniOpVVV::kMAddwHiI8 : return VecOpInfo::make(VE::kInt16, VE::kInt8, VE::kInt8);
case UniOpVVV::kMAddwHiU8 : return VecOpInfo::make(VE::kUInt16, VE::kUInt8, VE::kUInt8);
case UniOpVVV::kMAddwLoI16 : return VecOpInfo::make(VE::kInt32, VE::kInt16, VE::kInt16);
case UniOpVVV::kMAddwLoU16 : return VecOpInfo::make(VE::kUInt32, VE::kUInt16, VE::kUInt16);
case UniOpVVV::kMAddwHiI16 : return VecOpInfo::make(VE::kInt32, VE::kInt16, VE::kInt16);
case UniOpVVV::kMAddwHiU16 : return VecOpInfo::make(VE::kUInt32, VE::kUInt16, VE::kUInt16);
case UniOpVVV::kMAddwLoI32 : return VecOpInfo::make(VE::kInt64, VE::kInt32, VE::kInt32);
case UniOpVVV::kMAddwLoU32 : return VecOpInfo::make(VE::kUInt64, VE::kUInt32, VE::kUInt32);
case UniOpVVV::kMAddwHiI32 : return VecOpInfo::make(VE::kInt64, VE::kInt32, VE::kInt32);
case UniOpVVV::kMAddwHiU32 : return VecOpInfo::make(VE::kUInt64, VE::kUInt32, VE::kUInt32);
#endif // ASMJIT_UJIT_AARCH64
#if defined(ASMJIT_UJIT_X86)
case UniOpVVV::kPermuteU8 : return VecOpInfo::make(VE::kUInt8, VE::kUInt8, VE::kUInt8);
case UniOpVVV::kPermuteU16 : return VecOpInfo::make(VE::kUInt16, VE::kUInt16, VE::kUInt16);
case UniOpVVV::kPermuteU32 : return VecOpInfo::make(VE::kUInt32, VE::kUInt32, VE::kUInt32);
case UniOpVVV::kPermuteU64 : return VecOpInfo::make(VE::kUInt64, VE::kUInt64, VE::kUInt64);
#endif // ASMJIT_UJIT_X86
}
ASMJIT_NOT_REACHED();
}
static const char* vec_op_name_vvvi(UniOpVVVI op) noexcept {
switch (op) {
case UniOpVVVI::kAlignr_U128 : return "v_alignr_u128";
case UniOpVVVI::kInterleaveShuffleU32x4: return "v_interleave_shuffle_u32x4";
case UniOpVVVI::kInterleaveShuffleU64x2: return "v_interleave_shuffle_u64x2";
case UniOpVVVI::kInterleaveShuffleF32x4: return "v_interleave_shuffle_f32x4";
case UniOpVVVI::kInterleaveShuffleF64x2: return "v_interleave_shuffle_f64x2";
case UniOpVVVI::kInsertV128_U32 : return "v_insert_v128_u32";
case UniOpVVVI::kInsertV128_F32 : return "v_insert_v128_f32";
case UniOpVVVI::kInsertV128_U64 : return "v_insert_v128_u64";
case UniOpVVVI::kInsertV128_F64 : return "v_insert_v128_f64";
case UniOpVVVI::kInsertV256_U32 : return "v_insert_v256_u32";
case UniOpVVVI::kInsertV256_F32 : return "v_insert_v256_f32";
case UniOpVVVI::kInsertV256_U64 : return "v_insert_v256_u64";
case UniOpVVVI::kInsertV256_F64 : return "v_insert_v256_f64";
}
ASMJIT_NOT_REACHED();
}
static VecOpInfo vec_op_info_vvvi(UniOpVVVI op) noexcept {
using VE = VecElementType;
switch (op) {
case UniOpVVVI::kAlignr_U128 : return VecOpInfo::make(VE::kUInt8, VE::kUInt8, VE::kUInt8);
case UniOpVVVI::kInterleaveShuffleU32x4: return VecOpInfo::make(VE::kUInt32, VE::kUInt32, VE::kUInt32);
case UniOpVVVI::kInterleaveShuffleU64x2: return VecOpInfo::make(VE::kUInt64, VE::kUInt64, VE::kUInt64);
case UniOpVVVI::kInterleaveShuffleF32x4: return VecOpInfo::make(VE::kFloat32, VE::kFloat32, VE::kFloat32);
case UniOpVVVI::kInterleaveShuffleF64x2: return VecOpInfo::make(VE::kFloat64, VE::kFloat64, VE::kFloat64);
case UniOpVVVI::kInsertV128_U32 : return VecOpInfo::make(VE::kUInt32, VE::kUInt32, VE::kUInt32);
case UniOpVVVI::kInsertV128_F32 : return VecOpInfo::make(VE::kFloat32, VE::kFloat32, VE::kFloat32);
case UniOpVVVI::kInsertV128_U64 : return VecOpInfo::make(VE::kUInt64, VE::kUInt64, VE::kUInt64);
case UniOpVVVI::kInsertV128_F64 : return VecOpInfo::make(VE::kFloat64, VE::kFloat64, VE::kFloat64);
case UniOpVVVI::kInsertV256_U32 : return VecOpInfo::make(VE::kUInt32, VE::kUInt32, VE::kUInt32);
case UniOpVVVI::kInsertV256_F32 : return VecOpInfo::make(VE::kFloat32, VE::kFloat32, VE::kFloat32);
case UniOpVVVI::kInsertV256_U64 : return VecOpInfo::make(VE::kUInt64, VE::kUInt64, VE::kUInt64);
case UniOpVVVI::kInsertV256_F64 : return VecOpInfo::make(VE::kFloat64, VE::kFloat64, VE::kFloat64);
default:
ASMJIT_NOT_REACHED();
}
}
static const char* vec_op_name_vvvv(UniOpVVVV op) noexcept {
switch (op) {
case UniOpVVVV::kBlendV_U8: return "v_blendv_u8";
case UniOpVVVV::kMAddU16 : return "v_madd_u16";
case UniOpVVVV::kMAddU32 : return "v_madd_u32";
case UniOpVVVV::kMAddF32S : return "v_madd_f32s";
case UniOpVVVV::kMAddF64S : return "v_madd_f64s";
case UniOpVVVV::kMAddF32 : return "v_madd_f32";
case UniOpVVVV::kMAddF64 : return "v_madd_f64";
case UniOpVVVV::kMSubF32S : return "v_msub_f32s";
case UniOpVVVV::kMSubF64S : return "v_msub_f64s";
case UniOpVVVV::kMSubF32 : return "v_msub_f32";
case UniOpVVVV::kMSubF64 : return "v_msub_f64";
case UniOpVVVV::kNMAddF32S: return "v_nmadd_f32s";
case UniOpVVVV::kNMAddF64S: return "v_nmadd_f64s";
case UniOpVVVV::kNMAddF32 : return "v_nmadd_f32";
case UniOpVVVV::kNMAddF64 : return "v_nmadd_f64";
case UniOpVVVV::kNMSubF32S: return "v_nmsub_f32s";
case UniOpVVVV::kNMSubF64S: return "v_nmsub_f64s";
case UniOpVVVV::kNMSubF32 : return "v_nmsub_f32";
case UniOpVVVV::kNMSubF64 : return "v_nmsub_f64";
}
ASMJIT_NOT_REACHED();
}
static VecOpInfo vec_op_info_vvvv(UniOpVVVV op) noexcept {
using VE = VecElementType;
switch (op) {
case UniOpVVVV::kBlendV_U8: return VecOpInfo::make(VE::kUInt8, VE::kUInt8, VE::kUInt8, VE::kUInt8);
case UniOpVVVV::kMAddU16 : return VecOpInfo::make(VE::kUInt16, VE::kUInt16, VE::kUInt16, VE::kUInt16);
case UniOpVVVV::kMAddU32 : return VecOpInfo::make(VE::kUInt32, VE::kUInt32, VE::kUInt32, VE::kUInt32);
case UniOpVVVV::kMAddF32S : return VecOpInfo::make(VE::kFloat32, VE::kFloat32, VE::kFloat32, VE::kFloat32);
case UniOpVVVV::kMAddF64S : return VecOpInfo::make(VE::kFloat64, VE::kFloat64, VE::kFloat64, VE::kFloat64);
case UniOpVVVV::kMAddF32 : return VecOpInfo::make(VE::kFloat32, VE::kFloat32, VE::kFloat32, VE::kFloat32);
case UniOpVVVV::kMAddF64 : return VecOpInfo::make(VE::kFloat64, VE::kFloat64, VE::kFloat64, VE::kFloat64);
case UniOpVVVV::kMSubF32S : return VecOpInfo::make(VE::kFloat32, VE::kFloat32, VE::kFloat32, VE::kFloat32);
case UniOpVVVV::kMSubF64S : return VecOpInfo::make(VE::kFloat64, VE::kFloat64, VE::kFloat64, VE::kFloat64);
case UniOpVVVV::kMSubF32 : return VecOpInfo::make(VE::kFloat32, VE::kFloat32, VE::kFloat32, VE::kFloat32);
case UniOpVVVV::kMSubF64 : return VecOpInfo::make(VE::kFloat64, VE::kFloat64, VE::kFloat64, VE::kFloat64);
case UniOpVVVV::kNMAddF32S: return VecOpInfo::make(VE::kFloat32, VE::kFloat32, VE::kFloat32, VE::kFloat32);
case UniOpVVVV::kNMAddF64S: return VecOpInfo::make(VE::kFloat64, VE::kFloat64, VE::kFloat64, VE::kFloat64);
case UniOpVVVV::kNMAddF32 : return VecOpInfo::make(VE::kFloat32, VE::kFloat32, VE::kFloat32, VE::kFloat32);
case UniOpVVVV::kNMAddF64 : return VecOpInfo::make(VE::kFloat64, VE::kFloat64, VE::kFloat64, VE::kFloat64);
case UniOpVVVV::kNMSubF32S: return VecOpInfo::make(VE::kFloat32, VE::kFloat32, VE::kFloat32, VE::kFloat32);
case UniOpVVVV::kNMSubF64S: return VecOpInfo::make(VE::kFloat64, VE::kFloat64, VE::kFloat64, VE::kFloat64);
case UniOpVVVV::kNMSubF32 : return VecOpInfo::make(VE::kFloat32, VE::kFloat32, VE::kFloat32, VE::kFloat32);
case UniOpVVVV::kNMSubF64 : return VecOpInfo::make(VE::kFloat64, VE::kFloat64, VE::kFloat64, VE::kFloat64);
}
ASMJIT_NOT_REACHED();
}
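// Note: VecOpInfo records the element type of the destination and of each source
// operand. The harness presumably uses it to iterate and format lanes with the
// correct type when a reference/JIT mismatch is reported.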
// ujit::UniCompiler - Tests - SIMD - Float To Int - Machine Behavior
// ==================================================================
template<FloatToIntOutsideRangeBehavior behavior, typename IntT, typename FloatT>
static ASMJIT_INLINE_NODEBUG IntT cvt_float_to_int_trunc(const FloatT& x) noexcept {
constexpr IntT min_value = std::numeric_limits<IntT>::lowest();
constexpr IntT max_value = std::numeric_limits<IntT>::max();
constexpr IntT zero = IntT(0);
if (std::isnan(x)) {
return behavior == FloatToIntOutsideRangeBehavior::kSmallestValue ? min_value : zero;
}
if (x < FloatT(min_value)) {
return min_value;
}
if (x > FloatT(max_value)) {
return behavior == FloatToIntOutsideRangeBehavior::kSmallestValue ? min_value : max_value;
}
return IntT(x);
}
template<FloatToIntOutsideRangeBehavior behavior, typename IntT, typename FloatT>
static ASMJIT_INLINE_NODEBUG IntT cvt_float_to_int_round(const FloatT& x) noexcept {
constexpr IntT min_value = std::numeric_limits<IntT>::lowest();
constexpr IntT max_value = std::numeric_limits<IntT>::max();
constexpr IntT zero = IntT(0);
if (std::isnan(x)) {
return behavior == FloatToIntOutsideRangeBehavior::kSmallestValue ? min_value : zero;
}
if (x < FloatT(min_value)) {
return min_value;
}
if (x > FloatT(max_value)) {
return behavior == FloatToIntOutsideRangeBehavior::kSmallestValue ? min_value : max_value;
}
return IntT(std::nearbyint(x));
}
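// The two helpers above model how real targets convert out-of-range and NaN inputs
// (hedged: based on the architectures ujit targets). On x86, `cvttss2si`/`cvtsd2si`
// return the "integer indefinite" value 0x80000000 (INT32_MIN) for NaN and for any
// out-of-range input, which corresponds to kSmallestValue. On AArch64, `fcvtzs`
// saturates to the nearest representable bound and converts NaN to zero, which
// corresponds to the other (saturating) behavior.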
// ujit::UniCompiler - Tests - SIMD - Data Generators & Constraints
// ================================================================
// A data generator used to fill the content of SIMD registers.
class DataGenInt {
public:
TestUtils::Random rng;
uint32_t step;
struct Float32Data {
uint32_t u32;
float f32;
};
ASMJIT_INLINE explicit DataGenInt(uint64_t seed) noexcept
: rng(seed),
step(0) {}
ASMJIT_NOINLINE uint64_t next_uint64() noexcept {
if (++step >= 256) {
step = 0;
}
// NOTE: Nothing elaborate - we also want to test numbers that random number
// generators rarely return, so some values are hardcoded at fixed steps.
switch (step) {
case 0: return 0u;
case 1: return 0u;
case 2: return 0u;
case 6: return 1u;
case 7: return 0u;
case 10: return 0u;
case 11: return 0xFFu;
case 15: return 0xFFFFu;
case 17: return 0xFFFFFFFFu;
case 21: return 0xFFFFFFFFFFFFFFFFu;
case 24: return 1u;
case 40: return 0xFFu;
case 55: return 0x8080808080808080u;
case 66: return 0x80000080u;
case 69: return 1u;
case 79: return 0x7Fu;
case 122: return 0xFFFFu;
case 123: return 0xFFFFu;
case 124: return 0xFFFFu;
case 127: return 1u;
case 130: return 0xFFu;
case 142: return 0x7FFFu;
case 143: return 0x7FFFu;
case 144: return 0u;
case 145: return 0x7FFFu;
default : return rng.next_uint64();
}
}
ASMJIT_NOINLINE float next_float32() noexcept {
if (++step >= 256) {
step = 0;
}
switch (step) {
case 0: return 0.0f;
case 1: return 0.0f;
case 2: return 0.0f;
case 6: return 1.0f;
case 7: return 0.0f;
case 10: return 0.00001f;
case 11: return 2.0f;
case 12: return -std::numeric_limits<float>::infinity();
case 15: return 3.0f;
case 17: return 256.0f;
case 21: return 0.5f;
case 23: return std::numeric_limits<float>::quiet_NaN();
case 24: return 0.25f;
case 27: return std::numeric_limits<float>::quiet_NaN();
case 29: return std::numeric_limits<float>::infinity();
case 31: return std::numeric_limits<float>::quiet_NaN();
case 35: return std::numeric_limits<float>::quiet_NaN();
case 40: return 5.12323f;
case 45: return -std::numeric_limits<float>::infinity();
case 55: return 100.5f;
case 66: return 0.1f;
case 69: return 0.2f;
case 79: return 0.3f;
case 89: return -13005961.0f;
case 99: return -std::numeric_limits<float>::infinity();
case 100:
case 102:
case 104:
case 106:
case 108: return float(rng.next_double());
case 110:
case 112:
case 114:
case 116:
case 118: return -float(rng.next_double());
case 122: return 10.3f;
case 123: return 20.3f;
case 124: return -100.3f;
case 127: return 1.3f;
case 130: return std::numeric_limits<float>::quiet_NaN();
case 135: return -std::numeric_limits<float>::infinity();
case 142: return 1.0f;
case 143: return 1.5f;
case 144: return 2.0f;
case 145: return std::numeric_limits<float>::infinity();
case 155: return -1.5f;
case 165: return -0.5f;
case 175: return -1.0f;
case 245: return 2.5f;
default: {
float sign = rng.next_uint32() < 0x7FFFFFF ? 1.0f : -1.0f;
return float(rng.next_double() * double(rng.next_uint32() & 0xFFFFFFu)) * sign;
}
}
}
ASMJIT_NOINLINE double next_float64() noexcept {
if (++step >= 256) {
step = 0;
}
switch (step) {
case 0: return 0.0;
case 1: return 0.0;
case 2: return 0.0;
case 6: return 1.0;
case 7: return 0.0;
case 10: return 0.00001;
case 11: return 2.0;
case 12: return -std::numeric_limits<double>::infinity();
case 15: return 3.0;
case 17: return 256.0;
case 21: return 0.5;
case 23: return std::numeric_limits<double>::quiet_NaN();
case 24: return 0.25;
case 27: return std::numeric_limits<double>::quiet_NaN();
case 29: return std::numeric_limits<double>::infinity();
case 31: return std::numeric_limits<double>::quiet_NaN();
case 35: return std::numeric_limits<double>::quiet_NaN();
case 40: return 5.12323;
case 45: return -std::numeric_limits<double>::infinity();
case 55: return 100.5;
case 66: return 0.1;
case 69: return 0.2;
case 79: return 0.3;
case 80: return 4503599627370495.5;
case 99: return -std::numeric_limits<double>::infinity();
case 100:
case 102:
case 104:
case 106:
case 108: return rng.next_double();
case 110:
case 112:
case 114:
case 116:
case 118: return -rng.next_double();
case 122: return 10.3;
case 123: return 20.3;
case 124: return -100.3;
case 125: return 4503599627370496.0;
case 127: return 1.3;
case 130: return std::numeric_limits<double>::quiet_NaN();
case 135: return -std::numeric_limits<double>::infinity();
case 142: return 1.0;
case 143: return 1.5;
case 144: return 2.0;
case 145: return std::numeric_limits<double>::infinity();
case 155: return -1.5;
case 165: return -0.5;
case 175: return -1.0;
case 245: return 2.5;
case 248: return -4503599627370495.5;
default: {
double sign = rng.next_uint32() < 0x7FFFFFF ? 1.0 : -1.0;
return double(rng.next_double() * double(rng.next_uint32() & 0x3FFFFFFFu)) * sign;
}
}
}
};
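// A minimal usage sketch (illustrative only): the generator is deterministic for a
// given seed, so the reference implementation and the JIT-compiled function can be
// fed identical lane data:
//
//   DataGenInt gen(kRandomSeed);
//   for (uint32_t i = 0; i < kTestIterCount; i++) {
//     uint64_t lane = gen.next_uint64(); // special values injected at fixed steps
//     // ... fill vector lanes and run both implementations ...
//   }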
template<typename T>
struct half_minus_1ulp_const;
template<> struct half_minus_1ulp_const<float> { static inline constexpr float value = 0.49999997f; };
template<> struct half_minus_1ulp_const<double> { static inline constexpr double value = 0.49999999999999994; };
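// These constants are the largest representable values strictly below 0.5 in IEEE-754
// binary32/binary64 (0.5 minus one ULP). Adding them instead of exactly 0.5 before
// truncating/flooring implements half-away/half-up rounding without incorrectly
// pushing inputs just below a half-boundary upward. Compile-time sanity checks
// (assuming IEEE-754 floating-point types):
static_assert(half_minus_1ulp_const<float>::value < 0.5f, "half_minus_1ulp<float> must be below 0.5");
static_assert(half_minus_1ulp_const<double>::value < 0.5, "half_minus_1ulp<double> must be below 0.5");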
// Some SIMD operations are constrained, especially the higher-level ones. To test
// these successfully we have to model the constraints so that the SIMD instruction
// under test actually receives valid input. Note that a constraint doesn't always
// have to be range-based - it could be anything.
struct ConstraintNone {
template<uint32_t kW>
static ASMJIT_INLINE_NODEBUG void apply(VecOverlay<kW>& v) noexcept { Support::maybe_unused(v); }
};
template<typename ElementT, typename Derived>
struct ConstraintBase {
template<uint32_t kW>
static ASMJIT_INLINE void apply(VecOverlay<kW>& v) noexcept {
ElementT* elements = v.template data<ElementT>();
for (size_t i = 0; i < kW / sizeof(ElementT); i++) {
elements[i] = Derived::apply_one(elements[i]);
}
}
};
template<uint8_t kMin, uint8_t kMax>
struct ConstraintRangeU8 : public ConstraintBase<uint8_t, ConstraintRangeU8<kMin, kMax>> {
static ASMJIT_INLINE_NODEBUG uint8_t apply_one(uint8_t x) noexcept { return std::clamp(x, kMin, kMax); }
};
template<uint16_t kMin, uint16_t kMax>
struct ConstraintRangeU16 : public ConstraintBase<uint16_t, ConstraintRangeU16<kMin, kMax>> {
static ASMJIT_INLINE_NODEBUG uint16_t apply_one(uint16_t x) noexcept { return std::clamp(x, kMin, kMax); }
};
template<uint32_t kMin, uint32_t kMax>
struct ConstraintRangeU32 : public ConstraintBase<uint32_t, ConstraintRangeU32<kMin, kMax>> {
static ASMJIT_INLINE_NODEBUG uint32_t apply_one(uint32_t x) noexcept { return std::clamp(x, kMin, kMax); }
};
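// An illustrative sketch (the operation and range are hypothetical): before testing
// an instruction that is only defined for inputs in [0, 255 * 255], each 16-bit lane
// of the generated data would be clamped in place first:
//
//   VecOverlay<16> v;                           // randomly generated input
//   ConstraintRangeU16<0, 255 * 255>::apply(v); // clamps every u16 lane in place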
// ujit::UniCompiler - Tests - Generic Operations
// ==============================================
template<typename T>
static ASMJIT_INLINE_NODEBUG std::make_unsigned_t<T> cast_uint(const T& x) noexcept {
return static_cast<std::make_unsigned_t<T>>(x);
}
template<typename T>
static ASMJIT_INLINE_NODEBUG std::make_signed_t<T> cast_int(const T& x) noexcept {
return static_cast<std::make_signed_t<T>>(x);
}
static ASMJIT_INLINE_NODEBUG int8_t saturate_i16_to_i8(int16_t x) noexcept {
return x < int16_t(-128) ? int8_t(-128) :
x > int16_t( 127) ? int8_t( 127) : int8_t(x & 0xFF);
}
static ASMJIT_INLINE_NODEBUG uint8_t saturate_i16_to_u8(int16_t x) noexcept {
return x < int16_t(0x00) ? uint8_t(0x00) :
x > int16_t(0xFF) ? uint8_t(0xFF) : uint8_t(x & 0xFF);
}
static ASMJIT_INLINE_NODEBUG int16_t saturate_i32_to_i16(int32_t x) noexcept {
return x < int32_t(-32768) ? int16_t(-32768) :
x > int32_t( 32767) ? int16_t( 32767) : int16_t(x & 0xFFFF);
}
static ASMJIT_INLINE_NODEBUG uint16_t saturate_i32_to_u16(int32_t x) noexcept {
return x < int32_t(0x0000) ? uint16_t(0x0000) :
x > int32_t(0xFFFF) ? uint16_t(0xFFFF) : uint16_t(x & 0xFFFF);
}
template<typename T, typename Derived> struct op_each_vv {
template<uint32_t kW>
static ASMJIT_INLINE VecOverlay<kW> apply(const VecOverlay<kW>& a) noexcept {
VecOverlay<kW> out{};
for (uint32_t i = 0; i < kW / sizeof(T); i++) {
out.set(i, Derived::apply_one(a.template get<T>(i)));
}
return out;
}
};
template<typename T, typename Derived> struct op_each_vvi {
template<uint32_t kW>
static ASMJIT_INLINE VecOverlay<kW> apply(const VecOverlay<kW>& a, uint32_t imm) noexcept {
VecOverlay<kW> out{};
for (uint32_t i = 0; i < kW / sizeof(T); i++) {
out.set(i, Derived::apply_one(a.template get<T>(i), imm));
}
return out;
}
};
template<typename T, typename Derived> struct op_each_vvv {
template<uint32_t kW>
static ASMJIT_INLINE VecOverlay<kW> apply(const VecOverlay<kW>& a, const VecOverlay<kW>& b) noexcept {
VecOverlay<kW> out{};
for (uint32_t i = 0; i < kW / sizeof(T); i++) {
out.set(i, Derived::apply_one(a.template get<T>(i), b.template get<T>(i)));
}
return out;
}
};
template<typename T, typename Derived> struct op_each_vvvi {
template<uint32_t kW>
static ASMJIT_INLINE VecOverlay<kW> apply(const VecOverlay<kW>& a, const VecOverlay<kW>& b, uint32_t imm) noexcept {
VecOverlay<kW> out{};
for (uint32_t i = 0; i < kW / sizeof(T); i++) {
out.set(i, Derived::apply_one(a.template get<T>(i), b.template get<T>(i), imm));
}
return out;
}
};
template<typename T, typename Derived> struct op_each_vvvv {
template<uint32_t kW>
static ASMJIT_INLINE VecOverlay<kW> apply(const VecOverlay<kW>& a, const VecOverlay<kW>& b, const VecOverlay<kW>& c) noexcept {
VecOverlay<kW> out{};
for (uint32_t i = 0; i < kW / sizeof(T); i++) {
out.set(i, Derived::apply_one(a.template get<T>(i), b.template get<T>(i), c.template get<T>(i)));
}
return out;
}
};
template<ScalarOpBehavior kB, typename T, typename Derived> struct op_scalar_vv {
template<uint32_t kW>
static ASMJIT_INLINE VecOverlay<kW> apply(const VecOverlay<kW>& a) noexcept {
VecOverlay<kW> out {};
if constexpr (kB == ScalarOpBehavior::kPreservingVec128) {
out.copy_16b_from(a);
}
out.set(0, Derived::apply_one(a.template get<T>(0)));
return out;
}
};
template<ScalarOpBehavior kB, typename T, typename Derived> struct op_scalar_vvi {
template<uint32_t kW>
static ASMJIT_INLINE VecOverlay<kW> apply(const VecOverlay<kW>& a, uint32_t imm) noexcept {
VecOverlay<kW> out {};
if constexpr (kB == ScalarOpBehavior::kPreservingVec128) {
out.copy_16b_from(a);
}
out.set(0, Derived::apply_one(a.template get<T>(0), imm));
return out;
}
};
template<ScalarOpBehavior kB, typename T, typename Derived> struct op_scalar_vvv {
template<uint32_t kW>
static ASMJIT_INLINE VecOverlay<kW> apply(const VecOverlay<kW>& a, const VecOverlay<kW>& b) noexcept {
VecOverlay<kW> out {};
if constexpr (kB == ScalarOpBehavior::kPreservingVec128) {
out.copy_16b_from(a);
}
out.set(0, Derived::apply_one(a.template get<T>(0), b.template get<T>(0)));
return out;
}
};
template<ScalarOpBehavior kB, typename T, typename Derived> struct op_scalar_vvvv {
template<uint32_t kW>
static ASMJIT_INLINE VecOverlay<kW> apply(const VecOverlay<kW>& a, const VecOverlay<kW>& b, const VecOverlay<kW>& c) noexcept {
VecOverlay<kW> out {};
if constexpr (kB == ScalarOpBehavior::kPreservingVec128) {
out.copy_16b_from(a);
}
out.set(0, Derived::apply_one(a.template get<T>(0), b.template get<T>(0), c.template get<T>(0)));
return out;
}
};
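// The kB parameter models what the target's scalar instructions do with the unused
// lanes of the destination: with ScalarOpBehavior::kPreservingVec128 (x86-style,
// e.g. `addss`) the remaining lanes of the low 128-bit vector are carried over from
// the first source, otherwise they are zeroed (AArch64-style scalar register ops).
//
// All reference ops below follow the same CRTP pattern - derive from one of the
// op_each_*/op_scalar_* bases and provide apply_one(); the base maps it across all
// lanes. A simplified sketch (the real ops below also handle wrapping explicitly):
//
//   template<typename T> struct vec_op_example : public op_each_vvv<T, vec_op_example<T>> {
//     static T apply_one(const T& a, const T& b) noexcept { return T(a + b); }
//   };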
// ujit::UniCompiler - Tests - Generic Operations - VV
// ===================================================
struct vec_op_mov : public op_each_vv<uint32_t, vec_op_mov> {
static ASMJIT_INLINE_NODEBUG uint32_t apply_one(const uint32_t& a) noexcept { return a; }
};
struct vec_op_mov_u64 {
template<uint32_t kW>
static ASMJIT_INLINE VecOverlay<kW> apply(const VecOverlay<kW>& a) noexcept {
VecOverlay<kW> out{};
out.data_u64[0] = a.data_u64[0];
return out;
}
};
struct vec_op_broadcast_u8 {
template<uint32_t kW>
static ASMJIT_INLINE VecOverlay<kW> apply(const VecOverlay<kW>& a) noexcept {
VecOverlay<kW> out{};
for (uint32_t i = 0; i < kW; i++) {
out.data_u8[i] = a.data_u8[0];
}
return out;
}
};
struct vec_op_broadcast_u16 {
template<uint32_t kW>
static ASMJIT_INLINE VecOverlay<kW> apply(const VecOverlay<kW>& a) noexcept {
VecOverlay<kW> out{};
for (uint32_t i = 0; i < kW / 2u; i++) {
out.data_u16[i] = a.data_u16[0];
}
return out;
}
};
struct vec_op_broadcast_u32 {
template<uint32_t kW>
static ASMJIT_INLINE VecOverlay<kW> apply(const VecOverlay<kW>& a) noexcept {
VecOverlay<kW> out{};
for (uint32_t i = 0; i < kW / 4u; i++) {
out.data_u32[i] = a.data_u32[0];
}
return out;
}
};
struct vec_op_broadcast_u64 {
template<uint32_t kW>
static ASMJIT_INLINE VecOverlay<kW> apply(const VecOverlay<kW>& a) noexcept {
VecOverlay<kW> out{};
for (uint32_t i = 0; i < kW / 8u; i++) {
out.data_u64[i] = a.data_u64[0];
}
return out;
}
};
struct vec_op_broadcast_u128 {
template<uint32_t kW>
static ASMJIT_INLINE VecOverlay<kW> apply(const VecOverlay<kW>& a) noexcept {
VecOverlay<kW> out{};
for (uint32_t i = 0; i < kW / 8u; i += 2) {
out.data_u64[i + 0] = a.data_u64[0];
out.data_u64[i + 1] = a.data_u64[1];
}
return out;
}
};
struct vec_op_broadcast_u256 {
template<uint32_t kW>
static ASMJIT_INLINE VecOverlay<kW> apply(const VecOverlay<kW>& a) noexcept {
VecOverlay<kW> out{};
if constexpr (kW < 32) {
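// Broadcasting a 256-bit element within a narrower (128-bit) vector is an identity.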
out = a;
}
else {
for (uint32_t i = 0; i < kW / 8u; i += 4) {
out.data_u64[i + 0] = a.data_u64[0];
out.data_u64[i + 1] = a.data_u64[1];
out.data_u64[i + 2] = a.data_u64[2];
out.data_u64[i + 3] = a.data_u64[3];
}
}
return out;
}
};
template<typename T> struct vec_op_abs : public op_each_vv<T, vec_op_abs<T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a) noexcept { return a < 0 ? T(cast_uint(T(0)) - cast_uint(a)) : a; }
};
template<typename T> struct vec_op_neg : public op_each_vv<T, vec_op_neg<T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a) noexcept { return T(cast_uint(T(0)) - cast_uint(a)); }
};
template<typename T> struct vec_op_not : public op_each_vv<T, vec_op_not<T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a) noexcept { return T(~a); }
};
struct vec_op_cvt_i8_lo_to_i16 {
template<uint32_t kW>
static ASMJIT_INLINE VecOverlay<kW> apply(const VecOverlay<kW>& a) noexcept {
VecOverlay<kW> out{};
for (uint32_t off = 0; off < kW; off += 16) {
out.data_i16[off / 2 + 0] = a.data_i8[off / 2 + 0];
out.data_i16[off / 2 + 1] = a.data_i8[off / 2 + 1];
out.data_i16[off / 2 + 2] = a.data_i8[off / 2 + 2];
out.data_i16[off / 2 + 3] = a.data_i8[off / 2 + 3];
out.data_i16[off / 2 + 4] = a.data_i8[off / 2 + 4];
out.data_i16[off / 2 + 5] = a.data_i8[off / 2 + 5];
out.data_i16[off / 2 + 6] = a.data_i8[off / 2 + 6];
out.data_i16[off / 2 + 7] = a.data_i8[off / 2 + 7];
}
return out;
}
};
struct vec_op_cvt_i8_hi_to_i16 {
template<uint32_t kW>
static ASMJIT_INLINE VecOverlay<kW> apply(const VecOverlay<kW>& a) noexcept {
VecOverlay<kW> out{};
for (uint32_t off = 0; off < kW; off += 16) {
out.data_i16[off / 2 + 0] = a.data_i8[kW / 2 + off / 2 + 0];
out.data_i16[off / 2 + 1] = a.data_i8[kW / 2 + off / 2 + 1];
out.data_i16[off / 2 + 2] = a.data_i8[kW / 2 + off / 2 + 2];
out.data_i16[off / 2 + 3] = a.data_i8[kW / 2 + off / 2 + 3];
out.data_i16[off / 2 + 4] = a.data_i8[kW / 2 + off / 2 + 4];
out.data_i16[off / 2 + 5] = a.data_i8[kW / 2 + off / 2 + 5];
out.data_i16[off / 2 + 6] = a.data_i8[kW / 2 + off / 2 + 6];
out.data_i16[off / 2 + 7] = a.data_i8[kW / 2 + off / 2 + 7];
}
return out;
}
};
struct vec_op_cvt_u8_lo_to_u16 {
template<uint32_t kW>
static ASMJIT_INLINE VecOverlay<kW> apply(const VecOverlay<kW>& a) noexcept {
VecOverlay<kW> out{};
for (uint32_t off = 0; off < kW; off += 16) {
out.data_u16[off / 2 + 0] = a.data_u8[off / 2 + 0];
out.data_u16[off / 2 + 1] = a.data_u8[off / 2 + 1];
out.data_u16[off / 2 + 2] = a.data_u8[off / 2 + 2];
out.data_u16[off / 2 + 3] = a.data_u8[off / 2 + 3];
out.data_u16[off / 2 + 4] = a.data_u8[off / 2 + 4];
out.data_u16[off / 2 + 5] = a.data_u8[off / 2 + 5];
out.data_u16[off / 2 + 6] = a.data_u8[off / 2 + 6];
out.data_u16[off / 2 + 7] = a.data_u8[off / 2 + 7];
}
return out;
}
};
struct vec_op_cvt_u8_hi_to_u16 {
template<uint32_t kW>
static ASMJIT_INLINE VecOverlay<kW> apply(const VecOverlay<kW>& a) noexcept {
VecOverlay<kW> out{};
for (uint32_t off = 0; off < kW; off += 16) {
out.data_u16[off / 2 + 0] = a.data_u8[kW / 2 + off / 2 + 0];
out.data_u16[off / 2 + 1] = a.data_u8[kW / 2 + off / 2 + 1];
out.data_u16[off / 2 + 2] = a.data_u8[kW / 2 + off / 2 + 2];
out.data_u16[off / 2 + 3] = a.data_u8[kW / 2 + off / 2 + 3];
out.data_u16[off / 2 + 4] = a.data_u8[kW / 2 + off / 2 + 4];
out.data_u16[off / 2 + 5] = a.data_u8[kW / 2 + off / 2 + 5];
out.data_u16[off / 2 + 6] = a.data_u8[kW / 2 + off / 2 + 6];
out.data_u16[off / 2 + 7] = a.data_u8[kW / 2 + off / 2 + 7];
}
return out;
}
};
struct vec_op_cvt_i8_to_i32 {
template<uint32_t kW>
static ASMJIT_INLINE VecOverlay<kW> apply(const VecOverlay<kW>& a) noexcept {
VecOverlay<kW> out{};
for (uint32_t off = 0; off < kW; off += 16) {
out.data_i32[off / 4 + 0] = a.data_i8[off / 4 + 0];
out.data_i32[off / 4 + 1] = a.data_i8[off / 4 + 1];
out.data_i32[off / 4 + 2] = a.data_i8[off / 4 + 2];
out.data_i32[off / 4 + 3] = a.data_i8[off / 4 + 3];
}
return out;
}
};
struct vec_op_cvt_u8_to_u32 {
template<uint32_t kW>
static ASMJIT_INLINE VecOverlay<kW> apply(const VecOverlay<kW>& a) noexcept {
VecOverlay<kW> out{};
for (uint32_t off = 0; off < kW; off += 16) {
out.data_u32[off / 4 + 0] = a.data_u8[off / 4 + 0];
out.data_u32[off / 4 + 1] = a.data_u8[off / 4 + 1];
out.data_u32[off / 4 + 2] = a.data_u8[off / 4 + 2];
out.data_u32[off / 4 + 3] = a.data_u8[off / 4 + 3];
}
return out;
}
};
struct vec_op_cvt_i16_lo_to_i32 {
template<uint32_t kW>
static ASMJIT_INLINE VecOverlay<kW> apply(const VecOverlay<kW>& a) noexcept {
VecOverlay<kW> out{};
for (uint32_t off = 0; off < kW; off += 16) {
out.data_i32[off / 4 + 0] = a.data_i16[off / 4 + 0];
out.data_i32[off / 4 + 1] = a.data_i16[off / 4 + 1];
out.data_i32[off / 4 + 2] = a.data_i16[off / 4 + 2];
out.data_i32[off / 4 + 3] = a.data_i16[off / 4 + 3];
}
return out;
}
};
struct vec_op_cvt_i16_hi_to_i32 {
template<uint32_t kW>
static ASMJIT_INLINE VecOverlay<kW> apply(const VecOverlay<kW>& a) noexcept {
VecOverlay<kW> out{};
for (uint32_t off = 0; off < kW; off += 16) {
out.data_i32[off / 4 + 0] = a.data_i16[kW / 4 + off / 4 + 0];
out.data_i32[off / 4 + 1] = a.data_i16[kW / 4 + off / 4 + 1];
out.data_i32[off / 4 + 2] = a.data_i16[kW / 4 + off / 4 + 2];
out.data_i32[off / 4 + 3] = a.data_i16[kW / 4 + off / 4 + 3];
}
return out;
}
};
struct vec_op_cvt_u16_lo_to_u32 {
template<uint32_t kW>
static ASMJIT_INLINE VecOverlay<kW> apply(const VecOverlay<kW>& a) noexcept {
VecOverlay<kW> out{};
for (uint32_t off = 0; off < kW; off += 16) {
out.data_u32[off / 4 + 0] = a.data_u16[off / 4 + 0];
out.data_u32[off / 4 + 1] = a.data_u16[off / 4 + 1];
out.data_u32[off / 4 + 2] = a.data_u16[off / 4 + 2];
out.data_u32[off / 4 + 3] = a.data_u16[off / 4 + 3];
}
return out;
}
};
struct vec_op_cvt_u16_hi_to_u32 {
template<uint32_t kW>
static ASMJIT_INLINE VecOverlay<kW> apply(const VecOverlay<kW>& a) noexcept {
VecOverlay<kW> out{};
for (uint32_t off = 0; off < kW; off += 16) {
out.data_u32[off / 4 + 0] = a.data_u16[kW / 4 + off / 4 + 0];
out.data_u32[off / 4 + 1] = a.data_u16[kW / 4 + off / 4 + 1];
out.data_u32[off / 4 + 2] = a.data_u16[kW / 4 + off / 4 + 2];
out.data_u32[off / 4 + 3] = a.data_u16[kW / 4 + off / 4 + 3];
}
return out;
}
};
struct vec_op_cvt_i32_lo_to_i64 {
template<uint32_t kW>
static ASMJIT_INLINE VecOverlay<kW> apply(const VecOverlay<kW>& a) noexcept {
VecOverlay<kW> out{};
for (uint32_t off = 0; off < kW; off += 16) {
out.data_i64[off / 8 + 0] = a.data_i32[off / 8 + 0];
out.data_i64[off / 8 + 1] = a.data_i32[off / 8 + 1];
}
return out;
}
};
struct vec_op_cvt_i32_hi_to_i64 {
template<uint32_t kW>
static ASMJIT_INLINE VecOverlay<kW> apply(const VecOverlay<kW>& a) noexcept {
VecOverlay<kW> out{};
for (uint32_t off = 0; off < kW; off += 16) {
out.data_i64[off / 8 + 0] = a.data_i32[kW / 8 + off / 8 + 0];
out.data_i64[off / 8 + 1] = a.data_i32[kW / 8 + off / 8 + 1];
}
return out;
}
};
struct vec_op_cvt_u32_lo_to_u64 {
template<uint32_t kW>
static ASMJIT_INLINE VecOverlay<kW> apply(const VecOverlay<kW>& a) noexcept {
VecOverlay<kW> out{};
for (uint32_t off = 0; off < kW; off += 16) {
out.data_u64[off / 8 + 0] = a.data_u32[off / 8 + 0];
out.data_u64[off / 8 + 1] = a.data_u32[off / 8 + 1];
}
return out;
}
};
struct vec_op_cvt_u32_hi_to_u64 {
template<uint32_t kW>
static ASMJIT_INLINE VecOverlay<kW> apply(const VecOverlay<kW>& a) noexcept {
VecOverlay<kW> out{};
for (uint32_t off = 0; off < kW; off += 16) {
out.data_u64[off / 8 + 0] = a.data_u32[kW / 8 + off / 8 + 0];
out.data_u64[off / 8 + 1] = a.data_u32[kW / 8 + off / 8 + 1];
}
return out;
}
};
template<typename T> struct vec_op_fabs : public op_each_vv<T, vec_op_fabs<T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a) noexcept { return std::fabs(a); }
};
template<typename T> struct vec_op_trunc : public op_each_vv<T, vec_op_trunc<T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a) noexcept { return std::trunc(a); }
};
template<typename T> struct vec_op_floor : public op_each_vv<T, vec_op_floor<T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a) noexcept { return std::floor(a); }
};
template<typename T> struct vec_op_ceil : public op_each_vv<T, vec_op_ceil<T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a) noexcept { return std::ceil(a); }
};
template<typename T> struct vec_op_round_even : public op_each_vv<T, vec_op_round_even<T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a) noexcept { return std::nearbyint(a); }
};
template<typename T> struct vec_op_round_half_away : public op_each_vv<T, vec_op_round_half_away<T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a) noexcept { return std::trunc(fadd(a, fxor(half_minus_1ulp_const<T>::value, fsign(a)))); }
};
template<typename T> struct vec_op_round_half_up : public op_each_vv<T, vec_op_round_half_up<T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a) noexcept { return std::floor(fadd(a, half_minus_1ulp_const<T>::value)); }
};
template<typename T> struct vec_op_sqrt : public op_each_vv<T, vec_op_sqrt<T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a) noexcept { return fsqrt(a); }
};
template<typename T> struct vec_op_rcp : public op_each_vv<T, vec_op_rcp<T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a) noexcept { return fdiv(T(1), a); }
};
struct vec_op_cvt_i32_to_f32 {
template<uint32_t kW>
static ASMJIT_INLINE VecOverlay<kW> apply(const VecOverlay<kW>& a) noexcept {
VecOverlay<kW> out{};
for (uint32_t off = 0; off < kW; off += 16) {
out.data_f32[off / 4 + 0] = float(a.data_i32[off / 4 + 0]);
out.data_f32[off / 4 + 1] = float(a.data_i32[off / 4 + 1]);
out.data_f32[off / 4 + 2] = float(a.data_i32[off / 4 + 2]);
out.data_f32[off / 4 + 3] = float(a.data_i32[off / 4 + 3]);
}
return out;
}
};
template<bool kHi>
struct vec_op_cvt_f32_to_f64_impl {
template<uint32_t kW>
static ASMJIT_INLINE VecOverlay<kW> apply(const VecOverlay<kW>& a) noexcept {
VecOverlay<kW> out{};
uint32_t adj = kHi ? kW / 8 : 0u;
for (uint32_t off = 0; off < kW; off += 16) {
out.data_f64[off / 8 + 0] = double(a.data_f32[off / 8 + adj + 0]);
out.data_f64[off / 8 + 1] = double(a.data_f32[off / 8 + adj + 1]);
}
return out;
}
};
struct vec_op_cvt_f32_lo_to_f64 : public vec_op_cvt_f32_to_f64_impl<false> {};
struct vec_op_cvt_f32_hi_to_f64 : public vec_op_cvt_f32_to_f64_impl<true> {};
template<bool kHi>
struct vec_op_cvt_f64_to_f32_impl {
template<uint32_t kW>
static ASMJIT_INLINE VecOverlay<kW> apply(const VecOverlay<kW>& a) noexcept {
VecOverlay<kW> out{};
uint32_t adj = kHi ? kW / 8 : 0u;
for (uint32_t off = 0; off < kW; off += 16) {
out.data_f32[off / 8 + adj + 0] = float(a.data_f64[off / 8 + 0]);
out.data_f32[off / 8 + adj + 1] = float(a.data_f64[off / 8 + 1]);
}
return out;
}
};
struct vec_op_cvt_f64_to_f32_lo : public vec_op_cvt_f64_to_f32_impl<false> {};
struct vec_op_cvt_f64_to_f32_hi : public vec_op_cvt_f64_to_f32_impl<true> {};
template<bool kHi>
struct vec_op_cvt_i32_to_f64_impl {
template<uint32_t kW>
static ASMJIT_INLINE VecOverlay<kW> apply(const VecOverlay<kW>& a) noexcept {
VecOverlay<kW> out{};
uint32_t adj = kHi ? kW / 8 : 0u;
for (uint32_t off = 0; off < kW; off += 16) {
out.data_f64[off / 8 + 0] = double(a.data_i32[off / 8 + adj + 0]);
out.data_f64[off / 8 + 1] = double(a.data_i32[off / 8 + adj + 1]);
}
return out;
}
};
struct vec_op_cvt_i32_lo_to_f64 : public vec_op_cvt_i32_to_f64_impl<false> {};
struct vec_op_cvt_i32_hi_to_f64 : public vec_op_cvt_i32_to_f64_impl<true> {};
template<FloatToIntOutsideRangeBehavior behavior>
struct vec_op_cvt_trunc_f32_to_i32 {
template<uint32_t kW>
static ASMJIT_INLINE VecOverlay<kW> apply(const VecOverlay<kW>& a) noexcept {
VecOverlay<kW> out{};
for (uint32_t off = 0; off < kW; off += 16) {
out.data_i32[off / 4 + 0] = cvt_float_to_int_trunc<behavior, int32_t>(a.data_f32[off / 4 + 0]);
out.data_i32[off / 4 + 1] = cvt_float_to_int_trunc<behavior, int32_t>(a.data_f32[off / 4 + 1]);
out.data_i32[off / 4 + 2] = cvt_float_to_int_trunc<behavior, int32_t>(a.data_f32[off / 4 + 2]);
out.data_i32[off / 4 + 3] = cvt_float_to_int_trunc<behavior, int32_t>(a.data_f32[off / 4 + 3]);
}
return out;
}
};
template<FloatToIntOutsideRangeBehavior behavior, bool kHi>
struct vec_op_cvt_trunc_f64_to_i32_impl {
template<uint32_t kW>
static ASMJIT_INLINE VecOverlay<kW> apply(const VecOverlay<kW>& a) noexcept {
VecOverlay<kW> out{};
uint32_t adj = kHi ? kW / 8 : 0u;
for (uint32_t off = 0; off < kW; off += 16) {
out.data_i32[off / 8 + adj + 0] = cvt_float_to_int_trunc<behavior, int32_t>(a.data_f64[off / 8 + 0]);
out.data_i32[off / 8 + adj + 1] = cvt_float_to_int_trunc<behavior, int32_t>(a.data_f64[off / 8 + 1]);
}
return out;
}
};
template<FloatToIntOutsideRangeBehavior behavior>
struct vec_op_cvt_trunc_f64_to_i32_lo : vec_op_cvt_trunc_f64_to_i32_impl<behavior, false> {};
template<FloatToIntOutsideRangeBehavior behavior>
struct vec_op_cvt_trunc_f64_to_i32_hi : vec_op_cvt_trunc_f64_to_i32_impl<behavior, true> {};
template<FloatToIntOutsideRangeBehavior behavior>
struct vec_op_cvt_round_f32_to_i32 {
template<uint32_t kW>
static ASMJIT_INLINE VecOverlay<kW> apply(const VecOverlay<kW>& a) noexcept {
VecOverlay<kW> out{};
for (uint32_t off = 0; off < kW; off += 16) {
out.data_i32[off / 4 + 0] = cvt_float_to_int_round<behavior, int32_t>(a.data_f32[off / 4 + 0]);
out.data_i32[off / 4 + 1] = cvt_float_to_int_round<behavior, int32_t>(a.data_f32[off / 4 + 1]);
out.data_i32[off / 4 + 2] = cvt_float_to_int_round<behavior, int32_t>(a.data_f32[off / 4 + 2]);
out.data_i32[off / 4 + 3] = cvt_float_to_int_round<behavior, int32_t>(a.data_f32[off / 4 + 3]);
}
return out;
}
};
template<FloatToIntOutsideRangeBehavior behavior, bool kHi>
struct vec_op_cvt_round_f64_to_i32_impl {
template<uint32_t kW>
static ASMJIT_INLINE VecOverlay<kW> apply(const VecOverlay<kW>& a) noexcept {
VecOverlay<kW> out{};
uint32_t adj = kHi ? kW / 8 : 0u;
for (uint32_t off = 0; off < kW; off += 16) {
out.data_i32[off / 8 + adj + 0] = cvt_float_to_int_round<behavior, int32_t>(a.data_f64[off / 8 + 0]);
out.data_i32[off / 8 + adj + 1] = cvt_float_to_int_round<behavior, int32_t>(a.data_f64[off / 8 + 1]);
}
return out;
}
};
template<FloatToIntOutsideRangeBehavior behavior>
struct vec_op_cvt_round_f64_to_i32_lo : vec_op_cvt_round_f64_to_i32_impl<behavior, false> {};
template<FloatToIntOutsideRangeBehavior behavior>
struct vec_op_cvt_round_f64_to_i32_hi : vec_op_cvt_round_f64_to_i32_impl<behavior, true> {};
struct scalar_op_cvt_f32_to_f64 {
template<uint32_t kW>
static ASMJIT_INLINE VecOverlay<kW> apply(const VecOverlay<kW>& a) noexcept {
VecOverlay<kW> out{};
out.data_f64[0] = a.data_f32[0];
return out;
}
};
struct scalar_op_cvt_f64_to_f32 {
template<uint32_t kW>
static ASMJIT_INLINE VecOverlay<kW> apply(const VecOverlay<kW>& a) noexcept {
VecOverlay<kW> out{};
out.data_f32[0] = a.data_f64[0];
return out;
}
};
template<ScalarOpBehavior kB, typename T> struct scalar_op_trunc : public op_scalar_vv<kB, T, scalar_op_trunc<kB, T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a) noexcept { return std::trunc(a); }
};
template<ScalarOpBehavior kB, typename T> struct scalar_op_floor : public op_scalar_vv<kB, T, scalar_op_floor<kB, T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a) noexcept { return std::floor(a); }
};
template<ScalarOpBehavior kB, typename T> struct scalar_op_ceil : public op_scalar_vv<kB, T, scalar_op_ceil<kB, T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a) noexcept { return std::ceil(a); }
};
template<ScalarOpBehavior kB, typename T> struct scalar_op_round_even : public op_scalar_vv<kB, T, scalar_op_round_even<kB, T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a) noexcept { return std::nearbyint(a); }
};
template<ScalarOpBehavior kB, typename T> struct scalar_op_round_half_away : public op_scalar_vv<kB, T, scalar_op_round_half_away<kB, T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a) noexcept { return std::trunc(fadd(a, fxor(half_minus_1ulp_const<T>::value, fsign(a)))); }
};
template<ScalarOpBehavior kB, typename T> struct scalar_op_round_half_up : public op_scalar_vv<kB, T, scalar_op_round_half_up<kB, T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a) noexcept { return std::floor(fadd(a, half_minus_1ulp_const<T>::value)); }
};
template<ScalarOpBehavior kB, typename T> struct scalar_op_sqrt : public op_scalar_vv<kB, T, scalar_op_sqrt<kB, T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a) noexcept { return fsqrt(a); }
};
// ujit::UniCompiler - Tests - Generic Operations - VVI
// ====================================================
template<typename T> struct vec_op_slli : public op_each_vvi<T, vec_op_slli<T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, uint32_t imm) noexcept { return T(cast_uint(a) << imm); }
};
template<typename T> struct vec_op_srli : public op_each_vvi<T, vec_op_srli<T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, uint32_t imm) noexcept { return T(cast_uint(a) >> imm); }
};
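// Rounding shift right (logical): conceptually `(a + (1 << (imm - 1))) >> imm`,
// computed below by adding the last bit shifted out so the intermediate sum cannot
// overflow. This mirrors AArch64 `urshr` semantics; imm is expected to be >= 1,
// since a zero shift amount would make `imm - 1` invalid.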
template<typename T> struct vec_op_rsrli : public op_each_vvi<T, vec_op_rsrli<T>> {
static ASMJIT_INLINE T apply_one(const T& a, uint32_t imm) noexcept {
T add = T((a & (T(1) << (imm - 1))) != 0);
return T((cast_uint(a) >> imm) + cast_uint(add));
}
};
template<typename T> struct vec_op_srai : public op_each_vvi<T, vec_op_srai<T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, uint32_t imm) noexcept { return T(cast_int(a) >> imm); }
};
struct vec_op_sllb_u128 {
template<uint32_t kW>
static ASMJIT_INLINE VecOverlay<kW> apply(const VecOverlay<kW>& a, uint32_t imm) noexcept {
VecOverlay<kW> out{};
for (uint32_t off = 0; off < kW; off += 16) {
for (uint32_t i = 0; i < 16; i++) {
out.data_u8[off + i] = i < imm ? uint8_t(0) : a.data_u8[off + i - imm];
}
}
return out;
}
};
struct vec_op_srlb_u128 {
template<uint32_t kW>
static ASMJIT_INLINE VecOverlay<kW> apply(const VecOverlay<kW>& a, uint32_t imm) noexcept {
VecOverlay<kW> out{};
for (uint32_t off = 0; off < kW; off += 16) {
for (uint32_t i = 0; i < 16; i++) {
out.data_u8[off + i] = i + imm < 16u ? a.data_u8[off + i + imm] : uint8_t(0);
}
}
return out;
}
};
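// The two byte-shift ops above operate independently on each 128-bit lane, modeling
// the x86 `pslldq`/`psrldq` behavior (on AArch64 this is presumably lowered via EXT
// with a zero vector).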
struct vec_op_swizzle_u16 {
template<uint32_t kW>
static ASMJIT_INLINE VecOverlay<kW> apply(const VecOverlay<kW>& a, uint32_t imm) noexcept {
uint32_t D = (imm >> 24) & 0x3;
uint32_t C = (imm >> 16) & 0x3;
uint32_t B = (imm >> 8) & 0x3;
uint32_t A = (imm >> 0) & 0x3;
VecOverlay<kW> out{};
for (uint32_t off = 0; off < kW; off += 16) {
out.data_u16[off / 2 + 0] = a.data_u16[off / 2 + 0 + A];
out.data_u16[off / 2 + 1] = a.data_u16[off / 2 + 0 + B];
out.data_u16[off / 2 + 2] = a.data_u16[off / 2 + 0 + C];
out.data_u16[off / 2 + 3] = a.data_u16[off / 2 + 0 + D];
out.data_u16[off / 2 + 4] = a.data_u16[off / 2 + 4 + A];
out.data_u16[off / 2 + 5] = a.data_u16[off / 2 + 4 + B];
out.data_u16[off / 2 + 6] = a.data_u16[off / 2 + 4 + C];
out.data_u16[off / 2 + 7] = a.data_u16[off / 2 + 4 + D];
}
return out;
}
};
struct vec_op_swizzle_lo_u16x4 {
template<uint32_t kW>
static ASMJIT_INLINE VecOverlay<kW> apply(const VecOverlay<kW>& a, uint32_t imm) noexcept {
uint32_t D = (imm >> 24) & 0x3;
uint32_t C = (imm >> 16) & 0x3;
uint32_t B = (imm >> 8) & 0x3;
uint32_t A = (imm >> 0) & 0x3;
VecOverlay<kW> out{};
for (uint32_t off = 0; off < kW; off += 16) {
out.data_u16[off / 2 + 0] = a.data_u16[off / 2 + A];
out.data_u16[off / 2 + 1] = a.data_u16[off / 2 + B];
out.data_u16[off / 2 + 2] = a.data_u16[off / 2 + C];
out.data_u16[off / 2 + 3] = a.data_u16[off / 2 + D];
memcpy(out.data_u8 + off + 8, a.data_u8 + off + 8, 8);
}
return out;
}
};
struct vec_op_swizzle_hi_u16x4 {
template<uint32_t kW>
static ASMJIT_INLINE VecOverlay<kW> apply(const VecOverlay<kW>& a, uint32_t imm) noexcept {
uint32_t D = (imm >> 24) & 0x3;
uint32_t C = (imm >> 16) & 0x3;
uint32_t B = (imm >> 8) & 0x3;
uint32_t A = (imm >> 0) & 0x3;
VecOverlay<kW> out{};
for (uint32_t off = 0; off < kW; off += 16) {
memcpy(out.data_u8 + off, a.data_u8 + off, 8);
out.data_u16[off / 2 + 4] = a.data_u16[off / 2 + 4 + A];
out.data_u16[off / 2 + 5] = a.data_u16[off / 2 + 4 + B];
out.data_u16[off / 2 + 6] = a.data_u16[off / 2 + 4 + C];
out.data_u16[off / 2 + 7] = a.data_u16[off / 2 + 4 + D];
}
return out;
}
};
struct vec_op_swizzle_u32x4 {
template<uint32_t kW>
static ASMJIT_INLINE VecOverlay<kW> apply(const VecOverlay<kW>& a, uint32_t imm) noexcept {
uint32_t D = (imm >> 24) & 0x3;
uint32_t C = (imm >> 16) & 0x3;
uint32_t B = (imm >> 8) & 0x3;
uint32_t A = (imm >> 0) & 0x3;
VecOverlay<kW> out{};
for (uint32_t off = 0; off < kW; off += 16) {
out.data_u32[off / 4 + 0] = a.data_u32[off / 4 + A];
out.data_u32[off / 4 + 1] = a.data_u32[off / 4 + B];
out.data_u32[off / 4 + 2] = a.data_u32[off / 4 + C];
out.data_u32[off / 4 + 3] = a.data_u32[off / 4 + D];
}
return out;
}
};
struct vec_op_swizzle_u64x2 {
template<uint32_t kW>
static ASMJIT_INLINE VecOverlay<kW> apply(const VecOverlay<kW>& a, uint32_t imm) noexcept {
uint32_t B = (imm >> 8) & 0x1;
uint32_t A = (imm >> 0) & 0x1;
VecOverlay<kW> out{};
for (uint32_t off = 0; off < kW; off += 16) {
out.data_u64[off / 8 + 0] = a.data_u64[off / 8 + A];
out.data_u64[off / 8 + 1] = a.data_u64[off / 8 + B];
}
return out;
}
};
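// In the swizzle ops above the immediate packs one source lane index per byte
// (bits 0-7 select output lane 0, bits 8-15 lane 1, and so on), presumably produced
// by a swizzle-encoding helper elsewhere in ujit. This differs from the raw x86
// `pshufd`-style immediate, which packs four 2-bit indices into a single byte; the
// per-byte form used here is re-encoded per target by the compiler.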
// ujit::UniCompiler - Tests - Generic Operations - VVV
// ====================================================
template<typename T> struct vec_op_and : public op_each_vvv<T, vec_op_and<T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b) noexcept { return T(a & b); }
};
template<typename T> struct vec_op_or : public op_each_vvv<T, vec_op_or<T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b) noexcept { return T(a | b); }
};
template<typename T> struct vec_op_xor : public op_each_vvv<T, vec_op_xor<T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b) noexcept { return T(a ^ b); }
};
template<typename T> struct vec_op_andn : public op_each_vvv<T, vec_op_andn<T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b) noexcept { return T(~a & b); }
};
template<typename T> struct vec_op_bic : public op_each_vvv<T, vec_op_bic<T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b) noexcept { return T(a & ~b); }
};
template<typename T> struct vec_op_add : public op_each_vvv<T, vec_op_add<T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b) noexcept { return T(cast_uint(a) + cast_uint(b)); }
};
template<typename T> struct vec_op_adds : public op_each_vvv<T, vec_op_adds<T>> {
static ASMJIT_INLINE T apply_one(const T& a, const T& b) noexcept {
Support::FastUInt8 of{};
T result = Support::add_overflow(a, b, &of);
if (!of) {
return result;
}
if constexpr (std::is_unsigned_v<T>) {
return std::numeric_limits<T>::max();
}
else {
return b > T(0) ? std::numeric_limits<T>::max() : std::numeric_limits<T>::lowest();
}
}
};
template<typename T> struct vec_op_sub : public op_each_vvv<T, vec_op_sub<T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b) noexcept { return T(cast_uint(a) - cast_uint(b)); }
};
template<typename T> struct vec_op_subs : public op_each_vvv<T, vec_op_subs<T>> {
static ASMJIT_INLINE T apply_one(const T& a, const T& b) noexcept {
Support::FastUInt8 of{};
T result = Support::sub_overflow(a, b, &of);
if (!of) {
return result;
}
if constexpr (std::is_unsigned_v<T>) {
return std::numeric_limits<T>::lowest();
}
else {
return b > T(0) ? std::numeric_limits<T>::lowest() : std::numeric_limits<T>::max();
}
}
};
template<typename T> struct vec_op_mul : public op_each_vvv<T, vec_op_mul<T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b) noexcept { return T((uint64_t(a) * uint64_t(b)) & uint64_t(~T(0))); }
};
template<typename T> struct vec_op_mulhi : public op_each_vvv<T, vec_op_mulhi<T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b) noexcept {
uint64_t result = uint64_t(int64_t(cast_int(a))) * uint64_t(int64_t(cast_int(b)));
return T(T(result >> (sizeof(T) * 8u)) & T(~T(0)));
}
};
template<typename T> struct vec_op_mulhu : public op_each_vvv<T, vec_op_mulhu<T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b) noexcept {
uint64_t result = uint64_t(a) * uint64_t(b);
return T(T(result >> (sizeof(T) * 8u)) & T(~T(0)));
}
};
struct vec_op_mul_u64_lo_u32 : public op_each_vvv<uint64_t, vec_op_mul_u64_lo_u32> {
static ASMJIT_INLINE_NODEBUG uint64_t apply_one(const uint64_t& a, const uint64_t& b) noexcept {
return uint64_t(a) * uint64_t(b & 0xFFFFFFFFu);
}
};
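// Mirrors the x86 `pmaddwd` semantics: each 32-bit output lane is the sum of two
// adjacent signed 16x16->32 products. The arithmetic is performed in uint32_t so
// that the single possible overflow case (both products being 0x8000 * 0x8000)
// wraps the same way the instruction does.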
struct vec_op_mhadd_i16_i32 : public op_each_vvv<uint32_t, vec_op_mhadd_i16_i32> {
static ASMJIT_INLINE_NODEBUG uint32_t apply_one(const uint32_t& a, const uint32_t& b) noexcept {
uint32_t al = uint32_t(int32_t(int16_t(a & 0xFFFF)));
uint32_t ah = uint32_t(int32_t(int16_t(a >> 16)));
uint32_t bl = uint32_t(int32_t(int16_t(b & 0xFFFF)));
uint32_t bh = uint32_t(int32_t(int16_t(b >> 16)));
return al * bl + ah * bh;
}
};
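// Each 32-bit lane above holds two packed signed 16-bit values: adjacent i16 pairs
// of `a` and `b` are multiplied and the two products summed into one i32 lane. This
// mirrors the semantics of x86 pmaddwd; the arithmetic is done in uint32_t so that
// overflow stays well defined while producing the same bit pattern as the signed
// computation.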
template<typename T> struct vec_op_madd : public op_each_vvvv<T, vec_op_madd<T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b, const T& c) noexcept { return T((uint64_t(a) * uint64_t(b) + uint64_t(c)) & uint64_t(~T(0))); }
};
template<typename T> struct vec_op_min : public op_each_vvv<T, vec_op_min<T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b) noexcept { return a < b ? a : b; }
};
template<typename T> struct vec_op_max : public op_each_vvv<T, vec_op_max<T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b) noexcept { return a > b ? a : b; }
};
template<typename T> struct vec_op_cmp_eq : public op_each_vvv<T, vec_op_cmp_eq<T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b) noexcept { return a == b ? Support::bit_ones<T> : T(0); }
};
template<typename T> struct vec_op_cmp_ne : public op_each_vvv<T, vec_op_cmp_ne<T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b) noexcept { return a != b ? Support::bit_ones<T> : T(0); }
};
template<typename T> struct vec_op_cmp_gt : public op_each_vvv<T, vec_op_cmp_gt<T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b) noexcept { return a > b ? Support::bit_ones<T> : T(0); }
};
template<typename T> struct vec_op_cmp_ge : public op_each_vvv<T, vec_op_cmp_ge<T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b) noexcept { return a >= b ? Support::bit_ones<T> : T(0); }
};
template<typename T> struct vec_op_cmp_lt : public op_each_vvv<T, vec_op_cmp_lt<T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b) noexcept { return a < b ? Support::bit_ones<T> : T(0); }
};
template<typename T> struct vec_op_cmp_le : public op_each_vvv<T, vec_op_cmp_le<T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b) noexcept { return a <= b ? Support::bit_ones<T> : T(0); }
};
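// The scalar_op_* references below operate on lane 0 only; the kB (ScalarOpBehavior)
// parameter selects what the op_scalar_* helpers do with the remaining lanes:
// kZeroing clears them, while kPreservingVec128 keeps the remaining lanes of the
// first source (128-bit) vector. The runner queries UniCompiler::scalar_op_behavior()
// to instantiate the variant that matches the generated code.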
template<ScalarOpBehavior kB, typename T> struct scalar_op_fadd : public op_scalar_vvv<kB, T, scalar_op_fadd<kB, T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b) noexcept { return fadd(a, b); }
};
template<ScalarOpBehavior kB, typename T> struct scalar_op_fsub : public op_scalar_vvv<kB, T, scalar_op_fsub<kB, T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b) noexcept { return fsub(a, b); }
};
template<ScalarOpBehavior kB, typename T> struct scalar_op_fmul : public op_scalar_vvv<kB, T, scalar_op_fmul<kB, T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b) noexcept { return fmul(a, b); }
};
template<ScalarOpBehavior kB, typename T> struct scalar_op_fdiv : public op_scalar_vvv<kB, T, scalar_op_fdiv<kB, T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b) noexcept { return fdiv(a, b); }
};
template<ScalarOpBehavior kB, typename T> struct scalar_op_fmin_ternary : public op_scalar_vvv<kB, T, scalar_op_fmin_ternary<kB, T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b) noexcept { return a < b ? a : b; }
};
template<ScalarOpBehavior kB, typename T> struct scalar_op_fmax_ternary : public op_scalar_vvv<kB, T, scalar_op_fmax_ternary<kB, T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b) noexcept { return a > b ? a : b; }
};
template<ScalarOpBehavior kB, typename T> struct scalar_op_fmin_finite : public op_scalar_vvv<kB, T, scalar_op_fmin_finite<kB, T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b) noexcept { return std::isnan(a) ? b : std::isnan(b) ? a : Support::min(a, b); }
};
template<ScalarOpBehavior kB, typename T> struct scalar_op_fmax_finite : public op_scalar_vvv<kB, T, scalar_op_fmax_finite<kB, T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b) noexcept { return std::isnan(a) ? b : std::isnan(b) ? a : Support::max(a, b); }
};
template<ScalarOpBehavior kB, typename T> struct scalar_op_fmadd_nofma : public op_scalar_vvvv<kB, T, scalar_op_fmadd_nofma<kB, T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b, const T& c) noexcept { return fmadd_nofma_ref(a, b, c); }
};
template<ScalarOpBehavior kB, typename T> struct scalar_op_fmsub_nofma : public op_scalar_vvvv<kB, T, scalar_op_fmsub_nofma<kB, T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b, const T& c) noexcept { return fmadd_nofma_ref(a, b, -c); }
};
template<ScalarOpBehavior kB, typename T> struct scalar_op_fnmadd_nofma : public op_scalar_vvvv<kB, T, scalar_op_fnmadd_nofma<kB, T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b, const T& c) noexcept { return fmadd_nofma_ref(-a, b, c); }
};
template<ScalarOpBehavior kB, typename T> struct scalar_op_fnmsub_nofma : public op_scalar_vvvv<kB, T, scalar_op_fnmsub_nofma<kB, T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b, const T& c) noexcept { return fmadd_nofma_ref(-a, b, -c); }
};
template<ScalarOpBehavior kB, typename T> struct scalar_op_fmadd_fma : public op_scalar_vvvv<kB, T, scalar_op_fmadd_fma<kB, T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b, const T& c) noexcept { return fmadd_fma_ref(a, b, c); }
};
template<ScalarOpBehavior kB, typename T> struct scalar_op_fmsub_fma : public op_scalar_vvvv<kB, T, scalar_op_fmsub_fma<kB, T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b, const T& c) noexcept { return fmadd_fma_ref(a, b, -c); }
};
template<ScalarOpBehavior kB, typename T> struct scalar_op_fnmadd_fma : public op_scalar_vvvv<kB, T, scalar_op_fnmadd_fma<kB, T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b, const T& c) noexcept { return fmadd_fma_ref(-a, b, c); }
};
template<ScalarOpBehavior kB, typename T> struct scalar_op_fnmsub_fma : public op_scalar_vvvv<kB, T, scalar_op_fnmsub_fma<kB, T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b, const T& c) noexcept { return fmadd_fma_ref(-a, b, -c); }
};
template<typename T> struct vec_op_fadd : public op_each_vvv<T, vec_op_fadd<T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b) noexcept { return fadd(a, b); }
};
template<typename T> struct vec_op_fsub : public op_each_vvv<T, vec_op_fsub<T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b) noexcept { return fsub(a, b); }
};
template<typename T> struct vec_op_fmul : public op_each_vvv<T, vec_op_fmul<T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b) noexcept { return fmul(a, b); }
};
template<typename T> struct vec_op_fdiv : public op_each_vvv<T, vec_op_fdiv<T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b) noexcept { return fdiv(a, b); }
};
template<typename T> struct vec_op_fmin_ternary : public op_each_vvv<T, vec_op_fmin_ternary<T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b) noexcept { return a < b ? a : b; }
};
template<typename T> struct vec_op_fmax_ternary : public op_each_vvv<T, vec_op_fmax_ternary<T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b) noexcept { return a > b ? a : b; }
};
template<typename T> struct vec_op_fmin_finite : public op_each_vvv<T, vec_op_fmin_finite<T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b) noexcept { return std::isnan(a) ? b : std::isnan(b) ? a : Support::min(a, b); }
};
template<typename T> struct vec_op_fmax_finite : public op_each_vvv<T, vec_op_fmax_finite<T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b) noexcept { return std::isnan(a) ? b : std::isnan(b) ? a : Support::max(a, b); }
};
template<typename T> struct vec_op_fmadd_nofma : public op_each_vvvv<T, vec_op_fmadd_nofma<T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b, const T& c) noexcept { return fmadd_nofma_ref(a, b, c); }
};
template<typename T> struct vec_op_fmsub_nofma : public op_each_vvvv<T, vec_op_fmsub_nofma<T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b, const T& c) noexcept { return fmadd_nofma_ref(a, b, -c); }
};
template<typename T> struct vec_op_fnmadd_nofma : public op_each_vvvv<T, vec_op_fnmadd_nofma<T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b, const T& c) noexcept { return fmadd_nofma_ref(-a, b, c); }
};
template<typename T> struct vec_op_fnmsub_nofma : public op_each_vvvv<T, vec_op_fnmsub_nofma<T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b, const T& c) noexcept { return fmadd_nofma_ref(-a, b, -c); }
};
template<typename T> struct vec_op_fmadd_fma : public op_each_vvvv<T, vec_op_fmadd_fma<T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b, const T& c) noexcept { return fmadd_fma_ref(a, b, c); }
};
template<typename T> struct vec_op_fmsub_fma : public op_each_vvvv<T, vec_op_fmsub_fma<T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b, const T& c) noexcept { return fmadd_fma_ref(a, b, -c); }
};
template<typename T> struct vec_op_fnmadd_fma : public op_each_vvvv<T, vec_op_fnmadd_fma<T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b, const T& c) noexcept { return fmadd_fma_ref(-a, b, c); }
};
template<typename T> struct vec_op_fnmsub_fma : public op_each_vvvv<T, vec_op_fnmsub_fma<T>> {
static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b, const T& c) noexcept { return fmadd_fma_ref(-a, b, -c); }
};
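// The *_nofma references round twice (separate multiply, then add), while the *_fma
// references use fused multiply-add semantics with a single rounding. The runner
// below queries UniCompiler::fmadd_op_behavior() so that the reference always
// matches how the generated code evaluates multiply-add.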
template<typename T>
struct cmp_result {
typedef T Result;
static ASMJIT_INLINE_NODEBUG Result make(bool result) noexcept { return result ? Result(~Result(0)) : Result(0); }
};
template<>
struct cmp_result<float> {
typedef uint32_t Result;
static ASMJIT_INLINE_NODEBUG Result make(bool result) noexcept { return result ? ~Result(0) : Result(0); }
};
template<>
struct cmp_result<double> {
typedef uint64_t Result;
static ASMJIT_INLINE_NODEBUG Result make(bool result) noexcept { return result ? ~Result(0) : Result(0); }
};
template<typename T> struct vec_op_fcmpo_eq : public op_each_vvv<T, vec_op_fcmpo_eq<T>> {
static ASMJIT_INLINE_NODEBUG typename cmp_result<T>::Result apply_one(const T& a, const T& b) noexcept { return cmp_result<T>::make(a == b); }
};
template<typename T> struct vec_op_fcmpu_ne : public op_each_vvv<T, vec_op_fcmpu_ne<T>> {
static ASMJIT_INLINE_NODEBUG typename cmp_result<T>::Result apply_one(const T& a, const T& b) noexcept { return cmp_result<T>::make(!(a == b)); }
};
template<typename T> struct vec_op_fcmpo_gt : public op_each_vvv<T, vec_op_fcmpo_gt<T>> {
static ASMJIT_INLINE_NODEBUG typename cmp_result<T>::Result apply_one(const T& a, const T& b) noexcept { return cmp_result<T>::make(a > b); }
};
template<typename T> struct vec_op_fcmpo_ge : public op_each_vvv<T, vec_op_fcmpo_ge<T>> {
static ASMJIT_INLINE_NODEBUG typename cmp_result<T>::Result apply_one(const T& a, const T& b) noexcept { return cmp_result<T>::make(a >= b); }
};
template<typename T> struct vec_op_fcmpo_lt : public op_each_vvv<T, vec_op_fcmpo_lt<T>> {
static ASMJIT_INLINE_NODEBUG typename cmp_result<T>::Result apply_one(const T& a, const T& b) noexcept { return cmp_result<T>::make(a < b); }
};
template<typename T> struct vec_op_fcmpo_le : public op_each_vvv<T, vec_op_fcmpo_le<T>> {
static ASMJIT_INLINE_NODEBUG typename cmp_result<T>::Result apply_one(const T& a, const T& b) noexcept { return cmp_result<T>::make(a <= b); }
};
template<typename T> struct vec_op_fcmp_ord : public op_each_vvv<T, vec_op_fcmp_ord<T>> {
static ASMJIT_INLINE_NODEBUG typename cmp_result<T>::Result apply_one(const T& a, const T& b) noexcept { return cmp_result<T>::make(!std::isnan(a) && !std::isnan(b)); }
};
template<typename T> struct vec_op_fcmp_unord : public op_each_vvv<T, vec_op_fcmp_unord<T>> {
static ASMJIT_INLINE_NODEBUG typename cmp_result<T>::Result apply_one(const T& a, const T& b) noexcept { return cmp_result<T>::make(std::isnan(a) || std::isnan(b)); }
};
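// Naming convention for the float compare references: fcmpo_* predicates are ordered
// (a NaN on either side makes the comparison false, producing an all-zero mask),
// while fcmpu_ne is unordered (a NaN makes the operands compare not-equal, which
// !(a == b) captures). fcmp_ord / fcmp_unord test the NaN condition itself.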
struct vec_op_hadd_f64 {
template<uint32_t kW>
static ASMJIT_INLINE VecOverlay<kW> apply(const VecOverlay<kW>& a, const VecOverlay<kW>& b) noexcept {
VecOverlay<kW> out{};
for (uint32_t off = 0; off < kW; off += 16) {
out.data_f64[off / 8 + 0] = a.data_f64[off / 8 + 0] + a.data_f64[off / 8 + 1];
out.data_f64[off / 8 + 1] = b.data_f64[off / 8 + 0] + b.data_f64[off / 8 + 1];
}
return out;
}
};
struct vec_op_combine_lo_hi_u64 {
template<uint32_t kW>
static ASMJIT_INLINE VecOverlay<kW> apply(const VecOverlay<kW>& a, const VecOverlay<kW>& b) noexcept {
VecOverlay<kW> out{};
for (uint32_t off = 0; off < kW; off += 16) {
out.data_u64[off / 8 + 0] = b.data_u64[off / 8 + 1];
out.data_u64[off / 8 + 1] = a.data_u64[off / 8 + 0];
}
return out;
}
};
struct vec_op_combine_hi_lo_u64 {
template<uint32_t kW>
static ASMJIT_INLINE VecOverlay<kW> apply(const VecOverlay<kW>& a, const VecOverlay<kW>& b) noexcept {
VecOverlay<kW> out{};
for (uint32_t off = 0; off < kW; off += 16) {
out.data_u64[off / 8 + 0] = b.data_u64[off / 8 + 0];
out.data_u64[off / 8 + 1] = a.data_u64[off / 8 + 1];
}
return out;
}
};
struct vec_op_interleave_lo_u8 {
template<uint32_t kW>
static ASMJIT_INLINE VecOverlay<kW> apply(const VecOverlay<kW>& a, const VecOverlay<kW>& b) noexcept {
VecOverlay<kW> out{};
for (uint32_t off = 0; off < kW; off += 16) {
for (uint32_t i = 0; i < 8; i++) {
out.data_u8[off + i * 2 + 0] = a.data_u8[off + i];
out.data_u8[off + i * 2 + 1] = b.data_u8[off + i];
}
}
return out;
}
};
struct vec_op_interleave_hi_u8 {
template<uint32_t kW>
static ASMJIT_INLINE VecOverlay<kW> apply(const VecOverlay<kW>& a, const VecOverlay<kW>& b) noexcept {
VecOverlay<kW> out{};
for (uint32_t off = 0; off < kW; off += 16) {
for (uint32_t i = 0; i < 8; i++) {
out.data_u8[off + i * 2 + 0] = a.data_u8[off + 8 + i];
out.data_u8[off + i * 2 + 1] = b.data_u8[off + 8 + i];
}
}
return out;
}
};
struct vec_op_interleave_lo_u16 {
template<uint32_t kW>
static ASMJIT_INLINE VecOverlay<kW> apply(const VecOverlay<kW>& a, const VecOverlay<kW>& b) noexcept {
VecOverlay<kW> out{};
for (uint32_t off = 0; off < kW; off += 16) {
for (uint32_t i = 0; i < 4; i++) {
out.data_u16[off / 2 + i * 2 + 0] = a.data_u16[off / 2 + i];
out.data_u16[off / 2 + i * 2 + 1] = b.data_u16[off / 2 + i];
}
}
return out;
}
};
struct vec_op_interleave_hi_u16 {
template<uint32_t kW>
static ASMJIT_INLINE VecOverlay<kW> apply(const VecOverlay<kW>& a, const VecOverlay<kW>& b) noexcept {
VecOverlay<kW> out{};
for (uint32_t off = 0; off < kW; off += 16) {
for (uint32_t i = 0; i < 4; i++) {
out.data_u16[off / 2 + i * 2 + 0] = a.data_u16[off / 2 + 4 + i];
out.data_u16[off / 2 + i * 2 + 1] = b.data_u16[off / 2 + 4 + i];
}
}
return out;
}
};
struct vec_op_interleave_lo_u32 {
template<uint32_t kW>
static ASMJIT_INLINE VecOverlay<kW> apply(const VecOverlay<kW>& a, const VecOverlay<kW>& b) noexcept {
VecOverlay<kW> out{};
for (uint32_t off = 0; off < kW; off += 16) {
for (uint32_t i = 0; i < 2; i++) {
out.data_u32[off / 4 + i * 2 + 0] = a.data_u32[off / 4 + i];
out.data_u32[off / 4 + i * 2 + 1] = b.data_u32[off / 4 + i];
}
}
return out;
}
};
struct vec_op_interleave_hi_u32 {
template<uint32_t kW>
static ASMJIT_INLINE VecOverlay<kW> apply(const VecOverlay<kW>& a, const VecOverlay<kW>& b) noexcept {
VecOverlay<kW> out{};
for (uint32_t off = 0; off < kW; off += 16) {
for (uint32_t i = 0; i < 2; i++) {
out.data_u32[off / 4 + i * 2 + 0] = a.data_u32[off / 4 + 2 + i];
out.data_u32[off / 4 + i * 2 + 1] = b.data_u32[off / 4 + 2 + i];
}
}
return out;
}
};
struct vec_op_interleave_lo_u64 {
template<uint32_t kW>
static ASMJIT_INLINE VecOverlay<kW> apply(const VecOverlay<kW>& a, const VecOverlay<kW>& b) noexcept {
VecOverlay<kW> out{};
for (uint32_t off = 0; off < kW; off += 16) {
out.data_u64[off / 8 + 0] = a.data_u64[off / 8 + 0];
out.data_u64[off / 8 + 1] = b.data_u64[off / 8 + 0];
}
return out;
}
};
struct vec_op_interleave_hi_u64 {
template<uint32_t kW>
static ASMJIT_INLINE VecOverlay<kW> apply(const VecOverlay<kW>& a, const VecOverlay<kW>& b) noexcept {
VecOverlay<kW> out{};
for (uint32_t off = 0; off < kW; off += 16) {
out.data_u64[off / 8 + 0] = a.data_u64[off / 8 + 1];
out.data_u64[off / 8 + 1] = b.data_u64[off / 8 + 1];
}
return out;
}
};
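// The interleave references model the punpckl*/punpckh* family per 128-bit lane:
// the lo variants interleave elements taken from the low half of each input lane,
// the hi variants from the high half, alternating an `a` element and a `b` element.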
// ujit::UniCompiler - Tests - SIMD - Generic Operations - VVVI
// ============================================================
struct vec_op_alignr_u128 {
template<uint32_t kW>
static ASMJIT_INLINE VecOverlay<kW> apply(const VecOverlay<kW>& a, const VecOverlay<kW>& b, uint32_t imm) noexcept {
VecOverlay<kW> out{};
for (uint32_t off = 0; off < kW; off += 16) {
for (uint32_t i = 0; i < 16; i++) {
out.data_u8[off + i] = i + imm < 16 ? b.data_u8[off + i + imm] : a.data_u8[off + i + imm - 16];
}
}
return out;
}
};
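// vec_op_alignr_u128 models a per-128-bit-lane byte alignment: conceptually the lane
// of `a` is concatenated above the lane of `b` and the combined 32 bytes are shifted
// right by `imm` bytes, keeping the low 16. This is the same semantics as x86
// palignr applied independently to each 128-bit lane.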
struct vec_op_interleave_shuffle_u32x4 {
template<uint32_t kW>
static ASMJIT_INLINE VecOverlay<kW> apply(const VecOverlay<kW>& a, const VecOverlay<kW>& b, uint32_t imm) noexcept {
uint32_t D = (imm >> 24) & 0x3;
uint32_t C = (imm >> 16) & 0x3;
uint32_t B = (imm >> 8) & 0x3;
uint32_t A = (imm >> 0) & 0x3;
VecOverlay<kW> out{};
for (uint32_t off = 0; off < kW; off += 16) {
out.data_u32[off / 4 + 0] = a.data_u32[off / 4 + A];
out.data_u32[off / 4 + 1] = a.data_u32[off / 4 + B];
out.data_u32[off / 4 + 2] = b.data_u32[off / 4 + C];
out.data_u32[off / 4 + 3] = b.data_u32[off / 4 + D];
}
return out;
}
};
struct vec_op_interleave_shuffle_u64x2 {
template<uint32_t kW>
static ASMJIT_INLINE VecOverlay<kW> apply(const VecOverlay<kW>& a, const VecOverlay<kW>& b, uint32_t imm) noexcept {
uint32_t B = (imm >> 8) & 0x1;
uint32_t A = (imm >> 0) & 0x1;
VecOverlay<kW> out{};
for (uint32_t off = 0; off < kW; off += 16) {
out.data_u64[off / 8 + 0] = a.data_u64[off / 8 + A];
out.data_u64[off / 8 + 1] = b.data_u64[off / 8 + B];
}
return out;
}
};
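// The imm for the interleave-shuffle references packs one selector per output
// element, one byte each starting from the least significant byte (2 bits per index
// for u32x4, 1 bit for u64x2). Lower output elements select from `a`, upper ones
// from `b`, matching the swizzle(...) encoding used by the runner below.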
struct vec_op_packs_i16_i8 {
template<uint32_t kW>
static ASMJIT_INLINE VecOverlay<kW> apply(const VecOverlay<kW>& a, const VecOverlay<kW>& b) noexcept {
VecOverlay<kW> out{};
for (uint32_t off = 0; off < kW; off += 16) {
out.data_i8[off + 0] = saturate_i16_to_i8(a.data_i16[off / 2 + 0]);
out.data_i8[off + 1] = saturate_i16_to_i8(a.data_i16[off / 2 + 1]);
out.data_i8[off + 2] = saturate_i16_to_i8(a.data_i16[off / 2 + 2]);
out.data_i8[off + 3] = saturate_i16_to_i8(a.data_i16[off / 2 + 3]);
out.data_i8[off + 4] = saturate_i16_to_i8(a.data_i16[off / 2 + 4]);
out.data_i8[off + 5] = saturate_i16_to_i8(a.data_i16[off / 2 + 5]);
out.data_i8[off + 6] = saturate_i16_to_i8(a.data_i16[off / 2 + 6]);
out.data_i8[off + 7] = saturate_i16_to_i8(a.data_i16[off / 2 + 7]);
out.data_i8[off + 8] = saturate_i16_to_i8(b.data_i16[off / 2 + 0]);
out.data_i8[off + 9] = saturate_i16_to_i8(b.data_i16[off / 2 + 1]);
out.data_i8[off + 10] = saturate_i16_to_i8(b.data_i16[off / 2 + 2]);
out.data_i8[off + 11] = saturate_i16_to_i8(b.data_i16[off / 2 + 3]);
out.data_i8[off + 12] = saturate_i16_to_i8(b.data_i16[off / 2 + 4]);
out.data_i8[off + 13] = saturate_i16_to_i8(b.data_i16[off / 2 + 5]);
out.data_i8[off + 14] = saturate_i16_to_i8(b.data_i16[off / 2 + 6]);
out.data_i8[off + 15] = saturate_i16_to_i8(b.data_i16[off / 2 + 7]);
}
return out;
}
};
struct vec_op_packs_i16_u8 {
template<uint32_t kW>
static ASMJIT_INLINE VecOverlay<kW> apply(const VecOverlay<kW>& a, const VecOverlay<kW>& b) noexcept {
VecOverlay<kW> out{};
for (uint32_t off = 0; off < kW; off += 16) {
out.data_u8[off + 0] = saturate_i16_to_u8(a.data_i16[off / 2 + 0]);
out.data_u8[off + 1] = saturate_i16_to_u8(a.data_i16[off / 2 + 1]);
out.data_u8[off + 2] = saturate_i16_to_u8(a.data_i16[off / 2 + 2]);
out.data_u8[off + 3] = saturate_i16_to_u8(a.data_i16[off / 2 + 3]);
out.data_u8[off + 4] = saturate_i16_to_u8(a.data_i16[off / 2 + 4]);
out.data_u8[off + 5] = saturate_i16_to_u8(a.data_i16[off / 2 + 5]);
out.data_u8[off + 6] = saturate_i16_to_u8(a.data_i16[off / 2 + 6]);
out.data_u8[off + 7] = saturate_i16_to_u8(a.data_i16[off / 2 + 7]);
out.data_u8[off + 8] = saturate_i16_to_u8(b.data_i16[off / 2 + 0]);
out.data_u8[off + 9] = saturate_i16_to_u8(b.data_i16[off / 2 + 1]);
out.data_u8[off + 10] = saturate_i16_to_u8(b.data_i16[off / 2 + 2]);
out.data_u8[off + 11] = saturate_i16_to_u8(b.data_i16[off / 2 + 3]);
out.data_u8[off + 12] = saturate_i16_to_u8(b.data_i16[off / 2 + 4]);
out.data_u8[off + 13] = saturate_i16_to_u8(b.data_i16[off / 2 + 5]);
out.data_u8[off + 14] = saturate_i16_to_u8(b.data_i16[off / 2 + 6]);
out.data_u8[off + 15] = saturate_i16_to_u8(b.data_i16[off / 2 + 7]);
}
return out;
}
};
struct vec_op_packs_i32_i16 {
template<uint32_t kW>
static ASMJIT_INLINE VecOverlay<kW> apply(const VecOverlay<kW>& a, const VecOverlay<kW>& b) noexcept {
VecOverlay<kW> out{};
for (uint32_t off = 0; off < kW; off += 16) {
out.data_i16[off / 2 + 0] = saturate_i32_to_i16(a.data_i32[off / 4 + 0]);
out.data_i16[off / 2 + 1] = saturate_i32_to_i16(a.data_i32[off / 4 + 1]);
out.data_i16[off / 2 + 2] = saturate_i32_to_i16(a.data_i32[off / 4 + 2]);
out.data_i16[off / 2 + 3] = saturate_i32_to_i16(a.data_i32[off / 4 + 3]);
out.data_i16[off / 2 + 4] = saturate_i32_to_i16(b.data_i32[off / 4 + 0]);
out.data_i16[off / 2 + 5] = saturate_i32_to_i16(b.data_i32[off / 4 + 1]);
out.data_i16[off / 2 + 6] = saturate_i32_to_i16(b.data_i32[off / 4 + 2]);
out.data_i16[off / 2 + 7] = saturate_i32_to_i16(b.data_i32[off / 4 + 3]);
}
return out;
}
};
struct vec_op_packs_i32_u16 {
template<uint32_t kW>
static ASMJIT_INLINE VecOverlay<kW> apply(const VecOverlay<kW>& a, const VecOverlay<kW>& b) noexcept {
VecOverlay<kW> out{};
for (uint32_t off = 0; off < kW; off += 16) {
out.data_u16[off / 2 + 0] = saturate_i32_to_u16(a.data_i32[off / 4 + 0]);
out.data_u16[off / 2 + 1] = saturate_i32_to_u16(a.data_i32[off / 4 + 1]);
out.data_u16[off / 2 + 2] = saturate_i32_to_u16(a.data_i32[off / 4 + 2]);
out.data_u16[off / 2 + 3] = saturate_i32_to_u16(a.data_i32[off / 4 + 3]);
out.data_u16[off / 2 + 4] = saturate_i32_to_u16(b.data_i32[off / 4 + 0]);
out.data_u16[off / 2 + 5] = saturate_i32_to_u16(b.data_i32[off / 4 + 1]);
out.data_u16[off / 2 + 6] = saturate_i32_to_u16(b.data_i32[off / 4 + 2]);
out.data_u16[off / 2 + 7] = saturate_i32_to_u16(b.data_i32[off / 4 + 3]);
}
return out;
}
};
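// The pack references narrow with saturation per 128-bit lane: the low half of each
// output lane is packed from `a`, the high half from `b`, matching the lane-wise
// behavior of the x86 pack instructions (packsswb / packuswb / packssdw / packusdw).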
// ujit::UniCompiler - Tests - SIMD - Generic Operations - VVVV
// ============================================================
struct vec_op_blendv_bits : public op_each_vvvv<uint32_t, vec_op_blendv_bits> {
static ASMJIT_INLINE_NODEBUG uint32_t apply_one(const uint32_t& a, const uint32_t& b, const uint32_t& c) noexcept { return ((a & ~c) | (b & c)); }
};
struct vec_op_swizzlev_u8 {
template<uint32_t kW>
static ASMJIT_INLINE VecOverlay<kW> apply(const VecOverlay<kW>& a, const VecOverlay<kW>& b) noexcept {
VecOverlay<kW> out{};
for (uint32_t off = 0; off < kW; off += 16) {
for (uint32_t i = 0; i < 16; i++) {
        size_t sel = b.data_u8[off + i] & 0x8Fu; // Bits 4..6 are ignored; bit 7 selects zeroing.
out.data_u8[off + i] = sel & 0x80 ? uint8_t(0) : a.data_u8[off + sel];
}
}
return out;
}
};
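// vec_op_swizzlev_u8 follows pshufb-style selector semantics within each 128-bit
// lane: a selector byte with bit 7 set zeroes the output byte; otherwise its low
// 4 bits index a byte of `a` within the same lane.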
struct vec_op_div255_u16 : public op_each_vv<uint16_t, vec_op_div255_u16> {
static ASMJIT_INLINE uint16_t apply_one(const uint16_t& a) noexcept {
uint32_t x = a + 0x80u;
return uint16_t((x + (x >> 8)) >> 8);
}
};
struct vec_op_div65535_u32 : public op_each_vv<uint32_t, vec_op_div65535_u32> {
static ASMJIT_INLINE uint32_t apply_one(const uint32_t& a) noexcept {
uint32_t x = a + 0x8000u;
return uint32_t((x + (x >> 16)) >> 16);
}
};
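// Both helpers implement rounded division by 2^n - 1 using only adds and shifts:
// with x' = x + 2^(n-1), the value ((x' + (x' >> n)) >> n) equals round(x / (2^n - 1))
// over the product ranges used here (x = a * b with n-bit a and b). A few
// compile-time spot checks of the u16 variant:
static_assert((((    0u + 0x80u) + ((    0u + 0x80u) >> 8)) >> 8) ==   0u, "div255(0) must be 0");
static_assert((((  128u + 0x80u) + ((  128u + 0x80u) >> 8)) >> 8) ==   1u, "div255(128) must round up to 1");
static_assert((((65025u + 0x80u) + ((65025u + 0x80u) >> 8)) >> 8) == 255u, "div255(255 * 255) must be 255");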
// ujit::UniCompiler - Tests - SIMD - Utilities
// ============================================
template<uint32_t kW>
static void fill_random_bytes(DataGenInt& dg, VecOverlay<kW>& dst) noexcept {
for (uint32_t i = 0; i < kW / 8u; i++) {
dst.data_u64[i] = dg.next_uint64();
}
}
template<uint32_t kW>
static void fill_random_f32(DataGenInt& dg, VecOverlay<kW>& dst) noexcept {
for (uint32_t i = 0; i < kW / 4u; i++) {
dst.data_f32[i] = dg.next_float32();
}
}
template<uint32_t kW>
static void fill_random_f64(DataGenInt& dg, VecOverlay<kW>& dst) noexcept {
for (uint32_t i = 0; i < kW / 8u; i++) {
dst.data_f64[i] = dg.next_float64();
}
}
template<uint32_t kW>
static void fill_random_data(DataGenInt& dg, VecOverlay<kW>& dst, VecElementType element_type) noexcept {
switch (element_type) {
case VecElementType::kFloat32:
fill_random_f32(dg, dst);
break;
case VecElementType::kFloat64:
fill_random_f64(dg, dst);
break;
default:
fill_random_bytes(dg, dst);
break;
}
}
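// Note: float lanes are filled by dedicated generators rather than by reinterpreting
// random bytes, presumably because arbitrary bit patterns decoded as float/double
// frequently yield NaNs and denormals, which would dominate the floating-point tests.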
// ujit::UniCompiler - Tests - SIMD - Verification
// ===============================================
template<uint32_t kW>
static ASMJIT_NOINLINE void test_vecop_vv_failed(UniOpVV op, Variation variation, const VecOverlay<kW>& arg0, const VecOverlay<kW>& observed, const VecOverlay<kW>& expected, const char* assembly) noexcept {
VecOpInfo op_info = vec_op_info_vv(op);
String arg0_str = vec_stringify(arg0, op_info.arg(0));
String observed_str = vec_stringify(observed, op_info.ret());
String expected_str = vec_stringify(expected, op_info.ret());
EXPECT(false)
.message("Operation '%s' (variation %u) failed:\n"
" Input #0: %s\n"
" Expected: %s\n"
" Observed: %s\n"
"Assembly:\n%s",
vec_op_name_vv(op),
variation.value,
arg0_str.data(),
expected_str.data(),
observed_str.data(),
assembly);
}
template<uint32_t kW>
static ASMJIT_NOINLINE void test_vecop_vvi_failed(UniOpVVI op, Variation variation, const VecOverlay<kW>& arg0, const VecOverlay<kW>& observed, const VecOverlay<kW>& expected, uint32_t imm, const char* assembly) noexcept {
VecOpInfo op_info = vec_op_info_vvi(op);
String arg0_str = vec_stringify(arg0, op_info.arg(0));
String observed_str = vec_stringify(observed, op_info.ret());
String expected_str = vec_stringify(expected, op_info.ret());
EXPECT(false)
.message("Operation '%s' (variation %u) failed:\n"
" Input #0: %s\n"
" ImmValue: %u (0x%08X)\n"
" Expected: %s\n"
" Observed: %s\n"
"Assembly:\n%s",
vec_op_name_vvi(op),
variation.value,
arg0_str.data(),
imm, imm,
expected_str.data(),
observed_str.data(),
assembly);
}
template<uint32_t kW>
static ASMJIT_NOINLINE void test_vecop_vvv_failed(UniOpVVV op, Variation variation, const VecOverlay<kW>& arg0, const VecOverlay<kW>& arg1, const VecOverlay<kW>& observed, const VecOverlay<kW>& expected, const char* assembly) noexcept {
VecOpInfo op_info = vec_op_info_vvv(op);
String arg0_str = vec_stringify(arg0, op_info.arg(0));
String arg1_str = vec_stringify(arg1, op_info.arg(1));
String observed_str = vec_stringify(observed, op_info.ret());
String expected_str = vec_stringify(expected, op_info.ret());
EXPECT(false)
.message("Operation '%s' (variation %u) failed:\n"
" Input #0: %s\n"
" Input #1: %s\n"
" Expected: %s\n"
" Observed: %s\n"
"Assembly:\n%s",
vec_op_name_vvv(op),
variation.value,
arg0_str.data(),
arg1_str.data(),
expected_str.data(),
observed_str.data(),
assembly);
}
template<uint32_t kW>
static ASMJIT_NOINLINE void test_vecop_vvvi_failed(UniOpVVVI op, Variation variation, const VecOverlay<kW>& arg0, const VecOverlay<kW>& arg1, const VecOverlay<kW>& observed, const VecOverlay<kW>& expected, uint32_t imm, const char* assembly) noexcept {
VecOpInfo op_info = vec_op_info_vvvi(op);
String arg0_str = vec_stringify(arg0, op_info.arg(0));
String arg1_str = vec_stringify(arg1, op_info.arg(1));
String observed_str = vec_stringify(observed, op_info.ret());
String expected_str = vec_stringify(expected, op_info.ret());
EXPECT(false)
.message("Operation '%s' (variation %u) failed:\n"
" Input #1: %s\n"
" Input #2: %s\n"
" ImmValue: %u (0x%08X)\n"
" Expected: %s\n"
" Observed: %s\n"
"Assembly:\n%s",
vec_op_name_vvvi(op),
variation.value,
arg0_str.data(),
arg1_str.data(),
imm, imm,
expected_str.data(),
observed_str.data(),
assembly);
}
template<uint32_t kW>
static ASMJIT_NOINLINE void test_vecop_vvvv_failed(UniOpVVVV op, Variation variation, const VecOverlay<kW>& arg0, const VecOverlay<kW>& arg1, const VecOverlay<kW>& arg2, const VecOverlay<kW>& observed, const VecOverlay<kW>& expected, const char* assembly) noexcept {
VecOpInfo op_info = vec_op_info_vvvv(op);
String arg0_str = vec_stringify(arg0, op_info.arg(0));
String arg1_str = vec_stringify(arg1, op_info.arg(1));
String arg2_str = vec_stringify(arg2, op_info.arg(2));
String observed_str = vec_stringify(observed, op_info.ret());
String expected_str = vec_stringify(expected, op_info.ret());
EXPECT(false)
.message("Operation '%s' (variation %u) failed\n"
" Input #1: %s\n"
" Input #2: %s\n"
" Input #3: %s\n"
" Expected: %s\n"
" Observed: %s\n"
"Assembly:\n%s",
vec_op_name_vvvv(op),
variation.value,
arg0_str.data(),
arg1_str.data(),
arg2_str.data(),
expected_str.data(),
observed_str.data(),
assembly);
}
// ujit::UniCompiler - Tests - SIMD - Integer Operations - VV
// ==========================================================
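// Shared pattern of all test_vecop_* drivers below: compile the universal op through
// UniCompiler into a callable function, then run kTestIterCount iterations comparing
// it against the C++ reference (GenericOp) on randomized inputs. The optional
// Constraint hook can restrict generated inputs before each call, and on a mismatch
// the captured assembly listing is printed along with inputs, expected, and observed
// values.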
template<VecWidth kVecWidth, UniOpVV kOp, typename GenericOp, typename Constraint>
static ASMJIT_NOINLINE void test_vecop_vv_constraint(JitContext& ctx, Variation variation = Variation{0}) noexcept {
constexpr uint32_t kW = byte_width_from_vec_width(kVecWidth);
TestVVFunc compiled_apply = create_func_vv(ctx, kVecWidth, kOp, variation);
DataGenInt dg(kRandomSeed);
VecOpInfo op_info = vec_op_info_vv(kOp);
for (uint32_t iter = 0; iter < kTestIterCount; iter++) {
VecOverlay<kW> a {};
VecOverlay<kW> observed {};
VecOverlay<kW> expected {};
fill_random_data(dg, a, op_info.arg(0));
Constraint::apply(a);
compiled_apply(&observed, &a);
expected = GenericOp::apply(a);
if (!vec_eq(observed, expected, op_info.ret())) {
test_vecop_vv_failed(kOp, variation, a, observed, expected, ctx.logger_content());
}
}
ctx.rt.release(compiled_apply);
}
template<VecWidth kVecWidth, UniOpVV kOp, typename GenericOp>
static void test_vecop_vv(JitContext& ctx, Variation variation = Variation{0}) noexcept {
return test_vecop_vv_constraint<kVecWidth, kOp, GenericOp, ConstraintNone>(ctx, variation);
}
// ujit::UniCompiler - Tests - SIMD - Integer Operations - VVI
// ===========================================================
template<VecWidth kVecWidth, UniOpVVI kOp, typename GenericOp, typename Constraint>
static ASMJIT_NOINLINE void test_vecop_vvi_constraint(JitContext& ctx, uint32_t imm, Variation variation = Variation{0}) noexcept {
constexpr uint32_t kW = byte_width_from_vec_width(kVecWidth);
TestVVFunc compiled_apply = create_func_vvi(ctx, kVecWidth, kOp, imm, variation);
DataGenInt dg(kRandomSeed);
VecOpInfo op_info = vec_op_info_vvi(kOp);
for (uint32_t iter = 0; iter < kTestIterCount; iter++) {
VecOverlay<kW> a {};
VecOverlay<kW> observed {};
VecOverlay<kW> expected {};
fill_random_data(dg, a, op_info.arg(0));
Constraint::apply(a);
compiled_apply(&observed, &a);
expected = GenericOp::apply(a, imm);
if (!vec_eq(observed, expected, op_info.ret())) {
test_vecop_vvi_failed(kOp, variation, a, observed, expected, imm, ctx.logger_content());
}
}
ctx.rt.release(compiled_apply);
}
template<VecWidth kVecWidth, UniOpVVI kOp, typename GenericOp>
static void test_vecop_vvi(JitContext& ctx, uint32_t imm, Variation variation = Variation{0}) noexcept {
return test_vecop_vvi_constraint<kVecWidth, kOp, GenericOp, ConstraintNone>(ctx, imm, variation);
}
// ujit::UniCompiler - Tests - SIMD - Integer Operations - VVV
// ===========================================================
template<VecWidth kVecWidth, UniOpVVV kOp, typename GenericOp, typename Constraint>
static ASMJIT_NOINLINE void test_vecop_vvv_constraint(JitContext& ctx, Variation variation = Variation{0}) noexcept {
constexpr uint32_t kW = byte_width_from_vec_width(kVecWidth);
TestVVVFunc compiled_apply = create_func_vvv(ctx, kVecWidth, kOp, variation);
DataGenInt dg(kRandomSeed);
VecOpInfo op_info = vec_op_info_vvv(kOp);
for (uint32_t iter = 0; iter < kTestIterCount; iter++) {
VecOverlay<kW> a {};
VecOverlay<kW> b {};
VecOverlay<kW> observed {};
VecOverlay<kW> expected {};
fill_random_data(dg, a, op_info.arg(0));
fill_random_data(dg, b, op_info.arg(1));
Constraint::apply(a);
Constraint::apply(b);
compiled_apply(&observed, &a, &b);
expected = GenericOp::apply(a, b);
if (!vec_eq(observed, expected, op_info.ret())) {
test_vecop_vvv_failed(kOp, variation, a, b, observed, expected, ctx.logger_content());
}
}
ctx.rt.release(compiled_apply);
}
template<VecWidth kVecWidth, UniOpVVV kOp, typename GenericOp>
static void test_vecop_vvv(JitContext& ctx, Variation variation = Variation{0}) noexcept {
return test_vecop_vvv_constraint<kVecWidth, kOp, GenericOp, ConstraintNone>(ctx, variation);
}
// ujit::UniCompiler - Tests - SIMD - Integer Operations - VVVI
// ============================================================
template<VecWidth kVecWidth, UniOpVVVI kOp, typename GenericOp, typename Constraint>
static ASMJIT_NOINLINE void test_vecop_vvvi_constraint(JitContext& ctx, uint32_t imm, Variation variation = Variation{0}) noexcept {
constexpr uint32_t kW = byte_width_from_vec_width(kVecWidth);
TestVVVFunc compiled_apply = create_func_vvvi(ctx, kVecWidth, kOp, imm, variation);
DataGenInt dg(kRandomSeed);
VecOpInfo op_info = vec_op_info_vvvi(kOp);
for (uint32_t iter = 0; iter < kTestIterCount; iter++) {
VecOverlay<kW> a {};
VecOverlay<kW> b {};
VecOverlay<kW> observed {};
VecOverlay<kW> expected {};
fill_random_data(dg, a, op_info.arg(0));
fill_random_data(dg, b, op_info.arg(1));
Constraint::apply(a);
Constraint::apply(b);
compiled_apply(&observed, &a, &b);
expected = GenericOp::apply(a, b, imm);
if (!vec_eq(observed, expected, op_info.ret())) {
test_vecop_vvvi_failed(kOp, variation, a, b, observed, expected, imm, ctx.logger_content());
}
}
ctx.rt.release(compiled_apply);
}
template<VecWidth kVecWidth, UniOpVVVI kOp, typename GenericOp>
static void test_vecop_vvvi(JitContext& ctx, uint32_t imm, Variation variation = Variation{0}) noexcept {
return test_vecop_vvvi_constraint<kVecWidth, kOp, GenericOp, ConstraintNone>(ctx, imm, variation);
}
// ujit::UniCompiler - Tests - SIMD - Integer Operations - VVVV
// ============================================================
template<VecWidth kVecWidth, UniOpVVVV kOp, typename GenericOp, typename Constraint>
static ASMJIT_NOINLINE void test_vecop_vvvv_constraint(JitContext& ctx, Variation variation = Variation{0}) noexcept {
constexpr uint32_t kW = byte_width_from_vec_width(kVecWidth);
TestVVVVFunc compiled_apply = create_func_vvvv(ctx, kVecWidth, kOp, variation);
DataGenInt dg(kRandomSeed);
VecOpInfo op_info = vec_op_info_vvvv(kOp);
for (uint32_t iter = 0; iter < kTestIterCount; iter++) {
VecOverlay<kW> a {};
VecOverlay<kW> b {};
VecOverlay<kW> c {};
VecOverlay<kW> observed {};
VecOverlay<kW> expected {};
fill_random_data(dg, a, op_info.arg(0));
fill_random_data(dg, b, op_info.arg(1));
fill_random_data(dg, c, op_info.arg(2));
Constraint::apply(a);
Constraint::apply(b);
Constraint::apply(c);
compiled_apply(&observed, &a, &b, &c);
expected = GenericOp::apply(a, b, c);
if (!vec_eq(observed, expected, op_info.ret())) {
test_vecop_vvvv_failed(kOp, variation, a, b, c, observed, expected, ctx.logger_content());
}
}
  ctx.rt.release(compiled_apply);
}
template<VecWidth kVecWidth, UniOpVVVV kOp, typename GenericOp>
static void test_vecop_vvvv(JitContext& ctx, Variation variation = Variation{0}) noexcept {
return test_vecop_vvvv_constraint<kVecWidth, kOp, GenericOp, ConstraintNone>(ctx, variation);
}
// ujit::UniCompiler - Tests - SIMD - Runner
// =========================================
template<VecWidth kVecWidth>
static ASMJIT_NOINLINE void test_simd_ops(JitContext& ctx) noexcept {
  // We need to know some behaviors in advance to select the right test function,
  // so create a dummy compiler and extract the necessary information from it.
ScalarOpBehavior scalar_op_behavior {};
FMAddOpBehavior fmadd_op_behavior {};
FloatToIntOutsideRangeBehavior float_to_int_behavior {};
{
ctx.prepare();
UniCompiler pc(&ctx.cc, ctx.features, ctx.cpu_hints);
scalar_op_behavior = pc.scalar_op_behavior();
fmadd_op_behavior = pc.fmadd_op_behavior();
float_to_int_behavior = pc.float_to_int_outside_range_behavior();
}
bool valgrind_fma_bug = false;
#if defined(ASMJIT_UJIT_X86)
  // When running under Valgrind there is a bug in its instrumentation of FMA SS/SD instructions:
  // instead of keeping the unaffected elements in the destination register, it clears them, which
  // would cause test failures. So, detect whether we are running under a Valgrind build that has
  // this bug and avoid scalar FMA tests in that case.
if (fmadd_op_behavior != FMAddOpBehavior::kNoFMA) {
float a[4] = { 1, 2, 3, 4 };
float b[4] = { 2, 4, 8, 1 };
float c[4] = { 4, 7, 3, 9 };
float d[4] {};
madd_fma_check_valgrind_bug(a, b, c, d);
valgrind_fma_bug = d[1] == 0.0f;
}
#endif // ASMJIT_UJIT_X86
INFO(" Testing mov");
{
test_vecop_vv<kVecWidth, UniOpVV::kMov, vec_op_mov>(ctx);
test_vecop_vv<kVecWidth, UniOpVV::kMovU64, vec_op_mov_u64>(ctx);
}
INFO(" Testing broadcast");
{
// Test all broadcasts - vector based, GP to vector, and memory to vector.
for (uint32_t v = 0; v < kNumVariationsVV_Broadcast; v++) {
test_vecop_vv<kVecWidth, UniOpVV::kBroadcastU8Z, vec_op_broadcast_u8>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kBroadcastU16Z, vec_op_broadcast_u16>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kBroadcastU8, vec_op_broadcast_u8>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kBroadcastU16, vec_op_broadcast_u16>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kBroadcastU32, vec_op_broadcast_u32>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kBroadcastU64, vec_op_broadcast_u64>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kBroadcastF32, vec_op_broadcast_u32>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kBroadcastF64, vec_op_broadcast_u64>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kBroadcastV128_U32, vec_op_broadcast_u128>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kBroadcastV128_U64, vec_op_broadcast_u128>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kBroadcastV128_F32, vec_op_broadcast_u128>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kBroadcastV128_F64, vec_op_broadcast_u128>(ctx, Variation{v});
if constexpr (kVecWidth > VecWidth::k256) {
test_vecop_vv<kVecWidth, UniOpVV::kBroadcastV256_U32, vec_op_broadcast_u256>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kBroadcastV256_U64, vec_op_broadcast_u256>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kBroadcastV256_F32, vec_op_broadcast_u256>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kBroadcastV256_F64, vec_op_broadcast_u256>(ctx, Variation{v});
}
}
}
INFO(" Testing abs (int)");
{
for (uint32_t v = 0; v < kNumVariationsVV; v++) {
test_vecop_vv<kVecWidth, UniOpVV::kAbsI8, vec_op_abs<int8_t>>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kAbsI16, vec_op_abs<int16_t>>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kAbsI32, vec_op_abs<int32_t>>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kAbsI64, vec_op_abs<int64_t>>(ctx, Variation{v});
}
}
INFO(" Testing not (int)");
{
for (uint32_t v = 0; v < kNumVariationsVV; v++) {
test_vecop_vv<kVecWidth, UniOpVV::kNotU32, vec_op_not<uint32_t>>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kNotU64, vec_op_not<uint64_t>>(ctx, Variation{v});
}
}
INFO(" Testing cvt (int)");
{
for (uint32_t v = 0; v < kNumVariationsVV; v++) {
test_vecop_vv<kVecWidth, UniOpVV::kCvtI8LoToI16, vec_op_cvt_i8_lo_to_i16>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kCvtI8HiToI16, vec_op_cvt_i8_hi_to_i16>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kCvtU8LoToU16, vec_op_cvt_u8_lo_to_u16>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kCvtU8HiToU16, vec_op_cvt_u8_hi_to_u16>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kCvtI8ToI32, vec_op_cvt_i8_to_i32>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kCvtU8ToU32, vec_op_cvt_u8_to_u32>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kCvtI16LoToI32, vec_op_cvt_i16_lo_to_i32>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kCvtI16HiToI32, vec_op_cvt_i16_hi_to_i32>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kCvtU16LoToU32, vec_op_cvt_u16_lo_to_u32>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kCvtU16HiToU32, vec_op_cvt_u16_hi_to_u32>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kCvtI32LoToI64, vec_op_cvt_i32_lo_to_i64>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kCvtI32HiToI64, vec_op_cvt_i32_hi_to_i64>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kCvtU32LoToU64, vec_op_cvt_u32_lo_to_u64>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kCvtU32HiToU64, vec_op_cvt_u32_hi_to_u64>(ctx, Variation{v});
}
}
INFO(" Testing abs (float)");
{
for (uint32_t v = 0; v < kNumVariationsVV; v++) {
test_vecop_vv<kVecWidth, UniOpVV::kAbsF32, vec_op_fabs<float>>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kAbsF64, vec_op_fabs<double>>(ctx, Variation{v});
}
}
INFO(" Testing not (float)");
{
for (uint32_t v = 0; v < kNumVariationsVV; v++) {
test_vecop_vv<kVecWidth, UniOpVV::kNotF32, vec_op_not<uint32_t>>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kNotF64, vec_op_not<uint64_t>>(ctx, Variation{v});
}
}
INFO(" Testing rounding (float)");
{
for (uint32_t v = 0; v < kNumVariationsVV; v++) {
// Variation 2 means that the source operand is memory, which would ALWAYS zero the rest of the register.
if (scalar_op_behavior == ScalarOpBehavior::kZeroing || v == 2u) {
test_vecop_vv<kVecWidth, UniOpVV::kTruncF32S, scalar_op_trunc<ScalarOpBehavior::kZeroing, float>>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kTruncF64S, scalar_op_trunc<ScalarOpBehavior::kZeroing, double>>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kFloorF32S, scalar_op_floor<ScalarOpBehavior::kZeroing, float>>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kFloorF64S, scalar_op_floor<ScalarOpBehavior::kZeroing, double>>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kCeilF32S, scalar_op_ceil<ScalarOpBehavior::kZeroing, float>>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kCeilF64S, scalar_op_ceil<ScalarOpBehavior::kZeroing, double>>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kRoundEvenF32S, scalar_op_round_even<ScalarOpBehavior::kZeroing, float>>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kRoundEvenF64S, scalar_op_round_even<ScalarOpBehavior::kZeroing, double>>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kRoundHalfAwayF32S, scalar_op_round_half_away<ScalarOpBehavior::kZeroing, float>>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kRoundHalfAwayF64S, scalar_op_round_half_away<ScalarOpBehavior::kZeroing, double>>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kRoundHalfUpF32S, scalar_op_round_half_up<ScalarOpBehavior::kZeroing, float>>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kRoundHalfUpF64S, scalar_op_round_half_up<ScalarOpBehavior::kZeroing, double>>(ctx, Variation{v});
}
else {
test_vecop_vv<kVecWidth, UniOpVV::kTruncF32S, scalar_op_trunc<ScalarOpBehavior::kPreservingVec128, float>>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kTruncF64S, scalar_op_trunc<ScalarOpBehavior::kPreservingVec128, double>>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kFloorF32S, scalar_op_floor<ScalarOpBehavior::kPreservingVec128, float>>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kFloorF64S, scalar_op_floor<ScalarOpBehavior::kPreservingVec128, double>>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kCeilF32S, scalar_op_ceil<ScalarOpBehavior::kPreservingVec128, float>>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kCeilF64S, scalar_op_ceil<ScalarOpBehavior::kPreservingVec128, double>>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kRoundEvenF32S, scalar_op_round_even<ScalarOpBehavior::kPreservingVec128, float>>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kRoundEvenF64S, scalar_op_round_even<ScalarOpBehavior::kPreservingVec128, double>>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kRoundHalfAwayF32S, scalar_op_round_half_away<ScalarOpBehavior::kPreservingVec128, float>>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kRoundHalfAwayF64S, scalar_op_round_half_away<ScalarOpBehavior::kPreservingVec128, double>>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kRoundHalfUpF32S, scalar_op_round_half_up<ScalarOpBehavior::kPreservingVec128, float>>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kRoundHalfUpF64S, scalar_op_round_half_up<ScalarOpBehavior::kPreservingVec128, double>>(ctx, Variation{v});
}
test_vecop_vv<kVecWidth, UniOpVV::kTruncF32, vec_op_trunc<float>>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kTruncF64, vec_op_trunc<double>>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kFloorF32, vec_op_floor<float>>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kFloorF64, vec_op_floor<double>>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kCeilF32, vec_op_ceil<float>>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kCeilF64, vec_op_ceil<double>>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kRoundEvenF32, vec_op_round_even<float>>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kRoundEvenF64, vec_op_round_even<double>>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kRoundHalfAwayF32, vec_op_round_half_away<float>>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kRoundHalfAwayF64, vec_op_round_half_away<double>>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kRoundHalfUpF32, vec_op_round_half_up<float>>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kRoundHalfUpF64, vec_op_round_half_up<double>>(ctx, Variation{v});
}
}
INFO(" Testing rcp (float)");
{
for (uint32_t v = 0; v < kNumVariationsVV; v++) {
test_vecop_vv<kVecWidth, UniOpVV::kRcpF32, vec_op_rcp<float>>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kRcpF64, vec_op_rcp<double>>(ctx, Variation{v});
}
}
INFO(" Testing sqrt (float)");
{
if (scalar_op_behavior == ScalarOpBehavior::kZeroing) {
test_vecop_vv<kVecWidth, UniOpVV::kSqrtF32S, scalar_op_sqrt<ScalarOpBehavior::kZeroing, float>>(ctx);
test_vecop_vv<kVecWidth, UniOpVV::kSqrtF64S, scalar_op_sqrt<ScalarOpBehavior::kZeroing, double>>(ctx);
}
else {
test_vecop_vv<kVecWidth, UniOpVV::kSqrtF32S, scalar_op_sqrt<ScalarOpBehavior::kPreservingVec128, float>>(ctx);
test_vecop_vv<kVecWidth, UniOpVV::kSqrtF64S, scalar_op_sqrt<ScalarOpBehavior::kPreservingVec128, double>>(ctx);
}
for (uint32_t v = 0; v < kNumVariationsVV; v++) {
test_vecop_vv<kVecWidth, UniOpVV::kSqrtF32, vec_op_sqrt<float>>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kSqrtF64, vec_op_sqrt<double>>(ctx, Variation{v});
}
}
INFO(" Testing cvt (float)");
{
for (uint32_t v = 0; v < kNumVariationsVV; v++) {
// TODO: [JIT] Re-enable when the content of the remaining part of the register is formalized.
// test_vecop_vv<kVecWidth, UniOpVV::kCvtF32ToF64S, scalar_op_cvt_f32_to_f64>(ctx);
// test_vecop_vv<kVecWidth, UniOpVV::kCvtF64ToF32S, scalar_op_cvt_f64_to_f32>(ctx);
test_vecop_vv<kVecWidth, UniOpVV::kCvtI32ToF32, vec_op_cvt_i32_to_f32>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kCvtF32LoToF64, vec_op_cvt_f32_lo_to_f64>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kCvtF32HiToF64, vec_op_cvt_f32_hi_to_f64>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kCvtF64ToF32Lo, vec_op_cvt_f64_to_f32_lo>(ctx, Variation{0});
test_vecop_vv<kVecWidth, UniOpVV::kCvtF64ToF32Hi, vec_op_cvt_f64_to_f32_hi>(ctx, Variation{0});
test_vecop_vv<kVecWidth, UniOpVV::kCvtI32LoToF64, vec_op_cvt_i32_lo_to_f64>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kCvtI32HiToF64, vec_op_cvt_i32_hi_to_f64>(ctx, Variation{v});
if (float_to_int_behavior == FloatToIntOutsideRangeBehavior::kSmallestValue) {
constexpr FloatToIntOutsideRangeBehavior behavior = FloatToIntOutsideRangeBehavior::kSmallestValue;
test_vecop_vv<kVecWidth, UniOpVV::kCvtTruncF32ToI32, vec_op_cvt_trunc_f32_to_i32<behavior>>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kCvtTruncF64ToI32Lo, vec_op_cvt_trunc_f64_to_i32_lo<behavior>>(ctx, Variation{0});
test_vecop_vv<kVecWidth, UniOpVV::kCvtTruncF64ToI32Hi, vec_op_cvt_trunc_f64_to_i32_hi<behavior>>(ctx, Variation{0});
test_vecop_vv<kVecWidth, UniOpVV::kCvtRoundF32ToI32, vec_op_cvt_round_f32_to_i32<behavior>>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kCvtRoundF64ToI32Lo, vec_op_cvt_round_f64_to_i32_lo<behavior>>(ctx, Variation{0});
test_vecop_vv<kVecWidth, UniOpVV::kCvtRoundF64ToI32Hi, vec_op_cvt_round_f64_to_i32_hi<behavior>>(ctx, Variation{0});
}
else {
constexpr FloatToIntOutsideRangeBehavior behavior = FloatToIntOutsideRangeBehavior::kSaturatedValue;
test_vecop_vv<kVecWidth, UniOpVV::kCvtTruncF32ToI32, vec_op_cvt_trunc_f32_to_i32<behavior>>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kCvtTruncF64ToI32Lo, vec_op_cvt_trunc_f64_to_i32_lo<behavior>>(ctx, Variation{0});
test_vecop_vv<kVecWidth, UniOpVV::kCvtTruncF64ToI32Hi, vec_op_cvt_trunc_f64_to_i32_hi<behavior>>(ctx, Variation{0});
test_vecop_vv<kVecWidth, UniOpVV::kCvtRoundF32ToI32, vec_op_cvt_round_f32_to_i32<behavior>>(ctx, Variation{v});
test_vecop_vv<kVecWidth, UniOpVV::kCvtRoundF64ToI32Lo, vec_op_cvt_round_f64_to_i32_lo<behavior>>(ctx, Variation{0});
test_vecop_vv<kVecWidth, UniOpVV::kCvtRoundF64ToI32Hi, vec_op_cvt_round_f64_to_i32_hi<behavior>>(ctx, Variation{0});
}
}
}
INFO(" Testing bit shift");
{
for (uint32_t v = 0; v < kNumVariationsVVI; v++) {
/*
for (uint32_t i = 1; i < 8; i++) {
test_vecop_vvi<kVecWidth, UniOpVVI::kSllU8 , vec_op_slli<uint8_t>>(ctx, i, Variation{v});
test_vecop_vvi<kVecWidth, UniOpVVI::kSrlU8 , vec_op_srli<uint8_t>>(ctx, i, Variation{v});
test_vecop_vvi<kVecWidth, UniOpVVI::kSraI8 , vec_op_srai<int8_t>>(ctx, i, Variation{v});
}
*/
for (uint32_t i = 1; i < 16; i++) {
test_vecop_vvi<kVecWidth, UniOpVVI::kSllU16, vec_op_slli<uint16_t>>(ctx, i, Variation{v});
test_vecop_vvi<kVecWidth, UniOpVVI::kSrlU16, vec_op_srli<uint16_t>>(ctx, i, Variation{v});
test_vecop_vvi<kVecWidth, UniOpVVI::kSraI16, vec_op_srai<int16_t>>(ctx, i, Variation{v});
}
for (uint32_t i = 1; i < 32; i++) {
test_vecop_vvi<kVecWidth, UniOpVVI::kSllU32, vec_op_slli<uint32_t>>(ctx, i, Variation{v});
test_vecop_vvi<kVecWidth, UniOpVVI::kSrlU32, vec_op_srli<uint32_t>>(ctx, i, Variation{v});
test_vecop_vvi<kVecWidth, UniOpVVI::kSraI32, vec_op_srai<int32_t>>(ctx, i, Variation{v});
}
for (uint32_t i = 1; i < 64; i++) {
test_vecop_vvi<kVecWidth, UniOpVVI::kSllU64, vec_op_slli<uint64_t>>(ctx, i, Variation{v});
test_vecop_vvi<kVecWidth, UniOpVVI::kSrlU64, vec_op_srli<uint64_t>>(ctx, i, Variation{v});
test_vecop_vvi<kVecWidth, UniOpVVI::kSraI64, vec_op_srai<int64_t>>(ctx, i, Variation{v});
}
}
}
INFO(" Testing sllb_u128 & srlb_u128");
{
for (uint32_t v = 0; v < kNumVariationsVVI; v++) {
for (uint32_t i = 1; i < 16; i++) {
test_vecop_vvi<kVecWidth, UniOpVVI::kSllbU128, vec_op_sllb_u128>(ctx, i, Variation{v});
test_vecop_vvi<kVecWidth, UniOpVVI::kSrlbU128, vec_op_srlb_u128>(ctx, i, Variation{v});
}
}
}
INFO(" Testing swizzle_[lo|hi]_u16x4");
{
for (uint32_t v = 0; v < kNumVariationsVVI; v++) {
for (uint32_t i = 0; i < 256; i++) {
uint32_t imm = swizzle((i >> 6) & 3, (i >> 4) & 3, (i >> 2) & 3, i & 3).value;
test_vecop_vvi<kVecWidth, UniOpVVI::kSwizzleLoU16x4, vec_op_swizzle_lo_u16x4>(ctx, imm, Variation{v});
test_vecop_vvi<kVecWidth, UniOpVVI::kSwizzleHiU16x4, vec_op_swizzle_hi_u16x4>(ctx, imm, Variation{v});
test_vecop_vvi<kVecWidth, UniOpVVI::kSwizzleU16x4, vec_op_swizzle_u16>(ctx, imm, Variation{v});
}
}
}
INFO(" Testing swizzle_u32x4");
{
for (uint32_t v = 0; v < kNumVariationsVVI; v++) {
for (uint32_t i = 0; i < 256; i++) {
uint32_t imm = swizzle((i >> 6) & 3, (i >> 4) & 3, (i >> 2) & 3, i & 3).value;
test_vecop_vvi<kVecWidth, UniOpVVI::kSwizzleU32x4, vec_op_swizzle_u32x4>(ctx, imm, Variation{v});
test_vecop_vvi<kVecWidth, UniOpVVI::kSwizzleF32x4, vec_op_swizzle_u32x4>(ctx, imm, Variation{v});
}
}
}
INFO(" Testing swizzle_u64x2");
{
for (uint32_t v = 0; v < kNumVariationsVVI; v++) {
for (uint32_t i = 0; i < 4; i++) {
uint32_t imm = swizzle((i >> 1) & 1, i & 1).value;
test_vecop_vvi<kVecWidth, UniOpVVI::kSwizzleU64x2, vec_op_swizzle_u64x2>(ctx, imm, Variation{v});
test_vecop_vvi<kVecWidth, UniOpVVI::kSwizzleF64x2, vec_op_swizzle_u64x2>(ctx, imm, Variation{v});
}
}
}
INFO(" Testing logical (int)");
{
for (uint32_t v = 0; v < kNumVariationsVVV; v++) {
test_vecop_vvv<kVecWidth, UniOpVVV::kAndU32, vec_op_and<uint32_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kAndU64, vec_op_and<uint64_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kOrU32, vec_op_or<uint32_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kOrU64, vec_op_or<uint64_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kXorU32, vec_op_xor<uint32_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kXorU64, vec_op_xor<uint64_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kAndnU32, vec_op_andn<uint32_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kAndnU64, vec_op_andn<uint64_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kBicU32, vec_op_bic<uint32_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kBicU64, vec_op_bic<uint64_t>>(ctx, Variation{v});
}
}
INFO(" Testing add / adds (int)");
{
for (uint32_t v = 0; v < kNumVariationsVVV; v++) {
test_vecop_vvv<kVecWidth, UniOpVVV::kAddU8, vec_op_add<uint8_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kAddU16, vec_op_add<uint16_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kAddU32, vec_op_add<uint32_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kAddU64, vec_op_add<uint64_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kAddsI8, vec_op_adds<int8_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kAddsI16, vec_op_adds<int16_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kAddsU8, vec_op_adds<uint8_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kAddsU16, vec_op_adds<uint16_t>>(ctx, Variation{v});
}
}
INFO(" Testing sub / subs (int)");
{
for (uint32_t v = 0; v < kNumVariationsVVV; v++) {
test_vecop_vvv<kVecWidth, UniOpVVV::kSubU8, vec_op_sub<uint8_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kSubU16, vec_op_sub<uint16_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kSubU32, vec_op_sub<uint32_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kSubU64, vec_op_sub<uint64_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kSubsI8, vec_op_subs<int8_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kSubsI16, vec_op_subs<int16_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kSubsU8, vec_op_subs<uint8_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kSubsU16, vec_op_subs<uint16_t>>(ctx, Variation{v});
}
}
INFO(" Testing mul (int)");
{
for (uint32_t v = 0; v < kNumVariationsVVV; v++) {
test_vecop_vvv<kVecWidth, UniOpVVV::kMulU16, vec_op_mul<uint16_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kMulU32, vec_op_mul<uint32_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kMulU64, vec_op_mul<uint64_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kMulhI16, vec_op_mulhi<int16_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kMulhU16, vec_op_mulhu<uint16_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kMulU64_LoU32, vec_op_mul_u64_lo_u32>(ctx, Variation{v});
}
}
INFO(" Testing mhadd (int)");
{
for (uint32_t v = 0; v < kNumVariationsVVV; v++) {
test_vecop_vvv<kVecWidth, UniOpVVV::kMHAddI16_I32, vec_op_mhadd_i16_i32>(ctx, Variation{v});
}
}
INFO(" Testing madd (int)");
{
for (uint32_t v = 0; v < kNumVariationsVVVV; v++) {
test_vecop_vvvv<kVecWidth, UniOpVVVV::kMAddU16, vec_op_madd<uint16_t>>(ctx, Variation{v});
test_vecop_vvvv<kVecWidth, UniOpVVVV::kMAddU32, vec_op_madd<uint32_t>>(ctx, Variation{v});
}
}
INFO(" Testing min / max (int)");
{
for (uint32_t v = 0; v < kNumVariationsVVV; v++) {
test_vecop_vvv<kVecWidth, UniOpVVV::kMinI8, vec_op_min<int8_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kMinI16, vec_op_min<int16_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kMinI32, vec_op_min<int32_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kMinI64, vec_op_min<int64_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kMinU8, vec_op_min<uint8_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kMinU16, vec_op_min<uint16_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kMinU32, vec_op_min<uint32_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kMinU64, vec_op_min<uint64_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kMaxI8, vec_op_max<int8_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kMaxI16, vec_op_max<int16_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kMaxI32, vec_op_max<int32_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kMaxI64, vec_op_max<int64_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kMaxU8, vec_op_max<uint8_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kMaxU16, vec_op_max<uint16_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kMaxU32, vec_op_max<uint32_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kMaxU64, vec_op_max<uint64_t>>(ctx, Variation{v});
}
}
INFO(" Testing cmp (int)");
{
for (uint32_t v = 0; v < kNumVariationsVVV; v++) {
test_vecop_vvv<kVecWidth, UniOpVVV::kCmpEqU8, vec_op_cmp_eq<uint8_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kCmpEqU16, vec_op_cmp_eq<uint16_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kCmpEqU32, vec_op_cmp_eq<uint32_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kCmpEqU64, vec_op_cmp_eq<uint64_t>>(ctx, Variation{v});
/*
test_vecop_vvv<kVecWidth, UniOpVVV::kCmpNeU8, vec_op_cmp_ne<uint8_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kCmpNeU16, vec_op_cmp_ne<uint16_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kCmpNeU32, vec_op_cmp_ne<uint32_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kCmpNeU64, vec_op_cmp_ne<uint64_t>>(ctx, Variation{v});
*/
test_vecop_vvv<kVecWidth, UniOpVVV::kCmpGtI8, vec_op_cmp_gt<int8_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kCmpGtI16, vec_op_cmp_gt<int16_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kCmpGtI32, vec_op_cmp_gt<int32_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kCmpGtI64, vec_op_cmp_gt<int64_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kCmpGtU8, vec_op_cmp_gt<uint8_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kCmpGtU16, vec_op_cmp_gt<uint16_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kCmpGtU32, vec_op_cmp_gt<uint32_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kCmpGtU64, vec_op_cmp_gt<uint64_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kCmpGeI8, vec_op_cmp_ge<int8_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kCmpGeI16, vec_op_cmp_ge<int16_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kCmpGeI32, vec_op_cmp_ge<int32_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kCmpGeI64, vec_op_cmp_ge<int64_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kCmpGeU8, vec_op_cmp_ge<uint8_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kCmpGeU16, vec_op_cmp_ge<uint16_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kCmpGeU32, vec_op_cmp_ge<uint32_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kCmpGeU64, vec_op_cmp_ge<uint64_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kCmpLtI8, vec_op_cmp_lt<int8_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kCmpLtI16, vec_op_cmp_lt<int16_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kCmpLtI32, vec_op_cmp_lt<int32_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kCmpLtI64, vec_op_cmp_lt<int64_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kCmpLtU8, vec_op_cmp_lt<uint8_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kCmpLtU16, vec_op_cmp_lt<uint16_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kCmpLtU32, vec_op_cmp_lt<uint32_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kCmpLtU64, vec_op_cmp_lt<uint64_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kCmpLeI8, vec_op_cmp_le<int8_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kCmpLeI16, vec_op_cmp_le<int16_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kCmpLeI32, vec_op_cmp_le<int32_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kCmpLeI64, vec_op_cmp_le<int64_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kCmpLeU8, vec_op_cmp_le<uint8_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kCmpLeU16, vec_op_cmp_le<uint16_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kCmpLeU32, vec_op_cmp_le<uint32_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kCmpLeU64, vec_op_cmp_le<uint64_t>>(ctx, Variation{v});
}
}
INFO(" Testing logical (float)");
{
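// Float logical ops are verified against the integer references: AND, OR, XOR, ANDN, and BIC
// operate on raw bit patterns regardless of the element type.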
for (uint32_t v = 0; v < kNumVariationsVVV; v++) {
test_vecop_vvv<kVecWidth, UniOpVVV::kAndF32, vec_op_and<uint32_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kAndF64, vec_op_and<uint64_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kOrF32, vec_op_or<uint32_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kOrF64, vec_op_or<uint64_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kXorF32, vec_op_xor<uint32_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kXorF64, vec_op_xor<uint64_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kAndnF32, vec_op_andn<uint32_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kAndnF64, vec_op_andn<uint64_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kBicF32, vec_op_bic<uint32_t>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kBicF64, vec_op_bic<uint64_t>>(ctx, Variation{v});
}
}
INFO(" Testing arithmetic (float)");
{
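// The scalar reference must mirror what the target does with the lanes a scalar op doesn't
// compute: kZeroing clears everything past element 0, while kPreservingVec128 keeps the low
// 128-bit lane of the first source.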
if (scalar_op_behavior == ScalarOpBehavior::kZeroing) {
test_vecop_vvv<kVecWidth, UniOpVVV::kAddF32S, scalar_op_fadd<ScalarOpBehavior::kZeroing, float>>(ctx);
test_vecop_vvv<kVecWidth, UniOpVVV::kAddF64S, scalar_op_fadd<ScalarOpBehavior::kZeroing, double>>(ctx);
test_vecop_vvv<kVecWidth, UniOpVVV::kSubF32S, scalar_op_fsub<ScalarOpBehavior::kZeroing, float>>(ctx);
test_vecop_vvv<kVecWidth, UniOpVVV::kSubF64S, scalar_op_fsub<ScalarOpBehavior::kZeroing, double>>(ctx);
test_vecop_vvv<kVecWidth, UniOpVVV::kMulF32S, scalar_op_fmul<ScalarOpBehavior::kZeroing, float>>(ctx);
test_vecop_vvv<kVecWidth, UniOpVVV::kMulF64S, scalar_op_fmul<ScalarOpBehavior::kZeroing, double>>(ctx);
test_vecop_vvv<kVecWidth, UniOpVVV::kDivF32S, scalar_op_fdiv<ScalarOpBehavior::kZeroing, float>>(ctx);
test_vecop_vvv<kVecWidth, UniOpVVV::kDivF64S, scalar_op_fdiv<ScalarOpBehavior::kZeroing, double>>(ctx);
}
else {
test_vecop_vvv<kVecWidth, UniOpVVV::kAddF32S, scalar_op_fadd<ScalarOpBehavior::kPreservingVec128, float>>(ctx);
test_vecop_vvv<kVecWidth, UniOpVVV::kAddF64S, scalar_op_fadd<ScalarOpBehavior::kPreservingVec128, double>>(ctx);
test_vecop_vvv<kVecWidth, UniOpVVV::kSubF32S, scalar_op_fsub<ScalarOpBehavior::kPreservingVec128, float>>(ctx);
test_vecop_vvv<kVecWidth, UniOpVVV::kSubF64S, scalar_op_fsub<ScalarOpBehavior::kPreservingVec128, double>>(ctx);
test_vecop_vvv<kVecWidth, UniOpVVV::kMulF32S, scalar_op_fmul<ScalarOpBehavior::kPreservingVec128, float>>(ctx);
test_vecop_vvv<kVecWidth, UniOpVVV::kMulF64S, scalar_op_fmul<ScalarOpBehavior::kPreservingVec128, double>>(ctx);
test_vecop_vvv<kVecWidth, UniOpVVV::kDivF32S, scalar_op_fdiv<ScalarOpBehavior::kPreservingVec128, float>>(ctx);
test_vecop_vvv<kVecWidth, UniOpVVV::kDivF64S, scalar_op_fdiv<ScalarOpBehavior::kPreservingVec128, double>>(ctx);
}
for (uint32_t v = 0; v < kNumVariationsVVV; v++) {
test_vecop_vvv<kVecWidth, UniOpVVV::kAddF32, vec_op_fadd<float>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kAddF64, vec_op_fadd<double>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kSubF32, vec_op_fsub<float>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kSubF64, vec_op_fsub<double>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kMulF32, vec_op_fmul<float>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kMulF64, vec_op_fmul<double>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kDivF32, vec_op_fdiv<float>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kDivF64, vec_op_fdiv<double>>(ctx, Variation{v});
}
}
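// Without FMA the reference rounds twice (after the multiply and after the add); with FMA a
// single rounding is applied to the fused result, so each mode needs its own reference.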
if (fmadd_op_behavior == FMAddOpBehavior::kNoFMA) {
INFO(" Testing madd (no-fma) (float)");
{
if (scalar_op_behavior == ScalarOpBehavior::kZeroing) {
test_vecop_vvvv<kVecWidth, UniOpVVVV::kMAddF32S, scalar_op_fmadd_nofma<ScalarOpBehavior::kZeroing, float>>(ctx, Variation{0});
test_vecop_vvvv<kVecWidth, UniOpVVVV::kMAddF64S, scalar_op_fmadd_nofma<ScalarOpBehavior::kZeroing, double>>(ctx, Variation{0});
test_vecop_vvvv<kVecWidth, UniOpVVVV::kMSubF32S, scalar_op_fmsub_nofma<ScalarOpBehavior::kZeroing, float>>(ctx, Variation{0});
test_vecop_vvvv<kVecWidth, UniOpVVVV::kMSubF64S, scalar_op_fmsub_nofma<ScalarOpBehavior::kZeroing, double>>(ctx, Variation{0});
test_vecop_vvvv<kVecWidth, UniOpVVVV::kNMAddF32S, scalar_op_fnmadd_nofma<ScalarOpBehavior::kZeroing, float>>(ctx, Variation{0});
test_vecop_vvvv<kVecWidth, UniOpVVVV::kNMAddF64S, scalar_op_fnmadd_nofma<ScalarOpBehavior::kZeroing, double>>(ctx, Variation{0});
test_vecop_vvvv<kVecWidth, UniOpVVVV::kNMSubF32S, scalar_op_fnmsub_nofma<ScalarOpBehavior::kZeroing, float>>(ctx, Variation{0});
test_vecop_vvvv<kVecWidth, UniOpVVVV::kNMSubF64S, scalar_op_fnmsub_nofma<ScalarOpBehavior::kZeroing, double>>(ctx, Variation{0});
}
else {
test_vecop_vvvv<kVecWidth, UniOpVVVV::kMAddF32S, scalar_op_fmadd_nofma<ScalarOpBehavior::kPreservingVec128, float>>(ctx, Variation{0});
test_vecop_vvvv<kVecWidth, UniOpVVVV::kMAddF64S, scalar_op_fmadd_nofma<ScalarOpBehavior::kPreservingVec128, double>>(ctx, Variation{0});
test_vecop_vvvv<kVecWidth, UniOpVVVV::kMSubF32S, scalar_op_fmsub_nofma<ScalarOpBehavior::kPreservingVec128, float>>(ctx, Variation{0});
test_vecop_vvvv<kVecWidth, UniOpVVVV::kMSubF64S, scalar_op_fmsub_nofma<ScalarOpBehavior::kPreservingVec128, double>>(ctx, Variation{0});
test_vecop_vvvv<kVecWidth, UniOpVVVV::kNMAddF32S, scalar_op_fnmadd_nofma<ScalarOpBehavior::kPreservingVec128, float>>(ctx, Variation{0});
test_vecop_vvvv<kVecWidth, UniOpVVVV::kNMAddF64S, scalar_op_fnmadd_nofma<ScalarOpBehavior::kPreservingVec128, double>>(ctx, Variation{0});
test_vecop_vvvv<kVecWidth, UniOpVVVV::kNMSubF32S, scalar_op_fnmsub_nofma<ScalarOpBehavior::kPreservingVec128, float>>(ctx, Variation{0});
test_vecop_vvvv<kVecWidth, UniOpVVVV::kNMSubF64S, scalar_op_fnmsub_nofma<ScalarOpBehavior::kPreservingVec128, double>>(ctx, Variation{0});
}
for (uint32_t v = 0; v < kNumVariationsVVVV; v++) {
test_vecop_vvvv<kVecWidth, UniOpVVVV::kMAddF32, vec_op_fmadd_nofma<float>>(ctx, Variation{v});
test_vecop_vvvv<kVecWidth, UniOpVVVV::kMAddF64, vec_op_fmadd_nofma<double>>(ctx, Variation{v});
test_vecop_vvvv<kVecWidth, UniOpVVVV::kMSubF32, vec_op_fmsub_nofma<float>>(ctx, Variation{v});
test_vecop_vvvv<kVecWidth, UniOpVVVV::kMSubF64, vec_op_fmsub_nofma<double>>(ctx, Variation{v});
test_vecop_vvvv<kVecWidth, UniOpVVVV::kNMAddF32, vec_op_fnmadd_nofma<float>>(ctx, Variation{v});
test_vecop_vvvv<kVecWidth, UniOpVVVV::kNMAddF64, vec_op_fnmadd_nofma<double>>(ctx, Variation{v});
test_vecop_vvvv<kVecWidth, UniOpVVVV::kNMSubF32, vec_op_fnmsub_nofma<float>>(ctx, Variation{v});
test_vecop_vvvv<kVecWidth, UniOpVVVV::kNMSubF64, vec_op_fnmsub_nofma<double>>(ctx, Variation{v});
}
}
}
else {
INFO(" Testing madd (fma) (float)");
{
if (valgrind_fma_bug) {
INFO(" (scalar FMA tests ignored due to a Valgrind bug!)");
}
else {
if (scalar_op_behavior == ScalarOpBehavior::kZeroing) {
test_vecop_vvvv<kVecWidth, UniOpVVVV::kMAddF32S, scalar_op_fmadd_fma<ScalarOpBehavior::kZeroing, float>>(ctx, Variation{0});
test_vecop_vvvv<kVecWidth, UniOpVVVV::kMAddF64S, scalar_op_fmadd_fma<ScalarOpBehavior::kZeroing, double>>(ctx, Variation{0});
test_vecop_vvvv<kVecWidth, UniOpVVVV::kMSubF32S, scalar_op_fmsub_fma<ScalarOpBehavior::kZeroing, float>>(ctx, Variation{0});
test_vecop_vvvv<kVecWidth, UniOpVVVV::kMSubF64S, scalar_op_fmsub_fma<ScalarOpBehavior::kZeroing, double>>(ctx, Variation{0});
test_vecop_vvvv<kVecWidth, UniOpVVVV::kNMAddF32S, scalar_op_fnmadd_fma<ScalarOpBehavior::kZeroing, float>>(ctx, Variation{0});
test_vecop_vvvv<kVecWidth, UniOpVVVV::kNMAddF64S, scalar_op_fnmadd_fma<ScalarOpBehavior::kZeroing, double>>(ctx, Variation{0});
test_vecop_vvvv<kVecWidth, UniOpVVVV::kNMSubF32S, scalar_op_fnmsub_fma<ScalarOpBehavior::kZeroing, float>>(ctx, Variation{0});
test_vecop_vvvv<kVecWidth, UniOpVVVV::kNMSubF64S, scalar_op_fnmsub_fma<ScalarOpBehavior::kZeroing, double>>(ctx, Variation{0});
}
else {
test_vecop_vvvv<kVecWidth, UniOpVVVV::kMAddF32S, scalar_op_fmadd_fma<ScalarOpBehavior::kPreservingVec128, float>>(ctx, Variation{0});
test_vecop_vvvv<kVecWidth, UniOpVVVV::kMAddF64S, scalar_op_fmadd_fma<ScalarOpBehavior::kPreservingVec128, double>>(ctx, Variation{0});
test_vecop_vvvv<kVecWidth, UniOpVVVV::kMSubF32S, scalar_op_fmsub_fma<ScalarOpBehavior::kPreservingVec128, float>>(ctx, Variation{0});
test_vecop_vvvv<kVecWidth, UniOpVVVV::kMSubF64S, scalar_op_fmsub_fma<ScalarOpBehavior::kPreservingVec128, double>>(ctx, Variation{0});
test_vecop_vvvv<kVecWidth, UniOpVVVV::kNMAddF32S, scalar_op_fnmadd_fma<ScalarOpBehavior::kPreservingVec128, float>>(ctx, Variation{0});
test_vecop_vvvv<kVecWidth, UniOpVVVV::kNMAddF64S, scalar_op_fnmadd_fma<ScalarOpBehavior::kPreservingVec128, double>>(ctx, Variation{0});
test_vecop_vvvv<kVecWidth, UniOpVVVV::kNMSubF32S, scalar_op_fnmsub_fma<ScalarOpBehavior::kPreservingVec128, float>>(ctx, Variation{0});
test_vecop_vvvv<kVecWidth, UniOpVVVV::kNMSubF64S, scalar_op_fnmsub_fma<ScalarOpBehavior::kPreservingVec128, double>>(ctx, Variation{0});
}
}
for (uint32_t v = 0; v < kNumVariationsVVVV; v++) {
test_vecop_vvvv<kVecWidth, UniOpVVVV::kMAddF32, vec_op_fmadd_fma<float>>(ctx, Variation{v});
test_vecop_vvvv<kVecWidth, UniOpVVVV::kMAddF64, vec_op_fmadd_fma<double>>(ctx, Variation{v});
test_vecop_vvvv<kVecWidth, UniOpVVVV::kMSubF32, vec_op_fmsub_fma<float>>(ctx, Variation{v});
test_vecop_vvvv<kVecWidth, UniOpVVVV::kMSubF64, vec_op_fmsub_fma<double>>(ctx, Variation{v});
test_vecop_vvvv<kVecWidth, UniOpVVVV::kNMAddF32, vec_op_fnmadd_fma<float>>(ctx, Variation{v});
test_vecop_vvvv<kVecWidth, UniOpVVVV::kNMAddF64, vec_op_fnmadd_fma<double>>(ctx, Variation{v});
test_vecop_vvvv<kVecWidth, UniOpVVVV::kNMSubF32, vec_op_fnmsub_fma<float>>(ctx, Variation{v});
test_vecop_vvvv<kVecWidth, UniOpVVVV::kNMSubF64, vec_op_fnmsub_fma<double>>(ctx, Variation{v});
}
}
}
INFO(" Testing min / max (float)");
{
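// NaN handling differs per architecture, so each is verified against a matching reference:
// x86 `minps`/`maxps` behave like `a < b ? a : b` (the second operand wins when the comparison
// is unordered), whereas the AArch64 implementation matches the finite min/max references.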
#if defined(ASMJIT_UJIT_X86)
test_vecop_vvv<kVecWidth, UniOpVVV::kMinF32S, scalar_op_fmin_ternary<ScalarOpBehavior::kPreservingVec128, float>>(ctx);
test_vecop_vvv<kVecWidth, UniOpVVV::kMinF64S, scalar_op_fmin_ternary<ScalarOpBehavior::kPreservingVec128, double>>(ctx);
test_vecop_vvv<kVecWidth, UniOpVVV::kMaxF32S, scalar_op_fmax_ternary<ScalarOpBehavior::kPreservingVec128, float>>(ctx);
test_vecop_vvv<kVecWidth, UniOpVVV::kMaxF64S, scalar_op_fmax_ternary<ScalarOpBehavior::kPreservingVec128, double>>(ctx);
for (uint32_t v = 0; v < kNumVariationsVVV; v++) {
test_vecop_vvv<kVecWidth, UniOpVVV::kMinF32, vec_op_fmin_ternary<float>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kMinF64, vec_op_fmin_ternary<double>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kMaxF32, vec_op_fmax_ternary<float>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kMaxF64, vec_op_fmax_ternary<double>>(ctx, Variation{v});
}
#endif
#if defined(ASMJIT_UJIT_AARCH64)
test_vecop_vvv<kVecWidth, UniOpVVV::kMinF32S, scalar_op_fmin_finite<ScalarOpBehavior::kZeroing, float>>(ctx);
test_vecop_vvv<kVecWidth, UniOpVVV::kMinF64S, scalar_op_fmin_finite<ScalarOpBehavior::kZeroing, double>>(ctx);
test_vecop_vvv<kVecWidth, UniOpVVV::kMaxF32S, scalar_op_fmax_finite<ScalarOpBehavior::kZeroing, float>>(ctx);
test_vecop_vvv<kVecWidth, UniOpVVV::kMaxF64S, scalar_op_fmax_finite<ScalarOpBehavior::kZeroing, double>>(ctx);
for (uint32_t v = 0; v < kNumVariationsVVV; v++) {
test_vecop_vvv<kVecWidth, UniOpVVV::kMinF32, vec_op_fmin_finite<float>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kMinF64, vec_op_fmin_finite<double>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kMaxF32, vec_op_fmax_finite<float>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kMaxF64, vec_op_fmax_finite<double>>(ctx, Variation{v});
}
#endif
}
INFO(" Testing cmp (float)");
{
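// NE is intentionally an unordered comparison (a NaN operand compares not-equal); all other
// relations are ordered, matching the `fcmpu` / `fcmpo` reference helpers.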
for (uint32_t v = 0; v < kNumVariationsVVV; v++) {
test_vecop_vvv<kVecWidth, UniOpVVV::kCmpEqF32, vec_op_fcmpo_eq<float>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kCmpEqF64, vec_op_fcmpo_eq<double>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kCmpNeF32, vec_op_fcmpu_ne<float>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kCmpNeF64, vec_op_fcmpu_ne<double>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kCmpGtF32, vec_op_fcmpo_gt<float>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kCmpGtF64, vec_op_fcmpo_gt<double>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kCmpGeF32, vec_op_fcmpo_ge<float>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kCmpGeF64, vec_op_fcmpo_ge<double>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kCmpLtF32, vec_op_fcmpo_lt<float>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kCmpLtF64, vec_op_fcmpo_lt<double>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kCmpLeF32, vec_op_fcmpo_le<float>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kCmpLeF64, vec_op_fcmpo_le<double>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kCmpOrdF32, vec_op_fcmp_ord<float>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kCmpOrdF64, vec_op_fcmp_ord<double>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kCmpUnordF32, vec_op_fcmp_unord<float>>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kCmpUnordF64, vec_op_fcmp_unord<double>>(ctx, Variation{v});
}
}
INFO(" Testing hadd (float)");
{
for (uint32_t v = 0; v < kNumVariationsVVV; v++) {
test_vecop_vvv<kVecWidth, UniOpVVV::kHAddF64, vec_op_hadd_f64>(ctx, Variation{v});
}
}
INFO(" Testing combine");
{
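// The F64 variants reuse the U64 references: combining 64-bit halves is a pure bit move, so
// the element type only affects instruction selection.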
for (uint32_t v = 0; v < kNumVariationsVVV; v++) {
test_vecop_vvv<kVecWidth, UniOpVVV::kCombineLoHiU64, vec_op_combine_lo_hi_u64>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kCombineLoHiF64, vec_op_combine_lo_hi_u64>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kCombineHiLoU64, vec_op_combine_hi_lo_u64>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kCombineHiLoF64, vec_op_combine_hi_lo_u64>(ctx, Variation{v});
}
}
INFO(" Testing interleave");
{
for (uint32_t v = 0; v < kNumVariationsVVV; v++) {
test_vecop_vvv<kVecWidth, UniOpVVV::kInterleaveLoU8, vec_op_interleave_lo_u8>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kInterleaveHiU8, vec_op_interleave_hi_u8>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kInterleaveLoU16, vec_op_interleave_lo_u16>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kInterleaveHiU16, vec_op_interleave_hi_u16>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kInterleaveLoU32, vec_op_interleave_lo_u32>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kInterleaveHiU32, vec_op_interleave_hi_u32>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kInterleaveLoU64, vec_op_interleave_lo_u64>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kInterleaveHiU64, vec_op_interleave_hi_u64>(ctx, Variation{v});
}
}
INFO(" Testing packs");
{
for (uint32_t v = 0; v < kNumVariationsVVV; v++) {
test_vecop_vvv<kVecWidth, UniOpVVV::kPacksI16_I8, vec_op_packs_i16_i8>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kPacksI16_U8, vec_op_packs_i16_u8>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kPacksI32_I16, vec_op_packs_i32_i16>(ctx, Variation{v});
test_vecop_vvv<kVecWidth, UniOpVVV::kPacksI32_U16, vec_op_packs_i32_u16>(ctx, Variation{v});
}
}
INFO(" Testing alignr_u128");
{
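// Immediates 1..15 cover every meaningful byte offset within a 128-bit lane (an immediate of
// zero would degenerate to a plain copy).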
for (uint32_t v = 0; v < kNumVariationsVVVI; v++) {
for (uint32_t i = 1; i < 16; i++) {
test_vecop_vvvi<kVecWidth, UniOpVVVI::kAlignr_U128, vec_op_alignr_u128>(ctx, i, Variation{v});
}
}
}
INFO(" Testing interleave_shuffle");
{
for (uint32_t v = 0; v < kNumVariationsVVVI; v++) {
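// Exhaustively cover all 256 swizzle immediates; each 2-bit field of `i` selects one of the
// four 32-bit source lanes.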
for (uint32_t i = 0; i < 256; i++) {
uint32_t imm = swizzle((i >> 6) & 3, (i >> 4) & 3, (i >> 2) & 3, i & 3).value;
test_vecop_vvvi<kVecWidth, UniOpVVVI::kInterleaveShuffleU32x4, vec_op_interleave_shuffle_u32x4>(ctx, imm, Variation{v});
test_vecop_vvvi<kVecWidth, UniOpVVVI::kInterleaveShuffleF32x4, vec_op_interleave_shuffle_u32x4>(ctx, imm, Variation{v});
}
for (uint32_t i = 0; i < 4; i++) {
uint32_t imm = swizzle((i >> 1) & 1, i & 1).value;
test_vecop_vvvi<kVecWidth, UniOpVVVI::kInterleaveShuffleU64x2, vec_op_interleave_shuffle_u64x2>(ctx, imm, Variation{v});
test_vecop_vvvi<kVecWidth, UniOpVVVI::kInterleaveShuffleF64x2, vec_op_interleave_shuffle_u64x2>(ctx, imm, Variation{v});
}
}
}
}
static void test_gp_ops(JitContext& ctx) noexcept {
test_cond_ops(ctx);
test_m_ops(ctx);
test_rm_ops(ctx);
test_mr_ops(ctx);
test_rr_ops(ctx);
test_rrr_ops(ctx);
}
#if defined(ASMJIT_UJIT_X86)
static void dump_feature_list(String& out, const CpuFeatures& features) noexcept {
#if !defined(ASMJIT_NO_LOGGING)
CpuFeatures::Iterator it = features.iterator();
bool first = true;
while (it.has_next()) {
size_t feature_id = it.next();
if (!first) {
out.append(' ');
}
Formatter::format_feature(out, Arch::kHost, uint32_t(feature_id));
first = false;
}
#else
Support::maybe_unused(features);
out.append("<ASMJIT_NO_LOGGING>");
#endif
}
static void test_x86_ops(JitContext& ctx, const CpuFeatures& host_features) noexcept {
using Ext = CpuFeatures::X86;
{
String s;
dump_feature_list(s, host_features);
INFO("Available CPU features: %s", s.data());
}
// Features that must always be available:
CpuFeatures base;
base.add(Ext::kI486, Ext::kCMOV, Ext::kCMPXCHG8B, Ext::kFPU, Ext::kSSE, Ext::kSSE2);
// To verify that the JIT implements ALL operations under ALL possible CPU flag combinations, we use
// profiles to select the features the JIT compiler is allowed to use. Features are added gradually,
// mirroring how successive CPU generations introduced them. We cannot cover every possible CPU, but
// that's not necessary either: we test individual operations, and instruction selection depends only
// on the features available.
// GP variations.
{
CpuFeatures profiles[4] {};
profiles[0] = base;
profiles[1] = profiles[0];
profiles[1].add(Ext::kADX, Ext::kBMI);
profiles[2] = profiles[1];
profiles[2].add(Ext::kBMI2, Ext::kLZCNT, Ext::kMOVBE, Ext::kPOPCNT);
profiles[3] = host_features;
bool first = true;
CpuFeatures last_filtered;
for (const CpuFeatures& profile : profiles) {
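// Clamp each profile to the host feature set so the JIT never selects an instruction this
// machine cannot execute, and skip profiles that clamp to an already-tested feature set.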
CpuFeatures filtered = profile;
for (uint32_t i = 0; i < CpuFeatures::kNumBitWords; i++) {
filtered.data()._bits[i] &= host_features.data()._bits[i];
}
if (!first && filtered == last_filtered) {
continue;
}
String s;
if (filtered == host_features) {
s.assign("[ALL]");
}
else {
dump_feature_list(s, filtered);
}
ctx.features = filtered;
INFO("Testing JIT compiler GP ops with [%s]", s.data());
test_gp_ops(ctx);
first = false;
last_filtered = filtered;
}
}
// SIMD variations covering SSE2+, AVX+, and AVX512+ cases.
{
CpuFeatures profiles[15] {};
profiles[0] = base;
profiles[1] = profiles[0];
profiles[1].add(Ext::kSSE3);
profiles[2] = profiles[1];
profiles[2].add(Ext::kSSSE3);
profiles[3] = profiles[2];
profiles[3].add(Ext::kSSE4_1);
profiles[4] = profiles[3];
profiles[4].add(Ext::kSSE4_2, Ext::kADX, Ext::kBMI, Ext::kBMI2, Ext::kLZCNT, Ext::kMOVBE, Ext::kPOPCNT);
profiles[5] = profiles[4];
profiles[5].add(Ext::kPCLMULQDQ);
profiles[6] = profiles[5];
profiles[6].add(Ext::kAVX);
profiles[7] = profiles[6];
profiles[7].add(Ext::kAVX2);
profiles[8] = profiles[7];
profiles[8].add(Ext::kF16C, Ext::kFMA, Ext::kVAES, Ext::kVPCLMULQDQ);
profiles[9] = profiles[8];
profiles[9].add(Ext::kAVX_IFMA, Ext::kAVX_NE_CONVERT, Ext::kAVX_VNNI, Ext::kAVX_VNNI_INT8, Ext::kAVX_VNNI_INT16);
// We deliberately start from a profile that doesn't contain the AVX_xxx
// extensions, as these didn't exist when the first AVX512 CPUs shipped.
profiles[10] = profiles[8];
profiles[10].add(Ext::kAVX512_F, Ext::kAVX512_BW, Ext::kAVX512_DQ, Ext::kAVX512_CD, Ext::kAVX512_VL);
profiles[11] = profiles[10];
profiles[11].add(Ext::kAVX512_IFMA, Ext::kAVX512_VBMI);
profiles[12] = profiles[11];
profiles[12].add(Ext::kAVX512_BITALG, Ext::kAVX512_VBMI2, Ext::kAVX512_VNNI, Ext::kAVX512_VPOPCNTDQ);
profiles[13] = profiles[12];
profiles[13].add(Ext::kAVX512_BF16, Ext::kAVX512_FP16);
profiles[14] = host_features;
bool first = true;
CpuFeatures last_filtered;
for (const CpuFeatures& profile : profiles) {
CpuFeatures filtered = profile;
for (uint32_t i = 0; i < CpuFeatures::kNumBitWords; i++) {
filtered.data()._bits[i] &= host_features.data()._bits[i];
}
if (!first && filtered == last_filtered) {
continue;
}
String s;
if (filtered == host_features) {
s.assign("[ALL]");
}
else {
dump_feature_list(s, filtered);
}
ctx.features = filtered;
INFO("Testing JIT compiler 128-bit SIMD ops with [%s]", s.data());
test_simd_ops<VecWidth::k128>(ctx);
if (filtered.x86().has_avx2()) {
INFO("Testing JIT compiler 256-bit SIMD ops with [%s]", s.data());
test_simd_ops<VecWidth::k256>(ctx);
}
if (filtered.x86().has_avx512_f()) {
INFO("Testing JIT compiler 512-bit SIMD ops with [%s]", s.data());
test_simd_ops<VecWidth::k512>(ctx);
}
first = false;
last_filtered = filtered;
}
}
}
#endif // ASMJIT_UJIT_X86
#if defined(ASMJIT_UJIT_AARCH64)
static void test_a64_ops(JitContext& ctx, const CpuFeatures& host_features) noexcept {
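// AArch64 needs no feature profiles here (ASIMD is part of the baseline), so all ops are
// tested directly with the full host feature set.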
ctx.features = host_features;
test_gp_ops(ctx);
test_simd_ops<VecWidth::k128>(ctx);
}
#endif // ASMJIT_UJIT_AARCH64
} // {UniCompilerTests}
UNIT(unicompiler) {
UniCompilerTests::JitContext ctx;
asmjit::CpuFeatures host_features = asmjit::CpuInfo::host().features();
#if defined(ASMJIT_UJIT_X86)
UniCompilerTests::test_x86_ops(ctx, host_features);
#elif defined(ASMJIT_UJIT_AARCH64)
UniCompilerTests::test_a64_ops(ctx, host_features);
#endif
}
int main(int argc, const char* argv[]) {
print_app_info();
return BrokenAPI::run(argc, argv);
}
#else
int main() {
print_app_info();
printf("!! This test is disabled: <ASMJIT_NO_[U]JIT> or unsuitable arvhitecture !!\n");
return 0;
}
#endif // !ASMJIT_NO_UJIT && !ASMJIT_NO_JIT