// This file is part of AsmJit project // // See or LICENSE.md for license and copyright information // SPDX-License-Identifier: Zlib #include #include #include #include #include #include static void print_app_info() noexcept { printf("AsmJit UniCompiler Test Suite v%u.%u.%u [Arch=%s] [Mode=%s]\n\n", unsigned((ASMJIT_LIBRARY_VERSION >> 16) ), unsigned((ASMJIT_LIBRARY_VERSION >> 8) & 0xFF), unsigned((ASMJIT_LIBRARY_VERSION ) & 0xFF), asmjit_arch_as_string(asmjit::Arch::kHost), asmjit_build_type() ); } #if !defined(ASMJIT_NO_UJIT) && !defined(ASMJIT_NO_JIT) #include namespace UniCompilerTests { using namespace asmjit; using namespace asmjit::ujit; // ujit::UniCompiler - Tests - Constants // ===================================== static constexpr uint64_t kRandomSeed = 0x1234u; static constexpr uint32_t kTestIterCount = 1000u; static ASMJIT_INLINE_CONSTEXPR uint32_t byte_width_from_vec_width(VecWidth vw) noexcept { return 16u << uint32_t(vw); } // ujit::UniCompiler - Tests - MulAdd // ================================== #if defined(ASMJIT_UJIT_X86) float fadd(float a, float b) noexcept; float fsub(float a, float b) noexcept; float fmul(float a, float b) noexcept; float fdiv(float a, float b) noexcept; float fsqrt(float a) noexcept; float fmadd_nofma_ref(float a, float b, float c) noexcept; float fmadd_fma_ref(float a, float b, float c) noexcept; double fadd(double a, double b) noexcept; double fsub(double a, double b) noexcept; double fmul(double a, double b) noexcept; double fdiv(double a, double b) noexcept; double fsqrt(double a) noexcept; double fmadd_nofma_ref(double a, double b, double c) noexcept; double fmadd_fma_ref(double a, double b, double c) noexcept; void madd_fma_check_valgrind_bug(const float a[4], const float b[4], const float c[4], float dst[4]) noexcept; #else static ASMJIT_NOINLINE float fadd(float a, float b) noexcept { return a + b; } static ASMJIT_NOINLINE float fsub(float a, float b) noexcept { return a - b; } static ASMJIT_NOINLINE float fmul(float a, float b) noexcept { return a * b; } static ASMJIT_NOINLINE float fdiv(float a, float b) noexcept { return a / b; } static ASMJIT_NOINLINE float fsqrt(float a) noexcept { return std::sqrt(a); } static ASMJIT_NOINLINE float fmadd_nofma_ref(float a, float b, float c) noexcept { return a * b + c; } static ASMJIT_NOINLINE float fmadd_fma_ref(float a, float b, float c) noexcept { return std::fma(a, b, c); } static ASMJIT_NOINLINE double fadd(double a, double b) noexcept { return a + b; } static ASMJIT_NOINLINE double fsub(double a, double b) noexcept { return a - b; } static ASMJIT_NOINLINE double fmul(double a, double b) noexcept { return a * b; } static ASMJIT_NOINLINE double fdiv(double a, double b) noexcept { return a / b; } static ASMJIT_NOINLINE double fsqrt(double a) noexcept { return std::sqrt(a); } static ASMJIT_NOINLINE double fmadd_nofma_ref(double a, double b, double c) noexcept { return fadd(fmul(a, b), c); } static ASMJIT_NOINLINE double fmadd_fma_ref(double a, double b, double c) noexcept { return std::fma(a, b, c); } #endif static ASMJIT_INLINE float fsign(float a) noexcept { return Support::bit_cast(Support::bit_cast(a) & (uint32_t(1) << 31)); } static ASMJIT_INLINE double fsign(double a) noexcept { return Support::bit_cast(Support::bit_cast(a) & (uint64_t(1) << 63)); } static ASMJIT_INLINE float fxor(float a, float b) noexcept { return Support::bit_cast(Support::bit_cast(a) ^ Support::bit_cast(b)); } static ASMJIT_INLINE double fxor(double a, double b) noexcept { return Support::bit_cast(Support::bit_cast(a) ^ Support::bit_cast(b)); } // ujit::UniCompiler - Tests - Types // ================================= struct Variation { uint32_t value; [[nodiscard]] ASMJIT_INLINE_CONSTEXPR bool operator==(uint32_t v) const noexcept { return value == v; } [[nodiscard]] ASMJIT_INLINE_CONSTEXPR bool operator!=(uint32_t v) const noexcept { return value != v; } [[nodiscard]] ASMJIT_INLINE_CONSTEXPR bool operator<=(uint32_t v) const noexcept { return value <= v; } [[nodiscard]] ASMJIT_INLINE_CONSTEXPR bool operator< (uint32_t v) const noexcept { return value < v; } [[nodiscard]] ASMJIT_INLINE_CONSTEXPR bool operator>=(uint32_t v) const noexcept { return value >= v; } [[nodiscard]] ASMJIT_INLINE_CONSTEXPR bool operator> (uint32_t v) const noexcept { return value > v; } }; // ujit::UniCompiler - Tests - JIT Function Prototypes // =================================================== typedef uint32_t (*TestCondRRFunc)(int32_t a, int32_t b); typedef uint32_t (*TestCondRIFunc)(int32_t a); typedef void (*TestMFunc)(void* ptr); typedef uintptr_t (*TestRMFunc)(uintptr_t reg, void* ptr); typedef void (*TestMRFunc)(void* ptr, uintptr_t reg); typedef uint32_t (*TestRRFunc)(uint32_t a); typedef uint32_t (*TestRRRFunc)(uint32_t a, uint32_t b); typedef uint32_t (*TestRRIFunc)(uint32_t a); typedef void (*TestVVFunc)(void* dst, const void* src); typedef void (*TestVVVFunc)(void* dst, const void* src1, const void* src2); typedef void (*TestVVVVFunc)(void* dst, const void* src1, const void* src2, const void* src3); // ujit::UniCompiler - Tests - JIT Context Error Handler // ===================================================== class TestErrorHandler : public ErrorHandler { public: TestErrorHandler() noexcept {} ~TestErrorHandler() noexcept override {} void handle_error(Error err, const char* message, BaseEmitter* origin) override { Support::maybe_unused(origin); EXPECT_EQ(err, Error::kOk) .message("AsmJit Error: %s", message); } }; // ujit::UniCompiler - Tests - JIT Context for Testing // =================================================== class JitContext { public: JitRuntime rt; CpuFeatures features {}; CpuHints cpu_hints {}; #if !defined(ASMJIT_NO_LOGGING) StringLogger logger; #endif // !ASMJIT_NO_LOGGING TestErrorHandler eh; CodeHolder code; BackendCompiler cc; void prepare() noexcept { code.reset(); code.init(rt.environment()); code.set_error_handler(&eh); #if !defined(ASMJIT_NO_LOGGING) logger.clear(); code.set_logger(&logger); #endif // !ASMJIT_NO_LOGGING code.attach(&cc); cc.add_diagnostic_options(DiagnosticOptions::kRAAnnotate); cc.add_diagnostic_options(DiagnosticOptions::kValidateAssembler); cc.add_diagnostic_options(DiagnosticOptions::kValidateIntermediate); } template Fn finish() { Fn fn; EXPECT_EQ(cc.finalize(), Error::kOk); EXPECT_EQ(rt.add(&fn, &code), Error::kOk); code.reset(); return fn; } #if !defined(ASMJIT_NO_LOGGING) const char* logger_content() const noexcept { return logger.data(); } #else const char* logger_content() const noexcept { return ""; } #endif }; // ujit::UniCompiler - Tests - Conditional Operations - Functions // ============================================================== static TestCondRRFunc create_func_cond_rr(JitContext& ctx, UniOpCond op, CondCode cond_code, uint32_t variation) { ctx.prepare(); UniCompiler uc(&ctx.cc, ctx.features, ctx.cpu_hints); uc.init_vec_width(VecWidth::k128); FuncNode* node = uc.add_func(FuncSignature::build()); EXPECT_NOT_NULL(node); Gp a = uc.new_gp32("a"); Gp b = uc.new_gp32("b"); Gp result = uc.new_gp32("result"); node->set_arg(0, a); node->set_arg(1, b); switch (variation) { case 0: { // Test a conditional branch based on the given condition. Label done = uc.new_label(); uc.mov(result, 1); uc.j(done, UniCondition(op, cond_code, a, b)); uc.mov(result, 0); uc.bind(done); break; } case 1: { // Test a cmov functionality. Gp true_value = uc.new_gp32("true_value"); uc.mov(result, 0); uc.mov(true_value, 1); uc.cmov(result, true_value, UniCondition(op, cond_code, a, b)); break; } case 2: { // Test a select functionality. Gp false_value = uc.new_gp32("false_value"); Gp true_value = uc.new_gp32("true_value"); uc.mov(false_value, 0); uc.mov(true_value, 1); uc.select(result, true_value, false_value, UniCondition(op, cond_code, a, b)); break; } } uc.ret(result); uc.end_func(); return ctx.finish(); } static TestCondRIFunc create_func_cond_ri(JitContext& ctx, UniOpCond op, CondCode cond_code, Imm b_imm) { ctx.prepare(); UniCompiler uc(&ctx.cc, ctx.features, ctx.cpu_hints); uc.init_vec_width(VecWidth::k128); FuncNode* node = uc.add_func(FuncSignature::build()); EXPECT_NOT_NULL(node); Gp a = uc.new_gp32("a"); Gp result = uc.new_gp32("result"); Label done = uc.new_label(); node->set_arg(0, a); uc.mov(result, 1); uc.j(done, UniCondition(op, cond_code, a, b_imm)); uc.mov(result, 0); uc.bind(done); uc.ret(result); uc.end_func(); return ctx.finish(); } // ujit::UniCompiler - Tests - Conditional Operations - Runner // =========================================================== static ASMJIT_NOINLINE void test_conditional_op(JitContext& ctx, UniOpCond op, CondCode cond_code, int32_t a, int32_t b, bool expected) { for (uint32_t variation = 0; variation < 3; variation++) { TestCondRRFunc fn_rr = create_func_cond_rr(ctx, op, cond_code, variation); TestCondRIFunc fn_ri = create_func_cond_ri(ctx, op, cond_code, b); uint32_t observed_rr = fn_rr(a, b); EXPECT_EQ(observed_rr, uint32_t(expected)) .message("Operation failed (RR):\n" " Input #1: %d\n" " Input #2: %d\n" " Expected: %d\n" " Observed: %d\n" "Assembly:\n%s", a, b, uint32_t(expected), observed_rr, ctx.logger_content()); uint32_t observed_ri = fn_ri(a); EXPECT_EQ(observed_ri, uint32_t(expected)) .message("Operation failed (RI):\n" " Input #1: %d\n" " Input #2: %d\n" " Expected: %d\n" " Observed: %d\n" "Assembly:\n%s", a, b, uint32_t(expected), observed_ri, ctx.logger_content()); ctx.rt.reset(); } } static ASMJIT_NOINLINE void test_cond_ops(JitContext& ctx) { test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kEqual, 0, 0, true); test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kEqual, 1, 1, true); test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kEqual, 1, 2, false); test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kEqual, 100, 31, false); test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kNotEqual, 0, 0, false); test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kNotEqual, 1, 1, false); test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kNotEqual, 1, 2, true); test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kNotEqual, 100, 31, true); test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kUnsignedGT, 0, 0, false); test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kUnsignedGT, 1, 0, true); test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kUnsignedGT, 111111, 0, true); test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kUnsignedGT, 111111, 222, true); test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kUnsignedGT, 222, 111111, false); test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kUnsignedGT, 222, 111, true); test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kUnsignedGE, 0, 0, true); test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kUnsignedGE, 1, 0, true); test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kUnsignedGE, 111111, 0, true); test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kUnsignedGE, 111111, 111111, true); test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kUnsignedGE, 111111, 222, true); test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kUnsignedGE, 222, 111111, false); test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kUnsignedLT, 0, 0, false); test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kUnsignedLT, 1, 0, false); test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kUnsignedLT, 0, 1, true); test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kUnsignedLT, 111111, 0, false); test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kUnsignedLT, 111111, 222, false); test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kUnsignedLT, 222, 111111, true); test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kUnsignedLT, 222, 111, false); test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kUnsignedLE, 0, 0, true); test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kUnsignedLE, 1, 0, false); test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kUnsignedLE, 0, 1, true); test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kUnsignedLE, 111111, 0, false); test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kUnsignedLE, 111111, 222, false); test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kUnsignedLE, 222, 111111, true); test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kUnsignedLE, 22222, 22222, true); test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kSignedGT, 0, 0, false); test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kSignedGT, 1, 0, true); test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kSignedGT, 111111, 0, true); test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kSignedGT, 111111, -222, true); test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kSignedGT, -222, 111111, false); test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kSignedGT, -222, -111, false); test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kSignedGT, -111, -1, false); test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kSignedGE, 0, 0, true); test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kSignedGE, 1, 0, true); test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kSignedGE, 111111, 0, true); test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kSignedGE, 111111, 111111, true); test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kSignedGE, 111111, -222, true); test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kSignedGE, -222, 111111, false); test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kSignedGE, -111, -1, false); test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kSignedGE, -111, -111, true); test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kSignedLT, 0, 0, false); test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kSignedLT, 1, 0, false); test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kSignedLT, 111111, 0, false); test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kSignedLT, 111111, -222, false); test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kSignedLT, -222, 111111, true); test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kSignedLT, -222, -111, true); test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kSignedLT, -111, -1, true); test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kSignedLT, -1, -1, false); test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kSignedLE, 0, 0, true); test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kSignedLE, 1, 0, false); test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kSignedLE, 111111, 0, false); test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kSignedLE, 111111, -222, false); test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kSignedLE, -222, 111111, true); test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kSignedLE, -222, -111, true); test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kSignedLE, -111, -1, true); test_conditional_op(ctx, UniOpCond::kCompare, CondCode::kSignedLE, -1, -1, true); test_conditional_op(ctx, UniOpCond::kTest, CondCode::kZero, 0, 0, true); test_conditional_op(ctx, UniOpCond::kTest, CondCode::kZero, 1, 0, true); test_conditional_op(ctx, UniOpCond::kTest, CondCode::kZero, 111111, 0, true); test_conditional_op(ctx, UniOpCond::kTest, CondCode::kZero, 111111, -222, false); test_conditional_op(ctx, UniOpCond::kTest, CondCode::kZero, -222, 111111, false); test_conditional_op(ctx, UniOpCond::kTest, CondCode::kNotZero, 0, 0, false); test_conditional_op(ctx, UniOpCond::kTest, CondCode::kNotZero, 1, 0, false); test_conditional_op(ctx, UniOpCond::kTest, CondCode::kNotZero, 111111, 0, false); test_conditional_op(ctx, UniOpCond::kTest, CondCode::kNotZero, 111111, -222, true); test_conditional_op(ctx, UniOpCond::kTest, CondCode::kNotZero, -222, 111111, true); test_conditional_op(ctx, UniOpCond::kBitTest, CondCode::kBTZero, int32_t(0x0), 0, true); test_conditional_op(ctx, UniOpCond::kBitTest, CondCode::kBTZero, int32_t(0x1), 0, false); test_conditional_op(ctx, UniOpCond::kBitTest, CondCode::kBTZero, int32_t(0xFF), 7, false); test_conditional_op(ctx, UniOpCond::kBitTest, CondCode::kBTZero, int32_t(0xFF), 9, true); test_conditional_op(ctx, UniOpCond::kBitTest, CondCode::kBTZero, int32_t(0xFFFFFFFF), 31, false); test_conditional_op(ctx, UniOpCond::kBitTest, CondCode::kBTZero, int32_t(0x7FFFFFFF), 31, true); test_conditional_op(ctx, UniOpCond::kBitTest, CondCode::kBTNotZero, int32_t(0x0), 0, false); test_conditional_op(ctx, UniOpCond::kBitTest, CondCode::kBTNotZero, int32_t(0x1), 0, true); test_conditional_op(ctx, UniOpCond::kBitTest, CondCode::kBTNotZero, int32_t(0xFF), 7, true); test_conditional_op(ctx, UniOpCond::kBitTest, CondCode::kBTNotZero, int32_t(0xFF), 9, false); test_conditional_op(ctx, UniOpCond::kBitTest, CondCode::kBTNotZero, int32_t(0xFFFFFFFF), 31, true); test_conditional_op(ctx, UniOpCond::kBitTest, CondCode::kBTNotZero, int32_t(0x7FFFFFFF), 31, false); test_conditional_op(ctx, UniOpCond::kAssignAnd, CondCode::kZero, int32_t(0x00000000), int32_t(0x00000000), true); test_conditional_op(ctx, UniOpCond::kAssignAnd, CondCode::kZero, int32_t(0x00000001), int32_t(0x00000000), true); test_conditional_op(ctx, UniOpCond::kAssignAnd, CondCode::kZero, int32_t(0x000000FF), int32_t(0x00000000), true); test_conditional_op(ctx, UniOpCond::kAssignAnd, CondCode::kZero, int32_t(0x000000FF), int32_t(0x000000FF), false); test_conditional_op(ctx, UniOpCond::kAssignAnd, CondCode::kZero, int32_t(0xFFFFFFFF), int32_t(0xFF000000), false); test_conditional_op(ctx, UniOpCond::kAssignAnd, CondCode::kZero, int32_t(0x7FFFFFFF), int32_t(0x80000000), true); test_conditional_op(ctx, UniOpCond::kAssignAnd, CondCode::kNotZero, int32_t(0x00000000), int32_t(0x00000000), false); test_conditional_op(ctx, UniOpCond::kAssignAnd, CondCode::kNotZero, int32_t(0x00000001), int32_t(0x00000000), false); test_conditional_op(ctx, UniOpCond::kAssignAnd, CondCode::kNotZero, int32_t(0x000000FF), int32_t(0x00000000), false); test_conditional_op(ctx, UniOpCond::kAssignAnd, CondCode::kNotZero, int32_t(0x000000FF), int32_t(0x000000FF), true); test_conditional_op(ctx, UniOpCond::kAssignAnd, CondCode::kNotZero, int32_t(0xFFFFFFFF), int32_t(0xFF000000), true); test_conditional_op(ctx, UniOpCond::kAssignAnd, CondCode::kNotZero, int32_t(0x7FFFFFFF), int32_t(0x80000000), false); test_conditional_op(ctx, UniOpCond::kAssignOr, CondCode::kZero, int32_t(0x00000000), int32_t(0x00000000), true); test_conditional_op(ctx, UniOpCond::kAssignOr, CondCode::kZero, int32_t(0x00000001), int32_t(0x00000000), false); test_conditional_op(ctx, UniOpCond::kAssignOr, CondCode::kZero, int32_t(0x000000FF), int32_t(0x00000000), false); test_conditional_op(ctx, UniOpCond::kAssignOr, CondCode::kZero, int32_t(0x000000FF), int32_t(0x000000FF), false); test_conditional_op(ctx, UniOpCond::kAssignOr, CondCode::kZero, int32_t(0xFFFFFFFF), int32_t(0xFF000000), false); test_conditional_op(ctx, UniOpCond::kAssignOr, CondCode::kZero, int32_t(0x7FFFFFFF), int32_t(0x80000000), false); test_conditional_op(ctx, UniOpCond::kAssignOr, CondCode::kNotZero, int32_t(0x00000000), int32_t(0x00000000), false); test_conditional_op(ctx, UniOpCond::kAssignOr, CondCode::kNotZero, int32_t(0x00000001), int32_t(0x00000000), true); test_conditional_op(ctx, UniOpCond::kAssignOr, CondCode::kNotZero, int32_t(0x000000FF), int32_t(0x00000000), true); test_conditional_op(ctx, UniOpCond::kAssignOr, CondCode::kNotZero, int32_t(0x000000FF), int32_t(0x000000FF), true); test_conditional_op(ctx, UniOpCond::kAssignOr, CondCode::kNotZero, int32_t(0xFFFFFFFF), int32_t(0xFF000000), true); test_conditional_op(ctx, UniOpCond::kAssignOr, CondCode::kNotZero, int32_t(0x7FFFFFFF), int32_t(0x80000000), true); test_conditional_op(ctx, UniOpCond::kAssignXor, CondCode::kZero, int32_t(0x00000000), int32_t(0x00000000), true); test_conditional_op(ctx, UniOpCond::kAssignXor, CondCode::kZero, int32_t(0x00000001), int32_t(0x00000000), false); test_conditional_op(ctx, UniOpCond::kAssignXor, CondCode::kZero, int32_t(0x000000FF), int32_t(0x00000000), false); test_conditional_op(ctx, UniOpCond::kAssignXor, CondCode::kZero, int32_t(0x000000FF), int32_t(0x000000FF), true); test_conditional_op(ctx, UniOpCond::kAssignXor, CondCode::kZero, int32_t(0xFFFFFFFF), int32_t(0xFF000000), false); test_conditional_op(ctx, UniOpCond::kAssignXor, CondCode::kZero, int32_t(0x7FFFFFFF), int32_t(0x80000000), false); test_conditional_op(ctx, UniOpCond::kAssignXor, CondCode::kNotZero, int32_t(0x00000000), int32_t(0x00000000), false); test_conditional_op(ctx, UniOpCond::kAssignXor, CondCode::kNotZero, int32_t(0x00000001), int32_t(0x00000000), true); test_conditional_op(ctx, UniOpCond::kAssignXor, CondCode::kNotZero, int32_t(0x000000FF), int32_t(0x00000000), true); test_conditional_op(ctx, UniOpCond::kAssignXor, CondCode::kNotZero, int32_t(0x000000FF), int32_t(0x000000FF), false); test_conditional_op(ctx, UniOpCond::kAssignXor, CondCode::kNotZero, int32_t(0xFFFFFFFF), int32_t(0xFF000000), true); test_conditional_op(ctx, UniOpCond::kAssignXor, CondCode::kNotZero, int32_t(0x7FFFFFFF), int32_t(0x80000000), true); test_conditional_op(ctx, UniOpCond::kAssignAdd, CondCode::kZero, int32_t(0x00000000), int32_t(0x00000000), true); test_conditional_op(ctx, UniOpCond::kAssignAdd, CondCode::kZero, int32_t(0xFF000000), int32_t(0x01000000), true); test_conditional_op(ctx, UniOpCond::kAssignAdd, CondCode::kZero, int32_t(0x000000FF), int32_t(0x00000000), false); test_conditional_op(ctx, UniOpCond::kAssignAdd, CondCode::kZero, int32_t(0x000000FF), int32_t(0x000000FF), false); test_conditional_op(ctx, UniOpCond::kAssignAdd, CondCode::kZero, int32_t(0xFFFFFFFF), int32_t(0xFF000000), false); test_conditional_op(ctx, UniOpCond::kAssignAdd, CondCode::kZero, int32_t(0x7FFFFFFF), int32_t(0x80000000), false); test_conditional_op(ctx, UniOpCond::kAssignAdd, CondCode::kNotZero, int32_t(0x00000000), int32_t(0x00000000), false); test_conditional_op(ctx, UniOpCond::kAssignAdd, CondCode::kNotZero, int32_t(0xFF000000), int32_t(0x01000000), false); test_conditional_op(ctx, UniOpCond::kAssignAdd, CondCode::kNotZero, int32_t(0x000000FF), int32_t(0x00000000), true); test_conditional_op(ctx, UniOpCond::kAssignAdd, CondCode::kNotZero, int32_t(0x000000FF), int32_t(0x000000FF), true); test_conditional_op(ctx, UniOpCond::kAssignAdd, CondCode::kNotZero, int32_t(0xFFFFFFFF), int32_t(0xFF000000), true); test_conditional_op(ctx, UniOpCond::kAssignAdd, CondCode::kNotZero, int32_t(0x7FFFFFFF), int32_t(0x80000000), true); test_conditional_op(ctx, UniOpCond::kAssignAdd, CondCode::kCarry, int32_t(0x00000000), int32_t(0x00000000), false); test_conditional_op(ctx, UniOpCond::kAssignAdd, CondCode::kCarry, int32_t(0xFF000000), int32_t(0x01000000), true); test_conditional_op(ctx, UniOpCond::kAssignAdd, CondCode::kCarry, int32_t(0x000000FF), int32_t(0x00000000), false); test_conditional_op(ctx, UniOpCond::kAssignAdd, CondCode::kCarry, int32_t(0x000000FF), int32_t(0x000000FF), false); test_conditional_op(ctx, UniOpCond::kAssignAdd, CondCode::kCarry, int32_t(0xFFFFFFFF), int32_t(0xFF000000), true); test_conditional_op(ctx, UniOpCond::kAssignAdd, CondCode::kCarry, int32_t(0x7FFFFFFF), int32_t(0x80000000), false); test_conditional_op(ctx, UniOpCond::kAssignAdd, CondCode::kNotCarry, int32_t(0x00000000), int32_t(0x00000000), true); test_conditional_op(ctx, UniOpCond::kAssignAdd, CondCode::kNotCarry, int32_t(0xFF000000), int32_t(0x01000000), false); test_conditional_op(ctx, UniOpCond::kAssignAdd, CondCode::kNotCarry, int32_t(0x000000FF), int32_t(0x00000000), true); test_conditional_op(ctx, UniOpCond::kAssignAdd, CondCode::kNotCarry, int32_t(0x000000FF), int32_t(0x000000FF), true); test_conditional_op(ctx, UniOpCond::kAssignAdd, CondCode::kNotCarry, int32_t(0xFFFFFFFF), int32_t(0xFF000000), false); test_conditional_op(ctx, UniOpCond::kAssignAdd, CondCode::kNotCarry, int32_t(0x7FFFFFFF), int32_t(0x80000000), true); test_conditional_op(ctx, UniOpCond::kAssignAdd, CondCode::kSign, int32_t(0x00000000), int32_t(0x00000000), false); test_conditional_op(ctx, UniOpCond::kAssignAdd, CondCode::kSign, int32_t(0xFF000000), int32_t(0x01000000), false); test_conditional_op(ctx, UniOpCond::kAssignAdd, CondCode::kSign, int32_t(0x000000FF), int32_t(0x80000000), true); test_conditional_op(ctx, UniOpCond::kAssignAdd, CondCode::kSign, int32_t(0x000000FF), int32_t(0x800000FF), true); test_conditional_op(ctx, UniOpCond::kAssignAdd, CondCode::kSign, int32_t(0xFFFFFFFF), int32_t(0xFF000000), true); test_conditional_op(ctx, UniOpCond::kAssignAdd, CondCode::kSign, int32_t(0x7FFFFFFF), int32_t(0x80000000), true); test_conditional_op(ctx, UniOpCond::kAssignAdd, CondCode::kNotSign, int32_t(0x00000000), int32_t(0x00000000), true); test_conditional_op(ctx, UniOpCond::kAssignAdd, CondCode::kNotSign, int32_t(0xFF000000), int32_t(0x01000000), true); test_conditional_op(ctx, UniOpCond::kAssignAdd, CondCode::kNotSign, int32_t(0x000000FF), int32_t(0x80000000), false); test_conditional_op(ctx, UniOpCond::kAssignAdd, CondCode::kNotSign, int32_t(0x000000FF), int32_t(0x800000FF), false); test_conditional_op(ctx, UniOpCond::kAssignAdd, CondCode::kNotSign, int32_t(0xFFFFFFFF), int32_t(0xFF000000), false); test_conditional_op(ctx, UniOpCond::kAssignAdd, CondCode::kNotSign, int32_t(0x7FFFFFFF), int32_t(0x80000000), false); test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kZero, int32_t(0x00000000), int32_t(0x00000000), true); test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kZero, int32_t(0xFF000000), int32_t(0x01000000), false); test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kZero, int32_t(0x000000FF), int32_t(0x00000000), false); test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kZero, int32_t(0x000000FF), int32_t(0x000000FF), true); test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kZero, int32_t(0xFFFFFFFF), int32_t(0xFF000000), false); test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kZero, int32_t(0x7FFFFFFF), int32_t(0x80000000), false); test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kNotZero, int32_t(0x00000000), int32_t(0x00000000), false); test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kNotZero, int32_t(0xFF000000), int32_t(0x01000000), true); test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kNotZero, int32_t(0x000000FF), int32_t(0x00000000), true); test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kNotZero, int32_t(0x000000FF), int32_t(0x000000FF), false); test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kNotZero, int32_t(0xFFFFFFFF), int32_t(0xFF000000), true); test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kNotZero, int32_t(0x7FFFFFFF), int32_t(0x80000000), true); test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kUnsignedLT, int32_t(0x00000000), int32_t(0x00000000), false); test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kUnsignedLT, int32_t(0xFF000000), int32_t(0x01000000), false); test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kUnsignedLT, int32_t(0x000000FF), int32_t(0x00000000), false); test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kUnsignedLT, int32_t(0x000000FF), int32_t(0x000000FF), false); test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kUnsignedLT, int32_t(0xFFFFFFFF), int32_t(0xFF000000), false); test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kUnsignedLT, int32_t(0x7FFFFFFF), int32_t(0x80000000), true); test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kUnsignedLT, int32_t(0x00000111), int32_t(0x0000F0FF), true); test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kUnsignedGE, int32_t(0x00000000), int32_t(0x00000000), true); test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kUnsignedGE, int32_t(0xFF000000), int32_t(0x01000000), true); test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kUnsignedGE, int32_t(0x000000FF), int32_t(0x00000000), true); test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kUnsignedGE, int32_t(0x000000FF), int32_t(0x000000FF), true); test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kUnsignedGE, int32_t(0xFFFFFFFF), int32_t(0xFF000000), true); test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kUnsignedGE, int32_t(0x7FFFFFFF), int32_t(0x80000000), false); test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kSign, int32_t(0x00000000), int32_t(0x00000000), false); test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kSign, int32_t(0x00000000), int32_t(0xFFFFFFFF), false); test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kSign, int32_t(0x00000000), int32_t(0x00000001), true); test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kSign, int32_t(0x00000001), int32_t(0x00000010), true); test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kSign, int32_t(0xFFFFFFFF), int32_t(0xFF000000), false); test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kSign, int32_t(0x7FFFFFFF), int32_t(0x80000000), true); test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kNotSign, int32_t(0x00000000), int32_t(0x00000000), true); test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kNotSign, int32_t(0x00000000), int32_t(0xFFFFFFFF), true); test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kNotSign, int32_t(0x00000000), int32_t(0x00000001), false); test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kNotSign, int32_t(0x00000001), int32_t(0x00000010), false); test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kNotSign, int32_t(0xFFFFFFFF), int32_t(0xFF000000), true); test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kNotSign, int32_t(0x7FFFFFFF), int32_t(0x80000000), false); test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kUnsignedGT, int32_t(0x00000000), int32_t(0x00000000), false); test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kUnsignedGT, int32_t(0xFF000000), int32_t(0x01000000), true); test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kUnsignedGT, int32_t(0x000000FF), int32_t(0x00000000), true); test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kUnsignedGT, int32_t(0x000000FF), int32_t(0x000000FF), false); test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kUnsignedGT, int32_t(0xFFFFFFFF), int32_t(0xFF000000), true); test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kUnsignedGT, int32_t(0x7FFFFFFF), int32_t(0x80000000), false); test_conditional_op(ctx, UniOpCond::kAssignSub, CondCode::kUnsignedGT, int32_t(0x00000111), int32_t(0x0000F0FF), false); test_conditional_op(ctx, UniOpCond::kAssignShr, CondCode::kZero, int32_t(0x00000000), 1, true); test_conditional_op(ctx, UniOpCond::kAssignShr, CondCode::kZero, int32_t(0x000000FF), 8, true); test_conditional_op(ctx, UniOpCond::kAssignShr, CondCode::kZero, int32_t(0x000000FF), 7, false); test_conditional_op(ctx, UniOpCond::kAssignShr, CondCode::kZero, int32_t(0xFFFFFFFF), 31, false); test_conditional_op(ctx, UniOpCond::kAssignShr, CondCode::kZero, int32_t(0x7FFFFFFF), 31, true); test_conditional_op(ctx, UniOpCond::kAssignShr, CondCode::kNotZero, int32_t(0x00000000), 1, false); test_conditional_op(ctx, UniOpCond::kAssignShr, CondCode::kNotZero, int32_t(0x000000FF), 8, false); test_conditional_op(ctx, UniOpCond::kAssignShr, CondCode::kNotZero, int32_t(0x000000FF), 7, true); test_conditional_op(ctx, UniOpCond::kAssignShr, CondCode::kNotZero, int32_t(0xFFFFFFFF), 31, true); test_conditional_op(ctx, UniOpCond::kAssignShr, CondCode::kNotZero, int32_t(0x7FFFFFFF), 31, false); } // ujit::UniCompiler - Tests - M Operations - Functions // ==================================================== static TestMFunc create_func_m(JitContext& ctx, UniOpM op) { ctx.prepare(); UniCompiler uc(&ctx.cc, ctx.features, ctx.cpu_hints); uc.init_vec_width(VecWidth::k128); FuncNode* node = uc.add_func(FuncSignature::build()); EXPECT_NOT_NULL(node); Gp ptr = uc.new_gpz("ptr"); node->set_arg(0, ptr); uc.emit_m(op, mem_ptr(ptr)); uc.end_func(); return ctx.finish(); } // ujit::UniCompiler - Tests - M Operations - Runner // ================================================= static ASMJIT_NOINLINE void test_m_ops(JitContext& ctx) { uint8_t buffer[8]; TestMFunc fn_zero_u8 = create_func_m(ctx, UniOpM::kStoreZeroU8); memcpy(buffer, "ABCDEFGH", 8); fn_zero_u8(buffer + 0); EXPECT_EQ(memcmp(buffer, "\0BCDEFGH", 8), 0); fn_zero_u8(buffer + 5); EXPECT_EQ(memcmp(buffer, "\0BCDE\0GH", 8), 0); TestMFunc fn_zero_u16 = create_func_m(ctx, UniOpM::kStoreZeroU16); memcpy(buffer, "ABCDEFGH", 8); fn_zero_u16(buffer + 0); EXPECT_EQ(memcmp(buffer, "\0\0CDEFGH", 8), 0); fn_zero_u16(buffer + 4); EXPECT_EQ(memcmp(buffer, "\0\0CD\0\0GH", 8), 0); TestMFunc fn_zero_u32 = create_func_m(ctx, UniOpM::kStoreZeroU32); memcpy(buffer, "ABCDEFGH", 8); fn_zero_u32(buffer + 0); EXPECT_EQ(memcmp(buffer, "\0\0\0\0EFGH", 8), 0); fn_zero_u32(buffer + 4); EXPECT_EQ(memcmp(buffer, "\0\0\0\0\0\0\0\0", 8), 0); #if ASMJIT_ARCH_BITS >= 64 TestMFunc fn_zero_u64 = create_func_m(ctx, UniOpM::kStoreZeroU64); memcpy(buffer, "ABCDEFGH", 8); fn_zero_u64(buffer + 0); EXPECT_EQ(memcmp(buffer, "\0\0\0\0\0\0\0\0", 8), 0); #endif TestMFunc fn_zero_reg = create_func_m(ctx, UniOpM::kStoreZeroReg); memcpy(buffer, "ABCDEFGH", 8); fn_zero_reg(buffer + 0); #if ASMJIT_ARCH_BITS >= 64 EXPECT_EQ(memcmp(buffer, "\0\0\0\0\0\0\0\0", 8), 0); #else EXPECT_EQ(memcmp(buffer, "\0\0\0\0EFGH", 8), 0); #endif ctx.rt.reset(); } // ujit::UniCompiler - Tests - RM Operations - Functions // ===================================================== static TestRMFunc create_func_rm(JitContext& ctx, UniOpRM op) { ctx.prepare(); UniCompiler uc(&ctx.cc, ctx.features, ctx.cpu_hints); uc.init_vec_width(VecWidth::k128); FuncNode* node = uc.add_func(FuncSignature::build()); EXPECT_NOT_NULL(node); Gp reg = uc.new_gpz("reg"); Gp ptr = uc.new_gpz("ptr"); node->set_arg(0, reg); node->set_arg(1, ptr); uc.emit_rm(op, reg, mem_ptr(ptr)); uc.ret(reg); uc.end_func(); return ctx.finish(); } // ujit::UniCompiler - Tests - RM Operations - Runner // ================================================== static ASMJIT_NOINLINE void test_rm_ops(JitContext& ctx) { union Mem { uint8_t buffer[8]; uint16_t u8; uint16_t u16; uint32_t u32; uint64_t u64; }; Mem mem{}; TestRMFunc fn_load_i8 = create_func_rm(ctx, UniOpRM::kLoadI8); mem.u8 = uint8_t(int8_t(6)); EXPECT_EQ(fn_load_i8(0, mem.buffer), uintptr_t(intptr_t(6))); mem.u8 = uint8_t(int8_t(-6)); EXPECT_EQ(fn_load_i8(0, mem.buffer), uintptr_t(intptr_t(-6))); TestRMFunc fn_load_u8 = create_func_rm(ctx, UniOpRM::kLoadU8); mem.u8 = uint8_t(0x80); EXPECT_EQ(fn_load_u8(0, mem.buffer), 0x80u); mem.u8 = uint8_t(0xFF); EXPECT_EQ(fn_load_u8(0, mem.buffer), 0xFFu); TestRMFunc fn_load_i16 = create_func_rm(ctx, UniOpRM::kLoadI16); mem.u16 = uint16_t(int16_t(666)); EXPECT_EQ(fn_load_i16(0, mem.buffer), uintptr_t(intptr_t(666))); mem.u16 = uint16_t(int16_t(-666)); EXPECT_EQ(fn_load_i16(0, mem.buffer), uintptr_t(intptr_t(-666))); TestRMFunc fn_load_u16 = create_func_rm(ctx, UniOpRM::kLoadU16); mem.u16 = uint16_t(0x8000); EXPECT_EQ(fn_load_u16(0, mem.buffer), 0x8000u); mem.u16 = uint16_t(0xFEED); EXPECT_EQ(fn_load_u16(0, mem.buffer), 0xFEEDu); TestRMFunc fn_load_i32 = create_func_rm(ctx, UniOpRM::kLoadI32); mem.u32 = uint32_t(int32_t(666666)); EXPECT_EQ(fn_load_i32(0, mem.buffer), uintptr_t(intptr_t(666666))); mem.u32 = uint32_t(int32_t(-666666)); EXPECT_EQ(fn_load_i32(0, mem.buffer), uintptr_t(intptr_t(-666666))); TestRMFunc fn_load_u32 = create_func_rm(ctx, UniOpRM::kLoadU32); mem.u32 = 0x12345678; EXPECT_EQ(fn_load_u32(0, mem.buffer), uint32_t(0x12345678)); #if ASMJIT_ARCH_BITS >= 64 TestRMFunc fn_load_i64 = create_func_rm(ctx, UniOpRM::kLoadI64); mem.u64 = 0xF123456789ABCDEFu; EXPECT_EQ(fn_load_i64(0, mem.buffer), 0xF123456789ABCDEFu); TestRMFunc fn_load_u64 = create_func_rm(ctx, UniOpRM::kLoadU64); mem.u64 = 0xF123456789ABCDEFu; EXPECT_EQ(fn_load_u64(0, mem.buffer), 0xF123456789ABCDEFu); #endif TestRMFunc fn_load_reg = create_func_rm(ctx, UniOpRM::kLoadReg); mem.u64 = 0xF123456789ABCDEFu; #if ASMJIT_ARCH_BITS >= 64 EXPECT_EQ(fn_load_reg(0, mem.buffer), 0xF123456789ABCDEFu); #else EXPECT_EQ(fn_load_reg(0, mem.buffer), 0x89ABCDEFu); #endif TestRMFunc fn_load_merge_u8 = create_func_rm(ctx, UniOpRM::kLoadMergeU8); mem.u8 = uint8_t(0xAA); EXPECT_EQ(fn_load_merge_u8(0x1F2FFF00, mem.buffer), 0x1F2FFFAAu); TestRMFunc fn_load_shift_u8 = create_func_rm(ctx, UniOpRM::kLoadShiftU8); mem.u8 = uint8_t(0xAA); EXPECT_EQ(fn_load_shift_u8(0x002FFF00, mem.buffer), 0x2FFF00AAu); TestRMFunc fn_load_merge_u16 = create_func_rm(ctx, UniOpRM::kLoadMergeU16); mem.u16 = uint16_t(0xAABB); EXPECT_EQ(fn_load_merge_u16(0x1F2F0000, mem.buffer), 0x1F2FAABBu); TestRMFunc fn_load_shift_u16 = create_func_rm(ctx, UniOpRM::kLoadShiftU16); mem.u16 = uint16_t(0xAABB); EXPECT_EQ(fn_load_shift_u16(0x00001F2F, mem.buffer), 0x1F2FAABBu); ctx.rt.reset(); } // ujit::UniCompiler - Tests - MR Operations - Functions // ===================================================== static TestMRFunc create_func_mr(JitContext& ctx, UniOpMR op) { ctx.prepare(); UniCompiler uc(&ctx.cc, ctx.features, ctx.cpu_hints); uc.init_vec_width(VecWidth::k128); FuncNode* node = uc.add_func(FuncSignature::build()); EXPECT_NOT_NULL(node); Gp ptr = uc.new_gpz("ptr"); Gp reg = uc.new_gpz("reg"); node->set_arg(0, ptr); node->set_arg(1, reg); uc.emit_mr(op, mem_ptr(ptr), reg); uc.end_func(); return ctx.finish(); } // ujit::UniCompiler - Tests - MR Operations - Runner // ================================================== static ASMJIT_NOINLINE void test_mr_ops(JitContext& ctx) { union Mem { uint8_t buffer[8]; uint16_t u8; uint16_t u16; uint32_t u32; uint64_t u64; }; Mem mem{}; TestMRFunc fn_store_u8 = create_func_mr(ctx, UniOpMR::kStoreU8); memcpy(mem.buffer, "ABCDEFGH", 8); fn_store_u8(mem.buffer, 0x7A); EXPECT_EQ(memcmp(mem.buffer, "zBCDEFGH", 8), 0); TestMRFunc fn_store_u16 = create_func_mr(ctx, UniOpMR::kStoreU16); memcpy(mem.buffer, "ABCDEFGH", 8); fn_store_u16(mem.buffer, 0x7A7A); EXPECT_EQ(memcmp(mem.buffer, "zzCDEFGH", 8), 0); TestMRFunc fn_store_u32 = create_func_mr(ctx, UniOpMR::kStoreU32); memcpy(mem.buffer, "ABCDEFGH", 8); fn_store_u32(mem.buffer, 0x7A7A7A7A); EXPECT_EQ(memcmp(mem.buffer, "zzzzEFGH", 8), 0); #if ASMJIT_ARCH_BITS >= 64 TestMRFunc fn_store_u64 = create_func_mr(ctx, UniOpMR::kStoreU64); memcpy(mem.buffer, "ABCDEFGH", 8); fn_store_u64(mem.buffer, 0x7A7A7A7A7A7A7A7A); EXPECT_EQ(memcmp(mem.buffer, "zzzzzzzz", 8), 0); #endif TestMRFunc fn_store_reg = create_func_mr(ctx, UniOpMR::kStoreReg); memcpy(mem.buffer, "ABCDEFGH", 8); #if ASMJIT_ARCH_BITS >= 64 fn_store_reg(mem.buffer, 0x7A7A7A7A7A7A7A7A); EXPECT_EQ(memcmp(mem.buffer, "zzzzzzzz", 8), 0); #else fn_store_reg(mem.buffer, 0x7A7A7A7A); EXPECT_EQ(memcmp(mem.buffer, "zzzzEFGH", 8), 0); #endif TestMRFunc fn_add_u8 = create_func_mr(ctx, UniOpMR::kAddU8); mem.u64 = 0; mem.u8 = 42; fn_add_u8(mem.buffer, 13); EXPECT_EQ(mem.u8, 55u); EXPECT_EQ(memcmp(mem.buffer + 1, "\0\0\0\0\0\0\0", 7), 0); TestMRFunc fn_add_u16 = create_func_mr(ctx, UniOpMR::kAddU16); mem.u64 = 0; mem.u16 = 442; fn_add_u16(mem.buffer, 335); EXPECT_EQ(mem.u16, 777u); EXPECT_EQ(memcmp(mem.buffer + 2, "\0\0\0\0\0\0", 6), 0); TestMRFunc fn_add_u32 = create_func_mr(ctx, UniOpMR::kAddU32); mem.u64 = 0; mem.u32 = 442332; fn_add_u32(mem.buffer, 335223); EXPECT_EQ(mem.u32, 777555u); EXPECT_EQ(memcmp(mem.buffer + 4, "\0\0\0\0", 4), 0); #if ASMJIT_ARCH_BITS >= 64 TestMRFunc fn_add_u64 = create_func_mr(ctx, UniOpMR::kAddU64); mem.u64 = 0xF123456789ABCDEFu; fn_add_u64(mem.buffer, 0x0102030405060708u); EXPECT_EQ(mem.u64, 0xF225486B8EB1D4F7u); #endif TestMRFunc fn_add_reg = create_func_mr(ctx, UniOpMR::kAddReg); mem.u64 = 0xFFFFFFFFFFFFFFFF; #if ASMJIT_ARCH_BITS >= 64 fn_add_reg(mem.buffer, 1); EXPECT_EQ(mem.u64, 0u); #else mem.u32 = 0x01020304; fn_add_reg(mem.buffer, 0x02030405); EXPECT_EQ(mem.u32, 0x03050709u); EXPECT_EQ(memcmp(mem.buffer + 4, "\xFF\xFF\xFF\xFF", 4), 0); #endif ctx.rt.reset(); } // ujit::UniCompiler - Tests - RR Operations - Functions // ===================================================== static TestRRFunc create_func_rr(JitContext& ctx, UniOpRR op) { ctx.prepare(); UniCompiler uc(&ctx.cc, ctx.features, ctx.cpu_hints); uc.init_vec_width(VecWidth::k128); FuncNode* node = uc.add_func(FuncSignature::build()); EXPECT_NOT_NULL(node); Gp r = uc.new_gp32("r"); node->set_arg(0, r); uc.emit_2i(op, r, r); uc.ret(r); uc.end_func(); return ctx.finish(); } // ujit::UniCompiler - Tests - RR Operations - Runner // ================================================== static ASMJIT_NOINLINE void test_rr_ops(JitContext& ctx) { TestRRFunc fn_abs = create_func_rr(ctx, UniOpRR::kAbs); EXPECT_EQ(fn_abs(0u), 0u); EXPECT_EQ(fn_abs(1u), 1u); EXPECT_EQ(fn_abs(uint32_t(-1)), 1u); EXPECT_EQ(fn_abs(uint32_t(-333)), 333u); EXPECT_EQ(fn_abs(0x80000000u), 0x80000000u); TestRRFunc fn_neg = create_func_rr(ctx, UniOpRR::kNeg); EXPECT_EQ(fn_neg(0u), 0u); EXPECT_EQ(fn_neg(1u), uint32_t(-1)); EXPECT_EQ(fn_neg(uint32_t(-1)), 1u); EXPECT_EQ(fn_neg(uint32_t(-333)), 333u); EXPECT_EQ(fn_neg(333u), uint32_t(-333)); EXPECT_EQ(fn_neg(0x80000000u), 0x80000000u); TestRRFunc fn_not = create_func_rr(ctx, UniOpRR::kNot); EXPECT_EQ(fn_not(0u), 0xFFFFFFFFu); EXPECT_EQ(fn_not(1u), 0xFFFFFFFEu); EXPECT_EQ(fn_not(0xFFFFFFFF), 0u); EXPECT_EQ(fn_not(0x12333245), ~0x12333245u); EXPECT_EQ(fn_not(0x80000000u), 0x7FFFFFFFu); TestRRFunc fn_bswap32 = create_func_rr(ctx, UniOpRR::kBSwap); EXPECT_EQ(fn_bswap32(0x11223344u), 0x44332211u); EXPECT_EQ(fn_bswap32(0xFFFF0000u), 0x0000FFFFu); EXPECT_EQ(fn_bswap32(0x00000000u), 0x00000000u); TestRRFunc fn_clz32 = create_func_rr(ctx, UniOpRR::kCLZ); EXPECT_EQ(fn_clz32(0x80000000u), 0u); EXPECT_EQ(fn_clz32(0x40000000u), 1u); EXPECT_EQ(fn_clz32(0x00800000u), 8u); EXPECT_EQ(fn_clz32(0x00008000u), 16u); EXPECT_EQ(fn_clz32(0x00000080u), 24u); EXPECT_EQ(fn_clz32(0x00000001u), 31u); TestRRFunc fn_ctz32 = create_func_rr(ctx, UniOpRR::kCTZ); EXPECT_EQ(fn_ctz32(0x80000000u), 31u); EXPECT_EQ(fn_ctz32(0x40000000u), 30u); EXPECT_EQ(fn_ctz32(0x00800000u), 23u); EXPECT_EQ(fn_ctz32(0x00008000u), 15u); EXPECT_EQ(fn_ctz32(0x00000080u), 7u); EXPECT_EQ(fn_ctz32(0x00000001u), 0u); TestRRFunc fn_reflect = create_func_rr(ctx, UniOpRR::kReflect); EXPECT_EQ(fn_reflect(0x00000000u), 0x00000000u); EXPECT_EQ(fn_reflect(0x00FF0000u), 0x00FF0000u); EXPECT_EQ(fn_reflect(0x000000FFu), 0x000000FFu); EXPECT_EQ(fn_reflect(0x80000000u), 0x7FFFFFFFu); EXPECT_EQ(fn_reflect(0xFFFFFFFFu), 0x00000000u); EXPECT_EQ(fn_reflect(0x88FF0000u), 0x7700FFFFu); ctx.rt.reset(); } // ujit::UniCompiler - Tests - RRR Operations - Functions // ====================================================== static TestRRRFunc create_func_rrr(JitContext& ctx, UniOpRRR op) { ctx.prepare(); UniCompiler uc(&ctx.cc, ctx.features, ctx.cpu_hints); uc.init_vec_width(VecWidth::k128); FuncNode* node = uc.add_func(FuncSignature::build()); EXPECT_NOT_NULL(node); Gp a = uc.new_gp32("a"); Gp b = uc.new_gp32("b"); Gp result = uc.new_gp32("result"); node->set_arg(0, a); node->set_arg(1, b); uc.emit_3i(op, result, a, b); uc.ret(result); uc.end_func(); return ctx.finish(); } static TestRRIFunc create_func_rri(JitContext& ctx, UniOpRRR op, Imm bImm) { ctx.prepare(); UniCompiler uc(&ctx.cc, ctx.features, ctx.cpu_hints); uc.init_vec_width(VecWidth::k128); FuncNode* node = uc.add_func(FuncSignature::build()); EXPECT_NOT_NULL(node); Gp a = uc.new_gp32("a"); Gp result = uc.new_gp32("result"); node->set_arg(0, a); uc.emit_3i(op, result, a, bImm); uc.ret(result); uc.end_func(); return ctx.finish(); } // ujit::UniCompiler - Tests - RRR Operations - Runner // =================================================== static ASMJIT_NOINLINE void test_rrr_op(JitContext& ctx, UniOpRRR op, uint32_t a, uint32_t b, uint32_t expected) { TestRRRFunc fn_rrr = create_func_rrr(ctx, op); uint32_t observed_rrr = fn_rrr(a, b); EXPECT_EQ(observed_rrr, expected) .message("Operation failed (RRR):\n" " Input #1: %d\n" " Input #2: %d\n" " Expected: %d\n" " Observed: %d\n" "Assembly:\n%s", a, b, uint32_t(expected), observed_rrr, ctx.logger_content()); TestRRIFunc fn_rri = create_func_rri(ctx, op, Imm(b)); uint32_t observed_rri = fn_rri(a); EXPECT_EQ(observed_rri, expected) .message("Operation failed (RRI):\n" " Input #1: %d\n" " Input #2: %d\n" " Expected: %d\n" " Observed: %d\n" "Assembly:\n%s", a, b, uint32_t(expected), observed_rri, ctx.logger_content()); ctx.rt.reset(); } static ASMJIT_NOINLINE void test_rrr_ops(JitContext& ctx) { test_rrr_op(ctx, UniOpRRR::kAnd, 0u, 0u, 0u); test_rrr_op(ctx, UniOpRRR::kAnd, 0xFFu, 0x11u, 0x11u); test_rrr_op(ctx, UniOpRRR::kAnd, 0x11u, 0xFFu, 0x11u); test_rrr_op(ctx, UniOpRRR::kAnd, 0xFF11u, 0x1111u, 0x1111u); test_rrr_op(ctx, UniOpRRR::kAnd, 0x1111u, 0xFF11u, 0x1111u); test_rrr_op(ctx, UniOpRRR::kAnd, 0x0000FFFFu, 0xFFFF0000u, 0u); test_rrr_op(ctx, UniOpRRR::kAnd, 0xFFFFFFFFu, 0xFFFF0000u, 0xFFFF0000u); test_rrr_op(ctx, UniOpRRR::kAnd, 0x11111111u, 0x11223344u, 0x11001100u); test_rrr_op(ctx, UniOpRRR::kOr, 0u, 0u, 0u); test_rrr_op(ctx, UniOpRRR::kOr, 0xFFu, 0x11u, 0xFFu); test_rrr_op(ctx, UniOpRRR::kOr, 0x11u, 0xFFu, 0xFFu); test_rrr_op(ctx, UniOpRRR::kOr, 0xFF11u, 0x1111u, 0xFF11u); test_rrr_op(ctx, UniOpRRR::kOr, 0x1111u, 0xFF11u, 0xFF11u); test_rrr_op(ctx, UniOpRRR::kOr, 0x0000FFFFu, 0xFFFF0001u, 0xFFFFFFFFu); test_rrr_op(ctx, UniOpRRR::kOr, 0xFFFFFFFFu, 0xFF000000u, 0xFFFFFFFFu); test_rrr_op(ctx, UniOpRRR::kOr, 0x11111111u, 0x00223344u, 0x11333355u); test_rrr_op(ctx, UniOpRRR::kXor, 0u, 0u, 0u); test_rrr_op(ctx, UniOpRRR::kXor, 0xFFu, 0x11u, 0xEEu); test_rrr_op(ctx, UniOpRRR::kXor, 0x11u, 0xFFu, 0xEEu); test_rrr_op(ctx, UniOpRRR::kXor, 0xFF11u, 0x1111u, 0xEE00u); test_rrr_op(ctx, UniOpRRR::kXor, 0x1111u, 0xFF11u, 0xEE00u); test_rrr_op(ctx, UniOpRRR::kXor, 0x0000FFFFu, 0xFFFF0001u, 0xFFFFFFFEu); test_rrr_op(ctx, UniOpRRR::kXor, 0xFFFFFFFFu, 0xFF000000u, 0x00FFFFFFu); test_rrr_op(ctx, UniOpRRR::kXor, 0x11111111u, 0x00223344u, 0x11332255u); test_rrr_op(ctx, UniOpRRR::kBic, 0u, 0u, 0u); test_rrr_op(ctx, UniOpRRR::kBic, 0xFFu, 0x11u, 0xEEu); test_rrr_op(ctx, UniOpRRR::kBic, 0x11u, 0xFFu, 0x00u); test_rrr_op(ctx, UniOpRRR::kBic, 0xFF11u, 0x1111u, 0xEE00u); test_rrr_op(ctx, UniOpRRR::kBic, 0x1111u, 0xFF11u, 0x0000u); test_rrr_op(ctx, UniOpRRR::kBic, 0x0000FFFFu, 0xFFFF0000u, 0x0000FFFFu); test_rrr_op(ctx, UniOpRRR::kBic, 0xFFFFFFFFu, 0xFFFF0000u, 0x0000FFFFu); test_rrr_op(ctx, UniOpRRR::kBic, 0x11111111u, 0x11223344u, 0x00110011u); test_rrr_op(ctx, UniOpRRR::kAdd, 0u, 0u, 0u); test_rrr_op(ctx, UniOpRRR::kAdd, 1u, 2u, 3u); test_rrr_op(ctx, UniOpRRR::kAdd, 0xFF000000u, 0x00FFFFFFu, 0xFFFFFFFFu); test_rrr_op(ctx, UniOpRRR::kAdd, 1u, 0xFFFu, 0x1000u); test_rrr_op(ctx, UniOpRRR::kAdd, 1u, 0xFFF000u, 0xFFF001u); test_rrr_op(ctx, UniOpRRR::kSub, 1u, 2u, 0xFFFFFFFFu); test_rrr_op(ctx, UniOpRRR::kMul, 1000u, 999u, 999000u); test_rrr_op(ctx, UniOpRRR::kMul, 0xFFFFu, 0x00010001u, 0xFFFFFFFFu); test_rrr_op(ctx, UniOpRRR::kUDiv, 100000u, 1000u, 100u); test_rrr_op(ctx, UniOpRRR::kUMod, 1999u, 1000u, 999u); test_rrr_op(ctx, UniOpRRR::kSMin, uint32_t(1111), uint32_t(0), uint32_t(0)); test_rrr_op(ctx, UniOpRRR::kSMin, uint32_t(-1111), uint32_t(0), uint32_t(-1111)); test_rrr_op(ctx, UniOpRRR::kSMin, uint32_t(1), uint32_t(22), uint32_t(1)); test_rrr_op(ctx, UniOpRRR::kSMin, uint32_t(1), uint32_t(0), uint32_t(0)); test_rrr_op(ctx, UniOpRRR::kSMin, uint32_t(100101033), uint32_t(999), uint32_t(999)); test_rrr_op(ctx, UniOpRRR::kSMin, uint32_t(100101033), uint32_t(112), uint32_t(112)); test_rrr_op(ctx, UniOpRRR::kSMin, uint32_t(112), uint32_t(1125532), uint32_t(112)); test_rrr_op(ctx, UniOpRRR::kSMin, uint32_t(1111), uint32_t(-1), uint32_t(-1)); test_rrr_op(ctx, UniOpRRR::kSMin, uint32_t(-1111), uint32_t(-1), uint32_t(-1111)); test_rrr_op(ctx, UniOpRRR::kSMin, uint32_t(-1), uint32_t(-22), uint32_t(-22)); test_rrr_op(ctx, UniOpRRR::kSMin, uint32_t(-1), uint32_t(-128), uint32_t(-128)); test_rrr_op(ctx, UniOpRRR::kSMin, uint32_t(-128), uint32_t(-1), uint32_t(-128)); test_rrr_op(ctx, UniOpRRR::kSMin, uint32_t(-128), uint32_t(9), uint32_t(-128)); test_rrr_op(ctx, UniOpRRR::kSMin, uint32_t(12444), uint32_t(-1), uint32_t(-1)); test_rrr_op(ctx, UniOpRRR::kSMax, uint32_t(1), uint32_t(22), uint32_t(22)); test_rrr_op(ctx, UniOpRRR::kSMax, uint32_t(1), uint32_t(0), uint32_t(1)); test_rrr_op(ctx, UniOpRRR::kSMax, uint32_t(100101033), uint32_t(999), uint32_t(100101033)); test_rrr_op(ctx, UniOpRRR::kSMax, uint32_t(100101033), uint32_t(112), uint32_t(100101033)); test_rrr_op(ctx, UniOpRRR::kSMax, uint32_t(112), uint32_t(1125532), uint32_t(1125532)); test_rrr_op(ctx, UniOpRRR::kSMax, uint32_t(1111), uint32_t(-1), uint32_t(1111)); test_rrr_op(ctx, UniOpRRR::kSMax, uint32_t(-1111), uint32_t(-1), uint32_t(-1)); test_rrr_op(ctx, UniOpRRR::kSMax, uint32_t(-1), uint32_t(-22), uint32_t(-1)); test_rrr_op(ctx, UniOpRRR::kSMax, uint32_t(-1), uint32_t(-128), uint32_t(-1)); test_rrr_op(ctx, UniOpRRR::kSMax, uint32_t(-128), uint32_t(-1), uint32_t(-1)); test_rrr_op(ctx, UniOpRRR::kSMax, uint32_t(-128), uint32_t(9), uint32_t(9)); test_rrr_op(ctx, UniOpRRR::kSMax, uint32_t(12444), uint32_t(-1), uint32_t(12444)); test_rrr_op(ctx, UniOpRRR::kUMin, 1, 22, 1); test_rrr_op(ctx, UniOpRRR::kUMin, 22, 1, 1); test_rrr_op(ctx, UniOpRRR::kUMin, 1, 255, 1); test_rrr_op(ctx, UniOpRRR::kUMin, 255, 1, 1); test_rrr_op(ctx, UniOpRRR::kUMin, 1023, 255, 255); test_rrr_op(ctx, UniOpRRR::kUMin, 255, 1023, 255); test_rrr_op(ctx, UniOpRRR::kUMin, 0xFFFFFFFFu, 255, 255); test_rrr_op(ctx, UniOpRRR::kUMin, 255, 0xFFFFFFFFu, 255); test_rrr_op(ctx, UniOpRRR::kUMin, 0xFFFFFFFFu, 0xFFFFFF00u, 0xFFFFFF00u); test_rrr_op(ctx, UniOpRRR::kUMin, 0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu); test_rrr_op(ctx, UniOpRRR::kUMax, 1, 22, 22); test_rrr_op(ctx, UniOpRRR::kUMax, 22, 1, 22); test_rrr_op(ctx, UniOpRRR::kUMax, 1, 255, 255); test_rrr_op(ctx, UniOpRRR::kUMax, 255, 1, 255); test_rrr_op(ctx, UniOpRRR::kUMax, 1023, 255, 1023); test_rrr_op(ctx, UniOpRRR::kUMax, 255, 1023, 1023); test_rrr_op(ctx, UniOpRRR::kUMax, 0xFFFFFFFFu, 255, 0xFFFFFFFFu); test_rrr_op(ctx, UniOpRRR::kUMax, 255, 0xFFFFFFFFu, 0xFFFFFFFFu); test_rrr_op(ctx, UniOpRRR::kUMax, 0xFFFFFFFFu, 0xFFFFFF00u, 0xFFFFFFFFu); test_rrr_op(ctx, UniOpRRR::kUMax, 0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu); test_rrr_op(ctx, UniOpRRR::kSll, 1u, 1u, 1u << 1); test_rrr_op(ctx, UniOpRRR::kSll, 1u, 22u, 1u << 22); test_rrr_op(ctx, UniOpRRR::kSll, 1u, 31u, 1u << 31); test_rrr_op(ctx, UniOpRRR::kSll, 0x7FFFFFFFu, 1u, 0xFFFFFFFEu); test_rrr_op(ctx, UniOpRRR::kSrl, 1u, 1u, 1u >> 1); test_rrr_op(ctx, UniOpRRR::kSrl, 1u, 22u, 1u >> 22); test_rrr_op(ctx, UniOpRRR::kSrl, 1u, 31u, 1u >> 31); test_rrr_op(ctx, UniOpRRR::kSrl, 0x7FFFFFFFu, 1u, 0x7FFFFFFFu >> 1); test_rrr_op(ctx, UniOpRRR::kSra, 1u, 1u, 1u >> 1); test_rrr_op(ctx, UniOpRRR::kSra, 1u, 22u, 1u >> 22); test_rrr_op(ctx, UniOpRRR::kSra, 1u, 31u, 1u >> 31); test_rrr_op(ctx, UniOpRRR::kSra, 0x7FFFFFFFu, 1u, 0x7FFFFFFFu >> 1); test_rrr_op(ctx, UniOpRRR::kSra, 0xF0000000u, 4u, 0xFF000000u); test_rrr_op(ctx, UniOpRRR::kSra, 0x80000000u, 31u, 0xFFFFFFFFu); test_rrr_op(ctx, UniOpRRR::kRol, 0x11223344u, 8u, 0x22334411u); test_rrr_op(ctx, UniOpRRR::kRol, 0x11223344u, 16u, 0x33441122u); test_rrr_op(ctx, UniOpRRR::kRol, 0xFCFFDABBu, 1u, 0xF9FFB577u); test_rrr_op(ctx, UniOpRRR::kRor, 0x11223344u, 8u, 0x44112233u); test_rrr_op(ctx, UniOpRRR::kRor, 0x11223344u, 16u, 0x33441122u); test_rrr_op(ctx, UniOpRRR::kRor, 0xF0000000u, 1u, 0x78000000u); test_rrr_op(ctx, UniOpRRR::kSBound, 0, 244u, 0); test_rrr_op(ctx, UniOpRRR::kSBound, 42, 244u, 42u); test_rrr_op(ctx, UniOpRRR::kSBound, 1111, 244u, 244u); test_rrr_op(ctx, UniOpRRR::kSBound, 9999999, 111244u, 111244u); test_rrr_op(ctx, UniOpRRR::kSBound, uint32_t(int32_t(-1)), 1000u, 0u); test_rrr_op(ctx, UniOpRRR::kSBound, uint32_t(INT32_MIN), 100000u, 0u); test_rrr_op(ctx, UniOpRRR::kSBound, uint32_t(INT32_MAX), 0u, 0u); test_rrr_op(ctx, UniOpRRR::kSBound, uint32_t(INT32_MAX), 100000u, 100000u); test_rrr_op(ctx, UniOpRRR::kSBound, uint32_t(INT32_MAX), uint32_t(INT32_MAX), uint32_t(INT32_MAX)); } // ujit::UniCompiler - Tests - SIMD - Functions // ============================================ // The following variations are supported: // - 0 - separate destination & source registers // - 1 - destination register is a source register as well // - 2 - source is a memory operand // - 3 - source register is a GP register (only for broadcasts from a GP register, otherwise maps to 0) static constexpr uint32_t kNumVariationsVV = 3; static constexpr uint32_t kNumVariationsVV_Broadcast = 4; static TestVVFunc create_func_vv(JitContext& ctx, VecWidth vw, UniOpVV op, Variation variation = Variation{0}) { ctx.prepare(); UniCompiler uc(&ctx.cc, ctx.features, ctx.cpu_hints); uc.init_vec_width(vw); FuncNode* node = uc.add_func(FuncSignature::build()); EXPECT_NOT_NULL(node); Gp dst_ptr = uc.new_gpz("dst_ptr"); Gp src_ptr = uc.new_gpz("src_ptr"); node->set_arg(0, dst_ptr); node->set_arg(1, src_ptr); Vec dst_vec = uc.new_vec_with_width(vw, "dst_vec"); // There are some instructions that fill the high part of the register, so just zero the destination to make // sure that we can test this function (that the low part is actually zeroed and doesn't contain garbage). uc.v_zero_i(dst_vec); if (variation == 3u && (op == UniOpVV::kBroadcastU8 || op == UniOpVV::kBroadcastU8Z || op == UniOpVV::kBroadcastU32 || op == UniOpVV::kBroadcastU64 || op == UniOpVV::kBroadcastF32 || op == UniOpVV::kBroadcastF64)) { // This is used to test broadcasts from a GP register to a vector register. Gp src_gp = uc.new_gpz("src_gp"); switch (op) { case UniOpVV::kBroadcastU8: case UniOpVV::kBroadcastU8Z: uc.load_u8(src_gp, mem_ptr(src_ptr)); uc.emit_2v(op, dst_vec, src_gp); break; case UniOpVV::kBroadcastU16: case UniOpVV::kBroadcastU16Z: uc.load_u16(src_gp, mem_ptr(src_ptr)); uc.emit_2v(op, dst_vec, src_gp); break; case UniOpVV::kBroadcastU32: case UniOpVV::kBroadcastF32: uc.load_u32(src_gp, mem_ptr(src_ptr)); uc.emit_2v(op, dst_vec, src_gp); break; case UniOpVV::kBroadcastU64: case UniOpVV::kBroadcastF64: // Prevent using 64-bit registers on 32-bit architectures (that would fail). if (uc.is_64bit()) { uc.load_u64(src_gp, mem_ptr(src_ptr)); uc.emit_2v(op, dst_vec, src_gp); } else { uc.emit_2v(op, dst_vec, mem_ptr(src_ptr)); } break; default: ASMJIT_NOT_REACHED(); } } else if (variation == 2u) { uc.emit_2v(op, dst_vec, mem_ptr(src_ptr)); } else if (variation == 1u) { uc.v_loaduvec(dst_vec, mem_ptr(src_ptr)); uc.emit_2v(op, dst_vec, dst_vec); } else { Vec src_vec = uc.new_vec_with_width(vw, "src_vec"); uc.v_loaduvec(src_vec, mem_ptr(src_ptr)); uc.emit_2v(op, dst_vec, src_vec); } uc.v_storeuvec(mem_ptr(dst_ptr), dst_vec); uc.end_func(); return ctx.finish(); } // The following variations are supported: // - 0 - separate destination & source registers // - 1 - destination register is a source register as well // - 2 - source is a memory operand static constexpr uint32_t kNumVariationsVVI = 3; static TestVVFunc create_func_vvi(JitContext& ctx, VecWidth vw, UniOpVVI op, uint32_t imm, Variation variation = Variation{0}) { ctx.prepare(); UniCompiler uc(&ctx.cc, ctx.features, ctx.cpu_hints); uc.init_vec_width(vw); FuncNode* node = uc.add_func(FuncSignature::build()); EXPECT_NOT_NULL(node); Gp dst_ptr = uc.new_gpz("dst_ptr"); Gp src_ptr = uc.new_gpz("src_ptr"); node->set_arg(0, dst_ptr); node->set_arg(1, src_ptr); Vec src_vec = uc.new_vec_with_width(vw, "src_vec"); switch (variation.value) { default: case 0: { // There are some instructions that fill the high part of the register, so just zero the destination to make // sure that we can test this function (that the low part is actually zeroed and doesn't contain garbage). Vec dst_vec = uc.new_vec_with_width(vw, "dst_vec"); uc.v_zero_i(dst_vec); uc.v_loaduvec(src_vec, mem_ptr(src_ptr)); uc.emit_2vi(op, dst_vec, src_vec, imm); uc.v_storeuvec(mem_ptr(dst_ptr), dst_vec); break; } case 1: { uc.v_loaduvec(src_vec, mem_ptr(src_ptr)); uc.emit_2vi(op, src_vec, src_vec, imm); uc.v_storeuvec(mem_ptr(dst_ptr), src_vec); break; } case 2: { Vec dst_vec = uc.new_vec_with_width(vw, "dst_vec"); uc.emit_2vi(op, dst_vec, mem_ptr(src_ptr), imm); uc.v_storeuvec(mem_ptr(dst_ptr), dst_vec); break; } } uc.end_func(); return ctx.finish(); } // The following variations are supported: // - 0 - separate destination & source registers // - 1 - destination register is the same as the first source register // - 2 - destination register is the same as the second source register // - 3 - separate destination & source registers, the second source is a memory operand // - 4 - destination register is the same as the first source register, second source is a memory operand static constexpr uint32_t kNumVariationsVVV = 5; static TestVVVFunc create_func_vvv(JitContext& ctx, VecWidth vw, UniOpVVV op, Variation variation = Variation{0}) { ctx.prepare(); UniCompiler uc(&ctx.cc, ctx.features, ctx.cpu_hints); uc.init_vec_width(vw); FuncNode* node = uc.add_func(FuncSignature::build()); EXPECT_NOT_NULL(node); Gp dst_ptr = uc.new_gpz("dst_ptr"); Gp src1_ptr = uc.new_gpz("src1_ptr"); Gp src2_ptr = uc.new_gpz("src2_ptr"); node->set_arg(0, dst_ptr); node->set_arg(1, src1_ptr); node->set_arg(2, src2_ptr); Vec src1_vec = uc.new_vec_with_width(vw, "src1_vec"); Vec src2_vec = uc.new_vec_with_width(vw, "src2_vec"); switch (variation.value) { default: case 0: { // There are some instructions that fill the high part of the register, so just zero the destination to make // sure that we can test this function (that the low part is actually zeroed and doesn't contain garbage). Vec dst_vec = uc.new_vec_with_width(vw, "dst_vec"); uc.v_zero_i(dst_vec); uc.v_loaduvec(src1_vec, mem_ptr(src1_ptr)); uc.v_loaduvec(src2_vec, mem_ptr(src2_ptr)); uc.emit_3v(op, dst_vec, src1_vec, src2_vec); uc.v_storeuvec(mem_ptr(dst_ptr), dst_vec); break; } case 1: { uc.v_loaduvec(src1_vec, mem_ptr(src1_ptr)); uc.v_loaduvec(src2_vec, mem_ptr(src2_ptr)); uc.emit_3v(op, src1_vec, src1_vec, src2_vec); uc.v_storeuvec(mem_ptr(dst_ptr), src1_vec); break; } case 2: { uc.v_loaduvec(src1_vec, mem_ptr(src1_ptr)); uc.v_loaduvec(src2_vec, mem_ptr(src2_ptr)); uc.emit_3v(op, src2_vec, src1_vec, src2_vec); uc.v_storeuvec(mem_ptr(dst_ptr), src2_vec); break; } case 3: { Vec dst_vec = uc.new_vec_with_width(vw, "dst_vec"); uc.v_zero_i(dst_vec); uc.v_loaduvec(src1_vec, mem_ptr(src1_ptr)); uc.emit_3v(op, dst_vec, src1_vec, mem_ptr(src2_ptr)); uc.v_storeuvec(mem_ptr(dst_ptr), dst_vec); break; } case 4: { uc.v_loaduvec(src1_vec, mem_ptr(src1_ptr)); uc.emit_3v(op, src1_vec, src1_vec, mem_ptr(src2_ptr)); uc.v_storeuvec(mem_ptr(dst_ptr), src1_vec); break; } } uc.end_func(); return ctx.finish(); } static constexpr uint32_t kNumVariationsVVVI = 5; static TestVVVFunc create_func_vvvi(JitContext& ctx, VecWidth vw, UniOpVVVI op, uint32_t imm, Variation variation = Variation{0}) { ctx.prepare(); UniCompiler uc(&ctx.cc, ctx.features, ctx.cpu_hints); uc.init_vec_width(vw); FuncNode* node = uc.add_func(FuncSignature::build()); EXPECT_NOT_NULL(node); Gp dst_ptr = uc.new_gpz("dst_ptr"); Gp src1_ptr = uc.new_gpz("src1_ptr"); Gp src2_ptr = uc.new_gpz("src2_ptr"); node->set_arg(0, dst_ptr); node->set_arg(1, src1_ptr); node->set_arg(2, src2_ptr); Vec src1_vec = uc.new_vec_with_width(vw, "src1_vec"); Vec src2_vec = uc.new_vec_with_width(vw, "src2_vec"); switch (variation.value) { default: case 0: { // There are some instructions that fill the high part of the register, so just zero the destination to make // sure that we can test this function (that the low part is actually zeroed and doesn't contain garbage). Vec dst_vec = uc.new_vec_with_width(vw, "dst_vec"); uc.v_zero_i(dst_vec); uc.v_loaduvec(src1_vec, mem_ptr(src1_ptr)); uc.v_loaduvec(src2_vec, mem_ptr(src2_ptr)); uc.emit_3vi(op, dst_vec, src1_vec, src2_vec, imm); uc.v_storeuvec(mem_ptr(dst_ptr), dst_vec); break; } case 1: { uc.v_loaduvec(src1_vec, mem_ptr(src1_ptr)); uc.v_loaduvec(src2_vec, mem_ptr(src2_ptr)); uc.emit_3vi(op, src1_vec, src1_vec, src2_vec, imm); uc.v_storeuvec(mem_ptr(dst_ptr), src1_vec); break; } case 2: { uc.v_loaduvec(src1_vec, mem_ptr(src1_ptr)); uc.v_loaduvec(src2_vec, mem_ptr(src2_ptr)); uc.emit_3vi(op, src2_vec, src1_vec, src2_vec, imm); uc.v_storeuvec(mem_ptr(dst_ptr), src2_vec); break; } case 3: { Vec dst_vec = uc.new_vec_with_width(vw, "dst_vec"); uc.v_zero_i(dst_vec); uc.v_loaduvec(src1_vec, mem_ptr(src1_ptr)); uc.emit_3vi(op, dst_vec, src1_vec, mem_ptr(src2_ptr), imm); uc.v_storeuvec(mem_ptr(dst_ptr), dst_vec); break; } case 4: { uc.v_loaduvec(src1_vec, mem_ptr(src1_ptr)); uc.emit_3vi(op, src1_vec, src1_vec, mem_ptr(src2_ptr), imm); uc.v_storeuvec(mem_ptr(dst_ptr), src1_vec); break; } } uc.end_func(); return ctx.finish(); } // The following variations are supported: // - 0 - separate destination & source registers // - 1 - destination register is the first source register // - 2 - destination register is the second source register // - 3 - destination register is the third source register static constexpr uint32_t kNumVariationsVVVV = 4; static TestVVVVFunc create_func_vvvv(JitContext& ctx, VecWidth vw, UniOpVVVV op, Variation variation = Variation{0}) { ctx.prepare(); UniCompiler uc(&ctx.cc, ctx.features, ctx.cpu_hints); uc.init_vec_width(vw); FuncNode* node = uc.add_func(FuncSignature::build()); EXPECT_NOT_NULL(node); Gp dst_ptr = uc.new_gpz("dst_ptr"); Gp src1_ptr = uc.new_gpz("src1_ptr"); Gp src2_ptr = uc.new_gpz("src2_ptr"); Gp src3_ptr = uc.new_gpz("src3_ptr"); node->set_arg(0, dst_ptr); node->set_arg(1, src1_ptr); node->set_arg(2, src2_ptr); node->set_arg(3, src3_ptr); Vec src1_vec = uc.new_vec_with_width(vw, "src1_vec"); Vec src2_vec = uc.new_vec_with_width(vw, "src2_vec"); Vec src3_vec = uc.new_vec_with_width(vw, "src3_vec"); uc.v_loaduvec(src1_vec, mem_ptr(src1_ptr)); uc.v_loaduvec(src2_vec, mem_ptr(src2_ptr)); uc.v_loaduvec(src3_vec, mem_ptr(src3_ptr)); switch (variation.value) { default: case 0: { // There are some instructions that fill the high part of the register, so just zero the destination to make // sure that we can test this function (that the low part is actually zeroed and doesn't contain garbage). Vec dst_vec = uc.new_vec_with_width(vw, "dst_vec"); uc.v_zero_i(dst_vec); uc.emit_4v(op, dst_vec, src1_vec, src2_vec, src3_vec); uc.v_storeuvec(mem_ptr(dst_ptr), dst_vec); break; } case 1: { uc.emit_4v(op, src1_vec, src1_vec, src2_vec, src3_vec); uc.v_storeuvec(mem_ptr(dst_ptr), src1_vec); break; } case 2: { uc.emit_4v(op, src2_vec, src1_vec, src2_vec, src3_vec); uc.v_storeuvec(mem_ptr(dst_ptr), src2_vec); break; } case 3: { uc.emit_4v(op, src3_vec, src1_vec, src2_vec, src3_vec); uc.v_storeuvec(mem_ptr(dst_ptr), src3_vec); break; } } uc.end_func(); return ctx.finish(); } // ujit::UniCompiler - Tests - SIMD - Vector Overlay // ================================================= enum class VecElementType : uint8_t { kInt8, kInt16, kInt32, kInt64, kUInt8, kUInt16, kUInt32, kUInt64, kFloat32, kFloat64 }; struct VecOpInfo { uint32_t _data; ASMJIT_INLINE_NODEBUG uint32_t count() const noexcept { return _data >> 28; } ASMJIT_INLINE_NODEBUG VecElementType ret() const noexcept { return VecElementType((_data >> 24) & 0xFu); } ASMJIT_INLINE_NODEBUG VecElementType arg(uint32_t i) const noexcept { return VecElementType((_data >> (i * 4)) & 0xFu); } static ASMJIT_INLINE_NODEBUG VecOpInfo make(VecElementType ret, VecElementType arg0) noexcept { return VecOpInfo{(1u << 28) | (uint32_t(ret) << 24) | uint32_t(arg0)}; } static ASMJIT_INLINE_NODEBUG VecOpInfo make(VecElementType ret, VecElementType arg0, VecElementType arg1) noexcept { return VecOpInfo{(1u << 28) | (uint32_t(ret) << 24) | uint32_t(arg0) | (uint32_t(arg1) << 4)}; } static ASMJIT_INLINE_NODEBUG VecOpInfo make(VecElementType ret, VecElementType arg0, VecElementType arg1, VecElementType arg2) noexcept { return VecOpInfo{(1u << 28) | (uint32_t(ret) << 24) | uint32_t(arg0) | (uint32_t(arg1) << 4) | (uint32_t(arg2) << 8)}; } static ASMJIT_INLINE_NODEBUG VecOpInfo make(VecElementType ret, VecElementType arg0, VecElementType arg1, VecElementType arg2, VecElementType arg3) noexcept { return VecOpInfo{(1u << 28) | (uint32_t(ret) << 24) | uint32_t(arg0) | (uint32_t(arg1) << 4) | (uint32_t(arg2) << 8) | (uint32_t(arg3) << 12)}; } }; template struct alignas(16) VecOverlay { union { int8_t data_i8[kW]; uint8_t data_u8[kW]; int16_t data_i16[kW / 2u]; uint16_t data_u16[kW / 2u]; int32_t data_i32[kW / 4u]; uint32_t data_u32[kW / 4u]; int64_t data_i64[kW / 8u]; uint64_t data_u64[kW / 8u]; float data_f32[kW / 4u]; double data_f64[kW / 8u]; }; template ASMJIT_INLINE_NODEBUG T* data() noexcept; template ASMJIT_INLINE_NODEBUG const T* data() const noexcept; template ASMJIT_INLINE_NODEBUG T get(size_t index) const noexcept; template ASMJIT_INLINE_NODEBUG void set(size_t index, const T& value) noexcept; template ASMJIT_INLINE_NODEBUG void copy_16b_from(const VecOverlay& other) noexcept { data_u64[0] = other.data_u64[0]; data_u64[1] = other.data_u64[1]; } }; template struct VecAccess; template<> struct VecAccess { template static ASMJIT_INLINE_NODEBUG int8_t* data(VecOverlay& vec) noexcept { return vec.data_i8; } template static ASMJIT_INLINE_NODEBUG const int8_t* data(const VecOverlay& vec) noexcept { return vec.data_i8; } template static ASMJIT_INLINE_NODEBUG int8_t get(const VecOverlay& vec, size_t index) noexcept { return vec.data_i8[index]; } template static ASMJIT_INLINE_NODEBUG void set(VecOverlay& vec, size_t index, int8_t value) noexcept { vec.data_i8[index] = value; } }; template<> struct VecAccess { template static ASMJIT_INLINE_NODEBUG int16_t* data(VecOverlay& vec) noexcept { return vec.data_i16; } template static ASMJIT_INLINE_NODEBUG const int16_t* data(const VecOverlay& vec) noexcept { return vec.data_i16; } template static ASMJIT_INLINE_NODEBUG int16_t get(const VecOverlay& vec, size_t index) noexcept { return vec.data_i16[index]; } template static ASMJIT_INLINE_NODEBUG void set(VecOverlay& vec, size_t index, int16_t value) noexcept { vec.data_i16[index] = value; } }; template<> struct VecAccess { template static ASMJIT_INLINE_NODEBUG int32_t* data(VecOverlay& vec) noexcept { return vec.data_i32; } template static ASMJIT_INLINE_NODEBUG const int32_t* data(const VecOverlay& vec) noexcept { return vec.data_i32; } template static ASMJIT_INLINE_NODEBUG int32_t get(const VecOverlay& vec, size_t index) noexcept { return vec.data_i32[index]; } template static ASMJIT_INLINE_NODEBUG void set(VecOverlay& vec, size_t index, int32_t value) noexcept { vec.data_i32[index] = value; } }; template<> struct VecAccess { template static ASMJIT_INLINE_NODEBUG int64_t* data(VecOverlay& vec) noexcept { return vec.data_i64; } template static ASMJIT_INLINE_NODEBUG const int64_t* data(const VecOverlay& vec) noexcept { return vec.data_i64; } template static ASMJIT_INLINE_NODEBUG int64_t get(const VecOverlay& vec, size_t index) noexcept { return vec.data_i64[index]; } template static ASMJIT_INLINE_NODEBUG void set(VecOverlay& vec, size_t index, int64_t value) noexcept { vec.data_i64[index] = value; } }; template<> struct VecAccess { template static ASMJIT_INLINE_NODEBUG uint8_t* data(VecOverlay& vec) noexcept { return vec.data_u8; } template static ASMJIT_INLINE_NODEBUG const uint8_t* data(const VecOverlay& vec) noexcept { return vec.data_u8; } template static ASMJIT_INLINE_NODEBUG uint8_t get(const VecOverlay& vec, size_t index) noexcept { return vec.data_u8[index]; } template static ASMJIT_INLINE_NODEBUG void set(VecOverlay& vec, size_t index, uint8_t value) noexcept { vec.data_u8[index] = value; } }; template<> struct VecAccess { template static ASMJIT_INLINE_NODEBUG uint16_t* data(VecOverlay& vec) noexcept { return vec.data_u16; } template static ASMJIT_INLINE_NODEBUG const uint16_t* data(const VecOverlay& vec) noexcept { return vec.data_u16; } template static ASMJIT_INLINE_NODEBUG uint16_t get(const VecOverlay& vec, size_t index) noexcept { return vec.data_u16[index]; } template static ASMJIT_INLINE_NODEBUG void set(VecOverlay& vec, size_t index, uint16_t value) noexcept { vec.data_u16[index] = value; } }; template<> struct VecAccess { template static ASMJIT_INLINE_NODEBUG uint32_t* data(VecOverlay& vec) noexcept { return vec.data_u32; } template static ASMJIT_INLINE_NODEBUG const uint32_t* data(const VecOverlay& vec) noexcept { return vec.data_u32; } template static ASMJIT_INLINE_NODEBUG uint32_t get(const VecOverlay& vec, size_t index) noexcept { return vec.data_u32[index]; } template static ASMJIT_INLINE_NODEBUG void set(VecOverlay& vec, size_t index, uint32_t value) noexcept { vec.data_u32[index] = value; } }; template<> struct VecAccess { template static ASMJIT_INLINE_NODEBUG uint64_t* data(VecOverlay& vec) noexcept { return vec.data_u64; } template static ASMJIT_INLINE_NODEBUG const uint64_t* data(const VecOverlay& vec) noexcept { return vec.data_u64; } template static ASMJIT_INLINE_NODEBUG uint64_t get(const VecOverlay& vec, size_t index) noexcept { return vec.data_u64[index]; } template static ASMJIT_INLINE_NODEBUG void set(VecOverlay& vec, size_t index, uint64_t value) noexcept { vec.data_u64[index] = value; } }; template<> struct VecAccess { template static ASMJIT_INLINE_NODEBUG float* data(VecOverlay& vec) noexcept { return vec.data_f32; } template static ASMJIT_INLINE_NODEBUG const float* data(const VecOverlay& vec) noexcept { return vec.data_f32; } template static ASMJIT_INLINE_NODEBUG float get(const VecOverlay& vec, size_t index) noexcept { return vec.data_f32[index]; } template static ASMJIT_INLINE_NODEBUG void set(VecOverlay& vec, size_t index, float value) noexcept { vec.data_f32[index] = value; } }; template<> struct VecAccess { template static ASMJIT_INLINE_NODEBUG double* data(VecOverlay& vec) noexcept { return vec.data_f64; } template static ASMJIT_INLINE_NODEBUG const double* data(const VecOverlay& vec) noexcept { return vec.data_f64; } template static ASMJIT_INLINE_NODEBUG double get(const VecOverlay& vec, size_t index) noexcept { return vec.data_f64[index]; } template static ASMJIT_INLINE_NODEBUG void set(VecOverlay& vec, size_t index, double value) noexcept { vec.data_f64[index] = value; } }; template template ASMJIT_INLINE_NODEBUG T* VecOverlay::data() noexcept { return VecAccess::data(*this); } template template ASMJIT_INLINE_NODEBUG const T* VecOverlay::data() const noexcept { return VecAccess::data(*this); } template template ASMJIT_INLINE_NODEBUG T VecOverlay::get(size_t index) const noexcept { return VecAccess::get(*this, index); } template template ASMJIT_INLINE_NODEBUG void VecOverlay::set(size_t index, const T& value) noexcept { return VecAccess::set(*this, index, value); } template struct TypeNameToString {}; template<> struct TypeNameToString { static ASMJIT_INLINE_NODEBUG const char* get() noexcept { return "int8"; } }; template<> struct TypeNameToString { static ASMJIT_INLINE_NODEBUG const char* get() noexcept { return "int16"; } }; template<> struct TypeNameToString { static ASMJIT_INLINE_NODEBUG const char* get() noexcept { return "int32"; } }; template<> struct TypeNameToString { static ASMJIT_INLINE_NODEBUG const char* get() noexcept { return "int64"; } }; template<> struct TypeNameToString { static ASMJIT_INLINE_NODEBUG const char* get() noexcept { return "uint8"; } }; template<> struct TypeNameToString { static ASMJIT_INLINE_NODEBUG const char* get() noexcept { return "uint16"; } }; template<> struct TypeNameToString { static ASMJIT_INLINE_NODEBUG const char* get() noexcept { return "uint32"; } }; template<> struct TypeNameToString { static ASMJIT_INLINE_NODEBUG const char* get() noexcept { return "uint64"; } }; template<> struct TypeNameToString { static ASMJIT_INLINE_NODEBUG const char* get() noexcept { return "float32"; } }; template<> struct TypeNameToString { static ASMJIT_INLINE_NODEBUG const char* get() noexcept { return "float64"; } }; template static bool vec_eq(const VecOverlay& a, const VecOverlay& b) noexcept { return memcmp(a.data_u8, b.data_u8, kW) == 0; } template static bool float_eq(const T& a, const T& b) noexcept { return a == b || (std::isnan(a) && std::isnan(b)); } template static bool vec_eq(const VecOverlay& a, const VecOverlay& b, VecElementType element_type) noexcept { if (element_type == VecElementType::kFloat32) { size_t count = kW / sizeof(float); for (size_t i = 0; i < count; i++) { if (!float_eq(a.data_f32[i], b.data_f32[i])) { return false; } } return true; } else if (element_type == VecElementType::kFloat64) { size_t count = kW / sizeof(double); for (size_t i = 0; i < count; i++) { if (!float_eq(a.data_f64[i], b.data_f64[i])) { return false; } } return true; } else { return vec_eq(a, b); } } template static ASMJIT_NOINLINE String vec_stringify(const VecOverlay& vec, VecElementType element_type) noexcept { String s; s.append('{'); switch (element_type) { case VecElementType::kInt8 : { for (uint32_t i = 0; i < kW ; i++) s.append_format("%s%d" , i == 0 ? "" : ", ", vec.data_i8[i]); break; } case VecElementType::kInt16 : { for (uint32_t i = 0; i < kW / 2; i++) s.append_format("%s%d" , i == 0 ? "" : ", ", vec.data_i16[i]); break; } case VecElementType::kInt32 : { for (uint32_t i = 0; i < kW / 4; i++) s.append_format("%s%d" , i == 0 ? "" : ", ", vec.data_i32[i]); break; } case VecElementType::kInt64 : { for (uint32_t i = 0; i < kW / 8; i++) s.append_format("%s%lld", i == 0 ? "" : ", ", (long long)vec.data_i64[i]); break; } case VecElementType::kUInt8 : { for (uint32_t i = 0; i < kW ; i++) s.append_format("%s%u" , i == 0 ? "" : ", ", vec.data_u8[i]); break; } case VecElementType::kUInt16 : { for (uint32_t i = 0; i < kW / 2; i++) s.append_format("%s%u" , i == 0 ? "" : ", ", vec.data_u16[i]); break; } case VecElementType::kUInt32 : { for (uint32_t i = 0; i < kW / 4; i++) s.append_format("%s%u" , i == 0 ? "" : ", ", vec.data_u32[i]); break; } case VecElementType::kUInt64 : { for (uint32_t i = 0; i < kW / 8; i++) s.append_format("%s%llu", i == 0 ? "" : ", ", (unsigned long long)vec.data_u64[i]); break; } case VecElementType::kFloat32: { for (uint32_t i = 0; i < kW / 4; i++) s.append_format("%s%.20f" , i == 0 ? "" : ", ", double(vec.data_f32[i])); break; } case VecElementType::kFloat64: { for (uint32_t i = 0; i < kW / 8; i++) s.append_format("%s%.20f" , i == 0 ? "" : ", ", double(vec.data_f64[i])); break; } default: ASMJIT_NOT_REACHED(); } s.append('}'); return s; } // ujit::UniCompiler - Tests - SIMD - Metadata // =========================================== static const char* vec_op_name_vv(UniOpVV op) noexcept { switch (op) { case UniOpVV::kMov : return "v_mov"; case UniOpVV::kMovU64 : return "v_mov_u64"; case UniOpVV::kBroadcastU8Z : return "v_broadcast_u8z"; case UniOpVV::kBroadcastU16Z : return "v_broadcast_u16z"; case UniOpVV::kBroadcastU8 : return "v_broadcast_u8"; case UniOpVV::kBroadcastU16 : return "v_broadcast_u16"; case UniOpVV::kBroadcastU32 : return "v_broadcast_u32"; case UniOpVV::kBroadcastU64 : return "v_broadcast_u64"; case UniOpVV::kBroadcastF32 : return "v_broadcast_f32"; case UniOpVV::kBroadcastF64 : return "v_broadcast_f64"; case UniOpVV::kBroadcastV128_U32 : return "v_broadcast_v128_u32"; case UniOpVV::kBroadcastV128_U64 : return "v_broadcast_v128_u64"; case UniOpVV::kBroadcastV128_F32 : return "v_broadcast_v128_f32"; case UniOpVV::kBroadcastV128_F64 : return "v_broadcast_v128_f64"; case UniOpVV::kBroadcastV256_U32 : return "v_broadcast_v256_u32"; case UniOpVV::kBroadcastV256_U64 : return "v_broadcast_v256_u64"; case UniOpVV::kBroadcastV256_F32 : return "v_broadcast_v256_f32"; case UniOpVV::kBroadcastV256_F64 : return "v_broadcast_v256_f64"; case UniOpVV::kAbsI8 : return "v_abs_i8"; case UniOpVV::kAbsI16 : return "v_abs_i16"; case UniOpVV::kAbsI32 : return "v_abs_i32"; case UniOpVV::kAbsI64 : return "v_abs_i64"; case UniOpVV::kNotU32 : return "v_not_u32"; case UniOpVV::kNotU64 : return "v_not_u64"; case UniOpVV::kCvtI8LoToI16 : return "v_cvt_i8_lo_to_i16"; case UniOpVV::kCvtI8HiToI16 : return "v_cvt_i8_hi_to_i16"; case UniOpVV::kCvtU8LoToU16 : return "v_cvt_u8_lo_to_u16"; case UniOpVV::kCvtU8HiToU16 : return "v_cvt_u8_hi_to_u16"; case UniOpVV::kCvtI8ToI32 : return "v_cvt_i8_to_i32"; case UniOpVV::kCvtU8ToU32 : return "v_cvt_u8_to_u32"; case UniOpVV::kCvtI16LoToI32 : return "v_cvt_i16_lo_to_i32"; case UniOpVV::kCvtI16HiToI32 : return "v_cvt_i16_hi_to_i32"; case UniOpVV::kCvtU16LoToU32 : return "v_cvt_u16_lo_to_u32"; case UniOpVV::kCvtU16HiToU32 : return "v_cvt_u16_hi_to_u32"; case UniOpVV::kCvtI32LoToI64 : return "v_cvt_i32_lo_to_i64"; case UniOpVV::kCvtI32HiToI64 : return "v_cvt_i32_hi_to_i64"; case UniOpVV::kCvtU32LoToU64 : return "v_cvt_u32_lo_to_u64"; case UniOpVV::kCvtU32HiToU64 : return "v_cvt_u32_hi_to_u64"; case UniOpVV::kAbsF32S : return "s_abs_f32"; case UniOpVV::kAbsF64S : return "s_abs_f64"; case UniOpVV::kAbsF32 : return "v_abs_f32"; case UniOpVV::kAbsF64 : return "v_abs_f64"; case UniOpVV::kNegF32S : return "s_neg_f32"; case UniOpVV::kNegF64S : return "s_neg_f64"; case UniOpVV::kNegF32 : return "v_neg_f32"; case UniOpVV::kNegF64 : return "v_neg_f64"; case UniOpVV::kNotF32 : return "v_not_f32"; case UniOpVV::kNotF64 : return "v_not_f64"; case UniOpVV::kTruncF32S : return "v_trunc_f32s"; case UniOpVV::kTruncF64S : return "v_trunc_f64s"; case UniOpVV::kTruncF32 : return "v_trunc_f32"; case UniOpVV::kTruncF64 : return "v_trunc_f64"; case UniOpVV::kFloorF32S : return "v_floor_f32s"; case UniOpVV::kFloorF64S : return "v_floor_f64s"; case UniOpVV::kFloorF32 : return "v_floor_f32"; case UniOpVV::kFloorF64 : return "v_floor_f64"; case UniOpVV::kCeilF32S : return "v_ceil_f32s"; case UniOpVV::kCeilF64S : return "v_ceil_f64s"; case UniOpVV::kCeilF32 : return "v_ceil_f32"; case UniOpVV::kCeilF64 : return "v_ceil_f64"; case UniOpVV::kRoundEvenF32S : return "v_round_even_f32s"; case UniOpVV::kRoundEvenF64S : return "v_round_even_f64s"; case UniOpVV::kRoundEvenF32 : return "v_round_even_f32"; case UniOpVV::kRoundEvenF64 : return "v_round_even_f64"; case UniOpVV::kRoundHalfAwayF32S : return "v_round_half_away_f32s"; case UniOpVV::kRoundHalfAwayF64S : return "v_round_half_away_f64s"; case UniOpVV::kRoundHalfAwayF32 : return "v_round_half_away_f32"; case UniOpVV::kRoundHalfAwayF64 : return "v_round_half_away_f64"; case UniOpVV::kRoundHalfUpF32S : return "v_round_half_up_f32s"; case UniOpVV::kRoundHalfUpF64S : return "v_round_half_up_f64s"; case UniOpVV::kRoundHalfUpF32 : return "v_round_half_up_f32"; case UniOpVV::kRoundHalfUpF64 : return "v_round_half_up_f64"; case UniOpVV::kRcpF32 : return "v_rcp_f32"; case UniOpVV::kRcpF64 : return "v_rcp_f64"; case UniOpVV::kSqrtF32S : return "v_sqrt_f32s"; case UniOpVV::kSqrtF64S : return "v_sqrt_f64s"; case UniOpVV::kSqrtF32 : return "v_sqrt_f32"; case UniOpVV::kSqrtF64 : return "v_sqrt_f64"; case UniOpVV::kCvtF32ToF64S : return "v_cvt_f32_to_f64s"; case UniOpVV::kCvtF64ToF32S : return "v_cvt_f64_to_f32s"; case UniOpVV::kCvtI32ToF32 : return "v_cvt_i32_to_f32"; case UniOpVV::kCvtF32LoToF64 : return "v_cvt_f32_lo_to_f64"; case UniOpVV::kCvtF32HiToF64 : return "v_cvt_f32_hi_to_f64"; case UniOpVV::kCvtF64ToF32Lo : return "v_cvt_f64_to_f32_lo"; case UniOpVV::kCvtF64ToF32Hi : return "v_cvt_f64_to_f32_hi"; case UniOpVV::kCvtI32LoToF64 : return "v_cvt_i32_lo_to_f64"; case UniOpVV::kCvtI32HiToF64 : return "v_cvt_i32_hi_to_f64"; case UniOpVV::kCvtTruncF32ToI32 : return "v_cvt_trunc_f32_to_i32"; case UniOpVV::kCvtTruncF64ToI32Lo: return "v_cvt_trunc_f64_to_i32_lo"; case UniOpVV::kCvtTruncF64ToI32Hi: return "v_cvt_trunc_f64_to_i32_hi"; case UniOpVV::kCvtRoundF32ToI32 : return "v_cvt_round_f32_to_i32"; case UniOpVV::kCvtRoundF64ToI32Lo: return "v_cvt_round_f64_to_i32_lo"; case UniOpVV::kCvtRoundF64ToI32Hi: return "v_cvt_round_f64_to_i32_hi"; } ASMJIT_NOT_REACHED(); } static VecOpInfo vec_op_info_vv(UniOpVV op) noexcept { using VE = VecElementType; switch (op) { case UniOpVV::kMov : return VecOpInfo::make(VE::kUInt8, VE::kUInt8); case UniOpVV::kMovU64 : return VecOpInfo::make(VE::kUInt8, VE::kUInt8); case UniOpVV::kBroadcastU8Z : return VecOpInfo::make(VE::kUInt8, VE::kUInt8); case UniOpVV::kBroadcastU16Z : return VecOpInfo::make(VE::kUInt16, VE::kUInt16); case UniOpVV::kBroadcastU8 : return VecOpInfo::make(VE::kUInt8, VE::kUInt8); case UniOpVV::kBroadcastU16 : return VecOpInfo::make(VE::kUInt16, VE::kUInt16); case UniOpVV::kBroadcastU32 : return VecOpInfo::make(VE::kUInt32, VE::kUInt32); case UniOpVV::kBroadcastU64 : return VecOpInfo::make(VE::kUInt64, VE::kUInt64); case UniOpVV::kBroadcastF32 : return VecOpInfo::make(VE::kFloat32, VE::kFloat32); case UniOpVV::kBroadcastF64 : return VecOpInfo::make(VE::kFloat64, VE::kFloat64); case UniOpVV::kBroadcastV128_U32 : return VecOpInfo::make(VE::kUInt32, VE::kUInt32); case UniOpVV::kBroadcastV128_U64 : return VecOpInfo::make(VE::kUInt64, VE::kUInt64); case UniOpVV::kBroadcastV128_F32 : return VecOpInfo::make(VE::kFloat32, VE::kFloat32); case UniOpVV::kBroadcastV128_F64 : return VecOpInfo::make(VE::kFloat64, VE::kFloat64); case UniOpVV::kBroadcastV256_U32 : return VecOpInfo::make(VE::kUInt32, VE::kUInt32); case UniOpVV::kBroadcastV256_U64 : return VecOpInfo::make(VE::kUInt64, VE::kUInt64); case UniOpVV::kBroadcastV256_F32 : return VecOpInfo::make(VE::kFloat32, VE::kFloat32); case UniOpVV::kBroadcastV256_F64 : return VecOpInfo::make(VE::kFloat64, VE::kFloat64); case UniOpVV::kAbsI8 : return VecOpInfo::make(VE::kUInt8, VE::kInt8); case UniOpVV::kAbsI16 : return VecOpInfo::make(VE::kUInt16, VE::kInt16); case UniOpVV::kAbsI32 : return VecOpInfo::make(VE::kUInt32, VE::kInt32); case UniOpVV::kAbsI64 : return VecOpInfo::make(VE::kUInt64, VE::kInt64); case UniOpVV::kNotU32 : return VecOpInfo::make(VE::kUInt32, VE::kInt32); case UniOpVV::kNotU64 : return VecOpInfo::make(VE::kUInt64, VE::kInt64); case UniOpVV::kCvtI8LoToI16 : return VecOpInfo::make(VE::kInt16, VE::kInt8); case UniOpVV::kCvtI8HiToI16 : return VecOpInfo::make(VE::kInt16, VE::kInt8); case UniOpVV::kCvtU8LoToU16 : return VecOpInfo::make(VE::kUInt16, VE::kUInt8); case UniOpVV::kCvtU8HiToU16 : return VecOpInfo::make(VE::kUInt16, VE::kUInt8); case UniOpVV::kCvtI8ToI32 : return VecOpInfo::make(VE::kInt32, VE::kInt8); case UniOpVV::kCvtU8ToU32 : return VecOpInfo::make(VE::kUInt32, VE::kUInt8); case UniOpVV::kCvtI16LoToI32 : return VecOpInfo::make(VE::kInt32, VE::kInt16); case UniOpVV::kCvtI16HiToI32 : return VecOpInfo::make(VE::kInt32, VE::kInt16); case UniOpVV::kCvtU16LoToU32 : return VecOpInfo::make(VE::kUInt32, VE::kUInt16); case UniOpVV::kCvtU16HiToU32 : return VecOpInfo::make(VE::kUInt32, VE::kUInt16); case UniOpVV::kCvtI32LoToI64 : return VecOpInfo::make(VE::kInt64, VE::kInt32); case UniOpVV::kCvtI32HiToI64 : return VecOpInfo::make(VE::kInt64, VE::kInt32); case UniOpVV::kCvtU32LoToU64 : return VecOpInfo::make(VE::kUInt64, VE::kUInt32); case UniOpVV::kCvtU32HiToU64 : return VecOpInfo::make(VE::kUInt64, VE::kUInt32); case UniOpVV::kAbsF32S : return VecOpInfo::make(VE::kFloat32, VE::kFloat32); case UniOpVV::kAbsF64S : return VecOpInfo::make(VE::kFloat64, VE::kFloat64); case UniOpVV::kAbsF32 : return VecOpInfo::make(VE::kFloat32, VE::kFloat32); case UniOpVV::kAbsF64 : return VecOpInfo::make(VE::kFloat64, VE::kFloat64); case UniOpVV::kNegF32S : return VecOpInfo::make(VE::kFloat32, VE::kFloat32); case UniOpVV::kNegF64S : return VecOpInfo::make(VE::kFloat64, VE::kFloat64); case UniOpVV::kNegF32 : return VecOpInfo::make(VE::kFloat32, VE::kFloat32); case UniOpVV::kNegF64 : return VecOpInfo::make(VE::kFloat64, VE::kFloat64); case UniOpVV::kNotF32 : return VecOpInfo::make(VE::kUInt32, VE::kUInt32); case UniOpVV::kNotF64 : return VecOpInfo::make(VE::kUInt64, VE::kUInt64); case UniOpVV::kTruncF32S : return VecOpInfo::make(VE::kFloat32, VE::kFloat32); case UniOpVV::kTruncF64S : return VecOpInfo::make(VE::kFloat64, VE::kFloat64); case UniOpVV::kTruncF32 : return VecOpInfo::make(VE::kFloat32, VE::kFloat32); case UniOpVV::kTruncF64 : return VecOpInfo::make(VE::kFloat64, VE::kFloat64); case UniOpVV::kFloorF32S : return VecOpInfo::make(VE::kFloat32, VE::kFloat32); case UniOpVV::kFloorF64S : return VecOpInfo::make(VE::kFloat64, VE::kFloat64); case UniOpVV::kFloorF32 : return VecOpInfo::make(VE::kFloat32, VE::kFloat32); case UniOpVV::kFloorF64 : return VecOpInfo::make(VE::kFloat64, VE::kFloat64); case UniOpVV::kCeilF32S : return VecOpInfo::make(VE::kFloat32, VE::kFloat32); case UniOpVV::kCeilF64S : return VecOpInfo::make(VE::kFloat64, VE::kFloat64); case UniOpVV::kCeilF32 : return VecOpInfo::make(VE::kFloat32, VE::kFloat32); case UniOpVV::kCeilF64 : return VecOpInfo::make(VE::kFloat64, VE::kFloat64); case UniOpVV::kRoundEvenF32S : return VecOpInfo::make(VE::kFloat32, VE::kFloat32); case UniOpVV::kRoundEvenF64S : return VecOpInfo::make(VE::kFloat64, VE::kFloat64); case UniOpVV::kRoundEvenF32 : return VecOpInfo::make(VE::kFloat32, VE::kFloat32); case UniOpVV::kRoundEvenF64 : return VecOpInfo::make(VE::kFloat64, VE::kFloat64); case UniOpVV::kRoundHalfAwayF32S : return VecOpInfo::make(VE::kFloat32, VE::kFloat32); case UniOpVV::kRoundHalfAwayF64S : return VecOpInfo::make(VE::kFloat64, VE::kFloat64); case UniOpVV::kRoundHalfAwayF32 : return VecOpInfo::make(VE::kFloat32, VE::kFloat32); case UniOpVV::kRoundHalfAwayF64 : return VecOpInfo::make(VE::kFloat64, VE::kFloat64); case UniOpVV::kRoundHalfUpF32S : return VecOpInfo::make(VE::kFloat32, VE::kFloat32); case UniOpVV::kRoundHalfUpF64S : return VecOpInfo::make(VE::kFloat64, VE::kFloat64); case UniOpVV::kRoundHalfUpF32 : return VecOpInfo::make(VE::kFloat32, VE::kFloat32); case UniOpVV::kRoundHalfUpF64 : return VecOpInfo::make(VE::kFloat64, VE::kFloat64); case UniOpVV::kRcpF32 : return VecOpInfo::make(VE::kFloat32, VE::kFloat32); case UniOpVV::kRcpF64 : return VecOpInfo::make(VE::kFloat64, VE::kFloat64); case UniOpVV::kSqrtF32S : return VecOpInfo::make(VE::kFloat32, VE::kFloat32); case UniOpVV::kSqrtF64S : return VecOpInfo::make(VE::kFloat64, VE::kFloat64); case UniOpVV::kSqrtF32 : return VecOpInfo::make(VE::kFloat32, VE::kFloat32); case UniOpVV::kSqrtF64 : return VecOpInfo::make(VE::kFloat64, VE::kFloat64); case UniOpVV::kCvtF32ToF64S : return VecOpInfo::make(VE::kFloat64, VE::kFloat32); case UniOpVV::kCvtF64ToF32S : return VecOpInfo::make(VE::kFloat32, VE::kFloat64); case UniOpVV::kCvtI32ToF32 : return VecOpInfo::make(VE::kFloat32, VE::kInt32); case UniOpVV::kCvtF32LoToF64 : return VecOpInfo::make(VE::kFloat64, VE::kFloat32); case UniOpVV::kCvtF32HiToF64 : return VecOpInfo::make(VE::kFloat64, VE::kFloat32); case UniOpVV::kCvtF64ToF32Lo : return VecOpInfo::make(VE::kFloat32, VE::kFloat64); case UniOpVV::kCvtF64ToF32Hi : return VecOpInfo::make(VE::kFloat32, VE::kFloat64); case UniOpVV::kCvtI32LoToF64 : return VecOpInfo::make(VE::kFloat64, VE::kInt32); case UniOpVV::kCvtI32HiToF64 : return VecOpInfo::make(VE::kFloat64, VE::kInt32); case UniOpVV::kCvtTruncF32ToI32 : return VecOpInfo::make(VE::kInt32, VE::kFloat32); case UniOpVV::kCvtTruncF64ToI32Lo: return VecOpInfo::make(VE::kInt32, VE::kFloat64); case UniOpVV::kCvtTruncF64ToI32Hi: return VecOpInfo::make(VE::kInt32, VE::kFloat64); case UniOpVV::kCvtRoundF32ToI32 : return VecOpInfo::make(VE::kInt32, VE::kFloat32); case UniOpVV::kCvtRoundF64ToI32Lo: return VecOpInfo::make(VE::kInt32, VE::kFloat64); case UniOpVV::kCvtRoundF64ToI32Hi: return VecOpInfo::make(VE::kInt32, VE::kFloat64); } ASMJIT_NOT_REACHED(); } static const char* vec_op_name_vvi(UniOpVVI op) noexcept { switch (op) { case UniOpVVI::kSllU16 : return "v_sll_u16"; case UniOpVVI::kSllU32 : return "v_sll_u32"; case UniOpVVI::kSllU64 : return "v_sll_u64"; case UniOpVVI::kSrlU16 : return "v_srl_u16"; case UniOpVVI::kSrlU32 : return "v_srl_u32"; case UniOpVVI::kSrlU64 : return "v_srl_u64"; case UniOpVVI::kSraI16 : return "v_sra_i16"; case UniOpVVI::kSraI32 : return "v_sra_i32"; case UniOpVVI::kSraI64 : return "v_sra_i64"; case UniOpVVI::kSllbU128 : return "v_sllb_u128"; case UniOpVVI::kSrlbU128 : return "v_srlb_u128"; case UniOpVVI::kSwizzleU16x4 : return "v_swizzle_u16x4"; case UniOpVVI::kSwizzleLoU16x4 : return "v_swizzle_lo_u16x4"; case UniOpVVI::kSwizzleHiU16x4 : return "v_swizzle_hi_u16x4"; case UniOpVVI::kSwizzleU32x4 : return "v_swizzle_u32x4"; case UniOpVVI::kSwizzleU64x2 : return "v_swizzle_u64x2"; case UniOpVVI::kSwizzleF32x4 : return "v_swizzle_f32x4"; case UniOpVVI::kSwizzleF64x2 : return "v_swizzle_f64x2"; case UniOpVVI::kSwizzleU64x4 : return "v_swizzle_u64x4"; case UniOpVVI::kSwizzleF64x4 : return "v_swizzle_f64x4"; case UniOpVVI::kExtractV128_I32: return "v_extract_v128_i32"; case UniOpVVI::kExtractV128_I64: return "v_extract_v128_i64"; case UniOpVVI::kExtractV128_F32: return "v_extract_v128_f32"; case UniOpVVI::kExtractV128_F64: return "v_extract_v128_f64"; case UniOpVVI::kExtractV256_I32: return "v_extract_v256_i32"; case UniOpVVI::kExtractV256_I64: return "v_extract_v256_i64"; case UniOpVVI::kExtractV256_F32: return "v_extract_v256_f32"; case UniOpVVI::kExtractV256_F64: return "v_extract_v256_f64"; #if defined(ASMJIT_UJIT_AARCH64) case UniOpVVI::kSrlRndU16 : return "v_srl_rnd_u16"; case UniOpVVI::kSrlRndU32 : return "v_srl_rnd_u32"; case UniOpVVI::kSrlRndU64 : return "v_srl_rnd_u64"; case UniOpVVI::kSrlAccU16 : return "v_srl_acc_u16"; case UniOpVVI::kSrlAccU32 : return "v_srl_acc_u32"; case UniOpVVI::kSrlAccU64 : return "v_srl_acc_u64"; case UniOpVVI::kSrlRndAccU16 : return "v_srl_rnd_acc_u16"; case UniOpVVI::kSrlRndAccU32 : return "v_srl_rnd_acc_u32"; case UniOpVVI::kSrlRndAccU64 : return "v_srl_rnd_acc_u64"; case UniOpVVI::kSrlnLoU16 : return "v_srln_lo_u16"; case UniOpVVI::kSrlnHiU16 : return "v_srln_hi_u16"; case UniOpVVI::kSrlnLoU32 : return "v_srln_lo_u32"; case UniOpVVI::kSrlnHiU32 : return "v_srln_hi_u32"; case UniOpVVI::kSrlnLoU64 : return "v_srln_lo_u64"; case UniOpVVI::kSrlnHiU64 : return "v_srln_hi_u64"; case UniOpVVI::kSrlnRndLoU16 : return "v_srln_rnd_lo_u16"; case UniOpVVI::kSrlnRndHiU16 : return "v_srln_rnd_hi_u16"; case UniOpVVI::kSrlnRndLoU32 : return "v_srln_rnd_lo_u32"; case UniOpVVI::kSrlnRndHiU32 : return "v_srln_rnd_hi_u32"; case UniOpVVI::kSrlnRndLoU64 : return "v_srln_rnd_lo_u64"; case UniOpVVI::kSrlnRndHiU64 : return "v_srln_rnd_hi_u64"; #endif // ASMJIT_UJIT_AARCH64 } ASMJIT_NOT_REACHED(); } static VecOpInfo vec_op_info_vvi(UniOpVVI op) noexcept { using VE = VecElementType; switch (op) { case UniOpVVI::kSllU16 : return VecOpInfo::make(VE::kUInt16 , VE::kUInt16); case UniOpVVI::kSllU32 : return VecOpInfo::make(VE::kUInt32 , VE::kUInt32); case UniOpVVI::kSllU64 : return VecOpInfo::make(VE::kUInt64 , VE::kUInt64); case UniOpVVI::kSrlU16 : return VecOpInfo::make(VE::kUInt16 , VE::kUInt16); case UniOpVVI::kSrlU32 : return VecOpInfo::make(VE::kUInt32 , VE::kUInt32); case UniOpVVI::kSrlU64 : return VecOpInfo::make(VE::kUInt64 , VE::kUInt64); case UniOpVVI::kSraI16 : return VecOpInfo::make(VE::kInt16 , VE::kInt16); case UniOpVVI::kSraI32 : return VecOpInfo::make(VE::kInt32 , VE::kInt32); case UniOpVVI::kSraI64 : return VecOpInfo::make(VE::kInt64 , VE::kInt64); case UniOpVVI::kSllbU128 : return VecOpInfo::make(VE::kUInt8 , VE::kUInt8); case UniOpVVI::kSrlbU128 : return VecOpInfo::make(VE::kUInt8 , VE::kUInt8); case UniOpVVI::kSwizzleU16x4 : return VecOpInfo::make(VE::kUInt16 , VE::kUInt16); case UniOpVVI::kSwizzleLoU16x4 : return VecOpInfo::make(VE::kUInt16 , VE::kUInt16); case UniOpVVI::kSwizzleHiU16x4 : return VecOpInfo::make(VE::kUInt16 , VE::kUInt16); case UniOpVVI::kSwizzleU32x4 : return VecOpInfo::make(VE::kUInt32 , VE::kUInt32); case UniOpVVI::kSwizzleU64x2 : return VecOpInfo::make(VE::kUInt64 , VE::kUInt64); case UniOpVVI::kSwizzleF32x4 : return VecOpInfo::make(VE::kFloat32, VE::kFloat32); case UniOpVVI::kSwizzleF64x2 : return VecOpInfo::make(VE::kFloat64, VE::kFloat64); case UniOpVVI::kSwizzleU64x4 : return VecOpInfo::make(VE::kUInt64 , VE::kUInt64); case UniOpVVI::kSwizzleF64x4 : return VecOpInfo::make(VE::kFloat64, VE::kFloat64); case UniOpVVI::kExtractV128_I32: return VecOpInfo::make(VE::kInt32 , VE::kInt32); case UniOpVVI::kExtractV128_I64: return VecOpInfo::make(VE::kInt64 , VE::kInt64); case UniOpVVI::kExtractV128_F32: return VecOpInfo::make(VE::kFloat32, VE::kFloat32); case UniOpVVI::kExtractV128_F64: return VecOpInfo::make(VE::kFloat64, VE::kFloat64); case UniOpVVI::kExtractV256_I32: return VecOpInfo::make(VE::kUInt32 , VE::kUInt32); case UniOpVVI::kExtractV256_I64: return VecOpInfo::make(VE::kUInt64 , VE::kUInt64); case UniOpVVI::kExtractV256_F32: return VecOpInfo::make(VE::kFloat32, VE::kFloat32); case UniOpVVI::kExtractV256_F64: return VecOpInfo::make(VE::kFloat64, VE::kFloat64); #if defined(ASMJIT_UJIT_AARCH64) case UniOpVVI::kSrlRndU16 : return VecOpInfo::make(VE::kUInt16, VE::kUInt16); case UniOpVVI::kSrlRndU32 : return VecOpInfo::make(VE::kUInt32, VE::kUInt32); case UniOpVVI::kSrlRndU64 : return VecOpInfo::make(VE::kUInt64, VE::kUInt64); case UniOpVVI::kSrlAccU16 : return VecOpInfo::make(VE::kUInt16, VE::kUInt16); case UniOpVVI::kSrlAccU32 : return VecOpInfo::make(VE::kUInt32, VE::kUInt32); case UniOpVVI::kSrlAccU64 : return VecOpInfo::make(VE::kUInt64, VE::kUInt64); case UniOpVVI::kSrlRndAccU16 : return VecOpInfo::make(VE::kUInt16, VE::kUInt16); case UniOpVVI::kSrlRndAccU32 : return VecOpInfo::make(VE::kUInt32, VE::kUInt32); case UniOpVVI::kSrlRndAccU64 : return VecOpInfo::make(VE::kUInt64, VE::kUInt64); case UniOpVVI::kSrlnLoU16 : return VecOpInfo::make(VE::kUInt16, VE::kUInt16); case UniOpVVI::kSrlnHiU16 : return VecOpInfo::make(VE::kUInt32, VE::kUInt32); case UniOpVVI::kSrlnLoU32 : return VecOpInfo::make(VE::kUInt64, VE::kUInt64); case UniOpVVI::kSrlnHiU32 : return VecOpInfo::make(VE::kUInt16, VE::kUInt16); case UniOpVVI::kSrlnLoU64 : return VecOpInfo::make(VE::kUInt32, VE::kUInt32); case UniOpVVI::kSrlnHiU64 : return VecOpInfo::make(VE::kUInt64, VE::kUInt64); case UniOpVVI::kSrlnRndLoU16 : return VecOpInfo::make(VE::kUInt8 , VE::kUInt16); case UniOpVVI::kSrlnRndHiU16 : return VecOpInfo::make(VE::kUInt8 , VE::kUInt16); case UniOpVVI::kSrlnRndLoU32 : return VecOpInfo::make(VE::kUInt16, VE::kUInt32); case UniOpVVI::kSrlnRndHiU32 : return VecOpInfo::make(VE::kUInt16, VE::kUInt32); case UniOpVVI::kSrlnRndLoU64 : return VecOpInfo::make(VE::kUInt32, VE::kUInt64); case UniOpVVI::kSrlnRndHiU64 : return VecOpInfo::make(VE::kUInt32, VE::kUInt64); #endif // ASMJIT_UJIT_AARCH64 } ASMJIT_NOT_REACHED(); } static const char* vec_op_name_vvv(UniOpVVV op) noexcept { switch (op) { case UniOpVVV::kAndU32 : return "v_and_u32"; case UniOpVVV::kAndU64 : return "v_and_u64"; case UniOpVVV::kOrU32 : return "v_or_u32"; case UniOpVVV::kOrU64 : return "v_or_u64"; case UniOpVVV::kXorU32 : return "v_xor_u32"; case UniOpVVV::kXorU64 : return "v_xor_u64"; case UniOpVVV::kAndnU32 : return "v_andn_u32"; case UniOpVVV::kAndnU64 : return "v_andn_u64"; case UniOpVVV::kBicU32 : return "v_bic_u32"; case UniOpVVV::kBicU64 : return "v_bic_u64"; case UniOpVVV::kAvgrU8 : return "v_avgr_u8"; case UniOpVVV::kAvgrU16 : return "v_avgr_u16"; case UniOpVVV::kAddU8 : return "v_add_u8"; case UniOpVVV::kAddU16 : return "v_add_u16"; case UniOpVVV::kAddU32 : return "v_add_u32"; case UniOpVVV::kAddU64 : return "v_add_u64"; case UniOpVVV::kSubU8 : return "v_sub_u8"; case UniOpVVV::kSubU16 : return "v_sub_u16"; case UniOpVVV::kSubU32 : return "v_sub_u32"; case UniOpVVV::kSubU64 : return "v_sub_u64"; case UniOpVVV::kAddsI8 : return "v_adds_i8"; case UniOpVVV::kAddsU8 : return "v_adds_u8"; case UniOpVVV::kAddsI16 : return "v_adds_i16"; case UniOpVVV::kAddsU16 : return "v_adds_u16"; case UniOpVVV::kSubsI8 : return "v_subs_i8"; case UniOpVVV::kSubsU8 : return "v_subs_u8"; case UniOpVVV::kSubsI16 : return "v_subs_i16"; case UniOpVVV::kSubsU16 : return "v_subs_u16"; case UniOpVVV::kMulU16 : return "v_mul_u16"; case UniOpVVV::kMulU32 : return "v_mul_u32"; case UniOpVVV::kMulU64 : return "v_mul_u64"; case UniOpVVV::kMulhI16 : return "v_mulh_i16"; case UniOpVVV::kMulhU16 : return "v_mulh_u16"; case UniOpVVV::kMulU64_LoU32 : return "v_mul_u64_lo_u32"; case UniOpVVV::kMHAddI16_I32 : return "v_mhadd_i16_i32"; case UniOpVVV::kMinI8 : return "v_min_i8"; case UniOpVVV::kMinU8 : return "v_min_u8"; case UniOpVVV::kMinI16 : return "v_min_i16"; case UniOpVVV::kMinU16 : return "v_min_u16"; case UniOpVVV::kMinI32 : return "v_min_i32"; case UniOpVVV::kMinU32 : return "v_min_u32"; case UniOpVVV::kMinI64 : return "v_min_i64"; case UniOpVVV::kMinU64 : return "v_min_u64"; case UniOpVVV::kMaxI8 : return "v_max_i8"; case UniOpVVV::kMaxU8 : return "v_max_u8"; case UniOpVVV::kMaxI16 : return "v_max_i16"; case UniOpVVV::kMaxU16 : return "v_max_u16"; case UniOpVVV::kMaxI32 : return "v_max_i32"; case UniOpVVV::kMaxU32 : return "v_max_u32"; case UniOpVVV::kMaxI64 : return "v_max_i64"; case UniOpVVV::kMaxU64 : return "v_max_u64"; case UniOpVVV::kCmpEqU8 : return "v_cmp_eq_u8"; case UniOpVVV::kCmpEqU16 : return "v_cmp_eq_u16"; case UniOpVVV::kCmpEqU32 : return "v_cmp_eq_u32"; case UniOpVVV::kCmpEqU64 : return "v_cmp_eq_u64"; case UniOpVVV::kCmpGtI8 : return "v_cmp_gt_i8"; case UniOpVVV::kCmpGtU8 : return "v_cmp_gt_u8"; case UniOpVVV::kCmpGtI16 : return "v_cmp_gt_i16"; case UniOpVVV::kCmpGtU16 : return "v_cmp_gt_u16"; case UniOpVVV::kCmpGtI32 : return "v_cmp_gt_i32"; case UniOpVVV::kCmpGtU32 : return "v_cmp_gt_u32"; case UniOpVVV::kCmpGtI64 : return "v_cmp_gt_i64"; case UniOpVVV::kCmpGtU64 : return "v_cmp_gt_u64"; case UniOpVVV::kCmpGeI8 : return "v_cmp_ge_i8"; case UniOpVVV::kCmpGeU8 : return "v_cmp_ge_u8"; case UniOpVVV::kCmpGeI16 : return "v_cmp_ge_i16"; case UniOpVVV::kCmpGeU16 : return "v_cmp_ge_u16"; case UniOpVVV::kCmpGeI32 : return "v_cmp_ge_i32"; case UniOpVVV::kCmpGeU32 : return "v_cmp_ge_u32"; case UniOpVVV::kCmpGeI64 : return "v_cmp_ge_i64"; case UniOpVVV::kCmpGeU64 : return "v_cmp_ge_u64"; case UniOpVVV::kCmpLtI8 : return "v_cmp_lt_i8"; case UniOpVVV::kCmpLtU8 : return "v_cmp_lt_u8"; case UniOpVVV::kCmpLtI16 : return "v_cmp_lt_i16"; case UniOpVVV::kCmpLtU16 : return "v_cmp_lt_u16"; case UniOpVVV::kCmpLtI32 : return "v_cmp_lt_i32"; case UniOpVVV::kCmpLtU32 : return "v_cmp_lt_u32"; case UniOpVVV::kCmpLtI64 : return "v_cmp_lt_i64"; case UniOpVVV::kCmpLtU64 : return "v_cmp_lt_u64"; case UniOpVVV::kCmpLeI8 : return "v_cmp_le_i8"; case UniOpVVV::kCmpLeU8 : return "v_cmp_le_u8"; case UniOpVVV::kCmpLeI16 : return "v_cmp_le_i16"; case UniOpVVV::kCmpLeU16 : return "v_cmp_le_u16"; case UniOpVVV::kCmpLeI32 : return "v_cmp_le_i32"; case UniOpVVV::kCmpLeU32 : return "v_cmp_le_u32"; case UniOpVVV::kCmpLeI64 : return "v_cmp_le_i64"; case UniOpVVV::kCmpLeU64 : return "v_cmp_le_u64"; case UniOpVVV::kAndF32 : return "v_and_f32"; case UniOpVVV::kAndF64 : return "v_and_f64"; case UniOpVVV::kOrF32 : return "v_or_f32"; case UniOpVVV::kOrF64 : return "v_or_f64"; case UniOpVVV::kXorF32 : return "v_xor_f32"; case UniOpVVV::kXorF64 : return "v_xor_f64"; case UniOpVVV::kAndnF32 : return "v_andn_f32"; case UniOpVVV::kAndnF64 : return "v_andn_f64"; case UniOpVVV::kBicF32 : return "v_bic_f32"; case UniOpVVV::kBicF64 : return "v_bic_f64"; case UniOpVVV::kAddF32S : return "v_add_f32s"; case UniOpVVV::kAddF64S : return "v_add_f64s"; case UniOpVVV::kAddF32 : return "v_add_f32"; case UniOpVVV::kAddF64 : return "v_add_f64"; case UniOpVVV::kSubF32S : return "v_sub_f32s"; case UniOpVVV::kSubF64S : return "v_sub_f64s"; case UniOpVVV::kSubF32 : return "v_sub_f32"; case UniOpVVV::kSubF64 : return "v_sub_f64"; case UniOpVVV::kMulF32S : return "v_mul_f32s"; case UniOpVVV::kMulF64S : return "v_mul_f64s"; case UniOpVVV::kMulF32 : return "v_mul_f32"; case UniOpVVV::kMulF64 : return "v_mul_f64"; case UniOpVVV::kDivF32S : return "v_div_f32s"; case UniOpVVV::kDivF64S : return "v_div_f64s"; case UniOpVVV::kDivF32 : return "v_div_f32"; case UniOpVVV::kDivF64 : return "v_div_f64"; case UniOpVVV::kModF32S : return "v_mod_f32s"; case UniOpVVV::kModF64S : return "v_mod_f64s"; case UniOpVVV::kModF32 : return "v_mod_f32"; case UniOpVVV::kModF64 : return "v_mod_f64"; case UniOpVVV::kMinF32S : return "v_min_f32s"; case UniOpVVV::kMinF64S : return "v_min_f64s"; case UniOpVVV::kMinF32 : return "v_min_f32"; case UniOpVVV::kMinF64 : return "v_min_f64"; case UniOpVVV::kMaxF32S : return "v_max_f32s"; case UniOpVVV::kMaxF64S : return "v_max_f64s"; case UniOpVVV::kMaxF32 : return "v_max_f32"; case UniOpVVV::kMaxF64 : return "v_max_f64"; case UniOpVVV::kCmpEqF32S : return "v_cmp_eq_f32s"; case UniOpVVV::kCmpEqF64S : return "v_cmp_eq_f64s"; case UniOpVVV::kCmpEqF32 : return "v_cmp_eq_f32"; case UniOpVVV::kCmpEqF64 : return "v_cmp_eq_f64"; case UniOpVVV::kCmpNeF32S : return "v_cmp_ne_f32s"; case UniOpVVV::kCmpNeF64S : return "v_cmp_ne_f64s"; case UniOpVVV::kCmpNeF32 : return "v_cmp_ne_f32"; case UniOpVVV::kCmpNeF64 : return "v_cmp_ne_f64"; case UniOpVVV::kCmpGtF32S : return "v_cmp_gt_f32s"; case UniOpVVV::kCmpGtF64S : return "v_cmp_gt_f64s"; case UniOpVVV::kCmpGtF32 : return "v_cmp_gt_f32"; case UniOpVVV::kCmpGtF64 : return "v_cmp_gt_f64"; case UniOpVVV::kCmpGeF32S : return "v_cmp_ge_f32s"; case UniOpVVV::kCmpGeF64S : return "v_cmp_ge_f64s"; case UniOpVVV::kCmpGeF32 : return "v_cmp_ge_f32"; case UniOpVVV::kCmpGeF64 : return "v_cmp_ge_f64"; case UniOpVVV::kCmpLtF32S : return "v_cmp_lt_f32s"; case UniOpVVV::kCmpLtF64S : return "v_cmp_lt_f64s"; case UniOpVVV::kCmpLtF32 : return "v_cmp_lt_f32"; case UniOpVVV::kCmpLtF64 : return "v_cmp_lt_f64"; case UniOpVVV::kCmpLeF32S : return "v_cmp_le_f32s"; case UniOpVVV::kCmpLeF64S : return "v_cmp_le_f64s"; case UniOpVVV::kCmpLeF32 : return "v_cmp_le_f32"; case UniOpVVV::kCmpLeF64 : return "v_cmp_le_f64"; case UniOpVVV::kCmpOrdF32S : return "v_cmp_ord_f32s"; case UniOpVVV::kCmpOrdF64S : return "v_cmp_ord_f64s"; case UniOpVVV::kCmpOrdF32 : return "v_cmp_ord_f32"; case UniOpVVV::kCmpOrdF64 : return "v_cmp_ord_f64"; case UniOpVVV::kCmpUnordF32S : return "v_cmp_unord_f32s"; case UniOpVVV::kCmpUnordF64S : return "v_cmp_unord_f64s"; case UniOpVVV::kCmpUnordF32 : return "v_cmp_unord_f32"; case UniOpVVV::kCmpUnordF64 : return "v_cmp_unord_f64"; case UniOpVVV::kHAddF64 : return "v_hadd_f64"; case UniOpVVV::kCombineLoHiU64 : return "v_combine_lo_hi_u64"; case UniOpVVV::kCombineLoHiF64 : return "v_combine_lo_hi_f64"; case UniOpVVV::kCombineHiLoU64 : return "v_combine_hi_lo_u64"; case UniOpVVV::kCombineHiLoF64 : return "v_combine_hi_lo_f64"; case UniOpVVV::kInterleaveLoU8 : return "v_interleave_lo_u8"; case UniOpVVV::kInterleaveHiU8 : return "v_interleave_hi_u8"; case UniOpVVV::kInterleaveLoU16: return "v_interleave_lo_u16"; case UniOpVVV::kInterleaveHiU16: return "v_interleave_hi_u16"; case UniOpVVV::kInterleaveLoU32: return "v_interleave_lo_u32"; case UniOpVVV::kInterleaveHiU32: return "v_interleave_hi_u32"; case UniOpVVV::kInterleaveLoU64: return "v_interleave_lo_u64"; case UniOpVVV::kInterleaveHiU64: return "v_interleave_hi_u64"; case UniOpVVV::kInterleaveLoF32: return "v_interleave_lo_f32"; case UniOpVVV::kInterleaveHiF32: return "v_interleave_hi_f32"; case UniOpVVV::kInterleaveLoF64: return "v_interleave_lo_f64"; case UniOpVVV::kInterleaveHiF64: return "v_interleave_hi_f64"; case UniOpVVV::kPacksI16_I8 : return "v_packs_i16_i8"; case UniOpVVV::kPacksI16_U8 : return "v_packs_i16_u8"; case UniOpVVV::kPacksI32_I16 : return "v_packs_i32_i16"; case UniOpVVV::kPacksI32_U16 : return "v_packs_i32_u16"; case UniOpVVV::kSwizzlev_U8 : return "v_swizzlev_u8"; #if defined(ASMJIT_UJIT_AARCH64) case UniOpVVV::kMulwLoI8 : return "v_mulw_lo_i8"; case UniOpVVV::kMulwLoU8 : return "v_mulw_lo_u8"; case UniOpVVV::kMulwHiI8 : return "v_mulw_hi_i8"; case UniOpVVV::kMulwHiU8 : return "v_mulw_hi_u8"; case UniOpVVV::kMulwLoI16 : return "v_mulw_lo_i16"; case UniOpVVV::kMulwLoU16 : return "v_mulw_lo_u16"; case UniOpVVV::kMulwHiI16 : return "v_mulw_hi_i16"; case UniOpVVV::kMulwHiU16 : return "v_mulw_hi_u16"; case UniOpVVV::kMulwLoI32 : return "v_mulw_lo_i32"; case UniOpVVV::kMulwLoU32 : return "v_mulw_lo_u32"; case UniOpVVV::kMulwHiI32 : return "v_mulw_hi_i32"; case UniOpVVV::kMulwHiU32 : return "v_mulw_hi_u32"; case UniOpVVV::kMAddwLoI8 : return "v_maddw_lo_i8"; case UniOpVVV::kMAddwLoU8 : return "v_maddw_lo_u8"; case UniOpVVV::kMAddwHiI8 : return "v_maddw_hi_i8"; case UniOpVVV::kMAddwHiU8 : return "v_maddw_hi_u8"; case UniOpVVV::kMAddwLoI16 : return "v_maddw_lo_i16"; case UniOpVVV::kMAddwLoU16 : return "v_maddw_lo_u16"; case UniOpVVV::kMAddwHiI16 : return "v_maddw_hi_i16"; case UniOpVVV::kMAddwHiU16 : return "v_maddw_hi_u16"; case UniOpVVV::kMAddwLoI32 : return "v_maddw_lo_i32"; case UniOpVVV::kMAddwLoU32 : return "v_maddw_lo_u32"; case UniOpVVV::kMAddwHiI32 : return "v_maddw_hi_i32"; case UniOpVVV::kMAddwHiU32 : return "v_maddw_hi_u32"; #endif // ASMJIT_UJIT_AARCH64 #if defined(ASMJIT_UJIT_X86) case UniOpVVV::kPermuteU8 : return "v_permute_u8"; case UniOpVVV::kPermuteU16 : return "v_permute_u16"; case UniOpVVV::kPermuteU32 : return "v_permute_u32"; case UniOpVVV::kPermuteU64 : return "v_permute_u64"; #endif // ASMJIT_UJIT_X86 } ASMJIT_NOT_REACHED(); } static VecOpInfo vec_op_info_vvv(UniOpVVV op) noexcept { using VE = VecElementType; switch (op) { case UniOpVVV::kAndU32 : return VecOpInfo::make(VE::kUInt32, VE::kUInt32, VE::kUInt32); case UniOpVVV::kAndU64 : return VecOpInfo::make(VE::kUInt64, VE::kUInt64, VE::kUInt64); case UniOpVVV::kOrU32 : return VecOpInfo::make(VE::kUInt32, VE::kUInt32, VE::kUInt32); case UniOpVVV::kOrU64 : return VecOpInfo::make(VE::kUInt64, VE::kUInt64, VE::kUInt64); case UniOpVVV::kXorU32 : return VecOpInfo::make(VE::kUInt32, VE::kUInt32, VE::kUInt32); case UniOpVVV::kXorU64 : return VecOpInfo::make(VE::kUInt64, VE::kUInt64, VE::kUInt64); case UniOpVVV::kAndnU32 : return VecOpInfo::make(VE::kUInt32, VE::kUInt32, VE::kUInt32); case UniOpVVV::kAndnU64 : return VecOpInfo::make(VE::kUInt64, VE::kUInt64, VE::kUInt64); case UniOpVVV::kBicU32 : return VecOpInfo::make(VE::kUInt32, VE::kUInt32, VE::kUInt32); case UniOpVVV::kBicU64 : return VecOpInfo::make(VE::kUInt64, VE::kUInt64, VE::kUInt64); case UniOpVVV::kAvgrU8 : return VecOpInfo::make(VE::kUInt8, VE::kUInt8, VE::kUInt8); case UniOpVVV::kAvgrU16 : return VecOpInfo::make(VE::kUInt16, VE::kUInt16, VE::kUInt16); case UniOpVVV::kAddU8 : return VecOpInfo::make(VE::kUInt8, VE::kUInt8, VE::kUInt8); case UniOpVVV::kAddU16 : return VecOpInfo::make(VE::kUInt16, VE::kUInt16, VE::kUInt16); case UniOpVVV::kAddU32 : return VecOpInfo::make(VE::kUInt32, VE::kUInt32, VE::kUInt32); case UniOpVVV::kAddU64 : return VecOpInfo::make(VE::kUInt64, VE::kUInt64, VE::kUInt64); case UniOpVVV::kSubU8 : return VecOpInfo::make(VE::kUInt8, VE::kUInt8, VE::kUInt8); case UniOpVVV::kSubU16 : return VecOpInfo::make(VE::kUInt16, VE::kUInt16, VE::kUInt16); case UniOpVVV::kSubU32 : return VecOpInfo::make(VE::kUInt32, VE::kUInt32, VE::kUInt32); case UniOpVVV::kSubU64 : return VecOpInfo::make(VE::kUInt64, VE::kUInt64, VE::kUInt64); case UniOpVVV::kAddsI8 : return VecOpInfo::make(VE::kInt8, VE::kInt8, VE::kInt8); case UniOpVVV::kAddsU8 : return VecOpInfo::make(VE::kUInt8, VE::kUInt8, VE::kUInt8); case UniOpVVV::kAddsI16 : return VecOpInfo::make(VE::kInt16, VE::kInt16, VE::kInt16); case UniOpVVV::kAddsU16 : return VecOpInfo::make(VE::kUInt16, VE::kUInt16, VE::kUInt16); case UniOpVVV::kSubsI8 : return VecOpInfo::make(VE::kInt8, VE::kInt8, VE::kInt8); case UniOpVVV::kSubsU8 : return VecOpInfo::make(VE::kUInt8, VE::kUInt8, VE::kUInt8); case UniOpVVV::kSubsI16 : return VecOpInfo::make(VE::kInt16, VE::kInt16, VE::kInt16); case UniOpVVV::kSubsU16 : return VecOpInfo::make(VE::kUInt16, VE::kUInt16, VE::kUInt16); case UniOpVVV::kMulU16 : return VecOpInfo::make(VE::kUInt16, VE::kUInt16, VE::kUInt16); case UniOpVVV::kMulU32 : return VecOpInfo::make(VE::kUInt32, VE::kUInt32, VE::kUInt32); case UniOpVVV::kMulU64 : return VecOpInfo::make(VE::kUInt64, VE::kUInt64, VE::kUInt64); case UniOpVVV::kMulhI16 : return VecOpInfo::make(VE::kInt16, VE::kInt16, VE::kInt16); case UniOpVVV::kMulhU16 : return VecOpInfo::make(VE::kUInt16, VE::kUInt16, VE::kUInt16); case UniOpVVV::kMulU64_LoU32 : return VecOpInfo::make(VE::kUInt64, VE::kUInt64, VE::kUInt32); case UniOpVVV::kMHAddI16_I32 : return VecOpInfo::make(VE::kInt32, VE::kInt16, VE::kInt16); case UniOpVVV::kMinI8 : return VecOpInfo::make(VE::kInt8, VE::kInt8, VE::kInt8); case UniOpVVV::kMinU8 : return VecOpInfo::make(VE::kUInt8, VE::kUInt8, VE::kUInt8); case UniOpVVV::kMinI16 : return VecOpInfo::make(VE::kInt16, VE::kInt16, VE::kInt16); case UniOpVVV::kMinU16 : return VecOpInfo::make(VE::kUInt16, VE::kUInt16, VE::kUInt16); case UniOpVVV::kMinI32 : return VecOpInfo::make(VE::kInt32, VE::kInt32, VE::kInt32); case UniOpVVV::kMinU32 : return VecOpInfo::make(VE::kUInt32, VE::kUInt32, VE::kUInt32); case UniOpVVV::kMinI64 : return VecOpInfo::make(VE::kInt64, VE::kInt64, VE::kInt64); case UniOpVVV::kMinU64 : return VecOpInfo::make(VE::kUInt64, VE::kUInt64, VE::kUInt64); case UniOpVVV::kMaxI8 : return VecOpInfo::make(VE::kInt8, VE::kInt8, VE::kInt8); case UniOpVVV::kMaxU8 : return VecOpInfo::make(VE::kUInt8, VE::kUInt8, VE::kUInt8); case UniOpVVV::kMaxI16 : return VecOpInfo::make(VE::kInt16, VE::kInt16, VE::kInt16); case UniOpVVV::kMaxU16 : return VecOpInfo::make(VE::kUInt16, VE::kUInt16, VE::kUInt16); case UniOpVVV::kMaxI32 : return VecOpInfo::make(VE::kInt32, VE::kInt32, VE::kInt32); case UniOpVVV::kMaxU32 : return VecOpInfo::make(VE::kUInt32, VE::kUInt32, VE::kUInt32); case UniOpVVV::kMaxI64 : return VecOpInfo::make(VE::kInt64, VE::kInt64, VE::kInt64); case UniOpVVV::kMaxU64 : return VecOpInfo::make(VE::kUInt64, VE::kUInt64, VE::kUInt64); case UniOpVVV::kCmpEqU8 : return VecOpInfo::make(VE::kUInt8, VE::kUInt8, VE::kUInt8); case UniOpVVV::kCmpEqU16 : return VecOpInfo::make(VE::kUInt16, VE::kUInt16, VE::kUInt16); case UniOpVVV::kCmpEqU32 : return VecOpInfo::make(VE::kUInt32, VE::kUInt32, VE::kUInt32); case UniOpVVV::kCmpEqU64 : return VecOpInfo::make(VE::kUInt64, VE::kUInt64, VE::kUInt64); case UniOpVVV::kCmpGtI8 : return VecOpInfo::make(VE::kInt8, VE::kInt8, VE::kInt8); case UniOpVVV::kCmpGtU8 : return VecOpInfo::make(VE::kUInt8, VE::kUInt8, VE::kUInt8); case UniOpVVV::kCmpGtI16 : return VecOpInfo::make(VE::kInt16, VE::kInt16, VE::kInt16); case UniOpVVV::kCmpGtU16 : return VecOpInfo::make(VE::kUInt16, VE::kUInt16, VE::kUInt16); case UniOpVVV::kCmpGtI32 : return VecOpInfo::make(VE::kInt32, VE::kInt32, VE::kInt32); case UniOpVVV::kCmpGtU32 : return VecOpInfo::make(VE::kUInt32, VE::kUInt32, VE::kUInt32); case UniOpVVV::kCmpGtI64 : return VecOpInfo::make(VE::kInt64, VE::kInt64, VE::kInt64); case UniOpVVV::kCmpGtU64 : return VecOpInfo::make(VE::kUInt64, VE::kUInt64, VE::kUInt64); case UniOpVVV::kCmpGeI8 : return VecOpInfo::make(VE::kInt8, VE::kInt8, VE::kInt8); case UniOpVVV::kCmpGeU8 : return VecOpInfo::make(VE::kUInt8, VE::kUInt8, VE::kUInt8); case UniOpVVV::kCmpGeI16 : return VecOpInfo::make(VE::kInt16, VE::kInt16, VE::kInt16); case UniOpVVV::kCmpGeU16 : return VecOpInfo::make(VE::kUInt16, VE::kUInt16, VE::kUInt16); case UniOpVVV::kCmpGeI32 : return VecOpInfo::make(VE::kInt32, VE::kInt32, VE::kInt32); case UniOpVVV::kCmpGeU32 : return VecOpInfo::make(VE::kUInt32, VE::kUInt32, VE::kUInt32); case UniOpVVV::kCmpGeI64 : return VecOpInfo::make(VE::kInt64, VE::kInt64, VE::kInt64); case UniOpVVV::kCmpGeU64 : return VecOpInfo::make(VE::kUInt64, VE::kUInt64, VE::kUInt64); case UniOpVVV::kCmpLtI8 : return VecOpInfo::make(VE::kInt8, VE::kInt8, VE::kInt8); case UniOpVVV::kCmpLtU8 : return VecOpInfo::make(VE::kUInt8, VE::kUInt8, VE::kUInt8); case UniOpVVV::kCmpLtI16 : return VecOpInfo::make(VE::kInt16, VE::kInt16, VE::kInt16); case UniOpVVV::kCmpLtU16 : return VecOpInfo::make(VE::kUInt16, VE::kUInt16, VE::kUInt16); case UniOpVVV::kCmpLtI32 : return VecOpInfo::make(VE::kInt32, VE::kInt32, VE::kInt32); case UniOpVVV::kCmpLtU32 : return VecOpInfo::make(VE::kUInt32, VE::kUInt32, VE::kUInt32); case UniOpVVV::kCmpLtI64 : return VecOpInfo::make(VE::kInt64, VE::kInt64, VE::kInt64); case UniOpVVV::kCmpLtU64 : return VecOpInfo::make(VE::kUInt64, VE::kUInt64, VE::kUInt64); case UniOpVVV::kCmpLeI8 : return VecOpInfo::make(VE::kInt8, VE::kInt8, VE::kInt8); case UniOpVVV::kCmpLeU8 : return VecOpInfo::make(VE::kUInt8, VE::kUInt8, VE::kUInt8); case UniOpVVV::kCmpLeI16 : return VecOpInfo::make(VE::kInt16, VE::kInt16, VE::kInt16); case UniOpVVV::kCmpLeU16 : return VecOpInfo::make(VE::kUInt16, VE::kUInt16, VE::kUInt16); case UniOpVVV::kCmpLeI32 : return VecOpInfo::make(VE::kInt32, VE::kInt32, VE::kInt32); case UniOpVVV::kCmpLeU32 : return VecOpInfo::make(VE::kUInt32, VE::kUInt32, VE::kUInt32); case UniOpVVV::kCmpLeI64 : return VecOpInfo::make(VE::kInt64, VE::kInt64, VE::kInt64); case UniOpVVV::kCmpLeU64 : return VecOpInfo::make(VE::kUInt64, VE::kUInt64, VE::kUInt64); case UniOpVVV::kAndF32 : return VecOpInfo::make(VE::kUInt32, VE::kUInt32, VE::kUInt32); case UniOpVVV::kAndF64 : return VecOpInfo::make(VE::kUInt64, VE::kUInt64, VE::kUInt64); case UniOpVVV::kOrF32 : return VecOpInfo::make(VE::kUInt32, VE::kUInt32, VE::kUInt32); case UniOpVVV::kOrF64 : return VecOpInfo::make(VE::kUInt64, VE::kUInt64, VE::kUInt64); case UniOpVVV::kXorF32 : return VecOpInfo::make(VE::kUInt32, VE::kUInt32, VE::kUInt32); case UniOpVVV::kXorF64 : return VecOpInfo::make(VE::kUInt64, VE::kUInt64, VE::kUInt64); case UniOpVVV::kAndnF32 : return VecOpInfo::make(VE::kUInt32, VE::kUInt32, VE::kUInt32); case UniOpVVV::kAndnF64 : return VecOpInfo::make(VE::kUInt64, VE::kUInt64, VE::kUInt64); case UniOpVVV::kBicF32 : return VecOpInfo::make(VE::kUInt32, VE::kUInt32, VE::kUInt32); case UniOpVVV::kBicF64 : return VecOpInfo::make(VE::kUInt64, VE::kUInt64, VE::kUInt64); case UniOpVVV::kAddF32S : return VecOpInfo::make(VE::kFloat32, VE::kFloat32, VE::kFloat32); case UniOpVVV::kAddF64S : return VecOpInfo::make(VE::kFloat64, VE::kFloat64, VE::kFloat64); case UniOpVVV::kAddF32 : return VecOpInfo::make(VE::kFloat32, VE::kFloat32, VE::kFloat32); case UniOpVVV::kAddF64 : return VecOpInfo::make(VE::kFloat64, VE::kFloat64, VE::kFloat64); case UniOpVVV::kSubF32S : return VecOpInfo::make(VE::kFloat32, VE::kFloat32, VE::kFloat32); case UniOpVVV::kSubF64S : return VecOpInfo::make(VE::kFloat64, VE::kFloat64, VE::kFloat64); case UniOpVVV::kSubF32 : return VecOpInfo::make(VE::kFloat32, VE::kFloat32, VE::kFloat32); case UniOpVVV::kSubF64 : return VecOpInfo::make(VE::kFloat64, VE::kFloat64, VE::kFloat64); case UniOpVVV::kMulF32S : return VecOpInfo::make(VE::kFloat32, VE::kFloat32, VE::kFloat32); case UniOpVVV::kMulF64S : return VecOpInfo::make(VE::kFloat64, VE::kFloat64, VE::kFloat64); case UniOpVVV::kMulF32 : return VecOpInfo::make(VE::kFloat32, VE::kFloat32, VE::kFloat32); case UniOpVVV::kMulF64 : return VecOpInfo::make(VE::kFloat64, VE::kFloat64, VE::kFloat64); case UniOpVVV::kDivF32S : return VecOpInfo::make(VE::kFloat32, VE::kFloat32, VE::kFloat32); case UniOpVVV::kDivF64S : return VecOpInfo::make(VE::kFloat64, VE::kFloat64, VE::kFloat64); case UniOpVVV::kDivF32 : return VecOpInfo::make(VE::kFloat32, VE::kFloat32, VE::kFloat32); case UniOpVVV::kDivF64 : return VecOpInfo::make(VE::kFloat64, VE::kFloat64, VE::kFloat64); case UniOpVVV::kModF32S : return VecOpInfo::make(VE::kFloat32, VE::kFloat32, VE::kFloat32); case UniOpVVV::kModF64S : return VecOpInfo::make(VE::kFloat64, VE::kFloat64, VE::kFloat64); case UniOpVVV::kModF32 : return VecOpInfo::make(VE::kFloat32, VE::kFloat32, VE::kFloat32); case UniOpVVV::kModF64 : return VecOpInfo::make(VE::kFloat64, VE::kFloat64, VE::kFloat64); case UniOpVVV::kMinF32S : return VecOpInfo::make(VE::kFloat32, VE::kFloat32, VE::kFloat32); case UniOpVVV::kMinF64S : return VecOpInfo::make(VE::kFloat64, VE::kFloat64, VE::kFloat64); case UniOpVVV::kMinF32 : return VecOpInfo::make(VE::kFloat32, VE::kFloat32, VE::kFloat32); case UniOpVVV::kMinF64 : return VecOpInfo::make(VE::kFloat64, VE::kFloat64, VE::kFloat64); case UniOpVVV::kMaxF32S : return VecOpInfo::make(VE::kFloat32, VE::kFloat32, VE::kFloat32); case UniOpVVV::kMaxF64S : return VecOpInfo::make(VE::kFloat64, VE::kFloat64, VE::kFloat64); case UniOpVVV::kMaxF32 : return VecOpInfo::make(VE::kFloat32, VE::kFloat32, VE::kFloat32); case UniOpVVV::kMaxF64 : return VecOpInfo::make(VE::kFloat64, VE::kFloat64, VE::kFloat64); case UniOpVVV::kCmpEqF32S : return VecOpInfo::make(VE::kUInt32, VE::kFloat32, VE::kFloat32); case UniOpVVV::kCmpEqF64S : return VecOpInfo::make(VE::kUInt64, VE::kFloat64, VE::kFloat64); case UniOpVVV::kCmpEqF32 : return VecOpInfo::make(VE::kUInt32, VE::kFloat32, VE::kFloat32); case UniOpVVV::kCmpEqF64 : return VecOpInfo::make(VE::kUInt64, VE::kFloat64, VE::kFloat64); case UniOpVVV::kCmpNeF32S : return VecOpInfo::make(VE::kUInt32, VE::kFloat32, VE::kFloat32); case UniOpVVV::kCmpNeF64S : return VecOpInfo::make(VE::kUInt64, VE::kFloat64, VE::kFloat64); case UniOpVVV::kCmpNeF32 : return VecOpInfo::make(VE::kUInt32, VE::kFloat32, VE::kFloat32); case UniOpVVV::kCmpNeF64 : return VecOpInfo::make(VE::kUInt64, VE::kFloat64, VE::kFloat64); case UniOpVVV::kCmpGtF32S : return VecOpInfo::make(VE::kUInt32, VE::kFloat32, VE::kFloat32); case UniOpVVV::kCmpGtF64S : return VecOpInfo::make(VE::kUInt64, VE::kFloat64, VE::kFloat64); case UniOpVVV::kCmpGtF32 : return VecOpInfo::make(VE::kUInt32, VE::kFloat32, VE::kFloat32); case UniOpVVV::kCmpGtF64 : return VecOpInfo::make(VE::kUInt64, VE::kFloat64, VE::kFloat64); case UniOpVVV::kCmpGeF32S : return VecOpInfo::make(VE::kUInt32, VE::kFloat32, VE::kFloat32); case UniOpVVV::kCmpGeF64S : return VecOpInfo::make(VE::kUInt64, VE::kFloat64, VE::kFloat64); case UniOpVVV::kCmpGeF32 : return VecOpInfo::make(VE::kUInt32, VE::kFloat32, VE::kFloat32); case UniOpVVV::kCmpGeF64 : return VecOpInfo::make(VE::kUInt64, VE::kFloat64, VE::kFloat64); case UniOpVVV::kCmpLtF32S : return VecOpInfo::make(VE::kUInt32, VE::kFloat32, VE::kFloat32); case UniOpVVV::kCmpLtF64S : return VecOpInfo::make(VE::kUInt64, VE::kFloat64, VE::kFloat64); case UniOpVVV::kCmpLtF32 : return VecOpInfo::make(VE::kUInt32, VE::kFloat32, VE::kFloat32); case UniOpVVV::kCmpLtF64 : return VecOpInfo::make(VE::kUInt64, VE::kFloat64, VE::kFloat64); case UniOpVVV::kCmpLeF32S : return VecOpInfo::make(VE::kUInt32, VE::kFloat32, VE::kFloat32); case UniOpVVV::kCmpLeF64S : return VecOpInfo::make(VE::kUInt64, VE::kFloat64, VE::kFloat64); case UniOpVVV::kCmpLeF32 : return VecOpInfo::make(VE::kUInt32, VE::kFloat32, VE::kFloat32); case UniOpVVV::kCmpLeF64 : return VecOpInfo::make(VE::kUInt64, VE::kFloat64, VE::kFloat64); case UniOpVVV::kCmpOrdF32S : return VecOpInfo::make(VE::kUInt32, VE::kFloat32, VE::kFloat32); case UniOpVVV::kCmpOrdF64S : return VecOpInfo::make(VE::kUInt64, VE::kFloat64, VE::kFloat64); case UniOpVVV::kCmpOrdF32 : return VecOpInfo::make(VE::kUInt32, VE::kFloat32, VE::kFloat32); case UniOpVVV::kCmpOrdF64 : return VecOpInfo::make(VE::kUInt64, VE::kFloat64, VE::kFloat64); case UniOpVVV::kCmpUnordF32S : return VecOpInfo::make(VE::kUInt32, VE::kFloat32, VE::kFloat32); case UniOpVVV::kCmpUnordF64S : return VecOpInfo::make(VE::kUInt64, VE::kFloat64, VE::kFloat64); case UniOpVVV::kCmpUnordF32 : return VecOpInfo::make(VE::kUInt32, VE::kFloat32, VE::kFloat32); case UniOpVVV::kCmpUnordF64 : return VecOpInfo::make(VE::kUInt64, VE::kFloat64, VE::kFloat64); case UniOpVVV::kHAddF64 : return VecOpInfo::make(VE::kFloat64, VE::kFloat64, VE::kFloat64); case UniOpVVV::kCombineLoHiU64 : return VecOpInfo::make(VE::kUInt64, VE::kUInt64, VE::kUInt64); case UniOpVVV::kCombineLoHiF64 : return VecOpInfo::make(VE::kFloat64, VE::kFloat64, VE::kFloat64); case UniOpVVV::kCombineHiLoU64 : return VecOpInfo::make(VE::kUInt64, VE::kUInt64, VE::kUInt64); case UniOpVVV::kCombineHiLoF64 : return VecOpInfo::make(VE::kFloat64, VE::kFloat64, VE::kFloat64); case UniOpVVV::kInterleaveLoU8 : return VecOpInfo::make(VE::kUInt8, VE::kUInt8, VE::kUInt8); case UniOpVVV::kInterleaveHiU8 : return VecOpInfo::make(VE::kUInt8, VE::kUInt8, VE::kUInt8); case UniOpVVV::kInterleaveLoU16: return VecOpInfo::make(VE::kUInt16, VE::kUInt16, VE::kUInt16); case UniOpVVV::kInterleaveHiU16: return VecOpInfo::make(VE::kUInt16, VE::kUInt16, VE::kUInt16); case UniOpVVV::kInterleaveLoU32: return VecOpInfo::make(VE::kUInt32, VE::kUInt32, VE::kUInt32); case UniOpVVV::kInterleaveHiU32: return VecOpInfo::make(VE::kUInt32, VE::kUInt32, VE::kUInt32); case UniOpVVV::kInterleaveLoU64: return VecOpInfo::make(VE::kUInt64, VE::kUInt64, VE::kUInt64); case UniOpVVV::kInterleaveHiU64: return VecOpInfo::make(VE::kUInt64, VE::kUInt64, VE::kUInt64); case UniOpVVV::kInterleaveLoF32: return VecOpInfo::make(VE::kFloat32, VE::kFloat32, VE::kFloat32); case UniOpVVV::kInterleaveHiF32: return VecOpInfo::make(VE::kFloat32, VE::kFloat32, VE::kFloat32); case UniOpVVV::kInterleaveLoF64: return VecOpInfo::make(VE::kFloat64, VE::kFloat64, VE::kFloat64); case UniOpVVV::kInterleaveHiF64: return VecOpInfo::make(VE::kFloat64, VE::kFloat64, VE::kFloat64); case UniOpVVV::kPacksI16_I8 : return VecOpInfo::make(VE::kInt8, VE::kInt16, VE::kInt16); case UniOpVVV::kPacksI16_U8 : return VecOpInfo::make(VE::kUInt8, VE::kInt16, VE::kInt16); case UniOpVVV::kPacksI32_I16 : return VecOpInfo::make(VE::kInt16, VE::kInt32, VE::kInt32); case UniOpVVV::kPacksI32_U16 : return VecOpInfo::make(VE::kUInt16, VE::kInt32, VE::kInt32); case UniOpVVV::kSwizzlev_U8 : return VecOpInfo::make(VE::kUInt8, VE::kUInt8, VE::kUInt8); #if defined(ASMJIT_UJIT_AARCH64) case UniOpVVV::kMulwLoI8 : return VecOpInfo::make(VE::kInt16, VE::kInt8, VE::kInt8); case UniOpVVV::kMulwLoU8 : return VecOpInfo::make(VE::kUInt16, VE::kUInt8, VE::kUInt8); case UniOpVVV::kMulwHiI8 : return VecOpInfo::make(VE::kInt16, VE::kInt8, VE::kInt8); case UniOpVVV::kMulwHiU8 : return VecOpInfo::make(VE::kUInt16, VE::kUInt8, VE::kUInt8); case UniOpVVV::kMulwLoI16 : return VecOpInfo::make(VE::kInt32, VE::kInt16, VE::kInt16); case UniOpVVV::kMulwLoU16 : return VecOpInfo::make(VE::kUInt32, VE::kUInt16, VE::kUInt16); case UniOpVVV::kMulwHiI16 : return VecOpInfo::make(VE::kInt32, VE::kInt16, VE::kInt16); case UniOpVVV::kMulwHiU16 : return VecOpInfo::make(VE::kUInt32, VE::kUInt16, VE::kUInt16); case UniOpVVV::kMulwLoI32 : return VecOpInfo::make(VE::kInt64, VE::kInt32, VE::kInt32); case UniOpVVV::kMulwLoU32 : return VecOpInfo::make(VE::kUInt64, VE::kUInt32, VE::kUInt32); case UniOpVVV::kMulwHiI32 : return VecOpInfo::make(VE::kInt64, VE::kInt32, VE::kInt32); case UniOpVVV::kMulwHiU32 : return VecOpInfo::make(VE::kUInt64, VE::kUInt32, VE::kUInt32); case UniOpVVV::kMAddwLoI8 : return VecOpInfo::make(VE::kInt16, VE::kInt8, VE::kInt8); case UniOpVVV::kMAddwLoU8 : return VecOpInfo::make(VE::kUInt16, VE::kUInt8, VE::kUInt8); case UniOpVVV::kMAddwHiI8 : return VecOpInfo::make(VE::kInt16, VE::kInt8, VE::kInt8); case UniOpVVV::kMAddwHiU8 : return VecOpInfo::make(VE::kUInt16, VE::kUInt8, VE::kUInt8); case UniOpVVV::kMAddwLoI16 : return VecOpInfo::make(VE::kInt32, VE::kInt16, VE::kInt16); case UniOpVVV::kMAddwLoU16 : return VecOpInfo::make(VE::kUInt32, VE::kUInt16, VE::kUInt16); case UniOpVVV::kMAddwHiI16 : return VecOpInfo::make(VE::kInt32, VE::kInt16, VE::kInt16); case UniOpVVV::kMAddwHiU16 : return VecOpInfo::make(VE::kUInt32, VE::kUInt16, VE::kUInt16); case UniOpVVV::kMAddwLoI32 : return VecOpInfo::make(VE::kInt64, VE::kInt32, VE::kInt32); case UniOpVVV::kMAddwLoU32 : return VecOpInfo::make(VE::kUInt64, VE::kUInt32, VE::kUInt32); case UniOpVVV::kMAddwHiI32 : return VecOpInfo::make(VE::kInt64, VE::kInt32, VE::kInt32); case UniOpVVV::kMAddwHiU32 : return VecOpInfo::make(VE::kUInt64, VE::kUInt32, VE::kUInt32); #endif // ASMJIT_UJIT_AARCH64 #if defined(ASMJIT_UJIT_X86) case UniOpVVV::kPermuteU8 : return VecOpInfo::make(VE::kUInt8, VE::kUInt8, VE::kUInt8); case UniOpVVV::kPermuteU16 : return VecOpInfo::make(VE::kUInt16, VE::kUInt16, VE::kUInt16); case UniOpVVV::kPermuteU32 : return VecOpInfo::make(VE::kUInt32, VE::kUInt32, VE::kUInt32); case UniOpVVV::kPermuteU64 : return VecOpInfo::make(VE::kUInt64, VE::kUInt64, VE::kUInt64); #endif // ASMJIT_UJIT_X86 } ASMJIT_NOT_REACHED(); } static const char* vec_op_name_vvvi(UniOpVVVI op) noexcept { switch (op) { case UniOpVVVI::kAlignr_U128 : return "v_alignr_u128"; case UniOpVVVI::kInterleaveShuffleU32x4: return "v_interleave_shuffle_u32x4"; case UniOpVVVI::kInterleaveShuffleU64x2: return "v_interleave_shuffle_u64x2"; case UniOpVVVI::kInterleaveShuffleF32x4: return "v_interleave_shuffle_f32x4"; case UniOpVVVI::kInterleaveShuffleF64x2: return "v_interleave_shuffle_f64x2"; case UniOpVVVI::kInsertV128_U32 : return "v_insert_v128_u32"; case UniOpVVVI::kInsertV128_F32 : return "v_insert_v128_f32"; case UniOpVVVI::kInsertV128_U64 : return "v_insert_v128_u64"; case UniOpVVVI::kInsertV128_F64 : return "v_insert_v128_f64"; case UniOpVVVI::kInsertV256_U32 : return "v_insert_v256_u32"; case UniOpVVVI::kInsertV256_F32 : return "v_insert_v256_f32"; case UniOpVVVI::kInsertV256_U64 : return "v_insert_v256_u64"; case UniOpVVVI::kInsertV256_F64 : return "v_insert_v256_f64"; } ASMJIT_NOT_REACHED(); } static VecOpInfo vec_op_info_vvvi(UniOpVVVI op) noexcept { using VE = VecElementType; switch (op) { case UniOpVVVI::kAlignr_U128 : return VecOpInfo::make(VE::kUInt8, VE::kUInt8, VE::kUInt8); case UniOpVVVI::kInterleaveShuffleU32x4: return VecOpInfo::make(VE::kUInt32, VE::kUInt32, VE::kUInt32); case UniOpVVVI::kInterleaveShuffleU64x2: return VecOpInfo::make(VE::kUInt64, VE::kUInt64, VE::kUInt64); case UniOpVVVI::kInterleaveShuffleF32x4: return VecOpInfo::make(VE::kFloat32, VE::kFloat32, VE::kFloat32); case UniOpVVVI::kInterleaveShuffleF64x2: return VecOpInfo::make(VE::kFloat64, VE::kFloat64, VE::kFloat64); case UniOpVVVI::kInsertV128_U32 : return VecOpInfo::make(VE::kUInt32, VE::kUInt32, VE::kUInt32); case UniOpVVVI::kInsertV128_F32 : return VecOpInfo::make(VE::kFloat32, VE::kFloat32, VE::kFloat32); case UniOpVVVI::kInsertV128_U64 : return VecOpInfo::make(VE::kUInt64, VE::kUInt64, VE::kUInt64); case UniOpVVVI::kInsertV128_F64 : return VecOpInfo::make(VE::kFloat64, VE::kFloat64, VE::kFloat64); case UniOpVVVI::kInsertV256_U32 : return VecOpInfo::make(VE::kUInt32, VE::kUInt32, VE::kUInt32); case UniOpVVVI::kInsertV256_F32 : return VecOpInfo::make(VE::kFloat32, VE::kFloat32, VE::kFloat32); case UniOpVVVI::kInsertV256_U64 : return VecOpInfo::make(VE::kUInt64, VE::kUInt64, VE::kUInt64); case UniOpVVVI::kInsertV256_F64 : return VecOpInfo::make(VE::kFloat64, VE::kFloat64, VE::kFloat64); default: ASMJIT_NOT_REACHED(); } } static const char* vec_op_name_vvvv(UniOpVVVV op) noexcept { switch (op) { case UniOpVVVV::kBlendV_U8: return "v_blendv_u8"; case UniOpVVVV::kMAddU16 : return "v_madd_u16"; case UniOpVVVV::kMAddU32 : return "v_madd_u32"; case UniOpVVVV::kMAddF32S : return "v_madd_f32s"; case UniOpVVVV::kMAddF64S : return "v_madd_f64s"; case UniOpVVVV::kMAddF32 : return "v_madd_f32"; case UniOpVVVV::kMAddF64 : return "v_madd_f64"; case UniOpVVVV::kMSubF32S : return "v_msub_f32s"; case UniOpVVVV::kMSubF64S : return "v_msub_f64s"; case UniOpVVVV::kMSubF32 : return "v_msub_f32"; case UniOpVVVV::kMSubF64 : return "v_msub_f64"; case UniOpVVVV::kNMAddF32S: return "v_nmadd_f32s"; case UniOpVVVV::kNMAddF64S: return "v_nmadd_f64s"; case UniOpVVVV::kNMAddF32 : return "v_nmadd_f32"; case UniOpVVVV::kNMAddF64 : return "v_nmadd_f64"; case UniOpVVVV::kNMSubF32S: return "v_nmsub_f32s"; case UniOpVVVV::kNMSubF64S: return "v_nmsub_f64s"; case UniOpVVVV::kNMSubF32 : return "v_nmsub_f32"; case UniOpVVVV::kNMSubF64 : return "v_nmsub_f64"; } ASMJIT_NOT_REACHED(); } static VecOpInfo vec_op_info_vvvv(UniOpVVVV op) noexcept { using VE = VecElementType; switch (op) { case UniOpVVVV::kBlendV_U8: return VecOpInfo::make(VE::kUInt8, VE::kUInt8, VE::kUInt8, VE::kUInt8); case UniOpVVVV::kMAddU16 : return VecOpInfo::make(VE::kUInt16, VE::kUInt16, VE::kUInt16, VE::kUInt16); case UniOpVVVV::kMAddU32 : return VecOpInfo::make(VE::kUInt32, VE::kUInt32, VE::kUInt32, VE::kUInt32); case UniOpVVVV::kMAddF32S : return VecOpInfo::make(VE::kFloat32, VE::kFloat32, VE::kFloat32, VE::kFloat32); case UniOpVVVV::kMAddF64S : return VecOpInfo::make(VE::kFloat64, VE::kFloat64, VE::kFloat64, VE::kFloat64); case UniOpVVVV::kMAddF32 : return VecOpInfo::make(VE::kFloat32, VE::kFloat32, VE::kFloat32, VE::kFloat32); case UniOpVVVV::kMAddF64 : return VecOpInfo::make(VE::kFloat64, VE::kFloat64, VE::kFloat64, VE::kFloat64); case UniOpVVVV::kMSubF32S : return VecOpInfo::make(VE::kFloat32, VE::kFloat32, VE::kFloat32, VE::kFloat32); case UniOpVVVV::kMSubF64S : return VecOpInfo::make(VE::kFloat64, VE::kFloat64, VE::kFloat64, VE::kFloat64); case UniOpVVVV::kMSubF32 : return VecOpInfo::make(VE::kFloat32, VE::kFloat32, VE::kFloat32, VE::kFloat32); case UniOpVVVV::kMSubF64 : return VecOpInfo::make(VE::kFloat64, VE::kFloat64, VE::kFloat64, VE::kFloat64); case UniOpVVVV::kNMAddF32S: return VecOpInfo::make(VE::kFloat32, VE::kFloat32, VE::kFloat32, VE::kFloat32); case UniOpVVVV::kNMAddF64S: return VecOpInfo::make(VE::kFloat64, VE::kFloat64, VE::kFloat64, VE::kFloat64); case UniOpVVVV::kNMAddF32 : return VecOpInfo::make(VE::kFloat32, VE::kFloat32, VE::kFloat32, VE::kFloat32); case UniOpVVVV::kNMAddF64 : return VecOpInfo::make(VE::kFloat64, VE::kFloat64, VE::kFloat64, VE::kFloat64); case UniOpVVVV::kNMSubF32S: return VecOpInfo::make(VE::kFloat32, VE::kFloat32, VE::kFloat32, VE::kFloat32); case UniOpVVVV::kNMSubF64S: return VecOpInfo::make(VE::kFloat64, VE::kFloat64, VE::kFloat64, VE::kFloat64); case UniOpVVVV::kNMSubF32 : return VecOpInfo::make(VE::kFloat32, VE::kFloat32, VE::kFloat32, VE::kFloat32); case UniOpVVVV::kNMSubF64 : return VecOpInfo::make(VE::kFloat64, VE::kFloat64, VE::kFloat64, VE::kFloat64); } ASMJIT_NOT_REACHED(); } // ujit::UniCompiler - Tests - SIMD - Float To Int - Machine Behavior // ================================================================== template static ASMJIT_INLINE_NODEBUG int32_t cvt_float_to_int_trunc(const FloatT& x) noexcept { constexpr IntT min_value = std::numeric_limits::lowest(); constexpr IntT max_value = std::numeric_limits::max(); constexpr IntT zero = IntT(0); if (std::isnan(x)) { return behavior == FloatToIntOutsideRangeBehavior::kSmallestValue ? min_value : zero; } if (x < FloatT(min_value)) { return min_value; } if (x > FloatT(max_value)) { return behavior == FloatToIntOutsideRangeBehavior::kSmallestValue ? min_value : max_value; } return IntT(x); } template static ASMJIT_INLINE_NODEBUG int32_t cvt_float_to_int_round(const FloatT& x) noexcept { constexpr IntT min_value = std::numeric_limits::lowest(); constexpr IntT max_value = std::numeric_limits::max(); constexpr IntT zero = IntT(0); if (std::isnan(x)) { return behavior == FloatToIntOutsideRangeBehavior::kSmallestValue ? min_value : zero; } if (x < FloatT(min_value)) { return min_value; } if (x > FloatT(max_value)) { return behavior == FloatToIntOutsideRangeBehavior::kSmallestValue ? min_value : max_value; } return IntT(std::nearbyint(x)); } // ujit::UniCompiler - Tests - SIMD - Data Generators & Constraints // ================================================================ // Data generator, which is used to fill the content of SIMD registers. class DataGenInt { public: TestUtils::Random rng; uint32_t step; struct Float32Data { uint32_t u32; float f32; }; ASMJIT_INLINE explicit DataGenInt(uint64_t seed) noexcept : rng(seed), step(0) {} ASMJIT_NOINLINE uint64_t next_uint64() noexcept { if (++step >= 256) { step = 0; } // NOTE: Nothing really elaborate - sometimes we want to test also numbers // that random number generators won't return often, so we hardcode some. switch (step) { case 0: return 0u; case 1: return 0u; case 2: return 0u; case 6: return 1u; case 7: return 0u; case 10: return 0u; case 11: return 0xFFu; case 15: return 0xFFFFu; case 17: return 0xFFFFFFFFu; case 21: return 0xFFFFFFFFFFFFFFFFu; case 24: return 1u; case 40: return 0xFFu; case 55: return 0x8080808080808080u; case 66: return 0x80000080u; case 69: return 1u; case 79: return 0x7F; case 122: return 0xFFFFu; case 123: return 0xFFFFu; case 124: return 0xFFFFu; case 127: return 1u; case 130: return 0xFFu; case 142: return 0x7FFFu; case 143: return 0x7FFFu; case 144: return 0u; case 145: return 0x7FFFu; default : return rng.next_uint64(); } } ASMJIT_NOINLINE float next_float32() noexcept { if (++step >= 256) { step = 0; } switch (step) { case 0: return 0.0f; case 1: return 0.0f; case 2: return 0.0f; case 6: return 1.0f; case 7: return 0.0f; case 10: return 0.00001f; case 11: return 2.0f; case 12: return -std::numeric_limits::infinity(); case 15: return 3.0f; case 17: return 256.0f; case 21: return 0.5f; case 23: return std::numeric_limits::quiet_NaN(); case 24: return 0.25f; case 27: return std::numeric_limits::quiet_NaN(); case 29: return std::numeric_limits::infinity(); case 31: return std::numeric_limits::quiet_NaN(); case 35: return std::numeric_limits::quiet_NaN(); case 40: return 5.12323f; case 45: return -std::numeric_limits::infinity(); case 55: return 100.5f; case 66: return 0.1f; case 69: return 0.2f; case 79: return 0.3f; case 89: return -13005961.0f; case 99: return -std::numeric_limits::infinity(); case 100: case 102: case 104: case 106: case 108: return float(rng.next_double()); case 110: case 112: case 114: case 116: case 118: return -float(rng.next_double()); case 122: return 10.3f; case 123: return 20.3f; case 124: return -100.3f; case 127: return 1.3f; case 130: return std::numeric_limits::quiet_NaN(); case 135: return -std::numeric_limits::infinity(); case 142: return 1.0f; case 143: return 1.5f; case 144: return 2.0f; case 145: return std::numeric_limits::infinity(); case 155: return -1.5f; case 165: return -0.5f; case 175: return -1.0f; case 245: return 2.5f; default: { float sign = rng.next_uint32() < 0x7FFFFFF ? 1.0f : -1.0f; return float(rng.next_double() * double(rng.next_uint32() & 0xFFFFFFu)) * sign; } } } ASMJIT_NOINLINE double next_float64() noexcept { if (++step >= 256) { step = 0; } switch (step) { case 0: return 0.0; case 1: return 0.0; case 2: return 0.0; case 6: return 1.0; case 7: return 0.0; case 10: return 0.00001; case 11: return 2.0; case 12: return -std::numeric_limits::infinity(); case 15: return 3.0; case 17: return 256.0; case 21: return 0.5; case 23: return std::numeric_limits::quiet_NaN(); case 24: return 0.25; case 27: return std::numeric_limits::quiet_NaN(); case 29: return std::numeric_limits::infinity(); case 31: return std::numeric_limits::quiet_NaN(); case 35: return std::numeric_limits::quiet_NaN(); case 40: return 5.12323; case 45: return -std::numeric_limits::infinity(); case 55: return 100.5; case 66: return 0.1; case 69: return 0.2; case 79: return 0.3; case 80: return 4503599627370495.5; case 99: return -std::numeric_limits::infinity(); case 100: case 102: case 104: case 106: case 108: return rng.next_double(); case 110: case 112: case 114: case 116: case 118: return -rng.next_double(); case 122: return 10.3; case 123: return 20.3; case 124: return -100.3; case 125: return 4503599627370496.0; case 127: return 1.3; case 130: return std::numeric_limits::quiet_NaN(); case 135: return -std::numeric_limits::infinity(); case 142: return 1.0; case 143: return 1.5; case 144: return 2.0; case 145: return std::numeric_limits::infinity(); case 155: return -1.5; case 165: return -0.5; case 175: return -1.0; case 245: return 2.5; case 248: return -4503599627370495.5; default: { double sign = rng.next_uint32() < 0x7FFFFFF ? 1.0 : -1.0; return double(rng.next_double() * double(rng.next_uint32() & 0x3FFFFFFFu)) * sign; } } } }; template struct half_minus_1ulp_const; template<> struct half_minus_1ulp_const { static inline constexpr float value = 0.49999997f; }; template<> struct half_minus_1ulp_const { static inline constexpr double value = 0.49999999999999994; }; // Some SIMD operations are constrained, especially those higher level. So, to successfully test these we // have to model the constraints in a way that the SIMD instruction we test actually gets the correct input. // Note that a constraint doesn't have to be always range based, it could be anything. struct ConstraintNone { template static ASMJIT_INLINE_NODEBUG void apply(VecOverlay& v) noexcept { Support::maybe_unused(v); } }; template struct ConstraintBase { template static ASMJIT_INLINE void apply(VecOverlay& v) noexcept { ElementT* elements = v.template data(); for (size_t i = 0; i < kW / sizeof(ElementT); i++) { elements[i] = Derived::apply_one(elements[i]); } } }; template struct ConstraintRangeU8 : public ConstraintBase> { static ASMJIT_INLINE_NODEBUG uint8_t apply_one(uint8_t x) noexcept { return std::clamp(x, kMin, kMax); } }; template struct ConstraintRangeU16 : public ConstraintBase> { static ASMJIT_INLINE_NODEBUG uint16_t apply_one(uint16_t x) noexcept { return std::clamp(x, kMin, kMax); } }; template struct ConstraintRangeU32 : public ConstraintBase> { static ASMJIT_INLINE_NODEBUG uint32_t apply_one(uint32_t x) noexcept { return std::clamp(x, kMin, kMax); } }; // ujit::UniCompiler - Tests - Generic Operations // ============================================== template static ASMJIT_INLINE_NODEBUG std::make_unsigned_t cast_uint(const T& x) noexcept { return static_cast>(x); } template static ASMJIT_INLINE_NODEBUG std::make_signed_t cast_int(const T& x) noexcept { return static_cast>(x); } static ASMJIT_INLINE_NODEBUG int8_t saturate_i16_to_i8(int16_t x) noexcept { return x < int16_t(-128) ? int8_t(-128) : x > int16_t( 127) ? int8_t( 127) : int8_t(x & 0xFF); } static ASMJIT_INLINE_NODEBUG uint8_t saturate_i16_to_u8(int16_t x) noexcept { return x < int16_t(0x00) ? uint8_t(0x00) : x > int16_t(0xFF) ? uint8_t(0xFF) : uint8_t(x & 0xFF); } static ASMJIT_INLINE_NODEBUG int16_t saturate_i32_to_i16(int32_t x) noexcept { return x < int32_t(-32768) ? int16_t(-32768) : x > int32_t( 32767) ? int16_t( 32767) : int16_t(x & 0xFFFF); } static ASMJIT_INLINE_NODEBUG uint16_t saturate_i32_to_u16(int32_t x) noexcept { return x < int32_t(0x0000) ? uint16_t(0x0000) : x > int32_t(0xFFFF) ? uint16_t(0xFFFF) : uint16_t(x & 0xFFFF); } template struct op_each_vv { template static ASMJIT_INLINE VecOverlay apply(const VecOverlay& a) noexcept { VecOverlay out{}; for (uint32_t i = 0; i < kW / sizeof(T); i++) { out.set(i, Derived::apply_one(a.template get(i))); } return out; } }; template struct op_each_vvi { template static ASMJIT_INLINE VecOverlay apply(const VecOverlay& a, uint32_t imm) noexcept { VecOverlay out{}; for (uint32_t i = 0; i < kW / sizeof(T); i++) { out.set(i, Derived::apply_one(a.template get(i), imm)); } return out; } }; template struct op_each_vvv { template static ASMJIT_INLINE VecOverlay apply(const VecOverlay& a, const VecOverlay& b) noexcept { VecOverlay out{}; for (uint32_t i = 0; i < kW / sizeof(T); i++) { out.set(i, Derived::apply_one(a.template get(i), b.template get(i))); } return out; } }; template struct op_each_vvvi { template static ASMJIT_INLINE VecOverlay apply(const VecOverlay& a, const VecOverlay& b, uint32_t imm) noexcept { VecOverlay out{}; for (uint32_t i = 0; i < kW / sizeof(T); i++) { out.set(i, Derived::apply_one(a.template get(i), b.template get(i), imm)); } return out; } }; template struct op_each_vvvv { template static ASMJIT_INLINE VecOverlay apply(const VecOverlay& a, const VecOverlay& b, const VecOverlay& c) noexcept { VecOverlay out{}; for (uint32_t i = 0; i < kW / sizeof(T); i++) { out.set(i, Derived::apply_one(a.template get(i), b.template get(i), c.template get(i))); } return out; } }; template struct op_scalar_vv { template static ASMJIT_INLINE VecOverlay apply(const VecOverlay& a) noexcept { VecOverlay out {}; if constexpr (kB == ScalarOpBehavior::kPreservingVec128) { out.copy_16b_from(a); } out.set(0, Derived::apply_one(a.template get(0))); return out; } }; template struct op_scalar_vvi { template static ASMJIT_INLINE VecOverlay apply(const VecOverlay& a, uint32_t imm) noexcept { VecOverlay out {}; if constexpr (kB == ScalarOpBehavior::kPreservingVec128) { out.copy_16b_from(a); } out.set(0, Derived::apply_one(a.template get(0), imm)); return out; } }; template struct op_scalar_vvv { template static ASMJIT_INLINE VecOverlay apply(const VecOverlay& a, const VecOverlay& b) noexcept { VecOverlay out {}; if constexpr (kB == ScalarOpBehavior::kPreservingVec128) { out.copy_16b_from(a); } out.set(0, Derived::apply_one(a.template get(0), b.template get(0))); return out; } }; template struct op_scalar_vvvv { template static ASMJIT_INLINE VecOverlay apply(const VecOverlay& a, const VecOverlay& b, const VecOverlay& c) noexcept { VecOverlay out {}; if constexpr (kB == ScalarOpBehavior::kPreservingVec128) { out.copy_16b_from(a); } out.set(0, Derived::apply_one(a.template get(0), b.template get(0), c.template get(0))); return out; } }; // ujit::UniCompiler - Tests - Generic Operations - VV // =================================================== struct vec_op_mov : public op_each_vv { static ASMJIT_INLINE_NODEBUG uint32_t apply_one(const uint32_t& a) noexcept { return a; } }; struct vec_op_mov_u64 { template static ASMJIT_INLINE VecOverlay apply(const VecOverlay& a) noexcept { VecOverlay out{}; out.data_u64[0] = a.data_u64[0]; return out; } }; struct vec_op_broadcast_u8 { template static ASMJIT_INLINE VecOverlay apply(const VecOverlay& a) noexcept { VecOverlay out{}; for (uint32_t i = 0; i < kW; i++) out.data_u8[i] = a.data_u8[0]; return out; } }; struct vec_op_broadcast_u16 { template static ASMJIT_INLINE VecOverlay apply(const VecOverlay& a) noexcept { VecOverlay out{}; for (uint32_t i = 0; i < kW / 2u; i++) { out.data_u16[i] = a.data_u16[0]; } return out; } }; struct vec_op_broadcast_u32 { template static ASMJIT_INLINE VecOverlay apply(const VecOverlay& a) noexcept { VecOverlay out{}; for (uint32_t i = 0; i < kW / 4u; i++) { out.data_u32[i] = a.data_u32[0]; } return out; } }; struct vec_op_broadcast_u64 { template static ASMJIT_INLINE VecOverlay apply(const VecOverlay& a) noexcept { VecOverlay out{}; for (uint32_t i = 0; i < kW / 8u; i++) { out.data_u64[i] = a.data_u64[0]; } return out; } }; struct vec_op_broadcast_u128 { template static ASMJIT_INLINE VecOverlay apply(const VecOverlay& a) noexcept { VecOverlay out{}; for (uint32_t i = 0; i < kW / 8u; i += 2) { out.data_u64[i + 0] = a.data_u64[0]; out.data_u64[i + 1] = a.data_u64[1]; } return out; } }; struct vec_op_broadcast_u256 { template static ASMJIT_INLINE VecOverlay apply(const VecOverlay& a) noexcept { VecOverlay out{}; if constexpr (kW < 32) { out = a; } else { for (uint32_t i = 0; i < kW / 8u; i += 4) { out.data_u64[i + 0] = a.data_u64[0]; out.data_u64[i + 1] = a.data_u64[1]; out.data_u64[i + 2] = a.data_u64[2]; out.data_u64[i + 3] = a.data_u64[3]; } } return out; } }; template struct vec_op_abs : public op_each_vv> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a) noexcept { return a < 0 ? T(cast_uint(T(0)) - cast_uint(a)) : a; } }; template struct vec_op_neg : public op_each_vv> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a) noexcept { return T(cast_uint(T(0)) - cast_uint(a)); } }; template struct vec_op_not : public op_each_vv> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a) noexcept { return T(~a); } }; struct vec_op_cvt_i8_lo_to_i16 { template static ASMJIT_INLINE VecOverlay apply(const VecOverlay& a) noexcept { VecOverlay out{}; for (uint32_t off = 0; off < kW; off += 16) { out.data_i16[off / 2 + 0] = a.data_i8[off / 2 + 0]; out.data_i16[off / 2 + 1] = a.data_i8[off / 2 + 1]; out.data_i16[off / 2 + 2] = a.data_i8[off / 2 + 2]; out.data_i16[off / 2 + 3] = a.data_i8[off / 2 + 3]; out.data_i16[off / 2 + 4] = a.data_i8[off / 2 + 4]; out.data_i16[off / 2 + 5] = a.data_i8[off / 2 + 5]; out.data_i16[off / 2 + 6] = a.data_i8[off / 2 + 6]; out.data_i16[off / 2 + 7] = a.data_i8[off / 2 + 7]; } return out; } }; struct vec_op_cvt_i8_hi_to_i16 { template static ASMJIT_INLINE VecOverlay apply(const VecOverlay& a) noexcept { VecOverlay out{}; for (uint32_t off = 0; off < kW; off += 16) { out.data_i16[off / 2 + 0] = a.data_i8[kW / 2 + off / 2 + 0]; out.data_i16[off / 2 + 1] = a.data_i8[kW / 2 + off / 2 + 1]; out.data_i16[off / 2 + 2] = a.data_i8[kW / 2 + off / 2 + 2]; out.data_i16[off / 2 + 3] = a.data_i8[kW / 2 + off / 2 + 3]; out.data_i16[off / 2 + 4] = a.data_i8[kW / 2 + off / 2 + 4]; out.data_i16[off / 2 + 5] = a.data_i8[kW / 2 + off / 2 + 5]; out.data_i16[off / 2 + 6] = a.data_i8[kW / 2 + off / 2 + 6]; out.data_i16[off / 2 + 7] = a.data_i8[kW / 2 + off / 2 + 7]; } return out; } }; struct vec_op_cvt_u8_lo_to_u16 { template static ASMJIT_INLINE VecOverlay apply(const VecOverlay& a) noexcept { VecOverlay out{}; for (uint32_t off = 0; off < kW; off += 16) { out.data_u16[off / 2 + 0] = a.data_u8[off / 2 + 0]; out.data_u16[off / 2 + 1] = a.data_u8[off / 2 + 1]; out.data_u16[off / 2 + 2] = a.data_u8[off / 2 + 2]; out.data_u16[off / 2 + 3] = a.data_u8[off / 2 + 3]; out.data_u16[off / 2 + 4] = a.data_u8[off / 2 + 4]; out.data_u16[off / 2 + 5] = a.data_u8[off / 2 + 5]; out.data_u16[off / 2 + 6] = a.data_u8[off / 2 + 6]; out.data_u16[off / 2 + 7] = a.data_u8[off / 2 + 7]; } return out; } }; struct vec_op_cvt_u8_hi_to_u16 { template static ASMJIT_INLINE VecOverlay apply(const VecOverlay& a) noexcept { VecOverlay out{}; for (uint32_t off = 0; off < kW; off += 16) { out.data_u16[off / 2 + 0] = a.data_u8[kW / 2 + off / 2 + 0]; out.data_u16[off / 2 + 1] = a.data_u8[kW / 2 + off / 2 + 1]; out.data_u16[off / 2 + 2] = a.data_u8[kW / 2 + off / 2 + 2]; out.data_u16[off / 2 + 3] = a.data_u8[kW / 2 + off / 2 + 3]; out.data_u16[off / 2 + 4] = a.data_u8[kW / 2 + off / 2 + 4]; out.data_u16[off / 2 + 5] = a.data_u8[kW / 2 + off / 2 + 5]; out.data_u16[off / 2 + 6] = a.data_u8[kW / 2 + off / 2 + 6]; out.data_u16[off / 2 + 7] = a.data_u8[kW / 2 + off / 2 + 7]; } return out; } }; struct vec_op_cvt_i8_to_i32 { template static ASMJIT_INLINE VecOverlay apply(const VecOverlay& a) noexcept { VecOverlay out{}; for (uint32_t off = 0; off < kW; off += 16) { out.data_i32[off / 4 + 0] = a.data_i8[off / 4 + 0]; out.data_i32[off / 4 + 1] = a.data_i8[off / 4 + 1]; out.data_i32[off / 4 + 2] = a.data_i8[off / 4 + 2]; out.data_i32[off / 4 + 3] = a.data_i8[off / 4 + 3]; } return out; } }; struct vec_op_cvt_u8_to_u32 { template static ASMJIT_INLINE VecOverlay apply(const VecOverlay& a) noexcept { VecOverlay out{}; for (uint32_t off = 0; off < kW; off += 16) { out.data_u32[off / 4 + 0] = a.data_u8[off / 4 + 0]; out.data_u32[off / 4 + 1] = a.data_u8[off / 4 + 1]; out.data_u32[off / 4 + 2] = a.data_u8[off / 4 + 2]; out.data_u32[off / 4 + 3] = a.data_u8[off / 4 + 3]; } return out; } }; struct vec_op_cvt_i16_lo_to_i32 { template static ASMJIT_INLINE VecOverlay apply(const VecOverlay& a) noexcept { VecOverlay out{}; for (uint32_t off = 0; off < kW; off += 16) { out.data_i32[off / 4 + 0] = a.data_i16[off / 4 + 0]; out.data_i32[off / 4 + 1] = a.data_i16[off / 4 + 1]; out.data_i32[off / 4 + 2] = a.data_i16[off / 4 + 2]; out.data_i32[off / 4 + 3] = a.data_i16[off / 4 + 3]; } return out; } }; struct vec_op_cvt_i16_hi_to_i32 { template static ASMJIT_INLINE VecOverlay apply(const VecOverlay& a) noexcept { VecOverlay out{}; for (uint32_t off = 0; off < kW; off += 16) { out.data_i32[off / 4 + 0] = a.data_i16[kW / 4 + off / 4 + 0]; out.data_i32[off / 4 + 1] = a.data_i16[kW / 4 + off / 4 + 1]; out.data_i32[off / 4 + 2] = a.data_i16[kW / 4 + off / 4 + 2]; out.data_i32[off / 4 + 3] = a.data_i16[kW / 4 + off / 4 + 3]; } return out; } }; struct vec_op_cvt_u16_lo_to_u32 { template static ASMJIT_INLINE VecOverlay apply(const VecOverlay& a) noexcept { VecOverlay out{}; for (uint32_t off = 0; off < kW; off += 16) { out.data_u32[off / 4 + 0] = a.data_u16[off / 4 + 0]; out.data_u32[off / 4 + 1] = a.data_u16[off / 4 + 1]; out.data_u32[off / 4 + 2] = a.data_u16[off / 4 + 2]; out.data_u32[off / 4 + 3] = a.data_u16[off / 4 + 3]; } return out; } }; struct vec_op_cvt_u16_hi_to_u32 { template static ASMJIT_INLINE VecOverlay apply(const VecOverlay& a) noexcept { VecOverlay out{}; for (uint32_t off = 0; off < kW; off += 16) { out.data_u32[off / 4 + 0] = a.data_u16[kW / 4 + off / 4 + 0]; out.data_u32[off / 4 + 1] = a.data_u16[kW / 4 + off / 4 + 1]; out.data_u32[off / 4 + 2] = a.data_u16[kW / 4 + off / 4 + 2]; out.data_u32[off / 4 + 3] = a.data_u16[kW / 4 + off / 4 + 3]; } return out; } }; struct vec_op_cvt_i32_lo_to_i64 { template static ASMJIT_INLINE VecOverlay apply(const VecOverlay& a) noexcept { VecOverlay out{}; for (uint32_t off = 0; off < kW; off += 16) { out.data_i64[off / 8 + 0] = a.data_i32[off / 8 + 0]; out.data_i64[off / 8 + 1] = a.data_i32[off / 8 + 1]; } return out; } }; struct vec_op_cvt_i32_hi_to_i64 { template static ASMJIT_INLINE VecOverlay apply(const VecOverlay& a) noexcept { VecOverlay out{}; for (uint32_t off = 0; off < kW; off += 16) { out.data_i64[off / 8 + 0] = a.data_i32[kW / 8 + off / 8 + 0]; out.data_i64[off / 8 + 1] = a.data_i32[kW / 8 + off / 8 + 1]; } return out; } }; struct vec_op_cvt_u32_lo_to_u64 { template static ASMJIT_INLINE VecOverlay apply(const VecOverlay& a) noexcept { VecOverlay out{}; for (uint32_t off = 0; off < kW; off += 16) { out.data_u64[off / 8 + 0] = a.data_u32[off / 8 + 0]; out.data_u64[off / 8 + 1] = a.data_u32[off / 8 + 1]; } return out; } }; struct vec_op_cvt_u32_hi_to_u64 { template static ASMJIT_INLINE VecOverlay apply(const VecOverlay& a) noexcept { VecOverlay out{}; for (uint32_t off = 0; off < kW; off += 16) { out.data_u64[off / 8 + 0] = a.data_u32[kW / 8 + off / 8 + 0]; out.data_u64[off / 8 + 1] = a.data_u32[kW / 8 + off / 8 + 1]; } return out; } }; template struct vec_op_fabs : public op_each_vv> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a) noexcept { return std::fabs(a); } }; template struct vec_op_trunc : public op_each_vv> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a) noexcept { return std::trunc(a); } }; template struct vec_op_floor : public op_each_vv> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a) noexcept { return std::floor(a); } }; template struct vec_op_ceil : public op_each_vv> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a) noexcept { return std::ceil(a); } }; template struct vec_op_round_even : public op_each_vv> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a) noexcept { return std::nearbyint(a); } }; template struct vec_op_round_half_away : public op_each_vv> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a) noexcept { return std::trunc(fadd(a, fxor(half_minus_1ulp_const::value, fsign(a)))); } }; template struct vec_op_round_half_up : public op_each_vv> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a) noexcept { return std::floor(fadd(a, half_minus_1ulp_const::value)); } }; template struct vec_op_sqrt : public op_each_vv> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a) noexcept { return fsqrt(a); } }; template struct vec_op_rcp : public op_each_vv> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a) noexcept { return fdiv(T(1), a); } }; struct vec_op_cvt_i32_to_f32 { template static ASMJIT_INLINE VecOverlay apply(const VecOverlay& a) noexcept { VecOverlay out{}; for (uint32_t off = 0; off < kW; off += 16) { out.data_f32[off / 4 + 0] = float(a.data_i32[off / 4 + 0]); out.data_f32[off / 4 + 1] = float(a.data_i32[off / 4 + 1]); out.data_f32[off / 4 + 2] = float(a.data_i32[off / 4 + 2]); out.data_f32[off / 4 + 3] = float(a.data_i32[off / 4 + 3]); } return out; } }; template struct vec_op_cvt_f32_to_f64_impl { template static ASMJIT_INLINE VecOverlay apply(const VecOverlay& a) noexcept { VecOverlay out{}; uint32_t adj = kHi ? kW / 8 : 0u; for (uint32_t off = 0; off < kW; off += 16) { out.data_f64[off / 8 + 0] = double(a.data_f32[off / 8 + adj + 0]); out.data_f64[off / 8 + 1] = double(a.data_f32[off / 8 + adj + 1]); } return out; } }; struct vec_op_cvt_f32_lo_to_f64 : public vec_op_cvt_f32_to_f64_impl {}; struct vec_op_cvt_f32_hi_to_f64 : public vec_op_cvt_f32_to_f64_impl {}; template struct vec_op_cvt_f64_to_f32_impl { template static ASMJIT_INLINE VecOverlay apply(const VecOverlay& a) noexcept { VecOverlay out{}; uint32_t adj = kHi ? kW / 8 : 0u; for (uint32_t off = 0; off < kW; off += 16) { out.data_f32[off / 8 + adj + 0] = float(a.data_f64[off / 8 + 0]); out.data_f32[off / 8 + adj + 1] = float(a.data_f64[off / 8 + 1]); } return out; } }; struct vec_op_cvt_f64_to_f32_lo : public vec_op_cvt_f64_to_f32_impl {}; struct vec_op_cvt_f64_to_f32_hi : public vec_op_cvt_f64_to_f32_impl {}; template struct vec_op_cvt_i32_to_f64_impl { template static ASMJIT_INLINE VecOverlay apply(const VecOverlay& a) noexcept { VecOverlay out{}; uint32_t adj = kHi ? kW / 8 : 0u; for (uint32_t off = 0; off < kW; off += 16) { out.data_f64[off / 8 + 0] = double(a.data_i32[off / 8 + adj + 0]); out.data_f64[off / 8 + 1] = double(a.data_i32[off / 8 + adj + 1]); } return out; } }; struct vec_op_cvt_i32_lo_to_f64 : public vec_op_cvt_i32_to_f64_impl {}; struct vec_op_cvt_i32_hi_to_f64 : public vec_op_cvt_i32_to_f64_impl {}; template struct vec_op_cvt_trunc_f32_to_i32 { template static ASMJIT_INLINE VecOverlay apply(const VecOverlay& a) noexcept { VecOverlay out{}; for (uint32_t off = 0; off < kW; off += 16) { out.data_i32[off / 4 + 0] = cvt_float_to_int_trunc(a.data_f32[off / 4 + 0]); out.data_i32[off / 4 + 1] = cvt_float_to_int_trunc(a.data_f32[off / 4 + 1]); out.data_i32[off / 4 + 2] = cvt_float_to_int_trunc(a.data_f32[off / 4 + 2]); out.data_i32[off / 4 + 3] = cvt_float_to_int_trunc(a.data_f32[off / 4 + 3]); } return out; } }; template struct vec_op_cvt_trunc_f64_to_i32_impl { template static ASMJIT_INLINE VecOverlay apply(const VecOverlay& a) noexcept { VecOverlay out{}; uint32_t adj = kHi ? kW / 8 : 0u; for (uint32_t off = 0; off < kW; off += 16) { out.data_i32[off / 8 + adj + 0] = cvt_float_to_int_trunc(a.data_f64[off / 8 + 0]); out.data_i32[off / 8 + adj + 1] = cvt_float_to_int_trunc(a.data_f64[off / 8 + 1]); } return out; } }; template struct vec_op_cvt_trunc_f64_to_i32_lo : vec_op_cvt_trunc_f64_to_i32_impl {}; template struct vec_op_cvt_trunc_f64_to_i32_hi : vec_op_cvt_trunc_f64_to_i32_impl {}; template struct vec_op_cvt_round_f32_to_i32 { template static ASMJIT_INLINE VecOverlay apply(const VecOverlay& a) noexcept { VecOverlay out{}; for (uint32_t off = 0; off < kW; off += 16) { out.data_i32[off / 4 + 0] = cvt_float_to_int_round(a.data_f32[off / 4 + 0]); out.data_i32[off / 4 + 1] = cvt_float_to_int_round(a.data_f32[off / 4 + 1]); out.data_i32[off / 4 + 2] = cvt_float_to_int_round(a.data_f32[off / 4 + 2]); out.data_i32[off / 4 + 3] = cvt_float_to_int_round(a.data_f32[off / 4 + 3]); } return out; } }; template struct vec_op_cvt_round_f64_to_i32_impl { template static ASMJIT_INLINE VecOverlay apply(const VecOverlay& a) noexcept { VecOverlay out{}; uint32_t adj = kHi ? kW / 8 : 0u; for (uint32_t off = 0; off < kW; off += 16) { out.data_i32[off / 8 + adj + 0] = cvt_float_to_int_round(a.data_f64[off / 8 + 0]); out.data_i32[off / 8 + adj + 1] = cvt_float_to_int_round(a.data_f64[off / 8 + 1]); } return out; } }; template struct vec_op_cvt_round_f64_to_i32_lo : vec_op_cvt_round_f64_to_i32_impl {}; template struct vec_op_cvt_round_f64_to_i32_hi : vec_op_cvt_round_f64_to_i32_impl {}; struct scalar_op_cvt_f32_to_f64 { template static ASMJIT_INLINE VecOverlay apply(const VecOverlay& a) noexcept { VecOverlay out{}; out.data_f64[0] = a.data_f32[0]; return out; } }; struct scalar_op_cvt_f64_to_f32 { template static ASMJIT_INLINE VecOverlay apply(const VecOverlay& a) noexcept { VecOverlay out{}; out.data_f32[0] = a.data_f64[0]; return out; } }; template struct scalar_op_trunc : public op_scalar_vv> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a) noexcept { return std::trunc(a); } }; template struct scalar_op_floor : public op_scalar_vv> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a) noexcept { return std::floor(a); } }; template struct scalar_op_ceil : public op_scalar_vv> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a) noexcept { return std::ceil(a); } }; template struct scalar_op_round_even : public op_scalar_vv> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a) noexcept { return std::nearbyint(a); } }; template struct scalar_op_round_half_away : public op_scalar_vv> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a) noexcept { return std::trunc(fadd(a, fxor(half_minus_1ulp_const::value, fsign(a)))); } }; template struct scalar_op_round_half_up : public op_scalar_vv> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a) noexcept { return std::floor(fadd(a, half_minus_1ulp_const::value)); } }; template struct scalar_op_sqrt : public op_scalar_vv> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a) noexcept { return fsqrt(a); } }; // ujit::UniCompiler - Tests - Generic Operations - VVI // ==================================================== template struct vec_op_slli : public op_each_vvi> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, uint32_t imm) noexcept { return T(cast_uint(a) << imm); } }; template struct vec_op_srli : public op_each_vvi> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, uint32_t imm) noexcept { return T(cast_uint(a) >> imm); } }; template struct vec_op_rsrli : public op_each_vvi> { static ASMJIT_INLINE T apply_one(const T& a, uint32_t imm) noexcept { T add = T((a & (T(1) << (imm - 1))) != 0); return T((cast_uint(a) >> imm) + cast_uint(add)); } }; template struct vec_op_srai : public op_each_vvi> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, uint32_t imm) noexcept { return T(cast_int(a) >> imm); } }; struct vec_op_sllb_u128 { template static ASMJIT_INLINE VecOverlay apply(const VecOverlay& a, uint32_t imm) noexcept { VecOverlay out{}; for (uint32_t off = 0; off < kW; off += 16) { for (uint32_t i = 0; i < 16; i++) { out.data_u8[off + i] = i < imm ? uint8_t(0) : a.data_u8[off + i - imm]; } } return out; } }; struct vec_op_srlb_u128 { template static ASMJIT_INLINE VecOverlay apply(const VecOverlay& a, uint32_t imm) noexcept { VecOverlay out{}; for (uint32_t off = 0; off < kW; off += 16) { for (uint32_t i = 0; i < 16; i++) { out.data_u8[off + i] = i + imm < 16u ? a.data_u8[off + i + imm] : uint8_t(0); } } return out; } }; struct vec_op_swizzle_u16 { template static ASMJIT_INLINE VecOverlay apply(const VecOverlay& a, uint32_t imm) noexcept { uint32_t D = (imm >> 24) & 0x3; uint32_t C = (imm >> 16) & 0x3; uint32_t B = (imm >> 8) & 0x3; uint32_t A = (imm >> 0) & 0x3; VecOverlay out{}; for (uint32_t off = 0; off < kW; off += 16) { out.data_u16[off / 2 + 0] = a.data_u16[off / 2 + 0 + A]; out.data_u16[off / 2 + 1] = a.data_u16[off / 2 + 0 + B]; out.data_u16[off / 2 + 2] = a.data_u16[off / 2 + 0 + C]; out.data_u16[off / 2 + 3] = a.data_u16[off / 2 + 0 + D]; out.data_u16[off / 2 + 4] = a.data_u16[off / 2 + 4 + A]; out.data_u16[off / 2 + 5] = a.data_u16[off / 2 + 4 + B]; out.data_u16[off / 2 + 6] = a.data_u16[off / 2 + 4 + C]; out.data_u16[off / 2 + 7] = a.data_u16[off / 2 + 4 + D]; } return out; } }; struct vec_op_swizzle_lo_u16x4 { template static ASMJIT_INLINE VecOverlay apply(const VecOverlay& a, uint32_t imm) noexcept { uint32_t D = (imm >> 24) & 0x3; uint32_t C = (imm >> 16) & 0x3; uint32_t B = (imm >> 8) & 0x3; uint32_t A = (imm >> 0) & 0x3; VecOverlay out{}; for (uint32_t off = 0; off < kW; off += 16) { out.data_u16[off / 2 + 0] = a.data_u16[off / 2 + A]; out.data_u16[off / 2 + 1] = a.data_u16[off / 2 + B]; out.data_u16[off / 2 + 2] = a.data_u16[off / 2 + C]; out.data_u16[off / 2 + 3] = a.data_u16[off / 2 + D]; memcpy(out.data_u8 + off + 8, a.data_u8 + off + 8, 8); } return out; } }; struct vec_op_swizzle_hi_u16x4 { template static ASMJIT_INLINE VecOverlay apply(const VecOverlay& a, uint32_t imm) noexcept { uint32_t D = (imm >> 24) & 0x3; uint32_t C = (imm >> 16) & 0x3; uint32_t B = (imm >> 8) & 0x3; uint32_t A = (imm >> 0) & 0x3; VecOverlay out{}; for (uint32_t off = 0; off < kW; off += 16) { memcpy(out.data_u8 + off, a.data_u8 + off, 8); out.data_u16[off / 2 + 4] = a.data_u16[off / 2 + 4 + A]; out.data_u16[off / 2 + 5] = a.data_u16[off / 2 + 4 + B]; out.data_u16[off / 2 + 6] = a.data_u16[off / 2 + 4 + C]; out.data_u16[off / 2 + 7] = a.data_u16[off / 2 + 4 + D]; } return out; } }; struct vec_op_swizzle_u32x4 { template static ASMJIT_INLINE VecOverlay apply(const VecOverlay& a, uint32_t imm) noexcept { uint32_t D = (imm >> 24) & 0x3; uint32_t C = (imm >> 16) & 0x3; uint32_t B = (imm >> 8) & 0x3; uint32_t A = (imm >> 0) & 0x3; VecOverlay out{}; for (uint32_t off = 0; off < kW; off += 16) { out.data_u32[off / 4 + 0] = a.data_u32[off / 4 + A]; out.data_u32[off / 4 + 1] = a.data_u32[off / 4 + B]; out.data_u32[off / 4 + 2] = a.data_u32[off / 4 + C]; out.data_u32[off / 4 + 3] = a.data_u32[off / 4 + D]; } return out; } }; struct vec_op_swizzle_u64x2 { template static ASMJIT_INLINE VecOverlay apply(const VecOverlay& a, uint32_t imm) noexcept { uint32_t B = (imm >> 8) & 0x1; uint32_t A = (imm >> 0) & 0x1; VecOverlay out{}; for (uint32_t off = 0; off < kW; off += 16) { out.data_u64[off / 8 + 0] = a.data_u64[off / 8 + A]; out.data_u64[off / 8 + 1] = a.data_u64[off / 8 + B]; } return out; } }; // ujit::UniCompiler - Tests - SIMD - Generic Operations - VVV // =========================================================== template struct vec_op_and : public op_each_vvv> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b) noexcept { return T(a & b); } }; template struct vec_op_or : public op_each_vvv> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b) noexcept { return T(a | b); } }; template struct vec_op_xor : public op_each_vvv> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b) noexcept { return T(a ^ b); } }; template struct vec_op_andn : public op_each_vvv> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b) noexcept { return T(~a & b); } }; template struct vec_op_bic : public op_each_vvv> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b) noexcept { return T(a & ~b); } }; template struct vec_op_add : public op_each_vvv> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b) noexcept { return T(cast_uint(a) + cast_uint(b)); } }; template struct vec_op_adds : public op_each_vvv> { static ASMJIT_INLINE T apply_one(const T& a, const T& b) noexcept { Support::FastUInt8 of{}; T result = Support::add_overflow(a, b, &of); if (!of) { return result; } if constexpr (std::is_unsigned_v) { return std::numeric_limits::max(); } else { return b > T(0) ? std::numeric_limits::max() : std::numeric_limits::lowest(); } } }; template struct vec_op_sub : public op_each_vvv> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b) noexcept { return T(cast_uint(a) - cast_uint(b)); } }; template struct vec_op_subs : public op_each_vvv> { static ASMJIT_INLINE T apply_one(const T& a, const T& b) noexcept { Support::FastUInt8 of{}; T result = Support::sub_overflow(a, b, &of); if (!of) { return result; } if constexpr (std::is_unsigned_v) { return std::numeric_limits::lowest(); } else { return b > T(0) ? std::numeric_limits::lowest() : std::numeric_limits::max(); } } }; template struct vec_op_mul : public op_each_vvv> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b) noexcept { return T((uint64_t(a) * uint64_t(b)) & uint64_t(~T(0))); } }; template struct vec_op_mulhi : public op_each_vvv> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b) noexcept { uint64_t result = uint64_t(int64_t(cast_int(a))) * uint64_t(int64_t(cast_int(b))); return T(T(result >> (sizeof(T) * 8u)) & T(~T(0))); } }; template struct vec_op_mulhu : public op_each_vvv> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b) noexcept { uint64_t result = uint64_t(a) * uint64_t(b); return T(result >> (sizeof(T) * 8u)) & uint64_t(~T(0)); } }; struct vec_op_mul_u64_lo_u32 : public op_each_vvv { static ASMJIT_INLINE_NODEBUG uint64_t apply_one(const uint64_t& a, const uint64_t& b) noexcept { return uint64_t(a) * uint64_t(b & 0xFFFFFFFFu); } }; struct vec_op_mhadd_i16_i32 : public op_each_vvv { static ASMJIT_INLINE_NODEBUG uint32_t apply_one(const uint32_t& a, const uint32_t& b) noexcept { uint32_t al = uint32_t(int32_t(int16_t(a & 0xFFFF))); uint32_t ah = uint32_t(int32_t(int16_t(a >> 16))); uint32_t bl = uint32_t(int32_t(int16_t(b & 0xFFFF))); uint32_t bh = uint32_t(int32_t(int16_t(b >> 16))); return al * bl + ah * bh; } }; template struct vec_op_madd : public op_each_vvvv> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b, const T& c) noexcept { return T((uint64_t(a) * uint64_t(b) + uint64_t(c)) & uint64_t(~T(0))); } }; template struct vec_op_min : public op_each_vvv> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b) noexcept { return a < b ? a : b; } }; template struct vec_op_max : public op_each_vvv> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b) noexcept { return a > b ? a : b; } }; template struct vec_op_cmp_eq : public op_each_vvv> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b) noexcept { return a == b ? Support::bit_ones : T(0); } }; template struct vec_op_cmp_ne : public op_each_vvv> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b) noexcept { return a != b ? Support::bit_ones : T(0); } }; template struct vec_op_cmp_gt : public op_each_vvv> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b) noexcept { return a > b ? Support::bit_ones : T(0); } }; template struct vec_op_cmp_ge : public op_each_vvv> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b) noexcept { return a >= b ? Support::bit_ones : T(0); } }; template struct vec_op_cmp_lt : public op_each_vvv> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b) noexcept { return a < b ? Support::bit_ones : T(0); } }; template struct vec_op_cmp_le : public op_each_vvv> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b) noexcept { return a <= b ? Support::bit_ones : T(0); } }; template struct scalar_op_fadd : public op_scalar_vvv> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b) noexcept { return fadd(a, b); } }; template struct scalar_op_fsub : public op_scalar_vvv> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b) noexcept { return fsub(a, b); } }; template struct scalar_op_fmul : public op_scalar_vvv> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b) noexcept { return fmul(a, b); } }; template struct scalar_op_fdiv : public op_scalar_vvv> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b) noexcept { return fdiv(a, b); } }; template struct scalar_op_fmin_ternary : public op_scalar_vvv> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b) noexcept { return a < b ? a : b; } }; template struct scalar_op_fmax_ternary : public op_scalar_vvv> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b) noexcept { return a > b ? a : b; } }; template struct scalar_op_fmin_finite : public op_scalar_vvv> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b) noexcept { return std::isnan(a) ? b : std::isnan(b) ? a : Support::min(a, b); } }; template struct scalar_op_fmax_finite : public op_scalar_vvv> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b) noexcept { return std::isnan(a) ? b : std::isnan(b) ? a : Support::max(a, b); } }; template struct scalar_op_fmadd_nofma : public op_scalar_vvvv> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b, const T& c) noexcept { return fmadd_nofma_ref(a, b, c); } }; template struct scalar_op_fmsub_nofma : public op_scalar_vvvv> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b, const T& c) noexcept { return fmadd_nofma_ref(a, b, -c); } }; template struct scalar_op_fnmadd_nofma : public op_scalar_vvvv> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b, const T& c) noexcept { return fmadd_nofma_ref(-a, b, c); } }; template struct scalar_op_fnmsub_nofma : public op_scalar_vvvv> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b, const T& c) noexcept { return fmadd_nofma_ref(-a, b, -c); } }; template struct scalar_op_fmadd_fma : public op_scalar_vvvv> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b, const T& c) noexcept { return fmadd_fma_ref(a, b, c); } }; template struct scalar_op_fmsub_fma : public op_scalar_vvvv> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b, const T& c) noexcept { return fmadd_fma_ref(a, b, -c); } }; template struct scalar_op_fnmadd_fma : public op_scalar_vvvv> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b, const T& c) noexcept { return fmadd_fma_ref(-a, b, c); } }; template struct scalar_op_fnmsub_fma : public op_scalar_vvvv> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b, const T& c) noexcept { return fmadd_fma_ref(-a, b, -c); } }; template struct vec_op_fadd : public op_each_vvv> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b) noexcept { return fadd(a, b); } }; template struct vec_op_fsub : public op_each_vvv> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b) noexcept { return fsub(a, b); } }; template struct vec_op_fmul : public op_each_vvv> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b) noexcept { return fmul(a, b); } }; template struct vec_op_fdiv : public op_each_vvv> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b) noexcept { return fdiv(a, b); } }; template struct vec_op_fmin_ternary : public op_each_vvv> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b) noexcept { return a < b ? a : b; } }; template struct vec_op_fmax_ternary : public op_each_vvv> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b) noexcept { return a > b ? a : b; } }; template struct vec_op_fmin_finite : public op_each_vvv> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b) noexcept { return std::isnan(a) ? b : std::isnan(b) ? a : Support::min(a, b); } }; template struct vec_op_fmax_finite : public op_each_vvv> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b) noexcept { return std::isnan(a) ? b : std::isnan(b) ? a : Support::max(a, b); } }; template struct vec_op_fmadd_nofma : public op_each_vvvv> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b, const T& c) noexcept { return fmadd_nofma_ref(a, b, c); } }; template struct vec_op_fmsub_nofma : public op_each_vvvv> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b, const T& c) noexcept { return fmadd_nofma_ref(a, b, -c); } }; template struct vec_op_fnmadd_nofma : public op_each_vvvv> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b, const T& c) noexcept { return fmadd_nofma_ref(-a, b, c); } }; template struct vec_op_fnmsub_nofma : public op_each_vvvv> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b, const T& c) noexcept { return fmadd_nofma_ref(-a, b, -c); } }; template struct vec_op_fmadd_fma : public op_each_vvvv> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b, const T& c) noexcept { return fmadd_fma_ref(a, b, c); } }; template struct vec_op_fmsub_fma : public op_each_vvvv> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b, const T& c) noexcept { return fmadd_fma_ref(a, b, -c); } }; template struct vec_op_fnmadd_fma : public op_each_vvvv> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b, const T& c) noexcept { return fmadd_fma_ref(-a, b, c); } }; template struct vec_op_fnmsub_fma : public op_each_vvvv> { static ASMJIT_INLINE_NODEBUG T apply_one(const T& a, const T& b, const T& c) noexcept { return fmadd_fma_ref(-a, b, -c); } }; template struct cmp_result { typedef T Result; static ASMJIT_INLINE_NODEBUG Result make(bool result) noexcept { return result ? Result(~Result(0)) : Result(0); } }; template<> struct cmp_result { typedef uint32_t Result; static ASMJIT_INLINE_NODEBUG Result make(bool result) noexcept { return result ? ~Result(0) : Result(0); } }; template<> struct cmp_result { typedef uint64_t Result; static ASMJIT_INLINE_NODEBUG Result make(bool result) noexcept { return result ? ~Result(0) : Result(0); } }; template struct vec_op_fcmpo_eq : public op_each_vvv> { static ASMJIT_INLINE_NODEBUG typename cmp_result::Result apply_one(const T& a, const T& b) noexcept { return cmp_result::make(a == b); } }; template struct vec_op_fcmpu_ne : public op_each_vvv> { static ASMJIT_INLINE_NODEBUG typename cmp_result::Result apply_one(const T& a, const T& b) noexcept { return cmp_result::make(!(a == b)); } }; template struct vec_op_fcmpo_gt : public op_each_vvv> { static ASMJIT_INLINE_NODEBUG typename cmp_result::Result apply_one(const T& a, const T& b) noexcept { return cmp_result::make(a > b); } }; template struct vec_op_fcmpo_ge : public op_each_vvv> { static ASMJIT_INLINE_NODEBUG typename cmp_result::Result apply_one(const T& a, const T& b) noexcept { return cmp_result::make(a >= b); } }; template struct vec_op_fcmpo_lt : public op_each_vvv> { static ASMJIT_INLINE_NODEBUG typename cmp_result::Result apply_one(const T& a, const T& b) noexcept { return cmp_result::make(a < b); } }; template struct vec_op_fcmpo_le : public op_each_vvv> { static ASMJIT_INLINE_NODEBUG typename cmp_result::Result apply_one(const T& a, const T& b) noexcept { return cmp_result::make(a <= b); } }; template struct vec_op_fcmp_ord : public op_each_vvv> { static ASMJIT_INLINE_NODEBUG typename cmp_result::Result apply_one(const T& a, const T& b) noexcept { return cmp_result::make(!std::isnan(a) && !std::isnan(b)); } }; template struct vec_op_fcmp_unord : public op_each_vvv> { static ASMJIT_INLINE_NODEBUG typename cmp_result::Result apply_one(const T& a, const T& b) noexcept { return cmp_result::make(std::isnan(a) || std::isnan(b)); } }; struct vec_op_hadd_f64 { template static ASMJIT_INLINE VecOverlay apply(const VecOverlay& a, const VecOverlay& b) noexcept { VecOverlay out{}; for (uint32_t off = 0; off < kW; off += 16) { out.data_f64[off / 8 + 0] = a.data_f64[off / 8 + 0] + a.data_f64[off / 8 + 1]; out.data_f64[off / 8 + 1] = b.data_f64[off / 8 + 0] + b.data_f64[off / 8 + 1]; } return out; } }; struct vec_op_combine_lo_hi_u64 { template static ASMJIT_INLINE VecOverlay apply(const VecOverlay& a, const VecOverlay& b) noexcept { VecOverlay out{}; for (uint32_t off = 0; off < kW; off += 16) { out.data_u64[off / 8 + 0] = b.data_u64[off / 8 + 1]; out.data_u64[off / 8 + 1] = a.data_u64[off / 8 + 0]; } return out; } }; struct vec_op_combine_hi_lo_u64 { template static ASMJIT_INLINE VecOverlay apply(const VecOverlay& a, const VecOverlay& b) noexcept { VecOverlay out{}; for (uint32_t off = 0; off < kW; off += 16) { out.data_u64[off / 8 + 0] = b.data_u64[off / 8 + 0]; out.data_u64[off / 8 + 1] = a.data_u64[off / 8 + 1]; } return out; } }; struct vec_op_interleave_lo_u8 { template static ASMJIT_INLINE VecOverlay apply(const VecOverlay& a, const VecOverlay& b) noexcept { VecOverlay out{}; for (uint32_t off = 0; off < kW; off += 16) { for (uint32_t i = 0; i < 8; i++) { out.data_u8[off + i * 2 + 0] = a.data_u8[off + i]; out.data_u8[off + i * 2 + 1] = b.data_u8[off + i]; } } return out; } }; struct vec_op_interleave_hi_u8 { template static ASMJIT_INLINE VecOverlay apply(const VecOverlay& a, const VecOverlay& b) noexcept { VecOverlay out{}; for (uint32_t off = 0; off < kW; off += 16) { for (uint32_t i = 0; i < 8; i++) { out.data_u8[off + i * 2 + 0] = a.data_u8[off + 8 + i]; out.data_u8[off + i * 2 + 1] = b.data_u8[off + 8 + i]; } } return out; } }; struct vec_op_interleave_lo_u16 { template static ASMJIT_INLINE VecOverlay apply(const VecOverlay& a, const VecOverlay& b) noexcept { VecOverlay out{}; for (uint32_t off = 0; off < kW; off += 16) { for (uint32_t i = 0; i < 4; i++) { out.data_u16[off / 2 + i * 2 + 0] = a.data_u16[off / 2 + i]; out.data_u16[off / 2 + i * 2 + 1] = b.data_u16[off / 2 + i]; } } return out; } }; struct vec_op_interleave_hi_u16 { template static ASMJIT_INLINE VecOverlay apply(const VecOverlay& a, const VecOverlay& b) noexcept { VecOverlay out{}; for (uint32_t off = 0; off < kW; off += 16) { for (uint32_t i = 0; i < 4; i++) { out.data_u16[off / 2 + i * 2 + 0] = a.data_u16[off / 2 + 4 + i]; out.data_u16[off / 2 + i * 2 + 1] = b.data_u16[off / 2 + 4 + i]; } } return out; } }; struct vec_op_interleave_lo_u32 { template static ASMJIT_INLINE VecOverlay apply(const VecOverlay& a, const VecOverlay& b) noexcept { VecOverlay out{}; for (uint32_t off = 0; off < kW; off += 16) { for (uint32_t i = 0; i < 2; i++) { out.data_u32[off / 4 + i * 2 + 0] = a.data_u32[off / 4 + i]; out.data_u32[off / 4 + i * 2 + 1] = b.data_u32[off / 4 + i]; } } return out; } }; struct vec_op_interleave_hi_u32 { template static ASMJIT_INLINE VecOverlay apply(const VecOverlay& a, const VecOverlay& b) noexcept { VecOverlay out{}; for (uint32_t off = 0; off < kW; off += 16) { for (uint32_t i = 0; i < 2; i++) { out.data_u32[off / 4 + i * 2 + 0] = a.data_u32[off / 4 + 2 + i]; out.data_u32[off / 4 + i * 2 + 1] = b.data_u32[off / 4 + 2 + i]; } } return out; } }; struct vec_op_interleave_lo_u64 { template static ASMJIT_INLINE VecOverlay apply(const VecOverlay& a, const VecOverlay& b) noexcept { VecOverlay out{}; for (uint32_t off = 0; off < kW; off += 16) { out.data_u64[off / 8 + 0] = a.data_u64[off / 8 + 0]; out.data_u64[off / 8 + 1] = b.data_u64[off / 8 + 0]; } return out; } }; struct vec_op_interleave_hi_u64 { template static ASMJIT_INLINE VecOverlay apply(const VecOverlay& a, const VecOverlay& b) noexcept { VecOverlay out{}; for (uint32_t off = 0; off < kW; off += 16) { out.data_u64[off / 8 + 0] = a.data_u64[off / 8 + 1]; out.data_u64[off / 8 + 1] = b.data_u64[off / 8 + 1]; } return out; } }; // ujit::UniCompiler - Tests - SIMD - Generic Operations - VVVI // ============================================================ struct vec_op_alignr_u128 { template static ASMJIT_INLINE VecOverlay apply(const VecOverlay& a, const VecOverlay& b, uint32_t imm) noexcept { VecOverlay out{}; for (uint32_t off = 0; off < kW; off += 16) { for (uint32_t i = 0; i < 16; i++) { out.data_u8[off + i] = i + imm < 16 ? b.data_u8[off + i + imm] : a.data_u8[off + i + imm - 16]; } } return out; } }; struct vec_op_interleave_shuffle_u32x4 { template static ASMJIT_INLINE VecOverlay apply(const VecOverlay& a, const VecOverlay& b, uint32_t imm) noexcept { uint32_t D = (imm >> 24) & 0x3; uint32_t C = (imm >> 16) & 0x3; uint32_t B = (imm >> 8) & 0x3; uint32_t A = (imm >> 0) & 0x3; VecOverlay out{}; for (uint32_t off = 0; off < kW; off += 16) { out.data_u32[off / 4 + 0] = a.data_u32[off / 4 + A]; out.data_u32[off / 4 + 1] = a.data_u32[off / 4 + B]; out.data_u32[off / 4 + 2] = b.data_u32[off / 4 + C]; out.data_u32[off / 4 + 3] = b.data_u32[off / 4 + D]; } return out; } }; struct vec_op_interleave_shuffle_u64x2 { template static ASMJIT_INLINE VecOverlay apply(const VecOverlay& a, const VecOverlay& b, uint32_t imm) noexcept { uint32_t B = (imm >> 8) & 0x1; uint32_t A = (imm >> 0) & 0x1; VecOverlay out{}; for (uint32_t off = 0; off < kW; off += 16) { out.data_u64[off / 8 + 0] = a.data_u64[off / 8 + A]; out.data_u64[off / 8 + 1] = b.data_u64[off / 8 + B]; } return out; } }; struct vec_op_packs_i16_i8 { template static ASMJIT_INLINE VecOverlay apply(const VecOverlay& a, const VecOverlay& b) noexcept { VecOverlay out{}; for (uint32_t off = 0; off < kW; off += 16) { out.data_i8[off + 0] = saturate_i16_to_i8(a.data_i16[off / 2 + 0]); out.data_i8[off + 1] = saturate_i16_to_i8(a.data_i16[off / 2 + 1]); out.data_i8[off + 2] = saturate_i16_to_i8(a.data_i16[off / 2 + 2]); out.data_i8[off + 3] = saturate_i16_to_i8(a.data_i16[off / 2 + 3]); out.data_i8[off + 4] = saturate_i16_to_i8(a.data_i16[off / 2 + 4]); out.data_i8[off + 5] = saturate_i16_to_i8(a.data_i16[off / 2 + 5]); out.data_i8[off + 6] = saturate_i16_to_i8(a.data_i16[off / 2 + 6]); out.data_i8[off + 7] = saturate_i16_to_i8(a.data_i16[off / 2 + 7]); out.data_i8[off + 8] = saturate_i16_to_i8(b.data_i16[off / 2 + 0]); out.data_i8[off + 9] = saturate_i16_to_i8(b.data_i16[off / 2 + 1]); out.data_i8[off + 10] = saturate_i16_to_i8(b.data_i16[off / 2 + 2]); out.data_i8[off + 11] = saturate_i16_to_i8(b.data_i16[off / 2 + 3]); out.data_i8[off + 12] = saturate_i16_to_i8(b.data_i16[off / 2 + 4]); out.data_i8[off + 13] = saturate_i16_to_i8(b.data_i16[off / 2 + 5]); out.data_i8[off + 14] = saturate_i16_to_i8(b.data_i16[off / 2 + 6]); out.data_i8[off + 15] = saturate_i16_to_i8(b.data_i16[off / 2 + 7]); } return out; } }; struct vec_op_packs_i16_u8 { template static ASMJIT_INLINE VecOverlay apply(const VecOverlay& a, const VecOverlay& b) noexcept { VecOverlay out{}; for (uint32_t off = 0; off < kW; off += 16) { out.data_u8[off + 0] = saturate_i16_to_u8(a.data_i16[off / 2 + 0]); out.data_u8[off + 1] = saturate_i16_to_u8(a.data_i16[off / 2 + 1]); out.data_u8[off + 2] = saturate_i16_to_u8(a.data_i16[off / 2 + 2]); out.data_u8[off + 3] = saturate_i16_to_u8(a.data_i16[off / 2 + 3]); out.data_u8[off + 4] = saturate_i16_to_u8(a.data_i16[off / 2 + 4]); out.data_u8[off + 5] = saturate_i16_to_u8(a.data_i16[off / 2 + 5]); out.data_u8[off + 6] = saturate_i16_to_u8(a.data_i16[off / 2 + 6]); out.data_u8[off + 7] = saturate_i16_to_u8(a.data_i16[off / 2 + 7]); out.data_u8[off + 8] = saturate_i16_to_u8(b.data_i16[off / 2 + 0]); out.data_u8[off + 9] = saturate_i16_to_u8(b.data_i16[off / 2 + 1]); out.data_u8[off + 10] = saturate_i16_to_u8(b.data_i16[off / 2 + 2]); out.data_u8[off + 11] = saturate_i16_to_u8(b.data_i16[off / 2 + 3]); out.data_u8[off + 12] = saturate_i16_to_u8(b.data_i16[off / 2 + 4]); out.data_u8[off + 13] = saturate_i16_to_u8(b.data_i16[off / 2 + 5]); out.data_u8[off + 14] = saturate_i16_to_u8(b.data_i16[off / 2 + 6]); out.data_u8[off + 15] = saturate_i16_to_u8(b.data_i16[off / 2 + 7]); } return out; } }; struct vec_op_packs_i32_i16 { template static ASMJIT_INLINE VecOverlay apply(const VecOverlay& a, const VecOverlay& b) noexcept { VecOverlay out{}; for (uint32_t off = 0; off < kW; off += 16) { out.data_i16[off / 2 + 0] = saturate_i32_to_i16(a.data_i32[off / 4 + 0]); out.data_i16[off / 2 + 1] = saturate_i32_to_i16(a.data_i32[off / 4 + 1]); out.data_i16[off / 2 + 2] = saturate_i32_to_i16(a.data_i32[off / 4 + 2]); out.data_i16[off / 2 + 3] = saturate_i32_to_i16(a.data_i32[off / 4 + 3]); out.data_i16[off / 2 + 4] = saturate_i32_to_i16(b.data_i32[off / 4 + 0]); out.data_i16[off / 2 + 5] = saturate_i32_to_i16(b.data_i32[off / 4 + 1]); out.data_i16[off / 2 + 6] = saturate_i32_to_i16(b.data_i32[off / 4 + 2]); out.data_i16[off / 2 + 7] = saturate_i32_to_i16(b.data_i32[off / 4 + 3]); } return out; } }; struct vec_op_packs_i32_u16 { template static ASMJIT_INLINE VecOverlay apply(const VecOverlay& a, const VecOverlay& b) noexcept { VecOverlay out{}; for (uint32_t off = 0; off < kW; off += 16) { out.data_u16[off / 2 + 0] = saturate_i32_to_u16(a.data_i32[off / 4 + 0]); out.data_u16[off / 2 + 1] = saturate_i32_to_u16(a.data_i32[off / 4 + 1]); out.data_u16[off / 2 + 2] = saturate_i32_to_u16(a.data_i32[off / 4 + 2]); out.data_u16[off / 2 + 3] = saturate_i32_to_u16(a.data_i32[off / 4 + 3]); out.data_u16[off / 2 + 4] = saturate_i32_to_u16(b.data_i32[off / 4 + 0]); out.data_u16[off / 2 + 5] = saturate_i32_to_u16(b.data_i32[off / 4 + 1]); out.data_u16[off / 2 + 6] = saturate_i32_to_u16(b.data_i32[off / 4 + 2]); out.data_u16[off / 2 + 7] = saturate_i32_to_u16(b.data_i32[off / 4 + 3]); } return out; } }; // ujit::UniCompiler - Tests - SIMD - Generic Operations - VVVV // ============================================================ struct vec_op_blendv_bits : public op_each_vvvv { static ASMJIT_INLINE_NODEBUG uint32_t apply_one(const uint32_t& a, const uint32_t& b, const uint32_t& c) noexcept { return ((a & ~c) | (b & c)); } }; struct vec_op_swizzlev_u8 { template static ASMJIT_INLINE VecOverlay apply(const VecOverlay& a, const VecOverlay& b) noexcept { VecOverlay out{}; for (uint32_t off = 0; off < kW; off += 16) { for (uint32_t i = 0; i < 16; i++) { size_t sel = b.data_u8[off + i] & (0x8F); // 3 bits ignored. out.data_u8[off + i] = sel & 0x80 ? uint8_t(0) : a.data_u8[off + sel]; } } return out; } }; struct vec_op_div255_u16 : public op_each_vv { static ASMJIT_INLINE uint16_t apply_one(const uint16_t& a) noexcept { uint32_t x = a + 0x80u; return uint16_t((x + (x >> 8)) >> 8); } }; struct vec_op_div65535_u32 : public op_each_vv { static ASMJIT_INLINE uint32_t apply_one(const uint32_t& a) noexcept { uint32_t x = a + 0x8000u; return uint32_t((x + (x >> 16)) >> 16); } }; // ujit::UniCompiler - Tests - SIMD - Utilities // ============================================ template static void fill_random_bytes(DataGenInt& dg, VecOverlay& dst) noexcept { for (uint32_t i = 0; i < kW / 8u; i++) { dst.data_u64[i] = dg.next_uint64(); } } template static void fill_random_f32(DataGenInt& dg, VecOverlay& dst) noexcept { for (uint32_t i = 0; i < kW / 4u; i++) { dst.data_f32[i] = dg.next_float32(); } } template static void fill_random_f64(DataGenInt& dg, VecOverlay& dst) noexcept { for (uint32_t i = 0; i < kW / 8u; i++) { dst.data_f64[i] = dg.next_float64(); } } template static void fill_random_data(DataGenInt& dg, VecOverlay& dst, VecElementType element_type) noexcept { switch (element_type) { case VecElementType::kFloat32: fill_random_f32(dg, dst); break; case VecElementType::kFloat64: fill_random_f64(dg, dst); break; default: fill_random_bytes(dg, dst); break; } } // ujit::UniCompiler - Tests - SIMD - Verification // =============================================== template static ASMJIT_NOINLINE void test_vecop_vv_failed(UniOpVV op, Variation variation, const VecOverlay& arg0, const VecOverlay& observed, const VecOverlay& expected, const char* assembly) noexcept { VecOpInfo op_info = vec_op_info_vv(op); String arg0_str = vec_stringify(arg0, op_info.arg(0)); String observed_str = vec_stringify(observed, op_info.ret()); String expected_str = vec_stringify(expected, op_info.ret()); EXPECT(false) .message("Operation '%s' (variation %u) failed:\n" " Input #0: %s\n" " Expected: %s\n" " Observed: %s\n" "Assembly:\n%s", vec_op_name_vv(op), variation.value, arg0_str.data(), expected_str.data(), observed_str.data(), assembly); } template static ASMJIT_NOINLINE void test_vecop_vvi_failed(UniOpVVI op, Variation variation, const VecOverlay& arg0, const VecOverlay& observed, const VecOverlay& expected, uint32_t imm, const char* assembly) noexcept { VecOpInfo op_info = vec_op_info_vvi(op); String arg0_str = vec_stringify(arg0, op_info.arg(0)); String observed_str = vec_stringify(observed, op_info.ret()); String expected_str = vec_stringify(expected, op_info.ret()); EXPECT(false) .message("Operation '%s' (variation %u) failed:\n" " Input #0: %s\n" " ImmValue: %u (0x%08X)\n" " Expected: %s\n" " Observed: %s\n" "Assembly:\n%s", vec_op_name_vvi(op), variation.value, arg0_str.data(), imm, imm, expected_str.data(), observed_str.data(), assembly); } template static ASMJIT_NOINLINE void test_vecop_vvv_failed(UniOpVVV op, Variation variation, const VecOverlay& arg0, const VecOverlay& arg1, const VecOverlay& observed, const VecOverlay& expected, const char* assembly) noexcept { VecOpInfo op_info = vec_op_info_vvv(op); String arg0_str = vec_stringify(arg0, op_info.arg(0)); String arg1_str = vec_stringify(arg1, op_info.arg(1)); String observed_str = vec_stringify(observed, op_info.ret()); String expected_str = vec_stringify(expected, op_info.ret()); EXPECT(false) .message("Operation '%s' (variation %u) failed:\n" " Input #0: %s\n" " Input #1: %s\n" " Expected: %s\n" " Observed: %s\n" "Assembly:\n%s", vec_op_name_vvv(op), variation.value, arg0_str.data(), arg1_str.data(), expected_str.data(), observed_str.data(), assembly); } template static ASMJIT_NOINLINE void test_vecop_vvvi_failed(UniOpVVVI op, Variation variation, const VecOverlay& arg0, const VecOverlay& arg1, const VecOverlay& observed, const VecOverlay& expected, uint32_t imm, const char* assembly) noexcept { VecOpInfo op_info = vec_op_info_vvvi(op); String arg0_str = vec_stringify(arg0, op_info.arg(0)); String arg1_str = vec_stringify(arg1, op_info.arg(1)); String observed_str = vec_stringify(observed, op_info.ret()); String expected_str = vec_stringify(expected, op_info.ret()); EXPECT(false) .message("Operation '%s' (variation %u) failed:\n" " Input #1: %s\n" " Input #2: %s\n" " ImmValue: %u (0x%08X)\n" " Expected: %s\n" " Observed: %s\n" "Assembly:\n%s", vec_op_name_vvvi(op), variation.value, arg0_str.data(), arg1_str.data(), imm, imm, expected_str.data(), observed_str.data(), assembly); } template static ASMJIT_NOINLINE void test_vecop_vvvv_failed(UniOpVVVV op, Variation variation, const VecOverlay& arg0, const VecOverlay& arg1, const VecOverlay& arg2, const VecOverlay& observed, const VecOverlay& expected, const char* assembly) noexcept { VecOpInfo op_info = vec_op_info_vvvv(op); String arg0_str = vec_stringify(arg0, op_info.arg(0)); String arg1_str = vec_stringify(arg1, op_info.arg(1)); String arg2_str = vec_stringify(arg2, op_info.arg(2)); String observed_str = vec_stringify(observed, op_info.ret()); String expected_str = vec_stringify(expected, op_info.ret()); EXPECT(false) .message("Operation '%s' (variation %u) failed\n" " Input #1: %s\n" " Input #2: %s\n" " Input #3: %s\n" " Expected: %s\n" " Observed: %s\n" "Assembly:\n%s", vec_op_name_vvvv(op), variation.value, arg0_str.data(), arg1_str.data(), arg2_str.data(), expected_str.data(), observed_str.data(), assembly); } // ujit::UniCompiler - Tests - Integer Operations - VV // =================================================== template static ASMJIT_NOINLINE void test_vecop_vv_constraint(JitContext& ctx, Variation variation = Variation{0}) { constexpr uint32_t kW = byte_width_from_vec_width(kVecWidth); TestVVFunc compiled_apply = create_func_vv(ctx, kVecWidth, kOp, variation); DataGenInt dg(kRandomSeed); VecOpInfo op_info = vec_op_info_vv(kOp); for (uint32_t iter = 0; iter < kTestIterCount; iter++) { VecOverlay a {}; VecOverlay observed {}; VecOverlay expected {}; fill_random_data(dg, a, op_info.arg(0)); Constraint::apply(a); compiled_apply(&observed, &a); expected = GenericOp::apply(a); if (!vec_eq(observed, expected, op_info.ret())) { test_vecop_vv_failed(kOp, variation, a, observed, expected, ctx.logger_content()); } } ctx.rt.release(compiled_apply); } template static void test_vecop_vv(JitContext& ctx, Variation variation = Variation{0}) { return test_vecop_vv_constraint(ctx, variation); } // ujit::UniCompiler - Tests - SIMD - Integer Operations - VVI // =========================================================== template static ASMJIT_NOINLINE void test_vecop_vvi_constraint(JitContext& ctx, uint32_t imm, Variation variation = Variation{0}) { constexpr uint32_t kW = byte_width_from_vec_width(kVecWidth); TestVVFunc compiled_apply = create_func_vvi(ctx, kVecWidth, kOp, imm, variation); DataGenInt dg(kRandomSeed); VecOpInfo op_info = vec_op_info_vvi(kOp); for (uint32_t iter = 0; iter < kTestIterCount; iter++) { VecOverlay a {}; VecOverlay observed {}; VecOverlay expected {}; fill_random_data(dg, a, op_info.arg(0)); Constraint::apply(a); compiled_apply(&observed, &a); expected = GenericOp::apply(a, imm); if (!vec_eq(observed, expected, op_info.ret())) { test_vecop_vvi_failed(kOp, variation, a, observed, expected, imm, ctx.logger_content()); } } ctx.rt.release(compiled_apply); } template static void test_vecop_vvi(JitContext& ctx, uint32_t imm, Variation variation = Variation{0}) { return test_vecop_vvi_constraint(ctx, imm, variation); } // ujit::UniCompiler - Tests - SIMD - Integer Operations - VVV // =========================================================== template static ASMJIT_NOINLINE void test_vecop_vvv_constraint(JitContext& ctx, Variation variation = Variation{0}) { constexpr uint32_t kW = byte_width_from_vec_width(kVecWidth); TestVVVFunc compiled_apply = create_func_vvv(ctx, kVecWidth, kOp, variation); DataGenInt dg(kRandomSeed); VecOpInfo op_info = vec_op_info_vvv(kOp); for (uint32_t iter = 0; iter < kTestIterCount; iter++) { VecOverlay a {}; VecOverlay b {}; VecOverlay observed {}; VecOverlay expected {}; fill_random_data(dg, a, op_info.arg(0)); fill_random_data(dg, b, op_info.arg(1)); Constraint::apply(a); Constraint::apply(b); compiled_apply(&observed, &a, &b); expected = GenericOp::apply(a, b); if (!vec_eq(observed, expected, op_info.ret())) { test_vecop_vvv_failed(kOp, variation, a, b, observed, expected, ctx.logger_content()); } } ctx.rt.release(compiled_apply); } template static void test_vecop_vvv(JitContext& ctx, Variation variation = Variation{0}) { return test_vecop_vvv_constraint(ctx, variation); } // ujit::UniCompiler - Tests - SIMD - Integer Operations - VVVI // ============================================================ template static ASMJIT_NOINLINE void test_vecop_vvvi_constraint(JitContext& ctx, uint32_t imm, Variation variation = Variation{0}) { constexpr uint32_t kW = byte_width_from_vec_width(kVecWidth); TestVVVFunc compiled_apply = create_func_vvvi(ctx, kVecWidth, kOp, imm, variation); DataGenInt dg(kRandomSeed); VecOpInfo op_info = vec_op_info_vvvi(kOp); for (uint32_t iter = 0; iter < kTestIterCount; iter++) { VecOverlay a {}; VecOverlay b {}; VecOverlay observed {}; VecOverlay expected {}; fill_random_data(dg, a, op_info.arg(0)); fill_random_data(dg, b, op_info.arg(1)); Constraint::apply(a); Constraint::apply(b); compiled_apply(&observed, &a, &b); expected = GenericOp::apply(a, b, imm); if (!vec_eq(observed, expected, op_info.ret())) { test_vecop_vvvi_failed(kOp, variation, a, b, observed, expected, imm, ctx.logger_content()); } } ctx.rt.release(compiled_apply); } template static void test_vecop_vvvi(JitContext& ctx, uint32_t imm, Variation variation = Variation{0}) { return test_vecop_vvvi_constraint(ctx, imm, variation); } // ujit::UniCompiler - Tests - SIMD - Integer Operations - VVVV // ============================================================ template static ASMJIT_NOINLINE void test_vecop_vvvv_constraint(JitContext& ctx, Variation variation = Variation{0}) { constexpr uint32_t kW = byte_width_from_vec_width(kVecWidth); TestVVVVFunc compiled_apply = create_func_vvvv(ctx, kVecWidth, kOp, variation); DataGenInt dg(kRandomSeed); VecOpInfo op_info = vec_op_info_vvvv(kOp); for (uint32_t iter = 0; iter < kTestIterCount; iter++) { VecOverlay a {}; VecOverlay b {}; VecOverlay c {}; VecOverlay observed {}; VecOverlay expected {}; fill_random_data(dg, a, op_info.arg(0)); fill_random_data(dg, b, op_info.arg(1)); fill_random_data(dg, c, op_info.arg(2)); Constraint::apply(a); Constraint::apply(b); Constraint::apply(c); compiled_apply(&observed, &a, &b, &c); expected = GenericOp::apply(a, b, c); if (!vec_eq(observed, expected, op_info.ret())) { test_vecop_vvvv_failed(kOp, variation, a, b, c, observed, expected, ctx.logger_content()); } } } template static void test_vecop_vvvv(JitContext& ctx, Variation variation = Variation{0}) { return test_vecop_vvvv_constraint(ctx, variation); } // ujit::UniCompiler - Tests - SIMD - Runner // ========================================= template static ASMJIT_NOINLINE void test_simd_ops(JitContext& ctx) { // We need to know some behaviors in advance so we can select the right test function, // so create a dummy compiler and extract the necessary information from it. ScalarOpBehavior scalar_op_behavior {}; FMAddOpBehavior fmadd_op_behavior {}; FloatToIntOutsideRangeBehavior float_to_int_behavior {}; { ctx.prepare(); UniCompiler uc(&ctx.cc, ctx.features, ctx.cpu_hints); scalar_op_behavior = uc.scalar_op_behavior(); fmadd_op_behavior = uc.fmadd_op_behavior(); float_to_int_behavior = uc.float_to_int_outside_range_behavior(); } bool valgrind_fma_bug = false; #if defined(ASMJIT_UJIT_X86) // When running under valgrind there is a bug in its instrumentation of FMA SS/SD instructions. // Instead of keeping the unaffected elements in the destination register they are cleared instead, // which would cause test failures. So, detect whether we are running under Valgind that has this // bug and avoid scalar FMA tests in that case. if (fmadd_op_behavior != FMAddOpBehavior::kNoFMA) { float a[4] = { 1, 2, 3, 4 }; float b[4] = { 2, 4, 8, 1 }; float c[4] = { 4, 7, 3, 9 }; float d[4] {}; madd_fma_check_valgrind_bug(a, b, c, d); valgrind_fma_bug = d[1] == 0.0f; } #endif // ASMJIT_UJIT_X86 INFO(" Testing mov"); { test_vecop_vv(ctx); test_vecop_vv(ctx); } INFO(" Testing broadcast"); { // Test all broadcasts - vector based, GP to vector, and memory to vector. for (uint32_t v = 0; v < kNumVariationsVV_Broadcast; v++) { test_vecop_vv(ctx, Variation{v}); test_vecop_vv(ctx, Variation{v}); test_vecop_vv(ctx, Variation{v}); test_vecop_vv(ctx, Variation{v}); test_vecop_vv(ctx, Variation{v}); test_vecop_vv(ctx, Variation{v}); test_vecop_vv(ctx, Variation{v}); test_vecop_vv(ctx, Variation{v}); test_vecop_vv(ctx, Variation{v}); test_vecop_vv(ctx, Variation{v}); test_vecop_vv(ctx, Variation{v}); test_vecop_vv(ctx, Variation{v}); if constexpr (kVecWidth > VecWidth::k256) { test_vecop_vv(ctx, Variation{v}); test_vecop_vv(ctx, Variation{v}); test_vecop_vv(ctx, Variation{v}); test_vecop_vv(ctx, Variation{v}); } } } INFO(" Testing abs (int)"); { for (uint32_t v = 0; v < kNumVariationsVV; v++) { test_vecop_vv>(ctx, Variation{v}); test_vecop_vv>(ctx, Variation{v}); test_vecop_vv>(ctx, Variation{v}); test_vecop_vv>(ctx, Variation{v}); } } INFO(" Testing not (int)"); { for (uint32_t v = 0; v < kNumVariationsVV; v++) { test_vecop_vv>(ctx, Variation{v}); test_vecop_vv>(ctx, Variation{v}); } } INFO(" Testing cvt (int)"); { for (uint32_t v = 0; v < kNumVariationsVV; v++) { test_vecop_vv(ctx, Variation{v}); test_vecop_vv(ctx, Variation{v}); test_vecop_vv(ctx, Variation{v}); test_vecop_vv(ctx, Variation{v}); test_vecop_vv(ctx, Variation{v}); test_vecop_vv(ctx, Variation{v}); test_vecop_vv(ctx, Variation{v}); test_vecop_vv(ctx, Variation{v}); test_vecop_vv(ctx, Variation{v}); test_vecop_vv(ctx, Variation{v}); test_vecop_vv(ctx, Variation{v}); test_vecop_vv(ctx, Variation{v}); test_vecop_vv(ctx, Variation{v}); test_vecop_vv(ctx, Variation{v}); } } INFO(" Testing abs (float)"); { for (uint32_t v = 0; v < kNumVariationsVV; v++) { test_vecop_vv>(ctx, Variation{v}); test_vecop_vv>(ctx, Variation{v}); } } INFO(" Testing not (float)"); { for (uint32_t v = 0; v < kNumVariationsVV; v++) { test_vecop_vv>(ctx, Variation{v}); test_vecop_vv>(ctx, Variation{v}); } } INFO(" Testing rounding (float)"); { for (uint32_t v = 0; v < kNumVariationsVV; v++) { // Variation 2 means that the source operand is memory, which would ALWAYS zero the rest of the register. if (scalar_op_behavior == ScalarOpBehavior::kZeroing || v == 2u) { test_vecop_vv>(ctx, Variation{v}); test_vecop_vv>(ctx, Variation{v}); test_vecop_vv>(ctx, Variation{v}); test_vecop_vv>(ctx, Variation{v}); test_vecop_vv>(ctx, Variation{v}); test_vecop_vv>(ctx, Variation{v}); test_vecop_vv>(ctx, Variation{v}); test_vecop_vv>(ctx, Variation{v}); test_vecop_vv>(ctx, Variation{v}); test_vecop_vv>(ctx, Variation{v}); test_vecop_vv>(ctx, Variation{v}); test_vecop_vv>(ctx, Variation{v}); } else { test_vecop_vv>(ctx, Variation{v}); test_vecop_vv>(ctx, Variation{v}); test_vecop_vv>(ctx, Variation{v}); test_vecop_vv>(ctx, Variation{v}); test_vecop_vv>(ctx, Variation{v}); test_vecop_vv>(ctx, Variation{v}); test_vecop_vv>(ctx, Variation{v}); test_vecop_vv>(ctx, Variation{v}); test_vecop_vv>(ctx, Variation{v}); test_vecop_vv>(ctx, Variation{v}); test_vecop_vv>(ctx, Variation{v}); test_vecop_vv>(ctx, Variation{v}); } test_vecop_vv>(ctx, Variation{v}); test_vecop_vv>(ctx, Variation{v}); test_vecop_vv>(ctx, Variation{v}); test_vecop_vv>(ctx, Variation{v}); test_vecop_vv>(ctx, Variation{v}); test_vecop_vv>(ctx, Variation{v}); test_vecop_vv>(ctx, Variation{v}); test_vecop_vv>(ctx, Variation{v}); test_vecop_vv>(ctx, Variation{v}); test_vecop_vv>(ctx, Variation{v}); test_vecop_vv>(ctx, Variation{v}); test_vecop_vv>(ctx, Variation{v}); } } INFO(" Testing rcp (float)"); { for (uint32_t v = 0; v < kNumVariationsVV; v++) { test_vecop_vv>(ctx, Variation{v}); test_vecop_vv>(ctx, Variation{v}); } } INFO(" Testing sqrt (float)"); { if (scalar_op_behavior == ScalarOpBehavior::kZeroing) { test_vecop_vv>(ctx); test_vecop_vv>(ctx); } else { test_vecop_vv>(ctx); test_vecop_vv>(ctx); } for (uint32_t v = 0; v < kNumVariationsVV; v++) { test_vecop_vv>(ctx, Variation{v}); test_vecop_vv>(ctx, Variation{v}); } } INFO(" Testing cvt (float)"); { for (uint32_t v = 0; v < kNumVariationsVV; v++) { // TODO: [JIT] Re-enable when the content of the remaining part of the register is formalized. // test_vecop_vv(ctx); // test_vecop_vv(ctx); test_vecop_vv(ctx, Variation{v}); test_vecop_vv(ctx, Variation{v}); test_vecop_vv(ctx, Variation{v}); test_vecop_vv(ctx, Variation{0}); test_vecop_vv(ctx, Variation{0}); test_vecop_vv(ctx, Variation{v}); test_vecop_vv(ctx, Variation{v}); if (float_to_int_behavior == FloatToIntOutsideRangeBehavior::kSmallestValue) { constexpr FloatToIntOutsideRangeBehavior behavior = FloatToIntOutsideRangeBehavior::kSmallestValue; test_vecop_vv>(ctx, Variation{v}); test_vecop_vv>(ctx, Variation{0}); test_vecop_vv>(ctx, Variation{0}); test_vecop_vv>(ctx, Variation{v}); test_vecop_vv>(ctx, Variation{0}); test_vecop_vv>(ctx, Variation{0}); } else { constexpr FloatToIntOutsideRangeBehavior behavior = FloatToIntOutsideRangeBehavior::kSaturatedValue; test_vecop_vv>(ctx, Variation{v}); test_vecop_vv>(ctx, Variation{0}); test_vecop_vv>(ctx, Variation{0}); test_vecop_vv>(ctx, Variation{v}); test_vecop_vv>(ctx, Variation{0}); test_vecop_vv>(ctx, Variation{0}); } } } INFO(" Testing bit shift"); { for (uint32_t v = 0; v < kNumVariationsVVI; v++) { /* for (uint32_t i = 1; i < 8; i++) { test_vecop_vvi>(ctx, i, Variation{v}); test_vecop_vvi>(ctx, i, Variation{v}); test_vecop_vvi>(ctx, i, Variation{v}); } */ for (uint32_t i = 1; i < 16; i++) { test_vecop_vvi>(ctx, i, Variation{v}); test_vecop_vvi>(ctx, i, Variation{v}); test_vecop_vvi>(ctx, i, Variation{v}); } for (uint32_t i = 1; i < 32; i++) { test_vecop_vvi>(ctx, i, Variation{v}); test_vecop_vvi>(ctx, i, Variation{v}); test_vecop_vvi>(ctx, i, Variation{v}); } for (uint32_t i = 1; i < 64; i++) { test_vecop_vvi>(ctx, i, Variation{v}); test_vecop_vvi>(ctx, i, Variation{v}); test_vecop_vvi>(ctx, i, Variation{v}); } } } INFO(" Testing sllb_u128 & srlb_u128"); { for (uint32_t v = 0; v < kNumVariationsVVI; v++) { for (uint32_t i = 1; i < 16; i++) { test_vecop_vvi(ctx, i, Variation{v}); test_vecop_vvi(ctx, i, Variation{v}); } } } INFO(" Testing swizzle_[lo|hi]_u16x4"); { for (uint32_t v = 0; v < kNumVariationsVVI; v++) { for (uint32_t i = 0; i < 256; i++) { uint32_t imm = swizzle((i >> 6) & 3, (i >> 4) & 3, (i >> 2) & 3, i & 3).value; test_vecop_vvi(ctx, imm, Variation{v}); test_vecop_vvi(ctx, imm, Variation{v}); test_vecop_vvi(ctx, imm, Variation{v}); } } } INFO(" Testing swizzle_u32x4"); { for (uint32_t v = 0; v < kNumVariationsVVI; v++) { for (uint32_t i = 0; i < 256; i++) { uint32_t imm = swizzle((i >> 6) & 3, (i >> 4) & 3, (i >> 2) & 3, i & 3).value; test_vecop_vvi(ctx, imm, Variation{v}); test_vecop_vvi(ctx, imm, Variation{v}); } } } INFO(" Testing swizzle_u64x2"); { for (uint32_t v = 0; v < kNumVariationsVVI; v++) { for (uint32_t i = 0; i < 4; i++) { uint32_t imm = swizzle((i >> 1) & 1, i & 1).value; test_vecop_vvi(ctx, imm, Variation{v}); test_vecop_vvi(ctx, imm, Variation{v}); } } } INFO(" Testing logical (int)"); { for (uint32_t v = 0; v < kNumVariationsVVV; v++) { test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); } } INFO(" Testing add / adds (int)"); { for (uint32_t v = 0; v < kNumVariationsVVV; v++) { test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); } } INFO(" Testing sub / subs (int)"); { for (uint32_t v = 0; v < kNumVariationsVVV; v++) { test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); } } INFO(" Testing mul (int)"); { for (uint32_t v = 0; v < kNumVariationsVVV; v++) { test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv(ctx, Variation{v}); } } INFO(" Testing mhadd (int)"); { for (uint32_t v = 0; v < kNumVariationsVVV; v++) { test_vecop_vvv(ctx, Variation{v}); } } INFO(" Testing madd (int)"); { for (uint32_t v = 0; v < kNumVariationsVVVV; v++) { test_vecop_vvvv>(ctx, Variation{v}); test_vecop_vvvv>(ctx, Variation{v}); } } INFO(" Testing min / max (int)"); { for (uint32_t v = 0; v < kNumVariationsVVV; v++) { test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); } } INFO(" Testing cmp (int)"); { for (uint32_t v = 0; v < kNumVariationsVVV; v++) { test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); /* test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); */ test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); } } INFO(" Testing logical (float)"); { for (uint32_t v = 0; v < kNumVariationsVVV; v++) { test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); } } INFO(" Testing arithmetic (float)"); { if (scalar_op_behavior == ScalarOpBehavior::kZeroing) { test_vecop_vvv>(ctx); test_vecop_vvv>(ctx); test_vecop_vvv>(ctx); test_vecop_vvv>(ctx); test_vecop_vvv>(ctx); test_vecop_vvv>(ctx); test_vecop_vvv>(ctx); test_vecop_vvv>(ctx); } else { test_vecop_vvv>(ctx); test_vecop_vvv>(ctx); test_vecop_vvv>(ctx); test_vecop_vvv>(ctx); test_vecop_vvv>(ctx); test_vecop_vvv>(ctx); test_vecop_vvv>(ctx); test_vecop_vvv>(ctx); } for (uint32_t v = 0; v < kNumVariationsVVV; v++) { test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); } } if (fmadd_op_behavior == FMAddOpBehavior::kNoFMA) { INFO(" Testing madd (no-fma) (float)"); { if (scalar_op_behavior == ScalarOpBehavior::kZeroing) { test_vecop_vvvv>(ctx, Variation{0}); test_vecop_vvvv>(ctx, Variation{0}); test_vecop_vvvv>(ctx, Variation{0}); test_vecop_vvvv>(ctx, Variation{0}); test_vecop_vvvv>(ctx, Variation{0}); test_vecop_vvvv>(ctx, Variation{0}); test_vecop_vvvv>(ctx, Variation{0}); test_vecop_vvvv>(ctx, Variation{0}); } else { test_vecop_vvvv>(ctx, Variation{0}); test_vecop_vvvv>(ctx, Variation{0}); test_vecop_vvvv>(ctx, Variation{0}); test_vecop_vvvv>(ctx, Variation{0}); test_vecop_vvvv>(ctx, Variation{0}); test_vecop_vvvv>(ctx, Variation{0}); test_vecop_vvvv>(ctx, Variation{0}); test_vecop_vvvv>(ctx, Variation{0}); } for (uint32_t v = 0; v < kNumVariationsVVVV; v++) { test_vecop_vvvv>(ctx, Variation{v}); test_vecop_vvvv>(ctx, Variation{v}); test_vecop_vvvv>(ctx, Variation{v}); test_vecop_vvvv>(ctx, Variation{v}); test_vecop_vvvv>(ctx, Variation{v}); test_vecop_vvvv>(ctx, Variation{v}); test_vecop_vvvv>(ctx, Variation{v}); test_vecop_vvvv>(ctx, Variation{v}); } } } else { INFO(" Testing madd (fma) (float)"); { if (valgrind_fma_bug) { INFO(" (scalar FMA tests ignored due to a Valgrind bug!)"); } else { if (scalar_op_behavior == ScalarOpBehavior::kZeroing) { test_vecop_vvvv>(ctx, Variation{0}); test_vecop_vvvv>(ctx, Variation{0}); test_vecop_vvvv>(ctx, Variation{0}); test_vecop_vvvv>(ctx, Variation{0}); test_vecop_vvvv>(ctx, Variation{0}); test_vecop_vvvv>(ctx, Variation{0}); test_vecop_vvvv>(ctx, Variation{0}); test_vecop_vvvv>(ctx, Variation{0}); } else { test_vecop_vvvv>(ctx, Variation{0}); test_vecop_vvvv>(ctx, Variation{0}); test_vecop_vvvv>(ctx, Variation{0}); test_vecop_vvvv>(ctx, Variation{0}); test_vecop_vvvv>(ctx, Variation{0}); test_vecop_vvvv>(ctx, Variation{0}); test_vecop_vvvv>(ctx, Variation{0}); test_vecop_vvvv>(ctx, Variation{0}); } } for (uint32_t v = 0; v < kNumVariationsVVVV; v++) { test_vecop_vvvv>(ctx, Variation{v}); test_vecop_vvvv>(ctx, Variation{v}); test_vecop_vvvv>(ctx, Variation{v}); test_vecop_vvvv>(ctx, Variation{v}); test_vecop_vvvv>(ctx, Variation{v}); test_vecop_vvvv>(ctx, Variation{v}); test_vecop_vvvv>(ctx, Variation{v}); test_vecop_vvvv>(ctx, Variation{v}); } } } INFO(" Testing min / max (float)"); { #if defined(ASMJIT_UJIT_X86) test_vecop_vvv>(ctx); test_vecop_vvv>(ctx); test_vecop_vvv>(ctx); test_vecop_vvv>(ctx); for (uint32_t v = 0; v < kNumVariationsVVV; v++) { test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); } #endif #if defined(ASMJIT_UJIT_AARCH64) test_vecop_vvv>(ctx); test_vecop_vvv>(ctx); test_vecop_vvv>(ctx); test_vecop_vvv>(ctx); for (uint32_t v = 0; v < kNumVariationsVVV; v++) { test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); } #endif } INFO(" Testing cmp (float)"); { for (uint32_t v = 0; v < kNumVariationsVVV; v++) { test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); test_vecop_vvv>(ctx, Variation{v}); } } INFO(" Testing hadd (float)"); { for (uint32_t v = 0; v < kNumVariationsVVV; v++) { test_vecop_vvv(ctx, Variation{v}); } } INFO(" Testing combine"); { for (uint32_t v = 0; v < kNumVariationsVVV; v++) { test_vecop_vvv(ctx, Variation{v}); test_vecop_vvv(ctx, Variation{v}); test_vecop_vvv(ctx, Variation{v}); test_vecop_vvv(ctx, Variation{v}); } } INFO(" Testing interleave"); { for (uint32_t v = 0; v < kNumVariationsVVV; v++) { test_vecop_vvv(ctx, Variation{v}); test_vecop_vvv(ctx, Variation{v}); test_vecop_vvv(ctx, Variation{v}); test_vecop_vvv(ctx, Variation{v}); test_vecop_vvv(ctx, Variation{v}); test_vecop_vvv(ctx, Variation{v}); test_vecop_vvv(ctx, Variation{v}); test_vecop_vvv(ctx, Variation{v}); } } INFO(" Testing packs"); { for (uint32_t v = 0; v < kNumVariationsVVV; v++) { test_vecop_vvv(ctx, Variation{v}); test_vecop_vvv(ctx, Variation{v}); test_vecop_vvv(ctx, Variation{v}); test_vecop_vvv(ctx, Variation{v}); } } INFO(" Testing alignr_u128"); { for (uint32_t v = 0; v < kNumVariationsVVVI; v++) { for (uint32_t i = 1; i < 16; i++) { test_vecop_vvvi(ctx, i, Variation{v}); } } } INFO(" Testing interleave_shuffle"); { for (uint32_t v = 0; v < kNumVariationsVVVI; v++) { for (uint32_t i = 0; i < 256; i++) { uint32_t imm = swizzle((i >> 6) & 3, (i >> 4) & 3, (i >> 2) & 3, i & 3).value; test_vecop_vvvi(ctx, imm, Variation{v}); test_vecop_vvvi(ctx, imm, Variation{v}); } for (uint32_t i = 0; i < 4; i++) { uint32_t imm = swizzle((i >> 1) & 1, i & 1).value; test_vecop_vvvi(ctx, imm, Variation{v}); test_vecop_vvvi(ctx, imm, Variation{v}); } } } } static void test_gp_ops(JitContext& ctx) { test_cond_ops(ctx); test_m_ops(ctx); test_rm_ops(ctx); test_mr_ops(ctx); test_rr_ops(ctx); test_rrr_ops(ctx); } #if defined(ASMJIT_UJIT_X86) static void dump_feature_list(String& out, const CpuFeatures& features) { #if !defined(ASMJIT_NO_LOGGING) CpuFeatures::Iterator it = features.iterator(); bool first = true; while (it.has_next()) { size_t feature_id = it.next(); if (!first) { out.append(' '); } Formatter::format_feature(out, Arch::kHost, uint32_t(feature_id)); first = false; } #else Support::maybe_unused(features); out.append(""); #endif } static void test_x86_ops(JitContext& ctx, const CpuFeatures& host_features) { using Ext = CpuFeatures::X86; { String s; dump_feature_list(s, host_features); INFO("Available CPU features: %s", s.data()); } // Features that must always be available; CpuFeatures base; base.add(Ext::kI486, Ext::kCMOV, Ext::kCMPXCHG8B, Ext::kFPU, Ext::kSSE, Ext::kSSE2); // To verify that JIT implements ALL features with ALL possible CPU flags, we use profiles to select features // that the JIT compiler will be allowed to use. The features are gradually increased similarly to how new CPU // generations introduced them. We cannot cover ALL possible CPUs, but that's not even necessary as we test // individual operations where instructions can be selected on the features available. // GP variations. { CpuFeatures profiles[4] {}; profiles[0] = base; profiles[1] = profiles[0]; profiles[1].add(Ext::kADX, Ext::kBMI); profiles[2] = profiles[1]; profiles[2].add(Ext::kBMI2, Ext::kLZCNT, Ext::kMOVBE, Ext::kPOPCNT); profiles[3] = host_features; bool first = true; CpuFeatures last_filtered; for (const CpuFeatures& profile : profiles) { CpuFeatures filtered = profile; for (uint32_t i = 0; i < CpuFeatures::kNumBitWords; i++) { filtered.data()._bits[i] &= host_features.data()._bits[i]; } if (!first && filtered == last_filtered) { continue; } String s; if (filtered == host_features) { s.assign("[ALL]"); } else { dump_feature_list(s, filtered); } ctx.features = filtered; INFO("Testing JIT compiler GP ops with [%s]", s.data()); test_gp_ops(ctx); first = false; last_filtered = filtered; } } // SIMD variations covering SSE2+, AVX+, and AVX512+ cases. { CpuFeatures profiles[15] {}; profiles[0] = base; profiles[1] = profiles[0]; profiles[1].add(Ext::kSSE3); profiles[2] = profiles[1]; profiles[2].add(Ext::kSSSE3); profiles[3] = profiles[2]; profiles[3].add(Ext::kSSE4_1); profiles[4] = profiles[3]; profiles[4].add(Ext::kSSE4_2, Ext::kADX, Ext::kBMI, Ext::kBMI2, Ext::kLZCNT, Ext::kMOVBE, Ext::kPOPCNT); profiles[5] = profiles[4]; profiles[5].add(Ext::kPCLMULQDQ); profiles[6] = profiles[5]; profiles[6].add(Ext::kAVX); profiles[7] = profiles[6]; profiles[7].add(Ext::kAVX2); profiles[8] = profiles[7]; profiles[8].add(Ext::kF16C, Ext::kFMA, Ext::kVAES, Ext::kVPCLMULQDQ); profiles[9] = profiles[8]; profiles[9].add(Ext::kAVX_IFMA, Ext::kAVX_NE_CONVERT, Ext::kAVX_VNNI, Ext::kAVX_VNNI_INT8, Ext::kAVX_VNNI_INT16); // We start deliberately from a profile that doesn't contains AVX_xxx // extensions as these didn't exist when the first AVX512 CPUs were shipped. profiles[10] = profiles[8]; profiles[10].add(Ext::kAVX512_F, Ext::kAVX512_BW, Ext::kAVX512_DQ, Ext::kAVX512_CD, Ext::kAVX512_VL); profiles[11] = profiles[10]; profiles[11].add(Ext::kAVX512_IFMA, Ext::kAVX512_VBMI); profiles[12] = profiles[11]; profiles[12].add(Ext::kAVX512_BITALG, Ext::kAVX512_VBMI2, Ext::kAVX512_VNNI, Ext::kAVX512_VPOPCNTDQ); profiles[13] = profiles[12]; profiles[13].add(Ext::kAVX512_BF16, Ext::kAVX512_FP16); profiles[14] = host_features; bool first = true; CpuFeatures last_filtered; for (const CpuFeatures& profile : profiles) { CpuFeatures filtered = profile; for (uint32_t i = 0; i < CpuFeatures::kNumBitWords; i++) { filtered.data()._bits[i] &= host_features.data()._bits[i]; } if (!first && filtered == last_filtered) { continue; } String s; if (filtered == host_features) { s.assign("[ALL]"); } else { dump_feature_list(s, filtered); } ctx.features = filtered; INFO("Testing JIT compiler 128-bit SIMD ops with [%s]", s.data()); test_simd_ops(ctx); if (filtered.x86().has_avx2()) { INFO("Testing JIT compiler 256-bit SIMD ops with [%s]", s.data()); test_simd_ops(ctx); } if (filtered.x86().has_avx512_f()) { INFO("Testing JIT compiler 512-bit SIMD ops with [%s]", s.data()); test_simd_ops(ctx); } first = false; last_filtered = filtered; } } } #endif // ASMJIT_UJIT_X86 #if defined(ASMJIT_UJIT_AARCH64) static void test_a64_ops(JitContext& ctx, const CpuFeatures& host_features) { ctx.features = host_features; test_gp_ops(ctx); test_simd_ops(ctx); } #endif // ASMJIT_UJIT_AARCH64 } // {UniCompilerTests} UNIT(unicompiler) { UniCompilerTests::JitContext ctx; asmjit::CpuFeatures host_features = asmjit::CpuInfo::host().features(); #if defined(ASMJIT_UJIT_X86) UniCompilerTests::test_x86_ops(ctx, host_features); #elif defined(ASMJIT_UJIT_AARCH64) UniCompilerTests::test_a64_ops(ctx, host_features); #endif } int main(int argc, const char* argv[]) { print_app_info(); return BrokenAPI::run(argc, argv); } #else int main() { print_app_info(); printf("!! This test is disabled: or unsuitable arvhitecture !!\n"); return 0; } #endif // !ASMJIT_NO_UJIT && !ASMJIT_NO_JIT