Files
asmjit/testing/tests/asmjit_test_unicompiler_avx2fma.cpp
kobalicek cdc4eacbb1 [abi] Added more functionality to ujit
* Renamed round to round_even
  * Added round_half_up intrinsic
  * Added floating-point mod
  * Added a scalar version of floating-point abs and neg
  * Added a behavior enum to specify how float to int conversion
    handles out-of-range and NaN cases
  * Updated some APX stuff in instruction database
2025-10-05 17:31:24 +02:00

50 lines
1.3 KiB
C++

// This file is part of AsmJit project <https://asmjit.com>
//
// See <asmjit/core.h> or LICENSE.md for license and copyright information
// SPDX-License-Identifier: Zlib
#include <asmjit/ujit.h>
#if defined(ASMJIT_UJIT_X86)
#if defined(_MSC_VER)
#include <intrin.h>
#else
#include <immintrin.h>
#endif
namespace UniCompilerTests {
// Reference MUL+ADD for `float`, evaluated with an explicit scalar FMA intrinsic.
//
// The intrinsic is used on purpose: on 32-bit x86 the C++ compiler may otherwise evaluate the expression
// with FPU registers, and the result could then differ from the JIT-compiled code, which uses XMM registers
// (32/64-bit SSE/AVX operations).
//
// Returns the low lane of `_mm_fmadd_ss`, i.e. `a * b + c` computed as a fused operation.
float fmadd_fma_ref(float a, float b, float c) noexcept {
  // Each scalar is broadcast to every lane; only the lowest lane takes part in the result that is returned.
  const __m128 fused = _mm_fmadd_ss(_mm_set1_ps(a), _mm_set1_ps(b), _mm_set1_ps(c));
  return _mm_cvtss_f32(fused);
}
// Reference MUL+ADD for `double`, evaluated with an explicit scalar FMA intrinsic so the result is produced
// by an XMM-register FMA instruction rather than by whatever the C++ compiler would pick for `a * b + c`.
//
// Returns the low lane of `_mm_fmadd_sd`, i.e. `a * b + c` computed as a fused operation.
double fmadd_fma_ref(double a, double b, double c) noexcept {
  // Each scalar is broadcast to both lanes; only the lowest lane takes part in the result that is returned.
  const __m128d fused = _mm_fmadd_sd(_mm_set1_pd(a), _mm_set1_pd(b), _mm_set1_pd(c));
  return _mm_cvtsd_f64(fused);
}
// Runs a single scalar FMA (`_mm_fmadd_ss`) over three unaligned 4-float inputs and stores the whole
// 128-bit result to `dst`.
//
// Per the intrinsic's definition, lane 0 of the result is `a[0] * b[0] + c[0]` and lanes 1..3 are copied
// from `a`. NOTE(review): judging by the name, callers presumably compare `dst` against these expected
// values to detect a Valgrind FMA emulation bug - confirm against the call site.
void madd_fma_check_valgrind_bug(const float a[4], const float b[4], const float c[4], float dst[4]) noexcept {
  const __m128 lhs = _mm_loadu_ps(a);
  const __m128 rhs = _mm_loadu_ps(b);
  const __m128 addend = _mm_loadu_ps(c);

  _mm_storeu_ps(dst, _mm_fmadd_ss(lhs, rhs, addend));
}
} // {UniCompilerTests}
#endif // ASMJIT_UJIT_X86