// Mirror of https://github.com/asmjit/asmjit.git (synced 2025-12-17 12:34:35 +03:00) - 2119 lines, 80 KiB, C++.
// This file is part of AsmJit project <https://asmjit.com>
|
|
//
|
|
// See asmjit.h or LICENSE.md for license and copyright information
|
|
// SPDX-License-Identifier: Zlib
|
|
|
|
#include "../core/api-build_p.h"
|
|
#include "../core/cpuinfo.h"
|
|
#include "../core/support.h"
|
|
|
|
#include <atomic>
|
|
|
|
// Required by `__cpuidex()` and `_xgetbv()`.
|
|
#if ASMJIT_ARCH_X86
|
|
#if defined(_MSC_VER)
|
|
#include <intrin.h>
|
|
#endif
|
|
#endif // ASMJIT_ARCH_X86
|
|
|
|
#if ASMJIT_ARCH_ARM
|
|
// Required by various utilities that are required by features detection.
|
|
#if !defined(_WIN32)
|
|
#include <errno.h>
|
|
#include <sys/utsname.h>
|
|
#endif
|
|
|
|
//! Required to detect CPU and features on Apple platforms.
|
|
#if defined(__APPLE__)
|
|
#include <mach/machine.h>
|
|
#include <sys/types.h>
|
|
#include <sys/sysctl.h>
|
|
#endif
|
|
|
|
#if (defined(__linux__) || defined(__FreeBSD__))
|
|
// Required by `getauxval()` on Linux and FreeBSD.
|
|
#include <sys/auxv.h>
|
|
#define ASMJIT_ARM_DETECT_VIA_HWCAPS
|
|
#endif
|
|
|
|
#if ASMJIT_ARCH_ARM >= 64 && defined(__GNUC__) && defined(__linux__) && 0
|
|
// This feature is disabled at the moment - it works, but it seems linux supports ARM features
|
|
// via HWCAPS pretty well and the most recent features need to access more registers that were
|
|
// not originally accessible, which would break on some systems.
|
|
#define ASMJIT_ARM_DETECT_VIA_CPUID
|
|
#endif
|
|
|
|
#if ASMJIT_ARCH_ARM >= 64 && defined(__OpenBSD__)
|
|
#include <machine/cpu.h>
|
|
#include <sys/sysctl.h>
|
|
#endif
|
|
|
|
#if ASMJIT_ARCH_ARM >= 64 && defined(__NetBSD__)
|
|
#include <sys/sysctl.h>
|
|
#endif
|
|
|
|
#endif // ASMJIT_ARCH_ARM
|
|
|
|
#if !defined(_WIN32) && (ASMJIT_ARCH_X86 || ASMJIT_ARCH_ARM)
|
|
#include <unistd.h>
|
|
#endif
|
|
|
|
ASMJIT_BEGIN_NAMESPACE
|
|
|
|
// CpuInfo - Detect - Compatibility
// ================================

// CPU features detection is a minefield on non-X86 platforms. The following list describes which
// operating systems and architectures are supported and the status of the implementation:
//
//   * X86, X86_64:
//     - All OSes supported
//     - Detection is based on using a CPUID instruction, which is a user-space instruction, so there
//       is no need to use any OS specific APIs or syscalls to detect all features provided by the CPU.
//
//   * ARM32:
//     - Linux   - HWCAPS based detection.
//     - FreeBSD - HWCAPS based detection (shared with Linux code).
//     - NetBSD  - NOT IMPLEMENTED!
//     - OpenBSD - NOT IMPLEMENTED!
//     - Apple   - sysctlbyname() based detection (this architecture is deprecated on Apple HW).
//     - Windows - IsProcessorFeaturePresent() based detection (only detects a subset of features).
//     - Others  - NOT IMPLEMENTED!
//
//   * ARM64:
//     - Linux   - HWCAPS and CPUID based detection.
//     - FreeBSD - HWCAPS and CPUID based detection (shared with Linux code).
//     - NetBSD  - CPUID based detection (reading CPUID via sysctl's cpu0 info).
//     - OpenBSD - CPUID based detection (reading CPUID via sysctl's CTL_MACHDEP).
//     - Apple   - sysctlbyname() based detection with FamilyId matrix (record for each family id).
//     - Windows - IsProcessorFeaturePresent() based detection (only detects a subset of features).
//     - Others  - NOT IMPLEMENTED!
//
//   * Others
//     - NOT IMPLEMENTED!
|
|
|
|
// CpuInfo - Detect - HW-Thread Count
|
|
// ==================================
|
|
|
|
// Returns the number of hardware threads reported by the operating system. The implementation is selected at
// compile time - Windows API, POSIX `sysconf()`, or a constant fallback.
#if defined(_WIN32)
static inline uint32_t detectHWThreadCount() noexcept {
  SYSTEM_INFO info;
  ::GetSystemInfo(&info);
  return info.dwNumberOfProcessors;
}
#elif defined(_SC_NPROCESSORS_ONLN)
static inline uint32_t detectHWThreadCount() noexcept {
  // A non-positive result (for example a failed query) is reported as a single hardware thread.
  long res = ::sysconf(_SC_NPROCESSORS_ONLN);
  return res <= 0 ? uint32_t(1) : uint32_t(res);
}
#else
static inline uint32_t detectHWThreadCount() noexcept {
  // No API available to query the count - report a single hardware thread.
  return 1;
}
#endif
|
|
|
|
// CpuInfo - Detect - X86
|
|
// ======================
|
|
|
|
// X86 and X86_64 detection is based on CPUID.
|
|
|
|
#if ASMJIT_ARCH_X86
|
|
|
|
namespace x86 {
|
|
|
|
using Ext = CpuFeatures::X86;
|
|
|
|
// Register values produced by a single `cpuid` invocation. The member order (EAX, EBX, ECX, EDX) must not change
// as `cpuidQuery()` may fill this struct as an array of four 32-bit integers.
struct cpuid_t {
  uint32_t eax;
  uint32_t ebx;
  uint32_t ecx;
  uint32_t edx;
};

// Low (EAX) and high (EDX) 32-bit halves of the value produced by `xgetbv`.
struct xgetbv_t {
  uint32_t eax;
  uint32_t edx;
};
|
|
|
|
// Executes `cpuid` instruction.
|
|
static inline void cpuidQuery(cpuid_t* out, uint32_t inEax, uint32_t inEcx = 0) noexcept {
|
|
#if defined(_MSC_VER)
|
|
__cpuidex(reinterpret_cast<int*>(out), inEax, inEcx);
|
|
#elif defined(__GNUC__) && ASMJIT_ARCH_X86 == 32
|
|
__asm__ __volatile__(
|
|
"mov %%ebx, %%edi\n"
|
|
"cpuid\n"
|
|
"xchg %%edi, %%ebx\n" : "=a"(out->eax), "=D"(out->ebx), "=c"(out->ecx), "=d"(out->edx) : "a"(inEax), "c"(inEcx));
|
|
#elif defined(__GNUC__) && ASMJIT_ARCH_X86 == 64
|
|
__asm__ __volatile__(
|
|
"mov %%rbx, %%rdi\n"
|
|
"cpuid\n"
|
|
"xchg %%rdi, %%rbx\n" : "=a"(out->eax), "=D"(out->ebx), "=c"(out->ecx), "=d"(out->edx) : "a"(inEax), "c"(inEcx));
|
|
#else
|
|
#error "[asmjit] x86::cpuidQuery() - Unsupported compiler."
|
|
#endif
|
|
}
|
|
|
|
// Executes 'xgetbv' instruction.
|
|
static inline void xgetbvQuery(xgetbv_t* out, uint32_t inEcx) noexcept {
|
|
#if defined(_MSC_VER)
|
|
uint64_t value = _xgetbv(inEcx);
|
|
out->eax = uint32_t(value & 0xFFFFFFFFu);
|
|
out->edx = uint32_t(value >> 32);
|
|
#elif defined(__GNUC__)
|
|
uint32_t outEax;
|
|
uint32_t outEdx;
|
|
|
|
// Replaced, because the world is not perfect:
|
|
// __asm__ __volatile__("xgetbv" : "=a"(outEax), "=d"(outEdx) : "c"(inEcx));
|
|
__asm__ __volatile__(".byte 0x0F, 0x01, 0xD0" : "=a"(outEax), "=d"(outEdx) : "c"(inEcx));
|
|
|
|
out->eax = outEax;
|
|
out->edx = outEdx;
|
|
#else
|
|
out->eax = 0;
|
|
out->edx = 0;
|
|
#endif
|
|
}
|
|
|
|
// Map a 12-byte vendor string returned by `cpuid` into a `CpuInfo::Vendor` ID.
|
|
static inline void simplifyCpuVendor(CpuInfo& cpu, uint32_t d0, uint32_t d1, uint32_t d2) noexcept {
|
|
struct Vendor {
|
|
char normalized[8];
|
|
union { char text[12]; uint32_t d[3]; };
|
|
};
|
|
|
|
static const Vendor table[] = {
|
|
{ { 'A', 'M', 'D' }, {{ 'A', 'u', 't', 'h', 'e', 'n', 't', 'i', 'c', 'A', 'M', 'D' }} },
|
|
{ { 'I', 'N', 'T', 'E', 'L' }, {{ 'G', 'e', 'n', 'u', 'i', 'n', 'e', 'I', 'n', 't', 'e', 'l' }} },
|
|
{ { 'V', 'I', 'A' }, {{ 'C', 'e', 'n', 't', 'a', 'u', 'r', 'H', 'a', 'u', 'l', 's' }} },
|
|
{ { 'V', 'I', 'A' }, {{ 'V', 'I', 'A', 0 , 'V', 'I', 'A', 0 , 'V', 'I', 'A', 0 }} },
|
|
{ { 'U', 'N', 'K', 'N', 'O', 'W', 'N' }, {{ 0 }} }
|
|
};
|
|
|
|
uint32_t i;
|
|
for (i = 0; i < ASMJIT_ARRAY_SIZE(table) - 1; i++) {
|
|
if (table[i].d[0] == d0 && table[i].d[1] == d1 && table[i].d[2] == d2) {
|
|
break;
|
|
}
|
|
}
|
|
memcpy(cpu._vendor.str, table[i].normalized, 8);
|
|
}
|
|
|
|
// Simplifies a null-terminated CPU brand string in-place.
//
// A space is dropped when it directly follows a kept '@' or when it's followed by another space, by '@', or by
// the end of the string. All other characters are kept. Every byte that has been read is cleared, so the bytes
// between the new and the original terminator are all zero.
static ASMJIT_FAVOR_SIZE void simplifyCpuBrand(char* s) noexcept {
  char* d = s;

  char c = s[0];
  char prev = 0;

  // Used to always clear the current character to ensure that the result
  // doesn't contain garbage after a new null terminator is placed at the end.
  s[0] = '\0';

  for (;;) {
    if (!c) {
      break;
    }

    // `s[1]` is the not-yet-consumed character that follows `c`; `d` never runs ahead of `s`.
    if (!(c == ' ' && (prev == '@' || s[1] == ' ' || s[1] == '@' || s[1] == '\0'))) {
      *d++ = c;
      prev = c;
    }

    c = *++s;
    s[0] = '\0';
  }

  d[0] = '\0';
}
|
|
|
|
// Detects X86 and X86_64 CPU information and features and stores the result to `cpu`.
//
// The detection executes CPUID (basic leaves first, then extended leaves starting at 0x80000000) and reads XCR0
// via XGETBV when both XSAVE and OSXSAVE are reported. Features that require OS enabled state (AVX, AVX-512, AMX)
// are only reported when the corresponding XCR0 bits are set.
static ASMJIT_FAVOR_SIZE void detectX86Cpu(CpuInfo& cpu) noexcept {
  using Support::bitTest;

  cpuid_t regs;
  xgetbv_t xcr0 { 0, 0 };
  CpuFeatures::X86& features = cpu.features().x86();

  cpu._wasDetected = true;
  cpu._maxLogicalProcessors = 1;

  // We are gonna execute CPUID, which was introduced by I486, so it's the requirement.
  features.add(Ext::kI486);

  // CPUID EAX=0x00 (Basic CPUID Information)
  // ----------------------------------------

  // Get vendor string/id.
  cpuidQuery(&regs, 0x0);

  uint32_t maxId = regs.eax;
  uint32_t maxSubLeafId_0x7 = 0;

  // The vendor string is stored in EBX, EDX, ECX (in this order).
  simplifyCpuVendor(cpu, regs.ebx, regs.edx, regs.ecx);

  // CPUID EAX=0x01 (Basic CPUID Information)
  // ----------------------------------------

  if (maxId >= 0x01u) {
    // Get feature flags in ECX/EDX and family/model in EAX.
    cpuidQuery(&regs, 0x1);

    // Fill family and model fields.
    uint32_t modelId  = (regs.eax >> 4) & 0x0F;
    uint32_t familyId = (regs.eax >> 8) & 0x0F;

    // Use extended family and model fields.
    if (familyId == 0x06u || familyId == 0x0Fu) {
      modelId += (((regs.eax >> 16) & 0x0Fu) << 4);
    }

    if (familyId == 0x0Fu) {
      familyId += ((regs.eax >> 20) & 0xFFu);
    }

    cpu._modelId              = modelId;
    cpu._familyId             = familyId;
    cpu._brandId              = (regs.ebx      ) & 0xFF;
    cpu._processorType        = (regs.eax >> 12) & 0x03;
    cpu._maxLogicalProcessors = (regs.ebx >> 16) & 0xFF;
    cpu._stepping             = (regs.eax      ) & 0x0F;
    cpu._cacheLineSize        = ((regs.ebx >> 8) & 0xFF) * 8;

    features.addIf(bitTest(regs.ecx,  0), Ext::kSSE3);
    features.addIf(bitTest(regs.ecx,  1), Ext::kPCLMULQDQ);
    features.addIf(bitTest(regs.ecx,  3), Ext::kMONITOR);
    features.addIf(bitTest(regs.ecx,  5), Ext::kVMX);
    features.addIf(bitTest(regs.ecx,  6), Ext::kSMX);
    features.addIf(bitTest(regs.ecx,  9), Ext::kSSSE3);
    features.addIf(bitTest(regs.ecx, 13), Ext::kCMPXCHG16B);
    features.addIf(bitTest(regs.ecx, 19), Ext::kSSE4_1);
    features.addIf(bitTest(regs.ecx, 20), Ext::kSSE4_2);
    features.addIf(bitTest(regs.ecx, 22), Ext::kMOVBE);
    features.addIf(bitTest(regs.ecx, 23), Ext::kPOPCNT);
    features.addIf(bitTest(regs.ecx, 25), Ext::kAESNI);
    features.addIf(bitTest(regs.ecx, 26), Ext::kXSAVE);
    features.addIf(bitTest(regs.ecx, 27), Ext::kOSXSAVE);
    features.addIf(bitTest(regs.ecx, 30), Ext::kRDRAND);
    features.addIf(bitTest(regs.edx,  0), Ext::kFPU);
    features.addIf(bitTest(regs.edx,  4), Ext::kRDTSC);
    features.addIf(bitTest(regs.edx,  5), Ext::kMSR);
    features.addIf(bitTest(regs.edx,  8), Ext::kCMPXCHG8B);
    features.addIf(bitTest(regs.edx, 15), Ext::kCMOV);
    features.addIf(bitTest(regs.edx, 19), Ext::kCLFLUSH);
    features.addIf(bitTest(regs.edx, 23), Ext::kMMX);
    features.addIf(bitTest(regs.edx, 24), Ext::kFXSR);
    features.addIf(bitTest(regs.edx, 25), Ext::kSSE, Ext::kMMX2);
    features.addIf(bitTest(regs.edx, 26), Ext::kSSE2, Ext::kSSE);
    features.addIf(bitTest(regs.edx, 28), Ext::kMT);

    // Get the content of XCR0 if supported by the CPU and enabled by the OS.
    if (features.hasXSAVE() && features.hasOSXSAVE()) {
      xgetbvQuery(&xcr0, 0);
    }

    // Detect AVX+.
    if (bitTest(regs.ecx, 28)) {
      // - XCR0[2:1] == 11b
      //   XMM & YMM states need to be enabled by OS.
      if ((xcr0.eax & 0x00000006u) == 0x00000006u) {
        features.add(Ext::kAVX);
        features.addIf(bitTest(regs.ecx, 12), Ext::kFMA);
        features.addIf(bitTest(regs.ecx, 29), Ext::kF16C);
      }
    }
  }

  constexpr uint32_t kXCR0_AMX_Bits = 0x3u << 17;
  bool amxEnabled = (xcr0.eax & kXCR0_AMX_Bits) == kXCR0_AMX_Bits;

#if defined(__APPLE__)
  // Apple platform provides on-demand AVX512 support. When an AVX512 instruction is used the first time it results
  // in #UD, which would cause the thread being promoted to use AVX512 support by the OS in addition to enabling the
  // necessary bits in XCR0 register.
  bool avx512Enabled = true;
#else
  // - XCR0[2:1] ==  11b - XMM/YMM states need to be enabled by OS.
  // - XCR0[7:5] == 111b - Upper 256-bit of ZMM0-XMM15 and ZMM16-ZMM31 need to be enabled by OS.
  constexpr uint32_t kXCR0_AVX512_Bits = (0x3u << 1) | (0x7u << 5);
  bool avx512Enabled = (xcr0.eax & kXCR0_AVX512_Bits) == kXCR0_AVX512_Bits;
#endif

  bool avx10Enabled = false;

  // CPUID EAX=0x07 ECX=0 (Structured Extended Feature Flags Enumeration Leaf)
  // -------------------------------------------------------------------------

  if (maxId >= 0x07u) {
    cpuidQuery(&regs, 0x7);

    // EAX output is the maximum supported sub-leaf of leaf 0x07.
    maxSubLeafId_0x7 = regs.eax;

    features.addIf(bitTest(regs.ebx,  0), Ext::kFSGSBASE);
    features.addIf(bitTest(regs.ebx,  3), Ext::kBMI);
    features.addIf(bitTest(regs.ebx,  7), Ext::kSMEP);
    features.addIf(bitTest(regs.ebx,  8), Ext::kBMI2);
    features.addIf(bitTest(regs.ebx,  9), Ext::kERMS);
    features.addIf(bitTest(regs.ebx, 18), Ext::kRDSEED);
    features.addIf(bitTest(regs.ebx, 19), Ext::kADX);
    features.addIf(bitTest(regs.ebx, 20), Ext::kSMAP);
    features.addIf(bitTest(regs.ebx, 23), Ext::kCLFLUSHOPT);
    features.addIf(bitTest(regs.ebx, 24), Ext::kCLWB);
    features.addIf(bitTest(regs.ebx, 29), Ext::kSHA);
    features.addIf(bitTest(regs.ecx,  0), Ext::kPREFETCHWT1);
    features.addIf(bitTest(regs.ecx,  4), Ext::kOSPKE);
    features.addIf(bitTest(regs.ecx,  5), Ext::kWAITPKG);
    features.addIf(bitTest(regs.ecx,  7), Ext::kCET_SS);
    features.addIf(bitTest(regs.ecx,  8), Ext::kGFNI);
    features.addIf(bitTest(regs.ecx,  9), Ext::kVAES);
    features.addIf(bitTest(regs.ecx, 10), Ext::kVPCLMULQDQ);
    features.addIf(bitTest(regs.ecx, 22), Ext::kRDPID);
    features.addIf(bitTest(regs.ecx, 23), Ext::kKL);
    features.addIf(bitTest(regs.ecx, 25), Ext::kCLDEMOTE);
    features.addIf(bitTest(regs.ecx, 27), Ext::kMOVDIRI);
    features.addIf(bitTest(regs.ecx, 28), Ext::kMOVDIR64B);
    features.addIf(bitTest(regs.ecx, 29), Ext::kENQCMD);
    features.addIf(bitTest(regs.edx,  4), Ext::kFSRM);
    features.addIf(bitTest(regs.edx,  5), Ext::kUINTR);
    features.addIf(bitTest(regs.edx, 14), Ext::kSERIALIZE);
    features.addIf(bitTest(regs.edx, 16), Ext::kTSXLDTRK);
    features.addIf(bitTest(regs.edx, 18), Ext::kPCONFIG);
    features.addIf(bitTest(regs.edx, 20), Ext::kCET_IBT);

    // AVX2 is only reported when AVX was detected (which implies the OS enabled YMM state).
    if (bitTest(regs.ebx, 5) && features.hasAVX()) {
      features.add(Ext::kAVX2);
    }

    if (avx512Enabled && bitTest(regs.ebx, 16)) {
      features.add(Ext::kAVX512_F);

      features.addIf(bitTest(regs.ebx, 17), Ext::kAVX512_DQ);
      features.addIf(bitTest(regs.ebx, 21), Ext::kAVX512_IFMA);
      features.addIf(bitTest(regs.ebx, 28), Ext::kAVX512_CD);
      features.addIf(bitTest(regs.ebx, 30), Ext::kAVX512_BW);
      features.addIf(bitTest(regs.ebx, 31), Ext::kAVX512_VL);
      features.addIf(bitTest(regs.ecx,  1), Ext::kAVX512_VBMI);
      features.addIf(bitTest(regs.ecx,  6), Ext::kAVX512_VBMI2);
      features.addIf(bitTest(regs.ecx, 11), Ext::kAVX512_VNNI);
      features.addIf(bitTest(regs.ecx, 12), Ext::kAVX512_BITALG);
      features.addIf(bitTest(regs.ecx, 14), Ext::kAVX512_VPOPCNTDQ);
      features.addIf(bitTest(regs.edx,  8), Ext::kAVX512_VP2INTERSECT);
      features.addIf(bitTest(regs.edx, 23), Ext::kAVX512_FP16);
    }

    if (amxEnabled) {
      features.addIf(bitTest(regs.edx, 22), Ext::kAMX_BF16);
      features.addIf(bitTest(regs.edx, 24), Ext::kAMX_TILE);
      features.addIf(bitTest(regs.edx, 25), Ext::kAMX_INT8);
    }
  }

  // CPUID EAX=0x07 ECX=1 (Structured Extended Feature Enumeration Sub-leaf)
  // -----------------------------------------------------------------------

  if (maxSubLeafId_0x7 >= 1) {
    cpuidQuery(&regs, 0x7, 1);

    features.addIf(bitTest(regs.eax,  0), Ext::kSHA512);
    features.addIf(bitTest(regs.eax,  1), Ext::kSM3);
    features.addIf(bitTest(regs.eax,  2), Ext::kSM4);
    features.addIf(bitTest(regs.eax,  3), Ext::kRAO_INT);
    features.addIf(bitTest(regs.eax,  7), Ext::kCMPCCXADD);
    features.addIf(bitTest(regs.eax, 10), Ext::kFZRM);
    features.addIf(bitTest(regs.eax, 11), Ext::kFSRS);
    features.addIf(bitTest(regs.eax, 12), Ext::kFSRC);
    features.addIf(bitTest(regs.eax, 19), Ext::kWRMSRNS);
    features.addIf(bitTest(regs.eax, 22), Ext::kHRESET);
    features.addIf(bitTest(regs.eax, 26), Ext::kLAM);
    features.addIf(bitTest(regs.eax, 27), Ext::kMSRLIST);
    features.addIf(bitTest(regs.eax, 31), Ext::kMOVRS);
    features.addIf(bitTest(regs.ecx,  5), Ext::kMSR_IMM);
    features.addIf(bitTest(regs.ebx,  1), Ext::kTSE);
    features.addIf(bitTest(regs.edx, 14), Ext::kPREFETCHI);
    features.addIf(bitTest(regs.edx, 18), Ext::kCET_SSS);
    features.addIf(bitTest(regs.edx, 21), Ext::kAPX_F);

    if (features.hasAVX2()) {
      features.addIf(bitTest(regs.eax,  4), Ext::kAVX_VNNI);
      features.addIf(bitTest(regs.eax, 23), Ext::kAVX_IFMA);
      features.addIf(bitTest(regs.edx,  4), Ext::kAVX_VNNI_INT8);
      features.addIf(bitTest(regs.edx,  5), Ext::kAVX_NE_CONVERT);
      features.addIf(bitTest(regs.edx, 10), Ext::kAVX_VNNI_INT16);
    }

    if (features.hasAVX512_F()) {
      features.addIf(bitTest(regs.eax,  5), Ext::kAVX512_BF16);
    }

    // AVX10 is only considered when AVX512_F was detected; its version is read from leaf 0x24 below.
    if (features.hasAVX512_F()) {
      avx10Enabled = bitTest(regs.edx, 19);
    }

    if (amxEnabled) {
      features.addIf(bitTest(regs.eax, 21), Ext::kAMX_FP16);
      features.addIf(bitTest(regs.edx,  8), Ext::kAMX_COMPLEX);
    }
  }

  // CPUID EAX=0x0D ECX=1 (Processor Extended State Enumeration Sub-leaf)
  // --------------------------------------------------------------------

  if (maxId >= 0x0Du) {
    cpuidQuery(&regs, 0xD, 1);

    features.addIf(bitTest(regs.eax,  0), Ext::kXSAVEOPT);
    features.addIf(bitTest(regs.eax,  1), Ext::kXSAVEC);
    features.addIf(bitTest(regs.eax,  3), Ext::kXSAVES);
  }

  // CPUID EAX=0x14 ECX=0 (Processor Trace Enumeration Main Leaf)
  // ------------------------------------------------------------

  // NOTE: The Processor Trace leaf is 14H (hexadecimal). Leaf 0x0E, which was queried here previously, doesn't
  // enumerate PTWRITE, so the feature could never be detected.
  if (maxId >= 0x14u) {
    cpuidQuery(&regs, 0x14, 0);

    features.addIf(bitTest(regs.ebx,  4), Ext::kPTWRITE);
  }

  // CPUID EAX=0x19 ECX=0 (Key Locker Leaf)
  // --------------------------------------

  if (maxId >= 0x19u && features.hasKL()) {
    cpuidQuery(&regs, 0x19, 0);

    features.addIf(bitTest(regs.ebx,  0), Ext::kAESKLE);
    features.addIf(bitTest(regs.ebx,  0) && bitTest(regs.ebx, 2), Ext::kAESKLEWIDE_KL);
  }

  // CPUID EAX=0x1E ECX=1 (TMUL Information Sub-leaf)
  // ------------------------------------------------

  if (maxId >= 0x1Eu && features.hasAMX_TILE()) {
    cpuidQuery(&regs, 0x1E, 1);

    // NOTE: Some AMX flags are mirrored here from CPUID[0x07, 0x00].
    features.addIf(bitTest(regs.eax,  0), Ext::kAMX_INT8);
    features.addIf(bitTest(regs.eax,  1), Ext::kAMX_BF16);
    features.addIf(bitTest(regs.eax,  2), Ext::kAMX_COMPLEX);
    features.addIf(bitTest(regs.eax,  3), Ext::kAMX_FP16);
    features.addIf(bitTest(regs.eax,  4), Ext::kAMX_FP8);
    features.addIf(bitTest(regs.eax,  5), Ext::kAMX_TRANSPOSE);
    features.addIf(bitTest(regs.eax,  6), Ext::kAMX_TF32);
    features.addIf(bitTest(regs.eax,  7), Ext::kAMX_AVX512);
    features.addIf(bitTest(regs.eax,  8), Ext::kAMX_MOVRS);
  }

  // CPUID EAX=0x24 ECX=0 (AVX10 Information)
  // ----------------------------------------

  if (maxId >= 0x24u && avx10Enabled) {
    // EAX output is the maximum supported sub-leaf.
    cpuidQuery(&regs, 0x24, 0);

    // AVX10 Converged Vector ISA version.
    uint32_t ver = regs.ebx & 0xFFu;

    features.addIf(ver >= 1u, Ext::kAVX10_1);
    features.addIf(ver >= 2u, Ext::kAVX10_2);
  }

  // CPUID EAX=0x80000000...maxId
  // ----------------------------

  // `maxId` is reused for extended leaves - it's updated by the first iteration (leaf 0x80000000).
  maxId = 0x80000000u;
  uint32_t i = maxId;

  // The highest EAX that we understand.
  constexpr uint32_t kHighestProcessedEAX = 0x8000001Fu;

  // Several CPUID calls are required to get the whole brand string. It's easier
  // to copy one DWORD at a time instead of copying the string a byte by byte.
  uint32_t* brand = cpu._brand.u32;
  do {
    cpuidQuery(&regs, i);
    switch (i) {
      case 0x80000000u:
        maxId = Support::min<uint32_t>(regs.eax, kHighestProcessedEAX);
        break;

      case 0x80000001u:
        features.addIf(bitTest(regs.ecx,  0), Ext::kLAHFSAHF);
        features.addIf(bitTest(regs.ecx,  2), Ext::kSVM);
        features.addIf(bitTest(regs.ecx,  5), Ext::kLZCNT);
        features.addIf(bitTest(regs.ecx,  6), Ext::kSSE4A);
        features.addIf(bitTest(regs.ecx,  7), Ext::kMSSE);
        features.addIf(bitTest(regs.ecx,  8), Ext::kPREFETCHW);
        features.addIf(bitTest(regs.ecx, 12), Ext::kSKINIT);
        features.addIf(bitTest(regs.ecx, 15), Ext::kLWP);
        features.addIf(bitTest(regs.ecx, 21), Ext::kTBM);
        features.addIf(bitTest(regs.ecx, 29), Ext::kMONITORX);
        features.addIf(bitTest(regs.edx, 20), Ext::kNX);
        // FFXSR (FXSAVE/FXRSTOR optimizations) is EDX bit 25 - bit 21, which was tested previously, is reserved.
        features.addIf(bitTest(regs.edx, 25), Ext::kFXSROPT);
        features.addIf(bitTest(regs.edx, 22), Ext::kMMX2);
        features.addIf(bitTest(regs.edx, 27), Ext::kRDTSCP);
        features.addIf(bitTest(regs.edx, 29), Ext::kPREFETCHW);
        features.addIf(bitTest(regs.edx, 30), Ext::k3DNOW2, Ext::kMMX2);
        features.addIf(bitTest(regs.edx, 31), Ext::kPREFETCHW);

        if (features.hasAVX()) {
          features.addIf(bitTest(regs.ecx, 11), Ext::kXOP);
          features.addIf(bitTest(regs.ecx, 16), Ext::kFMA4);
        }

        // This feature seems to be only supported by AMD.
        if (cpu.isVendor("AMD")) {
          features.addIf(bitTest(regs.ecx,  4), Ext::kALTMOVCR8);
        }
        break;

      case 0x80000002u:
      case 0x80000003u:
      case 0x80000004u:
        *brand++ = regs.eax;
        *brand++ = regs.ebx;
        *brand++ = regs.ecx;
        *brand++ = regs.edx;

        // Go directly to the next one we are interested in.
        if (i == 0x80000004u)
          i = 0x80000008u - 1;
        break;

      case 0x80000008u:
        features.addIf(bitTest(regs.ebx,  0), Ext::kCLZERO);
        // RDPRU is EBX bit 4 - bit 0, which was tested previously, is CLZERO.
        features.addIf(bitTest(regs.ebx,  4), Ext::kRDPRU);
        features.addIf(bitTest(regs.ebx,  8), Ext::kMCOMMIT);
        features.addIf(bitTest(regs.ebx,  9), Ext::kWBNOINVD);

        // Go directly to the next one we are interested in.
        i = 0x8000001Fu - 1;
        break;

      case 0x8000001Fu:
        features.addIf(bitTest(regs.eax,  0), Ext::kSME);
        features.addIf(bitTest(regs.eax,  1), Ext::kSEV);
        features.addIf(bitTest(regs.eax,  3), Ext::kSEV_ES);
        features.addIf(bitTest(regs.eax,  4), Ext::kSEV_SNP);
        features.addIf(bitTest(regs.eax,  6), Ext::kRMPQUERY);
        break;
    }
  } while (++i <= maxId);

  // Simplify CPU brand string a bit by removing some unnecessary spaces.
  simplifyCpuBrand(cpu._brand.str);
}
|
|
|
|
} // {x86}
|
|
|
|
#endif // ASMJIT_ARCH_X86
|
|
|
|
// CpuInfo - Detect - ARM
// ======================

// Implement most of the code outside the platform specific #ifdefs to minimize the risk of breaking the detection
// on platforms that don't run on our CI infrastructure. The problem with the detection is that every OS requires
// a specific implementation as ARM features cannot be detected in user-mode without OS enablement.

// The most relevant and accurate information can be found here:
//   https://github.com/llvm-project/llvm/blob/master/lib/Target/AArch64/AArch64.td
//   https://github.com/apple/llvm-project/blob/apple/main/llvm/lib/Target/AArch64/AArch64.td (Apple fork)
//
// Other resources:
//   https://en.wikipedia.org/wiki/AArch64
//   https://en.wikipedia.org/wiki/Apple_silicon#List_of_Apple_processors
//   https://developer.arm.com/downloads/-/exploration-tools/feature-names-for-a-profile
//   https://developer.arm.com/architectures/learn-the-architecture/understanding-the-armv8-x-extensions/single-page
|
|
|
|
#if ASMJIT_ARCH_ARM
|
|
|
|
namespace arm {
|
|
|
|
// ARM commonly refers to CPU features using FEAT_ prefix, we use Ext:: to make it compatible with other parts.
|
|
using Ext = CpuFeatures::ARM;
|
|
|
|
// CpuInfo - Detect - ARM - OS Kernel Version
|
|
// ==========================================
|
|
|
|
#if defined(__linux__)
|
|
// Kernel version split into three numeric parts - {major, minor, patch}.
struct UNameKernelVersion {
  int parts[3];

  // Returns true when the stored version is greater than or equal to `major.minor.patch`. The comparison is
  // lexicographic - the first part that differs decides, the patch part is only compared when both major and
  // minor parts are equal.
  inline bool atLeast(int major, int minor, int patch = 0) const noexcept {
    if (parts[0] != major) {
      return parts[0] > major;
    }

    if (parts[1] != minor) {
      return parts[1] > minor;
    }

    return parts[2] >= patch;
  }
};
|
|
|
|
[[maybe_unused]]
|
|
static UNameKernelVersion getUNameKernelVersion() noexcept {
|
|
UNameKernelVersion ver{};
|
|
ver.parts[0] = -1;
|
|
|
|
utsname buffer;
|
|
if (uname(&buffer) != 0) {
|
|
return ver;
|
|
}
|
|
|
|
size_t count = 0;
|
|
char* p = buffer.release;
|
|
while (*p) {
|
|
uint32_t c = uint8_t(*p);
|
|
if (c >= uint32_t('0') && c <= uint32_t('9')) {
|
|
ver.parts[count] = int(strtol(p, &p, 10));
|
|
if (++count == 3) {
|
|
break;
|
|
}
|
|
}
|
|
else if (c == '.' || c == '-') {
|
|
p++;
|
|
}
|
|
else {
|
|
break;
|
|
}
|
|
}
|
|
|
|
return ver;
|
|
}
|
|
#endif // __linux__
|
|
|
|
// CpuInfo - Detect - ARM - Baseline Features of ARM Architectures
|
|
// ===============================================================
|
|
|
|
[[maybe_unused]]
|
|
static inline void populateBaseAArch32Features(CpuFeatures::ARM& features) noexcept {
|
|
// No baseline flags at the moment.
|
|
DebugUtils::unused(features);
|
|
}
|
|
|
|
[[maybe_unused]]
|
|
static inline void populateBaseAArch64Features(CpuFeatures::ARM& features) noexcept {
|
|
// AArch64 is based on ARMv8.0 and later.
|
|
features.add(Ext::kARMv6);
|
|
features.add(Ext::kARMv7);
|
|
features.add(Ext::kARMv8a);
|
|
|
|
// AArch64 comes with these features by default.
|
|
features.add(Ext::kASIMD);
|
|
features.add(Ext::kFP);
|
|
features.add(Ext::kIDIVA);
|
|
}
|
|
|
|
// Populates baseline features of the target architecture (AArch32 or AArch64, selected at compile time).
static inline void populateBaseARMFeatures(CpuInfo& cpu) noexcept {
#if ASMJIT_ARCH_ARM == 32
  populateBaseAArch32Features(cpu.features().arm());
#else
  populateBaseAArch64Features(cpu.features().arm());
#endif
}
|
|
|
|
// CpuInfo - Detect - ARM - Mandatory Features of ARM Architectures
|
|
// ================================================================
|
|
|
|
// Populates mandatory ARMv8.[v]A features.
|
|
[[maybe_unused]]
|
|
static ASMJIT_FAVOR_SIZE void populateARMv8AFeatures(CpuFeatures::ARM& features, uint32_t v) noexcept {
|
|
switch (v) {
|
|
default:
|
|
[[fallthrough]];
|
|
case 9: // ARMv8.9
|
|
features.add(Ext::kCLRBHB, Ext::kCSSC, Ext::kPRFMSLC, Ext::kSPECRES2, Ext::kRAS2);
|
|
[[fallthrough]];
|
|
case 8: // ARMv8.8
|
|
features.add(Ext::kHBC, Ext::kMOPS, Ext::kNMI);
|
|
[[fallthrough]];
|
|
case 7: // ARMv8.7
|
|
features.add(Ext::kHCX, Ext::kPAN3, Ext::kWFXT, Ext::kXS);
|
|
[[fallthrough]];
|
|
case 6: // ARMv8.6
|
|
features.add(Ext::kAMU1_1, Ext::kBF16, Ext::kECV, Ext::kFGT, Ext::kI8MM);
|
|
[[fallthrough]];
|
|
case 5: // ARMv8.5
|
|
features.add(Ext::kBTI, Ext::kCSV2, Ext::kDPB2, Ext::kFLAGM2, Ext::kFRINTTS, Ext::kSB, Ext::kSPECRES, Ext::kSSBS);
|
|
[[fallthrough]];
|
|
case 4: // ARMv8.4
|
|
features.add(Ext::kAMU1, Ext::kDIT, Ext::kDOTPROD, Ext::kFLAGM,
|
|
Ext::kLRCPC2, Ext::kLSE2, Ext::kMPAM, Ext::kNV,
|
|
Ext::kSEL2, Ext::kTLBIOS, Ext::kTLBIRANGE, Ext::kTRF);
|
|
[[fallthrough]];
|
|
case 3: // ARMv8.3
|
|
features.add(Ext::kCCIDX, Ext::kFCMA, Ext::kJSCVT, Ext::kLRCPC, Ext::kPAUTH);
|
|
[[fallthrough]];
|
|
case 2: // ARMv8.2
|
|
features.add(Ext::kDPB, Ext::kPAN2, Ext::kRAS, Ext::kUAO);
|
|
[[fallthrough]];
|
|
case 1: // ARMv8.1
|
|
features.add(Ext::kCRC32, Ext::kLOR, Ext::kLSE, Ext::kPAN, Ext::kRDM, Ext::kVHE);
|
|
[[fallthrough]];
|
|
case 0: // ARMv8.0
|
|
features.add(Ext::kASIMD, Ext::kFP, Ext::kIDIVA, Ext::kVFP_D32);
|
|
break;
|
|
}
|
|
}
|
|
|
|
// Populates mandatory ARMv9.[v] features.
|
|
[[maybe_unused]]
|
|
static ASMJIT_FAVOR_SIZE void populateARMv9AFeatures(CpuFeatures::ARM& features, uint32_t v) noexcept {
|
|
populateARMv8AFeatures(features, v <= 4u ? 5u + v : 9u);
|
|
|
|
switch (v) {
|
|
default:
|
|
[[fallthrough]];
|
|
case 4: // ARMv9.4 - based on ARMv8.9.
|
|
[[fallthrough]];
|
|
case 3: // ARMv9.3 - based on ARMv8.8.
|
|
[[fallthrough]];
|
|
case 2: // ARMv9.2 - based on ARMv8.7.
|
|
[[fallthrough]];
|
|
case 1: // ARMv9.1 - based on ARMv8.6.
|
|
[[fallthrough]];
|
|
case 0: // ARMv9.0 - based on ARMv8.5.
|
|
features.add(Ext::kRME, Ext::kSVE, Ext::kSVE2);
|
|
break;
|
|
}
|
|
}
|
|
|
|
// CpuInfo - Detect - ARM - CPUID Based Features
|
|
// =============================================
|
|
|
|
// This implements detection based on the content of CPUID registers. The following code doesn't actually read any
|
|
// of the registers so it's an implementation that can theoretically be tested / used in mocks.
|
|
|
|
// Merges a feature that contains 0b1111 when it doesn't exist and starts at 0b0000 when it does.
|
|
[[maybe_unused]]
|
|
static ASMJIT_INLINE void mergeAArch64CPUIDFeatureNA(
|
|
CpuFeatures::ARM& features, uint64_t regBits, uint32_t offset,
|
|
Ext::Id f0,
|
|
Ext::Id f1 = Ext::kNone,
|
|
Ext::Id f2 = Ext::kNone,
|
|
Ext::Id f3 = Ext::kNone) noexcept {
|
|
|
|
uint32_t val = uint32_t((regBits >> offset) & 0xFu);
|
|
if (val == 0xFu) {
|
|
// If val == 0b1111 then the feature is not implemented in this case (some early extensions).
|
|
return;
|
|
}
|
|
|
|
features.addIf(f0 != Ext::kNone, f0);
|
|
features.addIf(f1 != Ext::kNone && val >= 1, f1);
|
|
features.addIf(f2 != Ext::kNone && val >= 2, f2);
|
|
features.addIf(f3 != Ext::kNone && val >= 3, f3);
|
|
}
|
|
|
|
// Merges a feature identified by a single bit at `offset`.
|
|
[[maybe_unused]]
|
|
static ASMJIT_INLINE void mergeAArch64CPUIDFeature1B(CpuFeatures::ARM& features, uint64_t regBits, uint32_t offset, Ext::Id f1) noexcept {
|
|
features.addIf((regBits & (uint64_t(1) << offset)) != 0, f1);
|
|
}
|
|
|
|
// Merges a feature-list starting from 0b01 when it does (0b00 means feature not supported).
|
|
[[maybe_unused]]
|
|
static ASMJIT_INLINE void mergeAArch64CPUIDFeature2B(CpuFeatures::ARM& features, uint64_t regBits, uint32_t offset, Ext::Id f1, Ext::Id f2, Ext::Id f3) noexcept {
|
|
uint32_t val = uint32_t((regBits >> offset) & 0x3u);
|
|
|
|
features.addIf(f1 != Ext::kNone && val >= 1, f1);
|
|
features.addIf(f2 != Ext::kNone && val >= 2, f2);
|
|
features.addIf(f3 != Ext::kNone && val == 3, f3);
|
|
}
|
|
|
|
// Merges a feature-list starting from 0b0001 when it does (0b0000 means feature not supported).
|
|
[[maybe_unused]]
|
|
static ASMJIT_INLINE void mergeAArch64CPUIDFeature4B(CpuFeatures::ARM& features, uint64_t regBits, uint32_t offset,
|
|
Ext::Id f1,
|
|
Ext::Id f2 = Ext::kNone,
|
|
Ext::Id f3 = Ext::kNone,
|
|
Ext::Id f4 = Ext::kNone) noexcept {
|
|
|
|
uint32_t val = uint32_t((regBits >> offset) & 0xFu);
|
|
|
|
// if val == 0 it means that this feature is not supported.
|
|
features.addIf(f1 != Ext::kNone && val >= 1, f1);
|
|
features.addIf(f2 != Ext::kNone && val >= 2, f2);
|
|
features.addIf(f3 != Ext::kNone && val >= 3, f3);
|
|
features.addIf(f4 != Ext::kNone && val >= 4, f4);
|
|
}
|
|
|
|
// Merges a feature that is identified by an exact bit-combination of 4 bits.
|
|
[[maybe_unused]]
|
|
static ASMJIT_INLINE void mergeAArch64CPUIDFeature4S(CpuFeatures::ARM& features, uint64_t regBits, uint32_t offset, uint32_t value, Ext::Id f1) noexcept {
|
|
features.addIf(uint32_t((regBits >> offset) & 0xFu) == value, f1);
|
|
}
|
|
|
|
// Convenience wrappers that forward to the `mergeAArch64CPUIDFeature??()` helpers. They expect a `CpuInfo`
// variable called `cpu` to be in scope. The `identifier` argument is not used by the expansion - it only
// describes the register field at the place of use.
#define MERGE_FEATURE_NA(identifier, reg, offset, ...) mergeAArch64CPUIDFeatureNA(cpu.features().arm(), reg, offset, __VA_ARGS__)
#define MERGE_FEATURE_1B(identifier, reg, offset, ...) mergeAArch64CPUIDFeature1B(cpu.features().arm(), reg, offset, __VA_ARGS__)
#define MERGE_FEATURE_2B(identifier, reg, offset, ...) mergeAArch64CPUIDFeature2B(cpu.features().arm(), reg, offset, __VA_ARGS__)
#define MERGE_FEATURE_4B(identifier, reg, offset, ...) mergeAArch64CPUIDFeature4B(cpu.features().arm(), reg, offset, __VA_ARGS__)
#define MERGE_FEATURE_4S(identifier, reg, offset, ...) mergeAArch64CPUIDFeature4S(cpu.features().arm(), reg, offset, __VA_ARGS__)
|
|
|
|
// Detects features based on the content of ID_AA64PFR0_EL1 and ID_AA64PFR1_EL1 registers.
|
|
[[maybe_unused]]
|
|
static inline void detectAArch64FeaturesViaCPUID_AA64PFR0_AA64PFR1(CpuInfo& cpu, uint64_t fpr0, uint64_t fpr1) noexcept {
|
|
// ID_AA64PFR0_EL1
|
|
// ===============
|
|
|
|
// FP and AdvSIMD bits should match (i.e. if FP features FP16, ASIMD must feature it too).
|
|
MERGE_FEATURE_NA("FP bits [19:16]" , fpr0, 16, Ext::kFP, Ext::kFP16);
|
|
MERGE_FEATURE_NA("AdvSIMD bits [23:20]" , fpr0, 20, Ext::kASIMD, Ext::kFP16);
|
|
/*
|
|
MERGE_FEATURE_4B("GIC bits [27:24]" , fpr0, 24, ...);
|
|
*/
|
|
MERGE_FEATURE_4B("RAS bits [31:28]" , fpr0, 28, Ext::kRAS, Ext::kRAS1_1, Ext::kRAS2);
|
|
MERGE_FEATURE_4B("SVE bits [35:32]" , fpr0, 32, Ext::kSVE);
|
|
MERGE_FEATURE_4B("SEL2 bits [39:36]" , fpr0, 36, Ext::kSEL2);
|
|
MERGE_FEATURE_4B("MPAM bits [43:40]" , fpr0, 40, Ext::kMPAM);
|
|
MERGE_FEATURE_4B("AMU bits [47:44]" , fpr0, 44, Ext::kAMU1, Ext::kAMU1_1);
|
|
MERGE_FEATURE_4B("DIT bits [51:48]" , fpr0, 48, Ext::kDIT);
|
|
MERGE_FEATURE_4B("RME bits [55:52]" , fpr0, 52, Ext::kRME);
|
|
MERGE_FEATURE_4B("CSV2 bits [59:56]" , fpr0, 56, Ext::kCSV2, Ext::kCSV2, Ext::kCSV2, Ext::kCSV2_3);
|
|
MERGE_FEATURE_4B("CSV3 bits [63:60]" , fpr0, 60, Ext::kCSV3);
|
|
|
|
// ID_AA64PFR1_EL1
|
|
// ===============
|
|
|
|
MERGE_FEATURE_4B("BT bits [3:0]" , fpr1, 0, Ext::kBTI);
|
|
MERGE_FEATURE_4B("SSBS bits [7:4]" , fpr1, 4, Ext::kSSBS, Ext::kSSBS2);
|
|
MERGE_FEATURE_4B("MTE bits [11:8]" , fpr1, 8, Ext::kMTE, Ext::kMTE2, Ext::kMTE3);
|
|
/*
|
|
MERGE_FEATURE_4B("RAS_frac bits [15:12]" , fpr1, 12, ...);
|
|
MERGE_FEATURE_4B("MPAM_frac bits [19:16]" , fpr1, 16, ...);
|
|
*/
|
|
MERGE_FEATURE_4B("SME bits [27:24]" , fpr1, 24, Ext::kSME, Ext::kSME2);
|
|
MERGE_FEATURE_4B("RNDR_trap bits [31:28]" , fpr1, 28, Ext::kRNG_TRAP);
|
|
/*
|
|
MERGE_FEATURE_4B("CSV2_frac bits [35:32]" , fpr1, 32, ...);
|
|
*/
|
|
MERGE_FEATURE_4B("NMI bits [39:36]" , fpr1, 36, Ext::kNMI);
|
|
/*
|
|
MERGE_FEATURE_4B("MTE_frac bits [43:40]" , fpr1, 40, ...);
|
|
*/
|
|
MERGE_FEATURE_4B("GCS bits [47:44]" , fpr1, 44, Ext::kGCS);
|
|
MERGE_FEATURE_4B("THE bits [51:48]" , fpr1, 48, Ext::kTHE);
|
|
|
|
// MTEX extensions are only available when MTE3 is available.
|
|
if (cpu.features().arm().hasMTE3())
|
|
MERGE_FEATURE_4B("MTEX bits [55:52]" , fpr1, 52, Ext::kMTE4);
|
|
|
|
/*
|
|
MERGE_FEATURE_4B("DF2 bits [59:56]" , fpr1, 56, ...);
|
|
*/
|
|
MERGE_FEATURE_4B("PFAR bits [63:60]" , fpr1, 60, Ext::kPFAR);
|
|
|
|
// ID_AA64PFR0_EL1 + ID_AA64PFR1_EL1
|
|
// =================================
|
|
|
|
uint32_t rasMain = uint32_t((fpr0 >> 28) & 0xFu);
|
|
uint32_t rasFrac = uint32_t((fpr1 >> 12) & 0xFu);
|
|
|
|
if (rasMain == 1 && rasFrac == 1) {
|
|
cpu.features().arm().add(Ext::kRAS1_1);
|
|
}
|
|
|
|
uint32_t mpamMain = uint32_t((fpr0 >> 40) & 0xFu);
|
|
uint32_t mpamFrac = uint32_t((fpr1 >> 16) & 0xFu);
|
|
|
|
if (mpamMain || mpamFrac) {
|
|
cpu.features().arm().add(Ext::kMPAM);
|
|
}
|
|
}
|
|
|
|
// Detects features based on the content of ID_AA64ISAR0_EL1 and ID_AA64ISAR1_EL1 registers.
|
|
[[maybe_unused]]
|
|
static inline void detectAArch64FeaturesViaCPUID_AA64ISAR0_AA64ISAR1(CpuInfo& cpu, uint64_t isar0, uint64_t isar1) noexcept {
|
|
// ID_AA64ISAR0_EL1
|
|
// ================
|
|
|
|
MERGE_FEATURE_4B("AES bits [7:4]" , isar0, 4, Ext::kAES, Ext::kPMULL);
|
|
MERGE_FEATURE_4B("SHA1 bits [11:8]" , isar0, 8, Ext::kSHA1);
|
|
MERGE_FEATURE_4B("SHA2 bits [15:12]" , isar0, 12, Ext::kSHA256, Ext::kSHA512);
|
|
MERGE_FEATURE_4B("CRC32 bits [19:16]" , isar0, 16, Ext::kCRC32);
|
|
MERGE_FEATURE_4B("Atomic bits [23:20]" , isar0, 20, Ext::kNone, Ext::kLSE, Ext::kLSE128);
|
|
MERGE_FEATURE_4B("TME bits [27:24]" , isar0, 24, Ext::kTME);
|
|
MERGE_FEATURE_4B("RDM bits [31:28]" , isar0, 28, Ext::kRDM);
|
|
MERGE_FEATURE_4B("SHA3 bits [35:32]" , isar0, 32, Ext::kSHA3);
|
|
MERGE_FEATURE_4B("SM3 bits [39:36]" , isar0, 36, Ext::kSM3);
|
|
MERGE_FEATURE_4B("SM4 bits [43:40]" , isar0, 40, Ext::kSM4);
|
|
MERGE_FEATURE_4B("DP bits [47:44]" , isar0, 44, Ext::kDOTPROD);
|
|
MERGE_FEATURE_4B("FHM bits [51:48]" , isar0, 48, Ext::kFHM);
|
|
MERGE_FEATURE_4B("TS bits [55:52]" , isar0, 52, Ext::kFLAGM, Ext::kFLAGM2);
|
|
/*
|
|
MERGE_FEATURE_4B("TLB bits [59:56]" , isar0, 56, ...);
|
|
*/
|
|
MERGE_FEATURE_4B("RNDR bits [63:60]" , isar0, 60, Ext::kFLAGM, Ext::kRNG);
|
|
|
|
// ID_AA64ISAR1_EL1
|
|
// ================
|
|
|
|
MERGE_FEATURE_4B("DPB bits [3:0]" , isar1, 0, Ext::kDPB, Ext::kDPB2);
|
|
/*
|
|
MERGE_FEATURE_4B("APA bits [7:4]" , isar1, 4, ...);
|
|
MERGE_FEATURE_4B("API bits [11:8]" , isar1, 8, ...);
|
|
*/
|
|
MERGE_FEATURE_4B("JSCVT bits [15:12]" , isar1, 12, Ext::kJSCVT);
|
|
MERGE_FEATURE_4B("FCMA bits [19:16]" , isar1, 16, Ext::kFCMA);
|
|
MERGE_FEATURE_4B("LRCPC bits [23:20]" , isar1, 20, Ext::kLRCPC, Ext::kLRCPC2, Ext::kLRCPC3);
|
|
/*
|
|
MERGE_FEATURE_4B("GPA bits [27:24]" , isar1, 24, ...);
|
|
MERGE_FEATURE_4B("GPI bits [31:28]" , isar1, 28, ...);
|
|
*/
|
|
MERGE_FEATURE_4B("FRINTTS bits [35:32]" , isar1, 32, Ext::kFRINTTS);
|
|
MERGE_FEATURE_4B("SB bits [39:36]" , isar1, 36, Ext::kSB);
|
|
MERGE_FEATURE_4B("SPECRES bits [43:40]" , isar1, 40, Ext::kSPECRES, Ext::kSPECRES2);
|
|
MERGE_FEATURE_4B("BF16 bits [47:44]" , isar1, 44, Ext::kBF16, Ext::kEBF16);
|
|
MERGE_FEATURE_4B("DGH bits [51:48]" , isar1, 48, Ext::kDGH);
|
|
MERGE_FEATURE_4B("I8MM bits [55:52]" , isar1, 52, Ext::kI8MM);
|
|
MERGE_FEATURE_4B("XS bits [59:56]" , isar1, 56, Ext::kXS);
|
|
MERGE_FEATURE_4B("LS64 bits [63:60]" , isar1, 60, Ext::kLS64, Ext::kLS64_V, Ext::kLS64_ACCDATA);
|
|
}
|
|
|
|
// Detects features based on the content of ID_AA64ISAR2_EL1 register.
|
|
[[maybe_unused]]
|
|
static inline void detectAArch64FeaturesViaCPUID_AA64ISAR2(CpuInfo& cpu, uint64_t isar2) noexcept {
|
|
MERGE_FEATURE_4B("WFxT bits [3:0]" , isar2, 0, Ext::kNone, Ext::kWFXT);
|
|
MERGE_FEATURE_4B("RPRES bits [7:4]" , isar2, 4, Ext::kRPRES);
|
|
/*
|
|
MERGE_FEATURE_4B("GPA3 bits [11:8]" , isar2, 8, ...);
|
|
MERGE_FEATURE_4B("APA3 bits [15:12]" , isar2, 12, ...);
|
|
*/
|
|
MERGE_FEATURE_4B("MOPS bits [19:16]" , isar2, 16, Ext::kMOPS);
|
|
MERGE_FEATURE_4B("BC bits [23:20]" , isar2, 20, Ext::kHBC);
|
|
MERGE_FEATURE_4B("PAC_frac bits [27:24]" , isar2, 24, Ext::kCONSTPACFIELD);
|
|
MERGE_FEATURE_4B("CLRBHB bits [31:28]" , isar2, 28, Ext::kCLRBHB);
|
|
MERGE_FEATURE_4B("SYSREG128 bits [35:32]" , isar2, 32, Ext::kSYSREG128);
|
|
MERGE_FEATURE_4B("SYSINSTR128 bits [39:36]" , isar2, 36, Ext::kSYSINSTR128);
|
|
MERGE_FEATURE_4B("PRFMSLC bits [43:40]" , isar2, 40, Ext::kPRFMSLC);
|
|
MERGE_FEATURE_4B("RPRFM bits [51:48]" , isar2, 48, Ext::kRPRFM);
|
|
MERGE_FEATURE_4B("CSSC bits [55:52]" , isar2, 52, Ext::kCSSC);
|
|
MERGE_FEATURE_4B("LUT bits [59:56]" , isar2, 56, Ext::kLUT);
|
|
}
|
|
|
|
// TODO: This register is not accessed at the moment.
#if 0
// Detects features based on the content of ID_AA64ISAR3_EL1 register.
//
// `isar3` holds the value of ID_AA64ISAR3_EL1. Every detected feature is added to `cpu.features().arm()`.
[[maybe_unused]]
static inline void detectAArch64FeaturesViaCPUID_AA64ISAR3(CpuInfo& cpu, uint64_t isar3) noexcept {
  // ID_AA64ISAR3_EL1
  // ================

  MERGE_FEATURE_4B("CPA bits [3:0]"         , isar3,  0, Ext::kCPA, Ext::kCPA2);
  MERGE_FEATURE_4B("FAMINMAX bits [7:4]"    , isar3,  4, Ext::kFAMINMAX);
  MERGE_FEATURE_4B("TLBIW bits [11:8]"      , isar3,  8, Ext::kTLBIW);
}
#endif
|
|
|
|
[[maybe_unused]]
|
|
static inline void detectAArch64FeaturesViaCPUID_AA64MMFR0(CpuInfo& cpu, uint64_t mmfr0) noexcept {
|
|
// ID_AA64MMFR0_EL1
|
|
// ================
|
|
|
|
/*
|
|
MERGE_FEATURE_4B("PARange bits [3:0]" , mmfr0, 0, ...);
|
|
MERGE_FEATURE_4B("ASIDBits bits [7:4]" , mmfr0, 4, ...);
|
|
MERGE_FEATURE_4B("BigEnd bits [11:8]" , mmfr0, 8, ...);
|
|
MERGE_FEATURE_4B("SNSMem bits [15:12]" , mmfr0, 12, ...);
|
|
MERGE_FEATURE_4B("BigEndEL0 bits [19:16]" , mmfr0, 16, ...);
|
|
MERGE_FEATURE_4B("TGran16 bits [23:20]" , mmfr0, 20, ...);
|
|
MERGE_FEATURE_4B("TGran64 bits [27:24]" , mmfr0, 24, ...);
|
|
MERGE_FEATURE_4B("TGran4 bits [31:28]" , mmfr0, 28, ...);
|
|
MERGE_FEATURE_4B("TGran16_2 bits [35:32]" , mmfr0, 32, ...);
|
|
MERGE_FEATURE_4B("TGran64_2 bits [39:36]" , mmfr0, 36, ...);
|
|
MERGE_FEATURE_4B("TGran4_2 bits [43:40]" , mmfr0, 40, ...);
|
|
MERGE_FEATURE_4B("ExS bits [47:44]" , mmfr0, 44, ...);
|
|
*/
|
|
MERGE_FEATURE_4B("FGT bits [59:56]" , mmfr0, 56, Ext::kFGT, Ext::kFGT2);
|
|
MERGE_FEATURE_4B("ECV bits [63:60]" , mmfr0, 60, Ext::kECV);
|
|
}
|
|
|
|
[[maybe_unused]]
|
|
static inline void detectAArch64FeaturesViaCPUID_AA64MMFR1(CpuInfo& cpu, uint64_t mmfr1) noexcept {
|
|
// ID_AA64MMFR1_EL1
|
|
// ================
|
|
|
|
MERGE_FEATURE_4B("HAFDBS bits [3:0]" , mmfr1, 0, Ext::kHAFDBS, Ext::kNone, Ext::kHAFT, Ext::kHDBSS);
|
|
MERGE_FEATURE_4B("VMIDBits bits [7:4]" , mmfr1, 4, Ext::kVMID16);
|
|
MERGE_FEATURE_4B("VH bits [11:8]" , mmfr1, 8, Ext::kVHE);
|
|
MERGE_FEATURE_4B("HPDS bits [15:12]" , mmfr1, 12, Ext::kHPDS, Ext::kHPDS2);
|
|
MERGE_FEATURE_4B("LO bits [19:16]" , mmfr1, 16, Ext::kLOR);
|
|
MERGE_FEATURE_4B("PAN bits [23:20]" , mmfr1, 20, Ext::kPAN, Ext::kPAN2, Ext::kPAN3);
|
|
/*
|
|
MERGE_FEATURE_4B("SpecSEI bits [27:24]" , mmfr1, 24, ...);
|
|
*/
|
|
MERGE_FEATURE_4B("XNX bits [31:28]" , mmfr1, 28, Ext::kXNX);
|
|
/*
|
|
MERGE_FEATURE_4B("TWED bits [35:32]" , mmfr1, 32, ...);
|
|
MERGE_FEATURE_4B("ETS bits [39:36]" , mmfr1, 36, ...);
|
|
*/
|
|
MERGE_FEATURE_4B("HCX bits [43:40]" , mmfr1, 40, Ext::kHCX);
|
|
MERGE_FEATURE_4B("AFP bits [47:44]" , mmfr1, 44, Ext::kAFP);
|
|
/*
|
|
MERGE_FEATURE_4B("nTLBPA bits [51:48]" , mmfr1, 48, ...);
|
|
MERGE_FEATURE_4B("TIDCP1 bits [55:52]" , mmfr1, 52, ...);
|
|
*/
|
|
MERGE_FEATURE_4B("CMOW bits [59:56]" , mmfr1, 56, Ext::kCMOW);
|
|
MERGE_FEATURE_4B("ECBHB bits [63:60]" , mmfr1, 60, Ext::kECBHB);
|
|
}
|
|
|
|
[[maybe_unused]]
|
|
static inline void detectAArch64FeaturesViaCPUID_AA64MMFR2(CpuInfo& cpu, uint64_t mmfr2) noexcept {
|
|
// ID_AA64MMFR2_EL1
|
|
// ================
|
|
|
|
/*
|
|
MERGE_FEATURE_4B("CnP bits [3:0]" , mmfr2, 0, ...);
|
|
*/
|
|
MERGE_FEATURE_4B("UAO bits [7:4]" , mmfr2, 4, Ext::kUAO);
|
|
/*
|
|
MERGE_FEATURE_4B("LSM bits [11:8]" , mmfr2, 8, ...);
|
|
MERGE_FEATURE_4B("IESB bits [15:12]" , mmfr2, 12, ...);
|
|
*/
|
|
MERGE_FEATURE_4B("VARange bits [19:16]" , mmfr2, 16, Ext::kLVA, Ext::kLVA3);
|
|
MERGE_FEATURE_4B("CCIDX bits [23:20]" , mmfr2, 20, Ext::kCCIDX);
|
|
MERGE_FEATURE_4B("NV bits [27:24]" , mmfr2, 24, Ext::kNV, Ext::kNV2);
|
|
/*
|
|
MERGE_FEATURE_4B("ST bits [31:28]" , mmfr2, 28, ...);
|
|
*/
|
|
MERGE_FEATURE_4B("AT bits [35:32]" , mmfr2, 32, Ext::kLSE2);
|
|
/*
|
|
MERGE_FEATURE_4B("IDS bits [39:36]" , mmfr2, 36, ...);
|
|
MERGE_FEATURE_4B("FWB bits [43:40]" , mmfr2, 40, ...);
|
|
MERGE_FEATURE_4B("TTL bits [51:48]" , mmfr2, 48, ...);
|
|
MERGE_FEATURE_4B("BBM bits [55:52]" , mmfr2, 52, ...);
|
|
MERGE_FEATURE_4B("EVT bits [59:56]" , mmfr2, 56, ...);
|
|
MERGE_FEATURE_4B("E0PD bits [63:60]" , mmfr2, 60, ...);
|
|
*/
|
|
}
|
|
|
|
// Detects features based on the content of ID_AA64ZFR0_EL1 register.
|
|
[[maybe_unused]]
|
|
static inline void detectAArch64FeaturesViaCPUID_AA64ZFR0(CpuInfo& cpu, uint64_t zfr0) noexcept {
|
|
MERGE_FEATURE_4B("SVEver bits [3:0]" , zfr0, 0, Ext::kSVE2, Ext::kSVE2_1);
|
|
MERGE_FEATURE_4B("AES bits [7:4]" , zfr0, 4, Ext::kSVE_AES, Ext::kSVE_PMULL128);
|
|
MERGE_FEATURE_4B("BitPerm bits [19:16]" , zfr0, 16, Ext::kSVE_BITPERM);
|
|
MERGE_FEATURE_4B("BF16 bits [23:20]" , zfr0, 20, Ext::kSVE_BF16, Ext::kSVE_EBF16);
|
|
MERGE_FEATURE_4B("B16B16 bits [27:24]" , zfr0, 24, Ext::kSVE_B16B16);
|
|
MERGE_FEATURE_4B("SHA3 bits [35:32]" , zfr0, 32, Ext::kSVE_SHA3);
|
|
MERGE_FEATURE_4B("SM4 bits [43:40]" , zfr0, 40, Ext::kSVE_SM4);
|
|
MERGE_FEATURE_4B("I8MM bits [47:44]" , zfr0, 44, Ext::kSVE_I8MM);
|
|
MERGE_FEATURE_4B("F32MM bits [55:52]" , zfr0, 52, Ext::kSVE_F32MM);
|
|
MERGE_FEATURE_4B("F64MM bits [59:56]" , zfr0, 56, Ext::kSVE_F64MM);
|
|
}
|
|
|
|
[[maybe_unused]]
|
|
static inline void detectAArch64FeaturesViaCPUID_AA64SMFR0(CpuInfo& cpu, uint64_t smfr0) noexcept {
|
|
MERGE_FEATURE_1B("SF8DP2 bit [28]" , smfr0, 29, Ext::kSSVE_FP8DOT2);
|
|
MERGE_FEATURE_1B("SF8DP4 bit [29]" , smfr0, 29, Ext::kSSVE_FP8DOT4);
|
|
MERGE_FEATURE_1B("SF8FMA bit [30]" , smfr0, 30, Ext::kSSVE_FP8FMA);
|
|
MERGE_FEATURE_1B("F32F32 bit [32]" , smfr0, 32, Ext::kSME_F32F32);
|
|
MERGE_FEATURE_1B("BI32I32 bit [33]" , smfr0, 33, Ext::kSME_BI32I32);
|
|
MERGE_FEATURE_1B("B16F32 bit [34]" , smfr0, 34, Ext::kSME_B16F32);
|
|
MERGE_FEATURE_1B("F16F32 bit [35]" , smfr0, 35, Ext::kSME_F16F32);
|
|
MERGE_FEATURE_4S("I8I32 bits [39:36]" , smfr0, 36, 0xF, Ext::kSME_I8I32);
|
|
MERGE_FEATURE_1B("F8F32 bit [40]" , smfr0, 40, Ext::kSME_F8F32);
|
|
MERGE_FEATURE_1B("F8F16 bit [41]" , smfr0, 41, Ext::kSME_F8F16);
|
|
MERGE_FEATURE_1B("F16F16 bit [42]" , smfr0, 42, Ext::kSME_F16F16);
|
|
MERGE_FEATURE_1B("B16B16 bit [43]" , smfr0, 43, Ext::kSME_B16B16);
|
|
MERGE_FEATURE_4S("I16I32 bits [47:44]" , smfr0, 44, 0x5, Ext::kSME_I16I32);
|
|
MERGE_FEATURE_1B("F64F64 bit [48]" , smfr0, 48, Ext::kSME_F64F64);
|
|
MERGE_FEATURE_4S("I16I64 bits [55:52]" , smfr0, 52, 0xF, Ext::kSME_I16I64);
|
|
MERGE_FEATURE_4B("SMEver bits [59:56]" , smfr0, 56, Ext::kSME2, Ext::kSME2_1);
|
|
MERGE_FEATURE_1B("LUTv2 bit [60]" , smfr0, 60, Ext::kSME_LUTv2);
|
|
MERGE_FEATURE_1B("FA64 bit [63]" , smfr0, 63, Ext::kSME_FA64);
|
|
}
|
|
|
|
#undef MERGE_FEATURE_4S
|
|
#undef MERGE_FEATURE_4B
|
|
#undef MERGE_FEATURE_2B
|
|
#undef MERGE_FEATURE_1B
|
|
#undef MERGE_FEATURE_NA
|
|
|
|
// CpuInfo - Detect - ARM - CPU Vendor Features
|
|
// ============================================
|
|
|
|
// Family identifiers of Apple designed CPUs.
//
// The values are compared against `CpuInfo::familyId()` to select a predefined set of CPU features.
enum class AppleFamilyId : uint32_t {
  // Apple design.
  kSWIFT               = 0x1E2D6381u, // Apple A6/A6X (ARMv7s).
  kCYCLONE             = 0x37A09642u, // Apple A7 (ARMv8.0-A).
  kTYPHOON             = 0x2C91A47Eu, // Apple A8 (ARMv8.0-A).
  kTWISTER             = 0x92FB37C8u, // Apple A9 (ARMv8.0-A).
  kHURRICANE           = 0x67CEEE93u, // Apple A10 (ARMv8.1-A).
  kMONSOON_MISTRAL     = 0xE81E7EF6u, // Apple A11 (ARMv8.2-A).
  kVORTEX_TEMPEST      = 0x07D34B9Fu, // Apple A12 (ARMv8.3-A).
  kLIGHTNING_THUNDER   = 0x462504D2u, // Apple A13 (ARMv8.4-A).
  kFIRESTORM_ICESTORM  = 0x1B588BB3u, // Apple A14/M1 (ARMv8.5-A).
  kAVALANCHE_BLIZZARD  = 0xDA33D83Du, // Apple A15/M2.
  kEVEREST_SAWTOOTH    = 0x8765EDEAu  // Apple A16.
};
|
|
|
|
[[maybe_unused]]
|
|
static ASMJIT_FAVOR_SIZE bool detectARMFeaturesViaAppleFamilyId(CpuInfo& cpu) noexcept {
|
|
using Id = AppleFamilyId;
|
|
CpuFeatures::ARM& features = cpu.features().arm();
|
|
|
|
switch (cpu.familyId()) {
|
|
// Apple A7-A9 (ARMv8.0-A).
|
|
case uint32_t(Id::kCYCLONE):
|
|
case uint32_t(Id::kTYPHOON):
|
|
case uint32_t(Id::kTWISTER):
|
|
populateARMv8AFeatures(features, 0);
|
|
features.add(Ext::kAES, Ext::kPMU, Ext::kPMULL, Ext::kSHA1, Ext::kSHA256);
|
|
return true;
|
|
|
|
// Apple A10 (ARMv8.0-A).
|
|
case uint32_t(Id::kHURRICANE):
|
|
populateARMv8AFeatures(features, 0);
|
|
features.add(Ext::kAES, Ext::kCRC32, Ext::kLOR, Ext::kPAN, Ext::kPMU, Ext::kPMULL, Ext::kRDM, Ext::kSHA1,
|
|
Ext::kSHA256, Ext::kVHE);
|
|
return true;
|
|
|
|
// Apple A11 (ARMv8.2-A).
|
|
case uint32_t(Id::kMONSOON_MISTRAL):
|
|
populateARMv8AFeatures(features, 2);
|
|
features.add(Ext::kAES, Ext::kFP16, Ext::kFP16CONV, Ext::kPMU, Ext::kPMULL, Ext::kSHA1, Ext::kSHA256);
|
|
return true;
|
|
|
|
// Apple A12 (ARMv8.3-A).
|
|
case uint32_t(Id::kVORTEX_TEMPEST):
|
|
populateARMv8AFeatures(features, 3);
|
|
features.add(Ext::kAES, Ext::kFP16, Ext::kFP16CONV, Ext::kPMU, Ext::kPMULL, Ext::kSHA1, Ext::kSHA256);
|
|
return true;
|
|
|
|
// Apple A13 (ARMv8.4-A).
|
|
case uint32_t(Id::kLIGHTNING_THUNDER):
|
|
populateARMv8AFeatures(features, 4);
|
|
features.add(Ext::kAES, Ext::kFHM, Ext::kFP16, Ext::kFP16CONV, Ext::kPMU, Ext::kPMULL, Ext::kSHA1,
|
|
Ext::kSHA256, Ext::kSHA3, Ext::kSHA512);
|
|
return true;
|
|
|
|
// Apple A14/M1 (ARMv8.5-A).
|
|
case uint32_t(Id::kFIRESTORM_ICESTORM):
|
|
populateARMv8AFeatures(features, 4);
|
|
features.add(Ext::kAES, Ext::kCSV2, Ext::kCSV3, Ext::kDPB2, Ext::kECV, Ext::kFHM, Ext::kFLAGM2,
|
|
Ext::kFP16, Ext::kFP16CONV, Ext::kFRINTTS, Ext::kPMU, Ext::kPMULL, Ext::kSB,
|
|
Ext::kSHA1, Ext::kSHA256, Ext::kSHA3, Ext::kSHA512, Ext::kSSBS);
|
|
return true;
|
|
|
|
// Apple A15/M2.
|
|
case uint32_t(Id::kAVALANCHE_BLIZZARD):
|
|
populateARMv8AFeatures(features, 6);
|
|
features.add(Ext::kAES, Ext::kFHM, Ext::kFP16, Ext::kFP16CONV, Ext::kPMU, Ext::kPMULL, Ext::kSHA1,
|
|
Ext::kSHA256, Ext::kSHA3, Ext::kSHA512);
|
|
return true;
|
|
|
|
// Apple A16.
|
|
case uint32_t(Id::kEVEREST_SAWTOOTH):
|
|
populateARMv8AFeatures(features, 6);
|
|
features.add(Ext::kAES, Ext::kFHM, Ext::kFP16, Ext::kFP16CONV, Ext::kHCX, Ext::kPMU, Ext::kPMULL,
|
|
Ext::kSHA1, Ext::kSHA256, Ext::kSHA3, Ext::kSHA512);
|
|
return true;
|
|
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
|
|
// CpuInfo - Detect - ARM - Compile Flags Features
|
|
// ===============================================
|
|
|
|
// Detects ARM version by macros defined at compile time. This means that AsmJit will report features forced at
// compile time that should always be provided by the target CPU. This also means that if we don't provide any
// means to detect CPU features, the features reported by AsmJit will at least not be fewer than the features of
// the target it was compiled for.
|
|
|
|
#if ASMJIT_ARCH_ARM == 32
// Adds AArch32 features to `cpu` that are implied by macros predefined by the compiler.
[[maybe_unused]]
static ASMJIT_FAVOR_SIZE void detectAArch32FeaturesViaCompilerFlags(CpuInfo& cpu) noexcept {
  // Marks `cpu` as used in case none of the macros tested below is defined.
  DebugUtils::unused(cpu);

  // ARM targets have no baseline at the moment.
#if defined(__ARM_ARCH_7A__)
  cpu.addFeature(CpuFeatures::ARM::kARMv7);
#endif

#if defined(__ARM_ARCH_8A__)
  cpu.addFeature(CpuFeatures::ARM::kARMv8a);
#endif

#if defined(__TARGET_ARCH_THUMB)
  cpu.addFeature(CpuFeatures::ARM::kTHUMB);
#if __TARGET_ARCH_THUMB >= 4
  cpu.addFeature(CpuFeatures::ARM::kTHUMBv2);
#endif
#endif

#if defined(__ARM_FEATURE_FMA)
  cpu.addFeature(Ext::kFP);
#endif

#if defined(__ARM_NEON)
  cpu.addFeature(Ext::kASIMD);
#endif

  // Integer division is reported as IDIVT when `__TARGET_ARCH_THUMB` is defined and as IDIVA otherwise.
#if defined(__ARM_FEATURE_IDIV)
#if defined(__TARGET_ARCH_THUMB)
  cpu.addFeature(Ext::kIDIVT);
#else
  cpu.addFeature(Ext::kIDIVA);
#endif
#endif
}
#endif // ASMJIT_ARCH_ARM == 32
|
|
|
|
#if ASMJIT_ARCH_ARM == 64
// Adds AArch64 features to `cpu` that are implied by macros predefined by the compiler.
//
// The architecture version macros select which `populateARMv?AFeatures()` baseline is applied (ARMv8.0-A when no
// version macro is defined); `__ARM_FEATURE_*` macros then add individual features on top of the baseline.
[[maybe_unused]]
static ASMJIT_FAVOR_SIZE void detectAArch64FeaturesViaCompilerFlags(CpuInfo& cpu) noexcept {
  DebugUtils::unused(cpu);

  CpuFeatures::ARM& features = cpu.features().arm();

  // Architecture baseline - the most recent version that is advertised wins.
#if defined(__ARM_ARCH_9_5A__)
  populateARMv9AFeatures(features, 5);
#elif defined(__ARM_ARCH_9_4A__)
  populateARMv9AFeatures(features, 4);
#elif defined(__ARM_ARCH_9_3A__)
  populateARMv9AFeatures(features, 3);
#elif defined(__ARM_ARCH_9_2A__)
  populateARMv9AFeatures(features, 2);
#elif defined(__ARM_ARCH_9_1A__)
  populateARMv9AFeatures(features, 1);
#elif defined(__ARM_ARCH_9A__)
  populateARMv9AFeatures(features, 0);
#elif defined(__ARM_ARCH_8_9A__)
  populateARMv8AFeatures(features, 9);
#elif defined(__ARM_ARCH_8_8A__)
  populateARMv8AFeatures(features, 8);
#elif defined(__ARM_ARCH_8_7A__)
  populateARMv8AFeatures(features, 7);
#elif defined(__ARM_ARCH_8_6A__)
  populateARMv8AFeatures(features, 6);
#elif defined(__ARM_ARCH_8_5A__)
  populateARMv8AFeatures(features, 5);
#elif defined(__ARM_ARCH_8_4A__)
  populateARMv8AFeatures(features, 4);
#elif defined(__ARM_ARCH_8_3A__)
  populateARMv8AFeatures(features, 3);
#elif defined(__ARM_ARCH_8_2A__)
  populateARMv8AFeatures(features, 2);
#elif defined(__ARM_ARCH_8_1A__)
  populateARMv8AFeatures(features, 1);
#else
  populateARMv8AFeatures(features, 0);
#endif

  // Individual features.
#if defined(__ARM_FEATURE_AES)
  cpu.addFeature(Ext::kAES);
#endif

#if defined(__ARM_FEATURE_BF16_SCALAR_ARITHMETIC) && defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC)
  cpu.addFeature(Ext::kBF16);
#endif

#if defined(__ARM_FEATURE_CRC32)
  cpu.addFeature(Ext::kCRC32);
#endif

#if defined(__ARM_FEATURE_CRYPTO)
  cpu.addFeature(Ext::kAES, Ext::kSHA1, Ext::kSHA256);
#endif

#if defined(__ARM_FEATURE_DOTPROD)
  cpu.addFeature(Ext::kDOTPROD);
#endif

#if defined(__ARM_FEATURE_FP16FML) || defined(__ARM_FEATURE_FP16_FML)
  cpu.addFeature(Ext::kFHM);
#endif

#if defined(__ARM_FEATURE_FP16_SCALAR_ARITHMETIC)
  cpu.addFeature(Ext::kFP16);
#endif

#if defined(__ARM_FEATURE_FRINT)
  cpu.addFeature(Ext::kFRINTTS);
#endif

#if defined(__ARM_FEATURE_JCVT)
  cpu.addFeature(Ext::kJSCVT);
#endif

#if defined(__ARM_FEATURE_MATMUL_INT8)
  cpu.addFeature(Ext::kI8MM);
#endif

#if defined(__ARM_FEATURE_ATOMICS)
  cpu.addFeature(Ext::kLSE);
#endif

#if defined(__ARM_FEATURE_MEMORY_TAGGING)
  cpu.addFeature(Ext::kMTE);
#endif

#if defined(__ARM_FEATURE_QRDMX)
  cpu.addFeature(Ext::kRDM);
#endif

#if defined(__ARM_FEATURE_RNG)
  cpu.addFeature(Ext::kRNG);
#endif

#if defined(__ARM_FEATURE_SHA2)
  cpu.addFeature(Ext::kSHA256);
#endif

#if defined(__ARM_FEATURE_SHA3)
  cpu.addFeature(Ext::kSHA3);
#endif

#if defined(__ARM_FEATURE_SHA512)
  cpu.addFeature(Ext::kSHA512);
#endif

#if defined(__ARM_FEATURE_SM3)
  cpu.addFeature(Ext::kSM3);
#endif

#if defined(__ARM_FEATURE_SM4)
  cpu.addFeature(Ext::kSM4);
#endif

#if defined(__ARM_FEATURE_SVE) || defined(__ARM_FEATURE_SVE_VECTOR_OPERATORS)
  cpu.addFeature(Ext::kSVE);
#endif

#if defined(__ARM_FEATURE_SVE_MATMUL_INT8)
  cpu.addFeature(Ext::kSVE_I8MM);
#endif

#if defined(__ARM_FEATURE_SVE_MATMUL_FP32)
  cpu.addFeature(Ext::kSVE_F32MM);
#endif

#if defined(__ARM_FEATURE_SVE_MATMUL_FP64)
  cpu.addFeature(Ext::kSVE_F64MM);
#endif

#if defined(__ARM_FEATURE_SVE2)
  cpu.addFeature(Ext::kSVE2);
#endif

#if defined(__ARM_FEATURE_SVE2_AES)
  cpu.addFeature(Ext::kSVE_AES);
#endif

#if defined(__ARM_FEATURE_SVE2_BITPERM)
  cpu.addFeature(Ext::kSVE_BITPERM);
#endif

#if defined(__ARM_FEATURE_SVE2_SHA3)
  cpu.addFeature(Ext::kSVE_SHA3);
#endif

#if defined(__ARM_FEATURE_SVE2_SM4)
  cpu.addFeature(Ext::kSVE_SM4);
#endif

#if defined(__ARM_FEATURE_TME)
  cpu.addFeature(Ext::kTME);
#endif
}
#endif // ASMJIT_ARCH_ARM == 64
|
|
|
|
[[maybe_unused]]
|
|
static ASMJIT_FAVOR_SIZE void detectARMFeaturesViaCompilerFlags(CpuInfo& cpu) noexcept {
|
|
#if ASMJIT_ARCH_ARM == 32
|
|
detectAArch32FeaturesViaCompilerFlags(cpu);
|
|
#else
|
|
detectAArch64FeaturesViaCompilerFlags(cpu);
|
|
#endif // ASMJIT_ARCH_ARM
|
|
}
|
|
|
|
// CpuInfo - Detect - ARM - Post Processing ARM Features
|
|
// =====================================================
|
|
|
|
// Postprocesses AArch32 features.
|
|
[[maybe_unused]]
|
|
static ASMJIT_FAVOR_SIZE void postProcessAArch32Features(CpuFeatures::ARM& features) noexcept {
|
|
DebugUtils::unused(features);
|
|
}
|
|
|
|
// Postprocesses AArch64 features.
|
|
//
|
|
// The only reason to use this function is to deduce some flags from others.
|
|
[[maybe_unused]]
|
|
static ASMJIT_FAVOR_SIZE void postProcessAArch64Features(CpuFeatures::ARM& features) noexcept {
|
|
if (features.hasFP16()) {
|
|
features.add(Ext::kFP16CONV);
|
|
}
|
|
|
|
if (features.hasMTE3()) {
|
|
features.add(Ext::kMTE2);
|
|
}
|
|
|
|
if (features.hasMTE2()) {
|
|
features.add(Ext::kMTE);
|
|
}
|
|
|
|
if (features.hasSSBS2()) {
|
|
features.add(Ext::kSSBS);
|
|
}
|
|
}
|
|
|
|
[[maybe_unused]]
|
|
static ASMJIT_FAVOR_SIZE void postProcessARMCpuInfo(CpuInfo& cpu) noexcept {
|
|
#if ASMJIT_ARCH_ARM == 32
|
|
postProcessAArch32Features(cpu.features().arm());
|
|
#else
|
|
postProcessAArch64Features(cpu.features().arm());
|
|
#endif // ASMJIT_ARCH_ARM
|
|
}
|
|
|
|
// CpuInfo - Detect - ARM - Detect by Reading CPUID Registers
|
|
// ==========================================================
|
|
|
|
// Support CPUID-based detection on AArch64.
|
|
#if defined(ASMJIT_ARM_DETECT_VIA_CPUID)
|
|
|
|
// Since the register ID is encoded with the instruction we have to create a function for each register ID to read.
//
// The macro defines a function called `fnName` that executes `mrs` with the given system register `sysReg` and
// returns the 64-bit value that was read.
#define ASMJIT_AARCH64_DEFINE_CPUID_READ_FN(fnName, sysReg)        \
  [[maybe_unused]]                                                 \
  static inline uint64_t fnName() noexcept {                       \
    uint64_t regValue;                                             \
    __asm__ __volatile__("mrs %0, " #sysReg : "=r"(regValue));     \
    return regValue;                                               \
  }

// NOTE: Older tools don't know the IDs. For example Ubuntu on RPI (GCC 9) won't compile ID_AA64ISAR2_EL1 in 2023.
ASMJIT_AARCH64_DEFINE_CPUID_READ_FN(aarch64ReadPFR0 , ID_AA64PFR0_EL1)
ASMJIT_AARCH64_DEFINE_CPUID_READ_FN(aarch64ReadPFR1 , ID_AA64PFR1_EL1)
ASMJIT_AARCH64_DEFINE_CPUID_READ_FN(aarch64ReadISAR0, ID_AA64ISAR0_EL1)
ASMJIT_AARCH64_DEFINE_CPUID_READ_FN(aarch64ReadISAR1, ID_AA64ISAR1_EL1)
ASMJIT_AARCH64_DEFINE_CPUID_READ_FN(aarch64ReadISAR2, S3_0_C0_C6_2) // ID_AA64ISAR2_EL1
ASMJIT_AARCH64_DEFINE_CPUID_READ_FN(aarch64ReadZFR0 , S3_0_C0_C4_4) // ID_AA64ZFR0_EL1

#undef ASMJIT_AARCH64_DEFINE_CPUID_READ_FN
|
|
|
|
// Detects AArch64 features by reading CPUID bits directly from CPUID registers. This is the most reliable method
|
|
// as the OS doesn't have to know all supported extensions this way (if there is something missing in HWCAPS then
|
|
// there is no way to detect such feature without reading CPUID bits).
|
|
//
|
|
// This function uses MSR instructions, which means that it reads registers that cannot be read in user-mode. The
|
|
// OS typically implements this feature by handling SIGILL internally and providing a filtered content of these
|
|
// registers back to the user - at least this is what Linux documentation states - everything implementation
|
|
// dependent is zeroed, only the bits that are used for CPU feature identification would be present.
|
|
//
|
|
// References:
|
|
// - https://docs.kernel.org/arch/arm64/cpu-feature-registers.html
|
|
[[maybe_unused]]
|
|
static ASMJIT_FAVOR_SIZE void detectAArch64FeaturesViaCPUID(CpuInfo& cpu) noexcept {
|
|
populateBaseARMFeatures(cpu);
|
|
|
|
detectAArch64FeaturesViaCPUID_AA64PFR0_AA64PFR1(cpu, aarch64ReadPFR0(), aarch64ReadPFR1());
|
|
detectAArch64FeaturesViaCPUID_AA64ISAR0_AA64ISAR1(cpu, aarch64ReadISAR0(), aarch64ReadISAR1());
|
|
|
|
// TODO: Fix this on FreeBSD - I don't know what kernel version allows to access the registers below...
|
|
|
|
#if defined(__linux__)
|
|
UNameKernelVersion kVer = getUNameKernelVersion();
|
|
|
|
// Introduced in Linux 4.19 by "arm64: add ID_AA64ISAR2_EL1 sys register"), so we want at least 4.20.
|
|
if (kVer.atLeast(4, 20)) {
|
|
detectAArch64FeaturesViaCPUID_AA64ISAR2(cpu, aarch64ReadISAR2());
|
|
}
|
|
|
|
// Introduced in Linux 5.10 by "arm64: Expose SVE2 features for userspace", so we want at least 5.11.
|
|
if (kVer.atLeast(5, 11) && cpu.features().arm().hasAny(Ext::kSVE, Ext::kSME)) {
|
|
// Only read CPU_ID_AA64ZFR0 when either SVE or SME is available.
|
|
detectAArch64FeaturesViaCPUID_AA64ZFR0(cpu, aarch64ReadZFR0());
|
|
}
|
|
#endif
|
|
}
|
|
|
|
#endif // ASMJIT_ARM_DETECT_VIA_CPUID
|
|
|
|
// CpuInfo - Detect - ARM - Detect by Windows API
|
|
// ==============================================
|
|
|
|
#if defined(_WIN32)
|
|
// Maps a single AsmJit CPU feature to a feature id accepted by `IsProcessorFeaturePresent()`.
struct WinPFPMapping {
  // AsmJit feature id that is added when the processor feature is present.
  uint8_t featureId;
  // Processor feature id that is passed to `IsProcessorFeaturePresent()`.
  uint8_t pfpFeatureId;
};
|
|
|
|
// Adds every feature of `mapping` (an array of `size` entries) to `cpu` whose processor feature id is reported
// as present by `IsProcessorFeaturePresent()`.
static ASMJIT_FAVOR_SIZE void detectPFPFeatures(CpuInfo& cpu, const WinPFPMapping* mapping, size_t size) noexcept {
  const WinPFPMapping* const end = mapping + size;

  for (const WinPFPMapping* entry = mapping; entry != end; entry++) {
    if (!::IsProcessorFeaturePresent(entry->pfpFeatureId)) {
      continue;
    }
    cpu.addFeature(entry->featureId);
  }
}
|
|
|
|
//! Detect ARM CPU features on Windows.
|
|
//!
|
|
//! The detection is based on `IsProcessorFeaturePresent()` API call.
|
|
static ASMJIT_FAVOR_SIZE void detectARMCpu(CpuInfo& cpu) noexcept {
|
|
cpu._wasDetected = true;
|
|
populateBaseARMFeatures(cpu);
|
|
|
|
CpuFeatures::ARM& features = cpu.features().arm();
|
|
|
|
// Win32 for ARM requires ARMv7 with DSP extensions, VFPv3 (FP), and uses THUMBv2 by default.
|
|
#if ASMJIT_ARCH_ARM == 32
|
|
features.add(Ext::kTHUMB);
|
|
features.add(Ext::kTHUMBv2);
|
|
features.add(Ext::kARMv6);
|
|
features.add(Ext::kARMv7);
|
|
features.add(Ext::kEDSP);
|
|
#endif
|
|
|
|
// Windows for ARM requires FP and ASIMD.
|
|
features.add(Ext::kFP);
|
|
features.add(Ext::kASIMD);
|
|
|
|
// Detect additional CPU features by calling `IsProcessorFeaturePresent()`.
|
|
static const WinPFPMapping mapping[] = {
|
|
#if ASMJIT_ARCH_ARM == 32
|
|
{ uint8_t(Ext::kVFP_D32) , 18 }, // PF_ARM_VFP_32_REGISTERS_AVAILABLE
|
|
{ uint8_t(Ext::kIDIVT) , 24 }, // PF_ARM_DIVIDE_INSTRUCTION_AVAILABLE
|
|
{ uint8_t(Ext::kFMAC) , 27 }, // PF_ARM_FMAC_INSTRUCTIONS_AVAILABLE
|
|
{ uint8_t(Ext::kARMv8a) , 29 }, // PF_ARM_V8_INSTRUCTIONS_AVAILABLE
|
|
#endif
|
|
{ uint8_t(Ext::kAES) , 30 }, // PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE
|
|
{ uint8_t(Ext::kCRC32) , 31 }, // PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE
|
|
{ uint8_t(Ext::kLSE) , 34 }, // PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE
|
|
{ uint8_t(Ext::kDOTPROD) , 43 }, // PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE
|
|
{ uint8_t(Ext::kJSCVT) , 44 }, // PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE
|
|
{ uint8_t(Ext::kLRCPC) , 45 } // PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE
|
|
};
|
|
detectPFPFeatures(cpu, mapping, ASMJIT_ARRAY_SIZE(mapping));
|
|
|
|
// Windows can only report ARMv8A at the moment.
|
|
if (features.hasARMv8a()) {
|
|
populateARMv8AFeatures(cpu.features().arm(), 0);
|
|
}
|
|
|
|
// Windows provides several instructions under a single flag:
|
|
if (features.hasAES()) {
|
|
features.add(Ext::kPMULL, Ext::kSHA1, Ext::kSHA256);
|
|
}
|
|
|
|
postProcessARMCpuInfo(cpu);
|
|
}
|
|
|
|
// CpuInfo - Detect - ARM - Detect by Reading HWCAPS
|
|
// =================================================
|
|
|
|
#elif defined(ASMJIT_ARM_DETECT_VIA_HWCAPS)
|
|
|
|
// Fallback definitions of the auxiliary vector tags, used only when the included headers don't define them.
#if !defined(AT_HWCAP)
  #define AT_HWCAP 16
#endif // !AT_HWCAP

#if !defined(AT_HWCAP2)
  #define AT_HWCAP2 26
#endif // !AT_HWCAP2
|
|
|
|
// Reads `count` auxiliary vector entries identified by `tags` and stores their values to `vals`.
//
// Linux uses `getauxval()`. FreeBSD uses `elf_aux_info()` and stores zero for every tag it fails to read.
#if defined(__linux__)
static void getAuxValues(unsigned long* vals, const unsigned long* tags, size_t count) noexcept {
  const unsigned long* const tagsEnd = tags + count;

  while (tags != tagsEnd) {
    *vals++ = getauxval(*tags++);
  }
}
#elif defined(__FreeBSD__)
static void getAuxValues(unsigned long* vals, const unsigned long* tags, size_t count) noexcept {
  const unsigned long* const tagsEnd = tags + count;

  while (tags != tagsEnd) {
    unsigned long value = 0;
    const int err = elf_aux_info(int(*tags++), &value, int(sizeof(unsigned long)));

    // A non-zero return value is treated as a failure - the entry is then reported as zero.
    *vals++ = (err == 0) ? value : 0ul;
  }
}
#else
#error "[asmjit] getAuxValues() - Unsupported OS."
#endif
|
|
|
|
// Maps a single AsmJit CPU feature to a bit index within a HWCAP mask.
struct HWCapMapping {
  // AsmJit feature id that is added when the bit is set.
  uint8_t featureId;
  // Index of the bit to test in the HWCAP mask.
  uint8_t hwCapBit;
};
|
|
|
|
// Auxiliary vector tags to query - index 0 is AT_HWCAP and index 1 is AT_HWCAP2.
static const unsigned long hwCapTags[] = {
  AT_HWCAP,
  AT_HWCAP2
};
|
|
|
|
// Adds every feature of `mapping` (an array of `size` entries) to `cpu` whose `hwCapBit` is set in `mask`.
static ASMJIT_FAVOR_SIZE void mergeHWCaps(CpuInfo& cpu, unsigned long mask, const HWCapMapping* mapping, size_t size) noexcept {
  const HWCapMapping* const end = mapping + size;

  for (const HWCapMapping* entry = mapping; entry != end; entry++) {
    const bool isBitSet = Support::bitTest(mask, entry->hwCapBit);
    cpu.features().addIf(isBitSet, entry->featureId);
  }
}
|
|
|
|
#if ASMJIT_ARCH_ARM == 32
|
|
|
|
// Reference:
|
|
// - https://github.com/torvalds/linux/blob/master/arch/arm/include/uapi/asm/hwcap.h
|
|
static const HWCapMapping hwCapMapping[] = {
|
|
{ uint8_t(Ext::kEDSP) , 7 }, // HWCAP_EDSP
|
|
{ uint8_t(Ext::kASIMD) , 12 }, // HWCAP_NEON
|
|
{ uint8_t(Ext::kFP) , 13 }, // HWCAP_VFPv3
|
|
{ uint8_t(Ext::kFMAC) , 16 }, // HWCAP_VFPv4
|
|
{ uint8_t(Ext::kIDIVA) , 17 }, // HWCAP_IDIVA
|
|
{ uint8_t(Ext::kIDIVT) , 18 }, // HWCAP_IDIVT
|
|
{ uint8_t(Ext::kVFP_D32) , 19 }, // HWCAP_VFPD32
|
|
{ uint8_t(Ext::kFP16CONV) , 22 }, // HWCAP_FPHP
|
|
{ uint8_t(Ext::kFP16) , 23 }, // HWCAP_ASIMDHP
|
|
{ uint8_t(Ext::kDOTPROD) , 24 }, // HWCAP_ASIMDDP
|
|
{ uint8_t(Ext::kFHM) , 25 }, // HWCAP_ASIMDFHM
|
|
{ uint8_t(Ext::kBF16) , 26 }, // HWCAP_ASIMDBF16
|
|
{ uint8_t(Ext::kI8MM) , 27 } // HWCAP_I8MM
|
|
};
|
|
|
|
// AArch32 `AT_HWCAP2` table - each entry pairs a feature with the index of the HWCAP2_* bit that reports it.
static const HWCapMapping hwCap2Mapping[] = {
  { uint8_t(Ext::kAES), 0 },    // HWCAP2_AES
  { uint8_t(Ext::kPMULL), 1 },  // HWCAP2_PMULL
  { uint8_t(Ext::kSHA1), 2 },   // HWCAP2_SHA1
  { uint8_t(Ext::kSHA256), 3 }, // HWCAP2_SHA2
  { uint8_t(Ext::kCRC32), 4 },  // HWCAP2_CRC32
  { uint8_t(Ext::kSB), 5 },     // HWCAP2_SB
  { uint8_t(Ext::kSSBS), 6 }    // HWCAP2_SSBS
};
|
|
|
|
// Detects AArch32 CPU features from the `AT_HWCAP` and `AT_HWCAP2` auxiliary vector entries.
static ASMJIT_FAVOR_SIZE void detectARMCpu(CpuInfo& cpu) noexcept {
  cpu._wasDetected = true;
  populateBaseARMFeatures(cpu);

  // masks[0] receives AT_HWCAP and masks[1] receives AT_HWCAP2 (the order of `hwCapTags`).
  unsigned long masks[2] {};
  getAuxValues(masks, hwCapTags, 2u);

  mergeHWCaps(cpu, masks[0], hwCapMapping, ASMJIT_ARRAY_SIZE(hwCapMapping));
  mergeHWCaps(cpu, masks[1], hwCap2Mapping, ASMJIT_ARRAY_SIZE(hwCap2Mapping));

  CpuFeatures::ARM& armFeatures = cpu.features().arm();

  // ARMv7 provides FP|ASIMD, so the presence of either one implies ARMv7.
  const bool impliesARMv7 = armFeatures.hasFP() || armFeatures.hasASIMD();
  if (impliesARMv7) {
    armFeatures.add(CpuFeatures::ARM::kARMv7);
  }

  // ARMv8 provides AES, CRC32, PMULL, SHA1, and SHA256, so the presence of any of them implies ARMv8-A.
  const bool impliesARMv8a = armFeatures.hasAES()   ||
                             armFeatures.hasCRC32() ||
                             armFeatures.hasPMULL() ||
                             armFeatures.hasSHA1()  ||
                             armFeatures.hasSHA256();
  if (impliesARMv8a) {
    armFeatures.add(CpuFeatures::ARM::kARMv8a);
  }

  postProcessARMCpuInfo(cpu);
}
|
|
|
|
#else
|
|
|
|
// Reference:
|
|
// - https://docs.kernel.org/arch/arm64/elf_hwcaps.html
|
|
// - https://github.com/torvalds/linux/blob/master/arch/arm64/include/uapi/asm/hwcap.h
|
|
// AArch64 `AT_HWCAP` table - each entry pairs a feature with the index of the HWCAP_* bit that reports it.
//
// Bits without a mapping: 2 (HWCAP_EVTSTRM), 30 (HWCAP_PACA), and 31 (HWCAP_PACG).
static const HWCapMapping hwCapMapping[] = {
  { uint8_t(Ext::kFP), 0 },        // HWCAP_FP
  { uint8_t(Ext::kASIMD), 1 },     // HWCAP_ASIMD
  { uint8_t(Ext::kAES), 3 },       // HWCAP_AES
  { uint8_t(Ext::kPMULL), 4 },     // HWCAP_PMULL
  { uint8_t(Ext::kSHA1), 5 },      // HWCAP_SHA1
  { uint8_t(Ext::kSHA256), 6 },    // HWCAP_SHA2
  { uint8_t(Ext::kCRC32), 7 },     // HWCAP_CRC32
  { uint8_t(Ext::kLSE), 8 },       // HWCAP_ATOMICS
  { uint8_t(Ext::kFP16CONV), 9 },  // HWCAP_FPHP
  { uint8_t(Ext::kFP16), 10 },     // HWCAP_ASIMDHP
  { uint8_t(Ext::kCPUID), 11 },    // HWCAP_CPUID
  { uint8_t(Ext::kRDM), 12 },      // HWCAP_ASIMDRDM
  { uint8_t(Ext::kJSCVT), 13 },    // HWCAP_JSCVT
  { uint8_t(Ext::kFCMA), 14 },     // HWCAP_FCMA
  { uint8_t(Ext::kLRCPC), 15 },    // HWCAP_LRCPC
  { uint8_t(Ext::kDPB), 16 },      // HWCAP_DCPOP
  { uint8_t(Ext::kSHA3), 17 },     // HWCAP_SHA3
  { uint8_t(Ext::kSM3), 18 },      // HWCAP_SM3
  { uint8_t(Ext::kSM4), 19 },      // HWCAP_SM4
  { uint8_t(Ext::kDOTPROD), 20 },  // HWCAP_ASIMDDP
  { uint8_t(Ext::kSHA512), 21 },   // HWCAP_SHA512
  { uint8_t(Ext::kSVE), 22 },      // HWCAP_SVE
  { uint8_t(Ext::kFHM), 23 },      // HWCAP_ASIMDFHM
  { uint8_t(Ext::kDIT), 24 },      // HWCAP_DIT
  { uint8_t(Ext::kLSE2), 25 },     // HWCAP_USCAT
  { uint8_t(Ext::kLRCPC2), 26 },   // HWCAP_ILRCPC
  { uint8_t(Ext::kFLAGM), 27 },    // HWCAP_FLAGM
  { uint8_t(Ext::kSSBS), 28 },     // HWCAP_SSBS
  { uint8_t(Ext::kSB), 29 }        // HWCAP_SB
};
|
|
|
|
// AArch64 `AT_HWCAP2` table - each entry pairs a feature with the index of the HWCAP2_* bit that reports it.
static const HWCapMapping hwCap2Mapping[] = {
  { uint8_t(Ext::kDPB2), 0 },          // HWCAP2_DCPODP
  { uint8_t(Ext::kSVE2), 1 },          // HWCAP2_SVE2
  { uint8_t(Ext::kSVE_AES), 2 },       // HWCAP2_SVEAES
  { uint8_t(Ext::kSVE_PMULL128), 3 },  // HWCAP2_SVEPMULL
  { uint8_t(Ext::kSVE_BITPERM), 4 },   // HWCAP2_SVEBITPERM
  { uint8_t(Ext::kSVE_SHA3), 5 },      // HWCAP2_SVESHA3
  { uint8_t(Ext::kSVE_SM4), 6 },       // HWCAP2_SVESM4
  { uint8_t(Ext::kFLAGM2), 7 },        // HWCAP2_FLAGM2
  { uint8_t(Ext::kFRINTTS), 8 },       // HWCAP2_FRINT
  { uint8_t(Ext::kSVE_I8MM), 9 },      // HWCAP2_SVEI8MM
  { uint8_t(Ext::kSVE_F32MM), 10 },    // HWCAP2_SVEF32MM
  { uint8_t(Ext::kSVE_F64MM), 11 },    // HWCAP2_SVEF64MM
  { uint8_t(Ext::kSVE_BF16), 12 },     // HWCAP2_SVEBF16
  { uint8_t(Ext::kI8MM), 13 },         // HWCAP2_I8MM
  { uint8_t(Ext::kBF16), 14 },         // HWCAP2_BF16
  { uint8_t(Ext::kDGH), 15 },          // HWCAP2_DGH
  { uint8_t(Ext::kRNG), 16 },          // HWCAP2_RNG
  { uint8_t(Ext::kBTI), 17 },          // HWCAP2_BTI
  { uint8_t(Ext::kMTE), 18 },          // HWCAP2_MTE
  { uint8_t(Ext::kECV), 19 },          // HWCAP2_ECV
  { uint8_t(Ext::kAFP), 20 },          // HWCAP2_AFP
  { uint8_t(Ext::kRPRES), 21 },        // HWCAP2_RPRES
  { uint8_t(Ext::kMTE3), 22 },         // HWCAP2_MTE3
  { uint8_t(Ext::kSME), 23 },          // HWCAP2_SME
  { uint8_t(Ext::kSME_I16I64), 24 },   // HWCAP2_SME_I16I64
  { uint8_t(Ext::kSME_F64F64), 25 },   // HWCAP2_SME_F64F64
  { uint8_t(Ext::kSME_I8I32), 26 },    // HWCAP2_SME_I8I32
  { uint8_t(Ext::kSME_F16F32), 27 },   // HWCAP2_SME_F16F32
  { uint8_t(Ext::kSME_B16F32), 28 },   // HWCAP2_SME_B16F32
  { uint8_t(Ext::kSME_F32F32), 29 },   // HWCAP2_SME_F32F32
  { uint8_t(Ext::kSME_FA64), 30 },     // HWCAP2_SME_FA64
  { uint8_t(Ext::kWFXT), 31 },         // HWCAP2_WFXT
  { uint8_t(Ext::kEBF16), 32 },        // HWCAP2_EBF16
  { uint8_t(Ext::kSVE_EBF16), 33 },    // HWCAP2_SVE_EBF16
  { uint8_t(Ext::kCSSC), 34 },         // HWCAP2_CSSC
  { uint8_t(Ext::kRPRFM), 35 },        // HWCAP2_RPRFM
  { uint8_t(Ext::kSVE2_1), 36 },       // HWCAP2_SVE2P1
  { uint8_t(Ext::kSME2), 37 },         // HWCAP2_SME2
  { uint8_t(Ext::kSME2_1), 38 },       // HWCAP2_SME2P1
  { uint8_t(Ext::kSME_I16I32), 39 },   // HWCAP2_SME_I16I32
  { uint8_t(Ext::kSME_BI32I32), 40 },  // HWCAP2_SME_BI32I32
  { uint8_t(Ext::kSME_B16B16), 41 },   // HWCAP2_SME_B16B16
  { uint8_t(Ext::kSME_F16F16), 42 },   // HWCAP2_SME_F16F16
  { uint8_t(Ext::kMOPS), 43 },         // HWCAP2_MOPS
  { uint8_t(Ext::kHBC), 44 },          // HWCAP2_HBC
  { uint8_t(Ext::kSVE_B16B16), 45 },   // HWCAP2_SVE_B16B16
  { uint8_t(Ext::kLRCPC3), 46 },       // HWCAP2_LRCPC3
  { uint8_t(Ext::kLSE128), 47 },       // HWCAP2_LSE128
};
|
|
|
|
// Detects AArch64 CPU features from the `AT_HWCAP` and `AT_HWCAP2` auxiliary vector entries.
static ASMJIT_FAVOR_SIZE void detectARMCpu(CpuInfo& cpu) noexcept {
  cpu._wasDetected = true;
  populateBaseARMFeatures(cpu);

  // masks[0] receives AT_HWCAP and masks[1] receives AT_HWCAP2 (the order of `hwCapTags`).
  unsigned long masks[2] {};
  getAuxValues(masks, hwCapTags, 2u);

  mergeHWCaps(cpu, masks[0], hwCapMapping, ASMJIT_ARRAY_SIZE(hwCapMapping));
  mergeHWCaps(cpu, masks[1], hwCap2Mapping, ASMJIT_ARRAY_SIZE(hwCap2Mapping));

#if defined(ASMJIT_ARM_DETECT_VIA_CPUID)
  // When the CPUID feature is reported, the CPUID based detection takes over and this function returns
  // right after it (without calling `postProcessARMCpuInfo()` here).
  if (cpu.features().arm().hasCPUID()) {
    detectAArch64FeaturesViaCPUID(cpu);
    return;
  }
#endif // ASMJIT_ARM_DETECT_VIA_CPUID

  postProcessARMCpuInfo(cpu);
}
|
|
|
|
#endif // ASMJIT_ARCH_ARM
|
|
|
|
// CpuInfo - Detect - ARM - Detect by NetBSD API That Reads CPUID
|
|
// ==============================================================
|
|
|
|
#elif defined(__NetBSD__) && ASMJIT_ARCH_ARM >= 64
|
|
|
|
//! Position of AArch64 registers in a`aarch64_sysctl_cpu_id` struct, which is filled by sysctl().
|
|
struct NetBSDAArch64Regs {
|
|
enum ID : uint32_t {
|
|
k64_MIDR = 0, //!< Main ID Register.
|
|
k64_REVIDR = 8, //!< Revision ID Register.
|
|
k64_MPIDR = 16, //!< Multiprocessor Affinity Register.
|
|
k64_AA64DFR0 = 24, //!< A64 Debug Feature Register 0.
|
|
k64_AA64DFR1 = 32, //!< A64 Debug Feature Register 1.
|
|
k64_AA64ISAR0 = 40, //!< A64 Instruction Set Attribute Register 0.
|
|
k64_AA64ISAR1 = 48, //!< A64 Instruction Set Attribute Register 1.
|
|
k64_AA64MMFR0 = 56, //!< A64 Memory Model Feature Register 0.
|
|
k64_AA64MMFR1 = 64, //!< A64 Memory Model Feature Register 1.
|
|
k64_AA64MMFR2 = 72, //!< A64 Memory Model Feature Register 2.
|
|
k64_AA64PFR0 = 80, //!< A64 Processor Feature Register 0.
|
|
k64_AA64PFR1 = 88, //!< A64 Processor Feature Register 1.
|
|
k64_AA64ZFR0 = 96, //!< A64 SVE Feature ID Register 0.
|
|
k32_MVFR0 = 104, //!< Media and VFP Feature Register 0.
|
|
k32_MVFR1 = 108, //!< Media and VFP Feature Register 1.
|
|
k32_MVFR2 = 112, //!< Media and VFP Feature Register 2.
|
|
k32_PAD = 116, //!< Padding (not used).
|
|
k64_CLIDR = 120, //!< Cache Level ID Register.
|
|
k64_CTR = 128 //!< Cache Type Register.
|
|
};
|
|
|
|
enum Limits : uint32_t {
|
|
kBufferSize = 136
|
|
};
|
|
|
|
uint64_t data[kBufferSize / 8u];
|
|
|
|
ASMJIT_INLINE_NODEBUG uint64_t r64(uint32_t index) const noexcept {
|
|
ASMJIT_ASSERT(index % 8u == 0u);
|
|
return data[index / 8u];
|
|
}
|
|
|
|
ASMJIT_INLINE_NODEBUG uint32_t r32(uint32_t index) const noexcept {
|
|
ASMJIT_ASSERT(index % 4u == 0u);
|
|
uint32_t shift = (index % 8) * 8;
|
|
return uint32_t((r64(index) >> shift) & 0xFFFFFFFFu);
|
|
}
|
|
};
|
|
|
|
// Detects AArch64 CPU features on NetBSD by reading CPU ID registers of `cpu0` via `sysctlbyname()`.
//
// When the sysctl query fails only the base ARM features are populated and `cpu._wasDetected` is left untouched.
static ASMJIT_FAVOR_SIZE void detectARMCpu(CpuInfo& cpu) noexcept {
  using Regs = NetBSDAArch64Regs;

  populateBaseARMFeatures(cpu);

  // Zero initialized, so registers the kernel doesn't fill read as zero.
  Regs regs {};
  size_t len = sizeof(regs);
  const char sysctlCpuPath[] = "machdep.cpu0.cpu_id";

  if (sysctlbyname(sysctlCpuPath, &regs, &len, nullptr, 0) == 0) {
    // Mark the CPU as detected, consistently with other runtime detection paths in this file.
    cpu._wasDetected = true;

    detectAArch64FeaturesViaCPUID_AA64PFR0_AA64PFR1(cpu, regs.r64(Regs::k64_AA64PFR0), regs.r64(Regs::k64_AA64PFR1));
    detectAArch64FeaturesViaCPUID_AA64ISAR0_AA64ISAR1(cpu, regs.r64(Regs::k64_AA64ISAR0), regs.r64(Regs::k64_AA64ISAR1));

    // TODO: AA64ISAR2 should be added when it's provided by NetBSD.
    // detectAArch64FeaturesViaCPUID_AA64ISAR2(cpu, regs.r64(Regs::k64_AA64ISAR2));

    detectAArch64FeaturesViaCPUID_AA64MMFR0(cpu, regs.r64(Regs::k64_AA64MMFR0));
    detectAArch64FeaturesViaCPUID_AA64MMFR1(cpu, regs.r64(Regs::k64_AA64MMFR1));
    detectAArch64FeaturesViaCPUID_AA64MMFR2(cpu, regs.r64(Regs::k64_AA64MMFR2));

    // Only read CPU_ID_AA64ZFR0 when either SVE or SME is available.
    if (cpu.features().arm().hasAny(Ext::kSVE, Ext::kSME)) {
      detectAArch64FeaturesViaCPUID_AA64ZFR0(cpu, regs.r64(Regs::k64_AA64ZFR0));

      // TODO: AA64SMFR0 should be added when it's provided by NetBSD.
      // if (cpu.features().arm().hasSME()) {
      //   detectAArch64FeaturesViaCPUID_AA64SMFR0(cpu, regs.r64(Regs::k64_AA64SMFR0));
      // }
    }
  }

  postProcessARMCpuInfo(cpu);
}
|
|
|
|
// CpuInfo - Detect - ARM - Detect by OpenBSD API That Reads CPUID
|
|
// ===============================================================
|
|
|
|
#elif defined(__OpenBSD__) && ASMJIT_ARCH_ARM >= 64
|
|
|
|
// Supported CPUID registers on OpenBSD (CTL_MACHDEP definitions):
|
|
// - https://github.com/openbsd/src/blob/master/sys/arch/arm64/include/cpu.h
|
|
//! Identifiers of CPU ID registers, used as the second component of a `CTL_MACHDEP` sysctl name.
enum class OpenBSDAArch64CPUID : int {
  kAA64ISAR0 = 2,  //!< Instruction Set Attribute Register 0.
  kAA64ISAR1 = 3,  //!< Instruction Set Attribute Register 1.
  kAA64ISAR2 = 4,  //!< Instruction Set Attribute Register 2.
  kAA64MMFR0 = 5,  //!< Memory Model Feature Register 0.
  kAA64MMFR1 = 6,  //!< Memory Model Feature Register 1.
  kAA64MMFR2 = 7,  //!< Memory Model Feature Register 2.
  kAA64PFR0  = 8,  //!< Processor Feature Register 0.
  kAA64PFR1  = 9,  //!< Processor Feature Register 1.
  kAA64SMFR0 = 10, //!< SME Feature ID Register 0.
  kAA64ZFR0  = 11  //!< SVE Feature ID Register 0.
};
|
|
|
|
// Reads the CPU ID register identified by `id` via `sysctl()` and returns its value, or zero when the call fails.
static uint64_t openbsdReadAArch64CPUID(OpenBSDAArch64CPUID id) noexcept {
  int mib[2] = { CTL_MACHDEP, int(id) };

  uint64_t value = 0;
  size_t valueSize = sizeof(value);

  if (sysctl(mib, 2, &value, &valueSize, nullptr, 0) < 0) {
    return uint64_t(0);
  }

  return value;
}
|
|
|
|
// Detects AArch64 CPU features on OpenBSD by reading CPU ID registers via `sysctl()`.
//
// A register that cannot be read is treated as zero (see `openbsdReadAArch64CPUID()`).
static ASMJIT_FAVOR_SIZE void detectARMCpu(CpuInfo& cpu) noexcept {
  using ID = OpenBSDAArch64CPUID;

  // Mark the CPU as detected, consistently with other runtime detection paths in this file.
  cpu._wasDetected = true;
  populateBaseARMFeatures(cpu);

  detectAArch64FeaturesViaCPUID_AA64PFR0_AA64PFR1(cpu, openbsdReadAArch64CPUID(ID::kAA64PFR0), openbsdReadAArch64CPUID(ID::kAA64PFR1));
  detectAArch64FeaturesViaCPUID_AA64ISAR0_AA64ISAR1(cpu, openbsdReadAArch64CPUID(ID::kAA64ISAR0), openbsdReadAArch64CPUID(ID::kAA64ISAR1));
  detectAArch64FeaturesViaCPUID_AA64ISAR2(cpu, openbsdReadAArch64CPUID(ID::kAA64ISAR2));
  detectAArch64FeaturesViaCPUID_AA64MMFR0(cpu, openbsdReadAArch64CPUID(ID::kAA64MMFR0));
  detectAArch64FeaturesViaCPUID_AA64MMFR1(cpu, openbsdReadAArch64CPUID(ID::kAA64MMFR1));
  detectAArch64FeaturesViaCPUID_AA64MMFR2(cpu, openbsdReadAArch64CPUID(ID::kAA64MMFR2));

  // Only read CPU_ID_AA64ZFR0 when either SVE or SME is available.
  if (cpu.features().arm().hasAny(Ext::kSVE, Ext::kSME)) {
    detectAArch64FeaturesViaCPUID_AA64ZFR0(cpu, openbsdReadAArch64CPUID(ID::kAA64ZFR0));

    // AA64SMFR0 is only read when SME is available.
    if (cpu.features().arm().hasSME()) {
      detectAArch64FeaturesViaCPUID_AA64SMFR0(cpu, openbsdReadAArch64CPUID(ID::kAA64SMFR0));
    }
  }

  postProcessARMCpuInfo(cpu);
}
|
|
|
|
// CpuInfo - Detect - ARM - Detect by Apple API (sysctlbyname)
|
|
// ===========================================================
|
|
|
|
#elif defined(__APPLE__)
|
|
|
|
//! Selects the sysctl name prefix that is prepended to a feature name.
enum class AppleFeatureType : uint8_t {
  //! The feature is queried as "hw.optional.<name>".
  kHWOptional = 0,
  //! The feature is queried as "hw.optional.arm.FEAT_<name>".
  kHWOptionalArmFEAT = 1
};
|
|
|
|
struct AppleFeatureMapping {
|
|
AppleFeatureType type;
|
|
char name[18];
|
|
uint8_t featureId;
|
|
};
|
|
|
|
// Reads the sysctl value called `sysctlName` into `dst`, which provides `size` bytes of storage (`sizeof(T)` by
// default). Returns true when `sysctlbyname()` succeeds.
template<typename T>
static inline bool appleSysctlByName(const char* sysctlName, T* dst, size_t size = sizeof(T)) noexcept {
  const int rc = sysctlbyname(sysctlName, dst, &size, nullptr, 0);
  return rc == 0;
}
|
|
|
|
// Queries a single optional feature via sysctl and returns the reported value.
//
// The sysctl name is composed of a prefix selected by `type` followed by `featureName`. Zero is returned when
// the composed name doesn't fit into the local buffer or when the sysctl query fails.
static ASMJIT_FAVOR_SIZE long appleDetectARMFeatureViaSysctl(AppleFeatureType type, const char* featureName) noexcept {
  static const char hwOptionalPrefix[] = "hw.optional.";
  static const char hwOptionalArmFeatPrefix[] = "hw.optional.arm.FEAT_";

  const char* prefix;
  size_t prefixSize; // Length of the prefix without its NUL terminator.

  if (type == AppleFeatureType::kHWOptional) {
    prefix = hwOptionalPrefix;
    prefixSize = sizeof(hwOptionalPrefix) - 1u;
  }
  else {
    prefix = hwOptionalArmFeatPrefix;
    prefixSize = sizeof(hwOptionalArmFeatPrefix) - 1u;
  }

  char sysctlName[128];
  size_t featureNameSize = strlen(featureName);

  // Prefix + feature name + NUL terminator must fit into `sysctlName`.
  if (featureNameSize >= sizeof(sysctlName) - prefixSize) {
    return 0;
  }

  memcpy(sysctlName, prefix, prefixSize);
  memcpy(sysctlName + prefixSize, featureName, featureNameSize + 1u); // Copies the NUL terminator as well.

  long value = 0;
  return appleSysctlByName<long>(sysctlName, &value) ? value : 0;
}
|
|
|
|
// Detects optional ARM features by querying one sysctl per table entry.
//
// Several features are listed under more than one sysctl name - a feature that is already present in `cpu` is
// not queried again.
static ASMJIT_FAVOR_SIZE void appleDetectARMFeaturesViaSysctl(CpuInfo& cpu) noexcept {
  using FT = AppleFeatureType;

  // Based on:
  //   - https://developer.apple.com/documentation/kernel/1387446-sysctlbyname/determining_instruction_set_characteristics
  static const AppleFeatureMapping mappings[] = {
    // Advanced SIMD and floating point capabilities.
    { FT::kHWOptional, "AdvSIMD_HPFPCvt", uint8_t(Ext::kFP16CONV) },
    { FT::kHWOptional, "neon_hpfp", uint8_t(Ext::kFP16CONV) },
    { FT::kHWOptionalArmFEAT, "BF16", uint8_t(Ext::kBF16) },
    { FT::kHWOptionalArmFEAT, "DotProd", uint8_t(Ext::kDOTPROD) },
    { FT::kHWOptionalArmFEAT, "FCMA", uint8_t(Ext::kFCMA) },
    { FT::kHWOptional, "armv8_3_compnum", uint8_t(Ext::kFCMA) },
    { FT::kHWOptionalArmFEAT, "FHM", uint8_t(Ext::kFHM) },
    { FT::kHWOptional, "armv8_2_fhm", uint8_t(Ext::kFHM) },
    { FT::kHWOptionalArmFEAT, "FP16", uint8_t(Ext::kFP16) },
    { FT::kHWOptional, "neon_fp16", uint8_t(Ext::kFP16) },
    { FT::kHWOptionalArmFEAT, "FRINTTS", uint8_t(Ext::kFRINTTS) },
    { FT::kHWOptionalArmFEAT, "I8MM", uint8_t(Ext::kI8MM) },
    { FT::kHWOptionalArmFEAT, "JSCVT", uint8_t(Ext::kJSCVT) },
    { FT::kHWOptionalArmFEAT, "RDM", uint8_t(Ext::kRDM) },

    // Integer capabilities.
    { FT::kHWOptional, "armv8_crc32", uint8_t(Ext::kCRC32) },
    { FT::kHWOptionalArmFEAT, "FlagM", uint8_t(Ext::kFLAGM) },
    { FT::kHWOptionalArmFEAT, "FlagM2", uint8_t(Ext::kFLAGM2) },

    // Atomic and memory ordering instruction capabilities.
    { FT::kHWOptionalArmFEAT, "LRCPC", uint8_t(Ext::kLRCPC) },
    { FT::kHWOptionalArmFEAT, "LRCPC2", uint8_t(Ext::kLRCPC2) },
    { FT::kHWOptional, "armv8_1_atomics", uint8_t(Ext::kLSE) },
    { FT::kHWOptionalArmFEAT, "LSE", uint8_t(Ext::kLSE) },
    { FT::kHWOptionalArmFEAT, "LSE2", uint8_t(Ext::kLSE2) },

    // Encryption capabilities.
    { FT::kHWOptionalArmFEAT, "AES", uint8_t(Ext::kAES) },
    { FT::kHWOptionalArmFEAT, "PMULL", uint8_t(Ext::kPMULL) },
    { FT::kHWOptionalArmFEAT, "SHA1", uint8_t(Ext::kSHA1) },
    { FT::kHWOptionalArmFEAT, "SHA256", uint8_t(Ext::kSHA256) },
    { FT::kHWOptionalArmFEAT, "SHA512", uint8_t(Ext::kSHA512) },
    { FT::kHWOptional, "armv8_2_sha512", uint8_t(Ext::kSHA512) },
    { FT::kHWOptionalArmFEAT, "SHA3", uint8_t(Ext::kSHA3) },
    { FT::kHWOptional, "armv8_2_sha3", uint8_t(Ext::kSHA3) },

    // General capabilities.
    { FT::kHWOptionalArmFEAT, "BTI", uint8_t(Ext::kBTI) },
    { FT::kHWOptionalArmFEAT, "DPB", uint8_t(Ext::kDPB) },
    { FT::kHWOptionalArmFEAT, "DPB2", uint8_t(Ext::kDPB2) },
    { FT::kHWOptionalArmFEAT, "ECV", uint8_t(Ext::kECV) },
    { FT::kHWOptionalArmFEAT, "SB", uint8_t(Ext::kSB) },
    { FT::kHWOptionalArmFEAT, "SSBS", uint8_t(Ext::kSSBS) }
  };

  for (const AppleFeatureMapping& entry : mappings) {
    // Skip the sysctl query when the feature is already known.
    if (cpu.features().arm().has(entry.featureId)) {
      continue;
    }

    if (appleDetectARMFeatureViaSysctl(entry.type, entry.name)) {
      cpu.features().arm().add(entry.featureId);
    }
  }
}
|
|
|
|
// Detects the CPU on Apple platforms - basic information is read via sysctl, features are derived from the CPU
// family id when possible and queried via sysctl otherwise.
static ASMJIT_FAVOR_SIZE void detectARMCpu(CpuInfo& cpu) noexcept {
  cpu._wasDetected = true;
  populateBaseARMFeatures(cpu);

  // Results of these queries are intentionally not checked - a field is simply not written when its query fails.
  appleSysctlByName<uint32_t>("hw.cpufamily", &cpu._familyId);
  appleSysctlByName<uint32_t>("hw.cachelinesize", &cpu._cacheLineSize);
  appleSysctlByName<uint32_t>("machdep.cpu.logical_per_package", &cpu._maxLogicalProcessors);
  appleSysctlByName<char>("machdep.cpu.brand_string", cpu._brand.str, sizeof(cpu._brand.str));

  // Copies 6 bytes - the vendor string including its NUL terminator.
  memcpy(cpu._vendor.str, "APPLE", 6);

  // Query individual features via sysctl only when the family id didn't populate them.
  if (!detectARMFeaturesViaAppleFamilyId(cpu)) {
    appleDetectARMFeaturesViaSysctl(cpu);
  }

  postProcessARMCpuInfo(cpu);
}
|
|
|
|
// CpuInfo - Detect - ARM - Detect by Fallback (Using Compiler Flags)
|
|
// ==================================================================
|
|
|
|
#else
|
|
|
|
#if ASMJIT_ARCH_ARM == 32
|
|
#pragma message("[asmjit] Disabling runtime CPU detection - unsupported OS/CPU combination (Unknown OS with AArch32 CPU)")
|
|
#else
|
|
#pragma message("[asmjit] Disabling runtime CPU detection - unsupported OS/CPU combination (Unknown OS with AArch64 CPU)")
|
|
#endif
|
|
|
|
// Fallback that is used when no runtime detection is available for the target - features are taken from the
// flags the code was compiled with.
static ASMJIT_FAVOR_SIZE void detectARMCpu(CpuInfo& cpu) noexcept {
  // Base features first, then features enabled at compile time.
  populateBaseARMFeatures(cpu);
  detectARMFeaturesViaCompilerFlags(cpu);

  postProcessARMCpuInfo(cpu);
}
|
|
#endif
|
|
|
|
} // {arm}
|
|
|
|
#endif
|
|
|
|
// CpuInfo - Detect - Host
|
|
// =======================
|
|
|
|
// Returns information about the host CPU, which is detected lazily by the first call.
//
// The detection is not guarded by a lock. If multiple threads call `host()` before the initialized flag is
// published, each of them detects the CPU and assigns the result to the same global - the resulting information
// should always be the same, so in the worst case it's overwritten non-atomically by identical data.
const CpuInfo& CpuInfo::host() noexcept {
  static std::atomic<uint32_t> cpuInfoInitialized;
  static CpuInfo cpuInfoGlobal(Globals::NoInit);

  // Acquire pairs with the store below - a thread that observes the flag set is guaranteed to also observe
  // `cpuInfoGlobal` written by the thread that set it (a relaxed load doesn't provide this guarantee).
  if (!cpuInfoInitialized.load(std::memory_order_acquire)) {
    CpuInfo cpuInfoLocal;

    cpuInfoLocal._arch = Arch::kHost;
    cpuInfoLocal._subArch = SubArch::kHost;

#if ASMJIT_ARCH_X86
    x86::detectX86Cpu(cpuInfoLocal);
#elif ASMJIT_ARCH_ARM
    arm::detectARMCpu(cpuInfoLocal);
#endif

    cpuInfoLocal._hwThreadCount = detectHWThreadCount();

    // Publish the data first and the flag last (the sequentially consistent store has release semantics).
    cpuInfoGlobal = cpuInfoLocal;
    cpuInfoInitialized.store(1, std::memory_order_seq_cst);
  }

  return cpuInfoGlobal;
}
|
|
|
|
ASMJIT_END_NAMESPACE
|