[Bug] Use mremap() to allocate a dual-mapped region on NetBSD

In addition, always enable dual mapping in JitAllocator when RWX
pages cannot be allocated, because otherwise the allocator would be
unable to allocate memory for JIT code execution.

New CI runners to test FreeBSD, NetBSD, and OpenBSD are also
provided.
kobalicek
2023-02-23 00:28:27 +01:00
parent 8c31a8f34f
commit 9d33c892f7
5 changed files with 4082 additions and 3827 deletions
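
Before the diffs, a minimal illustration of the W^X restriction the commit message refers to. This is a hedged sketch in plain POSIX, not code from this commit; the exact errno and the set of affected systems vary:

#include <sys/mman.h>
#include <cstdio>

int main() {
  // On a hardened system (e.g. OpenBSD, or NetBSD with PaX MPROTECT enabled),
  // requesting pages that are simultaneously writable and executable fails,
  // which is why JitAllocator has to fall back to dual mapping there.
  void* p = mmap(nullptr, 4096, PROT_READ | PROT_WRITE | PROT_EXEC,
                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  std::printf("RWX mmap %s\n", p == MAP_FAILED ? "failed" : "succeeded");
  if (p != MAP_FAILED)
    munmap(p, 4096);
  return 0;
}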

View File

@@ -149,3 +149,93 @@ jobs:
- name: "Test" - name: "Test"
run: python build-actions/action.py --step=test run: python build-actions/action.py --step=test
build-vm:
strategy:
fail-fast: false
matrix:
include:
- { host: "macos-12", os: "freebsd", osver: "13.1", cc: "clang", arch: "x86-64", build_type: "Release", defs: "ASMJIT_TEST=ON" }
- { host: "macos-12", os: "openbsd", osver: "7.2" , cc: "clang", arch: "x86-64", build_type: "Release", defs: "ASMJIT_TEST=ON" }
name: "${{matrix.os}}-${{matrix.osver}} (${{matrix.cc}}, ${{matrix.arch}}, ${{matrix.build_type}})"
runs-on: ${{matrix.host}}
steps:
- name: "Checkout"
uses: actions/checkout@v3
with:
path: "source"
- name: "Checkout Build Actions"
uses: actions/checkout@v3
with:
repository: build-actions/build-actions
path: "build-actions"
- name: Build & Test in VM
uses: cross-platform-actions/action@master
with:
operating_system: ${{matrix.os}}
architecture: ${{matrix.arch}}
version: ${{matrix.osver}}
shell: bash
run: |
set -e
PATH="/usr/sbin:/usr/pkg/sbin:/usr/pkg/bin:$PATH:$(pwd)/build-actions"
export PATH
sh ./build-actions/install-python3.sh
python3 build-actions/action.py \
--step=all \
--compiler=${{matrix.cc}} \
--architecture=${{matrix.arch}} \
--source-dir=source \
--config=source/.github/workflows/build-config.json \
--build-type=${{matrix.build_type}} \
--build-defs=${{matrix.defs}}
build-netbsd:
strategy:
fail-fast: false
matrix:
include:
- { title: "netbsd", host: "macos-12", os: "netbsd", cc: "clang", arch: "x86_64", build_type: "Release", defs: "ASMJIT_TEST=ON" }
name: "${{matrix.title}} (${{matrix.cc}}, ${{matrix.arch}}, ${{matrix.build_type}})"
runs-on: ${{matrix.host}}
steps:
- name: "Checkout"
uses: actions/checkout@v3
with:
path: "source"
- name: "Checkout Build Actions"
uses: actions/checkout@v3
with:
repository: build-actions/build-actions
path: "build-actions"
- name: Build & Test (VM)
uses: vmactions/netbsd-vm@v0
with:
mem: 6144
usesh: true
copyback: false
run: |
set -e
PATH="/usr/sbin:/usr/pkg/sbin:/usr/pkg/bin:$PATH:$(pwd)/build-actions"
export PATH
bash ./build-actions/install-python3.sh
python3 ./build-actions/action.py \
--step=all \
--compiler=${{matrix.cc}} \
--architecture=${{matrix.arch}} \
--source-dir=source \
--config=source/.github/workflows/build-config.json \
--build-type=${{matrix.build_type}} \
--build-defs=${{matrix.defs}}

View File

@@ -430,6 +430,15 @@ static inline JitAllocatorPrivateImpl* JitAllocatorImpl_new(const JitAllocator::
   if (ASMJIT_UNLIKELY(!p))
     return nullptr;
 
+  VirtMem::HardenedRuntimeInfo hardenedRtInfo = VirtMem::hardenedRuntimeInfo();
+  if (Support::test(hardenedRtInfo.flags, VirtMem::HardenedRuntimeFlags::kEnabled)) {
+    // If we are running within a hardened environment (mapping RWX is not allowed), we have to use dual mapping
+    // or other runtime capabilities such as the Apple-specific MAP_JIT. There is no point in not enabling these,
+    // as otherwise the allocation would fail and JitAllocator would not be able to allocate memory.
+    if (!Support::test(hardenedRtInfo.flags, VirtMem::HardenedRuntimeFlags::kMapJit))
+      options |= JitAllocatorOptions::kUseDualMapping;
+  }
+
   JitAllocatorPool* pools = reinterpret_cast<JitAllocatorPool*>((uint8_t*)p + sizeof(JitAllocatorPrivateImpl));
   JitAllocatorPrivateImpl* impl = new(p) JitAllocatorPrivateImpl(pools, poolCount);
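
To see which path a given system takes, the query used above can be called directly. A small sketch using only names that appear in this diff (hardenedRuntimeInfo(), HardenedRuntimeFlags::kEnabled/kMapJit, Support::test); treat it as illustrative rather than canonical API usage:

#include <asmjit/core.h>
#include <cstdio>

using namespace asmjit;

int main() {
  VirtMem::HardenedRuntimeInfo rt = VirtMem::hardenedRuntimeInfo();
  bool hardened = Support::test(rt.flags, VirtMem::HardenedRuntimeFlags::kEnabled);
  bool mapJit = Support::test(rt.flags, VirtMem::HardenedRuntimeFlags::kMapJit);

  // Mirrors the new logic above: hardened && !mapJit means JitAllocator
  // will force JitAllocatorOptions::kUseDualMapping on its own.
  std::printf("hardened=%d map_jit=%d\n", int(hardened), int(mapJit));
  return 0;
}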

View File

@@ -26,6 +26,10 @@ enum class JitAllocatorOptions : uint32_t {
   //! The first buffer has read and execute permissions and the second buffer has read+write permissions.
   //!
   //! See \ref VirtMem::allocDualMapping() for more details about this feature.
+  //!
+  //! \remarks Dual mapping is automatically turned on by \ref JitAllocator when a hardened runtime enforces a
+  //! `W^X` policy, so specifying this flag essentially forces the use of dual-mapped pages even when RWX pages
+  //! could be allocated and dual mapping is not necessary.
   kUseDualMapping = 0x00000001u,
 
   //! Enables the use of multiple pools with increasing granularity instead of a single pool. This flag would enable

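As the remark says, the flag can still be passed explicitly to force dual mapping even where RWX pages are available. A hedged sketch; CreateParams and the two-pointer alloc() signature match the asmjit API of this period but may differ in other versions:

#include <asmjit/core.h>

using namespace asmjit;

int main() {
  JitAllocator::CreateParams params {};
  params.options = JitAllocatorOptions::kUseDualMapping; // force dual-mapped RX/RW views

  JitAllocator allocator(&params);

  void* rx = nullptr; // read+execute view
  void* rw = nullptr; // read+write view of the same physical pages
  if (allocator.alloc(&rx, &rw, 256) != kErrorOk)
    return 1;

  // Write machine code through `rw`, execute through `rx`; W^X is preserved
  // because neither view is ever writable and executable at the same time.
  allocator.release(rx);
  return 0;
}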
View File

@@ -42,9 +42,8 @@
   #if !defined(MAP_ANONYMOUS)
     #define MAP_ANONYMOUS MAP_ANON
   #endif
-#endif
-
-#include <atomic>
+
+  #define ASMJIT_DUAL_MAPPING_ANON_FD
 
   #if defined(__APPLE__) || defined(__BIONIC__)
     #define ASMJIT_VM_SHM_DETECT 0
@@ -53,7 +52,7 @@
   #endif
 
   // Android NDK doesn't provide `shm_open()` and `shm_unlink()`.
-  #if !defined(_WIN32) && !defined(__BIONIC__)
+  #if !defined(__BIONIC__)
     #define ASMJIT_HAS_SHM_OPEN_AND_UNLINK
   #endif
@@ -61,12 +60,20 @@
     #define ASMJIT_HAS_PTHREAD_JIT_WRITE_PROTECT_NP
   #endif
+
+  #if defined(__NetBSD__) && defined(MAP_REMAPDUP) && defined(PROT_MPROTECT)
+    #undef ASMJIT_DUAL_MAPPING_ANON_FD
+    #define ASMJIT_DUAL_MAPPING_REMAPDUP
+  #endif
+#endif
+
+#include <atomic>
 
 ASMJIT_BEGIN_SUB_NAMESPACE(VirtMem)
 
 // Virtual Memory Utilities
 // ========================
 
-static const MemoryFlags dualMappingFilter[2] = {
+static const constexpr MemoryFlags dualMappingFilter[2] = {
   MemoryFlags::kAccessWrite | MemoryFlags::kMMapMaxAccessWrite,
   MemoryFlags::kAccessExecute | MemoryFlags::kMMapMaxAccessExecute
 };
@@ -217,19 +224,8 @@ Error releaseDualMapping(DualMapping* dm, size_t size) noexcept {
 #if !defined(_WIN32)
 
-static void getVMInfo(Info& vmInfo) noexcept {
-  uint32_t pageSize = uint32_t(::getpagesize());
-  vmInfo.pageSize = pageSize;
-  vmInfo.pageGranularity = Support::max<uint32_t>(pageSize, 65536);
-}
-
-#if !defined(SHM_ANON)
-static const char* getTmpDir() noexcept {
-  const char* tmpDir = getenv("TMPDIR");
-  return tmpDir ? tmpDir : "/tmp";
-}
-#endif
+// Virtual Memory [Posix] - Utilities
+// ==================================
 
 // Translates libc errors specific to VirtualMemory mapping to `asmjit::Error`.
 static Error asmjitErrorFromErrno(int e) noexcept {
@@ -254,16 +250,61 @@ static Error asmjitErrorFromErrno(int e) noexcept {
   }
 }
 
+static void getVMInfo(Info& vmInfo) noexcept {
+  uint32_t pageSize = uint32_t(::getpagesize());
+  vmInfo.pageSize = pageSize;
+  vmInfo.pageGranularity = Support::max<uint32_t>(pageSize, 65536);
+}
+
+#if defined(__APPLE__) && TARGET_OS_OSX
+static int getOSXVersion() noexcept {
+  // MAP_JIT flag required to run unsigned JIT code is only supported by kernel version 10.14+ (Mojave).
+  static std::atomic<int> globalVersion;
+
+  int ver = globalVersion.load();
+  if (!ver) {
+    struct utsname osname {};
+    uname(&osname);
+    ver = atoi(osname.release);
+    globalVersion.store(ver);
+  }
+  return ver;
+}
+#endif // __APPLE__ && TARGET_OS_OSX
+
+// Returns `mmap()` protection flags from \ref MemoryFlags.
+static int mmProtFromMemoryFlags(MemoryFlags memoryFlags) noexcept {
+  int protection = 0;
+  if (Support::test(memoryFlags, MemoryFlags::kAccessRead)) protection |= PROT_READ;
+  if (Support::test(memoryFlags, MemoryFlags::kAccessWrite)) protection |= PROT_READ | PROT_WRITE;
+  if (Support::test(memoryFlags, MemoryFlags::kAccessExecute)) protection |= PROT_READ | PROT_EXEC;
+  return protection;
+}
+
+// Virtual Memory [Posix] - Anonymous Memory
+// =========================================
+
+#if defined(ASMJIT_DUAL_MAPPING_ANON_FD)
+
 // Some operating systems don't allow /dev/shm to be executable. On Linux this happens when /dev/shm is mounted with
 // 'noexec', which is enforced by systemd. Other operating systems like MacOS also restrict executable permissions
 // regarding /dev/shm, so we use a runtime detection before attempting to allocate executable memory. Sometimes we
-// don't need the detection as we know it would always result in `ShmStrategy::kTmpDir`.
-enum class ShmStrategy : uint32_t {
+// don't need the detection as we know it would always result in `AnonymousMemStrategy::kTmpDir`.
+enum class AnonymousMemStrategy : uint32_t {
   kUnknown = 0,
   kDevShm = 1,
   kTmpDir = 2
 };
 
+#if !defined(SHM_ANON)
+static const char* getTmpDir() noexcept {
+  const char* tmpDir = getenv("TMPDIR");
+  return tmpDir ? tmpDir : "/tmp";
+}
+#endif
+
 class AnonymousMemory {
 public:
   enum FileType : uint32_t {
@@ -398,31 +439,54 @@ public:
   }
 };
 
-// Returns `mmap()` protection flags from \ref MemoryFlags.
-static int mmProtFromMemoryFlags(MemoryFlags memoryFlags) noexcept {
-  int protection = 0;
-  if (Support::test(memoryFlags, MemoryFlags::kAccessRead)) protection |= PROT_READ;
-  if (Support::test(memoryFlags, MemoryFlags::kAccessWrite)) protection |= PROT_READ | PROT_WRITE;
-  if (Support::test(memoryFlags, MemoryFlags::kAccessExecute)) protection |= PROT_READ | PROT_EXEC;
-  return protection;
-}
+#if ASMJIT_VM_SHM_DETECT
+static Error detectAnonMemStrategy(AnonymousMemStrategy* strategyOut) noexcept {
+  AnonymousMemory anonMem;
+  Info vmInfo = info();
+
+  ASMJIT_PROPAGATE(anonMem.open(false));
+  ASMJIT_PROPAGATE(anonMem.allocate(vmInfo.pageSize));
+
+  void* ptr = mmap(nullptr, vmInfo.pageSize, PROT_READ | PROT_EXEC, MAP_SHARED, anonMem.fd(), 0);
+  if (ptr == MAP_FAILED) {
+    int e = errno;
+    if (e == EINVAL) {
+      *strategyOut = AnonymousMemStrategy::kTmpDir;
+      return kErrorOk;
+    }
+    return DebugUtils::errored(asmjitErrorFromErrno(e));
+  }
+  else {
+    munmap(ptr, vmInfo.pageSize);
+    *strategyOut = AnonymousMemStrategy::kDevShm;
+    return kErrorOk;
+  }
+}
+#endif
 
-#if defined(__APPLE__) && TARGET_OS_OSX
-static int getOSXVersion() noexcept {
-  // MAP_JIT flag required to run unsigned JIT code is only supported by kernel version 10.14+ (Mojave).
-  static std::atomic<int> globalVersion;
-
-  int ver = globalVersion.load();
-  if (!ver) {
-    struct utsname osname {};
-    uname(&osname);
-    ver = atoi(osname.release);
-    globalVersion.store(ver);
-  }
-  return ver;
-}
-#endif // __APPLE__ && TARGET_OS_OSX
+static Error getAnonMemStrategy(AnonymousMemStrategy* strategyOut) noexcept {
+#if ASMJIT_VM_SHM_DETECT
+  // Initially don't assume anything. It has to be tested whether '/dev/shm' was mounted with 'noexec' flag or not.
+  static std::atomic<uint32_t> globalShmStrategy;
+
+  AnonymousMemStrategy strategy = static_cast<AnonymousMemStrategy>(globalShmStrategy.load());
+  if (strategy == AnonymousMemStrategy::kUnknown) {
+    ASMJIT_PROPAGATE(detectAnonMemStrategy(&strategy));
+    globalShmStrategy.store(static_cast<uint32_t>(strategy));
+  }
+
+  *strategyOut = strategy;
+  return kErrorOk;
+#else
+  *strategyOut = AnonymousMemStrategy::kTmpDir;
+  return kErrorOk;
+#endif
+}
+
+#endif // ASMJIT_DUAL_MAPPING_ANON_FD
+
+// Virtual Memory [Posix] - Hardened Runtime & MAP_JIT
+// ===================================================
 
 // Detects whether the current process is hardened, which means that pages that have WRITE and EXECUTABLE flags
 // cannot be normally allocated. On OSX + AArch64 such allocation requires MAP_JIT flag, other platforms don't
@@ -505,50 +569,6 @@ static inline int mmMaxProtFromMemoryFlags(MemoryFlags memoryFlags) noexcept {
 #endif
 }
 
-#if ASMJIT_VM_SHM_DETECT
-static Error detectShmStrategy(ShmStrategy* strategyOut) noexcept {
-  AnonymousMemory anonMem;
-  Info vmInfo = info();
-
-  ASMJIT_PROPAGATE(anonMem.open(false));
-  ASMJIT_PROPAGATE(anonMem.allocate(vmInfo.pageSize));
-
-  void* ptr = mmap(nullptr, vmInfo.pageSize, PROT_READ | PROT_EXEC, MAP_SHARED, anonMem.fd(), 0);
-  if (ptr == MAP_FAILED) {
-    int e = errno;
-    if (e == EINVAL) {
-      *strategyOut = ShmStrategy::kTmpDir;
-      return kErrorOk;
-    }
-    return DebugUtils::errored(asmjitErrorFromErrno(e));
-  }
-  else {
-    munmap(ptr, vmInfo.pageSize);
-    *strategyOut = ShmStrategy::kDevShm;
-    return kErrorOk;
-  }
-}
-#endif
-
-static Error getShmStrategy(ShmStrategy* strategyOut) noexcept {
-#if ASMJIT_VM_SHM_DETECT
-  // Initially don't assume anything. It has to be tested whether '/dev/shm' was mounted with 'noexec' flag or not.
-  static std::atomic<uint32_t> globalShmStrategy;
-
-  ShmStrategy strategy = static_cast<ShmStrategy>(globalShmStrategy.load());
-  if (strategy == ShmStrategy::kUnknown) {
-    ASMJIT_PROPAGATE(detectShmStrategy(&strategy));
-    globalShmStrategy.store(static_cast<uint32_t>(strategy));
-  }
-
-  *strategyOut = strategy;
-  return kErrorOk;
-#else
-  *strategyOut = ShmStrategy::kTmpDir;
-  return kErrorOk;
-#endif
-}
-
 static HardenedRuntimeFlags getHardenedRuntimeFlags() noexcept {
   HardenedRuntimeFlags flags = HardenedRuntimeFlags::kNone;
@@ -593,6 +613,53 @@ Error protect(void* p, size_t size, MemoryFlags memoryFlags) noexcept {
   return DebugUtils::errored(kErrorInvalidArgument);
 }
 
+// Virtual Memory [Posix] - Dual Mapping
+// =====================================
+
+#if defined(ASMJIT_DUAL_MAPPING_REMAPDUP)
+static void unmapDualMapping(DualMapping* dm, size_t size) noexcept {
+  if (dm->rw)
+    munmap(dm->rw, size);
+  if (dm->rx)
+    munmap(dm->rx, size);
+}
+
+static Error allocDualMappingUsingRemapdup(DualMapping* dmOut, size_t size, MemoryFlags memoryFlags) noexcept {
+  DualMapping dm {};
+
+  dm.rw = mmap(NULL, size, PROT_MPROTECT(mmProtFromMemoryFlags(memoryFlags)), MAP_ANONYMOUS, -1, 0);
+  if (dm.rw == MAP_FAILED) {
+    return DebugUtils::errored(asmjitErrorFromErrno(errno));
+  }
+
+  dm.rx = mremap(dm.rw, size, NULL, size, MAP_REMAPDUP);
+  if (dm.rx == MAP_FAILED) {
+    int e = errno;
+    unmapDualMapping(&dm, size);
+    return DebugUtils::errored(asmjitErrorFromErrno(e));
+  }
+
+  MemoryFlags rxAccessFlags = memoryFlags & ~dualMappingFilter[0];
+  MemoryFlags rwAccessFlags = memoryFlags & ~dualMappingFilter[1];
+
+  if (mprotect(dm.rw, size, mmProtFromMemoryFlags(rwAccessFlags)) != 0) {
+    int e = errno;
+    unmapDualMapping(&dm, size);
+    return DebugUtils::errored(asmjitErrorFromErrno(e));
+  }
+
+  if (mprotect(dm.rx, size, mmProtFromMemoryFlags(rxAccessFlags)) != 0) {
+    int e = errno;
+    unmapDualMapping(&dm, size);
+    return DebugUtils::errored(asmjitErrorFromErrno(e));
+  }
+
+  *dmOut = dm;
+  return kErrorOk;
+}
+#endif
 
 Error allocDualMapping(DualMapping* dm, size_t size, MemoryFlags memoryFlags) noexcept {
   dm->rx = nullptr;
   dm->rw = nullptr;
@@ -600,11 +667,14 @@ Error allocDualMapping(DualMapping* dm, size_t size, MemoryFlags memoryFlags) no
if (off_t(size) <= 0) if (off_t(size) <= 0)
return DebugUtils::errored(size == 0 ? kErrorInvalidArgument : kErrorTooLarge); return DebugUtils::errored(size == 0 ? kErrorInvalidArgument : kErrorTooLarge);
#if defined(ASMJIT_DUAL_MAPPING_REMAPDUP)
return allocDualMappingUsingRemapdup(dm, size, memoryFlags);
#elif defined(ASMJIT_DUAL_MAPPING_ANON_FD)
bool preferTmpOverDevShm = Support::test(memoryFlags, MemoryFlags::kMappingPreferTmp); bool preferTmpOverDevShm = Support::test(memoryFlags, MemoryFlags::kMappingPreferTmp);
if (!preferTmpOverDevShm) { if (!preferTmpOverDevShm) {
ShmStrategy strategy; AnonymousMemStrategy strategy;
ASMJIT_PROPAGATE(getShmStrategy(&strategy)); ASMJIT_PROPAGATE(getAnonMemStrategy(&strategy));
preferTmpOverDevShm = (strategy == ShmStrategy::kTmpDir); preferTmpOverDevShm = (strategy == AnonymousMemStrategy::kTmpDir);
} }
AnonymousMemory anonMem; AnonymousMemory anonMem;
@@ -629,6 +699,9 @@ Error allocDualMapping(DualMapping* dm, size_t size, MemoryFlags memoryFlags) no
dm->rx = ptr[0]; dm->rx = ptr[0];
dm->rw = ptr[1]; dm->rw = ptr[1];
return kErrorOk; return kErrorOk;
#else
#error "[asmjit] VirtMem::allocDualMapping() has no implementation"
#endif
} }
Error releaseDualMapping(DualMapping* dm, size_t size) noexcept { Error releaseDualMapping(DualMapping* dm, size_t size) noexcept {
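
On platforms without MAP_REMAPDUP, the retained ASMJIT_DUAL_MAPPING_ANON_FD path maps one anonymous file twice. A simplified sketch of that idea using shm_open(); asmjit additionally handles SHM_ANON, temporary-file fallback, and the /dev/shm 'noexec' detection shown above:

#include <sys/mman.h>
#include <fcntl.h>
#include <unistd.h>
#include <cstdio>

// Sketch: dual-map an anonymous shared-memory file (the anon-fd strategy).
bool makeDualMappingFd(size_t size, void** rwOut, void** rxOut) {
  char name[64];
  std::snprintf(name, sizeof(name), "/dualmap-example-%d", int(getpid()));

  int fd = shm_open(name, O_RDWR | O_CREAT | O_EXCL, 0600);
  if (fd < 0)
    return false;
  shm_unlink(name); // keep the descriptor, drop the name immediately

  if (ftruncate(fd, off_t(size)) != 0) {
    close(fd);
    return false;
  }

  // Two views of the same file: one writable, one executable.
  void* rw = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
  void* rx = mmap(nullptr, size, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0);
  close(fd); // the mappings keep the file alive

  if (rw == MAP_FAILED || rx == MAP_FAILED) {
    if (rw != MAP_FAILED) munmap(rw, size);
    if (rx != MAP_FAILED) munmap(rx, size);
    return false;
  }

  *rwOut = rw;
  *rxOut = rx;
  return true;
}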

View File

@@ -983,9 +983,8 @@ static void generateSseSequence(BaseEmitter& emitter, InstForm form, bool emitPr
 // Generates a long sequence of AVX instructions.
 template<typename Emitter>
-static void generateAvxSequenceInternal(
+static void generateAvxSequenceInternalRegOnly(
   Emitter& cc,
-  InstForm form,
   const x86::Gp& gp,
   const x86::Vec& vecA, const x86::Vec& vecB, const x86::Vec& vecC, const x86::Vec& vecD) {
@@ -1001,7 +1000,6 @@ static void generateAvxSequenceInternal(
   x86::Ymm ymmA = vecA.ymm();
   x86::Ymm ymmB = vecB.ymm();
   x86::Ymm ymmC = vecC.ymm();
-  x86::Ymm ymmD = vecD.ymm();
 
   cc.xor_(gpd, gpd);
   cc.vxorps(xmmA, xmmA, xmmA);
@@ -1009,7 +1007,6 @@ static void generateAvxSequenceInternal(
   cc.vxorps(xmmC, xmmC, xmmC);
   cc.vxorps(xmmD, xmmD, xmmD);
 
-  if (form == InstForm::kReg) {
     cc.vaddpd(xmmA, xmmB, xmmC);
     cc.vaddpd(ymmA, ymmB, ymmC);
     cc.vaddps(xmmA, xmmB, xmmC);
@@ -1577,13 +1574,40 @@ static void generateAvxSequenceInternal(
   cc.vfnmsub231sd(xmmA, xmmB, xmmC);
   cc.vfnmsub231ss(xmmA, xmmB, xmmC);
 }
-  else {
+
+// Generates a long sequence of AVX instructions.
+template<typename Emitter>
+static void generateAvxSequenceInternalRegMem(
+  Emitter& cc,
+  const x86::Gp& gp,
+  const x86::Vec& vecA, const x86::Vec& vecB, const x86::Vec& vecC, const x86::Vec& vecD) {
+
+  x86::Gp gpd = gp.r32();
+  x86::Gp gpq = gp.r64();
+  x86::Gp gpz = cc.is32Bit() ? gpd : gpq;
+
+  x86::Xmm xmmA = vecA.xmm();
+  x86::Xmm xmmB = vecB.xmm();
+  x86::Xmm xmmC = vecC.xmm();
+  x86::Xmm xmmD = vecD.xmm();
+
+  x86::Ymm ymmA = vecA.ymm();
+  x86::Ymm ymmB = vecB.ymm();
+  x86::Ymm ymmC = vecC.ymm();
+  x86::Ymm ymmD = vecD.ymm();
+
   x86::Mem m = x86::ptr(gpz);
   x86::Mem m128 = x86::xmmword_ptr(gpz);
   x86::Mem m256 = x86::xmmword_ptr(gpz);
   x86::Mem vx_ptr = x86::ptr(gpz, xmmD);
   x86::Mem vy_ptr = x86::ptr(gpz, ymmD);
 
+  cc.xor_(gpd, gpd);
+  cc.vxorps(xmmA, xmmA, xmmA);
+  cc.vxorps(xmmB, xmmB, xmmB);
+  cc.vxorps(xmmC, xmmC, xmmC);
+  cc.vxorps(xmmD, xmmD, xmmD);
+
   cc.vaddpd(xmmA, xmmB, m);
   cc.vaddpd(ymmA, ymmB, m);
   cc.vaddps(xmmA, xmmB, m);
@@ -2087,6 +2111,19 @@ static void generateAvxSequenceInternal(
   cc.vpunpcklwd(ymmA, ymmB, m);
   cc.vpxor(ymmA, ymmB, m);
 }
 
+// Generates a long sequence of AVX instructions.
+template<typename Emitter>
+static void generateAvxSequenceInternal(
+  Emitter& cc,
+  InstForm form,
+  const x86::Gp& gp,
+  const x86::Vec& vecA, const x86::Vec& vecB, const x86::Vec& vecC, const x86::Vec& vecD) {
+
+  if (form == InstForm::kReg)
+    generateAvxSequenceInternalRegOnly(cc, gp, vecA, vecB, vecC, vecD);
+  else
+    generateAvxSequenceInternalRegMem(cc, gp, vecA, vecB, vecC, vecD);
+}
 
 static void generateAvxSequence(BaseEmitter& emitter, InstForm form, bool emitPrologEpilog) {
@@ -2153,9 +2190,8 @@ static void generateAvxSequence(BaseEmitter& emitter, InstForm form, bool emitPr
 // Generates a long sequence of AVX512 instructions.
 template<typename Emitter>
-static void generateAvx512SequenceInternal(
+static void generateAvx512SequenceInternalRegOnly(
   Emitter& cc,
-  InstForm form,
   const x86::Gp& gp,
   const x86::KReg& kA, const x86::KReg& kB, const x86::KReg& kC,
   const x86::Vec& vecA, const x86::Vec& vecB, const x86::Vec& vecC, const x86::Vec& vecD) {
@@ -2172,12 +2208,10 @@ static void generateAvx512SequenceInternal(
   x86::Ymm ymmA = vecA.ymm();
   x86::Ymm ymmB = vecB.ymm();
   x86::Ymm ymmC = vecC.ymm();
-  x86::Ymm ymmD = vecD.ymm();
 
   x86::Zmm zmmA = vecA.zmm();
   x86::Zmm zmmB = vecB.zmm();
   x86::Zmm zmmC = vecC.zmm();
-  x86::Zmm zmmD = vecD.zmm();
 
   cc.xor_(gpd, gpd);
   cc.vxorps(xmmA, xmmA, xmmA);
@@ -2185,7 +2219,6 @@ static void generateAvx512SequenceInternal(
   cc.vxorps(xmmC, xmmC, xmmC);
   cc.vxorps(xmmD, xmmD, xmmD);
 
-  if (form == InstForm::kReg) {
     cc.kaddb(kA, kB, kC);
     cc.kaddd(kA, kB, kC);
     cc.kaddq(kA, kB, kC);
@@ -3505,7 +3538,33 @@ static void generateAvx512SequenceInternal(
   cc.evex().vxorps(ymmA, ymmB, ymmC);
   cc.evex().vxorps(zmmA, zmmB, zmmC);
 }
-  else {
+
+template<typename Emitter>
+static void generateAvx512SequenceInternalRegMem(
+  Emitter& cc,
+  const x86::Gp& gp,
+  const x86::KReg& kA, const x86::KReg& kB, const x86::KReg& kC,
+  const x86::Vec& vecA, const x86::Vec& vecB, const x86::Vec& vecC, const x86::Vec& vecD) {
+
+  DebugUtils::unused(kC);
+
+  x86::Gp gpd = gp.r32();
+  x86::Gp gpq = gp.r64();
+  x86::Gp gpz = cc.is32Bit() ? gpd : gpq;
+
+  x86::Xmm xmmA = vecA.xmm();
+  x86::Xmm xmmB = vecB.xmm();
+  x86::Xmm xmmC = vecC.xmm();
+  x86::Xmm xmmD = vecD.xmm();
+
+  x86::Ymm ymmA = vecA.ymm();
+  x86::Ymm ymmB = vecB.ymm();
+  x86::Ymm ymmD = vecD.ymm();
+
+  x86::Zmm zmmA = vecA.zmm();
+  x86::Zmm zmmB = vecB.zmm();
+  x86::Zmm zmmD = vecD.zmm();
+
   x86::Mem m = x86::ptr(gpz);
   x86::Mem m32 = x86::dword_ptr(gpz);
   x86::Mem m64 = x86::qword_ptr(gpz);
@@ -3516,6 +3575,12 @@ static void generateAvx512SequenceInternal(
   x86::Mem vy_ptr = x86::ptr(gpz, ymmD);
   x86::Mem vz_ptr = x86::ptr(gpz, zmmD);
 
+  cc.xor_(gpd, gpd);
+  cc.vxorps(xmmA, xmmA, xmmA);
+  cc.vxorps(xmmB, xmmB, xmmB);
+  cc.vxorps(xmmC, xmmC, xmmC);
+  cc.vxorps(xmmD, xmmD, xmmD);
+
   cc.kmovb(kA, m);
   cc.kmovb(m, kB);
   cc.kmovd(kA, m);
@@ -4839,6 +4904,20 @@ static void generateAvx512SequenceInternal(
   cc.evex().vxorps(ymmA, ymmB, m);
   cc.evex().vxorps(zmmA, zmmB, m);
 }
 
+// Generates a long sequence of AVX512 instructions.
+template<typename Emitter>
+static void generateAvx512SequenceInternal(
+  Emitter& cc,
+  InstForm form,
+  const x86::Gp& gp,
+  const x86::KReg& kA, const x86::KReg& kB, const x86::KReg& kC,
+  const x86::Vec& vecA, const x86::Vec& vecB, const x86::Vec& vecC, const x86::Vec& vecD) {
+
+  if (form == InstForm::kReg)
+    generateAvx512SequenceInternalRegOnly(cc, gp, kA, kB, kC, vecA, vecB, vecC, vecD);
+  else
+    generateAvx512SequenceInternalRegMem(cc, gp, kA, kB, kC, vecA, vecB, vecC, vecD);
+}
 
 static void generateAvx512Sequence(BaseEmitter& emitter, InstForm form, bool emitPrologEpilog) {