[ABI] Reworked JitAllocator to alloc spans and to use explicit write API

kobalicek
2023-09-09 10:39:34 +02:00
parent bb47dc3ede
commit 8e2f4de484
16 changed files with 920 additions and 273 deletions
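For orientation, a minimal usage sketch of the reworked allocation API (a hedged example based on the Span and explicit write() additions in this diff; `emitInto`, `code`, and `codeSize` are placeholder names, not part of the commit):

// Allocate a span and copy machine code into it via the explicit write API.
#include <asmjit/core.h>

using namespace asmjit;

static Error emitInto(JitAllocator& allocator, const void* code, size_t codeSize, void** rxOut) {
  JitAllocator::Span span;
  ASMJIT_PROPAGATE(allocator.alloc(span, codeSize));

  // write() picks a cache policy, guards RW access, and flushes the instruction cache as needed.
  Error err = allocator.write(span, 0, code, codeSize);
  if (err != kErrorOk) {
    allocator.release(span.rx());
    return err;
  }

  *rxOut = span.rx();
  return kErrorOk;
}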

View File

@@ -362,6 +362,7 @@ set(ASMJIT_SRC_LIST
asmjit/core/operand.h asmjit/core/operand.h
asmjit/core/osutils.cpp asmjit/core/osutils.cpp
asmjit/core/osutils.h asmjit/core/osutils.h
asmjit/core/osutils_p.h
asmjit/core/raassignment_p.h asmjit/core/raassignment_p.h
asmjit/core/rabuilders_p.h asmjit/core/rabuilders_p.h
asmjit/core/radefs_p.h asmjit/core/radefs_p.h

View File

@@ -177,7 +177,7 @@ namespace asmjit {
//! ### AsmJit Backends //! ### AsmJit Backends
//! //!
//! AsmJit currently supports only X86/X64 backend, but the plan is to add more backends in the future. By default //! AsmJit currently supports only X86/X64 backend, but the plan is to add more backends in the future. By default
//! AsmJit builds only the host backend, which is autodetected at compile-time, but this can be overridden. //! AsmJit builds only the host backend, which is auto-detected at compile-time, but this can be overridden.
//! //!
//! - \ref ASMJIT_NO_X86 - Disable X86/X64 backends. //! - \ref ASMJIT_NO_X86 - Disable X86/X64 backends.
//! - \ref ASMJIT_NO_FOREIGN - Disables the support for foreign architectures. //! - \ref ASMJIT_NO_FOREIGN - Disables the support for foreign architectures.
@@ -306,7 +306,7 @@ namespace asmjit {
//! However, `gpCount()` was removed - at the moment `ArchTraits` can be used to access such properties. //! However, `gpCount()` was removed - at the moment `ArchTraits` can be used to access such properties.
//! //!
//! Some other functions were renamed, like `ArchInfo::isX86Family()` is now `Environment::isFamilyX86()`, etc. //! Some other functions were renamed, like `ArchInfo::isX86Family()` is now `Environment::isFamilyX86()`, etc.
//! The reason for changing the order was support for more propertries and all the accessors now start with the //! The reason for changing the order was support for more properties and all the accessors now start with the
//! type of the property, like `Environment::isPlatformWindows()`. //! type of the property, like `Environment::isPlatformWindows()`.
//! //!
//! This function causes many other classes to provide `environment()` getter instead of `archInfo()` getter. //! This function causes many other classes to provide `environment()` getter instead of `archInfo()` getter.
@@ -870,7 +870,7 @@ namespace asmjit {
//! //!
//! if (code.hasUnresolvedLinks()) { //! if (code.hasUnresolvedLinks()) {
//! // This would mean either unbound label or some other issue. //! // This would mean either unbound label or some other issue.
//! printf("The code has %zu unbound labels\n", code.unresovedLinkCount()); //! printf("The code has %zu unbound labels\n", code.unresolvedLinkCount());
//! exit(1); //! exit(1);
//! } //! }
//! } //! }
@@ -1524,7 +1524,7 @@ namespace asmjit {
//! override \ref ErrorHandler::handleError() to throw, in that case no error will be returned and exception will be //! override \ref ErrorHandler::handleError() to throw, in that case no error will be returned and exception will be
//! thrown instead. All functions where this can happen are not marked `noexcept`. //! thrown instead. All functions where this can happen are not marked `noexcept`.
//! //!
//! Errors should never be ignored, however, checking errors after each AsmJit API call would simply overcomplicate //! Errors should never be ignored, however, checking errors after each AsmJit API call would simply over-complicate
//! the whole code generation experience. \ref ErrorHandler exists to make the use of AsmJit API simpler as it allows //! the whole code generation experience. \ref ErrorHandler exists to make the use of AsmJit API simpler as it allows
//! to customize how errors can be handled: //! to customize how errors can be handled:
//! //!
@@ -1679,7 +1679,7 @@ namespace asmjit {
//! require a file descriptor. If none of these options are available, AsmJit uses a plain `open()` call followed by //! require a file descriptor. If none of these options are available, AsmJit uses a plain `open()` call followed by
//! `unlink()`. //! `unlink()`.
//! //!
//! The most challenging part is actually obtaing a file descriptor that can be passed to `mmap()` with `PROT_EXEC`. //! The most challenging part is actually obtaining a file descriptor that can be passed to `mmap()` with `PROT_EXEC`.
//! This is still something that may fail, for example the environment could be hardened in a way that this would //! This is still something that may fail, for example the environment could be hardened in a way that this would
//! not be possible at all, and thus dual mapping would not work. //! not be possible at all, and thus dual mapping would not work.
//! //!

View File

@@ -12,20 +12,23 @@
//! \addtogroup asmjit_core //! \addtogroup asmjit_core
//! \{ //! \{
//! AsmJit library version in `(Major << 16) | (Minor << 8) | (Patch)` format. //! Makes a 32-bit integer that represents AsmJit version in `(major << 16) | (minor << 8) | patch` form.
#define ASMJIT_LIBRARY_VERSION 0x010A00 /* 1.10.0 */ #define ASMJIT_LIBRARY_MAKE_VERSION(major, minor, patch) ((major << 16) | (minor << 8) | (patch))
//! AsmJit library version, see \ref ASMJIT_LIBRARY_MAKE_VERSION for a version format reference.
#define ASMJIT_LIBRARY_VERSION ASMJIT_LIBRARY_MAKE_VERSION(1, 11, 0)
//! \def ASMJIT_ABI_NAMESPACE //! \def ASMJIT_ABI_NAMESPACE
//! //!
//! AsmJit ABI namespace is an inline namespace within \ref asmjit namespace. //! AsmJit ABI namespace is an inline namespace within \ref asmjit namespace.
//! //!
//! It's used to make sure that when user links to an incompatible version of AsmJit, it won't link. It has also some //! It's used to make sure that when user links to an incompatible version of AsmJit, it won't link. It has also
//! additional properties as well. When `ASMJIT_ABI_NAMESPACE` is defined by the user it would override the AsmJit //! some additional properties as well. When `ASMJIT_ABI_NAMESPACE` is defined by the user it would override the
//! default, which makes it possible to use use multiple AsmJit libraries within a single project, totally controlled //! AsmJit default, which makes it possible to use multiple AsmJit libraries within a single project, totally
//! by the users. This is useful especially in cases in which some of such library comes from a third party. //! controlled by users. This is especially useful in cases in which such a library comes from a third party.
#ifndef ASMJIT_ABI_NAMESPACE #if !defined(ASMJIT_ABI_NAMESPACE)
#define ASMJIT_ABI_NAMESPACE _abi_1_10 #define ASMJIT_ABI_NAMESPACE _abi_1_11
#endif #endif // !ASMJIT_ABI_NAMESPACE
//! \} //! \}
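Side note: the ASMJIT_LIBRARY_MAKE_VERSION macro added in the hunk above also enables compile-time version checks in user code; an illustrative sketch (not part of this commit):

#include <asmjit/core.h>

#if ASMJIT_LIBRARY_VERSION >= ASMJIT_LIBRARY_MAKE_VERSION(1, 11, 0)
  // The Span-based JitAllocator API introduced by this commit is available.
#endif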
@@ -68,12 +71,12 @@ namespace asmjit {
//! Defined when AsmJit's build configuration is 'Debug'. //! Defined when AsmJit's build configuration is 'Debug'.
//! //!
//! \note Can be defined explicitly to bypass autodetection. //! \note Can be defined explicitly to bypass auto-detection.
#define ASMJIT_BUILD_DEBUG #define ASMJIT_BUILD_DEBUG
//! Defined when AsmJit's build configuration is 'Release'. //! Defined when AsmJit's build configuration is 'Release'.
//! //!
//! \note Can be defined explicitly to bypass autodetection. //! \note Can be defined explicitly to bypass auto-detection.
#define ASMJIT_BUILD_RELEASE #define ASMJIT_BUILD_RELEASE
//! Disables X86/X64 backends. //! Disables X86/X64 backends.

View File

@@ -36,7 +36,7 @@ enum class CallConvId : uint8_t {
//! Standard function call or explicit `__cdecl` where it can be specified. //! Standard function call or explicit `__cdecl` where it can be specified.
//! //!
//! This is a universal calling convention, which is used to initialize specific calling connventions based on //! This is a universal calling convention, which is used to initialize specific calling conventions based on
//! architecture, platform, and its ABI. //! architecture, platform, and its ABI.
kCDecl = 1, kCDecl = 1,
@@ -163,14 +163,12 @@ struct CallConv {
//! \name Constants //! \name Constants
//! \{ //! \{
enum : uint32_t {
//! Maximum number of register arguments per register group. //! Maximum number of register arguments per register group.
//! //!
//! \note This is not really AsmJit's limitatation, it's just the number that makes sense considering all common //! \note This is not really AsmJit's limitation, it's just the number that makes sense considering all common
//! calling conventions. Usually even conventions that use registers to pass function arguments are limited to 8 //! calling conventions. Usually even conventions that use registers to pass function arguments are limited to 8
//! and less arguments passed via registers per group. //! and less arguments passed via registers per group.
kMaxRegArgsPerGroup = 16 static constexpr uint32_t kMaxRegArgsPerGroup = 16;
};
//! \} //! \}
@@ -370,10 +368,8 @@ struct FuncSignature {
//! \name Constants //! \name Constants
//! \{ //! \{
enum : uint8_t {
//! Doesn't have variable number of arguments (`...`). //! Doesn't have variable number of arguments (`...`).
kNoVarArgs = 0xFFu static constexpr uint8_t kNoVarArgs = 0xFFu;
};
//! \} //! \}
@@ -393,7 +389,7 @@ struct FuncSignature {
//! \} //! \}
//! \name Initializtion & Reset //! \name Initialization & Reset
//! \{ //! \{
//! Initializes the function signature. //! Initializes the function signature.
@@ -460,7 +456,7 @@ class FuncSignatureBuilder : public FuncSignature {
public: public:
TypeId _builderArgList[Globals::kMaxFuncArgs]; TypeId _builderArgList[Globals::kMaxFuncArgs];
//! \name Initializtion & Reset //! \name Initialization & Reset
//! \{ //! \{
inline FuncSignatureBuilder(CallConvId ccId = CallConvId::kHost, uint32_t vaIndex = kNoVarArgs) noexcept { inline FuncSignatureBuilder(CallConvId ccId = CallConvId::kHost, uint32_t vaIndex = kNoVarArgs) noexcept {
@@ -533,7 +529,7 @@ struct FuncValue {
//! \} //! \}
//! \name Initializtion & Reset //! \name Initialization & Reset
//! //!
//! These initialize the whole `FuncValue` to either register or stack. Useful when you know all of these //! These initialize the whole `FuncValue` to either register or stack. Useful when you know all of these
//! properties and wanna just set it up. //! properties and wanna just set it up.
@@ -561,7 +557,7 @@ struct FuncValue {
//! \name Assign //! \name Assign
//! //!
//! These initialize only part of `FuncValue`, useful when building `FuncValue` incrementally. The caller //! These initialize only part of `FuncValue`, useful when building `FuncValue` incrementally. The caller
//! should first init the type-id by caliing `initTypeId` and then continue building either register or stack. //! should first init the type-id by calling `initTypeId` and then continue building either register or stack.
//! //!
//! \{ //! \{
@@ -901,7 +897,7 @@ public:
//! Function frame. //! Function frame.
//! //!
//! Function frame is used directly by prolog and epilog insertion (PEI) utils. It provides information necessary to //! Function frame is used directly by prolog and epilog insertion (PEI) utils. It provides information necessary to
//! insert a proper and ABI comforming prolog and epilog. Function frame calculation is based on `CallConv` and //! insert a proper and ABI conforming prolog and epilog. Function frame calculation is based on `CallConv` and
//! other function attributes. //! other function attributes.
//! //!
//! SSE vs AVX vs AVX-512 //! SSE vs AVX vs AVX-512
@@ -1016,7 +1012,7 @@ public:
uint16_t _extraRegSaveSize; uint16_t _extraRegSaveSize;
//! Offset where registers saved/restored via push/pop are stored //! Offset where registers saved/restored via push/pop are stored
uint32_t _pushPopSaveOffset; uint32_t _pushPopSaveOffset;
//! Offset where extra ragisters that cannot use push/pop are stored. //! Offset where extra registers that cannot use push/pop are stored.
uint32_t _extraRegSaveOffset; uint32_t _extraRegSaveOffset;
//! \} //! \}
@@ -1269,7 +1265,7 @@ public:
//! Returns the mask of preserved registers of the given register `group`. //! Returns the mask of preserved registers of the given register `group`.
//! //!
//! Preserved registers are those that must survive the function call unmodified. The function can only modify //! Preserved registers are those that must survive the function call unmodified. The function can only modify
//! preserved registers it they are saved and restored in funciton's prolog and epilog, respectively. //! preserved registers if they are saved and restored in function's prolog and epilog, respectively.
inline RegMask preservedRegs(RegGroup group) const noexcept { inline RegMask preservedRegs(RegGroup group) const noexcept {
ASMJIT_ASSERT(group <= RegGroup::kMaxVirt); ASMJIT_ASSERT(group <= RegGroup::kMaxVirt);
return _preservedRegs[group]; return _preservedRegs[group];
@@ -1439,7 +1435,7 @@ public:
//! Update `FuncFrame` based on function's arguments assignment. //! Update `FuncFrame` based on function's arguments assignment.
//! //!
//! \note You MUST call this in orher to use `BaseEmitter::emitArgsAssignment()`, otherwise the FuncFrame would //! \note You MUST call this in order to use `BaseEmitter::emitArgsAssignment()`, otherwise the FuncFrame would
//! not contain the information necessary to assign all arguments into the registers and/or stack specified. //! not contain the information necessary to assign all arguments into the registers and/or stack specified.
ASMJIT_API Error updateFuncFrame(FuncFrame& frame) const noexcept; ASMJIT_API Error updateFuncFrame(FuncFrame& frame) const noexcept;

View File

@@ -86,6 +86,7 @@ ASMJIT_FAVOR_SIZE const char* DebugUtils::errorAsString(Error err) noexcept {
"ExpressionLabelNotBound\0" "ExpressionLabelNotBound\0"
"ExpressionOverflow\0" "ExpressionOverflow\0"
"FailedToOpenAnonymousMemory\0" "FailedToOpenAnonymousMemory\0"
"FailedToOpenFile\0"
"<Unknown>\0"; "<Unknown>\0";
static const uint16_t sErrorIndex[] = { static const uint16_t sErrorIndex[] = {
@@ -93,7 +94,7 @@ ASMJIT_FAVOR_SIZE const char* DebugUtils::errorAsString(Error err) noexcept {
247, 264, 283, 298, 314, 333, 352, 370, 392, 410, 429, 444, 460, 474, 488, 247, 264, 283, 298, 314, 333, 352, 370, 392, 410, 429, 444, 460, 474, 488,
508, 533, 551, 573, 595, 612, 629, 645, 661, 677, 694, 709, 724, 744, 764, 508, 533, 551, 573, 595, 612, 629, 645, 661, 677, 694, 709, 724, 744, 764,
784, 817, 837, 852, 869, 888, 909, 929, 943, 964, 978, 996, 1012, 1028, 1047, 784, 817, 837, 852, 869, 888, 909, 929, 943, 964, 978, 996, 1012, 1028, 1047,
1073, 1088, 1104, 1119, 1134, 1164, 1188, 1207, 1235 1073, 1088, 1104, 1119, 1134, 1164, 1188, 1207, 1235, 1252
}; };
// @EnumStringEnd@ // @EnumStringEnd@

View File

@@ -175,7 +175,10 @@ enum ErrorCode : uint32_t {
//! The object is already initialized. //! The object is already initialized.
kErrorAlreadyInitialized, kErrorAlreadyInitialized,
//! Built-in feature was disabled at compile time and it's not available. //! Either a built-in feature was disabled at compile time and it's not available or the feature is not
//! available on the target platform.
//!
//! For example, trying to allocate large pages on an unsupported platform would return this error.
kErrorFeatureNotEnabled, kErrorFeatureNotEnabled,
//! Too many handles (Windows) or file descriptors (Unix/Posix). //! Too many handles (Windows) or file descriptors (Unix/Posix).
@@ -320,6 +323,11 @@ enum ErrorCode : uint32_t {
//! Failed to open anonymous memory handle or file descriptor. //! Failed to open anonymous memory handle or file descriptor.
kErrorFailedToOpenAnonymousMemory, kErrorFailedToOpenAnonymousMemory,
//! Failed to open a file.
//!
//! \note This is a generic error that is used by internal filesystem API.
kErrorFailedToOpenFile,
// @EnumValuesEnd@ // @EnumValuesEnd@
//! Count of AsmJit error codes. //! Count of AsmJit error codes.

View File

@@ -189,7 +189,7 @@ enum class InstControlFlow : uint32_t {
//! Hint that is used when both input operands to the instruction are the same. //! Hint that is used when both input operands to the instruction are the same.
//! //!
//! Provides hints to the instrution RW query regarding special cases in which two or more operands are the same //! Provides hints to the instruction RW query regarding special cases in which two or more operands are the same
//! registers. This is required by instructions such as XOR, AND, OR, SUB, etc... These hints will influence the //! registers. This is required by instructions such as XOR, AND, OR, SUB, etc... These hints will influence the
//! RW operations query. //! RW operations query.
enum class InstSameRegHint : uint8_t { enum class InstSameRegHint : uint8_t {

View File

@@ -31,7 +31,7 @@ static constexpr uint32_t kJitAllocatorMultiPoolCount = 3;
static constexpr uint32_t kJitAllocatorBaseGranularity = 64; static constexpr uint32_t kJitAllocatorBaseGranularity = 64;
//! Maximum block size (32MB). //! Maximum block size (32MB).
static constexpr uint32_t kJitAllocatorMaxBlockSize = 1024 * 1024 * 32; static constexpr uint32_t kJitAllocatorMaxBlockSize = 1024 * 1024 * 64;
// JitAllocator - Fill Pattern // JitAllocator - Fill Pattern
// =========================== // ===========================
@@ -158,9 +158,9 @@ public:
uint8_t emptyBlockCount = 0; uint8_t emptyBlockCount = 0;
//! Number of bits reserved across all blocks. //! Number of bits reserved across all blocks.
size_t totalAreaSize = 0; size_t totalAreaSize[2] {};
//! Number of bits used across all blocks. //! Number of bits used across all blocks.
size_t totalAreaUsed = 0; size_t totalAreaUsed[2] {};
//! Overhead of all blocks (in bytes). //! Overhead of all blocks (in bytes).
size_t totalOverheadBytes = 0; size_t totalOverheadBytes = 0;
@@ -172,10 +172,12 @@ public:
inline void reset() noexcept { inline void reset() noexcept {
blocks.reset(); blocks.reset();
cursor = nullptr; cursor = nullptr;
blockCount = 0; blockCount = 0u;
totalAreaSize = 0; totalAreaSize[0] = 0u;
totalAreaUsed = 0; totalAreaSize[1] = 0u;
totalOverheadBytes = 0; totalAreaUsed[0] = 0u;
totalAreaUsed[1] = 0u;
totalOverheadBytes = 0u;
} }
inline size_t byteSizeFromAreaSize(uint32_t areaSize) const noexcept { return size_t(areaSize) * granularity; } inline size_t byteSizeFromAreaSize(uint32_t areaSize) const noexcept { return size_t(areaSize) * granularity; }
@@ -202,8 +204,10 @@ public:
kFlagEmpty = 0x00000002u, kFlagEmpty = 0x00000002u,
//! Block is dirty (largestUnusedArea, searchStart, searchEnd). //! Block is dirty (largestUnusedArea, searchStart, searchEnd).
kFlagDirty = 0x00000004u, kFlagDirty = 0x00000004u,
//! Block is dual-mapped. //! Block represents memory that is using large pages.
kFlagDualMapped = 0x00000008u kFlagLargePages = 0x00000008u,
//! Block represents memory that is dual-mapped.
kFlagDualMapped = 0x00000010u
}; };
static_assert(kFlagInitialPadding == 1, "JitAllocatorBlock::kFlagInitialPadding must be equal to 1"); static_assert(kFlagInitialPadding == 1, "JitAllocatorBlock::kFlagInitialPadding must be equal to 1");
@@ -269,13 +273,15 @@ public:
inline void addFlags(uint32_t f) noexcept { _flags |= f; } inline void addFlags(uint32_t f) noexcept { _flags |= f; }
inline void clearFlags(uint32_t f) noexcept { _flags &= ~f; } inline void clearFlags(uint32_t f) noexcept { _flags &= ~f; }
inline bool hasInitialPadding() const noexcept { return hasFlag(kFlagInitialPadding); }
inline uint32_t initialAreaStart() const noexcept { return initialAreaStartByFlags(_flags); }
inline bool empty() const noexcept { return hasFlag(kFlagEmpty); } inline bool empty() const noexcept { return hasFlag(kFlagEmpty); }
inline bool isDirty() const noexcept { return hasFlag(kFlagDirty); } inline bool isDirty() const noexcept { return hasFlag(kFlagDirty); }
inline void makeDirty() noexcept { addFlags(kFlagDirty); } inline void makeDirty() noexcept { addFlags(kFlagDirty); }
inline bool hasLargePages() const noexcept { return hasFlag(kFlagLargePages); }
inline bool hasInitialPadding() const noexcept { return hasFlag(kFlagInitialPadding); }
inline uint32_t initialAreaStart() const noexcept { return initialAreaStartByFlags(_flags); }
inline size_t blockSize() const noexcept { return _blockSize; } inline size_t blockSize() const noexcept { return _blockSize; }
inline uint32_t areaSize() const noexcept { return _areaSize; } inline uint32_t areaSize() const noexcept { return _areaSize; }
@@ -285,7 +291,7 @@ public:
inline void decreaseUsedArea(uint32_t value) noexcept { inline void decreaseUsedArea(uint32_t value) noexcept {
_areaUsed -= value; _areaUsed -= value;
_pool->totalAreaUsed -= value; _pool->totalAreaUsed[size_t(hasLargePages())] -= value;
} }
inline void clearBlock() noexcept { inline void clearBlock() noexcept {
@@ -316,7 +322,7 @@ public:
Support::bitVectorSetBit(_stopBitVector, allocatedAreaEnd - 1, true); Support::bitVectorSetBit(_stopBitVector, allocatedAreaEnd - 1, true);
// Update search region and statistics. // Update search region and statistics.
_pool->totalAreaUsed += allocatedAreaSize; _pool->totalAreaUsed[size_t(hasLargePages())] += allocatedAreaSize;
_areaUsed += allocatedAreaSize; _areaUsed += allocatedAreaSize;
if (areaAvailable() == 0) { if (areaAvailable() == 0) {
@@ -341,7 +347,7 @@ public:
uint32_t releasedAreaSize = releasedAreaEnd - releasedAreaStart; uint32_t releasedAreaSize = releasedAreaEnd - releasedAreaStart;
// Update the search region and statistics. // Update the search region and statistics.
_pool->totalAreaUsed -= releasedAreaSize; _pool->totalAreaUsed[size_t(hasLargePages())] -= releasedAreaSize;
_areaUsed -= releasedAreaSize; _areaUsed -= releasedAreaSize;
_searchStart = Support::min(_searchStart, releasedAreaStart); _searchStart = Support::min(_searchStart, releasedAreaStart);
_searchEnd = Support::max(_searchEnd, releasedAreaEnd); _searchEnd = Support::max(_searchEnd, releasedAreaEnd);
@@ -371,7 +377,7 @@ public:
ASMJIT_ASSERT(shrunkAreaSize != 0); ASMJIT_ASSERT(shrunkAreaSize != 0);
// Update the search region and statistics. // Update the search region and statistics.
_pool->totalAreaUsed -= shrunkAreaSize; _pool->totalAreaUsed[size_t(hasLargePages())] -= shrunkAreaSize;
_areaUsed -= shrunkAreaSize; _areaUsed -= shrunkAreaSize;
_searchStart = Support::min(_searchStart, shrunkAreaStart); _searchStart = Support::min(_searchStart, shrunkAreaStart);
_searchEnd = Support::max(_searchEnd, shrunkAreaEnd); _searchEnd = Support::max(_searchEnd, shrunkAreaEnd);
@@ -514,9 +520,10 @@ static inline size_t JitAllocatorImpl_calculateIdealBlockSize(JitAllocatorPrivat
// We have to increase the allocationSize if we know that the block must provide padding. // We have to increase the allocationSize if we know that the block must provide padding.
if (!Support::test(impl->options, JitAllocatorOptions::kDisableInitialPadding)) { if (!Support::test(impl->options, JitAllocatorOptions::kDisableInitialPadding)) {
if (SIZE_MAX - allocationSize < 64u) size_t granularity = pool->granularity;
if (SIZE_MAX - allocationSize < granularity)
return 0; // Overflown return 0; // Overflown
allocationSize += 64u; allocationSize += granularity;
} }
if (blockSize < kJitAllocatorMaxBlockSize) if (blockSize < kJitAllocatorMaxBlockSize)
@@ -531,12 +538,31 @@ static inline size_t JitAllocatorImpl_calculateIdealBlockSize(JitAllocatorPrivat
return blockSize; return blockSize;
} }
ASMJIT_FAVOR_SPEED static void JitAllocatorImpl_fillPattern(void* mem, uint32_t pattern, size_t sizeInBytes) noexcept { ASMJIT_NOINLINE
size_t n = sizeInBytes / 4u; ASMJIT_FAVOR_SPEED static void JitAllocatorImpl_fillPattern(void* mem, uint32_t pattern, size_t byteSize) noexcept {
uint32_t* p = static_cast<uint32_t*>(mem); // NOTE: This is always used to fill a pattern in allocated / freed memory. The allocation always has
// a granularity greater than the pattern; however, when shrink() is used, we may end up having
// an unaligned start, so deal with it here and then copy aligned pattern in the loop.
if ((uintptr_t(mem) & 0x1u) && byteSize >= 1u) {
static_cast<uint8_t*>(mem)[0] = uint8_t(pattern & 0xFF);
mem = static_cast<uint8_t*>(mem) + 1;
byteSize--;
}
if ((uintptr_t(mem) & 0x2u) && byteSize >= 2u) {
static_cast<uint16_t*>(mem)[0] = uint16_t(pattern & 0xFFFF);
mem = static_cast<uint16_t*>(mem) + 1;
byteSize -= 2;
}
// Something would be seriously broken if we end up with aligned `mem`, but unaligned `byteSize`.
ASMJIT_ASSERT((byteSize & 0x3u) == 0u);
uint32_t* mem32 = static_cast<uint32_t*>(mem);
size_t n = byteSize / 4u;
for (size_t i = 0; i < n; i++) for (size_t i = 0; i < n; i++)
p[i] = pattern; mem32[i] = pattern;
} }
// Allocate a new `JitAllocatorBlock` for the given `blockSize`. // Allocate a new `JitAllocatorBlock` for the given `blockSize`.
@@ -547,45 +573,66 @@ static Error JitAllocatorImpl_newBlock(JitAllocatorPrivateImpl* impl, JitAllocat
using Support::BitWord; using Support::BitWord;
using Support::kBitWordSizeInBits; using Support::kBitWordSizeInBits;
uint32_t areaSize = uint32_t((blockSize + pool->granularity - 1) >> pool->granularityLog2);
uint32_t numBitWords = (areaSize + kBitWordSizeInBits - 1u) / kBitWordSizeInBits;
JitAllocatorBlock* block = static_cast<JitAllocatorBlock*>(::malloc(sizeof(JitAllocatorBlock) + size_t(numBitWords) * 2u * sizeof(BitWord)));
if (ASMJIT_UNLIKELY(block == nullptr))
return DebugUtils::errored(kErrorOutOfMemory);
BitWord* bitWords = reinterpret_cast<BitWord*>(reinterpret_cast<uint8_t*>(block) + sizeof(JitAllocatorBlock));
uint32_t blockFlags = 0; uint32_t blockFlags = 0;
if (!Support::test(impl->options, JitAllocatorOptions::kDisableInitialPadding)) if (!Support::test(impl->options, JitAllocatorOptions::kDisableInitialPadding))
blockFlags |= JitAllocatorBlock::kFlagInitialPadding; blockFlags |= JitAllocatorBlock::kFlagInitialPadding;
Error err {};
VirtMem::DualMapping virtMem {}; VirtMem::DualMapping virtMem {};
VirtMem::MemoryFlags memFlags = VirtMem::MemoryFlags::kAccessRWX;
if (Support::test(impl->options, JitAllocatorOptions::kUseDualMapping)) { if (Support::test(impl->options, JitAllocatorOptions::kUseDualMapping)) {
err = VirtMem::allocDualMapping(&virtMem, blockSize, VirtMem::MemoryFlags::kAccessRWX); ASMJIT_PROPAGATE(VirtMem::allocDualMapping(&virtMem, blockSize, memFlags));
blockFlags |= JitAllocatorBlock::kFlagDualMapped; blockFlags |= JitAllocatorBlock::kFlagDualMapped;
} }
else { else {
err = VirtMem::alloc(&virtMem.rx, blockSize, VirtMem::MemoryFlags::kAccessRWX); bool allocateRegularPages = true;
if (Support::test(impl->options, JitAllocatorOptions::kUseLargePages)) {
size_t largePageSize = VirtMem::largePageSize();
bool tryLargePage = blockSize >= largePageSize || Support::test(impl->options, JitAllocatorOptions::kAlignBlockSizeToLargePage);
// Only proceed if we can actually allocate large pages.
if (largePageSize && tryLargePage) {
size_t largeBlockSize = Support::alignUp(blockSize, largePageSize);
Error err = VirtMem::alloc(&virtMem.rx, largeBlockSize, memFlags | VirtMem::MemoryFlags::kMMapLargePages);
// Fallback to regular pages if large page(s) allocation failed.
if (err == kErrorOk) {
allocateRegularPages = false;
blockSize = largeBlockSize;
blockFlags |= JitAllocatorBlock::kFlagLargePages;
}
}
}
// Called either if large pages were not requested or large page(s) allocation failed.
if (allocateRegularPages) {
ASMJIT_PROPAGATE(VirtMem::alloc(&virtMem.rx, blockSize, memFlags));
}
virtMem.rw = virtMem.rx; virtMem.rw = virtMem.rx;
} }
// Out of memory. uint32_t areaSize = uint32_t((blockSize + pool->granularity - 1) >> pool->granularityLog2);
if (ASMJIT_UNLIKELY(err != kErrorOk)) { uint32_t numBitWords = (areaSize + kBitWordSizeInBits - 1u) / kBitWordSizeInBits;
if (block) uint8_t* blockPtr = static_cast<uint8_t*>(::malloc(sizeof(JitAllocatorBlock) + size_t(numBitWords) * 2u * sizeof(BitWord)));
::free(block);
return err; // Out of memory...
if (ASMJIT_UNLIKELY(blockPtr == nullptr)) {
if (Support::test(impl->options, JitAllocatorOptions::kUseDualMapping))
VirtMem::releaseDualMapping(&virtMem, blockSize);
else
VirtMem::release(virtMem.rx, blockSize);
return DebugUtils::errored(kErrorOutOfMemory);
} }
// Fill the memory if the secure mode is enabled. // Fill the allocated virtual memory if secure mode is enabled.
if (Support::test(impl->options, JitAllocatorOptions::kFillUnusedMemory)) { if (Support::test(impl->options, JitAllocatorOptions::kFillUnusedMemory)) {
VirtMem::ProtectJitReadWriteScope scope(virtMem.rw, blockSize); VirtMem::ProtectJitReadWriteScope scope(virtMem.rw, blockSize);
JitAllocatorImpl_fillPattern(virtMem.rw, impl->fillPattern, blockSize); JitAllocatorImpl_fillPattern(virtMem.rw, impl->fillPattern, blockSize);
} }
*dst = new(block) JitAllocatorBlock(pool, virtMem, blockSize, blockFlags, bitWords, bitWords + numBitWords, areaSize); BitWord* bitWords = reinterpret_cast<BitWord*>(blockPtr + sizeof(JitAllocatorBlock));
*dst = new(blockPtr) JitAllocatorBlock(pool, virtMem, blockSize, blockFlags, bitWords, bitWords + numBitWords, areaSize);
return kErrorOk; return kErrorOk;
} }
@@ -611,9 +658,10 @@ static void JitAllocatorImpl_insertBlock(JitAllocatorPrivateImpl* impl, JitAlloc
pool->blocks.append(block); pool->blocks.append(block);
// Update statistics. // Update statistics.
size_t statIndex = size_t(block->hasLargePages());
pool->blockCount++; pool->blockCount++;
pool->totalAreaSize += block->areaSize(); pool->totalAreaSize[statIndex] += block->areaSize();
pool->totalAreaUsed += block->areaUsed(); pool->totalAreaUsed[statIndex] += block->areaUsed();
pool->totalOverheadBytes += sizeof(JitAllocatorBlock) + JitAllocatorImpl_bitVectorSizeToByteSize(block->areaSize()) * 2u; pool->totalOverheadBytes += sizeof(JitAllocatorBlock) + JitAllocatorImpl_bitVectorSizeToByteSize(block->areaSize()) * 2u;
} }
@@ -628,9 +676,10 @@ static void JitAllocatorImpl_removeBlock(JitAllocatorPrivateImpl* impl, JitAlloc
pool->blocks.unlink(block); pool->blocks.unlink(block);
// Update statistics. // Update statistics.
size_t statIndex = size_t(block->hasLargePages());
pool->blockCount--; pool->blockCount--;
pool->totalAreaSize -= block->areaSize(); pool->totalAreaSize[statIndex] -= block->areaSize();
pool->totalAreaUsed -= block->areaUsed(); pool->totalAreaUsed[statIndex] -= block->areaUsed();
pool->totalOverheadBytes -= sizeof(JitAllocatorBlock) + JitAllocatorImpl_bitVectorSizeToByteSize(block->areaSize()) * 2u; pool->totalOverheadBytes -= sizeof(JitAllocatorBlock) + JitAllocatorImpl_bitVectorSizeToByteSize(block->areaSize()) * 2u;
} }
@@ -733,8 +782,8 @@ JitAllocator::Statistics JitAllocator::statistics() const noexcept {
for (size_t poolId = 0; poolId < poolCount; poolId++) { for (size_t poolId = 0; poolId < poolCount; poolId++) {
const JitAllocatorPool& pool = impl->pools[poolId]; const JitAllocatorPool& pool = impl->pools[poolId];
statistics._blockCount += size_t(pool.blockCount); statistics._blockCount += size_t(pool.blockCount);
statistics._reservedSize += size_t(pool.totalAreaSize) * pool.granularity; statistics._reservedSize += size_t(pool.totalAreaSize[0] + pool.totalAreaSize[1]) * pool.granularity;
statistics._usedSize += size_t(pool.totalAreaUsed) * pool.granularity; statistics._usedSize += size_t(pool.totalAreaUsed[0] + pool.totalAreaUsed[1]) * pool.granularity;
statistics._overheadSize += size_t(pool.totalOverheadBytes); statistics._overheadSize += size_t(pool.totalOverheadBytes);
} }
@@ -747,16 +796,15 @@ JitAllocator::Statistics JitAllocator::statistics() const noexcept {
// JitAllocator - Alloc & Release // JitAllocator - Alloc & Release
// ============================== // ==============================
Error JitAllocator::alloc(void** rxPtrOut, void** rwPtrOut, size_t size) noexcept { Error JitAllocator::alloc(Span& out, size_t size) noexcept {
out = Span{};
if (ASMJIT_UNLIKELY(_impl == &JitAllocatorImpl_none)) if (ASMJIT_UNLIKELY(_impl == &JitAllocatorImpl_none))
return DebugUtils::errored(kErrorNotInitialized); return DebugUtils::errored(kErrorNotInitialized);
JitAllocatorPrivateImpl* impl = static_cast<JitAllocatorPrivateImpl*>(_impl); JitAllocatorPrivateImpl* impl = static_cast<JitAllocatorPrivateImpl*>(_impl);
constexpr uint32_t kNoIndex = std::numeric_limits<uint32_t>::max(); constexpr uint32_t kNoIndex = std::numeric_limits<uint32_t>::max();
*rxPtrOut = nullptr;
*rwPtrOut = nullptr;
// Align to the minimum granularity by default. // Align to the minimum granularity by default.
size = Support::alignUp<size_t>(size, impl->granularity); size = Support::alignUp<size_t>(size, impl->granularity);
if (ASMJIT_UNLIKELY(size == 0)) if (ASMJIT_UNLIKELY(size == 0))
@@ -840,32 +888,35 @@ Error JitAllocator::alloc(void** rxPtrOut, void** rwPtrOut, size_t size) noexcep
impl->allocationCount++; impl->allocationCount++;
block->markAllocatedArea(areaIndex, areaIndex + areaSize); block->markAllocatedArea(areaIndex, areaIndex + areaSize);
// Return a pointer to the allocated memory. // Return a span referencing the allocated memory.
size_t offset = pool->byteSizeFromAreaSize(areaIndex); size_t offset = pool->byteSizeFromAreaSize(areaIndex);
ASMJIT_ASSERT(offset <= block->blockSize() - size); ASMJIT_ASSERT(offset <= block->blockSize() - size);
*rxPtrOut = block->rxPtr() + offset; out._rx = block->rxPtr() + offset;
*rwPtrOut = block->rwPtr() + offset; out._rw = block->rwPtr() + offset;
out._size = size;
out._block = static_cast<void*>(block);
return kErrorOk; return kErrorOk;
} }
Error JitAllocator::release(void* rxPtr) noexcept { Error JitAllocator::release(void* rx) noexcept {
if (ASMJIT_UNLIKELY(_impl == &JitAllocatorImpl_none)) if (ASMJIT_UNLIKELY(_impl == &JitAllocatorImpl_none))
return DebugUtils::errored(kErrorNotInitialized); return DebugUtils::errored(kErrorNotInitialized);
if (ASMJIT_UNLIKELY(!rxPtr)) if (ASMJIT_UNLIKELY(!rx))
return DebugUtils::errored(kErrorInvalidArgument); return DebugUtils::errored(kErrorInvalidArgument);
JitAllocatorPrivateImpl* impl = static_cast<JitAllocatorPrivateImpl*>(_impl); JitAllocatorPrivateImpl* impl = static_cast<JitAllocatorPrivateImpl*>(_impl);
LockGuard guard(impl->lock); LockGuard guard(impl->lock);
JitAllocatorBlock* block = impl->tree.get(static_cast<uint8_t*>(rxPtr)); JitAllocatorBlock* block = impl->tree.get(static_cast<uint8_t*>(rx));
if (ASMJIT_UNLIKELY(!block)) if (ASMJIT_UNLIKELY(!block))
return DebugUtils::errored(kErrorInvalidState); return DebugUtils::errored(kErrorInvalidState);
// Offset relative to the start of the block. // Offset relative to the start of the block.
JitAllocatorPool* pool = block->pool(); JitAllocatorPool* pool = block->pool();
size_t offset = (size_t)((uint8_t*)rxPtr - block->rxPtr()); size_t offset = (size_t)((uint8_t*)rx - block->rxPtr());
// The first bit representing the allocated area and its size. // The first bit representing the allocated area and its size.
uint32_t areaIndex = uint32_t(offset >> pool->granularityLog2); uint32_t areaIndex = uint32_t(offset >> pool->granularityLog2);
@@ -898,76 +949,88 @@ Error JitAllocator::release(void* rxPtr) noexcept {
return kErrorOk; return kErrorOk;
} }
Error JitAllocator::shrink(void* rxPtr, size_t newSize) noexcept { static Error JitAllocatorImpl_shrink(JitAllocatorPrivateImpl* impl, JitAllocator::Span& span, size_t newSize, bool alreadyUnderWriteScope) noexcept {
if (ASMJIT_UNLIKELY(_impl == &JitAllocatorImpl_none)) JitAllocatorBlock* block = static_cast<JitAllocatorBlock*>(span._block);
return DebugUtils::errored(kErrorNotInitialized);
if (ASMJIT_UNLIKELY(!rxPtr))
return DebugUtils::errored(kErrorInvalidArgument);
if (ASMJIT_UNLIKELY(newSize == 0))
return release(rxPtr);
JitAllocatorPrivateImpl* impl = static_cast<JitAllocatorPrivateImpl*>(_impl);
LockGuard guard(impl->lock);
JitAllocatorBlock* block = impl->tree.get(static_cast<uint8_t*>(rxPtr));
if (ASMJIT_UNLIKELY(!block)) if (ASMJIT_UNLIKELY(!block))
return DebugUtils::errored(kErrorInvalidArgument); return DebugUtils::errored(kErrorInvalidArgument);
LockGuard guard(impl->lock);
// Offset relative to the start of the block. // Offset relative to the start of the block.
JitAllocatorPool* pool = block->pool(); JitAllocatorPool* pool = block->pool();
size_t offset = (size_t)((uint8_t*)rxPtr - block->rxPtr()); size_t offset = (size_t)((uint8_t*)span.rx() - block->rxPtr());
// The first bit representing the allocated area and its size. // The first bit representing the allocated area and its size.
uint32_t areaStart = uint32_t(offset >> pool->granularityLog2); uint32_t areaStart = uint32_t(offset >> pool->granularityLog2);
// Don't trust `span.size()` - if it has been already truncated we would be off...
bool isUsed = Support::bitVectorGetBit(block->_usedBitVector, areaStart); bool isUsed = Support::bitVectorGetBit(block->_usedBitVector, areaStart);
if (ASMJIT_UNLIKELY(!isUsed)) if (ASMJIT_UNLIKELY(!isUsed))
return DebugUtils::errored(kErrorInvalidArgument); return DebugUtils::errored(kErrorInvalidArgument);
uint32_t areaEnd = uint32_t(Support::bitVectorIndexOf(block->_stopBitVector, areaStart, true)) + 1; uint32_t areaEnd = uint32_t(Support::bitVectorIndexOf(block->_stopBitVector, areaStart, true)) + 1;
uint32_t areaPrevSize = areaEnd - areaStart; uint32_t areaPrevSize = areaEnd - areaStart;
uint32_t spanPrevSize = areaPrevSize * pool->granularity;
uint32_t areaShrunkSize = pool->areaSizeFromByteSize(newSize); uint32_t areaShrunkSize = pool->areaSizeFromByteSize(newSize);
if (ASMJIT_UNLIKELY(areaShrunkSize > areaPrevSize)) if (ASMJIT_UNLIKELY(areaShrunkSize > areaPrevSize))
return DebugUtils::errored(kErrorInvalidState); return DebugUtils::errored(kErrorInvalidArgument);
uint32_t areaDiff = areaPrevSize - areaShrunkSize; uint32_t areaDiff = areaPrevSize - areaShrunkSize;
if (areaDiff) { if (areaDiff) {
block->markShrunkArea(areaStart + areaShrunkSize, areaEnd); block->markShrunkArea(areaStart + areaShrunkSize, areaEnd);
span._size = pool->byteSizeFromAreaSize(areaShrunkSize);
}
// Fill released memory if the secure mode is enabled. // Fill released memory if the secure mode is enabled.
if (Support::test(impl->options, JitAllocatorOptions::kFillUnusedMemory)) { if (newSize < spanPrevSize && Support::test(impl->options, JitAllocatorOptions::kFillUnusedMemory)) {
uint8_t* spanPtr = block->rwPtr() + (areaStart + areaShrunkSize) * pool->granularity; uint8_t* spanPtr = block->rwPtr() + (areaStart + areaShrunkSize) * pool->granularity;
size_t spanSize = areaDiff * pool->granularity; size_t spanSize = areaDiff * pool->granularity;
VirtMem::ProtectJitReadWriteScope scope(spanPtr, spanSize); if (!alreadyUnderWriteScope) {
JitAllocatorImpl_fillPattern(spanPtr, fillPattern(), spanSize); VirtMem::ProtectJitReadWriteScope scope(spanPtr, spanSize, VirtMem::CachePolicy::kNeverFlush);
JitAllocatorImpl_fillPattern(spanPtr, impl->fillPattern, spanSize);
}
else {
JitAllocatorImpl_fillPattern(spanPtr, impl->fillPattern, spanSize);
} }
} }
return kErrorOk; return kErrorOk;
} }
Error JitAllocator::query(void* rxPtr, void** rxPtrOut, void** rwPtrOut, size_t* sizeOut) const noexcept { Error JitAllocator::shrink(Span& span, size_t newSize) noexcept {
*rxPtrOut = nullptr; if (ASMJIT_UNLIKELY(_impl == &JitAllocatorImpl_none))
*rwPtrOut = nullptr; return DebugUtils::errored(kErrorNotInitialized);
*sizeOut = 0u;
if (ASMJIT_UNLIKELY(!span.rx()))
return DebugUtils::errored(kErrorInvalidArgument);
if (ASMJIT_UNLIKELY(newSize == 0)) {
Error err = release(span.rx());
span = Span{};
return err;
}
return JitAllocatorImpl_shrink(static_cast<JitAllocatorPrivateImpl*>(_impl), span, newSize, false);
}
Error JitAllocator::query(Span& out, void* rx) const noexcept {
out = Span{};
if (ASMJIT_UNLIKELY(_impl == &JitAllocatorImpl_none)) if (ASMJIT_UNLIKELY(_impl == &JitAllocatorImpl_none))
return DebugUtils::errored(kErrorNotInitialized); return DebugUtils::errored(kErrorNotInitialized);
JitAllocatorPrivateImpl* impl = static_cast<JitAllocatorPrivateImpl*>(_impl); JitAllocatorPrivateImpl* impl = static_cast<JitAllocatorPrivateImpl*>(_impl);
LockGuard guard(impl->lock); LockGuard guard(impl->lock);
JitAllocatorBlock* block = impl->tree.get(static_cast<uint8_t*>(rxPtr)); JitAllocatorBlock* block = impl->tree.get(static_cast<uint8_t*>(rx));
if (ASMJIT_UNLIKELY(!block)) if (ASMJIT_UNLIKELY(!block))
return DebugUtils::errored(kErrorInvalidArgument); return DebugUtils::errored(kErrorInvalidArgument);
// Offset relative to the start of the block. // Offset relative to the start of the block.
JitAllocatorPool* pool = block->pool(); JitAllocatorPool* pool = block->pool();
size_t offset = (size_t)((uint8_t*)rxPtr - block->rxPtr()); size_t offset = (size_t)((uint8_t*)rx - block->rxPtr());
// The first bit representing the allocated area and its size. // The first bit representing the allocated area and its size.
uint32_t areaStart = uint32_t(offset >> pool->granularityLog2); uint32_t areaStart = uint32_t(offset >> pool->granularityLog2);
@@ -980,13 +1043,102 @@ Error JitAllocator::query(void* rxPtr, void** rxPtrOut, void** rwPtrOut, size_t*
size_t byteOffset = pool->byteSizeFromAreaSize(areaStart); size_t byteOffset = pool->byteSizeFromAreaSize(areaStart);
size_t byteSize = pool->byteSizeFromAreaSize(areaEnd - areaStart); size_t byteSize = pool->byteSizeFromAreaSize(areaEnd - areaStart);
*rxPtrOut = static_cast<uint8_t*>(block->_mapping.rx) + byteOffset; out._rx = static_cast<uint8_t*>(block->_mapping.rx) + byteOffset;
*rwPtrOut = static_cast<uint8_t*>(block->_mapping.rw) + byteOffset; out._rw = static_cast<uint8_t*>(block->_mapping.rw) + byteOffset;
*sizeOut = byteSize; out._size = byteSize;
out._block = static_cast<void*>(block);
return kErrorOk; return kErrorOk;
} }
// JitAllocator - Write
// ====================
static ASMJIT_FORCE_INLINE VirtMem::CachePolicy JitAllocator_defaultPolicyForSpan(const JitAllocator::Span& span) noexcept {
if (Support::test(span.flags(), JitAllocator::Span::Flags::kInstructionCacheClean))
return VirtMem::CachePolicy::kNeverFlush;
else
return VirtMem::CachePolicy::kFlushAfterWrite;
}
Error JitAllocator::write(Span& span, size_t offset, const void* src, size_t size, VirtMem::CachePolicy policy) noexcept {
if (ASMJIT_UNLIKELY(span._block == nullptr || offset > span.size() || span.size() - offset < size))
return DebugUtils::errored(kErrorInvalidArgument);
if (ASMJIT_UNLIKELY(size == 0))
return kErrorOk;
if (policy == VirtMem::CachePolicy::kDefault)
policy = JitAllocator_defaultPolicyForSpan(span);
VirtMem::ProtectJitReadWriteScope writeScope(span.rx(), span.size(), policy);
memcpy(static_cast<uint8_t*>(span.rw()) + offset, src, size);
return kErrorOk;
}
Error JitAllocator::write(Span& span, WriteFunc writeFunc, void* userData, VirtMem::CachePolicy policy) noexcept {
if (ASMJIT_UNLIKELY(span._block == nullptr) || span.size() == 0)
return DebugUtils::errored(kErrorInvalidArgument);
size_t size = span.size();
if (ASMJIT_UNLIKELY(size == 0))
return kErrorOk;
if (policy == VirtMem::CachePolicy::kDefault)
policy = JitAllocator_defaultPolicyForSpan(span);
VirtMem::ProtectJitReadWriteScope writeScope(span.rx(), span.size(), policy);
ASMJIT_PROPAGATE(writeFunc(span, userData));
// Check whether span.shrink() has been called.
if (span.size() != size) {
// OK, this is a bit awkward... However, shrink wants the original span and newSize, so we have to swap.
std::swap(span._size, size);
return JitAllocatorImpl_shrink(static_cast<JitAllocatorPrivateImpl*>(_impl), span, size, true);
}
return kErrorOk;
}
// JitAllocator - Write Scope
// ==========================
Error JitAllocator::beginWriteScope(WriteScopeData& scope, VirtMem::CachePolicy policy) noexcept {
scope._allocator = this;
scope._data[0] = size_t(policy);
return kErrorOk;
}
Error JitAllocator::endWriteScope(WriteScopeData& scope) noexcept {
if (ASMJIT_UNLIKELY(!scope._allocator))
return DebugUtils::errored(kErrorInvalidArgument);
return kErrorOk;
}
Error JitAllocator::flushWriteScope(WriteScopeData& scope) noexcept {
if (ASMJIT_UNLIKELY(!scope._allocator))
return DebugUtils::errored(kErrorInvalidArgument);
return kErrorOk;
}
Error JitAllocator::scopedWrite(WriteScopeData& scope, Span& span, size_t offset, const void* src, size_t size) noexcept {
if (ASMJIT_UNLIKELY(!scope._allocator))
return DebugUtils::errored(kErrorInvalidArgument);
VirtMem::CachePolicy policy = VirtMem::CachePolicy(scope._data[0]);
return scope._allocator->write(span, offset, src, size, policy);
}
Error JitAllocator::scopedWrite(WriteScopeData& scope, Span& span, WriteFunc writeFunc, void* userData) noexcept {
if (ASMJIT_UNLIKELY(!scope._allocator))
return DebugUtils::errored(kErrorInvalidArgument);
VirtMem::CachePolicy policy = VirtMem::CachePolicy(scope._data[0]);
return scope._allocator->write(span, writeFunc, userData, policy);
}
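To illustrate the flow that the shrink handling above supports, a hedged sketch of the callback-based write where the callback truncates the span (`allocator` and `emitCode` are placeholders, not part of the commit):

JitAllocator::Span span;
ASMJIT_PROPAGATE(allocator.alloc(span, 4096));

Error err = allocator.write(span, [&](JitAllocator::Span& s) noexcept -> Error {
  // Emit code through the RW pointer while the write scope is active.
  size_t written = emitCode(s.rw(), s.size());
  // Shrinking the span inside the callback returns the unused tail to the allocator.
  s.shrink(written);
  return kErrorOk;
});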
// JitAllocator - Tests // JitAllocator - Tests
// ==================== // ====================
@@ -1160,15 +1312,13 @@ public:
} }
void* alloc(size_t size) noexcept { void* alloc(size_t size) noexcept {
void* rxPtr; JitAllocator::Span span;
void* rwPtr; Error err = _allocator.alloc(span, size);
Error err = _allocator.alloc(&rxPtr, &rwPtr, size);
EXPECT_EQ(err, kErrorOk) EXPECT_EQ(err, kErrorOk)
.message("JitAllocator failed to allocate %zu bytes\n", size); .message("JitAllocator failed to allocate %zu bytes\n", size);
_insert(rxPtr, rwPtr, size); _insert(span.rx(), span.rw(), size);
return rxPtr; return span.rx();
} }
void release(void* p) noexcept { void release(void* p) noexcept {
@@ -1184,7 +1334,9 @@ public:
if (!newSize) if (!newSize)
return release(p); return release(p);
Error err = _allocator.shrink(p, newSize); JitAllocator::Span span;
EXPECT_EQ(_allocator.query(span, p), kErrorOk);
Error err = _allocator.shrink(span, newSize);
EXPECT_EQ(err, kErrorOk) EXPECT_EQ(err, kErrorOk)
.message("JitAllocator failed to shrink %p to %zu bytes\n", p, newSize); .message("JitAllocator failed to shrink %p to %zu bytes\n", p, newSize);
@@ -1244,16 +1396,21 @@ static void test_jit_allocator_alloc_release() noexcept {
uint32_t granularity; uint32_t granularity;
}; };
using Opt = JitAllocatorOptions;
TestParams testParams[] = { TestParams testParams[] = {
{ "Default", JitAllocatorOptions::kNone, 0, 0 }, { "Default" , Opt::kNone, 0, 0 },
{ "16MB blocks", JitAllocatorOptions::kNone, 16 * 1024 * 1024, 0 }, { "16MB blocks" , Opt::kNone, 16 * 1024 * 1024, 0 },
{ "256B granularity", JitAllocatorOptions::kNone, 0, 256 }, { "256B granularity" , Opt::kNone, 0, 256 },
{ "kUseDualMapping", JitAllocatorOptions::kUseDualMapping, 0, 0 }, { "kUseDualMapping" , Opt::kUseDualMapping , 0, 0 },
{ "kUseMultiplePools", JitAllocatorOptions::kUseMultiplePools, 0, 0 }, { "kUseMultiplePools" , Opt::kUseMultiplePools, 0, 0 },
{ "kFillUnusedMemory", JitAllocatorOptions::kFillUnusedMemory, 0, 0 }, { "kFillUnusedMemory" , Opt::kFillUnusedMemory, 0, 0 },
{ "kImmediateRelease", JitAllocatorOptions::kImmediateRelease, 0, 0 }, { "kImmediateRelease" , Opt::kImmediateRelease, 0, 0 },
{ "kDisableInitialPadding", JitAllocatorOptions::kDisableInitialPadding, 0, 0 }, { "kDisableInitialPadding" , Opt::kDisableInitialPadding, 0, 0 },
{ "kUseDualMapping | kFillUnusedMemory", JitAllocatorOptions::kUseDualMapping | JitAllocatorOptions::kFillUnusedMemory, 0, 0 } { "kUseLargePages" , Opt::kUseLargePages, 0, 0 },
{ "kUseLargePages | kFillUnusedMemory" , Opt::kUseLargePages | Opt::kFillUnusedMemory, 0, 0 },
{ "kUseLargePages | kAlignBlockSizeToLargePage", Opt::kUseLargePages | Opt::kAlignBlockSizeToLargePage, 0, 0 },
{ "kUseDualMapping | kFillUnusedMemory" , Opt::kUseDualMapping | Opt::kFillUnusedMemory, 0, 0 }
}; };
INFO("BitVectorRangeIterator<uint32_t>"); INFO("BitVectorRangeIterator<uint32_t>");
@@ -1372,23 +1529,18 @@ static void test_jit_allocator_alloc_release() noexcept {
static void test_jit_allocator_query() noexcept { static void test_jit_allocator_query() noexcept {
JitAllocator allocator; JitAllocator allocator;
size_t allocatedSize = 100;
void* rxPtr = nullptr; JitAllocator::Span allocatedSpan;
void* rwPtr = nullptr; EXPECT_EQ(allocator.alloc(allocatedSpan, allocatedSize), kErrorOk);
size_t size = 100; EXPECT_NOT_NULL(allocatedSpan.rx());
EXPECT_GE(allocatedSpan.size(), allocatedSize);
EXPECT_EQ(allocator.alloc(&rxPtr, &rwPtr, size), kErrorOk); JitAllocator::Span queriedSpan;
EXPECT_NOT_NULL(rxPtr); EXPECT_EQ(allocator.query(queriedSpan, allocatedSpan.rx()), kErrorOk);
EXPECT_NOT_NULL(rwPtr); EXPECT_EQ(allocatedSpan.rx(), queriedSpan.rx());
EXPECT_EQ(allocatedSpan.rw(), queriedSpan.rw());
void* rxPtrQueried = nullptr; EXPECT_EQ(allocatedSpan.size(), queriedSpan.size());
void* rwPtrQueried = nullptr;
size_t sizeQueried;
EXPECT_EQ(allocator.query(rxPtr, &rxPtrQueried, &rwPtrQueried, &sizeQueried), kErrorOk);
EXPECT_EQ(rxPtrQueried, rxPtr);
EXPECT_EQ(rwPtrQueried, rwPtr);
EXPECT_EQ(sizeQueried, Support::alignUp(size, allocator.granularity()));
} }
UNIT(jit_allocator) { UNIT(jit_allocator) {

View File

@@ -10,6 +10,7 @@
#ifndef ASMJIT_NO_JIT #ifndef ASMJIT_NO_JIT
#include "../core/globals.h" #include "../core/globals.h"
#include "../core/support.h"
#include "../core/virtmem.h" #include "../core/virtmem.h"
ASMJIT_BEGIN_NAMESPACE ASMJIT_BEGIN_NAMESPACE
@@ -70,6 +71,24 @@ enum class JitAllocatorOptions : uint32_t {
//! when the code is not instrumented with LLVM's UBSAN. //! when the code is not instrumented with LLVM's UBSAN.
kDisableInitialPadding = 0x00000010u, kDisableInitialPadding = 0x00000010u,
//! Enables the use of large pages, if they are supported and the process can actually allocate them.
//!
//! \important This flag is a hint - if large pages can be allocated, JitAllocator would try to allocate them.
//! However, if the allocation fails, it will still try to fall back to regular pages, as \ref JitAllocator
//! is designed to minimize allocation failures, so a regular page is better than no page at all. Also, if a
//! block \ref JitAllocator wants to allocate is too small to consume a whole large page, regular page(s) will
//! be allocated as well.
kUseLargePages = 0x00000020u,
//! Forces \ref JitAllocator to always align block size to be at least as big as a large page, if large pages are
//! enabled. This option does nothing if large pages are disabled.
//!
//! \remarks If the \ref kUseLargePages option is used, the allocator prefers large pages only when allocating a
//! block of sufficient size. Usually the allocator first allocates a smaller block and increases the block size of
//! subsequent allocations as more requests come. This option makes sure that even the first allocated block is at
//! least as large as a large page when large pages are enabled and can be allocated.
kAlignBlockSizeToLargePage = 0x00000040u,
//! Use a custom fill pattern, must be combined with `kFlagFillUnusedMemory`. //! Use a custom fill pattern, must be combined with `kFlagFillUnusedMemory`.
kCustomFillPattern = 0x10000000u kCustomFillPattern = 0x10000000u
}; };
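For illustration, enabling the new large-page options when constructing an allocator might look like this (a sketch; it assumes JitAllocator::CreateParams with an `options` field, other fields left at their defaults):

JitAllocator::CreateParams params {};
params.options = JitAllocatorOptions::kUseLargePages
               | JitAllocatorOptions::kAlignBlockSizeToLargePage;

// Large pages are only a hint - allocation transparently falls back to regular pages.
JitAllocator allocator(&params);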
@@ -95,6 +114,7 @@ class JitAllocator {
public: public:
ASMJIT_NONCOPYABLE(JitAllocator) ASMJIT_NONCOPYABLE(JitAllocator)
//! Visible \ref JitAllocator implementation data.
struct Impl { struct Impl {
//! Allocator options. //! Allocator options.
JitAllocatorOptions options; JitAllocatorOptions options;
@@ -106,9 +126,14 @@ public:
uint32_t fillPattern; uint32_t fillPattern;
}; };
//! \name Members
//! \{
//! Allocator implementation (private). //! Allocator implementation (private).
Impl* _impl; Impl* _impl;
//! \}
//! \name Construction & Destruction //! \name Construction & Destruction
//! \{ //! \{
@@ -187,31 +212,298 @@ public:
//! \name Alloc & Release //! \name Alloc & Release
//! \{ //! \{
//! Allocates a new memory block of the requested `size`. //! A memory reference returned by \ref JitAllocator::alloc().
//! //!
//! When the function is successful it stores two pointers in `rxPtrOut` and `rwPtrOut`. The pointers will be //! Span contains everything needed to actually write new code to the memory chunk it references.
//! different only if `kOptionUseDualMapping` was used to setup the allocator (in that case the `rxPtrOut` would class Span {
//! point to a Read+Execute region and `rwPtrOut` would point to a Read+Write region of the same memory-mapped block. public:
ASMJIT_API Error alloc(void** rxPtrOut, void** rwPtrOut, size_t size) noexcept; //! \name Constants
//! \{
//! Span flags
enum class Flags : uint32_t {
//! No flags.
kNone = 0u,
//! The process has never executed the region of the span.
//!
//! If this flag is set on a \ref Span it means that the allocator can avoid flushing
//! the instruction cache after code has been written to it.
kInstructionCacheClean = 0x00000001u
};
//! \}
//! \name Members
//! \{
//! Address of memory that has Read and Execute permissions.
void* _rx = nullptr;
//! Address of memory that has Read and Write permissions.
void* _rw = nullptr;
//! Size of the span in bytes (rounded up to the allocation granularity).
size_t _size = 0;
//! Pointer that references a memory block maintained by \ref JitAllocator.
//!
//! This pointer is considered private and should never be used nor inspected outside of AsmJit.
void* _block = nullptr;
//! Span flags.
Flags _flags = Flags::kNone;
//! Reserved for future use.
uint32_t _reserved = 0;
//! \}
//! \name Accessors
//! \{
//! Returns a pointer having Read & Execute permissions (references executable memory).
//!
//! This pointer is never NULL if the allocation succeeded; it points to executable memory.
inline void* rx() const noexcept { return _rx; }
//! Returns a pointer having Read & Write permissions (references writable memory).
//!
//! Depending on the type of the allocation strategy this could either be:
//!
//! - the same address as returned by `rx()` if the allocator uses RWX mapping (pages have all of Read, Write,
//! and Execute permissions) or MAP_JIT, which requires either \ref ProtectJitReadWriteScope or to call
//! VirtMem::protectJitMemory() manually.
//! - a valid pointer, but not the same as `rx` - this would be valid if dual mapping is used.
//! - NULL pointer, in case that the allocation strategy doesn't use RWX, MAP_JIT, or dual mapping. In this
//! case only \ref JitAllocator can copy new code into the executable memory referenced by \ref Addr.
//!
//! \note If `rw()` returns a non-null pointer it's important to use either VirtMem::protectJitMemory() or
//! \ref ProtectJitReadWriteScope to guard the write, because in case of `MAP_JIT` it would temporarily switch
//! the permissions of the pointer to RW (that's per thread permissions). if \ref ProtectJitReadWriteScope is
//! not used it's important to clear the instruction cache via \ref VirtMem::flushInstructionCache() after the
//! write is done.
inline void* rw() const noexcept { return _rw; }
//! Returns size of this span, aligned to the allocator granularity.
inline size_t size() const noexcept { return _size; }
//! Returns span flags.
inline Flags flags() const noexcept { return _flags; }
//! Shrinks this span to `newSize`.
//!
//! \note This is the only function that is able to change the size of a span, and its only use case is to
//! shrink the span size during \ref JitAllocator::write(). When the writer detects that the span size shrunk,
//! it will automatically shrink the memory used by the span, and propagate the new aligned size to the caller.
inline void shrink(size_t newSize) noexcept { _size = Support::min(_size, newSize); }
//! Returns whether \ref rw() returns a non-null pointer.
inline bool isDirectlyWritable() const noexcept { return _rw != nullptr; }
//! \}
};
//! Allocates a new memory span of the requested `size`.
ASMJIT_API Error alloc(Span& out, size_t size) noexcept;
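Where a span is directly writable (rw() returns a non-null pointer), code can also be written without JitAllocator::write(); a hedged sketch based on the notes above (`allocator`, `code`, and `codeSize` are placeholders; includes such as <cstring> are omitted):

JitAllocator::Span span;
ASMJIT_PROPAGATE(allocator.alloc(span, codeSize));

if (span.isDirectlyWritable()) {
  // Guards per-thread RW permissions (MAP_JIT) and flushes the instruction cache on destruction.
  VirtMem::ProtectJitReadWriteScope scope(span.rx(), span.size());
  memcpy(span.rw(), code, codeSize);
}
else {
  // Otherwise the explicit write API is the only way to reach the executable memory.
  ASMJIT_PROPAGATE(allocator.write(span, 0, code, codeSize));
}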
//! Releases a memory block returned by `alloc()`. //! Releases a memory block returned by `alloc()`.
//! //!
//! \remarks This function is thread-safe. //! \remarks This function is thread-safe.
ASMJIT_API Error release(void* rxPtr) noexcept; ASMJIT_API Error release(void* rx) noexcept;
//! Frees extra memory allocated with `rxPtr` by shrinking it to the given `newSize`. //! Frees extra memory allocated with `rx` by shrinking it to the given `newSize`.
//! //!
//! \remarks This function is thread-safe. //! \remarks This function is thread-safe.
ASMJIT_API Error shrink(void* rxPtr, size_t newSize) noexcept; ASMJIT_API Error shrink(Span& span, size_t newSize) noexcept;
//! Queries information about an allocated memory block that contains the given `rxPtr`. //! Queries information about an allocated memory block that contains the given `rx`, and writes it to `out`.
//! //!
//! The function returns `kErrorOk` when `rxPtr` is matched and fills `rxPtrOut`, `rwPtrOut`, and `sizeOut` output //! If the pointer is matched, the function returns `kErrorOk` and fills `out` with the corresponding span.
//! arguments. The returned `rxPtrOut` and `rwPtrOut` pointers point to the beginning of the block, and `sizeOut` ASMJIT_API Error query(Span& out, void* rx) const noexcept;
//! describes the total amount of bytes this allocation uses - `sizeOut` will always be aligned to the allocation
//! granularity, so for example if an allocation was 1 byte and the size granularity is 64, the returned `sizeOut` #if !defined(ASMJIT_NO_DEPRECATED)
//! will be 64 bytes, because that's what the allocator sees. //! Allocates a new memory block of the requested `size`.
ASMJIT_API Error query(void* rxPtr, void** rxPtrOut, void** rwPtrOut, size_t* sizeOut) const noexcept; ASMJIT_DEPRECATED("Use alloc(Span& out, size_t size) instead")
ASMJIT_FORCE_INLINE Error alloc(void** rxPtrOut, void** rwPtrOut, size_t size) noexcept {
Span span;
Error err = alloc(span, size);
*rwPtrOut = span.rw();
*rxPtrOut = span.rx();
return err;
}
ASMJIT_DEPRECATED("Use shrink(Span& span, size_t newSize) instead")
ASMJIT_FORCE_INLINE Error shrink(void* rxPtr, size_t newSize) noexcept {
Span span;
ASMJIT_PROPAGATE(query(span, rxPtr));
return (span.size() > newSize) ? shrink(span, newSize) : Error(kErrorOk);
}
ASMJIT_DEPRECATED("Use query(Span& out, void* rx) instead")
ASMJIT_FORCE_INLINE Error query(void* rxPtr, void** rxPtrOut, void** rwPtrOut, size_t* sizeOut) const noexcept {
Span span;
Error err = query(span, rxPtr);
*rxPtrOut = span.rx();
*rwPtrOut = span.rw();
*sizeOut = span.size();
return err;
}
#endif
//! \}
//! \name Write Operations
//! \{
//! A callback that can be passed to \ref JitAllocator::write() to write code into a \ref Span.
typedef Error (ASMJIT_CDECL* WriteFunc)(Span& span, void* userData) ASMJIT_NOEXCEPT_TYPE;
//! Writes `size` bytes from `src` into `span` at the given `offset`.
ASMJIT_API Error write(
Span& span,
size_t offset,
const void* src,
size_t size,
VirtMem::CachePolicy policy = VirtMem::CachePolicy::kDefault) noexcept;
//! Writes into `span` by calling `writeFunc` with `userData` while the span is writable.
ASMJIT_API Error write(
Span& span,
WriteFunc writeFunc,
void* userData,
VirtMem::CachePolicy policy = VirtMem::CachePolicy::kDefault) noexcept;
//! Writes into `span` by calling the given `lambdaFunc` while the span is writable.
template<class Lambda>
ASMJIT_FORCE_INLINE Error write(
Span& span,
Lambda&& lambdaFunc,
VirtMem::CachePolicy policy = VirtMem::CachePolicy::kDefault) noexcept {
WriteFunc wrapperFunc = [](Span& span, void* userData) noexcept -> Error {
Lambda& lambdaFunc = *static_cast<Lambda*>(userData);
return lambdaFunc(span);
};
return write(span, wrapperFunc, (void*)(&lambdaFunc), policy);
}
//! \}
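A sketch of the callback-based write path above, using the lambda overload. It assumes the same includes
and namespace as the earlier sketch; `codeData` and `codeSize` are again illustrative inputs:

  // Publishes code into an already allocated span. The allocator prepares write access,
  // invokes the lambda, and handles cache flushing according to the cache policy.
  static Error publishCode(JitAllocator& allocator, JitAllocator::Span& span,
                           const void* codeData, size_t codeSize) noexcept {
    return allocator.write(span, [&](JitAllocator::Span& span) noexcept -> Error {
      memcpy(span.rw(), codeData, codeSize);
      // Shrinking here is allowed - the allocator reclaims the extra memory after the write.
      span.shrink(codeSize);
      return kErrorOk;
    });
  }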
//! \name Write Operations with Scope
//! \{
//! \cond INTERNAL
//! Write scope data.
//!
//! This is mostly for internal purposes, please use \ref WriteScope instead.
struct WriteScopeData {
//! \name Members
//! \{
//! Link to the allocator.
JitAllocator* _allocator;
//! Cache policy passed to \ref JitAllocator::beginWriteScope().
VirtMem::CachePolicy _policy;
//! Internal flags used by the implementation.
uint32_t _flags;
//! Internal data used by the implementation.
size_t _data[64];
//! \}
};
//! Begins a write `scope`.
//!
//! This is mostly for internal purposes, please use \ref WriteScope constructor instead.
ASMJIT_API Error beginWriteScope(WriteScopeData& scope, VirtMem::CachePolicy policy = VirtMem::CachePolicy::kDefault) noexcept;
//! Ends a write `scope`.
//!
//! This is mostly for internal purposes, please use \ref WriteScope destructor instead.
ASMJIT_API Error endWriteScope(WriteScopeData& scope) noexcept;
//! Flushes accumulated changes in a write `scope`.
//!
//! This is mostly for internal purposes, please use \ref WriteScope destructor or \ref WriteScope::flush() instead.
ASMJIT_API Error flushWriteScope(WriteScopeData& scope) noexcept;
//! Alternative to `JitAllocator::write(span, offset, src, size)`, but under a write `scope`.
//!
//! This is mostly for internal purposes, please use \ref WriteScope::write() instead.
ASMJIT_API Error scopedWrite(WriteScopeData& scope, Span& span, size_t offset, const void* src, size_t size) noexcept;
//! Alternative to `JitAllocator::write(span, writeFunc, userData)`, but under a write `scope`.
//!
//! This is mostly for internal purposes, please use \ref WriteScope::write() instead.
ASMJIT_API Error scopedWrite(WriteScopeData& scope, Span& span, WriteFunc writeFunc, void* userData) noexcept;
//! Alternative to `JitAllocator::write(span, <lambda>)`, but under a write `scope`.
//!
//! This is mostly for internal purposes, please use \ref WriteScope::write() instead.
template<class Lambda>
inline Error scopedWrite(WriteScopeData& scope, Span& span, Lambda&& lambdaFunc) noexcept {
WriteFunc wrapperFunc = [](Span& span, void* userData) noexcept -> Error {
Lambda& lambdaFunc = *static_cast<Lambda*>(userData);
return lambdaFunc(span);
};
return scopedWrite(scope, span, wrapperFunc, (void*)(&lambdaFunc));
}
//! \endcond
//! Write scope can be used to write multiple spans under a single scope, which is optimized for this use case.
class WriteScope : public WriteScopeData {
public:
ASMJIT_NONCOPYABLE(WriteScope)
//! \name Construction & Destruction
//! \{
//! Begins a write scope.
inline explicit WriteScope(JitAllocator* allocator, VirtMem::CachePolicy policy = VirtMem::CachePolicy::kDefault) noexcept {
allocator->beginWriteScope(*this, policy);
}
//! Ends a write scope.
inline ~WriteScope() noexcept {
if (_allocator)
_allocator->endWriteScope(*this);
}
//! \}
//! \name Accessors
//! \{
//! Returns the associated \ref JitAllocator.
inline JitAllocator* allocator() const noexcept { return _allocator; }
//! Returns the cache policy this scope uses.
inline VirtMem::CachePolicy policy() const noexcept { return _policy; }
//! \}
//! \name Operations
//! \{
//! Similar to `JitAllocator::write(span, offset, src, size)`, but under a write scope.
inline Error write(Span& span, size_t offset, const void* src, size_t size) noexcept {
return _allocator->scopedWrite(*this, span, offset, src, size);
}
//! Similar to `JitAllocator::write(span, writeFunc, userData)`, but under a write scope.
inline Error write(Span& span, WriteFunc writeFunc, void* userData) noexcept {
return _allocator->scopedWrite(*this, span, writeFunc, userData);
}
//! Similar to `JitAllocator::write(span, <lambda>)`, but under a write scope.
template<class Lambda>
inline Error write(Span& span, Lambda&& lambdaFunc) noexcept {
return _allocator->scopedWrite(*this, span, lambdaFunc);
}
//! Flushes accumulated changes in this write scope.
inline Error flush() noexcept {
return _allocator->flushWriteScope(*this);
}
//! \}
};
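A sketch of how \ref WriteScope could be used to publish several spans in one batch. The `Blob` type
and the helper are illustrative:

  struct Blob { const void* data; size_t size; };

  // Writes `count` code blobs into `count` previously allocated spans under a single write
  // scope, which is what the scope is optimized for.
  static Error publishAll(JitAllocator& allocator, JitAllocator::Span* spans,
                          const Blob* blobs, size_t count) noexcept {
    JitAllocator::WriteScope scope(&allocator);
    for (size_t i = 0; i < count; i++)
      ASMJIT_PROPAGATE(scope.write(spans[i], 0, blobs[i].data, blobs[i].size));
    return scope.flush();
  }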
//! \} //! \}
@@ -231,12 +523,8 @@ public:
//! Allocation overhead (in bytes) required to maintain all blocks. //! Allocation overhead (in bytes) required to maintain all blocks.
size_t _overheadSize; size_t _overheadSize;
inline void reset() noexcept { //! Resets the statistics to all zeros.
_blockCount = 0; inline void reset() noexcept { *this = Statistics{}; }
_usedSize = 0;
_reservedSize = 0;
_overheadSize = 0;
}
//! Returns count of blocks managed by `JitAllocator` at the moment. //! Returns count of blocks managed by `JitAllocator` at the moment.
inline size_t blockCount() const noexcept { return _blockCount; } inline size_t blockCount() const noexcept { return _blockCount; }

View File

@@ -30,42 +30,43 @@ Error JitRuntime::_add(void** dst, CodeHolder* code) noexcept {
if (ASMJIT_UNLIKELY(estimatedCodeSize == 0)) if (ASMJIT_UNLIKELY(estimatedCodeSize == 0))
return DebugUtils::errored(kErrorNoCodeGenerated); return DebugUtils::errored(kErrorNoCodeGenerated);
uint8_t* rx; JitAllocator::Span span;
uint8_t* rw; ASMJIT_PROPAGATE(_allocator.alloc(span, estimatedCodeSize));
ASMJIT_PROPAGATE(_allocator.alloc((void**)&rx, (void**)&rw, estimatedCodeSize));
// Relocate the code. // Relocate the code.
Error err = code->relocateToBase(uintptr_t((void*)rx)); Error err = code->relocateToBase(uintptr_t(span.rx()));
if (ASMJIT_UNLIKELY(err)) { if (ASMJIT_UNLIKELY(err)) {
_allocator.release(rx); _allocator.release(span.rx());
return err; return err;
} }
// Recalculate the final code size and shrink the memory we allocated for it // Recalculate the final code size and shrink the memory we allocated for it
// in case that some relocations didn't require records in an address table. // in case that some relocations didn't require records in an address table.
size_t codeSize = code->codeSize(); size_t codeSize = code->codeSize();
if (codeSize < estimatedCodeSize) ASMJIT_ASSERT(codeSize <= estimatedCodeSize);
_allocator.shrink(rx, codeSize);
{ _allocator.write(span, [&](JitAllocator::Span& span) noexcept -> Error {
VirtMem::ProtectJitReadWriteScope rwScope(rx, codeSize); uint8_t* rw = static_cast<uint8_t*>(span.rw());
for (Section* section : code->_sections) { for (Section* section : code->_sections) {
size_t offset = size_t(section->offset()); size_t offset = size_t(section->offset());
size_t bufferSize = size_t(section->bufferSize()); size_t bufferSize = size_t(section->bufferSize());
size_t virtualSize = size_t(section->virtualSize()); size_t virtualSize = size_t(section->virtualSize());
ASMJIT_ASSERT(offset + bufferSize <= codeSize); ASMJIT_ASSERT(offset + bufferSize <= span.size());
memcpy(rw + offset, section->data(), bufferSize); memcpy(rw + offset, section->data(), bufferSize);
if (virtualSize > bufferSize) { if (virtualSize > bufferSize) {
ASMJIT_ASSERT(offset + virtualSize <= codeSize); ASMJIT_ASSERT(offset + virtualSize <= span.size());
memset(rw + offset + bufferSize, 0, virtualSize - bufferSize); memset(rw + offset + bufferSize, 0, virtualSize - bufferSize);
} }
} }
}
*dst = rx; span.shrink(codeSize);
return kErrorOk;
});
*dst = span.rx();
return kErrorOk; return kErrorOk;
} }

View File

@@ -4,16 +4,19 @@
// SPDX-License-Identifier: Zlib // SPDX-License-Identifier: Zlib
#include "../core/api-build_p.h" #include "../core/api-build_p.h"
#include "../core/osutils.h" #include "../core/osutils_p.h"
#include "../core/support.h" #include "../core/support.h"
#if defined(_WIN32) #if defined(_WIN32)
#include <atomic> #include <atomic>
#elif defined(__APPLE__) #else
#include <fcntl.h>
#include <unistd.h>
#if defined(__APPLE__)
#include <mach/mach_time.h> #include <mach/mach_time.h>
#else #else
#include <time.h> #include <time.h>
#include <unistd.h> #endif
#endif #endif
ASMJIT_BEGIN_NAMESPACE ASMJIT_BEGIN_NAMESPACE
@@ -81,4 +84,27 @@ uint32_t OSUtils::getTickCount() noexcept {
#endif #endif
} }
#if !defined(_WIN32)
Error OSUtils::readFile(const char* name, String& dst, size_t maxSize) noexcept {
char* buffer = dst.prepare(String::ModifyOp::kAssign, maxSize);
if (ASMJIT_UNLIKELY(!buffer))
return DebugUtils::errored(kErrorOutOfMemory);
int fd = ::open(name, O_RDONLY);
if (fd < 0) {
dst.clear();
return DebugUtils::errored(kErrorFailedToOpenFile);
}
intptr_t len = ::read(fd, buffer, maxSize);
if (len >= 0) {
buffer[len] = '\0';
dst._setSize(size_t(len));
}
::close(fd);
return kErrorOk;
}
#endif
ASMJIT_END_NAMESPACE ASMJIT_END_NAMESPACE

View File

@@ -15,10 +15,11 @@ ASMJIT_BEGIN_NAMESPACE
//! Operating system utilities. //! Operating system utilities.
namespace OSUtils { namespace OSUtils {
//! Gets the current CPU tick count, used for benchmarking (1ms resolution). //! Gets the current CPU tick count, used for benchmarking (1ms resolution).
ASMJIT_API uint32_t getTickCount() noexcept; ASMJIT_API uint32_t getTickCount() noexcept;
};
} // {OSUtils}
//! \cond INTERNAL //! \cond INTERNAL
//! Lock. //! Lock.

View File

@@ -7,6 +7,7 @@
#define ASMJIT_CORE_OSUTILS_P_H_INCLUDED #define ASMJIT_CORE_OSUTILS_P_H_INCLUDED
#include "../core/osutils.h" #include "../core/osutils.h"
#include "../core/string.h"
ASMJIT_BEGIN_NAMESPACE ASMJIT_BEGIN_NAMESPACE
@@ -59,6 +60,15 @@ public:
inline ~LockGuard() noexcept { _target.unlock(); } inline ~LockGuard() noexcept { _target.unlock(); }
}; };
#if !defined(_WIN32)
namespace OSUtils {
//! Reads a file; only used on non-Windows platforms to access /sys and similar files when necessary.
Error readFile(const char* name, String& dst, size_t maxSize) noexcept;
} // {OSUtils}
#endif
//! \} //! \}
//! \endcond //! \endcond

View File

@@ -6,7 +6,7 @@
#include "../core/api-build_p.h" #include "../core/api-build_p.h"
#ifndef ASMJIT_NO_JIT #ifndef ASMJIT_NO_JIT
#include "../core/osutils.h" #include "../core/osutils_p.h"
#include "../core/string.h" #include "../core/string.h"
#include "../core/support.h" #include "../core/support.h"
#include "../core/virtmem.h" #include "../core/virtmem.h"
@@ -22,6 +22,22 @@
// Linux has a `memfd_create` syscall that we would like to use, if available. // Linux has a `memfd_create` syscall that we would like to use, if available.
#if defined(__linux__) #if defined(__linux__)
#include <sys/syscall.h> #include <sys/syscall.h>
#ifndef MAP_HUGETLB
#define MAP_HUGETLB 0x40000
#endif // MAP_HUGETLB
#ifndef MAP_HUGE_SHIFT
#define MAP_HUGE_SHIFT 26
#endif // MAP_HUGE_SHIFT
#ifndef MFD_HUGETLB
#define MFD_HUGETLB 0x0004
#endif // MFD_HUGETLB
#ifndef MFD_HUGE_SHIFT
#define MFD_HUGE_SHIFT 26
#endif // MFD_HUGE_SHIFT
#endif #endif
// Apple recently introduced MAP_JIT flag, which we want to use. // Apple recently introduced MAP_JIT flag, which we want to use.
@@ -96,7 +112,7 @@ struct ScopedHandle {
HANDLE value; HANDLE value;
}; };
static void getVMInfo(Info& vmInfo) noexcept { static void detectVMInfo(Info& vmInfo) noexcept {
SYSTEM_INFO systemInfo; SYSTEM_INFO systemInfo;
::GetSystemInfo(&systemInfo); ::GetSystemInfo(&systemInfo);
@@ -104,6 +120,10 @@ static void getVMInfo(Info& vmInfo) noexcept {
vmInfo.pageGranularity = systemInfo.dwAllocationGranularity; vmInfo.pageGranularity = systemInfo.dwAllocationGranularity;
} }
static size_t detectLargePageSize() noexcept {
return ::GetLargePageMinimum();
}
// Returns windows-specific protectFlags from \ref MemoryFlags. // Returns windows-specific protectFlags from \ref MemoryFlags.
static DWORD protectFlagsFromMemoryFlags(MemoryFlags memoryFlags) noexcept { static DWORD protectFlagsFromMemoryFlags(MemoryFlags memoryFlags) noexcept {
DWORD protectFlags; DWORD protectFlags;
@@ -136,9 +156,23 @@ Error alloc(void** p, size_t size, MemoryFlags memoryFlags) noexcept {
if (size == 0) if (size == 0)
return DebugUtils::errored(kErrorInvalidArgument); return DebugUtils::errored(kErrorInvalidArgument);
DWORD allocationType = MEM_COMMIT | MEM_RESERVE;
DWORD protectFlags = protectFlagsFromMemoryFlags(memoryFlags); DWORD protectFlags = protectFlagsFromMemoryFlags(memoryFlags);
void* result = ::VirtualAlloc(nullptr, size, MEM_COMMIT | MEM_RESERVE, protectFlags);
if (Support::test(memoryFlags, MemoryFlags::kMMapLargePages)) {
size_t lpSize = largePageSize();
// Fail early if the large page size cannot be queried - VirtualAlloc() with MEM_LARGE_PAGES would fail anyway.
if (lpSize == 0)
return DebugUtils::errored(kErrorFeatureNotEnabled);
if (!Support::isAligned(size, lpSize))
return DebugUtils::errored(kErrorInvalidArgument);
allocationType |= MEM_LARGE_PAGES;
}
void* result = ::VirtualAlloc(nullptr, size, allocationType, protectFlags);
if (!result) if (!result)
return DebugUtils::errored(kErrorOutOfMemory); return DebugUtils::errored(kErrorOutOfMemory);
@@ -148,7 +182,9 @@ Error alloc(void** p, size_t size, MemoryFlags memoryFlags) noexcept {
Error release(void* p, size_t size) noexcept { Error release(void* p, size_t size) noexcept {
DebugUtils::unused(size); DebugUtils::unused(size);
if (ASMJIT_UNLIKELY(!::VirtualFree(p, 0, MEM_RELEASE))) // NOTE: If the `dwFreeType` parameter is MEM_RELEASE, `size` parameter must be zero.
constexpr DWORD dwFreeType = MEM_RELEASE;
if (ASMJIT_UNLIKELY(!::VirtualFree(p, 0, dwFreeType)))
return DebugUtils::errored(kErrorInvalidArgument); return DebugUtils::errored(kErrorInvalidArgument);
return kErrorOk; return kErrorOk;
} }
@@ -189,7 +225,7 @@ Error allocDualMapping(DualMapping* dm, size_t size, MemoryFlags memoryFlags) no
ptr[i] = ::MapViewOfFile(handle.value, desiredAccess, 0, 0, size); ptr[i] = ::MapViewOfFile(handle.value, desiredAccess, 0, 0, size);
if (ptr[i] == nullptr) { if (ptr[i] == nullptr) {
if (i == 0) if (i == 1u)
::UnmapViewOfFile(ptr[0]); ::UnmapViewOfFile(ptr[0]);
return DebugUtils::errored(kErrorOutOfMemory); return DebugUtils::errored(kErrorOutOfMemory);
} }
@@ -220,15 +256,16 @@ Error releaseDualMapping(DualMapping* dm, size_t size) noexcept {
#endif #endif
// Virtual Memory [Posix] // Virtual Memory [Unix]
// ====================== // =====================
#if !defined(_WIN32) #if !defined(_WIN32)
// Virtual Memory [Posix] - Utilities // Virtual Memory [Unix] - Utilities
// ================================== // =================================
// Translates libc errors specific to VirtualMemory mapping to `asmjit::Error`. // Translates libc errors specific to VirtualMemory mapping to `asmjit::Error`.
ASMJIT_MAYBE_UNUSED
static Error asmjitErrorFromErrno(int e) noexcept { static Error asmjitErrorFromErrno(int e) noexcept {
switch (e) { switch (e) {
case EACCES: case EACCES:
@@ -251,13 +288,90 @@ static Error asmjitErrorFromErrno(int e) noexcept {
} }
} }
static void getVMInfo(Info& vmInfo) noexcept { ASMJIT_MAYBE_UNUSED
static MemoryFlags maxAccessFlagsToRegularAccessFlags(MemoryFlags memoryFlags) noexcept {
static constexpr uint32_t kMaxProtShift = Support::ConstCTZ<uint32_t(MemoryFlags::kMMapMaxAccessRead)>::value;
return MemoryFlags(uint32_t(memoryFlags & MemoryFlags::kMMapMaxAccessRWX) >> kMaxProtShift);
}
ASMJIT_MAYBE_UNUSED
static MemoryFlags regularAccessFlagsToMaxAccessFlags(MemoryFlags memoryFlags) noexcept {
static constexpr uint32_t kMaxProtShift = Support::ConstCTZ<uint32_t(MemoryFlags::kMMapMaxAccessRead)>::value;
return MemoryFlags(uint32_t(memoryFlags & MemoryFlags::kAccessRWX) << kMaxProtShift);
}
// Returns `mmap()` protection flags from \ref MemoryFlags.
ASMJIT_MAYBE_UNUSED
static int mmProtFromMemoryFlags(MemoryFlags memoryFlags) noexcept {
int protection = 0;
if (Support::test(memoryFlags, MemoryFlags::kAccessRead)) protection |= PROT_READ;
if (Support::test(memoryFlags, MemoryFlags::kAccessWrite)) protection |= PROT_READ | PROT_WRITE;
if (Support::test(memoryFlags, MemoryFlags::kAccessExecute)) protection |= PROT_READ | PROT_EXEC;
return protection;
}
// Returns maximum protection flags from `memoryFlags`.
//
// Uses:
// - `PROT_MPROTECT()` on NetBSD.
// - `PROT_MAX()` when available on other BSDs.
ASMJIT_MAYBE_UNUSED
static inline int mmMaxProtFromMemoryFlags(MemoryFlags memoryFlags) noexcept {
MemoryFlags acc = maxAccessFlagsToRegularAccessFlags(memoryFlags);
if (acc != MemoryFlags::kNone) {
#if defined(__NetBSD__) && defined(PROT_MPROTECT)
return PROT_MPROTECT(mmProtFromMemoryFlags(acc));
#elif defined(PROT_MAX)
return PROT_MAX(mmProtFromMemoryFlags(acc));
#else
return 0;
#endif
}
return 0;
}
static void detectVMInfo(Info& vmInfo) noexcept {
uint32_t pageSize = uint32_t(::getpagesize()); uint32_t pageSize = uint32_t(::getpagesize());
vmInfo.pageSize = pageSize; vmInfo.pageSize = pageSize;
vmInfo.pageGranularity = Support::max<uint32_t>(pageSize, 65536); vmInfo.pageGranularity = Support::max<uint32_t>(pageSize, 65536);
} }
static size_t detectLargePageSize() noexcept {
#if defined(__APPLE__) && defined(VM_FLAGS_SUPERPAGE_SIZE_2MB) && ASMJIT_ARCH_X86
return 2u * 1024u * 1024u;
#elif defined(__FreeBSD__)
Support::Array<size_t, 2> pageSize;
// TODO: Does it return unsigned?
return (getpagesizes(pageSize.data(), 2) < 2) ? 0 : uint32_t(pageSize[1]);
#elif defined(__linux__)
StringTmp<128> storage;
if (OSUtils::readFile("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size", storage, 16) != kErrorOk || storage.empty())
return 0u;
// The first value should be the size of the page (hpage_pmd_size).
size_t largePageSize = 0;
const char* buf = storage.data();
size_t bufSize = storage.size();
for (size_t i = 0; i < bufSize; i++) {
uint32_t digit = uint32_t(uint8_t(buf[i]) - uint8_t('0'));
if (digit >= 10u)
break;
largePageSize = largePageSize * 10 + digit;
}
if (Support::isPowerOf2(largePageSize))
return largePageSize;
else
return 0u;
#else
return 0u;
#endif
}
#if defined(__APPLE__) && TARGET_OS_OSX #if defined(__APPLE__) && TARGET_OS_OSX
static int getOSXVersion() noexcept { static int getOSXVersion() noexcept {
// MAP_JIT flag required to run unsigned JIT code is only supported by kernel version 10.14+ (Mojave). // MAP_JIT flag required to run unsigned JIT code is only supported by kernel version 10.14+ (Mojave).
@@ -275,17 +389,8 @@ static int getOSXVersion() noexcept {
} }
#endif // __APPLE__ && TARGET_OS_OSX #endif // __APPLE__ && TARGET_OS_OSX
// Returns `mmap()` protection flags from \ref MemoryFlags. // Virtual Memory [Posix] - Anonymous Memory
static int mmProtFromMemoryFlags(MemoryFlags memoryFlags) noexcept { // =========================================
int protection = 0;
if (Support::test(memoryFlags, MemoryFlags::kAccessRead)) protection |= PROT_READ;
if (Support::test(memoryFlags, MemoryFlags::kAccessWrite)) protection |= PROT_READ | PROT_WRITE;
if (Support::test(memoryFlags, MemoryFlags::kAccessExecute)) protection |= PROT_READ | PROT_EXEC;
return protection;
}
// Virtual Memory [Posix] - Anonymus Memory
// ========================================
#if defined(ASMJIT_ANONYMOUS_MEMORY_USE_FD) #if defined(ASMJIT_ANONYMOUS_MEMORY_USE_FD)
@@ -569,39 +674,6 @@ static inline int mmMapJitFromMemoryFlags(MemoryFlags memoryFlags) noexcept {
#endif #endif
} }
ASMJIT_MAYBE_UNUSED
static MemoryFlags maxAccessFlagsToRegularAccessFlags(MemoryFlags memoryFlags) noexcept {
static constexpr uint32_t kMaxProtShift = Support::ConstCTZ<uint32_t(MemoryFlags::kMMapMaxAccessRead)>::value;
return MemoryFlags(uint32_t(memoryFlags & MemoryFlags::kMMapMaxAccessRWX) >> kMaxProtShift);
}
ASMJIT_MAYBE_UNUSED
static MemoryFlags regularAccessFlagsToMaxAccessFlags(MemoryFlags memoryFlags) noexcept {
static constexpr uint32_t kMaxProtShift = Support::ConstCTZ<uint32_t(MemoryFlags::kMMapMaxAccessRead)>::value;
return MemoryFlags(uint32_t(memoryFlags & MemoryFlags::kAccessRWX) << kMaxProtShift);
}
// Returns maximum protection flags from `memoryFlags`.
//
// Uses:
// - `PROT_MPROTECT()` on NetBSD.
// - `PROT_MAX()` when available(BSD).
ASMJIT_MAYBE_UNUSED
static inline int mmMaxProtFromMemoryFlags(MemoryFlags memoryFlags) noexcept {
MemoryFlags acc = maxAccessFlagsToRegularAccessFlags(memoryFlags);
if (acc != MemoryFlags::kNone) {
#if defined(__NetBSD__) && defined(PROT_MPROTECT)
return PROT_MPROTECT(mmProtFromMemoryFlags(acc));
#elif defined(PROT_MAX)
return PROT_MAX(mmProtFromMemoryFlags(acc));
#else
return 0;
#endif
}
return 0;
}
static HardenedRuntimeFlags getHardenedRuntimeFlags() noexcept { static HardenedRuntimeFlags getHardenedRuntimeFlags() noexcept {
HardenedRuntimeFlags flags = HardenedRuntimeFlags::kNone; HardenedRuntimeFlags flags = HardenedRuntimeFlags::kNone;
@@ -626,10 +698,34 @@ static Error mapMemory(void** p, size_t size, MemoryFlags memoryFlags, int fd =
if (fd == -1) if (fd == -1)
mmFlags |= MAP_ANONYMOUS; mmFlags |= MAP_ANONYMOUS;
bool useLargePages = Support::test(memoryFlags, VirtMem::MemoryFlags::kMMapLargePages);
if (useLargePages) {
#if defined(__linux__)
size_t lpSize = largePageSize();
if (lpSize == 0)
return DebugUtils::errored(kErrorFeatureNotEnabled);
if (!Support::isAligned(size, lpSize))
return DebugUtils::errored(kErrorInvalidArgument);
unsigned lpSizeLog2 = Support::ctz(lpSize);
mmFlags |= int(unsigned(MAP_HUGETLB) | (lpSizeLog2 << MAP_HUGE_SHIFT));
#else
return DebugUtils::errored(kErrorFeatureNotEnabled);
#endif // __linux__
}
void* ptr = mmap(nullptr, size, protection, mmFlags, fd, offset); void* ptr = mmap(nullptr, size, protection, mmFlags, fd, offset);
if (ptr == MAP_FAILED) if (ptr == MAP_FAILED)
return DebugUtils::errored(asmjitErrorFromErrno(errno)); return DebugUtils::errored(asmjitErrorFromErrno(errno));
#if defined(MADV_HUGEPAGE)
if (useLargePages) {
madvise(ptr, size, MADV_HUGEPAGE);
}
#endif
*p = ptr; *p = ptr;
return kErrorOk; return kErrorOk;
} }
@@ -743,7 +839,7 @@ Error allocDualMapping(DualMapping* dm, size_t size, MemoryFlags memoryFlags) no
dm->rw = ptr[1]; dm->rw = ptr[1];
return kErrorOk; return kErrorOk;
#else #else
#error "[asmjit] VirtMem::allocDualMapping() has no implementation" #error "[asmjit] VirtMem::allocDualMapping() doesn't have implementation for the target OS and compiler"
#endif #endif
} }
@@ -783,7 +879,7 @@ Info info() noexcept {
if (!vmInfoInitialized.load()) { if (!vmInfoInitialized.load()) {
Info localMemInfo; Info localMemInfo;
getVMInfo(localMemInfo); detectVMInfo(localMemInfo);
vmInfo = localMemInfo; vmInfo = localMemInfo;
vmInfoInitialized.store(1u); vmInfoInitialized.store(1u);
@@ -792,6 +888,22 @@ Info info() noexcept {
return vmInfo; return vmInfo;
} }
size_t largePageSize() noexcept {
static std::atomic<size_t> largePageSize;
static constexpr size_t kNotAvailable = 1;
size_t size = largePageSize.load();
if (ASMJIT_LIKELY(size > kNotAvailable))
return size;
if (size == kNotAvailable)
return 0;
size = detectLargePageSize();
largePageSize.store(size != 0 ? size : kNotAvailable);
return size;
}
// Virtual Memory - Hardened Runtime Info // Virtual Memory - Hardened Runtime Info
// ====================================== // ======================================
@@ -825,6 +937,9 @@ UNIT(virt_mem) {
INFO(" pageSize: %zu", size_t(vmInfo.pageSize)); INFO(" pageSize: %zu", size_t(vmInfo.pageSize));
INFO(" pageGranularity: %zu", size_t(vmInfo.pageGranularity)); INFO(" pageGranularity: %zu", size_t(vmInfo.pageGranularity));
INFO("VirtMem::largePageSize():");
INFO(" largePageSize: %zu", size_t(VirtMem::largePageSize()));
VirtMem::HardenedRuntimeInfo hardenedRtInfo = VirtMem::hardenedRuntimeInfo(); VirtMem::HardenedRuntimeInfo hardenedRtInfo = VirtMem::hardenedRuntimeInfo();
VirtMem::HardenedRuntimeFlags hardenedFlags = hardenedRtInfo.flags; VirtMem::HardenedRuntimeFlags hardenedFlags = hardenedRtInfo.flags;

View File

@@ -19,6 +19,21 @@ ASMJIT_BEGIN_NAMESPACE
//! Virtual memory management. //! Virtual memory management.
namespace VirtMem { namespace VirtMem {
//! Describes whether instruction cache should be flushed after a write operation.
enum class CachePolicy : uint32_t {
//! Default policy.
//!
//! In some places this means `kFlushAfterWrite` and in others `kNeverFlush` - for example, flushing can be
//! avoided when it's known that the address has never been used to execute code before.
kDefault = 0,
//! Flush instruction cache after a write operation.
kFlushAfterWrite = 1,
//! Avoid flushing instruction cache after a write operation.
kNeverFlush = 2
};
//! Flushes instruction cache in the given region. //! Flushes instruction cache in the given region.
//! //!
//! Only useful on non-x86 architectures, however, it's a good practice to call it on any platform to make your //! Only useful on non-x86 architectures, however, it's a good practice to call it on any platform to make your
@@ -36,6 +51,15 @@ struct Info {
//! Returns virtual memory information, see `VirtMem::Info` for more details. //! Returns virtual memory information, see `VirtMem::Info` for more details.
ASMJIT_API Info info() noexcept; ASMJIT_API Info info() noexcept;
//! Returns the size of the smallest large page supported.
//!
//! AsmJit only uses the smallest large page at the moment as these are usually perfectly sized for executable
//! memory allocation (standard size is 2MB, but different sizes are possible).
//!
//! Returns either the detected large page size or 0 if large pages are either not supported by AsmJit or not
//! accessible to the process.
ASMJIT_API size_t largePageSize() noexcept;
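A sketch of the fallback pattern mentioned in the documentation of \ref MemoryFlags::kMMapLargePages below,
built on top of largePageSize(). The helper name is illustrative, `using namespace asmjit` is assumed, and
error handling is kept minimal:

  // Tries to back the allocation with large pages and falls back to regular pages on failure.
  // The caller must remember `*allocatedSize` and pass it back to VirtMem::release().
  static Error allocJitMemory(void** p, size_t size, size_t* allocatedSize) noexcept {
    VirtMem::MemoryFlags flags = VirtMem::MemoryFlags::kAccessRWX;

    size_t lpSize = VirtMem::largePageSize();
    if (lpSize) {
      size_t alignedSize = Support::alignUp(size, lpSize);
      if (VirtMem::alloc(p, alignedSize, flags | VirtMem::MemoryFlags::kMMapLargePages) == kErrorOk) {
        *allocatedSize = alignedSize;
        return kErrorOk;
      }
    }

    // Large pages are unavailable or the mapping failed - use regular pages.
    *allocatedSize = size;
    return VirtMem::alloc(p, size, flags);
  }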
//! Virtual memory access and mmap-specific flags. //! Virtual memory access and mmap-specific flags.
enum class MemoryFlags : uint32_t { enum class MemoryFlags : uint32_t {
//! No flags. //! No flags.
@@ -117,6 +141,13 @@ enum class MemoryFlags : uint32_t {
//! would cause RX page not having the updated content. //! would cause RX page not having the updated content.
kMapShared = 0x00000100u, kMapShared = 0x00000100u,
//! Request large memory mapped pages.
//!
//! \important If this option is used and large page(s) cannot be mapped, the allocation will fail. Fallback to
//! regular pages must be done by the user in this case. A higher-level API such as \ref JitAllocator provides an
//! additional mechanism to allocate regular page(s) when large page(s) allocation fails.
kMMapLargePages = 0x00000200u,
//! Not an access flag, only used by `allocDualMapping()` to override the default allocation strategy to always use //! Not an access flag, only used by `allocDualMapping()` to override the default allocation strategy to always use
//! a 'tmp' directory instead of "/dev/shm" (on POSIX platforms). Please note that this flag will be ignored if the //! a 'tmp' directory instead of "/dev/shm" (on POSIX platforms). Please note that this flag will be ignored if the
//! operating system allows to allocate an executable memory by a different API than `open()` or `shm_open()`. For //! operating system allows to allocate an executable memory by a different API than `open()` or `shm_open()`. For
@@ -233,24 +264,40 @@ ASMJIT_API void protectJitMemory(ProtectJitAccess access) noexcept;
//! in destructor. The purpose of this class is to make writing to JIT memory easier. //! in destructor. The purpose of this class is to make writing to JIT memory easier.
class ProtectJitReadWriteScope { class ProtectJitReadWriteScope {
public: public:
ASMJIT_NONCOPYABLE(ProtectJitReadWriteScope)
//! \name Members
//! \{
void* _rxPtr; void* _rxPtr;
size_t _size; size_t _size;
CachePolicy _policy;
//! \}
//! \name Construction / Destruction
//! \{
//! Makes the given memory block RW protected. //! Makes the given memory block RW protected.
ASMJIT_FORCE_INLINE ProtectJitReadWriteScope(void* rxPtr, size_t size) noexcept ASMJIT_FORCE_INLINE ProtectJitReadWriteScope(
void* rxPtr,
size_t size,
CachePolicy policy = CachePolicy::kDefault) noexcept
: _rxPtr(rxPtr), : _rxPtr(rxPtr),
_size(size) { _size(size),
_policy(policy) {
protectJitMemory(ProtectJitAccess::kReadWrite); protectJitMemory(ProtectJitAccess::kReadWrite);
} }
// Not copyable.
ProtectJitReadWriteScope(const ProtectJitReadWriteScope& other) = delete;
//! Makes the memory block RX protected again and flushes instruction cache. //! Makes the memory block RX protected again and flushes instruction cache.
ASMJIT_FORCE_INLINE ~ProtectJitReadWriteScope() noexcept { ASMJIT_FORCE_INLINE ~ProtectJitReadWriteScope() noexcept {
protectJitMemory(ProtectJitAccess::kReadExecute); protectJitMemory(ProtectJitAccess::kReadExecute);
if (_policy != CachePolicy::kNeverFlush)
flushInstructionCache(_rxPtr, _size); flushInstructionCache(_rxPtr, _size);
} }
//! \}
}; };
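A sketch of patching already published code through the scope above. The helper and its parameters are
illustrative; `rx` and `rw` stand for the executable and writable addresses of the same allocation:

  // Overwrites `len` bytes at `offset` in JIT memory. The scope switches the calling thread's
  // MAP_JIT permission to RW where needed, and flushes the patched range when it ends because
  // the default cache policy is used.
  static void patchBytes(void* rx, void* rw, size_t offset, const void* src, size_t len) noexcept {
    VirtMem::ProtectJitReadWriteScope scope(static_cast<uint8_t*>(rx) + offset, len);
    memcpy(static_cast<uint8_t*>(rw) + offset, src, len);
  }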
} // VirtMem } // VirtMem

View File

@@ -127,28 +127,26 @@ int main() {
} }
// Allocate memory for the function and relocate it there. // Allocate memory for the function and relocate it there.
void* rxPtr; JitAllocator::Span span;
void* rwPtr; err = allocator.alloc(span, codeSize);
err = allocator.alloc(&rxPtr, &rwPtr, codeSize);
if (err) if (err)
fail("Failed to allocate executable memory", err); fail("Failed to allocate executable memory", err);
// Relocate to the base-address of the allocated memory. // Relocate to the base-address of the allocated memory.
code.relocateToBase(uint64_t(uintptr_t(rxPtr))); code.relocateToBase(uint64_t(uintptr_t(span.rx())));
{
VirtMem::ProtectJitReadWriteScope scope(rxPtr, code.codeSize());
allocator.write(span, [&](JitAllocator::Span& span) noexcept -> Error {
// Copy the flattened code into `mem.rw`. There are two ways. You can either copy // Copy the flattened code into `mem.rw`. There are two ways. You can either copy
// everything manually by iterating over all sections or use `copyFlattenedData`. // everything manually by iterating over all sections or use `copyFlattenedData`.
// This code is similar to what `copyFlattenedData(p, codeSize, 0)` would do: // This code is similar to what `copyFlattenedData(p, codeSize, 0)` would do:
for (Section* section : code.sectionsByOrder()) for (Section* section : code.sectionsByOrder())
memcpy(static_cast<uint8_t*>(rwPtr) + size_t(section->offset()), section->data(), section->bufferSize()); memcpy(static_cast<uint8_t*>(span.rw()) + size_t(section->offset()), section->data(), section->bufferSize());
} return kErrorOk;
});
// Execute the function and test whether it works. // Execute the function and test whether it works.
typedef size_t (*Func)(size_t idx); typedef size_t (*Func)(size_t idx);
Func fn = (Func)rxPtr; Func fn = (Func)span.rx();
printf("\n"); printf("\n");
if (fn(0) != dataArray[0] || if (fn(0) != dataArray[0] ||