diff --git a/src/asmjit/core.h b/src/asmjit/core.h index eb99c39..cb19333 100644 --- a/src/asmjit/core.h +++ b/src/asmjit/core.h @@ -739,15 +739,17 @@ namespace asmjit { //! JitAllocator allocator; //! //! // Allocate an executable virtual memory and handle a possible failure. -//! void* p = allocator.alloc(estimatedSize); -//! if (!p) +//! JitAllocator::Span span; +//! Error err = allocator.alloc(span, estimatedSize); +//! +//! if (err != kErrorOk) // <- NOTE: This must be checked, always! //! return 0; //! //! // Now relocate the code to the address provided by the memory allocator. -//! // Please note that this DOESN'T COPY anything to `p`. This function will -//! // store the address in CodeHolder and use relocation entries to patch the -//! // existing code in all sections to respect the base address provided. -//! code.relocateToBase((uint64_t)p); +//! // Please note that this DOESN'T COPY anything to it. This function will +//! // store the address in CodeHolder and use relocation entries to patch +//! // the existing code in all sections to respect the base address provided. +//! code.relocateToBase((uint64_t)span.rx()); //! //! // This is purely optional. There are cases in which the relocation can omit //! // unneeded data, which would shrink the size of address table. If that @@ -760,12 +762,17 @@ namespace asmjit { //! // additional options that can be used to also zero pad sections' virtual //! // size, etc. //! // -//! // With some additional features, copyFlattenData() does roughly this: -//! // for (Section* section : code.sections()) -//! // memcpy((uint8_t*)p + section->offset(), -//! // section->data(), -//! // section->bufferSize()); -//! code.copyFlattenedData(p, codeSize, CopySectionFlags::kPadSectionBuffer); +//! // With some additional features, copyFlattenedData() does roughly the following: +//! // +//! // allocator.write([&](JitAllocator::Span& span) { +//! // for (Section* section : code.sections()) { +//! 
// uint8_t* p = (uint8_t*)span.rw() + section->offset(); +//! //     memcpy(p, section->data(), section->bufferSize()); +//! //   } +//! // }); +//! allocator.write([&](JitAllocator::Span& span) { +//! code.copyFlattenedData(span.rw(), codeSize, CopySectionFlags::kPadSectionBuffer); +//! }); //! //! // Execute the generated function. //! int inA[4] = { 4, 3, 2, 1 }; diff --git a/src/asmjit/core/radefs_p.h b/src/asmjit/core/radefs_p.h index d61a9cc..3250396 100644 --- a/src/asmjit/core/radefs_p.h +++ b/src/asmjit/core/radefs_p.h @@ -559,7 +559,7 @@ public: ASMJIT_FORCE_INLINE Error nonOverlappingUnionOf(ZoneAllocator* allocator, const RALiveSpans& x, const RALiveSpans& y, const DataType& yData) noexcept { uint32_t finalSize = x.size() + y.size(); - ASMJIT_PROPAGATE(_data.reserve(allocator, finalSize)); + ASMJIT_PROPAGATE(_data.growingReserve(allocator, finalSize)); T* dstPtr = _data.data(); const T* xSpan = x.data(); @@ -694,7 +694,7 @@ typedef RALiveSpans LiveRegSpans; //! - LEA x{ W|Out}, [x{R|Use} + y{R|Out}] -> {x:R|W|Use|Out y:R|Use} //! //! It should be obvious from the example above how these flags get created. Each operand contains READ/WRITE -//! information, which is then merged to RATiedReg's flags. However, we also need to represent the possitility +//! information, which is then merged to RATiedReg's flags. However, we also need to represent the possibility //! to view the operation as two independent operations - USE and OUT, because the register allocator first //! allocates USE registers, and then assigns OUT registers independently of USE registers. enum class RATiedFlags : uint32_t { diff --git a/src/asmjit/core/zonevector.cpp b/src/asmjit/core/zonevector.cpp index 2486021..b68e25a 100644 --- a/src/asmjit/core/zonevector.cpp +++ b/src/asmjit/core/zonevector.cpp @@ -13,8 +13,63 @@ ASMJIT_BEGIN_NAMESPACE // ZoneVectorBase - Helpers // ======================== +// ZoneVector is used as an array to hold short-lived data structures used during code generation. 
The growing +// strategy is simple - use small capacity at the beginning (very good for ZoneAllocator) and then grow quicker +// to prevent successive reallocations. +static ASMJIT_FORCE_INLINE uint32_t ZoneVector_growCapacity(uint32_t current, uint32_t growMinimum, uint32_t sizeOfT) noexcept { + static constexpr size_t kGrowThreshold = Globals::kGrowThreshold; + + size_t byteSize = size_t(current) * sizeOfT; + size_t minimumByteSize = size_t(growMinimum) * sizeOfT; + + // This is more than exponential growth at the beginning. + if (byteSize < 32) { + byteSize = 32; + } + else if (byteSize < 128) { + byteSize = 128; + } + else if (byteSize < 512) { + byteSize = 512; + } + + if (byteSize < minimumByteSize) { + // Exponential growth before we reach `kGrowThreshold`. + byteSize = Support::alignUpPowerOf2(minimumByteSize); + + // Bail to `growMinimum` in case of overflow - most likely whatever happens afterwards would just fail. + if (byteSize < minimumByteSize) { + return growMinimum; + } + + // Pretty much chunked growth advancing by `kGrowThreshold` after we exceed it. + // This should not be a common case, so we don't really have to optimize for it. + if (byteSize > kGrowThreshold) { + // Align to kGrowThreshold. + size_t remainder = minimumByteSize % kGrowThreshold; + + byteSize = minimumByteSize + remainder; + + // Bail to `growMinimum` in case of overflow - should never happen as it's unlikely we would hit this on a 32-bit + // machine (a consecutive near-4GiB allocation is impossible, and this should never happen on a 64-bit machine as we + // use 32-bit size & capacity, so overflow of a 64-bit integer is not possible). Added just as an extreme measure. 
+ if (byteSize < minimumByteSize) + return growMinimum; + } + } + + size_t n = byteSize / sizeOfT; + return uint32_t(Support::min(n, 0xFFFFFFFFu)); +} + +static ASMJIT_FORCE_INLINE bool ZoneVector_byteSizeIsSafe(size_t nBytes, uint32_t n) noexcept { + if (sizeof(uint32_t) < sizeof(size_t)) + return true; // there is no problem when running on a 64-bit machine. + else + return nBytes >= size_t(n); +}; + Error ZoneVectorBase::_grow(ZoneAllocator* allocator, uint32_t sizeOfT, uint32_t n) noexcept { - uint32_t threshold = Globals::kGrowThreshold / sizeOfT; uint32_t capacity = _capacity; uint32_t after = _size; @@ -25,29 +80,7 @@ Error ZoneVectorBase::_grow(ZoneAllocator* allocator, uint32_t sizeOfT, uint32_t if (capacity >= after) return kErrorOk; - // ZoneVector is used as an array to hold short-lived data structures used - // during code generation. The growing strategy is simple - use small capacity - // at the beginning (very good for ZoneAllocator) and then grow quicker to - // prevent successive reallocations. 
- if (capacity < 4) - capacity = 4; - else if (capacity < 8) - capacity = 8; - else if (capacity < 16) - capacity = 16; - else if (capacity < 64) - capacity = 64; - else if (capacity < 256) - capacity = 256; - - while (capacity < after) { - if (capacity < threshold) - capacity *= 2; - else - capacity += threshold; - } - - return _reserve(allocator, sizeOfT, capacity); + return _reserve(allocator, sizeOfT, ZoneVector_growCapacity(capacity, after, sizeOfT)); } Error ZoneVectorBase::_reserve(ZoneAllocator* allocator, uint32_t sizeOfT, uint32_t n) noexcept { @@ -55,8 +88,8 @@ Error ZoneVectorBase::_reserve(ZoneAllocator* allocator, uint32_t sizeOfT, uint3 if (oldCapacity >= n) return kErrorOk; - uint32_t nBytes = n * sizeOfT; - if (ASMJIT_UNLIKELY(nBytes < n)) + size_t nBytes = size_t(n) * sizeOfT; + if (ASMJIT_UNLIKELY(!ZoneVector_byteSizeIsSafe(nBytes, n))) return DebugUtils::errored(kErrorOutOfMemory); size_t allocatedBytes; @@ -65,19 +98,28 @@ Error ZoneVectorBase::_reserve(ZoneAllocator* allocator, uint32_t sizeOfT, uint3 if (ASMJIT_UNLIKELY(!newData)) return DebugUtils::errored(kErrorOutOfMemory); + uint32_t newCapacity = uint32_t(allocatedBytes / sizeOfT); + ASMJIT_ASSERT(newCapacity >= n); + void* oldData = _data; if (oldData && _size) { memcpy(newData, oldData, size_t(_size) * sizeOfT); allocator->release(oldData, size_t(oldCapacity) * sizeOfT); } - _capacity = uint32_t(allocatedBytes / sizeOfT); - ASMJIT_ASSERT(_capacity >= n); - _data = newData; + _capacity = newCapacity; + return kErrorOk; } +Error ZoneVectorBase::_growingReserve(ZoneAllocator* allocator, uint32_t sizeOfT, uint32_t n) noexcept { + uint32_t capacity = _capacity; + if (capacity >= n) + return kErrorOk; + return _reserve(allocator, sizeOfT, ZoneVector_growCapacity(capacity, n, sizeOfT)); +} + Error ZoneVectorBase::_resize(ZoneAllocator* allocator, uint32_t sizeOfT, uint32_t n) noexcept { uint32_t size = _size; @@ -266,6 +308,8 @@ Error ZoneBitVector::_append(ZoneAllocator* allocator, bool 
value) noexcept { #if defined(ASMJIT_TEST) template static void test_zone_vector(ZoneAllocator* allocator, const char* typeName) { + constexpr uint32_t kMiB = 1024 * 1024; + int i; int kMax = 100000; @@ -301,12 +345,22 @@ static void test_zone_vector(ZoneAllocator* allocator, const char* typeName) { int64_t fsum = 0; int64_t rsum = 0; - for (const T& item : vec) { fsum += item; } - for (auto it = vec.rbegin(); it != vec.rend(); ++it) { rsum += *it; } + for (const T& item : vec) { + fsum += item; + } + + for (auto it = vec.rbegin(); it != vec.rend(); ++it) { + rsum += *it; + } EXPECT_EQ(fsum, rsum); - vec.release(allocator); + + INFO("ZoneBitVector::growingReserve()"); + for (uint32_t j = 0; j < 40 / sizeof(T); j += 8) { + EXPECT_EQ(vec.growingReserve(allocator, j * kMiB), kErrorOk); + EXPECT_GE(vec.capacity(), j * kMiB); + } } static void test_zone_bitvector(ZoneAllocator* allocator) { diff --git a/src/asmjit/core/zonevector.h b/src/asmjit/core/zonevector.h index 13d28bb..f38dca5 100644 --- a/src/asmjit/core/zonevector.h +++ b/src/asmjit/core/zonevector.h @@ -58,6 +58,7 @@ protected: ASMJIT_API Error _grow(ZoneAllocator* allocator, uint32_t sizeOfT, uint32_t n) noexcept; ASMJIT_API Error _resize(ZoneAllocator* allocator, uint32_t sizeOfT, uint32_t n) noexcept; ASMJIT_API Error _reserve(ZoneAllocator* allocator, uint32_t sizeOfT, uint32_t n) noexcept; + ASMJIT_API Error _growingReserve(ZoneAllocator* allocator, uint32_t sizeOfT, uint32_t n) noexcept; inline void _swap(ZoneVectorBase& other) noexcept { std::swap(_data, other._data); @@ -414,7 +415,21 @@ public: //! Reallocates the internal array to fit at least `n` items. inline Error reserve(ZoneAllocator* allocator, uint32_t n) noexcept { - return n > _capacity ? ZoneVectorBase::_reserve(allocator, sizeof(T), n) : Error(kErrorOk); + if (ASMJIT_UNLIKELY(n > _capacity)) + return ZoneVectorBase::_reserve(allocator, sizeof(T), n); + else + return Error(kErrorOk); + } + + //! 
Reallocates the internal array to fit at least `n` items with growing semantics. + //! + //! If the vector is smaller than `n` the same growing calculations will be used as if N items were appended + //! to an empty vector, which means reserving additional space for more append operations that could follow. + inline Error growingReserve(ZoneAllocator* allocator, uint32_t n) noexcept { + if (ASMJIT_UNLIKELY(n > _capacity)) + return ZoneVectorBase::_growingReserve(allocator, sizeof(T), n); + else + return Error(kErrorOk); } inline Error willGrow(ZoneAllocator* allocator, uint32_t n = 1) noexcept {