Improved register allocation of consecutive registers in some cases

  * The implementation tries to detect whether a virtual register only lives
    in a single basic block; if so, it uses a move approach instead of a
    spill/alloc approach when the register has to be reallocated (sketched below)
  * Additionally, the implementation now improves the use of scratch
    registers during function argument allocation - a scratch register is
    only reserved when it's actually needed
Author: kobalicek
Date:   2025-04-20 13:25:43 +02:00
Parent: e8c8e2e48a
Commit: 4cd9198a6c
11 changed files with 293 additions and 122 deletions
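
For orientation, a minimal hedged sketch of the first idea, independent of asmjit's
internals: each virtual register remembers the id of the single basic block it has
been seen in and is demoted to "multiple blocks" as soon as a use from a different
block appears. The names below (`VirtRegInfo`, `noteUseInBlock`, `kInvalidBlockId`)
are illustrative only; the commit itself implements this via
`RAWorkReg::assignBasicBlock()`, `markUseOfMultipleBasicBlocks()`, and the
`kMultipleBasicBlocks` flag shown in the diffs that follow.

#include <cstdint>

// Illustrative sketch only - tracks whether a virtual register is used by a single basic block.
struct VirtRegInfo {
  static constexpr uint32_t kInvalidBlockId = 0xFFFFFFFFu;

  uint32_t singleBlockId = kInvalidBlockId; // block id if used by one block so far
  bool multipleBlocks = false;              // set once a second block is seen

  // Called for every use of the register while building RA metadata.
  void noteUseInBlock(uint32_t blockId) noexcept {
    if (multipleBlocks) {
      return;                               // already demoted, nothing to track
    }
    if (singleBlockId == kInvalidBlockId) {
      singleBlockId = blockId;              // first use - remember the block
    }
    else if (singleBlockId != blockId) {
      singleBlockId = kInvalidBlockId;      // a different block - demote the register
      multipleBlocks = true;
    }
  }

  bool isWithinSingleBasicBlock() const noexcept { return !multipleBlocks; }
};

A register that stays within one block is cheap to relocate, so when its physical
register is needed elsewhere the local allocator can prefer a single move to another
free register instead of a spill and reload; the `decideOnReassignment()` change in
the ralocal.cpp diff below relies on exactly this property.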

View File

@@ -17,7 +17,7 @@
{ "optional": true, "cmd": ["asmjit_test_assembler", "--validate"] },
{ "optional": true, "cmd": ["asmjit_test_emitters"] },
{ "optional": true, "cmd": ["asmjit_test_execute"] },
{ "optional": true, "cmd": ["asmjit_test_compiler"] },
{ "optional": true, "cmd": ["asmjit_test_compiler", "--verbose", "--dump-asm"] },
{ "optional": true, "cmd": ["asmjit_test_instinfo"] },
{ "optional": true, "cmd": ["asmjit_test_x86_sections"] },
{ "optional": true, "cmd": ["asmjit_test_perf", "--quick"] }

View File

@@ -1041,6 +1041,13 @@ public:
//! \}
//! \name Types
//! \{
using RegMasks = Support::Array<RegMask, Globals::kNumVirtGroups>;
//! \}
//! \name Members
//! \{
@@ -1093,9 +1100,9 @@ public:
uint32_t _stackAdjustment = 0;
//! Registers that are dirty.
Support::Array<RegMask, Globals::kNumVirtGroups> _dirtyRegs {};
RegMasks _dirtyRegs {};
//! Registers that must be preserved (copied from CallConv).
Support::Array<RegMask, Globals::kNumVirtGroups> _preservedRegs {};
RegMasks _preservedRegs {};
//! Size to save/restore per register group.
Support::Array<uint8_t, Globals::kNumVirtGroups> _saveRestoreRegSize {};
//! Alignment of save/restore area per register group.
@@ -1380,6 +1387,12 @@ public:
return _dirtyRegs[group] & _preservedRegs[group];
}
//! Returns all dirty registers as a Support::Array<> type.
ASMJIT_INLINE_NODEBUG const RegMasks& dirtyRegs() const noexcept { return _dirtyRegs; }
//! Returns all preserved registers as a Support::Array<> type.
ASMJIT_INLINE_NODEBUG const RegMasks& preservedRegs() const noexcept { return _preservedRegs; }
//! Returns the mask of preserved registers of the given register `group`.
//!
//! Preserved registers are those that must survive the function call unmodified. The function can only modify
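
A hedged illustration of what the new array-returning accessors make convenient:
gathering, per register group, the callee-saved registers that were actually made
dirty and therefore need a save/restore. The helper below is illustrative only and
mirrors the `_dirtyRegs[group] & _preservedRegs[group]` expression shown above;
within this commit, `preservedRegs()` is used to seed
`RALocalAllocator::_funcPreservedRegs` (see the ralocal.h diff further down).

#include <asmjit/core.h>

using namespace asmjit;

// Illustrative only: per-group mask of callee-saved registers that were clobbered
// by the function and therefore must be saved/restored in the prolog/epilog.
static Support::Array<RegMask, Globals::kNumVirtGroups> savedRegsPerGroup(const FuncFrame& frame) noexcept {
  Support::Array<RegMask, Globals::kNumVirtGroups> saved {};
  for (RegGroup group : RegGroupVirtValues{}) {
    saved[group] = frame.dirtyRegs()[group] & frame.preservedRegs()[group];
  }
  return saved;
}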

View File

@@ -13,8 +13,9 @@ ASMJIT_BEGIN_NAMESPACE
//! \{
FuncArgsContext::FuncArgsContext() noexcept {
for (RegGroup group : RegGroupVirtValues{})
for (RegGroup group : RegGroupVirtValues{}) {
_workData[size_t(group)].reset();
}
}
ASMJIT_FAVOR_SIZE Error FuncArgsContext::initWorkData(const FuncFrame& frame, const FuncArgsAssignment& args, const RAConstraints* constraints) noexcept {
@@ -26,23 +27,29 @@ ASMJIT_FAVOR_SIZE Error FuncArgsContext::initWorkData(const FuncFrame& frame, co
_arch = arch;
// Initialize `_archRegs`.
for (RegGroup group : RegGroupVirtValues{})
for (RegGroup group : RegGroupVirtValues{}) {
_workData[group]._archRegs = _constraints->availableRegs(group);
}
if (frame.hasPreservedFP())
if (frame.hasPreservedFP()) {
_workData[size_t(RegGroup::kGp)]._archRegs &= ~Support::bitMask(archTraits().fpRegId());
}
uint32_t reassignmentFlagMask = 0;
// Extract information from all function arguments/assignments and build Var[] array.
uint32_t varId = 0;
for (uint32_t argIndex = 0; argIndex < Globals::kMaxFuncArgs; argIndex++) {
for (uint32_t valueIndex = 0; valueIndex < Globals::kMaxValuePack; valueIndex++) {
const FuncValue& dst_ = args.arg(argIndex, valueIndex);
if (!dst_.isAssigned())
if (!dst_.isAssigned()) {
continue;
}
const FuncValue& src_ = func.arg(argIndex, valueIndex);
if (ASMJIT_UNLIKELY(!src_.isAssigned()))
if (ASMJIT_UNLIKELY(!src_.isAssigned())) {
return DebugUtils::errored(kErrorInvalidState);
}
Var& var = _vars[varId];
var.init(src_, dst_);
@@ -55,42 +62,51 @@ ASMJIT_FAVOR_SIZE Error FuncArgsContext::initWorkData(const FuncFrame& frame, co
WorkData* dstWd = nullptr;
// Not supported.
if (src.isIndirect())
if (src.isIndirect()) {
return DebugUtils::errored(kErrorInvalidAssignment);
}
if (dst.isReg()) {
RegType dstType = dst.regType();
if (ASMJIT_UNLIKELY(!archTraits().hasRegType(dstType)))
if (ASMJIT_UNLIKELY(!archTraits().hasRegType(dstType))) {
return DebugUtils::errored(kErrorInvalidRegType);
}
// Copy TypeId from source if the destination doesn't have it. The RA used by BaseCompiler would never
// leave TypeId undefined, but users of FuncAPI can just assign phys regs without specifying the type.
if (!dst.hasTypeId())
// leave TypeId undefined, but users of FuncAPI can just assign phys regs without specifying their types.
if (!dst.hasTypeId()) {
dst.setTypeId(archTraits().regTypeToTypeId(dst.regType()));
}
dstGroup = archTraits().regTypeToGroup(dstType);
if (ASMJIT_UNLIKELY(dstGroup > RegGroup::kMaxVirt))
if (ASMJIT_UNLIKELY(dstGroup > RegGroup::kMaxVirt)) {
return DebugUtils::errored(kErrorInvalidRegGroup);
}
dstWd = &_workData[dstGroup];
dstId = dst.regId();
if (ASMJIT_UNLIKELY(dstId >= 32 || !Support::bitTest(dstWd->archRegs(), dstId)))
return DebugUtils::errored(kErrorInvalidPhysId);
if (ASMJIT_UNLIKELY(Support::bitTest(dstWd->dstRegs(), dstId)))
if (ASMJIT_UNLIKELY(dstId >= 32 || !Support::bitTest(dstWd->archRegs(), dstId))) {
return DebugUtils::errored(kErrorInvalidPhysId);
}
if (ASMJIT_UNLIKELY(Support::bitTest(dstWd->dstRegs(), dstId))) {
return DebugUtils::errored(kErrorOverlappedRegs);
}
dstWd->_dstRegs |= Support::bitMask(dstId);
dstWd->_dstShuf |= Support::bitMask(dstId);
dstWd->_usedRegs |= Support::bitMask(dstId);
}
else {
if (!dst.hasTypeId())
if (!dst.hasTypeId()) {
dst.setTypeId(src.typeId());
}
OperandSignature signature = getSuitableRegForMemToMemMove(arch, dst.typeId(), src.typeId());
if (ASMJIT_UNLIKELY(!signature.isValid()))
if (ASMJIT_UNLIKELY(!signature.isValid())) {
return DebugUtils::errored(kErrorInvalidState);
}
_stackDstMask = uint8_t(_stackDstMask | Support::bitMask(signature.regGroup()));
}
@@ -102,9 +118,11 @@ ASMJIT_FAVOR_SIZE Error FuncArgsContext::initWorkData(const FuncFrame& frame, co
ASMJIT_ASSERT(dstWd != nullptr);
dstWd->assign(varId, srcId);
// The best case, register is allocated where it is expected to be. However, we should
// not mark this as done if both registers are GP and sign or zero extension is required.
reassignmentFlagMask |= uint32_t(dstId != srcId) << uint32_t(dstGroup);
if (dstId == srcId) {
// The best case, register is allocated where it is expected to be. However, we should
// not mark this as done if both registers are GP and sign or zero extension is required.
if (dstGroup != RegGroup::kGp) {
var.markDone();
}
@@ -115,17 +133,20 @@ ASMJIT_FAVOR_SIZE Error FuncArgsContext::initWorkData(const FuncFrame& frame, co
uint32_t dstSize = TypeUtils::sizeOf(dt);
uint32_t srcSize = TypeUtils::sizeOf(st);
if (dt == TypeId::kVoid || st == TypeId::kVoid || dstSize <= srcSize)
if (dt == TypeId::kVoid || st == TypeId::kVoid || dstSize <= srcSize) {
var.markDone();
}
}
}
}
else {
if (ASMJIT_UNLIKELY(srcGroup > RegGroup::kMaxVirt))
if (ASMJIT_UNLIKELY(srcGroup > RegGroup::kMaxVirt)) {
return DebugUtils::errored(kErrorInvalidState);
}
WorkData& srcData = _workData[size_t(srcGroup)];
srcData.assign(varId, srcId);
reassignmentFlagMask |= 1u << uint32_t(dstGroup);
}
}
else {
@@ -142,6 +163,7 @@ ASMJIT_FAVOR_SIZE Error FuncArgsContext::initWorkData(const FuncFrame& frame, co
for (RegGroup group : RegGroupVirtValues{}) {
_workData[group]._workRegs =
(_workData[group].archRegs() & (frame.dirtyRegs(group) | ~frame.preservedRegs(group))) | _workData[group].dstRegs() | _workData[group].assignedRegs();
_workData[group]._needsScratch = (reassignmentFlagMask >> uint32_t(group)) & 1u;
}
// Create a variable that represents `SARegId` if necessary.
@@ -153,14 +175,16 @@ ASMJIT_FAVOR_SIZE Error FuncArgsContext::initWorkData(const FuncFrame& frame, co
if (saCurRegId != BaseReg::kIdBad) {
// Check if the provided `SARegId` doesn't collide with input registers.
if (ASMJIT_UNLIKELY(gpRegs.isAssigned(saCurRegId)))
if (ASMJIT_UNLIKELY(gpRegs.isAssigned(saCurRegId))) {
return DebugUtils::errored(kErrorOverlappedRegs);
}
}
if (saOutRegId != BaseReg::kIdBad) {
// Check if the provided `SARegId` doesn't collide with argument assignments.
if (ASMJIT_UNLIKELY(Support::bitTest(gpRegs.dstRegs(), saOutRegId)))
if (ASMJIT_UNLIKELY(Support::bitTest(gpRegs.dstRegs(), saOutRegId))) {
return DebugUtils::errored(kErrorOverlappedRegs);
}
saRegRequired = true;
}
@@ -180,11 +204,13 @@ ASMJIT_FAVOR_SIZE Error FuncArgsContext::initWorkData(const FuncFrame& frame, co
}
else {
RegMask availableRegs = gpRegs.availableRegs();
if (!availableRegs)
if (!availableRegs) {
availableRegs = gpRegs.archRegs() & ~gpRegs.workRegs();
}
if (ASMJIT_UNLIKELY(!availableRegs))
if (ASMJIT_UNLIKELY(!availableRegs)) {
return DebugUtils::errored(kErrorNoMorePhysRegs);
}
saCurRegId = Support::ctz(availableRegs);
}
@@ -216,8 +242,9 @@ ASMJIT_FAVOR_SIZE Error FuncArgsContext::initWorkData(const FuncFrame& frame, co
uint32_t dstId = var.out.regId();
RegGroup group = archTraits().regTypeToGroup(var.cur.regType());
if (group != archTraits().regTypeToGroup(var.out.regType()))
if (group != archTraits().regTypeToGroup(var.out.regType())) {
continue;
}
WorkData& wd = _workData[group];
if (wd.isAssigned(dstId)) {
@@ -261,28 +288,32 @@ ASMJIT_FAVOR_SIZE Error FuncArgsContext::markScratchRegs(FuncFrame& frame) noexc
for (RegGroup group : RegGroupVirtValues{}) {
if (Support::bitTest(groupMask, group)) {
WorkData& wd = _workData[group];
if (wd._needsScratch) {
// Initially, pick some clobbered or dirty register.
RegMask workRegs = wd.workRegs();
RegMask regs = workRegs & ~(wd.usedRegs() | wd._dstShuf);
// Initially, pick some clobbered or dirty register.
RegMask workRegs = wd.workRegs();
RegMask regs = workRegs & ~(wd.usedRegs() | wd._dstShuf);
// If that didn't work out pick some register which is not in 'used'.
if (!regs) {
regs = workRegs & ~wd.usedRegs();
}
// If that didn't work out pick some register which is not in 'used'.
if (!regs)
regs = workRegs & ~wd.usedRegs();
// If that didn't work out pick any other register that is allocable.
// This last resort case will, however, result in marking one more
// register dirty.
if (!regs) {
regs = wd.archRegs() & ~workRegs;
}
// If that didn't work out pick any other register that is allocable.
// This last resort case will, however, result in marking one more
// register dirty.
if (!regs)
regs = wd.archRegs() & ~workRegs;
// If that didn't work out we will have to use XORs instead of MOVs.
if (!regs) {
continue;
}
// If that didn't work out we will have to use XORs instead of MOVs.
if (!regs)
continue;
RegMask regMask = Support::blsi(regs);
wd._workRegs |= regMask;
frame.addDirtyRegs(group, regMask);
RegMask regMask = Support::blsi(regs);
wd._workRegs |= regMask;
frame.addDirtyRegs(group, regMask);
}
}
}
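
To make the "scratch only when needed" part concrete, here is a hedged,
self-contained sketch of the bookkeeping above: one bit per register group records
whether any argument of that group really has to change physical registers, and a
scratch register is then reserved only for groups whose bit is set. The names
(`ArgShuffleState`, `noteAssignment`, `needsScratch`) are illustrative; the real
code keeps the flag in `WorkData::_needsScratch` and consumes it in
`markScratchRegs()`.

#include <cstdint>

// Illustrative sketch of the per-group "needs a scratch register" flag.
struct ArgShuffleState {
  uint32_t reassignmentFlagMask = 0; // one bit per register group

  // Called for every register-to-register argument assignment of a given group.
  void noteAssignment(uint32_t group, uint32_t srcPhysId, uint32_t dstPhysId) noexcept {
    // Only an argument that actually changes its physical register can ever
    // require a scratch register for the shuffling code.
    reassignmentFlagMask |= uint32_t(srcPhysId != dstPhysId) << group;
  }

  bool needsScratch(uint32_t group) const noexcept {
    return ((reassignmentFlagMask >> group) & 1u) != 0;
  }
};

// Usage sketch: reserve a scratch register only for groups that really shuffle
// arguments; other groups no longer pay for an extra dirty register.
//
//   for (uint32_t group = 0; group < numGroups; group++)
//     if (state.needsScratch(group)) { /* pick & mark a scratch register */ }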

View File

@@ -65,8 +65,8 @@ public:
out.reset();
}
inline bool isDone() const noexcept { return cur.isDone(); }
inline void markDone() noexcept { cur.addFlags(FuncValue::kFlagIsDone); }
ASMJIT_INLINE_NODEBUG bool isDone() const noexcept { return cur.isDone(); }
ASMJIT_INLINE_NODEBUG void markDone() noexcept { cur.addFlags(FuncValue::kFlagIsDone); }
};
struct WorkData {
@@ -86,8 +86,10 @@ public:
uint8_t _numSwaps;
//! Number of stack loads.
uint8_t _numStackArgs;
//! Whether this work data would need reassignment.
uint8_t _needsScratch;
//! Reserved (only used as padding).
uint8_t _reserved[6];
uint8_t _reserved[5];
//! Physical ID to variable ID mapping.
uint8_t _physToVarId[32];
@@ -100,6 +102,7 @@ public:
_dstShuf = 0;
_numSwaps = 0;
_numStackArgs = 0;
_needsScratch = 0;
memset(_reserved, 0, sizeof(_reserved));
memset(_physToVarId, kVarIdNone, 32);
}
@@ -147,12 +150,12 @@ public:
_assignedRegs ^= Support::bitMask(regId);
}
inline RegMask archRegs() const noexcept { return _archRegs; }
inline RegMask workRegs() const noexcept { return _workRegs; }
inline RegMask usedRegs() const noexcept { return _usedRegs; }
inline RegMask assignedRegs() const noexcept { return _assignedRegs; }
inline RegMask dstRegs() const noexcept { return _dstRegs; }
inline RegMask availableRegs() const noexcept { return _workRegs & ~_assignedRegs; }
ASMJIT_INLINE_NODEBUG RegMask archRegs() const noexcept { return _archRegs; }
ASMJIT_INLINE_NODEBUG RegMask workRegs() const noexcept { return _workRegs; }
ASMJIT_INLINE_NODEBUG RegMask usedRegs() const noexcept { return _usedRegs; }
ASMJIT_INLINE_NODEBUG RegMask assignedRegs() const noexcept { return _assignedRegs; }
ASMJIT_INLINE_NODEBUG RegMask dstRegs() const noexcept { return _dstRegs; }
ASMJIT_INLINE_NODEBUG RegMask availableRegs() const noexcept { return _workRegs & ~_assignedRegs; }
};
//! Architecture traits.

View File

@@ -65,7 +65,7 @@ public:
#endif
}
inline BaseCompiler* cc() const noexcept { return _cc; }
ASMJIT_INLINE_NODEBUG BaseCompiler* cc() const noexcept { return _cc; }
//! \name Run
//! \{
@@ -80,8 +80,10 @@ public:
RABlock* entryBlock = _curBlock;
BaseNode* node = _funcNode->next();
if (ASMJIT_UNLIKELY(!node))
if (ASMJIT_UNLIKELY(!node)) {
return DebugUtils::errored(kErrorInvalidState);
}
_curBlock->setFirst(_funcNode);
_curBlock->setLast(_funcNode);
@@ -117,16 +119,19 @@ public:
// the first possible inserted node by `onBeforeInvoke()` or `onBeforeRet()`.
BaseNode* prev = node->prev();
if (node->type() == NodeType::kInvoke)
if (node->type() == NodeType::kInvoke) {
ASMJIT_PROPAGATE(static_cast<This*>(this)->onBeforeInvoke(node->as<InvokeNode>()));
else
}
else {
ASMJIT_PROPAGATE(static_cast<This*>(this)->onBeforeRet(node->as<FuncRetNode>()));
}
if (prev != node->prev()) {
// If this was the first node in the block and something was
// inserted before it then we have to update the first block.
if (_curBlock->first() == node)
if (_curBlock->first() == node) {
_curBlock->setFirst(prev->next());
}
node->setPosition(kNodePositionDidOnBefore);
node = prev->next();
@@ -148,7 +153,7 @@ public:
logNode(inst, kCodeIndentation);
InstControlFlow cf = InstControlFlow::kRegular;
ib.reset();
ib.reset(_curBlock->blockId());
ASMJIT_PROPAGATE(static_cast<This*>(this)->onInst(inst, cf, ib));
if (node->isInvoke()) {
@@ -191,8 +196,9 @@ public:
const Operand* opArray = inst->operands();
// Cannot jump anywhere without operands.
if (ASMJIT_UNLIKELY(!opCount))
if (ASMJIT_UNLIKELY(!opCount)) {
return DebugUtils::errored(kErrorInvalidState);
}
if (opArray[opCount - 1].isLabel()) {
// Labels are easy for constructing the control flow.
@@ -200,8 +206,9 @@ public:
ASMJIT_PROPAGATE(cc()->labelNodeOf(&labelNode, opArray[opCount - 1].as<Label>()));
RABlock* targetBlock = _pass->newBlockOrExistingAt(labelNode);
if (ASMJIT_UNLIKELY(!targetBlock))
if (ASMJIT_UNLIKELY(!targetBlock)) {
return DebugUtils::errored(kErrorOutOfMemory);
}
targetBlock->makeTargetable();
ASMJIT_PROPAGATE(_curBlock->appendSuccessor(targetBlock));
@@ -213,8 +220,9 @@ public:
JumpAnnotation* jumpAnnotation = nullptr;
_curBlock->addFlags(RABlockFlags::kHasJumpTable);
if (inst->type() == NodeType::kJump)
if (inst->type() == NodeType::kJump) {
jumpAnnotation = inst->as<JumpNode>()->annotation();
}
if (jumpAnnotation) {
uint64_t timestamp = _pass->nextTimestamp();
@@ -223,8 +231,9 @@ public:
ASMJIT_PROPAGATE(cc()->labelNodeOf(&labelNode, id));
RABlock* targetBlock = _pass->newBlockOrExistingAt(labelNode);
if (ASMJIT_UNLIKELY(!targetBlock))
if (ASMJIT_UNLIKELY(!targetBlock)) {
return DebugUtils::errored(kErrorOutOfMemory);
}
// Prevents adding basic-block successors multiple times.
if (!targetBlock->hasTimestamp(timestamp)) {
@@ -260,15 +269,17 @@ public:
}
else {
consecutiveBlock = _pass->newBlock(node);
if (ASMJIT_UNLIKELY(!consecutiveBlock))
if (ASMJIT_UNLIKELY(!consecutiveBlock)) {
return DebugUtils::errored(kErrorOutOfMemory);
}
node->setPassData<RABlock>(consecutiveBlock);
}
}
else {
consecutiveBlock = _pass->newBlock(node);
if (ASMJIT_UNLIKELY(!consecutiveBlock))
if (ASMJIT_UNLIKELY(!consecutiveBlock)) {
return DebugUtils::errored(kErrorOutOfMemory);
}
}
_curBlock->addFlags(RABlockFlags::kHasConsecutive);
@@ -308,14 +319,16 @@ public:
if (_curBlock) {
// If the label has a block assigned we can either continue with it or skip it if the block has been
// constructed already.
if (_curBlock->isConstructed())
if (_curBlock->isConstructed()) {
break;
}
}
else {
// No block assigned - create a new one and assign it.
_curBlock = _pass->newBlock(node);
if (ASMJIT_UNLIKELY(!_curBlock))
if (ASMJIT_UNLIKELY(!_curBlock)) {
return DebugUtils::errored(kErrorOutOfMemory);
}
node->setPassData<RABlock>(_curBlock);
}
@@ -333,8 +346,9 @@ public:
// The label currently processed is part of the current block. This is only possible for multiple labels
// that are right next to each other or labels that are separated by non-code nodes like directives and
// comments.
if (ASMJIT_UNLIKELY(_hasCode))
if (ASMJIT_UNLIKELY(_hasCode)) {
return DebugUtils::errored(kErrorInvalidState);
}
}
else {
// Label makes the current block constructed. There is a chance that the Label is not used, but we don't
@@ -363,8 +377,9 @@ public:
_curBlock->makeConstructed(_blockRegStats);
RABlock* consecutive = _pass->newBlock(node);
if (ASMJIT_UNLIKELY(!consecutive))
if (ASMJIT_UNLIKELY(!consecutive)) {
return DebugUtils::errored(kErrorOutOfMemory);
}
consecutive->makeTargetable();
ASMJIT_PROPAGATE(_curBlock->appendSuccessor(consecutive));
@@ -379,8 +394,9 @@ public:
}
}
if (_curBlock && _curBlock != _lastLoggedBlock)
if (_curBlock && _curBlock != _lastLoggedBlock) {
logBlock(_curBlock, kRootIndentation);
}
logNode(node, kRootIndentation);
// Unlikely: Assume that the exit label is reached only once per function.

View File

@@ -223,18 +223,29 @@ struct RARegIndex : public RARegCount {
//! Registers mask.
struct RARegMask {
//! \name Types
//! \{
using RegMasks = Support::Array<RegMask, Globals::kNumVirtGroups>;
//! \}
//! \name Members
//! \{
Support::Array<RegMask, Globals::kNumVirtGroups> _masks;
RegMasks _masks;
//! \}
//! \name Construction & Destruction
//! \{
//! Initializes from other `RARegMask`.
ASMJIT_INLINE_NODEBUG void init(const RARegMask& other) noexcept { _masks = other._masks; }
//! Reset all register masks to zero.
//! Initializes directly from an array of masks.
ASMJIT_INLINE_NODEBUG void init(const RegMasks& masks) noexcept { _masks = masks; }
//! Resets all register masks to zero.
ASMJIT_INLINE_NODEBUG void reset() noexcept { _masks.fill(0); }
//! \}
@@ -981,17 +992,20 @@ enum class RAWorkRegFlags : uint32_t {
//! Has been coalesced to another WorkReg.
kCoalesced = 0x00000002u,
//! This register is used across multiple basic blocks - this can be used as an optimization.
kMultipleBasicBlocks = 0x00000004u,
//! Set when this register is used as a LEAD consecutive register at least once.
kLeadConsecutive = 0x00000004u,
kLeadConsecutive = 0x00000010u,
//! Used to mark consecutive registers during processing.
kProcessedConsecutive = 0x00000008u,
kProcessedConsecutive = 0x00000020u,
//! Stack slot has to be allocated.
kStackUsed = 0x00000010u,
kStackUsed = 0x00000100u,
//! Stack allocation is preferred.
kStackPreferred = 0x00000020u,
kStackPreferred = 0x00000200u,
//! Marked for stack argument reassignment.
kStackArgToStack = 0x00000040u
kStackArgToStack = 0x00000400u
};
ASMJIT_DEFINE_ENUM_FLAGS(RAWorkRegFlags)
@@ -1040,6 +1054,12 @@ public:
//! RAPass specific flags used during analysis and allocation.
RAWorkRegFlags _flags = RAWorkRegFlags::kNone;
//! The identifier of a basic block this register lives in.
//!
//! If this register is used by multiple basic blocks, the id would always be `kIdNone`. However, if the register
//! lives in a single basic block, the id would be a valid block id, and `_flags` would not contain `kMultipleBasicBlocks`.
uint32_t _singleBasicBlockId = kIdNone;
//! Contains all USE ids collected from all instructions.
//!
//! If this mask is non-zero and not a power of two, it means that the register is used multiple times in
@@ -1122,6 +1142,23 @@ public:
ASMJIT_INLINE_NODEBUG bool isAllocated() const noexcept { return hasFlag(RAWorkRegFlags::kAllocated); }
ASMJIT_INLINE_NODEBUG void markAllocated() noexcept { addFlags(RAWorkRegFlags::kAllocated); }
ASMJIT_INLINE_NODEBUG bool isWithinSingleBasicBlock() const noexcept { return !hasFlag(RAWorkRegFlags::kMultipleBasicBlocks); }
ASMJIT_INLINE_NODEBUG uint32_t singleBasicBlockId() const noexcept { return _singleBasicBlockId; }
//! Called when this register appeared in a basic block having `blockId`.
//!
//! This function just sets the basic block of this work register, and then later, when this register is processed
//! again it's compared with all other basic blocks it appears in so the flag `kMultipleBasicBlocks` can be properly
//! set when the compared basic blocks differ.
ASMJIT_INLINE_NODEBUG void assignBasicBlock(uint32_t blockId) noexcept { _singleBasicBlockId = blockId; }
//! Marks this register as using multiple basic blocks, which means resetting the single basic block identifier and
//! adding `kMultipleBasicBlocks` flag.
ASMJIT_INLINE_NODEBUG void markUseOfMultipleBasicBlocks() noexcept {
_singleBasicBlockId = Globals::kInvalidId;
addFlags(RAWorkRegFlags::kMultipleBasicBlocks);
}
ASMJIT_INLINE_NODEBUG bool isLeadConsecutive() const noexcept { return hasFlag(RAWorkRegFlags::kLeadConsecutive); }
ASMJIT_INLINE_NODEBUG void markLeadConsecutive() noexcept { addFlags(RAWorkRegFlags::kLeadConsecutive); }

View File

@@ -234,7 +234,7 @@ Error RALocalAllocator::switchToAssignment(PhysToWorkMap* dstPhysToWorkMap, cons
uint32_t tmpPhysId = Support::ctz(allocableRegs);
ASMJIT_PROPAGATE(onMoveReg(group, curWorkId, tmpPhysId, physId));
_pass->_clobberedRegs[group] |= Support::bitMask(tmpPhysId);
_clobberedRegs[group] |= Support::bitMask(tmpPhysId);
}
else {
// MOVE is impossible, must SPILL.
@@ -675,7 +675,7 @@ Error RALocalAllocator::allocInst(InstNode* node) noexcept {
if (reassignedId != RAAssignment::kPhysNone) {
ASMJIT_PROPAGATE(onMoveReg(group, workId, reassignedId, assignedId));
allocableRegs ^= Support::bitMask(reassignedId);
_pass->_clobberedRegs[group] |= Support::bitMask(reassignedId);
_clobberedRegs[group] |= Support::bitMask(reassignedId);
continue;
}
}
@@ -704,8 +704,9 @@ Error RALocalAllocator::allocInst(InstNode* node) noexcept {
for (i = 0; i < count; i++) {
RATiedReg* thisTiedReg = &tiedRegs[i];
if (thisTiedReg->isUseDone())
if (thisTiedReg->isUseDone()) {
continue;
}
uint32_t thisWorkId = thisTiedReg->workId();
uint32_t thisPhysId = _curAssignment.workToPhysId(group, thisWorkId);
@@ -725,16 +726,18 @@ Error RALocalAllocator::allocInst(InstNode* node) noexcept {
ASMJIT_PROPAGATE(onSwapReg(group, thisWorkId, thisPhysId, targetWorkId, targetPhysId));
thisTiedReg->markUseDone();
if (thisTiedReg->isWrite())
if (thisTiedReg->isWrite()) {
_curAssignment.makeDirty(group, thisWorkId, targetPhysId);
}
usePending--;
// Double-hit.
RATiedReg* targetTiedReg = RALocal_findTiedRegByWorkId(tiedRegs, count, targetWorkReg->workId());
if (targetTiedReg && targetTiedReg->useId() == thisPhysId) {
targetTiedReg->markUseDone();
if (targetTiedReg->isWrite())
if (targetTiedReg->isWrite()) {
_curAssignment.makeDirty(group, targetWorkId, thisPhysId);
}
usePending--;
}
continue;
@@ -748,10 +751,10 @@ Error RALocalAllocator::allocInst(InstNode* node) noexcept {
// such case is to SPILL the target register or MOVE it to another register so the loop can continue.
RegMask availableRegs = _availableRegs[group] & ~_curAssignment.assigned(group);
if (availableRegs) {
uint32_t tmpRegId = Support::ctz(availableRegs);
uint32_t tmpRegId = pickBestSuitableRegister(group, availableRegs);
ASMJIT_PROPAGATE(onMoveReg(group, thisWorkId, tmpRegId, thisPhysId));
_pass->_clobberedRegs[group] |= Support::bitMask(tmpRegId);
_clobberedRegs[group] |= Support::bitMask(tmpRegId);
// NOTE: This register is not done, we have just moved it to another physical spot, and we will have to
// move it again into the correct spot once it's free (since this is essentially doing a swap operation
@@ -766,16 +769,18 @@ Error RALocalAllocator::allocInst(InstNode* node) noexcept {
ASMJIT_PROPAGATE(onMoveReg(group, thisWorkId, targetPhysId, thisPhysId));
thisTiedReg->markUseDone();
if (thisTiedReg->isWrite())
if (thisTiedReg->isWrite()) {
_curAssignment.makeDirty(group, thisWorkId, targetPhysId);
}
usePending--;
}
else {
ASMJIT_PROPAGATE(onLoadReg(group, thisWorkId, targetPhysId));
thisTiedReg->markUseDone();
if (thisTiedReg->isWrite())
if (thisTiedReg->isWrite()) {
_curAssignment.makeDirty(group, thisWorkId, targetPhysId);
}
usePending--;
}
}
@@ -822,8 +827,9 @@ Error RALocalAllocator::allocInst(InstNode* node) noexcept {
uint32_t physId = it.next();
uint32_t workId = _curAssignment.physToWorkId(group, physId);
if (workId == RAAssignment::kWorkNone)
if (workId == RAAssignment::kWorkNone) {
continue;
}
ASMJIT_PROPAGATE(onSpillReg(group, workId, physId));
} while (it.hasNext());
@@ -842,8 +848,9 @@ Error RALocalAllocator::allocInst(InstNode* node) noexcept {
Support::BitWordIterator<RegMask> it(tiedReg->useRegMask());
while (it.hasNext()) {
uint32_t dstId = it.next();
if (dstId == srcId)
if (dstId == srcId) {
continue;
}
_pass->emitMove(workId, dstId, srcId);
}
}
@@ -925,8 +932,9 @@ Error RALocalAllocator::allocInst(InstNode* node) noexcept {
}
}
if (bestLeadReg == 0xFFFFFFFF)
if (bestLeadReg == 0xFFFFFFFF) {
return DebugUtils::errored(kErrorConsecutiveRegsAllocation);
}
for (i = 0; i < consecutiveCount; i++) {
uint32_t consecutiveIndex = bestLeadReg + i;
@@ -949,8 +957,9 @@ Error RALocalAllocator::allocInst(InstNode* node) noexcept {
uint32_t workId = tiedReg->workId();
uint32_t assignedId = _curAssignment.workToPhysId(group, workId);
if (assignedId != RAAssignment::kPhysNone)
if (assignedId != RAAssignment::kPhysNone) {
ASMJIT_PROPAGATE(onKillReg(group, workId, assignedId));
}
uint32_t physId = tiedReg->outId();
if (physId == RAAssignment::kPhysNone) {
@@ -970,8 +979,9 @@ Error RALocalAllocator::allocInst(InstNode* node) noexcept {
// OUTs are CLOBBERed thus cannot be ASSIGNed right now.
ASMJIT_ASSERT(!_curAssignment.isPhysAssigned(group, physId));
if (!tiedReg->isKill())
if (!tiedReg->isKill()) {
ASMJIT_PROPAGATE(onAssignReg(group, workId, physId, true));
}
tiedReg->setOutId(physId);
tiedReg->markOutDone();
@@ -1043,8 +1053,9 @@ Error RALocalAllocator::allocBranch(InstNode* node, RABlock* target, RABlock* co
// Additional instructions emitted to switch from the current state to the `target` state. This means
// that we have to move these instructions into an independent code block and patch the jump location.
Operand& targetOp = node->op(node->opCount() - 1);
if (ASMJIT_UNLIKELY(!targetOp.isLabel()))
if (ASMJIT_UNLIKELY(!targetOp.isLabel())) {
return DebugUtils::errored(kErrorInvalidState);
}
Label trampoline = _cc->newLabel();
Label savedTarget = targetOp.as<Label>();
@@ -1121,16 +1132,18 @@ uint32_t RALocalAllocator::decideOnAssignment(RegGroup group, uint32_t workId, u
// Prefer home register id, if possible.
if (workReg->hasHomeRegId()) {
uint32_t homeId = workReg->homeRegId();
if (Support::bitTest(allocableRegs, homeId))
if (Support::bitTest(allocableRegs, homeId)) {
return homeId;
}
}
// Prefer registers used upon block entries.
RegMask previouslyAssignedRegs = workReg->allocatedMask();
if (allocableRegs & previouslyAssignedRegs)
if (allocableRegs & previouslyAssignedRegs) {
allocableRegs &= previouslyAssignedRegs;
}
return Support::ctz(allocableRegs);
return pickBestSuitableRegister(group, allocableRegs);
}
uint32_t RALocalAllocator::decideOnReassignment(RegGroup group, uint32_t workId, uint32_t physId, RegMask allocableRegs, RAInst* raInst) const noexcept {
@@ -1141,8 +1154,9 @@ uint32_t RALocalAllocator::decideOnReassignment(RegGroup group, uint32_t workId,
// Prefer reassignment back to HomeId, if possible.
if (workReg->hasHomeRegId()) {
if (Support::bitTest(allocableRegs, workReg->homeRegId()))
if (Support::bitTest(allocableRegs, workReg->homeRegId())) {
return workReg->homeRegId();
}
}
// Prefer assignment to a temporary register in case this register is killed by the instruction (or has an out slot).
@@ -1151,6 +1165,14 @@ uint32_t RALocalAllocator::decideOnReassignment(RegGroup group, uint32_t workId,
return Support::ctz(allocableRegs);
}
// Prefer reassignment if this register is only used within a single basic block.
if (workReg->isWithinSingleBasicBlock()) {
RegMask filteredRegs = allocableRegs & ~workReg->clobberSurvivalMask();
if (filteredRegs) {
return pickBestSuitableRegister(group, filteredRegs);
}
}
// TODO: [Register Allocator] This could be improved.
// Decided to SPILL.
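
The single-basic-block information collected during liveness analysis feeds the
reassignment decision above. A hedged, self-contained restatement of that
preference; the names `WorkRegView`, `tryReassignInsteadOfSpill`, and `kNoReg` are
illustrative, and the real logic lives in `decideOnReassignment()` together with
`pickBestSuitableRegister()`:

#include <bit>
#include <cstdint>

using RegMask = uint32_t;

constexpr uint32_t kNoReg = 0xFFFFFFFFu;

// Illustrative stand-in for the two RAWorkReg properties consulted here.
struct WorkRegView {
  bool withinSingleBasicBlock;  // the register never leaves one basic block
  RegMask avoidMask;            // registers better avoided (mirrors clobberSurvivalMask())
};

// A register confined to one block is cheap to relocate, so instead of spilling it
// the allocator tries to pick another allocable physical register. Returning kNoReg
// means "fall back to the existing spill path".
inline uint32_t tryReassignInsteadOfSpill(const WorkRegView& reg, RegMask allocableRegs) noexcept {
  if (!reg.withinSingleBasicBlock) {
    return kNoReg;
  }
  RegMask filtered = allocableRegs & ~reg.avoidMask;
  if (!filtered) {
    return kNoReg;
  }
  return uint32_t(std::countr_zero(filtered)); // lowest set bit (C++20) = chosen physical id
}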

View File

@@ -25,8 +25,8 @@ class RALocalAllocator {
public:
ASMJIT_NONCOPYABLE(RALocalAllocator)
typedef RAAssignment::PhysToWorkMap PhysToWorkMap;
typedef RAAssignment::WorkToPhysMap WorkToPhysMap;
using PhysToWorkMap = RAAssignment::PhysToWorkMap;
using WorkToPhysMap = RAAssignment::WorkToPhysMap;
//! Link to `BaseRAPass`.
BaseRAPass* _pass {};
@@ -39,6 +39,8 @@ public:
RARegMask _availableRegs {};
//! Registers clobbered by the allocator.
RARegMask _clobberedRegs {};
//! Registers that must be preserved by the function (clobbering means saving & restoring in function prolog & epilog).
RARegMask _funcPreservedRegs {};
//! Register assignment (current).
RAAssignment _curAssignment {};
@@ -67,7 +69,9 @@ public:
: _pass(pass),
_cc(pass->cc()),
_archTraits(pass->_archTraits),
_availableRegs(pass->_availableRegs) {}
_availableRegs(pass->_availableRegs) {
_funcPreservedRegs.init(pass->func()->frame().preservedRegs());
}
Error init() noexcept;
@@ -150,7 +154,7 @@ public:
return uint32_t(int32_t(freq * float(kCostOfFrequency)));
}
inline uint32_t calculateSpillCost(RegGroup group, uint32_t workId, uint32_t assignedId) const noexcept {
ASMJIT_FORCE_INLINE uint32_t calculateSpillCost(RegGroup group, uint32_t workId, uint32_t assignedId) const noexcept {
RAWorkReg* workReg = workRegById(workId);
uint32_t cost = costByFrequency(workReg->liveStats().freq());
@@ -160,6 +164,18 @@ public:
return cost;
}
ASMJIT_FORCE_INLINE uint32_t pickBestSuitableRegister(RegGroup group, RegMask allocableRegs) const noexcept {
// These are registers that must be preserved by the function itself.
RegMask preservedRegs = _funcPreservedRegs[group];
// Reduce the set by removing preserved registers when possible.
if (allocableRegs & ~preservedRegs) {
allocableRegs &= ~preservedRegs;
}
return Support::ctz(allocableRegs);
}
//! Decides on register assignment.
uint32_t decideOnAssignment(RegGroup group, uint32_t workId, uint32_t assignedId, RegMask allocableRegs) const noexcept;
@@ -182,7 +198,10 @@ public:
//! Emits a move between a destination and source register, and fixes the
//! register assignment.
inline Error onMoveReg(RegGroup group, uint32_t workId, uint32_t dstPhysId, uint32_t srcPhysId) noexcept {
if (dstPhysId == srcPhysId) return kErrorOk;
if (dstPhysId == srcPhysId) {
return kErrorOk;
}
_curAssignment.reassign(group, workId, dstPhysId, srcPhysId);
return _pass->emitMove(workId, dstPhysId, srcPhysId);
}
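
`pickBestSuitableRegister()` prefers registers the function does not have to
preserve, so choosing them never adds a save/restore pair to the prolog/epilog. A
small self-contained illustration with made-up masks (the register ids and the
calling-convention mask are hypothetical):

#include <bit>
#include <cstdint>
#include <cstdio>

using RegMask = uint32_t;

// Mirrors the selection logic above: drop callee-saved (preserved) registers from
// the candidate set when anything else is available, then take the lowest-id candidate.
inline uint32_t pickRegister(RegMask allocable, RegMask preserved) noexcept {
  if (allocable & ~preserved) {
    allocable &= ~preserved;
  }
  return uint32_t(std::countr_zero(allocable));
}

int main() {
  // Hypothetical SysV-like GP masks: rbx/rbp/r12..r15 preserved (ids 3, 5, 12..15).
  RegMask preserved = (1u << 3) | (1u << 5) | (0xFu << 12);

  // rbx, r8 and r12 are free: r8 (id 8) wins because it needs no save/restore.
  std::printf("picked id %u\n", pickRegister((1u << 3) | (1u << 8) | (1u << 12), preserved));

  // If only preserved registers are free, one of them is still picked (rbx, id 3).
  std::printf("picked id %u\n", pickRegister((1u << 3) | (1u << 12), preserved));
  return 0;
}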

View File

@@ -964,15 +964,23 @@ ASMJIT_FAVOR_SPEED Error BaseRAPass::buildLiveness() noexcept {
ASMJIT_PROPAGATE(workReg->_writes.reserve(allocator(), nOutsPerWorkReg[i]));
}
// These are not needed anymore, so release the memory now so other allocations can reuse it.
nUsesPerWorkReg.release(allocator());
nOutsPerWorkReg.release(allocator());
// Assign block and instruction positions, build LiveCount and LiveSpans
// ---------------------------------------------------------------------
// This is a starting position, reserving [0, 1] for function arguments.
uint32_t position = 2;
for (i = 0; i < numAllBlocks; i++) {
RABlock* block = _blocks[i];
if (!block->isReachable())
continue;
uint32_t blockId = block->blockId();
BaseNode* node = block->first();
BaseNode* stop = block->last();
@@ -995,6 +1003,8 @@ ASMJIT_FAVOR_SPEED Error BaseRAPass::buildLiveness() noexcept {
if (node->isInst()) {
InstNode* inst = node->as<InstNode>();
RAInst* raInst = inst->passData<RAInst>();
// Impossible - each processed instruction node must have an associated RAInst.
ASMJIT_ASSERT(raInst != nullptr);
RATiedReg* tiedRegs = raInst->tiedRegs();
@@ -1010,14 +1020,20 @@ ASMJIT_FAVOR_SPEED Error BaseRAPass::buildLiveness() noexcept {
// Create refs and writes.
RAWorkReg* workReg = workRegById(workId);
workReg->_refs.appendUnsafe(node);
if (tiedReg->isWrite())
if (tiedReg->isWrite()) {
workReg->_writes.appendUnsafe(node);
}
if (workReg->singleBasicBlockId() != blockId) {
workReg->markUseOfMultipleBasicBlocks();
}
// We couldn't calculate this in previous steps, but since we know all LIVE-OUT at this point it becomes
// trivial. If this is the last instruction that uses this `workReg` and it's not LIVE-OUT then it is
// KILLed here.
if (tiedReg->isLast() && !block->liveOut().bitAt(workId))
if (tiedReg->isLast() && !block->liveOut().bitAt(workId)) {
tiedReg->addFlags(RATiedFlags::kKill);
}
LiveRegSpans& liveSpans = workReg->liveSpans();
bool wasOpen;
@@ -1038,20 +1054,23 @@ ASMJIT_FAVOR_SPEED Error BaseRAPass::buildLiveness() noexcept {
if (tiedReg->hasUseId()) {
uint32_t useId = tiedReg->useId();
workReg->addUseIdMask(Support::bitMask(useId));
if (!workReg->hasHintRegId() && !Support::bitTest(raInst->_clobberedRegs[group], useId))
if (!workReg->hasHintRegId() && !Support::bitTest(raInst->_clobberedRegs[group], useId)) {
workReg->setHintRegId(useId);
}
}
if (tiedReg->useRegMask()) {
workReg->restrictPreferredMask(tiedReg->useRegMask());
if (workReg->isLeadConsecutive())
if (workReg->isLeadConsecutive()) {
workReg->restrictConsecutiveMask(tiedReg->useRegMask());
}
}
if (tiedReg->outRegMask()) {
workReg->restrictPreferredMask(tiedReg->outRegMask());
if (workReg->isLeadConsecutive())
if (workReg->isLeadConsecutive()) {
workReg->restrictConsecutiveMask(tiedReg->outRegMask());
}
}
// Update `RAWorkReg::clobberedSurvivalMask`.
@@ -1060,6 +1079,10 @@ ASMJIT_FAVOR_SPEED Error BaseRAPass::buildLiveness() noexcept {
}
}
if (node->isInvoke()) {
func()->frame().updateCallStackAlignment(node->as<InvokeNode>()->detail().naturalStackAlignment());
}
position += 2;
maxLiveCount.op<Support::Max>(raInst->_liveCount);
}
@@ -1098,8 +1121,6 @@ ASMJIT_FAVOR_SPEED Error BaseRAPass::buildLiveness() noexcept {
logger->log(sb);
});
nUsesPerWorkReg.release(allocator());
nOutsPerWorkReg.release(allocator());
nInstsPerBlock.release(allocator());
return kErrorOk;
@@ -1226,7 +1247,7 @@ ASMJIT_FAVOR_SPEED Error BaseRAPass::binPack(RegGroup group) noexcept {
RegMask preservedRegs = func()->frame().preservedRegs(group);
// First try to pack everything that provides register-id hint as these are most likely function arguments and fixed
// (precolored) virtual registers.
// (pre-colored) virtual registers.
if (!workRegs.empty()) {
uint32_t dstIndex = 0;

View File

@@ -44,7 +44,7 @@ enum class RABlockFlags : uint32_t {
kHasConsecutive = 0x00000200u,
//! Block has a jump to a jump-table at the end.
kHasJumpTable = 0x00000400u,
//! Block contains fixed registers (precolored).
//! Block contains fixed registers (pre-colored).
kHasFixedRegs = 0x00000800u,
//! Block contains function calls.
kHasFuncCalls = 0x00001000u
@@ -402,6 +402,8 @@ public:
//! \name Members
//! \{
//! Basic block id.
uint32_t _basicBlockId;
//! Instruction RW flags.
InstRWFlags _instRWFlags;
@@ -425,10 +427,11 @@ public:
//! \name Construction & Destruction
//! \{
ASMJIT_INLINE_NODEBUG RAInstBuilder() noexcept { reset(); }
ASMJIT_INLINE_NODEBUG explicit RAInstBuilder(uint32_t blockId = Globals::kInvalidId) noexcept { reset(blockId); }
ASMJIT_INLINE_NODEBUG void init() noexcept { reset(); }
ASMJIT_INLINE_NODEBUG void reset() noexcept {
ASMJIT_INLINE_NODEBUG void init(uint32_t blockId) noexcept { reset(blockId); }
ASMJIT_INLINE_NODEBUG void reset(uint32_t blockId) noexcept {
_basicBlockId = blockId;
_instRWFlags = InstRWFlags::kNone;
_aggregatedFlags = RATiedFlags::kNone;
_forbiddenFlags = RATiedFlags::kNone;
@@ -465,13 +468,13 @@ public:
ASMJIT_INLINE_NODEBUG const RATiedReg* end() const noexcept { return _cur; }
//! Returns `RATiedReg` at the given `index`.
inline RATiedReg* operator[](uint32_t index) noexcept {
inline RATiedReg* operator[](size_t index) noexcept {
ASMJIT_ASSERT(index < tiedRegCount());
return &_tiedRegs[index];
}
//! Returns `RATiedReg` at the given `index`. (const).
inline const RATiedReg* operator[](uint32_t index) const noexcept {
inline const RATiedReg* operator[](size_t index) const noexcept {
ASMJIT_ASSERT(index < tiedRegCount());
return &_tiedRegs[index];
}
@@ -487,8 +490,8 @@ public:
RegMask useRegMask, uint32_t useId, uint32_t useRewriteMask,
RegMask outRegMask, uint32_t outId, uint32_t outRewriteMask,
uint32_t rmSize = 0,
uint32_t consecutiveParent = Globals::kInvalidId) noexcept {
uint32_t consecutiveParent = Globals::kInvalidId
) noexcept {
RegGroup group = workReg->group();
RATiedReg* tiedReg = workReg->tiedReg();
@@ -507,12 +510,14 @@ public:
_stats.makeUsed(group);
if (!tiedReg) {
// Could happen when the builder is not reset properly after each instruction.
// Would happen when the builder is not reset properly after each instruction - so catch that!
ASMJIT_ASSERT(tiedRegCount() < ASMJIT_ARRAY_SIZE(_tiedRegs));
tiedReg = _cur++;
tiedReg->init(workReg->workId(), flags, useRegMask, useId, useRewriteMask, outRegMask, outId, outRewriteMask, rmSize, consecutiveParent);
workReg->setTiedReg(tiedReg);
workReg->assignBasicBlock(_basicBlockId);
_count.add(group);
return kErrorOk;
@@ -566,7 +571,9 @@ public:
tiedReg = _cur++;
tiedReg->init(workReg->workId(), flags, allocable, useId, 0, allocable, BaseReg::kIdBad, 0);
workReg->setTiedReg(tiedReg);
workReg->assignBasicBlock(_basicBlockId);
_count.add(group);
return kErrorOk;
@@ -606,7 +613,9 @@ public:
tiedReg = _cur++;
tiedReg->init(workReg->workId(), flags, Support::allOnes<RegMask>(), BaseReg::kIdBad, 0, outRegs, outId, 0);
workReg->setTiedReg(tiedReg);
workReg->assignBasicBlock(_basicBlockId);
_count.add(group);
return kErrorOk;
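
A hedged sketch of how the now block-aware builder is driven: it is reset with the
id of the basic block that owns the instruction being processed, so every tied
register it creates can record its block of use (via `assignBasicBlock()` above).
The stand-in types below are illustrative only; the real driver is
`RACFGBuilderT<This>::run()` with `ib.reset(_curBlock->blockId())`, as shown in the
RACFGBuilderT diff earlier.

#include <cstdint>
#include <vector>

// Illustrative stand-ins; the real types are RAInstBuilder and RABlock.
struct Inst {};

struct Block {
  uint32_t blockId;
  std::vector<Inst> insts;
};

struct InstBuilder {
  uint32_t basicBlockId = 0xFFFFFFFFu;

  // Mirrors RAInstBuilder::reset(blockId) - remember which block owns the
  // instruction currently being processed (and clear per-instruction state).
  void reset(uint32_t blockId) noexcept { basicBlockId = blockId; }
};

inline void buildBlock(InstBuilder& ib, const Block& block) {
  for (const Inst& inst : block.insts) {
    // Every tied register created while processing `inst` can now record its block.
    ib.reset(block.blockId);
    (void)inst; // ...populate `ib` from the instruction's operands here...
  }
}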

View File

@@ -83,16 +83,16 @@ public:
bool _is64Bit;
bool _avxEnabled;
inline RACFGBuilder(X86RAPass* pass) noexcept
ASMJIT_INLINE_NODEBUG RACFGBuilder(X86RAPass* pass) noexcept
: RACFGBuilderT<RACFGBuilder>(pass),
_arch(pass->cc()->arch()),
_is64Bit(pass->registerSize() == 8),
_avxEnabled(pass->avxEnabled()) {
}
inline Compiler* cc() const noexcept { return static_cast<Compiler*>(_cc); }
ASMJIT_INLINE_NODEBUG Compiler* cc() const noexcept { return static_cast<Compiler*>(_cc); }
inline uint32_t choose(uint32_t sseInst, uint32_t avxInst) const noexcept {
ASMJIT_INLINE_NODEBUG uint32_t choose(uint32_t sseInst, uint32_t avxInst) const noexcept {
return _avxEnabled ? avxInst : sseInst;
}