Mirror of https://github.com/asmjit/asmjit.git (synced 2025-12-16 20:17:05 +03:00)
Improved register allocation of consecutive registers in some cases

* The implementation now tries to detect whether a virtual register lives
  only in a single basic block; when it does, reallocation uses a plain
  register-to-register move instead of a spill/alloc sequence (a simplified
  sketch of the idea follows below).
* Additionally, the implementation improves the use of scratch registers
  during function argument allocation - a scratch register is only reserved
  when it is actually needed.
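To illustrate the first bullet, here is a minimal, self-contained sketch of the single-basic-block detection idea. BlockTracker, kNoBlock and seenInBlock are illustrative names only, not asmjit API; in the commit itself the bookkeeping lives in RAWorkReg (assignBasicBlock / markUseOfMultipleBasicBlocks) and is consulted by RALocalAllocator when deciding between a move and a spill.

// Illustrative only - not asmjit code. A register confined to one basic
// block can be relocated with a register-to-register move; a register that
// crosses block boundaries may still have to be spilled so that other
// blocks find it where they expect it.
#include <cstdint>
#include <cstdio>

static constexpr uint32_t kNoBlock = 0xFFFFFFFFu;

struct BlockTracker {
  uint32_t singleBlockId = kNoBlock; // block where the register was first seen
  bool multipleBlocks = false;       // set once a second block is seen

  void seenInBlock(uint32_t blockId) {
    if (singleBlockId == kNoBlock)
      singleBlockId = blockId;       // first sighting - remember the block
    else if (singleBlockId != blockId)
      multipleBlocks = true;         // used across blocks - a move alone is not enough
  }
};

int main() {
  BlockTracker a, b;
  a.seenInBlock(3); a.seenInBlock(3); // only ever used in block 3
  b.seenInBlock(1); b.seenInBlock(2); // crosses a block boundary

  std::printf("a can be reallocated by move: %s\n", a.multipleBlocks ? "no" : "yes");
  std::printf("b can be reallocated by move: %s\n", b.multipleBlocks ? "no" : "yes");
  return 0;
}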
.github/workflows/build-config.json (vendored) - 2 changed lines
@@ -17,7 +17,7 @@
{ "optional": true, "cmd": ["asmjit_test_assembler", "--validate"] },
{ "optional": true, "cmd": ["asmjit_test_emitters"] },
{ "optional": true, "cmd": ["asmjit_test_execute"] },
{ "optional": true, "cmd": ["asmjit_test_compiler"] },
{ "optional": true, "cmd": ["asmjit_test_compiler", "--verbose", "--dump-asm"] },
{ "optional": true, "cmd": ["asmjit_test_instinfo"] },
{ "optional": true, "cmd": ["asmjit_test_x86_sections"] },
{ "optional": true, "cmd": ["asmjit_test_perf", "--quick"] }
@@ -1041,6 +1041,13 @@ public:

//! \}

//! \name Types
//! \{

using RegMasks = Support::Array<RegMask, Globals::kNumVirtGroups>;

//! \}

//! \name Members
//! \{

@@ -1093,9 +1100,9 @@ public:
uint32_t _stackAdjustment = 0;

//! Registers that are dirty.
Support::Array<RegMask, Globals::kNumVirtGroups> _dirtyRegs {};
RegMasks _dirtyRegs {};
//! Registers that must be preserved (copied from CallConv).
Support::Array<RegMask, Globals::kNumVirtGroups> _preservedRegs {};
RegMasks _preservedRegs {};
//! Size to save/restore per register group.
Support::Array<uint8_t, Globals::kNumVirtGroups> _saveRestoreRegSize {};
//! Alignment of save/restore area per register group.

@@ -1380,6 +1387,12 @@ public:
return _dirtyRegs[group] & _preservedRegs[group];
}

//! Returns all dirty registers as a Support::Array<> type.
ASMJIT_INLINE_NODEBUG const RegMasks& dirtyRegs() const noexcept { return _dirtyRegs; }

//! Returns all preserved registers as a Support::Array<> type.
ASMJIT_INLINE_NODEBUG const RegMasks& preservedRegs() const noexcept { return _preservedRegs; }

//! Returns the mask of preserved registers of the given register `group`.
//!
//! Preserved registers are those that must survive the function call unmodified. The function can only modify
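The new dirtyRegs() and preservedRegs() accessors expose the whole per-group mask array at once. Below is a standalone sketch of how such masks combine, mirroring the dirty-and-preserved expression shown in the hunk above; the array type, group count and savedRegs helper are simplified stand-ins, not asmjit's real types.

#include <array>
#include <cstdint>
#include <cstdio>

using RegMask = uint32_t;
constexpr size_t kNumGroups = 2;            // simplified: e.g. GP and Vec only
using RegMasks = std::array<RegMask, kNumGroups>;

// Registers that must be saved/restored in prolog/epilog are exactly those
// that are both dirty (modified by the function) and preserved (callee-saved).
RegMasks savedRegs(const RegMasks& dirty, const RegMasks& preserved) {
  RegMasks out{};
  for (size_t group = 0; group < kNumGroups; group++)
    out[group] = dirty[group] & preserved[group];
  return out;
}

int main() {
  RegMasks dirty     = {0b1111'0110u, 0b0000'1111u};
  RegMasks preserved = {0b1111'0000u, 0b0000'0011u};
  RegMasks saved = savedRegs(dirty, preserved);
  std::printf("saved GP mask:  0x%02X\n", saved[0]);  // 0xF0
  std::printf("saved Vec mask: 0x%02X\n", saved[1]);  // 0x03
  return 0;
}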
@@ -13,8 +13,9 @@ ASMJIT_BEGIN_NAMESPACE
//! \{

FuncArgsContext::FuncArgsContext() noexcept {
for (RegGroup group : RegGroupVirtValues{})
for (RegGroup group : RegGroupVirtValues{}) {
_workData[size_t(group)].reset();
}
}

ASMJIT_FAVOR_SIZE Error FuncArgsContext::initWorkData(const FuncFrame& frame, const FuncArgsAssignment& args, const RAConstraints* constraints) noexcept {
@@ -26,23 +27,29 @@ ASMJIT_FAVOR_SIZE Error FuncArgsContext::initWorkData(const FuncFrame& frame, co
|
||||
_arch = arch;
|
||||
|
||||
// Initialize `_archRegs`.
|
||||
for (RegGroup group : RegGroupVirtValues{})
|
||||
for (RegGroup group : RegGroupVirtValues{}) {
|
||||
_workData[group]._archRegs = _constraints->availableRegs(group);
|
||||
}
|
||||
|
||||
if (frame.hasPreservedFP())
|
||||
if (frame.hasPreservedFP()) {
|
||||
_workData[size_t(RegGroup::kGp)]._archRegs &= ~Support::bitMask(archTraits().fpRegId());
|
||||
}
|
||||
|
||||
uint32_t reassignmentFlagMask = 0;
|
||||
|
||||
// Extract information from all function arguments/assignments and build Var[] array.
|
||||
uint32_t varId = 0;
|
||||
for (uint32_t argIndex = 0; argIndex < Globals::kMaxFuncArgs; argIndex++) {
|
||||
for (uint32_t valueIndex = 0; valueIndex < Globals::kMaxValuePack; valueIndex++) {
|
||||
const FuncValue& dst_ = args.arg(argIndex, valueIndex);
|
||||
if (!dst_.isAssigned())
|
||||
if (!dst_.isAssigned()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const FuncValue& src_ = func.arg(argIndex, valueIndex);
|
||||
if (ASMJIT_UNLIKELY(!src_.isAssigned()))
|
||||
if (ASMJIT_UNLIKELY(!src_.isAssigned())) {
|
||||
return DebugUtils::errored(kErrorInvalidState);
|
||||
}
|
||||
|
||||
Var& var = _vars[varId];
|
||||
var.init(src_, dst_);
|
||||
@@ -55,42 +62,51 @@ ASMJIT_FAVOR_SIZE Error FuncArgsContext::initWorkData(const FuncFrame& frame, co
|
||||
WorkData* dstWd = nullptr;
|
||||
|
||||
// Not supported.
|
||||
if (src.isIndirect())
|
||||
if (src.isIndirect()) {
|
||||
return DebugUtils::errored(kErrorInvalidAssignment);
|
||||
}
|
||||
|
||||
if (dst.isReg()) {
|
||||
RegType dstType = dst.regType();
|
||||
if (ASMJIT_UNLIKELY(!archTraits().hasRegType(dstType)))
|
||||
if (ASMJIT_UNLIKELY(!archTraits().hasRegType(dstType))) {
|
||||
return DebugUtils::errored(kErrorInvalidRegType);
|
||||
}
|
||||
|
||||
// Copy TypeId from source if the destination doesn't have it. The RA used by BaseCompiler would never
|
||||
// leave TypeId undefined, but users of FuncAPI can just assign phys regs without specifying the type.
|
||||
if (!dst.hasTypeId())
|
||||
// leave TypeId undefined, but users of FuncAPI can just assign phys regs without specifying their types.
|
||||
if (!dst.hasTypeId()) {
|
||||
dst.setTypeId(archTraits().regTypeToTypeId(dst.regType()));
|
||||
}
|
||||
|
||||
dstGroup = archTraits().regTypeToGroup(dstType);
|
||||
if (ASMJIT_UNLIKELY(dstGroup > RegGroup::kMaxVirt))
|
||||
if (ASMJIT_UNLIKELY(dstGroup > RegGroup::kMaxVirt)) {
|
||||
return DebugUtils::errored(kErrorInvalidRegGroup);
|
||||
}
|
||||
|
||||
dstWd = &_workData[dstGroup];
|
||||
dstId = dst.regId();
|
||||
if (ASMJIT_UNLIKELY(dstId >= 32 || !Support::bitTest(dstWd->archRegs(), dstId)))
|
||||
return DebugUtils::errored(kErrorInvalidPhysId);
|
||||
|
||||
if (ASMJIT_UNLIKELY(Support::bitTest(dstWd->dstRegs(), dstId)))
|
||||
if (ASMJIT_UNLIKELY(dstId >= 32 || !Support::bitTest(dstWd->archRegs(), dstId))) {
|
||||
return DebugUtils::errored(kErrorInvalidPhysId);
|
||||
}
|
||||
|
||||
if (ASMJIT_UNLIKELY(Support::bitTest(dstWd->dstRegs(), dstId))) {
|
||||
return DebugUtils::errored(kErrorOverlappedRegs);
|
||||
}
|
||||
|
||||
dstWd->_dstRegs |= Support::bitMask(dstId);
|
||||
dstWd->_dstShuf |= Support::bitMask(dstId);
|
||||
dstWd->_usedRegs |= Support::bitMask(dstId);
|
||||
}
|
||||
else {
|
||||
if (!dst.hasTypeId())
|
||||
if (!dst.hasTypeId()) {
|
||||
dst.setTypeId(src.typeId());
|
||||
}
|
||||
|
||||
OperandSignature signature = getSuitableRegForMemToMemMove(arch, dst.typeId(), src.typeId());
|
||||
if (ASMJIT_UNLIKELY(!signature.isValid()))
|
||||
if (ASMJIT_UNLIKELY(!signature.isValid())) {
|
||||
return DebugUtils::errored(kErrorInvalidState);
|
||||
}
|
||||
_stackDstMask = uint8_t(_stackDstMask | Support::bitMask(signature.regGroup()));
|
||||
}
|
||||
|
||||
@@ -102,9 +118,11 @@ ASMJIT_FAVOR_SIZE Error FuncArgsContext::initWorkData(const FuncFrame& frame, co
|
||||
ASMJIT_ASSERT(dstWd != nullptr);
|
||||
dstWd->assign(varId, srcId);
|
||||
|
||||
// The best case, register is allocated where it is expected to be. However, we should
|
||||
// not mark this as done if both registers are GP and sign or zero extension is required.
|
||||
reassignmentFlagMask |= uint32_t(dstId != srcId) << uint32_t(dstGroup);
|
||||
|
||||
if (dstId == srcId) {
|
||||
// The best case, register is allocated where it is expected to be. However, we should
|
||||
// not mark this as done if both registers are GP and sign or zero extension is required.
|
||||
if (dstGroup != RegGroup::kGp) {
|
||||
var.markDone();
|
||||
}
|
||||
@@ -115,17 +133,20 @@ ASMJIT_FAVOR_SIZE Error FuncArgsContext::initWorkData(const FuncFrame& frame, co
|
||||
uint32_t dstSize = TypeUtils::sizeOf(dt);
|
||||
uint32_t srcSize = TypeUtils::sizeOf(st);
|
||||
|
||||
if (dt == TypeId::kVoid || st == TypeId::kVoid || dstSize <= srcSize)
|
||||
if (dt == TypeId::kVoid || st == TypeId::kVoid || dstSize <= srcSize) {
|
||||
var.markDone();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (ASMJIT_UNLIKELY(srcGroup > RegGroup::kMaxVirt))
|
||||
if (ASMJIT_UNLIKELY(srcGroup > RegGroup::kMaxVirt)) {
|
||||
return DebugUtils::errored(kErrorInvalidState);
|
||||
}
|
||||
|
||||
WorkData& srcData = _workData[size_t(srcGroup)];
|
||||
srcData.assign(varId, srcId);
|
||||
reassignmentFlagMask |= 1u << uint32_t(dstGroup);
|
||||
}
|
||||
}
|
||||
else {
|
||||
@@ -142,6 +163,7 @@ ASMJIT_FAVOR_SIZE Error FuncArgsContext::initWorkData(const FuncFrame& frame, co
for (RegGroup group : RegGroupVirtValues{}) {
_workData[group]._workRegs =
(_workData[group].archRegs() & (frame.dirtyRegs(group) | ~frame.preservedRegs(group))) | _workData[group].dstRegs() | _workData[group].assignedRegs();
_workData[group]._needsScratch = (reassignmentFlagMask >> uint32_t(group)) & 1u;
}

// Create a variable that represents `SARegId` if necessary.
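This hunk is where the second bullet of the commit message takes effect: while argument assignments are collected, a per-group bit is set in reassignmentFlagMask whenever an argument has to move between registers of that group, and only groups with the bit set get _needsScratch = 1, so markScratchRegs() later skips the others. A standalone sketch of that bookkeeping, using plain integers and a hypothetical Arg record instead of asmjit's FuncValue/WorkData types:

#include <cstdint>
#include <cstdio>

// Illustrative only - not asmjit code. Each "group" (e.g. GP = 0, Vec = 1)
// gets one bit in the flag mask; the bit is set whenever an argument is not
// already sitting in its destination register of that group.
int main() {
  struct Arg { uint32_t group, srcId, dstId; };
  Arg args[] = {
    {0, 7, 7},  // GP argument already in place - no shuffle needed
    {0, 2, 5},  // GP argument must move r2 -> r5
    {1, 0, 0},  // Vec argument already in place
  };

  uint32_t reassignmentFlagMask = 0;
  for (const Arg& a : args)
    reassignmentFlagMask |= uint32_t(a.srcId != a.dstId) << a.group;

  for (uint32_t group = 0; group < 2; group++) {
    bool needsScratch = (reassignmentFlagMask >> group) & 1u;
    std::printf("group %u needs a scratch register: %s\n",
                group, needsScratch ? "yes" : "no");
  }
  return 0;
}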
@@ -153,14 +175,16 @@ ASMJIT_FAVOR_SIZE Error FuncArgsContext::initWorkData(const FuncFrame& frame, co
|
||||
|
||||
if (saCurRegId != BaseReg::kIdBad) {
|
||||
// Check if the provided `SARegId` doesn't collide with input registers.
|
||||
if (ASMJIT_UNLIKELY(gpRegs.isAssigned(saCurRegId)))
|
||||
if (ASMJIT_UNLIKELY(gpRegs.isAssigned(saCurRegId))) {
|
||||
return DebugUtils::errored(kErrorOverlappedRegs);
|
||||
}
|
||||
}
|
||||
|
||||
if (saOutRegId != BaseReg::kIdBad) {
|
||||
// Check if the provided `SARegId` doesn't collide with argument assignments.
|
||||
if (ASMJIT_UNLIKELY(Support::bitTest(gpRegs.dstRegs(), saOutRegId)))
|
||||
if (ASMJIT_UNLIKELY(Support::bitTest(gpRegs.dstRegs(), saOutRegId))) {
|
||||
return DebugUtils::errored(kErrorOverlappedRegs);
|
||||
}
|
||||
saRegRequired = true;
|
||||
}
|
||||
|
||||
@@ -180,11 +204,13 @@ ASMJIT_FAVOR_SIZE Error FuncArgsContext::initWorkData(const FuncFrame& frame, co
|
||||
}
|
||||
else {
|
||||
RegMask availableRegs = gpRegs.availableRegs();
|
||||
if (!availableRegs)
|
||||
if (!availableRegs) {
|
||||
availableRegs = gpRegs.archRegs() & ~gpRegs.workRegs();
|
||||
}
|
||||
|
||||
if (ASMJIT_UNLIKELY(!availableRegs))
|
||||
if (ASMJIT_UNLIKELY(!availableRegs)) {
|
||||
return DebugUtils::errored(kErrorNoMorePhysRegs);
|
||||
}
|
||||
|
||||
saCurRegId = Support::ctz(availableRegs);
|
||||
}
|
||||
@@ -216,8 +242,9 @@ ASMJIT_FAVOR_SIZE Error FuncArgsContext::initWorkData(const FuncFrame& frame, co
|
||||
uint32_t dstId = var.out.regId();
|
||||
|
||||
RegGroup group = archTraits().regTypeToGroup(var.cur.regType());
|
||||
if (group != archTraits().regTypeToGroup(var.out.regType()))
|
||||
if (group != archTraits().regTypeToGroup(var.out.regType())) {
|
||||
continue;
|
||||
}
|
||||
|
||||
WorkData& wd = _workData[group];
|
||||
if (wd.isAssigned(dstId)) {
|
||||
@@ -261,28 +288,32 @@ ASMJIT_FAVOR_SIZE Error FuncArgsContext::markScratchRegs(FuncFrame& frame) noexc
for (RegGroup group : RegGroupVirtValues{}) {
if (Support::bitTest(groupMask, group)) {
WorkData& wd = _workData[group];
if (wd._needsScratch) {
// Initially, pick some clobbered or dirty register.
RegMask workRegs = wd.workRegs();
RegMask regs = workRegs & ~(wd.usedRegs() | wd._dstShuf);

// Initially, pick some clobbered or dirty register.
RegMask workRegs = wd.workRegs();
RegMask regs = workRegs & ~(wd.usedRegs() | wd._dstShuf);
// If that didn't work out pick some register which is not in 'used'.
if (!regs) {
regs = workRegs & ~wd.usedRegs();
}

// If that didn't work out pick some register which is not in 'used'.
if (!regs)
regs = workRegs & ~wd.usedRegs();
// If that didn't work out pick any other register that is allocable.
// This last resort case will, however, result in marking one more
// register dirty.
if (!regs) {
regs = wd.archRegs() & ~workRegs;
}

// If that didn't work out pick any other register that is allocable.
// This last resort case will, however, result in marking one more
// register dirty.
if (!regs)
regs = wd.archRegs() & ~workRegs;
// If that didn't work out we will have to use XORs instead of MOVs.
if (!regs) {
continue;
}

// If that didn't work out we will have to use XORs instead of MOVs.
if (!regs)
continue;

RegMask regMask = Support::blsi(regs);
wd._workRegs |= regMask;
frame.addDirtyRegs(group, regMask);
RegMask regMask = Support::blsi(regs);
wd._workRegs |= regMask;
frame.addDirtyRegs(group, regMask);
}
}
}

@@ -65,8 +65,8 @@ public:
out.reset();
}

inline bool isDone() const noexcept { return cur.isDone(); }
inline void markDone() noexcept { cur.addFlags(FuncValue::kFlagIsDone); }
ASMJIT_INLINE_NODEBUG bool isDone() const noexcept { return cur.isDone(); }
ASMJIT_INLINE_NODEBUG void markDone() noexcept { cur.addFlags(FuncValue::kFlagIsDone); }
};

struct WorkData {
@@ -86,8 +86,10 @@ public:
uint8_t _numSwaps;
//! Number of stack loads.
uint8_t _numStackArgs;
//! Whether this work data would need reassignment.
uint8_t _needsScratch;
//! Reserved (only used as padding).
uint8_t _reserved[6];
uint8_t _reserved[5];
//! Physical ID to variable ID mapping.
uint8_t _physToVarId[32];

@@ -100,6 +102,7 @@ public:
_dstShuf = 0;
_numSwaps = 0;
_numStackArgs = 0;
_needsScratch = 0;
memset(_reserved, 0, sizeof(_reserved));
memset(_physToVarId, kVarIdNone, 32);
}
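A side note on the WorkData layout change above: the new one-byte _needsScratch field is absorbed by shrinking _reserved from 6 to 5 bytes, so the struct's size and the position of _physToVarId stay the same. The sketch below uses simplified stand-in structs (only the tail of the layout, not the real WorkData) to show the idea:

#include <cstdint>

// Illustrative only - simplified stand-ins for the before/after layouts.
struct Before {
  uint8_t _numSwaps;
  uint8_t _numStackArgs;
  uint8_t _reserved[6];      // padding
  uint8_t _physToVarId[32];
};

struct After {
  uint8_t _numSwaps;
  uint8_t _numStackArgs;
  uint8_t _needsScratch;     // new one-byte flag...
  uint8_t _reserved[5];      // ...absorbed by shrinking the padding
  uint8_t _physToVarId[32];
};

// Both layouts occupy the same number of bytes.
static_assert(sizeof(Before) == sizeof(After), "layout size must not change");

int main() { return 0; }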
@@ -147,12 +150,12 @@ public:
|
||||
_assignedRegs ^= Support::bitMask(regId);
|
||||
}
|
||||
|
||||
inline RegMask archRegs() const noexcept { return _archRegs; }
|
||||
inline RegMask workRegs() const noexcept { return _workRegs; }
|
||||
inline RegMask usedRegs() const noexcept { return _usedRegs; }
|
||||
inline RegMask assignedRegs() const noexcept { return _assignedRegs; }
|
||||
inline RegMask dstRegs() const noexcept { return _dstRegs; }
|
||||
inline RegMask availableRegs() const noexcept { return _workRegs & ~_assignedRegs; }
|
||||
ASMJIT_INLINE_NODEBUG RegMask archRegs() const noexcept { return _archRegs; }
|
||||
ASMJIT_INLINE_NODEBUG RegMask workRegs() const noexcept { return _workRegs; }
|
||||
ASMJIT_INLINE_NODEBUG RegMask usedRegs() const noexcept { return _usedRegs; }
|
||||
ASMJIT_INLINE_NODEBUG RegMask assignedRegs() const noexcept { return _assignedRegs; }
|
||||
ASMJIT_INLINE_NODEBUG RegMask dstRegs() const noexcept { return _dstRegs; }
|
||||
ASMJIT_INLINE_NODEBUG RegMask availableRegs() const noexcept { return _workRegs & ~_assignedRegs; }
|
||||
};
|
||||
|
||||
//! Architecture traits.
|
||||
|
||||
@@ -65,7 +65,7 @@ public:
|
||||
#endif
|
||||
}
|
||||
|
||||
inline BaseCompiler* cc() const noexcept { return _cc; }
|
||||
ASMJIT_INLINE_NODEBUG BaseCompiler* cc() const noexcept { return _cc; }
|
||||
|
||||
//! \name Run
|
||||
//! \{
|
||||
@@ -80,8 +80,10 @@ public:
|
||||
|
||||
RABlock* entryBlock = _curBlock;
|
||||
BaseNode* node = _funcNode->next();
|
||||
if (ASMJIT_UNLIKELY(!node))
|
||||
|
||||
if (ASMJIT_UNLIKELY(!node)) {
|
||||
return DebugUtils::errored(kErrorInvalidState);
|
||||
}
|
||||
|
||||
_curBlock->setFirst(_funcNode);
|
||||
_curBlock->setLast(_funcNode);
|
||||
@@ -117,16 +119,19 @@ public:
|
||||
// the first possible inserted node by `onBeforeInvoke()` or `onBeforeRet()`.
|
||||
BaseNode* prev = node->prev();
|
||||
|
||||
if (node->type() == NodeType::kInvoke)
|
||||
if (node->type() == NodeType::kInvoke) {
|
||||
ASMJIT_PROPAGATE(static_cast<This*>(this)->onBeforeInvoke(node->as<InvokeNode>()));
|
||||
else
|
||||
}
|
||||
else {
|
||||
ASMJIT_PROPAGATE(static_cast<This*>(this)->onBeforeRet(node->as<FuncRetNode>()));
|
||||
}
|
||||
|
||||
if (prev != node->prev()) {
|
||||
// If this was the first node in the block and something was
|
||||
// inserted before it then we have to update the first block.
|
||||
if (_curBlock->first() == node)
|
||||
if (_curBlock->first() == node) {
|
||||
_curBlock->setFirst(prev->next());
|
||||
}
|
||||
|
||||
node->setPosition(kNodePositionDidOnBefore);
|
||||
node = prev->next();
|
||||
@@ -148,7 +153,7 @@ public:
|
||||
logNode(inst, kCodeIndentation);
|
||||
|
||||
InstControlFlow cf = InstControlFlow::kRegular;
|
||||
ib.reset();
|
||||
ib.reset(_curBlock->blockId());
|
||||
ASMJIT_PROPAGATE(static_cast<This*>(this)->onInst(inst, cf, ib));
|
||||
|
||||
if (node->isInvoke()) {
|
||||
@@ -191,8 +196,9 @@ public:
|
||||
const Operand* opArray = inst->operands();
|
||||
|
||||
// Cannot jump anywhere without operands.
|
||||
if (ASMJIT_UNLIKELY(!opCount))
|
||||
if (ASMJIT_UNLIKELY(!opCount)) {
|
||||
return DebugUtils::errored(kErrorInvalidState);
|
||||
}
|
||||
|
||||
if (opArray[opCount - 1].isLabel()) {
|
||||
// Labels are easy for constructing the control flow.
|
||||
@@ -200,8 +206,9 @@ public:
|
||||
ASMJIT_PROPAGATE(cc()->labelNodeOf(&labelNode, opArray[opCount - 1].as<Label>()));
|
||||
|
||||
RABlock* targetBlock = _pass->newBlockOrExistingAt(labelNode);
|
||||
if (ASMJIT_UNLIKELY(!targetBlock))
|
||||
if (ASMJIT_UNLIKELY(!targetBlock)) {
|
||||
return DebugUtils::errored(kErrorOutOfMemory);
|
||||
}
|
||||
|
||||
targetBlock->makeTargetable();
|
||||
ASMJIT_PROPAGATE(_curBlock->appendSuccessor(targetBlock));
|
||||
@@ -213,8 +220,9 @@ public:
|
||||
JumpAnnotation* jumpAnnotation = nullptr;
|
||||
_curBlock->addFlags(RABlockFlags::kHasJumpTable);
|
||||
|
||||
if (inst->type() == NodeType::kJump)
|
||||
if (inst->type() == NodeType::kJump) {
|
||||
jumpAnnotation = inst->as<JumpNode>()->annotation();
|
||||
}
|
||||
|
||||
if (jumpAnnotation) {
|
||||
uint64_t timestamp = _pass->nextTimestamp();
|
||||
@@ -223,8 +231,9 @@ public:
|
||||
ASMJIT_PROPAGATE(cc()->labelNodeOf(&labelNode, id));
|
||||
|
||||
RABlock* targetBlock = _pass->newBlockOrExistingAt(labelNode);
|
||||
if (ASMJIT_UNLIKELY(!targetBlock))
|
||||
if (ASMJIT_UNLIKELY(!targetBlock)) {
|
||||
return DebugUtils::errored(kErrorOutOfMemory);
|
||||
}
|
||||
|
||||
// Prevents adding basic-block successors multiple times.
|
||||
if (!targetBlock->hasTimestamp(timestamp)) {
|
||||
@@ -260,15 +269,17 @@ public:
|
||||
}
|
||||
else {
|
||||
consecutiveBlock = _pass->newBlock(node);
|
||||
if (ASMJIT_UNLIKELY(!consecutiveBlock))
|
||||
if (ASMJIT_UNLIKELY(!consecutiveBlock)) {
|
||||
return DebugUtils::errored(kErrorOutOfMemory);
|
||||
}
|
||||
node->setPassData<RABlock>(consecutiveBlock);
|
||||
}
|
||||
}
|
||||
else {
|
||||
consecutiveBlock = _pass->newBlock(node);
|
||||
if (ASMJIT_UNLIKELY(!consecutiveBlock))
|
||||
if (ASMJIT_UNLIKELY(!consecutiveBlock)) {
|
||||
return DebugUtils::errored(kErrorOutOfMemory);
|
||||
}
|
||||
}
|
||||
|
||||
_curBlock->addFlags(RABlockFlags::kHasConsecutive);
|
||||
@@ -308,14 +319,16 @@ public:
|
||||
if (_curBlock) {
|
||||
// If the label has a block assigned we can either continue with it or skip it if the block has been
|
||||
// constructed already.
|
||||
if (_curBlock->isConstructed())
|
||||
if (_curBlock->isConstructed()) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
else {
|
||||
// No block assigned - create a new one and assign it.
|
||||
_curBlock = _pass->newBlock(node);
|
||||
if (ASMJIT_UNLIKELY(!_curBlock))
|
||||
if (ASMJIT_UNLIKELY(!_curBlock)) {
|
||||
return DebugUtils::errored(kErrorOutOfMemory);
|
||||
}
|
||||
node->setPassData<RABlock>(_curBlock);
|
||||
}
|
||||
|
||||
@@ -333,8 +346,9 @@ public:
|
||||
// The label currently processed is part of the current block. This is only possible for multiple labels
|
||||
// that are right next to each other or labels that are separated by non-code nodes like directives and
|
||||
// comments.
|
||||
if (ASMJIT_UNLIKELY(_hasCode))
|
||||
if (ASMJIT_UNLIKELY(_hasCode)) {
|
||||
return DebugUtils::errored(kErrorInvalidState);
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Label makes the current block constructed. There is a chance that the Label is not used, but we don't
|
||||
@@ -363,8 +377,9 @@ public:
|
||||
_curBlock->makeConstructed(_blockRegStats);
|
||||
|
||||
RABlock* consecutive = _pass->newBlock(node);
|
||||
if (ASMJIT_UNLIKELY(!consecutive))
|
||||
if (ASMJIT_UNLIKELY(!consecutive)) {
|
||||
return DebugUtils::errored(kErrorOutOfMemory);
|
||||
}
|
||||
consecutive->makeTargetable();
|
||||
|
||||
ASMJIT_PROPAGATE(_curBlock->appendSuccessor(consecutive));
|
||||
@@ -379,8 +394,9 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
if (_curBlock && _curBlock != _lastLoggedBlock)
|
||||
if (_curBlock && _curBlock != _lastLoggedBlock) {
|
||||
logBlock(_curBlock, kRootIndentation);
|
||||
}
|
||||
logNode(node, kRootIndentation);
|
||||
|
||||
// Unlikely: Assume that the exit label is reached only once per function.
|
||||
|
||||
@@ -223,18 +223,29 @@ struct RARegIndex : public RARegCount {

//! Registers mask.
struct RARegMask {
//! \name Types
//! \{

using RegMasks = Support::Array<RegMask, Globals::kNumVirtGroups>;

//! \}

//! \name Members
//! \{

Support::Array<RegMask, Globals::kNumVirtGroups> _masks;
RegMasks _masks;

//! \}

//! \name Construction & Destruction
//! \{

//! Initializes from other `RARegMask`.
ASMJIT_INLINE_NODEBUG void init(const RARegMask& other) noexcept { _masks = other._masks; }
//! Reset all register masks to zero.
//! Initializes directly from an array of masks.
ASMJIT_INLINE_NODEBUG void init(const RegMasks& masks) noexcept { _masks = masks; }

//! Resets all register masks to zero.
ASMJIT_INLINE_NODEBUG void reset() noexcept { _masks.fill(0); }

//! \}

@@ -981,17 +992,20 @@ enum class RAWorkRegFlags : uint32_t {
//! Has been coalesced to another WorkReg.
kCoalesced = 0x00000002u,

//! This register is used across multiple basic blocks - this can be used as an optimization.
kMultipleBasicBlocks = 0x00000004u,

//! Set when this register is used as a LEAD consecutive register at least once.
kLeadConsecutive = 0x00000004u,
kLeadConsecutive = 0x00000010u,
//! Used to mark consecutive registers during processing.
kProcessedConsecutive = 0x00000008u,
kProcessedConsecutive = 0x00000020u,

//! Stack slot has to be allocated.
kStackUsed = 0x00000010u,
kStackUsed = 0x00000100u,
//! Stack allocation is preferred.
kStackPreferred = 0x00000020u,
kStackPreferred = 0x00000200u,
//! Marked for stack argument reassignment.
kStackArgToStack = 0x00000040u
kStackArgToStack = 0x00000400u
};
ASMJIT_DEFINE_ENUM_FLAGS(RAWorkRegFlags)

@@ -1040,6 +1054,12 @@ public:
//! RAPass specific flags used during analysis and allocation.
RAWorkRegFlags _flags = RAWorkRegFlags::kNone;

//! The identifier of a basic block this register lives in.
//!
//! If this register is used by multiple basic blocks, the id would always be `kIdNone`. However, if the register
//! lives in a single basic block, the id would be a valid block id, and `_flags` would not contain `kMultipleBasicBlocks`.
uint32_t _singleBasicBlockId = kIdNone;

//! Constains all USE ids collected from all instructions.
//!
//! If this mask is non-zero and not a power of two, it means that the register is used multiple times in

@@ -1122,6 +1142,23 @@ public:
ASMJIT_INLINE_NODEBUG bool isAllocated() const noexcept { return hasFlag(RAWorkRegFlags::kAllocated); }
ASMJIT_INLINE_NODEBUG void markAllocated() noexcept { addFlags(RAWorkRegFlags::kAllocated); }

ASMJIT_INLINE_NODEBUG bool isWithinSingleBasicBlock() const noexcept { return !hasFlag(RAWorkRegFlags::kMultipleBasicBlocks); }
ASMJIT_INLINE_NODEBUG uint32_t singleBasicBlockId() const noexcept { return _singleBasicBlockId; }

//! Called when this register appeared in a basic block having `blockId`.
//!
//! This function just sets the basic block of this work register, and then later, when this register is processed
//! again it's compared with all other basic blocks it appears in so the flag `kMultipleBasicBlocks` can be properly
//! set when the compared basic blocks differ.
ASMJIT_INLINE_NODEBUG void assignBasicBlock(uint32_t blockId) noexcept { _singleBasicBlockId = blockId; }

//! Marks this register as using multiple basic blocks, which means reseting the single basic block identifier and
//! adding `kMultipleBasicBlocks` flag.
ASMJIT_INLINE_NODEBUG void markUseOfMultipleBasicBlocks() noexcept {
_singleBasicBlockId = Globals::kInvalidId;
addFlags(RAWorkRegFlags::kMultipleBasicBlocks);
}

ASMJIT_INLINE_NODEBUG bool isLeadConsecutive() const noexcept { return hasFlag(RAWorkRegFlags::kLeadConsecutive); }
ASMJIT_INLINE_NODEBUG void markLeadConsecutive() noexcept { addFlags(RAWorkRegFlags::kLeadConsecutive); }
@@ -234,7 +234,7 @@ Error RALocalAllocator::switchToAssignment(PhysToWorkMap* dstPhysToWorkMap, cons
|
||||
uint32_t tmpPhysId = Support::ctz(allocableRegs);
|
||||
|
||||
ASMJIT_PROPAGATE(onMoveReg(group, curWorkId, tmpPhysId, physId));
|
||||
_pass->_clobberedRegs[group] |= Support::bitMask(tmpPhysId);
|
||||
_clobberedRegs[group] |= Support::bitMask(tmpPhysId);
|
||||
}
|
||||
else {
|
||||
// MOVE is impossible, must SPILL.
|
||||
@@ -675,7 +675,7 @@ Error RALocalAllocator::allocInst(InstNode* node) noexcept {
|
||||
if (reassignedId != RAAssignment::kPhysNone) {
|
||||
ASMJIT_PROPAGATE(onMoveReg(group, workId, reassignedId, assignedId));
|
||||
allocableRegs ^= Support::bitMask(reassignedId);
|
||||
_pass->_clobberedRegs[group] |= Support::bitMask(reassignedId);
|
||||
_clobberedRegs[group] |= Support::bitMask(reassignedId);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
@@ -704,8 +704,9 @@ Error RALocalAllocator::allocInst(InstNode* node) noexcept {
|
||||
|
||||
for (i = 0; i < count; i++) {
|
||||
RATiedReg* thisTiedReg = &tiedRegs[i];
|
||||
if (thisTiedReg->isUseDone())
|
||||
if (thisTiedReg->isUseDone()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
uint32_t thisWorkId = thisTiedReg->workId();
|
||||
uint32_t thisPhysId = _curAssignment.workToPhysId(group, thisWorkId);
|
||||
@@ -725,16 +726,18 @@ Error RALocalAllocator::allocInst(InstNode* node) noexcept {
|
||||
ASMJIT_PROPAGATE(onSwapReg(group, thisWorkId, thisPhysId, targetWorkId, targetPhysId));
|
||||
|
||||
thisTiedReg->markUseDone();
|
||||
if (thisTiedReg->isWrite())
|
||||
if (thisTiedReg->isWrite()) {
|
||||
_curAssignment.makeDirty(group, thisWorkId, targetPhysId);
|
||||
}
|
||||
usePending--;
|
||||
|
||||
// Double-hit.
|
||||
RATiedReg* targetTiedReg = RALocal_findTiedRegByWorkId(tiedRegs, count, targetWorkReg->workId());
|
||||
if (targetTiedReg && targetTiedReg->useId() == thisPhysId) {
|
||||
targetTiedReg->markUseDone();
|
||||
if (targetTiedReg->isWrite())
|
||||
if (targetTiedReg->isWrite()) {
|
||||
_curAssignment.makeDirty(group, targetWorkId, thisPhysId);
|
||||
}
|
||||
usePending--;
|
||||
}
|
||||
continue;
|
||||
@@ -748,10 +751,10 @@ Error RALocalAllocator::allocInst(InstNode* node) noexcept {
|
||||
// such case is to SPILL the target register or MOVE it to another register so the loop can continue.
|
||||
RegMask availableRegs = _availableRegs[group] & ~_curAssignment.assigned(group);
|
||||
if (availableRegs) {
|
||||
uint32_t tmpRegId = Support::ctz(availableRegs);
|
||||
uint32_t tmpRegId = pickBestSuitableRegister(group, availableRegs);
|
||||
|
||||
ASMJIT_PROPAGATE(onMoveReg(group, thisWorkId, tmpRegId, thisPhysId));
|
||||
_pass->_clobberedRegs[group] |= Support::bitMask(tmpRegId);
|
||||
_clobberedRegs[group] |= Support::bitMask(tmpRegId);
|
||||
|
||||
// NOTE: This register is not done, we have just moved it to another physical spot, and we will have to
|
||||
// move it again into the correct spot once it's free (since this is essentially doing a swap operation
|
||||
@@ -766,16 +769,18 @@ Error RALocalAllocator::allocInst(InstNode* node) noexcept {
|
||||
ASMJIT_PROPAGATE(onMoveReg(group, thisWorkId, targetPhysId, thisPhysId));
|
||||
|
||||
thisTiedReg->markUseDone();
|
||||
if (thisTiedReg->isWrite())
|
||||
if (thisTiedReg->isWrite()) {
|
||||
_curAssignment.makeDirty(group, thisWorkId, targetPhysId);
|
||||
}
|
||||
usePending--;
|
||||
}
|
||||
else {
|
||||
ASMJIT_PROPAGATE(onLoadReg(group, thisWorkId, targetPhysId));
|
||||
|
||||
thisTiedReg->markUseDone();
|
||||
if (thisTiedReg->isWrite())
|
||||
if (thisTiedReg->isWrite()) {
|
||||
_curAssignment.makeDirty(group, thisWorkId, targetPhysId);
|
||||
}
|
||||
usePending--;
|
||||
}
|
||||
}
|
||||
@@ -822,8 +827,9 @@ Error RALocalAllocator::allocInst(InstNode* node) noexcept {
|
||||
uint32_t physId = it.next();
|
||||
uint32_t workId = _curAssignment.physToWorkId(group, physId);
|
||||
|
||||
if (workId == RAAssignment::kWorkNone)
|
||||
if (workId == RAAssignment::kWorkNone) {
|
||||
continue;
|
||||
}
|
||||
|
||||
ASMJIT_PROPAGATE(onSpillReg(group, workId, physId));
|
||||
} while (it.hasNext());
|
||||
@@ -842,8 +848,9 @@ Error RALocalAllocator::allocInst(InstNode* node) noexcept {
|
||||
Support::BitWordIterator<RegMask> it(tiedReg->useRegMask());
|
||||
while (it.hasNext()) {
|
||||
uint32_t dstId = it.next();
|
||||
if (dstId == srcId)
|
||||
if (dstId == srcId) {
|
||||
continue;
|
||||
}
|
||||
_pass->emitMove(workId, dstId, srcId);
|
||||
}
|
||||
}
|
||||
@@ -925,8 +932,9 @@ Error RALocalAllocator::allocInst(InstNode* node) noexcept {
|
||||
}
|
||||
}
|
||||
|
||||
if (bestLeadReg == 0xFFFFFFFF)
|
||||
if (bestLeadReg == 0xFFFFFFFF) {
|
||||
return DebugUtils::errored(kErrorConsecutiveRegsAllocation);
|
||||
}
|
||||
|
||||
for (i = 0; i < consecutiveCount; i++) {
|
||||
uint32_t consecutiveIndex = bestLeadReg + i;
|
||||
@@ -949,8 +957,9 @@ Error RALocalAllocator::allocInst(InstNode* node) noexcept {
|
||||
uint32_t workId = tiedReg->workId();
|
||||
uint32_t assignedId = _curAssignment.workToPhysId(group, workId);
|
||||
|
||||
if (assignedId != RAAssignment::kPhysNone)
|
||||
if (assignedId != RAAssignment::kPhysNone) {
|
||||
ASMJIT_PROPAGATE(onKillReg(group, workId, assignedId));
|
||||
}
|
||||
|
||||
uint32_t physId = tiedReg->outId();
|
||||
if (physId == RAAssignment::kPhysNone) {
|
||||
@@ -970,8 +979,9 @@ Error RALocalAllocator::allocInst(InstNode* node) noexcept {
|
||||
// OUTs are CLOBBERed thus cannot be ASSIGNed right now.
|
||||
ASMJIT_ASSERT(!_curAssignment.isPhysAssigned(group, physId));
|
||||
|
||||
if (!tiedReg->isKill())
|
||||
if (!tiedReg->isKill()) {
|
||||
ASMJIT_PROPAGATE(onAssignReg(group, workId, physId, true));
|
||||
}
|
||||
|
||||
tiedReg->setOutId(physId);
|
||||
tiedReg->markOutDone();
|
||||
@@ -1043,8 +1053,9 @@ Error RALocalAllocator::allocBranch(InstNode* node, RABlock* target, RABlock* co
|
||||
// Additional instructions emitted to switch from the current state to the `target` state. This means
|
||||
// that we have to move these instructions into an independent code block and patch the jump location.
|
||||
Operand& targetOp = node->op(node->opCount() - 1);
|
||||
if (ASMJIT_UNLIKELY(!targetOp.isLabel()))
|
||||
if (ASMJIT_UNLIKELY(!targetOp.isLabel())) {
|
||||
return DebugUtils::errored(kErrorInvalidState);
|
||||
}
|
||||
|
||||
Label trampoline = _cc->newLabel();
|
||||
Label savedTarget = targetOp.as<Label>();
|
||||
@@ -1121,16 +1132,18 @@ uint32_t RALocalAllocator::decideOnAssignment(RegGroup group, uint32_t workId, u
|
||||
// Prefer home register id, if possible.
|
||||
if (workReg->hasHomeRegId()) {
|
||||
uint32_t homeId = workReg->homeRegId();
|
||||
if (Support::bitTest(allocableRegs, homeId))
|
||||
if (Support::bitTest(allocableRegs, homeId)) {
|
||||
return homeId;
|
||||
}
|
||||
}
|
||||
|
||||
// Prefer registers used upon block entries.
|
||||
RegMask previouslyAssignedRegs = workReg->allocatedMask();
|
||||
if (allocableRegs & previouslyAssignedRegs)
|
||||
if (allocableRegs & previouslyAssignedRegs) {
|
||||
allocableRegs &= previouslyAssignedRegs;
|
||||
}
|
||||
|
||||
return Support::ctz(allocableRegs);
|
||||
return pickBestSuitableRegister(group, allocableRegs);
|
||||
}
|
||||
|
||||
uint32_t RALocalAllocator::decideOnReassignment(RegGroup group, uint32_t workId, uint32_t physId, RegMask allocableRegs, RAInst* raInst) const noexcept {
|
||||
@@ -1141,8 +1154,9 @@ uint32_t RALocalAllocator::decideOnReassignment(RegGroup group, uint32_t workId,
|
||||
|
||||
// Prefer reassignment back to HomeId, if possible.
|
||||
if (workReg->hasHomeRegId()) {
|
||||
if (Support::bitTest(allocableRegs, workReg->homeRegId()))
|
||||
if (Support::bitTest(allocableRegs, workReg->homeRegId())) {
|
||||
return workReg->homeRegId();
|
||||
}
|
||||
}
|
||||
|
||||
// Prefer assignment to a temporary register in case this register is killed by the instruction (or has an out slot).
|
||||
@@ -1151,6 +1165,14 @@ uint32_t RALocalAllocator::decideOnReassignment(RegGroup group, uint32_t workId,
return Support::ctz(allocableRegs);
}

// Prefer reassignment if this register is only used within a single basic block.
if (workReg->isWithinSingleBasicBlock()) {
RegMask filteredRegs = allocableRegs & ~workReg->clobberSurvivalMask();
if (filteredRegs) {
return pickBestSuitableRegister(group, filteredRegs);
}
}

// TODO: [Register Allocator] This could be improved.

// Decided to SPILL.

@@ -25,8 +25,8 @@ class RALocalAllocator {
public:
ASMJIT_NONCOPYABLE(RALocalAllocator)

typedef RAAssignment::PhysToWorkMap PhysToWorkMap;
typedef RAAssignment::WorkToPhysMap WorkToPhysMap;
using PhysToWorkMap = RAAssignment::PhysToWorkMap;
using WorkToPhysMap = RAAssignment::WorkToPhysMap;

//! Link to `BaseRAPass`.
BaseRAPass* _pass {};

@@ -39,6 +39,8 @@ public:
RARegMask _availableRegs {};
//! Registers clobbered by the allocator.
RARegMask _clobberedRegs {};
//! Registers that must be preserved by the function (clobbering means saving & restoring in function prolog & epilog).
RARegMask _funcPreservedRegs {};

//! Register assignment (current).
RAAssignment _curAssignment {};

@@ -67,7 +69,9 @@ public:
: _pass(pass),
_cc(pass->cc()),
_archTraits(pass->_archTraits),
_availableRegs(pass->_availableRegs) {}
_availableRegs(pass->_availableRegs) {
_funcPreservedRegs.init(pass->func()->frame().preservedRegs());
}

Error init() noexcept;

@@ -150,7 +154,7 @@ public:
return uint32_t(int32_t(freq * float(kCostOfFrequency)));
}

inline uint32_t calculateSpillCost(RegGroup group, uint32_t workId, uint32_t assignedId) const noexcept {
ASMJIT_FORCE_INLINE uint32_t calculateSpillCost(RegGroup group, uint32_t workId, uint32_t assignedId) const noexcept {
RAWorkReg* workReg = workRegById(workId);
uint32_t cost = costByFrequency(workReg->liveStats().freq());

@@ -160,6 +164,18 @@ public:
return cost;
}

ASMJIT_FORCE_INLINE uint32_t pickBestSuitableRegister(RegGroup group, RegMask allocableRegs) const noexcept {
// These are registers must be preserved by the function itself.
RegMask preservedRegs = _funcPreservedRegs[group];

// Reduce the set by removing preserved registers when possible.
if (allocableRegs & ~preservedRegs) {
allocableRegs &= ~preservedRegs;
}

return Support::ctz(allocableRegs);
}

//! Decides on register assignment.
uint32_t decideOnAssignment(RegGroup group, uint32_t workId, uint32_t assignedId, RegMask allocableRegs) const noexcept;
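A short worked example of the mask arithmetic in pickBestSuitableRegister() above: when at least one non-preserved register is allocable, the preserved (callee-saved) ones are filtered out first, so the allocator avoids dirtying a register that would have to be saved and restored in the prolog/epilog. The code below is a standalone sketch with plain integers and a local ctz helper, not the asmjit member function:

#include <cstdint>
#include <cstdio>

// Index of the lowest set bit (stand-in for Support::ctz); expects x != 0.
static uint32_t ctz(uint32_t x) {
  uint32_t n = 0;
  while (!(x & 1u)) { x >>= 1; n++; }
  return n;
}

static uint32_t pickBestSuitableRegister(uint32_t allocableRegs, uint32_t preservedRegs) {
  // Prefer registers that the function does not have to preserve.
  if (allocableRegs & ~preservedRegs)
    allocableRegs &= ~preservedRegs;
  return ctz(allocableRegs);
}

int main() {
  // Registers 0..3 are allocable; registers 0..1 are callee-saved (preserved).
  uint32_t allocable = 0b1111u;
  uint32_t preserved = 0b0011u;

  // Picks register 2 (lowest non-preserved), not register 0.
  std::printf("picked: %u\n", pickBestSuitableRegister(allocable, preserved));

  // If only preserved registers are allocable, one of them is used anyway.
  std::printf("picked: %u\n", pickBestSuitableRegister(0b0010u, preserved));
  return 0;
}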
@@ -182,7 +198,10 @@ public:
|
||||
//! Emits a move between a destination and source register, and fixes the
|
||||
//! register assignment.
|
||||
inline Error onMoveReg(RegGroup group, uint32_t workId, uint32_t dstPhysId, uint32_t srcPhysId) noexcept {
|
||||
if (dstPhysId == srcPhysId) return kErrorOk;
|
||||
if (dstPhysId == srcPhysId) {
|
||||
return kErrorOk;
|
||||
}
|
||||
|
||||
_curAssignment.reassign(group, workId, dstPhysId, srcPhysId);
|
||||
return _pass->emitMove(workId, dstPhysId, srcPhysId);
|
||||
}
|
||||
|
||||
@@ -964,15 +964,23 @@ ASMJIT_FAVOR_SPEED Error BaseRAPass::buildLiveness() noexcept {
|
||||
ASMJIT_PROPAGATE(workReg->_writes.reserve(allocator(), nOutsPerWorkReg[i]));
|
||||
}
|
||||
|
||||
// These are not needed anymore, so release the memory now so other allocations can reuse it.
|
||||
nUsesPerWorkReg.release(allocator());
|
||||
nOutsPerWorkReg.release(allocator());
|
||||
|
||||
// Assign block and instruction positions, build LiveCount and LiveSpans
|
||||
// ---------------------------------------------------------------------
|
||||
|
||||
// This is a starting position, reserving [0, 1] for function arguments.
|
||||
uint32_t position = 2;
|
||||
|
||||
for (i = 0; i < numAllBlocks; i++) {
|
||||
RABlock* block = _blocks[i];
|
||||
if (!block->isReachable())
|
||||
continue;
|
||||
|
||||
uint32_t blockId = block->blockId();
|
||||
|
||||
BaseNode* node = block->first();
|
||||
BaseNode* stop = block->last();
|
||||
|
||||
@@ -995,6 +1003,8 @@ ASMJIT_FAVOR_SPEED Error BaseRAPass::buildLiveness() noexcept {
|
||||
if (node->isInst()) {
|
||||
InstNode* inst = node->as<InstNode>();
|
||||
RAInst* raInst = inst->passData<RAInst>();
|
||||
|
||||
// Impossible - each processed instruction node must have an associated RAInst.
|
||||
ASMJIT_ASSERT(raInst != nullptr);
|
||||
|
||||
RATiedReg* tiedRegs = raInst->tiedRegs();
|
||||
@@ -1010,14 +1020,20 @@ ASMJIT_FAVOR_SPEED Error BaseRAPass::buildLiveness() noexcept {
// Create refs and writes.
RAWorkReg* workReg = workRegById(workId);
workReg->_refs.appendUnsafe(node);
if (tiedReg->isWrite())
if (tiedReg->isWrite()) {
workReg->_writes.appendUnsafe(node);
}

if (workReg->singleBasicBlockId() != blockId) {
workReg->markUseOfMultipleBasicBlocks();
}

// We couldn't calculate this in previous steps, but since we know all LIVE-OUT at this point it becomes
// trivial. If this is the last instruction that uses this `workReg` and it's not LIVE-OUT then it is
// KILLed here.
if (tiedReg->isLast() && !block->liveOut().bitAt(workId))
if (tiedReg->isLast() && !block->liveOut().bitAt(workId)) {
tiedReg->addFlags(RATiedFlags::kKill);
}

LiveRegSpans& liveSpans = workReg->liveSpans();
bool wasOpen;
@@ -1038,20 +1054,23 @@ ASMJIT_FAVOR_SPEED Error BaseRAPass::buildLiveness() noexcept {
|
||||
if (tiedReg->hasUseId()) {
|
||||
uint32_t useId = tiedReg->useId();
|
||||
workReg->addUseIdMask(Support::bitMask(useId));
|
||||
if (!workReg->hasHintRegId() && !Support::bitTest(raInst->_clobberedRegs[group], useId))
|
||||
if (!workReg->hasHintRegId() && !Support::bitTest(raInst->_clobberedRegs[group], useId)) {
|
||||
workReg->setHintRegId(useId);
|
||||
}
|
||||
}
|
||||
|
||||
if (tiedReg->useRegMask()) {
|
||||
workReg->restrictPreferredMask(tiedReg->useRegMask());
|
||||
if (workReg->isLeadConsecutive())
|
||||
if (workReg->isLeadConsecutive()) {
|
||||
workReg->restrictConsecutiveMask(tiedReg->useRegMask());
|
||||
}
|
||||
}
|
||||
|
||||
if (tiedReg->outRegMask()) {
|
||||
workReg->restrictPreferredMask(tiedReg->outRegMask());
|
||||
if (workReg->isLeadConsecutive())
|
||||
if (workReg->isLeadConsecutive()) {
|
||||
workReg->restrictConsecutiveMask(tiedReg->outRegMask());
|
||||
}
|
||||
}
|
||||
|
||||
// Update `RAWorkReg::clobberedSurvivalMask`.
|
||||
@@ -1060,6 +1079,10 @@ ASMJIT_FAVOR_SPEED Error BaseRAPass::buildLiveness() noexcept {
|
||||
}
|
||||
}
|
||||
|
||||
if (node->isInvoke()) {
|
||||
func()->frame().updateCallStackAlignment(node->as<InvokeNode>()->detail().naturalStackAlignment());
|
||||
}
|
||||
|
||||
position += 2;
|
||||
maxLiveCount.op<Support::Max>(raInst->_liveCount);
|
||||
}
|
||||
@@ -1098,8 +1121,6 @@ ASMJIT_FAVOR_SPEED Error BaseRAPass::buildLiveness() noexcept {
|
||||
logger->log(sb);
|
||||
});
|
||||
|
||||
nUsesPerWorkReg.release(allocator());
|
||||
nOutsPerWorkReg.release(allocator());
|
||||
nInstsPerBlock.release(allocator());
|
||||
|
||||
return kErrorOk;
|
||||
@@ -1226,7 +1247,7 @@ ASMJIT_FAVOR_SPEED Error BaseRAPass::binPack(RegGroup group) noexcept {
|
||||
RegMask preservedRegs = func()->frame().preservedRegs(group);
|
||||
|
||||
// First try to pack everything that provides register-id hint as these are most likely function arguments and fixed
|
||||
// (precolored) virtual registers.
|
||||
// (pre-colored) virtual registers.
|
||||
if (!workRegs.empty()) {
|
||||
uint32_t dstIndex = 0;
|
||||
|
||||
|
||||
@@ -44,7 +44,7 @@ enum class RABlockFlags : uint32_t {
|
||||
kHasConsecutive = 0x00000200u,
|
||||
//! Block has a jump to a jump-table at the end.
|
||||
kHasJumpTable = 0x00000400u,
|
||||
//! Block contains fixed registers (precolored).
|
||||
//! Block contains fixed registers (pre-colored).
|
||||
kHasFixedRegs = 0x00000800u,
|
||||
//! Block contains function calls.
|
||||
kHasFuncCalls = 0x00001000u
|
||||
@@ -402,6 +402,8 @@ public:
|
||||
//! \name Members
|
||||
//! \{
|
||||
|
||||
//! Basic block id.
|
||||
uint32_t _basicBlockId;
|
||||
//! Instruction RW flags.
|
||||
InstRWFlags _instRWFlags;
|
||||
|
||||
@@ -425,10 +427,11 @@ public:
|
||||
//! \name Construction & Destruction
|
||||
//! \{
|
||||
|
||||
ASMJIT_INLINE_NODEBUG RAInstBuilder() noexcept { reset(); }
|
||||
ASMJIT_INLINE_NODEBUG explicit RAInstBuilder(uint32_t blockId = Globals::kInvalidId) noexcept { reset(blockId); }
|
||||
|
||||
ASMJIT_INLINE_NODEBUG void init() noexcept { reset(); }
|
||||
ASMJIT_INLINE_NODEBUG void reset() noexcept {
|
||||
ASMJIT_INLINE_NODEBUG void init(uint32_t blockId) noexcept { reset(blockId); }
|
||||
ASMJIT_INLINE_NODEBUG void reset(uint32_t blockId) noexcept {
|
||||
_basicBlockId = blockId;
|
||||
_instRWFlags = InstRWFlags::kNone;
|
||||
_aggregatedFlags = RATiedFlags::kNone;
|
||||
_forbiddenFlags = RATiedFlags::kNone;
|
||||
@@ -465,13 +468,13 @@ public:
|
||||
ASMJIT_INLINE_NODEBUG const RATiedReg* end() const noexcept { return _cur; }
|
||||
|
||||
//! Returns `RATiedReg` at the given `index`.
|
||||
inline RATiedReg* operator[](uint32_t index) noexcept {
|
||||
inline RATiedReg* operator[](size_t index) noexcept {
|
||||
ASMJIT_ASSERT(index < tiedRegCount());
|
||||
return &_tiedRegs[index];
|
||||
}
|
||||
|
||||
//! Returns `RATiedReg` at the given `index`. (const).
|
||||
inline const RATiedReg* operator[](uint32_t index) const noexcept {
|
||||
inline const RATiedReg* operator[](size_t index) const noexcept {
|
||||
ASMJIT_ASSERT(index < tiedRegCount());
|
||||
return &_tiedRegs[index];
|
||||
}
|
||||
@@ -487,8 +490,8 @@ public:
|
||||
RegMask useRegMask, uint32_t useId, uint32_t useRewriteMask,
|
||||
RegMask outRegMask, uint32_t outId, uint32_t outRewriteMask,
|
||||
uint32_t rmSize = 0,
|
||||
uint32_t consecutiveParent = Globals::kInvalidId) noexcept {
|
||||
|
||||
uint32_t consecutiveParent = Globals::kInvalidId
|
||||
) noexcept {
|
||||
RegGroup group = workReg->group();
|
||||
RATiedReg* tiedReg = workReg->tiedReg();
|
||||
|
||||
@@ -507,12 +510,14 @@ public:
|
||||
_stats.makeUsed(group);
|
||||
|
||||
if (!tiedReg) {
|
||||
// Could happen when the builder is not reset properly after each instruction.
|
||||
// Would happen when the builder is not reset properly after each instruction - so catch that!
|
||||
ASMJIT_ASSERT(tiedRegCount() < ASMJIT_ARRAY_SIZE(_tiedRegs));
|
||||
|
||||
tiedReg = _cur++;
|
||||
tiedReg->init(workReg->workId(), flags, useRegMask, useId, useRewriteMask, outRegMask, outId, outRewriteMask, rmSize, consecutiveParent);
|
||||
|
||||
workReg->setTiedReg(tiedReg);
|
||||
workReg->assignBasicBlock(_basicBlockId);
|
||||
|
||||
_count.add(group);
|
||||
return kErrorOk;
|
||||
@@ -566,7 +571,9 @@ public:
|
||||
|
||||
tiedReg = _cur++;
|
||||
tiedReg->init(workReg->workId(), flags, allocable, useId, 0, allocable, BaseReg::kIdBad, 0);
|
||||
|
||||
workReg->setTiedReg(tiedReg);
|
||||
workReg->assignBasicBlock(_basicBlockId);
|
||||
|
||||
_count.add(group);
|
||||
return kErrorOk;
|
||||
@@ -606,7 +613,9 @@ public:
|
||||
|
||||
tiedReg = _cur++;
|
||||
tiedReg->init(workReg->workId(), flags, Support::allOnes<RegMask>(), BaseReg::kIdBad, 0, outRegs, outId, 0);
|
||||
|
||||
workReg->setTiedReg(tiedReg);
|
||||
workReg->assignBasicBlock(_basicBlockId);
|
||||
|
||||
_count.add(group);
|
||||
return kErrorOk;
|
||||
|
||||
@@ -83,16 +83,16 @@ public:
|
||||
bool _is64Bit;
|
||||
bool _avxEnabled;
|
||||
|
||||
inline RACFGBuilder(X86RAPass* pass) noexcept
|
||||
ASMJIT_INLINE_NODEBUG RACFGBuilder(X86RAPass* pass) noexcept
|
||||
: RACFGBuilderT<RACFGBuilder>(pass),
|
||||
_arch(pass->cc()->arch()),
|
||||
_is64Bit(pass->registerSize() == 8),
|
||||
_avxEnabled(pass->avxEnabled()) {
|
||||
}
|
||||
|
||||
inline Compiler* cc() const noexcept { return static_cast<Compiler*>(_cc); }
|
||||
ASMJIT_INLINE_NODEBUG Compiler* cc() const noexcept { return static_cast<Compiler*>(_cc); }
|
||||
|
||||
inline uint32_t choose(uint32_t sseInst, uint32_t avxInst) const noexcept {
|
||||
ASMJIT_INLINE_NODEBUG uint32_t choose(uint32_t sseInst, uint32_t avxInst) const noexcept {
|
||||
return _avxEnabled ? avxInst : sseInst;
|
||||
}