[Bug] Fixed MOV reg->mem instruction rewriting (Compiler)

The problem is that the rewriter must also rewrite an instruction
ID in case that it's a [K|V]MOV[B|W|D|Q] instruction that moves
from either K or SIMD register to GP register. when such instruction
is rewritten in a way that it ends up as "xMOVx GP, [MEM]" it would
be invalid if it's not changed to a general purpose MOV.

The problem can only happen in case that the compiler spills a
virtual register, which is then moved to a scalar register.

In addition, checks were added to MOVD|MOVQ to ensure that when an
invalid instruction is emitted it's not ignored as it used to be.
This commit is contained in:
kobalicek
2024-05-19 17:34:04 +02:00
parent 594576485b
commit b9c8b5399f
7 changed files with 148 additions and 29 deletions

View File

@@ -35,6 +35,7 @@ jobs:
- { title: "diag-asan" , host: "ubuntu-latest" , arch: "x64" , cc: "clang-18", conf: "Release", diagnostics: "asan", defs: "ASMJIT_TEST=1" } - { title: "diag-asan" , host: "ubuntu-latest" , arch: "x64" , cc: "clang-18", conf: "Release", diagnostics: "asan", defs: "ASMJIT_TEST=1" }
- { title: "diag-msan" , host: "ubuntu-latest" , arch: "x64" , cc: "clang-18", conf: "Release", diagnostics: "msan", defs: "ASMJIT_TEST=1" } - { title: "diag-msan" , host: "ubuntu-latest" , arch: "x64" , cc: "clang-18", conf: "Release", diagnostics: "msan", defs: "ASMJIT_TEST=1" }
- { title: "diag-ubsan" , host: "ubuntu-latest" , arch: "x64" , cc: "clang-18", conf: "Release", diagnostics: "ubsan", defs: "ASMJIT_TEST=1" } - { title: "diag-ubsan" , host: "ubuntu-latest" , arch: "x64" , cc: "clang-18", conf: "Release", diagnostics: "ubsan", defs: "ASMJIT_TEST=1" }
- { title: "diag-hardened" , host: "ubuntu-latest" , arch: "x64" , cc: "clang-18", conf: "Release", diagnostics: "hardened", defs: "ASMJIT_TEST=1" }
- { title: "diag-valgrind" , host: "ubuntu-latest" , arch: "x64" , cc: "clang-18", conf: "Release", diagnostics: "valgrind", defs: "ASMJIT_TEST=1" } - { title: "diag-valgrind" , host: "ubuntu-latest" , arch: "x64" , cc: "clang-18", conf: "Release", diagnostics: "valgrind", defs: "ASMJIT_TEST=1" }
- { title: "no-deprecated" , host: "ubuntu-latest" , arch: "x64" , cc: "clang-18", conf: "Release", defs: "ASMJIT_TEST=1,ASMJIT_NO_DEPRECATED=1" } - { title: "no-deprecated" , host: "ubuntu-latest" , arch: "x64" , cc: "clang-18", conf: "Release", defs: "ASMJIT_TEST=1,ASMJIT_NO_DEPRECATED=1" }
@@ -169,7 +170,6 @@ jobs:
- name: "Build & Test - Native" - name: "Build & Test - Native"
if: ${{!matrix.vm}} if: ${{!matrix.vm}}
run: python build-actions/action.py run: python build-actions/action.py
--step=all
--source-dir=source --source-dir=source
--config=source/.github/workflows/build-config.json --config=source/.github/workflows/build-config.json
--compiler=${{matrix.cc}} --compiler=${{matrix.cc}}
@@ -200,7 +200,6 @@ jobs:
sh ./build-actions/prepare-environment.sh sh ./build-actions/prepare-environment.sh
python3 build-actions/action.py \ python3 build-actions/action.py \
--step=all \
--source-dir=source \ --source-dir=source \
--config=source/.github/workflows/build-config.json \ --config=source/.github/workflows/build-config.json \
--compiler=${{matrix.cc}} \ --compiler=${{matrix.cc}} \
@@ -220,7 +219,6 @@ jobs:
--platform linux/${{matrix.arch}} \ --platform linux/${{matrix.arch}} \
${{matrix.vm}} \ ${{matrix.vm}} \
bash action.sh \ bash action.sh \
--step=all \
--source-dir=../source \ --source-dir=../source \
--config=../source/.github/workflows/build-config.json \ --config=../source/.github/workflows/build-config.json \
--compiler=${{matrix.cc}} \ --compiler=${{matrix.cc}} \

View File

@@ -767,6 +767,12 @@ enum class RATiedFlags : uint32_t {
// Instruction Flags (Never used by RATiedReg) // Instruction Flags (Never used by RATiedReg)
// ------------------------------------------- // -------------------------------------------
//! Instruction has been patched to address a memory location instead of a register.
//!
//! This is currently only possible on X86 or X86_64 targets. It informs rewriter to rewrite the instruction if
//! necessary.
kInst_RegToMemPatched = 0x40000000u,
//! Instruction is transformable to another instruction if necessary. //! Instruction is transformable to another instruction if necessary.
//! //!
//! This is flag that is only used by \ref RAInst to inform register allocator that the instruction has some //! This is flag that is only used by \ref RAInst to inform register allocator that the instruction has some

View File

@@ -137,6 +137,7 @@ Error RALocalAllocator::switchToAssignment(PhysToWorkMap* dstPhysToWorkMap, cons
dst.initMaps(dstPhysToWorkMap, _tmpWorkToPhysMap); dst.initMaps(dstPhysToWorkMap, _tmpWorkToPhysMap);
dst.assignWorkIdsFromPhysIds(); dst.assignWorkIdsFromPhysIds();
// TODO: Remove this - finally enable this functionality.
if (tryMode) if (tryMode)
return kErrorOk; return kErrorOk;
@@ -601,6 +602,7 @@ Error RALocalAllocator::allocInst(InstNode* node) noexcept {
tiedReg->_useRewriteMask = 0; tiedReg->_useRewriteMask = 0;
tiedReg->markUseDone(); tiedReg->markUseDone();
raInst->addFlags(RATiedFlags::kInst_RegToMemPatched);
usePending--; usePending--;
rmAllocated = true; rmAllocated = true;
@@ -687,7 +689,7 @@ Error RALocalAllocator::allocInst(InstNode* node) noexcept {
// ------ // ------
// //
// ALLOCATE / SHUFFLE all registers that we marked as `willUse` and weren't allocated yet. This is a bit // ALLOCATE / SHUFFLE all registers that we marked as `willUse` and weren't allocated yet. This is a bit
// complicated as the allocation is iterative. In some cases we have to wait before allocating a particual // complicated as the allocation is iterative. In some cases we have to wait before allocating a particular
// physical register as it's still occupied by some other one, which we need to move before we can use it. // physical register as it's still occupied by some other one, which we need to move before we can use it.
// In this case we skip it and allocate another some other instead (making it free for another iteration). // In this case we skip it and allocate another some other instead (making it free for another iteration).
// //
@@ -836,7 +838,7 @@ Error RALocalAllocator::allocInst(InstNode* node) noexcept {
// STEP 9 // STEP 9
// ------ // ------
// //
// Vector registers can be cloberred partially by invoke - find if that's the case and clobber when necessary. // Vector registers can be clobbered partially by invoke - find if that's the case and clobber when necessary.
if (node->isInvoke() && group == RegGroup::kVec) { if (node->isInvoke() && group == RegGroup::kVec) {
const InvokeNode* invokeNode = node->as<InvokeNode>(); const InvokeNode* invokeNode = node->as<InvokeNode>();

View File

@@ -335,6 +335,8 @@ public:
//! Clears instruction `flags` from this RAInst. //! Clears instruction `flags` from this RAInst.
ASMJIT_INLINE_NODEBUG void clearFlags(RATiedFlags flags) noexcept { _flags &= ~flags; } ASMJIT_INLINE_NODEBUG void clearFlags(RATiedFlags flags) noexcept { _flags &= ~flags; }
//! Tests whether one operand of this instruction has been patched from Reg to Mem.
ASMJIT_INLINE_NODEBUG bool isRegToMemPatched() const noexcept { return hasFlag(RATiedFlags::kInst_RegToMemPatched); }
//! Tests whether this instruction can be transformed to another instruction if necessary. //! Tests whether this instruction can be transformed to another instruction if necessary.
ASMJIT_INLINE_NODEBUG bool isTransformable() const noexcept { return hasFlag(RATiedFlags::kInst_IsTransformable); } ASMJIT_INLINE_NODEBUG bool isTransformable() const noexcept { return hasFlag(RATiedFlags::kInst_IsTransformable); }

View File

@@ -345,6 +345,10 @@ static ASMJIT_FORCE_INLINE uint32_t x86AltOpcodeOf(const InstDB::InstInfo* info)
return InstDB::_altOpcodeTable[info->_altOpcodeIndex]; return InstDB::_altOpcodeTable[info->_altOpcodeIndex];
} }
static ASMJIT_FORCE_INLINE bool x86IsMmxOrXmm(const Reg& reg) noexcept {
return reg.type() == RegType::kX86_Mm || reg.type() == RegType::kX86_Xmm;
}
// x86::Assembler - X86BufferWriter // x86::Assembler - X86BufferWriter
// ================================ // ================================
@@ -2572,6 +2576,7 @@ CaseFpuArith_Mem:
case InstDB::kEncodingExtMovd: case InstDB::kEncodingExtMovd:
CaseExtMovd: CaseExtMovd:
if (x86IsMmxOrXmm(o0.as<Reg>())) {
opReg = o0.id(); opReg = o0.id();
opcode.add66hIf(Reg::isXmm(o0)); opcode.add66hIf(Reg::isXmm(o0));
@@ -2586,8 +2591,10 @@ CaseExtMovd:
rmRel = &o1; rmRel = &o1;
goto EmitX86M; goto EmitX86M;
} }
}
// The following instructions use the secondary opcode. // The following instructions use the secondary opcode.
if (x86IsMmxOrXmm(o1.as<Reg>())) {
opcode &= Opcode::kW; opcode &= Opcode::kW;
opcode |= x86AltOpcodeOf(instInfo); opcode |= x86AltOpcodeOf(instInfo);
opReg = o1.id(); opReg = o1.id();
@@ -2604,6 +2611,7 @@ CaseExtMovd:
rmRel = &o0; rmRel = &o0;
goto EmitX86M; goto EmitX86M;
} }
}
break; break;
case InstDB::kEncodingExtMovq: case InstDB::kEncodingExtMovq:

View File

@@ -1309,6 +1309,54 @@ ASMJIT_FAVOR_SPEED Error X86RAPass::_rewrite(BaseNode* first, BaseNode* stop) no
} }
} }
// If one operand was rewritten from Reg to Mem, we have to ensure that we are using the correct instruction.
if (raInst->isRegToMemPatched()) {
switch (inst->id()) {
case Inst::kIdKmovb: {
if (operands[0].isGp() && operands[1].isMem()) {
// Transform from [V]MOVD to MOV.
operands[1].as<Mem>().setSize(1);
inst->setId(Inst::kIdMovzx);
}
break;
}
case Inst::kIdVmovw: {
if (operands[0].isGp() && operands[1].isMem()) {
// Transform from [V]MOVD to MOV.
operands[1].as<Mem>().setSize(2);
inst->setId(Inst::kIdMovzx);
}
break;
}
case Inst::kIdMovd:
case Inst::kIdVmovd:
case Inst::kIdKmovd: {
if (operands[0].isGp() && operands[1].isMem()) {
// Transform from [V]MOVD to MOV.
operands[1].as<Mem>().setSize(4);
inst->setId(Inst::kIdMov);
}
break;
}
case Inst::kIdMovq:
case Inst::kIdVmovq:
case Inst::kIdKmovq: {
if (operands[0].isGp() && operands[1].isMem()) {
// Transform from [V]MOVQ to MOV.
operands[1].as<Mem>().setSize(8);
inst->setId(Inst::kIdMov);
}
break;
}
default:
break;
}
}
// Transform VEX instruction to EVEX when necessary. // Transform VEX instruction to EVEX when necessary.
if (raInst->isTransformable()) { if (raInst->isTransformable()) {
if (maxRegId > 15) { if (maxRegId > 15) {

View File

@@ -4132,6 +4132,60 @@ public:
} }
}; };
// x86::Compiler - X86Test_VecToScalar
// ===================================
class X86Test_VecToScalar : public X86TestCase {
public:
static constexpr uint32_t kVecCount = 64;
X86Test_VecToScalar() : X86TestCase("VecToScalar") {}
static void add(TestApp& app) {
app.add(new X86Test_VecToScalar());
}
virtual void compile(x86::Compiler& cc) {
FuncNode* func = cc.addFunc(FuncSignature::build<uint32_t, uint32_t>());
x86::Gp x = cc.newInt32("x");
x86::Gp t = cc.newInt32("t");
x86::Xmm v[kVecCount];
func->setArg(0, x);
for (size_t i = 0; i < kVecCount; i++) {
v[i] = cc.newXmm("v%d", i);
if (i != 0)
cc.add(x, 1);
cc.movd(v[i], x);
}
cc.xor_(x, x);
for (size_t i = 0; i < kVecCount; i++) {
cc.movd(t, v[i]);
cc.add(x, t);
}
cc.ret(x);
cc.endFunc();
}
virtual bool run(void* _func, String& result, String& expect) {
typedef uint32_t (*Func)(uint32_t);
Func func = ptr_as_func<Func>(_func);
uint32_t resultRet = func(1);
uint32_t expectRet = 2080; // 1 + 2 + 3 + ... + 64
result.assignFormat("ret=%d", resultRet);
expect.assignFormat("ret=%d", expectRet);
return result == expect;
}
};
// x86::Compiler - X86Test_MiscLocalConstPool // x86::Compiler - X86Test_MiscLocalConstPool
// ========================================== // ==========================================
@@ -4512,6 +4566,7 @@ void compiler_add_x86_tests(TestApp& app) {
app.addT<X86Test_FuncCallAVXClobber>(); app.addT<X86Test_FuncCallAVXClobber>();
// Miscellaneous tests. // Miscellaneous tests.
app.addT<X86Test_VecToScalar>();
app.addT<X86Test_MiscLocalConstPool>(); app.addT<X86Test_MiscLocalConstPool>();
app.addT<X86Test_MiscGlobalConstPool>(); app.addT<X86Test_MiscGlobalConstPool>();
app.addT<X86Test_MiscMultiRet>(); app.addT<X86Test_MiscMultiRet>();