diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 906ac17..fe5eaf1 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -35,6 +35,7 @@ jobs: - { title: "diag-asan" , host: "ubuntu-latest" , arch: "x64" , cc: "clang-18", conf: "Release", diagnostics: "asan", defs: "ASMJIT_TEST=1" } - { title: "diag-msan" , host: "ubuntu-latest" , arch: "x64" , cc: "clang-18", conf: "Release", diagnostics: "msan", defs: "ASMJIT_TEST=1" } - { title: "diag-ubsan" , host: "ubuntu-latest" , arch: "x64" , cc: "clang-18", conf: "Release", diagnostics: "ubsan", defs: "ASMJIT_TEST=1" } + - { title: "diag-hardened" , host: "ubuntu-latest" , arch: "x64" , cc: "clang-18", conf: "Release", diagnostics: "hardened", defs: "ASMJIT_TEST=1" } - { title: "diag-valgrind" , host: "ubuntu-latest" , arch: "x64" , cc: "clang-18", conf: "Release", diagnostics: "valgrind", defs: "ASMJIT_TEST=1" } - { title: "no-deprecated" , host: "ubuntu-latest" , arch: "x64" , cc: "clang-18", conf: "Release", defs: "ASMJIT_TEST=1,ASMJIT_NO_DEPRECATED=1" } @@ -169,7 +170,6 @@ jobs: - name: "Build & Test - Native" if: ${{!matrix.vm}} run: python build-actions/action.py - --step=all --source-dir=source --config=source/.github/workflows/build-config.json --compiler=${{matrix.cc}} @@ -200,7 +200,6 @@ jobs: sh ./build-actions/prepare-environment.sh python3 build-actions/action.py \ - --step=all \ --source-dir=source \ --config=source/.github/workflows/build-config.json \ --compiler=${{matrix.cc}} \ @@ -220,7 +219,6 @@ jobs: --platform linux/${{matrix.arch}} \ ${{matrix.vm}} \ bash action.sh \ - --step=all \ --source-dir=../source \ --config=../source/.github/workflows/build-config.json \ --compiler=${{matrix.cc}} \ diff --git a/src/asmjit/core/radefs_p.h b/src/asmjit/core/radefs_p.h index 99e79be..d61a9cc 100644 --- a/src/asmjit/core/radefs_p.h +++ b/src/asmjit/core/radefs_p.h @@ -767,6 +767,12 @@ enum class RATiedFlags : uint32_t { // Instruction Flags (Never used by 
RATiedReg) // ------------------------------------------- + //! Instruction has been patched to address a memory location instead of a register. + //! + //! This is currently only possible on X86 or X86_64 targets. It informs the rewriter to rewrite the instruction + //! if necessary. + kInst_RegToMemPatched = 0x40000000u, + //! Instruction is transformable to another instruction if necessary. //! //! This is flag that is only used by \ref RAInst to inform register allocator that the instruction has some diff --git a/src/asmjit/core/ralocal.cpp b/src/asmjit/core/ralocal.cpp index 16f11a8..95ccd16 100644 --- a/src/asmjit/core/ralocal.cpp +++ b/src/asmjit/core/ralocal.cpp @@ -137,6 +137,7 @@ Error RALocalAllocator::switchToAssignment(PhysToWorkMap* dstPhysToWorkMap, cons dst.initMaps(dstPhysToWorkMap, _tmpWorkToPhysMap); dst.assignWorkIdsFromPhysIds(); + // TODO: Remove this `tryMode` early return - finally enable this functionality. if (tryMode) return kErrorOk; @@ -601,6 +602,7 @@ Error RALocalAllocator::allocInst(InstNode* node) noexcept { tiedReg->_useRewriteMask = 0; tiedReg->markUseDone(); + raInst->addFlags(RATiedFlags::kInst_RegToMemPatched); usePending--; rmAllocated = true; @@ -687,7 +689,7 @@ Error RALocalAllocator::allocInst(InstNode* node) noexcept { // ------ // // ALLOCATE / SHUFFLE all registers that we marked as `willUse` and weren't allocated yet. This is a bit - // complicated as the allocation is iterative. In some cases we have to wait before allocating a particual + // complicated as the allocation is iterative. In some cases we have to wait before allocating a particular // physical register as it's still occupied by some other one, which we need to move before we can use it. // In this case we skip it and allocate another some other instead (making it free for another iteration). 
// @@ -836,7 +838,7 @@ Error RALocalAllocator::allocInst(InstNode* node) noexcept { // STEP 9 // ------ // - // Vector registers can be cloberred partially by invoke - find if that's the case and clobber when necessary. + // Vector registers can be clobbered partially by invoke - find if that's the case and clobber when necessary. if (node->isInvoke() && group == RegGroup::kVec) { const InvokeNode* invokeNode = node->as(); diff --git a/src/asmjit/core/rapass_p.h b/src/asmjit/core/rapass_p.h index 90d4ae4..9676240 100644 --- a/src/asmjit/core/rapass_p.h +++ b/src/asmjit/core/rapass_p.h @@ -335,6 +335,8 @@ public: //! Clears instruction `flags` from this RAInst. ASMJIT_INLINE_NODEBUG void clearFlags(RATiedFlags flags) noexcept { _flags &= ~flags; } + //! Tests whether one operand of this instruction has been patched from Reg to Mem. + ASMJIT_INLINE_NODEBUG bool isRegToMemPatched() const noexcept { return hasFlag(RATiedFlags::kInst_RegToMemPatched); } //! Tests whether this instruction can be transformed to another instruction if necessary. 
ASMJIT_INLINE_NODEBUG bool isTransformable() const noexcept { return hasFlag(RATiedFlags::kInst_IsTransformable); } diff --git a/src/asmjit/x86/x86assembler.cpp b/src/asmjit/x86/x86assembler.cpp index f48c3b2..35c5502 100644 --- a/src/asmjit/x86/x86assembler.cpp +++ b/src/asmjit/x86/x86assembler.cpp @@ -345,6 +345,10 @@ static ASMJIT_FORCE_INLINE uint32_t x86AltOpcodeOf(const InstDB::InstInfo* info) return InstDB::_altOpcodeTable[info->_altOpcodeIndex]; } +static ASMJIT_FORCE_INLINE bool x86IsMmxOrXmm(const Reg& reg) noexcept { + return reg.type() == RegType::kX86_Mm || reg.type() == RegType::kX86_Xmm; +} + // x86::Assembler - X86BufferWriter // ================================ @@ -2572,37 +2576,41 @@ CaseFpuArith_Mem: case InstDB::kEncodingExtMovd: CaseExtMovd: - opReg = o0.id(); - opcode.add66hIf(Reg::isXmm(o0)); + if (x86IsMmxOrXmm(o0.as())) { + opReg = o0.id(); + opcode.add66hIf(Reg::isXmm(o0)); - // MM/XMM <- Gp - if (isign3 == ENC_OPS2(Reg, Reg) && Reg::isGp(o1)) { - rbReg = o1.id(); - goto EmitX86R; - } + // MM/XMM <- Gp + if (isign3 == ENC_OPS2(Reg, Reg) && Reg::isGp(o1)) { + rbReg = o1.id(); + goto EmitX86R; + } - // MM/XMM <- Mem - if (isign3 == ENC_OPS2(Reg, Mem)) { - rmRel = &o1; - goto EmitX86M; + // MM/XMM <- Mem + if (isign3 == ENC_OPS2(Reg, Mem)) { + rmRel = &o1; + goto EmitX86M; + } } // The following instructions use the secondary opcode. 
- opcode &= Opcode::kW; - opcode |= x86AltOpcodeOf(instInfo); - opReg = o1.id(); - opcode.add66hIf(Reg::isXmm(o1)); + if (x86IsMmxOrXmm(o1.as())) { + opcode &= Opcode::kW; + opcode |= x86AltOpcodeOf(instInfo); + opReg = o1.id(); + opcode.add66hIf(Reg::isXmm(o1)); - // GP <- MM/XMM - if (isign3 == ENC_OPS2(Reg, Reg) && Reg::isGp(o0)) { - rbReg = o0.id(); - goto EmitX86R; - } + // GP <- MM/XMM + if (isign3 == ENC_OPS2(Reg, Reg) && Reg::isGp(o0)) { + rbReg = o0.id(); + goto EmitX86R; + } - // Mem <- MM/XMM - if (isign3 == ENC_OPS2(Mem, Reg)) { - rmRel = &o0; - goto EmitX86M; + // Mem <- MM/XMM + if (isign3 == ENC_OPS2(Mem, Reg)) { + rmRel = &o0; + goto EmitX86M; + } } break; diff --git a/src/asmjit/x86/x86rapass.cpp b/src/asmjit/x86/x86rapass.cpp index 88a8b39..c7b46f5 100644 --- a/src/asmjit/x86/x86rapass.cpp +++ b/src/asmjit/x86/x86rapass.cpp @@ -1309,6 +1309,54 @@ ASMJIT_FAVOR_SPEED Error X86RAPass::_rewrite(BaseNode* first, BaseNode* stop) no } } + // If one operand was rewritten from Reg to Mem, we have to ensure that we are using the correct instruction. + if (raInst->isRegToMemPatched()) { + switch (inst->id()) { + case Inst::kIdKmovb: { + if (operands[0].isGp() && operands[1].isMem()) { + // Transform from KMOVB to MOVZX (1-byte memory operand). + operands[1].as().setSize(1); + inst->setId(Inst::kIdMovzx); + } + break; + } + + case Inst::kIdVmovw: { + if (operands[0].isGp() && operands[1].isMem()) { + // Transform from VMOVW to MOVZX (2-byte memory operand). + operands[1].as().setSize(2); + inst->setId(Inst::kIdMovzx); + } + break; + } + + case Inst::kIdMovd: + case Inst::kIdVmovd: + case Inst::kIdKmovd: { + if (operands[0].isGp() && operands[1].isMem()) { + // Transform from [V|K]MOVD to MOV (4-byte memory operand). + operands[1].as().setSize(4); + inst->setId(Inst::kIdMov); + } + break; + } + + case Inst::kIdMovq: + case Inst::kIdVmovq: + case Inst::kIdKmovq: { + if (operands[0].isGp() && operands[1].isMem()) { + // Transform from [V|K]MOVQ to MOV (8-byte memory operand). 
+ operands[1].as().setSize(8); + inst->setId(Inst::kIdMov); + } + break; + } + + default: + break; + } + } + // Transform VEX instruction to EVEX when necessary. if (raInst->isTransformable()) { if (maxRegId > 15) { diff --git a/test/asmjit_test_compiler_x86.cpp b/test/asmjit_test_compiler_x86.cpp index 54442ef..cae7c7a 100644 --- a/test/asmjit_test_compiler_x86.cpp +++ b/test/asmjit_test_compiler_x86.cpp @@ -4132,6 +4132,60 @@ public: } }; +// x86::Compiler - X86Test_VecToScalar +// =================================== + +class X86Test_VecToScalar : public X86TestCase { +public: + static constexpr uint32_t kVecCount = 64; + + X86Test_VecToScalar() : X86TestCase("VecToScalar") {} + + static void add(TestApp& app) { + app.add(new X86Test_VecToScalar()); + } + + virtual void compile(x86::Compiler& cc) { + FuncNode* func = cc.addFunc(FuncSignature::build()); + + x86::Gp x = cc.newInt32("x"); + x86::Gp t = cc.newInt32("t"); + x86::Xmm v[kVecCount]; + + func->setArg(0, x); + + for (size_t i = 0; i < kVecCount; i++) { + v[i] = cc.newXmm("v%d", i); + if (i != 0) + cc.add(x, 1); + cc.movd(v[i], x); + } + + cc.xor_(x, x); + + for (size_t i = 0; i < kVecCount; i++) { + cc.movd(t, v[i]); + cc.add(x, t); + } + + cc.ret(x); + cc.endFunc(); + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef uint32_t (*Func)(uint32_t); + Func func = ptr_as_func(_func); + + uint32_t resultRet = func(1); + uint32_t expectRet = 2080; // 1 + 2 + 3 + ... + 64 + + result.assignFormat("ret=%d", resultRet); + expect.assignFormat("ret=%d", expectRet); + + return result == expect; + } +}; + // x86::Compiler - X86Test_MiscLocalConstPool // ========================================== @@ -4512,6 +4566,7 @@ void compiler_add_x86_tests(TestApp& app) { app.addT(); // Miscellaneous tests. + app.addT(); app.addT(); app.addT(); app.addT();