mirror of
https://github.com/asmjit/asmjit.git
synced 2025-12-17 04:24:37 +03:00
Added missing vpermpd & vpermps (AVX512) and changed how EVEX prefix is propagated in x86::Assembler
This commit is contained in:
@@ -2837,7 +2837,6 @@ CaseExtRm:
|
|||||||
case InstDB::kEncodingVexRm_Lx_Bcst:
|
case InstDB::kEncodingVexRm_Lx_Bcst:
|
||||||
if (isign3 == ENC_OPS2(Reg, Reg) && Reg::isGp(o1.as<Reg>())) {
|
if (isign3 == ENC_OPS2(Reg, Reg) && Reg::isGp(o1.as<Reg>())) {
|
||||||
opcode = x86AltOpcodeOf(instInfo) | x86OpcodeLBySize(o0.size() | o1.size());
|
opcode = x86AltOpcodeOf(instInfo) | x86OpcodeLBySize(o0.size() | o1.size());
|
||||||
options |= Inst::kOptionEvex;
|
|
||||||
opReg = o0.id();
|
opReg = o0.id();
|
||||||
rbReg = o1.id();
|
rbReg = o1.id();
|
||||||
goto EmitVexEvexR;
|
goto EmitVexEvexR;
|
||||||
@@ -4122,13 +4121,8 @@ EmitVexEvexR:
|
|||||||
(_extraReg.id() << 16); // [........|.LL..aaa|Vvvvv..R|RBBmmmmm].
|
(_extraReg.id() << 16); // [........|.LL..aaa|Vvvvv..R|RBBmmmmm].
|
||||||
opReg &= 0x7;
|
opReg &= 0x7;
|
||||||
|
|
||||||
// Mark invalid VEX (force EVEX) case: // [@.......|.LL..aaa|Vvvvv..R|RBBmmmmm].
|
|
||||||
x |= (~commonInfo->flags() & InstDB::kFlagVex) << (31 - Support::constCtz(InstDB::kFlagVex));
|
|
||||||
|
|
||||||
// Handle AVX512 options by a single branch.
|
// Handle AVX512 options by a single branch.
|
||||||
const uint32_t kAvx512Options = Inst::kOptionZMask |
|
const uint32_t kAvx512Options = Inst::kOptionZMask | Inst::kOptionER | Inst::kOptionSAE;
|
||||||
Inst::kOptionER |
|
|
||||||
Inst::kOptionSAE ;
|
|
||||||
if (options & kAvx512Options) {
|
if (options & kAvx512Options) {
|
||||||
uint32_t kBcstMask = 0x1 << 20;
|
uint32_t kBcstMask = 0x1 << 20;
|
||||||
uint32_t kLLMask10 = 0x2 << 21;
|
uint32_t kLLMask10 = 0x2 << 21;
|
||||||
@@ -4138,7 +4132,7 @@ EmitVexEvexR:
|
|||||||
// The {rz-sae} is encoded as {11}, so it should match the mask.
|
// The {rz-sae} is encoded as {11}, so it should match the mask.
|
||||||
ASMJIT_ASSERT(Inst::kOptionRZ_SAE == kLLMask11);
|
ASMJIT_ASSERT(Inst::kOptionRZ_SAE == kLLMask11);
|
||||||
|
|
||||||
x |= options & Inst::kOptionZMask; // [@.......|zLLb.aaa|Vvvvv..R|RBBmmmmm].
|
x |= options & Inst::kOptionZMask; // [........|zLLb.aaa|Vvvvv..R|RBBmmmmm].
|
||||||
|
|
||||||
// Support embedded-rounding {er} and suppress-all-exceptions {sae}.
|
// Support embedded-rounding {er} and suppress-all-exceptions {sae}.
|
||||||
if (options & (Inst::kOptionER | Inst::kOptionSAE)) {
|
if (options & (Inst::kOptionER | Inst::kOptionSAE)) {
|
||||||
@@ -4157,22 +4151,22 @@ EmitVexEvexR:
|
|||||||
if (ASMJIT_UNLIKELY(!commonInfo->hasAvx512ER()))
|
if (ASMJIT_UNLIKELY(!commonInfo->hasAvx512ER()))
|
||||||
goto InvalidEROrSAE;
|
goto InvalidEROrSAE;
|
||||||
|
|
||||||
x &=~kLLMask11; // [@.......|.00..aaa|Vvvvv..R|RBBmmmmm].
|
x &=~kLLMask11; // [........|.00..aaa|Vvvvv..R|RBBmmmmm].
|
||||||
x |= kBcstMask | (options & kLLMask11); // [@.......|.LLb.aaa|Vvvvv..R|RBBmmmmm].
|
x |= kBcstMask | (options & kLLMask11); // [........|.LLb.aaa|Vvvvv..R|RBBmmmmm].
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
if (ASMJIT_UNLIKELY(!commonInfo->hasAvx512SAE()))
|
if (ASMJIT_UNLIKELY(!commonInfo->hasAvx512SAE()))
|
||||||
goto InvalidEROrSAE;
|
goto InvalidEROrSAE;
|
||||||
|
|
||||||
x |= kBcstMask; // [@.......|.LLb.aaa|Vvvvv..R|RBBmmmmm].
|
x |= kBcstMask; // [........|.LLb.aaa|Vvvvv..R|RBBmmmmm].
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check if EVEX is required by checking bits in `x` : [@.......|xx.x.xxx|x......x|.x.x....].
|
// Check if EVEX is required by checking bits in `x` : [........|xx.x.xxx|x......x|.x.x....].
|
||||||
if (x & 0x80D78150u) {
|
if (x & 0x00D78150u) {
|
||||||
uint32_t y = ((x << 4) & 0x00080000u) | // [@.......|...bV...|........|........].
|
uint32_t y = ((x << 4) & 0x00080000u) | // [........|...bV...|........|........].
|
||||||
((x >> 4) & 0x00000010u) ; // [@.......|...bV...|........|...R....].
|
((x >> 4) & 0x00000010u) ; // [........|...bV...|........|...R....].
|
||||||
x = (x & 0x00FF78E3u) | y; // [........|zLLbVaaa|0vvvv000|RBBR00mm].
|
x = (x & 0x00FF78E3u) | y; // [........|zLLbVaaa|0vvvv000|RBBR00mm].
|
||||||
x = x << 8; // [zLLbVaaa|0vvvv000|RBBR00mm|00000000].
|
x = x << 8; // [zLLbVaaa|0vvvv000|RBBR00mm|00000000].
|
||||||
x |= (opcode >> kVSHR_W ) & 0x00800000u; // [zLLbVaaa|Wvvvv000|RBBR00mm|00000000].
|
x |= (opcode >> kVSHR_W ) & 0x00800000u; // [zLLbVaaa|Wvvvv000|RBBR00mm|00000000].
|
||||||
|
|||||||
@@ -1192,8 +1192,8 @@ struct Inst : public BaseInst {
|
|||||||
kIdVpermil2ps, //!< Instruction 'vpermil2ps' {XOP}.
|
kIdVpermil2ps, //!< Instruction 'vpermil2ps' {XOP}.
|
||||||
kIdVpermilpd, //!< Instruction 'vpermilpd' {AVX|AVX512_F+VL}.
|
kIdVpermilpd, //!< Instruction 'vpermilpd' {AVX|AVX512_F+VL}.
|
||||||
kIdVpermilps, //!< Instruction 'vpermilps' {AVX|AVX512_F+VL}.
|
kIdVpermilps, //!< Instruction 'vpermilps' {AVX|AVX512_F+VL}.
|
||||||
kIdVpermpd, //!< Instruction 'vpermpd' {AVX2}.
|
kIdVpermpd, //!< Instruction 'vpermpd' {AVX2|AVX512_F+VL}.
|
||||||
kIdVpermps, //!< Instruction 'vpermps' {AVX2}.
|
kIdVpermps, //!< Instruction 'vpermps' {AVX2|AVX512_F+VL}.
|
||||||
kIdVpermq, //!< Instruction 'vpermq' {AVX2|AVX512_F+VL}.
|
kIdVpermq, //!< Instruction 'vpermq' {AVX2|AVX512_F+VL}.
|
||||||
kIdVpermt2b, //!< Instruction 'vpermt2b' {AVX512_VBMI+VL}.
|
kIdVpermt2b, //!< Instruction 'vpermt2b' {AVX512_VBMI+VL}.
|
||||||
kIdVpermt2d, //!< Instruction 'vpermt2d' {AVX512_F+VL}.
|
kIdVpermt2d, //!< Instruction 'vpermt2d' {AVX512_F+VL}.
|
||||||
|
|||||||
@@ -1476,6 +1476,12 @@ Error InstInternal::queryFeatures(uint32_t archId, const BaseInst& inst, const O
|
|||||||
case Inst::kIdVpbroadcastw:
|
case Inst::kIdVpbroadcastw:
|
||||||
mustUseEvex = opCount >= 2 && x86::Reg::isGp(operands[1]);
|
mustUseEvex = opCount >= 2 && x86::Reg::isGp(operands[1]);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
// Special case: VPERMPD only supports an immediate predicate in AVX2 mode, a vector
|
||||||
|
// (register) predicate is only supported by AVX512-F and newer.
|
||||||
|
case Inst::kIdVpermpd:
|
||||||
|
mustUseEvex = opCount >= 3 && !operands[2].isImm();
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!(hasEvex | mustUseEvex | hasKMask | hasKOrZmm | regAnalysis.highVecUsed))
|
if (!(hasEvex | mustUseEvex | hasKMask | hasKOrZmm | regAnalysis.highVecUsed))
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -144,7 +144,7 @@ struct Opcode {
|
|||||||
// cases (similar to forcing REX prefix). Force EVEX will force emitting
|
// cases (similar to forcing REX prefix). Force EVEX will force emitting
|
||||||
// EVEX prefix instead of VEX2|VEX3. EVEX-only instructions will have
|
// EVEX prefix instead of VEX2|VEX3. EVEX-only instructions will have
|
||||||
// ForceEvex always set, however. instructions that can be encoded by
|
// ForceEvex always set, however. instructions that can be encoded by
|
||||||
// either VEX or EVEX prefix shall not have ForceEvex set.
|
// either VEX or EVEX prefix should not have ForceEvex set.
|
||||||
|
|
||||||
kMM_ForceVex3 = 0x04u << kMM_Shift, // Force 3-BYTE VEX prefix.
|
kMM_ForceVex3 = 0x04u << kMM_Shift, // Force 3-BYTE VEX prefix.
|
||||||
kMM_ForceEvex = 0x10u << kMM_Shift, // Force 4-BYTE EVEX prefix.
|
kMM_ForceEvex = 0x10u << kMM_Shift, // Force 4-BYTE EVEX prefix.
|
||||||
|
|||||||
@@ -621,7 +621,6 @@ class X86TableGen extends core.TableGen {
|
|||||||
this.merge();
|
this.merge();
|
||||||
this.save();
|
this.save();
|
||||||
this.dumpTableSizes();
|
this.dumpTableSizes();
|
||||||
|
|
||||||
this.printMissing();
|
this.printMissing();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -714,7 +713,7 @@ class AltOpcodeTable extends core.Task {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// X(______,OP,_,_,_,_,_,_ )
|
// X(______,OP,_,_,_,_,_,_ )
|
||||||
if (opcode.startsWith("O_FPU(") || opcode.startsWith("O(") || opcode.startsWith("V(")) {
|
if (opcode.startsWith("O_FPU(") || opcode.startsWith("O(") || opcode.startsWith("V(") || opcode.startsWith("E(")) {
|
||||||
var value = opcode.substring(9, 11);
|
var value = opcode.substring(9, 11);
|
||||||
var remaining = opcode.substring(0, 9) + "00" + opcode.substring(11);
|
var remaining = opcode.substring(0, 9) + "00" + opcode.substring(11);
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user