[ABI] Added more AVX_VNNI instructions, added MOVABS for explicit Imm64 encodings, added more assembler tests

This commit is contained in:
kobalicek
2021-01-26 01:00:10 +01:00
parent 1422faa011
commit 58b6c025f2
24 changed files with 3919 additions and 3017 deletions

View File

@@ -20,17 +20,21 @@
"optional": true "optional": true
}, },
{ {
"cmd": ["asmjit_test_x86_asm"], "cmd": ["asmjit_test_assembler", "--quiet"],
"optional": true
},
{
"cmd": ["asmjit_test_emitters"],
"optional": true
},
{
"cmd": ["asmjit_test_instinfo"],
"optional": true "optional": true
}, },
{ {
"cmd": ["asmjit_test_x86_sections"], "cmd": ["asmjit_test_x86_sections"],
"optional": true "optional": true
}, },
{
"cmd": ["asmjit_test_x86_instinfo"],
"optional": true
},
{ {
"cmd": ["asmjit_test_compiler"], "cmd": ["asmjit_test_compiler"],
"optional": true "optional": true

View File

@@ -79,14 +79,14 @@ jobs:
- { title: "linux" , os: "ubuntu-latest" , cc: "clang-10", arch: "x64", build_type: "Debug" , defs: "ASMJIT_TEST=ON" } - { title: "linux" , os: "ubuntu-latest" , cc: "clang-10", arch: "x64", build_type: "Debug" , defs: "ASMJIT_TEST=ON" }
- { title: "linux" , os: "ubuntu-latest" , cc: "clang-10", arch: "x64", build_type: "Release", defs: "ASMJIT_TEST=ON" } - { title: "linux" , os: "ubuntu-latest" , cc: "clang-10", arch: "x64", build_type: "Release", defs: "ASMJIT_TEST=ON" }
- { title: "osx-10.15" , os: "macos-10.15" , cc: "gcc-9" , arch: "x64", build_type: "Debug" , defs: "ASMJIT_TEST=ON" } - { title: "macos-10.15" , os: "macos-10.15" , cc: "gcc-9" , arch: "x64", build_type: "Debug" , defs: "ASMJIT_TEST=ON" }
- { title: "osx-10.15" , os: "macos-10.15" , cc: "gcc-9" , arch: "x64", build_type: "Release", defs: "ASMJIT_TEST=ON" } - { title: "macos-10.15" , os: "macos-10.15" , cc: "gcc-9" , arch: "x64", build_type: "Release", defs: "ASMJIT_TEST=ON" }
- { title: "osx-10.15" , os: "macos-10.15" , cc: "clang" , arch: "x64", build_type: "Debug" , defs: "ASMJIT_TEST=ON" } - { title: "macos-10.15" , os: "macos-10.15" , cc: "clang" , arch: "x64", build_type: "Debug" , defs: "ASMJIT_TEST=ON" }
- { title: "osx-10.15" , os: "macos-10.15" , cc: "clang" , arch: "x64", build_type: "Release", defs: "ASMJIT_TEST=ON" } - { title: "macos-10.15" , os: "macos-10.15" , cc: "clang" , arch: "x64", build_type: "Release", defs: "ASMJIT_TEST=ON" }
- { title: "osx-11.0" , os: "macos-11.0" , cc: "gcc-9" , arch: "x64", build_type: "Debug" , defs: "ASMJIT_TEST=ON" } - { title: "macos-11.0" , os: "macos-11.0" , cc: "gcc-10" , arch: "x64", build_type: "Debug" , defs: "ASMJIT_TEST=ON" }
- { title: "osx-11.0" , os: "macos-11.0" , cc: "gcc-9" , arch: "x64", build_type: "Release", defs: "ASMJIT_TEST=ON" } - { title: "macos-11.0" , os: "macos-11.0" , cc: "gcc-10" , arch: "x64", build_type: "Release", defs: "ASMJIT_TEST=ON" }
- { title: "osx-11.0" , os: "macos-11.0" , cc: "clang" , arch: "x64", build_type: "Debug" , defs: "ASMJIT_TEST=ON" } - { title: "macos-11.0" , os: "macos-11.0" , cc: "clang" , arch: "x64", build_type: "Debug" , defs: "ASMJIT_TEST=ON" }
- { title: "osx-11.0" , os: "macos-11.0" , cc: "clang" , arch: "x64", build_type: "Release", defs: "ASMJIT_TEST=ON" } - { title: "macos-11.0" , os: "macos-11.0" , cc: "clang" , arch: "x64", build_type: "Release", defs: "ASMJIT_TEST=ON" }
- { title: "windows" , os: "windows-latest", cc: "vs2019" , arch: "x86", build_type: "Debug" , defs: "ASMJIT_TEST=ON" } - { title: "windows" , os: "windows-latest", cc: "vs2019" , arch: "x86", build_type: "Debug" , defs: "ASMJIT_TEST=ON" }
- { title: "windows" , os: "windows-latest", cc: "vs2019" , arch: "x86", build_type: "Release", defs: "ASMJIT_TEST=ON" } - { title: "windows" , os: "windows-latest", cc: "vs2019" , arch: "x86", build_type: "Release", defs: "ASMJIT_TEST=ON" }

View File

@@ -498,7 +498,7 @@ if (NOT ASMJIT_EMBED)
target_include_directories(asmjit_test_unit BEFORE PRIVATE ${ASMJIT_INCLUDE_DIRS}) target_include_directories(asmjit_test_unit BEFORE PRIVATE ${ASMJIT_INCLUDE_DIRS})
foreach(_target asmjit_test_opcode foreach(_target asmjit_test_opcode
asmjit_test_x86_asm asmjit_test_emitters
asmjit_test_x86_sections) asmjit_test_x86_sections)
asmjit_add_target(${_target} TEST asmjit_add_target(${_target} TEST
SOURCES test/${_target}.cpp SOURCES test/${_target}.cpp
@@ -508,9 +508,18 @@ if (NOT ASMJIT_EMBED)
CFLAGS_REL ${ASMJIT_PRIVATE_CFLAGS_REL}) CFLAGS_REL ${ASMJIT_PRIVATE_CFLAGS_REL})
endforeach() endforeach()
asmjit_add_target(asmjit_test_assembler TEST
SOURCES test/asmjit_test_assembler.cpp
test/asmjit_test_assembler_x86.cpp
test/asmjit_test_assembler.h
LIBRARIES asmjit::asmjit
CFLAGS ${ASMJIT_PRIVATE_CFLAGS}
CFLAGS_DBG ${ASMJIT_PRIVATE_CFLAGS_DBG}
CFLAGS_REL ${ASMJIT_PRIVATE_CFLAGS_REL})
if (NOT ASMJIT_NO_INTROSPECTION) if (NOT ASMJIT_NO_INTROSPECTION)
asmjit_add_target(asmjit_test_x86_instinfo TEST asmjit_add_target(asmjit_test_instinfo TEST
SOURCES test/asmjit_test_x86_instinfo.cpp SOURCES test/asmjit_test_instinfo.cpp
LIBRARIES asmjit::asmjit LIBRARIES asmjit::asmjit
CFLAGS ${ASMJIT_PRIVATE_CFLAGS} CFLAGS ${ASMJIT_PRIVATE_CFLAGS}
CFLAGS_DBG ${ASMJIT_PRIVATE_CFLAGS_DBG} CFLAGS_DBG ${ASMJIT_PRIVATE_CFLAGS_DBG}

View File

@@ -23,9 +23,11 @@ Breaking the API is sometimes inevitable, what to do?
* See [Breaking Changes Guide](https://asmjit.com/doc/group__asmjit__breaking__changes.html), which is now part of AsmJit documentation. * See [Breaking Changes Guide](https://asmjit.com/doc/group__asmjit__breaking__changes.html), which is now part of AsmJit documentation.
* See asmjit tests, they always compile and provide implementation of many use-cases: * See asmjit tests, they always compile and provide implementation of many use-cases:
* [asmjit_test_x86_asm.cpp](./test/asmjit_test_x86_asm.cpp) - Tests that demonstrate the purpose of emitters. * [asmjit_test_emitters.cpp](./test/asmjit_test_emitters.cpp) - Tests that demonstrate the purpose of emitters.
* [asmjit_test_x86_sections.cpp](./test/asmjit_test_x86_sections.cpp) - Multiple sections test. * [asmjit_test_assembler_x86.cpp](./test/asmjit_test_assembler_x86.cpp) - Tests targeting AsmJit's Assembler (x86/x64).
* [asmjit_test_compiler_x86.cpp](./test/asmjit_test_compiler_x86.cpp) - Tests targeting AsmJit's Compiler (x86/x64). * [asmjit_test_compiler_x86.cpp](./test/asmjit_test_compiler_x86.cpp) - Tests targeting AsmJit's Compiler (x86/x64).
* [asmjit_test_instinfo.cpp](./test/asmjit_test_instinfo.cpp) - Tests that query instruction information.
* [asmjit_test_x86_sections.cpp](./test/asmjit_test_x86_sections.cpp) - Multiple sections test.
* Visit our [Official Chat](https://gitter.im/asmjit/asmjit) if you need quick help. * Visit our [Official Chat](https://gitter.im/asmjit/asmjit) if you need quick help.
Project Organization Project Organization

View File

@@ -47,7 +47,7 @@ public:
typedef Support::BitVectorIterator<BitWord> Iterator; typedef Support::BitVectorIterator<BitWord> Iterator;
enum : uint32_t { enum : uint32_t {
kMaxFeatures = 128, kMaxFeatures = 256,
kNumBitWords = kMaxFeatures / Support::kBitWordSizeInBits kNumBitWords = kMaxFeatures / Support::kBitWordSizeInBits
}; };

View File

@@ -62,7 +62,7 @@ public:
kOptionReserved = 0x00000001u, kOptionReserved = 0x00000001u,
//! Prevents following a jump during compilation (BaseCompiler). //! Prevents following a jump during compilation (BaseCompiler).
kOptionUnfollow = 0x00000010u, kOptionUnfollow = 0x00000002u,
//! Overwrite the destination operand(s) (BaseCompiler). //! Overwrite the destination operand(s) (BaseCompiler).
//! //!
@@ -102,17 +102,17 @@ public:
//! //!
//! - `sqrtss x, y` - only LO element of `x` is changed, if you don't //! - `sqrtss x, y` - only LO element of `x` is changed, if you don't
//! use HI elements, use `compiler.overwrite().sqrtss(x, y)`. //! use HI elements, use `compiler.overwrite().sqrtss(x, y)`.
kOptionOverwrite = 0x00000020u, kOptionOverwrite = 0x00000004u,
//! Emit short-form of the instruction. //! Emit short-form of the instruction.
kOptionShortForm = 0x00000040u, kOptionShortForm = 0x00000010u,
//! Emit long-form of the instruction. //! Emit long-form of the instruction.
kOptionLongForm = 0x00000080u, kOptionLongForm = 0x00000020u,
//! Conditional jump is likely to be taken. //! Conditional jump is likely to be taken.
kOptionTaken = 0x00000100u, kOptionTaken = 0x00000040u,
//! Conditional jump is unlikely to be taken. //! Conditional jump is unlikely to be taken.
kOptionNotTaken = 0x00000200u kOptionNotTaken = 0x00000080u
}; };
//! Control type. //! Control type.

View File

@@ -505,33 +505,37 @@ static ASMJIT_INLINE uint32_t x86GetMovAbsInstSize64Bit(uint32_t regSize, uint32
return segmentPrefixSize + _66hPrefixSize + rexPrefixSize + opCodeByteSize + immediateSize; return segmentPrefixSize + _66hPrefixSize + rexPrefixSize + opCodeByteSize + immediateSize;
} }
static ASMJIT_INLINE uint32_t x86GetMovAbsAddrType(Assembler* self, X86BufferWriter& writer, uint32_t regSize, uint32_t options, const Mem& rmRel) noexcept { static ASMJIT_INLINE bool x86ShouldUseMovabs(Assembler* self, X86BufferWriter& writer, uint32_t regSize, uint32_t options, const Mem& rmRel) noexcept {
uint32_t addrType = rmRel.addrType(); if (self->is32Bit()) {
int64_t addrValue = rmRel.offset(); // There is no relative addressing, just decide whether to use MOV encoded with MOD R/M or absolute.
return !(options & Inst::kOptionModMR);
}
else {
// If the addressing type is REL or MOD R/M was specified then absolute mov won't be used.
if (rmRel.addrType() == Mem::kAddrTypeRel || (options & Inst::kOptionModMR) != 0)
return false;
if (addrType == Mem::kAddrTypeDefault && !(options & Inst::kOptionModMR)) { int64_t addrValue = rmRel.offset();
if (self->is64Bit()) { uint64_t baseAddress = self->code()->baseAddress();
uint64_t baseAddress = self->code()->baseAddress();
if (baseAddress != Globals::kNoBaseAddress && !rmRel.hasSegment()) {
uint32_t instructionSize = x86GetMovAbsInstSize64Bit(regSize, options, rmRel);
uint64_t virtualOffset = uint64_t(writer.offsetFrom(self->_bufferData));
uint64_t rip64 = baseAddress + self->_section->offset() + virtualOffset + instructionSize;
uint64_t rel64 = uint64_t(addrValue) - rip64;
if (!Support::isInt32(int64_t(rel64))) // If the address type is default, it means to basically check whether relative addressing is possible. However,
addrType = Mem::kAddrTypeAbs; // this is only possible when the base address is known - relative encoding uses RIP+N it has to be calculated.
} if (rmRel.addrType() == Mem::kAddrTypeDefault && baseAddress != Globals::kNoBaseAddress && !rmRel.hasSegment()) {
else { uint32_t instructionSize = x86GetMovAbsInstSize64Bit(regSize, options, rmRel);
if (!Support::isInt32(addrValue)) uint64_t virtualOffset = uint64_t(writer.offsetFrom(self->_bufferData));
addrType = Mem::kAddrTypeAbs; uint64_t rip64 = baseAddress + self->_section->offset() + virtualOffset + instructionSize;
} uint64_t rel64 = uint64_t(addrValue) - rip64;
if (Support::isInt32(int64_t(rel64)))
return false;
} }
else { else {
addrType = Mem::kAddrTypeAbs; if (Support::isInt32(addrValue))
return false;
} }
}
return addrType; return uint64_t(addrValue) > 0xFFFFFFFFu;
}
} }
// ============================================================================ // ============================================================================
@@ -1631,18 +1635,18 @@ CaseX86M_GPB_MulDiv:
opcode = 0; opcode = 0;
opcode.addArithBySize(o0.size()); opcode.addArithBySize(o0.size());
if (o0.size() == 1)
FIXUP_GPB(o0, opReg);
// Handle a special form of `mov al|ax|eax|rax, [ptr64]` that doesn't use MOD. // Handle a special form of `mov al|ax|eax|rax, [ptr64]` that doesn't use MOD.
if (opReg == Gp::kIdAx && !rmRel->as<Mem>().hasBaseOrIndex()) { if (opReg == Gp::kIdAx && !rmRel->as<Mem>().hasBaseOrIndex()) {
immValue = rmRel->as<Mem>().offset(); if (x86ShouldUseMovabs(this, writer, o0.size(), options, rmRel->as<Mem>())) {
if (x86GetMovAbsAddrType(this, writer, o0.size(), options, rmRel->as<Mem>()) == Mem::kAddrTypeAbs) {
opcode += 0xA0; opcode += 0xA0;
immValue = rmRel->as<Mem>().offset();
goto EmitX86OpMovAbs; goto EmitX86OpMovAbs;
} }
} }
if (o0.size() == 1)
FIXUP_GPB(o0, opReg);
opcode += 0x8A; opcode += 0x8A;
goto EmitX86M; goto EmitX86M;
} }
@@ -1664,18 +1668,18 @@ CaseX86M_GPB_MulDiv:
opcode = 0; opcode = 0;
opcode.addArithBySize(o1.size()); opcode.addArithBySize(o1.size());
if (o1.size() == 1)
FIXUP_GPB(o1, opReg);
// Handle a special form of `mov [ptr64], al|ax|eax|rax` that doesn't use MOD. // Handle a special form of `mov [ptr64], al|ax|eax|rax` that doesn't use MOD.
if (opReg == Gp::kIdAx && !rmRel->as<Mem>().hasBaseOrIndex()) { if (opReg == Gp::kIdAx && !rmRel->as<Mem>().hasBaseOrIndex()) {
immValue = rmRel->as<Mem>().offset(); if (x86ShouldUseMovabs(this, writer, o1.size(), options, rmRel->as<Mem>())) {
if (x86GetMovAbsAddrType(this, writer, o1.size(), options, rmRel->as<Mem>()) == Mem::kAddrTypeAbs) {
opcode += 0xA2; opcode += 0xA2;
immValue = rmRel->as<Mem>().offset();
goto EmitX86OpMovAbs; goto EmitX86OpMovAbs;
} }
} }
if (o1.size() == 1)
FIXUP_GPB(o1, opReg);
opcode += 0x88; opcode += 0x88;
goto EmitX86M; goto EmitX86M;
} }
@@ -1736,6 +1740,62 @@ CaseX86M_GPB_MulDiv:
} }
break; break;
case InstDB::kEncodingX86Movabs:
// Reg <- Mem
if (isign3 == ENC_OPS2(Reg, Mem)) {
opReg = o0.id();
rmRel = &o1;
opcode = 0xA0;
opcode.addArithBySize(o0.size());
if (ASMJIT_UNLIKELY(!o0.as<Reg>().isGp()) || opReg != Gp::kIdAx)
goto InvalidInstruction;
if (ASMJIT_UNLIKELY(rmRel->as<Mem>().hasBaseOrIndex()))
goto InvalidAddress;
if (ASMJIT_UNLIKELY(rmRel->as<Mem>().addrType() == Mem::kAddrTypeRel))
goto InvalidAddress;
immValue = rmRel->as<Mem>().offset();
goto EmitX86OpMovAbs;
}
// Mem <- Reg
if (isign3 == ENC_OPS2(Mem, Reg)) {
opReg = o1.id();
rmRel = &o0;
opcode = 0xA2;
opcode.addArithBySize(o1.size());
if (ASMJIT_UNLIKELY(!o1.as<Reg>().isGp()) || opReg != Gp::kIdAx)
goto InvalidInstruction;
if (ASMJIT_UNLIKELY(rmRel->as<Mem>().hasBaseOrIndex()))
goto InvalidAddress;
immValue = rmRel->as<Mem>().offset();
goto EmitX86OpMovAbs;
}
// Reg <- Imm.
if (isign3 == ENC_OPS2(Reg, Imm)) {
if (ASMJIT_UNLIKELY(!o0.as<Reg>().isGpq()))
goto InvalidInstruction;
opReg = o0.id();
opcode = 0xB8;
immSize = 8;
immValue = o1.as<Imm>().value();
opcode.addPrefixBySize(8);
goto EmitX86OpReg;
}
break;
case InstDB::kEncodingX86MovsxMovzx: case InstDB::kEncodingX86MovsxMovzx:
opcode.add(o1.size() != 1); opcode.add(o1.size() != 1);
opcode.addPrefixBySize(o0.size()); opcode.addPrefixBySize(o0.size());
@@ -4424,8 +4484,20 @@ EmitVexEvexR:
} }
} }
// If these bits are used then EVEX prefix is required.
constexpr uint32_t kEvexBits = 0x00D78150u; // [........|xx.x.xxx|x......x|.x.x....].
// Force EVEX prefix even in case the instruction has VEX encoding, because EVEX encoding is preferred. At the
// moment this is only required for AVX_VNNI instructions, which were added after AVX512_VNNI instructions. If
// such an instruction doesn't specify a prefix, EVEX (AVX512_VNNI) is used by default.
if (commonInfo->preferEvex()) {
if ((x & kEvexBits) == 0 && (options & (Inst::kOptionVex | Inst::kOptionVex3)) == 0) {
x |= (Opcode::kMM_ForceEvex) >> Opcode::kMM_Shift;
}
}
// Check if EVEX is required by checking bits in `x` : [........|xx.x.xxx|x......x|.x.x....]. // Check if EVEX is required by checking bits in `x` : [........|xx.x.xxx|x......x|.x.x....].
if (x & 0x00D78150u) { if (x & kEvexBits) {
uint32_t y = ((x << 4) & 0x00080000u) | // [........|...bV...|........|........]. uint32_t y = ((x << 4) & 0x00080000u) | // [........|...bV...|........|........].
((x >> 4) & 0x00000010u) ; // [........|...bV...|........|...R....]. ((x >> 4) & 0x00000010u) ; // [........|...bV...|........|...R....].
x = (x & 0x00FF78E3u) | y; // [........|zLLbVaaa|0vvvv000|RBBR00mm]. x = (x & 0x00FF78E3u) | y; // [........|zLLbVaaa|0vvvv000|RBBR00mm].
@@ -4525,8 +4597,20 @@ EmitVexEvexM:
x |= options & (Inst::kOptionZMask); // [@.......|zLLbXaaa|Vvvvv..R|RXBmmmmm]. x |= options & (Inst::kOptionZMask); // [@.......|zLLbXaaa|Vvvvv..R|RXBmmmmm].
} }
// If these bits are used then EVEX prefix is required.
constexpr uint32_t kEvexBits = 0x80DF8110u; // [@.......|xx.xxxxx|x......x|...x....].
// Force EVEX prefix even in case the instruction has VEX encoding, because EVEX encoding is preferred. At the
// moment this is only required for AVX_VNNI instructions, which were added after AVX512_VNNI instructions. If
// such an instruction doesn't specify a prefix, EVEX (AVX512_VNNI) is used by default.
if (commonInfo->preferEvex()) {
if ((x & kEvexBits) == 0 && (options & (Inst::kOptionVex | Inst::kOptionVex3)) == 0) {
x |= (Opcode::kMM_ForceEvex) >> Opcode::kMM_Shift;
}
}
// Check if EVEX is required by checking bits in `x` : [@.......|xx.xxxxx|x......x|...x....]. // Check if EVEX is required by checking bits in `x` : [@.......|xx.xxxxx|x......x|...x....].
if (x & 0x80DF8110u) { if (x & kEvexBits) {
uint32_t y = ((x << 4) & 0x00080000u) | // [@.......|....V...|........|........]. uint32_t y = ((x << 4) & 0x00080000u) | // [@.......|....V...|........|........].
((x >> 4) & 0x00000010u) ; // [@.......|....V...|........|...R....]. ((x >> 4) & 0x00000010u) ; // [@.......|....V...|........|...R....].
x = (x & 0x00FF78E3u) | y; // [........|zLLbVaaa|0vvvv000|RXBR00mm]. x = (x & 0x00FF78E3u) | y; // [........|zLLbVaaa|0vvvv000|RXBR00mm].

File diff suppressed because it is too large Load Diff

View File

@@ -273,6 +273,7 @@ ASMJIT_FAVOR_SIZE void detectCpu(CpuInfo& cpu) noexcept {
if (bitTest(regs.ecx, 0)) features.add(Features::kPREFETCHWT1); if (bitTest(regs.ecx, 0)) features.add(Features::kPREFETCHWT1);
if (bitTest(regs.ecx, 4)) features.add(Features::kOSPKE); if (bitTest(regs.ecx, 4)) features.add(Features::kOSPKE);
if (bitTest(regs.ecx, 5)) features.add(Features::kWAITPKG); if (bitTest(regs.ecx, 5)) features.add(Features::kWAITPKG);
if (bitTest(regs.ecx, 7)) features.add(Features::kCET_SS);
if (bitTest(regs.ecx, 8)) features.add(Features::kGFNI); if (bitTest(regs.ecx, 8)) features.add(Features::kGFNI);
if (bitTest(regs.ecx, 9)) features.add(Features::kVAES); if (bitTest(regs.ecx, 9)) features.add(Features::kVAES);
if (bitTest(regs.ecx, 10)) features.add(Features::kVPCLMULQDQ); if (bitTest(regs.ecx, 10)) features.add(Features::kVPCLMULQDQ);
@@ -281,9 +282,11 @@ ASMJIT_FAVOR_SIZE void detectCpu(CpuInfo& cpu) noexcept {
if (bitTest(regs.ecx, 27)) features.add(Features::kMOVDIRI); if (bitTest(regs.ecx, 27)) features.add(Features::kMOVDIRI);
if (bitTest(regs.ecx, 28)) features.add(Features::kMOVDIR64B); if (bitTest(regs.ecx, 28)) features.add(Features::kMOVDIR64B);
if (bitTest(regs.ecx, 29)) features.add(Features::kENQCMD); if (bitTest(regs.ecx, 29)) features.add(Features::kENQCMD);
if (bitTest(regs.edx, 5)) features.add(Features::kUINTR);
if (bitTest(regs.edx, 14)) features.add(Features::kSERIALIZE); if (bitTest(regs.edx, 14)) features.add(Features::kSERIALIZE);
if (bitTest(regs.edx, 16)) features.add(Features::kTSXLDTRK); if (bitTest(regs.edx, 16)) features.add(Features::kTSXLDTRK);
if (bitTest(regs.edx, 18)) features.add(Features::kPCONFIG); if (bitTest(regs.edx, 18)) features.add(Features::kPCONFIG);
if (bitTest(regs.edx, 20)) features.add(Features::kCET_IBT);
// Detect 'TSX' - Requires at least one of `HLE` and `RTM` features. // Detect 'TSX' - Requires at least one of `HLE` and `RTM` features.
if (features.hasHLE() || features.hasRTM()) if (features.hasHLE() || features.hasRTM())
@@ -329,7 +332,9 @@ ASMJIT_FAVOR_SIZE void detectCpu(CpuInfo& cpu) noexcept {
if (features.hasAVX512_F() && maxSubLeafId_0x7 >= 1) { if (features.hasAVX512_F() && maxSubLeafId_0x7 >= 1) {
cpuidQuery(&regs, 0x7, 1); cpuidQuery(&regs, 0x7, 1);
if (bitTest(regs.eax, 3)) features.add(Features::kAVX_VNNI);
if (bitTest(regs.eax, 5)) features.add(Features::kAVX512_BF16); if (bitTest(regs.eax, 5)) features.add(Features::kAVX512_BF16);
if (bitTest(regs.eax, 22)) features.add(Features::kHRESET);
} }
// -------------------------------------------------------------------------- // --------------------------------------------------------------------------

View File

@@ -74,9 +74,10 @@ public:
kAVX512_VNNI, //!< CPU has AVX512_VNNI (vector neural network instructions). kAVX512_VNNI, //!< CPU has AVX512_VNNI (vector neural network instructions).
kAVX512_VP2INTERSECT, //!< CPU has AVX512_VP2INTERSECT kAVX512_VP2INTERSECT, //!< CPU has AVX512_VP2INTERSECT
kAVX512_VPOPCNTDQ, //!< CPU has AVX512_VPOPCNTDQ (VPOPCNT[D|Q] instructions). kAVX512_VPOPCNTDQ, //!< CPU has AVX512_VPOPCNTDQ (VPOPCNT[D|Q] instructions).
kAVX_VNNI, //!< CPU has AVX_VNNI (VEX encoding of vpdpbusd/vpdpbusds/vpdpwssd/vpdpwssds).
kBMI, //!< CPU has BMI (bit manipulation instructions #1). kBMI, //!< CPU has BMI (bit manipulation instructions #1).
kBMI2, //!< CPU has BMI2 (bit manipulation instructions #2). kBMI2, //!< CPU has BMI2 (bit manipulation instructions #2).
kCET_IBT, //!< CPU has CET-IBT. kCET_IBT, //!< CPU has CET-IBT (indirect branch tracking).
kCET_SS, //!< CPU has CET-SS. kCET_SS, //!< CPU has CET-SS.
kCLDEMOTE, //!< CPU has CLDEMOTE (cache line demote). kCLDEMOTE, //!< CPU has CLDEMOTE (cache line demote).
kCLFLUSH, //!< CPU has CLFLUSH (Cache Line flush). kCLFLUSH, //!< CPU has CLFLUSH (Cache Line flush).
@@ -99,6 +100,7 @@ public:
kGEODE, //!< CPU has GEODE extensions (3DNOW additions). kGEODE, //!< CPU has GEODE extensions (3DNOW additions).
kGFNI, //!< CPU has GFNI (Galois field instructions). kGFNI, //!< CPU has GFNI (Galois field instructions).
kHLE, //!< CPU has HLE. kHLE, //!< CPU has HLE.
kHRESET, //!< CPU has HRESET.
kI486, //!< CPU has I486 features (I486+ support). kI486, //!< CPU has I486 features (I486+ support).
kLAHFSAHF, //!< CPU has LAHF/SAHF (LAHF/SAHF in 64-bit mode) [X86_64]. kLAHFSAHF, //!< CPU has LAHF/SAHF (LAHF/SAHF in 64-bit mode) [X86_64].
kLWP, //!< CPU has LWP (lightweight profiling) [AMD]. kLWP, //!< CPU has LWP (lightweight profiling) [AMD].
@@ -147,6 +149,7 @@ public:
kTBM, //!< CPU has TBM (trailing bit manipulation) [AMD]. kTBM, //!< CPU has TBM (trailing bit manipulation) [AMD].
kTSX, //!< CPU has TSX. kTSX, //!< CPU has TSX.
kTSXLDTRK, //!< CPU has TSXLDTRK. kTSXLDTRK, //!< CPU has TSXLDTRK.
kUINTR, //!< CPU has UINTR (user interrupts).
kVAES, //!< CPU has VAES (vector AES 256|512 bit support). kVAES, //!< CPU has VAES (vector AES 256|512 bit support).
kVMX, //!< CPU has VMX (virtualization) [INTEL]. kVMX, //!< CPU has VMX (virtualization) [INTEL].
kVPCLMULQDQ, //!< CPU has VPCLMULQDQ (vector PCLMULQDQ 256|512-bit support). kVPCLMULQDQ, //!< CPU has VPCLMULQDQ (vector PCLMULQDQ 256|512-bit support).
@@ -217,8 +220,11 @@ public:
ASMJIT_X86_FEATURE(AVX512_VNNI) ASMJIT_X86_FEATURE(AVX512_VNNI)
ASMJIT_X86_FEATURE(AVX512_VP2INTERSECT) ASMJIT_X86_FEATURE(AVX512_VP2INTERSECT)
ASMJIT_X86_FEATURE(AVX512_VPOPCNTDQ) ASMJIT_X86_FEATURE(AVX512_VPOPCNTDQ)
ASMJIT_X86_FEATURE(AVX_VNNI)
ASMJIT_X86_FEATURE(BMI) ASMJIT_X86_FEATURE(BMI)
ASMJIT_X86_FEATURE(BMI2) ASMJIT_X86_FEATURE(BMI2)
ASMJIT_X86_FEATURE(CET_IBT)
ASMJIT_X86_FEATURE(CET_SS)
ASMJIT_X86_FEATURE(CLDEMOTE) ASMJIT_X86_FEATURE(CLDEMOTE)
ASMJIT_X86_FEATURE(CLFLUSH) ASMJIT_X86_FEATURE(CLFLUSH)
ASMJIT_X86_FEATURE(CLFLUSHOPT) ASMJIT_X86_FEATURE(CLFLUSHOPT)
@@ -240,6 +246,7 @@ public:
ASMJIT_X86_FEATURE(GEODE) ASMJIT_X86_FEATURE(GEODE)
ASMJIT_X86_FEATURE(GFNI) ASMJIT_X86_FEATURE(GFNI)
ASMJIT_X86_FEATURE(HLE) ASMJIT_X86_FEATURE(HLE)
ASMJIT_X86_FEATURE(HRESET)
ASMJIT_X86_FEATURE(I486) ASMJIT_X86_FEATURE(I486)
ASMJIT_X86_FEATURE(LAHFSAHF) ASMJIT_X86_FEATURE(LAHFSAHF)
ASMJIT_X86_FEATURE(LWP) ASMJIT_X86_FEATURE(LWP)
@@ -287,6 +294,7 @@ public:
ASMJIT_X86_FEATURE(TBM) ASMJIT_X86_FEATURE(TBM)
ASMJIT_X86_FEATURE(TSX) ASMJIT_X86_FEATURE(TSX)
ASMJIT_X86_FEATURE(TSXLDTRK) ASMJIT_X86_FEATURE(TSXLDTRK)
ASMJIT_X86_FEATURE(UINTR)
ASMJIT_X86_FEATURE(XSAVE) ASMJIT_X86_FEATURE(XSAVE)
ASMJIT_X86_FEATURE(XSAVEC) ASMJIT_X86_FEATURE(XSAVEC)
ASMJIT_X86_FEATURE(XSAVEOPT) ASMJIT_X86_FEATURE(XSAVEOPT)

View File

@@ -238,6 +238,7 @@ Error FormatterInternal::formatFeature(String& sb, uint32_t featureId) noexcept
"AVX512_VNNI\0" "AVX512_VNNI\0"
"AVX512_VP2INTERSECT\0" "AVX512_VP2INTERSECT\0"
"AVX512_VPOPCNTDQ\0" "AVX512_VPOPCNTDQ\0"
"AVX_VNNI\0"
"BMI\0" "BMI\0"
"BMI2\0" "BMI2\0"
"CET_IBT\0" "CET_IBT\0"
@@ -263,6 +264,7 @@ Error FormatterInternal::formatFeature(String& sb, uint32_t featureId) noexcept
"GEODE\0" "GEODE\0"
"GFNI\0" "GFNI\0"
"HLE\0" "HLE\0"
"HRESET\0"
"I486\0" "I486\0"
"LAHFSAHF\0" "LAHFSAHF\0"
"LWP\0" "LWP\0"
@@ -311,6 +313,7 @@ Error FormatterInternal::formatFeature(String& sb, uint32_t featureId) noexcept
"TBM\0" "TBM\0"
"TSX\0" "TSX\0"
"TSXLDTRK\0" "TSXLDTRK\0"
"UINTR\0"
"VAES\0" "VAES\0"
"VMX\0" "VMX\0"
"VPCLMULQDQ\0" "VPCLMULQDQ\0"
@@ -325,13 +328,13 @@ Error FormatterInternal::formatFeature(String& sb, uint32_t featureId) noexcept
static const uint16_t sFeatureIndex[] = { static const uint16_t sFeatureIndex[] = {
0, 5, 8, 11, 17, 24, 28, 34, 44, 53, 62, 71, 75, 80, 94, 108, 120, 134, 144, 0, 5, 8, 11, 17, 24, 28, 34, 44, 53, 62, 71, 75, 80, 94, 108, 120, 134, 144,
155, 165, 176, 185, 197, 208, 220, 233, 243, 255, 275, 292, 296, 301, 309, 155, 165, 176, 185, 197, 208, 220, 233, 243, 255, 275, 292, 301, 305, 310,
316, 325, 333, 344, 349, 356, 361, 372, 382, 388, 395, 400, 405, 409, 414, 318, 325, 334, 342, 353, 358, 365, 370, 381, 391, 397, 404, 409, 414, 418,
418, 427, 432, 440, 446, 451, 455, 460, 469, 473, 479, 487, 491, 496, 504, 423, 427, 436, 441, 449, 455, 460, 464, 471, 476, 485, 489, 495, 503, 507,
513, 519, 529, 537, 541, 545, 550, 558, 564, 574, 582, 589, 599, 611, 619, 512, 520, 529, 535, 545, 553, 557, 561, 566, 574, 580, 590, 598, 605, 615,
625, 631, 638, 645, 651, 658, 662, 672, 676, 683, 688, 693, 697, 701, 705, 627, 635, 641, 647, 654, 661, 667, 674, 678, 688, 692, 699, 704, 709, 713,
710, 715, 722, 729, 735, 741, 745, 749, 753, 762, 767, 771, 782, 790, 799, 717, 721, 726, 731, 738, 745, 751, 757, 761, 765, 769, 778, 784, 789, 793,
803, 809, 816, 825, 832 804, 812, 821, 825, 831, 838, 847, 854
}; };
// @EnumStringEnd@ // @EnumStringEnd@
@@ -871,8 +874,9 @@ ASMJIT_FAVOR_SIZE Error FormatterInternal::formatInstruction(
} }
// VEX|EVEX options. // VEX|EVEX options.
if (options & Inst::kOptionVex3) ASMJIT_PROPAGATE(sb.append("vex3 ")); if (options & Inst::kOptionVex) ASMJIT_PROPAGATE(sb.append("{vex} "));
if (options & Inst::kOptionEvex) ASMJIT_PROPAGATE(sb.append("evex ")); if (options & Inst::kOptionVex3) ASMJIT_PROPAGATE(sb.append("{vex3} "));
if (options & Inst::kOptionEvex) ASMJIT_PROPAGATE(sb.append("{evex} "));
ASMJIT_PROPAGATE(InstAPI::instIdToString(arch, instId, sb)); ASMJIT_PROPAGATE(InstAPI::instIdToString(arch, instId, sb));
} }

View File

@@ -27,13 +27,13 @@
#include "../core/archtraits.h" #include "../core/archtraits.h"
#include "../core/inst.h" #include "../core/inst.h"
ASMJIT_BEGIN_SUB_NAMESPACE(x86)
//! \namespace asmjit::x86 //! \namespace asmjit::x86
//! \ingroup asmjit_x86 //! \ingroup asmjit_x86
//! //!
//! X86/X64 API. //! X86/X64 API.
ASMJIT_BEGIN_SUB_NAMESPACE(x86)
//! \addtogroup asmjit_x86 //! \addtogroup asmjit_x86
//! \{ //! \{
@@ -464,6 +464,7 @@ struct Inst : public BaseInst {
kIdMonitor, //!< Instruction 'monitor' {MONITOR}. kIdMonitor, //!< Instruction 'monitor' {MONITOR}.
kIdMonitorx, //!< Instruction 'monitorx' {MONITORX}. kIdMonitorx, //!< Instruction 'monitorx' {MONITORX}.
kIdMov, //!< Instruction 'mov'. kIdMov, //!< Instruction 'mov'.
kIdMovabs, //!< Instruction 'movabs' (X64).
kIdMovapd, //!< Instruction 'movapd' {SSE2}. kIdMovapd, //!< Instruction 'movapd' {SSE2}.
kIdMovaps, //!< Instruction 'movaps' {SSE}. kIdMovaps, //!< Instruction 'movaps' {SSE}.
kIdMovbe, //!< Instruction 'movbe' {MOVBE}. kIdMovbe, //!< Instruction 'movbe' {MOVBE}.
@@ -1229,10 +1230,10 @@ struct Inst : public BaseInst {
kIdVpcomw, //!< Instruction 'vpcomw' {XOP}. kIdVpcomw, //!< Instruction 'vpcomw' {XOP}.
kIdVpconflictd, //!< Instruction 'vpconflictd' {AVX512_CDI+VL}. kIdVpconflictd, //!< Instruction 'vpconflictd' {AVX512_CDI+VL}.
kIdVpconflictq, //!< Instruction 'vpconflictq' {AVX512_CDI+VL}. kIdVpconflictq, //!< Instruction 'vpconflictq' {AVX512_CDI+VL}.
kIdVpdpbusd, //!< Instruction 'vpdpbusd' {AVX512_VNNI+VL}. kIdVpdpbusd, //!< Instruction 'vpdpbusd' {AVX_VNNI|AVX512_VNNI+VL}.
kIdVpdpbusds, //!< Instruction 'vpdpbusds' {AVX512_VNNI+VL}. kIdVpdpbusds, //!< Instruction 'vpdpbusds' {AVX_VNNI|AVX512_VNNI+VL}.
kIdVpdpwssd, //!< Instruction 'vpdpwssd' {AVX512_VNNI+VL}. kIdVpdpwssd, //!< Instruction 'vpdpwssd' {AVX_VNNI|AVX512_VNNI+VL}.
kIdVpdpwssds, //!< Instruction 'vpdpwssds' {AVX512_VNNI+VL}. kIdVpdpwssds, //!< Instruction 'vpdpwssds' {AVX_VNNI|AVX512_VNNI+VL}.
kIdVperm2f128, //!< Instruction 'vperm2f128' {AVX}. kIdVperm2f128, //!< Instruction 'vperm2f128' {AVX}.
kIdVperm2i128, //!< Instruction 'vperm2i128' {AVX2}. kIdVperm2i128, //!< Instruction 'vperm2i128' {AVX2}.
kIdVpermb, //!< Instruction 'vpermb' {AVX512_VBMI+VL}. kIdVpermb, //!< Instruction 'vpermb' {AVX512_VBMI+VL}.
@@ -1602,8 +1603,9 @@ struct Inst : public BaseInst {
//! Instruction options. //! Instruction options.
enum Options : uint32_t { enum Options : uint32_t {
kOptionModMR = 0x00000100u, //!< Use ModMR instead of ModRM when it's available.
kOptionVex3 = 0x00000400u, //!< Use 3-byte VEX prefix if possible (AVX) (must be 0x00000400). kOptionVex3 = 0x00000400u, //!< Use 3-byte VEX prefix if possible (AVX) (must be 0x00000400).
kOptionModMR = 0x00000800u, //!< Use ModMR instead of ModRM when it's available. kOptionVex = 0x00000800u, //!< Use VEX prefix when both VEX|EVEX prefixes are available (HINT: AVX_VNNI).
kOptionEvex = 0x00001000u, //!< Use 4-byte EVEX prefix if possible (AVX-512) (must be 0x00001000). kOptionEvex = 0x00001000u, //!< Use 4-byte EVEX prefix if possible (AVX-512) (must be 0x00001000).
kOptionLock = 0x00002000u, //!< LOCK prefix (lock-enabled instructions only). kOptionLock = 0x00002000u, //!< LOCK prefix (lock-enabled instructions only).

View File

@@ -1067,6 +1067,35 @@ Error InstInternal::queryRWInfo(uint32_t arch, const BaseInst& inst, const Opera
break; break;
} }
case InstDB::RWInfo::kCategoryMovabs: {
if (opCount == 2) {
if (Reg::isGp(operands[0]) && operands[1].isMem()) {
const Reg& o0 = operands[0].as<Reg>();
out->_operands[0].reset(W | RegPhys, o0.size(), Gp::kIdAx);
out->_operands[1].reset(R | MibRead, o0.size());
rwZeroExtendGp(out->_operands[0], operands[0].as<Gp>(), nativeGpSize);
return kErrorOk;
}
if (operands[0].isMem() && Reg::isGp(operands[1])) {
const Reg& o1 = operands[1].as<Reg>();
out->_operands[0].reset(W | MibRead, o1.size());
out->_operands[1].reset(R | RegPhys, o1.size(), Gp::kIdAx);
return kErrorOk;
}
if (Reg::isGp(operands[0]) && operands[1].isImm()) {
const Reg& o0 = operands[0].as<Reg>();
out->_operands[0].reset(W, o0.size());
out->_operands[1].reset();
rwZeroExtendGp(out->_operands[0], operands[0].as<Gp>(), nativeGpSize);
return kErrorOk;
}
}
break;
}
case InstDB::RWInfo::kCategoryImul: { case InstDB::RWInfo::kCategoryImul: {
// Special case for 'imul' instruction. // Special case for 'imul' instruction.
// //
@@ -1400,6 +1429,14 @@ static RegAnalysis InstInternal_regAnalysis(const Operand_* operands, size_t opC
return RegAnalysis { mask, highVecUsed }; return RegAnalysis { mask, highVecUsed };
} }
//! Computes a combined indicator of whether an instruction uses AVX-512.
//!
//! The result is the bitwise OR of three independent signals and is meant to
//! be tested for zero / non-zero only:
//!
//!   - option bits selected by `Inst::kOptionEvex | Inst::_kOptionAvx512Mask`
//!     that are set in `instOptions`,
//!   - whether `extraReg` has the `Reg::kTypeKReg` register type,
//!   - bits of `regAnalysis.regTypeMask` that fall into the mask built by
//!     `Support::bitMask(Reg::kTypeZmm, Reg::kTypeKReg)`.
//!
//! \return Zero when none of the signals is present, non-zero otherwise.
static ASMJIT_INLINE uint32_t InstInternal_usesAvx512(uint32_t instOptions, const RegOnly& extraReg, const RegAnalysis& regAnalysis) noexcept {
  // Register-type bits (ZMM / K) that were collected from the operands.
  const uint32_t zmmOrKRegBits = regAnalysis.regTypeMask & Support::bitMask(Reg::kTypeZmm, Reg::kTypeKReg);

  // 1 when the extra register is a K register, 0 otherwise.
  const uint32_t extraRegIsKMask = uint32_t(extraReg.type() == Reg::kTypeKReg);

  // Explicit EVEX request or any option covered by `_kOptionAvx512Mask`.
  const uint32_t evexOptionBits = instOptions & (Inst::kOptionEvex | Inst::_kOptionAvx512Mask);

  return evexOptionBits | extraRegIsKMask | zmmOrKRegBits;
}
Error InstInternal::queryFeatures(uint32_t arch, const BaseInst& inst, const Operand_* operands, size_t opCount, BaseFeatures* out) noexcept { Error InstInternal::queryFeatures(uint32_t arch, const BaseInst& inst, const Operand_* operands, size_t opCount, BaseFeatures* out) noexcept {
// Only called when `arch` matches X86 family. // Only called when `arch` matches X86 family.
DebugUtils::unused(arch); DebugUtils::unused(arch);
@@ -1509,10 +1546,7 @@ Error InstInternal::queryFeatures(uint32_t arch, const BaseInst& inst, const Ope
if (out->has(Features::kAVX) || out->has(Features::kAVX2) || out->has(Features::kFMA) || out->has(Features::kF16C)) { if (out->has(Features::kAVX) || out->has(Features::kAVX2) || out->has(Features::kFMA) || out->has(Features::kF16C)) {
// Only AVX512-F|BW|DQ allow to encode AVX/AVX2/FMA/F16C instructions // Only AVX512-F|BW|DQ allow to encode AVX/AVX2/FMA/F16C instructions
if (out->has(Features::kAVX512_F) || out->has(Features::kAVX512_BW) || out->has(Features::kAVX512_DQ)) { if (out->has(Features::kAVX512_F) || out->has(Features::kAVX512_BW) || out->has(Features::kAVX512_DQ)) {
uint32_t hasEvex = options & (Inst::kOptionEvex | Inst::_kOptionAvx512Mask); uint32_t usesAvx512 = InstInternal_usesAvx512(options, inst.extraReg(), regAnalysis);
uint32_t hasKMask = inst.extraReg().type() == Reg::kTypeKReg;
uint32_t hasKOrZmm = regAnalysis.regTypeMask & Support::bitMask(Reg::kTypeZmm, Reg::kTypeKReg);
uint32_t mustUseEvex = 0; uint32_t mustUseEvex = 0;
switch (instId) { switch (instId) {
@@ -1540,13 +1574,26 @@ Error InstInternal::queryFeatures(uint32_t arch, const BaseInst& inst, const Ope
break; break;
} }
if (!(hasEvex | mustUseEvex | hasKMask | hasKOrZmm | regAnalysis.highVecUsed)) if (!(usesAvx512 | mustUseEvex | regAnalysis.highVecUsed))
out->remove(Features::kAVX512_F, Features::kAVX512_BW, Features::kAVX512_DQ, Features::kAVX512_VL); out->remove(Features::kAVX512_F, Features::kAVX512_BW, Features::kAVX512_DQ, Features::kAVX512_VL);
else else
out->remove(Features::kAVX, Features::kAVX2, Features::kFMA, Features::kF16C); out->remove(Features::kAVX, Features::kAVX2, Features::kFMA, Features::kF16C);
} }
} }
// Handle AVX_VNNI vs AVX512_VNNI overlap.
if (out->has(Features::kAVX512_VNNI)) {
// By default the AVX512_VNNI instruction should be used, because it was
// introduced first. However, VEX|VEX3 prefix can be used to force AVX_VNNI
// instead.
uint32_t usesAvx512 = InstInternal_usesAvx512(options, inst.extraReg(), regAnalysis);
if (!usesAvx512 && (options & (Inst::kOptionVex | Inst::kOptionVex3)) != 0)
out->remove(Features::kAVX512_VNNI, Features::kAVX512_VL);
else
out->remove(Features::kAVX_VNNI);
}
// Clear AVX512_VL if ZMM register is used. // Clear AVX512_VL if ZMM register is used.
if (regAnalysis.hasRegType(Reg::kTypeZmm)) if (regAnalysis.hasRegType(Reg::kTypeZmm))
out->remove(Features::kAVX512_VL); out->remove(Features::kAVX512_VL);

File diff suppressed because it is too large Load Diff

View File

@@ -168,6 +168,7 @@ enum Flags : uint32_t {
kFlagVsib = 0x00040000u, //!< Instruction uses VSIB instead of legacy SIB. kFlagVsib = 0x00040000u, //!< Instruction uses VSIB instead of legacy SIB.
kFlagVex = 0x00080000u, //!< Instruction can be encoded by VEX|XOP (AVX|AVX2|BMI|XOP|...). kFlagVex = 0x00080000u, //!< Instruction can be encoded by VEX|XOP (AVX|AVX2|BMI|XOP|...).
kFlagEvex = 0x00100000u, //!< Instruction can be encoded by EVEX (AVX512). kFlagEvex = 0x00100000u, //!< Instruction can be encoded by EVEX (AVX512).
kFlagPreferEvex = 0x00200000u, //!< EVEX encoding is preferred over VEX encoding (AVX512_VNNI vs AVX_VNNI).
// FPU Flags // FPU Flags
// --------- // ---------
@@ -331,6 +332,9 @@ struct CommonInfo {
//! Tests whether the instruction uses EVEX (can be set together with VEX if both are encodable). //! Tests whether the instruction uses EVEX (can be set together with VEX if both are encodable).
inline bool isVexOrEvex() const noexcept { return hasFlag(kFlagVex | kFlagEvex); } inline bool isVexOrEvex() const noexcept { return hasFlag(kFlagVex | kFlagEvex); }
//! Tests whether the instruction should prefer EVEX prefix instead of VEX prefix.
//!
//! Returns true when the `kFlagPreferEvex` flag is set.
inline bool preferEvex() const noexcept { return hasFlag(kFlagPreferEvex); }
//! Tests whether the instruction supports AVX512 masking {k}. //! Tests whether the instruction supports AVX512 masking {k}.
inline bool hasAvx512K() const noexcept { return hasFlag(kFlagAvx512K); } inline bool hasAvx512K() const noexcept { return hasFlag(kFlagAvx512K); }
//! Tests whether the instruction supports AVX512 zeroing {k}{z}. //! Tests whether the instruction supports AVX512 zeroing {k}{z}.

View File

@@ -88,6 +88,7 @@ enum EncodingId : uint32_t {
kEncodingX86JmpRel, //!< X86 xbegin. kEncodingX86JmpRel, //!< X86 xbegin.
kEncodingX86Lea, //!< X86 lea. kEncodingX86Lea, //!< X86 lea.
kEncodingX86Mov, //!< X86 mov (all possible cases). kEncodingX86Mov, //!< X86 mov (all possible cases).
kEncodingX86Movabs, //!< X86 movabs.
kEncodingX86MovsxMovzx, //!< X86 movsx, movzx. kEncodingX86MovsxMovzx, //!< X86 movsx, movzx.
kEncodingX86MovntiMovdiri, //!< X86 movnti/movdiri. kEncodingX86MovntiMovdiri, //!< X86 movnti/movdiri.
kEncodingX86EnqcmdMovdir64b, //!< X86 enqcmd/enqcmds/movdir64b. kEncodingX86EnqcmdMovdir64b, //!< X86 enqcmd/enqcmds/movdir64b.
@@ -239,6 +240,7 @@ struct RWInfo {
enum Category : uint8_t { enum Category : uint8_t {
kCategoryGeneric, kCategoryGeneric,
kCategoryMov, kCategoryMov,
kCategoryMovabs,
kCategoryImul, kCategoryImul,
kCategoryMovh64, kCategoryMovh64,
kCategoryVmaskmov, kCategoryVmaskmov,

View File

@@ -0,0 +1,83 @@
// AsmJit - Machine code generation for C++
//
// * Official AsmJit Home Page: https://asmjit.com
// * Official Github Repository: https://github.com/asmjit/asmjit
//
// Copyright (c) 2008-2020 The AsmJit Authors
//
// This software is provided 'as-is', without any express or implied
// warranty. In no event will the authors be held liable for any damages
// arising from the use of this software.
//
// Permission is granted to anyone to use this software for any purpose,
// including commercial applications, and to alter it and redistribute it
// freely, subject to the following restrictions:
//
// 1. The origin of this software must not be misrepresented; you must not
// claim that you wrote the original software. If you use this software
// in a product, an acknowledgment in the product documentation would be
// appreciated but is not required.
// 2. Altered source versions must be plainly marked as such, and must not be
// misrepresented as being the original software.
// 3. This notice may not be removed or altered from any source distribution.
#include <asmjit/core.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "asmjit_test_assembler.h"
#include "cmdline.h"
using namespace asmjit;
#if defined(ASMJIT_BUILD_X86)
bool testX86Assembler(const TestSettings& settings) noexcept;
bool testX64Assembler(const TestSettings& settings) noexcept;
#endif
//! Entry point of the assembler test-suite.
//!
//! Recognized command line arguments:
//!   - `--help`        - prints the banner and usage and exits with 0.
//!   - `--arch=<ARCH>` - selects the suite to run: 'all' (default), 'x86', or 'x64'.
//!   - `--quiet`       - stored in `TestSettings::quiet` and passed to the suites.
//!
//! Returns 0 when every selected suite passed, 1 when a suite failed or when
//! `--arch` names an architecture this program does not know. Rejecting unknown
//! values matters because such a value would otherwise match no suite, run no
//! test at all, and still be reported as a success.
int main(int argc, char* argv[]) {
  CmdLine cmdLine(argc, argv);

  TestSettings settings {};
  settings.quiet = cmdLine.hasArg("--quiet");

  printf("AsmJit Assembler Test-Suite v%u.%u.%u:\n\n",
    unsigned((ASMJIT_LIBRARY_VERSION >> 16) ),
    unsigned((ASMJIT_LIBRARY_VERSION >> 8) & 0xFF),
    unsigned((ASMJIT_LIBRARY_VERSION ) & 0xFF));

  printf("Usage:\n");
  printf(" --help Show usage only\n");
  printf(" --arch=<ARCH> Select architecture to run ('all' by default)\n");
  printf(" --quiet Show only assembling errors [%s]\n", settings.quiet ? "x" : " ");
  printf("\n");

  if (cmdLine.hasArg("--help"))
    return 0;

  const char* arch = cmdLine.valueOf("--arch", "all");

  const bool runAll = strcmp(arch, "all") == 0;
  const bool runX86 = runAll || strcmp(arch, "x86") == 0;
  const bool runX64 = runAll || strcmp(arch, "x64") == 0;

  // A misspelled architecture must not be reported as a passing run.
  if (!runX86 && !runX64) {
    printf("** FAILURE: unknown architecture '%s' (expected 'all', 'x86', or 'x64') **\n", arch);
    return 1;
  }

  bool x86Failed = false;
  bool x64Failed = false;

#if defined(ASMJIT_BUILD_X86)
  if (runX86)
    x86Failed = !testX86Assembler(settings);

  if (runX64)
    x64Failed = !testX64Assembler(settings);
#endif

  bool failed = x86Failed || x64Failed;

  if (failed) {
    if (x86Failed) printf("** X86 test suite failed **\n");
    if (x64Failed) printf("** X64 test suite failed **\n");
    printf("** FAILURE **\n");
  }
  else {
    printf("** SUCCESS **\n");
  }

  return failed ? 1 : 0;
}

View File

@@ -0,0 +1,96 @@
// AsmJit - Machine code generation for C++
//
// * Official AsmJit Home Page: https://asmjit.com
// * Official Github Repository: https://github.com/asmjit/asmjit
//
// Copyright (c) 2008-2020 The AsmJit Authors
//
// This software is provided 'as-is', without any express or implied
// warranty. In no event will the authors be held liable for any damages
// arising from the use of this software.
//
// Permission is granted to anyone to use this software for any purpose,
// including commercial applications, and to alter it and redistribute it
// freely, subject to the following restrictions:
//
// 1. The origin of this software must not be misrepresented; you must not
// claim that you wrote the original software. If you use this software
// in a product, an acknowledgment in the product documentation would be
// appreciated but is not required.
// 2. Altered source versions must be plainly marked as such, and must not be
// misrepresented as being the original software.
// 3. This notice may not be removed or altered from any source distribution.
#ifndef ASMJIT_TEST_ASSEMBLER_H_INCLUDED
#define ASMJIT_TEST_ASSEMBLER_H_INCLUDED
#include <asmjit/core.h>
#include <stdio.h>
//! Settings shared by the assembler test suites.
struct TestSettings {
//! When true, `AssemblerTester::testInstruction()` does not print the "OK"
//! line of instructions that passed; failures are always printed.
bool quiet;
};
template<typename AssemblerType>
class AssemblerTester {
public:
asmjit::Environment env {};
asmjit::CodeHolder code {};
AssemblerType assembler {};
const TestSettings& settings;
size_t passed {};
size_t count {};
AssemblerTester(uint32_t arch, const TestSettings& settings) noexcept
: env(arch),
settings(settings) {}
void printHeader(const char* archName) noexcept {
printf("%s assembler tests:\n", archName);
}
void printSummary() noexcept {
printf(" Passed: %zu / %zu tests\n\n", passed, count);
}
bool didPass() const noexcept { return passed == count; }
void beforeInstruction() noexcept {
code.init(env, 0);
code.attach(&assembler);
}
bool testInstruction(const char* expectedOpcode, const char* s, uint32_t err) noexcept {
count++;
if (err) {
printf(" !! %s\n"
" <%s>\n", s, asmjit::DebugUtils::errorAsString(err));
return false;
}
asmjit::String encodedOpcode;
asmjit::Section* text = code.textSection();
encodedOpcode.appendHex(text->data(), text->bufferSize());
if (encodedOpcode != expectedOpcode) {
printf(" !! [%s] <- %s\n"
" [%s] (Expected)\n", encodedOpcode.data(), s, expectedOpcode);
return false;
}
if (!settings.quiet)
printf(" OK [%s] <- %s\n", encodedOpcode.data(), s);
passed++;
return true;
}
void afterInstruction() noexcept {
code.reset();
}
};
#endif // ASMJIT_TEST_ASSEMBLER_H_INCLUDED

View File

@@ -0,0 +1,604 @@
// AsmJit - Machine code generation for C++
//
// * Official AsmJit Home Page: https://asmjit.com
// * Official Github Repository: https://github.com/asmjit/asmjit
//
// Copyright (c) 2008-2020 The AsmJit Authors
//
// This software is provided 'as-is', without any express or implied
// warranty. In no event will the authors be held liable for any damages
// arising from the use of this software.
//
// Permission is granted to anyone to use this software for any purpose,
// including commercial applications, and to alter it and redistribute it
// freely, subject to the following restrictions:
//
// 1. The origin of this software must not be misrepresented; you must not
// claim that you wrote the original software. If you use this software
// in a product, an acknowledgment in the product documentation would be
// appreciated but is not required.
// 2. Altered source versions must be plainly marked as such, and must not be
// misrepresented as being the original software.
// 3. This notice may not be removed or altered from any source distribution.
#include <asmjit/core.h>
#if defined(ASMJIT_BUILD_X86)
#include <asmjit/x86.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "asmjit_test_assembler.h"
#include "cmdline.h"
using namespace asmjit;
// Tests a single instruction.
//
// OPCODE is the expected encoding as a hex string; the remaining arguments form
// an expression that is invoked on `tester.assembler` (for example
// `mov(eax, ebx)` or `lock().add(ptr(eax), ebx)`). The expression is also
// stringified and passed to `testInstruction()` for use in messages.
//
// Requires a variable called `tester` in the enclosing scope. The result of
// `testInstruction()` is intentionally not used here - the tester object keeps
// its own `passed` / `count` counters.
#define TEST_INSTRUCTION(OPCODE, ...) \
do { \
tester.beforeInstruction(); \
tester.testInstruction(OPCODE, #__VA_ARGS__, tester.assembler.__VA_ARGS__); \
tester.afterInstruction(); \
} while (0)
// Runs the assembler tests that target `Environment::kArchX86`.
//
// Each TEST_INSTRUCTION line assembles one instruction and compares the hex
// dump of the emitted bytes with the string on the left. A summary is printed
// at the end and the function returns true only when every instruction passed.
bool testX86Assembler(const TestSettings& settings) noexcept {
using namespace x86;
AssemblerTester<Assembler> tester(Environment::kArchX86, settings);
tester.printHeader("X86");
// Base Instructions.
TEST_INSTRUCTION("8AE0" , mov(ah, al));
TEST_INSTRUCTION("8AF0" , mov(dh, al));
TEST_INSTRUCTION("8BC3" , mov(eax, ebx));
// With mod_mr() the expected opcode byte changes from 8B to 89.
TEST_INSTRUCTION("89D8" , mod_mr().mov(eax, ebx));
TEST_INSTRUCTION("B800000000" , mov(eax, 0));
TEST_INSTRUCTION("BB00000000" , mov(ebx, 0));
TEST_INSTRUCTION("B8FFFFFFFF" , mov(eax, 0xFFFFFFFF));
TEST_INSTRUCTION("8CE0" , mov(eax, fs));
TEST_INSTRUCTION("8EE0" , mov(fs, eax));
TEST_INSTRUCTION("8B10" , mov(edx, ptr(eax)));
TEST_INSTRUCTION("8B10" , mov(edx, ptr(eax, 0)));
TEST_INSTRUCTION("8B9080000000" , mov(edx, ptr(eax, 128)));
TEST_INSTRUCTION("8B1408" , mov(edx, ptr(eax, ecx)));
TEST_INSTRUCTION("8B940880000000" , mov(edx, ptr(eax, ecx, 0, 128)));
TEST_INSTRUCTION("8B1408" , mov(edx, ptr(eax, ecx)));
TEST_INSTRUCTION("8B544820" , mov(edx, ptr(eax, ecx, 1, 32)));
TEST_INSTRUCTION("8B548840" , mov(edx, ptr(eax, ecx, 2, 64)));
TEST_INSTRUCTION("8B94C800010000" , mov(edx, ptr(eax, ecx, 3, 128 + 128)));
TEST_INSTRUCTION("8B1408" , mov(edx, ptr(eax, ecx)));
TEST_INSTRUCTION("8B940880000000" , mov(edx, ptr(eax, ecx, 0, 128)));
TEST_INSTRUCTION("8B1408" , mov(edx, ptr(eax, ecx)));
TEST_INSTRUCTION("8B544820" , mov(edx, ptr(eax, ecx, 1, 32)));
TEST_INSTRUCTION("8B54C802" , mov(edx, ptr(eax, ecx, 3, 2)));
TEST_INSTRUCTION("0F20C0" , mov(eax, cr0));
TEST_INSTRUCTION("F00F20C0" , mov(eax, cr8));
// The same absolute-address store is expected as A3 by default, as 89 /r
// when mod_mr() is used, and as 89 /r for a register other than eax.
TEST_INSTRUCTION("A344332211" , mov(ptr(0x11223344), eax));
TEST_INSTRUCTION("890544332211" , mod_mr().mov(ptr(0x11223344), eax));
TEST_INSTRUCTION("891D44332211" , mov(ptr(0x11223344), ebx));
TEST_INSTRUCTION("0FBE07" , movsx(eax, byte_ptr(edi)));
TEST_INSTRUCTION("0FBF07" , movsx(eax, word_ptr(edi)));
TEST_INSTRUCTION("0FB607" , movzx(eax, byte_ptr(edi)));
TEST_INSTRUCTION("0FB6C6" , movzx(eax, dh));
TEST_INSTRUCTION("0FB707" , movzx(eax, word_ptr(edi)));
TEST_INSTRUCTION("03D9" , add(ebx, ecx));
TEST_INSTRUCTION("83C001" , add(eax, 1));
TEST_INSTRUCTION("0504030201" , add(eax, 0x01020304));
TEST_INSTRUCTION("66050201" , add(ax, 0x0102));
TEST_INSTRUCTION("6603849004030201" , add(ax, ptr(eax, edx, 2, 0x01020304)));
TEST_INSTRUCTION("F00118" , lock().add(ptr(eax), ebx));
TEST_INSTRUCTION("F00FC138" , lock().xadd(ptr(eax), edi));
TEST_INSTRUCTION("660FBA2001" , bt(word_ptr(eax), 1));
TEST_INSTRUCTION("0FBA2001" , bt(dword_ptr(eax), 1));
TEST_INSTRUCTION("FE00" , inc(byte_ptr(eax)));
TEST_INSTRUCTION("66FF00" , inc(word_ptr(eax)));
TEST_INSTRUCTION("FF00" , inc(dword_ptr(eax)));
TEST_INSTRUCTION("F6D8" , neg(al));
TEST_INSTRUCTION("F6DC" , neg(ah));
TEST_INSTRUCTION("F7D8" , neg(eax));
TEST_INSTRUCTION("F7D0" , not_(eax));
TEST_INSTRUCTION("0F95C3" , setnz(bl));
TEST_INSTRUCTION("0F94C7" , setz(bh));
TEST_INSTRUCTION("F600FF" , test(byte_ptr(eax), 0xFF));
TEST_INSTRUCTION("66F700FF00" , test(word_ptr(eax), 0xFF));
TEST_INSTRUCTION("F700FF000000" , test(dword_ptr(eax), 0xFF));
TEST_INSTRUCTION("A836" , test(al, 0x36));
TEST_INSTRUCTION("F6C436" , test(ah, 0x36));
TEST_INSTRUCTION("50" , push(eax));
TEST_INSTRUCTION("51" , push(ecx));
TEST_INSTRUCTION("52" , push(edx));
TEST_INSTRUCTION("53" , push(ebx));
TEST_INSTRUCTION("54" , push(esp));
TEST_INSTRUCTION("55" , push(ebp));
TEST_INSTRUCTION("56" , push(esi));
TEST_INSTRUCTION("57" , push(edi));
TEST_INSTRUCTION("0E" , push(cs));
TEST_INSTRUCTION("16" , push(ss));
TEST_INSTRUCTION("1E" , push(ds));
TEST_INSTRUCTION("06" , push(es));
TEST_INSTRUCTION("0FA0" , push(fs));
TEST_INSTRUCTION("0FA8" , push(gs));
TEST_INSTRUCTION("C8010002" , enter(1, 2));
TEST_INSTRUCTION("C9" , leave());
TEST_INSTRUCTION("FF10" , call(ptr(eax)));
TEST_INSTRUCTION("FF10" , call(dword_ptr(eax)));
TEST_INSTRUCTION("66C501" , lds(ax, ptr(ecx)));
TEST_INSTRUCTION("C501" , lds(eax, ptr(ecx)));
TEST_INSTRUCTION("66C401" , les(ax, ptr(ecx)));
TEST_INSTRUCTION("C401" , les(eax, ptr(ecx)));
TEST_INSTRUCTION("660FB401" , lfs(ax, ptr(ecx)));
TEST_INSTRUCTION("0FB401" , lfs(eax, ptr(ecx)));
TEST_INSTRUCTION("660FB501" , lgs(ax, ptr(ecx)));
TEST_INSTRUCTION("0FB501" , lgs(eax, ptr(ecx)));
TEST_INSTRUCTION("660FB201" , lss(ax, ptr(ecx)));
TEST_INSTRUCTION("0FB201" , lss(eax, ptr(ecx)));
// NOP.
TEST_INSTRUCTION("90" , nop());
TEST_INSTRUCTION("660F1F0400" , nop(word_ptr(eax, eax)));
TEST_INSTRUCTION("660F1F0400" , nop(word_ptr(eax, eax), ax));
TEST_INSTRUCTION("660F1F1C00" , nop(word_ptr(eax, eax), bx));
TEST_INSTRUCTION("0F1F0400" , nop(dword_ptr(eax, eax)));
TEST_INSTRUCTION("0F1F0400" , nop(dword_ptr(eax, eax), eax));
TEST_INSTRUCTION("0F1F1C00" , nop(dword_ptr(eax, eax), ebx));
// LEA.
// Forms with 16-bit base/index registers are expected to start with the 67
// (address-size) prefix.
TEST_INSTRUCTION("67668D00" , lea(ax, ptr(bx, si)));
TEST_INSTRUCTION("67668D01" , lea(ax, ptr(bx, di)));
TEST_INSTRUCTION("67668D02" , lea(ax, ptr(bp, si)));
TEST_INSTRUCTION("67668D03" , lea(ax, ptr(bp, di)));
TEST_INSTRUCTION("67668D04" , lea(ax, ptr(si)));
TEST_INSTRUCTION("67668D05" , lea(ax, ptr(di)));
TEST_INSTRUCTION("67668D4600" , lea(ax, ptr(bp)));
TEST_INSTRUCTION("67668D07" , lea(ax, ptr(bx)));
TEST_INSTRUCTION("67668D4010" , lea(ax, ptr(bx, si, 0, 0x10)));
TEST_INSTRUCTION("67668D4120" , lea(ax, ptr(bx, di, 0, 0x20)));
TEST_INSTRUCTION("67668D4240" , lea(ax, ptr(bp, si, 0, 0x40)));
TEST_INSTRUCTION("67668D4360" , lea(ax, ptr(bp, di, 0, 0x60)));
TEST_INSTRUCTION("67668D848000" , lea(ax, ptr(si, 0x80)));
TEST_INSTRUCTION("67668D85A000" , lea(ax, ptr(di, 0xA0)));
TEST_INSTRUCTION("67668D86C000" , lea(ax, ptr(bp, 0xC0)));
TEST_INSTRUCTION("67668D87FF01" , lea(ax, ptr(bx, 0x01FF)));
TEST_INSTRUCTION("678D00" , lea(eax, ptr(bx, si)));
TEST_INSTRUCTION("678D01" , lea(eax, ptr(bx, di)));
TEST_INSTRUCTION("8D0433" , lea(eax, ptr(ebx, esi)));
TEST_INSTRUCTION("8D043B" , lea(eax, ptr(ebx, edi)));
TEST_INSTRUCTION("8D0500000000" , lea(eax, ptr(0)));
// XACQUIRE|XRELEASE|RTM.
TEST_INSTRUCTION("C6F811" , xabort(0x11));
TEST_INSTRUCTION("F2F00108" , xacquire().lock().add(dword_ptr(eax), ecx));
TEST_INSTRUCTION("F3F00108" , xrelease().lock().add(dword_ptr(eax), ecx));
// BND.
TEST_INSTRUCTION("660F1ACA" , bndmov(bnd1, bnd2));
TEST_INSTRUCTION("F20F1ACF" , bndcu(bnd1, edi));
TEST_INSTRUCTION("0F1A0408" , bndldx(bnd0, ptr(eax, ecx)));
TEST_INSTRUCTION("0F1B0C08" , bndstx(ptr(eax, ecx), bnd1));
// BMI+.
TEST_INSTRUCTION("66F30FB8C2" , popcnt(ax, dx));
TEST_INSTRUCTION("F30FB8C2" , popcnt(eax, edx));
TEST_INSTRUCTION("66F30FBDC2" , lzcnt(ax, dx));
TEST_INSTRUCTION("F30FBDC2" , lzcnt(eax, edx));
TEST_INSTRUCTION("66F30FBCC2" , tzcnt(ax, dx));
TEST_INSTRUCTION("F30FBCC2" , tzcnt(eax, edx));
// CRC32.
TEST_INSTRUCTION("F20F38F0C7" , crc32(eax, bh));
TEST_INSTRUCTION("66F20F38F1C3" , crc32(eax, bx));
TEST_INSTRUCTION("F20F38F1C1" , crc32(eax, ecx));
TEST_INSTRUCTION("F20F38F006" , crc32(eax, byte_ptr(esi)));
TEST_INSTRUCTION("66F20F38F106" , crc32(eax, word_ptr(esi)));
TEST_INSTRUCTION("F20F38F106" , crc32(eax, dword_ptr(esi)));
// FPU.
TEST_INSTRUCTION("9B" , fwait());
TEST_INSTRUCTION("D800" , fadd(dword_ptr(eax)));
TEST_INSTRUCTION("DC00" , fadd(qword_ptr(eax)));
// MMX & SSE.
TEST_INSTRUCTION("0F6FC1" , movq(mm0, mm1));
TEST_INSTRUCTION("0F6E00" , movd(mm0, ptr(eax)));
TEST_INSTRUCTION("0F6F0418" , movq(mm0, ptr(eax, ebx)));
TEST_INSTRUCTION("0F7E38" , movd(ptr(eax), mm7));
TEST_INSTRUCTION("0F7F0418" , movq(ptr(eax, ebx), mm0));
TEST_INSTRUCTION("F30F7EC1" , movq(xmm0, xmm1));
TEST_INSTRUCTION("660F6E0418" , movd(xmm0, ptr(eax, ebx)));
TEST_INSTRUCTION("F30F7E0418" , movq(xmm0, ptr(eax, ebx)));
TEST_INSTRUCTION("660F7E0C18" , movd(ptr(eax, ebx), xmm1));
TEST_INSTRUCTION("660FD60C18" , movq(ptr(eax, ebx), xmm1));
TEST_INSTRUCTION("0F280498" , movaps(xmm0, ptr(eax, ebx, 2)));
TEST_INSTRUCTION("660F280498" , movapd(xmm0, ptr(eax, ebx, 2)));
TEST_INSTRUCTION("660F6F0498" , movdqa(xmm0, ptr(eax, ebx, 2)));
TEST_INSTRUCTION("0F290C98" , movaps(ptr(eax, ebx, 2), xmm1));
TEST_INSTRUCTION("660F290C98" , movapd(ptr(eax, ebx, 2), xmm1));
TEST_INSTRUCTION("660F7F0C98" , movdqa(ptr(eax, ebx, 2), xmm1));
TEST_INSTRUCTION("F30F2DC1" , cvtss2si(eax, xmm1));
TEST_INSTRUCTION("F20F2DC1" , cvtsd2si(eax, xmm1));
TEST_INSTRUCTION("F30F2AC2" , cvtsi2ss(xmm0, edx));
TEST_INSTRUCTION("F20F2AC2" , cvtsi2sd(xmm0, edx));
TEST_INSTRUCTION("660F3A41C100" , dppd(xmm0, xmm1, 0));
TEST_INSTRUCTION("0FDBC1" , pand(mm0, mm1));
TEST_INSTRUCTION("660FDBC1" , pand(xmm0, xmm1));
TEST_INSTRUCTION("660FFDC1" , paddw(xmm0, xmm1));
// AVX & AVX512.
// Expected strings starting with C5/C4 are VEX encodings, those starting with
// 62 are EVEX encodings.
TEST_INSTRUCTION("C5F96E5A10" , vmovd(xmm3, dword_ptr(edx, 0x10)));
TEST_INSTRUCTION("C5FA7E5A10" , vmovq(xmm3, qword_ptr(edx, 0x10)));
TEST_INSTRUCTION("C5F97E5A10" , vmovd(dword_ptr(edx, 0x10), xmm3));
TEST_INSTRUCTION("C5F9D65A10" , vmovq(qword_ptr(edx, 0x10), xmm3));
TEST_INSTRUCTION("C5F96EEB" , vmovd(xmm5, ebx));
TEST_INSTRUCTION("C5F97EEB" , vmovd(ebx, xmm5));
TEST_INSTRUCTION("C5FA7EC1" , vmovq(xmm0, xmm1));
TEST_INSTRUCTION("62F17D086EC0" , evex().vmovd(xmm0, eax));
TEST_INSTRUCTION("62F17D087EC0" , evex().vmovd(eax, xmm0));
TEST_INSTRUCTION("C5F5FDC7" , vpaddw(ymm0, ymm1, ymm7));
TEST_INSTRUCTION("C4E37141C200" , vdppd(xmm0, xmm1, xmm2, 0));
TEST_INSTRUCTION("62F1F5D95800" , k(k1).z().vaddpd(zmm0, zmm1, ptr(eax)._1to8()));
TEST_INSTRUCTION("C5F058C2" , vaddps(xmm0, xmm1, xmm2));
TEST_INSTRUCTION("62F1748858C2" , z().vaddps(xmm0, xmm1, xmm2));
TEST_INSTRUCTION("62F1748958C2" , k(k1).z().vaddps(xmm0, xmm1, xmm2));
TEST_INSTRUCTION("62F16C4FC25498040F" , k(k7).vcmpps(k2, zmm2, zmmword_ptr(eax, ebx, 2, 256), 15));
TEST_INSTRUCTION("62F16C5FC25498400F" , k(k7).vcmpps(k2, zmm2, dword_ptr(eax, ebx, 2, 256)._1to16(), 15));
TEST_INSTRUCTION("C5FA2DC1" , vcvtss2si(eax, xmm1));
TEST_INSTRUCTION("C5FB2DC1" , vcvtsd2si(eax, xmm1));
TEST_INSTRUCTION("C5F22AC2" , vcvtsi2ss(xmm0, xmm1, edx));
TEST_INSTRUCTION("C5F32AC2" , vcvtsi2sd(xmm0, xmm1, edx));
TEST_INSTRUCTION("C5FBE63B" , vcvtpd2dq(xmm7, xmmword_ptr(ebx)));
TEST_INSTRUCTION("C5FFE63B" , vcvtpd2dq(xmm7, ymmword_ptr(ebx)));
TEST_INSTRUCTION("C5F95A3B" , vcvtpd2ps(xmm7, xmmword_ptr(ebx)));
TEST_INSTRUCTION("C5FD5A3B" , vcvtpd2ps(xmm7, ymmword_ptr(ebx)));
TEST_INSTRUCTION("C5F95AC1" , vcvtpd2ps(xmm0, xmm1));
TEST_INSTRUCTION("C5F95A03" , vcvtpd2ps(xmm0, xmmword_ptr(ebx)));
TEST_INSTRUCTION("C5FD5AC1" , vcvtpd2ps(xmm0, ymm1));
TEST_INSTRUCTION("C5FD5A03" , vcvtpd2ps(xmm0, ymmword_ptr(ebx)));
TEST_INSTRUCTION("62F1FD485AC1" , vcvtpd2ps(ymm0, zmm1));
TEST_INSTRUCTION("62F1FD485A03" , vcvtpd2ps(ymm0, zmmword_ptr(ebx)));
TEST_INSTRUCTION("62F1FC08793B" , vcvtpd2udq(xmm7, xmmword_ptr(ebx)));
TEST_INSTRUCTION("62F1FC28793B" , vcvtpd2udq(xmm7, ymmword_ptr(ebx)));
TEST_INSTRUCTION("62F1FC085B3B" , vcvtqq2ps(xmm7, xmmword_ptr(ebx)));
TEST_INSTRUCTION("62F1FC285B3B" , vcvtqq2ps(xmm7, ymmword_ptr(ebx)));
TEST_INSTRUCTION("C5F9E63B" , vcvttpd2dq(xmm7, xmmword_ptr(ebx)));
TEST_INSTRUCTION("C5FDE63B" , vcvttpd2dq(xmm7, ymmword_ptr(ebx)));
TEST_INSTRUCTION("62F1FC08783B" , vcvttpd2udq(xmm7, xmmword_ptr(ebx)));
TEST_INSTRUCTION("62F1FC28783B" , vcvttpd2udq(xmm7, ymmword_ptr(ebx)));
TEST_INSTRUCTION("62F1FF087A3B" , vcvtuqq2ps(xmm7, xmmword_ptr(ebx)));
TEST_INSTRUCTION("62F1FF287A3B" , vcvtuqq2ps(xmm7, ymmword_ptr(ebx)));
TEST_INSTRUCTION("62F3FD08663F01" , vfpclasspd(k7, xmmword_ptr(edi), 0x01));
TEST_INSTRUCTION("62F3FD28663F01" , vfpclasspd(k7, ymmword_ptr(edi), 0x01));
TEST_INSTRUCTION("62F3FD48663F01" , vfpclasspd(k7, zmmword_ptr(edi), 0x01));
TEST_INSTRUCTION("62F37D08663F01" , vfpclassps(k7, xmmword_ptr(edi), 0x01));
TEST_INSTRUCTION("62F37D28663F01" , vfpclassps(k7, ymmword_ptr(edi), 0x01));
TEST_INSTRUCTION("62F37D48663F01" , vfpclassps(k7, zmmword_ptr(edi), 0x01));
TEST_INSTRUCTION("C4E2F990040500000000" , vpgatherdq(xmm0, ptr(0, xmm0), xmm0));
TEST_INSTRUCTION("C4E2FD91040500000000" , vpgatherqq(ymm0, ptr(0, ymm0), ymm0));
TEST_INSTRUCTION("C4E2E9920C00" , vgatherdpd(xmm1, ptr(eax, xmm0), xmm2));
TEST_INSTRUCTION("62F36D083ECB00" , vpcmpub(k1, xmm2, xmm3, 0x0));
TEST_INSTRUCTION("62F26D48CF4C1101" , vgf2p8mulb(zmm1, zmm2, zmmword_ptr(ecx, edx, 0, 64)));
TEST_INSTRUCTION("62F3ED48CE4C11010F" , vgf2p8affineqb(zmm1, zmm2, zmmword_ptr(ecx, edx, 0, 64), 15));
TEST_INSTRUCTION("62F3ED48CF4C11010F" , vgf2p8affineinvqb(zmm1, zmm2, zmmword_ptr(ecx, edx, 0, 64), 15));
TEST_INSTRUCTION("62F2674868246D00F8FFFF" , vp2intersectd(k4, k5, zmm3, zmmword_ptr(0xFFFFF800, ebp, 1)));
// AVX512_VNNI vs AVX_VNNI.
// Without any option the EVEX form (62...) is expected; vex() is expected to
// select the VEX form (C4...) of the same instruction.
TEST_INSTRUCTION("62F2552850F4" , vpdpbusd(ymm6, ymm5, ymm4));
TEST_INSTRUCTION("C4E25550F4" , vex().vpdpbusd(ymm6, ymm5, ymm4));
tester.printSummary();
return tester.didPass();
}
bool testX64Assembler(const TestSettings& settings) noexcept {
using namespace x86;
AssemblerTester<Assembler> tester(Environment::kArchX64, settings);
tester.printHeader("X64");
// Base Instructions.
TEST_INSTRUCTION("B800000000" , mov(eax, 0));
TEST_INSTRUCTION("BB00000000" , mov(ebx, 0));
TEST_INSTRUCTION("48C7C300000000" , mov(rbx, 0));
TEST_INSTRUCTION("48BB8877665544332211" , mov(rbx, 0x001122334455667788));
TEST_INSTRUCTION("48BB0000000000000000" , long_().mov(rbx, 0));
TEST_INSTRUCTION("8AE0" , mov(ah, al));
TEST_INSTRUCTION("8AF0" , mov(dh, al));
TEST_INSTRUCTION("B8E8030000" , mov(eax, 1000));
TEST_INSTRUCTION("0F20C0" , mov(rax, cr0));
TEST_INSTRUCTION("440F20C0" , mov(rax, cr8));
TEST_INSTRUCTION("488B0500000000" , mov(rax, ptr(rip)));
TEST_INSTRUCTION("4A8B0460" , mov(rax, ptr(rax, r12, 1)));
TEST_INSTRUCTION("4A8B0468" , mov(rax, ptr(rax, r13, 1)));
TEST_INSTRUCTION("4A8B846000010000" , mov(rax, ptr(rax, r12, 1, 256)));
TEST_INSTRUCTION("89042544332211" , mov(ptr_abs(0x11223344), eax));
TEST_INSTRUCTION("891C2544332211" , mov(ptr_abs(0x11223344), ebx));
TEST_INSTRUCTION("A38877665544332211" , mov(ptr_abs(0x1122334455667788), eax));
TEST_INSTRUCTION("A34433221100000000" , movabs(ptr(0x0000000011223344), eax));
TEST_INSTRUCTION("A38877665544332211" , movabs(ptr(0x1122334455667788), eax));
TEST_INSTRUCTION("48A1EFCDAB8967452301" , movabs(rax, ptr(0x123456789ABCDEF)));
TEST_INSTRUCTION("0FBE07" , movsx(eax, byte_ptr(rdi)));
TEST_INSTRUCTION("480FBE07" , movsx(rax, byte_ptr(rdi)));
TEST_INSTRUCTION("0FBF07" , movsx(eax, word_ptr(rdi)));
TEST_INSTRUCTION("480FBF07" , movsx(rax, word_ptr(rdi)));
TEST_INSTRUCTION("486307" , movsxd(rax, ptr(rdi)));
TEST_INSTRUCTION("486307" , movsxd(rax, dword_ptr(rdi)));
TEST_INSTRUCTION("6663C3" , movsxd(ax, bx));
TEST_INSTRUCTION("63C3" , movsxd(eax, ebx));
TEST_INSTRUCTION("4863C3" , movsxd(rax, ebx));
TEST_INSTRUCTION("0FB6C6" , movzx(eax, dh));
TEST_INSTRUCTION("0FB607" , movzx(eax, byte_ptr(rdi)));
TEST_INSTRUCTION("480FB607" , movzx(rax, byte_ptr(rdi)));
TEST_INSTRUCTION("440FB6FA" , movzx(r15d, dl));
TEST_INSTRUCTION("440FB6FD" , movzx(r15d, bpl));
TEST_INSTRUCTION("0FB707" , movzx(eax, word_ptr(rdi)));
TEST_INSTRUCTION("480FB707" , movzx(rax, word_ptr(rdi)));
TEST_INSTRUCTION("03D9" , add(ebx, ecx));
TEST_INSTRUCTION("83C001" , add(eax, 1));
TEST_INSTRUCTION("0504030201" , add(eax, 0x01020304));
TEST_INSTRUCTION("66050201" , add(ax, 0x0102));
TEST_INSTRUCTION("6603849004030201" , add(ax, ptr(rax, rdx, 2, 0x01020304)));
TEST_INSTRUCTION("F00118" , lock().add(ptr(rax), ebx));
TEST_INSTRUCTION("F0480FC138" , lock().xadd(ptr(rax), rdi));
TEST_INSTRUCTION("660FC8" , bswap(ax));
TEST_INSTRUCTION("0FC8" , bswap(eax));
TEST_INSTRUCTION("480FC8" , bswap(rax));
TEST_INSTRUCTION("660FBA2001" , bt(word_ptr(rax), 1));
TEST_INSTRUCTION("0FBA2001" , bt(dword_ptr(rax), 1));
TEST_INSTRUCTION("480FBA2001" , bt(qword_ptr(rax), 1));
TEST_INSTRUCTION("FE00" , inc(byte_ptr(rax)));
TEST_INSTRUCTION("66FF00" , inc(word_ptr(rax)));
TEST_INSTRUCTION("FF00" , inc(dword_ptr(rax)));
TEST_INSTRUCTION("48FF00" , inc(qword_ptr(rax)));
TEST_INSTRUCTION("411351FD" , adc(edx, dword_ptr(r9, -3)));
TEST_INSTRUCTION("F6D8" , neg(al));
TEST_INSTRUCTION("F6DC" , neg(ah));
TEST_INSTRUCTION("40F6DE" , neg(sil));
TEST_INSTRUCTION("F7D8" , neg(eax));
TEST_INSTRUCTION("F7D0" , not_(eax));
TEST_INSTRUCTION("0F95C3" , setnz(bl));
TEST_INSTRUCTION("0F94C7" , setz(bh));
TEST_INSTRUCTION("400F94C0" , rex().setz(al));
TEST_INSTRUCTION("410F94C7" , setz(r15b));
TEST_INSTRUCTION("F600FF" , test(byte_ptr(rax), 0xFF));
TEST_INSTRUCTION("66F700FF00" , test(word_ptr(rax), 0xFF));
TEST_INSTRUCTION("F700FF000000" , test(dword_ptr(rax), 0xFF));
TEST_INSTRUCTION("48F700FF000000" , test(qword_ptr(rax), 0xFF));
TEST_INSTRUCTION("A836" , test(al, 0x36));
TEST_INSTRUCTION("F6C436" , test(ah, 0x36));
TEST_INSTRUCTION("50" , push(rax));
TEST_INSTRUCTION("51" , push(rcx));
TEST_INSTRUCTION("52" , push(rdx));
TEST_INSTRUCTION("53" , push(rbx));
TEST_INSTRUCTION("54" , push(rsp));
TEST_INSTRUCTION("55" , push(rbp));
TEST_INSTRUCTION("56" , push(rsi));
TEST_INSTRUCTION("57" , push(rdi));
TEST_INSTRUCTION("4150" , push(r8));
TEST_INSTRUCTION("4151" , push(r9));
TEST_INSTRUCTION("4152" , push(r10));
TEST_INSTRUCTION("4153" , push(r11));
TEST_INSTRUCTION("4154" , push(r12));
TEST_INSTRUCTION("4155" , push(r13));
TEST_INSTRUCTION("4156" , push(r14));
TEST_INSTRUCTION("4157" , push(r15));
TEST_INSTRUCTION("0FA0" , push(fs));
TEST_INSTRUCTION("0FA8" , push(gs));
TEST_INSTRUCTION("400FA0" , rex().push(fs));
TEST_INSTRUCTION("400FA8" , rex().push(gs));
TEST_INSTRUCTION("C8010002" , enter(1, 2));
TEST_INSTRUCTION("40C8010002" , rex().enter(1, 2));
TEST_INSTRUCTION("C9" , leave());
TEST_INSTRUCTION("FF10" , call(ptr(rax)));
TEST_INSTRUCTION("FF10" , call(qword_ptr(rax)));
TEST_INSTRUCTION("660FB401" , lfs(ax, ptr(rcx)));
TEST_INSTRUCTION("0FB401" , lfs(eax, ptr(rcx)));
TEST_INSTRUCTION("480FB401" , lfs(rax, ptr(rcx)));
TEST_INSTRUCTION("660FB501" , lgs(ax, ptr(rcx)));
TEST_INSTRUCTION("0FB501" , lgs(eax, ptr(rcx)));
TEST_INSTRUCTION("480FB501" , lgs(rax, ptr(rcx)));
TEST_INSTRUCTION("660FB201" , lss(ax, ptr(rcx)));
TEST_INSTRUCTION("0FB201" , lss(eax, ptr(rcx)));
TEST_INSTRUCTION("480FB201" , lss(rax, ptr(rcx)));
TEST_INSTRUCTION("40863424" , xchg(ptr(rsp), sil));
TEST_INSTRUCTION("40863C24" , xchg(ptr(rsp), dil));
// NOP.
TEST_INSTRUCTION("90" , nop());
TEST_INSTRUCTION("660F1F0400" , nop(word_ptr(rax, rax)));
TEST_INSTRUCTION("660F1F0400" , nop(word_ptr(rax, rax), ax));
TEST_INSTRUCTION("660F1F1C00" , nop(word_ptr(rax, rax), bx));
TEST_INSTRUCTION("0F1F0400" , nop(dword_ptr(rax, rax)));
TEST_INSTRUCTION("0F1F0400" , nop(dword_ptr(rax, rax), eax));
TEST_INSTRUCTION("0F1F1C00" , nop(dword_ptr(rax, rax), ebx));
TEST_INSTRUCTION("480F1F0400" , nop(qword_ptr(rax, rax)));
TEST_INSTRUCTION("480F1F0400" , nop(qword_ptr(rax, rax), rax));
TEST_INSTRUCTION("480F1F1C00" , nop(qword_ptr(rax, rax), rbx));
// LEA.
TEST_INSTRUCTION("8D042500000000" , lea(eax, ptr(0)));
TEST_INSTRUCTION("488D042500000000" , lea(rax, ptr(0)));
TEST_INSTRUCTION("488D0433" , lea(rax, ptr(rbx, rsi)));
TEST_INSTRUCTION("488D043B" , lea(rax, ptr(rbx, rdi)));
TEST_INSTRUCTION("488D840000400000" , lea(rax, ptr(rax, rax, 0, 0x4000)));
// CRC32.
TEST_INSTRUCTION("F20F38F0C7" , crc32(eax, bh));
TEST_INSTRUCTION("66F20F38F1C3" , crc32(eax, bx));
TEST_INSTRUCTION("F20F38F1C1" , crc32(eax, ecx));
TEST_INSTRUCTION("F20F38F006" , crc32(eax, byte_ptr(rsi)));
TEST_INSTRUCTION("66F20F38F106" , crc32(eax, word_ptr(rsi)));
TEST_INSTRUCTION("F20F38F106" , crc32(eax, dword_ptr(rsi)));
TEST_INSTRUCTION("F2480F38F0C3" , crc32(rax, bl));
TEST_INSTRUCTION("F2480F38F1C1" , crc32(rax, rcx));
TEST_INSTRUCTION("F2480F38F006" , crc32(rax, byte_ptr(rsi)));
TEST_INSTRUCTION("F2480F38F106" , crc32(rax, qword_ptr(rsi)));
// XACQUIRE|XRELEASE|RTM.
TEST_INSTRUCTION("C6F811" , xabort(0x11));
TEST_INSTRUCTION("F2F0480108" , xacquire().lock().add(qword_ptr(rax), rcx));
TEST_INSTRUCTION("F3F0480108" , xrelease().lock().add(qword_ptr(rax), rcx));
// BND.
TEST_INSTRUCTION("660F1ACA" , bndmov(bnd1, bnd2));
TEST_INSTRUCTION("F20F1ACF" , bndcu(bnd1, rdi));
TEST_INSTRUCTION("0F1A0408" , bndldx(bnd0, ptr(rax, rcx)));
TEST_INSTRUCTION("0F1B0C08" , bndstx(ptr(rax, rcx), bnd1));
// BMI+.
TEST_INSTRUCTION("66F30FB8C2" , popcnt(ax, dx));
TEST_INSTRUCTION("66F3450FB8C1" , popcnt(r8w, r9w));
TEST_INSTRUCTION("F30FB8C2" , popcnt(eax, edx));
TEST_INSTRUCTION("F3480FB8C2" , popcnt(rax, rdx));
TEST_INSTRUCTION("66F30FBDC2" , lzcnt(ax, dx));
TEST_INSTRUCTION("66F3450FBDC7" , lzcnt(r8w, r15w));
TEST_INSTRUCTION("F30FBDC2" , lzcnt(eax, edx));
TEST_INSTRUCTION("F3490FBDC2" , lzcnt(rax, r10));
TEST_INSTRUCTION("66F30FBCC2" , tzcnt(ax, dx));
TEST_INSTRUCTION("66F3450FBCC7" , tzcnt(r8w, r15w));
TEST_INSTRUCTION("F30FBCC2" , tzcnt(eax, edx));
TEST_INSTRUCTION("F34D0FBCFA" , tzcnt(r15, r10));
// FPU.
TEST_INSTRUCTION("9B" , fwait());
TEST_INSTRUCTION("D800" , fadd(dword_ptr(rax)));
TEST_INSTRUCTION("DC00" , fadd(qword_ptr(rax)));
// MMX & SSE.
TEST_INSTRUCTION("0F6FC1" , movq(mm0, mm1));
TEST_INSTRUCTION("0F6E00" , movd(mm0, ptr(rax)));
TEST_INSTRUCTION("0F6F0418" , movq(mm0, ptr(rax, rbx)));
TEST_INSTRUCTION("0F7E38" , movd(ptr(rax), mm7));
TEST_INSTRUCTION("0F7F0418" , movq(ptr(rax, rbx), mm0));
TEST_INSTRUCTION("F30F7EC1" , movq(xmm0, xmm1));
TEST_INSTRUCTION("660F6E0418" , movd(xmm0, ptr(rax, rbx)));
TEST_INSTRUCTION("F30F7E0418" , movq(xmm0, ptr(rax, rbx)));
TEST_INSTRUCTION("660F7E0C18" , movd(ptr(rax, rbx), xmm1));
TEST_INSTRUCTION("660FD60C18" , movq(ptr(rax, rbx), xmm1));
TEST_INSTRUCTION("0F280498" , movaps(xmm0, ptr(rax, rbx, 2)));
TEST_INSTRUCTION("660F280498" , movapd(xmm0, ptr(rax, rbx, 2)));
TEST_INSTRUCTION("660F6F0498" , movdqa(xmm0, ptr(rax, rbx, 2)));
TEST_INSTRUCTION("0F290C98" , movaps(ptr(rax, rbx, 2), xmm1));
TEST_INSTRUCTION("660F290C98" , movapd(ptr(rax, rbx, 2), xmm1));
TEST_INSTRUCTION("660F7F0C98" , movdqa(ptr(rax, rbx, 2), xmm1));
TEST_INSTRUCTION("F30F2DC1" , cvtss2si(eax, xmm1));
TEST_INSTRUCTION("F3480F2DC1" , cvtss2si(rax, xmm1));
TEST_INSTRUCTION("F20F2DC1" , cvtsd2si(eax, xmm1));
TEST_INSTRUCTION("F2480F2DC1" , cvtsd2si(rax, xmm1));
TEST_INSTRUCTION("F30F2AC2" , cvtsi2ss(xmm0, edx));
TEST_INSTRUCTION("F3480F2AC2" , cvtsi2ss(xmm0, rdx));
TEST_INSTRUCTION("F20F2AC2" , cvtsi2sd(xmm0, edx));
TEST_INSTRUCTION("F2480F2AC2" , cvtsi2sd(xmm0, rdx));
TEST_INSTRUCTION("66450F3A41D300" , dppd(xmm10, xmm11, 0));
TEST_INSTRUCTION("0FDBC1" , pand(mm0, mm1));
TEST_INSTRUCTION("660FDBC1" , pand(xmm0, xmm1));
TEST_INSTRUCTION("660FFDC1" , paddw(xmm0, xmm1));
// AVX & AVX512.
TEST_INSTRUCTION("C5F96E5A10" , vmovd(xmm3, dword_ptr(rdx, 0x10)));
TEST_INSTRUCTION("C5FA7E5A10" , vmovq(xmm3, qword_ptr(rdx, 0x10)));
TEST_INSTRUCTION("C5F97E5A10" , vmovd(dword_ptr(rdx, 0x10), xmm3));
TEST_INSTRUCTION("C5F9D65A10" , vmovq(qword_ptr(rdx, 0x10), xmm3));
TEST_INSTRUCTION("C5F96EEB" , vmovd(xmm5, ebx));
TEST_INSTRUCTION("C4E1F96EEB" , vmovq(xmm5, rbx));
TEST_INSTRUCTION("62617D086EFB" , vmovd(xmm31, ebx));
TEST_INSTRUCTION("6261FD086EFB" , vmovq(xmm31, rbx));
TEST_INSTRUCTION("C5F97EEB" , vmovd(ebx, xmm5));
TEST_INSTRUCTION("C4E1F97EEB" , vmovq(rbx, xmm5));
TEST_INSTRUCTION("62617D087EFB" , vmovd(ebx, xmm31));
TEST_INSTRUCTION("6261FD087EFB" , vmovq(rbx, xmm31));
TEST_INSTRUCTION("C5FA7EC1" , vmovq(xmm0, xmm1));
TEST_INSTRUCTION("62F17D086EC0" , evex().vmovd(xmm0, eax));
TEST_INSTRUCTION("62F1FD086EC0" , evex().vmovq(xmm0, rax));
TEST_INSTRUCTION("62F17D087EC0" , evex().vmovd(eax, xmm0));
TEST_INSTRUCTION("62F1FD087EC0" , evex().vmovq(rax, xmm0));
TEST_INSTRUCTION("C44135FDC7" , vpaddw(ymm8, ymm9, ymm15));
TEST_INSTRUCTION("C4432141D400" , vdppd(xmm10, xmm11, xmm12, 0));
TEST_INSTRUCTION("6271B5D95808" , k(k1).z().vaddpd(zmm9, zmm9, ptr(rax)._1to8()));
TEST_INSTRUCTION("C5F058C2" , vaddps(xmm0, xmm1, xmm2));
TEST_INSTRUCTION("62F1748858C2" , z().vaddps(xmm0, xmm1, xmm2));
TEST_INSTRUCTION("C5FA2DC1" , vcvtss2si(eax, xmm1));
TEST_INSTRUCTION("C4E1FA2DC1" , vcvtss2si(rax, xmm1));
TEST_INSTRUCTION("C5FB2DC1" , vcvtsd2si(eax, xmm1));
TEST_INSTRUCTION("C4E1FB2DC1" , vcvtsd2si(rax, xmm1));
TEST_INSTRUCTION("C5F22AC2" , vcvtsi2ss(xmm0, xmm1, edx));
TEST_INSTRUCTION("C4E1F22AC2" , vcvtsi2ss(xmm0, xmm1, rdx));
TEST_INSTRUCTION("C5F32AC2" , vcvtsi2sd(xmm0, xmm1, edx));
TEST_INSTRUCTION("C4E1F32AC2" , vcvtsi2sd(xmm0, xmm1, rdx));
TEST_INSTRUCTION("C57BE63B" , vcvtpd2dq(xmm15, xmmword_ptr(rbx)));
TEST_INSTRUCTION("C57FE63B" , vcvtpd2dq(xmm15, ymmword_ptr(rbx)));
TEST_INSTRUCTION("C5795A3B" , vcvtpd2ps(xmm15, xmmword_ptr(rbx)));
TEST_INSTRUCTION("C57D5A3B" , vcvtpd2ps(xmm15, ymmword_ptr(rbx)));
TEST_INSTRUCTION("6271FC08793B" , vcvtpd2udq(xmm15, xmmword_ptr(rbx)));
TEST_INSTRUCTION("6271FC28793B" , vcvtpd2udq(xmm15, ymmword_ptr(rbx)));
TEST_INSTRUCTION("6271FC085B3B" , vcvtqq2ps(xmm15, xmmword_ptr(rbx)));
TEST_INSTRUCTION("6271FC285B3B" , vcvtqq2ps(xmm15, ymmword_ptr(rbx)));
TEST_INSTRUCTION("C5F95AC1" , vcvtpd2ps(xmm0, xmm1));
TEST_INSTRUCTION("C5F95A03" , vcvtpd2ps(xmm0, xmmword_ptr(rbx)));
TEST_INSTRUCTION("C5FD5AC1" , vcvtpd2ps(xmm0, ymm1));
TEST_INSTRUCTION("C5FD5A03" , vcvtpd2ps(xmm0, ymmword_ptr(rbx)));
TEST_INSTRUCTION("62F1FD485AC1" , vcvtpd2ps(ymm0, zmm1));
TEST_INSTRUCTION("62F1FD485A03" , vcvtpd2ps(ymm0, zmmword_ptr(rbx)));
TEST_INSTRUCTION("C579E63B" , vcvttpd2dq(xmm15, xmmword_ptr(rbx)));
TEST_INSTRUCTION("C57DE63B" , vcvttpd2dq(xmm15, ymmword_ptr(rbx)));
TEST_INSTRUCTION("6271FC08783B" , vcvttpd2udq(xmm15, xmmword_ptr(rbx)));
TEST_INSTRUCTION("6271FC28783B" , vcvttpd2udq(xmm15, ymmword_ptr(rbx)));
TEST_INSTRUCTION("6271FF087A3B" , vcvtuqq2ps(xmm15, xmmword_ptr(rbx)));
TEST_INSTRUCTION("6271FF287A3B" , vcvtuqq2ps(xmm15, ymmword_ptr(rbx)));
TEST_INSTRUCTION("62F3FD08663F01" , vfpclasspd(k7, xmmword_ptr(rdi), 0x01));
TEST_INSTRUCTION("62F3FD28663701" , vfpclasspd(k6, ymmword_ptr(rdi), 0x01));
TEST_INSTRUCTION("62F3FD48662F01" , vfpclasspd(k5, zmmword_ptr(rdi), 0x01));
TEST_INSTRUCTION("62F37D08662701" , vfpclassps(k4, xmmword_ptr(rdi), 0x01));
TEST_INSTRUCTION("62F37D28661F01" , vfpclassps(k3, ymmword_ptr(rdi), 0x01));
TEST_INSTRUCTION("62F37D48661701" , vfpclassps(k2, zmmword_ptr(rdi), 0x01));
TEST_INSTRUCTION("6201951058F4" , rn_sae().vaddpd(zmm30, zmm29, zmm28));
TEST_INSTRUCTION("6201953058F4" , rd_sae().vaddpd(zmm30, zmm29, zmm28));
TEST_INSTRUCTION("6201955058F4" , ru_sae().vaddpd(zmm30, zmm29, zmm28));
TEST_INSTRUCTION("6201957058F4" , rz_sae().vaddpd(zmm30, zmm29, zmm28));
TEST_INSTRUCTION("62F16C4FC25498040F" , k(k7).vcmpps(k2, zmm2, zmmword_ptr(rax, rbx, 2, 256), 15));
TEST_INSTRUCTION("62F16C1FC25498400F" , k(k7).vcmpps(k2, xmm2, dword_ptr(rax, rbx, 2, 256)._1to4(), 15));
TEST_INSTRUCTION("62F16C3FC25498400F" , k(k7).vcmpps(k2, ymm2, dword_ptr(rax, rbx, 2, 256)._1to8(), 15));
TEST_INSTRUCTION("62F16C5FC25498400F" , k(k7).vcmpps(k2, zmm2, dword_ptr(rax, rbx, 2, 256)._1to16(), 15));
TEST_INSTRUCTION("62F1FD58C2C100" , sae().vcmppd(k0, zmm0, zmm1, 0x00));
TEST_INSTRUCTION("6201FD182EF5" , sae().vucomisd(xmm30, xmm29));
TEST_INSTRUCTION("62017C182EF5" , sae().vucomiss(xmm30, xmm29));
TEST_INSTRUCTION("C4E2FD91040500000000" , vpgatherqq(ymm0, ptr(0, ymm0), ymm0));
TEST_INSTRUCTION("C4E2E9920C00" , vgatherdpd(xmm1, ptr(rax, xmm0), xmm2));
TEST_INSTRUCTION("C4E26990440D00" , vpgatherdd(xmm0, ptr(rbp, xmm1), xmm2));
TEST_INSTRUCTION("C4C26990040C" , vpgatherdd(xmm0, ptr(r12, xmm1), xmm2));
TEST_INSTRUCTION("C4C26990440D00" , vpgatherdd(xmm0, ptr(r13, xmm1), xmm2));
TEST_INSTRUCTION("62F36D083ECB00" , vpcmpub(k1, xmm2, xmm3, 0x0));
TEST_INSTRUCTION("C5E9FE4C1140" , vpaddd(xmm1, xmm2, ptr(rcx, rdx, 0, 64)));
TEST_INSTRUCTION("C5EDFE4C1140" , vpaddd(ymm1, ymm2, ptr(rcx, rdx, 0, 64)));
TEST_INSTRUCTION("62F16D48FE4C1101" , vpaddd(zmm1, zmm2, ptr(rcx, rdx, 0, 64)));
TEST_INSTRUCTION("62E23D0850441104" , vpdpbusd(xmm16, xmm8, ptr(rcx, rdx, 0, 64)));
TEST_INSTRUCTION("62E23D2850441102" , vpdpbusd(ymm16, ymm8, ptr(rcx, rdx, 0, 64)));
TEST_INSTRUCTION("62E23D4850441101" , vpdpbusd(zmm16, zmm8, ptr(rcx, rdx, 0, 64)));
TEST_INSTRUCTION("62F26D48CF4C1101" , vgf2p8mulb(zmm1, zmm2, zmmword_ptr(rcx, rdx, 0, 64)));
TEST_INSTRUCTION("62F3ED48CE4C11010F" , vgf2p8affineqb(zmm1, zmm2, zmmword_ptr(rcx, rdx, 0, 64), 15));
TEST_INSTRUCTION("62F3ED48CF4C11010F" , vgf2p8affineinvqb(zmm1, zmm2, zmmword_ptr(rcx, rdx, 0, 64), 15));
TEST_INSTRUCTION("62F27D087AC6" , vpbroadcastb(xmm0, esi));
TEST_INSTRUCTION("62F27D287AC6" , vpbroadcastb(ymm0, esi));
TEST_INSTRUCTION("62F27D487AC6" , vpbroadcastb(zmm0, esi));
TEST_INSTRUCTION("62F2CD088DF8" , vpermw(xmm7, xmm6, xmm0));
TEST_INSTRUCTION("C4E3FD01FE01" , vpermpd(ymm7, ymm6, 1));
TEST_INSTRUCTION("62F3FD4801FE01" , vpermpd(zmm7, zmm6, 1));
TEST_INSTRUCTION("62F2CD2816F8" , vpermpd(ymm7, ymm6, ymm0));
TEST_INSTRUCTION("62F2CD4816F8" , vpermpd(zmm7, zmm6, zmm0));
TEST_INSTRUCTION("C4E24D16F9" , vpermps(ymm7, ymm6, ymm1));
TEST_INSTRUCTION("62F24D4816F9" , vpermps(zmm7, zmm6, zmm1));
TEST_INSTRUCTION("6292472068F0" , vp2intersectd(k6, k7, ymm23, ymm24));
TEST_INSTRUCTION("62B2472068B4F500000010" , vp2intersectd(k6, k7, ymm23, ptr(rbp, r14, 3, 268435456)));
TEST_INSTRUCTION("62F24730683500000000" , vp2intersectd(k6, k7, ymm23, dword_ptr(rip)._1to8()));
TEST_INSTRUCTION("62F2472068742DE0" , vp2intersectd(k6, k7, ymm23, ymmword_ptr(rbp, rbp, 0, -1024)));
TEST_INSTRUCTION("62F2472068717F" , vp2intersectd(k6, k7, ymm23, ymmword_ptr(rcx, 4064)));
// AVX512_VNNI vs AVX_VNNI.
TEST_INSTRUCTION("62F2552850F4" , vpdpbusd(ymm6, ymm5, ymm4));
TEST_INSTRUCTION("C4E25550F4" , vex().vpdpbusd(ymm6, ymm5, ymm4));
tester.printSummary();
return tester.didPass();
}
#undef TEST_INSTRUCTION
#endif

View File

@@ -29,6 +29,7 @@
#include <memory> #include <memory>
#include <vector> #include <vector>
#include <chrono>
#include "cmdline.h" #include "cmdline.h"
#include "asmjit_test_compiler.h" #include "asmjit_test_compiler.h"
@@ -53,6 +54,27 @@ void compiler_add_a64_tests(TestApp& app);
using namespace asmjit; using namespace asmjit;
// Minimal stopwatch used to accumulate how long individual test phases take.
//
// Usage: call `start()`, run the measured work, call `stop()`, then read
// `duration()`. The timer can be reused; each `start()` / `stop()` pair
// overwrites the previously recorded time points.
class PerformanceTimer {
public:
  // A monotonic clock is used on purpose: `std::chrono::high_resolution_clock`
  // is allowed to be an alias of the (adjustable) system clock, in which case
  // a wall-clock change between `start()` and `stop()` would yield a bogus or
  // even negative duration.
  typedef std::chrono::steady_clock::time_point TimePoint;

  // Both time points are value-initialized, so `duration()` reports 0 until
  // the timer has been started and stopped.
  TimePoint _startTime {};
  TimePoint _endTime {};

  // Records the beginning of the measured interval.
  inline void start() {
    _startTime = std::chrono::steady_clock::now();
  }

  // Records the end of the measured interval.
  inline void stop() {
    _endTime = std::chrono::steady_clock::now();
  }

  // Returns the time between the last `start()` and the last `stop()` in
  // milliseconds (the difference is taken in seconds and scaled by 1000).
  inline double duration() const {
    std::chrono::duration<double> elapsed = _endTime - _startTime;
    return elapsed.count() * 1000;
  }
};
// ============================================================================ // ============================================================================
// [TestApp] // [TestApp]
// ============================================================================ // ============================================================================
@@ -79,7 +101,7 @@ int TestApp::handleArgs(int argc, const char* const* argv) {
} }
void TestApp::showInfo() { void TestApp::showInfo() {
printf("AsmJit Compiler Test-Suite v%u.%u.%u [Arch=%s]:\n", printf("AsmJit Compiler Test-Suite v%u.%u.%u (Arch=%s):\n",
unsigned((ASMJIT_LIBRARY_VERSION >> 16) ), unsigned((ASMJIT_LIBRARY_VERSION >> 16) ),
unsigned((ASMJIT_LIBRARY_VERSION >> 8) & 0xFF), unsigned((ASMJIT_LIBRARY_VERSION >> 8) & 0xFF),
unsigned((ASMJIT_LIBRARY_VERSION ) & 0xFF), unsigned((ASMJIT_LIBRARY_VERSION ) & 0xFF),
@@ -109,11 +131,16 @@ int TestApp::run() {
stringLogger.addFlags(kFormatFlags); stringLogger.addFlags(kFormatFlags);
#endif #endif
double compileTime = 0;
double finalizeTime = 0;
for (std::unique_ptr<TestCase>& test : _tests) { for (std::unique_ptr<TestCase>& test : _tests) {
JitRuntime runtime; JitRuntime runtime;
CodeHolder code; CodeHolder code;
SimpleErrorHandler errorHandler; SimpleErrorHandler errorHandler;
PerformanceTimer perfTimer;
code.init(runtime.environment()); code.init(runtime.environment());
code.setErrorHandler(&errorHandler); code.setErrorHandler(&errorHandler);
@@ -141,13 +168,20 @@ int TestApp::run() {
arm::Compiler cc(&code); arm::Compiler cc(&code);
#endif #endif
perfTimer.start();
test->compile(cc); test->compile(cc);
perfTimer.stop();
compileTime += perfTimer.duration();
void* func = nullptr; void* func = nullptr;
Error err = errorHandler._err; Error err = errorHandler._err;
if (!err) if (!err) {
perfTimer.start();
err = cc.finalize(); err = cc.finalize();
perfTimer.stop();
finalizeTime += perfTimer.duration();
}
#ifndef ASMJIT_NO_LOGGING #ifndef ASMJIT_NO_LOGGING
if (_dumpAsm) { if (_dumpAsm) {
@@ -213,13 +247,17 @@ int TestApp::run() {
} }
} }
if (_nFailed == 0)
printf("\nSuccess:\n All %u tests passed\n", unsigned(_tests.size()));
else
printf("\nFailure:\n %u %s of %u failed\n", _nFailed, _nFailed == 1 ? "test" : "tests", unsigned(_tests.size()));
printf(" OutputSize=%zu\n", _outputSize);
printf("\n"); printf("\n");
printf("Summary:\n");
printf(" OutputSize: %zu bytes\n", _outputSize);
printf(" CompileTime: %.2f ms\n", compileTime);
printf(" FinalizeTime: %.2f ms\n", finalizeTime);
printf("\n");
if (_nFailed == 0)
printf("** SUCCESS: All %u tests passed **\n", unsigned(_tests.size()));
else
printf("** FAILURE: %u of %u tests failed **\n", _nFailed, unsigned(_tests.size()));
return _nFailed == 0 ? 0 : 1; return _nFailed == 0 ? 0 : 1;
#endif #endif

View File

@@ -132,7 +132,7 @@ static uint32_t testFunc(JitRuntime& rt, uint32_t emitterType) noexcept {
err = cb.finalize(); err = cb.finalize();
if (err) { if (err) {
printf("x86::Builder::finalize() failed: %s\n", DebugUtils::errorAsString(err)); printf("** FAILURE: x86::Builder::finalize() failed (%s) **\n", DebugUtils::errorAsString(err));
return 1; return 1;
} }
break; break;
@@ -147,7 +147,7 @@ static uint32_t testFunc(JitRuntime& rt, uint32_t emitterType) noexcept {
err = cc.finalize(); err = cc.finalize();
if (err) { if (err) {
printf("x86::Compiler::finalize() failed: %s\n", DebugUtils::errorAsString(err)); printf("** FAILURE: x86::Compiler::finalize() failed (%s) **\n", DebugUtils::errorAsString(err));
return 1; return 1;
} }
break; break;
@@ -160,7 +160,7 @@ static uint32_t testFunc(JitRuntime& rt, uint32_t emitterType) noexcept {
err = rt.add(&fn, &code); err = rt.add(&fn, &code);
if (err) { if (err) {
printf("JitRuntime::add() failed: %s\n", DebugUtils::errorAsString(err)); printf("** FAILURE: JitRuntime::add() failed (%s) **\n", DebugUtils::errorAsString(err));
return 1; return 1;
} }
@@ -178,7 +178,11 @@ static uint32_t testFunc(JitRuntime& rt, uint32_t emitterType) noexcept {
} }
int main() { int main() {
printf("AsmJit X86 Emitter Test\n\n"); printf("AsmJit Emitters Test-Suite v%u.%u.%u\n",
unsigned((ASMJIT_LIBRARY_VERSION >> 16) ),
unsigned((ASMJIT_LIBRARY_VERSION >> 8) & 0xFF),
unsigned((ASMJIT_LIBRARY_VERSION ) & 0xFF));
printf("\n");
JitRuntime rt; JitRuntime rt;
unsigned nFailed = 0; unsigned nFailed = 0;
@@ -194,9 +198,9 @@ int main() {
#endif #endif
if (!nFailed) if (!nFailed)
printf("Success:\n All tests passed\n"); printf("** SUCCESS **\n");
else else
printf("Failure:\n %u %s failed\n", nFailed, nFailed == 1 ? "test" : "tests"); printf("** FAILURE - %u %s failed ** \n", nFailed, nFailed == 1 ? "test" : "tests");
return nFailed ? 1 : 0; return nFailed ? 1 : 0;
} }

View File

@@ -44,8 +44,6 @@ static void printInfo(uint32_t arch, const BaseInst& inst, const Operand_* opera
InstRWInfo rw; InstRWInfo rw;
InstAPI::queryRWInfo(arch, inst, operands, opCount, &rw); InstAPI::queryRWInfo(arch, inst, operands, opCount, &rw);
sb.append("Instruction:\n");
sb.append(" ");
#ifndef ASMJIT_NO_LOGGING #ifndef ASMJIT_NO_LOGGING
Formatter::formatInstruction(sb, 0, nullptr, arch, inst, operands, opCount); Formatter::formatInstruction(sb, 0, nullptr, arch, inst, operands, opCount);
#else #else
@@ -53,11 +51,11 @@ static void printInfo(uint32_t arch, const BaseInst& inst, const Operand_* opera
#endif #endif
sb.append("\n"); sb.append("\n");
sb.append("Operands:\n"); sb.append(" Operands:\n");
for (uint32_t i = 0; i < rw.opCount(); i++) { for (uint32_t i = 0; i < rw.opCount(); i++) {
const OpRWInfo& op = rw.operand(i); const OpRWInfo& op = rw.operand(i);
sb.appendFormat(" [%u] Op=%c Read=%016llX Write=%016llX Extend=%016llX", sb.appendFormat(" [%u] Op=%c Read=%016llX Write=%016llX Extend=%016llX",
i, i,
accessLetter(op.isRead(), op.isWrite()), accessLetter(op.isRead(), op.isWrite()),
op.readByteMask(), op.readByteMask(),
@@ -80,7 +78,7 @@ static void printInfo(uint32_t arch, const BaseInst& inst, const Operand_* opera
} }
if (rw.readFlags() | rw.writeFlags()) { if (rw.readFlags() | rw.writeFlags()) {
sb.append("Flags: \n"); sb.append(" Flags: \n");
struct FlagMap { struct FlagMap {
uint32_t flag; uint32_t flag;
@@ -103,7 +101,7 @@ static void printInfo(uint32_t arch, const BaseInst& inst, const Operand_* opera
{ x86::Status::kC3, "C3" } { x86::Status::kC3, "C3" }
}; };
sb.append(" "); sb.append(" ");
for (uint32_t f = 0; f < 13; f++) { for (uint32_t f = 0; f < 13; f++) {
char c = accessLetter((rw.readFlags() & flagMap[f].flag) != 0, char c = accessLetter((rw.readFlags() & flagMap[f].flag) != 0,
(rw.writeFlags() & flagMap[f].flag) != 0); (rw.writeFlags() & flagMap[f].flag) != 0);
@@ -122,8 +120,8 @@ static void printInfo(uint32_t arch, const BaseInst& inst, const Operand_* opera
#ifndef ASMJIT_NO_LOGGING #ifndef ASMJIT_NO_LOGGING
if (!features.empty()) { if (!features.empty()) {
sb.append("Features:\n"); sb.append(" Features:\n");
sb.append(" "); sb.append(" ");
bool first = true; bool first = true;
BaseFeatures::Iterator it(features.iterator()); BaseFeatures::Iterator it(features.iterator());
@@ -142,8 +140,9 @@ static void printInfo(uint32_t arch, const BaseInst& inst, const Operand_* opera
} }
template<typename... Args> template<typename... Args>
static void printInfoSimple(uint32_t arch, uint32_t instId, Args&&... args) { static void printInfoSimple(uint32_t arch, uint32_t instId, uint32_t options, Args&&... args) {
BaseInst inst(instId); BaseInst inst(instId);
inst.addOptions(options);
Operand_ opArray[] = { std::forward<Args>(args)... }; Operand_ opArray[] = { std::forward<Args>(args)... };
printInfo(arch, inst, opArray, sizeof...(args)); printInfo(arch, inst, opArray, sizeof...(args));
} }
@@ -162,35 +161,43 @@ static void testX86Arch() {
uint32_t arch = Environment::kArchX64; uint32_t arch = Environment::kArchX64;
printInfoSimple(arch, printInfoSimple(arch,
x86::Inst::kIdAdd, x86::Inst::kIdAdd, 0,
x86::eax, x86::ebx); x86::eax, x86::ebx);
printInfoSimple(arch, printInfoSimple(arch,
x86::Inst::kIdLods, x86::Inst::kIdLods, 0,
x86::eax , dword_ptr(x86::rsi)); x86::eax , dword_ptr(x86::rsi));
printInfoSimple(arch, printInfoSimple(arch,
x86::Inst::kIdPshufd, x86::Inst::kIdPshufd, 0,
x86::xmm0, x86::xmm1, imm(0)); x86::xmm0, x86::xmm1, imm(0));
printInfoSimple(arch, printInfoSimple(arch,
x86::Inst::kIdPextrw, x86::Inst::kIdPextrw, 0,
x86::eax, x86::xmm1, imm(0)); x86::eax, x86::xmm1, imm(0));
printInfoSimple(arch, printInfoSimple(arch,
x86::Inst::kIdPextrw, x86::Inst::kIdPextrw, 0,
x86::ptr(x86::rax), x86::xmm1, imm(0)); x86::ptr(x86::rax), x86::xmm1, imm(0));
printInfoSimple(arch, printInfoSimple(arch,
x86::Inst::kIdVaddpd, x86::Inst::kIdVpdpbusd, 0,
x86::xmm0, x86::xmm1, x86::xmm2);
printInfoSimple(arch,
x86::Inst::kIdVpdpbusd, x86::Inst::kOptionVex,
x86::xmm0, x86::xmm1, x86::xmm2);
printInfoSimple(arch,
x86::Inst::kIdVaddpd, 0,
x86::ymm0, x86::ymm1, x86::ymm2); x86::ymm0, x86::ymm1, x86::ymm2);
printInfoSimple(arch, printInfoSimple(arch,
x86::Inst::kIdVaddpd, x86::Inst::kIdVaddpd, 0,
x86::ymm0, x86::ymm30, x86::ymm31); x86::ymm0, x86::ymm30, x86::ymm31);
printInfoSimple(arch, printInfoSimple(arch,
x86::Inst::kIdVaddpd, x86::Inst::kIdVaddpd, 0,
x86::zmm0, x86::zmm1, x86::zmm2); x86::zmm0, x86::zmm1, x86::zmm2);
printInfoExtra(arch, printInfoExtra(arch,
@@ -206,7 +213,11 @@ static void testX86Arch() {
} }
int main() { int main() {
printf("AsmJit Instruction Information Test\n\n"); printf("AsmJit Instruction Info Test-Suite v%u.%u.%u\n",
unsigned((ASMJIT_LIBRARY_VERSION >> 16) ),
unsigned((ASMJIT_LIBRARY_VERSION >> 8) & 0xFF),
unsigned((ASMJIT_LIBRARY_VERSION ) & 0xFF));
printf("\n");
testX86Arch(); testX86Arch();

View File

@@ -51,7 +51,7 @@ using namespace asmjit;
static const uint8_t dataArray[] = { 2, 9, 4, 7, 1, 3, 8, 5, 6, 0 }; static const uint8_t dataArray[] = { 2, 9, 4, 7, 1, 3, 8, 5, 6, 0 };
static void fail(const char* message, Error err) { static void fail(const char* message, Error err) {
printf("%s: %s\n", message, DebugUtils::errorAsString(err)); printf("** FAILURE: %s (%s) **\n", message, DebugUtils::errorAsString(err));
exit(1); exit(1);
} }
@@ -169,11 +169,11 @@ int main() {
fn(3) != dataArray[3] || fn(3) != dataArray[3] ||
fn(6) != dataArray[6] || fn(6) != dataArray[6] ||
fn(9) != dataArray[9] ) { fn(9) != dataArray[9] ) {
printf("Failure:\n The generated function returned incorrect result(s)\n"); printf("** FAILURE: The generated function returned incorrect result(s) **\n");
return 1; return 1;
} }
printf("Success:\n The generated function returned expected results\n"); printf("** SUCCESS **\n");
return 0; return 0;
} }

View File

@@ -69,7 +69,18 @@ const x86isa = new asmdb.x86.ISA({
["imul", "r64, ib" , "RMI" , "REX.W 6B /r ib", "X64 OF=W SF=W ZF=U AF=U PF=U CF=W"], ["imul", "r64, ib" , "RMI" , "REX.W 6B /r ib", "X64 OF=W SF=W ZF=U AF=U PF=U CF=W"],
["imul", "r16, iw" , "RMI" , "66 69 /r iw" , "ANY OF=W SF=W ZF=U AF=U PF=U CF=W"], ["imul", "r16, iw" , "RMI" , "66 69 /r iw" , "ANY OF=W SF=W ZF=U AF=U PF=U CF=W"],
["imul", "r32, id" , "RMI" , "69 /r id" , "ANY OF=W SF=W ZF=U AF=U PF=U CF=W"], ["imul", "r32, id" , "RMI" , "69 /r id" , "ANY OF=W SF=W ZF=U AF=U PF=U CF=W"],
["imul", "r64, id" , "RMI" , "REX.W 69 /r id", "X64 OF=W SF=W ZF=U AF=U PF=U CF=W"] ["imul", "r64, id" , "RMI" , "REX.W 69 /r id", "X64 OF=W SF=W ZF=U AF=U PF=U CF=W"],
// Movabs (X64 only).
["movabs", "W:r64, iq/uq" , "I" , "REX.W B8+r iq", "X64"],
["movabs", "w:al, moff8" , "NONE", "A0" , "X64"],
["movabs", "w:ax, moff16" , "NONE", "66 A1" , "X64"],
["movabs", "W:eax, moff32", "NONE", "A1" , "X64"],
["movabs", "W:rax, moff64", "NONE", "REX.W A1" , "X64"],
["movabs", "W:moff8, al" , "NONE", "A2" , "X64"],
["movabs", "W:moff16, ax" , "NONE", "66 A3" , "X64"],
["movabs", "W:moff32, eax", "NONE", "A3" , "X64"],
["movabs", "W:moff64, rax", "NONE", "REX.W A3" , "X64"]
] ]
}); });
@@ -233,6 +244,9 @@ class GenUtils {
if (dbInst.prefix === "EVEX") { if (dbInst.prefix === "EVEX") {
f.Evex = true; f.Evex = true;
if (dbInst.extensions["AVX512_VNNI"])
f.PreferEvex = true;
if (dbInst.kmask) f.Avx512K = true; if (dbInst.kmask) f.Avx512K = true;
if (dbInst.zmask) f.Avx512Z = true; if (dbInst.zmask) f.Avx512Z = true;
@@ -682,11 +696,21 @@ class IdEnum extends core.IdEnum {
var text = ""; var text = "";
var features = GenUtils.cpuFeaturesOf(dbInsts); var features = GenUtils.cpuFeaturesOf(dbInsts);
const priorityFeatures = ["AVX_VNNI"];
if (features.length) { if (features.length) {
text += "{"; text += "{";
const avxFeatures = filterAVX(features, true); const avxFeatures = filterAVX(features, true);
const otherFeatures = filterAVX(features, false); const otherFeatures = filterAVX(features, false);
for (const pf of priorityFeatures) {
const index = avxFeatures.indexOf(pf);
if (index != -1) {
avxFeatures.splice(index, 1);
avxFeatures.unshift(pf);
}
}
const vl = avxFeatures.indexOf("AVX512_VL"); const vl = avxFeatures.indexOf("AVX512_VL");
if (vl !== -1) avxFeatures.splice(vl, 1); if (vl !== -1) avxFeatures.splice(vl, 1);
@@ -781,138 +805,6 @@ class AltOpcodeTable extends core.Task {
} }
} }
// ============================================================================
// [tablegen.x86.SseToAvxTable]
// ============================================================================
/*
// Removed from asmjit.
class InstSseToAvxTable extends core.Task {
constructor() {
super("InstSseToAvxTable", ["IdEnum"]);
}
run() {
const insts = this.ctx.insts;
const dataTable = new IndexedArray();
const indexTable = [];
function add(data) {
return dataTable.addIndexed("{ " + `SseToAvxData::kMode${data.mode}`.padEnd(28) + ", " + String(data.delta).padEnd(4) + " }");
}
// This will receive a zero index, which means that no SseToAvx or AvxToSse translation is possible.
const kInvalidIndex = add({ mode: "None", delta: 0 });
insts.forEach((inst) => { indexTable.push(kInvalidIndex); });
insts.forEach((inst) => {
// If it's not `kInvalidIndex` it's an AVX instruction that shares the
// SseToAvx data. We won't touch it as it already has the index assigned.
if (indexTable[inst.id] === kInvalidIndex) {
const data = this.calcSseToAvxData(inst.dbInsts);
const index = add(data);
indexTable[inst.id] = index;
if (data.delta !== 0)
indexTable[this.ctx.instMap["v" + inst.name].id] = index;
}
});
this.inject("SseToAvxIndex",
disclaimer(`static const uint8_t sseToAvxIndex[] = {\n${StringUtils.format(indexTable, kIndent, -1)}\n};\n`),
indexTable.length * 1);
this.inject("SseToAvxTable",
disclaimer(`static const SseToAvxData sseToAvxData[] = {\n${StringUtils.format(dataTable, kIndent, true)}\n};\n`),
dataTable.length * 2);
}
filterSseToAvx(dbInsts) {
const filtered = [];
for (var x = 0; x < dbInsts.length; x++) {
const dbInst = dbInsts[x];
const ops = dbInst.operands;
// An SSE instruction never shares its name with an AVX one.
if (/^(VEX|XOP|EVEX)$/.test(dbInst.prefix))
return [];
var ok = false;
for (var y = 0; y < ops.length; y++) {
// There is no AVX instruction that works with MMX regs.
if (ops[y].reg === "mm") { ok = false; break; }
if (ops[y].reg === "xmm") { ok = true; }
}
if (ok)
filtered.push(dbInst);
}
return filtered;
}
calcSseToAvxData(dbInsts) {
const data = {
mode : "None", // No conversion by default.
delta: 0 // 0 if no conversion is possible.
};
const dbSseInsts = this.filterSseToAvx(dbInsts);
if (!dbSseInsts.length)
return data;
const sseName = dbSseInsts[0].name;
const avxName = "v" + sseName;
const dbAvxInsts = this.ctx.query(avxName);
if (!dbAvxInsts.length) {
DEBUG(`SseToAvx: Instruction '${sseName}' has no AVX counterpart`);
return data;
}
if (avxName === "vblendvpd" || avxName === "vblendvps" || avxName === "vpblendvb") {
// Special cases first.
data.mode = "Blend";
}
else {
// Common case, deduce conversion mode by checking both SSE and AVX instructions.
const map = Object.create(null);
for (var sseIndex = 0; sseIndex < dbSseInsts.length; sseIndex++) {
const sseInst = dbSseInsts[sseIndex];
var match = false;
for (var avxIndex = 0; avxIndex < dbAvxInsts.length; avxIndex++) {
const avxInst = dbAvxInsts[avxIndex];
// Select only VEX instructions.
if (avxInst.prefix !== "VEX") continue;
// Check if the AVX version is the same.
if (GenUtils.eqOps(avxInst.operands, 0, sseInst.operands, 0)) {
map.raw = true;
match = true;
}
else if (avxInst.operands[0].data === "xmm" && GenUtils.eqOps(avxInst.operands, 1, sseInst.operands, 0)) {
map.nds = true;
match = true;
}
}
if (!match) {
const signature = sseInst.operands.map(function(op) { return op.data; }).join(", ");
console.log(`SseToAvx: Instruction '${sseName}(${signature})' has no AVX counterpart`);
return data;
}
}
data.mode = (map.raw && !map.nds) ? "Move" : (map.raw && map.nds) ? "MoveIfMem" : "Extend";
}
data.delta = this.ctx.instMap[avxName].id - this.ctx.instMap[sseName].id;
return data;
}
}
*/
// ============================================================================ // ============================================================================
// [tablegen.x86.InstSignatureTable] // [tablegen.x86.InstSignatureTable]
// ============================================================================ // ============================================================================
@@ -1456,14 +1348,7 @@ class SignatureArray extends Array {
class InstSignatureTable extends core.Task { class InstSignatureTable extends core.Task {
constructor() { constructor() {
super("InstSignatureTable"); super("InstSignatureTable");
this.maxOpRows = 0; this.maxOpRows = 0;
this.opBlackList = {
"moff8" : true,
"moff16": true,
"moff32": true,
"moff64": true
};
} }
run() { run() {
@@ -1637,10 +1522,8 @@ class InstSignatureTable extends core.Task {
var imm = iop.imm; var imm = iop.imm;
var rel = iop.rel; var rel = iop.rel;
// Terminate if this operand is something asmjit doesn't support // Skip all instructions having implicit `imm` operand of `1`.
// and skip all instructions having implicit `imm` operand of `1`, if (iop.immValue !== null)
// which are handled fine by asmjit.
if (this.opBlackList[mem] === true || iop.immValue !== null)
break; break;
if (reg === "r8") reg = "r8lo"; if (reg === "r8") reg = "r8lo";
@@ -1648,6 +1531,11 @@ class InstSignatureTable extends core.Task {
if (reg === "st(i)") reg = "st"; if (reg === "st(i)") reg = "st";
if (reg === "st(0)") reg = "st0"; if (reg === "st(0)") reg = "st0";
if (mem === "moff8") mem = "m8";
if (mem === "moff16") mem = "m16";
if (mem === "moff32") mem = "m32";
if (mem === "moff64") mem = "m64";
if (mem === "m32fp") mem = "m32"; if (mem === "m32fp") mem = "m32";
if (mem === "m64fp") mem = "m64"; if (mem === "m64fp") mem = "m64";
if (mem === "m80fp") mem = "m80"; if (mem === "m80fp") mem = "m80";
@@ -1850,6 +1738,7 @@ class InstRWInfoTable extends core.Task {
this.rwCategoryByName = { this.rwCategoryByName = {
"imul" : "Imul", "imul" : "Imul",
"mov" : "Mov", "mov" : "Mov",
"movabs" : "Movabs",
"movhpd" : "Movh64", "movhpd" : "Movh64",
"movhps" : "Movh64", "movhps" : "Movh64",
"vmaskmovpd": "Vmaskmov", "vmaskmovpd": "Vmaskmov",