Files
asmjit/db/x86.js
kobalicek 7596c6d035 [abi] AsmJit v1.18 - performance and memory footprint improvements
* Refactored the whole codebase to use snake_case convention to
    name functions and variables, including member variables.
    Class naming is unchanged and each starts with upper-case
    character. The intention of this change is to make the source
    code more readable and consistent across multiple projects
    where AsmJit is currently used.

  * Refactored support.h to make it more shareable across projects.

  * x86::Vec now inherits from UniVec

  * minor changes in JitAllocator and WriteScope in order to make
    the size of WriteScope smaller

  * added ZoneStatistics and Zone::statistics() getter

  * improved x86::EmitHelper to use tables instead of choose() and
    other mechanisms to pick between SSE and AVX instructions

  * Refactored the whole codebase to use snake_case convention for
    function names, function parameter names, struct members,
    and variables

  * Added a non-owning asmjit::Span<T> type and use into public API
    to hide the usage of ZoneVector in CodeHolder, Builder, and
    Compiler. Users now only get Span (with data and size), which
    doesn't require users to know about ZoneVector

  * Removed RAWorkId from RATiedReg in favor of RAWorkReg*

  * Removed GEN from LiveInfo as it's not needed by CFG construction
    to save memory (GEN was merged with LIVE-IN bits). The remaining
    LIVE-IN, LIVE-OUT, and KILL bits are enough, however KILL bits may
    be removed in the future as KILL bits are not needed after LIVE-IN
    and LIVE-OUT converged

  * Optimized the representation of LIVE-IN, LIVE-OUT, and KILL bits
    per block. Now only registers that live across multiple basic
    blocks are included here, which means that virtual registers that
    only live in a single block are not included and won't be overhead
    during liveness analysis. This optimization alone can make liveness
    analysis 90% faster depending on the code generated (more virtual
    registers that only live in a single basic block -> more gains)

  * Optimized building liveness information bits per block. The new
    code uses an optimized algorithm to prevent too many traversals
    and uses a more optimized code for a case in which not too many
    registers are used (it avoids array operations if the number of
    all virtual registers within the function fits a single BitWord)

  * Optimized code that computes which virtual register is only used
    in a single basic block - this aims to optimize register allocator
    in the future by using a designed code path for allocating regs
    only used in a single basic block

  * Reduced the information required for each live-span, which is used
    by bin-packing. Now the struct is 8 bytes, which is good for a lot
    of optimizations C++ compiler can do

  * Added UniCompiler (ujit) which can be used to share code paths
    between X86, X86_64, and AArch64 code generation (experimental).
2025-09-06 13:44:34 +02:00

1274 lines
40 KiB
JavaScript

// This file is part of AsmJit project <https://asmjit.com>
//
// See <asmjit/core.h> or LICENSE.md for license and copyright information
// SPDX-License-Identifier: Zlib
(function($scope, $as) {
"use strict";
// Import.
const base = $scope.base ? $scope.base : require("./base.js");
const dict = base.dict;
const NONE = base.NONE;
const Parsing = base.Parsing;
const MapUtils = base.MapUtils;
// Export.
const x86 = $scope[$as] = {};
// Aborts processing by throwing an `Error` whose message is tagged with the "[X86]" prefix.
function FAIL(msg) {
  const text = "[X86] " + msg;
  throw new Error(text);
}
// Database
// ========
x86.dbName = "isa_x86.json";
// Metadata Tables
// ===============
// Maps a group pattern to the architecture of each group index. `Instruction._postProcess()` reads
// `ArchGroupInfo[groupPattern][groupIndex]` to replace an "ANY" architecture of a grouped instruction,
// e.g. index 1 of "ry" and index 2 of "rv" (the 64-bit substitutions) resolve to "X64".
const ArchGroupInfo = dict({
"ry": ["ANY", "X64"],
"rv": ["ANY", "ANY", "X64"]
});
// Groups are used by instruction tables to group multiple operand combinations into a single record. In general
// X86 and X86_64 instructions can be divided into GP and SIMD groups, where GP groups use `ry/my` syntax to
// specify operation for 16/32/64 bit registers and "xy/mxy"/"xyz/mxyz" groups to specify a SIMD instruction that
// uses either XMM/YMM (AVX) or XMM/YMM/ZMM registers (AVX-512).
// Each key is a group operand name as it appears in an operand definition. `group` is the group pattern
// reported by `Utils.groupOf()` and `subst` lists the concrete operand substituted for each group index
// (see `Operand._substituteGroupOp()`).
const OperandGroupInfo = dict({
"ry" : { "group": "ry" , "subst": ["r32", "r64"] },
"my" : { "group": "ry" , "subst": ["m32", "m64"] },
"axy" : { "group": "ry" , "subst": ["eax", "rax"] },
"bxy" : { "group": "ry" , "subst": ["ebx", "rbx"] },
"cxy" : { "group": "ry" , "subst": ["ecx", "rcx"] },
"dxy" : { "group": "ry" , "subst": ["edx", "rdx"] },
"rv" : { "group": "rv" , "subst": ["r16", "r32", "r64"] },
"mv" : { "group": "rv" , "subst": ["m16", "m32", "m64"] },
"axv" : { "group": "rv" , "subst": ["ax", "eax", "rax"] },
"bxv" : { "group": "rv" , "subst": ["bx", "ebx", "rbx"] },
"cxv" : { "group": "rv" , "subst": ["cx", "ecx", "rcx"] },
"dxv" : { "group": "rv" , "subst": ["dx", "edx", "rdx"] },
"immv" : { "group": "rv" , "subst": ["imm16", "imm32", "imms32"] },
"xy" : { "group": "xy" , "subst": ["xmm", "ymm"] },
"mxy" : { "group": "xy" , "subst": ["m128", "m256"] },
"xxx" : { "group": "xyz", "subst": ["xmm[31:0]", "xmm[63:0]", "xmm"] },
"xxy" : { "group": "xyz", "subst": ["xmm[63:0]", "xmm", "ymm"] },
"xyz" : { "group": "xyz", "subst": ["xmm", "ymm", "zmm"] },
"mxxx" : { "group": "xyz", "subst": ["m32", "m64", "m128"] },
"mxxy" : { "group": "xyz", "subst": ["m64", "m128", "m256"] },
"mxyz" : { "group": "xyz", "subst": ["m128", "m256", "m512"] }
});
// Group placeholders that can appear in the opcode string of a grouped instruction. `subst` lists the
// concrete opcode component selected by the group index (see `Instruction._substituteOpcodePart()` and
// `Instruction._assignOpcode()`); `group` names the group pattern the placeholder belongs to.
const OpcodeGroupInfo = dict({
"Wy" : { "group": "ry" , "subst": ["W0", "W1"] },
"iv" : { "group": "rv" , "subst": ["iw", "id", "id"] },
"Pv" : { "group": "rv" , "subst": ["66", "NP", "NP"] },
"Wv" : { "group": "rv" , "subst": ["W0", "W0", "W1"] }
});
// Instruction tables use various notations to specify L/LL field, which is used by VEX/EVEX/XOP encodings. This
// field has 1 bit (VEX/XOP) and 2 bits (EVEX) and in general the notation used is 128/256/512, which determines
// the size of SIMD operation, and this is also the notation we want to convert everything else into.
// Maps an L/LL component found in a VEX/EVEX/XOP prefix string to the value stored in `opcode.l` by
// `Instruction._assignOpcode()`. "LIG", "Lxy" and "xyz" are not sizes and map to "LIG", "xy" and "xyz".
const OpcodeLLMapping = dict({
"128": "128",
"256": "256",
"512": "512",
"LZ" : "128",
"LLZ": "128",
"L0" : "128",
"L1" : "256",
"LIG": "LIG",
"Lxy": "xy",
"xyz": "xyz"
});
// Register sizes keyed by register type, as consumed by `Utils.regSize()`. Values are in bits
// (e.g. `xmm` is 128). The table is frozen so lookups cannot modify it.
const RegSize = Object.freeze({
  r8: 8,
  r8hi: 8,
  r16: 16,
  r32: 32,
  r64: 64,
  mm: 64,
  xmm: 128,
  ymm: 256,
  zmm: 512,
  tmm: 512, // Maximum size (64 bytes).
  bnd: 128,
  k: 64,
  st: 80
});
// CpuRegs
// =======
// Build an object containing CPU registers as keys mapping them to type, kind, and index.
//
// `defs` maps a register type to a definition having the following members:
//   - `kind`  - Register kind, copied into every record created from the definition.
//   - `any`   - Optional name of a generic operand of that type; stored with index `-1`.
//   - `names` - Optional list of register names. An entry is either a single name, whose index is its
//               position in the list, or a range such as "xmm0-31", "r8-15b", or "st(0-7)", which is
//               expanded into one record per register, indexed by the number inside the name.
//   - `group` - Optional; copied only into the record created for `any`.
//
// Returns a `dict()` that maps each register name to `{ type, kind, index }`.
function buildCpuRegs(defs) {
  const map = dict();

  for (const type in defs) {
    const def = defs[type];
    const kind = def.kind;
    const names = def.names;
    const group = def.group;

    if (def.any)
      map[def.any] = { type: type, kind: kind, index: -1, group: group };

    if (names) {
      for (let i = 0; i < names.length; i++) {
        const name = names[i];
        const m = /^([A-Za-z\(\)]+)(\d+)-(\d+)([A-Za-z\(\)]*)$/.exec(name);

        if (m) {
          const first = parseInt(m[2], 10);
          const last = parseInt(m[3], 10);

          // The record's index is the register number embedded in the name (e.g. 8 for "r8b"),
          // which keeps it numeric like the index assigned to individually listed registers.
          for (let n = first; n <= last; n++) {
            const regName = m[1] + n + m[4];
            map[regName] = { type: type, kind: kind, index: n };
          }
        }
        else {
          map[name] = { type: type, kind: kind, index: i };
        }
      }
    }
  }

  // HACK: In instruction manuals `r8` denotes low 8-bit register, however,
  // that collides with `r8`, which is a 64-bit register. Since the result
  // of this function is only used internally we patch it to be compatible
  // with what Intel specifies. As `r8` then describes a register type and
  // not a specific register, its index is reset to `-1` as well.
  if (map.r8) {
    map.r8.type = "r8";
    map.r8.index = -1;
  }

  return map;
}
// Lookup table of register operands, built once by `buildCpuRegs()`. Each key below is a register type;
// `any` is the name of the generic operand of that type and `names` lists concrete registers, where an
// entry such as "r8-15b" or "st(0-7)" is a range expanded by `buildCpuRegs()`.
const CpuRegisters = buildCpuRegs({
"r8" : { "kind": "gp" , "any": "r8" , "names": ["al", "cl", "dl", "bl", "spl", "bpl", "sil", "dil", "r8-15b"] },
"r8hi": { "kind": "gp" , "names": ["ah", "ch", "dh", "bh"] },
"r16" : { "kind": "gp" , "any": "r16" , "names": ["ax", "cx", "dx", "bx", "sp", "bp", "si", "di", "r8-15w"] },
"r32" : { "kind": "gp" , "any": "r32" , "names": ["eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi", "r8-15d"] },
"r64" : { "kind": "gp" , "any": "r64" , "names": ["rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8-15"] },
"rxx" : { "kind": "gp" , "names": ["zax", "zcx", "zdx", "zbx", "zsp", "zbp", "zsi", "zdi"] },
"sreg": { "kind": "sreg", "any": "sreg" , "names": ["es", "cs", "ss", "ds", "fs", "gs" ] },
"creg": { "kind": "creg", "any": "creg" , "names": ["cr0-15"] },
"dreg": { "kind": "dreg", "any": "dreg" , "names": ["dr0-15"] },
"bnd" : { "kind": "bnd" , "any": "bnd" , "names": ["bnd0-3"] },
"st" : { "kind": "st" , "any": "st(i)", "names": ["st(0-7)"] },
"mm" : { "kind": "mm" , "any": "mm" , "names": ["mm0-7"] },
"k" : { "kind": "k" , "any": "k" , "names": ["k0-7"] },
"xmm" : { "kind": "vec" , "any": "xmm" , "names": ["xmm0-31"] },
"ymm" : { "kind": "vec" , "any": "ymm" , "names": ["ymm0-31"] },
"zmm" : { "kind": "vec" , "any": "zmm" , "names": ["zmm0-31"] },
"tmm" : { "kind": "tile", "any": "tmm" , "names": ["tmm0-7"] }
});
// asmdb.x86.Utils
// ===============
// X86/X64 utilities.
class Utils {
  // Get the group pattern of a group operand `op` (a key of `OperandGroupInfo`), returns `null`
  // if `op` is not a group operand.
  static groupOf(op) {
    return Object.hasOwn(OperandGroupInfo, op) ? OperandGroupInfo[op].group : null;
  }

  // Split an instruction signature `s` into `{ names, prefixes, operands }`.
  //
  // The signature may start with a bracketed list of prefixes separated by "|", in which "xacqrel" is
  // expanded to "xacquire|xrelease". It is followed by instruction names separated by "|", and then by
  // the operands string, which is returned as is (trimmed). A "{nf}" suffix of the names part is
  // removed and recorded as `prefixes.nf = true` (a property set on the returned `prefixes` array).
  static splitInstructionSignature(s) {
    let prefixes = [];

    if (s.startsWith("[")) {
      // NOTE(review): `Parsing.matchClosingChar()` presumably returns the index of the matching "]".
      const prefixEnd = Parsing.matchClosingChar(s, 0);
      prefixes = s.substring(1, prefixEnd).replace("xacqrel", "xacquire|xrelease").split("|");
      s = s.substring(prefixEnd + 1).trim();
    }

    const nameEnd = s.indexOf(" ");
    let names = s.substring(0, nameEnd === -1 ? s.length : nameEnd);
    const operands = nameEnd === -1 ? "" : s.substring(nameEnd + 1).trim();

    if (names.endsWith("{nf}")) {
      names = names.substring(0, names.length - 4);
      prefixes.nf = true;
    }

    return {
      names: names.split("|"),
      prefixes: prefixes,
      operands: operands
    };
  }

  // Split the operand(s) string into individual operands as defined by the
  // instruction database.
  //
  // NOTE: X86/X64 doesn't require anything else than separating the commas,
  // this function is here for compatibility with other instruction sets.
  static splitOperands(s) {
    const array = s.split(",");
    for (let i = 0; i < array.length; i++)
      array[i] = array[i].trim();
    return array;
  }

  // Get whether the string `s` describes a register operand.
  static isRegOp(s) { return s && Object.hasOwn(CpuRegisters, s); }

  // Get whether the string `s` describes a memory operand.
  static isMemOp(s) { return s && /^(?:mem|mib|tmem|moff|(?:m(?:off)?\d+(?:dec|bcd|fp|int)?)|(?:m16_\d+)|(?:vm\d+(?:x|y|z)))$/.test(s); }

  // Get whether the string `s` describes an immediate operand.
  static isImmOp(s) { return s && /^(?:1|imm4|imm8|imm16|imm32|imm64|imms8|imms32|immu16|immu32|immv|if|p16_16|p16_32|dfv)$/.test(s); }

  // Get whether the string `s` describes a relative displacement (label).
  static isRelOp(s) { return s && /^rel\d+$/.test(s); }

  // Get a register type of a `s`, returns `null` if the register is unknown.
  static regTypeOf(s) { return Object.hasOwn(CpuRegisters, s) ? CpuRegisters[s].type : null; }

  // Get a register kind of a `s`, returns `null` if the register is unknown.
  static regKindOf(s) { return Object.hasOwn(CpuRegisters, s) ? CpuRegisters[s].kind : null; }

  // Get a register index of a `s`, returns `null` if the register is unknown and `-1`
  // if the given string does only represent a register type, but not a specific reg.
  static regIndexOf(s) { return Object.hasOwn(CpuRegisters, s) ? CpuRegisters[s].index : null; }

  // Get size of a register or register type `s` as stored in `RegSize`, returns `-1` if unknown.
  //
  // Only own properties are consulted, so names inherited from `Object.prototype` (such as
  // "toString") are reported as unknown instead of leaking an inherited value.
  static regSize(s) {
    if (Object.hasOwn(RegSize, s))
      return RegSize[s];

    const reg = Object.hasOwn(CpuRegisters, s) ? CpuRegisters[s] : null;
    if (reg && Object.hasOwn(RegSize, reg.type))
      return RegSize[reg.type];

    return -1;
  }

  // Get size of an immediate `s` [in bits].
  //
  // Handles "ib", "iw", "id", "if", "iq", and also "/is4". Throws (via `FAIL`) if `s` is not
  // a known immediate.
  static immSize(s) {
    switch (s) {
      case "/is4"  : return 4;
      case "imm4"  : return 4;
      case "1"     : return 8;
      case "imm8"  : return 8;
      case "imm16" : return 16;
      case "imm32" : return 32;
      case "imm64" : return 64;
      case "imms8" : return 8;
      case "imms32": return 32;
      case "immu16": return 16;
      case "immu32": return 32;

      case "ib"    :
      case "ub"    : return 8;
      case "iw"    :
      case "uw"    : return 16;
      case "id"    :
      case "ud"    : return 32;
      case "iq"    :
      case "uq"    : return 64;

      case "p16_16": return 32;
      case "if"    :
      case "p16_32": return 48;

      // Influences EVEX encoding, not an immediate byte.
      case "dfv"   : return 0;

      // Invalid immediate.
      default      : FAIL(`Invalid immediate ${s}`);
    }
  }

  // Get size of a relative displacement [in bits], returns `-1` if `s` is not "rel8", "rel16", or "rel32".
  static relSize(s) {
    switch (s) {
      case "rel8"  : return 8;
      case "rel16" : return 16;
      case "rel32" : return 32;
      default      : return -1;
    }
  }
}
x86.Utils = Utils;
// asmdb.x86.Operand
// =================
// X86/X64 operand.
// Operand of an X86/X64 instruction, created from a single operand definition string.
//
// NOTE(review): Members such as `data`, `type`, `reg`, `regType`, `mem`, `memSize`, `imm`, `rel`, `read`,
// `write`, `zext`, `implicit`, `commutative`, `rwxIndex` and `rwxWidth`, as well as `isReg()`, `isMem()`
// and `isImm()`, are not defined here - presumably they are provided by `base.Operand`.
class Operand extends base.Operand {
  constructor() {
    super();

    this.groupPattern = "";    // Group pattern in case this operand was created from a group.
    this.memSegment = "";      // Segment specified with register that is used to perform a memory IO.
    this.memOff = false;       // Memory operand is an absolute offset (only a specific version of MOV).
    this.memFar = false;       // Memory is a far pointer (includes segment in first two bytes).
    this.vsibReg = "";         // AVX VSIB register type (xmm/ymm/zmm).
    this.vsibSize = -1;        // AVX VSIB register size (32/64).
    this.bcstSize = -1;        // AVX-512 broadcast size.
  }

  // If `op` starts with a group operand name (see `OperandGroupInfo`), records the group pattern in
  // `this.groupPattern` and returns `op` with that name replaced by the substitution selected by
  // `groupIndex`; any suffix that follows the name is preserved. Otherwise returns `op` unchanged.
  _substituteGroupOp(op, groupIndex) {
    const opPart = op.match(/^([A-Za-z]+)/);
    if (opPart) {
      const groupPattern = Utils.groupOf(opPart[1]);
      if (groupPattern) {
        this.groupPattern = groupPattern;
        return OperandGroupInfo[opPart[1]].subst[groupIndex] + op.substring(opPart[1].length);
      }
    }
    return op;
  }

  // Parses the operand definition `data` and populates this operand.
  //
  //   - `data`          - Operand definition, optionally decorated by an access prefix ("R:", "W:", "w:",
  //                       "X:", "x:", each optionally followed by "?"), a "/bN" broadcast suffix, and
  //                       commutative / implicit markers recognized by `Parsing`. Alternatives are
  //                       separated by "/".
  //   - `defaultAccess` - Access used when `data` has no access prefix.
  //   - `groupIndex`    - Index used to substitute group operands such as "ry" or "xyz".
  //
  // Throws (via `FAIL`) on an invalid bit range, on mismatched immediate sizes, and on an operand that
  // is not a register, memory, immediate, or relative displacement.
  assignData(data, defaultAccess, groupIndex) {
    let s = data;
    this.data = data;

    const type = [];

    // Handle RWX decorators prefix "[RWwXx]:".
    let access = defaultAccess;
    const accessMatch = /^(R|W|w|X|x)(\?)?\:/.exec(s);

    if (accessMatch) {
      // TODO: Conditional access is ignored at the moment.
      access = accessMatch[1];
      s = s.substring(accessMatch[0].length);
    }

    // Handle commutativity attribute.
    if (Parsing.isCommutative(s)) {
      this.commutative = true;
      s = Parsing.clearCommutative(s);
    }

    // Handle AVX-512 broadcast possibility specified as "/bN" suffix.
    const mBcst = /\/b(\d+)/.exec(s);
    if (mBcst) {
      this.bcstSize = parseInt(mBcst[1], 10);

      // Remove the broadcast attribute from the definition; it's not needed anymore.
      s = s.substring(0, mBcst.index) + s.substring(mBcst.index + mBcst[0].length);
    }

    // Handle <implicit> attribute.
    if (Parsing.isImplicit(s)) {
      this.implicit = true;
      s = Parsing.clearImplicit(s);
    }

    // Support multiple operands separated by "/" (only used by r/m).
    const ops = s.split("/");
    const oArr = [];

    for (let i = 0; i < ops.length; i++) {
      const origOp = ops[i].trim();
      let op = this._substituteGroupOp(origOp, groupIndex);

      // Handle range suffix [A] or [A:B]:
      const mRange = /\[(\d+)\s*(?:\:\s*(\d+)\s*)?\]$/.exec(op);
      if (mRange) {
        const a = parseInt(mRange[1], 10);
        const b = parseInt(mRange[2] || String(a), 10);

        if (a < b)
          FAIL(`Operand '${origOp}' contains invalid range '[${a}:${b}]'`);

        this.rwxIndex = b;
        this.rwxWidth = a - b + 1;

        op = op.substring(0, op.length - mRange[0].length);
      }

      // Handle a segment specification if this is an implicit register performing memory access.
      const memSegRegM = op.match(/\((ds|es)\:\s*([\w]+)\)$/);
      if (memSegRegM) {
        this.memSegment = memSegRegM[1];
        this.memRegOnly = memSegRegM[2];
        op = op.substring(0, memSegRegM.index).trim();
      }

      // The stored operand text keeps a possible "+N" suffix, which is only stripped below.
      oArr.push(op);

      // A "+1", "+2", or "+3" suffix is stored as `regIndexRel`; `Instruction._updateOperandsInfo()`
      // uses it to locate the lead operand of a consecutive register chain.
      let regIndexRel = 0;
      if (op.endsWith("+1") || op.endsWith("+2") || op.endsWith("+3")) {
        regIndexRel = parseInt(op.charAt(op.length - 1), 10);
        op = op.substring(0, op.length - 2);
      }

      // Group substitution - when a rv/mv instruction uses 'w' or 'x' access it's only used by
      // the 16-bit form, 32-bit and 64-bit always use 'W' and 'X' when used in a 'rv/mv' group.
      if (this.groupPattern === "rv" && groupIndex > 0 && access !== "R") {
        access = access.toUpperCase();
      }

      if (Utils.isRegOp(op)) {
        this.reg = op;
        this.regType = Utils.regTypeOf(op);
        this.regIndexRel = regIndexRel;
        this.setAccess(access);

        type.push("reg");
        continue;
      }

      if (Utils.isMemOp(op)) {
        this.mem = op;
        this.setAccess(access);

        // Handle memory size.
        const mOff = /^m(?:off)?(\d+)/.exec(op);
        this.memSize = mOff ? parseInt(mOff[1], 10) : 0;
        this.memOff = op.indexOf("moff") === 0;

        const mSeg = /^m16_(\d+)/.exec(op);
        if (mSeg) {
          this.memFar = true;
          this.memSize = parseInt(mSeg[1], 10) + 16;
        }

        // Handle vector addressing mode and size "vmXXr".
        const mVM = /^vm(\d+)(x|y|z)$/.exec(op);
        if (mVM) {
          this.vsibReg = mVM[2] + "mm";
          this.vsibSize = parseInt(mVM[1], 10);
        }

        type.push("mem");
        continue;
      }

      if (Utils.isImmOp(op)) {
        const size = Utils.immSize(op);

        if (!this.imm)
          this.imm = size;
        else if (this.imm !== size)
          FAIL(`Immediate size mismatch: ${this.imm} != ${size}`);

        // Sign-extend / zero-extend.
        const sign = op.startsWith("imms") ? "signed"   :
                     op.startsWith("immu") ? "unsigned" : "any";
        this.immSign = sign;

        if (op === "1") {
          this.immValue = 1;
          this.implicit = true;
        }

        // Record "imm" once, even if more immediate alternatives were specified.
        if (!type.includes("imm"))
          type.push("imm");
        continue;
      }

      if (Utils.isRelOp(op)) {
        this.rel = Utils.relSize(op);
        type.push("rel");
        continue;
      }

      FAIL(`Operand '${origOp}' unhandled`);
    }

    // In case the data has been modified it's always better to use the stripped off
    // version as we have already processed and stored all the possible decorators.
    this.data = oArr.join("/");
    this.type = type.join("/");

    // No explicit bit range was given - the access covers the whole register / memory operand.
    if (this.rwxIndex === -1) {
      const opSize = this.isReg() ? this.regSize :
                     this.isMem() ? this.memSize : -1;
      if (opSize !== -1) {
        this.rwxIndex = 0;
        this.rwxWidth = opSize;
      }
    }
  }

  // Size of the register `this.reg` as reported by `Utils.regSize()` (`-1` if unknown).
  get regSize() {
    return Utils.regSize(this.reg);
  }

  // Sets `read`, `write`, and `zext` from the access character `x` ("R", "W", "w", "X", or "x").
  // Reading and writing is case-insensitive, `zext` is only set by the upper-case "W" and "X".
  setAccess(x) {
    const u = x.toUpperCase();

    this.zext = x === "W" || x === "X";
    this.read = u === "R" || u === "X";
    this.write = u === "W" || u === "X";

    return this;
  }

  // Truthy if the operand is a specific register, i.e. not a register type and not "st(i)".
  isFixedReg() { return this.reg && this.reg !== this.regType && this.reg !== "st(i)"; }

  // Truthy if the operand is a specific register that has a memory segment specified.
  isFixedMem() { return this.memSegment && this.isFixedReg(); }

  // Whether the operand is of a register type that may be accessed partially (r8, r8hi, r16, xmm)
  // and its access is not marked as `zext`.
  isPartialOp() {
    const maybePartial = this.regType === "r8"   ||
                         this.regType === "r8hi" ||
                         this.regType === "r16"  ||
                         this.regType === "xmm";
    return maybePartial && !this.zext;
  }

  // Returns a simplified operand text used by `Instruction.signature`: "<reg>/m" for a reg/mem operand,
  // the memory operand itself if it's VSIB or ends with "fp" / "int", "m" for any other memory operand,
  // and `toString()` otherwise.
  toRegMem() {
    if (this.reg && this.mem)
      return this.reg + "/m";
    else if (this.mem && (this.vsibReg || /fp$|int$/.test(this.mem)))
      return this.mem;
    else if (this.mem)
      return "m";
    else
      return this.toString();
  }

  toString() { return this.data; }
}
x86.Operand = Operand;
// asmdb.x86.Instruction
// =====================
// X86/X64 instruction.
// X86/X64 instruction created from a single record of the instruction database.
//
// NOTE(review): Members such as `name`, `arch`, `encoding`, `operands`, `imm`, `ext`, `category`,
// `opcodeString`, `opcodeValue`, as well as `report()` and `_combineAttribute()`, are not defined here -
// presumably they are provided by `base.Instruction`.
class Instruction extends base.Instruction {
  constructor(db) {
    super(db);

    this.opcode = dict({
      byte : "",               // Opcode byte (a single value specified as HEX string "00-FF").
      ri   : false,            // Instruction opcode is combined with register, "XX+r" or "XX+i".
      _67h : false,            // Opcode 67h prefix use.
      mm   : "",               // Opcode MM[MMM] part (map).
      pp   : "",               // Opcode PP part.
      w    : "",               // Opcode W field.
      l    : "",               // EVEX.LL (nothing, 128, 256, 512, LIG).
      nd   : 0,                // EVEX.ND (new dest) field (default is false, specified as ND=0 or ND=1).
      nf   : 0,                // EVEX.NF (no flags) field (default is false, specified as NF=0 or NF=1).
      scc  : "",               // EVEX.SCC field (4 bits - condition flags).
      mod  : "",               // MODRM.MOD part (2 bits) - either "xx", "11" or "!(11)".
      modr : "",               // MODRM.R part (3 bits) - either "rrr"
      modrm: ""                // MODRM.R/M part - either "bbb"
    });

    this.prefix = "";          // Prefix - "", "3DNOW", "EVEX", "VEX", "XOP".
    this.privilege = "L3";     // Privilege level required to execute the instruction.

    this.groupPattern = "";    // Group pattern in case the instruction was created from a group such as "ry", "rv", "xy", "xyz".
    this.groupIndex = -1;      // Group index.

    this.rel = 0;              // Displacement ("cb", "cw", and "cd" parts).

    this.fpuTop = 0;           // FPU top index manipulation [-1, 0, 1, 2].
    this.fpuStack = "";        // FPU stack manipulation

    this.vsibReg = "";         // AVX VSIB register type (xmm/ymm/zmm).
    this.vsibSize = -1;        // AVX VSIB register size (32/64).

    this.broadcast = false;    // AVX-512 broadcast support.
    this.bcstSize = -1;        // AVX-512 broadcast size.

    this.k = "";               // AVX-512 K function ("", "blend", "zeroing").
    this.kmask = false;        // AVX-512 merging {k}.
    this.zmask = false;        // AVX-512 zeroing {kz}, implies {k}.
    this.er = false;           // AVX-512 embedded rounding {er}, implies {sae}.
    this.sae = false;          // AVX-512 suppress all exceptions {sae} support.

    this.tupleType = "";       // AVX-512 tuple-type.
    this.elementSize = -1;     // Instruction's element size.

    this.encodingPreference = ""; // Encoding preference (either nothing or "EVEX").
    this.consecutiveLead = 0;  // Consecutive register leading N other registers.
    this.prefixes = dict();    // Allowed prefixes.
  }

  // Returns the opcode component selected by `groupIndex` if `op` is a group placeholder
  // (see `OpcodeGroupInfo`), otherwise returns `op` unchanged.
  _substituteOpcodePart(op, groupIndex) {
    if (Object.hasOwn(OpcodeGroupInfo, op)) {
      return OpcodeGroupInfo[op].subst[groupIndex];
    }
    else {
      return op;
    }
  }

  // Populates the instruction from a database record `data`.
  //
  // `data.name`, `data.op` (opcode string, optionally prefixed by "[ENCODING]"), and `data.operands`
  // are processed explicitly; every other key of `data` is passed to `_assignAttribute()`.
  // `groupIndex` selects the substitution of group operands and group opcode components.
  assignData(data, groupIndex) {
    this.name = data.name;
    this.groupIndex = groupIndex;

    if (data.tt)
      this.tupleType = data.tt;

    const em = data.op.match(/^\[\s*(\w+)\s*\](.*)$/);
    const encodingField = em ? em[1] : "NONE";
    const opcodeField = em ? em[2] : data.op;

    this._assignOperands(data.operands, groupIndex);
    this._assignEncoding(encodingField);
    this._assignOpcode(opcodeField.trim(), groupIndex);

    for (const k in data) {
      if (k === "name" || k === "op" || k === "operands")
        continue;
      this._assignAttribute(k, data[k]);
    }

    this._updateOperandsInfo();
    this._postProcess();
  }

  // Assigns a single attribute. X86 specific attributes are handled here, everything else is
  // forwarded to the base class.
  _assignAttribute(key, value) {
    switch (key) {
      case "vl":
        if (value) {
          this.ext["AVX512_VL"] = true;
        }
        return;

      case "prefixes":
        this._combineAttribute("prefixes", value);
        return;

      case "fpuStack":
        this.fpuStack = value;
        switch (value) {
          case "dec"  : this.fpuTop = -1; break;
          case "inc"  : this.fpuTop =  1; break;
          case "pop"  : this.fpuTop =  1; break;
          case "pop2x": this.fpuTop =  2; break;
          case "push" : this.fpuTop = -1; break;
          default:
            FAIL(`Invalid fpuStack value '${value}'`);
        }
        return;

      case "kz":
        this.zmask = true;
        this.kmask = true;
        return;

      case "k":
        this.kmask = true;
        if (typeof value === "string")
          super._assignAttribute(key, value);
        return;

      case "er":
        this.er = true;
        this.sae = true; // {er} implies {sae}.
        return;

      case "sae":
        this.sae = true;
        return;

      case "broadcast":
        this.broadcast = true;
        this.elementSize = value;
        return;

      default:
        super._assignAttribute(key, value);
    }
  }

  // Parses the operands string `s`: extracts "{...}" flags as attributes and creates one `Operand`
  // per comma separated operand. The first operand defaults to "X" access, all others to "R".
  _assignOperands(s, groupIndex) {
    if (!s) return;

    // First remove all flags specified as {...}. We put them into `flags`
    // map and mix with others. This seems to be the best we can do here.
    for (;;) {
      const a = s.indexOf("{");
      const b = s.indexOf("}");

      if (a === -1 || b === -1)
        break;

      // Get the `flag` and remove it from `s`.
      this._assignAttribute(s.substring(a + 1, b), true);
      s = s.substring(0, a) + s.substring(b + 1);
    }

    // Split into individual operands and push them to `operands`.
    const arr = Utils.splitOperands(s);
    for (let i = 0; i < arr.length; i++) {
      const operand = new Operand();
      operand.assignData(arr[i].trim(), i === 0 ? "X" : "R", groupIndex);

      if (operand.mem === "tmem") {
        this.tsib = true;
      }

      // All group operands of a single instruction must use the same group pattern.
      if (operand.groupPattern && this.groupPattern !== operand.groupPattern) {
        if (this.groupPattern) {
          FAIL(`Instruction ${this.name}: Operand's group pattern mismatch '${this.groupPattern}' != '${operand.groupPattern}'`);
        }
        this.groupPattern = operand.groupPattern;
      }

      this.operands.push(operand);
    }
  }

  _assignEncoding(s) {
    this.encoding = s;
  }

  // Parses the opcode string `s` (space separated components) into `this.opcode`, `this.prefix`,
  // `this.imm`, `this.rel`, and related members. Components that cannot be handled are reported
  // through `this.report()`.
  _assignOpcode(s, groupIndex) {
    this.opcodeString = s;

    const parts = s.split(" ");

    if (/^(VEX|EVEX|XOP)\./.test(s)) {
      // Parse VEX/XOP and EVEX encoded instruction, which looks like "<PREFIX>.[APX-DATA].<LL>.<PP>.<MAP>.<W>"
      const prefix = parts[0].split(".");
      this.prefix = prefix[0];

      for (let i = 1; i < prefix.length; i++) {
        let comp = prefix[i];

        if (/^(Pv|Wv|Wy)$/.test(comp)) {
          comp = OpcodeGroupInfo[comp].subst[groupIndex];
        }

        // Process APX EVEX.ND field - ND=0 or ND=1.
        if (/^ND=[01]$/.test(comp)) {
          this.opcode.nd = comp === "ND=1";
          continue;
        }

        // Process APX EVEX.NF field - NF=0 or NF=1.
        if (/^NF=[01]$/.test(comp)) {
          this.opcode.nf = comp === "NF=1";
          continue;
        }

        // Process APX EVEX.SCC field - SCC=0-F
        if (/^SCC=[0-9A-F]$/.test(comp)) {
          this.opcode.scc = comp.charAt(5);
          continue;
        }

        // Process `L/LL` field.
        if (Object.hasOwn(OpcodeLLMapping, comp)) {
          this.opcode.l = OpcodeLLMapping[comp];
          continue;
        }

        // Process `PP` field - 66/F2/F3/NP (NP means no PP field used)
        if (comp === "P0") { /* ignored, `P` is zero... */ continue; }
        if (/^(?:66|F2|F3|NP)$/.test(comp)) { this.opcode.pp = comp; continue; }

        // Process `MM` field - 0F/0F3A/0F38/MAP4/MAP5/MAP6/M8/M9.
        if (/^(?:0F|0F3A|0F38|MAP[4-9A])$/.test(comp)) { this.opcode.mm = comp; continue; }

        // Process `W` field.
        if (/^(WIG|W0|W1|)$/.test(comp)) { this.opcode.w = comp; continue; }

        // TODO: Some new APX instructions don't have W specified (ENQCMD/ENQCMDS).
        if (comp === "W?") { this.opcode.w = "W0"; continue; }

        // ERROR.
        this.report(`'${this.opcodeString}' Unhandled component: ${comp}`);
      }

      for (let i = 1; i < parts.length; i++) {
        let comp = parts[i];

        // Parse opcode.
        if (/^[0-9A-Fa-f]{2}$/.test(comp)) {
          this.opcode.byte = comp.toUpperCase();
          continue;
        }

        // Parse ModR/M field using "/r" or "/0-7" notation.
        if (/^\/[r0-7]$/.test(comp)) {
          this.opcode.mod = "xx";
          this.opcode.modr = comp.charAt(1);
          this.opcode.modrm = "b";
          continue;
        }

        // Parse ModR/M field using "11:xxx:xxx" and "!(11):xxx:xxx" notation.
        const m = comp.match(/^(11|!\(11\)):(rrr|[01]{3}):(bbb|[01]{3})$/);
        if (m) {
          this.opcode.mod = m[1];
          this.opcode.modr = m[2] === "rrr" ? "r" : String(parseInt(m[2], 2));
          this.opcode.modrm = m[3] === "bbb" ? "b" : String(parseInt(m[3], 2));
          continue;
        }

        // Parse immediate byte, word, dword, or qword.
        comp = this._substituteOpcodePart(comp, groupIndex);
        if (/^(?:ib|iw|id|iq|\/is4)$/.test(comp)) {
          this.imm += Utils.immSize(comp);
          continue;
        }

        this.report(`'${this.opcodeString}' Unhandled opcode component: ${comp}`);
      }
    }
    else {
      // Parse X86/X64 instruction (including legacy MMX/SSE/3DNOW instructions).
      let rexParsed = false;

      for (let i = 0; i < parts.length; i++) {
        let comp = parts[i];

        if (comp === "NFx" || comp === "NOREP" || comp === "NO67") {
          // Ignored for now.
          continue;
        }

        // Parse REX or REX2 prefix.
        if (comp.startsWith("REX2.") || comp === "REX.W") {
          if (rexParsed) {
            FAIL(`'${this.opcodeString}' Multiple REX prefixes are invalid`);
          }

          rexParsed = true;

          // Instructions that force REX.W prefix or use REX2 prefix are always 64-bit instructions.
          this.arch = "X64";

          if (comp === "REX.W") {
            this.opcode.w = "W1";
          }
          else {
            this.prefix = "REX2";

            // REX2 has always 3 components - "REX2.<MAP>.<W>".
            const rex2 = comp.split(".");
            if (rex2.length !== 3) {
              FAIL(`'${this.opcodeString}' Invalid REX2 prefix - expected exactly 3 REX2 components`);
            }

            if (rex2[1] === "MAP0") {
              // nothing.
            }
            else if (rex2[1] === "MAP1") {
              this.opcode.mm = "0F";
            }
            else {
              FAIL(`'${this.opcodeString}' Invalid REX2 prefix - REX2.MAP component could be either MAP0 or MAP1`);
            }

            this.opcode.w = rex2[2];
          }
          continue;
        }

        // Parse `PP` prefixes.
        if (this.opcode.mm === "") {
          if (this.opcode.pp === ""   && /^(?:66|F2|F3|NP)$/.test(comp) ||
              this.opcode.pp === "66" && /^(?:F2|F3)$/.test(comp)) {
            this.opcode.pp += comp;
            continue;
          }
        }

        // Parse `MM` prefixes.
        if ((this.opcode.mm === ""   && comp === "0F") ||
            (this.opcode.mm === "0F" && /^(?:01|3A|38)$/.test(comp))) {
          this.opcode.mm += comp;
          continue;
        }

        // Recognize "0F 0F /r XX" encoding.
        if (this.opcode.mm === "0F" && comp === "0F") {
          this.prefix = "3DNOW";
          continue;
        }

        // Parse opcode byte.
        if (/^[0-9A-F]{2}(?:\+[ri])?$/.test(comp)) {
          // Parse "+r" or "+i" suffix.
          if (comp.length > 2) {
            this.opcode.ri = true;
            comp = comp.substring(0, 2);
          }

          // FPU instructions are encoded as "PREFIX XX", where prefix is not the same
          // as MM prefixes used everywhere else. AsmJit internally extends MM field in
          // instruction tables to allow storing this prefix together with other "MM"
          // prefixes, currently the unused indexes are used, but if X86 moves forward
          // and starts using these we can simply use more bits in the opcode DWORD.
          if (!this.opcode.pp && this.opcode.byte === "9B") {
            this.opcode.pp = this.opcode.byte;
            this.opcode.byte = comp;
            continue;
          }

          if (!this.opcode.mm && (/^(?:D8|D9|DA|DB|DC|DD|DE|DF)$/.test(this.opcode.byte))) {
            this.opcode.mm = this.opcode.byte;
            this.opcode.byte = comp;
            continue;
          }

          if (this.opcode.byte) {
            if (this.opcode.byte === "67") {
              this.opcode._67h = true;
            }
            else {
              if (!this.opcode.modr && !this.opcode.modrm) {
                // A second opcode byte is interpreted as a ModR/M byte, which must have MOD == 11.
                const value = parseInt(comp, 16);
                if ((value & 0xC0) === 0xC0) {
                  this.opcode.mod = "11";
                  this.opcode.modr = String((value >> 3) & 0x7);
                  this.opcode.modrm = String((value >> 0) & 0x7);
                }
                else {
                  this.report(`'${this.opcodeString}' Unsupported secondary opcode (MOD/RM) '${comp}' value`);
                }
              }
              else {
                this.report(`'${this.opcodeString}' Multiple opcodes, have ${this.opcode.byte}, found ${comp}`);
              }
            }
          }

          // NOTE(review): This also replaces the previously parsed opcode byte when `comp` was decoded
          // as a secondary (ModR/M) opcode above - confirm that this is intended.
          this.opcode.byte = comp;
          continue;
        }

        // Parse ModR/M field using "/r" or "/0-7" notation.
        if (/^\/[r0-7]$/.test(comp) && !this.opcode.modr) {
          this.opcode.mod = "xx";
          this.opcode.modr = comp.charAt(1);
          this.opcode.modrm = "b";
          continue;
        }

        // Parse ModR/M field using "11:xxx:xxx" and "!(11):xxx:xxx" notation.
        const m = comp.match(/^(11|!\(11\)):(rrr|[01]{3}):(bbb|[01]{3})$/);
        if (m) {
          this.opcode.mod = m[1];
          this.opcode.modr = m[2] === "rrr" ? "r" : String(parseInt(m[2], 2));
          this.opcode.modrm = m[3] === "bbb" ? "b" : String(parseInt(m[3], 2));
          continue;
        }

        // Parse immediate byte, word, dword, fword, or qword.
        if (/^(?:ib|iw|id|iq|iv|if)$/.test(comp)) {
          if (comp === "iv")
            comp = OpcodeGroupInfo[comp].subst[groupIndex];
          this.imm += Utils.immSize(comp);
          continue;
        }

        if (comp === "moff") {
          this.moff = true;
          continue;
        }

        // Parse displacement.
        if (/^(?:cb|cw|cd)$/.test(comp) && !this.rel) {
          this.rel = comp === "cb" ? 1 :
                     comp === "cw" ? 2 :
                     comp === "cd" ? 4 : -1;
          continue;
        }

        // ERROR.
        this.report(`'${this.opcodeString}' Unhandled opcode component: ${comp}`);
      }
    }

    // HACK: Fix instructions having opcode "01".
    if (this.opcode.byte === "" && this.opcode.mm.indexOf("0F01") === this.opcode.mm.length - 4) {
      this.opcode.byte = "01";
      this.opcode.mm = this.opcode.mm.substring(0, this.opcode.mm.length - 2);
    }

    if (this.opcode.byte)
      this.opcodeValue = parseInt(this.opcode.byte, 16);

    if (!this.opcode.byte)
      this.report(`Couldn't parse instruction's opcode '${this.opcodeString}'`);
  }

  // Propagates operand information to the instruction (64-bit only architecture, broadcast, VSIB)
  // and validates consecutive register chains described by operands having `regIndexRel` set.
  _updateOperandsInfo() {
    super._updateOperandsInfo();

    let consecutiveLead = null;
    let consecutiveLastIndex = 0;

    for (let i = 0; i < this.operands.length; i++) {
      const op = this.operands[i];

      // Instructions that use 64-bit GP registers are always 64-bit instructions.
      if (op.reg === "r64" || op.reg === "rax" || op.reg === "rbx" || op.reg === "rcx" || op.reg === "rdx" || op.reg === "rsi" || op.reg === "rdi")
        this.arch = "X64";

      // Propagate broadcast.
      if (op.bcstSize > 0)
        this._assignAttribute("broadcast", op.bcstSize);

      // Propagate VSIB.
      if (op.vsibReg) {
        if (this.vsibReg) {
          this.report("Only one operand can be a vector memory address (vmNNx)");
        }

        this.vsibReg = op.vsibReg;
        this.vsibSize = op.vsibSize;
      }

      if (op.regIndexRel) {
        // `regIndexRel` is the distance from this operand back to the lead of the chain.
        if (i - op.regIndexRel < 0) {
          this.report(`The consecutive register information is invalid, index of the lead (${i - op.regIndexRel}) is out of range`);
        }
        else {
          const lead = this.operands[i - op.regIndexRel];
          if (consecutiveLead && consecutiveLead !== lead) {
            this.report(`The consecutive register chain is invalid`);
          }
          else {
            consecutiveLead = lead;
            consecutiveLastIndex = Math.max(consecutiveLastIndex, op.regIndexRel);
          }
        }
      }
    }

    // The count is stored on the lead operand and includes the lead itself.
    if (consecutiveLead) {
      consecutiveLead.consecutive_lead_count = consecutiveLastIndex + 1;
    }
  }

  // Validate the instruction's definition. Common mistakes can be checked and
  // reported easily, however, if the mistake is just an invalid opcode or
  // something else it's impossible to detect.
  _postProcess() {
    if (this.groupPattern) {
      const archInfo = ArchGroupInfo[this.groupPattern];
      if (this.arch === "ANY" && archInfo && this.arch !== archInfo[this.groupIndex]) {
        // TODO: Never triggered, which means it should be removed.
        this.arch = archInfo[this.groupIndex];
      }
    }
    else {
      this.groupIndex = -1;
    }

    if (this.privilege === "L0")
      this.category.SYSTEM = true;

    const immCount = this.immCount;

    // Verify that the immediate operand/operands are specified in instruction
    // encoding and opcode field. Basically if there is an "ix" in operands,
    // the encoding should contain "I".
    if (immCount > 0) {
      if (immCount === 1 && this.operands[this.operands.length - 1].data === "1") {
        // This must be one of rcl|rcr|rol|ror|sar|sal|shr. We won't validate
        // these as these have "1" as implicit (encoded within opcode, not after).
      }
      else {
        // Every immediate should have its imm byte ("ib", "iw", "id", or "iq") in the opcode data.
        const m = this.opcodeString.match(/(?:^|\s+)(ib|iw|id|iq|iv|if|\/is4)/g);
        if (!m || m.length !== immCount) {
          this.report(`Immediate(s) [${immCount}] not found in opcode: ${this.opcodeString}`);
        }
      }
    }
  }

  isAVX() { return this.isVEX() || this.isEVEX(); }
  isVEX() { return this.prefix === "VEX" || this.prefix === "XOP"; }
  isEVEX() { return this.prefix === "EVEX"; }

  // Returns 0 for "W0", 1 for "W1", and -1 for any other value of the W field.
  getWValue() {
    switch (this.opcode.w) {
      case "W0": return 0;
      case "W1": return 1;
    }
    return -1;
  }

  // Get signature of the instruction as "ARCH PREFIX ENCODING[:operands]" form.
  get signature() {
    const operands = this.operands;
    let sign = this.arch;

    if (this.prefix) {
      sign += " " + this.prefix;
      if (this.prefix !== "3DNOW") {
        if (this.opcode.l === "L1")
          sign += ".256";
        else if (this.opcode.l === "256" || this.opcode.l === "512")
          sign += `.${this.opcode.l}`;
        else
          sign += ".128";

        if (this.opcode.w === "W1")
          sign += ".W";
      }
    }
    else if (this.opcode.w === "W1") {
      sign += " REX.W";
    }

    sign += " " + this.encoding;

    for (let i = 0; i < operands.length; i++) {
      sign += (i === 0) ? ":" : ",";

      const operand = operands[i];
      if (operand.implicit)
        sign += `[${operand.reg}]`;
      else
        sign += operand.toRegMem();
    }

    return sign;
  }

  // Number of operands for which `isImm()` returns true.
  get immCount() {
    const ops = this.operands;
    let n = 0;
    for (let i = 0; i < ops.length; i++)
      if (ops[i].isImm())
        n++;
    return n;
  }

  // Numeric value of `opcode.modr` if it's a digit 0-7, otherwise 0.
  get modRValue() {
    if (/^[0-7]$/.test(this.opcode.modr))
      return parseInt(this.opcode.modr, 10);
    else
      return 0;
  }

  // Numeric value of `opcode.modrm` if it's a digit 0-7, otherwise 0.
  get modRMValue() {
    if (/^[0-7]$/.test(this.opcode.modrm))
      return parseInt(this.opcode.modrm, 10);
    else
      return 0;
  }
}
x86.Instruction = Instruction;
// asmdb.x86.ISA
// =============
// Record keys that may hold an instruction signature. findArch() iterates over
// these keys and returns the first one whose value in the record is a string;
// records matched by "___" are skipped by ISA._addInstructions().
const ArchKeys = MapUtils.mapFromArray(["any", "x86", "x64", "apx", "___"]);
// Returns the first key enumerated from `ArchKeys` under which the record
// `inst` stores a string (the instruction signature). Calls FAIL() when the
// record has no such key.
function findArch(inst) {
  for (const key in ArchKeys) {
    if (typeof inst[key] !== "string")
      continue;
    return key;
  }

  FAIL(`Instruction signature not found in record: ${JSON.stringify(inst)}`);
}
// Merges properties of `group` into `data` (in place):
//
//   - "group" and "instructions" are never copied.
//   - "ext" is appended to the existing `data.ext`, separated by a space.
//   - Any other property is copied only when `data` doesn't define it yet.
function mergeGroupData(data, group) {
  for (const key in group) {
    if (key === "group" || key === "instructions")
      continue;

    if (key === "ext") {
      const existing = data[key];
      data[key] = (existing ? existing + " " : "") + group[key];
    }
    else if (data[key] === undefined) {
      data[key] = group[key];
    }
  }
}
// X86/X64 instruction database - keeps Instruction instances in a map and
// groups together all instructions that share the same name.
class ISA extends base.ISA {
  // Passes `data` to the base class and then adds it through addData(),
  // substituting NONE when no data was given.
  constructor(data) {
    super(data);
    this.addData(data || NONE);
  }

  // Creates Instruction instances for all records of all `groups` and adds
  // each of them through _addInstruction(). Returns `this`.
  _addInstructions(groups) {
    for (const group of groups) {
      for (const record of group.instructions) {
        const arch = findArch(record);

        // TODO: Ignore records having this (only used for testing purposes).
        if (arch === "___")
          continue;

        // APX records are added as X64 instructions.
        const isApx = arch === "apx";
        const instructionArch = isApx ? "X64" : arch.toUpperCase();

        const signature = Utils.splitInstructionSignature(record[arch]);
        const data = MapUtils.cloneExcept(record, arch);
        mergeGroupData(data, group);

        for (const [nameIndex, name] of signature.names.entries()) {
          data.name = name;
          data.prefixes = signature.prefixes;
          data.operands = signature.operands;

          // Every name after the first one is an alias of the first one.
          if (nameIndex > 0) {
            data.aliasOf = signature.names[0];
          }

          // At least one instruction is always created; when it has a group
          // pattern, one more is created for each remaining entry of that
          // pattern's `subst` list.
          let groupIndex = 0;
          for (;;) {
            const instruction = new Instruction(this);
            instruction.arch = instructionArch;
            instruction.assignData(data, groupIndex);

            if (isApx) {
              instruction.ext["APX_F"] = true;
              if (instruction.category.GP) {
                instruction.category.GP_EXT = true;
              }
            }

            this._addInstruction(instruction);

            const pattern = instruction.groupPattern;
            if (!pattern)
              break;

            groupIndex += 1;
            if (!(groupIndex < OperandGroupInfo[pattern].subst.length))
              break;
          }
        }
      }
    }

    return this;
  }
}
x86.ISA = ISA;
// asmdb.x86.X86DataCheck
// ======================
class X86DataCheck {
  // Compares every pair of same-named instructions in `db.instructionMap` and
  // prints (via console.log) each VEX/EVEX pair that has the same operands and
  // the same opcode byte, but a different W or L field.
  static checkVexEvex(db) {
    const map = db.instructionMap;

    for (const name in map) {
      const list = map[name];

      for (let a = 0; a < list.length; a++) {
        const first = list[a];

        for (let b = a + 1; b < list.length; b++) {
          const second = list[b];

          if (first.operands.join("_") !== second.operands.join("_"))
            continue;

          // Picks whichever instruction of the pair has the given prefix,
          // preferring the first one.
          const pick = (prefix) => {
            if (first.prefix === prefix)
              return first;
            return second.prefix === prefix ? second : null;
          };

          const vex = pick("VEX");
          const evex = pick("EVEX");

          if (!vex || !evex || vex.opcode.byte !== evex.opcode.byte)
            continue;

          // NOTE: There are some false positives, they will be printed as well.
          if (vex.opcode.w === evex.opcode.w && vex.opcode.l === evex.opcode.l)
            continue;

          console.log(`Instruction ${name} differs:`);
          console.log(` ${vex.operands.join(" ")}: ${vex.opcodeString}`);
          console.log(` ${evex.operands.join(" ")}: ${evex.opcodeString}`);
        }
      }
    }
  }
}
x86.X86DataCheck = X86DataCheck;
}).apply(this, typeof module === "object" && module && module.exports
? [module, "exports"] : [this.asmdb || (this.asmdb = {}), "x86"]);