Files
asmjit/db/x86.js
kobalicek 2e2866d481 Minor update of X86 ISA DB
* Instructions wr[u]ss[d|q] no longer accept register as the first
    operand (that was a bug to accept this form)
  * Moved APX version of legacy instructions closer so they are next
    to each other
2025-05-25 08:19:44 +02:00

1275 lines
40 KiB
JavaScript

// This file is part of AsmJit project <https://asmjit.com>
//
// See asmjit.h or LICENSE.md for license and copyright information
// SPDX-License-Identifier: Zlib
(function($scope, $as) {
"use strict";
// Import.
const base = $scope.base ? $scope.base : require("./base.js");
const hasOwn = base.hasOwn;
const dict = base.dict;
const NONE = base.NONE;
const Parsing = base.Parsing;
const MapUtils = base.MapUtils;
// Export.
const x86 = $scope[$as] = {};
// Abort processing by throwing an `Error` whose message is `msg` prefixed by "[X86] ".
function FAIL(msg) {
  const error = new Error("[X86] " + msg);
  throw error;
}
// Database
// ========
// File name of the JSON database that holds the X86 ISA data described by this module.
x86.dbName = "isa_x86.json";
// Metadata Tables
// ===============
// Architecture of each member of an operand group, indexed by the group index (the same index that
// selects an entry from a group's `subst` list). Only the "ry" and "rv" groups are listed here.
const ArchGroupInfo = dict({
  ry: ["ANY", "X64"],
  rv: ["ANY", "ANY", "X64"]
});
// Groups let the instruction tables describe several operand combinations in a single record. Each entry maps
// a group operand to the group it belongs to (`group`) and to the concrete operands it expands to (`subst`),
// one per group index:
//
//   - "ry" group - GP operands having a 32-bit and a 64-bit form (`ry/my`, ...).
//   - "rv" group - GP operands having a 16-bit, a 32-bit, and a 64-bit form (`rv/mv`, ...).
//   - "xy" group - SIMD operands that use XMM/YMM registers (`xy/mxy`).
//   - "xyz" group - SIMD operands that use XMM/YMM/ZMM registers (`xyz/mxyz`, ...).
const OperandGroupInfo = dict({
  // 32/64-bit general purpose forms.
  ry: { group: "ry", subst: ["r32", "r64"] },
  my: { group: "ry", subst: ["m32", "m64"] },
  axy: { group: "ry", subst: ["eax", "rax"] },
  bxy: { group: "ry", subst: ["ebx", "rbx"] },
  cxy: { group: "ry", subst: ["ecx", "rcx"] },
  dxy: { group: "ry", subst: ["edx", "rdx"] },

  // 16/32/64-bit general purpose forms.
  rv: { group: "rv", subst: ["r16", "r32", "r64"] },
  mv: { group: "rv", subst: ["m16", "m32", "m64"] },
  axv: { group: "rv", subst: ["ax", "eax", "rax"] },
  bxv: { group: "rv", subst: ["bx", "ebx", "rbx"] },
  cxv: { group: "rv", subst: ["cx", "ecx", "rcx"] },
  dxv: { group: "rv", subst: ["dx", "edx", "rdx"] },
  immv: { group: "rv", subst: ["imm16", "imm32", "imms32"] },

  // XMM/YMM forms.
  xy: { group: "xy", subst: ["xmm", "ymm"] },
  mxy: { group: "xy", subst: ["m128", "m256"] },

  // XMM/YMM/ZMM forms.
  xxx: { group: "xyz", subst: ["xmm[31:0]", "xmm[63:0]", "xmm"] },
  xxy: { group: "xyz", subst: ["xmm[63:0]", "xmm", "ymm"] },
  xyz: { group: "xyz", subst: ["xmm", "ymm", "zmm"] },
  mxxx: { group: "xyz", subst: ["m32", "m64", "m128"] },
  mxxy: { group: "xyz", subst: ["m64", "m128", "m256"] },
  mxyz: { group: "xyz", subst: ["m128", "m256", "m512"] }
});
// Opcode components that depend on the group index - each entry names the operand group it follows (`group`)
// and lists the concrete opcode component used for every member of that group (`subst`).
const OpcodeGroupInfo = dict({
  Wy: { group: "ry", subst: ["W0", "W1"] },
  iv: { group: "rv", subst: ["iw", "id", "id"] },
  Pv: { group: "rv", subst: ["66", "NP", "NP"] },
  Wv: { group: "rv", subst: ["W0", "W0", "W1"] }
});
// The L/LL field used by VEX/EVEX/XOP encodings is written in several notations across the instruction
// tables. The field has 1 bit (VEX/XOP) or 2 bits (EVEX) and describes the size of the SIMD operation,
// so every notation is normalized to the 128/256/512 form (group and LIG notations are kept symbolic).
const OpcodeLLMapping = dict({
  // Already normalized.
  "128": "128",
  "256": "256",
  "512": "512",

  // Alternative spellings.
  LZ: "128",
  LLZ: "128",
  L0: "128",
  L1: "256",

  // Symbolic values.
  LIG: "LIG",
  Lxy: "xy",
  xyz: "xyz"
});
// Size of each register type [in bits].
const RegSize = Object.freeze({
  // General purpose registers.
  r8: 8,
  r8hi: 8,
  r16: 16,
  r32: 32,
  r64: 64,

  // MMX and vector registers.
  mm: 64,
  xmm: 128,
  ymm: 256,
  zmm: 512,

  // Tile registers - maximum size (64 bytes).
  tmm: 512,

  // Other registers.
  bnd: 128,
  k: 64,
  st: 80
});
// CpuRegs
// =======
// Build an object containing CPU registers as keys mapping them to type, kind, and index.
//
// `defs` maps a register type to `{ kind, any, names, group }`:
//   - `any`   - optional name that represents any register of that type; it's stored with index -1.
//   - `names` - optional list of register names. A name is either a single register, in which case its
//               index is its position in the list, or a range such as "r8-15b", "xmm0-31", or "st(0-7)",
//               which expands to one record per register using the number in the name as the index.
//
// Later definitions overwrite earlier ones that use the same name.
function buildCpuRegs(defs) {
  const map = dict();
  const rangePattern = /^([A-Za-z\(\)]+)(\d+)-(\d+)([A-Za-z\(\)]*)$/;

  for (let type in defs) {
    const def = defs[type];
    const kind = def.kind;
    const names = def.names;
    const group = def.group;

    if (def.any)
      map[def.any] = { type: type, kind: kind, index: -1, group: group };

    if (names) {
      for (let i = 0; i < names.length; i++) {
        const name = names[i];
        const m = rangePattern.exec(name);

        if (m) {
          const first = parseInt(m[2], 10);
          const last = parseInt(m[3], 10);

          for (let n = first; n <= last; n++) {
            // The index of an expanded register is the number embedded in its name, which keeps
            // it numeric like the index of explicitly listed registers.
            const regName = m[1] + n + m[4];
            map[regName] = { type: type, kind: kind, index: n };
          }
        }
        else {
          map[name] = { type: type, kind: kind, index: i };
        }
      }
    }
  }

  // HACK: In instruction manuals `r8` denotes low 8-bit register, however,
  // that collides with `r8`, which is a 64-bit register. Since the result
  // of this function is only used internally we patch it to be compatible
  // with what Intel specifies - `r8` describes a register type (index -1),
  // not the specific 64-bit register.
  map.r8.type = "r8";
  map.r8.index = -1;

  return map;
}
// All known registers keyed by name. Each definition below is keyed by the register type and provides the
// register kind, an optional name describing any register of that type, and the names of the registers, where
// "A-B" within a name describes a range of registers. The order of definitions matters as later definitions
// overwrite earlier ones that share a name.
const CpuRegisters = buildCpuRegs({
  // General purpose registers.
  r8: { kind: "gp", any: "r8", names: ["al", "cl", "dl", "bl", "spl", "bpl", "sil", "dil", "r8-15b"] },
  r8hi: { kind: "gp", names: ["ah", "ch", "dh", "bh"] },
  r16: { kind: "gp", any: "r16", names: ["ax", "cx", "dx", "bx", "sp", "bp", "si", "di", "r8-15w"] },
  r32: { kind: "gp", any: "r32", names: ["eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi", "r8-15d"] },
  r64: { kind: "gp", any: "r64", names: ["rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8-15"] },
  rxx: { kind: "gp", names: ["zax", "zcx", "zdx", "zbx", "zsp", "zbp", "zsi", "zdi"] },

  // Segment, control, debug, and bound registers.
  sreg: { kind: "sreg", any: "sreg", names: ["es", "cs", "ss", "ds", "fs", "gs"] },
  creg: { kind: "creg", any: "creg", names: ["cr0-15"] },
  dreg: { kind: "dreg", any: "dreg", names: ["dr0-15"] },
  bnd: { kind: "bnd", any: "bnd", names: ["bnd0-3"] },

  // FPU, MMX, and mask registers.
  st: { kind: "st", any: "st(i)", names: ["st(0-7)"] },
  mm: { kind: "mm", any: "mm", names: ["mm0-7"] },
  k: { kind: "k", any: "k", names: ["k0-7"] },

  // Vector and tile registers.
  xmm: { kind: "vec", any: "xmm", names: ["xmm0-31"] },
  ymm: { kind: "vec", any: "ymm", names: ["ymm0-31"] },
  zmm: { kind: "vec", any: "zmm", names: ["zmm0-31"] },
  tmm: { kind: "tile", any: "tmm", names: ["tmm0-7"] }
});
// asmdb.x86.Utils
// ===============
// X86/X64 utilities.
class Utils {
  // Get the operand group ("ry", "rv", "xy", or "xyz") the group operand `op` belongs to,
  // returns `null` if `op` is not a group operand.
  static groupOf(op) {
    return hasOwn(OperandGroupInfo, op) ? OperandGroupInfo[op].group : null;
  }

  // Split an instruction signature "[prefixes] name1|name2{nf} operands" into its parts.
  //
  // Returns `{ names, prefixes, operands }`, where `names` and `prefixes` are arrays and `operands`
  // is the unprocessed operands string. The "xacqrel" prefix expands to "xacquire" and "xrelease",
  // and a "{nf}" suffix of the name is removed and recorded as `prefixes.nf`.
  static splitInstructionSignature(s) {
    let prefixes = [];

    if (s.startsWith("[")) {
      const prefixEnd = Parsing.matchClosingChar(s, 0);
      prefixes = s.substring(1, prefixEnd).replace("xacqrel", "xacquire|xrelease").split("|");
      s = s.substring(prefixEnd + 1).trim();
    }

    const nameEnd = s.indexOf(" ");
    let names = s.substring(0, nameEnd === -1 ? s.length : nameEnd);
    const operands = nameEnd === -1 ? "" : s.substring(nameEnd + 1).trim();

    if (names.endsWith("{nf}")) {
      names = names.substring(0, names.length - 4);
      // NOTE: This is a named property of the `prefixes` array, not an array element.
      prefixes.nf = true;
    }

    return {
      names: names.split("|"),
      prefixes: prefixes,
      operands: operands
    };
  }

  // Split the operand(s) string into individual operands as defined by the
  // instruction database.
  //
  // NOTE: X86/X64 doesn't require anything else than separating the commas,
  // this function is here for compatibility with other instruction sets.
  static splitOperands(s) {
    const array = s.split(",");
    for (let i = 0; i < array.length; i++)
      array[i] = array[i].trim();
    return array;
  }

  // Get whether the string `s` describes a register operand.
  //
  // NOTE: This and the other `isXXXOp()` functions return `s` itself when it's falsy.
  static isRegOp(s) { return s && hasOwn(CpuRegisters, s); }

  // Get whether the string `s` describes a memory operand.
  static isMemOp(s) { return s && /^(?:mem|mib|tmem|moff|(?:m(?:off)?\d+(?:dec|bcd|fp|int)?)|(?:m16_\d+)|(?:vm\d+(?:x|y|z)))$/.test(s); }

  // Get whether the string `s` describes an immediate operand.
  static isImmOp(s) { return s && /^(?:1|imm4|imm8|imm16|imm32|imm64|imms8|imms32|immu16|immu32|immv|if|p16_16|p16_32|dfv)$/.test(s); }

  // Get whether the string `s` describes a relative displacement (label).
  static isRelOp(s) { return s && /^rel\d+$/.test(s); }

  // Get a register type of a `s`, returns `null` if the register is unknown.
  static regTypeOf(s) { return hasOwn(CpuRegisters, s) ? CpuRegisters[s].type : null; }

  // Get a register kind of a `s`, returns `null` if the register is unknown.
  static regKindOf(s) { return hasOwn(CpuRegisters, s) ? CpuRegisters[s].kind : null; }

  // Get a register index of a `s`, returns `null` if the register is unknown and `-1`
  // if the given string does only represent a register type, but not a specific reg.
  static regIndexOf(s) { return hasOwn(CpuRegisters, s) ? CpuRegisters[s].index : null; }

  // Get size of a register or a register type `s` [in bits], returns `-1` if `s` is unknown.
  static regSize(s) {
    // Only own properties are considered, otherwise names like "toString" or "constructor"
    // would resolve to members inherited from `Object.prototype`.
    if (hasOwn(RegSize, s))
      return RegSize[s];

    if (hasOwn(CpuRegisters, s)) {
      const type = CpuRegisters[s].type;
      if (hasOwn(RegSize, type))
        return RegSize[type];
    }

    return -1;
  }

  // Get size of an immediate `s` [in bits].
  //
  // Handles immediate operands ("imm8", "imms32", ...), opcode components "ib", "iw", "id",
  // "if", "iq", and also "/is4". Fails if `s` is not an immediate.
  static immSize(s) {
    switch (s) {
      case "/is4"  : return 4;
      case "imm4"  : return 4;
      case "1"     : return 8;
      case "imm8"  : return 8;
      case "imm16" : return 16;
      case "imm32" : return 32;
      case "imm64" : return 64;
      case "imms8" : return 8;
      case "imms32": return 32;
      case "immu16": return 16;
      case "immu32": return 32;
      case "ib"    :
      case "ub"    : return 8;
      case "iw"    :
      case "uw"    : return 16;
      case "id"    :
      case "ud"    : return 32;
      case "iq"    :
      case "uq"    : return 64;
      case "p16_16": return 32;
      case "if"    :
      case "p16_32": return 48;
      // Influences EVEX encoding, not an immediate byte.
      case "dfv"   : return 0;
      // Invalid immediate.
      default      : FAIL(`Invalid immediate ${s}`);
    }
  }

  // Get size of a relative displacement [in bits], returns `-1` if `s` is not "rel8", "rel16", or "rel32".
  static relSize(s) {
    switch (s) {
      case "rel8"  : return 8;
      case "rel16" : return 16;
      case "rel32" : return 32;
      default      : return -1;
    }
  }
}
x86.Utils = Utils;
// asmdb.x86.Operand
// =================
// X86/X64 operand.
class Operand extends base.Operand {
  constructor() {
    super();

    this.groupPattern = "";    // Group pattern in case this operand was created from a group.
    this.memSegment = "";      // Segment specified with register that is used to perform a memory IO.
    this.memOff = false;       // Memory operand is an absolute offset (only a specific version of MOV).
    this.memFar = false;       // Memory is a far pointer (includes segment in first two bytes).
    this.vsibReg = "";         // AVX VSIB register type (xmm/ymm/zmm).
    this.vsibSize = -1;        // AVX VSIB register size (32/64).
    this.bcstSize = -1;        // AVX-512 broadcast size.
  }

  // Substitute a group operand such as "ry" or "mxyz" that `op` starts with by the operand selected
  // by `groupIndex` and remember the group in `groupPattern`. Returns `op` unchanged if it doesn't
  // start with a group operand.
  _substituteGroupOp(op, groupIndex) {
    const opPart = op.match(/^([A-Za-z]+)/);
    if (opPart) {
      const groupPattern = Utils.groupOf(opPart[1]);
      if (groupPattern) {
        this.groupPattern = groupPattern;
        return OperandGroupInfo[opPart[1]].subst[groupIndex] + op.substring(opPart[1].length);
      }
    }
    return op;
  }

  // Parse the operand definition `data` and initialize this operand from it.
  //
  //   - `data`          - Operand definition, optionally decorated by an access prefix ("R:", "W:", "w:",
  //                       "X:", "x:"), commutativity and implicit attributes, a "/bN" broadcast suffix,
  //                       a bit range "[A:B]", and "/" separated alternatives such as "r32/m32".
  //   - `defaultAccess` - Access to use if `data` has no access prefix.
  //   - `groupIndex`    - Index used to substitute group operands.
  //
  // Fails if an alternative is not a register, memory, immediate, or relative displacement operand.
  assignData(data, defaultAccess, groupIndex) {
    let s = data;
    this.data = data;

    const type = [];

    // Handle RWX decorators prefix "[RWwXx]:".
    let access = defaultAccess;
    const access_match = /^(R|W|w|X|x)(\?)?\:/.exec(s);
    if (access_match) {
      // TODO: Conditional access is ignored at the moment.
      access = access_match[1];
      s = s.substring(access_match[0].length);
    }

    // Handle commutativity attribute.
    if (Parsing.isCommutative(s)) {
      this.commutative = true;
      s = Parsing.clearCommutative(s);
    }

    // Handle AVX-512 broadcast possibility specified as "/bN" suffix.
    const mBcst = /\/b(\d+)/.exec(s);
    if (mBcst) {
      this.bcstSize = parseInt(mBcst[1], 10);
      // Remove the broadcast attribute from the definition; it's not needed anymore.
      s = s.substring(0, mBcst.index) + s.substring(mBcst.index + mBcst[0].length);
    }

    // Handle <implicit> attribute.
    if (Parsing.isImplicit(s)) {
      this.implicit = true;
      s = Parsing.clearImplicit(s);
    }

    // Support multiple operands separated by "/" (only used by r/m).
    let ops = s.split("/");
    let oArr = [];

    for (let i = 0; i < ops.length; i++) {
      let origOp = ops[i].trim();
      let op = this._substituteGroupOp(origOp, groupIndex);

      // Handle range suffix [A] or [A:B]:
      const mRange = /\[(\d+)\s*(?:\:\s*(\d+)\s*)?\]$/.exec(op);
      if (mRange) {
        const a = parseInt(mRange[1], 10);
        const b = parseInt(mRange[2] || String(a), 10);

        if (a < b)
          FAIL(`Operand '${origOp}' contains invalid range '[${a}:${b}]'`);

        this.rwxIndex = b;
        this.rwxWidth = a - b + 1;

        op = op.substring(0, op.length - mRange[0].length);
      }

      // Handle a segment specification if this is an implicit register performing memory access.
      const memSegRegM = op.match(/\((ds|es)\:\s*([\w]+)\)$/);
      if (memSegRegM) {
        this.memSegment = memSegRegM[1];
        this.memRegOnly = memSegRegM[2];
        op = op.substring(0, memSegRegM.index).trim();
      }

      // The stored data keeps a consecutive register suffix ("+1", "+2", "+3") if there is any.
      oArr.push(op);

      let regIndexRel = 0;
      if (op.endsWith("+1") || op.endsWith("+2") || op.endsWith("+3")) {
        regIndexRel = parseInt(op.charAt(op.length - 1), 10);
        op = op.substring(0, op.length - 2);
      }

      // Group substitution - when a rv/mv instruction uses 'w' or 'x' access it's only used by
      // the 16-bit form, 32-bit and 64-bit always use 'W' and 'X' when used in a 'rv/mv' group.
      if (this.groupPattern === "rv" && groupIndex > 0 && access !== "R") {
        access = access.toUpperCase();
      }

      if (Utils.isRegOp(op)) {
        this.reg = op;
        this.regType = Utils.regTypeOf(op);
        this.regIndexRel = regIndexRel;
        this.setAccess(access);

        type.push("reg");
        continue;
      }

      if (Utils.isMemOp(op)) {
        this.mem = op;
        this.setAccess(access);

        // Handle memory size.
        const mOff = /^m(?:off)?(\d+)/.exec(op);
        this.memSize = mOff ? parseInt(mOff[1], 10) : 0;
        this.memOff = op.indexOf("moff") === 0;

        // Far pointer - the size is the size of the offset plus 16 bits of the segment.
        const mSeg = /^m16_(\d+)/.exec(op);
        if (mSeg) {
          this.memFar = true;
          this.memSize = parseInt(mSeg[1], 10) + 16;
        }

        // Handle vector addressing mode and size "vmXXr".
        const mVM = /^vm(\d+)(x|y|z)$/.exec(op);
        if (mVM) {
          this.vsibReg = mVM[2] + "mm";
          this.vsibSize = parseInt(mVM[1], 10);
        }

        type.push("mem");
        continue;
      }

      if (Utils.isImmOp(op)) {
        const size = Utils.immSize(op);
        if (!this.imm)
          this.imm = size;
        else if (this.imm !== size)
          FAIL(`Immediate size mismatch: ${this.imm} != ${size}`);

        // Sign-extend / zero-extend.
        const sign = op.startsWith("imms") ? "signed"   :
                     op.startsWith("immu") ? "unsigned" : "any";
        this.immSign = sign;

        if (op === "1") {
          this.immValue = 1;
          this.implicit = true;
        }

        // Record "imm" only once even if there are multiple immediate alternatives.
        if (type.indexOf("imm") === -1)
          type.push("imm");
        continue;
      }

      if (Utils.isRelOp(op)) {
        this.rel = Utils.relSize(op);
        type.push("rel");
        continue;
      }

      FAIL(`Operand '${origOp}' unhandled`);
    }

    // In case the data has been modified it's always better to use the stripped off
    // version as we have already processed and stored all the possible decorators.
    this.data = oArr.join("/");
    this.type = type.join("/");

    // If no explicit range was specified the whole register or memory is accessed.
    if (this.rwxIndex === -1) {
      const opSize = this.isReg() ? this.regSize :
                     this.isMem() ? this.memSize : -1;
      if (opSize !== -1) {
        this.rwxIndex = 0;
        this.rwxWidth = opSize;
      }
    }
  }

  // Size of the register [in bits], `-1` if it's unknown.
  get regSize() {
    return Utils.regSize(this.reg);
  }

  // Set `read`, `write`, and `zext` members from the access `x` ("R", "W", "w", "X", or "x"). Lowercase
  // access has the same read/write meaning as uppercase, but doesn't set `zext`.
  setAccess(x) {
    const u = x.toUpperCase();

    this.zext = x === "W" || x === "X";
    this.read  = u === "R" || u === "X";
    this.write = u === "W" || u === "X";

    return this;
  }

  // Get whether the operand is a specific register (not a register type and not "st(i)").
  isFixedReg() { return this.reg && this.reg !== this.regType && this.reg !== "st(i)"; }

  // Get whether the operand is a memory accessed through a segment and a specific register.
  isFixedMem() { return this.memSegment && this.isFixedReg(); }

  // Get whether a write to the operand may leave a part of the register untouched - true
  // for "r8", "r8hi", "r16", and "xmm" register types accessed without `zext`.
  isPartialOp() {
    const maybePartial = this.regType === "r8"   ||
                         this.regType === "r8hi" ||
                         this.regType === "r16"  ||
                         this.regType === "xmm";
    return maybePartial && !this.zext;
  }

  // Get a simplified form of the operand, which doesn't distinguish between memory operands of
  // different sizes, except for VSIB memory and memory having "fp" or "int" suffix.
  toRegMem() {
    if (this.reg && this.mem)
      return this.reg + "/m";
    else if (this.mem && (this.vsibReg || /fp$|int$/.test(this.mem)))
      return this.mem;
    else if (this.mem)
      return "m";
    else
      return this.toString();
  }

  toString() { return this.data; }
}
x86.Operand = Operand;
// asmdb.x86.Instruction
// =====================
// X86/X64 instruction.
class Instruction extends base.Instruction {
  constructor(db) {
    super(db);

    this.opcode = dict({
      byte : "",               // Opcode byte (a single value specified as HEX string "00-FF").
      ri   : false,            // Instruction opcode is combined with register, "XX+r" or "XX+i".
      _67h : false,            // Opcode 67h prefix use.
      mm   : "",               // Opcode MM[MMM] part (map).
      pp   : "",               // Opcode PP part.
      w    : "",               // Opcode W field.
      l    : "",               // EVEX.LL (nothing, 128, 256, 512, LIG).
      nd   : 0,                // EVEX.ND (new dest) field (default is false, specified as ND=0 or ND=1).
      nf   : 0,                // EVEX.NF (no flags) field (default is false, specified as NF=0 or NF=1).
      scc  : "",               // EVEX.SCC field (4 bits - condition flags).
      mod  : "",               // MODRM.MOD part (2 bits) - either "xx", "11" or "!(11)".
      modr : "",               // MODRM.R part (3 bits) - either "r" or a value "0-7".
      modrm: ""                // MODRM.R/M part - either "b" or a value "0-7".
    });

    this.prefix = "";          // Prefix - "", "3DNOW", "EVEX", "VEX", "XOP", "REX2".
    this.privilege = "L3";     // Privilege level required to execute the instruction.
    this.groupPattern = "";    // Group pattern in case the instruction was created from a group such as "ry", "rv", "xy", "xyz".
    this.groupIndex = -1;      // Group index.
    this.rel = 0;              // Displacement ("cb", "cw", and "cd" parts).
    this.fpuTop = 0;           // FPU top index manipulation [-1, 0, 1, 2].
    this.fpuStack = "";        // FPU stack manipulation
    this.vsibReg = "";         // AVX VSIB register type (xmm/ymm/zmm).
    this.vsibSize = -1;        // AVX VSIB register size (32/64).
    this.broadcast = false;    // AVX-512 broadcast support.
    this.bcstSize = -1;        // AVX-512 broadcast size.
    this.k = "";               // AVX-512 K function ("", "blend", "zeroing").
    this.kmask = false;        // AVX-512 merging {k}.
    this.zmask = false;        // AVX-512 zeroing {kz}, implies {k}.
    this.er = false;           // AVX-512 embedded rounding {er}, implies {sae}.
    this.sae = false;          // AVX-512 suppress all exceptions {sae} support.
    this.tupleType = "";       // AVX-512 tuple-type.
    this.elementSize = -1;     // Instruction's element size.
    this.encodingPreference = ""; // Encoding preference (either nothing or "EVEX").
    this.consecutiveLead = 0;  // Consecutive register leading N other registers.
    this.prefixes = dict();    // Allowed prefixes.
  }

  // Substitute an opcode component that depends on the group index ("Wy", "iv", "Pv", "Wv") by the
  // component selected by `groupIndex`, other components are returned as is.
  _substituteOpcodePart(op, groupIndex) {
    if (hasOwn(OpcodeGroupInfo, op)) {
      return OpcodeGroupInfo[op].subst[groupIndex];
    }
    else {
      return op;
    }
  }

  // Initialize the instruction from a database record `data`.
  //
  // The record must provide `name`, `op` (optional "[ENCODING]" followed by the opcode string), and
  // `operands`; `tt` is used as a tuple-type and the remaining keys are assigned as attributes.
  // `groupIndex` selects the member to create if the record describes a group of instructions.
  assignData(data, groupIndex) {
    this.name = data.name;
    this.groupIndex = groupIndex;

    if (data.tt)
      this.tupleType = data.tt;

    const em = data.op.match(/^\[\s*(\w+)\s*\](.*)$/);
    const encodingField = em ? em[1] : "NONE";
    const opcodeField = em ? em[2] : data.op;

    this._assignOperands(data.operands, groupIndex);
    this._assignEncoding(encodingField);
    this._assignOpcode(opcodeField.trim(), groupIndex);

    for (let k in data) {
      if (k === "name" || k === "op" || k === "operands")
        continue;
      this._assignAttribute(k, data[k]);
    }

    this._updateOperandsInfo();
    this._postProcess();
  }

  // Assign a single attribute - handles attributes specific to X86 and passes the rest to the base class.
  _assignAttribute(key, value) {
    switch (key) {
      case "vl":
        if (value) {
          this.ext["AVX512_VL"] = true;
        }
        return;

      case "prefixes":
        this._combineAttribute("prefixes", value);
        return;

      case "fpuStack":
        this.fpuStack = value;
        switch (value) {
          case "dec"  : this.fpuTop = -1; break;
          case "inc"  : this.fpuTop =  1; break;
          case "pop"  : this.fpuTop =  1; break;
          case "pop2x": this.fpuTop =  2; break;
          case "push" : this.fpuTop = -1; break;
          default:
            FAIL(`Invalid fpuStack value '${value}'`);
        }
        return;

      case "kz":
        this.zmask = true;
        this.kmask = true;
        return;

      case "k":
        this.kmask = true;
        if (typeof value === "string")
          super._assignAttribute(key, value);
        return;

      case "er":
        this.er = true;
        this.sae = true; // {er} implies {sae}.
        return;

      case "sae":
        this.sae = true;
        return;

      case "broadcast":
        this.broadcast = true;
        this.elementSize = value;
        return;

      default:
        super._assignAttribute(key, value);
    }
  }

  // Parse the operands string `s` - assigns flags specified as "{...}" as attributes and creates
  // an `Operand` of each comma separated operand. Does nothing if `s` is empty.
  _assignOperands(s, groupIndex) {
    if (!s) return;

    // First remove all flags specified as {...}. We put them into `flags`
    // map and mix with others. This seems to be the best we can do here.
    for (;;) {
      let a = s.indexOf("{");
      let b = s.indexOf("}");

      if (a === -1 || b === -1)
        break;

      // Get the `flag` and remove it from `s`.
      this._assignAttribute(s.substring(a + 1, b), true);
      s = s.substring(0, a) + s.substring(b + 1);
    }

    // Split into individual operands and push them to `operands`.
    const arr = Utils.splitOperands(s);
    for (let i = 0; i < arr.length; i++) {
      // The first operand is read/write by default, the others are read-only.
      const operand = new Operand();
      operand.assignData(arr[i].trim(), i === 0 ? "X" : "R", groupIndex);

      if (operand.mem === "tmem") {
        this.tsib = true;
      }

      // All group operands of a single instruction must belong to the same group.
      if (operand.groupPattern && this.groupPattern !== operand.groupPattern) {
        if (this.groupPattern) {
          FAIL(`Instruction ${this.name}: Operand's group pattern mismatch '${this.groupPattern}' != '${operand.groupPattern}'`);
        }
        this.groupPattern = operand.groupPattern;
      }

      this.operands.push(operand);
    }
  }

  _assignEncoding(s) {
    this.encoding = s;
  }

  // Parse the opcode string `s` into `opcode`, `prefix`, `imm`, `rel`, and related members. Components
  // that are not recognized are reported, invalid REX/REX2 prefixes fail.
  _assignOpcode(s, groupIndex) {
    this.opcodeString = s;

    let parts = s.split(" ");
    if (/^(VEX|EVEX|XOP)\./.test(s)) {
      // Parse VEX/XOP and EVEX encoded instruction, which looks like "<PREFIX>.[APX-DATA].<LL>.<PP>.<MAP>.<W>"
      let prefix = parts[0].split(".");
      this.prefix = prefix[0];

      for (let i = 1; i < prefix.length; i++) {
        let comp = prefix[i];

        if (/^(Pv|Wv|Wy)$/.test(comp)) {
          comp = OpcodeGroupInfo[comp].subst[groupIndex];
        }

        // Process APX EVEX.ND field - ND=0 or ND=1.
        if (/^ND=[01]$/.test(comp)) {
          this.opcode.nd = comp === "ND=1";
          continue;
        }

        // Process APX EVEX.NF field - NF=0 or NF=1.
        if (/^NF=[01]$/.test(comp)) {
          this.opcode.nf = comp === "NF=1";
          continue;
        }

        // Process APX EVEX.SCC field - SCC=0-F
        if (/^SCC=[0-9A-F]$/.test(comp)) {
          this.opcode.scc = comp.charAt(5);
          continue;
        }

        // Process `L/LL` field.
        if (hasOwn(OpcodeLLMapping, comp)) {
          this.opcode.l = OpcodeLLMapping[comp];
          continue;
        }

        // Process `PP` field - 66/F2/F3/NP (NP means no PP field used)
        if (comp === "P0") { /* ignored, `P` is zero... */ continue; }
        if (/^(?:66|F2|F3|NP)$/.test(comp)) { this.opcode.pp = comp; continue; }

        // Process `MM` field - 0F/0F3A/0F38/MAP4-MAP9/MAPA.
        if (/^(?:0F|0F3A|0F38|MAP[4-9A])$/.test(comp)) { this.opcode.mm = comp; continue; }

        // Process `W` field.
        if (/^(WIG|W0|W1|)$/.test(comp)) { this.opcode.w = comp; continue; }

        // TODO: Some new APX instructions don't have W specified (ENQCMD/ENQCMDS).
        if (comp === "W?") { this.opcode.w = "W0"; continue; }

        // ERROR.
        this.report(`'${this.opcodeString}' Unhandled component: ${comp}`);
      }

      for (let i = 1; i < parts.length; i++) {
        let comp = parts[i];

        // Parse opcode.
        if (/^[0-9A-Fa-f]{2}$/.test(comp)) {
          this.opcode.byte = comp.toUpperCase();
          continue;
        }

        // Parse ModR/M field using "/r" or "/0-7" notation.
        if (/^\/[r0-7]$/.test(comp)) {
          this.opcode.mod = "xx";
          this.opcode.modr = comp.charAt(1);
          this.opcode.modrm = "b";
          continue;
        }

        // Parse ModR/M field using "11:xxx:xxx" and "!(11):xxx:xxx" notation.
        const m = comp.match(/^(11|!\(11\)):(rrr|[01]{3}):(bbb|[01]{3})$/);
        if (m) {
          this.opcode.mod = m[1];
          this.opcode.modr = m[2] === "rrr" ? "r" : String(parseInt(m[2], 2));
          this.opcode.modrm = m[3] === "bbb" ? "b" : String(parseInt(m[3], 2));
          continue;
        }

        // Parse immediate byte, word, dword, or qword.
        comp = this._substituteOpcodePart(comp, groupIndex);
        if (/^(?:ib|iw|id|iq|\/is4)$/.test(comp)) {
          this.imm += Utils.immSize(comp);
          continue;
        }

        this.report(`'${this.opcodeString}' Unhandled opcode component: ${comp}`);
      }
    }
    else {
      // Parse X86/X64 instruction (including legacy MMX/SSE/3DNOW instructions).
      let rex_parsed = false;

      for (let i = 0; i < parts.length; i++) {
        let comp = parts[i];

        if (comp === "NFx" || comp === "NOREP" || comp === "NO67") {
          // Ignored for now.
          continue;
        }

        // Parse REX or REX2 prefix.
        if (comp.startsWith("REX2.") || comp === "REX.W") {
          if (rex_parsed) {
            FAIL(`'${this.opcodeString}' Multiple REX prefixes are invalid`);
          }

          rex_parsed = true;

          // Instructions that force REX.W prefix or use REX2 prefix are always 64-bit instructions.
          this.arch = "X64";

          if (comp === "REX.W") {
            this.opcode.w = "W1";
          }
          else {
            this.prefix = "REX2";

            // REX2 has always 3 components - "REX2.<MAP>.<W>".
            const rex2 = comp.split(".");
            if (rex2.length !== 3) {
              FAIL(`'${this.opcodeString}' Invalid REX2 prefix - expected exactly 3 REX2 components`);
            }

            if (rex2[1] === "MAP0") {
              // nothing.
            }
            else if (rex2[1] === "MAP1") {
              this.opcode.mm = "0F";
            }
            else {
              FAIL(`'${this.opcodeString}' Invalid REX2 prefix - REX2.MAP component could be either MAP0 or MAP1`);
            }

            this.opcode.w = rex2[2];
          }

          continue;
        }

        // Parse `PP` prefixes.
        if (this.opcode.mm === "") {
          if (this.opcode.pp === ""   && /^(?:66|F2|F3|NP)$/.test(comp) ||
              this.opcode.pp === "66" && /^(?:F2|F3)$/.test(comp)) {
            this.opcode.pp += comp;
            continue;
          }
        }

        // Parse `MM` prefixes.
        if ((this.opcode.mm === ""   && comp === "0F") ||
            (this.opcode.mm === "0F" && /^(?:01|3A|38)$/.test(comp))) {
          this.opcode.mm += comp;
          continue;
        }

        // Recognize "0F 0F /r XX" encoding.
        if (this.opcode.mm === "0F" && comp === "0F") {
          this.prefix = "3DNOW";
          continue;
        }

        // Parse opcode byte.
        if (/^[0-9A-F]{2}(?:\+[ri])?$/.test(comp)) {
          // Parse "+r" or "+i" suffix.
          if (comp.length > 2) {
            this.opcode.ri = true;
            comp = comp.substring(0, 2);
          }

          // FPU instructions are encoded as "PREFIX XX", where prefix is not the same
          // as MM prefixes used everywhere else. AsmJit internally extends MM field in
          // instruction tables to allow storing this prefix together with other "MM"
          // prefixes, currently the unused indexes are used, but if X86 moves forward
          // and starts using these we can simply use more bits in the opcode DWORD.
          if (!this.opcode.pp && this.opcode.byte === "9B") {
            this.opcode.pp = this.opcode.byte;
            this.opcode.byte = comp;
            continue;
          }

          if (!this.opcode.mm && (/^(?:D8|D9|DA|DB|DC|DD|DE|DF)$/.test(this.opcode.byte))) {
            this.opcode.mm = this.opcode.byte;
            this.opcode.byte = comp;
            continue;
          }

          if (this.opcode.byte) {
            if (this.opcode.byte === "67") {
              this.opcode._67h = true;
            }
            else {
              // A second opcode byte is only accepted as a ModR/M byte having MOD equal to 11.
              if (!this.opcode.modr && !this.opcode.modrm) {
                const value = parseInt(comp, 16);
                if ((value & 0xC0) === 0xC0) {
                  this.opcode.mod = "11";
                  this.opcode.modr = String((value >> 3) & 0x7);
                  this.opcode.modrm = String((value >> 0) & 0x7);
                }
                else {
                  this.report(`'${this.opcodeString}' Unsupported secondary opcode (MOD/RM) '${comp}' value`);
                }
              }
              else {
                this.report(`'${this.opcodeString}' Multiple opcodes, have ${this.opcode.byte}, found ${comp}`);
              }
            }
          }

          this.opcode.byte = comp;
          continue;
        }

        // Parse ModR/M field using "/r" or "/0-7" notation.
        if (/^\/[r0-7]$/.test(comp) && !this.opcode.modr) {
          this.opcode.mod = "xx";
          this.opcode.modr = comp.charAt(1);
          this.opcode.modrm = "b";
          continue;
        }

        // Parse ModR/M field using "11:xxx:xxx" and "!(11):xxx:xxx" notation.
        const m = comp.match(/^(11|!\(11\)):(rrr|[01]{3}):(bbb|[01]{3})$/);
        if (m) {
          this.opcode.mod = m[1];
          this.opcode.modr = m[2] === "rrr" ? "r" : String(parseInt(m[2], 2));
          this.opcode.modrm = m[3] === "bbb" ? "b" : String(parseInt(m[3], 2));
          continue;
        }

        // Parse immediate byte, word, dword, fword, or qword.
        if (/^(?:ib|iw|id|iq|iv|if)$/.test(comp)) {
          if (comp === "iv")
            comp = OpcodeGroupInfo[comp].subst[groupIndex];
          this.imm += Utils.immSize(comp);
          continue;
        }

        if (comp === "moff") {
          this.moff = true;
          continue;
        }

        // Parse displacement.
        if (/^(?:cb|cw|cd)$/.test(comp) && !this.rel) {
          this.rel = comp === "cb" ? 1 :
                     comp === "cw" ? 2 :
                     comp === "cd" ? 4 : -1;
          continue;
        }

        // ERROR.
        this.report(`'${this.opcodeString}' Unhandled opcode component: ${comp}`);
      }
    }

    // HACK: Fix instructions having opcode "01" - it was consumed as a part of "0F01" map.
    if (this.opcode.byte === "" && this.opcode.mm.endsWith("0F01")) {
      this.opcode.byte = "01";
      this.opcode.mm = this.opcode.mm.substring(0, this.opcode.mm.length - 2);
    }

    if (this.opcode.byte)
      this.opcodeValue = parseInt(this.opcode.byte, 16);

    if (!this.opcode.byte)
      this.report(`Couldn't parse instruction's opcode '${this.opcodeString}'`);
  }

  // Propagate information stored in operands (64-bit only registers, broadcast, VSIB, and
  // consecutive registers) to the instruction.
  _updateOperandsInfo() {
    super._updateOperandsInfo();

    let consecutiveLead = null;
    let consecutiveLastIndex = 0;

    for (let i = 0; i < this.operands.length; i++) {
      const op = this.operands[i];

      // Instructions that use 64-bit GP registers are always 64-bit instructions.
      if (op.reg === "r64" || op.reg === "rax" || op.reg === "rbx" || op.reg === "rcx" || op.reg === "rdx" || op.reg === "rsi" || op.reg === "rdi")
        this.arch = "X64";

      // Propagate broadcast.
      if (op.bcstSize > 0)
        this._assignAttribute("broadcast", op.bcstSize);

      // Propagate VSIB.
      if (op.vsibReg) {
        if (this.vsibReg) {
          this.report("Only one operand can be a vector memory address (vmNNx)");
        }

        this.vsibReg = op.vsibReg;
        this.vsibSize = op.vsibSize;
      }

      // Operands using "+N" suffix follow a lead operand, which is N operands before them.
      if (op.regIndexRel) {
        if (i - op.regIndexRel < 0) {
          this.report(`The consecutive register information is invalid, index of the lead (${i - op.regIndexRel}) is out of range`);
        }
        else {
          const lead = this.operands[i - op.regIndexRel];
          if (consecutiveLead && consecutiveLead !== lead) {
            this.report(`The consecutive register chain is invalid`);
          }
          else {
            consecutiveLead = lead;
            consecutiveLastIndex = Math.max(consecutiveLastIndex, op.regIndexRel);
          }
        }
      }
    }

    if (consecutiveLead) {
      consecutiveLead.consecutiveLeadCount = consecutiveLastIndex + 1;
    }
  }

  // Validate the instruction's definition. Common mistakes can be checked and
  // reported easily, however, if the mistake is just an invalid opcode or
  // something else it's impossible to detect.
  _postProcess() {
    if (this.groupPattern) {
      const archInfo = ArchGroupInfo[this.groupPattern];
      if (this.arch === "ANY" && archInfo && this.arch !== archInfo[this.groupIndex]) {
        // TODO: Never triggered, which means it should be removed.
        this.arch = archInfo[this.groupIndex];
      }
    }
    else {
      // The group index is only meaningful if the instruction was created from a group.
      this.groupIndex = -1;
    }

    if (this.privilege === "L0")
      this.category.SYSTEM = true;

    let immCount = this.immCount;

    // Verify that the immediate operand/operands are specified in instruction
    // encoding and opcode field. Basically if there is an "ix" in operands,
    // the encoding should contain "I".
    if (immCount > 0) {
      if (immCount === 1 && this.operands[this.operands.length - 1].data === "1") {
        // This must be one of rcl|rcr|rol|ror|sar|sal|shr. We won't validate
        // these as these have "1" as implicit (encoded within opcode, not after).
      }
      else {
        // Every immediate should have its imm byte ("ib", "iw", "id", or "iq") in the opcode data.
        let m = this.opcodeString.match(/(?:^|\s+)(ib|iw|id|iq|iv|if|\/is4)/g);
        if (!m || m.length !== immCount) {
          this.report(`Immediate(s) [${immCount}] not found in opcode: ${this.opcodeString}`);
        }
      }
    }
  }

  isAVX() { return this.isVEX() || this.isEVEX(); }
  isVEX() { return this.prefix === "VEX" || this.prefix === "XOP"; }
  isEVEX() { return this.prefix === "EVEX"; }

  // Get the value of the W field - 0 (W0), 1 (W1), or -1 if it's something else.
  getWValue() {
    switch (this.opcode.w) {
      case "W0": return 0;
      case "W1": return 1;
    }
    return -1;
  }

  // Get signature of the instruction as "ARCH PREFIX ENCODING[:operands]" form.
  get signature() {
    let operands = this.operands;
    let sign = this.arch;

    if (this.prefix) {
      sign += " " + this.prefix;
      if (this.prefix !== "3DNOW") {
        if (this.opcode.l === "L1")
          sign += ".256";
        else if (this.opcode.l === "256" || this.opcode.l === "512")
          sign += `.${this.opcode.l}`;
        else
          sign += ".128";

        if (this.opcode.w === "W1")
          sign += ".W";
      }
    }
    else if (this.opcode.w === "W1") {
      sign += " REX.W";
    }

    sign += " " + this.encoding;

    for (let i = 0; i < operands.length; i++) {
      sign += (i === 0) ? ":" : ",";

      let operand = operands[i];
      if (operand.implicit)
        sign += `[${operand.reg}]`;
      else
        sign += operand.toRegMem();
    }

    return sign;
  }

  // Get the number of immediate operands.
  get immCount() {
    let ops = this.operands;
    let n = 0;
    for (let i = 0; i < ops.length; i++)
      if (ops[i].isImm())
        n++;
    return n;
  }

  // Get the value of MODRM.R if it's a number "0-7", zero otherwise.
  get modRValue() {
    if (/^[0-7]$/.test(this.opcode.modr))
      return parseInt(this.opcode.modr, 10);
    else
      return 0;
  }

  // Get the value of MODRM.R/M if it's a number "0-7", zero otherwise.
  get modRMValue() {
    if (/^[0-7]$/.test(this.opcode.modrm))
      return parseInt(this.opcode.modrm, 10);
    else
      return 0;
  }
}
x86.Instruction = Instruction;
// asmdb.x86.ISA
// =============
// Keys of an instruction record that can hold the instruction signature, iterated in `findArch()`.
// Records using the "___" key are skipped by `ISA._addInstructions()`.
const ArchKeys = MapUtils.mapFromArray(["any", "x86", "x64", "apx", "___"]);
// Find the architecture key of the record `inst` - the first key of `ArchKeys` the record
// holds a string for (the instruction signature). Fails if the record has no such key.
function findArch(inst) {
  let found;

  for (const key in ArchKeys) {
    if (typeof inst[key] !== "string")
      continue;

    found = key;
    break;
  }

  if (found === undefined)
    FAIL(`Instruction signature not found in record: ${JSON.stringify(inst)}`);

  return found;
}
// Merge properties of an instruction `group` into the instruction `data` (in place):
//   - "group" and "instructions" are never merged.
//   - "ext" of the group is appended to "ext" of the instruction (separated by a space).
//   - Any other property is only used if the instruction doesn't define it.
function mergeGroupData(data, group) {
  for (const key in group) {
    if (key === "group" || key === "instructions")
      continue;

    if (key === "ext") {
      const prefix = data[key] ? data[key] + " " : "";
      data[key] = prefix + group[key];
    }
    else if (data[key] === undefined) {
      data[key] = group[key];
    }
  }
}
// X86/X64 instruction database - stores Instruction instances in a map and
// aggregates all instructions with the same name.
class ISA extends base.ISA {
  constructor(data) {
    super(data);
    this.addData(data || NONE);
  }

  // Create instructions of all records of the given instruction `groups` and add them to the database.
  //
  // A record can describe multiple instructions - one per name of its signature (names other than the
  // first are aliases of the first) and, if it uses group operands, one per member of the operand group.
  _addInstructions(groups) {
    for (const group of groups) {
      for (const record of group.instructions) {
        const arch = findArch(record);

        // TODO: Ignore records having this (only used for testing purposes).
        if (arch === "___")
          continue;

        const isApx = arch === "apx";
        const signature = Utils.splitInstructionSignature(record[arch]);

        const data = MapUtils.cloneExcept(record, arch);
        mergeGroupData(data, group);

        signature.names.forEach((name, nameIndex) => {
          data.name = name;
          data.prefixes = signature.prefixes;
          data.operands = signature.operands;

          if (nameIndex > 0) {
            data.aliasOf = signature.names[0];
          }

          // At least one instruction is always created, more only if the instruction uses a group
          // pattern, in which case one instruction is created per each substitution of the group.
          for (let groupIndex = 0; ; groupIndex++) {
            const instruction = new Instruction(this);
            instruction.arch = isApx ? "X64" : arch.toUpperCase();
            instruction.assignData(data, groupIndex);

            if (isApx) {
              instruction.ext["APX_F"] = true;
              if (instruction.category.GP) {
                instruction.category.GP_EXT = true;
              }
            }

            this._addInstruction(instruction);

            if (!instruction.groupPattern)
              break;

            if (groupIndex + 1 >= OperandGroupInfo[instruction.groupPattern].subst.length)
              break;
          }
        });
      }
    }

    return this;
  }
}
x86.ISA = ISA;
// asmdb.x86.X86DataCheck
// ======================
class X86DataCheck {
  // Print every pair of VEX and EVEX encoded instructions of the same name that have the same
  // operands and the same opcode byte, but a different W or L field.
  //
  // NOTE: There are some false positives, they will be printed as well.
  static checkVexEvex(db) {
    const byName = db.instructionMap;
    const pickByPrefix = (prefix, a, b) => a.prefix === prefix ? a : b.prefix === prefix ? b : null;

    for (const name in byName) {
      const list = byName[name];

      for (let first = 0; first < list.length; first++) {
        const instA = list[first];

        for (let second = first + 1; second < list.length; second++) {
          const instB = list[second];

          if (instA.operands.join("_") !== instB.operands.join("_"))
            continue;

          const vex = pickByPrefix("VEX", instA, instB);
          const evex = pickByPrefix("EVEX", instA, instB);

          if (!vex || !evex || vex.opcode.byte !== evex.opcode.byte)
            continue;

          const same = vex.opcode.w === evex.opcode.w && vex.opcode.l === evex.opcode.l;
          if (same)
            continue;

          console.log(`Instruction ${name} differs:`);
          console.log(` ${vex.operands.join(" ")}: ${vex.opcodeString}`);
          console.log(` ${evex.operands.join(" ")}: ${evex.opcodeString}`);
        }
      }
    }
  }
}
x86.X86DataCheck = X86DataCheck;
}).apply(this, typeof module === "object" && module && module.exports
? [module, "exports"] : [this.asmdb || (this.asmdb = {}), "x86"]);