// This file is part of AsmJit project <https://asmjit.com>
//
// See <asmjit/core.h> or LICENSE.md for license and copyright information
// SPDX-License-Identifier: Zlib

(function($scope, $as) {
"use strict";

// Import.
const base = $scope.base ? $scope.base : require("./base.js");

const dict = base.dict;
const NONE = base.NONE;
const Parsing = base.Parsing;
const MapUtils = base.MapUtils;

// Export.
const x86 = $scope[$as] = {};

// Throws an Error carrying the "[X86] " tag so failures of this module are easy to spot.
function FAIL(msg) { throw new Error("[X86] " + msg); }

// Database
// ========

x86.dbName = "isa_x86.json";

// Metadata Tables
// ===============

// Architecture of each expanded form of a group, indexed by group index. Consulted by
// `Instruction._postProcess()` when the instruction architecture is still "ANY".
const ArchGroupInfo = dict({
  "ry": ["ANY", "X64"],
  "rv": ["ANY", "ANY", "X64"]
});

// Groups are used by instruction tables to group multiple operand combinations into a single record. In general
// X86 and X86_64 instructions can be divided into GP and SIMD groups, where GP groups use `ry/my` syntax to
// specify operation for 16/32/64 bit registers and "xy/mxy"/"xyz/mxyz" groups to specify a SIMD instruction that
// uses either XMM/YMM (AVX) or XMM/YMM/ZMM registers (AVX-512).
const OperandGroupInfo = dict({
  "ry"  : { "group": "ry" , "subst": ["r32", "r64"] },
  "my"  : { "group": "ry" , "subst": ["m32", "m64"] },
  "axy" : { "group": "ry" , "subst": ["eax", "rax"] },
  "bxy" : { "group": "ry" , "subst": ["ebx", "rbx"] },
  "cxy" : { "group": "ry" , "subst": ["ecx", "rcx"] },
  "dxy" : { "group": "ry" , "subst": ["edx", "rdx"] },

  "rv"  : { "group": "rv" , "subst": ["r16", "r32", "r64"] },
  "mv"  : { "group": "rv" , "subst": ["m16", "m32", "m64"] },
  "axv" : { "group": "rv" , "subst": ["ax", "eax", "rax"] },
  "bxv" : { "group": "rv" , "subst": ["bx", "ebx", "rbx"] },
  "cxv" : { "group": "rv" , "subst": ["cx", "ecx", "rcx"] },
  "dxv" : { "group": "rv" , "subst": ["dx", "edx", "rdx"] },
  "immv": { "group": "rv" , "subst": ["imm16", "imm32", "imms32"] },

  "xy"  : { "group": "xy" , "subst": ["xmm", "ymm"] },
  "mxy" : { "group": "xy" , "subst": ["m128", "m256"] },

  "xxx" : { "group": "xyz", "subst": ["xmm[31:0]", "xmm[63:0]", "xmm"] },
  "xxy" : { "group": "xyz", "subst": ["xmm[63:0]", "xmm", "ymm"] },
  "xyz" : { "group": "xyz", "subst": ["xmm", "ymm", "zmm"] },
  "mxxx": { "group": "xyz", "subst": ["m32", "m64", "m128"] },
  "mxxy": { "group": "xyz", "subst": ["m64", "m128", "m256"] },
  "mxyz": { "group": "xyz", "subst": ["m128", "m256", "m512"] }
});

// Opcode components that are substituted per group index, the same way operands are.
const OpcodeGroupInfo = dict({
  "Wy": { "group": "ry" , "subst": ["W0", "W1"] },
  "iv": { "group": "rv" , "subst": ["iw", "id", "id"] },
  "Pv": { "group": "rv" , "subst": ["66", "NP", "NP"] },
  "Wv": { "group": "rv" , "subst": ["W0", "W0", "W1"] }
});

// Instruction tables use various notations to specify L/LL field, which is used by VEX/EVEX/XOP encodings. This
// field has 1 bit (VEX/XOP) and 2 bits (EVEX) and in general the notation used is 128/256/512, which determines
// the size of SIMD operation, and this is also the notation we want to convert everything else into.
const OpcodeLLMapping = dict({
  "128": "128",
  "256": "256",
  "512": "512",
  "LZ" : "128",
  "LLZ": "128",
  "L0" : "128",
  "L1" : "256",
  "LIG": "LIG",
  "Lxy": "xy",
  "xyz": "xyz"
});

// Size of each register type [in bits].
const RegSize = Object.freeze({
  "r8"  : 8,
  "r8hi": 8,
  "r16" : 16,
  "r32" : 32,
  "r64" : 64,
  "mm"  : 64,
  "xmm" : 128,
  "ymm" : 256,
  "zmm" : 512,
  "tmm" : 512, // Maximum size (64 bytes).
  "bnd" : 128,
  "k"   : 64,
  "st"  : 80
});

// CpuRegs
// =======

// Build an object containing CPU registers as keys mapping them to type, kind, and index.
//
// `defs` maps a register type to `{ kind, any, names, group }`. A name can be either a single
// register ("al") or a range such as "xmm0-31", "r8-15d", or "st(0-7)", which is expanded into
// one entry per register. The `any` name (if present) describes the register type itself and
// gets index `-1`.
function buildCpuRegs(defs) {
  const map = dict();

  for (let type in defs) {
    const def = defs[type];
    const kind = def.kind;
    const names = def.names;
    const group = def.group;

    if (def.any)
      map[def.any] = { type: type, kind: kind, index: -1, group: group };

    if (names) {
      for (let i = 0; i < names.length; i++) {
        let name = names[i];
        let m = /^([A-Za-z\(\)]+)(\d+)-(\d+)([A-Za-z\(\)]*)$/.exec(name);

        if (m) {
          let a = parseInt(m[2], 10);
          let b = parseInt(m[3], 10);

          for (let n = a; n <= b; n++) {
            // The key is the full register name (prefix + number + suffix), the index is the
            // number itself - previously the name was stored as the index by mistake.
            const regName = m[1] + n + m[4];
            map[regName] = { type: type, kind: kind, index: n };
          }
        }
        else {
          map[name] = { type: type, kind: kind, index: i };
        }
      }
    }
  }

  // HACK: In instruction manuals `r8` denotes low 8-bit register, however,
  // that collides with `r8`, which is a 64-bit register. Since the result
  // of this function is only used internally we patch it to be compatible
  // with what Intel specifies. As `r8` then describes a register type and
  // not a specific register, its index is reset to -1 as well.
  map.r8.type = "r8";
  map.r8.index = -1;

  return map;
}

const CpuRegisters = buildCpuRegs({
  "r8"  : { "kind": "gp"  , "any": "r8"   , "names": ["al", "cl", "dl", "bl", "spl", "bpl", "sil", "dil", "r8-15b"] },
  "r8hi": { "kind": "gp"  ,                 "names": ["ah", "ch", "dh", "bh"] },
  "r16" : { "kind": "gp"  , "any": "r16"  , "names": ["ax", "cx", "dx", "bx", "sp", "bp", "si", "di", "r8-15w"] },
  "r32" : { "kind": "gp"  , "any": "r32"  , "names": ["eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi", "r8-15d"] },
  "r64" : { "kind": "gp"  , "any": "r64"  , "names": ["rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8-15"] },
  "rxx" : { "kind": "gp"  ,                 "names": ["zax", "zcx", "zdx", "zbx", "zsp", "zbp", "zsi", "zdi"] },
  "sreg": { "kind": "sreg", "any": "sreg" , "names": ["es", "cs", "ss", "ds", "fs", "gs" ] },
  "creg": { "kind": "creg", "any": "creg" , "names": ["cr0-15"] },
  "dreg": { "kind": "dreg", "any": "dreg" , "names": ["dr0-15"] },
  "bnd" : { "kind": "bnd" , "any": "bnd"  , "names": ["bnd0-3"] },
  "st"  : { "kind": "st"  , "any": "st(i)", "names": ["st(0-7)"] },
  "mm"  : { "kind": "mm"  , "any": "mm"   , "names": ["mm0-7"] },
  "k"   : { "kind": "k"   , "any": "k"    , "names": ["k0-7"] },
  "xmm" : { "kind": "vec" , "any": "xmm"  , "names": ["xmm0-31"] },
  "ymm" : { "kind": "vec" , "any": "ymm"  , "names": ["ymm0-31"] },
  "zmm" : { "kind": "vec" , "any": "zmm"  , "names": ["zmm0-31"] },
  "tmm" : { "kind": "tile", "any": "tmm"  , "names": ["tmm0-7"] }
});

// asmdb.x86.Utils
// ===============

// X86/X64 utilities.
class Utils {
  // Get the group ("ry", "rv", "xy", "xyz") of a group operand `op`, returns `null` if `op`
  // is not a group operand.
  static groupOf(op) {
    return Object.hasOwn(OperandGroupInfo, op) ? OperandGroupInfo[op].group : null;
  }

  // Split an instruction signature "[prefixes] name1|name2{nf} operands" into
  // `{ names, prefixes, operands }`.
  //
  // The optional "[...]" part lists prefixes separated by "|", where "xacqrel" is a shortcut
  // for "xacquire|xrelease". A "{nf}" suffix after the names is removed and recorded as
  // `prefixes.nf`.
  static splitInstructionSignature(s) {
    let prefixes = [];

    if (s.startsWith("[")) {
      const prefixEnd = Parsing.matchClosingChar(s, 0);
      prefixes = s.substring(1, prefixEnd).replace("xacqrel", "xacquire|xrelease").split("|");
      s = s.substring(prefixEnd + 1).trim();
    }

    let nameEnd = s.indexOf(" ");
    let names = s.substring(0, nameEnd === -1 ? s.length : nameEnd);
    let operands = nameEnd === -1 ? "" : s.substring(nameEnd + 1).trim();

    if (names.endsWith("{nf}")) {
      names = names.substring(0, names.length - 4);
      prefixes.nf = true;
    }

    return {
      names: names.split("|"),
      prefixes: prefixes,
      operands: operands
    };
  }

  // Split the operand(s) string into individual operands as defined by the
  // instruction database.
  //
  // NOTE: X86/X64 doesn't require anything else than separating the commas,
  // this function is here for compatibility with other instruction sets.
  static splitOperands(s) {
    const array = s.split(",");
    for (let i = 0; i < array.length; i++)
      array[i] = array[i].trim();
    return array;
  }

  // Get whether the string `s` describes a register operand.
  static isRegOp(s) { return s && Object.hasOwn(CpuRegisters, s); }

  // Get whether the string `s` describes a memory operand.
  static isMemOp(s) { return s && /^(?:mem|mib|tmem|moff|(?:m(?:off)?\d+(?:dec|bcd|fp|int)?)|(?:m16_\d+)|(?:vm\d+(?:x|y|z)))$/.test(s); }

  // Get whether the string `s` describes an immediate operand.
  static isImmOp(s) { return s && /^(?:1|imm4|imm8|imm16|imm32|imm64|imms8|imms32|immu16|immu32|immv|if|p16_16|p16_32|dfv)$/.test(s); }

  // Get whether the string `s` describes a relative displacement (label).
  static isRelOp(s) { return s && /^rel\d+$/.test(s); }

  // Get a register type of a `s`, returns `null` if the register is unknown.
  static regTypeOf(s) { return Object.hasOwn(CpuRegisters, s) ? CpuRegisters[s].type : null; }

  // Get a register kind of a `s`, returns `null` if the register is unknown.
  static regKindOf(s) { return Object.hasOwn(CpuRegisters, s) ? CpuRegisters[s].kind : null; }

  // Get a register index of a `s`, returns `null` if the register is unknown and `-1`
  // if the given string does only represent a register type, but not a specific reg.
  static regIndexOf(s) { return Object.hasOwn(CpuRegisters, s) ? CpuRegisters[s].index : null; }

  // Get size of a register or register type `s` [in bits], returns `-1` if unknown.
  static regSize(s) {
    // Own-property checks are used as `RegSize` is a plain object, so `in` would also
    // match inherited keys such as "toString".
    if (Object.hasOwn(RegSize, s))
      return RegSize[s];

    const reg = CpuRegisters[s];
    if (reg && Object.hasOwn(RegSize, reg.type))
      return RegSize[reg.type];

    return -1;
  }

  // Get size of an immediate `s` [in bits].
  //
  // Handles "ib", "iw", "id", "if", "iq", and also "/is4". Throws if `s` is not
  // a known immediate.
  static immSize(s) {
    switch (s) {
      case "/is4"  : return 4;
      case "imm4"  : return 4;
      case "1"     : return 8;
      case "imm8"  : return 8;
      case "imm16" : return 16;
      case "imm32" : return 32;
      case "imm64" : return 64;
      case "imms8" : return 8;
      case "imms32": return 32;
      case "immu16": return 16;
      case "immu32": return 32;

      case "ib"    :
      case "ub"    : return 8;
      case "iw"    :
      case "uw"    : return 16;
      case "id"    :
      case "ud"    : return 32;
      case "iq"    :
      case "uq"    : return 64;

      case "p16_16": return 32;
      case "if"    :
      case "p16_32": return 48;

      // Influences EVEX encoding, not an immediate byte.
      case "dfv"   : return 0;

      // Invalid immediate.
      default      : FAIL(`Invalid immediate ${s}`);
    }
  }

  // Get size of a relative displacement [in bits], returns `-1` if unknown.
  static relSize(s) {
    switch (s) {
      case "rel8"  : return 8;
      case "rel16" : return 16;
      case "rel32" : return 32;
      default      : return -1;
    }
  }
}
x86.Utils = Utils;

// asmdb.x86.Operand
// =================

// X86/X64 operand.
class Operand extends base.Operand {
  constructor() {
    super();

    this.groupPattern = "";    // Group pattern in case this operand was created from a group.
    this.memSegment = "";      // Segment specified with register that is used to perform a memory IO.
    this.memOff = false;       // Memory operand is an absolute offset (only a specific version of MOV).
    this.memFar = false;       // Memory is a far pointer (includes segment in first two bytes).
    this.vsibReg = "";         // AVX VSIB register type (xmm/ymm/zmm).
    this.vsibSize = -1;        // AVX VSIB register size (32/64).
    this.bcstSize = -1;        // AVX-512 broadcast size.
  }

  // If the leading alphabetic part of `op` is a group operand (see `OperandGroupInfo`), record
  // its group in `groupPattern` and return `op` with that part replaced by the substitution
  // selected by `groupIndex`. Returns `op` unchanged otherwise.
  _substituteGroupOp(op, groupIndex) {
    const opPart = op.match(/^([A-Za-z]+)/);
    if (opPart) {
      const groupPattern = Utils.groupOf(opPart[1]);
      if (groupPattern) {
        this.groupPattern = groupPattern;
        return OperandGroupInfo[opPart[1]].subst[groupIndex] + op.substring(opPart[1].length);
      }
    }
    return op;
  }

  // Parse a single operand definition `data` and populate this operand.
  //
  //   - `data`          - operand string as written in the database (decorators included).
  //   - `defaultAccess` - access used when `data` has no "[RWwXx]:" prefix.
  //   - `groupIndex`    - index used to substitute group operands ("ry", "rv", ...).
  //
  // Throws (via `FAIL`) on an invalid bit range, immediate size mismatch, or an operand
  // that is not a register, memory, immediate, or relative displacement.
  assignData(data, defaultAccess, groupIndex) {
    let s = data;
    this.data = data;

    const type = [];

    // Handle RWX decorators prefix "[RWwXx]:".
    let access = defaultAccess;
    const access_match = /^(R|W|w|X|x)(\?)?\:/.exec(s);

    if (access_match) {
      // TODO: Conditional access is ignored at the moment.
      access = access_match[1];
      s = s.substring(access_match[0].length);
    }

    // Handle commutativity attribute.
    if (Parsing.isCommutative(s)) {
      this.commutative = true;
      s = Parsing.clearCommutative(s);
    }

    // Handle AVX-512 broadcast possibility specified as "/bN" suffix.
    const mBcst = /\/b(\d+)/.exec(s);
    if (mBcst) {
      this.bcstSize = parseInt(mBcst[1], 10);

      // Remove the broadcast attribute from the definition; it's not needed anymore.
      s = s.substring(0, mBcst.index) + s.substring(mBcst.index + mBcst[0].length);
    }

    // Handle the implicit-operand attribute.
    if (Parsing.isImplicit(s)) {
      this.implicit = true;
      s = Parsing.clearImplicit(s);
    }

    // Support multiple operands separated by "/" (only used by r/m).
    let ops = s.split("/");
    let oArr = [];

    for (let i = 0; i < ops.length; i++) {
      let origOp = ops[i].trim();
      let op = this._substituteGroupOp(origOp, groupIndex);

      // Handle range suffix [A] or [A:B]:
      const mRange = /\[(\d+)\s*(?:\:\s*(\d+)\s*)?\]$/.exec(op);
      if (mRange) {
        const a = parseInt(mRange[1], 10);
        const b = parseInt(mRange[2] || String(a), 10);

        if (a < b)
          FAIL(`Operand '${origOp}' contains invalid range '[${a}:${b}]'`);

        this.rwxIndex = b;
        this.rwxWidth = a - b + 1;

        op = op.substring(0, op.length - mRange[0].length);
      }

      // Handle a segment specification if this is an implicit register performing memory access.
      const memSegRegM = op.match(/\((ds|es)\:\s*([\w]+)\)$/);
      if (memSegRegM) {
        this.memSegment = memSegRegM[1];
        this.memRegOnly = memSegRegM[2];
        op = op.substring(0, memSegRegM.index).trim();
      }

      // NOTE: The operand is recorded before a possible "+N" suffix is removed below.
      oArr.push(op);

      // Handle "+1", "+2", and "+3" suffixes - consecutive register relative to its lead.
      let regIndexRel = 0;
      if (op.endsWith("+1") || op.endsWith("+2") || op.endsWith("+3")) {
        regIndexRel = parseInt(op.slice(-1), 10);
        op = op.substring(0, op.length - 2);
      }

      // Group substitution - when a rv/mv instruction uses 'w' or 'x' access it's only used by
      // the 16-bit form, 32-bit and 64-bit always use 'W' and 'X' when used in a 'rv/mv' group.
      if (this.groupPattern === "rv" && groupIndex > 0 && access !== "R") {
        access = access.toUpperCase();
      }

      if (Utils.isRegOp(op)) {
        this.reg = op;
        this.regType = Utils.regTypeOf(op);
        this.regIndexRel = regIndexRel;
        this.setAccess(access);

        type.push("reg");
        continue;
      }

      if (Utils.isMemOp(op)) {
        this.mem = op;
        this.setAccess(access);

        // Handle memory size.
        const mOff = /^m(?:off)?(\d+)/.exec(op);
        this.memSize = mOff ? parseInt(mOff[1], 10) : 0;
        this.memOff = op.indexOf("moff") === 0;

        // Handle far pointer "m16_NN" - 16-bit segment followed by NN-bit offset.
        const mSeg = /^m16_(\d+)/.exec(op);
        if (mSeg) {
          this.memFar = true;
          this.memSize = parseInt(mSeg[1], 10) + 16;
        }

        // Handle vector addressing mode and size "vmXXr".
        const mVM = /^vm(\d+)(x|y|z)$/.exec(op);
        if (mVM) {
          this.vsibReg = mVM[2] + "mm";
          this.vsibSize = parseInt(mVM[1], 10);
        }

        type.push("mem");
        continue;
      }

      if (Utils.isImmOp(op)) {
        const size = Utils.immSize(op);
        if (!this.imm)
          this.imm = size;
        else if (this.imm !== size)
          FAIL(`Immediate size mismatch: ${this.imm} != ${size}`);

        // Sign-extend / zero-extend.
        const sign = op.startsWith("imms") ? "signed"   :
                     op.startsWith("immu") ? "unsigned" : "any";
        this.immSign = sign;

        if (op === "1") {
          this.immValue = 1;
          this.implicit = true;
        }

        // Record "imm" only once even if multiple immediate alternatives were specified. The
        // condition used to be inverted, which means that "imm" was never recorded at all.
        if (!type.includes("imm"))
          type.push("imm");
        continue;
      }

      if (Utils.isRelOp(op)) {
        this.rel = Utils.relSize(op);
        type.push("rel");
        continue;
      }

      FAIL(`Operand '${origOp}' unhandled`);
    }

    // In case the data has been modified it's always better to use the stripped off
    // version as we have already processed and stored all the possible decorators.
    this.data = oArr.join("/");
    this.type = type.join("/");

    // If no explicit bit range was specified, the whole register/memory is accessed.
    if (this.rwxIndex === -1) {
      const opSize = this.isReg() ? this.regSize :
                     this.isMem() ? this.memSize : -1;
      if (opSize !== -1) {
        this.rwxIndex = 0;
        this.rwxWidth = opSize;
      }
    }
  }

  // Size of the register [in bits], -1 if unknown.
  get regSize() {
    return Utils.regSize(this.reg);
  }

  // Set `read`, `write`, and `zext` from an access specifier `x` - R|W|X (case-insensitive),
  // where only upper-case "W" and "X" imply zero extension.
  setAccess(x) {
    const u = x.toUpperCase();

    this.zext = x === "W" || x === "X";
    this.read = u === "R" || u === "X";
    this.write = u === "W" || u === "X";

    return this;
  }

  // Get whether the operand is a specific register and not just a register type.
  isFixedReg() { return this.reg && this.reg !== this.regType && this.reg !== "st(i)"; }

  // Get whether the operand is a memory access through a fixed register and segment.
  isFixedMem() { return this.memSegment && this.isFixedReg(); }

  // Get whether a write to this operand can leave the rest of the register untouched.
  isPartialOp() {
    const maybePartial = this.regType === "r8" ||
                         this.regType === "r8hi" ||
                         this.regType === "r16" ||
                         this.regType === "xmm";
    return maybePartial && !this.zext;
  }

  // Get a simplified operand representation, in which memory is reduced to "m" unless it's
  // a VSIB or "fp"/"int" memory operand. Used by `Instruction.signature`.
  toRegMem() {
    if (this.reg && this.mem)
      return this.reg + "/m";
    else if (this.mem && (this.vsibReg || /fp$|int$/.test(this.mem)))
      return this.mem;
    else if (this.mem)
      return "m";
    else
      return this.toString();
  }

  toString() { return this.data; }
}
x86.Operand = Operand;

// asmdb.x86.Instruction
// =====================

// X86/X64 instruction.
class Instruction extends base.Instruction {
  constructor(db) {
    super(db);

    this.opcode = dict({
      byte : "",               // Opcode byte (a single value specified as HEX string "00-FF").
      ri   : false,            // Instruction opcode is combined with register, "XX+r" or "XX+i".
      _67h : false,            // Opcode 67h prefix use.
      mm   : "",               // Opcode MM[MMM] part (map).
      pp   : "",               // Opcode PP part.
      w    : "",               // Opcode W field.
      l    : "",               // EVEX.LL (nothing, 128, 256, 512, LIG).
      nd   : 0,                // EVEX.ND (new dest) field (default is false, specified as ND=0 or ND=1).
      nf   : 0,                // EVEX.NF (no flags) field (default is false, specified as NF=0 or NF=1).
      scc  : "",               // EVEX.SCC field (4 bits - condition flags).
      mod  : "",               // MODRM.MOD part (2 bits) - either "xx", "11" or "!(11)".
      modr : "",               // MODRM.R part (3 bits) - either "r" or a digit "0-7".
      modrm: ""                // MODRM.R/M part - either "b" or a digit "0-7".
    });

    this.prefix = "";          // Prefix - "", "3DNOW", "EVEX", "VEX", "XOP".
    this.privilege = "L3";     // Privilege level required to execute the instruction.
    this.groupPattern = "";    // Group pattern in case the instruction was created from a group such as "ry", "rv", "xy", "xyz".
    this.groupIndex = -1;      // Group index.

    this.rel = 0;              // Displacement ("cb", "cw", and "cd" parts).
    this.fpuTop = 0;           // FPU top index manipulation [-1, 0, 1, 2].
    this.fpuStack = "";        // FPU stack manipulation

    this.vsibReg = "";         // AVX VSIB register type (xmm/ymm/zmm).
    this.vsibSize = -1;        // AVX VSIB register size (32/64).

    this.broadcast = false;    // AVX-512 broadcast support.
    this.bcstSize = -1;        // AVX-512 broadcast size.

    this.k = "";               // AVX-512 K function ("", "blend", "zeroing").
    this.kmask = false;        // AVX-512 merging {k}.
    this.zmask = false;        // AVX-512 zeroing {kz}, implies {k}.
    this.er = false;           // AVX-512 embedded rounding {er}, implies {sae}.
    this.sae = false;          // AVX-512 suppress all exceptions {sae} support.

    this.tupleType = "";       // AVX-512 tuple-type.
    this.elementSize = -1;     // Instruction's element size.

    this.encodingPreference = ""; // Encoding preference (either nothing or "EVEX").
    this.consecutiveLead = 0;  // Consecutive register leading N other registers.
    this.prefixes = dict();    // Allowed prefixes.
  }

  // Substitute a group opcode component (see `OpcodeGroupInfo`) by the form selected by
  // `groupIndex`, returns `op` unchanged if it's not a group component.
  _substituteOpcodePart(op, groupIndex) {
    if (Object.hasOwn(OpcodeGroupInfo, op)) {
      return OpcodeGroupInfo[op].subst[groupIndex];
    }
    else {
      return op;
    }
  }

  // Initialize the instruction from a database record `data`, where `groupIndex` selects
  // which form of a group instruction to create.
  //
  // `data.op` is "[ENCODING] opcode" - if the "[...]" part is missing the encoding is "NONE".
  // Every key other than "name", "op", and "operands" is passed to `_assignAttribute()`.
  assignData(data, groupIndex) {
    this.name = data.name;
    this.groupIndex = groupIndex;

    if (data.tt)
      this.tupleType = data.tt;

    const em = data.op.match(/^\[\s*(\w+)\s*\](.*)$/);
    const encodingField = em ? em[1] : "NONE";
    const opcodeField = em ? em[2] : data.op;

    this._assignOperands(data.operands, groupIndex);
    this._assignEncoding(encodingField);
    this._assignOpcode(opcodeField.trim(), groupIndex);

    for (let k in data) {
      if (k === "name" || k === "op" || k === "operands")
        continue;
      this._assignAttribute(k, data[k]);
    }

    this._updateOperandsInfo();
    this._postProcess();
  }

  // Assign a single attribute. X86 specific attributes are handled here, everything else
  // is forwarded to the base class.
  _assignAttribute(key, value) {
    switch (key) {
      case "vl":
        if (value) {
          this.ext["AVX512_VL"] = true;
        }
        return;

      case "prefixes":
        this._combineAttribute("prefixes", value);
        return;

      case "fpuStack":
        this.fpuStack = value;
        switch (value) {
          case "dec"  : this.fpuTop = -1; break;
          case "inc"  : this.fpuTop =  1; break;
          case "pop"  : this.fpuTop =  1; break;
          case "pop2x": this.fpuTop =  2; break;
          case "push" : this.fpuTop = -1; break;
          default:
            FAIL(`Invalid fpuStack value '${value}'`);
        }
        return;

      case "kz":
        this.zmask = true;
        this.kmask = true;
        return;

      case "k":
        this.kmask = true;
        if (typeof value === "string")
          super._assignAttribute(key, value);
        return;

      case "er":
        this.er = true;
        this.sae = true; // {er} implies {sae}.
        return;

      case "sae":
        this.sae = true;
        return;

      case "broadcast":
        this.broadcast = true;
        this.elementSize = value;
        return;

      default:
        super._assignAttribute(key, value);
    }
  }

  // Parse the operands string `s` - "{...}" flags become attributes, the rest is split into
  // operands that are appended to `this.operands`. The first operand defaults to "X" access,
  // all others to "R".
  _assignOperands(s, groupIndex) {
    if (!s)
      return;

    // First remove all flags specified as {...}. We put them into `flags`
    // map and mix with others. This seems to be the best we can do here.
    for (;;) {
      let a = s.indexOf("{");
      let b = s.indexOf("}");

      if (a === -1 || b === -1)
        break;

      // Get the `flag` and remove it from `s`.
      this._assignAttribute(s.substring(a + 1, b), true);
      s = s.substring(0, a) + s.substring(b + 1);
    }

    // Split into individual operands and push them to `operands`.
    const arr = Utils.splitOperands(s);
    for (let i = 0; i < arr.length; i++) {
      const operand = new Operand();
      operand.assignData(arr[i].trim(), i === 0 ? "X" : "R", groupIndex);

      if (operand.mem === "tmem") {
        this.tsib = true;
      }

      // All group operands of a single instruction must belong to the same group.
      if (operand.groupPattern && this.groupPattern !== operand.groupPattern) {
        if (this.groupPattern) {
          FAIL(`Instruction ${this.name}: Operand's group pattern mismatch '${this.groupPattern}' != '${operand.groupPattern}'`);
        }
        this.groupPattern = operand.groupPattern;
      }

      this.operands.push(operand);
    }
  }

  _assignEncoding(s) {
    this.encoding = s;
  }

  // Parse the opcode string `s` (space separated components) into `this.opcode` and related
  // members. Problems with the definition are reported via `this.report()`, with the exception
  // of malformed REX/REX2 prefixes, which throw.
  _assignOpcode(s, groupIndex) {
    this.opcodeString = s;

    let parts = s.split(" ");
    if (/^(VEX|EVEX|XOP)\./.test(s)) {
      // Parse VEX/XOP and EVEX encoded instruction - the first part is a dot separated list
      // of components that starts with the prefix itself, for example "EVEX.128.66.0F38.W0".
      let prefix = parts[0].split(".");
      this.prefix = prefix[0];

      for (let i = 1; i < prefix.length; i++) {
        let comp = prefix[i];

        if (/^(Pv|Wv|Wy)$/.test(comp)) {
          comp = OpcodeGroupInfo[comp].subst[groupIndex];
        }

        // Process APX EVEX.ND field - ND=0 or ND=1.
        if (/^ND=[01]$/.test(comp)) {
          this.opcode.nd = comp === "ND=1";
          continue;
        }

        // Process APX EVEX.NF field - NF=0 or NF=1.
        if (/^NF=[01]$/.test(comp)) {
          this.opcode.nf = comp === "NF=1";
          continue;
        }

        // Process APX EVEX.SCC field - SCC=0-F
        if (/^SCC=[0-9A-F]$/.test(comp)) {
          this.opcode.scc = comp.charAt(5);
          continue;
        }

        // Process `L/LL` field.
        if (Object.hasOwn(OpcodeLLMapping, comp)) {
          this.opcode.l = OpcodeLLMapping[comp];
          continue;
        }

        // Process `PP` field - 66/F2/F3/NP (NP means no PP field used)
        if (comp === "P0") {
          /* ignored, `P` is zero... */
          continue;
        }

        if (/^(?:66|F2|F3|NP)$/.test(comp)) {
          this.opcode.pp = comp;
          continue;
        }

        // Process `MM` field - 0F/0F3A/0F38/MAP4-MAP9/MAPA.
        if (/^(?:0F|0F3A|0F38|MAP[4-9A])$/.test(comp)) {
          this.opcode.mm = comp;
          continue;
        }

        // Process `W` field.
        //
        // NOTE: An empty component is accepted here as well and results in an empty `W`.
        if (/^(WIG|W0|W1|)$/.test(comp)) {
          this.opcode.w = comp;
          continue;
        }

        // TODO: Some new APX instructions don't have W specified (ENQCMD/ENQCMDS).
        if (comp === "W?") {
          this.opcode.w = "W0";
          continue;
        }

        // ERROR.
        this.report(`'${this.opcodeString}' Unhandled component: ${comp}`);
      }

      for (let i = 1; i < parts.length; i++) {
        let comp = parts[i];

        // Parse opcode.
        if (/^[0-9A-Fa-f]{2}$/.test(comp)) {
          this.opcode.byte = comp.toUpperCase();
          continue;
        }

        // Parse ModR/M field using "/r" or "/0-7" notation.
        if (/^\/[r0-7]$/.test(comp)) {
          this.opcode.mod = "xx";
          this.opcode.modr = comp.charAt(1);
          this.opcode.modrm = "b";
          continue;
        }

        // Parse ModR/M field using "11:xxx:xxx" and "!(11):xxx:xxx" notation.
        const m = comp.match(/^(11|!\(11\)):(rrr|[01]{3}):(bbb|[01]{3})$/);
        if (m) {
          this.opcode.mod = m[1];
          this.opcode.modr = m[2] === "rrr" ? "r" : String(parseInt(m[2], 2));
          this.opcode.modrm = m[3] === "bbb" ? "b" : String(parseInt(m[3], 2));
          continue;
        }

        // Parse immediate byte, word, dword, or qword.
        comp = this._substituteOpcodePart(comp, groupIndex);
        if (/^(?:ib|iw|id|iq|\/is4)$/.test(comp)) {
          this.imm += Utils.immSize(comp);
          continue;
        }

        this.report(`'${this.opcodeString}' Unhandled opcode component: ${comp}`);
      }
    }
    else {
      // Parse X86/X64 instruction (including legacy MMX/SSE/3DNOW instructions).
      let rex_parsed = false;

      for (let i = 0; i < parts.length; i++) {
        let comp = parts[i];

        if (comp === "NFx" || comp === "NOREP" || comp === "NO67") {
          // Ignored for now.
          continue;
        }

        // Parse REX or REX2 prefix.
        if (comp.startsWith("REX2.") || comp === "REX.W") {
          if (rex_parsed) {
            FAIL(`'${this.opcodeString}' Multiple REX prefixes are invalid`);
          }

          rex_parsed = true;

          // Instructions that force REX.W prefix or use REX2 prefix are always 64-bit instructions.
          this.arch = "X64";

          if (comp === "REX.W") {
            this.opcode.w = "W1";
          }
          else {
            this.prefix = "REX2";

            // REX2 has always 3 components - "REX2.<MAP>.<W>".
            const rex2 = comp.split(".");
            if (rex2.length !== 3) {
              FAIL(`'${this.opcodeString}' Invalid REX2 prefix - expected exactly 3 REX2 components`);
            }

            if (rex2[1] === "MAP0") {
              // nothing.
            }
            else if (rex2[1] === "MAP1") {
              this.opcode.mm = "0F";
            }
            else {
              FAIL(`'${this.opcodeString}' Invalid REX2 prefix - REX2.MAP component could be either MAP0 or MAP1`);
            }

            this.opcode.w = rex2[2];
          }

          continue;
        }

        // Parse `PP` prefixes.
        if (this.opcode.mm === "") {
          if (this.opcode.pp === ""   && /^(?:66|F2|F3|NP)$/.test(comp) ||
              this.opcode.pp === "66" && /^(?:F2|F3)$/.test(comp)) {
            this.opcode.pp += comp;
            continue;
          }
        }

        // Parse `MM` prefixes.
        if ((this.opcode.mm === ""   && comp === "0F") ||
            (this.opcode.mm === "0F" && /^(?:01|3A|38)$/.test(comp))) {
          this.opcode.mm += comp;
          continue;
        }

        // Recognize "0F 0F /r XX" encoding.
        if (this.opcode.mm === "0F" && comp === "0F") {
          this.prefix = "3DNOW";
          continue;
        }

        // Parse opcode byte.
        if (/^[0-9A-F]{2}(?:\+[ri])?$/.test(comp)) {
          // Parse "+r" or "+i" suffix.
          if (comp.length > 2) {
            this.opcode.ri = true;
            comp = comp.substring(0, 2);
          }

          // FPU instructions are encoded as "PREFIX XX", where prefix is not the same
          // as MM prefixes used everywhere else. AsmJit internally extends MM field in
          // instruction tables to allow storing this prefix together with other "MM"
          // prefixes, currently the unused indexes are used, but if X86 moves forward
          // and starts using these we can simply use more bits in the opcode DWORD.
          if (!this.opcode.pp && this.opcode.byte === "9B") {
            this.opcode.pp = this.opcode.byte;
            this.opcode.byte = comp;
            continue;
          }

          if (!this.opcode.mm && (/^(?:D8|D9|DA|DB|DC|DD|DE|DF)$/.test(this.opcode.byte))) {
            this.opcode.mm = this.opcode.byte;
            this.opcode.byte = comp;
            continue;
          }

          if (this.opcode.byte) {
            if (this.opcode.byte === "67") {
              this.opcode._67h = true;
            }
            else {
              // A second opcode byte is only supported if it encodes a ModR/M byte having MOD=11.
              if (!this.opcode.modr && !this.opcode.modrm) {
                const value = parseInt(comp, 16);
                if ((value & 0xC0) === 0xC0) {
                  this.opcode.mod = "11";
                  this.opcode.modr = String((value >> 3) & 0x7);
                  this.opcode.modrm = String((value >> 0) & 0x7);
                }
                else {
                  this.report(`'${this.opcodeString}' Unsupported secondary opcode (MOD/RM) '${comp}' value`);
                }
              }
              else {
                this.report(`'${this.opcodeString}' Multiple opcodes, have ${this.opcode.byte}, found ${comp}`);
              }
            }
          }

          this.opcode.byte = comp;
          continue;
        }

        // Parse ModR/M field using "/r" or "/0-7" notation.
        if (/^\/[r0-7]$/.test(comp) && !this.opcode.modr) {
          this.opcode.mod = "xx";
          this.opcode.modr = comp.charAt(1);
          this.opcode.modrm = "b";
          continue;
        }

        // Parse ModR/M field using "11:xxx:xxx" and "!(11):xxx:xxx" notation.
        const m = comp.match(/^(11|!\(11\)):(rrr|[01]{3}):(bbb|[01]{3})$/);
        if (m) {
          this.opcode.mod = m[1];
          this.opcode.modr = m[2] === "rrr" ? "r" : String(parseInt(m[2], 2));
          this.opcode.modrm = m[3] === "bbb" ? "b" : String(parseInt(m[3], 2));
          continue;
        }

        // Parse immediate byte, word, dword, fword, or qword.
        if (/^(?:ib|iw|id|iq|iv|if)$/.test(comp)) {
          if (comp === "iv")
            comp = OpcodeGroupInfo[comp].subst[groupIndex];

          this.imm += Utils.immSize(comp);
          continue;
        }

        if (comp === "moff") {
          this.moff = true;
          continue;
        }

        // Parse displacement.
        if (/^(?:cb|cw|cd)$/.test(comp) && !this.rel) {
          this.rel = comp === "cb" ? 1 :
                     comp === "cw" ? 2 :
                     comp === "cd" ? 4 : -1;
          continue;
        }

        // ERROR.
        this.report(`'${this.opcodeString}' Unhandled opcode component: ${comp}`);
      }
    }

    // HACK: Fix instructions having opcode "01" - in such case "0F 01" was consumed as
    // the `MM` field, so move the trailing "01" from `MM` to the opcode byte.
    if (this.opcode.byte === "" && this.opcode.mm.endsWith("0F01")) {
      this.opcode.byte = "01";
      this.opcode.mm = this.opcode.mm.substring(0, this.opcode.mm.length - 2);
    }

    if (this.opcode.byte)
      this.opcodeValue = parseInt(this.opcode.byte, 16);

    if (!this.opcode.byte)
      this.report(`Couldn't parse instruction's opcode '${this.opcodeString}'`);
  }

  // Propagate information from operands to the instruction - architecture, broadcast,
  // VSIB, and consecutive register information.
  _updateOperandsInfo() {
    super._updateOperandsInfo();

    let consecutiveLead = null;
    let consecutiveLastIndex = 0;

    for (let i = 0; i < this.operands.length; i++) {
      const op = this.operands[i];

      // Instructions that use 64-bit GP registers are always 64-bit instructions.
      if (op.reg === "r64" || op.reg === "rax" || op.reg === "rbx" || op.reg === "rcx" || op.reg === "rdx" || op.reg === "rsi" || op.reg === "rdi")
        this.arch = "X64";

      // Propagate broadcast.
      if (op.bcstSize > 0)
        this._assignAttribute("broadcast", op.bcstSize);

      // Propagate VSIB.
      if (op.vsibReg) {
        if (this.vsibReg) {
          this.report("Only one operand can be a vector memory address (vmNNx)");
        }

        this.vsibReg = op.vsibReg;
        this.vsibSize = op.vsibSize;
      }

      // Propagate consecutive registers - `regIndexRel` is the distance to the lead operand.
      if (op.regIndexRel) {
        if (i - op.regIndexRel < 0) {
          this.report(`The consecutive register information is invalid, index of the lead (${i - op.regIndexRel}) is out of range`);
        }
        else {
          const lead = this.operands[i - op.regIndexRel];
          if (consecutiveLead && consecutiveLead !== lead) {
            this.report(`The consecutive register chain is invalid`);
          }
          else {
            consecutiveLead = lead;
            consecutiveLastIndex = Math.max(consecutiveLastIndex, op.regIndexRel);
          }
        }
      }
    }

    if (consecutiveLead) {
      consecutiveLead.consecutive_lead_count = consecutiveLastIndex + 1;
    }
  }

  // Validate the instruction's definition. Common mistakes can be checked and
  // reported easily, however, if the mistake is just an invalid opcode or
  // something else it's impossible to detect.
  _postProcess() {
    if (this.groupPattern) {
      const archInfo = ArchGroupInfo[this.groupPattern];
      if (this.arch === "ANY" && archInfo && this.arch !== archInfo[this.groupIndex]) {
        // TODO: Never triggered, which means it should be removed.
        this.arch = archInfo[this.groupIndex];
      }
    }
    else {
      this.groupIndex = -1;
    }

    if (this.privilege === "L0")
      this.category.SYSTEM = true;

    let immCount = this.immCount;

    // Verify that the immediate operand/operands are specified in instruction
    // encoding and opcode field. Basically if there is an "ix" in operands,
    // the encoding should contain "I".
    if (immCount > 0) {
      if (immCount === 1 && this.operands[this.operands.length - 1].data === "1") {
        // This must be one of rcl|rcr|rol|ror|sar|sal|shr. We won't validate
        // these as these have "1" as implicit (encoded within opcode, not after).
      }
      else {
        // Every immediate should have its imm byte ("ib", "iw", "id", or "iq") in the opcode data.
        let m = this.opcodeString.match(/(?:^|\s+)(ib|iw|id|iq|iv|if|\/is4)/g);
        if (!m || m.length !== immCount) {
          this.report(`Immediate(s) [${immCount}] not found in opcode: ${this.opcodeString}`);
        }
      }
    }
  }

  isAVX() { return this.isVEX() || this.isEVEX(); }
  isVEX() { return this.prefix === "VEX" || this.prefix === "XOP"; }
  isEVEX() { return this.prefix === "EVEX"; }

  // Get the value of the `W` field - 0 or 1, or -1 if `W` is neither "W0" nor "W1".
  getWValue() {
    switch (this.opcode.w) {
      case "W0": return 0;
      case "W1": return 1;
    }
    return -1;
  }

  // Get signature of the instruction as "ARCH PREFIX ENCODING[:operands]" form.
  get signature() {
    let operands = this.operands;
    let sign = this.arch;

    if (this.prefix) {
      sign += " " + this.prefix;
      if (this.prefix !== "3DNOW") {
        if (this.opcode.l === "L1")
          sign += ".256";
        else if (this.opcode.l === "256" || this.opcode.l === "512")
          sign += `.${this.opcode.l}`;
        else
          sign += ".128";

        if (this.opcode.w === "W1")
          sign += ".W";
      }
    }
    else if (this.opcode.w === "W1") {
      sign += " REX.W";
    }

    sign += " " + this.encoding;

    for (let i = 0; i < operands.length; i++) {
      sign += (i === 0) ? ":" : ",";

      let operand = operands[i];
      if (operand.implicit)
        sign += `[${operand.reg}]`;
      else
        sign += operand.toRegMem();
    }

    return sign;
  }

  // Get the number of immediate operands.
  get immCount() {
    let ops = this.operands;
    let n = 0;

    for (let i = 0; i < ops.length; i++)
      if (ops[i].isImm())
        n++;

    return n;
  }

  // Get MODRM.R as a number - zero if it's not a digit "0-7".
  get modRValue() {
    if (/^[0-7]$/.test(this.opcode.modr))
      return parseInt(this.opcode.modr, 10);
    else
      return 0;
  }

  // Get MODRM.R/M as a number - zero if it's not a digit "0-7".
  get modRMValue() {
    if (/^[0-7]$/.test(this.opcode.modrm))
      return parseInt(this.opcode.modrm, 10);
    else
      return 0;
  }
}
x86.Instruction = Instruction;

// asmdb.x86.ISA
// =============

// Keys of an instruction record that can hold an instruction signature.
const ArchKeys = MapUtils.mapFromArray(["any", "x86", "x64", "apx", "___"]);

// Find the key of `inst` that holds the instruction signature (see `ArchKeys`), throws if
// there is none.
function findArch(inst) {
  for (let a in ArchKeys) {
    if (typeof inst[a] === "string") {
      return a;
    }
  }
  FAIL(`Instruction signature not found in record: ${JSON.stringify(inst)}`);
}

// Merge attributes of `group` into instruction `data` - "group" and "instructions" keys are
// skipped, "ext" is appended (space separated), and everything else only acts as a default.
function mergeGroupData(data, group) {
  for (let k in group) {
    switch (k) {
      case "group":
      case "instructions":
        break;

      case "ext":
        data[k] = (data[k] ? data[k] + " " : "") + group[k];
        break;

      default:
        if (data[k] === undefined)
          data[k] = group[k];
        break;
    }
  }
}

// X86/X64 instruction database - stores Instruction instances in a map and
// aggregates all instructions with the same name.
class ISA extends base.ISA {
  constructor(data) {
    super(data);
    this.addData(data || NONE);
  }

  // Add all instructions of all `groups`. Each record is expanded into one instruction per
  // name (names after the first one are aliases of the first) and per group index.
  _addInstructions(groups) {
    for (let group of groups) {
      for (let record of group.instructions) {
        let arch = findArch(record);

        // TODO: Ignore records having this (only used for testing purposes).
        if (arch === "___")
          continue;

        const apx = arch === "apx";
        const sgn = Utils.splitInstructionSignature(record[arch]);
        const data = MapUtils.cloneExcept(record, arch);

        mergeGroupData(data, group);

        for (let j = 0; j < sgn.names.length; j++) {
          data.name = sgn.names[j];
          data.prefixes = sgn.prefixes;
          data.operands = sgn.operands;

          if (j > 0) {
            data.aliasOf = sgn.names[0];
          }

          let groupIndex = 0;
          let instruction = null;

          // At least one instruction is always created; the group pattern (if any) is only
          // known after the first instruction has been parsed.
          do {
            instruction = new Instruction(this);
            instruction.arch = apx ? "X64" : arch.toUpperCase();
            instruction.assignData(data, groupIndex);

            if (apx) {
              instruction.ext["APX_F"] = true;
              if (instruction.category.GP) {
                instruction.category.GP_EXT = true;
              }
            }

            this._addInstruction(instruction);
          } while (instruction.groupPattern && ++groupIndex < OperandGroupInfo[instruction.groupPattern].subst.length);
        }
      }
    }

    return this;
  }
}
x86.ISA = ISA;

// asmdb.x86.X86DataCheck
// ======================

class X86DataCheck {
  // Print VEX and EVEX instructions that share the name, operands, and opcode byte, but
  // differ in `W` or `L` fields.
  static checkVexEvex(db) {
    const map = db.instructionMap;
    for (let name in map) {
      const instructions = map[name];
      for (let i = 0; i < instructions.length; i++) {
        const instA = instructions[i];
        for (let j = i + 1; j < instructions.length; j++) {
          const instB = instructions[j];
          if (instA.operands.join("_") === instB.operands.join("_")) {
            const vex  = instA.prefix === "VEX"  ? instA : instB.prefix === "VEX"  ? instB : null;
            const evex = instA.prefix === "EVEX" ? instA : instB.prefix === "EVEX" ? instB : null;

            if (vex && evex && vex.opcode.byte === evex.opcode.byte) {
              // NOTE: There are some false positives, they will be printed as well.
              let ok = vex.opcode.w === evex.opcode.w && vex.opcode.l === evex.opcode.l;

              if (!ok) {
                console.log(`Instruction ${name} differs:`);
                console.log(` ${vex.operands.join(" ")}: ${vex.opcodeString}`);
                console.log(` ${evex.operands.join(" ")}: ${evex.opcodeString}`);
              }
            }
          }
        }
      }
    }
  }
}
x86.X86DataCheck = X86DataCheck;

}).apply(this, typeof module === "object" && module && module.exports
  ? [module, "exports"] : [this.asmdb || (this.asmdb = {}), "x86"]);