Fixed BMI and BMI2 (finally).

This commit is contained in:
kobalicek
2016-06-30 19:44:12 +02:00
parent 5db35108d0
commit 6f6ca70c4c
2 changed files with 43 additions and 12 deletions

View File

@@ -2842,7 +2842,7 @@ CaseAvxRmi:
break; break;
case kX86InstEncodingAvxRmi_OptW: case kX86InstEncodingAvxRmi_OptW:
ADD_VEX_L(x86IsGpq(static_cast<const X86Reg*>(o0)) | x86IsGpq(o1)); ADD_REX_W(x86IsGpq(static_cast<const X86Reg*>(o0)) | x86IsGpq(o1));
goto CaseAvxRmi; goto CaseAvxRmi;
case kX86InstEncodingAvxRmi_OptL: case kX86InstEncodingAvxRmi_OptL:
@@ -2866,7 +2866,7 @@ _EmitAvxRvm_Reg:
break; break;
case kX86InstEncodingAvxRvm_OptW: case kX86InstEncodingAvxRvm_OptW:
ADD_VEX_L(x86IsGpq(static_cast<const X86Reg*>(o0)) | x86IsGpq(static_cast<const X86Reg*>(o1))); ADD_REX_W(x86IsGpq(static_cast<const X86Reg*>(o0)) | x86IsGpq(static_cast<const X86Reg*>(o1)));
goto _EmitAvxRvm; goto _EmitAvxRvm;
case kX86InstEncodingAvxRvm_OptL: case kX86InstEncodingAvxRvm_OptL:
@@ -2937,7 +2937,7 @@ CaseAvxRmv:
break; break;
case kX86InstEncodingAvxRmv_OptW: case kX86InstEncodingAvxRmv_OptW:
ADD_VEX_W(x86IsGpq(static_cast<const X86Reg*>(o0)) | x86IsGpq(static_cast<const X86Reg*>(o2))); ADD_REX_W(x86IsGpq(static_cast<const X86Reg*>(o0)) | x86IsGpq(static_cast<const X86Reg*>(o2)));
goto CaseAvxRmv; goto CaseAvxRmv;
case kX86InstEncodingAvxRmvi: case kX86InstEncodingAvxRmvi:
@@ -3155,7 +3155,7 @@ CaseAvxVm:
break; break;
case kX86InstEncodingAvxVm_OptW: case kX86InstEncodingAvxVm_OptW:
ADD_VEX_W(x86IsGpq(static_cast<const X86Reg*>(o0)) | x86IsGpq(o1)); ADD_REX_W(x86IsGpq(static_cast<const X86Reg*>(o0)) | x86IsGpq(o1));
goto CaseAvxVm; goto CaseAvxVm;
case kX86InstEncodingAvxVmi_OptL: case kX86InstEncodingAvxVmi_OptL:

View File

@@ -36,6 +36,7 @@ static void opcode(asmjit::X86Assembler& a, bool useRex1 = false, bool useRex2 =
X86GpReg gdA = useRex1 ? r8d : eax; X86GpReg gdA = useRex1 ? r8d : eax;
X86GpReg gdB = useRex2 ? r9d : ebx; X86GpReg gdB = useRex2 ? r9d : ebx;
X86GpReg gdC = useRex2 ? r10d : ecx;
X86GpReg gzA = useRex1 ? r8 : a.zax; X86GpReg gzA = useRex1 ? r8 : a.zax;
X86GpReg gzB = useRex2 ? r9 : a.zbx; X86GpReg gzB = useRex2 ? r9 : a.zbx;
@@ -2825,47 +2826,77 @@ static void opcode(asmjit::X86Assembler& a, bool useRex1 = false, bool useRex2 =
// BMI. // BMI.
a.nop(); a.nop();
a.andn(gdA, gdB, gdC);
a.andn(gzA, gzB, gzC); a.andn(gzA, gzB, gzC);
a.andn(gdA, gdB, anyptr_gpC);
a.andn(gzA, gzB, anyptr_gpC); a.andn(gzA, gzB, anyptr_gpC);
a.bextr(gdA, gdB, gdC);
a.bextr(gzA, gzB, gzC); a.bextr(gzA, gzB, gzC);
a.bextr(gdA, anyptr_gpB, gdC);
a.bextr(gzA, anyptr_gpB, gzC); a.bextr(gzA, anyptr_gpB, gzC);
a.blsi(gdA, gdB);
a.blsi(gzA, gzB); a.blsi(gzA, gzB);
a.blsi(gdA, anyptr_gpB);
a.blsi(gzA, anyptr_gpB); a.blsi(gzA, anyptr_gpB);
a.blsmsk(gdA, gdB);
a.blsmsk(gzA, gzB); a.blsmsk(gzA, gzB);
a.blsmsk(gdA, anyptr_gpB);
a.blsmsk(gzA, anyptr_gpB); a.blsmsk(gzA, anyptr_gpB);
a.blsr(gdA, gdB);
a.blsr(gzA, gzB); a.blsr(gzA, gzB);
a.blsr(gdA, anyptr_gpB);
a.blsr(gzA, anyptr_gpB); a.blsr(gzA, anyptr_gpB);
// LZCNT. // LZCNT.
a.nop(); a.nop();
a.lzcnt(gdA, gdB);
a.lzcnt(gzA, gzB); a.lzcnt(gzA, gzB);
a.lzcnt(gdA, anyptr_gpB);
a.lzcnt(gzA, anyptr_gpB); a.lzcnt(gzA, anyptr_gpB);
// TZCNT. // TZCNT.
a.nop(); a.nop();
a.tzcnt(gdA, gdB);
a.tzcnt(gzA, gzB); a.tzcnt(gzA, gzB);
a.tzcnt(gdA, anyptr_gpB);
a.tzcnt(gzA, anyptr_gpB); a.tzcnt(gzA, anyptr_gpB);
// BMI2. // BMI2.
a.nop(); a.nop();
a.bzhi(gdA, gdB, gdC);
a.bzhi(gzA, gzB, gzC); a.bzhi(gzA, gzB, gzC);
a.bzhi(gdA, anyptr_gpB, gdC);
a.bzhi(gzA, anyptr_gpB, gzC); a.bzhi(gzA, anyptr_gpB, gzC);
a.mulx(gdA, gdB, gdC);
a.mulx(gzA, gzB, gzC); a.mulx(gzA, gzB, gzC);
a.mulx(gdA, gdB, anyptr_gpC);
a.mulx(gzA, gzB, anyptr_gpC); a.mulx(gzA, gzB, anyptr_gpC);
a.pdep(gdA, gdB, gdC);
a.pdep(gzA, gzB, gzC); a.pdep(gzA, gzB, gzC);
a.pdep(gdA, gdB, anyptr_gpC);
a.pdep(gzA, gzB, anyptr_gpC); a.pdep(gzA, gzB, anyptr_gpC);
a.pext(gdA, gdB, gdC);
a.pext(gzA, gzB, gzC); a.pext(gzA, gzB, gzC);
a.pext(gdA, gdB, anyptr_gpC);
a.pext(gzA, gzB, anyptr_gpC); a.pext(gzA, gzB, anyptr_gpC);
a.rorx(gdA, gdB, 0);
a.rorx(gzA, gzB, 0); a.rorx(gzA, gzB, 0);
a.rorx(gdA, anyptr_gpB, 0);
a.rorx(gzA, anyptr_gpB, 0); a.rorx(gzA, anyptr_gpB, 0);
a.sarx(gdA, gdB, gdC);
a.sarx(gzA, gzB, gzC); a.sarx(gzA, gzB, gzC);
a.sarx(gdA, anyptr_gpB, gdC);
a.sarx(gzA, anyptr_gpB, gzC); a.sarx(gzA, anyptr_gpB, gzC);
a.shlx(gdA, gdB, gdC);
a.shlx(gzA, gzB, gzC); a.shlx(gzA, gzB, gzC);
a.shlx(gdA, anyptr_gpB, gdC);
a.shlx(gzA, anyptr_gpB, gzC); a.shlx(gzA, anyptr_gpB, gzC);
a.shrx(gdA, gdB, gdC);
a.shrx(gzA, gzB, gzC); a.shrx(gzA, gzB, gzC);
a.shrx(gdA, anyptr_gpB, gdC);
a.shrx(gzA, anyptr_gpB, gzC); a.shrx(gzA, anyptr_gpB, gzC);
// RDRAND. // RDRAND.