Fixed BMI and BMI2 instruction encoding (finally).

This commit is contained in:
kobalicek
2016-06-30 19:44:12 +02:00
parent 5db35108d0
commit 6f6ca70c4c
2 changed files with 43 additions and 12 deletions

View File

@@ -2842,7 +2842,7 @@ CaseAvxRmi:
break;
case kX86InstEncodingAvxRmi_OptW:
ADD_VEX_L(x86IsGpq(static_cast<const X86Reg*>(o0)) | x86IsGpq(o1));
ADD_REX_W(x86IsGpq(static_cast<const X86Reg*>(o0)) | x86IsGpq(o1));
goto CaseAvxRmi;
case kX86InstEncodingAvxRmi_OptL:
@@ -2866,7 +2866,7 @@ _EmitAvxRvm_Reg:
break;
case kX86InstEncodingAvxRvm_OptW:
ADD_VEX_L(x86IsGpq(static_cast<const X86Reg*>(o0)) | x86IsGpq(static_cast<const X86Reg*>(o1)));
ADD_REX_W(x86IsGpq(static_cast<const X86Reg*>(o0)) | x86IsGpq(static_cast<const X86Reg*>(o1)));
goto _EmitAvxRvm;
case kX86InstEncodingAvxRvm_OptL:
@@ -2937,7 +2937,7 @@ CaseAvxRmv:
break;
case kX86InstEncodingAvxRmv_OptW:
ADD_VEX_W(x86IsGpq(static_cast<const X86Reg*>(o0)) | x86IsGpq(static_cast<const X86Reg*>(o2)));
ADD_REX_W(x86IsGpq(static_cast<const X86Reg*>(o0)) | x86IsGpq(static_cast<const X86Reg*>(o2)));
goto CaseAvxRmv;
case kX86InstEncodingAvxRmvi:
@@ -3155,7 +3155,7 @@ CaseAvxVm:
break;
case kX86InstEncodingAvxVm_OptW:
ADD_VEX_W(x86IsGpq(static_cast<const X86Reg*>(o0)) | x86IsGpq(o1));
ADD_REX_W(x86IsGpq(static_cast<const X86Reg*>(o0)) | x86IsGpq(o1));
goto CaseAvxVm;
case kX86InstEncodingAvxVmi_OptL:

View File

@@ -36,6 +36,7 @@ static void opcode(asmjit::X86Assembler& a, bool useRex1 = false, bool useRex2 =
X86GpReg gdA = useRex1 ? r8d : eax;
X86GpReg gdB = useRex2 ? r9d : ebx;
X86GpReg gdC = useRex2 ? r10d : ecx;
X86GpReg gzA = useRex1 ? r8 : a.zax;
X86GpReg gzB = useRex2 ? r9 : a.zbx;
@@ -2825,47 +2826,77 @@ static void opcode(asmjit::X86Assembler& a, bool useRex1 = false, bool useRex2 =
// BMI.
a.nop();
a.andn(gdA, gdB, gdC);
a.andn(gzA, gzB, gzC);
a.andn(gdA, gdB, anyptr_gpC);
a.andn(gzA, gzB, anyptr_gpC);
a.bextr(gdA, gdB, gdC);
a.bextr(gzA, gzB, gzC);
a.bextr(gdA, anyptr_gpB, gdC);
a.bextr(gzA, anyptr_gpB, gzC);
a.blsi(gdA, gdB);
a.blsi(gzA, gzB);
a.blsi(gdA, anyptr_gpB);
a.blsi(gzA, anyptr_gpB);
a.blsmsk(gdA, gdB);
a.blsmsk(gzA, gzB);
a.blsmsk(gdA, anyptr_gpB);
a.blsmsk(gzA, anyptr_gpB);
a.blsr(gdA, gdB);
a.blsr(gzA, gzB);
a.blsr(gdA, anyptr_gpB);
a.blsr(gzA, anyptr_gpB);
// LZCNT.
a.nop();
a.lzcnt(gdA, gdB);
a.lzcnt(gzA, gzB);
a.lzcnt(gdA, anyptr_gpB);
a.lzcnt(gzA, anyptr_gpB);
// TZCNT.
a.nop();
a.tzcnt(gdA, gdB);
a.tzcnt(gzA, gzB);
a.tzcnt(gdA, anyptr_gpB);
a.tzcnt(gzA, anyptr_gpB);
// BMI2.
a.nop();
a.bzhi(gdA, gdB, gdC);
a.bzhi(gzA, gzB, gzC);
a.bzhi(gdA, anyptr_gpB, gdC);
a.bzhi(gzA, anyptr_gpB, gzC);
a.mulx(gdA, gdB, gdC);
a.mulx(gzA, gzB, gzC);
a.mulx(gdA, gdB, anyptr_gpC);
a.mulx(gzA, gzB, anyptr_gpC);
a.pdep(gdA, gdB, gdC);
a.pdep(gzA, gzB, gzC);
a.pdep(gdA, gdB, anyptr_gpC);
a.pdep(gzA, gzB, anyptr_gpC);
a.pext(gdA, gdB, gdC);
a.pext(gzA, gzB, gzC);
a.pext(gdA, gdB, anyptr_gpC);
a.pext(gzA, gzB, anyptr_gpC);
a.rorx(gdA, gdB, 0);
a.rorx(gzA, gzB, 0);
a.rorx(gdA, anyptr_gpB, 0);
a.rorx(gzA, anyptr_gpB, 0);
a.sarx(gdA, gdB, gdC);
a.sarx(gzA, gzB, gzC);
a.sarx(gdA, anyptr_gpB, gdC);
a.sarx(gzA, anyptr_gpB, gzC);
a.shlx(gdA, gdB, gdC);
a.shlx(gzA, gzB, gzC);
a.shlx(gdA, anyptr_gpB, gdC);
a.shlx(gzA, anyptr_gpB, gzC);
a.shrx(gdA, gdB, gdC);
a.shrx(gzA, gzB, gzC);
a.shrx(gdA, anyptr_gpB, gdC);
a.shrx(gzA, anyptr_gpB, gzC);
// RDRAND.