Fixed the pextrw SSE instruction; Compiler's alloc(), spill(), ... now handle non-initialized variables.

This commit is contained in:
kobalicek
2015-05-18 11:33:14 +02:00
parent 9001d2f2b7
commit 3d62c94c8e
12 changed files with 675 additions and 676 deletions

View File

@@ -717,9 +717,9 @@ static void opcode(asmjit::X86Assembler& a) {
a.andnps(xmm0, ptr_gp0);
a.andps(xmm0, xmm7);
a.andps(xmm0, ptr_gp0);
a.cmpps(xmm0, xmm0, 0);
a.cmpps(xmm0, xmm7, 0);
a.cmpps(xmm0, ptr_gp0, 0);
a.cmpss(xmm0, xmm0, 0);
a.cmpss(xmm0, xmm7, 0);
a.cmpss(xmm0, ptr_gp0, 0);
a.comiss(xmm0, xmm7);
a.comiss(xmm0, ptr_gp0);
@@ -813,7 +813,7 @@ static void opcode(asmjit::X86Assembler& a) {
a.rsqrtss(xmm0, xmm7);
a.rsqrtss(xmm0, ptr_gp0);
a.sfence();
a.shufps(xmm0, xmm0, 0);
a.shufps(xmm0, xmm7, 0);
a.shufps(xmm0, ptr_gp0, 0);
a.sqrtps(xmm0, xmm7);
a.sqrtps(xmm0, ptr_gp0);
@@ -845,9 +845,9 @@ static void opcode(asmjit::X86Assembler& a) {
a.andpd(xmm0, xmm7);
a.andpd(xmm0, ptr_gp0);
a.clflush(ptr_gp0);
a.cmppd(xmm0, xmm0, 0);
a.cmppd(xmm0, xmm7, 0);
a.cmppd(xmm0, ptr_gp0, 0);
a.cmpsd(xmm0, xmm0, 0);
a.cmpsd(xmm0, xmm7, 0);
a.cmpsd(xmm0, ptr_gp0, 0);
a.comisd(xmm0, xmm7);
a.comisd(xmm0, ptr_gp0);
@@ -1024,11 +1024,11 @@ static void opcode(asmjit::X86Assembler& a) {
a.psubq(xmm0, ptr_gp0);
a.pmaddwd(xmm0, xmm7);
a.pmaddwd(xmm0, ptr_gp0);
a.pshufd(xmm0, xmm0, 0);
a.pshufd(xmm0, xmm7, 0);
a.pshufd(xmm0, ptr_gp0, 0);
a.pshufhw(xmm0, xmm0, 0);
a.pshufhw(xmm0, xmm7, 0);
a.pshufhw(xmm0, ptr_gp0, 0);
a.pshuflw(xmm0, xmm0, 0);
a.pshuflw(xmm0, xmm7, 0);
a.pshuflw(xmm0, ptr_gp0, 0);
a.psrld(xmm0, xmm7);
a.psrld(xmm0, ptr_gp0);
@@ -1174,47 +1174,47 @@ static void opcode(asmjit::X86Assembler& a) {
a.pshufb(xmm0, ptr_gp0);
a.palignr(mm0, mm7, 0);
a.palignr(mm0, ptr_gp0, 0);
a.palignr(xmm0, xmm0, 0);
a.palignr(xmm0, xmm7, 0);
a.palignr(xmm0, ptr_gp0, 0);
// SSE4.1.
a.nop();
a.blendpd(xmm0, xmm0, 0);
a.blendpd(xmm0, xmm7, 0);
a.blendpd(xmm0, ptr_gp0, 0);
a.blendps(xmm0, xmm0, 0);
a.blendps(xmm0, xmm7, 0);
a.blendps(xmm0, ptr_gp0, 0);
a.blendvpd(xmm0, xmm7);
a.blendvpd(xmm0, ptr_gp0);
a.blendvps(xmm0, xmm7);
a.blendvps(xmm0, ptr_gp0);
a.dppd(xmm0, xmm0, 0);
a.dppd(xmm0, xmm7, 0);
a.dppd(xmm0, ptr_gp0, 0);
a.dpps(xmm0, xmm0, 0);
a.dpps(xmm0, xmm7, 0);
a.dpps(xmm0, ptr_gp0, 0);
a.extractps(gp0, xmm0, 0);
a.extractps(ptr_gp0, xmm0, 0);
a.extractps(gp0, xmm7, 0);
a.extractps(ptr_gp0, xmm7, 0);
a.insertps(xmm0, xmm1, 0);
a.insertps(xmm0, ptr_gp0, 0);
a.movntdqa(xmm0, ptr_gp0);
a.mpsadbw(xmm0, xmm0, 0);
a.mpsadbw(xmm0, xmm7, 0);
a.mpsadbw(xmm0, ptr_gp0, 0);
a.packusdw(xmm0, xmm7);
a.packusdw(xmm0, ptr_gp0);
a.pblendvb(xmm0, xmm7);
a.pblendvb(xmm0, ptr_gp0);
a.pblendw(xmm0, xmm0, 0);
a.pblendw(xmm0, xmm7, 0);
a.pblendw(xmm0, ptr_gp0, 0);
a.pcmpeqq(xmm0, xmm7);
a.pcmpeqq(xmm0, ptr_gp0);
a.pextrb(gp0, xmm0, 0);
a.pextrb(ptr_gp0, xmm0, 0);
a.pextrb(ptr_gp0, xmm7, 0);
a.pextrd(gp0, xmm0, 0);
a.pextrd(ptr_gp0, xmm0, 0);
a.pextrd(ptr_gp0, xmm7, 0);
a.pextrq(gp0, xmm0, 0);
a.pextrq(ptr_gp0, xmm0, 0);
a.pextrq(ptr_gp0, xmm7, 0);
a.pextrw(gp0, xmm0, 0);
a.pextrw(ptr_gp0, xmm0, 0);
a.pextrw(ptr_gp0, xmm7, 0);
a.phminposuw(xmm0, xmm7);
a.phminposuw(xmm0, ptr_gp0);
a.pinsrb(xmm0, eax, 0);
@@ -1269,25 +1269,25 @@ static void opcode(asmjit::X86Assembler& a) {
a.pmulld(xmm0, ptr_gp0);
a.ptest(xmm0, xmm7);
a.ptest(xmm0, ptr_gp0);
a.roundps(xmm0, xmm0, 0);
a.roundps(xmm0, xmm7, 0);
a.roundps(xmm0, ptr_gp0, 0);
a.roundss(xmm0, xmm0, 0);
a.roundss(xmm0, xmm7, 0);
a.roundss(xmm0, ptr_gp0, 0);
a.roundpd(xmm0, xmm0, 0);
a.roundpd(xmm0, xmm7, 0);
a.roundpd(xmm0, ptr_gp0, 0);
a.roundsd(xmm0, xmm0, 0);
a.roundsd(xmm0, xmm7, 0);
a.roundsd(xmm0, ptr_gp0, 0);
// SSE4.2.
a.nop();
a.pcmpestri(xmm0, xmm0, 0);
a.pcmpestri(xmm0, xmm7, 0);
a.pcmpestri(xmm0, ptr_gp0, 0);
a.pcmpestrm(xmm0, xmm0, 0);
a.pcmpestrm(xmm0, xmm7, 0);
a.pcmpestrm(xmm0, ptr_gp0, 0);
a.pcmpistri(xmm0, xmm0, 0);
a.pcmpistri(xmm0, xmm7, 0);
a.pcmpistri(xmm0, ptr_gp0, 0);
a.pcmpistrm(xmm0, xmm0, 0);
a.pcmpistrm(xmm0, xmm7, 0);
a.pcmpistrm(xmm0, ptr_gp0, 0);
a.pcmpgtq(xmm0, xmm7);
a.pcmpgtq(xmm0, ptr_gp0);
@@ -1299,8 +1299,8 @@ static void opcode(asmjit::X86Assembler& a) {
a.extrq(xmm0, 0x1, 0x2);
a.insertq(xmm0, xmm1);
a.insertq(xmm0, xmm1, 0x1, 0x2);
a.movntsd(ptr_gp0, xmm0);
a.movntss(ptr_gp0, xmm0);
a.movntsd(ptr_gp0, xmm7);
a.movntss(ptr_gp0, xmm7);
// POPCNT.
a.nop();
@@ -1473,7 +1473,7 @@ static void opcode(asmjit::X86Assembler& a) {
a.vdpps(xmm0, xmm1, ptr_gp0, 0);
a.vdpps(ymm0, ymm1, ymm2, 0);
a.vdpps(ymm0, ymm1, ptr_gp0, 0);
a.vextractf128(xmm0, ymm0, 0);
a.vextractf128(xmm0, ymm1, 0);
a.vextractf128(ptr_gp0, ymm1, 0);
a.vextractps(gp0, xmm1, 0);
a.vextractps(ptr_gp0, xmm1, 0);

View File

@@ -6,7 +6,7 @@
// [Dependencies - AsmJit]
#if !defined(_ASMJIT_BUILD_H)
#include "build.h"
#include "./build.h"
#endif // !_ASMJIT_BUILD_H
// [Guard]
@@ -39,7 +39,7 @@
// ============================================================================
#if defined(_MSC_VER)
// Disable some warnings we know about
# pragma warning(push)
# pragma warning(disable: 4127) // conditional expression is constant
# pragma warning(disable: 4201) // nameless struct/union
@@ -53,7 +53,8 @@
# pragma warning(disable: 4480) // specifying underlying type for enum
# pragma warning(disable: 4800) // forcing value to bool 'true' or 'false'
// Rename symbols.
// TODO: Check if these defines are needed and for which version of MSC. There is
// news about these as they are part of C99.
# if !defined(vsnprintf)
# define ASMJIT_UNDEF_VSNPRINTF
# define vsnprintf _vsnprintf
@@ -62,6 +63,7 @@
# define ASMJIT_UNDEF_SNPRINTF
# define snprintf _snprintf
# endif // !snprintf
#endif // _MSC_VER
// ============================================================================

View File

@@ -82,7 +82,7 @@ ASMJIT_ENUM(InstOptions) {
//! \internal
//!
//! Data structure used to link linked-labels.
//! Data structure used to link labels.
struct LabelLink {
//! Previous link.
LabelLink* prev;
@@ -129,11 +129,9 @@ struct RelocData {
//! Size of relocation (4 or 8 bytes).
uint32_t size;
//! Offset from code begin address.
//! Offset from the initial code address.
Ptr from;
//! Relative displacement from code begin address (not to `offset`) or
//! absolute address.
//! Relative displacement from the initial code address, or an absolute address.
Ptr data;
};
@@ -337,7 +335,6 @@ struct ASMJIT_VCLASS Assembler : public CodeGen {
ASMJIT_INLINE bool isLabelValid(const Label& label) const {
  // The id-based overload performs the actual check; pass it this label's id.
  const uint32_t labelId = label.getId();
  return isLabelValid(labelId);
}
//! \overload
ASMJIT_INLINE bool isLabelValid(uint32_t id) const {
return static_cast<size_t>(id) < _labelList.getLength();
@@ -352,7 +349,6 @@ struct ASMJIT_VCLASS Assembler : public CodeGen {
ASMJIT_INLINE bool isLabelBound(const Label& label) const {
  // The id-based overload performs the actual check; pass it this label's id.
  const uint32_t labelId = label.getId();
  return isLabelBound(labelId);
}
//! \overload
ASMJIT_INLINE bool isLabelBound(uint32_t id) const {
ASMJIT_ASSERT(isLabelValid(id));
@@ -364,7 +360,6 @@ struct ASMJIT_VCLASS Assembler : public CodeGen {
ASMJIT_INLINE intptr_t getLabelOffset(const Label& label) const {
  // The id-based overload performs the lookup; pass it this label's id.
  const uint32_t labelId = label.getId();
  return getLabelOffset(labelId);
}
//! \overload
ASMJIT_INLINE intptr_t getLabelOffset(uint32_t id) const {
ASMJIT_ASSERT(isLabelValid(id));
@@ -375,7 +370,6 @@ struct ASMJIT_VCLASS Assembler : public CodeGen {
ASMJIT_INLINE LabelData* getLabelData(const Label& label) const {
  // The id-based overload performs the lookup; pass it this label's id.
  const uint32_t labelId = label.getId();
  return getLabelData(labelId);
}
//! \overload
ASMJIT_INLINE LabelData* getLabelData(uint32_t id) const {
ASMJIT_ASSERT(isLabelValid(id));
@@ -535,9 +529,8 @@ struct ASMJIT_VCLASS Assembler : public CodeGen {
// [Defined-Later]
// ============================================================================
// Construct a label through the assembler `a`: the `Operand` base is created
// with `NoInit` and `a._newLabel()` then receives `this`.
ASMJIT_INLINE Label::Label(Assembler& a)
  : Operand(NoInit)
{
  a._newLabel(this);
}
// Construct a label through the assembler `a`: the `Operand` base is created
// with `NoInit` and `a._newLabel()` then receives `this`.
ASMJIT_INLINE Label::Label(Assembler& a) : Operand(NoInit) {
  a._newLabel(this);
}
} // asmjit namespace

View File

@@ -512,26 +512,38 @@ _NoMemory:
}
// Add an alloc hint for `var` without a preferred register index. A variable
// whose id is `kInvalidValue` is silently ignored.
void Compiler::alloc(Var& var) {
  if (var.getId() != kInvalidValue)
    addHint(var, kVarHintAlloc, kInvalidValue);
}
// Add an alloc hint for `var` that carries `regIndex` as the hint value. A
// variable whose id is `kInvalidValue` is silently ignored.
void Compiler::alloc(Var& var, uint32_t regIndex) {
  if (var.getId() != kInvalidValue)
    addHint(var, kVarHintAlloc, regIndex);
}
// Add an alloc hint for `var` that carries the register index of `reg` as the
// hint value. A variable whose id is `kInvalidValue` is silently ignored.
void Compiler::alloc(Var& var, const Reg& reg) {
  if (var.getId() != kInvalidValue)
    addHint(var, kVarHintAlloc, reg.getRegIndex());
}
// Add a save hint for `var`. A variable whose id is `kInvalidValue` is
// silently ignored.
void Compiler::save(Var& var) {
  if (var.getId() != kInvalidValue)
    addHint(var, kVarHintSave, kInvalidValue);
}
// Add a spill hint for `var`. A variable whose id is `kInvalidValue` is
// silently ignored.
void Compiler::spill(Var& var) {
  if (var.getId() != kInvalidValue)
    addHint(var, kVarHintSpill, kInvalidValue);
}
// Add an unuse hint for `var`. A variable whose id is `kInvalidValue` is
// silently ignored.
void Compiler::unuse(Var& var) {
  if (var.getId() != kInvalidValue)
    addHint(var, kVarHintUnuse, kInvalidValue);
}

View File

@@ -235,9 +235,11 @@ ASMJIT_ENUM(kVarState) {
//! - `X86FuncConv` - X86/X64 calling conventions.
ASMJIT_ENUM(FuncConv) {
//! Calling convention is invalid (can't be used).
kFuncConvNone = 0,
kFuncConvNone = 0
#if defined(ASMJIT_DOCGEN)
,
//! Default calling convention for current platform / operating system.
kFuncConvHost = DependsOnHost,
@@ -276,7 +278,7 @@ ASMJIT_ENUM(FuncHint) {
//! X86/X64 Specific
//! ----------------
//!
//! Standard prolog sequence is:
//! Common prolog sequence is:
//!
//! ~~~
//! push zbp
@@ -290,7 +292,7 @@ ASMJIT_ENUM(FuncHint) {
//! enter StackAdjustment, 0
//! ~~~
//!
//! Standard epilog sequence is:
//! Common epilog sequence is:
//!
//! ~~~
//! mov zsp, zbp
@@ -2968,42 +2970,6 @@ struct ASMJIT_VCLASS Compiler : public CodeGen {
//! Unuse variable `var`.
ASMJIT_API void unuse(Var& var);
//! Alloc variable `var`, but only if it's initialized.
ASMJIT_INLINE void allocUnsafe(Var& var) {
  if (!var.isInitialized())
    return;
  alloc(var);
}
//! Alloc variable `var` using `regIndex` as a register index, but only if
//! `var` is initialized.
ASMJIT_INLINE void allocUnsafe(Var& var, uint32_t regIndex) {
  if (!var.isInitialized())
    return;
  alloc(var, regIndex);
}
//! Alloc variable `var` using `reg` as a register operand, but only if `var`
//! is initialized.
ASMJIT_INLINE void allocUnsafe(Var& var, const Reg& reg) {
  if (!var.isInitialized())
    return;
  alloc(var, reg);
}
//! Spill variable `var`, but only if it's initialized.
ASMJIT_INLINE void spillUnsafe(Var& var) {
  if (!var.isInitialized())
    return;
  spill(var);
}
//! Save variable `var` (only when it's initialized) if its status is
//! `modified` at this point.
ASMJIT_INLINE void saveUnsafe(Var& var) {
  if (!var.isInitialized())
    return;
  save(var);
}
//! Unuse variable `var`, but only if it's initialized.
ASMJIT_INLINE void unuseUnsafe(Var& var) {
  if (!var.isInitialized())
    return;
  unuse(var);
}
//! Get priority of variable `var`.
ASMJIT_API uint32_t getPriority(Var& var) const;
//! Set priority of variable `var` to `priority`.

View File

@@ -1078,8 +1078,9 @@ static ASMJIT_INLINE Imm imm_u(uint64_t val) {
return Imm(static_cast<int64_t>(val));
}
//! Create void* pointer immediate value operand.
static ASMJIT_INLINE Imm imm_ptr(void* p) {
//! Create a `void*` immediate value operand from `p`.
//!
//! `p` is first converted to `intptr_t` and then widened to `int64_t` before
//! the `Imm` is constructed.
template<typename T>
static ASMJIT_INLINE Imm imm_ptr(T p) {
  const intptr_t address = (intptr_t)p;
  return Imm(static_cast<int64_t>(address));
}

View File

@@ -1176,12 +1176,12 @@ static void VMemTest_stats(VMemMgr& memmgr) {
INFO("Allocated: %u", static_cast<unsigned int>(memmgr.getAllocatedBytes()));
}
static void VMemTest_shuffle(void **a, void **b, size_t count) {
static void VMemTest_shuffle(void** a, void** b, size_t count) {
for (size_t i = 0; i < count; ++i) {
size_t si = (size_t)rand() % count;
void *ta = a[i];
void *tb = b[i];
void* ta = a[i];
void* tb = b[i];
a[i] = a[si];
b[i] = b[si];

View File

@@ -244,7 +244,7 @@
#endif
// ============================================================================
// [asmjit::Build - BLEND_OFFSET_OF]
// [asmjit::Build - ASMJIT_OFFSET_OF]
// ============================================================================
//! Cross-platform solution to get offset of `_Field_` in `_Struct_`.

View File

@@ -2289,6 +2289,32 @@ _EmitFpArith_Mem:
}
break;
// Dedicated encoding for 'pextrw'. Two operand shapes are handled here; any
// other combination leaves the case through `break` without taking either
// emit path.
case kX86InstEncodingIdExtExtrW:
// Register destination: o0 supplies `opReg`, o1 supplies `rmReg`, and the
// primary opcode is kept.
if (encoded == ENC_OPS(Reg, Reg, Imm)) {
// NOTE(review): presumably adds the 66h prefix when the source is an XMM
// register - confirm against the ADD_66H_P definition.
ADD_66H_P(static_cast<const X86Reg*>(o1)->isXmm());
imVal = static_cast<const Imm*>(o2)->getInt64();
imLen = 1;
opReg = x86OpReg(o0);
rmReg = x86OpReg(o1);
goto _EmitX86R;
}
// Memory destination: the operand roles are swapped relative to the register
// form - o1 (the source register) supplies `opReg` and o0 supplies `rmMem`.
if (encoded == ENC_OPS(Mem, Reg, Imm)) {
// Secondary opcode of 'pextrw' instruction (SSE4.1).
opCode = extendedInfo.getSecondaryOpCode();
ADD_66H_P(static_cast<const X86Reg*>(o1)->isXmm());
imVal = static_cast<const Imm*>(o2)->getInt64();
imLen = 1;
opReg = x86OpReg(o1);
rmMem = x86OpMem(o0);
goto _EmitX86M;
}
break;
case kX86InstEncodingIdExtExtract:
if (encoded == ENC_OPS(Reg, Reg, Imm)) {
ADD_66H_P(static_cast<const X86Reg*>(o1)->isXmm());
@@ -2302,8 +2328,6 @@ _EmitFpArith_Mem:
}
if (encoded == ENC_OPS(Mem, Reg, Imm)) {
// Secondary opcode for 'pextrw' instruction (SSE2).
opCode = extendedInfo.getSecondaryOpCode();
ADD_66H_P(static_cast<const X86Reg*>(o1)->isXmm());
imVal = static_cast<const Imm*>(o2)->getInt64();

View File

@@ -2369,12 +2369,12 @@ struct ASMJIT_VCLASS X86Assembler : public Assembler {
//! \overload
INST_2x(comisd, kX86InstIdComisd, X86XmmReg, X86Mem)
//! Convert packed QWORDs to packed DP-FP (SSE2).
//! Convert packed DWORDs to packed DP-FP (SSE2).
INST_2x(cvtdq2pd, kX86InstIdCvtdq2pd, X86XmmReg, X86XmmReg)
//! \overload
INST_2x(cvtdq2pd, kX86InstIdCvtdq2pd, X86XmmReg, X86Mem)
//! Convert packed QWORDs to packed SP-FP (SSE2).
//! Convert packed DWORDs to packed SP-FP (SSE2).
INST_2x(cvtdq2ps, kX86InstIdCvtdq2ps, X86XmmReg, X86XmmReg)
//! \overload
INST_2x(cvtdq2ps, kX86InstIdCvtdq2ps, X86XmmReg, X86Mem)

File diff suppressed because it is too large Load Diff

View File

@@ -1318,7 +1318,9 @@ ASMJIT_ENUM(X86InstEncodingId) {
kX86InstEncodingIdExtRmi_P,
//! Crc32.
kX86InstEncodingIdExtCrc,
//! Pextrb/Pextrw/Pextrd/Pextrq/Extractps.
//! Pextrw.
kX86InstEncodingIdExtExtrW,
//! Pextrb/Pextrd/Pextrq/Extractps.
kX86InstEncodingIdExtExtract,
//! Lfence/Mfence/Sfence.
kX86InstEncodingIdExtFence,