mirror of
https://github.com/asmjit/asmjit.git
synced 2025-12-18 04:54:36 +03:00
[abi] AsmJit v1.17 - cumulative & breaking changes
* Reworked register operands - all vector registers are now
platform::Vec, deriving from UniVec (universal vector operand).
Additionally, there is no platform::Reg; instead, asmjit::Reg
provides all necessary features to make it a base register for
each target architecture
* Reworked casting between registers - architecture-agnostic
names are now preferred - use Gp32 instead of Gpd or GpW, Gp64
instead of Gpq or GpX, etc.
* Reworked vector registers and their names - architecture-agnostic
naming is now preferred: Vec32, Vec64, Vec128, etc.
* Reworked naming conventions used across AsmJit - for clarity,
identifiers are now prefixed with the type, like sectionId(),
labelId(), etc.
* Reworked how Zone and ZoneAllocator are used across AsmJit,
preferring Zone in most cases and ZoneAllocator only for
containers - this change alone achieves around 5% better
performance of Builder and Compiler
* Reworked LabelEntry - decreased the size of the base entry
to 16 bytes for anonymous and unnamed labels. Avoided an
indirection when using labelEntries() - LabelEntry is now
a value and not a pointer
* Renamed LabelLink to Fixup
* Added a new header <asmjit/host.h> which would include
<asmjit/core.h> + target tools for the host architecture,
if enabled and supported
* Added new AArch64 instructions (BTI, CSSC, CHKFEAT)
* Added a mvn_ alternative of mvn instruction (fix for Windows
ARM64 SDK)
* Added more AArch64 CPU features to CpuInfo
* Added better support for Apple CPU detection (Apple M3, M4)
* Added a new benchmarking tool asmjit_bench_overhead, which
benchmarks the overhead of CodeHolder::init()/reset() and
creating/attaching emitters to it. Thanks to the benchmark, the
most common code paths were optimized
* Added a new benchmarking tool asmjit_bench_regalloc, which
aims to benchmark the cost and complexity of register allocation.
* Renamed asmjit_test_perf to asmjit_bench_codegen to make it
clear what is a test and what is a benchmark
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
// This file is part of AsmJit project <https://asmjit.com>
|
||||
//
|
||||
// See asmjit.h or LICENSE.md for license and copyright information
|
||||
// See <asmjit/core.h> or LICENSE.md for license and copyright information
|
||||
// SPDX-License-Identifier: Zlib
|
||||
|
||||
#ifndef ASMJIT_TEST_MISC_H_INCLUDED
|
||||
@@ -20,15 +20,15 @@ static void generateSseAlphaBlendInternal(
|
||||
Emitter& cc,
|
||||
const x86::Gp& dst, const x86::Gp& src, const x86::Gp& n,
|
||||
const x86::Gp& gp0,
|
||||
const x86::Xmm& simd0, const x86::Xmm& simd1, const x86::Xmm& simd2, const x86::Xmm& simd3,
|
||||
const x86::Xmm& simd4, const x86::Xmm& simd5, const x86::Xmm& simd6, const x86::Xmm& simd7) {
|
||||
const x86::Vec& simd0, const x86::Vec& simd1, const x86::Vec& simd2, const x86::Vec& simd3,
|
||||
const x86::Vec& simd4, const x86::Vec& simd5, const x86::Vec& simd6, const x86::Vec& simd7) {
|
||||
|
||||
x86::Gp i = n;
|
||||
x86::Gp j = gp0;
|
||||
|
||||
x86::Xmm vzero = simd0;
|
||||
x86::Xmm v0080 = simd1;
|
||||
x86::Xmm v0101 = simd2;
|
||||
x86::Vec vzero = simd0;
|
||||
x86::Vec v0080 = simd1;
|
||||
x86::Vec v0101 = simd2;
|
||||
|
||||
Label L_SmallLoop = cc.newLabel();
|
||||
Label L_SmallEnd = cc.newLabel();
|
||||
@@ -59,9 +59,9 @@ static void generateSseAlphaBlendInternal(
|
||||
// Small loop.
|
||||
cc.bind(L_SmallLoop);
|
||||
{
|
||||
x86::Xmm x0 = simd3;
|
||||
x86::Xmm y0 = simd4;
|
||||
x86::Xmm a0 = simd5;
|
||||
x86::Vec x0 = simd3;
|
||||
x86::Vec y0 = simd4;
|
||||
x86::Vec a0 = simd5;
|
||||
|
||||
cc.movd(y0, x86::ptr(src));
|
||||
cc.movd(x0, x86::ptr(dst));
|
||||
@@ -104,11 +104,11 @@ static void generateSseAlphaBlendInternal(
|
||||
// Aligned loop.
|
||||
cc.bind(L_LargeLoop);
|
||||
{
|
||||
x86::Xmm x0 = simd3;
|
||||
x86::Xmm x1 = simd4;
|
||||
x86::Xmm y0 = simd5;
|
||||
x86::Xmm a0 = simd6;
|
||||
x86::Xmm a1 = simd7;
|
||||
x86::Vec x0 = simd3;
|
||||
x86::Vec x1 = simd4;
|
||||
x86::Vec y0 = simd5;
|
||||
x86::Vec a0 = simd6;
|
||||
x86::Vec a1 = simd7;
|
||||
|
||||
cc.movups(y0, x86::ptr(src));
|
||||
cc.movaps(x0, x86::ptr(dst));
|
||||
@@ -233,14 +233,14 @@ static void generateSseAlphaBlend(asmjit::BaseEmitter& emitter, bool emitPrologE
|
||||
Gp i = cc.newIntPtr("i");
|
||||
Gp j = cc.newIntPtr("j");
|
||||
|
||||
Xmm v0 = cc.newXmm("v0");
|
||||
Xmm v1 = cc.newXmm("v1");
|
||||
Xmm v2 = cc.newXmm("v2");
|
||||
Xmm v3 = cc.newXmm("v3");
|
||||
Xmm v4 = cc.newXmm("v4");
|
||||
Xmm v5 = cc.newXmm("v5");
|
||||
Xmm v6 = cc.newXmm("v6");
|
||||
Xmm v7 = cc.newXmm("v7");
|
||||
Vec v0 = cc.newXmm("v0");
|
||||
Vec v1 = cc.newXmm("v1");
|
||||
Vec v2 = cc.newXmm("v2");
|
||||
Vec v3 = cc.newXmm("v3");
|
||||
Vec v4 = cc.newXmm("v4");
|
||||
Vec v5 = cc.newXmm("v5");
|
||||
Vec v6 = cc.newXmm("v6");
|
||||
Vec v7 = cc.newXmm("v7");
|
||||
|
||||
FuncNode* funcNode = cc.addFunc(FuncSignature::build<void, void*, const void*, size_t>());
|
||||
funcNode->setArg(0, dst);
|
||||
|
||||
Reference in New Issue
Block a user