path: root/Runtime/Math/Simd/xenon.h
author    chai <chaifix@163.com>  2019-08-14 22:50:43 +0800
committer chai <chaifix@163.com>  2019-08-14 22:50:43 +0800
commit    15740faf9fe9fe4be08965098bbf2947e096aeeb (patch)
tree      a730ec236656cc8cab5b13f088adfaed6bb218fb /Runtime/Math/Simd/xenon.h
+Unity Runtime code (HEAD, master)
Diffstat (limited to 'Runtime/Math/Simd/xenon.h')
-rw-r--r--  Runtime/Math/Simd/xenon.h  275
1 file changed, 275 insertions(+), 0 deletions(-)
diff --git a/Runtime/Math/Simd/xenon.h b/Runtime/Math/Simd/xenon.h
new file mode 100644
index 0000000..5ddfc68
--- /dev/null
+++ b/Runtime/Math/Simd/xenon.h
@@ -0,0 +1,275 @@
+#ifndef SIMD_XENON
+#define SIMD_XENON
+
+#include <vectorintrinsics.h>
+#include <xnamath.h>
+
+typedef __vector4 vec4f; // vector 4 packed
+typedef __vector4 vec4fs; // vector 4 scalar
+typedef __vector4 vec4b; // vector 4 bool packed
+typedef __vector4 vec4bs; // vector 4 bool scalar
+
+#define SWZ_MASK(x, y, z, w) VPERMWI_CONST(x, y, z, w)
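+// Each SWZ_MASK argument selects the source lane for that destination lane,
+// e.g. SWZ_MASK(1,2,3,0) rotates the vector left by one lane.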
+
+#define cvec4f(name, x,y,z,w) static const vec4f name = {(x),(y),(z),(w)}
+#define cvec4b(name, x,y,z,w) static const vec4b name = {(x),(y),(z),(w)}
+#define cvec4fs(name, s) static const vec4fs name = {(s),(s),(s),(s)}
+
+enum simd_mask
+{
+ kXYZW = SWZ_MASK(0,1,2,3),
+ kXXXX = SWZ_MASK(0,0,0,0),
+ kYYYY = SWZ_MASK(1,1,1,1),
+ kZZZZ = SWZ_MASK(2,2,2,2),
+ kWWWW = SWZ_MASK(3,3,3,3),
+
+ kXWYZ = SWZ_MASK(0,3,1,2),
+ kXZWY = SWZ_MASK(0,2,3,1),
+
+ kYZWX = SWZ_MASK(1,2,3,0),
+ kYXZW = SWZ_MASK(1,0,2,3),
+ kYWZX = SWZ_MASK(1,3,2,0),
+ kYZXW = SWZ_MASK(1,2,0,3),
+ kYXWZ = SWZ_MASK(1,0,3,2),
+
+ kZWXY = SWZ_MASK(2,3,0,1),
+ kZYXW = SWZ_MASK(2,1,0,3),
+ kZYWX = SWZ_MASK(2,1,3,0),
+ kZXYW = SWZ_MASK(2,0,1,3),
+
+ kWYZX = SWZ_MASK(3,1,2,0),
+ kWXZY = SWZ_MASK(3,0,2,1),
+ kWYXZ = SWZ_MASK(3,1,0,2),
+ kWWWZ = SWZ_MASK(3,3,3,2),
+ kWWZZ = SWZ_MASK(3,3,2,2),
+ kWZYX = SWZ_MASK(3,2,1,0),
+};
+
+#define Vzero() __vzero()
+#define Vone() __vupkd3d(__vspltisw(0), VPACK_D3DCOLOR)
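+// Vone() exploits the VMX128 D3DCOLOR unpack: a zero input unpacks to exactly
+// 1.0f in every lane, yielding the constant without a memory load.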
+
+#define Vpermute(v, mask) __vpermwi( (v), (mask) )
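+// e.g. Vpermute(v, kWZYX) yields (v.w, v.z, v.y, v.x).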
+
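+// Replace the x lane of l with the x lane of r; y/z/w are kept from l.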
+MECANIM_FORCE_INLINE vec4f Vmove(vec4f l, vec4f r)
+{
+	static const XMVECTORU32 vu32 = {0xFFFFFFFF,0,0,0};
+	return __vsel(l, r, vu32.v);
+}
+
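+// Vswizzle<SWZ>::rhs(r) permutes r by SWZ. lhs(l, r) permutes l by SWZ,
+// splices the x lane of r in with Vmove, then applies the permutation again.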
+template<int SWZ> struct Vswizzle
+{
+ static inline vec4f rhs(vec4f r)
+ {
+ return Vpermute(r, SWZ);
+ }
+
+ static inline vec4f lhs(vec4f l, vec4f r)
+ {
+ vec4f m = Vmove(Vswizzle<SWZ>::rhs(l), r);
+ return Vswizzle<SWZ>::rhs(m);
+ }
+};
+template<> struct Vswizzle<kXYZW>
+{
+ static inline vec4f rhs(vec4f r)
+ {
+ return r;
+ }
+ static inline vec4f lhs(vec4f l, vec4f r)
+ {
+ return Vmove(l, r);
+ }
+};
+
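+// Store the x lane to a scalar: lane 0 is splatted first so that __stvewx,
+// which stores the element selected by the effective address, writes it.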
+MECANIM_FORCE_INLINE float Vstoresf(vec4f r)
+{
+ float f; __stvewx(__vspltw(r, 0), &f, 0); return f;
+}
+
+MECANIM_FORCE_INLINE bool Vstoresb(vec4f r)
+{
+ union {
+ vec4f v;
+ int i[4];
+ } a; a.v = r;
+ return a.i[0] != 0;
+}
+
+// Aligned store
+#define Vstorepf(v, base, offset) __stvx((v), (base), (offset))
+
+MECANIM_FORCE_INLINE void Vstorepb(vec4f v, bool* r)
+{
+ union {
+ vec4f v;
+ int i[4];
+ } a; a.v = v;
+ r[0] = a.i[0] != 0;
+ r[1] = a.i[1] != 0;
+ r[2] = a.i[2] != 0;
+ r[3] = a.i[3] != 0;
+}
+
+MECANIM_FORCE_INLINE vec4f Vloadsf(float s)
+{
+ vec4f v = {s,s,s,s};
+ return v;
+}
+
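+// Broadcast a bool into a full-width lane mask (all bits set when true).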
+MECANIM_FORCE_INLINE vec4f Vloadsb(bool s)
+{
+ union {
+ int b[4];
+ vec4f v;
+ } static const false_true[2] = {
+ {0,0,0,0},
+ {~0,~0,~0,~0}
+ };
+
+ return false_true[s].v;
+}
+
+MECANIM_FORCE_INLINE vec4f Vload4sf(float x, float y, float z, float w)
+{
+ vec4f v = {x,y,z,w};
+ return v;
+}
+
+static MECANIM_FORCE_INLINE vec4f Vload4sb(bool x, bool y, bool z, bool w)
+{
+ union {
+ int b;
+ float f;
+ } static const false_true[2] = {
+ 0,~0
+ };
+
+ vec4f v = {false_true[x].f,false_true[y].f,false_true[z].f,false_true[w].f};
+ return v;
+}
+
+#define Vloadpf(v, offset) __lvx( (v), (offset))
+
+#define Vadd(l, r) __vaddfp((l), (r))
+
+#define Vsub( l, r) __vsubfp((l), (r))
+
+#define Vmul( l, r) __vmulfp((l), (r))
+
+MECANIM_FORCE_INLINE vec4f Vrcp(vec4f r)
+{
+ // This function does two iterations of Newton's method!
+ return XMVectorReciprocal(r);
+}
+
+MECANIM_FORCE_INLINE vec4f Vdiv(vec4f l, vec4f r)
+{
+ // This function does two iterations of Newton's method!
+ return XMVectorDivide(l, r);
+}
+
+#define Vmadd( a, b, c) __vmaddfp((a), (b), (c))
+
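+// __vnmsubfp(a, b, c) computes -(a*b - c); negating it yields a*b - c.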
+#define Vmsub( a, b, c) Vneg(__vnmsubfp((a), (b), (c)))
+
+static const vec4f sign_constant = {-0.f,-0.f,-0.f,-0.f};
+
+#define Vneg(r) __vxor( (r), sign_constant)
+
+
+// vector sgn: returns -1 or 1
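+// (ORs the sign bit of r onto 1.0f, so Vsgn(-0.f) is -1.f and Vsgn(0.f) is 1.f.)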
+#define Vsgn(r) __vor(Vone(), __vand(sign_constant, (r) ))
+
+// vector sgn: returns -1, 0, or 1
+static MECANIM_FORCE_INLINE vec4f Vsign(vec4f r)
+{
+ vec4f c = __vcmpeqfp(r, Vzero());
+ return __vor( __vand(__vnor(c,c), Vone()), __vand(sign_constant, r ));
+}
+
+#define Vinc(r) Vadd( (r), Vone())
+#define Vdec(r) Vsub( (r), Vone())
+#define Vabs(r) __vandc((r), sign_constant)
+#define Vmax( l, r) __vmaxfp((l), (r))
+#define Vmin( l, r) __vminfp((l), (r))
+
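+// Horizontal max: two rotate-and-max steps leave max(x,y,z,w) in every lane.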
+MECANIM_FORCE_INLINE vec4fs Vlargest(vec4f r)
+{
+ r = Vmax(r, Vswizzle<kYZWX>::rhs(r));
+ r = Vmax(r, Vswizzle<kZWXY>::rhs(r));
+ return r;
+}
+
+MECANIM_FORCE_INLINE vec4fs Vsmallest(vec4f r)
+{
+ r = Vmin(r, Vswizzle<kYZWX>::rhs(r));
+ r = Vmin(r, Vswizzle<kZWXY>::rhs(r));
+ return r;
+}
+
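+// Horizontal add: after two rotate-and-add steps every lane holds the sum.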
+MECANIM_FORCE_INLINE vec4fs Vsum(vec4f r)
+{
+ r = Vadd(r, Vswizzle<kYZWX>::rhs(r) );
+ r = Vadd(r, Vswizzle<kZWXY>::rhs(r) );
+ return Vswizzle<kXXXX>::rhs(r);
+}
+
+#define Vdot( l, r) __vmsum4fp( (l), (r) )
+
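+// Hardware reciprocal-sqrt estimate refined with one Newton-Raphson step:
+// e * (3 - r*e*e) * 0.5.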
+MECANIM_FORCE_INLINE vec4f Vrsqrt(vec4f r)
+{
+ static const vec4f three = {3.f,3.f,3.f,3.f};
+ static const vec4f a = {0.5f,0.5f,0.5f,0.5f};
+
+ vec4f const e = __vrsqrtefp(r);
+ return Vmul( Vmul(e, Vsub(three, Vmul( Vmul(e,e),r))), a);
+}
+
+
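+// sqrt(r) as r * rsqrt(r); the select returns 0 where r == 0, where the
+// reciprocal-sqrt estimate would be infinite.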
+#define Vsqrt(r) __vsel( Vmul(r, Vrsqrt(r)), Vzero(), __vcmpeqfp(r, Vzero()))
+
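+// Pack the x lanes of four vectors into one: (x.x, y.x, z.x, w.x).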
+#define Vcombine(x,y,z,w) __vmrghw(__vmrghw((x), (z)), __vmrghw((y), (w)))
+
+// Vector comparison
+#define Vcmpeq( a, b) __vcmpeqfp((a), (b))
+
+MECANIM_FORCE_INLINE vec4f Vcmpneq( vec4f a, vec4f b)
+{
+ vec4f c = __vcmpeqfp(a, b);
+ return __vnor(c, c);
+}
+
+#define Vcmpgt( a, b) __vcmpgtfp((a), (b))
+#define Vcmpge( a, b) __vcmpgefp((a), (b))
+#define Vcmplt( a, b) __vcmpgtfp((b), (a))
+#define Vcmple( a, b) __vcmpgefp((b), (a))
+
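+// Bitwise select: takes a where mask c is set, b where it is clear; c must
+// be a full lane mask such as a comparison result.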
+#define Vsel( c, a, b) __vxor(b, __vand(__vxor(a, b), c))
+
+// vector logic ops
+#define Vnot(r) __vnor( (r), (r) )
+#define Vxnor( a, b) Vnot(__vxor((a), (b)))
+#define Vxor( a, b) __vxor((a), (b))
+#define Vand( a, b) __vand((a), (b))
+#define Vor( a, b) __vor((a), (b))
+
+MECANIM_FORCE_INLINE bool Vall(vec4b a)
+{
+ // All words equal
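+	// The R-form intrinsic also copies the CR6 predicate bits; 0x80 is set
+	// when every lane compared equal.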
+ static const XMVECTORU32 vu32 = {0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF};
+ const int All = 0x00000080;
+ unsigned int compareResult;
+ __vcmpequwR(a, vu32.v, &compareResult);
+ return (compareResult & All) == All;
+}
+
+MECANIM_FORCE_INLINE bool Vany(vec4b a)
+{
+ // Not all words equal to 0
+ const int All = 0x00000080;
+ unsigned int compareResult;
+ __vcmpequwR(a, Vzero(), &compareResult);
+ return (compareResult & All) == 0;
+}
+
+#endif // SIMD_XENON