diff options
author | chai <chaifix@163.com> | 2019-08-14 22:50:43 +0800 |
---|---|---|
committer | chai <chaifix@163.com> | 2019-08-14 22:50:43 +0800 |
commit | 15740faf9fe9fe4be08965098bbf2947e096aeeb (patch) | |
tree | a730ec236656cc8cab5b13f088adfaed6bb218fb /Runtime/Math/Matrix4x4_VFP.s |
Diffstat (limited to 'Runtime/Math/Matrix4x4_VFP.s')
-rw-r--r-- | Runtime/Math/Matrix4x4_VFP.s | 149 |
1 files changed, 149 insertions, 0 deletions
// Runtime/Math/Matrix4x4_VFP.s
//
// VFP implementations of 4x4 float matrix multiplication for 32-bit ARM.
// This file is run through the C preprocessor before it is assembled.
#define UNITY_ASSEMBLER
#include "Configuration/PrefixConfigure.h"
#include "Runtime/Utilities/VFPUtility.h"

#if UNITY_SUPPORTS_VFP

.syntax unified

// "device" is defined as 0 and then reassigned to __arm__; everything inside
// the .if below is assembled only when it ends up non-zero.
// NOTE(review): __arm__ is presumably the compiler predefined macro that the
// preprocessor substitutes on ARM targets - confirm for non-ARM builds.
.set device,0
.set device,__arm__

.if device

//.code32

.globl _MultiplyMatrices4x4_VFP
.globl _MultiplyMatrixArray4x4_VFP

#if UNITY_ANDROID

// Android builds additionally mark both entry points as hidden.
.hidden _MultiplyMatrices4x4_VFP
.hidden _MultiplyMatrixArray4x4_VFP

#endif


//===========================================================================================================================================


// void MultiplyMatrices4x4_VFP(const Matrix4x4f* __restrict lhs, const Matrix4x4f* __restrict rhs, Matrix4x4f* __restrict res)
//
// Computes one 4x4 product and stores the 16 result words at res.
//
// On entry:
//   r0: A   (lhs, 16 words, read in full before anything is stored)
//   r1: B   (rhs, 16 words, read in two halves of 8 words)
//   r2: dst (res, 16 words, written in two halves of 8 words)
//
// Register use:
//   s0-s7    current half of B
//   s8-s23   all of A
//   s24-s31  eight result words for the current half of B
//   ip       scratch copy of r0
//
// The first half of dst is stored before the second half of B is loaded, so
// dst must not overlap B (the prototype declares all three pointers
// __restrict).
//
// NOTE(review): FMULS4 / FMACS4 are macros from VFPUtility.h, which is not
// visible here. Their arguments read as (4 destination s-regs, 4 first
// operands, 4 second operands), i.e. four scalar multiplies and four scalar
// multiply-accumulates respectively - confirm against the header.
_MultiplyMatrices4x4_VFP:

    // d8-d15 (s16-s31) are written below, so save them first.
    vpush   {d8-d15}

    // NOTE(review): this save/restore of r0 through ip brackets the disabled
    // VFP_VECTOR_LENGTH macro, which presumably clobbered r0. With the macro
    // commented out the pair has no net effect on r0.
    mov     ip, r0

    // VFP_VECTOR_LENGTH(3)

    mov     r0, ip

    // A -> s8-s23 (r0 not advanced); first 8 words of B -> s0-s7 (r1 advanced).
    vldmia.32 r0, {s8-s23}
    vldmia.32 r1!, {s0-s7}

    // First half: s24-s27 are built from B words s0..s3 and s28-s31 from
    // B words s4..s7, each combined with A words s8-s11, s12-s15, s16-s19
    // and s20-s23 in turn.
    FMULS4 (24,25,26,27, 8,9,10,11, 0,0,0,0)
    FMULS4 (28,29,30,31, 8,9,10,11, 4,4,4,4)

    FMACS4 (24,25,26,27, 12,13,14,15, 1,1,1,1)
    FMACS4 (28,29,30,31, 12,13,14,15, 5,5,5,5)

    FMACS4 (24,25,26,27, 16,17,18,19, 2,2,2,2)
    FMACS4 (28,29,30,31, 16,17,18,19, 6,6,6,6)

    FMACS4 (24,25,26,27, 20,21,22,23, 3,3,3,3)
    FMACS4 (28,29,30,31, 20,21,22,23, 7,7,7,7)

    // Store the first 8 result words (r2 advanced), then fetch the remaining
    // 8 words of B into the same s0-s7 registers.
    vstmia.32 r2!, {s24-s31}
    vldmia.32 r1, {s0-s7}

    // Second half: identical sequence on the new contents of s0-s7.
    FMULS4 (24,25,26,27, 8,9,10,11, 0,0,0,0)
    FMULS4 (28,29,30,31, 8,9,10,11, 4,4,4,4)

    FMACS4 (24,25,26,27, 12,13,14,15, 1,1,1,1)
    FMACS4 (28,29,30,31, 12,13,14,15, 5,5,5,5)

    FMACS4 (24,25,26,27, 16,17,18,19, 2,2,2,2)
    FMACS4 (28,29,30,31, 16,17,18,19, 6,6,6,6)

    FMACS4 (24,25,26,27, 20,21,22,23, 3,3,3,3)
    FMACS4 (28,29,30,31, 20,21,22,23, 7,7,7,7)

    // Store the last 8 result words.
    vstmia.32 r2, {s24-s31}

    // VFP_VECTOR_LENGTH_ZERO

    // Restore the saved d8-d15 and return.
    vpop    {d8-d15}
    bx      lr


//===========================================================================================================================================

// void MultiplyMatrixArray4x4_VFP(const Matrix4x4f* arrayA, const Matrix4x4f* arrayB, Matrix4x4f* arrayRes, size_t count)
//
// Multiplies "count" pairs of 4x4 matrices. Each iteration consumes 16 words
// from A and 16 words from B and writes 16 words to dst; all three pointers
// advance by one matrix (64 bytes) per iteration.
//
// On entry:
//   r0: A     (arrayA)
//   r1: B     (arrayB)
//   r2: dst   (arrayRes)
//   r3: count (rewritten below to "A end" = r0 + count * 64)
//
// Register use:
//   s0-s7    current half of the B matrix
//   s8-s15   eight result words for the current half
//   s16-s31  the whole A matrix
//   ip       scratch copy of r0
//
// A count of zero returns immediately without touching memory.
//
// NOTE(review): FMULS4 / FMACS4 are macros from VFPUtility.h, which is not
// visible here. Their arguments read as (4 destination s-regs, 4 first
// operands, 4 second operands), i.e. four scalar multiplies and four scalar
// multiply-accumulates respectively - confirm against the header.
_MultiplyMatrixArray4x4_VFP:

    // The loop below tests its exit condition only after the body has run, so
    // without this guard a zero count would still read one matrix from A and B
    // and write one matrix to dst. Nothing has been pushed yet, so branch
    // straight to the return.
    cmp     r3, #0
    beq     _MultiplyMatrixArray4x4_VFP_done

    // d8-d15 (s16-s31) are written below, so save them first.
    vpush   {d8-d15}

    // NOTE(review): this save/restore of r0 through ip brackets the disabled
    // VFP_VECTOR_LENGTH macro, which presumably clobbered r0. With the macro
    // commented out the pair has no net effect on r0.
    mov     ip, r0

    // VFP_VECTOR_LENGTH(3)

    mov     r0, ip

    // r3 = r0 + (count << 6): address one past the last A matrix.
    add     r3, r0, r3, lsl #6


.align 4
_MultiplyMatrixArray4x4_VFP_loop:

    // Next A matrix -> s16-s31, first 8 words of next B matrix -> s0-s7.
    vldmia.32 r0!, {s16-s31}
    vldmia.32 r1!, {s0-s7}

    // First half: s8-s11 are built from B words s0..s3 and s12-s15 from
    // B words s4..s7, each combined with A words s16-s19, s20-s23, s24-s27
    // and s28-s31 in turn.
    FMULS4 (8,9,10,11, 16,17,18,19, 0,0,0,0)
    FMULS4 (12,13,14,15, 16,17,18,19, 4,4,4,4)

    FMACS4 (8,9,10,11, 20,21,22,23, 1,1,1,1)
    FMACS4 (12,13,14,15, 20,21,22,23, 5,5,5,5)

    FMACS4 (8,9,10,11, 24,25,26,27, 2,2,2,2)
    FMACS4 (12,13,14,15, 24,25,26,27, 6,6,6,6)

    FMACS4 (8,9,10,11, 28,29,30,31, 3,3,3,3)
    FMACS4 (12,13,14,15, 28,29,30,31, 7,7,7,7)

    // Fetch the remaining 8 words of B, then store the first 8 result words.
    vldmia.32 r1!, {s0-s7}
    vstmia.32 r2!, {s8-s15}

    // Second half: identical sequence on the new contents of s0-s7.
    FMULS4 (8,9,10,11, 16,17,18,19, 0,0,0,0)
    FMULS4 (12,13,14,15, 16,17,18,19, 4,4,4,4)

    FMACS4 (8,9,10,11, 20,21,22,23, 1,1,1,1)
    FMACS4 (12,13,14,15, 20,21,22,23, 5,5,5,5)

    FMACS4 (8,9,10,11, 24,25,26,27, 2,2,2,2)
    FMACS4 (12,13,14,15, 24,25,26,27, 6,6,6,6)

    FMACS4 (8,9,10,11, 28,29,30,31, 3,3,3,3)
    FMACS4 (12,13,14,15, 28,29,30,31, 7,7,7,7)

    // Store the last 8 result words of this matrix.
    vstmia.32 r2!, {s8-s15}

    // Continue while the A cursor is still below the end address (unsigned).
    cmp     r0, r3
    bcc     _MultiplyMatrixArray4x4_VFP_loop

    // VFP_VECTOR_LENGTH_ZERO

    // Restore the saved d8-d15.
    vpop    {d8-d15}

_MultiplyMatrixArray4x4_VFP_done:
    bx      lr


.endif

#endif
\ No newline at end of file |