summaryrefslogtreecommitdiff
path: root/Runtime/Filters/Mesh/MeshSkinningNeon_Loop.h
diff options
context:
space:
mode:
authorchai <chaifix@163.com>2019-08-14 22:50:43 +0800
committerchai <chaifix@163.com>2019-08-14 22:50:43 +0800
commit15740faf9fe9fe4be08965098bbf2947e096aeeb (patch)
treea730ec236656cc8cab5b13f088adfaed6bb218fb /Runtime/Filters/Mesh/MeshSkinningNeon_Loop.h
+Unity Runtime codeHEADmaster
Diffstat (limited to 'Runtime/Filters/Mesh/MeshSkinningNeon_Loop.h')
-rw-r--r--Runtime/Filters/Mesh/MeshSkinningNeon_Loop.h487
1 files changed, 487 insertions, 0 deletions
diff --git a/Runtime/Filters/Mesh/MeshSkinningNeon_Loop.h b/Runtime/Filters/Mesh/MeshSkinningNeon_Loop.h
new file mode 100644
index 0000000..8e584da
--- /dev/null
+++ b/Runtime/Filters/Mesh/MeshSkinningNeon_Loop.h
@@ -0,0 +1,487 @@
+
+// defines
+// SKIN_1BONE
+// SKIN_2BONES
+// SKIN_4BONES
+// LOOP_NAME
+// VERTEX_SZ
+
+// skin types
+// SKIN_POS
+// SKIN_POS_NRM
+// SKIN_POS_NRM_TAN
+
+
+
+//r0: const void* bones4x4
+//r1: const void* srcVertData
+//r2: const void* srcVertDataEnd
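+//r3: const BoneInfluence4* srcBoneInfluence4 (4-bone layout: 4 weights + 4 indices; the 2-bone loop reads 2 weights + 2 indices per vertex, the 1-bone loop one 32-bit index per vertex)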
+//[sp+0] -> r4: const void* dstVertData
+
+// r5, r6: index
+// r7: matrix address
+// r8: 12 (offset for vector3)
+
+// q0 <- output: pos
+// q1 <- output: nrm
+// q2 <- output: tan
+// q3 <- input: pos
+// q4 <- input: nrm
+// q5 <- input: tan
+// d11,d12 <- weights
+// q12-q15 (blended matrix)
+// q8-q11 (cur matrix)
+
+
+// input:
+// d6[0], d6[1], d7[0] <- pos
+// d7[1], d8[0], d8[1] <- nrm
+// d9[0], d9[1], d10[0], d10[1] <- tan
+// q3 <- pos.x, pos.y, pos.z, nrm.x
+// q4 <- nrm.y, nrm.z, tan.x, tan.y
+// q5 <- tan.z, tan.w, w0, w1
+
+
+//===========================================================================================================================================
+//
+// Common
+
+#define CALC_POS_1 vmul.f32 q0, q12, d6[0] // q0  = q12 * pos.x (starts a fresh accumulation)
+#define CALC_POS_2 vmla.f32 q0, q13, d6[1] // q0 += q13 * pos.y
+#define CALC_POS_3 vmla.f32 q0, q14, d7[0] // q0 += q14 * pos.z
+#define CALC_POS_4 vadd.f32 q0, q15 // q0 += q15, added unscaled
+// STORE_POS writes all 16 bytes of q0 but post-increments r4 by r8 only (each routine below sets r8 to 12).
+#define STORE_POS vst1.32 {d0, d1}, [r4], r8
+
+#if (SKIN_1BONE == SKIN_POS_NRM) || (SKIN_1BONE == SKIN_POS_NRM_TAN) \
+ || (SKIN_2BONES == SKIN_POS_NRM) || (SKIN_2BONES == SKIN_POS_NRM_TAN) \
+ || (SKIN_4BONES == SKIN_POS_NRM) || (SKIN_4BONES == SKIN_POS_NRM_TAN)
+// Variants with normals: position and normal are fetched by one 24-byte load; the normal uses q12-q14 only (q15 is never added).
+ #define LOAD_POS_NRM vld1.32 {d6, d7, d8}, [r1, :64]! // :64 asserts an 8-byte aligned r1; writeback advances r1 past the 24 bytes read
+ #define STORE_NRM vst1.32 {d2, d3}, [r4], r8 // writes 16 bytes of q1, advances r4 by r8
+ #define CALC_NRM_1 vmul.f32 q1, q12, d7[1] // q1  = q12 * nrm.x
+ #define CALC_NRM_2 vmla.f32 q1, q13, d8[0] // q1 += q13 * nrm.y
+ #define CALC_NRM_3 vmla.f32 q1, q14, d8[1] // q1 += q14 * nrm.z
+#else
+ #define LOAD_POS_NRM vld1.32 {d6, d7}, [r1], r8 // position only: reads 16 bytes, advances r1 by r8
+ #define STORE_NRM // no normal in this variant: the normal steps expand to nothing
+ #define CALC_NRM_1
+ #define CALC_NRM_2
+ #define CALC_NRM_3
+#endif
+
+#if (SKIN_1BONE == SKIN_POS_NRM_TAN) || (SKIN_2BONES == SKIN_POS_NRM_TAN) || (SKIN_4BONES == SKIN_POS_NRM_TAN)
+ #define LOAD_TAN vld1.32 {d9, d10}, [r1, :64]! // reads the 16-byte tangent, advances r1 past it
+ #define STORE_TAN vst1.32 {d4, d5}, [r4]! // writes all 16 bytes of q2, advances r4 by 16
+ #define CALC_TAN_1 vmul.f32 q2, q12, d9[0] // q2  = q12 * tan.x
+ #define CALC_TAN_2 vmla.f32 q2, q13, d9[1] // q2 += q13 * tan.y
+ #define CALC_TAN_3 vmla.f32 q2, q14, d10[0] // q2 += q14 * tan.z
+ #define CALC_TAN_4 vmov.f32 s11, s21 // s11 is the top lane of q2, s21 is d10[1]: tan.w is copied through untransformed
+#else
+ #define LOAD_TAN // no tangent in this variant: the tangent steps expand to nothing
+ #define STORE_TAN
+ #define CALC_TAN_1
+ #define CALC_TAN_2
+ #define CALC_TAN_3
+ #define CALC_TAN_4
+#endif
+
+// The copy-data stream follows directly after the vertex data, so the stores for the final vertex must not write past its last component.
+#if (SKIN_1BONE == SKIN_POS) || (SKIN_2BONES == SKIN_POS) || (SKIN_4BONES == SKIN_POS)
+#define STORE_POS_LAST1 vst1.32 {d0}, [r4]! // position is the last attribute: write 8 bytes ...
+#define STORE_POS_LAST2 vst1.32 {d1[0]}, [r4]! // ... then 4 more, 12 bytes in total instead of 16
+#else
+#define STORE_POS_LAST1 STORE_POS // another attribute is stored after the position, so the regular 16-byte store is used
+#define STORE_POS_LAST2
+#endif
+// Same scheme for the normal when it is the last skinned attribute (SKIN_POS_NRM).
+#if (SKIN_1BONE == SKIN_POS_NRM) || (SKIN_2BONES == SKIN_POS_NRM) || (SKIN_4BONES == SKIN_POS_NRM)
+#define STORE_NRM_LAST1 vst1.32 {d2}, [r4]! // 8 bytes ...
+#define STORE_NRM_LAST2 vst1.32 {d3[0]}, [r4]! // ... plus 4 bytes
+#else
+#define STORE_NRM_LAST1 STORE_NRM // expands to nothing for SKIN_POS, to the regular store when a tangent follows
+#define STORE_NRM_LAST2
+#endif
+
+#define __NAME_EPILOGUE(x) x ## EPILOGUE // pastes the suffix onto its (already expanded) argument
+#define _NAME_EPILOGUE(x) __NAME_EPILOGUE(x) // extra level so LOOP_NAME is macro-expanded before the paste
+#define LOOP_EPILOGUE _NAME_EPILOGUE(LOOP_NAME) // label name: the value of LOOP_NAME followed by EPILOGUE
+
+
+
+#if (SKIN_1BONE == SKIN_POS) || (SKIN_1BONE == SKIN_POS_NRM) || (SKIN_1BONE == SKIN_POS_NRM_TAN)
+ #define LOAD_M_12 vld1.32 {q12,q13}, [r7,:128]! // 1 bone: no blending, load straight into q12-q15; writeback moves r7 to the second half
+ #define LOAD_M_34 vld1.32 {q14,q15}, [r7,:128] // second 32 bytes of the matrix, r7 left unchanged
+#else
+ #define LOAD_M_12 vld1.32 {q8,q9}, [r7,:128]! // 2/4 bones: load into the "cur matrix" registers q8-q11, to be weighted into q12-q15
+ #define LOAD_M_34 vld1.32 {q10,q11}, [r7,:128] // second 32 bytes of the matrix, r7 left unchanged
+#endif
+
+#define WEIGHT_MATRIX_1(op,r) op.f32 q12, q8, r // one quarter of the blend: q12 (op) q8 * scalar lane r
+#define WEIGHT_MATRIX_2(op,r) op.f32 q13, q9, r // q13 (op) q9 * r
+#define WEIGHT_MATRIX_3(op,r) op.f32 q14, q10, r // q14 (op) q10 * r
+#define WEIGHT_MATRIX_4(op,r) op.f32 q15, q11, r // q15 (op) q11 * r
+// Bone 0 uses vmul, which overwrites q12-q15 and so starts a new blend; its weight is lane d11[0].
+#define WEIGHT_M0_1 WEIGHT_MATRIX_1(vmul, d11[0])
+#define WEIGHT_M0_2 WEIGHT_MATRIX_2(vmul, d11[0])
+#define WEIGHT_M0_3 WEIGHT_MATRIX_3(vmul, d11[0])
+#define WEIGHT_M0_4 WEIGHT_MATRIX_4(vmul, d11[0])
+// Bone 1 accumulates onto the blend (vmla) with weight lane d11[1].
+#define WEIGHT_M1_1 WEIGHT_MATRIX_1(vmla, d11[1])
+#define WEIGHT_M1_2 WEIGHT_MATRIX_2(vmla, d11[1])
+#define WEIGHT_M1_3 WEIGHT_MATRIX_3(vmla, d11[1])
+#define WEIGHT_M1_4 WEIGHT_MATRIX_4(vmla, d11[1])
+// Bone 2 accumulates with weight lane d12[0].
+#define WEIGHT_M2_1 WEIGHT_MATRIX_1(vmla, d12[0])
+#define WEIGHT_M2_2 WEIGHT_MATRIX_2(vmla, d12[0])
+#define WEIGHT_M2_3 WEIGHT_MATRIX_3(vmla, d12[0])
+#define WEIGHT_M2_4 WEIGHT_MATRIX_4(vmla, d12[0])
+// Bone 3 accumulates with weight lane d12[1].
+#define WEIGHT_M3_1 WEIGHT_MATRIX_1(vmla, d12[1])
+#define WEIGHT_M3_2 WEIGHT_MATRIX_2(vmla, d12[1])
+#define WEIGHT_M3_3 WEIGHT_MATRIX_3(vmla, d12[1])
+#define WEIGHT_M3_4 WEIGHT_MATRIX_4(vmla, d12[1])
+
+
+//===========================================================================================================================================
+//
+// 1 bone skinning
+
+#if (SKIN_1BONE == SKIN_POS) || (SKIN_1BONE == SKIN_POS_NRM) || (SKIN_1BONE == SKIN_POS_NRM_TAN)
+// 1-bone routine body: each vertex is transformed by a single matrix selected by one 32-bit index read from r3.
+mov ip, sp // remember the entry sp so the stack argument can still be addressed after the pushes
+
+vpush {d8-d10} // d8-d10 are overwritten by LOAD_POS_NRM / LOAD_TAN
+stmfd sp!, {r4-r8}
+
+ldr r4, [ip, #0] // r4 = dstVertData, the first stack argument
+mov r8, #12 // register post-increment used by LOAD_POS_NRM / STORE_POS / STORE_NRM
+
+ ldr r5, [r3], #4 // index for the first vertex, r3 advances by 4
+ add r7, r0, r5, lsl #6 // r7 = bones4x4 + index * 64
+// NOTE(review): the loop body runs before the first end-of-data compare, so callers presumably pass at least one vertex.
+LOOP_NAME:
+
+
+
+LOAD_M_12
+LOAD_M_34
+
+
+LOAD_POS_NRM
+LOAD_TAN
+
+CALC_POS_1
+CALC_NRM_1
+CALC_TAN_1
+
+ cmp r1, r2 // r1 already points at the next vertex; nothing below changes the flags before beq/bcc
+ pld [r1, #256]
+
+CALC_POS_2
+CALC_NRM_2
+CALC_TAN_2
+
+ ldrcc r5, [r3], #4 // fetch the next index only while r1 < r2
+ add r7, r0, r5, lsl #6 // next matrix address (recomputed from the old r5 when the load above was skipped)
+
+CALC_POS_3
+CALC_NRM_3
+CALC_TAN_3
+
+ pld [r7]
+
+CALC_POS_4
+CALC_TAN_4
+
+beq LOOP_EPILOGUE // r1 == r2: this was the final vertex, store it with the *_LAST variants
+
+STORE_POS
+STORE_NRM
+STORE_TAN
+
+bcc LOOP_NAME // r1 < r2: more vertices to process
+
+LOOP_EPILOGUE:
+STORE_POS_LAST1
+STORE_POS_LAST2
+STORE_NRM_LAST1
+STORE_NRM_LAST2
+STORE_TAN
+
+
+ldmfd sp!, {r4-r8}
+vpop {d8-d10}
+
+bx lr
+
+
+//===========================================================================================================================================
+//
+// 2 bones skinning
+
+#elif (SKIN_2BONES == SKIN_POS || SKIN_2BONES == SKIN_POS_NRM || SKIN_2BONES == SKIN_POS_NRM_TAN)
+// 2-bone routine body, software-pipelined: while vertex N is transformed with q12-q15, the blend for vertex N+1 is rebuilt in the same registers.
+mov ip, sp // remember the entry sp so the stack argument can still be addressed after the pushes
+
+vpush {d8-d11} // d8-d10 hold vertex input, d11 the two weights
+stmfd sp!, {r4,r5,r6,r7,r8,r10}
+
+ldr r4, [ip, #0] // r4 = dstVertData, the first stack argument
+
+vld1.32 {d11}, [r3,:64]! // wgt -> two weights of the first vertex, r3 += 8
+ldmia r3!, {r5-r6} // idx -> two indices of the first vertex, r3 now at the second record
+
+add r7, r0, r5, lsl #6 // M0 ..
+LOAD_M_12 // M0
+WEIGHT_M0_1
+WEIGHT_M0_2
+
+LOAD_M_34 // M0
+add r7, r0, r6, lsl #6 // M1 ..
+WEIGHT_M0_3
+WEIGHT_M0_4
+
+LOAD_M_12 // M1
+WEIGHT_M1_1
+WEIGHT_M1_2
+// NOTE(review): the load below is unconditional, so it touches the second record even if only one vertex exists - confirm the buffer allows this.
+ldr r5, [r3, #8] // idx0
+
+mov r8, #12 // register post-increment used by LOAD_POS_NRM / STORE_POS / STORE_NRM
+sub r10, r2, #VERTEX_SZ // r10 = end - VERTEX_SZ; presumably the address of the final vertex (VERTEX_SZ comes from the includer)
+
+LOAD_M_34 // M1
+
+WEIGHT_M1_3
+// WEIGHT_M1_4 is deferred into the loop head; every iteration therefore enters with three quarters of bone 1 applied.
+.align 4
+LOOP_NAME:
+
+ cmp r1, r10 // carry clear: at least one more vertex follows the current one
+
+ add r7, r0, r5, lsl #6 // M0 ..
+ ldrcc r6, [r3, #12] // idx1
+LOAD_POS_NRM
+
+WEIGHT_M1_4
+
+LOAD_TAN
+
+CALC_POS_1
+LOAD_M_12 // M0
+ cmp r1, r10 // r1 has advanced: carry clear now means a vertex exists after the next one
+CALC_NRM_1
+CALC_TAN_1
+vld1.32 {d11}, [r3,:64] // wgt -> NOTE(review): unconditional, on the final vertex this reads the record after the last one used
+
+WEIGHT_M0_1
+ pld [r1,#256]
+
+CALC_POS_2
+LOAD_M_34 // M0
+ add r7, r0, r6, lsl #6 // M1 ..
+CALC_NRM_2
+CALC_TAN_2
+ ldrcc r5, [r3, #24] // idx0
+WEIGHT_M0_2
+CALC_POS_3
+
+ cmp r1, r2 // final compare: its flags feed beq/bcc below, nothing in between changes them
+CALC_NRM_3
+CALC_TAN_3
+LOAD_M_12 // M1
+
+
+WEIGHT_M0_3
+
+CALC_POS_4
+CALC_TAN_4
+
+WEIGHT_M0_4
+LOAD_M_34 // M1
+
+beq LOOP_EPILOGUE // r1 == r2: this was the final vertex, store it with the *_LAST variants
+
+WEIGHT_M1_1
+STORE_POS
+
+WEIGHT_M1_2
+STORE_NRM
+ add r3, r3, #16 // step to the next 16-byte record (2 weights + 2 indices); add leaves the flags untouched
+WEIGHT_M1_3
+STORE_TAN
+
+bcc LOOP_NAME // r1 < r2: more vertices to process
+
+LOOP_EPILOGUE:
+STORE_POS_LAST1
+STORE_POS_LAST2
+STORE_NRM_LAST1
+STORE_NRM_LAST2
+STORE_TAN
+
+
+ldmfd sp!, {r4,r5,r6,r7,r8,r10}
+vpop {d8-d11}
+bx lr
+
+
+//===========================================================================================================================================
+//
+// 4 bones skinning
+
+#elif (SKIN_4BONES == SKIN_POS || SKIN_4BONES == SKIN_POS_NRM || SKIN_4BONES == SKIN_POS_NRM_TAN)
+// 4-bone routine body: per vertex, four matrices are blended into q12-q15 with the weights in d11/d12, then the vertex is transformed.
+
+mov ip, sp // remember the entry sp so the stack argument can still be addressed after the pushes
+
+vpush {d8-d12} // d8-d10 hold vertex input, d11-d12 the four weights
+stmfd sp!, {r4-r8}
+
+ldr r4, [ip, #0] // r4 = dstVertData, the first stack argument
+
+vld1.32 {d11,d12}, [r3,:128]! // wgt -> four weights of the first vertex, r3 += 16
+ldmia r3!, {r5-r6} // idx' -> first two indices, r3 += 8
+
+add r7, r0, r5, lsl #6 // M0 ..
+LOAD_M_12 // M0
+LOAD_M_34 // M0
+
+mov r8, #12 // register post-increment used by LOAD_POS_NRM / STORE_POS / STORE_NRM
+// Loop invariant on entry: q8-q11 = matrix 0, d11/d12 = weights, r6 = second index, r3 points at the third index.
+.align 4
+LOOP_NAME:
+
+WEIGHT_M0_1
+LOAD_POS_NRM
+
+WEIGHT_M0_2
+LOAD_TAN
+ add r7, r0, r6, lsl #6 // M1 ..
+
+
+WEIGHT_M0_3
+LOAD_M_12 // M1
+
+WEIGHT_M0_4
+LOAD_M_34 // M1
+
+WEIGHT_M1_1
+ ldmia r3!, {r5-r6} // idx'' -> last two indices, r3 now at the next record
+
+WEIGHT_M1_2
+ add r7, r0, r5, lsl #6 // M2 ..
+ cmp r1, r2 // r1 already points at the next vertex; nothing below changes the flags before beq/bcc
+
+WEIGHT_M1_3
+LOAD_M_12 // M2
+
+WEIGHT_M1_4
+ pld [r3, #256]
+LOAD_M_34 // M2
+
+WEIGHT_M2_1
+ add r7, r0, r6, lsl #6 // M3 ..
+WEIGHT_M2_2
+WEIGHT_M2_3
+LOAD_M_12 // M3
+WEIGHT_M2_4
+
+LOAD_M_34 // M3
+WEIGHT_M3_1
+WEIGHT_M3_2
+WEIGHT_M3_3
+WEIGHT_M3_4
+ pld [r1, #256]
+
+CALC_POS_1
+vld1.32 {d11,d12}, [r3,:128]! // wgt -> NOTE(review): unconditional, on the final vertex this reads 16 bytes past the last record used
+
+CALC_NRM_1
+CALC_TAN_1
+ ldmcc r3!, {r5-r6} // idx -> only while r1 < r2
+
+CALC_POS_2
+ add r7, r0, r5, lsl #6 // M0 ..
+CALC_NRM_2
+CALC_TAN_2
+vldmia r7, {q8-q11} // M0 -> whole matrix in one load; on the final vertex r5 is stale and the loaded data is never used
+
+CALC_POS_3
+CALC_NRM_3
+CALC_TAN_3
+
+CALC_POS_4
+CALC_TAN_4
+
+beq LOOP_EPILOGUE // r1 == r2: this was the final vertex, store it with the *_LAST variants
+
+STORE_POS
+STORE_NRM
+STORE_TAN
+
+bcc LOOP_NAME // r1 < r2: more vertices to process
+
+LOOP_EPILOGUE:
+STORE_POS_LAST1
+STORE_POS_LAST2
+STORE_NRM_LAST1
+STORE_NRM_LAST2
+STORE_TAN
+
+
+ldmfd sp!, {r4-r8}
+vpop {d8-d12}
+bx lr
+
+
+//===========================================================================================================================================
+
+#endif
+#undef __NAME_EPILOGUE // from here on: drop every helper macro defined above so none of them leaks out of this header
+#undef _NAME_EPILOGUE
+#undef LOOP_EPILOGUE
+#undef CALC_POS_1
+#undef CALC_POS_2
+#undef CALC_POS_3
+#undef CALC_POS_4 // was missing from this list although CALC_POS_4 is defined next to CALC_POS_1..3
+#undef STORE_POS
+#undef STORE_POS_LAST1
+#undef STORE_POS_LAST2
+#undef LOAD_POS_NRM
+#undef STORE_NRM
+#undef STORE_NRM_LAST1
+#undef STORE_NRM_LAST2
+#undef CALC_NRM_1
+#undef CALC_NRM_2
+#undef CALC_NRM_3
+#undef LOAD_TAN
+#undef STORE_TAN
+#undef CALC_TAN_1
+#undef CALC_TAN_2
+#undef CALC_TAN_3
+#undef CALC_TAN_4
+#undef LOAD_M_12
+#undef LOAD_M_34
+#undef WEIGHT_MATRIX_1
+#undef WEIGHT_MATRIX_2
+#undef WEIGHT_MATRIX_3
+#undef WEIGHT_MATRIX_4
+#undef WEIGHT_M0_1
+#undef WEIGHT_M0_2
+#undef WEIGHT_M0_3
+#undef WEIGHT_M0_4
+#undef WEIGHT_M1_1
+#undef WEIGHT_M1_2
+#undef WEIGHT_M1_3
+#undef WEIGHT_M1_4
+#undef WEIGHT_M2_1
+#undef WEIGHT_M2_2
+#undef WEIGHT_M2_3
+#undef WEIGHT_M2_4
+#undef WEIGHT_M3_1
+#undef WEIGHT_M3_2
+#undef WEIGHT_M3_3
+#undef WEIGHT_M3_4