summaryrefslogtreecommitdiff
path: root/Runtime/Filters/Mesh/MeshSkinningSSE2.h
blob: c085309f1dbe1ced20e2c1cc6ab17b9257ce11be (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#if UNITY_SUPPORTS_SSE && !UNITY_64

// The typedef and prototypes below are spelled with __cdecl. On OSX and Linux builds the
// keyword is defined away to nothing so those declarations still compile
// (NOTE(review): presumably because those toolchains do not provide __cdecl - confirm).
#if UNITY_OSX || UNITY_LINUX
#define __cdecl
#endif

// Parameter list shared by the SkinSSE2_Function typedef and every SkinSSE2_* prototype
// below, so all of them have the same signature.
// SkinMeshOptimizedSSE2 passes, in order: info.inVertices, info.outVertices, the number of
// vertices to process, info.cachedPose, info.compactSkin, info.inStride and info.outStride.
#define SKIN_SSE2_PARAMS \
    const void* inVertices, \
    void* outVertices, \
    int numVertices, \
    const void* boneMatrices, \
    const void* weightsAndIndices, \
    int inputStride, \
    int outputStride

// Pointer to any of the SkinSSE2_* routines declared below; lets SkinMeshOptimizedSSE2 pick
// one routine up front and then call it through a single code path.
typedef void (__cdecl *SkinSSE2_Function)(SKIN_SSE2_PARAMS);

// C-linkage declarations of the skinning routines. Their definitions are not in this file
// (NOTE(review): presumably hand-written SSE2 assembly - confirm).
// The names encode the number of bone influences per vertex (1Bone / 2Bones / 4Bones) and
// the skinned channels (Pos, PosNormal, PosNormalTan); SkinMeshOptimizedSSE2 selects among
// them from info.bonesPerVertex, info.skinNormals and info.skinTangents.
extern "C"
{
    void __cdecl SkinSSE2_1Bone_Pos(SKIN_SSE2_PARAMS);
    void __cdecl SkinSSE2_2Bones_Pos(SKIN_SSE2_PARAMS);
    void __cdecl SkinSSE2_4Bones_Pos(SKIN_SSE2_PARAMS);
    void __cdecl SkinSSE2_1Bone_PosNormal(SKIN_SSE2_PARAMS);
    void __cdecl SkinSSE2_2Bones_PosNormal(SKIN_SSE2_PARAMS);
    void __cdecl SkinSSE2_4Bones_PosNormal(SKIN_SSE2_PARAMS);
    void __cdecl SkinSSE2_1Bone_PosNormalTan(SKIN_SSE2_PARAMS);
    void __cdecl SkinSSE2_2Bones_PosNormalTan(SKIN_SSE2_PARAMS);
    void __cdecl SkinSSE2_4Bones_PosNormalTan(SKIN_SSE2_PARAMS);
}


// Skins the vertices described by `info` with one of the external SkinSSE2_* routines.
//
// The routine is chosen from info.skinNormals / info.skinTangents (position only,
// position + normal, or position + normal + tangent) and info.bonesPerVertex (1, 2 or 4).
//
// Returns false, without writing any output, when:
//   - the CPU reports no SSE2 support,
//   - no routine exists for the requested combination (tangents without normals, or a
//     bone count other than 1, 2 or 4),
//   - info.inStride or info.outStride is larger than the on-stack scratch vertex used
//     for the last vertex.
// NOTE(review): callers are presumably expected to fall back to another skinning path
// when false is returned - confirm against the call site.
//
// Returns true otherwise (including when info.vertexCount is not positive, in which case
// nothing is skinned).
bool SkinMeshOptimizedSSE2(SkinMeshInfo& info)
{
	if (!CPUInfo::HasSSE2Support())
		return false;

	SkinSSE2_Function skinFunc = NULL;

	if (!info.skinNormals && !info.skinTangents)
	{
		// Keep this assert in front of the switch: a statement placed between
		// `switch (...) {` and the first case label is never executed.
		DebugAssert(info.inStride == sizeof(Vector3f));
		switch (info.bonesPerVertex)
		{
			case 1:
				skinFunc = &SkinSSE2_1Bone_Pos;
				break;
			case 2:
				skinFunc = &SkinSSE2_2Bones_Pos;
				break;
			case 4:
				skinFunc = &SkinSSE2_4Bones_Pos;
				break;
		}
	}
	else if (info.skinNormals && !info.skinTangents)
	{
		DebugAssert(info.inStride == sizeof(Vector3f) + sizeof(Vector3f));
		switch (info.bonesPerVertex)
		{
			case 1:
				skinFunc = &SkinSSE2_1Bone_PosNormal;
				break;
			case 2:
				skinFunc = &SkinSSE2_2Bones_PosNormal;
				break;
			case 4:
				skinFunc = &SkinSSE2_4Bones_PosNormal;
				break;
		}
	}
	else if (info.skinNormals && info.skinTangents)
	{
		DebugAssert(info.inStride == sizeof(Vector3f) + sizeof(Vector3f) + sizeof(Vector4f));
		switch (info.bonesPerVertex)
		{
			case 1:
				skinFunc = &SkinSSE2_1Bone_PosNormalTan;
				break;
			case 2:
				skinFunc = &SkinSSE2_2Bones_PosNormalTan;
				break;
			case 4:
				skinFunc = &SkinSSE2_4Bones_PosNormalTan;
				break;
		}
	}

	// Unsupported channel combination or bone count: nothing was selected above.
	if (skinFunc == NULL)
		return false;

	if (info.vertexCount > 0)
	{
		// Largest vertex the scratch buffers below are sized for.
		const int maxStride = 2 * sizeof(Vector3f) + sizeof(Vector4f) + 4;
		Assert(info.inStride <= maxStride && info.outStride <= maxStride);
		// Enforce the limit even if Assert does not abort: the memcpy calls below would
		// otherwise run past the stack buffers. Checked before any vertex is skinned so a
		// rejected mesh is left completely untouched.
		if (info.inStride > maxStride || info.outStride > maxStride)
			return false;

		// Skin all vertices apart from the last one directly in the caller's buffers.
		if (info.vertexCount > 1)
		{
			(*skinFunc)(info.inVertices, info.outVertices, info.vertexCount - 1, info.cachedPose, info.compactSkin, info.inStride, info.outStride);
		}

		// The last vertex goes through stack copies to avoid reading/writing past the end
		// of the caller's buffers. The extra 4 bytes are padding so a Vec3 can be accessed
		// as a Vec4.
		char vertexCopyIn[maxStride + 4];
		char vertexCopyOut[maxStride + 4];

		// Size in bytes of one per-vertex skin record for the selected bone count.
		int skinStride = (info.bonesPerVertex == 4) ? sizeof(BoneInfluence) :
			(info.bonesPerVertex == 2) ? sizeof(BoneInfluence2) :
			(info.bonesPerVertex == 1) ? sizeof(int) : 0;
		Assert(skinStride != 0);

		int index = info.vertexCount - 1;
		const char* compactSkin = static_cast<const char*>(info.compactSkin) + index * skinStride;
		const char* inVertex = static_cast<const char*>(info.inVertices) + index * info.inStride;
		char* outVertex = static_cast<char*>(info.outVertices) + index * info.outStride;

		memcpy(vertexCopyIn, inVertex, info.inStride);
		(*skinFunc)(vertexCopyIn, vertexCopyOut, 1, info.cachedPose, compactSkin, info.inStride, info.outStride);
		memcpy(outVertex, vertexCopyOut, info.outStride);
	}

	return true;
}
#else
// Stub compiled when the enclosing #if (UNITY_SUPPORTS_SSE && !UNITY_64) is false.
// Does no work and always returns false, the same value the SSE2 implementation returns
// when it cannot handle a mesh.
inline bool SkinMeshOptimizedSSE2(SkinMeshInfo& info)
{
	return false;
}
#endif