1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
|
#if UNITY_SUPPORTS_SSE && !UNITY_64
// On OSX and Linux, define __cdecl as an empty macro so the declarations below,
// which are annotated with it, still compile on those platforms.
#if UNITY_OSX || UNITY_LINUX
#define __cdecl
#endif
// Shared parameter list of every SkinSSE2_* routine. SkinMeshOptimizedSSE2 in this
// file passes, in order: info.inVertices, info.outVertices, a vertex count,
// info.cachedPose, info.compactSkin, info.inStride and info.outStride.
// NOTE(review): strides are presumably in bytes (the caller uses them as byte
// offsets on char pointers) - confirm against the routines' implementation.
#define SKIN_SSE2_PARAMS \
const void* inVertices, \
void* outVertices, \
int numVertices, \
const void* boneMatrices, \
const void* weightsAndIndices, \
int inputStride, \
int outputStride
// Pointer to any of the SkinSSE2_* routines declared below; lets the caller pick a
// routine once and invoke it through a single call site.
typedef void (__cdecl *SkinSSE2_Function)(SKIN_SSE2_PARAMS);
// Skinning routines with C linkage, defined outside this part of the file.
// One routine per combination of bone count (1, 2 or 4 bones per vertex) and
// skinned channels (Pos, PosNormal, PosNormalTan).
// NOTE(review): the caller's comments indicate these routines may access a few
// bytes past the vertex they process (Vec3 read as Vec4) - confirm before calling
// them directly on the last vertex of a buffer.
extern "C"
{
void __cdecl SkinSSE2_1Bone_Pos(SKIN_SSE2_PARAMS);
void __cdecl SkinSSE2_2Bones_Pos(SKIN_SSE2_PARAMS);
void __cdecl SkinSSE2_4Bones_Pos(SKIN_SSE2_PARAMS);
void __cdecl SkinSSE2_1Bone_PosNormal(SKIN_SSE2_PARAMS);
void __cdecl SkinSSE2_2Bones_PosNormal(SKIN_SSE2_PARAMS);
void __cdecl SkinSSE2_4Bones_PosNormal(SKIN_SSE2_PARAMS);
void __cdecl SkinSSE2_1Bone_PosNormalTan(SKIN_SSE2_PARAMS);
void __cdecl SkinSSE2_2Bones_PosNormalTan(SKIN_SSE2_PARAMS);
void __cdecl SkinSSE2_4Bones_PosNormalTan(SKIN_SSE2_PARAMS);
}
// Skins the vertices described by `info` with one of the external SkinSSE2_* routines.
//
// The routine is chosen from info.bonesPerVertex (1, 2 or 4) and the
// info.skinNormals / info.skinTangents flags. All vertices except the last are
// skinned in place from the caller's buffers; the last vertex is skinned through
// padded stack copies so the routine never reads or writes past the end of the
// caller's buffers.
//
// Returns true when skinning was performed (or there were no vertices to skin).
// Returns false, before anything is written to info.outVertices, when:
//   - the CPU has no SSE2 support,
//   - no routine exists for the requested combination (tangents without normals,
//     or a bone count other than 1, 2 or 4),
//   - a stride does not fit the scratch buffers used for the last vertex.
bool SkinMeshOptimizedSSE2(SkinMeshInfo& info)
{
    if (!CPUInfo::HasSSE2Support())
    {
        return false;
    }

    SkinSSE2_Function skinFunc = NULL;
    if (!info.skinNormals && !info.skinTangents)
    {
        // This check must precede the switch: a statement placed between the
        // opening brace of a switch and its first case label is never executed.
        DebugAssert(info.inStride == sizeof(Vector3f));
        switch (info.bonesPerVertex)
        {
        case 1:
            skinFunc = &SkinSSE2_1Bone_Pos;
            break;
        case 2:
            skinFunc = &SkinSSE2_2Bones_Pos;
            break;
        case 4:
            skinFunc = &SkinSSE2_4Bones_Pos;
            break;
        }
    }
    else if (info.skinNormals && !info.skinTangents)
    {
        DebugAssert(info.inStride == sizeof(Vector3f) + sizeof(Vector3f));
        switch (info.bonesPerVertex)
        {
        case 1:
            skinFunc = &SkinSSE2_1Bone_PosNormal;
            break;
        case 2:
            skinFunc = &SkinSSE2_2Bones_PosNormal;
            break;
        case 4:
            skinFunc = &SkinSSE2_4Bones_PosNormal;
            break;
        }
    }
    else if (info.skinNormals && info.skinTangents)
    {
        DebugAssert(info.inStride == sizeof(Vector3f) + sizeof(Vector3f) + sizeof(Vector4f));
        switch (info.bonesPerVertex)
        {
        case 1:
            skinFunc = &SkinSSE2_1Bone_PosNormalTan;
            break;
        case 2:
            skinFunc = &SkinSSE2_2Bones_PosNormalTan;
            break;
        case 4:
            skinFunc = &SkinSSE2_4Bones_PosNormalTan;
            break;
        }
    }
    // Tangents without normals, or an unsupported bone count, leave skinFunc unset.
    if (skinFunc == NULL)
        return false;

    // Nothing to skin: report success without touching any buffer.
    if (info.vertexCount <= 0)
        return true;

    // The last vertex is processed through fixed-size stack buffers, so both
    // strides must fit in them. Validate before skinning anything: Assert may be
    // compiled out, and an oversized stride would otherwise overrun the stack
    // in the memcpy calls below.
    const int maxStride = 2 * sizeof(Vector3f) + sizeof(Vector4f) + 4;
    Assert(info.inStride <= maxStride && info.outStride <= maxStride);
    if (info.inStride > maxStride || info.outStride > maxStride)
        return false;

    // Skin all vertices apart from the last one directly in the caller's buffers.
    if (info.vertexCount > 1)
    {
        (*skinFunc)(info.inVertices, info.outVertices, info.vertexCount - 1, info.cachedPose, info.compactSkin, info.inStride, info.outStride);
    }

    // Copy the last vertex to the stack to avoid reading/writing past the end of the buffers.
    {
        // Need 4 bytes padding to access Vec3 as Vec4
        char vertexCopyIn[maxStride + 4];
        char vertexCopyOut[maxStride + 4];

        // Size of one per-vertex skin record, needed to locate the last vertex's record.
        int skinStride = (info.bonesPerVertex == 4) ? sizeof(BoneInfluence) :
            (info.bonesPerVertex == 2) ? sizeof(BoneInfluence2) :
            (info.bonesPerVertex == 1) ? sizeof(int) : 0;
        Assert(skinStride != 0);

        int index = info.vertexCount - 1;
        const char* compactSkin = static_cast<const char*>(info.compactSkin) + index * skinStride;
        const char* inVertex = static_cast<const char*>(info.inVertices) + index * info.inStride;
        char* outVertex = static_cast<char*>(info.outVertices) + index * info.outStride;

        memcpy(vertexCopyIn, inVertex, info.inStride);
        (*skinFunc)(vertexCopyIn, vertexCopyOut, 1, info.cachedPose, compactSkin, info.inStride, info.outStride);
        memcpy(outVertex, vertexCopyOut, info.outStride);
    }
    return true;
}
#else
// Stub used when the SSE2 skinning path is compiled out (the enclosing
// `#if UNITY_SUPPORTS_SSE && !UNITY_64` is false). It never skins anything and
// always returns false, so `info` is left untouched.
inline bool SkinMeshOptimizedSSE2(SkinMeshInfo& info)
{
    (void)info; // intentionally unused in this configuration
    return false;
}
#endif
|