summaryrefslogtreecommitdiff
path: root/Runtime/Filters/Mesh
diff options
context:
space:
mode:
Diffstat (limited to 'Runtime/Filters/Mesh')
-rw-r--r--Runtime/Filters/Mesh/CompressedMesh.cpp755
-rw-r--r--Runtime/Filters/Mesh/CompressedMesh.h175
-rw-r--r--Runtime/Filters/Mesh/LodMesh.cpp2344
-rw-r--r--Runtime/Filters/Mesh/LodMesh.h509
-rw-r--r--Runtime/Filters/Mesh/LodMeshFilter.cpp96
-rw-r--r--Runtime/Filters/Mesh/LodMeshFilter.h38
-rw-r--r--Runtime/Filters/Mesh/Mesh.h76
-rw-r--r--Runtime/Filters/Mesh/MeshBlendShape.cpp234
-rw-r--r--Runtime/Filters/Mesh/MeshBlendShape.h115
-rw-r--r--Runtime/Filters/Mesh/MeshBlendShaping.cpp184
-rw-r--r--Runtime/Filters/Mesh/MeshBlendShaping.h12
-rw-r--r--Runtime/Filters/Mesh/MeshCombiner.cpp502
-rw-r--r--Runtime/Filters/Mesh/MeshCombiner.h33
-rw-r--r--Runtime/Filters/Mesh/MeshOptimizer.cpp359
-rw-r--r--Runtime/Filters/Mesh/MeshOptimizer.h13
-rw-r--r--Runtime/Filters/Mesh/MeshPartitioner.cpp346
-rw-r--r--Runtime/Filters/Mesh/MeshPartitioner.h5
-rw-r--r--Runtime/Filters/Mesh/MeshRenderer.cpp664
-rw-r--r--Runtime/Filters/Mesh/MeshRenderer.h87
-rw-r--r--Runtime/Filters/Mesh/MeshSkinning.cpp165
-rw-r--r--Runtime/Filters/Mesh/MeshSkinning.h64
-rw-r--r--Runtime/Filters/Mesh/MeshSkinningGenericSIMD.h212
-rw-r--r--Runtime/Filters/Mesh/MeshSkinningMobile.h160
-rw-r--r--Runtime/Filters/Mesh/MeshSkinningNEON.asm527
-rw-r--r--Runtime/Filters/Mesh/MeshSkinningNEON.s183
-rw-r--r--Runtime/Filters/Mesh/MeshSkinningNeon_Loop.h487
-rw-r--r--Runtime/Filters/Mesh/MeshSkinningSSE2.asm323
-rw-r--r--Runtime/Filters/Mesh/MeshSkinningSSE2.h129
-rw-r--r--Runtime/Filters/Mesh/MeshSkinningTests.cpp228
-rw-r--r--Runtime/Filters/Mesh/MeshSkinningVFP.s187
-rw-r--r--Runtime/Filters/Mesh/MeshSkinningVFP_Loop.h335
-rw-r--r--Runtime/Filters/Mesh/MeshUtility.cpp58
-rw-r--r--Runtime/Filters/Mesh/MeshUtility.h42
-rw-r--r--Runtime/Filters/Mesh/SkinGeneric.h338
-rw-r--r--Runtime/Filters/Mesh/SpriteRenderer.cpp338
-rw-r--r--Runtime/Filters/Mesh/SpriteRenderer.h60
-rw-r--r--Runtime/Filters/Mesh/SpriteRendererAnimationBinding.cpp68
-rw-r--r--Runtime/Filters/Mesh/SpriteRendererAnimationBinding.h2
-rw-r--r--Runtime/Filters/Mesh/TransformVertex.cpp205
-rw-r--r--Runtime/Filters/Mesh/TransformVertex.h175
-rw-r--r--Runtime/Filters/Mesh/TransformVertexNEON.asm694
-rw-r--r--Runtime/Filters/Mesh/TransformVertexNEON.s224
-rw-r--r--Runtime/Filters/Mesh/TransformVertexNEON_Loop.h254
-rw-r--r--Runtime/Filters/Mesh/TransformVertexVFP.s250
-rw-r--r--Runtime/Filters/Mesh/TransformVertexVFP_Loop.h252
-rw-r--r--Runtime/Filters/Mesh/VertexData.cpp559
-rw-r--r--Runtime/Filters/Mesh/VertexData.h253
47 files changed, 13319 insertions, 0 deletions
diff --git a/Runtime/Filters/Mesh/CompressedMesh.cpp b/Runtime/Filters/Mesh/CompressedMesh.cpp
new file mode 100644
index 0000000..02cc74c
--- /dev/null
+++ b/Runtime/Filters/Mesh/CompressedMesh.cpp
@@ -0,0 +1,755 @@
+#include "UnityPrefix.h"
+#include "CompressedMesh.h"
+#include "LodMesh.h"
+#include "Runtime/Animation/AnimationCurveUtility.h"
+
+
+#define sqr(x) ((x)*(x))
+
+// Quantize 'numChunks' chunks of 'itemCountInChunk' floats (chunks spaced
+// 'chunkStride' bytes apart) into a tightly bit-packed stream of m_BitSize
+// bits per value, linearly remapped over the [min,max] range of the input.
+// If 'adjustBitSize' is set, bitSize is grown by ceil(log2(range)) so the
+// stored precision is roughly absolute instead of relative to the range.
+// NOTE(review): if all inputs are equal, m_Range becomes 0 — Log2(0) and the
+// 1.0/m_Range below then produce non-finite values; assumes a non-degenerate
+// value range. Also assumes m_BitSize < 32 in the (1 << m_BitSize) scaling
+// (at exactly 32 the shift is undefined) — verify callers never request 32.
+void PackedFloatVector::PackFloats(float *data, int itemCountInChunk, int chunkStride, int numChunks, int bitSize, bool adjustBitSize)
+{
+	// First pass: find the min/max over every item of every chunk.
+	float maxf = -std::numeric_limits<float>::infinity();
+	float minf = std::numeric_limits<float>::infinity();
+	float* end = Stride (data, numChunks * chunkStride);
+	for(float* it = data; it != end; it = Stride (it, chunkStride))
+	{
+		for (int i=0; i<itemCountInChunk; ++i)
+		{
+			if(maxf < it[i])
+				maxf = it[i];
+			if(minf > it[i])
+				minf = it[i];
+		}
+	}
+
+	m_Range = maxf-minf;
+
+	if(adjustBitSize)
+		bitSize += int(ceilf(Log2(m_Range)));
+	if(bitSize > 32)
+		bitSize = 32;
+
+	m_Start = minf;
+	m_NumItems = numChunks * itemCountInChunk;
+	m_BitSize = bitSize;
+	// Round the total bit count up to whole bytes; zero-fill so |= works below.
+	m_Data.resize((m_NumItems * bitSize + 7)/8, 0);
+
+
+	float scale = 1.0/m_Range;
+
+	int indexPos = 0;
+	int bitPos = 0;
+
+	// Second pass: normalize each value to [0,1], quantize to m_BitSize bits,
+	// and append it to the bit stream, splitting across byte boundaries.
+	for(float* it = data; it != end; it = Stride (it, chunkStride))
+	{
+		for(int i=0; i<itemCountInChunk; ++i)
+		{
+			float scaled = (it[i] - m_Start) * scale;
+			if(scaled < 0) scaled = 0;
+			if(scaled > 1) scaled = 1;
+
+			UInt32 x = UInt32(scaled * ((1 << (m_BitSize)) - 1));
+
+			// Write x one byte-fragment at a time into m_Data.
+			int bits = 0;
+			while(bits < m_BitSize)
+			{
+				m_Data[indexPos] |= (x >> bits) << bitPos;
+				int num = std::min( m_BitSize-bits, 8-bitPos);
+				bitPos += num;
+				bits += num;
+				if(bitPos == 8)
+				{
+					indexPos++;
+					bitPos = 0;
+				}
+			}
+		}
+	}
+}
+
+// Inverse of PackFloats: reads 'numChunks' chunks of 'itemCountInChunk'
+// quantized values starting at item index 'start' and writes the
+// reconstructed floats into 'data' (chunks spaced 'chunkStride' bytes apart).
+// A count of -1 means "all stored chunks".
+void PackedFloatVector::UnpackFloats(float *data, int itemCountInChunk, int chunkStride, int start, int numChunks)
+{
+	// Seek the bit cursor to the first requested item.
+	int bitPos = m_BitSize*start;
+	int indexPos = bitPos/8;
+	bitPos %= 8;
+
+	float scale = 1.0/m_Range;
+	if (numChunks == -1)
+		numChunks = m_NumItems / itemCountInChunk;
+
+	for(float* end = Stride (data, chunkStride * numChunks); data != end; data = Stride (data, chunkStride))
+	{
+		for (int i=0; i<itemCountInChunk; ++i)
+		{
+			// Reassemble the m_BitSize-bit value from consecutive byte fragments.
+			UInt32 x = 0;
+
+			int bits = 0;
+			while(bits < m_BitSize)
+			{
+				x |= (m_Data[indexPos] >> bitPos) << bits;
+				int num = std::min( m_BitSize-bits, 8-bitPos);
+				bitPos += num;
+				bits += num;
+				if(bitPos == 8)
+				{
+					indexPos++;
+					bitPos = 0;
+				}
+			}
+			x &= (1 << m_BitSize) - 1;
+			// Dequantize: x / (scale * (2^bits - 1)) == x * m_Range / (2^bits - 1),
+			// then shift back by the stored range start.
+			data[i] = (x / (scale * ((1 << (m_BitSize)) - 1))) + m_Start;
+		}
+	}
+}
+
+// Bit-packs 'numItems' unsigned integers using just enough bits to represent
+// the largest value present (ceil(log2(max+1)) bits per item).
+template <class IntSize> void PackedIntVector::PackInts(IntSize *data, int numItems)
+{
+	// make sure that the intsize is an unsigned type
+	Assert( (IntSize)0 < (IntSize)-1 );
+
+	UInt32 maxi = 0;
+	for(int i=0; i<numItems; i++)
+		if(maxi < data[i])
+			maxi = data[i];
+
+	m_NumItems = numItems;
+	//Prevent overflow
+	// (maxi+1 would wrap to 0 for 0xFFFFFFFF, so 32 bits is special-cased.)
+	m_BitSize = UInt8(maxi == 0xFFFFFFFF ? 32 : ceilf(Log2(maxi+1)));
+	m_Data.resize((numItems * m_BitSize + 7)/8, 0);
+
+
+	// Append each value to the bit stream, splitting across byte boundaries.
+	// (If every value is 0, m_BitSize is 0 and nothing is stored at all.)
+	int indexPos = 0;
+	int bitPos = 0;
+	for(int i=0; i<numItems; i++)
+	{
+		int bits = 0;
+		while(bits < m_BitSize)
+		{
+			m_Data[indexPos] |= (data[i] >> bits) << bitPos;
+			int num = std::min( m_BitSize-bits, 8-bitPos);
+			bitPos += num;
+			bits += num;
+			if(bitPos == 8)
+			{
+				indexPos++;
+				bitPos = 0;
+			}
+		}
+	}
+}
+
+// Inverse of PackInts: writes all m_NumItems stored values into 'data'.
+// The caller must pass a buffer of at least m_NumItems elements whose element
+// type is wide enough for m_BitSize bits.
+template <class IntSize> void PackedIntVector::UnpackInts(IntSize *data)
+{
+	int indexPos = 0;
+	int bitPos = 0;
+	for(int i=0; i<m_NumItems; i++)
+	{
+		int bits = 0;
+		data[i] = 0;
+		while(bits < m_BitSize)
+		{
+			data[i] |= (m_Data[indexPos] >> bitPos) << bits;
+			int num = std::min( m_BitSize-bits, 8-bitPos);
+			bitPos += num;
+			bits += num;
+			if(bitPos == 8)
+			{
+				indexPos++;
+				bitPos = 0;
+			}
+		}
+		// Mask off bits that bled in from the next packed value.
+		// 1ULL keeps the shift defined even when m_BitSize == 32.
+		data[i] &= (1ULL << m_BitSize) - 1;
+	}
+}
+
+
+// Packs quaternions using a "smallest three" scheme, 32 bits per quaternion:
+//   3 flag bits  - 2 bits: index of the largest-magnitude component (dropped),
+//                  1 bit : sign of that dropped component,
+//   then the three remaining components quantized from [-1,1]; the component
+//   immediately following the dropped one gets 9 bits, the other two get 10
+//   (3 + 9 + 10 + 10 = 32). The dropped component is recomputed on unpack
+//   from the unit-length constraint. Assumes input quaternions are normalized.
+void PackedQuatVector::PackQuats(Quaternionf *data, int numItems)
+{
+	m_NumItems = numItems;
+	m_Data.resize(numItems * (32/8), 0);
+
+	int indexPos = 0;
+	int bitPos = 0;
+
+	for(int i=0; i<numItems; i++)
+	{
+		// Find the largest-magnitude component; low 2 bits of 'flags' store its
+		// index, bit 2 stores its sign.
+		Quaternionf &q = data[i];
+		UInt8 flags = q.x<0? 4:0;
+
+		float max=fabs(q.x);
+		if(fabs(q.y) > max)
+		{
+			max = fabs(q.y);
+			flags = 1;
+			if(q.y<0)
+				flags |= 4;
+		}
+		if(fabs(q.z) > max)
+		{
+			max = fabs(q.z);
+			flags = 2;
+			if(q.z<0)
+				flags |= 4;
+		}
+		if(fabs(q.w) > max)
+		{
+			max = fabs(q.w);
+			flags = 3;
+			if(q.w<0)
+				flags |= 4;
+		}
+		// Write the 3 flag bits.
+		int bits = 0;
+		while(bits < 3)
+		{
+			m_Data[indexPos] |= (flags >> bits) << bitPos;
+			int num = std::min( 3-bits, 8-bitPos);
+			bitPos += num;
+			bits += num;
+			if(bitPos == 8)
+			{
+				indexPos++;
+				bitPos = 0;
+			}
+		}
+		// Write the three kept components (skip the dropped index flags&3).
+		for(int j=0;j<4;j++)
+		{
+			if((flags&3) != j)
+			{
+				// 9 bits for the component right after the dropped one, 10 otherwise.
+				int bitSize = (((flags&3)+1)%4 == j)?9:10;
+				// Map [-1,1] to [0,1] before quantizing.
+				float scaled = (q[j] + 1) * 0.5;
+				if(scaled < 0) scaled = 0;
+				if(scaled > 1) scaled = 1;
+
+				UInt32 x = UInt32(scaled * ((1 << bitSize) - 1));
+
+				bits = 0;
+				while(bits < bitSize)
+				{
+					m_Data[indexPos] |= (x >> bits) << bitPos;
+					int num = std::min( bitSize-bits, 8-bitPos);
+					bitPos += num;
+					bits += num;
+					if(bitPos == 8)
+					{
+						indexPos++;
+						bitPos = 0;
+					}
+				}
+			}
+		}
+	}
+}
+
+// Inverse of PackQuats: reads the 3 flag bits and three quantized components
+// per quaternion, then reconstructs the dropped (largest) component as
+// sqrt(1 - sum of squares), applying the stored sign bit.
+// NOTE(review): quantization error can push 'sum' slightly above 1, handing
+// FastSqrt a small negative argument — assumed benign here; verify FastSqrt's
+// behavior for negative inputs.
+void PackedQuatVector::UnpackQuats(Quaternionf *data)
+{
+	int indexPos = 0;
+	int bitPos = 0;
+
+	for(int i=0; i<m_NumItems; i++)
+	{
+		// Read the 3 flag bits (dropped-component index + its sign).
+		UInt32 flags = 0;
+
+		int bits = 0;
+		while(bits < 3)
+		{
+			flags |= (m_Data[indexPos] >> bitPos) << bits;
+			int num = std::min( 3-bits, 8-bitPos);
+			bitPos += num;
+			bits += num;
+			if(bitPos == 8)
+			{
+				indexPos++;
+				bitPos = 0;
+			}
+		}
+		flags &= 7;
+
+
+		// Read the three stored components; accumulate their squared sum.
+		Quaternionf &q = data[i];
+		float sum = 0;
+		for(int j=0;j<4;j++)
+		{
+			if((flags&3) != j)
+			{
+				// Must mirror the bit widths chosen in PackQuats.
+				int bitSize = (((flags&3)+1)%4 == j)?9:10;
+				UInt32 x = 0;
+
+				bits = 0;
+				while(bits < bitSize)
+				{
+					x |= (m_Data[indexPos] >> bitPos) << bits;
+					int num = std::min( bitSize-bits, 8-bitPos);
+					bitPos += num;
+					bits += num;
+					if(bitPos == 8)
+					{
+						indexPos++;
+						bitPos = 0;
+					}
+				}
+				x &= (1 << bitSize) - 1;
+				// Dequantize back from [0,1] to [-1,1].
+				q[j] = (x / (0.5 * ((1 << (bitSize)) - 1))) - 1;
+				sum += sqr(q[j]);
+			}
+		}
+
+		// Rebuild the dropped component from the unit-length constraint.
+		int lastComponent = flags&3;
+		q[lastComponent] = FastSqrt(1 - sum);
+		if(flags & 4)
+			q[lastComponent] = -q[lastComponent];
+	}
+}
+
+// Compresses the given mesh into this object's packed vectors. 'compression'
+// is one of kMeshCompressionLow/Med/High and selects the per-channel bit
+// budgets (higher compression = fewer bits). Channels that the source mesh
+// does not have are simply left empty; Decompress() detects presence by count.
+void CompressedMesh::Compress(Mesh &src, int compression)
+{
+	int numVertices = src.GetVertexCount();
+
+	// Positions: 3 floats per vertex, fixed bit budget per compression level.
+	int vertexBits = 0;
+	switch(compression)
+	{
+		case kMeshCompressionHigh: vertexBits = 10; break;
+		case kMeshCompressionMed: vertexBits = 16; break;
+		case kMeshCompressionLow: vertexBits = 20; break;
+	}
+	m_Vertices.PackFloats((float*)src.GetChannelPointer(kShaderChannelVertex), 3, src.GetStride (kShaderChannelVertex), numVertices, vertexBits, false);
+
+	//Possible optimization: use Edgebreaker algorithm
+	//for 1.8 bits per triangle connectivity information
+	//http://www.gvu.gatech.edu/~jarek/edgebreaker/eb/
+
+	// Index buffer is a raw byte array of 16-bit indices.
+	int numIndices = src.m_IndexBuffer.size();
+	numIndices/=2;
+
+	m_Triangles.PackInts<UInt16>((UInt16*)&src.m_IndexBuffer[0],numIndices);
+
+	if(src.IsAvailable(kShaderChannelTexCoord0))
+	{
+		int uvBits = 0;
+		switch(compression)
+		{
+			case kMeshCompressionHigh: uvBits = 8; break;
+			case kMeshCompressionMed: uvBits = 10; break;
+			case kMeshCompressionLow: uvBits = 16; break;
+		}
+		if(src.IsAvailable(kShaderChannelTexCoord1))
+		{
+			// Both UV sets present: concatenate UV0 then UV1 into one temporary
+			// array and pack them as a single 2*numVertices stream. Decompress()
+			// recognizes this layout by m_UV.Count() == numVertices*4.
+			Vector2f *uv12 = new Vector2f[numVertices*2];
+			src.ExtractUvArray(0, uv12);
+			src.ExtractUvArray(1, uv12 + numVertices);
+			m_UV.PackFloats(&uv12->x, 2, sizeof(Vector2f), numVertices*2, uvBits, true);
+			delete[] uv12;
+		}
+		else
+			m_UV.PackFloats((float*)src.GetChannelPointer (kShaderChannelTexCoord0), 2, src.GetStride (kShaderChannelTexCoord0), numVertices, uvBits, true);
+	}
+	else if(src.IsAvailable(kShaderChannelTexCoord1))
+		ErrorString( "Mesh compression doesn't work on Meshes wich only have a UV1 channel but no UV0 channel. UVs will be dropped." );
+
+	if(src.IsAvailable (kShaderChannelNormal))
+	{
+		int normalBits = 0;
+		switch(compression)
+		{
+			case kMeshCompressionHigh: normalBits = 6; break;
+			case kMeshCompressionMed: normalBits = 8; break;
+			case kMeshCompressionLow: normalBits = 8; break;
+		}
+
+		// Normals are unit length, so only x and y are quantized; z is
+		// reconstructed on decompression and only its sign is stored here.
+		float *normals = new float[numVertices*2];
+		UInt32 *signs = new UInt32[numVertices];
+		StrideIterator<Vector3f> n = src.GetNormalBegin ();
+		for(int i=0;i<numVertices; ++i, ++n)
+		{
+			normals[i*2+0] = n->x;
+			normals[i*2+1] = n->y;
+			signs[i] = n->z>0?1:0;
+		}
+		m_Normals.PackFloats(normals, 2, sizeof (float) * 2, numVertices, normalBits, false);
+		m_NormalSigns.PackInts(signs, numVertices);
+		delete[] normals;
+		delete[] signs;
+	}
+
+	if(src.IsAvailable (kShaderChannelTangent))
+	{
+		int normalBits = 0;
+		switch(compression)
+		{
+			case kMeshCompressionHigh: normalBits = 6; break;
+			case kMeshCompressionMed: normalBits = 8; break;
+			case kMeshCompressionLow: normalBits = 8; break;
+		}
+
+		// Same trick as normals for the xyz part; the w component (binormal
+		// orientation, +/-1) is stored as a second sign bit per vertex.
+		float *tangents = new float[numVertices*2];
+		UInt32 *signs = new UInt32[numVertices*2];
+		StrideIterator<Vector4f> t = src.GetTangentBegin ();
+		for(int i=0;i<numVertices; ++i, ++t)
+		{
+			tangents[i*2+0] = t->x;
+			tangents[i*2+1] = t->y;
+			signs[i*2+0] = t->z>0?1:0;
+			signs[i*2+1] = t->w>0?1:0;
+		}
+		m_Tangents.PackFloats(tangents, 2, sizeof (float) * 2, numVertices, normalBits, false);
+		m_TangentSigns.PackInts(signs, numVertices*2);
+		delete[] tangents;
+		delete[] signs;
+	}
+
+	// TODO: do an actual compression
+	if(src.IsAvailable (kShaderChannelColor))
+	{
+		dynamic_array<UInt32> tempColors (numVertices, kMemTempAlloc);
+		std::transform (src.GetColorBegin (), src.GetColorEnd (), tempColors.begin (), OpColorRGBA32ToUInt32());
+		m_Colors.PackInts<UInt32> (tempColors.data (), tempColors.size ());
+	}
+
+	// Skin weights: quantized to 5 bits each so that a vertex's weights sum
+	// to exactly 31; trailing weights that are implied by that sum are omitted.
+	BoneInfluence* influence = src.GetBoneWeights();
+	if(influence)
+	{
+		UInt32 *weights = new UInt32[numVertices*3];
+		UInt32 *indices = new UInt32[numVertices*4];
+		int weightPos = 0;
+		int boneIndexPos = 0;
+		for(int i=0;i<numVertices;i++)
+		{
+			int j;
+			int sum = 0;
+
+			//As all four bone weights always add up to 1, we can always calculate the fourth one
+			// by subtracting the other three from 1. So we don't need to store it.
+
+			//Furthermore, once the weights we stored add up to 1, we don't need to store further
+			//weights or indices, as these will necessarily be zero. This is often the case, as many
+			//vertices have only the first weight set to one, and all others to zero.
+
+			//find last non-zero entry -- we don't need to store those after this.
+			int lastNonZero;
+			for(lastNonZero=3;lastNonZero>0&&influence[i].weight[lastNonZero]==0;lastNonZero--)
+			{}
+
+
+			for(j=0;j<3 && j<=lastNonZero && sum<31;j++)
+			{
+				weights[weightPos] = UInt32(influence[i].weight[j] * 31);
+				indices[boneIndexPos++] = influence[i].boneIndex[j];
+				sum += weights[weightPos++];
+			}
+			if(lastNonZero<3)
+			{
+				//we stored less than 3 weights, but they don't add up to one, due to quantization
+				//imprecision.
+				//Add the difference, so the math works out on decompression.
+				if(sum<31)
+					weights[weightPos-1] += 31-sum;
+			}
+
+			//we stored three weights, but they don't add up to one. we don't need to store the fourth weight
+			//(as it can be calculated from the other three), but we need the bone index.
+			else if(sum<31)
+				indices[boneIndexPos++] = influence[i].boneIndex[j];
+		}
+
+		m_Weights.PackInts(weights, weightPos);
+		m_BoneIndices.PackInts(indices, boneIndexPos);
+
+		delete[] weights;
+		delete[] indices;
+	}
+}
+
+// Rebuilds a full mesh from this object's packed vectors: indices, positions,
+// UV set(s), normals/tangents (z reconstructed from unit length + sign bits),
+// colors, and skin weights (5-bit quantized, summing to 31 per vertex).
+// Channel presence is inferred from each packed vector's Count().
+void CompressedMesh::Decompress(Mesh &src)
+{
+	// Indices: unpack straight into the mesh's 16-bit index byte buffer.
+	int numIndices = m_Triangles.Count();
+	src.m_IndexBuffer.resize(numIndices * 2);
+	m_Triangles.UnpackInts<UInt16>((UInt16*)&src.m_IndexBuffer[0]);
+
+	// Derive the vertex format from which packed streams are non-empty.
+	int numVertices = m_Vertices.Count()/3;
+	unsigned decompressedFormat = 0;
+	if (m_Vertices.Count ()) decompressedFormat |= VERTEX_FORMAT1(Vertex);
+	if (m_Normals.Count()) decompressedFormat |= VERTEX_FORMAT1(Normal);
+	if (m_UV.Count()) decompressedFormat |= VERTEX_FORMAT1(TexCoord0);
+	if (m_UV.Count() == numVertices * 4) decompressedFormat |= VERTEX_FORMAT1(TexCoord1);
+	if (m_Tangents.Count()) decompressedFormat |= VERTEX_FORMAT1(Tangent);
+	if (m_Colors.Count()) decompressedFormat |= VERTEX_FORMAT1(Color);
+
+	src.ResizeVertices(numVertices, decompressedFormat);
+	Assert (src.GetVertexCount () == numVertices);
+
+	m_Vertices.UnpackFloats((float*)src.GetChannelPointer (kShaderChannelVertex), 3, src.GetStride (kShaderChannelVertex));
+
+	if(m_UV.Count())
+	{
+		m_UV.UnpackFloats((float*)src.GetChannelPointer (kShaderChannelTexCoord0), 2, src.GetStride (kShaderChannelTexCoord0), 0, numVertices);
+
+		// Compress() concatenated UV0+UV1 into one stream; the doubled count
+		// tells us a second UV set follows at item offset numVertices*2.
+		if(m_UV.Count()==numVertices * 4)
+		{
+			m_UV.UnpackFloats((float*)src.GetChannelPointer (kShaderChannelTexCoord1), 2, src.GetStride (kShaderChannelTexCoord1), numVertices*2, numVertices);
+		}
+	}
+
+	// TODO: This never gets written. Unity 3.4 and 3.5 never wrote this data.
+	// Most likely no version ever did. Remove code and bindpose serialization.
+	if(m_BindPoses.Count())
+	{
+		src.m_Bindpose.resize_initialized(m_BindPoses.Count()/16);
+		m_BindPoses.UnpackFloats(src.m_Bindpose[0].m_Data, 16, sizeof(float) * 16);
+	}
+
+	if(m_Normals.Count())
+	{
+		float *normalData = new float[m_Normals.Count()];
+		UInt32 *signs = new UInt32[m_NormalSigns.Count()];
+
+		m_Normals.UnpackFloats(normalData, 2, sizeof(float) * 2);
+		m_NormalSigns.UnpackInts(signs);
+
+		// Reconstruct z from the unit-length constraint; renormalize when
+		// quantization error makes x^2 + y^2 exceed 1.
+		StrideIterator<Vector3f> n = src.GetNormalBegin ();
+		for(int i=0;i<m_Normals.Count()/2; ++i, ++n)
+		{
+			n->x = normalData[i*2+0];
+			n->y = normalData[i*2+1];
+			float zsqr = 1 - sqr(n->x) - sqr(n->y);
+			if(zsqr >= 0)
+				n->z = FastSqrt( zsqr );
+			else
+			{
+				n->z = 0;
+				*n = Normalize(*n);
+			}
+			if(signs[i]==0)
+				n->z = -n->z;
+		}
+
+		delete[] normalData;
+		delete[] signs;
+	}
+
+	if(m_Tangents.Count())
+	{
+		float *tangentData = new float[m_Tangents.Count()];
+		UInt32 *signs = new UInt32[m_TangentSigns.Count()];
+
+		m_Tangents.UnpackFloats(tangentData, 2, sizeof(float) * 2);
+		m_TangentSigns.UnpackInts(signs);
+
+		// Same reconstruction as normals; the second sign bit per vertex
+		// restores the binormal orientation in w (+/-1).
+		StrideIterator<Vector4f> t = src.GetTangentBegin ();
+		for(int i=0;i<m_Tangents.Count()/2; ++i, ++t)
+		{
+			t->x = tangentData[i*2+0];
+			t->y = tangentData[i*2+1];
+			float zsqr = 1-sqr(tangentData[i*2+0])-sqr(tangentData[i*2+1]);
+			if(zsqr >= 0.0f)
+				t->z = FastSqrt( zsqr );
+			else
+			{
+				t->z = 0;
+				*(Vector3f*)(&*t) = Normalize(*(Vector3f*)(&*t));
+			}
+			if(signs[i*2+0]==0)
+				t->z = -t->z;
+
+			t->w = signs[i*2+1]?1.0:-1.0;
+		}
+
+		delete[] tangentData;
+		delete[] signs;
+	}
+
+	// TODO: do an actual compression
+	if (m_Colors.Count())
+	{
+		dynamic_array<UInt32> tempColors (m_Colors.Count (), kMemTempAlloc);
+		m_Colors.UnpackInts<UInt32> (tempColors.data ());
+		Assert (tempColors.size () == src.GetVertexCount ());
+		strided_copy ((ColorRGBA32*)tempColors.begin (), (ColorRGBA32*)tempColors.end (), src.GetColorBegin ());
+	}
+
+	if(m_Weights.Count())
+	{
+		UInt32 *weights = new UInt32[m_Weights.Count()];
+		m_Weights.UnpackInts(weights);
+		UInt32 *boneIndices = new UInt32[m_BoneIndices.Count()];
+		m_BoneIndices.UnpackInts(boneIndices);
+		src.m_Skin.resize_uninitialized(numVertices);
+		int bonePos = 0;
+		int boneIndexPos = 0;
+		int j=0;
+		int sum = 0;
+
+		// Weights were stored until their 5-bit quantized values summed to 31;
+		// replay that stream, inferring omitted weights per vertex.
+		for(int i=0;i<m_Weights.Count();i++)
+		{
+			//read bone index and weight.
+			src.m_Skin[bonePos].weight[j] = weights[i]/31.0;
+			src.m_Skin[bonePos].boneIndex[j] = boneIndices[boneIndexPos++];
+			j++;
+			sum += weights[i];
+
+			//the weights add up to one. fill the rest for this vertex with zero, and continue with next one.
+			if(sum >= 31)
+			{
+				for(;j<4;j++)
+				{
+					src.m_Skin[bonePos].weight[j] = 0;
+					src.m_Skin[bonePos].boneIndex[j] = 0;
+				}
+				bonePos++;
+				j = 0;
+				sum = 0;
+			}
+			//we read three weights, but they don't add up to one. calculate the fourth one, and read
+			//missing bone index. continue with next vertex.
+			else if(j==3)
+			{
+				src.m_Skin[bonePos].weight[j] = (31-sum)/31.0;
+				src.m_Skin[bonePos].boneIndex[j] = boneIndices[boneIndexPos++];
+				bonePos++;
+				j = 0;
+				sum = 0;
+			}
+		}
+
+		delete[] weights;
+		delete[] boneIndices;
+	}
+}
+
+// Packs the key times of a curve into m_Times: times are quantized to
+// 1/100-second units and delta-encoded (each stored value is the difference
+// to the previous key), which keeps the values small for bit-packing.
+template <class T> void CompressedAnimationCurve::CompressTimeKeys(AnimationCurveTpl<T> &src)
+{
+	int numKeys = src.GetKeyCount();
+
+	float minTime=0;
+	for(int i=0;i<numKeys;i++)
+	{
+		float t = src.GetKey(i).time;
+		if(t < minTime)
+		{
+			//negative time key. offset all keys by this, so math doesn't break - but it's still wrong.
+			minTime = t;
+		}
+	}
+
+
+	// Quantize to centiseconds relative to minTime, then delta-encode.
+	UInt32 *times = new UInt32[numKeys];
+	UInt32 t=0;
+	for(int i=0;i<numKeys;i++)
+	{
+		times[i] = UInt32((src.GetKey(i).time - minTime) * 100);
+		times[i] -= t;
+		t += times[i];
+	}
+
+	m_Times.PackInts(times, numKeys);
+
+	delete[] times;
+}
+
+// Inverse of CompressTimeKeys: resizes the curve to the stored key count and
+// rebuilds each key's time by accumulating the centisecond deltas.
+// Note that any negative-time offset applied during compression is not
+// restored — decompressed times always start at >= 0.
+template <class T> void CompressedAnimationCurve::DecompressTimeKeys(AnimationCurveTpl<T> &src)
+{
+	int numKeys = m_Times.Count();
+	UInt32 *times = new UInt32[numKeys];
+	m_Times.UnpackInts(times);
+
+	UInt32 t=0;
+
+	src.ResizeUninitialized(numKeys);
+
+	for(int i=0;i<numKeys;i++)
+	{
+		t+=times[i];
+		src.GetKey(i).time = t*0.01;
+	}
+	delete[] times;
+}
+
+// Compresses a quaternion animation curve: key times are delta-packed
+// (CompressTimeKeys), values use the smallest-three quaternion packing, and
+// slopes are quantized to 6 bits. When every key's in-slope equals its
+// out-slope, only one slope set is stored; DecompressQuatCurve detects this
+// by comparing m_Slopes.Count() against numKeys*8.
+void CompressedAnimationCurve::CompressQuatCurve(AnimationClip::QuaternionCurve &src)
+{
+	CompressTimeKeys(src.curve);
+	int numKeys = src.curve.GetKeyCount();
+
+	Quaternionf *qkeys = new Quaternionf[numKeys];
+	for(int i=0;i<numKeys;i++)
+		qkeys[i] = src.curve.GetKey(i).value;
+	m_Values.PackQuats(qkeys, numKeys);
+
+	delete[] qkeys;
+
+	bool same = true;
+
+	for(int i=0;i<numKeys && same;i++)
+	{
+		Quaternionf &q1 = src.curve.GetKey(i).inSlope;
+		// BUGFIX: compare the in-slope against the out-slope. The original
+		// code bound q2 to inSlope as well, so 'same' was always true and
+		// distinct out-slopes were silently dropped from the compressed data.
+		Quaternionf &q2 = src.curve.GetKey(i).outSlope;
+		if(q1.x!=q2.x)
+			same = false;
+		if(q1.y!=q2.y)
+			same = false;
+		if(q1.z!=q2.z)
+			same = false;
+		if(q1.w!=q2.w)
+			same = false;
+	}
+
+	// Lay out all in-slopes followed by all out-slopes as flat floats.
+	float *keys = new float[numKeys*8];
+	for(int i=0;i<numKeys;i++)
+	{
+		Quaternionf q = src.curve.GetKey(i).inSlope;
+		keys[i*4+0] = q.x;
+		keys[i*4+1] = q.y;
+		keys[i*4+2] = q.z;
+		keys[i*4+3] = q.w;
+		q = src.curve.GetKey(i).outSlope;
+		keys[(i+numKeys)*4+0] = q.x;
+		keys[(i+numKeys)*4+1] = q.y;
+		keys[(i+numKeys)*4+2] = q.z;
+		keys[(i+numKeys)*4+3] = q.w;
+	}
+
+	//if in and out slopes are all the same, pack only the first of the two.
+	if(same)
+		m_Slopes.PackFloats(keys, 1, sizeof(float), numKeys * 4, 6, false);
+	else
+		m_Slopes.PackFloats(keys, 1, sizeof(float), numKeys * 8, 6, false);
+
+	delete[] keys;
+
+	m_PreInfinity = src.curve.GetPreInfinityInternal();
+	m_PostInfinity = src.curve.GetPostInfinityInternal();
+	m_Path = src.path;
+}
+
+// Rebuilds a quaternion curve from the compressed representation: times,
+// values, slopes, infinity modes and path. Assumes m_Times and m_Values hold
+// the same number of keys (both written by CompressQuatCurve).
+void CompressedAnimationCurve::DecompressQuatCurve(AnimationClip::QuaternionCurve &src)
+{
+	DecompressTimeKeys(src.curve);
+	int numKeys = m_Values.Count();
+
+	Quaternionf *qkeys = new Quaternionf[numKeys];
+	m_Values.UnpackQuats(qkeys);
+	for(int i=0;i<numKeys;i++)
+		src.curve.GetKey(i).value = qkeys[i];
+	delete[] qkeys;
+
+	// Buffer is sized for the worst case (separate in/out slopes); when only
+	// one set was stored the second half simply stays unused.
+	float *keys = new float[numKeys*8];
+	m_Slopes.UnpackFloats(keys, 1, sizeof(float));
+
+	//are there separate in and out slopes?
+	// offs == 0 makes outSlope read the same values as inSlope below.
+	int offs = 0;
+	if(m_Slopes.Count() == numKeys*8)
+		offs = numKeys;
+	for(int i=0;i<numKeys;i++)
+	{
+		src.curve.GetKey(i).inSlope.x = keys[i*4+0];
+		src.curve.GetKey(i).inSlope.y = keys[i*4+1];
+		src.curve.GetKey(i).inSlope.z = keys[i*4+2];
+		src.curve.GetKey(i).inSlope.w = keys[i*4+3];
+		src.curve.GetKey(i).outSlope.x = keys[(i+offs)*4+0];
+		src.curve.GetKey(i).outSlope.y = keys[(i+offs)*4+1];
+		src.curve.GetKey(i).outSlope.z = keys[(i+offs)*4+2];
+		src.curve.GetKey(i).outSlope.w = keys[(i+offs)*4+3];
+	}
+	delete[] keys;
+
+	src.curve.SetPreInfinityInternal( m_PreInfinity );
+	src.curve.SetPostInfinityInternal( m_PostInfinity );
+	src.path = m_Path;
+}
diff --git a/Runtime/Filters/Mesh/CompressedMesh.h b/Runtime/Filters/Mesh/CompressedMesh.h
new file mode 100644
index 0000000..cf2f01c
--- /dev/null
+++ b/Runtime/Filters/Mesh/CompressedMesh.h
@@ -0,0 +1,175 @@
+#ifndef COMPRESSEDMESH_H
+#define COMPRESSEDMESH_H
+
+#include "Runtime/Serialize/SerializeUtility.h"
+#include "Runtime/Animation/AnimationClip.h"
+class Mesh;
+class AnimationClip;
+
+// Mesh compression quality levels. Higher values trade vertex precision for
+// smaller serialized size (see CompressedMesh::Compress for the bit budgets).
+enum
+{
+	kMeshCompressionOff = 0,
+	kMeshCompressionLow = 1,
+	kMeshCompressionMed = 2,
+	kMeshCompressionHigh = 3,
+};
+
+// Raw byte storage used by the packed vector classes below.
+typedef std::vector<UInt8> DataVector;
+
+// Stores an array of floats quantized to a fixed number of bits per value,
+// remapped over the array's [min, min+range] interval and bit-packed into a
+// byte vector. Serializes under the shared "PackedBitVector" type name.
+class PackedFloatVector
+{
+public:
+	DECLARE_SERIALIZE (PackedBitVector)
+
+	PackedFloatVector() { m_NumItems = 0; m_Range = 0; m_Start = 0; m_BitSize = 0; }
+
+	void PackFloats(float *data, int chunkSize, int chunkStride, int chunkCount, int bitSize, bool adjustBitSize);
+	void UnpackFloats(float *data, int chunkSize, int chunkStride, int start = 0, int count = -1);
+	// Total number of packed float values (chunkSize * chunkCount).
+	int Count() {return m_NumItems;}
+
+private:
+	UInt32 m_NumItems;	// total packed values
+	float m_Range;		// max - min of the source data
+	float m_Start;		// min of the source data
+	UInt8 m_BitSize;	// bits stored per value
+	std::vector<UInt8> m_Data;	// the packed bit stream
+};
+
+// Stores an array of unsigned integers bit-packed with just enough bits for
+// the largest value. Serializes under the shared "PackedBitVector" type name.
+class PackedIntVector
+{
+public:
+	DECLARE_SERIALIZE (PackedBitVector)
+
+	PackedIntVector() { m_NumItems = 0; m_BitSize = 0; }
+
+	template <class IntSize> void PackInts(IntSize *data, int numItems);
+	template <class IntSize> void UnpackInts(IntSize *data);
+	// Number of packed integer values.
+	int Count() {return m_NumItems;}
+
+private:
+	UInt32 m_NumItems;	// number of packed values
+	UInt8 m_BitSize;	// bits stored per value (0 when all values are 0)
+	std::vector<UInt8> m_Data;	// the packed bit stream
+};
+
+// Stores unit quaternions at 32 bits each using the "smallest three" scheme
+// (largest component dropped and reconstructed from unit length; see
+// PackQuats). Serializes under the shared "PackedBitVector" type name.
+class PackedQuatVector
+{
+public:
+	DECLARE_SERIALIZE (PackedBitVector)
+
+	PackedQuatVector() {m_NumItems = 0;}
+
+	void PackQuats(Quaternionf *data, int numItems);
+	void UnpackQuats(Quaternionf *data);
+	// Number of packed quaternions.
+	int Count() {return m_NumItems;}
+
+private:
+	UInt32 m_NumItems;	// number of packed quaternions
+	std::vector<UInt8> m_Data;	// the packed bit stream (4 bytes per quat)
+};
+
+// Serialized compressed representation of a Mesh: each vertex channel is
+// stored as an independently bit-packed vector. Empty vectors mean the
+// channel was absent from the source mesh.
+class CompressedMesh
+{
+public:
+	DECLARE_SERIALIZE (CompressedMesh)
+
+	void Compress(Mesh &src, int quality);
+	void Decompress(Mesh &src);
+
+private:
+	PackedFloatVector m_Vertices;	// positions, 3 floats per vertex
+	PackedFloatVector m_UV;			// UV0 (and UV1 appended, if present)
+
+	// TODO: This never gets written. Unity 3.4 and 3.5 never wrote this data.
+	// Most likely no version ever did. Remove code and bindpose serialization.
+	PackedFloatVector m_BindPoses;
+
+	PackedFloatVector m_Normals;	// x,y only; z rebuilt from sign + unit length
+	PackedIntVector m_NormalSigns;	// one sign bit (of z) per vertex
+	PackedFloatVector m_Tangents;	// x,y only, like normals
+	PackedIntVector m_TangentSigns;	// two bits per vertex: sign of z, sign of w
+	PackedIntVector m_Weights;		// skin weights quantized to sum to 31
+	PackedIntVector m_BoneIndices;	// bone indices matching m_Weights
+	PackedIntVector m_Triangles;	// 16-bit index buffer
+	PackedIntVector m_Colors;		// vertex colors as raw RGBA32 words
+};
+
+// Serialization. Field order is part of the on-disk format — do not reorder.
+template<class TransferFunc>
+void PackedFloatVector::Transfer (TransferFunc& transfer) {
+	TRANSFER ( m_NumItems );
+	TRANSFER( m_Range );
+	TRANSFER( m_Start );
+	TRANSFER( m_Data );
+	TRANSFER( m_BitSize );
+	transfer.Align();
+}
+
+// Serialization. Field order is part of the on-disk format — do not reorder.
+template<class TransferFunc>
+void PackedIntVector::Transfer (TransferFunc& transfer) {
+	TRANSFER( m_NumItems );
+	TRANSFER( m_Data );
+	TRANSFER( m_BitSize );
+	transfer.Align();
+}
+
+// Serialization. Field order is part of the on-disk format — do not reorder.
+template<class TransferFunc>
+void PackedQuatVector::Transfer (TransferFunc& transfer) {
+	TRANSFER( m_NumItems );
+	TRANSFER( m_Data );
+	transfer.Align();
+}
+
+// Serialization. Field order is part of the on-disk format — do not reorder
+// (note it intentionally differs from the member declaration order).
+template<class TransferFunc>
+void CompressedMesh::Transfer (TransferFunc& transfer) {
+	TRANSFER( m_Vertices );
+	TRANSFER( m_UV );
+	TRANSFER( m_BindPoses );
+	TRANSFER( m_Normals );
+	TRANSFER( m_Tangents );
+	TRANSFER( m_Weights );
+	TRANSFER( m_NormalSigns );
+	TRANSFER( m_TangentSigns );
+	TRANSFER( m_BoneIndices );
+	TRANSFER( m_Triangles );
+	TRANSFER( m_Colors );
+}
+
+// Serialized compressed form of a quaternion animation curve: delta-packed
+// key times, smallest-three-packed values, 6-bit quantized slopes, plus the
+// curve's infinity wrap modes and target path.
+class CompressedAnimationCurve
+{
+public:
+	DECLARE_SERIALIZE (CompressedAnimationCurve)
+
+	CompressedAnimationCurve() { m_PreInfinity = 0; m_PostInfinity = 0; }
+
+	void CompressQuatCurve(AnimationClip::QuaternionCurve &src);
+	void DecompressQuatCurve(AnimationClip::QuaternionCurve &src);
+
+private:
+
+	template <class T> void CompressTimeKeys(AnimationCurveTpl<T> &src);
+	template <class T> void DecompressTimeKeys(AnimationCurveTpl<T> &src);
+
+	PackedIntVector m_Times;	// per-key time deltas in centiseconds
+	PackedQuatVector m_Values;	// key values
+	PackedFloatVector m_Slopes;	// in-slopes (and out-slopes when they differ)
+
+	int m_PreInfinity;	// wrap mode before the first key
+	int m_PostInfinity;	// wrap mode after the last key
+
+	UnityStr m_Path;	// transform path the curve animates
+};
+
+// Serialization. Field order is part of the on-disk format — do not reorder.
+template<class TransferFunc>
+void CompressedAnimationCurve::Transfer (TransferFunc& transfer) {
+
+	TRANSFER( m_Path );
+
+	TRANSFER( m_Times );
+	TRANSFER( m_Values );
+	TRANSFER( m_Slopes );
+
+	TRANSFER( m_PreInfinity );
+	TRANSFER( m_PostInfinity );
+}
+
+#endif
diff --git a/Runtime/Filters/Mesh/LodMesh.cpp b/Runtime/Filters/Mesh/LodMesh.cpp
new file mode 100644
index 0000000..fc5dca8
--- /dev/null
+++ b/Runtime/Filters/Mesh/LodMesh.cpp
@@ -0,0 +1,2344 @@
+#include "UnityPrefix.h"
+#include "Configuration/UnityConfigure.h"
+#include "LodMesh.h"
+#include "Runtime/Utilities/vector_utility.h"
+#include "Runtime/Utilities/Utility.h"
+#include "Runtime/Math/FloatConversion.h"
+#include "Runtime/Serialize/TransferFunctions/SerializeTransfer.h"
+#include "Runtime/Serialize/PersistentManager.h"
+#include "Runtime/Graphics/TriStripper.h"
+#include "MeshUtility.h"
+#include "Runtime/Geometry/TangentSpaceCalculation.h"
+#include "Runtime/BaseClasses/GameObject.h"
+#include "Runtime/Shaders/VBO.h"
+#include "Runtime/Serialize/TransferUtility.h"
+#include "Runtime/Serialize/SwapEndianArray.h"
+#include "Runtime/GfxDevice/GfxDevice.h"
+#include "Runtime/BaseClasses/IsPlaying.h"
+#include "Runtime/Camera/IntermediateRenderer.h"
+#include "Runtime/Filters/Mesh/MeshRenderer.h"
+#include "Runtime/Allocator/MemoryMacros.h"
+#include "Runtime/Misc/Allocator.h"
+#include "Runtime/Profiler/Profiler.h"
+#include "Runtime/Camera/Camera.h"
+#include "Runtime/Camera/RenderManager.h"
+#include "Runtime/Threads/Thread.h"
+#include "Runtime/Misc/BuildSettings.h"
+#include "Runtime/Utilities/UniqueIDGenerator.h"
+#if UNITY_XENON
+#include "PlatformDependent/Xbox360/Source/GfxDevice/GfxXenonVBO.h"
+#endif
+#include "Runtime/GfxDevice/GfxDeviceConfigure.h"
+
+#if UNITY_FLASH
+#include <limits.h>
+#define FLT_MAX __FLT_MAX__
+#define FLT_MIN __FLT_MIN__
+#endif
+
+#if UNITY_EDITOR
+# include "Editor/Src/BuildPipeline/PrepareMeshDataForBuildTarget.h"
+# include "Runtime/Camera/RenderLoops/RenderLoopPrivate.h"
+# include "Runtime/Misc/Player.h"
+#endif
+
+
+///* Checkbox in mesh importer that allows you have mesh access (Done)
+///* Default for new importers is to have mesh access enabled (done)
+///* Error Messages when accessing data although you shouldn't be allowed (--)
+///* MeshColliders / SkinnedMeshes / non-uniform scale. Forces meshes to be non-readable. (Done)
+
+
+///* MeshCollider with no-access allowed. Does it work / no errors
+///* MeshCollider with no-access allowed, mesh is assigned from script. Does it give an error in editor & player
+///* MeshCollider with no-access allowed, mesh is scaled at runtime does it give an error
+///* MeshCollider with no-access allowed, mesh is scaled in scene. Does it work without errors.
+///* Mesh data accessed from script, does it give an error.
+
+
+
+// printf-style message used by the channel setters (SetNormals, SetTangents,
+// ...) when the supplied array size does not match the vertex count;
+// %s is the property name.
+static char const* kMeshAPIErrorMessage =
+"Mesh.%s is out of bounds. The supplied array needs to be the same size as the Mesh.vertices array.";
+
+
+// Hands out per-mesh internal IDs; IDs are returned in MainThreadCleanup.
+static UniqueIDGenerator s_MeshIDGenerator;
+
+
+// The Mesh class contains one of these for every Material that is bound to it.
+// The Mesh class contains one of these for every Material that is bound to it.
+// Legacy (pre-version-2) per-material index data: either explicit triangle
+// faces or a 16-bit triangle strip. Note: the serialized type name stays
+// "MeshData" so old streams keep loading.
+struct DeprecatedMeshData
+{
+ std::vector<Face> faces; // Indices for specific faces
+ std::vector <unsigned short> strips; // A list of triangle strips
+ int triangleCount;
+ DECLARE_SERIALIZE_NO_PPTR (MeshData)
+};
+
+// Serialization for the legacy per-material mesh data; field order is the
+// on-disk layout.
+template<class TransferFunc>
+void DeprecatedMeshData::Transfer (TransferFunc& transfer)
+{
+ TRANSFER (faces);
+ TRANSFER (strips);
+ TRANSFER(triangleCount);
+}
+
+// One legacy LOD level: a list of per-material mesh data blocks.
+// Serialized type name stays "LOD" for backwards compatibility.
+struct DeprecatedLOD
+{
+ vector<DeprecatedMeshData> m_MeshData;
+
+ DECLARE_SERIALIZE (LOD)
+};
+
+// Serialization for a legacy LOD level.
+template<class TransferFunction>
+void DeprecatedLOD::Transfer (TransferFunction& transfer)
+{
+ TRANSFER (m_MeshData);
+}
+
+// Converts legacy serialized mesh data (per-material face lists or triangle
+// strips) into the current submesh/index-buffer representation.
+// Only the first LOD level is used; any lower levels are discarded.
+static void LoadDeprecatedMeshData (Mesh& mesh, vector<DeprecatedLOD> &lods)
+{
+ mesh.GetIndexBuffer().clear();
+ mesh.GetSubMeshes().clear();
+
+ if (lods.empty())
+ return;
+
+ DeprecatedLOD& lod = lods.front();
+
+ mesh.SetSubMeshCount(lod.m_MeshData.size());
+ for (int i=0;i<lod.m_MeshData.size();i++)
+ {
+ DeprecatedMeshData& oldMeshData = lod.m_MeshData[i];
+ if (oldMeshData.faces.size())
+ mesh.SetIndicesComplex (&oldMeshData.faces[0].v1, oldMeshData.faces.size()*3, i, kPrimitiveTriangles, Mesh::k16BitIndices);
+ else if (!oldMeshData.strips.empty())
+ {
+ UNITY_TEMP_VECTOR(UInt16) triangles;
+ Destripify(&oldMeshData.strips[0], oldMeshData.strips.size(), triangles);
+ if (!triangles.empty())
+ mesh.SetIndicesComplex (&triangles[0], triangles.size(), i, kPrimitiveTriangles, Mesh::k16BitIndices);
+ }
+ // else: neither faces nor strips present in the old data - leave the
+ // submesh empty rather than taking the address of element 0 of an
+ // empty vector (undefined behavior in the previous code).
+ }
+}
+
+
+using namespace std;
+
+// Constructs an empty mesh with one (empty) submesh.
+// All skin/bone containers share the mesh's memory label.
+Mesh::Mesh (MemLabelId label, ObjectCreationMode mode)
+: Super(label, mode)
+, m_ChannelsInVBO(0)
+, m_VerticesDirty(true)
+, m_IndicesDirty(true)
+, m_IsDynamic(false)
+, m_HideFromRuntimeStats(false)
+, m_VertexColorsSwizzled(false)
+, m_MeshUsageFlags(0)
+, m_LocalAABB(Vector3f::zero, Vector3f::zero)
+, m_VBO(NULL)
+, m_InternalMeshID (0)
+, m_Skin (label)
+, m_CachedSkin2 (label)
+, m_CachedSkin1 (label)
+, m_CachedBonesAABB(label)
+, m_Bindpose(label)
+, m_BonePathHashes(label)
+, m_RootBonePathHash(0)
+{
+ m_MaxBoneIndex = -1;
+ // Every mesh starts out with a single empty submesh.
+ SubMesh sub;
+ m_SubMeshes.push_back(sub);
+
+ m_MeshCompression = kMeshCompressionOff;
+ m_StreamCompression = kStreamCompressionDefault;
+ m_IsReadable = true;
+ m_KeepVertices = false;
+ m_KeepIndices = false;
+
+#if UNITY_EDITOR
+ m_MeshOptimized = false;
+#endif
+
+#if ENABLE_MULTITHREADED_CODE
+ m_CurrentCPUFence = 0;
+ m_WaitOnCPUFence = false;
+#endif
+
+ // NOTE: m_InternalMeshID is already zero-initialized in the initializer
+ // list above; the previous redundant re-assignment here was removed.
+}
+
+// Destructor delegates to MainThreadCleanup, which releases the VBO,
+// internal mesh ID and collision data and notifies users of the deletion.
+Mesh::~Mesh ()
+{
+ MainThreadCleanup ();
+}
+
+// Releases resources that must be freed on the main thread: waits for the
+// render thread to stop using the mesh, notifies object/intermediate users,
+// frees the collision mesh, deletes the VBO and returns the internal mesh ID.
+// Always returns true.
+bool Mesh::MainThreadCleanup ()
+{
+ WaitOnRenderThreadUse();
+ NotifyObjectUsers( kDidDeleteMesh );
+ m_IntermediateUsers.Notify( kImNotifyAssetDeleted );
+
+ m_CollisionMesh.Cleanup();
+
+ if (m_VBO)
+ {
+ GetGfxDevice().DeleteVBO(m_VBO);
+ m_VBO = NULL;
+ }
+
+ if (m_InternalMeshID != 0)
+ {
+ s_MeshIDGenerator.RemoveID (m_InternalMeshID);
+ m_InternalMeshID = 0;
+ }
+
+ return true;
+}
+
+// Converts the legacy tangent-space array (normal + tangent + handedness per
+// vertex) into the current separate Normal and Tangent vertex channels,
+// resizing the vertex buffer to the tangent array's size if needed.
+// The handedness is stored in the tangent's w component.
+void Mesh::LoadDeprecatedTangentData (Mesh& mesh, DeprecatedTangentsArray &inTangents)
+{
+ int count = inTangents.size();
+ unsigned needChannels = m_VertexData.GetChannelMask () | VERTEX_FORMAT2(Normal, Tangent);
+ if (count != GetVertexCount () || m_VertexData.GetChannelMask () != needChannels)
+ ResizeVertices (count, needChannels);
+
+ Assert (GetVertexCount () == count);
+
+ StrideIterator<Vector3f> normals = GetNormalBegin ();
+ StrideIterator<Vector4f> tangents = GetTangentBegin ();
+
+ for(int i=0;i<count; ++i, ++normals, ++tangents)
+ {
+ *normals = inTangents[i].normal;
+ *tangents = Vector4f(inTangents[i].tangent.x,inTangents[i].tangent.y,inTangents[i].tangent.z,inTangents[i].handedness);
+ }
+}
+
+// Brings the in-memory vertex color byte order in sync with what the active
+// graphics device expects. m_VertexColorsSwizzled tracks the current state,
+// so the operation is idempotent and reversible.
+void Mesh::SwizzleVertexColorsIfNeeded ()
+{
+ // Early out if color are already in the right format
+ if (gGraphicsCaps.needsToSwizzleVertexColors == m_VertexColorsSwizzled)
+ return;
+
+ // Due to runtime GfxDevice switching we might need to unswizzle vertex colors (case 562695)
+ if (m_VertexColorsSwizzled)
+ {
+ std::transform(GetColorBegin(), GetColorEnd(), GetColorBegin(), UnswizzleColorForPlatform);
+ m_VertexColorsSwizzled = false;
+ }
+ else
+ {
+ std::transform(GetColorBegin(), GetColorEnd(), GetColorBegin(), SwizzleColorForPlatform);
+ m_VertexColorsSwizzled = true;
+ }
+}
+
+// Copies the (possibly strided/interleaved) position channel into a tightly
+// packed destination array. 'destination' must hold GetVertexCount() entries.
+void Mesh::ExtractVertexArray (Vector3f* destination) const
+{
+ const size_t count = GetVertexCount ();
+ StrideIterator<Vector3f> src = GetVertexBegin ();
+ for (size_t i = 0; i < count; ++i, ++src)
+ destination[i] = *src;
+}
+
+// Copies the (possibly strided/interleaved) normal channel into a tightly
+// packed destination array. 'destination' must hold GetVertexCount() entries.
+void Mesh::ExtractNormalArray (Vector3f* destination) const
+{
+ const size_t count = GetVertexCount ();
+ StrideIterator<Vector3f> src = GetNormalBegin ();
+ for (size_t i = 0; i < count; ++i, ++src)
+ destination[i] = *src;
+}
+
+// Copies vertex colors into 'destination', undoing the per-platform swizzle
+// if the colors are currently stored swizzled for the GfxDevice.
+void Mesh::ExtractColorArray (ColorRGBA32* destination) const
+{
+ if (m_VertexColorsSwizzled)
+ std::transform(GetColorBegin(), GetColorEnd(), destination, UnswizzleColorForPlatform);
+ else
+ std::copy(GetColorBegin(), GetColorEnd(), destination);
+}
+
+// Same as ExtractColorArray, but the destination is float colors; conversion
+// from ColorRGBA32 happens implicitly during the copy/transform.
+void Mesh::ExtractColorArrayConverting (ColorRGBAf* destination) const
+{
+ if (m_VertexColorsSwizzled)
+ std::transform(GetColorBegin(), GetColorEnd(), destination, UnswizzleColorForPlatform);
+ else
+ std::copy(GetColorBegin(), GetColorEnd(), destination);
+}
+
+// Copies the UV channel 'uvIndex' (0 or 1) into a tightly packed destination
+// array. 'destination' must hold GetVertexCount() entries.
+void Mesh::ExtractUvArray (int uvIndex, Vector2f* destination) const
+{
+ const size_t count = GetVertexCount ();
+ StrideIterator<Vector2f> src = GetUvBegin (uvIndex);
+ for (size_t i = 0; i < count; ++i, ++src)
+ destination[i] = *src;
+}
+
+// Copies the tangent channel (xyz = tangent, w = handedness) into a tightly
+// packed destination array. 'destination' must hold GetVertexCount() entries.
+void Mesh::ExtractTangentArray (Vector4f* destination) const
+{
+ const size_t count = GetVertexCount ();
+ StrideIterator<Vector4f> src = GetTangentBegin ();
+ for (size_t i = 0; i < count; ++i, ++src)
+ destination[i] = *src;
+}
+
+
+// Resizes/reformats the vertex buffer to 'count' vertices with the given
+// channel mask and stream/channel layouts. The skin array, when present, is
+// kept in sync with the vertex count. Returns the mask of channels that are
+// newly present (set now but not before). Max 65535 vertices (16-bit indices).
+UInt32 Mesh::ResizeVertices (size_t count, UInt32 shaderChannels, const VertexStreamsLayout& streams, const VertexChannelsLayout& channels)
+{
+ Assert (count <= std::numeric_limits<UInt16>::max());
+
+ UInt32 prevChannels = m_VertexData.GetChannelMask();
+
+ // Only reallocate when the count, channel set or layout actually changes.
+ if (m_VertexData.GetVertexCount() != count ||
+ m_VertexData.GetChannelMask() != shaderChannels ||
+ !m_VertexData.ConformsToStreamsLayout(streams) ||
+ !m_VertexData.ConformsToChannelsLayout(channels))
+ {
+ WaitOnRenderThreadUse();
+
+ SET_ALLOC_OWNER(this);
+ m_VertexData.Resize(count, shaderChannels, streams, channels);
+
+ if (!m_Skin.empty ())
+ m_Skin.resize_initialized (count, BoneInfluence());
+ }
+
+ return m_VertexData.GetChannelMask() & ~prevChannels;
+}
+
+
+// Changes the channel set while keeping the current vertex count.
+// Returns the mask of newly added channels (see ResizeVertices).
+UInt32 Mesh::FormatVertices (UInt32 shaderChannels)
+{
+ return ResizeVertices(GetVertexCount(), shaderChannels);
+}
+
+// Fills the given channels of vertices [begin, begin+count) with default
+// values: zero positions/normals/uvs/tangents, white colors. TexCoord1 is
+// seeded from TexCoord0 when that channel exists (a reasonable lightmap-UV
+// default), otherwise zero-filled.
+void Mesh::InitChannelsToDefault (unsigned begin, unsigned count, unsigned shaderChannels)
+{
+ if (shaderChannels & VERTEX_FORMAT1(Vertex))
+ std::fill (GetVertexBegin () + begin, GetVertexBegin () + begin + count, Vector3f (0,0,0));
+ if (shaderChannels & VERTEX_FORMAT1(Normal))
+ std::fill (GetNormalBegin () + begin, GetNormalBegin () + begin + count, Vector3f (0,0,0));
+ if (shaderChannels & VERTEX_FORMAT1(Color))
+ std::fill (GetColorBegin () + begin, GetColorBegin () + begin + count, ColorRGBA32 (0xffffffff));
+ if (shaderChannels & VERTEX_FORMAT1(TexCoord0))
+ std::fill (GetUvBegin (0) + begin, GetUvBegin (0) + begin + count, Vector2f (0,0));
+ if (shaderChannels & VERTEX_FORMAT1(Tangent))
+ std::fill (GetTangentBegin () + begin, GetTangentBegin () + begin + count, Vector4f (0,0,0,0));
+
+ if (shaderChannels & VERTEX_FORMAT1(TexCoord1))
+ {
+ if( GetAvailableChannels () & VERTEX_FORMAT1(TexCoord0) )
+ std::copy (GetUvBegin (0) + begin, GetUvBegin (0) + begin + count, GetUvBegin (1) + begin);
+ else
+ std::fill (GetUvBegin (1) + begin, GetUvBegin (1) + begin + count, Vector2f (0,0));
+ }
+}
+
+namespace
+{
+ // Debug validation: checks that every triangle in 'triangles' occurs in the
+ // tri-strip 'newStrip', accepting any rotation of the triangle and honoring
+ // the alternating winding of strip-encoded triangles (odd strip positions
+ // swap the last two indices). Returns true when no triangle is missing.
+ bool IsStripValid(const Mesh::TemporaryIndexContainer& triangles, const Mesh::TemporaryIndexContainer& newStrip)
+ {
+ int invalidTriangleCount = 0;
+ // Compute the strip-triangle count in signed arithmetic: the previous
+ // 'k < newStrip.size() - 2' underflowed (size_t) for strips with fewer
+ // than 2 indices, producing a huge bound and out-of-bounds reads.
+ const int stripTriangleEnd = (int)newStrip.size() - 2;
+ for (int j = 0; j < (int)triangles.size(); j += 3)
+ {
+ int i0 = triangles[j + 0];
+ int i1 = triangles[j + 1];
+ int i2 = triangles[j + 2];
+
+ bool found = false;
+ for (int k = 0; k < stripTriangleEnd; ++k)
+ {
+ int s0 = newStrip[k + 0];
+ int s1 = newStrip[k + 1];
+ int s2 = newStrip[k + 2];
+
+ // Odd-indexed strip triangles have reversed winding.
+ if (k&1)
+ std::swap(s1, s2);
+
+ if ((s0 == i0 && s1 == i1 && s2 == i2) ||
+ (s0 == i1 && s1 == i2 && s2 == i0) ||
+ (s0 == i2 && s1 == i0 && s2 == i1))
+ {
+ found = true;
+ break;
+ }
+ }
+
+ if (!found)
+ ++invalidTriangleCount;
+ }
+
+ AssertMsg(invalidTriangleCount == 0, "Mesh strip is missing %d triangles", invalidTriangleCount);
+ return invalidTriangleCount == 0;
+ }
+}
+
+// Recomputes m_LocalAABB from all vertex positions, additionally enclosing
+// every blend-shape target position so a fully morphed mesh stays inside the
+// bounds. Also refreshes each submesh's bounds. Does not mark the mesh dirty
+// or notify users (see RecalculateBounds for that).
+void Mesh::RecalculateBoundsInternal ()
+{
+ MinMaxAABB minmax;
+ minmax.Init ();
+ for (StrideIterator<Vector3f> it = GetVertexBegin (), end = GetVertexEnd (); it != end; ++it)
+ minmax.Encapsulate (*it);
+
+ // Apply all blendshape targets to bounding volumes
+ if (!m_Shapes.vertices.empty())
+ {
+ StrideIterator<Vector3f> verts = GetVertexBegin ();
+
+ for (int i=0;i<m_Shapes.vertices.size();i++)
+ {
+ // Blend-shape deltas are offsets from the base vertex position.
+ Vector3f pos = verts[m_Shapes.vertices[i].index] + m_Shapes.vertices[i].vertex;
+ minmax.Encapsulate (pos);
+ }
+ }
+
+ AABB aabb;
+ if (GetVertexCount ())
+ aabb = minmax;
+ else
+ aabb = AABB (Vector3f::zero, Vector3f::zero);
+
+ m_LocalAABB = aabb;
+
+ for (int submesh = 0; submesh < m_SubMeshes.size(); ++submesh)
+ RecalculateSubmeshBoundsInternal (submesh);
+}
+
+// Recomputes the local AABB of a single submesh from the vertices its 16-bit
+// index buffer references. An empty submesh gets a zero-sized AABB at origin.
+void Mesh::RecalculateSubmeshBoundsInternal (unsigned submesh)
+{
+ MinMaxAABB minmax;
+ minmax.Init ();
+
+ const UInt16* indices = GetSubMeshBuffer16(submesh);
+ StrideIterator<Vector3f> vertices = GetVertexBegin ();
+ for (unsigned int i = 0; i < GetSubMeshFast(submesh).indexCount; i++)
+ minmax.Encapsulate (vertices[indices[i]]);
+
+ AABB aabb;
+ if (GetSubMeshFast(submesh).indexCount > 0)
+ aabb = minmax;
+ else
+ aabb = AABB (Vector3f::zero, Vector3f::zero);
+
+ GetSubMeshFast(submesh).localAABB = aabb;
+}
+
+
+// Public bounds recalculation: recomputes the AABBs, then marks the object
+// dirty and notifies both persistent and intermediate users of the change.
+void Mesh::RecalculateBounds ()
+{
+ RecalculateBoundsInternal ();
+
+ SetDirty();
+ NotifyObjectUsers( kDidModifyBounds );
+ m_IntermediateUsers.Notify( kImNotifyBoundsChanged );
+}
+
+// Public per-submesh bounds recalculation with the same dirtying/notification
+// side effects as RecalculateBounds.
+void Mesh::RecalculateSubmeshBounds (unsigned submesh)
+{
+ RecalculateSubmeshBoundsInternal (submesh);
+
+ SetDirty();
+ NotifyObjectUsers( kDidModifyBounds );
+ m_IntermediateUsers.Notify( kImNotifyBoundsChanged );
+}
+
+
+// Empties the mesh: submeshes (one empty submesh remains), blend shapes,
+// index buffer, skin and vertex data. When 'keepVertexLayout' is true (and
+// the content is 3.5.3+), the channel/stream layout is preserved for cheap
+// re-filling; otherwise the vertex data storage is dropped entirely.
+void Mesh::Clear (bool keepVertexLayout)
+{
+ WaitOnRenderThreadUse();
+
+ m_SubMeshes.clear();
+ SubMesh sub;
+ m_SubMeshes.push_back(sub);
+
+ ClearBlendShapes (m_Shapes);
+
+ m_IndexBuffer.clear();
+#if UNITY_EDITOR
+ m_MeshOptimized = false;
+#endif
+
+#if UNITY_PS3 || UNITY_EDITOR
+ m_PartitionInfos.clear();
+ m_Partitions.clear();
+#endif
+
+ unsigned prevFormat = m_VertexData.GetChannelMask();
+
+ if (m_VertexData.GetVertexCount() > 0)
+ {
+ // keepVertexLayout added in Unity 3.5.3; keep previous behaviour
+ // for older content for safety.
+ if (keepVertexLayout && IS_CONTENT_NEWER_OR_SAME (kUnityVersion3_5_3_a1))
+ {
+ ResizeVertices (0, prevFormat);
+ }
+ else
+ {
+ // Swap with a default-constructed VertexData to release the storage.
+ VertexData tempVD;
+ swap (tempVD, m_VertexData);
+ }
+ }
+
+ if (!m_Skin.empty())
+ {
+ m_Skin.clear();
+ }
+
+ m_VertexColorsSwizzled = false;
+ ClearSkinCache();
+
+ SetChannelsDirty( prevFormat, true );
+}
+
+IMPLEMENT_CLASS (Mesh)
+IMPLEMENT_OBJECT_SERIALIZE (Mesh)
+
+// Computes the minimum and maximum vertex index referenced by an index
+// buffer; used to derive SubMesh::firstVertex / vertexCount. With
+// indexCount == 0 the result is (max-representable-index, 0).
+template <typename Index>
+static void GetVertexBufferRange(const Index* indices, int indexCount, UInt32& fromVertex, UInt32& toVertex)
+{
+ // Seed the running minimum with the largest representable index. The old
+ // Index(INT_MAX) seed relied on truncation for 16-bit indices and would be
+ // wrong for unsigned index types wider than int.
+ Index a = std::numeric_limits<Index>::max();
+ Index b = 0;
+ const Index* indicesEnd = indices + indexCount;
+ for (const Index* index = indices; index < indicesEnd; ++index)
+ {
+ a = std::min(a, *index);
+ b = std::max(b, *index);
+ }
+ fromVertex = a;
+ toVertex = b;
+}
+
+// Byte-swaps every index in the raw index buffer (element size kVBOIndexSize);
+// used around serialization when endianness conversion is required.
+void Mesh::ByteSwapIndices ()
+{
+ SwapEndianArray (&m_IndexBuffer[0], kVBOIndexSize, GetTotalndexCount());
+}
+
+// Returns the endianness data should be written in: the platform's native
+// endianness, flipped when the transfer performs endian conversion.
+template<class T>
+bool ShouldSerializeForBigEndian (T& transfer)
+{
+ const bool nativeBigEndian = UNITY_BIG_ENDIAN != 0;
+ return transfer.ConvertEndianess() ? !nativeBigEndian : nativeBigEndian;
+}
+
+// Converts any submesh stored as a (deprecated) triangle strip into a plain
+// triangle list. No-op when no submesh uses strips. All submesh index data is
+// copied out first, then the submeshes are rebuilt, because SetIndices
+// rewrites the shared index buffer.
+void Mesh::DestripifyIndices ()
+{
+ if (m_IndexBuffer.empty() || m_SubMeshes.empty())
+ return;
+
+ int submeshCount = m_SubMeshes.size();
+ bool anyStripped = false;
+ for (size_t i = 0; i < submeshCount; ++i)
+ {
+ if (m_SubMeshes[i].topology == kPrimitiveTriangleStripDeprecated)
+ {
+ anyStripped = true;
+ break;
+ }
+ }
+ if(!anyStripped)
+ return;
+
+ // destripify the stripped submeshes
+ typedef UNITY_TEMP_VECTOR(UInt16) TemporaryIndexContainer;
+
+ std::vector<TemporaryIndexContainer> submeshIndices;
+ submeshIndices.resize(submeshCount);
+ for(int i=0;i<submeshCount;i++)
+ {
+ SubMesh& sm = m_SubMeshes[i];
+ if (sm.topology == kPrimitiveTriangleStripDeprecated)
+ Destripify (GetSubMeshBuffer16(i), sm.indexCount, submeshIndices[i]);
+ else
+ {
+ // Non-strip submeshes are copied verbatim (indexCount * 2 bytes).
+ submeshIndices[i].resize(sm.indexCount);
+ memcpy(&submeshIndices[i][0], GetSubMeshBuffer16(i), sm.indexCount << 1);
+ }
+ }
+
+ // Reset submeshes, then re-add everything as triangle lists.
+ SetSubMeshCount(0);
+ SetSubMeshCount(submeshCount);
+
+ for(int i=0;i<submeshCount;i++)
+ SetIndices(&submeshIndices[i][0], submeshIndices[i].size(), i, kPrimitiveTriangles);
+}
+
+// Whether script code may read/write this mesh's data. Governed by
+// m_IsReadable, except that editor scripts running outside the player and
+// render loops are always allowed access.
+bool Mesh::CanAccessFromScript() const
+{
+#if UNITY_EDITOR
+ // Allow editor scripts access even if not allowed in runtime
+ if (!IsInsidePlayerLoop() && !IsInsideRenderLoop())
+ return true;
+#endif
+ return m_IsReadable;
+}
+
+
+// Main mesh serialization (format version 8). The exact order of Transfer
+// calls below IS the wire format: the compressed and uncompressed branches
+// must transfer the same property names so SafeBinaryRead can match either
+// stream against one type tree. Vertex colors are temporarily unswizzled
+// around serialization so the serialized bytes are platform independent.
+template<class TransferFunction>
+void Mesh::Transfer (TransferFunction& transfer)
+{
+ #if SUPPORT_SERIALIZED_TYPETREES
+ // See TransferWorkaround35SerializeFuckup below for comments.
+ // Remove when we can break backwards-compatibility.
+ if (transfer.GetFlags() & kWorkaround35MeshSerializationFuckup)
+ {
+ TransferWorkaround35SerializeFuckup (transfer);
+ return;
+ }
+ #endif
+
+ Super::Transfer (transfer);
+ transfer.SetVersion (8);
+
+ #if UNITY_EDITOR
+ const UInt32 supportedChannels = transfer.IsWritingGameReleaseData() ? transfer.GetBuildUsage().meshSupportedChannels : 0;
+ const UInt32 meshUsageFlags = transfer.IsWritingGameReleaseData() ? transfer.GetBuildUsage().meshUsageFlags : 0;
+ PrepareMeshDataForBuildTarget prepareMesh(*this, transfer.GetBuildingTarget().platform, supportedChannels, meshUsageFlags);
+ #endif
+
+ bool reswizzleColors = false;
+ if (m_VertexColorsSwizzled)
+ {
+ // Unswizzle colors before serializing
+ std::transform(GetColorBegin(), GetColorEnd(), GetColorBegin(), UnswizzleColorForPlatform);
+ m_VertexColorsSwizzled = false;
+ reswizzleColors = true;
+ }
+
+ transfer.Transfer (m_SubMeshes, "m_SubMeshes", kHideInEditorMask);
+ transfer.Transfer (m_Shapes, "m_Shapes", kHideInEditorMask);
+ transfer.Transfer (m_Bindpose, "m_BindPose", kHideInEditorMask);
+ transfer.Transfer (m_BonePathHashes, "m_BoneNameHashes", kHideInEditorMask);
+ transfer.Transfer (m_RootBonePathHash, "m_RootBoneNameHash", kHideInEditorMask);
+
+ transfer.Transfer (m_MeshCompression, "m_MeshCompression", kHideInEditorMask);
+ transfer.Transfer (m_StreamCompression, "m_StreamCompression", kHideInEditorMask);
+ transfer.Transfer (m_IsReadable, "m_IsReadable", kHideInEditorMask);
+ transfer.Transfer (m_KeepVertices, "m_KeepVertices", kHideInEditorMask);
+ transfer.Transfer (m_KeepIndices, "m_KeepIndices", kHideInEditorMask);
+ transfer.Align();
+
+ // Notice the two codepaths for serialization here.
+ // It is very important to keep both codepaths in sync, otherwise SafeBinaryRead serialization will crash.
+ // Look at kSerializeForPrefabSystem to disable compression when using Transfer to instantiate a Mesh.
+ // Changes to compression can break web content if we recompress at runtime. (case 546159)
+ bool doCompression = m_MeshCompression && !(transfer.GetFlags() & kSerializeForPrefabSystem);
+ if (!doCompression)
+ {
+ // Indices are byte-swapped in place before writing and swapped back after
+ // (also after reading with conversion, to restore native order).
+ if (transfer.ConvertEndianess() && transfer.IsWriting ())
+ ByteSwapIndices();
+
+ transfer.Transfer (m_IndexBuffer, "m_IndexBuffer", kHideInEditorMask);
+
+ if (transfer.ConvertEndianess() && (transfer.IsWriting () || transfer.IsReading ()))
+ ByteSwapIndices();
+
+ transfer.Transfer (m_Skin, "m_Skin", kHideInEditorMask);
+
+ if (transfer.IsVersionSmallerOrEqual (5))
+ {
+ // Versions <= 5 stored each channel as its own array; load them and
+ // repack into the interleaved vertex buffer.
+ dynamic_array<Vector4f> tangents;
+ dynamic_array<Vector3f> vertices, normals;
+ dynamic_array<Vector2f> uvs, uvs1;
+ dynamic_array<ColorRGBA32> colors;
+
+
+ transfer.Transfer (vertices, "m_Vertices", kHideInEditorMask);
+ transfer.Transfer (uvs, "m_UV", kHideInEditorMask);
+ transfer.Transfer (uvs1, "m_UV1", kHideInEditorMask);
+ transfer.Transfer (tangents, "m_Tangents", kHideInEditorMask);
+ transfer.Transfer (normals, "m_Normals", kHideInEditorMask);
+ transfer.Transfer (colors, "m_Colors", kHideInEditorMask);
+
+ unsigned format = 0;
+ if (!vertices.empty ()) format |= VERTEX_FORMAT1(Vertex);
+ if (!tangents.empty ()) format |= VERTEX_FORMAT1(Tangent);
+ if (!normals.empty ()) format |= VERTEX_FORMAT1(Normal);
+ if (!uvs.empty ()) format |= VERTEX_FORMAT1(TexCoord0);
+ if (!uvs1.empty ()) format |= VERTEX_FORMAT1(TexCoord1);
+ if (!colors.empty ()) format |= VERTEX_FORMAT1(Color);
+
+ size_t vertexCount = vertices.size ();
+ if (GetVertexCount () != vertexCount || GetAvailableChannels () != format)
+ ResizeVertices (vertexCount, format);
+
+ strided_copy (vertices.begin (), vertices.begin () + std::min (vertices.size (), vertexCount), GetVertexBegin ());
+ strided_copy (normals.begin (), normals.begin () + std::min (normals.size (), vertexCount), GetNormalBegin ());
+ strided_copy (uvs.begin (), uvs.begin () + std::min (uvs.size (), vertexCount), GetUvBegin (0));
+ strided_copy (uvs1.begin (), uvs1.begin () + std::min (uvs1.size (), vertexCount), GetUvBegin (1));
+ strided_copy (tangents.begin (), tangents.begin () + std::min (tangents.size (), vertexCount), GetTangentBegin ());
+ strided_copy (colors.begin (), colors.begin () + std::min (colors.size (), vertexCount), GetColorBegin ());
+ }
+ else
+ {
+ // version 6 introduces interleaved buffer
+ if (transfer.ConvertEndianess() && transfer.IsWriting ())
+ m_VertexData.SwapEndianess ();
+
+ transfer.Transfer (m_VertexData, "m_VertexData", kHideInEditorMask);
+
+ if (transfer.ConvertEndianess() && (transfer.IsWriting () || transfer.IsReading ()))
+ m_VertexData.SwapEndianess ();
+ }
+ }
+ // Notice the two codepaths for serialization here.
+ // It is very important to keep both codepaths in sync, otherwise SafeBinaryRead serialization will crash.
+ else
+ {
+ // Compressed path: write empty placeholders under the same property
+ // names so the type tree matches the uncompressed path exactly.
+ BoneInfluenceContainer dummySkin;
+ VertexData dummyVertexData;
+ IndexContainer dummyIndexContainer;
+
+ transfer.Transfer (dummyIndexContainer, "m_IndexBuffer", kHideInEditorMask);
+ transfer.Transfer (dummySkin, "m_Skin", kHideInEditorMask);
+ transfer.Transfer (dummyVertexData, "m_VertexData", kHideInEditorMask);
+ }
+
+ {
+ // only keep the compressed mesh in memory while needed
+ CompressedMesh m_CompressedMesh;
+ transfer.Align();
+ // Check both IsWriting() and IsReading() since both are true when reading with SafeBinaryRead
+ if (doCompression && transfer.IsWriting())
+ m_CompressedMesh.Compress(*this, m_MeshCompression);
+
+ transfer.Transfer (m_CompressedMesh, "m_CompressedMesh", kHideInEditorMask);
+
+ if (doCompression && transfer.DidReadLastProperty ())
+ m_CompressedMesh.Decompress(*this);
+ }
+
+ #if !GFX_SUPPORTS_TRISTRIPS
+ if (transfer.IsReading())
+ DestripifyIndices ();
+ #endif
+
+ // Reswizzle colors after serializing
+ if (reswizzleColors)
+ {
+ std::transform(GetColorBegin(), GetColorEnd(), GetColorBegin(), SwizzleColorForPlatform);
+ m_VertexColorsSwizzled = true;
+ }
+
+ transfer.Transfer (m_LocalAABB, "m_LocalAABB", kHideInEditorMask);
+
+ #if UNITY_EDITOR
+ // When building player we precalcuate mesh usage based on who uses the different MeshColliders in different scenes.
+ if (transfer.IsWritingGameReleaseData())
+ {
+ int buildMeshUsageFlags = transfer.GetBuildUsage().meshUsageFlags;
+ transfer.Transfer (buildMeshUsageFlags, "m_MeshUsageFlags", kHideInEditorMask);
+ }
+ else
+ transfer.Transfer (m_MeshUsageFlags, "m_MeshUsageFlags", kHideInEditorMask);
+ #else
+ transfer.Transfer (m_MeshUsageFlags, "m_MeshUsageFlags", kHideInEditorMask);
+ #endif
+
+ m_CollisionMesh.Transfer(transfer, *this);
+
+ // Version-upgrade fixups below run only when reading old streams.
+ if (transfer.IsOldVersion(1))
+ {
+ vector<DeprecatedLOD> lod;
+ transfer.Transfer (lod, "m_LODData", kHideInEditorMask);
+ LoadDeprecatedMeshData(*this, lod);
+ }
+
+ if (transfer.IsVersionSmallerOrEqual(4))
+ {
+ for (int sm = 0; sm < m_SubMeshes.size(); ++sm)
+ {
+ UpdateSubMeshVertexRange (sm);
+ RecalculateSubmeshBoundsInternal (sm);
+ }
+ }
+
+ if (transfer.IsOldVersion(2) || transfer.IsOldVersion(1))
+ {
+ DeprecatedTangentsArray m_TangentSpace;
+ transfer.Transfer (m_TangentSpace, "m_TangentSpace", kHideInEditorMask);
+ if(transfer.IsReading())
+ LoadDeprecatedTangentData(*this,m_TangentSpace);
+ }
+
+ if (transfer.IsVersionSmallerOrEqual(7))
+ {
+ DestripifySubmeshOnTransferInternal();
+ }
+ TRANSFER_EDITOR_ONLY_HIDDEN(m_MeshOptimized);
+
+#if UNITY_EDITOR || UNITY_PS3
+ TransferPS3Data(transfer);
+#endif
+}
+
+#if SUPPORT_SERIALIZED_TYPETREES
+// Except for some dead-path removal and a change to the ResizeVertices call to account for an
+// API change, this is an exact copy of the Mesh::Transfer function as it shipped in 3.5.0 final.
+// This path exists solely to work around the issue with compressed mesh serialization in 3.5.0
+// which produced different serializations for compressed and uncompressed meshes while using the
+// same type tree for either case. This makes it impossible for SafeBinaryRead to sort things out.
+//
+// By having the exact same transfer path, we end up with identical type trees compared to version
+// 3.5.0 and thus automatically end up on the StreamedBinaryRead codepath. Also, as long as this
+// separate path here is preserved, we can read the faulty 3.5.0 streams without having to worry
+// about it in the normal transfer path.
+// Byte-for-byte compatibility path replicating the 3.5.0 Transfer layout
+// (format version 6) so type trees from faulty 3.5.0 streams match exactly.
+// Transfer calls here must never be reordered or renamed. The only functional
+// change from the shipped code is the removal of a leftover debug
+// printf_console("Reading compressed mesh...") that ran unconditionally on
+// every transfer, reads and writes alike.
+template<class TransferFunction>
+void Mesh::TransferWorkaround35SerializeFuckup (TransferFunction& transfer)
+{
+ Super::Transfer (transfer);
+ transfer.SetVersion (6);
+
+ if (m_VertexColorsSwizzled)
+ {
+ // Unswizzle colors before serializing
+ std::transform(GetColorBegin(), GetColorEnd(), GetColorBegin(), UnswizzleColorForPlatform);
+ m_VertexColorsSwizzled = false;
+ }
+
+ transfer.Transfer (m_SubMeshes, "m_SubMeshes", kHideInEditorMask);
+
+ if (!transfer.IsVersionSmallerOrEqual(3))
+ transfer.Transfer (m_MeshCompression, "m_MeshCompression", kHideInEditorMask);
+ else
+ m_MeshCompression = kMeshCompressionOff;
+
+ transfer.Align();
+ if (m_MeshCompression == kMeshCompressionOff)
+ {
+ if (transfer.ConvertEndianess() && transfer.IsWriting ())
+ ByteSwapIndices();
+
+ transfer.Transfer (m_IndexBuffer, "m_IndexBuffer", kHideInEditorMask);
+
+ if (transfer.ConvertEndianess() && (transfer.IsWriting () || transfer.IsReading ()))
+ ByteSwapIndices();
+
+ transfer.Transfer (m_Skin, "m_Skin", kHideInEditorMask);
+ transfer.Transfer (m_Bindpose, "m_BindPose", kHideInEditorMask);
+
+ if (transfer.IsVersionSmallerOrEqual (5))
+ {
+ dynamic_array<Vector4f> tangents;
+ dynamic_array<Vector3f> vertices, normals;
+ dynamic_array<Vector2f> uvs, uvs1;
+ dynamic_array<ColorRGBA32> colors;
+
+
+ transfer.Transfer (vertices, "m_Vertices", kHideInEditorMask);
+ transfer.Transfer (uvs, "m_UV", kHideInEditorMask);
+ transfer.Transfer (uvs1, "m_UV1", kHideInEditorMask);
+ transfer.Transfer (tangents, "m_Tangents", kHideInEditorMask);
+ transfer.Transfer (normals, "m_Normals", kHideInEditorMask);
+ transfer.Transfer (colors, "m_Colors", kHideInEditorMask);
+
+ unsigned format = 0;
+ if (!vertices.empty ()) format |= VERTEX_FORMAT1(Vertex);
+ if (!tangents.empty ()) format |= VERTEX_FORMAT1(Tangent);
+ if (!normals.empty ()) format |= VERTEX_FORMAT1(Normal);
+ if (!uvs.empty ()) format |= VERTEX_FORMAT1(TexCoord0);
+ if (!uvs1.empty ()) format |= VERTEX_FORMAT1(TexCoord1);
+ if (!colors.empty ()) format |= VERTEX_FORMAT1(Color);
+
+ size_t vertexCount = vertices.size ();
+ if (GetVertexCount () != vertexCount || GetAvailableChannels () != format)
+ ResizeVertices (vertexCount, format);
+
+ strided_copy (vertices.begin (), vertices.begin () + std::min (vertices.size (), vertexCount), GetVertexBegin ());
+ strided_copy (normals.begin (), normals.begin () + std::min (normals.size (), vertexCount), GetNormalBegin ());
+ strided_copy (uvs.begin (), uvs.begin () + std::min (uvs.size (), vertexCount), GetUvBegin (0));
+ strided_copy (uvs1.begin (), uvs1.begin () + std::min (uvs1.size (), vertexCount), GetUvBegin (1));
+ strided_copy (tangents.begin (), tangents.begin () + std::min (tangents.size (), vertexCount), GetTangentBegin ());
+ strided_copy (colors.begin (), colors.begin () + std::min (colors.size (), vertexCount), GetColorBegin ());
+ }
+ else
+ {
+ // version 6 introduces interleaved buffer
+ if (transfer.ConvertEndianess() && transfer.IsWriting ())
+ m_VertexData.SwapEndianess ();
+
+ transfer.Transfer (m_VertexData, "m_VertexData", kHideInEditorMask);
+
+ if (transfer.ConvertEndianess() && (transfer.IsWriting () || transfer.IsReading ()))
+ m_VertexData.SwapEndianess ();
+ }
+ }
+ else
+ {
+ // Compressed path: empty placeholders keep the type tree identical to
+ // the uncompressed path above.
+ vector<Vector4f> emptyVector4;
+ vector<Vector3f> emptyVector3;
+ vector<Vector2f> emptyVector2;
+ vector<BoneInfluence> emptyBones;
+ vector<UInt8> emptyIndices;
+ vector<ColorRGBA32> emptyColors;
+
+ transfer.Transfer (emptyIndices, "m_IndexBuffer", kHideInEditorMask);
+ transfer.Transfer (emptyVector3, "m_Vertices", kHideInEditorMask);
+ transfer.Transfer (emptyBones, "m_Skin", kHideInEditorMask);
+ transfer.Transfer (m_Bindpose, "m_BindPose", kHideInEditorMask);
+ transfer.Transfer (emptyVector2, "m_UV", kHideInEditorMask);
+ transfer.Transfer (emptyVector2, "m_UV1", kHideInEditorMask);
+ transfer.Transfer (emptyVector4, "m_Tangents", kHideInEditorMask);
+ transfer.Transfer (emptyVector3, "m_Normals", kHideInEditorMask);
+ transfer.Transfer (emptyColors, "m_Colors", kHideInEditorMask);
+ }
+
+ CompressedMesh m_CompressedMesh;
+ transfer.Align();
+ if (transfer.IsWriting() && m_MeshCompression)
+ m_CompressedMesh.Compress(*this, m_MeshCompression);
+
+ transfer.Transfer (m_CompressedMesh, "m_CompressedMesh", kHideInEditorMask);
+
+ if (transfer.DidReadLastProperty () && m_MeshCompression)
+ m_CompressedMesh.Decompress(*this);
+
+
+#if !GFX_SUPPORTS_TRISTRIPS
+ if (transfer.IsReading())
+ DestripifyIndices ();
+#endif
+
+ transfer.Transfer (m_LocalAABB, "m_LocalAABB", kHideInEditorMask);
+ transfer.Transfer (m_MeshUsageFlags, "m_MeshUsageFlags", kHideInEditorMask);
+
+ m_CollisionMesh.Transfer(transfer, *this);
+
+ if (transfer.IsOldVersion(1))
+ {
+ vector<DeprecatedLOD> lod;
+ transfer.Transfer (lod, "m_LODData", kHideInEditorMask);
+ LoadDeprecatedMeshData(*this, lod);
+ }
+
+ if (transfer.IsVersionSmallerOrEqual(4))
+ {
+ for (int sm = 0; sm < m_SubMeshes.size(); ++sm)
+ {
+ UpdateSubMeshVertexRange (sm);
+ RecalculateSubmeshBoundsInternal (sm);
+ }
+ }
+
+ if (transfer.IsOldVersion(2) || transfer.IsOldVersion(1))
+ {
+ DeprecatedTangentsArray m_TangentSpace;
+ transfer.Transfer (m_TangentSpace, "m_TangentSpace", kHideInEditorMask);
+ if(transfer.IsReading())
+ LoadDeprecatedTangentData(*this,m_TangentSpace);
+ }
+
+ if (transfer.IsReading())
+ DestripifySubmeshOnTransferInternal();
+}
+#endif
+
+#if UNITY_EDITOR || UNITY_PS3
+// Serializes PS3-specific partition data; active at runtime on PS3 and in
+// the editor when the build target is PS3.
+template<class TransferFunction>
+void Mesh::TransferPS3Data (TransferFunction& transfer)
+{
+ if (UNITY_PS3 || (kBuildPS3 == transfer.GetBuildingTarget().platform))
+ {
+ transfer.Transfer(m_Partitions, "m_Partitions", kHideInEditorMask);
+ transfer.Transfer(m_PartitionInfos, "m_PartitionInfos", kHideInEditorMask);
+ }
+}
+#endif
+
+
+// Recomputes firstVertex/vertexCount of a submesh from the min/max vertex
+// indices referenced by its 16-bit index buffer. An empty submesh gets
+// a zero range.
+void Mesh::UpdateSubMeshVertexRange (int index)
+{
+ SubMesh& submesh = m_SubMeshes[index];
+ if (submesh.indexCount > 0)
+ {
+ UInt32 lastVertex = 0;
+ GetVertexBufferRange(GetSubMeshBuffer16(index), submesh.indexCount, submesh.firstVertex, lastVertex);
+ Assert(lastVertex < GetVertexCount ());
+ Assert(submesh.firstVertex <= lastVertex);
+ // Range is inclusive on both ends, hence the +1.
+ submesh.vertexCount = lastVertex - submesh.firstVertex + 1;
+ }
+ else
+ {
+ submesh.firstVertex = 0;
+ submesh.vertexCount = 0;
+ }
+}
+
+// Returns true when every 16-bit index in p[0..count) is < max (in bounds).
+static bool CheckOutOfBounds (unsigned max, const UInt16* p, unsigned count)
+{
+ // Unsigned loop counter: the previous signed 'int i' compared against the
+ // unsigned 'count' (implicit sign conversion) and would loop forever for
+ // counts above INT_MAX.
+ for (unsigned i=0;i<count;i++)
+ {
+ if (p[i] >= max)
+ return false;
+ }
+ return true;
+}
+
+// Returns true when every 32-bit index in p[0..count) is < max (in bounds).
+static bool CheckOutOfBounds (unsigned max, const UInt32* p, unsigned count)
+{
+ // Unsigned loop counter, matching the UInt16 overload (avoids the
+ // signed/unsigned comparison of the previous 'int i').
+ for (unsigned i=0;i<count;i++)
+ {
+ if (p[i] >= max)
+ return false;
+ }
+ return true;
+}
+
+// Checks that no index references a vertex >= newVertexCount. When
+// 'newTriangles' is non-NULL the supplied 16-bit indices are validated;
+// otherwise the mesh's own index buffer is checked.
+// NOTE(review): the else branch takes &m_IndexBuffer[0] without checking for
+// an empty buffer (technically UB on an empty vector, though count is 0 then).
+bool Mesh::ValidateVertexCount (unsigned newVertexCount, const void* newTriangles, unsigned indexCount)
+{
+ if (newTriangles)
+ {
+ return CheckOutOfBounds (newVertexCount, reinterpret_cast<const UInt16*>(newTriangles), indexCount);
+ }
+ else
+ {
+ return CheckOutOfBounds(newVertexCount, reinterpret_cast<const UInt16*>(&m_IndexBuffer[0]), GetTotalndexCount());
+ }
+}
+
+// Total number of indices across all submeshes (raw buffer bytes divided by
+// the per-index size). The misspelled name ("Totalndex") is kept because
+// callers depend on it.
+int Mesh::GetTotalndexCount () const
+{
+ return m_IndexBuffer.size () / kVBOIndexSize;
+}
+
+// Replaces the position channel with 'count' vertices. Silently ignored for
+// stream-compressed meshes. Rejects more than 65000 vertices and (for 3.5.3+
+// content) counts smaller than what the index buffer references. Growing the
+// buffer initializes the other channels of the new vertices to defaults;
+// bounds are recalculated only when the vertex count changed.
+void Mesh::SetVertices (Vector3f const* data, size_t count)
+{
+ if (m_StreamCompression)
+ return;
+
+ if (count > std::numeric_limits<UInt16>::max())
+ {
+ ErrorString("Mesh.vertices is too large. A mesh may not have more than 65000 vertices.");
+ return;
+ }
+
+ size_t prevCount = GetVertexCount ();
+ if (IS_CONTENT_NEWER_OR_SAME (kUnityVersion3_5_3_a1) && count < prevCount && !ValidateVertexCount(count, NULL, 0))
+ {
+ ErrorString("Mesh.vertices is too small. The supplied vertex array has less vertices than are referenced by the triangles array.");
+ return;
+ }
+
+ WaitOnRenderThreadUse();
+
+#if UNITY_PS3
+ if(m_Skin.empty() || (!(m_Skin.empty() || m_PartitionInfos.empty())))
+ {
+ // mircea@info: sadly for us GPU renders from pointers, so we need to create a new instance when something changes....(fixes nasty bug #434226)
+ SET_ALLOC_OWNER(this);
+ VertexData vertexData(m_VertexData, GetAvailableChannels(), GetStreamsLayout(), GetChannelsLayout());
+ swap(vertexData, m_VertexData);
+ }
+#endif
+
+ if (prevCount != count)
+ {
+ unsigned prevChannels = GetAvailableChannels ();
+ ResizeVertices (count, prevChannels | VERTEX_FORMAT1(Vertex));
+
+ // In case there were other channels present, initialize the newly created values of
+ // the expanded buffer to something meaningful.
+ if (prevCount != 0 && count > prevCount && (prevChannels & ~VERTEX_FORMAT1(Vertex)))
+ {
+ InitChannelsToDefault (prevCount, count - prevCount, prevChannels & ~VERTEX_FORMAT1(Vertex));
+ }
+ }
+
+ // Make sure we'll not be overrunning the buffer
+ if (GetVertexCount () < count)
+ count = GetVertexCount ();
+
+ strided_copy (data, data + count, GetVertexBegin ());
+ SetChannelsDirty (VERTEX_FORMAT1(Vertex), false);
+
+ // We do not recalc the bounds automatically when re-writing existing vertices
+ if (prevCount != count)
+ RecalculateBounds ();
+}
+
+void Mesh::SetNormals (Vector3f const* data, size_t count)
+{
+ if (m_StreamCompression)
+ return;
+ WaitOnRenderThreadUse();
+
+ if (count == 0 || !data)
+ {
+ FormatVertices (GetAvailableChannels () & ~VERTEX_FORMAT1(Normal));
+ SetChannelsDirty (VERTEX_FORMAT1(Normal), false);
+ return;
+ }
+
+ if (count != GetVertexCount ())
+ {
+ ErrorStringMsg(kMeshAPIErrorMessage, "normals");
+ return;
+ }
+
+ if (!IsAvailable (kShaderChannelNormal))
+ FormatVertices (GetAvailableChannels () | VERTEX_FORMAT1(Normal));
+
+ strided_copy (data, data + count, GetNormalBegin ());
+
+ SetChannelsDirty (VERTEX_FORMAT1(Normal), false);
+}
+
+void Mesh::SetTangents (Vector4f const* data, size_t count)
+{
+ if (m_StreamCompression)
+ return;
+ WaitOnRenderThreadUse();
+
+ if (count == 0 || !data)
+ {
+ FormatVertices (GetAvailableChannels () & ~VERTEX_FORMAT1(Tangent));
+ SetChannelsDirty (VERTEX_FORMAT1(Tangent), false);
+ return;
+ }
+
+ if (count != GetVertexCount ())
+ {
+ ErrorStringMsg(kMeshAPIErrorMessage, "tangents");
+ return;
+ }
+
+ if (!IsAvailable (kShaderChannelTangent))
+ FormatVertices (GetAvailableChannels () | VERTEX_FORMAT1(Tangent));
+
+ strided_copy (data, data + count, GetTangentBegin ());
+ SetChannelsDirty( VERTEX_FORMAT1(Tangent), false );
+}
+
+void Mesh::SetUv (int uvIndex, Vector2f const* data, size_t count)
+{
+ Assert (uvIndex <= 1);
+ if (m_StreamCompression)
+ return;
+ WaitOnRenderThreadUse();
+
+ ShaderChannel texCoordChannel = static_cast<ShaderChannel>(kShaderChannelTexCoord0 + uvIndex);
+ unsigned texCoordMask = 1 << texCoordChannel;
+ if (count == 0 || !data)
+ {
+ FormatVertices (GetAvailableChannels () & ~texCoordMask);
+ SetChannelsDirty (texCoordMask, false);
+ return;
+ }
+
+ if (count != GetVertexCount ())
+ {
+ const char* uvName = uvIndex == 1 ? "uv2" : "uv";
+ ErrorStringMsg(kMeshAPIErrorMessage, uvName);
+ return;
+ }
+
+ if (!IsAvailable (texCoordChannel))
+ FormatVertices (GetAvailableChannels () | texCoordMask);
+
+ strided_copy (data, data + count, GetUvBegin (uvIndex));
+ SetChannelsDirty (texCoordMask, false);
+}
+
+void Mesh::SetColors (ColorRGBA32 const* data, size_t count)
+{
+ if (m_StreamCompression)
+ return;
+ WaitOnRenderThreadUse();
+
+ if (count == 0 || !data)
+ {
+ FormatVertices (GetAvailableChannels () & ~VERTEX_FORMAT1(Color));
+ SetChannelsDirty( VERTEX_FORMAT1(Color), false );
+ return;
+ }
+
+ if (count != GetVertexCount ())
+ {
+ ErrorStringMsg(kMeshAPIErrorMessage, "colors");
+ return;
+ }
+
+ if (!IsAvailable (kShaderChannelColor))
+ {
+ FormatVertices (GetAvailableChannels () | VERTEX_FORMAT1(Color));
+ }
+ m_VertexColorsSwizzled = gGraphicsCaps.needsToSwizzleVertexColors;
+
+ if (m_VertexColorsSwizzled)
+ std::transform(data, data + count, GetColorBegin(), SwizzleColorForPlatform);
+ else
+ std::copy(data, data + count, GetColorBegin());
+
+ SetChannelsDirty( VERTEX_FORMAT1(Color), false );
+}
+
// Replaces the per-vertex colors from float (ColorRGBAf) data, converting to
// the internal 32-bit representation. Empty/null input removes the color
// channel; otherwise 'count' must equal the vertex count.
void Mesh::SetColorsConverting (ColorRGBAf const* data, size_t count)
{
    if (m_StreamCompression)
        return;
    WaitOnRenderThreadUse();

    if (count == 0 || !data)
    {
        FormatVertices (GetAvailableChannels () & ~VERTEX_FORMAT1(Color));
        SetChannelsDirty( VERTEX_FORMAT1(Color), false );
        return;
    }

    if (count != GetVertexCount ())
    {
        ErrorStringMsg(kMeshAPIErrorMessage, "colors");
        return;
    }

    if (!IsAvailable (kShaderChannelColor))
    {
        FormatVertices (GetAvailableChannels () | VERTEX_FORMAT1(Color));
    }
    // Some platforms store vertex colors with a swizzled byte order.
    m_VertexColorsSwizzled = gGraphicsCaps.needsToSwizzleVertexColors;

    if (m_VertexColorsSwizzled)
        // NOTE(review): this path relies on a SwizzleColorForPlatform overload
        // that both converts ColorRGBAf to the 32-bit format and swizzles —
        // confirm such an overload exists.
        std::transform(data, data + count, GetColorBegin(), SwizzleColorForPlatform);
    else
        strided_copy_convert(data, data + count, GetColorBegin());

    SetChannelsDirty( VERTEX_FORMAT1(Color), false );
}
+
+
+void Mesh::GetTriangles (Mesh::TemporaryIndexContainer& triangles) const
+{
+ triangles.clear();
+ for (unsigned m=0;m<GetSubMeshCount();m++)
+ AppendTriangles(triangles, m);
+}
+
+void Mesh::GetTriangles (Mesh::TemporaryIndexContainer& triangles, unsigned submesh) const
+{
+ triangles.clear();
+ AppendTriangles(triangles, submesh);
+}
+
+void QuadsToTriangles(const UInt16* quads, const int indexCount, Mesh::TemporaryIndexContainer& triangles)
+{
+ DebugAssert (indexCount%4 == 0);
+ triangles.resize((indexCount/2)*3);
+ for (int q = 0, t = 0; q < indexCount; q += 4, t +=6)
+ {
+ triangles[t] = quads[q];
+ triangles[t + 1] = quads[q + 1];
+ triangles[t + 2] = quads[q + 2];
+
+ triangles[t + 3] = quads[q];
+ triangles[t + 4] = quads[q + 2];
+ triangles[t + 5] = quads[q + 3];
+ }
+}
+
+
+void Mesh::AppendTriangles (Mesh::TemporaryIndexContainer& triangles, unsigned submesh) const
+{
+ if (submesh >= GetSubMeshCount())
+ {
+ ErrorString("Failed getting triangles. Submesh index is out of bounds.");
+ return;
+ }
+
+ int topology = GetSubMeshFast(submesh).topology;
+ if (topology == kPrimitiveTriangleStripDeprecated)
+ Destripify(GetSubMeshBuffer16(submesh), GetSubMeshFast(submesh).indexCount, triangles);
+ else if (topology == kPrimitiveQuads)
+ QuadsToTriangles (GetSubMeshBuffer16 (submesh), GetSubMeshFast (submesh).indexCount, triangles);
+ else if (topology == kPrimitiveTriangles)
+ triangles.insert(triangles.end(), GetSubMeshBuffer16(submesh), GetSubMeshBuffer16(submesh) + GetSubMeshFast(submesh).indexCount);
+ else
+ ErrorString("Failed getting triangles. Submesh topology is lines or points.");
+}
+
+void Mesh::GetStrips (Mesh::TemporaryIndexContainer& triangles, unsigned submesh) const
+{
+ triangles.clear();
+ if (submesh >= GetSubMeshCount())
+ {
+ ErrorString("Failed getting triangles. Submesh index is out of bounds.");
+ return;
+ }
+
+ if (GetSubMeshFast(submesh).topology != kPrimitiveTriangleStripDeprecated)
+ return;
+
+ triangles.assign(GetSubMeshBuffer16(submesh), GetSubMeshBuffer16(submesh) + GetSubMeshFast(submesh).indexCount);
+}
+
+void Mesh::GetIndices (TemporaryIndexContainer& triangles, unsigned submesh) const
+{
+ triangles.clear();
+ if (submesh >= GetSubMeshCount())
+ {
+ ErrorString("Failed getting indices. Submesh index is out of bounds.");
+ return;
+ }
+ triangles.assign(GetSubMeshBuffer16(submesh), GetSubMeshBuffer16(submesh) + GetSubMeshFast(submesh).indexCount);
+}
+
+
+bool Mesh::SetIndices (const UInt32* indices, unsigned count, unsigned submesh, GfxPrimitiveType topology)
+{
+ int mask = kRebuildCollisionTriangles;
+ return SetIndicesComplex (indices, count, submesh, topology, mask);
+}
+
+bool Mesh::SetIndices (const UInt16* indices, unsigned count, unsigned submesh, GfxPrimitiveType topology)
+{
+ int mask = kRebuildCollisionTriangles | k16BitIndices;
+ return SetIndicesComplex (indices, count, submesh, topology, mask);
+}
+
+
// Core index-assignment path shared by both SetIndices overloads.
// 'mode' flags: k16BitIndices (source is UInt16 data), kDontAssignIndices
// (resize/retopologize only, skip writing data), kRebuildCollisionTriangles,
// kDontSupportSubMeshVertexRanges. Returns false on validation failure.
bool Mesh::SetIndicesComplex (const void* indices, unsigned count, unsigned submesh, GfxPrimitiveType topology, int mode)
{
    WaitOnRenderThreadUse();

    if (indices == NULL && count != 0 && (mode & kDontAssignIndices) == 0)
    {
        ErrorString("failed setting triangles. triangles is NULL");
        return false;
    }

    if (submesh >= GetSubMeshCount())
    {
        ErrorString("Failed setting triangles. Submesh index is out of bounds.");
        return false;
    }

    if ((topology == kPrimitiveTriangles) && (count % 3 != 0))
    {
        ErrorString("Failed setting triangles. The number of supplied triangle indices must be a multiple of 3.");
        return false;
    }

    // Reject any index that references a vertex beyond the current vertex count.
    if ((mode & kDontAssignIndices) == 0)
    {
        bool valid;
        if (mode & k16BitIndices)
            valid = CheckOutOfBounds (GetVertexCount(), reinterpret_cast<const UInt16*>(indices), count);
        else
            valid = CheckOutOfBounds (GetVertexCount(), reinterpret_cast<const UInt32*>(indices), count);

        if (!valid)
        {
            ErrorString("Failed setting triangles. Some indices are referencing out of bounds vertices.");
            return false;
        }
    }

    SetIndexData(submesh, count, indices, topology, mode);

    if (mode & Mesh::kDontSupportSubMeshVertexRanges)
    {
        // Single-submesh fast path: the whole vertex buffer is the range.
        Assert(m_SubMeshes.size () == 1);
        m_SubMeshes[0].firstVertex = 0;
        m_SubMeshes[0].vertexCount = GetVertexCount();
        m_SubMeshes[0].localAABB = m_LocalAABB;
    }
    else
    {
        // Update vertex range
        UpdateSubMeshVertexRange (submesh);
        RecalculateSubmeshBounds(submesh);
    }

    if (mode & kRebuildCollisionTriangles)
        RebuildCollisionTriangles();

    SetChannelsDirty( 0, true );

    return true;
}
+
+void Mesh::DestripifySubmeshOnTransferInternal()
+{
+ if (m_IndexBuffer.empty() || m_SubMeshes.empty())
+ return;
+
+ int submeshCount = m_SubMeshes.size();
+ typedef UNITY_TEMP_VECTOR(UInt16) TemporaryIndexContainer;
+
+ std::vector<TemporaryIndexContainer> submeshIndices;
+ submeshIndices.resize(submeshCount);
+
+ // We have to do this in two batches, as SetIndexData seems to have a bug that causes
+ // triangle windings to get screwed up if we attempt to modify the submeshes in-place.
+
+ for (size_t i = 0; i < submeshCount; ++i)
+ {
+ SubMesh& sm = m_SubMeshes[i];
+ if (sm.topology == kPrimitiveTriangleStripDeprecated)
+ {
+ Destripify (GetSubMeshBuffer16(i), sm.indexCount, submeshIndices[i]);
+ }
+ else
+ {
+ submeshIndices[i].resize(sm.indexCount);
+ memcpy(&submeshIndices[i][0], GetSubMeshBuffer16(i), sm.indexCount << 1);
+ }
+ }
+
+ for(size_t i = 0; i < submeshCount; ++i)
+ {
+ SetIndexData(i, submeshIndices[i].size(), &submeshIndices[i][0], kPrimitiveTriangles, kRebuildCollisionTriangles | k16BitIndices);
+ }
+}
+
// Resizes one submesh's slice of the shared index buffer to 'indexCount'
// indices, recomputes the byte offsets of all following submeshes, and
// (unless kDontAssignIndices is set) writes the new indices into the slice.
// Source data is UInt16 or UInt32 depending on the k16BitIndices flag.
void Mesh::SetIndexData(int submeshIndex, int indexCount, const void* indices, GfxPrimitiveType topology, int mode)
{
    int newByteSize = indexCount * kVBOIndexSize;
    int oldSubmeshSize = GetSubMeshBufferByteSize (submeshIndex);
    int insertedBytes = newByteSize - GetSubMeshBufferByteSize (submeshIndex);
    int oldFirstByte = m_SubMeshes[submeshIndex].firstByte;
    // Growing the buffer
    if (insertedBytes > 0)
    {
        m_IndexBuffer.insert(m_IndexBuffer.begin() + oldFirstByte + oldSubmeshSize, insertedBytes, 0);
    }
    // Shrinking the buffer
    else
    {
        // NOTE(review): bytes are removed from the FRONT of this submesh's span,
        // shifting its remaining payload; that is only safe because the slice is
        // rewritten below. Confirm no caller combines shrinking with
        // kDontAssignIndices.
        m_IndexBuffer.erase(m_IndexBuffer.begin() + oldFirstByte, m_IndexBuffer.begin() + oldFirstByte - insertedBytes);
    }

#if UNITY_PS3

    // mircea@info: sadly for us GPU renders from pointers, so we need to create a new instance when something changes....(fixes nasty bug #434226)
    // NOTE(review): the swap replaces m_IndexBuffer with a freshly-resized
    // container — only this submesh's slice is written below; verify the other
    // submeshes' index data is restored elsewhere on PS3.
    IndexContainer newIndexContainer;
    newIndexContainer.resize(m_IndexBuffer.size());
    m_IndexBuffer.swap(newIndexContainer);

#endif

    // Update the sub mesh
    m_SubMeshes[submeshIndex].indexCount = indexCount;
    m_SubMeshes[submeshIndex].topology = topology;

    // Synchronize subsequent sub meshes
    for (int i=submeshIndex+1;i<m_SubMeshes.size();i++)
    {
        m_SubMeshes[i].firstByte = m_SubMeshes[i-1].firstByte + m_SubMeshes[i-1].indexCount * kVBOIndexSize;
    }

    // Write indices into the allocated data
    if ((mode & kDontAssignIndices) == 0)
    {
        if (mode & k16BitIndices)
        {
            const UInt16* src = reinterpret_cast<const UInt16*>(indices);
            UInt16* dst = GetSubMeshBuffer16(submeshIndex);
            for (int i=0;i<indexCount;i++)
                dst[i] = src[i];
        }
        else
        {
            // 32-bit source values are silently truncated to 16 bits here.
            const UInt32* src = reinterpret_cast<const UInt32*>(indices);
            UInt16* dst = GetSubMeshBuffer16(submeshIndex);
            for (int i=0;i<indexCount;i++)
                dst[i] = src[i];
        }
    }

    return;
}
+
+const UInt16* Mesh::GetSubMeshBuffer16 (int submesh) const
+{
+ return m_IndexBuffer.size() > 0 && m_SubMeshes[submesh].firstByte < m_IndexBuffer.size() ? reinterpret_cast<const UInt16*> (&m_IndexBuffer[m_SubMeshes[submesh].firstByte]) : NULL;
+}
+UInt16* Mesh::GetSubMeshBuffer16 (int submesh)
+{
+ return m_IndexBuffer.size() > 0 && m_SubMeshes[submesh].firstByte < m_IndexBuffer.size() ? reinterpret_cast<UInt16*> (&m_IndexBuffer[m_SubMeshes[submesh].firstByte]) : NULL;
+}
+
+void Mesh::SetBindposes (const Matrix4x4f* bindposes, int count)
+{
+ m_Bindpose.assign(bindposes, bindposes + count);
+ SetDirty();
+}
+
+void Mesh::SetBounds (const AABB& aabb)
+{
+ m_LocalAABB = aabb;
+ SetDirty();
+ NotifyObjectUsers( kDidModifyBounds );
+ m_IntermediateUsers.Notify( kImNotifyBoundsChanged );
+}
+
+void Mesh::SetBounds (unsigned submesh, const AABB& aabb)
+{
+ GetSubMeshFast(submesh).localAABB = aabb;
+ SetDirty();
+ NotifyObjectUsers( kDidModifyBounds );
+ m_IntermediateUsers.Notify( kImNotifyBoundsChanged );
+}
+
// Broadcasts 'msg' to every object registered as a user of this mesh
// (e.g. renderers referencing it). Main thread only.
void Mesh::NotifyObjectUsers(const MessageIdentifier& msg)
{
    ASSERT_RUNNING_ON_MAIN_THREAD;

    MessageData data;
    data.SetData (this, ClassID (Mesh));

    // Capture the next iterator BEFORE dispatching: a receiver may unregister
    // itself from m_ObjectUsers while handling the message, which would
    // invalidate the current iterator.
    ObjectList::iterator next;
    for( ObjectList::iterator i = m_ObjectUsers.begin(); i != m_ObjectUsers.end(); i=next )
    {
        next = i;
        ++next;
        Object& target = **i;
        SendMessageDirect(target, msg, data);
    }
}
+
// Blocks until the render thread has consumed this mesh's data, as tracked by
// m_CurrentCPUFence. No-op in single-threaded builds or when no fence is
// pending; must be called before mutating CPU-side mesh data.
void Mesh::WaitOnRenderThreadUse()
{
#if ENABLE_MULTITHREADED_CODE
    if (m_WaitOnCPUFence)
    {
        GetGfxDevice().WaitOnCPUFence(m_CurrentCPUFence);
        m_WaitOnCPUFence = false; // fence reached; don't wait on it again
    }
#endif
}
+
// Invalidates the cached physics (collision) mesh so it gets regenerated
// from the current vertex/index data on next use.
void Mesh::RebuildCollisionTriangles()
{
    m_CollisionMesh.VertexDataHasChanged ();
}
+
+PROFILER_INFORMATION(gRecalculateNormals, "Mesh.RecalculateNormals", kProfilerOther)
+
+void Mesh::RecalculateNormals()
+{
+ if (m_StreamCompression)
+ return;
+ WaitOnRenderThreadUse();
+
+ PROFILER_AUTO(gRecalculateNormals, this);
+
+ if (int vertexCount = GetVertexCount())
+ {
+ unsigned newChannels = m_VertexData.GetChannelMask () | VERTEX_FORMAT1(Normal);
+ if (newChannels != m_VertexData.GetChannelMask ())
+ FormatVertices (newChannels);
+
+ TemporaryIndexContainer triangles;
+ GetTriangles (triangles);
+
+ CalculateNormals( GetVertexBegin (), &triangles[0], vertexCount, triangles.size()/3, GetNormalBegin () );
+ }
+
+ SetChannelsDirty( VERTEX_FORMAT1(Normal), false );
+}
+
+
+void Mesh::SetSubMeshCount (unsigned int count)
+{
+ WaitOnRenderThreadUse();
+
+ if (count == 0)
+ {
+ m_IndexBuffer.clear();
+ m_SubMeshes.clear();
+ return;
+ }
+
+ // Remove elements
+ if (count < m_SubMeshes.size ())
+ {
+ m_IndexBuffer.resize(m_SubMeshes[count].firstByte);
+ m_SubMeshes.resize(count);
+ }
+ // Append elements
+ else if (count > m_SubMeshes.size ())
+ {
+ SubMesh data;
+ data.firstByte = m_IndexBuffer.size();
+ data.indexCount = 0;
+ data.topology = kPrimitiveTriangles;
+ data.firstVertex = 0;
+ data.vertexCount = 0;
+ data.localAABB = AABB (Vector3f::zero, Vector3f::zero);
+ m_SubMeshes.resize(count, data);
+ RecalculateBounds();
+ }
+}
+
+size_t Mesh::GetSubMeshCount () const
+{
+ return m_SubMeshes.size();
+}
+
+int Mesh::GetPrimitiveCount() const
+{
+ int submeshes = GetSubMeshCount();
+ int count = 0;
+ for( int m = 0; m < submeshes; ++m ) {
+ const SubMesh& sub = m_SubMeshes[m];
+ count += ::GetPrimitiveCount(sub.indexCount, sub.topology, false);
+ }
+ return count;
+}
+
+int Mesh::CalculateTriangleCount() const
+{
+ int submeshes = GetSubMeshCount();
+ int count = 0;
+ for( int m = 0; m < submeshes; ++m )
+ {
+ const SubMesh& sub = m_SubMeshes[m];
+ if (sub.topology == kPrimitiveTriangleStripDeprecated)
+ {
+ const UInt16* indices = GetSubMeshBuffer16(m);
+ int triCount = CountTrianglesInStrip (indices, sub.indexCount);
+ count += triCount;
+ }
+ else if (sub.topology == kPrimitiveTriangles)
+ {
+ count += sub.indexCount / 3;
+ }
+ }
+ return count;
+}
+
// Returns a mesh instance owned by 'owner' (MeshFilter.mesh semantics):
// - If 'mesh' is already owned by 'owner', it is returned as-is.
// - If 'mesh' is NULL or has no vertex data, a fresh empty mesh is
//   (re)initialized and bound to 'owner'.
// - Otherwise a full copy is made, named "<name> Instance", and bound to
//   'owner'. In edit mode this leaks meshes, hence the warning below.
Mesh& Mesh::GetInstantiatedMesh (Mesh* mesh, Object& owner)
{
    if (NULL != mesh && mesh->m_Owner == PPtr<Object> (&owner))
        return *mesh;

    if (!IsWorldPlaying())
        ErrorStringObject("Instantiating mesh due to calling MeshFilter.mesh during edit mode. This will leak meshes. Please use MeshFilter.sharedMesh instead.", &owner);

    if (mesh == NULL || !mesh->HasVertexData ())
    {
        if (!mesh)
            mesh = NEW_OBJECT (Mesh);
        // Reset also covers the case where an existing but empty mesh is reused.
        mesh->Reset();

        mesh->SetName(owner.GetName());
        mesh->m_Owner = &owner;

        mesh->AwakeFromLoad(kInstantiateOrCreateFromCodeAwakeFromLoad);
        return *mesh;
    }

    Mesh* instance = NEW_OBJECT (Mesh);
    CopySerialized(*mesh, *instance);
    instance->SetNameCpp (Append (mesh->GetName (), " Instance"));
    instance->m_Owner = &owner;
    return *instance;
}
+
+const VertexStreamsLayout& Mesh::GetStreamsLayout() const
+{
+ if (!m_Skin.empty() || GetBlendShapeChannelCount() != 0)
+ return VertexDataInfo::kVertexStreamsSkinnedHotColdSplit;
+ else
+ return VertexDataInfo::kVertexStreamsDefault;
+}
+
+const VertexChannelsLayout& Mesh::GetChannelsLayout() const
+{
+ UInt8 compressed = m_StreamCompression;
+#if !UNITY_EDITOR
+ // Editor only does build step for compression and never draws float16 vertices
+ if (!gGraphicsCaps.has16BitFloatVertex)
+ {
+ compressed = kStreamCompressionDefault;
+ }
+#endif
+ switch (compressed)
+ {
+ default: // fall through
+ case kStreamCompressionDefault:
+ return VertexDataInfo::kVertexChannelsDefault;
+ case kStreamCompressionCompressed:
+ return VertexDataInfo::kVertexChannelsCompressed;
+ case kStreamCompressionCompressedAggressive:
+ return VertexDataInfo::kVertexChannelsCompressedAggressive;
+ }
+}
+
// Lazily adds (and default-initializes) vertex channels that the shader wants
// but the mesh lacks, before the buffer is handed to the GPU. Currently this
// covers vertex colors everywhere, and tangents on Pepper (NaCl).
void Mesh::InitVertexBufferData( UInt32 wantedChannels )
{
#if GFX_CAN_UNLOAD_MESH_DATA
    // If data was uploaded and freed we cannot update it.
    if (!HasVertexData())
        return;
#endif
    UInt32 presentChannels = GetAvailableChannels ();

    // Modify the vertex buffer before fetching any channel pointers, as modifying the format reallocates the buffer and pointers
    // are invalidated. Due to possible format changes, also fetch the stride sizes only after buffer reformatting.
    unsigned initChannels = 0;

    // Silently create an all-white color array if shader wants colors, but mesh does not have them.
    // On D3D, some runtime/driver combinations will crash if a vertex shader wants colors but does not
    // have them (e.g. Vista drivers for Intel 965). In other cases it will default to white for fixed function
    // pipe, and to undefined value for vertex shaders, which is not good either.
    if( (wantedChannels & VERTEX_FORMAT1(Color)) && !(presentChannels & VERTEX_FORMAT1(Color)) )
        initChannels |= VERTEX_FORMAT1(Color);

#if UNITY_PEPPER
    // Pepper OpenGL implementation fails to draw anything if any channel is missing.
    if( (wantedChannels & VERTEX_FORMAT1(Tangent)) && !(presentChannels & VERTEX_FORMAT1(Tangent)) )
        initChannels |= VERTEX_FORMAT1(Tangent);
#endif

    // Only reformat when at least one of the channels to initialize is missing.
    if ((initChannels & presentChannels) != initChannels)
    {
        FormatVertices (presentChannels | initChannels);
        InitChannelsToDefault (0, GetVertexCount (), initChannels);
    }
}
+
// Fills 'buffer' with channel/stream descriptors and the raw vertex memory
// for VBO upload, after lazily adding any shader-required channels (see
// InitVertexBufferData). Missing texcoord channels are aliased to the last
// valid one so the GPU always has something to sample.
void Mesh::GetVertexBufferData( VertexBufferData& buffer, UInt32 wantedChannels )
{
    InitVertexBufferData(wantedChannels);

    for (int i = 0; i < kShaderChannelCount; i++)
        buffer.channels[i] = m_VertexData.GetChannel(i);

    for (int i = 0; i < kMaxVertexStreams; i++)
        buffer.streams[i] = m_VertexData.GetStream(i);

    // Walk the texcoord slots, remembering the last valid one and aliasing
    // any invalid slot to it.
    int srcTexcoord = kShaderChannelNone;
    for (int i = kShaderChannelTexCoord0; i <= kShaderChannelTexCoord1; i++)
    {
        if (buffer.channels[i].IsValid())
        {
            // We have a valid texcoord
            srcTexcoord = i;
            continue;
        }
        UInt32 channelMask = 1 << i;
        if (srcTexcoord != kShaderChannelNone)
        {
            // Replicate last valid texture coord
            const ChannelInfo& srcChannel = buffer.channels[srcTexcoord];
            buffer.channels[i] = srcChannel;
            buffer.streams[srcChannel.stream].channelMask |= channelMask;
        }
    }

    // Data pointer can be NULL if we are only updating declaration of uploaded VBO
    buffer.buffer = m_VertexData.GetDataPtr();
    buffer.bufferSize = m_VertexData.GetDataSize();
    buffer.vertexCount = GetVertexCount();

#if UNITY_EDITOR
    #define LogStringObjectEditor(x) LogStringObject(Format(x, GetName()),this)

    // Editor-only diagnostics: warn when the shader wants a channel the mesh
    // cannot provide.
    if (Camera::ShouldShowChannelErrors(GetCurrentCameraPtr()))
    {
        const ChannelInfo* channels = buffer.channels;

        if ((wantedChannels & VERTEX_FORMAT1(Tangent)) && !channels[kShaderChannelTangent].IsValid())
            LogStringObjectEditor ("Shader wants tangents, but the mesh %s doesn't have them");

        if ((wantedChannels & VERTEX_FORMAT1(Normal)) && !channels[kShaderChannelNormal].IsValid())
            LogStringObjectEditor ("Shader wants normals, but the mesh %s doesn't have them");

        if ((wantedChannels & VERTEX_FORMAT1(TexCoord0)) && !channels[kShaderChannelTexCoord0].IsValid())
            LogStringObjectEditor ("Shader wants texture coordinates, but the mesh %s doesn't have them");

        if ((wantedChannels & VERTEX_FORMAT1(TexCoord1)) && !channels[kShaderChannelTexCoord1].IsValid())
            LogStringObjectEditor ("Shader wants secondary texture coordinates, but the mesh %s doesn't have any");

        if ((wantedChannels & VERTEX_FORMAT1(Color)) && !channels[kShaderChannelColor].IsValid())
            LogStringObjectEditor ("Shader wants vertex colors, and failed to create a vertex color array");
    }
    #undef LogStringObjectEditor
#endif

#if UNITY_PS3
    // PS3 renders per-partition; if no precomputed partition info exists,
    // synthesize one trivial partition per submesh.
    if(m_PartitionInfos.empty())
    {
        int submeshCount = m_SubMeshes.size();
        for (int submesh=0; submesh<submeshCount; submesh++)
        {
            SubMesh& sm = GetSubMeshFast(submesh);

            MeshPartitionInfo partInfo;
            partInfo.submeshStart = submesh;
            partInfo.partitionCount = 1;
            buffer.partInfo.push_back(partInfo);

            MeshPartition part;
            part.vertexCount = sm.vertexCount;
            part.vertexOffset = 0;
            part.indexCount = sm.indexCount;
            part.indexByteOffset = sm.firstByte;
            buffer.partitions.push_back(part);;
        }
    }
    else
    {
        buffer.partInfo = m_PartitionInfos;
        buffer.partitions = m_Partitions;
    }

#endif

    // NOTE(review): redundant — vertexCount was already assigned above.
    buffer.vertexCount = GetVertexCount ();
}
+
+void Mesh::GetIndexBufferData (IndexBufferData& buffer)
+{
+ DebugAssert (!m_IndexBuffer.empty());
+ buffer.indices = m_IndexBuffer.empty() ? NULL : (void*)&m_IndexBuffer[0];
+
+ ///@TODO: HACK for now to get index buffers working, without changing a lot of vbo code
+ // We should be passing the byte size not the number of indices
+ buffer.count = GetTotalndexCount();
+ buffer.hasTopologies = 0;
+ for (size_t i = 0, n = m_SubMeshes.size(); i < n; ++i)
+ {
+ buffer.hasTopologies |= (1<<m_SubMeshes[i].topology);
+ }
+}
+
PROFILER_INFORMATION(gCreateVBOProfile, "Mesh.CreateVBO", kProfilerRender);
PROFILER_INFORMATION(gAwakeFromLoadMesh, "Mesh.AwakeFromLoad", kProfilerLoading);
PROFILER_INFORMATION(gUploadMeshDataMesh, "Mesh.UploadMeshData", kProfilerLoading);

// Returns the VBO shared by all renderers of this mesh, (re)creating or
// updating it when new channels are requested or vertex/index data is dirty.
VBO* Mesh::GetSharedVBO( UInt32 wantedChannels )
{
    // Some badly written shaders have no Bind statements in the vertex shaders parts;
    // and only happened to work before by accident. If requiredChannels turns out to be
    // zero, let's pretend it did request at least position.
    if (wantedChannels == 0)
        wantedChannels = (1<<kShaderChannelVertex);

    UInt32 newChannels = wantedChannels | m_ChannelsInVBO;
    bool addedChannels = newChannels != m_ChannelsInVBO;

#if GFX_CAN_UNLOAD_MESH_DATA
    // Non-readable mesh whose CPU copy was discarded: the uploaded VBO is all we have.
    if (!m_IsReadable && !m_KeepVertices && m_VBO)
    {
        // Everything is already prepared, just return VBO
        return m_VBO;
    }
#endif

    // Buffers lost to a device reset (or dynamic buffers) must be re-uploaded.
    if ((GFX_ALL_BUFFERS_CAN_BECOME_LOST || m_IsDynamic) && m_VBO && m_VBO->IsVertexBufferLost())
        m_VerticesDirty = true;
    if (GFX_ALL_BUFFERS_CAN_BECOME_LOST && m_VBO && m_VBO->IsIndexBufferLost())
        m_IndicesDirty = true;

    if (addedChannels || m_VerticesDirty || m_IndicesDirty)
        CreateSharedVBO(wantedChannels);

    return m_VBO;
}
+
// (Re)creates or refreshes the shared VBO: allocates it on first use, uploads
// vertex data when channels changed or vertices are dirty, and uploads index
// data when indices are dirty. A mesh with no indices deletes its VBO.
void Mesh::CreateSharedVBO( UInt32 wantedChannels )
{
    if (m_IndexBuffer.empty())
    {
        // Nothing to draw — release any previously created buffer.
        if (m_VBO)
        {
            GetGfxDevice().DeleteVBO(m_VBO);
            m_VBO = NULL;
        }
        return;
    }

    PROFILER_BEGIN(gCreateVBOProfile, this)
    SET_ALLOC_OWNER(this);

    if (!m_VBO)
    {
        m_VBO = GetGfxDevice().CreateVBO();
        m_VBO->SetHideFromRuntimeStats(m_HideFromRuntimeStats);
    }

    // Channels accumulate: once uploaded, a channel stays in the VBO.
    UInt32 newChannels = wantedChannels | m_ChannelsInVBO;
    if (m_VerticesDirty || newChannels != m_ChannelsInVBO)
    {
        if (m_IsDynamic)
            m_VBO->SetVertexStreamMode(0, VBO::kStreamModeDynamic);

        VertexBufferData vertexBuffer;
        GetVertexBufferData (vertexBuffer, newChannels);
        m_VBO->UpdateVertexData (vertexBuffer);
    }

    if (m_IndicesDirty)
    {
        // TODO: probably add separate script access to set vertex/index dynamic
        if (m_IsDynamic)
            m_VBO->SetIndicesDynamic(true);

        IndexBufferData indexBuffer;
        GetIndexBufferData (indexBuffer);
        m_VBO->UpdateIndexData (indexBuffer);
    }

    m_VerticesDirty = false;
    m_IndicesDirty = false;
    m_ChannelsInVBO = newChannels;

    PROFILER_END
}
+
// Uploads this mesh's vertex and index data into an externally owned VBO
// (unlike CreateSharedVBO, which manages m_VBO). Returns false when there is
// no index data to upload.
bool Mesh::CopyToVBO ( UInt32 wantedChannels, VBO& vbo )
{
    if( m_IndexBuffer.empty() )
        return false;

    PROFILER_BEGIN(gCreateVBOProfile, this)

    VertexBufferData vertexBuffer;
    GetVertexBufferData( vertexBuffer, wantedChannels );
    vbo.UpdateVertexData( vertexBuffer );

    IndexBufferData indexBuffer;
    GetIndexBufferData (indexBuffer);
    vbo.UpdateIndexData (indexBuffer);
#if UNITY_XENON
    // Xenon keeps extra UV channels on the VBO itself; carry them over.
    if( m_VBO )
        vbo.CopyExtraUvChannels( m_VBO );
#endif
    PROFILER_END

    return true;
}
+
+
+void Mesh::UnloadVBOFromGfxDevice()
+{
+ if (m_VBO)
+ {
+ WaitOnRenderThreadUse();
+ GetGfxDevice().DeleteVBO (m_VBO);
+ }
+ m_VBO = NULL;
+ m_ChannelsInVBO = 0;
+ m_VerticesDirty = m_IndicesDirty = true;
+#if ENABLE_MULTITHREADED_CODE
+ m_CurrentCPUFence = 0;
+ m_WaitOnCPUFence = false;
+#endif
+}
+
+void Mesh::ReloadVBOToGfxDevice()
+{
+ const bool needReloadFromDisk = (!m_IsReadable && !HasVertexData());
+ if (needReloadFromDisk)
+ {
+ GetPersistentManager().ReloadFromDisk(this);
+ }
+ else
+ {
+ m_ChannelsInVBO = 0;
+ m_VerticesDirty = m_IndicesDirty = true;
+ }
+ SwizzleVertexColorsIfNeeded();
+}
+
+
+bool Mesh::ExtractTriangle (UInt32 face, UInt32* indices) const
+{
+ ///@TODO: OPTIMIZE this away
+ TemporaryIndexContainer triangles;
+ GetTriangles(triangles);
+ if (face * 3 > triangles.size ())
+ return false;
+
+ indices[0] = triangles[face * 3 + 0];
+ indices[1] = triangles[face * 3 + 1];
+ indices[2] = triangles[face * 3 + 2];
+ return true;
+}
+
+static void TransformNormals (const Matrix3x3f& invTranspose, StrideIterator<Vector3f> inNormals, StrideIterator<Vector3f> inNormalsEnd, StrideIterator<Vector3f> outNormals)
+{
+ for (; inNormals != inNormalsEnd; ++inNormals, ++outNormals)
+ *outNormals = NormalizeSafe (invTranspose.MultiplyVector3 (*inNormals));
+}
+
+static void TransformTangents (const Matrix3x3f& invTranspose, StrideIterator<Vector4f> inTangents, StrideIterator<Vector4f> inTangentsEnd, StrideIterator<Vector4f> outTangents)
+{
+ for ( ; inTangents != inTangentsEnd; ++inTangents, ++outTangents)
+ {
+ Vector3f tangent = Vector3f(inTangents->x,inTangents->y,inTangents->z);
+ Vector3f normalized = NormalizeSafe (invTranspose.MultiplyVector3 (tangent));
+ *outTangents = Vector4f(normalized.x, normalized.y ,normalized.z, inTangents->w);
+ }
+}
+
// Rebuilds this mesh as a transformed copy of 'mesh': positions go through
// the full 3x4 transform, normals/tangents through the inverse-transpose;
// indices, submeshes, skin, UVs, and colors are copied verbatim.
// NOTE(review): m_LocalAABB is copied untransformed from the source mesh —
// confirm callers recalculate bounds afterwards if they need them in the
// new space.
void Mesh::CopyTransformed (const Mesh& mesh, const Matrix4x4f& transform)
{
    int vertexCount = mesh.GetVertexCount();
    unsigned outVertexFormat = mesh.GetAvailableChannelsForRendering ();

    ResizeVertices(mesh.GetVertexCount (), outVertexFormat);

    if (outVertexFormat & VERTEX_FORMAT1(Vertex))
        TransformPoints3x4 (transform,
            (Vector3f*)mesh.GetChannelPointer (kShaderChannelVertex), mesh.GetStride (kShaderChannelVertex),
            (Vector3f*)GetChannelPointer (kShaderChannelVertex), GetStride (kShaderChannelVertex),
            vertexCount);

    // Directions must be transformed by the inverse-transpose to stay
    // perpendicular under non-uniform scale.
    Matrix3x3f invTranspose3x3 = Matrix3x3f(transform); invTranspose3x3.InvertTranspose ();

    if (outVertexFormat & VERTEX_FORMAT1(Normal))
        TransformNormals (invTranspose3x3, mesh.GetNormalBegin (), mesh.GetNormalEnd (), GetNormalBegin ());
    if (outVertexFormat & VERTEX_FORMAT1(Tangent))
        TransformTangents (invTranspose3x3, mesh.GetTangentBegin (), mesh.GetTangentEnd (), GetTangentBegin ());

    m_IndexBuffer = mesh.m_IndexBuffer;
    m_SubMeshes = mesh.m_SubMeshes;
    m_Skin = mesh.m_Skin;
    if (outVertexFormat & VERTEX_FORMAT1(TexCoord0))
        strided_copy (mesh.GetUvBegin (0), mesh.GetUvEnd (0), GetUvBegin (0));
    if (outVertexFormat & VERTEX_FORMAT1(TexCoord1))
        strided_copy (mesh.GetUvBegin (1), mesh.GetUvEnd (1), GetUvBegin (1));
    if (outVertexFormat & VERTEX_FORMAT1(Color))
        strided_copy (mesh.GetColorBegin (), mesh.GetColorEnd (), GetColorBegin ());
    m_VertexColorsSwizzled = mesh.m_VertexColorsSwizzled;
    m_LocalAABB = mesh.m_LocalAABB;

    SetChannelsDirty( outVertexFormat, true );
    ClearSkinCache();
}
+
+
+void Mesh::SetChannelsDirty (unsigned vertexChannelsChanged, bool indices)
+{
+ SetDirty();
+
+ m_VerticesDirty |= vertexChannelsChanged != 0;
+ m_IndicesDirty |= indices;
+
+ // We should regenreate physics mesh only if verex data have changed
+ if ((vertexChannelsChanged & VERTEX_FORMAT1(Vertex)) || indices)
+ {
+ m_CollisionMesh.VertexDataHasChanged();
+ m_CachedBonesAABB.clear();
+ }
+ NotifyObjectUsers( kDidModifyMesh );
+}
+
+bool Mesh::SetBoneWeights (const BoneInfluence* v, int count)
+{
+ WaitOnRenderThreadUse();
+ ClearSkinCache();
+ if (count == 0)
+ {
+ m_Skin.clear();
+ UpdateVertexFormat();
+ return true;
+ }
+
+ if (count != GetVertexCount ())
+ {
+ ErrorString("Mesh.boneWeights is out of bounds. The supplied array needs to be the same size as the Mesh.vertices array.");
+ return false;
+ }
+ m_Skin.assign(v, v + count);
+ SetChannelsDirty (0, false);
+ UpdateVertexFormat();
+
+ return true;
+}
+
// Computes, per bone, the bind-pose-space AABB of all vertices that bone
// influences (weight > 0). When blend shapes exist, each vertex contributes
// a per-component min/max over all its blend shape offsets, so the bounds
// cover every possible blended position.
// NOTE(review): bindPoseCount is unused; bone indices are trusted to be
// < bindPoseCount (asserted by the caller via GetMaxBoneIndex).
static void ComputeBoneBindPoseAABB (const Matrix4x4f* bindPoses, size_t bindPoseCount, const StrideIterator<Vector3f> vertices, const BoneInfluence* influences, size_t vertexCount, const BlendShapeVertices& blendShapeVertices, MinMaxAABB* outputBounds)
{
    if (blendShapeVertices.empty())
    {
        // Simple case: encapsulate each vertex into the bounds of every bone
        // that influences it.
        for(int v=0;v<vertexCount;v++)
        {
            const Vector3f& vert = vertices[v];
            for (int i = 0; i < 4; i++)
            {
                if(influences[v].weight[i] > 0.0f)
                {
                    const UInt32 boneIndex = influences[v].boneIndex[i];

                    outputBounds[boneIndex].Encapsulate(bindPoses[boneIndex].MultiplyPoint3(vert));
                }
            }
        }
    }
    else
    {
        // Per-vertex component-wise min/max positions over all blend shapes.
        Vector3f* minVertices;
        ALLOC_TEMP(minVertices, Vector3f, vertexCount);
        Vector3f* maxVertices;
        ALLOC_TEMP(maxVertices, Vector3f, vertexCount);

        strided_copy(vertices, vertices + vertexCount, minVertices);
        strided_copy(vertices, vertices + vertexCount, maxVertices);

        for (int i=0;i<blendShapeVertices.size();i++)
        {
            // Blend shape vertices store sparse offsets relative to the base pose.
            int index = blendShapeVertices[i].index;
            Vector3f pos = blendShapeVertices[i].vertex + vertices[index];
            maxVertices[index] = max (maxVertices[index], pos);
            minVertices[index] = min (minVertices[index], pos);
        }

        for(int v=0;v<vertexCount;v++)
        {
            for (int i = 0; i < 4; i++)
            {
                if(influences[v].weight[i] > 0.0f)
                {
                    const UInt32 boneIndex = influences[v].boneIndex[i];
                    outputBounds[boneIndex].Encapsulate(bindPoses[boneIndex].MultiplyPoint3(minVertices[v]));
                    outputBounds[boneIndex].Encapsulate(bindPoses[boneIndex].MultiplyPoint3(maxVertices[v]));
                }
            }
        }
    }
}
+
+const Mesh::AABBContainer& Mesh::GetCachedBonesBounds()
+{
+ // Use cached result if it has the correct size (including empty)
+ if (m_CachedBonesAABB.size() == m_Bindpose.size())
+ return m_CachedBonesAABB;
+
+ Assert(GetMaxBoneIndex() < m_Bindpose.size());
+
+ m_CachedBonesAABB.resize_initialized(m_Bindpose.size(), MinMaxAABB());
+
+ ComputeBoneBindPoseAABB (GetBindposes(), m_CachedBonesAABB.size(), GetVertexBegin(), m_Skin.begin(), GetVertexCount(), m_Shapes.vertices, &m_CachedBonesAABB[0]);
+
+ return m_CachedBonesAABB;
+}
+
+void Mesh::ClearSkinCache ()
+{
+ m_CachedBonesAABB.clear();
+ m_CachedSkin2.clear();
+ m_CachedSkin1.clear();
+ m_MaxBoneIndex = -1;
+}
+
+// Returns the highest bone index referenced by the skin (0 for an unskinned
+// mesh). The result is memoized in m_MaxBoneIndex; -1 means "not computed yet".
+int Mesh::GetMaxBoneIndex ()
+{
+	if (m_MaxBoneIndex != -1)
+		return m_MaxBoneIndex;
+
+	int highest = 0;
+	for (size_t v = 0; v < m_Skin.size(); ++v)
+	{
+		const BoneInfluence& influence = m_Skin[v];
+		for (int b = 0; b < 4; ++b)
+			highest = max(highest, influence.boneIndex[b]);
+	}
+
+	m_MaxBoneIndex = highest;
+	return m_MaxBoneIndex;
+}
+
+/// Returns skin weights converted to the requested number of influences per
+/// vertex. count == 4 returns the raw BoneInfluence array; count == 2 and
+/// count == 1 return lazily built caches (BoneInfluence2 with renormalized
+/// weights / most-significant bone index only).
+/// Returns NULL when the mesh has no skin or count is unsupported.
+void* Mesh::GetSkinInfluence (int count)
+{
+	if (m_Skin.empty())
+		return NULL;
+
+	BoneInfluence* bones4 = &m_Skin[0];
+	if (count == 1)
+	{
+		if (!m_CachedSkin1.empty())
+			return &m_CachedSkin1[0];
+
+		// Cache 1 bone skin weights: keep only the first (most significant) bone.
+		int size = m_Skin.size();
+		m_CachedSkin1.resize_uninitialized(size);
+
+		int* bones1 = &m_CachedSkin1[0];
+		for (int i=0;i<size;i++)
+			bones1[i] = bones4[i].boneIndex[0];
+		return bones1;
+	}
+	else if (count == 2)
+	{
+		if (!m_CachedSkin2.empty ())
+			return &m_CachedSkin2[0];
+
+		// Cache 2 bone skin weights, renormalized so the two weights sum to 1.
+		int size = m_Skin.size();
+		m_CachedSkin2.resize_uninitialized(size);
+
+		BoneInfluence2* bones2 = &m_CachedSkin2[0];
+		for (int i=0;i<size;i++)
+		{
+			bones2[i].boneIndex[0] = bones4[i].boneIndex[0];
+			bones2[i].boneIndex[1] = bones4[i].boneIndex[1];
+
+			float sum = bones4[i].weight[0] + bones4[i].weight[1];
+			if (sum > 0.0F)
+			{
+				float invSum = 1.0F / sum;
+				bones2[i].weight[0] = bones4[i].weight[0] * invSum;
+				bones2[i].weight[1] = bones4[i].weight[1] * invSum;
+			}
+			else
+			{
+				// Degenerate influence (both weights zero): the unguarded
+				// division produced inf/NaN weights here. Assign everything
+				// to the first bone instead.
+				bones2[i].weight[0] = 1.0F;
+				bones2[i].weight[1] = 0.0F;
+			}
+		}
+		return bones2;
+	}
+	else if (count == 4)
+	{
+		return bones4;
+	}
+
+	// Unsupported influence count.
+	return NULL;
+}
+
+
+// Runtime memory accounting: base object size plus, in profiler builds, the
+// memory held by the shared VBO (if one has been created).
+int Mesh::GetRuntimeMemorySize () const
+{
+	int size = Super::GetRuntimeMemorySize();
+
+	#if ENABLE_PROFILER
+	if (m_VBO)
+		size += m_VBO->GetRuntimeMemorySize();
+	#endif
+
+	return size;
+}
+
+
+// Physics integration: forwards to the collision-mesh cache, which builds or
+// reuses the shared PhysX triangle mesh for this mesh.
+void* Mesh::GetSharedNxMesh ()
+{
+	return m_CollisionMesh.GetSharedNxMesh (*this);
+}
+
+// Physics integration: forwards to the collision-mesh cache, which builds or
+// reuses the shared PhysX convex mesh for this mesh.
+void* Mesh::GetSharedNxConvexMesh ()
+{
+	return m_CollisionMesh.GetSharedNxConvexMesh (*this);
+}
+
+// Pushes mesh data towards the GPU: refreshes the vertex format, optionally
+// creates the shared VBO, and — on platforms that support it — frees the
+// CPU-side copies of vertex/index data for non-readable, non-skinned meshes.
+// markNoLongerReadable permanently clears m_IsReadable before uploading.
+void Mesh::UploadMeshData(bool markNoLongerReadable)
+{
+	if(markNoLongerReadable)
+		m_IsReadable = false;
+
+	ClearSkinCache();
+	UpdateVertexFormat();
+
+	// prepare VBO
+	UInt32 channelMask = GetAvailableChannelsForRendering();
+
+	// Create color channel in case it's needed by shader (and we can't patch it)
+#if GFX_CAN_UNLOAD_MESH_DATA
+	bool unloadData = !m_IsReadable && m_Skin.empty();
+	if (unloadData && !m_KeepVertices)
+		channelMask |= VERTEX_FORMAT1(Color);
+#endif
+
+	// Shared VBO is not required for skinned meshes (unless used as non-skinned)
+	if (m_Skin.empty())
+		CreateSharedVBO(channelMask);
+
+#if GFX_CAN_UNLOAD_MESH_DATA
+	if (unloadData)
+	{
+		// Only drop CPU copies once the VBO no longer reads from them.
+		if (!m_KeepVertices && m_VBO && !m_VBO->IsUsingSourceVertices())
+		{
+			Assert(m_Skin.empty());
+			m_VertexData.Deallocate();
+			m_VBO->UnloadSourceVertices();
+		}
+		if (!m_KeepIndices && m_VBO && !m_VBO->IsUsingSourceIndices())
+		{
+#if UNITY_METRO
+			m_IndexBuffer.clear();
+			m_IndexBuffer.shrink_to_fit();
+#else
+			// On Metro this throws "Expression: vector containers incompatible for swap" when compiling in VS 2013, works okay if compiling in VS 2012
+			// Case 568418
+			IndexContainer emptyIndices;
+			m_IndexBuffer.swap(emptyIndices);
+#endif
+		}
+	}
+#endif
+}
+
+// Called after deserialization: wakes the collision mesh, uploads data to the
+// GPU (dropping CPU copies when the mesh is not readable) and assigns a
+// process-unique mesh ID if one has not been allocated yet.
+void Mesh::AwakeFromLoad(AwakeFromLoadMode awakeMode)
+{
+	PROFILER_AUTO(gAwakeFromLoadMesh, this)
+
+	Super::AwakeFromLoad(awakeMode);
+	m_CollisionMesh.AwakeFromLoad(awakeMode);
+
+	UploadMeshData(!m_IsReadable);
+
+	if (m_InternalMeshID == 0)
+		m_InternalMeshID = s_MeshIDGenerator.AllocateID ();
+}
+
+// Loader-thread part of awakening: only the collision mesh does threaded work;
+// everything GPU-related waits for the main-thread AwakeFromLoad.
+void Mesh::AwakeFromLoadThreaded()
+{
+	Super::AwakeFromLoadThreaded();
+	m_CollisionMesh.AwakeFromLoadThreaded(*this);
+}
+
+// Flags the mesh as frequently updated; m_IsDynamic is consulted elsewhere
+// (e.g. when choosing VBO usage).
+void Mesh::MarkDynamic()
+{
+	// Optimize for frequent updates
+	m_IsDynamic = true;
+}
+
+void Mesh::UpdateVertexFormat()
+{
+	// Make sure vertex streams are in the format we want for rendering
+	// This will also handle decompression of unsupported vertex formats
+	FormatVertices(GetAvailableChannels());
+	SwizzleVertexColorsIfNeeded();
+}
+
+// Meshes are excluded from garbage-collection dependency tracking.
+bool Mesh::ShouldIgnoreInGarbageDependencyTracking ()
+{
+	return true;
+}
+
+// Bitmask of shader channels actually present in the vertex data.
+UInt32 Mesh::GetAvailableChannels() const
+{
+	return m_VertexData.GetChannelMask ();
+}
+
+// Channels exposed for rendering. Currently identical to
+// GetAvailableChannels(): every channel present in the vertex data.
+UInt32 Mesh::GetAvailableChannelsForRendering() const
+{
+	return m_VertexData.GetChannelMask ();
+}
+
+// A mesh qualifies for dynamic batching only when every submesh stays below
+// the vertex-count threshold.
+bool Mesh::IsSuitableSizeForDynamicBatching () const
+{
+	const size_t submeshCount = GetSubMeshCount();
+	for (size_t sm = 0; sm < submeshCount; ++sm)
+	{
+		if (m_SubMeshes[sm].vertexCount > kDynamicBatchingVerticesThreshold)
+			return false;
+	}
+	return true;
+}
+
+// Sanity checks after load/edit: deprecated triangle-strip topology must have
+// been destripified before this point.
+void Mesh::CheckConsistency()
+{
+	Super::CheckConsistency();
+
+	for (int i = 0; i < m_SubMeshes.size(); ++i)
+	{
+		Assert(m_SubMeshes[i].topology != kPrimitiveTriangleStripDeprecated);
+	}
+}
+
+// Replaces the blend-shape data and notifies users of the mesh.
+// NOTE(review): despite the name, this COPIES 'shapes' rather than swapping —
+// the swap is commented out below, so the caller's data is left untouched.
+void Mesh::SwapBlendShapeData (BlendShapeData& shapes)
+{
+	// Block until the render thread is done reading the old data.
+	WaitOnRenderThreadUse();
+
+//	swap (m_Shapes, shapes);
+	m_Shapes = shapes;
+
+	NotifyObjectUsers( kDidModifyMesh );
+}
diff --git a/Runtime/Filters/Mesh/LodMesh.h b/Runtime/Filters/Mesh/LodMesh.h
new file mode 100644
index 0000000..41fcf74
--- /dev/null
+++ b/Runtime/Filters/Mesh/LodMesh.h
@@ -0,0 +1,509 @@
+#ifndef LODMESH_H
+#define LODMESH_H
+
+#include "Runtime/BaseClasses/NamedObject.h"
+#include "Runtime/Geometry/AABB.h"
+#include "Runtime/Math/Vector2.h"
+#include "Runtime/Math/Vector4.h"
+#include "Mesh.h"
+#include "Runtime/Math/Color.h"
+#include <string>
+#include <vector>
+#include "Runtime/BaseClasses/MessageIdentifier.h"
+#include "Runtime/Shaders/VBO.h"
+#include "CompressedMesh.h"
+#include "VertexData.h"
+#include "Runtime/Dynamics/CollisionMeshData.h"
+#include "MeshBlendShape.h"
+#include "Runtime/Misc/Allocator.h"
+#include "Runtime/Camera/IntermediateUsers.h"
+
+class IntermediateRenderer;
+
+// One draw range within a Mesh: a span of the shared index buffer
+// (firstByte/indexCount), the vertex range it references
+// (firstVertex/vertexCount), its primitive topology and its local bounds.
+struct SubMesh
+{
+	UInt32 firstByte;            // byte offset into the mesh index buffer
+	UInt32 indexCount;           // number of indices in this submesh
+	GfxPrimitiveType topology;
+
+	UInt32 firstVertex;          // first vertex referenced by the indices
+	UInt32 vertexCount;          // number of vertices referenced
+	AABB localAABB;              // bounds of this submesh in mesh space
+
+	SubMesh ()
+	{
+		firstByte = 0;
+		indexCount = 0;
+		topology = kPrimitiveTriangles;
+		firstVertex = 0;
+		vertexCount = 0;
+		localAABB = AABB (Vector3f::zero, Vector3f::zero);
+	}
+
+	DECLARE_SERIALIZE_NO_PPTR (SubMesh)
+
+#if SUPPORT_SERIALIZED_TYPETREES
+	// Reads data written with the broken 3.5 field layout (see .cpp/.h below).
+	template<class TransferFunction>
+	void TransferWorkaround35SerializationFuckup (TransferFunction& transfer);
+#endif
+};
+
+/// typedef for tangent space lighting rotations
+typedef std::vector<DeprecatedTangent, STL_ALLOCATOR(kMemGeometry, DeprecatedTangent) > DeprecatedTangentsArray;
+
+// Serialization for SubMesh. Version 2 stores the topology enum; version 1
+// stored only an isTriStrip flag, which is mapped back onto the enum here.
+template<class TransferFunc>
+void SubMesh::Transfer (TransferFunc& transfer)
+{
+	#if SUPPORT_SERIALIZED_TYPETREES
+	// Data written by the broken 3.5 layout uses a different field order.
+	if (transfer.GetFlags() & kWorkaround35MeshSerializationFuckup)
+	{
+		TransferWorkaround35SerializationFuckup (transfer);
+		return;
+	}
+	#endif
+
+	transfer.SetVersion (2);
+	TRANSFER(firstByte);
+	TRANSFER(indexCount);
+	TRANSFER_ENUM(topology);
+	TRANSFER(firstVertex);
+	TRANSFER(vertexCount);
+	TRANSFER(localAABB);
+	if (transfer.IsOldVersion(1))
+	{
+		UInt32 triStrip;
+		transfer.Transfer (triStrip, "isTriStrip");
+		topology = triStrip ? kPrimitiveTriangleStripDeprecated : kPrimitiveTriangles;
+	}
+}
+
+#if SUPPORT_SERIALIZED_TYPETREES
+// Field order/content as written by the broken 3.5 serializer: isTriStrip and
+// a (discarded) triangleCount instead of the topology enum.
+template<class TransferFunc>
+void SubMesh::TransferWorkaround35SerializationFuckup (TransferFunc& transfer)
+{
+	TRANSFER(firstByte);
+	TRANSFER(indexCount);
+
+	UInt32 triStrip;
+	transfer.Transfer (triStrip, "isTriStrip");
+	topology = triStrip ? kPrimitiveTriangleStripDeprecated : kPrimitiveTriangles;
+
+	// Read and drop the redundant triangle count.
+	UInt32 triangleCount;
+	transfer.Transfer (triangleCount, "triangleCount");
+
+	TRANSFER(firstVertex);
+	TRANSFER(vertexCount);
+	TRANSFER(localAABB);
+}
+#endif
+
+// Serialization for MeshPartition (declared elsewhere; PS3/editor only).
+template<class TransferFunc>
+void MeshPartition::Transfer (TransferFunc& transfer)
+{
+	TRANSFER(vertexCount);
+	TRANSFER(vertexOffset);
+	TRANSFER(indexCount);
+	TRANSFER(indexByteOffset);
+}
+
+// Serialization for MeshPartitionInfo (declared elsewhere; PS3/editor only).
+template<class TransferFunc>
+void MeshPartitionInfo::Transfer (TransferFunc& transfer)
+{
+	TRANSFER(submeshStart);
+	TRANSFER(partitionCount);
+}
+
+// Core mesh asset: owns interleaved vertex data, the index buffer, submesh
+// draw ranges, optional skinning data (weights, bind poses, per-bone caches),
+// blend shapes and the physics collision-mesh cache, plus the shared VBO used
+// for rendering.
+class EXPORT_COREMODULE Mesh : public NamedObject
+{
+public:
+	enum
+	{
+		// Bone-influence arrays are 16-byte aligned on mobile for SIMD skinning.
+		#if UNITY_IPHONE || UNITY_ANDROID || UNITY_BB10 || UNITY_TIZEN
+		alignBoneContainer = 16,
+		#else
+		alignBoneContainer = kDefaultMemoryAlignment,
+		#endif
+	};
+
+	//mircea@INFO PS3 doesn't render from VBOs hence m_VertexData and m_IndexBuffer *have* to be allocated with kMemVertexData.
+	typedef UNITY_VECTOR(kMemVertexData, UInt8) IndexContainer;
+	typedef UNITY_VECTOR(kMemGeometry, SubMesh) SubMeshContainer;
+	typedef dynamic_array<Matrix4x4f> MatrixContainer;
+	typedef dynamic_array<int> SkinContainer;
+	typedef UNITY_VECTOR(kMemGeometry, UInt32) CollisionTriangleContainer;
+	typedef dynamic_array<MinMaxAABB> AABBContainer;
+
+	typedef dynamic_array<BoneInfluence, alignBoneContainer> BoneInfluenceContainer;
+	typedef dynamic_array<BoneInfluence2, alignBoneContainer> BoneInfluence2Container;
+
+	typedef UNITY_TEMP_VECTOR(UInt32) TemporaryIndexContainer;
+
+#if UNITY_PS3 || UNITY_EDITOR
+	typedef UNITY_VECTOR(kMemVertexData, MeshPartition) MeshPartitionContainer;
+	typedef UNITY_VECTOR(kMemVertexData, MeshPartitionInfo) MeshPartitionInfoContainer;
+#endif
+
+	REGISTER_DERIVED_CLASS (Mesh, NamedObject)
+	DECLARE_OBJECT_SERIALIZE (Mesh)
+
+	Mesh (MemLabelId label, ObjectCreationMode mode);
+	// ~Mesh (); declared-by-macro
+
+public:
+
+	virtual int GetRuntimeMemorySize () const;
+
+	// --- VBO / GPU data management ---
+	VBO* GetSharedVBO( UInt32 wantedChannels );
+	bool CopyToVBO ( UInt32 wantedChannels, VBO& vbo );
+	void InitVertexBufferData ( UInt32 wantedChannels );
+	void GetVertexBufferData ( VertexBufferData& buffer, UInt32 wantedChannels );
+	void GetIndexBufferData (IndexBufferData& buffer);
+	void UnloadVBOFromGfxDevice();
+	void ReloadVBOToGfxDevice();
+
+
+	void AwakeFromLoad(AwakeFromLoadMode mode);
+	void AwakeFromLoadThreaded();
+	void UploadMeshData(bool markNoLongerReadable);
+
+	virtual bool MainThreadCleanup ();
+
+	void MarkDynamic();
+	void UpdateVertexFormat();
+
+	// --- Bounds ---
+	void SetBounds (const AABB& aabb );
+	const AABB& GetBounds () const { return m_LocalAABB; }
+
+	void SetBounds (unsigned submesh, const AABB& aabb );
+	const AABB& GetBounds (unsigned submesh) const
+	{
+		DebugAssertIf(submesh >= m_SubMeshes.size());
+		return m_SubMeshes[submesh].localAABB;
+	}
+
+	void Clear (bool keepVertexLayout);
+
+	/// Recalculate the bounding volume
+	void RecalculateBounds ();
+	void RecalculateSubmeshBounds (unsigned submesh);
+
+	// Recalculate normals
+	void RecalculateNormals();
+	void RecalculateNormalsWithHardAngle( float hardAngle );
+
+	// Validate that there are no out of bounds indices in the triangles
+	bool ValidateVertexCount (unsigned newVertexCount, const void* newTriangles, unsigned indexCount);
+
+	int GetVertexCount () const { return m_VertexData.GetVertexCount (); }
+
+	// Gets count in all submeshes.
+	int GetPrimitiveCount() const;
+	int CalculateTriangleCount() const; // ignores degenerates in strips
+
+	// NOTE: make sure to call SetChannelDirty and RecalculateBounds when changing the geometry!
+	StrideIterator<Vector3f> GetVertexBegin () const { return m_VertexData.MakeStrideIterator<Vector3f> (kShaderChannelVertex); }
+	StrideIterator<Vector3f> GetVertexEnd () const { return m_VertexData.MakeEndIterator<Vector3f> (kShaderChannelVertex); }
+
+	StrideIterator<Vector3f> GetNormalBegin () const { return m_VertexData.MakeStrideIterator<Vector3f> (kShaderChannelNormal); }
+	StrideIterator<Vector3f> GetNormalEnd () const { return m_VertexData.MakeEndIterator<Vector3f> (kShaderChannelNormal); }
+
+	StrideIterator<ColorRGBA32> GetColorBegin () const { return m_VertexData.MakeStrideIterator<ColorRGBA32> (kShaderChannelColor); }
+	StrideIterator<ColorRGBA32> GetColorEnd () const { return m_VertexData.MakeEndIterator<ColorRGBA32> (kShaderChannelColor); }
+
+	StrideIterator<Vector2f> GetUvBegin (int uvIndex = 0) const { return m_VertexData.MakeStrideIterator<Vector2f> ((ShaderChannel)(kShaderChannelTexCoord0 + uvIndex)); }
+	StrideIterator<Vector2f> GetUvEnd (int uvIndex = 0) const { return m_VertexData.MakeEndIterator<Vector2f> ((ShaderChannel)(kShaderChannelTexCoord0 + uvIndex)); }
+
+	StrideIterator<Vector4f> GetTangentBegin () const { return m_VertexData.MakeStrideIterator<Vector4f> (kShaderChannelTangent); }
+	StrideIterator<Vector4f> GetTangentEnd () const { return m_VertexData.MakeEndIterator<Vector4f> (kShaderChannelTangent); }
+
+	// --- Bulk channel copy-out / copy-in ---
+	void ExtractVertexArray (Vector3f* destination) const;
+	void ExtractNormalArray (Vector3f* destination) const;
+	void ExtractColorArray (ColorRGBA32* destination) const;
+	void ExtractColorArrayConverting (ColorRGBAf* destination) const;
+	void ExtractUvArray (int uvIndex, Vector2f* destination) const;
+	void ExtractTangentArray (Vector4f* destination) const;
+
+	void SetVertices (Vector3f const* data, size_t count);
+	void SetNormals (Vector3f const* data, size_t count);
+	void SetTangents (Vector4f const* data, size_t count);
+	void SetUv (int uvIndex, Vector2f const* data, size_t count);
+	void SetColors (ColorRGBA32 const* data, size_t count);
+	void SetColorsConverting (ColorRGBAf const* data, size_t count);
+
+	bool GetVertexColorsSwizzled() const { return m_VertexColorsSwizzled; }
+	void SetVertexColorsSwizzled(bool flag) { m_VertexColorsSwizzled = flag; }
+	bool HasVertexData () const { return m_VertexData.GetDataPtr () != NULL; }
+	void* GetVertexDataPointer () const { return m_VertexData.GetDataPtr (); }
+	size_t GetVertexDataSize () const { return m_VertexData.GetDataSize (); }
+	size_t GetVertexSize () const { return m_VertexData.GetVertexSize(); }
+
+	const void* GetChannelPointer (ShaderChannel channel) const { return m_VertexData.GetDataPtr () + m_VertexData.GetChannelOffset (channel); }
+	void* GetChannelPointer (ShaderChannel channel) { return m_VertexData.GetDataPtr () + m_VertexData.GetChannelOffset (channel); }
+	void* GetChannelPointer (ShaderChannel channel, size_t offsetInElements) { return m_VertexData.GetDataPtr () + m_VertexData.GetChannelOffset (channel) + offsetInElements * m_VertexData.GetChannelStride(channel); }
+	size_t GetStride (ShaderChannel channel) const { return m_VertexData.GetChannelStride(channel); }
+
+	bool IsAvailable (ShaderChannel channel) const { return m_VertexData.HasChannel (channel); }
+	// returns a bitmask of a newly created channels
+	UInt32 ResizeVertices (size_t count, UInt32 shaderChannels, const VertexStreamsLayout& streams, const VertexChannelsLayout& channels);
+	UInt32 ResizeVertices (size_t count, UInt32 shaderChannels) { return ResizeVertices(count, shaderChannels, GetStreamsLayout(), GetChannelsLayout()); }
+
+	// returns a bitmask of a newly created channels
+	UInt32 FormatVertices (UInt32 shaderChannels);
+	// initializes the specified channels to default values
+	void InitChannelsToDefault (unsigned begin, unsigned count, unsigned shaderChannels);
+
+	// --- Skinning ---
+	bool SetBoneWeights (const BoneInfluence* v, int count);
+	const BoneInfluence* GetBoneWeights () const { return m_Skin.empty() ? NULL : &m_Skin[0]; }
+	BoneInfluence* GetBoneWeights () { return m_Skin.empty() ? NULL : &m_Skin[0]; }
+	void ClearSkinCache ();
+	int GetMaxBoneIndex ();
+
+	const Matrix4x4f* GetBindposes () const { return m_Bindpose.empty() ? NULL : &m_Bindpose[0]; }
+	int GetBindposeCount () const { return m_Bindpose.size(); }
+	void SetBindposes (const Matrix4x4f* bindposes, int count);
+
+	// --- Indices / submeshes ---
+	bool SetIndices (const UInt32* indices, unsigned count, unsigned submesh, GfxPrimitiveType topology);
+	bool SetIndices (const UInt16* indices, unsigned count, unsigned submesh, GfxPrimitiveType topology);
+
+	void GetTriangles (TemporaryIndexContainer& triangles, unsigned submesh) const;
+	void GetTriangles (TemporaryIndexContainer& triangles) const;
+	void AppendTriangles (TemporaryIndexContainer& triangles, unsigned submesh) const;
+	void GetStrips (TemporaryIndexContainer& triangles, unsigned submesh) const;
+	void GetIndices (TemporaryIndexContainer& triangles, unsigned submesh) const;
+
+	// Flags for SetIndicesComplex.
+	enum {
+		k16BitIndices = 1 << 0,
+		kRebuildCollisionTriangles = 1 << 2,
+		kDontAssignIndices = 1 << 3,
+		kDontSupportSubMeshVertexRanges = 1 << 4
+	};
+	bool SetIndicesComplex (const void* indices, unsigned count, unsigned submesh, GfxPrimitiveType topology, int mode);
+
+	bool ExtractTriangle (UInt32 face, UInt32* indices) const;
+
+	void SetSubMeshCount (unsigned int count);
+	size_t GetSubMeshCount () const;
+
+	void UpdateSubMeshVertexRange (int index);
+
+	// Users are notified (via NotifyObjectUsers / m_IntermediateUsers) when the mesh changes.
+	void AddObjectUser( ListNode<Object>& node ) { m_ObjectUsers.push_back(node); }
+	void AddIntermediateUser( ListNode<IntermediateRenderer>& node ) { m_IntermediateUsers.AddUser(node); }
+
+	// --- Blend shapes ---
+	const BlendShapeData& GetBlendShapeData() const { return m_Shapes; }
+	size_t GetBlendShapeChannelCount() const { return m_Shapes.channels.size(); }
+	void SwapBlendShapeData (BlendShapeData& shapes);
+
+
+	BlendShapeData& GetWriteBlendShapeDataInternal() { return m_Shapes; }
+
+
+	void CheckConsistency();
+
+#if ENABLE_MULTITHREADED_CODE
+	void SetCurrentCPUFence( UInt32 fence ) { m_CurrentCPUFence = fence; m_WaitOnCPUFence = true; }
+#endif
+
+	void WaitOnRenderThreadUse();
+
+	static Mesh& GetInstantiatedMesh (Mesh* mesh, Object& owner);
+
+	void CopyTransformed (const Mesh& mesh, const Matrix4x4f& transform);
+
+	void SetChannelsDirty (unsigned vertexChannelsChanged, bool indices);
+
+	// --- Physics ---
+	void* GetSharedNxMesh ();
+	void* GetSharedNxConvexMesh ();
+
+	void RebuildCollisionTriangles();
+
+	const SubMesh& GetSubMeshFast (unsigned int submesh) const
+	{
+		DebugAssertIf(submesh >= m_SubMeshes.size());
+		return m_SubMeshes[submesh];
+	}
+	SubMesh& GetSubMeshFast (unsigned int submesh)
+	{
+		DebugAssertIf(submesh >= m_SubMeshes.size());
+		return m_SubMeshes[submesh];
+	}
+
+	const UInt16* GetSubMeshBuffer16 (int submesh) const;
+	UInt16* GetSubMeshBuffer16 (int submesh);
+
+	int GetSubMeshBufferByteSize (int submesh) const { return kVBOIndexSize * m_SubMeshes[submesh].indexCount; }
+
+	// The number of indices contained in the index buffer (all submeshes)
+	// NOTE(review): name is missing an 'I' (GetTotalIndexCount) — kept as-is
+	// for source compatibility with existing callers.
+	int GetTotalndexCount () const;
+
+	void ByteSwapIndices ();
+
+	/// 4, 2, 1 bone influence (BoneInfluence, BoneInfluence2, int)
+	void* GetSkinInfluence (int count);
+
+	int GetMeshUsageFlags () const { return m_MeshUsageFlags; }
+
+	virtual bool ShouldIgnoreInGarbageDependencyTracking ();
+
+	UInt32 GetAvailableChannels() const;
+	// May return only a subset of channels that are present in the mesh
+	UInt32 GetAvailableChannelsForRendering() const;
+	UInt32 GetChannelsInVBO() const { return m_ChannelsInVBO; }
+
+	bool IsSuitableSizeForDynamicBatching () const;
+
+	// Calculate cached bone bounds per bone by calculating the bounding volume in bind pose space.
+	// This is used by the SkinnedMeshRenderer to compute an accurate world space bounding volume quickly.
+	const AABBContainer& GetCachedBonesBounds();
+
+	void DestripifyIndices ();
+	void SetHideFromRuntimeStats(bool flag) { m_HideFromRuntimeStats = flag; }
+
+	bool IsSharedPhysicsMeshDirty () { return m_CollisionMesh.IsSharedPhysicsMeshDirty(); }
+
+	bool CanAccessFromScript() const;
+
+	const VertexData& GetVertexData() const { return m_VertexData; }
+	VertexData& GetVertexData() { return m_VertexData; }
+
+	UInt8 GetMeshCompression() const { return m_MeshCompression; }
+	void SetMeshCompression(UInt8 mc) { m_MeshCompression = mc; }
+
+	enum
+	{
+		kStreamCompressionDefault = 0,
+		kStreamCompressionCompressed,
+		kStreamCompressionCompressedAggressive
+	};
+
+	UInt8 GetStreamCompression() const { return m_StreamCompression; }
+	void SetStreamCompression(UInt8 cs) { m_StreamCompression = cs; }
+	bool GetIsReadable() const { return m_IsReadable; }
+	void SetIsReadable(bool readable) { m_IsReadable = readable; }
+
+
+	bool GetKeepVertices() const { return m_KeepVertices; }
+	void SetKeepVertices(bool keep) { m_KeepVertices = keep; }
+
+	bool GetKeepIndices() const { return m_KeepIndices; }
+	void SetKeepIndices(bool keep) { m_KeepIndices = keep; }
+
+	const IndexContainer& GetIndexBuffer() const { return m_IndexBuffer; }
+	IndexContainer& GetIndexBuffer() { return m_IndexBuffer; }
+
+	const SubMeshContainer& GetSubMeshes() const { return m_SubMeshes; }
+	SubMeshContainer& GetSubMeshes() { return m_SubMeshes; }
+
+	const MatrixContainer& GetBindpose() const { return m_Bindpose; }
+	MatrixContainer& GetBindpose() { return m_Bindpose; }
+
+	const dynamic_array<BindingHash>& GetBonePathHashes() const { return m_BonePathHashes; }
+	dynamic_array<BindingHash>& GetBonePathHashes() { return m_BonePathHashes; }
+	BindingHash GetRootBonePathHash() const { return m_RootBonePathHash; }
+	void SetRootBonePathHash(BindingHash val) { m_RootBonePathHash = val; }
+
+	const BoneInfluenceContainer& GetSkin() const { return m_Skin; }
+	BoneInfluenceContainer& GetSkin() { return m_Skin; }
+
+	const AABB& GetLocalAABB() const { return m_LocalAABB; }
+	void SetLocalAABB(const AABB& aabb) { m_LocalAABB = aabb; }
+
+#if UNITY_PS3 || UNITY_EDITOR
+	MeshPartitionContainer m_Partitions;
+	MeshPartitionInfoContainer m_PartitionInfos;
+#endif
+
+
+#if UNITY_EDITOR
+	void SetMeshOptimized(bool meshOptimized) { m_MeshOptimized = meshOptimized; }
+	bool GetMeshOptimized() const { return m_MeshOptimized; }
+#endif
+
+	UInt32 GetInternalMeshID() const { Assert(m_InternalMeshID); return m_InternalMeshID; }
+
+private:
+	void CreateSharedVBO( UInt32 wantedChannels );
+	void NotifyObjectUsers( const MessageIdentifier& msg );
+	void RecalculateSubmeshBoundsInternal (unsigned submesh);
+	void RecalculateBoundsInternal ();
+	void LoadDeprecatedTangentData (Mesh& mesh, DeprecatedTangentsArray &tangents);
+	void SwizzleVertexColorsIfNeeded ();
+
+	const VertexStreamsLayout& GetStreamsLayout() const;
+	const VertexChannelsLayout& GetChannelsLayout() const;
+
+	void DestripifySubmeshOnTransferInternal();
+	void SetIndexData(int submeshIndex, int indexCount, const void* indices, GfxPrimitiveType topology, int mode);
+
+#if SUPPORT_SERIALIZED_TYPETREES
+	template<class TransferFunction>
+	void TransferWorkaround35SerializeFuckup (TransferFunction& transfer);
+#endif
+
+#if UNITY_EDITOR || UNITY_PS3
+	template<class TransferFunction>
+	void TransferPS3Data (TransferFunction& transfer);
+#endif
+#if UNITY_EDITOR
+	bool m_MeshOptimized;
+#endif
+
+	VertexData m_VertexData;
+
+	UInt8 m_MeshCompression;
+	UInt8 m_StreamCompression;
+	bool m_IsReadable;
+	bool m_KeepVertices;
+	bool m_KeepIndices;
+	UInt32 m_InternalMeshID;   // process-unique id, 0 until first AwakeFromLoad
+
+	int m_MeshUsageFlags;
+
+	IndexContainer m_IndexBuffer;
+	SubMeshContainer m_SubMeshes;
+	MatrixContainer m_Bindpose;
+	BlendShapeData m_Shapes;
+
+	dynamic_array<BindingHash> m_BonePathHashes;
+	BindingHash m_RootBonePathHash;
+
+	// Lazily built skin caches; cleared together by ClearSkinCache().
+	AABBContainer m_CachedBonesAABB;
+
+	BoneInfluenceContainer m_Skin;
+	BoneInfluence2Container m_CachedSkin2;
+	SkinContainer m_CachedSkin1;
+
+	int m_MaxBoneIndex;        // -1 = not computed yet (see GetMaxBoneIndex)
+
+	AABB m_LocalAABB;
+
+	CollisionMeshData m_CollisionMesh;
+
+	typedef List< ListNode<Object> > ObjectList;
+	ObjectList m_ObjectUsers; // Object-derived users of this mesh
+
+	IntermediateUsers m_IntermediateUsers; // IntermediateRenderer users of this mesh
+
+	#if ENABLE_MULTITHREADED_CODE
+	UInt32 m_CurrentCPUFence;
+	bool m_WaitOnCPUFence;
+	#endif
+
+	PPtr<Object> m_Owner;
+	VBO* m_VBO;
+
+
+	UInt32 m_ChannelsInVBO;
+	bool m_VerticesDirty;
+	bool m_IndicesDirty;
+	bool m_IsDynamic;
+	bool m_HideFromRuntimeStats;
+	bool m_VertexColorsSwizzled;
+
+	friend class MeshFilter;
+	friend class ClothAnimator;
+	friend class CompressedMesh;
+	friend void PartitionSubmeshes (Mesh& m);
+	friend void OptimizeReorderVertexBuffer (Mesh& mesh);
+};
+
+#endif
diff --git a/Runtime/Filters/Mesh/LodMeshFilter.cpp b/Runtime/Filters/Mesh/LodMeshFilter.cpp
new file mode 100644
index 0000000..512f153
--- /dev/null
+++ b/Runtime/Filters/Mesh/LodMeshFilter.cpp
@@ -0,0 +1,96 @@
+#include "UnityPrefix.h"
+#include "LodMeshFilter.h"
+#include "LodMesh.h"
+#include "MeshRenderer.h"
+#include "Runtime/Filters/Particles/MeshParticleEmitter.h"
+#include "Runtime/Serialize/TransferFunctions/SerializeTransfer.h"
+#include "Runtime/Serialize/TransferFunctions/TransferNameConversions.h"
+
+// Starts with no mesh assigned; the PPtr is filled in by deserialization or
+// SetSharedMesh.
+MeshFilter::MeshFilter (MemLabelId label, ObjectCreationMode mode)
+: Super(label, mode)
+{
+	m_Mesh = NULL;
+}
+
+// Nothing to release: the mesh is a PPtr reference, not owned data.
+MeshFilter::~MeshFilter ()
+{
+}
+
+// kDidAddComponent handler: push our mesh to newly added sibling components.
+void MeshFilter::OnDidAddMesh ()
+{
+	AssignMeshToRenderer ();
+}
+
+// Pushes m_Mesh to the sibling MeshRenderer and MeshParticleEmitter (if any),
+// skipping components that already reference the same mesh.
+void MeshFilter::AssignMeshToRenderer ()
+{
+	// Without a game object there are no sibling components to update.
+	if (!GetGameObjectPtr())
+		return;
+
+	MeshRenderer* renderer = QueryComponent(MeshRenderer);
+	if (renderer != NULL && renderer->GetSharedMesh() != m_Mesh)
+		renderer->SetSharedMesh(m_Mesh);
+
+	MeshParticleEmitter* emitter = QueryComponent(MeshParticleEmitter);
+	if (emitter != NULL && emitter->GetMesh() != m_Mesh)
+		emitter->SetMesh(m_Mesh);
+}
+
+// Stores the mesh reference, unconditionally pushes it to sibling components
+// and marks the object dirty for persistence.
+void MeshFilter::SetSharedMesh (PPtr<Mesh> mesh)
+{
+	m_Mesh = mesh;
+
+	MeshRenderer* renderer = QueryComponent(MeshRenderer);
+	if (renderer)
+		renderer->SetSharedMesh(m_Mesh);
+
+	MeshParticleEmitter* emitter = QueryComponent(MeshParticleEmitter);
+	if (emitter)
+		emitter->SetMesh(m_Mesh);
+
+	SetDirty ();
+}
+
+// Returns the shared (possibly asset-owned) mesh reference.
+PPtr<Mesh> MeshFilter::GetSharedMesh ()
+{
+	return m_Mesh;
+}
+
+// Returns a mesh instance owned by this filter, instantiating a private copy
+// of the shared mesh on first call and re-pointing m_Mesh at it.
+Mesh* MeshFilter::GetInstantiatedMesh ()
+{
+	Mesh* instantiated = &Mesh::GetInstantiatedMesh (m_Mesh, *this);
+	// A new instance was created: adopt it (also updates sibling components).
+	if (PPtr<Mesh> (instantiated) != m_Mesh)
+	{
+		SetSharedMesh(instantiated);
+	}
+
+	return instantiated;
+}
+
+// Assigns an already-instantiated mesh; same propagation as SetSharedMesh.
+void MeshFilter::SetInstantiatedMesh (Mesh* mesh)
+{
+	SetSharedMesh(mesh);
+}
+
+// Registers MeshFilter with the class system (with InitializeClass hook) and
+// generates its serialization glue.
+IMPLEMENT_CLASS_HAS_INIT (MeshFilter)
+IMPLEMENT_OBJECT_SERIALIZE (MeshFilter)
+
+// Serialization: only the mesh reference is persisted.
+template<class TransferFunction> inline
+void MeshFilter::Transfer (TransferFunction& transfer)
+{
+	Super::Transfer (transfer);
+	transfer.Transfer (m_Mesh, "m_Mesh", kSimpleEditorMask);
+}
+
+// One-time class setup: legacy serialized-name conversions ("m_LodMesh" /
+// "PPtr<LodMesh>" from older data) and the add-component message hook.
+void MeshFilter::InitializeClass ()
+{
+	RegisterAllowNameConversion(GetClassStringStatic(), "m_LodMesh", "m_Mesh");
+	RegisterAllowTypeNameConversion ("PPtr<LodMesh>", "PPtr<Mesh>");
+
+	REGISTER_MESSAGE_VOID(MeshFilter, kDidAddComponent, OnDidAddMesh);
+}
+
+// After load: re-propagate the deserialized mesh to sibling components.
+void MeshFilter::AwakeFromLoad (AwakeFromLoadMode awakeMode)
+{
+	Super::AwakeFromLoad (awakeMode);
+	AssignMeshToRenderer ();
+}
diff --git a/Runtime/Filters/Mesh/LodMeshFilter.h b/Runtime/Filters/Mesh/LodMeshFilter.h
new file mode 100644
index 0000000..ff6273b
--- /dev/null
+++ b/Runtime/Filters/Mesh/LodMeshFilter.h
@@ -0,0 +1,38 @@
+#ifndef LODMESHFILTER_H
+#define LODMESHFILTER_H
+
+#include "Runtime/BaseClasses/GameObject.h"
+#include "Runtime/Modules/ExportModules.h"
+
+class Mesh;
+
+// Component holding the Mesh reference rendered by a sibling MeshRenderer
+// (and used by a sibling MeshParticleEmitter). It owns no mesh data itself;
+// it stores the PPtr and pushes it to the other components when it changes.
+class EXPORT_COREMODULE MeshFilter : public Unity::Component
+{
+public:
+	REGISTER_DERIVED_CLASS (MeshFilter, Unity::Component)
+	DECLARE_OBJECT_SERIALIZE (MeshFilter)
+
+	MeshFilter (MemLabelId label, ObjectCreationMode mode);
+
+	// Shared mesh: may be an asset referenced by many filters.
+	void SetSharedMesh (PPtr<Mesh> mesh);
+	PPtr<Mesh> GetSharedMesh ();
+
+	// Instantiated mesh: a private copy owned by this filter.
+	Mesh* GetInstantiatedMesh ();
+	void SetInstantiatedMesh (Mesh* mesh);
+
+	static void InitializeClass ();
+	static void CleanupClass () {}
+
+	// kDidAddComponent message handler.
+	void OnDidAddMesh ();
+
+protected:
+	virtual void AwakeFromLoad (AwakeFromLoadMode awakeMode);
+
+
+private:
+	// Pushes m_Mesh to sibling MeshRenderer / MeshParticleEmitter.
+	void AssignMeshToRenderer ();
+
+	PPtr<Mesh> m_Mesh;
+};
+
+#endif
diff --git a/Runtime/Filters/Mesh/Mesh.h b/Runtime/Filters/Mesh/Mesh.h
new file mode 100644
index 0000000..e6b58dc
--- /dev/null
+++ b/Runtime/Filters/Mesh/Mesh.h
@@ -0,0 +1,76 @@
+#ifndef MESH_H
+#define MESH_H
+
+#include <vector>
+#include "Runtime/Serialize/SerializeUtility.h"
+#include "Runtime/Math/Vector3.h"
+#include "Runtime/Misc/Allocator.h"
+
+class Quaternionf;
+
+/// A face in the mesh.
+/// A face in the mesh.
+// Triangle of 16-bit vertex indices; operator[] gives positional access.
+struct Face {
+	UInt16 v1, v2, v3;
+	Face (UInt16 vert1, UInt16 vert2, UInt16 vert3)
+	{v1 = vert1; v2 = vert2; v3 = vert3;}
+	Face () {}
+
+	UInt16 &operator[] (int i) { return (&v1)[i]; }
+	UInt16 operator[] (int i) const { return (&v1)[i]; }
+
+	DECLARE_SERIALIZE_OPTIMIZE_TRANSFER (Face)
+};
+
+// Serialization for Face: the three vertex indices.
+template<class TransferFunc>
+void Face::Transfer (TransferFunc& transfer)
+{
+	TRANSFER (v1);
+	TRANSFER (v2);
+	TRANSFER (v3);
+}
+
+// Legacy per-vertex tangent-space record, kept only to read old serialized
+// data. Note the serialize macro deliberately keeps the historical type name
+// "Tangent" so existing data still matches.
+struct DeprecatedTangent
+{
+	Vector3f normal;
+	Vector3f tangent;
+	float handedness;
+	DECLARE_SERIALIZE_OPTIMIZE_TRANSFER (Tangent)
+};
+
+// Serialization for DeprecatedTangent (legacy data only).
+template<class TransferFunc>
+void DeprecatedTangent::Transfer (TransferFunc& transfer)
+{
+	TRANSFER (normal);
+	TRANSFER (tangent);
+	TRANSFER (handedness);
+}
+
+// Up to four weighted bone influences for one vertex.
+struct BoneInfluence
+{
+	float weight[4];     // blend weights, one per influence
+	int boneIndex[4];    // indices into the mesh's bind-pose array
+
+	DECLARE_SERIALIZE_OPTIMIZE_TRANSFER (BoneInfluence)
+};
+
+// Two-bone variant, built by Mesh::GetSkinInfluence(2) from the first two
+// entries of a BoneInfluence.
+struct BoneInfluence2
+{
+	float weight[2];
+	int boneIndex[2];
+};
+
+// Serialization for BoneInfluence: all weights first, then all bone indices.
+template<class TransferFunc>
+void BoneInfluence::Transfer (TransferFunc& transfer)
+{
+	TRANSFER (weight[0]);
+	TRANSFER (weight[1]);
+	TRANSFER (weight[2]);
+	TRANSFER (weight[3]);
+
+	TRANSFER (boneIndex[0]);
+	TRANSFER (boneIndex[1]);
+	TRANSFER (boneIndex[2]);
+	TRANSFER (boneIndex[3]);
+}
+
+#endif
diff --git a/Runtime/Filters/Mesh/MeshBlendShape.cpp b/Runtime/Filters/Mesh/MeshBlendShape.cpp
new file mode 100644
index 0000000..c7588e2
--- /dev/null
+++ b/Runtime/Filters/Mesh/MeshBlendShape.cpp
@@ -0,0 +1,234 @@
+#include "UnityPrefix.h"
+#include "Configuration/UnityConfigure.h"
+#include "MeshBlendShape.h"
+#include "Runtime/mecanim/generic/crc32.h"
+
+static const float kVertexDeltaEpsilon = 1e-5f;
+static const float kNormalDeltaEpsilon = 1e-5f;
+
// Converts one dense blend-shape frame (one delta per mesh vertex) into sparse
// form: only vertices whose position/normal/tangent delta exceeds the epsilons
// are appended to the shared pool 'sharedSparceVertices'. On return 'frame'
// records where its slice lives in the pool (firstVertex/vertexCount) and
// whether any kept vertex carries normal/tangent deltas.
// deltaNormals/deltaTangents may be empty; otherwise they must match
// deltaVertices in size (asserted below).
void SetBlendShapeVertices(const std::vector<Vector3f>& deltaVertices, const std::vector<Vector3f>& deltaNormals, const std::vector<Vector3f>& deltaTangents, BlendShapeVertices& sharedSparceVertices, BlendShape& frame)
{
	Assert(deltaNormals.empty() || deltaVertices.size() == deltaNormals.size());
	Assert(deltaTangents.empty() || deltaVertices.size() == deltaTangents.size());

	frame.firstVertex = sharedSparceVertices.size();

	// Converting blend shape into sparse blend shape
	sharedSparceVertices.reserve(sharedSparceVertices.size() + deltaVertices.size());

	frame.hasNormals = frame.hasTangents = false;

	for (int j = 0; j < deltaVertices.size(); ++j)
	{
		const bool vertexHasNormal = (!deltaNormals.empty() && Magnitude(deltaNormals[j]) > kNormalDeltaEpsilon);
		const bool vertexHasTangent = (!deltaTangents.empty() && Magnitude(deltaTangents[j]) > kNormalDeltaEpsilon);

		frame.hasNormals = frame.hasNormals || vertexHasNormal;
		frame.hasTangents = frame.hasTangents || vertexHasTangent;

		// Keep the vertex only if any of its deltas is significant.
		if (Magnitude(deltaVertices[j]) > kVertexDeltaEpsilon || vertexHasNormal || vertexHasTangent)
		{
			BlendShapeVertex v;

			// Normal/tangent stay at the zero set by the default constructor
			// when the corresponding input array is absent.
			v.vertex = deltaVertices[j];
			if (!deltaNormals.empty())
				v.normal = deltaNormals[j];
			if (!deltaTangents.empty())
				v.tangent = deltaTangents[j];

			v.index = j;
			sharedSparceVertices.push_back(v);
		}
	}

	frame.vertexCount = sharedSparceVertices.size() - frame.firstVertex;
}
+
+void BlendShape::UpdateFlags(const BlendShapeVertices& sharedSparceVertices)
+{
+ hasNormals = hasTangents = false;
+
+ for (int j = 0; j < vertexCount; ++j)
+ {
+ const BlendShapeVertex& v = sharedSparceVertices[firstVertex + j];
+ const bool vertexHasNormal = Magnitude(v.normal) > kNormalDeltaEpsilon;
+ const bool vertexHasTangent = Magnitude(v.tangent) > kNormalDeltaEpsilon;
+
+ hasNormals = hasNormals || vertexHasNormal;
+ hasTangents = hasTangents || vertexHasTangent;
+ }
+}
+
+void InitializeChannel (const UnityStr& inName, int frameIndex, int frameCount, BlendShapeChannel& channel)
+{
+ channel.name.assign(inName.c_str(), kMemGeometry);
+ channel.nameHash = mecanim::processCRC32(inName.c_str());
+ channel.frameIndex = frameIndex;
+ channel.frameCount = frameCount;
+}
+
+const char* GetChannelName (const BlendShapeData& data, int index)
+{
+ return data.channels[index].name.c_str();
+}
+
+int GetChannelIndex (const BlendShapeData& data, const char* name)
+{
+ for (int i=0;i<data.channels.size();i++)
+ {
+ if (name == data.channels[i].name)
+ return i;
+ }
+ return -1;
+}
+
+int GetChannelIndex (const BlendShapeData& data, BindingHash name)
+{
+ for (int i=0;i<data.channels.size();i++)
+ {
+ if (name == data.channels[i].nameHash)
+ return i;
+ }
+ return -1;
+}
+
+void ClearBlendShapes (BlendShapeData& data)
+{
+ data.vertices.clear();
+ data.shapes.clear();
+ data.channels.clear();
+ data.fullWeights.clear();
+}
+
+/*
+
+STRUCT BlendShapeChannel
+
+// BlendShape vertex class.
+STRUCT Vertex
+// Vertex delta.
+CSRAW public Vector3 vertex;
+
+// Normal delta.
+CSRAW public Vector3 normal;
+
+// Tangent delta.
+CSRAW public Vector3 tangent;
+
+// Index to [[Mesh]] vertex data.
+CSRAW public int index;
+END
+
+// A class representing a single BlendShape (also called morph-target).
+STRUCT BlendShape
+
+// The weight of the frame
+CSRAW public float weight;
+
+// Sparse vertex data.
+CSRAW public Vertex[] vertices;
+END
+
+// Name of the BlendShape.
+CSRAW public string name;
+
+// The frames making up a blendshape animation.
+// Each frame has a weight, based on the weight of the BlendShape in the SkinnedMeshRenderer, Unity will apply 1 or 2 frames.
+CSRAW public BlendShape[] shapes;
+END
+
+
+C++RAW
+/*
+ struct MonoMeshBlendShape
+ {
+ ScriptingStringPtr name;
+ ScriptingArrayPtr vertices;
+ };
+
+ void BlendShapeVertexToMono (const BlendShapeVertex &src, MonoBlendShapeVertex &dest) {
+ dest.vertex = src.vertex;
+ dest.normal = src.normal;
+ dest.tangent = src.tangent;
+ dest.index = src.index;
+ }
+ void BlendShapeVertexToCpp (const MonoBlendShapeVertex &src, BlendShapeVertex &dest) {
+ dest.vertex = src.vertex;
+ dest.normal = src.normal;
+ dest.tangent = src.tangent;
+ dest.index = src.index;
+ }
+
+ class MeshBlendShapeToMono
+ {
+ public:
+ MeshBlendShapeToMono(const BlendShapeVertices& sharedVertices_) : sharedVertices(sharedVertices_) {}
+
+ void operator() (const MeshBlendShape &src, MonoMeshBlendShape &dest)
+ {
+ dest.name = scripting_string_new(src.m_Name);
+ const BlendShapeVertices vertices(sharedVertices.begin() + src.firstVertex, sharedVertices.begin() + src.firstVertex + src.vertexCount);
+
+ ScriptingTypePtr classVertex = GetScriptingTypeRegistry().GetType("UnityEngine", "BlendShapeVertex");
+ dest.vertices = VectorToScriptingStructArray<BlendShapeVertex, MonoBlendShapeVertex>(vertices, classVertex, BlendShapeVertexToMono);
+ }
+
+ private:
+ const BlendShapeVertices& sharedVertices;
+ };
+
+ class MeshBlendShapeToCpp
+ {
+ public:
+ MeshBlendShapeToCpp(int meshVertexCount_, BlendShapeVertices& sharedVertices_) : meshVertexCount(meshVertexCount_), sharedVertices(sharedVertices_) {}
+
+ void operator() (MonoMeshBlendShape &src, MeshBlendShape &dest)
+ {
+ dest.weight = src.weight;
+
+ const BlendShapeVertex* vertices = Scripting::GetScriptingArrayStart<BlendShapeVertex> (src.vertices);
+ sharedVertices.insert(sharedVertices.end(), vertices, vertices + GetScriptingArraySize(src.vertices));
+
+ for (BlendShapeVertices::iterator it = vertices.begin(), end = vertices.end(); it != end; ++it)
+ {
+ BlendShapeVertex& v = *it;
+ if (v.index < 0 || v.index >= meshVertexCount)
+ {
+ ErrorStringMsg("Value (%d) of BlendShapeVertex.index #%d is out of bounds (Mesh vertex count: %d) on BlendShape '%s'. It will be reset to 0.", v.index, it - vertices.begin(), meshVertexCount, dest.m_Name.c_str());
+ v.index = 0;
+ }
+ }
+
+ dest.firstVertex = sharedVertices.size();
+ dest.vertexCount = vertices.size();
+
+ sharedVertices.insert(sharedVertices.end(), vertices.begin(), vertices.end());
+ dest.UpdateFlags(sharedVertices);
+ }
+
+ private:
+ int meshVertexCount;
+ BlendShapeVertices& sharedVertices;
+ };
+
+
+
+ ----------------
+
+ // BlendShapes for this mesh.
+ CUSTOM_PROP BlendShapeChannel[] blendShapes
+ {
+ // ScriptingTypePtr classBlendShape = GetScriptingTypeRegistry().GetType("UnityEngine", "MeshBlendShape");
+ // return VectorToScriptingStructArray<MeshBlendShape, MonoMeshBlendShape>(self->GetShapesVector(), classBlendShape, MeshBlendShapeToMono(self->GetShapeVertexVector()));
+ return SCRIPTING_NULL;
+ }
+ {
+ // Mesh::MeshBlendShapeContainer shapes;
+ // self->GetShapeVertexVector().clear();
+ // ScriptingStructArrayToVector<MeshBlendShape, MonoMeshBlendShape>(value, shapes, MeshBlendShapeToCpp(self->GetVertexCount(), self->GetShapeVertexVector()));
+ // self->SwapShapesVector(shapes);
+ }
+
+
+
+ */
diff --git a/Runtime/Filters/Mesh/MeshBlendShape.h b/Runtime/Filters/Mesh/MeshBlendShape.h
new file mode 100644
index 0000000..d4d0f41
--- /dev/null
+++ b/Runtime/Filters/Mesh/MeshBlendShape.h
@@ -0,0 +1,115 @@
+#ifndef MESHBLENDSHAPES_H
+#define MESHBLENDSHAPES_H
+
+#include "Runtime/Geometry/AABB.h"
+#include "Runtime/Math/Vector3.h"
+#include "Runtime/Serialize/SerializeUtility.h"
+#include "Runtime/Utilities/dynamic_array.h"
+#include "Runtime/Containers/ConstantString.h"
+#include "Runtime/Containers/ConstantStringSerialization.h"
+
+typedef UInt32 BindingHash;
+
// One sparse blend-shape vertex. The three vectors are deltas relative to the
// base mesh; 'index' addresses the mesh vertex they apply to.
struct BlendShapeVertex
{
	// vertex, normal & tangent are stored as deltas
	Vector3f vertex;
	Vector3f normal;
	Vector3f tangent;
	UInt32 index;

	BlendShapeVertex() : vertex(Vector3f::zero), normal(Vector3f::zero), tangent(Vector3f::zero), index(0) {}

	DECLARE_SERIALIZE_NO_PPTR (BlendShapeVertex)
};
// All blend shapes of a mesh share one flat pool of sparse vertices.
typedef dynamic_array<BlendShapeVertex> BlendShapeVertices;
+
// A named blend-shape channel (what the user animates). It owns the run of
// 'frameCount' frames starting at 'frameIndex' in BlendShapeData::shapes
// (and the parallel entries in BlendShapeData::fullWeights).
struct BlendShapeChannel
{
	ConstantString name;
	BindingHash nameHash;	// CRC32 of 'name'; used for fast binding lookups

	int frameIndex;
	int frameCount;

	// NOTE(review): serialized under the name "MeshBlendShapeChannel", not the
	// C++ type name -- presumably for serialization compatibility; confirm
	// before changing.
	DECLARE_SERIALIZE_NO_PPTR(MeshBlendShapeChannel)
};
+
// A single blend-shape frame (morph target): the slice
// [firstVertex, firstVertex + vertexCount) of the shared sparse vertex pool.
struct BlendShape
{
	BlendShape() : firstVertex(0), vertexCount(0), hasNormals(false), hasTangents(false) {}

	UInt32 firstVertex;
	UInt32 vertexCount;

	// Cached "does any vertex of this frame carry a normal/tangent delta"
	// flags; used to pick the cheapest skinning code path.
	bool hasNormals;
	bool hasTangents;


	///@TODO: Move
	// updates hasNormals and hasTangents based on data in vertices
	void UpdateFlags(const BlendShapeVertices& sharedSparceVertices);

	// NOTE(review): serialized under the legacy name "MeshBlendShape".
	DECLARE_SERIALIZE_NO_PPTR (MeshBlendShape)
};
+
// All blend-shape data of a mesh: the shared sparse vertex pool, the frames,
// the named channels, and the full (target) weight of every frame.
// 'shapes' and 'fullWeights' run parallel and are indexed through
// BlendShapeChannel::frameIndex/frameCount.
struct BlendShapeData
{
	BlendShapeVertices vertices;
	dynamic_array<BlendShape> shapes;
	std::vector<BlendShapeChannel> channels;
	dynamic_array<float> fullWeights;

	DECLARE_SERIALIZE_NO_PPTR(BlendShapeData)
};
+
+
+// Convert between blendshape name and index
+const char* GetChannelName (const BlendShapeData& data, int index);
+inline size_t GetBlendShapeChannelCount (const BlendShapeData& data) { return data.channels.size(); }
+int GetChannelIndex (const BlendShapeData& data, const char* name);
+int GetChannelIndex (const BlendShapeData& data, BindingHash name);
+
// data is passed as non-sparse arrays, i.e. deltaVertices.size() has to match the vertex count of the Mesh
+void SetBlendShapeVertices(const std::vector<Vector3f>& deltaVertices, const std::vector<Vector3f>& deltaNormals, const std::vector<Vector3f>& deltaTangents, BlendShapeVertices& sharedSparceVertices, BlendShape& frame);
+void InitializeChannel (const UnityStr& inName, int frameIndex, int frameCount, BlendShapeChannel& channel);
+void ClearBlendShapes (BlendShapeData& data);
+
// Serialization. Field order defines the serialized layout -- do not reorder.

template<class TransferFunc>
void BlendShape::Transfer (TransferFunc& transfer)
{
	TRANSFER(firstVertex);
	TRANSFER(vertexCount);
	TRANSFER(hasNormals);
	TRANSFER(hasTangents);
	// Pad after the two bools so the next element starts aligned.
	transfer.Align();
}

template<class TransferFunc>
void BlendShapeData::Transfer (TransferFunc& transfer)
{
	TRANSFER (vertices);
	TRANSFER (shapes);
	TRANSFER (channels);
	TRANSFER (fullWeights);
}

template<class TransferFunc>
void BlendShapeVertex::Transfer (TransferFunc& transfer)
{
	TRANSFER(vertex);
	TRANSFER(normal);
	TRANSFER(tangent);
	TRANSFER(index);
}

template<class TransferFunc>
void BlendShapeChannel::Transfer (TransferFunc& transfer)
{
	// ConstantString needs a custom transfer helper (it is not TRANSFER-able).
	TransferConstantString (name, "name", kNoTransferFlags, kMemGeometry, transfer);
	TRANSFER (nameHash);
	TRANSFER (frameIndex);
	TRANSFER (frameCount);
}
+
+#endif
diff --git a/Runtime/Filters/Mesh/MeshBlendShaping.cpp b/Runtime/Filters/Mesh/MeshBlendShaping.cpp
new file mode 100644
index 0000000..a86a24d
--- /dev/null
+++ b/Runtime/Filters/Mesh/MeshBlendShaping.cpp
@@ -0,0 +1,184 @@
+#include "UnityPrefix.h"
+#include "MeshBlendShaping.h"
+#include "MeshSkinning.h"
+#include "MeshBlendShape.h"
+
// Accumulates one sparse blend-shape frame into the interleaved vertex buffer
// 'dst', scaled by 'weight'. 'inStride' is the byte size of one vertex;
// position sits at offset 0 within a vertex, normal/tangent at the given byte
// offsets. The template flags compile the normal/tangent adds in or out so the
// inner loop carries no runtime branches.
template<bool skinNormal, bool skinTangent>
void ApplyBlendShapeTmpl (const BlendShapeVertex* vertices, size_t vertexCount, size_t dstVertexCount, float weight, int normalOffset, int tangentOffset, int inStride, UInt8* dst)
{
	for (int i = 0; i < vertexCount; ++i)
	{
		const BlendShapeVertex& blendShapeVertex = vertices[i];

		// Sparse data: each blend-shape vertex addresses its mesh vertex by index.
		int offset = inStride * blendShapeVertex.index;

		*reinterpret_cast<Vector3f*>(dst + offset) += blendShapeVertex.vertex * weight;
		if (skinNormal)
		{
			DebugAssert (offset + normalOffset < inStride * dstVertexCount);
			*reinterpret_cast<Vector3f*>(dst + offset + normalOffset) += blendShapeVertex.normal * weight;
		}
		if (skinTangent)
		{
			DebugAssert (offset + tangentOffset < inStride * dstVertexCount);
			*reinterpret_cast<Vector3f*>(dst + offset + tangentOffset) += blendShapeVertex.tangent * weight;
		}
	}
}
+
+
// Applies one blend-shape frame to the destination vertex buffer.
// Weights at/below the epsilon are a no-op; the weight is clamped to at most 1.
// Dispatches to the template instantiation matching the channels that both the
// skin setup and this frame actually carry.
// NOTE(review): a frame with tangent deltas but no normal deltas falls through
// to the <false, false> instantiation, i.e. its tangents are dropped. In
// practice tangents presumably never occur without normals -- confirm.
void ApplyBlendShape (const BlendShape& target, const BlendShapeVertices& vertices, float weight, const SkinMeshInfo& info, UInt8* dst)
{
	if (!HasValidWeight(weight))
		return;

	weight = std::min(weight, 1.0F);

	// This frame's slice of the shared sparse vertex pool.
	const BlendShapeVertex* v = vertices.begin() + target.firstVertex;

	if (info.skinNormals && info.skinTangents && target.hasNormals && target.hasTangents)
		ApplyBlendShapeTmpl<true, true> (v, target.vertexCount, info.vertexCount, weight, info.normalOffset, info.tangentOffset, info.inStride, dst);
	else if (info.skinNormals && target.hasNormals)
		ApplyBlendShapeTmpl<true, false> (v, target.vertexCount, info.vertexCount, weight, info.normalOffset, info.tangentOffset, info.inStride, dst);
	else
		ApplyBlendShapeTmpl<false, false> (v, target.vertexCount, info.vertexCount, weight, info.normalOffset, info.tangentOffset, info.inStride, dst);
}
+
// Returns the index of the left-hand frame of the pair [frame, frame+1] that
// brackets targetWeight; 'weights' must be sorted ascending and count >= 2
// (the caller only invokes this when channel.frameCount > 1).
// The result is clamped to count-2 so that frame+1 is always a valid index:
// a targetWeight beyond the last frame extrapolates from the last pair instead
// of making the caller read weights[count] out of bounds (which the previous
// unclamped version allowed).
static int FindFrame (const float* weights, size_t count, float targetWeight)
{
	// Find frame (left index)
	int frame = 0;
	while (frame < (int)count - 2 && targetWeight > weights[frame+1])
		frame++;

	return frame;
}
+
// Blend-shape "skinning": copies the source vertices into 'dst' and then
// accumulates every channel with a significant weight on top.
// Per channel, a target weight below the first frame's full weight (or a
// single-frame channel) simply fades that frame in; otherwise the two frames
// bracketing the target weight are blended linearly.
// Requires in/out vertex layouts to be identical (asserted).
void ApplyBlendShapes (SkinMeshInfo& info, UInt8* dst)
{
	DebugAssert (info.blendshapeCount != 0);
	Assert (info.inStride == info.outStride);
	const int inStride = info.inStride;
	const int count = info.vertexCount;

	// Start from an unmodified copy of the base mesh vertices.
	Assert (dst);
	memcpy (dst, info.inVertices, inStride * count);

	const BlendShapeData& blendShapeData = *info.blendshapes;

	for (int c = 0; c < info.blendshapeCount; ++c)
	{
		const float targetWeight = info.blendshapeWeights[c];

		if (!HasValidWeight (targetWeight))
			continue;

		const BlendShapeChannel& channel = blendShapeData.channels[c];
		Assert(channel.frameCount != 0);

		// This channel's run of frames and their full weights (parallel arrays).
		const BlendShape* blendShapeFrames = &blendShapeData.shapes[channel.frameIndex];
		const float* weights = &blendShapeData.fullWeights[channel.frameIndex];

		// The first blendshape does not need to do any blending. Just fade it in.
		if (targetWeight < weights[0] || channel.frameCount == 1)
		{
			float lhsShapeWeight = weights[0];
			ApplyBlendShape (blendShapeFrames[0], blendShapeData.vertices, targetWeight / lhsShapeWeight, info, dst);
		}
		// We are blending with two frames
		else
		{
			// Find the frame we are blending with
			int frame = FindFrame(weights, channel.frameCount, targetWeight);

			float lhsShapeWeight = weights[frame + 0];
			float rhsShapeWeight = weights[frame + 1];

			// Linear interpolation factor between the two bracketing frames.
			float relativeWeight = (targetWeight - lhsShapeWeight) / (rhsShapeWeight - lhsShapeWeight);

			ApplyBlendShape (blendShapeFrames[frame + 0], blendShapeData.vertices, 1.0F - relativeWeight, info, dst);
			ApplyBlendShape (blendShapeFrames[frame + 1], blendShapeData.vertices, relativeWeight, info, dst);
		}
	}
}
+
+///@TODO: How do we deal with resizing vertex count once mesh blendshapes have been created???
+
+/*
+ template<bool skinNormal, bool skinTangent>
+ static void ApplyBlendShapesTmpl (SkinMeshInfo& info, UInt8* dst)
+ {
+ DebugAssert (info.blendshapeCount != 0);
+ Assert (info.inStride == info.outStride);
+ const int inStride = info.inStride;
+ const int count = info.vertexCount;
+
+ Assert (dst);
+ memcpy (dst, info.inVertices, inStride * count);
+
+ const int normalOffset = info.normalOffset;
+ const int tangentOffset = info.tangentOffset;
+
+ #if BLEND_DIRECT_NORMALS
+ if (skinNormal)
+ { // figure out how what fraction of original normal should be used
+ float totalBlendshapeWeight = 0.0f;
+ for (int i = 0; i < info.blendshapeCount; ++i)
+ totalBlendshapeWeight += info.blendshapeWeights[i];
+ Assert (totalBlendshapeWeight >= 0.0f);
+ if (totalBlendshapeWeight > 0.0f)
+ {
+ for (int i = 0; i < count; ++i)
+ *reinterpret_cast<Vector3f*>(dst + i*inStride + normalOffset) *= max(0.0f, (1.0f - totalBlendshapeWeight));
+ }
+ }
+
+ bool atLeastOneSparseBlendshape = false;
+ #endif
+ for (int bs = 0; bs < info.blendshapeCount; ++bs)
+ {
+ const float w = info.blendshapeWeights[bs];
+
+ if (HasWeight(w))
+ {
+ const MeshBlendShape& blendShape = info.blendshapes[bs];
+
+ const BlendShapeVertex* vertices = info.blendshapesVertices + blendShape.firstVertex;
+ for (int i = 0; i < blendShape.vertexCount; ++i)
+ {
+ const BlendShapeVertex& blendShapeVertex = vertices[i];
+
+ int offset = inStride * blendShapeVertex.index;
+ Assert (offset < inStride * count);
+ *reinterpret_cast<Vector3f*>(dst + offset) += blendShapeVertex.vertex * w;
+ if (skinNormal)
+ {
+ Assert (offset + normalOffset < inStride * count);
+ *reinterpret_cast<Vector3f*>(dst + offset + normalOffset) += blendShapeVertex.normal * w;
+ }
+ if (skinTangent)
+ {
+ Assert (offset + tangentOffset < inStride * count);
+ *reinterpret_cast<Vector3f*>(dst + offset + tangentOffset) += blendShapeVertex.tangent * w;
+ }
+ }
+
+ #if BLEND_DIRECT_NORMALS
+ if (vertices.size () < count)
+ atLeastOneSparseBlendshape = true;
+ #endif
+ }
+ }
+
+ #if BLEND_DIRECT_NORMALS
+ if (atLeastOneSparseBlendshape && skinNormal) // we might need to take larger fraction from original normal
+ for (int i = 0; i < count; ++i)
+ {
+ Vector3f const& srcNormal = *reinterpret_cast<Vector3f*>((UInt8*)info.inVertices + i*inStride + normalOffset);
+ Vector3f* dstNormal = reinterpret_cast<Vector3f*>(dst + i*inStride + normalOffset);
+ const float missingFractionOfNormal = max (0.0f, 1.0f - Magnitude (*dstNormal));
+ *dstNormal += srcNormal * missingFractionOfNormal;
+ }
+ #endif
+ }
+*/ \ No newline at end of file
diff --git a/Runtime/Filters/Mesh/MeshBlendShaping.h b/Runtime/Filters/Mesh/MeshBlendShaping.h
new file mode 100644
index 0000000..7b39f26
--- /dev/null
+++ b/Runtime/Filters/Mesh/MeshBlendShaping.h
@@ -0,0 +1,12 @@
+#pragma once
+
+struct SkinMeshInfo;
+
+// Does "mesh skinning" logic for BlendShapes
+void ApplyBlendShapes (SkinMeshInfo& info, UInt8* dst);
+
// Returns true when 'w' is large enough to have a visible effect; tiny,
// zero or negative weights are skipped by the blend-shape code.
inline bool HasValidWeight(const float w)
{
	return w > 1e-4f;	// kWeightEpsilon
}
diff --git a/Runtime/Filters/Mesh/MeshCombiner.cpp b/Runtime/Filters/Mesh/MeshCombiner.cpp
new file mode 100644
index 0000000..1bf93e5
--- /dev/null
+++ b/Runtime/Filters/Mesh/MeshCombiner.cpp
@@ -0,0 +1,502 @@
+#include "UnityPrefix.h"
+#include "MeshCombiner.h"
+#include "Runtime/Graphics/TriStripper.h"
+#include "Runtime/Shaders/GraphicsCaps.h"
+#include "Runtime/Profiler/Profiler.h"
+#include <limits>
+
+
+#define sqr(x) ((x)*(x))
+
+PROFILER_INFORMATION(gCombineMeshesProfile, "CombineMeshes", kProfilerRender)
+PROFILER_INFORMATION(gCombineVerticesProfile, "CombineVertices", kProfilerRender)
+PROFILER_INFORMATION(gCombineIndicesProfile, "CombineIndices", kProfilerRender)
+
+static void CombineBoneSkinning (const CombineInstances &in, Mesh& outCombinedMesh);
+
+
// Copies the triangle indices of in.mesh's sub-mesh into 'dstIndices',
// rebased by a vertex offset: either the instance's explicit vertexOffset
// (useVertexOffsets) or the running total accumulated in
// inoutTotalVertexOffset. The running total is advanced either way.
// If the instance transform mirrors (negative determinant, detected via the
// scalar triple product of its axes), the index order is reversed to keep
// the winding/cull order correct. Returns the number of indices written;
// 0 for an invalid sub-mesh index.
// NOTE(review): dstIndices is UInt16 -- srcIndices[k] + vertexOffset
// presumably never exceeds 65535 because batching limits total vertices;
// confirm at the call sites.
size_t ExtractMeshIndices(Mesh::TemporaryIndexContainer& srcIndices, const CombineInstance& in, bool useVertexOffsets, size_t& inoutTotalVertexOffset, UInt16* dstIndices)
{
	srcIndices.clear();

	if (in.subMeshIndex < 0 || in.subMeshIndex >= in.mesh->GetSubMeshCount())
		return 0;

	const int subMeshIndex = in.subMeshIndex;
	const int vertexOffset = useVertexOffsets ? in.vertexOffset : inoutTotalVertexOffset;
	inoutTotalVertexOffset += in.mesh->GetVertexCount();

	in.mesh->GetTriangles( srcIndices, subMeshIndex );

	size_t numIndices = srcIndices.size();
	if (Dot (Cross(in.transform.GetAxisX(), in.transform.GetAxisY()), in.transform.GetAxisZ()) >= 0)
	{
		for ( size_t k=0; k!=numIndices; ++k )
			dstIndices[k] = srcIndices[k] + vertexOffset;
	}
	else
	{
		// if trilist, then
		// reverse Cull order by reversing indices
		for ( size_t k=0; k!=numIndices; ++k )
			dstIndices[k] = srcIndices[numIndices-k-1] + vertexOffset;
	}

	return numIndices;
}
+
+static bool IsMeshBatchable (const Mesh* mesh, int subMeshIndex)
+{
+ return mesh && mesh->HasVertexData() && subMeshIndex >= 0 && subMeshIndex < mesh->GetSubMeshCount();
+}
+
+
// Builds the index buffer(s) of the combined mesh. With mergeSubMeshes all
// input sub-meshes are concatenated into one triangle list; otherwise each
// input becomes its own sub-mesh. Indices are rebased per instance by
// ExtractMeshIndices (explicit vertexOffset or running vertex total).
void CombineMeshIndicesForStaticBatching(const CombineInstances& in, Mesh& inoutMesh, bool mergeSubMeshes, bool useVertexOffsets)
{
	PROFILER_AUTO(gCombineIndicesProfile, &inoutMesh);

	size_t size = in.size();

	// Size the scratch buffer: sum of index counts when merging, max otherwise.
	UInt32 maxIndices = 0;
	for ( size_t i=0; i!=size; ++i )
	{
		if (IsMeshBatchable(in[i].mesh, in[i].subMeshIndex))
		{
			const UInt32 numTris = in[i].mesh->GetSubMeshFast( in[i].subMeshIndex ).indexCount;
			if (mergeSubMeshes)
				maxIndices += numTris;
			else
				maxIndices = std::max( maxIndices, numTris );
		}
	}

	// +1 avoids a zero-sized allocation when there is nothing to copy.
	UInt16* dstIndices = new UInt16[maxIndices+1];
	Mesh::TemporaryIndexContainer srcIndices;
	srcIndices.reserve( maxIndices+1 );

	size_t totalVertexOffset = 0;
	if (mergeSubMeshes)
	{
		inoutMesh.SetSubMeshCount( 1 );
		size_t totalNumIndices = 0;
		for ( size_t s=0; s!=size; ++s )
		{
			// NOTE(review): guarded only by 'in[s].mesh' while the sizing loop
			// above used IsMeshBatchable -- a non-null mesh that failed
			// IsMeshBatchable would not be counted in maxIndices but would
			// still be extracted here; confirm the two predicates cannot
			// disagree for a non-null mesh with valid sub-mesh.
			if (in[s].mesh)
			{
				size_t numIndices = ExtractMeshIndices (srcIndices, in[s], useVertexOffsets, totalVertexOffset, dstIndices+totalNumIndices);

				totalNumIndices += numIndices;
				Assert(totalNumIndices <= (maxIndices+1));
			}
		}
		int mask = Mesh::k16BitIndices;
		inoutMesh.SetIndicesComplex (dstIndices, totalNumIndices, 0, kPrimitiveTriangles, mask);
	}
	else
	{
		// One output sub-mesh per input instance, in input order.
		inoutMesh.SetSubMeshCount( in.size() );
		for ( size_t s=0; s!=size; ++s )
		{
			if (in[s].mesh)
			{
				size_t numIndices = ExtractMeshIndices (srcIndices, in[s], useVertexOffsets, totalVertexOffset, dstIndices);
				Assert(numIndices <= (maxIndices+1));

				int mask = Mesh::k16BitIndices;
				inoutMesh.SetIndicesComplex (dstIndices, numIndices, s, kPrimitiveTriangles, mask);
			}
		}
	}

	delete []dstIndices;
}
+
// Builds the combined vertex buffer: concatenates the vertex data of every
// batchable input mesh, channel by channel. The output owns the union of all
// input channels; inputs lacking a channel are padded with defaults
// (up normal, +X tangent, zero UVs, white color, zero skin weights).
// With useTransforms, positions are transformed by each instance transform and
// normals/tangents by the matching inverse-transpose (or uniformly rescaled
// rotation). Lightmap UVs (UV1) additionally get the per-instance
// scale/offset baked in.
void CombineMeshVerticesForStaticBatching ( const CombineInstances& in, const string& combinedMeshName, Mesh& outCombinedMesh, bool useTransforms )
{
	PROFILER_AUTO(gCombineVerticesProfile, &outCombinedMesh);

	// Total output vertex count over all batchable inputs.
	int vertexCount = 0;
	size_t size = in.size();
	for( size_t i=0; i!=size; ++i )
	{
		if (IsMeshBatchable(in[i].mesh, in[i].subMeshIndex))
			vertexCount += in[i].mesh->GetVertexCount();
	}

	// Union of channels across the inputs decides the output vertex format.
	bool hasNormals = false;
	bool hasTangents = false;
	bool hasUV0 = false;
	bool hasUV1 = false;
	bool hasColors = false;
	bool hasSkin = false;
	int bindposeCount = 0;

	for( size_t i=0; i!=size; ++i )
	{
		if (IsMeshBatchable(in[i].mesh, in[i].subMeshIndex))
		{
			const Mesh* mesh = in[i].mesh;
			const UInt32 channels = mesh->GetAvailableChannels();
			hasNormals |= (channels & (1<<kShaderChannelNormal)) != 0;
			hasTangents |= (channels & (1<<kShaderChannelTangent)) != 0;
			hasUV0 |= (channels & (1<<kShaderChannelTexCoord0)) != 0;
			// A non-identity lightmap scale/offset forces a UV1 channel even if
			// the mesh itself has none (UV0 is remapped into it below).
			hasUV1 |= (channels & (1<<kShaderChannelTexCoord1)) != 0 || (in[i].lightmapTilingOffset != Vector4f(1, 1, 0, 0));
			hasColors |= (channels & (1<<kShaderChannelColor)) != 0;
			hasSkin |= mesh->GetSkin().size() && mesh->GetBindpose().size();
			bindposeCount += mesh->GetBindpose().size();
		}
	}

	UInt32 channels = 1<<kShaderChannelVertex;
	if ( hasNormals ) channels |= 1<<kShaderChannelNormal;
	if ( hasTangents ) channels |= 1<<kShaderChannelTangent;
	if ( hasUV0 ) channels |= 1<<kShaderChannelTexCoord0;
	if ( hasUV1 ) channels |= 1<<kShaderChannelTexCoord1;
	if ( hasColors ) channels |= 1<<kShaderChannelColor;

	outCombinedMesh.Clear(true);
	outCombinedMesh.ResizeVertices( vertexCount, channels );
	outCombinedMesh.SetName( combinedMeshName.c_str() );
	// Input meshes are already swizzled correctly, so we can copy colors directly
	outCombinedMesh.SetVertexColorsSwizzled(gGraphicsCaps.needsToSwizzleVertexColors);

	if ( hasSkin )
	{
		outCombinedMesh.GetSkin().resize_initialized(vertexCount);
		outCombinedMesh.GetBindpose().resize_initialized(bindposeCount);
		outCombinedMesh.GetBonePathHashes().resize_uninitialized(bindposeCount);
	}

	// avoid doing twice (in worst case)
	// Precompute per-instance normal matrices: inverse-transpose for
	// non-uniformly scaled transforms, rescaled rotation otherwise.
	Matrix4x4f* normalMatrices;
	bool* isNonUniformScaleTransform;
	ALLOC_TEMP (normalMatrices, Matrix4x4f, size);
	ALLOC_TEMP (isNonUniformScaleTransform, bool, size);
	if ( hasNormals || hasTangents )
	{
		for( size_t i=0; i!=size; ++i )
		{
			float uniformScale;
			TransformType type = ComputeTransformType(in[i].transform, uniformScale);
			Matrix4x4f m;
			isNonUniformScaleTransform[i] = IsNonUniformScaleTransform(type);
			if (isNonUniformScaleTransform[i])
			{
				Matrix4x4f::Invert_General3D( in[i].transform, normalMatrices[i] );
				normalMatrices[i].Transpose();
			}
			else
			{
				normalMatrices[i] = Matrix3x3f(in[i].transform);
				// Scale matrix to keep normals normalized
				normalMatrices[i].Scale(Vector3f::one * (1.0f/uniformScale));
			}
		}
	}

	// Positions.
	int offset = 0;
	for( size_t i=0; i!=size; ++i )
	{
		if (IsMeshBatchable(in[i].mesh, in[i].subMeshIndex))
		{
			const Matrix4x4f& transform = in[i].transform;
			const Mesh* mesh = in[i].mesh;
			if (useTransforms)
				TransformPoints3x4 (transform,
					(Vector3f const*)mesh->GetChannelPointer (kShaderChannelVertex),
					mesh->GetStride (kShaderChannelVertex),
					(Vector3f*)outCombinedMesh.GetChannelPointer (kShaderChannelVertex, offset),
					outCombinedMesh.GetStride (kShaderChannelVertex),
					mesh->GetVertexCount());
			else
				strided_copy (mesh->GetVertexBegin (), mesh->GetVertexEnd (), outCombinedMesh.GetVertexBegin () + offset);
			offset += mesh->GetVertexCount();
		}
	}

	// Normals (note: local 'vertexCount' below shadows the total count above).
	if ( hasNormals )
	{
		offset = 0;
		for( size_t i=0; i!=size; ++i )
		{
			if (IsMeshBatchable(in[i].mesh, in[i].subMeshIndex))
			{
				const Mesh* mesh = in[i].mesh;
				int vertexCount = mesh->GetVertexCount ();
				if (!mesh->IsAvailable (kShaderChannelNormal))
					std::fill(outCombinedMesh.GetNormalBegin () + offset, outCombinedMesh.GetNormalBegin () + offset + vertexCount, Vector3f(0.0f,1.0f,0.0f));
				else
				{
					const Matrix4x4f& transform = normalMatrices[i];

					StrideIterator<Vector3f> outNormal = outCombinedMesh.GetNormalBegin () + offset;
					if (useTransforms)
					{
						if (isNonUniformScaleTransform[i])
						{
							// Inverse-transpose does not preserve length: renormalize.
							for (StrideIterator<Vector3f> it = mesh->GetNormalBegin (), end = mesh->GetNormalEnd (); it != end; ++it, ++outNormal)
								*outNormal = Normalize( transform.MultiplyVector3( *it) );
						}
						else
						{
							for (StrideIterator<Vector3f> it = mesh->GetNormalBegin (), end = mesh->GetNormalEnd (); it != end; ++it, ++outNormal)
								*outNormal = transform.MultiplyVector3( *it);
						}
					}
					else
						strided_copy (mesh->GetNormalBegin (), mesh->GetNormalEnd (), outCombinedMesh.GetNormalBegin () + offset);
				}
				offset += vertexCount;
			}
		}
	}

	// Tangents: xyz transformed like normals, handedness (w) passed through.
	if ( hasTangents )
	{
		offset = 0;
		for ( size_t i=0; i!=size; ++i )
		{
			if (IsMeshBatchable(in[i].mesh, in[i].subMeshIndex))
			{
				const Mesh* mesh = in[i].mesh;
				int vertexCount = mesh->GetVertexCount ();
				if (!mesh->IsAvailable (kShaderChannelTangent))
					std::fill(outCombinedMesh.GetTangentBegin () + offset, outCombinedMesh.GetTangentBegin () + offset + vertexCount, Vector4f(1.0f,0.0f,0.0f,1.0f));
				else
				{
					const Matrix4x4f& transform = normalMatrices[i];

					StrideIterator<Vector4f> outTanget = outCombinedMesh.GetTangentBegin () + offset;
					if (useTransforms)
					{
						if (isNonUniformScaleTransform[i])
						{
							for (StrideIterator<Vector4f> it = mesh->GetTangentBegin (), end = mesh->GetTangentEnd (); it != end; ++it, ++outTanget)
							{
								Vector3f t3 = Normalize(transform.MultiplyVector3(Vector3f(it->x, it->y, it->z)));
								*outTanget = Vector4f(t3.x,t3.y,t3.z,it->w);
							}
						}
						else
						{
							for (StrideIterator<Vector4f> it = mesh->GetTangentBegin (), end = mesh->GetTangentEnd (); it != end; ++it, ++outTanget)
							{
								Vector3f t3 = transform.MultiplyVector3(Vector3f(it->x, it->y, it->z));
								*outTanget = Vector4f(t3.x,t3.y,t3.z,it->w);
							}
						}
					}
					else
						strided_copy (mesh->GetTangentBegin (), mesh->GetTangentEnd (), outCombinedMesh.GetTangentBegin () + offset);
				}
				offset += vertexCount;
			}
		}
	}

	// UV0: plain copy, zero-filled when missing.
	if ( hasUV0 )
	{
		offset = 0;
		for ( size_t i=0; i!=size; ++i )
		{
			if (IsMeshBatchable(in[i].mesh, in[i].subMeshIndex))
			{
				const Mesh* mesh = in[i].mesh;
				int vertexCount = mesh->GetVertexCount ();
				if (!mesh->IsAvailable (kShaderChannelTexCoord0))
					std::fill (outCombinedMesh.GetUvBegin (0) + offset, outCombinedMesh.GetUvBegin (0) + offset + vertexCount, Vector2f(0.0f,0.0f));
				else
					strided_copy (mesh->GetUvBegin (0), mesh->GetUvEnd (0), outCombinedMesh.GetUvBegin (0) + offset);
				offset += vertexCount;
			}
		}
	}

	// UV1 (lightmap UVs): falls back to UV0 when the mesh has no UV1, and bakes
	// in the per-instance lightmap scale/offset.
	if ( hasUV1 )
	{
		offset = 0;
		for ( size_t i=0; i!=size; ++i )
		{
			if (IsMeshBatchable(in[i].mesh, in[i].subMeshIndex))
			{
				const Mesh* mesh = in[i].mesh;
				const int uvIndex = (mesh->GetAvailableChannels() & (1<<kShaderChannelTexCoord1))!=0? 1 : 0;
				StrideIterator<Vector2f> it = in[i].mesh->GetUvBegin( uvIndex );
				StrideIterator<Vector2f> end = in[i].mesh->GetUvEnd( uvIndex );

				int vertexCount = mesh->GetVertexCount ();
				if ( it == end)
					std::fill (outCombinedMesh.GetUvBegin (1) + offset, outCombinedMesh.GetUvBegin (1) + offset + vertexCount, Vector2f(0.0f,0.0f));
				else
				{
					// we have to apply lightmap UV scale and offset factors
					// callee is responsible to reset lightmapTilingOffset on the Renderer afterwards
					const Vector4f uvScaleOffset = in[i].lightmapTilingOffset;
					if ( uvScaleOffset != Vector4f(1, 1, 0, 0) )
					{
						StrideIterator<Vector2f> outUV = outCombinedMesh.GetUvBegin (1) + offset;
						for (; it != end; ++it, ++outUV)
						{
							outUV->x = it->x * uvScaleOffset.x + uvScaleOffset.z;
							outUV->y = it->y * uvScaleOffset.y + uvScaleOffset.w;
						}
					}
					else
						strided_copy (it, end, outCombinedMesh.GetUvBegin (1) + offset);
				}
				offset += vertexCount;
			}
		}
	}

	// Vertex colors: white when missing; swizzle states must already agree.
	if ( hasColors )
	{
		offset = 0;
		for ( size_t i=0; i!=size; ++i )
		{
			if (IsMeshBatchable(in[i].mesh, in[i].subMeshIndex))
			{
				const Mesh* mesh = in[i].mesh;
				int vertexCount = mesh->GetVertexCount ();
				if (!mesh->IsAvailable (kShaderChannelColor))
					std::fill (outCombinedMesh.GetColorBegin () + offset, outCombinedMesh.GetColorBegin () + offset + vertexCount, ColorRGBA32(255,255,255,255));
				else
				{
					DebugAssert(mesh->GetVertexColorsSwizzled() == outCombinedMesh.GetVertexColorsSwizzled());
					strided_copy (mesh->GetColorBegin (), mesh->GetColorEnd (), outCombinedMesh.GetColorBegin () + offset);
				}
				offset += vertexCount;
			}
		}
	}

	if ( hasSkin )
	{
		CombineBoneSkinning (in, outCombinedMesh);
	}
}
+
// Copies the root bone path hash of the first combine instance onto the
// combined mesh.
// NOTE(review): dereferences in[0].mesh unconditionally -- callers must
// guarantee a non-empty list with a non-null first mesh; confirm.
static void CalculateRootBonePathHash (const CombineInstances &in, Mesh& outCombinedMesh)
{
	// We always pick the root bone path hash of the first combine instance.
	// This is because anything else gives unpredictable behaviour and makes it impossible for the user
	// to setup the skinned mesh renderer T/R/S correctly.
	outCombinedMesh.SetRootBonePathHash(in[0].mesh->GetRootBonePathHash());

	// If we made it so that the skinnedmeshrenderer always used the default pose from the Avatar
	// Then it would be possible to pick the root bone from the mesh with the most bones instead.
	// (Dead sketch below -- note it is unfinished and has unbalanced braces;
	// harmless only because it is compiled out by #if 0.)
#if 0
	size_t size = in.size();

	BindingHash rootBonePathHash = 0;
	int boneCount = 0;
	for (size_t i=0; i<size; ++i)
	{
	}
	}
	if (rootBonePathHash)
		outCombinedMesh.SetRootBonePathHash(rootBonePathHash);
#endif
}
+
+static void CombineBoneSkinning (const CombineInstances &in, Mesh& outCombinedMesh)
+{
+ size_t size = in.size();
+
+ int boneOffset = 0;
+ int offset = 0;
+ for ( size_t i=0; i!=size; ++i )
+ {
+ if (!IsMeshBatchable(in[i].mesh, in[i].subMeshIndex))
+ continue;
+
+ const Mesh* mesh = in[i].mesh;
+ Mesh::BoneInfluenceContainer& outSkin = outCombinedMesh.GetSkin();
+ const Mesh::BoneInfluenceContainer& inSkin = mesh->GetSkin();
+ int vertexCount = mesh->GetVertexCount ();
+ if (inSkin.empty())
+ {
+ for(int i=0; i<vertexCount;i++)
+ {
+ outSkin[offset+i].weight[0] = 0;
+ outSkin[offset+i].weight[1] = 0;
+ outSkin[offset+i].weight[2] = 0;
+ outSkin[offset+i].weight[3] = 0;
+ outSkin[offset+i].boneIndex[0] = 0;
+ outSkin[offset+i].boneIndex[1] = 0;
+ outSkin[offset+i].boneIndex[2] = 0;
+ outSkin[offset+i].boneIndex[3] = 0;
+ }
+ }
+ else
+ {
+ for(int i=0; i<vertexCount;i++)
+ {
+ outSkin[offset+i].weight[0] = inSkin[i].weight[0];
+ outSkin[offset+i].weight[1] = inSkin[i].weight[1];
+ outSkin[offset+i].weight[2] = inSkin[i].weight[2];
+ outSkin[offset+i].weight[3] = inSkin[i].weight[3];
+ outSkin[offset+i].boneIndex[0] = inSkin[i].boneIndex[0]+boneOffset;
+ outSkin[offset+i].boneIndex[1] = inSkin[i].boneIndex[1]+boneOffset;
+ outSkin[offset+i].boneIndex[2] = inSkin[i].boneIndex[2]+boneOffset;
+ outSkin[offset+i].boneIndex[3] = inSkin[i].boneIndex[3]+boneOffset;
+ }
+ }
+
+ offset += vertexCount;
+
+ int poseCount = mesh->GetBindpose().size();
+ int bindingHashCount = mesh->GetBonePathHashes().size();
+
+ memcpy(outCombinedMesh.GetBindpose().begin() + boneOffset, mesh->GetBindpose().begin(), poseCount*sizeof(Matrix4x4f));
+
+ // Old asset bundles might not have bindingHashCount in sync with bind poses.
+ if (poseCount == bindingHashCount)
+ memcpy(outCombinedMesh.GetBonePathHashes().begin () + boneOffset, mesh->GetBonePathHashes().begin(), poseCount*sizeof(BindingHash));
+ else
+ memset(outCombinedMesh.GetBonePathHashes().begin () + boneOffset, 0, poseCount*sizeof(BindingHash));
+
+ boneOffset += poseCount;
+ }
+
+ CalculateRootBonePathHash (in, outCombinedMesh);
+}
+
+
// Script-facing entry point: validates the inputs, then combines vertices and
// indices into 'out' and refreshes its bounds/vertex format.
// Null meshes and invalid sub-mesh indices only warn (they are skipped by the
// combine passes); a non-readable mesh or out appearing in its own input list
// aborts with an error.
void CombineMeshes (const CombineInstances &in, Mesh& out, bool mergeSubMeshes, bool useTransforms)
{
	if (!out.CanAccessFromScript())
	{
		ErrorStringMsg("Cannot combine into mesh that does not allow access: %s", out.GetName());
		return;
	}
	for (size_t i = 0; i < in.size(); ++i)
	{
		Mesh* mesh = in[i].mesh;
		if (!mesh)
		{
			WarningStringMsg("Combine mesh instance %" PRINTF_SIZET_FORMAT " is null.", i);
		}
		if (mesh && (in[i].subMeshIndex < 0 || in[i].subMeshIndex >= mesh->GetSubMeshCount()))
		{
			WarningStringMsg("Submesh index %d is invalid for mesh %s.", in[i].subMeshIndex, mesh->GetName());
		}
		if (mesh && !mesh->CanAccessFromScript())
		{
			ErrorStringMsg("Cannot combine mesh that does not allow access: %s", mesh->GetName());
			return;
		}
		if (mesh == &out)
		{
			ErrorStringMsg("Cannot combine into a mesh that is also in the CombineInstances input: %s", mesh->GetName());
			return;
		}
	}

	// Vertices first (establishes the output format), then indices.
	// useVertexOffsets==false: indices are rebased by the running vertex total.
	CombineMeshVerticesForStaticBatching (in, out.GetName(), out, useTransforms);
	CombineMeshIndicesForStaticBatching (in, out, mergeSubMeshes, false);

	out.RecalculateBounds();
	out.UpdateVertexFormat();
}
+
diff --git a/Runtime/Filters/Mesh/MeshCombiner.h b/Runtime/Filters/Mesh/MeshCombiner.h
new file mode 100644
index 0000000..a6975a9
--- /dev/null
+++ b/Runtime/Filters/Mesh/MeshCombiner.h
@@ -0,0 +1,33 @@
#ifndef MESHCOMBINER_H
#define MESHCOMBINER_H

#include "LodMesh.h"

class Renderer;

// One entry of a mesh-combine operation: the source mesh/submesh to take and
// how it should be placed in the combined output.
struct CombineInstance
{
	Mesh *mesh;				// source mesh; may be NULL (callers warn and skip)
	int subMeshIndex;		// which submesh of 'mesh' to take
	// Placement of this instance in the combined mesh.
	// NOTE(review): 'transform' is not initialized by the constructor — presumably
	// callers always fill it in (or pass useTransforms = false); verify at call sites.
	Matrix4x4f transform;

	Vector4f lightmapTilingOffset;	// per-instance lightmap UV scale/offset (xy = tiling, zw = offset)
	int vertexOffset;				// first vertex of this instance inside the combined buffer

	CombineInstance() :
		mesh(NULL),
		subMeshIndex(0),
		lightmapTilingOffset(1, 1, 0, 0),
		vertexOffset(0)
	{}
};

typedef std::vector<CombineInstance> CombineInstances;

// Validates all inputs, then merges every instance (vertices and indices) into 'out'.
void CombineMeshes (const CombineInstances &in, Mesh& out, bool mergeSubMeshes, bool useTransforms);
// takes an array of meshes(their vertex data) and merges them into 1 combined mesh.
void CombineMeshVerticesForStaticBatching ( const CombineInstances& in, const string& combinedMeshName, Mesh& outCombinedMesh, bool useTransforms = true );
// takes an array of meshes(their indices) and merges them in 1 mesh (setups subsets)
void CombineMeshIndicesForStaticBatching (const CombineInstances& in, Mesh& inoutMesh, bool mergeSubMeshes, bool useVertexOffsets);

#endif
diff --git a/Runtime/Filters/Mesh/MeshOptimizer.cpp b/Runtime/Filters/Mesh/MeshOptimizer.cpp
new file mode 100644
index 0000000..068dc53
--- /dev/null
+++ b/Runtime/Filters/Mesh/MeshOptimizer.cpp
@@ -0,0 +1,359 @@
+#include "UnityPrefix.h"
+#include "MeshOptimizer.h"
+#include <vector>
+
+//@TODO:
+
+// Step 1
+
+//* bool ExtractCollisionData (Mesh& mesh, UNITY_TEMP_VECTOR(kMemGeometry, Vector3f)& vertices, UNITY_TEMP_VECTOR(kMemGeometry, UInt32)& triangles);
+// -> make it return welded vertices and triangle array
+//* Enable Deformablemesh code and make it work with welding code and check that cloth works visually...
+
+// Testing:
+//* Check mesh collision detection code to work visually correct.
+// * run functional test suite
+// * run lightmapper tests in the integration test suite. They have a complete test for the lightmap uv coordinates picking up lightmap values...
+
+
+// Step 2:
+//* Verify vertex cache performance on iPad1 / Wii / intel integrated graphics
+//* Switch to default gpu optimized mode and update all model importer templates
+
+
+
+template<typename T, const int CACHE_SIZE>
+class VertexCacheOptimizer
+{
+ UInt32* m_cacheEntries;
+ UInt32 m_cacheSize;
+
+ mutable UInt32 m_cacheMisses;
+ mutable UInt32 m_cacheHits;
+
+ UInt32 GetInCache(UInt32 lIndex, const char* vertexInCache) const
+ {
+ return vertexInCache[lIndex] ? 1 : 0;
+ }
+
+ void AddToCache(UInt32 lIndex, char* vertexInCache)
+ {
+ if(m_cacheEntries[0]!=-1)
+ vertexInCache[m_cacheEntries[0]]=0;
+
+ for(UInt32 i=0; i<m_cacheSize-1; i++)
+ m_cacheEntries[i]=m_cacheEntries[i+1];
+
+ m_cacheEntries[m_cacheSize-1]=lIndex;
+ vertexInCache[lIndex]=1;
+ }
+
+public:
+
+ VertexCacheOptimizer () : m_cacheSize(CACHE_SIZE)
+ {
+ m_cacheEntries=new UInt32 [m_cacheSize];
+
+ m_cacheHits = m_cacheMisses = 0;
+ for(UInt32 i=0; i<m_cacheSize; i++)
+ m_cacheEntries[i]=(UInt32)-1;
+ }
+
+ ~VertexCacheOptimizer() { delete m_cacheEntries; }
+
+ UInt32 GetCacheMisses() { return m_cacheMisses; }
+ UInt32 GetCacheHits() { return m_cacheHits; }
+
+ void OptimizeTriangles(T* pdstTris, UInt32 numVertices, const T* srcTris, UInt32 numTriangles)
+ {
+ UInt32 cachedVerts=0;
+ char* triangleUsed=new char [numTriangles];
+ char* vertexInCache=new char [numVertices];
+ memset(triangleUsed,0,numTriangles);
+ memset(vertexInCache,0,numVertices);
+
+ bool foundTriangle=true;
+ while (foundTriangle)
+ {
+ foundTriangle=false;
+ UInt32 bestCandidate=0;
+ UInt32 bestCacheValue=0;
+ for (UInt32 i = 0; i < numTriangles; i++)
+ {
+ if (triangleUsed[i])
+ continue;
+
+ foundTriangle=true;
+ UInt32 i1=srcTris[i*3+0];
+ UInt32 i2=srcTris[i*3+1];
+ UInt32 i3=srcTris[i*3+2];
+
+ UInt32 lCacheValue=GetInCache(i1,vertexInCache)+GetInCache(i2,vertexInCache)+GetInCache(i3,vertexInCache)+1;
+ if (lCacheValue > bestCacheValue)
+ {
+ bestCandidate=i;
+ bestCacheValue=lCacheValue;
+ if (bestCacheValue == 4)
+ break;
+ }
+ }
+ if(foundTriangle)
+ {
+ triangleUsed[bestCandidate]=1;
+ UInt32 i1=srcTris[bestCandidate*3+0];
+ UInt32 i2=srcTris[bestCandidate*3+1];
+ UInt32 i3=srcTris[bestCandidate*3+2];
+ *pdstTris++=(T)i1;
+ *pdstTris++=(T)i2;
+ *pdstTris++=(T)i3;
+ if (!GetInCache(i1,vertexInCache)) { AddToCache(i1,vertexInCache); cachedVerts++; m_cacheMisses++; } else m_cacheHits++;
+ if (!GetInCache(i2,vertexInCache)) { AddToCache(i2,vertexInCache); cachedVerts++; m_cacheMisses++; } else m_cacheHits++;
+ if (!GetInCache(i3,vertexInCache)) { AddToCache(i3,vertexInCache); cachedVerts++; m_cacheMisses++; } else m_cacheHits++;
+ }
+ }
+ delete[] triangleUsed;
+ delete[] vertexInCache;
+ }
+};
+
+inline bool CompareBlendShapeVertexIndex (const BlendShapeVertex& lhs, const BlendShapeVertex& rhs)
+{
+ return lhs.index < rhs.index;
+}
+
// Reorders the mesh's vertex buffer so vertices are stored in the order the
// index buffers first reference them (improves pre-transform cache locality).
// All submesh indices, skin influences and blend-shape vertex indices are
// remapped to match; each blend shape's deltas are then sorted by target index.
// NOTE(review): vertices never referenced by any submesh are dropped
// (newVertexCount only counts referenced vertices) — presumably intended.
void OptimizeReorderVertexBuffer (Mesh& mesh)
{
	const int submeshCount = mesh.GetSubMeshCount();
	const int vertexCount = mesh.GetVertexCount();

	// backup required data
	VertexData backupVertexData(mesh.m_VertexData, mesh.GetAvailableChannels(), mesh.GetVertexData().GetStreamsLayout(), mesh.GetVertexData().GetChannelsLayout());

	Mesh::BoneInfluenceContainer backupSkin;
	if (!mesh.m_Skin.empty())
		backupSkin.swap(mesh.m_Skin);

	// reorder the vertices so they come in increasing order
	// 0xFFFFFFFF marks "not yet assigned" in both remap tables.
	dynamic_array<UInt32> oldToNew;
	dynamic_array<UInt32> newToOld;
	newToOld.resize_initialized(vertexCount, 0xFFFFFFFF);
	oldToNew.resize_initialized(vertexCount, 0xFFFFFFFF);

	// Pass 1: walk every submesh's indices, assigning new vertex numbers in
	// first-use order and rewriting the index buffers on the fly.
	Mesh::TemporaryIndexContainer dstIndices;
	int newVertexCount = 0;
	for (int submesh = 0; submesh < submeshCount; submesh++)
	{
		Mesh::TemporaryIndexContainer indices;
		mesh.GetTriangles (indices, submesh);

		const int indexCount = indices.size();
		dstIndices.resize(indexCount);
		for (int index=0; index < indexCount; index++)
		{
			int vertex = indices[index];
			AssertBreak(vertex >= 0);
			AssertBreak(vertex < vertexCount);

			if (oldToNew[vertex] == 0xFFFFFFFF)
			{
				oldToNew[vertex]=newVertexCount;
				newToOld[newVertexCount]=vertex;
				newVertexCount++;
			}
			dstIndices[index] = oldToNew[vertex];
		}

		mesh.SetIndices (&dstIndices[0], dstIndices.size(), submesh, kPrimitiveTriangles);
	}

	mesh.ResizeVertices(newVertexCount, backupVertexData.GetChannelMask());

	if (!backupSkin.empty())
		mesh.m_Skin.resize_initialized(newVertexCount);

	// Pass 2: scatter the backed-up per-vertex data into its new slots,
	// channel by channel (only channels present in the backup are copied).
	for (int vertex=0; vertex < newVertexCount; vertex++)
	{
		UInt32 remapNew = newToOld[vertex];
		Assert(remapNew != 0xFFFFFFFF);

		if (!backupSkin.empty())
			mesh.m_Skin[vertex] = backupSkin[remapNew];

		mesh.GetVertexBegin()[vertex] = backupVertexData.MakeStrideIterator<Vector3f> (kShaderChannelVertex)[remapNew];

		if (backupVertexData.HasChannel(kShaderChannelNormal))
			mesh.GetNormalBegin()[vertex] = backupVertexData.MakeStrideIterator<Vector3f> (kShaderChannelNormal)[remapNew];

		if (backupVertexData.HasChannel(kShaderChannelColor))
			mesh.GetColorBegin()[vertex] = backupVertexData.MakeStrideIterator<ColorRGBA32> (kShaderChannelColor)[remapNew];

		if (backupVertexData.HasChannel(kShaderChannelTexCoord0))
			mesh.GetUvBegin(0)[vertex] = backupVertexData.MakeStrideIterator<Vector2f> (kShaderChannelTexCoord0)[remapNew];

		if (backupVertexData.HasChannel(kShaderChannelTexCoord1))
			mesh.GetUvBegin(1)[vertex] = backupVertexData.MakeStrideIterator<Vector2f> (kShaderChannelTexCoord1)[remapNew];

		if (backupVertexData.HasChannel(kShaderChannelTangent))
			mesh.GetTangentBegin()[vertex] = backupVertexData.MakeStrideIterator<Vector4f> (kShaderChannelTangent)[remapNew];
	}

	// Remap vertex indices stored in blend shapes
	BlendShapeData& blendShapeData = mesh.GetWriteBlendShapeDataInternal();
	BlendShapeVertices& blendShapeVertices = blendShapeData.vertices;
	for (BlendShapeVertices::iterator itv = blendShapeVertices.begin(), endv = blendShapeVertices.end(); itv != endv; ++itv)
	{
		BlendShapeVertex& bsv = *itv;
		bsv.index = oldToNew[bsv.index];
	}

	// Sort each shape's vertices by index so the blending writes to memory as linearly as possible
	for (int shapeIndex = 0; shapeIndex < blendShapeData.shapes.size(); shapeIndex++)
	{
		const BlendShape& shape = blendShapeData.shapes[shapeIndex];
		BlendShapeVertex* vertices = &blendShapeVertices[shape.firstVertex];
		std::sort(vertices, vertices + shape.vertexCount, CompareBlendShapeVertexIndex);
	}

	mesh.SetChannelsDirty(mesh.GetAvailableChannels(), true);
}
+
+void OptimizeIndexBuffers (Mesh& mesh)
+{
+ const int submeshCount = mesh.GetSubMeshCount();
+ const int vertexCount = mesh.GetVertexCount();
+
+ // first optimize the indices for each submesh
+ for (int submesh = 0; submesh < submeshCount; submesh++)
+ {
+ Mesh::TemporaryIndexContainer unoptimizedIndices;
+ mesh.GetTriangles (unoptimizedIndices, submesh);
+
+ Mesh::TemporaryIndexContainer optimizedIndices;
+ optimizedIndices.resize(unoptimizedIndices.size());
+
+ VertexCacheOptimizer<UInt32, 16> vertexCacheOptimizer;
+ vertexCacheOptimizer.OptimizeTriangles(&optimizedIndices[0], vertexCount, &unoptimizedIndices[0], unoptimizedIndices.size() / 3);
+ // LogString(Format("[Optimize] mesh: %s: submesh: %d hits: %d misses: %d\n", mesh.GetName(), submesh, vertexCacheOptimizer.GetCacheHits(), vertexCacheOptimizer.GetCacheMisses()));
+
+ mesh.SetIndices (&optimizedIndices[0], optimizedIndices.size(), submesh, kPrimitiveTriangles);
+ }
+}
+
+
+template<typename T, const int CACHE_SIZE>
+class VertexCacheDeOptimizer
+{
+ UInt32* m_cacheEntries;
+ UInt32 m_cacheSize;
+
+ mutable UInt32 m_cacheMisses;
+ mutable UInt32 m_cacheHits;
+
+ UInt32 GetInCache(UInt32 lIndex, const char* vertexInCache) const
+ {
+ return vertexInCache[lIndex] ? 1 : 0;
+ }
+
+ void AddToCache(UInt32 lIndex, char* vertexInCache)
+ {
+ if(m_cacheEntries[0]!=-1)
+ vertexInCache[m_cacheEntries[0]]=0;
+
+ for(UInt32 i=0; i<m_cacheSize-1; i++)
+ m_cacheEntries[i]=m_cacheEntries[i+1];
+
+ m_cacheEntries[m_cacheSize-1]=lIndex;
+ vertexInCache[lIndex]=1;
+ }
+
+public:
+
+ VertexCacheDeOptimizer () : m_cacheSize(CACHE_SIZE)
+ {
+ m_cacheEntries=new UInt32 [m_cacheSize];
+
+ m_cacheHits = m_cacheMisses = 0;
+ for(UInt32 i=0; i<m_cacheSize; i++)
+ m_cacheEntries[i]=(UInt32)-1;
+ }
+
+ ~VertexCacheDeOptimizer() { delete m_cacheEntries; }
+
+ UInt32 GetCacheMisses() { return m_cacheMisses; }
+ UInt32 GetCacheHits() { return m_cacheHits; }
+
+ void DeOptimizeTriangles(T* pdstTris, UInt32 numVertices, const T* srcTris, UInt32 numTriangles)
+ {
+ UInt32 cachedVerts=0;
+ char* triangleUsed=new char [numTriangles];
+ char* vertexInCache=new char [numVertices];
+ memset(triangleUsed,0,numTriangles);
+ memset(vertexInCache,0,numVertices);
+
+ bool foundTriangle=true;
+ while (foundTriangle)
+ {
+ foundTriangle=false;
+ UInt32 bestCandidate=0;
+ UInt32 bestCacheValue=4;
+ for (UInt32 i = 0; i < numTriangles; i++)
+ {
+ if (triangleUsed[i])
+ continue;
+
+ foundTriangle=true;
+ UInt32 i1=srcTris[i*3+0];
+ UInt32 i2=srcTris[i*3+1];
+ UInt32 i3=srcTris[i*3+2];
+
+ UInt32 lCacheValue=GetInCache(i1,vertexInCache)+GetInCache(i2,vertexInCache)+GetInCache(i3,vertexInCache)+1;
+ if (lCacheValue <= bestCacheValue)
+ {
+ bestCandidate=i;
+ bestCacheValue=lCacheValue;
+ if (bestCacheValue == 1)
+ break;
+ }
+ }
+ if(foundTriangle)
+ {
+ triangleUsed[bestCandidate]=1;
+ UInt32 i1=srcTris[bestCandidate*3+0];
+ UInt32 i2=srcTris[bestCandidate*3+1];
+ UInt32 i3=srcTris[bestCandidate*3+2];
+ *pdstTris++=(T)i1;
+ *pdstTris++=(T)i2;
+ *pdstTris++=(T)i3;
+ if (!GetInCache(i1,vertexInCache)) { AddToCache(i1,vertexInCache); cachedVerts++; m_cacheMisses++; } else m_cacheHits++;
+ if (!GetInCache(i2,vertexInCache)) { AddToCache(i2,vertexInCache); cachedVerts++; m_cacheMisses++; } else m_cacheHits++;
+ if (!GetInCache(i3,vertexInCache)) { AddToCache(i3,vertexInCache); cachedVerts++; m_cacheMisses++; } else m_cacheHits++;
+ }
+ }
+ delete triangleUsed;
+ delete vertexInCache;
+ }
+};
+
+void DeOptimizeIndexBuffers (Mesh& mesh)
+{
+ const int submeshCount = mesh.GetSubMeshCount();
+ const int vertexCount = mesh.GetVertexCount();
+
+ // first optimize the indices for each submesh
+ for (int submesh = 0; submesh < submeshCount; submesh++)
+ {
+ Mesh::TemporaryIndexContainer unoptimizedIndices;
+ mesh.GetTriangles (unoptimizedIndices, submesh);
+
+ Mesh::TemporaryIndexContainer deOptimizedIndices;
+ deOptimizedIndices.resize(unoptimizedIndices.size());
+
+ VertexCacheDeOptimizer<UInt32, 16> vertexCacheDeOptimizer;
+ vertexCacheDeOptimizer.DeOptimizeTriangles(&deOptimizedIndices[0], vertexCount, &unoptimizedIndices[0], unoptimizedIndices.size() / 3);
+
+ //LogString(Format("[Deoptimize] mesh: %s: submesh: %d hits: %d misses: %d\n", mesh.GetName(), submesh, vertexCacheDeOptimizer.GetCacheHits(), vertexCacheDeOptimizer.GetCacheMisses()));
+
+ mesh.SetIndices (&deOptimizedIndices[0], deOptimizedIndices.size(), submesh, kPrimitiveTriangles);
+ }
+}
+
diff --git a/Runtime/Filters/Mesh/MeshOptimizer.h b/Runtime/Filters/Mesh/MeshOptimizer.h
new file mode 100644
index 0000000..8964edf
--- /dev/null
+++ b/Runtime/Filters/Mesh/MeshOptimizer.h
@@ -0,0 +1,13 @@
#pragma once

#ifndef __importmeshoptimizer_h_included__
#define __importmeshoptimizer_h_included__

#include "Runtime/Filters/Mesh/LodMesh.h"

// Reorders each submesh's triangles into worst-case vertex-cache order (benchmarking aid).
void DeOptimizeIndexBuffers (Mesh& mesh);
// Reorders each submesh's triangles to maximize post-transform vertex-cache hits.
void OptimizeIndexBuffers (Mesh& mesh);
// Reorders the vertex buffer so vertices appear in first-use order of the indices;
// remaps indices, skin data and blend shapes to match.
void OptimizeReorderVertexBuffer (Mesh& mesh);


#endif //__importmeshoptimizer_h_included__
new file mode 100644
index 0000000..9ec9f87
--- /dev/null
+++ b/Runtime/Filters/Mesh/MeshPartitioner.cpp
@@ -0,0 +1,346 @@
+
+#include "UnityPrefix.h"
+#include "MeshPartitioner.h"
+#include "Runtime/Filters/Mesh/LodMesh.h"
+
+#if UNITY_EDITOR
+
// Per-channel vertex stride in bytes, indexed by shader channel order
// (position, normal, color, uv0, uv1, tangent) plus one extra trailing slot
// for the skin stream (BoneInfluence) — see the `<= kShaderChannelCount` loops.
static const UInt32 ComponentStride[] = { 12, 12, 4, 8, 8, 16, sizeof(BoneInfluence) };
+
// Computes the largest vertex batch size (<= sizeRestriction) such that every
// batch boundary falls on a 16-byte DMA alignment boundary for the given
// per-vertex stride. When 16 and the stride already divide evenly the size
// restriction can be used unchanged; otherwise the batch size is rounded to a
// multiple of (lcm(16, stride) / stride) vertices — down by default, up when
// 'padded' is set.
static int CalcDMABatchSize(int totalVerts, int stride, const int sizeRestriction, bool padded)
{
	const int alignmentRestriction = 16; // DMA transfers address must be a multiple of 16
	int a = alignmentRestriction;

	if(a>stride)
	{
		// Small stride: if 16 is a multiple of the stride, every batch start is aligned.
		if(a % stride == 0)
			return sizeRestriction;
		// Find the smallest multiple of 16 that is also a multiple of the stride.
		while(a % stride) { a+=alignmentRestriction; }
	}
	else
	{
		// Large stride: aligned whenever the stride itself is a multiple of 16.
		if(stride % a == 0)
			return sizeRestriction;
		// NOTE(review): this search terminates only if some multiple of 16 divides
		// the stride; all strides in ComponentStride hit one of the early returns
		// or the small-stride branch instead — verify before adding new strides.
		while(stride % a) { a+=alignmentRestriction; }
	}

	// Vertices per aligned chunk.
	int batchMultiple = a / stride;
	totalVerts = (totalVerts < sizeRestriction) ? totalVerts : sizeRestriction;
	if(padded)
		totalVerts += batchMultiple - 1;	// round up to the next multiple instead of down
	totalVerts /= batchMultiple;
	totalVerts *= batchMultiple;
	return totalVerts;
}
+
// Returns the most restrictive DMA-friendly batch size (<= maxVerts) over all
// channels present in availableChannels. The loop deliberately runs to
// kShaderChannelCount *inclusive*: the extra bit/slot is the skin stream
// (BoneInfluence) appended after the regular shader channels.
static int CalcBestFitBatchSize(const UInt32 availableChannels, int vertexCount, int maxVerts, bool padded = false)
{
	int bestFit = INT_MAX;
	for(int i=0;i<=kShaderChannelCount;i++)
	{
		if (availableChannels & (1<<i))
		{
			int maxVCount = CalcDMABatchSize(vertexCount, ComponentStride[i], maxVerts, padded);
			// Keep the smallest (most restrictive) batch size across channels.
			bestFit = (bestFit > maxVCount) ? maxVCount : bestFit;
		}
	}
	return bestFit;
}
+
// Working storage for one partition of a submesh while partitioning:
// per-channel vertex arrays, the partition-local index buffer, and the
// new->old vertex remap table. T is the index type (UInt16 in practice).
template<typename T>
struct TempPartition
{
	dynamic_array<Vector3f> m_Vertices;
	dynamic_array<Vector2f> m_UV;
	dynamic_array<Vector2f> m_UV1;
	dynamic_array<ColorRGBA32> m_Colors;
	dynamic_array<Vector3f> m_Normals;
	dynamic_array<Vector4f> m_Tangents;
	dynamic_array<BoneInfluence> m_Skin;
	dynamic_array<T> indexBuffer;		// indices relative to this partition's vertex 0
	dynamic_array<T> newToOld;			// partition-local vertex -> original mesh vertex
	int vertexCount;					// padded vertex count of this partition
	//
	// Resets the remap table to "unassigned" ((T)-1 in every slot; the memset
	// works because a -1 pattern is the same in every byte).
	void InitRemapping(int numVertices)
	{
		newToOld.resize_uninitialized(numVertices);
		memset(&newToOld[0],(T)-1,numVertices*sizeof(T));
	}
	// Copies the partition's vertices (per channel, plus skin) out of 'mesh'
	// using the newToOld table. Slots past actualVertexCount keep remapping to
	// the last seen vertex: remapNew is deliberately *not* reset each iteration,
	// so padding vertices are duplicates of the previous real vertex.
	void RemapVertices(Mesh& mesh, int actualVertexCount)
	{
		m_Vertices.resize_uninitialized(vertexCount);
		const UInt32 channels = mesh.GetAvailableChannels();
		if(channels&(1<<kShaderChannelNormal))
			m_Normals.resize_uninitialized(vertexCount);
		if(channels&(1<<kShaderChannelTexCoord0))
			m_UV.resize_uninitialized(vertexCount);
		if(channels&(1<<kShaderChannelTexCoord1))
			m_UV1.resize_uninitialized(vertexCount);
		if(channels&(1<<kShaderChannelTangent))
			m_Tangents.resize_uninitialized(vertexCount);
		if(channels&(1<<kShaderChannelColor))
			m_Colors.resize_uninitialized(vertexCount);
		if(!mesh.GetSkin().empty())
			m_Skin.resize_uninitialized(vertexCount);

		T remapNew = 0;
		for(int vertex=0; vertex<vertexCount; vertex++)
		{
			if((T)-1 != newToOld[vertex])
				remapNew = newToOld[vertex];
			m_Vertices[vertex]=mesh.GetVertexBegin()[remapNew];
			if(channels&(1<<kShaderChannelNormal))
				m_Normals[vertex]=mesh.GetNormalBegin()[remapNew];
			if(channels&(1<<kShaderChannelTexCoord0))
				m_UV[vertex]=mesh.GetUvBegin(0)[remapNew];
			if(channels&(1<<kShaderChannelTexCoord1))
				m_UV1[vertex]=mesh.GetUvBegin(1)[remapNew];
			if(channels&(1<<kShaderChannelTangent))
				m_Tangents[vertex]=mesh.GetTangentBegin()[remapNew];
			if(channels&(1<<kShaderChannelColor))
				m_Colors[vertex]=mesh.GetColorBegin()[remapNew];
			if(!mesh.GetSkin().empty())
				m_Skin[vertex]=mesh.GetSkin()[remapNew];
		}
	}
};
+
// All partitions generated for a single submesh.
template<typename T>
struct SegmentedMesh
{
	std::vector<TempPartition<T> > m_Partitions;
	void Clear() { m_Partitions.clear(); }
};
+
// Splits one submesh into DMA-sized partitions. Triangles are consumed in
// order; a partition is closed as soon as adding another triangle would push
// its unique-vertex count past the computed batch size. Results are appended
// to segments[submesh].m_Partitions.
template<typename T>
static void CreateFromSubMesh(std::vector< SegmentedMesh<T> >& segments, Mesh& mesh, int submesh)
{
	SubMesh& sm = mesh.GetSubMeshFast(submesh);

	T vertexCount = 0;
	const int numIndices = sm.indexCount;
	const int numTriangles = numIndices / 3;

	AssertBreak((numTriangles * 3) == numIndices);

	// Largest per-vertex stride across all present channels (incl. skin slot)
	// bounds how many vertices fit into one DMA transfer.
	UInt32 maxComponentStride = 0;
	const UInt32 availableChannels = mesh.GetAvailableChannels() | (mesh.GetSkin().empty() ? 0 : (1<<kShaderChannelCount));
	for(int i=0;i<=kShaderChannelCount;i++)
	{
		if(availableChannels & (1<<i))
		{
			if(maxComponentStride < ComponentStride[i])
				maxComponentStride = ComponentStride[i];
		}
	}

	const UInt32 maxDMATransferSize = 16 * 1024;
	const UInt32 numVerts = (numIndices + 15) & (~15);	// round index count up to a multiple of 16
	const UInt32 maxVerts = std::min(numVerts, maxDMATransferSize / maxComponentStride);
	const UInt32 batchSize = CalcBestFitBatchSize(availableChannels, numVerts, maxVerts);

	const int maxPartitions = (numIndices + batchSize-1) / batchSize;
	const int numVertices = (sm.indexCount + 2*maxPartitions);	// upper bound used only for the assert below

	const T* srcIndices = reinterpret_cast<const T*> (&mesh.GetIndexBuffer()[sm.firstByte]);

	int startTriangle = 0;
	int startVertex = 0;
	std::vector<T> oldToNew;
	oldToNew.resize(mesh.GetVertexCount());
	std::vector<TempPartition<T> > & partitions = segments[submesh].m_Partitions;
	// Each outer iteration builds one partition starting at startTriangle.
	while(startTriangle != numTriangles)
	{
		TempPartition<T> p;
		p.indexBuffer.clear();
		p.vertexCount = 0;
		p.InitRemapping(batchSize+3);	// +3: room for the triangle that overflows before it is undone
		dynamic_array<T>& dstIndices = p.indexBuffer;
		memset(&oldToNew[0],(T)-1,oldToNew.size()*sizeof(T));
		for(int i=startTriangle; i<numTriangles; i++)
		{
			// Optimistically assume we finish the submesh; overwritten below if we overflow.
			startTriangle = numTriangles;
			T lastVertexCount = vertexCount; // undo stack
			for(int j=0;j<3;j++)
			{
				int index = i*3+j;
				int vertex = srcIndices[index];
				AssertBreak(vertex >= 0);
				AssertBreak(vertex < mesh.GetVertexCount());
				AssertBreak(lastVertexCount-startVertex+j < p.newToOld.size());
				AssertBreak(p.newToOld[lastVertexCount-startVertex+j] == (T)-1);
				if(oldToNew[vertex]==(T)-1)
				{
					AssertBreak(vertexCount < numVertices);
					oldToNew[vertex]=vertexCount-startVertex;
					p.newToOld[vertexCount-startVertex]=vertex;
					vertexCount++;
				}
				dstIndices.push_back(oldToNew[vertex]);
			}
			if((vertexCount-startVertex) > batchSize)
			{
				//undo the last one in the partition
				for(int j=0;j<3;j++)
				{
					p.newToOld[lastVertexCount-startVertex+j] = -1;;
					dstIndices.pop_back();
				}
				startTriangle = i;	// this triangle starts the next partition
				vertexCount = lastVertexCount;
				break;
			}
		}
		const int actualVertexCount = vertexCount - startVertex;
		p.vertexCount = maxVerts;//CalcBestFitBatchSize(availableChannels, actualVertexCount, maxVerts, true); // FIXME!!! This needs to find the next "best fit" that will still keep alignment restrictions..
		p.RemapVertices(mesh, actualVertexCount);
		partitions.push_back(p);
		startVertex = vertexCount;
	}
	oldToNew.clear();
}
+
+// mircea: todo: this would be awesome!!!
+// spuInOut:
+// m_Vertices
+// m_Normals
+// m_Tangents
+// spuIn:
+// m_Skin
+
+// rsxDirect
+// m_UV
+// m_UV1
+// m_Colors
+// m_IndexBuffer
+
// Splits every submesh of 'm' into DMA-sized partitions and rebuilds the
// mesh's vertex/index buffers and partition tables from them. Only runs for
// optimized meshes that have skin data (partitioning rewrites the buffers,
// which is only valid under the optimized-mesh contract).
void PartitionSubmeshes(Mesh& m)
{
	typedef UInt16 T;

	const int submeshCount = m.m_SubMeshes.size();

	m.m_PartitionInfos.clear();
	m.m_Partitions.clear();

	// Bail out unless the mesh is both optimized and skinned: partitioning
	// changes the vertex/index buffers, which is only allowed in that case.
	if (!m.GetMeshOptimized() || m.GetSkin().empty())
		return;

	// destripify if needed
	m.DestripifyIndices ();

	// need to fixup the indices first so they are not relative to the partition start anymore.
	// (Note: m_PartitionInfos was cleared above, so this loop only does work if
	// DestripifyIndices repopulated it.)
	Mesh::MeshPartitionInfoContainer& partInfos = m.m_PartitionInfos;
	for(int pi=0; pi<partInfos.size(); pi++)
	{
		const MeshPartitionInfo& partInfo = m.m_PartitionInfos[pi];

		for(int s=0; s<partInfo.partitionCount; s++)
		{
			const MeshPartition& p = m.m_Partitions[partInfo.submeshStart + s];
			IndexBufferData indexBufferData;
			m.GetIndexBufferData(indexBufferData);
			UInt16* indices = (UInt16*)(&m.m_IndexBuffer[0] + p.indexByteOffset);
			for(int i=0;i<p.indexCount;i++)
				indices[i] += p.vertexOffset;
		}
	}

	// make a segment for each submesh
	std::vector< SegmentedMesh<T> > segments;
	segments.resize(submeshCount);
	for(int submesh=0;submesh<submeshCount;submesh++)
		CreateFromSubMesh<T>(segments, m, submesh);

	///////////////////////////////////////////////////////////////////////////////
	// combine the segments to get the script accessible buffers

	UInt32 availableChannels = m.GetAvailableChannels();

	m.Clear(false);
	m.SetMeshOptimized(true); //mircea@ m.Clear will set the optimized mesh to false. Being here means we are partitioning an optimized mesh so restore the flag.
	m.SetSubMeshCount(submeshCount);

	UInt32 vertexOffset = 0;
	UInt32 indexOffset = 0;

	// Pass 1: lay out partition records (offsets/counts) for all submeshes.
	for(int submesh=0;submesh<submeshCount;submesh++)
	{
		int indexCount = 0;
		SegmentedMesh<T>& seg = segments[submesh];

		MeshPartitionInfo partInfo;
		partInfo.submeshStart = m.m_Partitions.size();
		partInfo.partitionCount = seg.m_Partitions.size();
		m.m_PartitionInfos.push_back(partInfo);

		// create partitions & build the mesh buffers
		for(int s=0;s<seg.m_Partitions.size();s++)
		{
			MeshPartition part;
			TempPartition<T>& p = seg.m_Partitions[s];
			part.vertexCount = p.vertexCount;
			part.vertexOffset = vertexOffset;
			part.indexCount = p.indexBuffer.size();
			part.indexByteOffset = indexOffset;
			AssertBreak(0 == (part.vertexOffset & 15));
			m.m_Partitions.push_back(part);;
			indexCount += part.indexCount;
			indexOffset += p.indexBuffer.size() * sizeof(T);
			vertexOffset += p.vertexCount;
		}
	}

	// fill in the partitioned data back into the mesh.
	m.ResizeVertices(vertexOffset, availableChannels);

	// Pass 2: copy the partitioned vertex streams and rebuild the index buffers.
	for(int submesh=0;submesh<submeshCount;submesh++)
	{
		const SegmentedMesh<T>& seg = segments[submesh];
		const MeshPartitionInfo& partInfo = m.m_PartitionInfos[submesh];
		for(int s=0;s<seg.m_Partitions.size();s++)
		{
			const TempPartition<T>& p = seg.m_Partitions[s];
			const MeshPartition& part = m.m_Partitions[partInfo.submeshStart + s];
			strided_copy (p.m_Vertices.begin (), p.m_Vertices.end(), m.GetVertexBegin () + part.vertexOffset);
			if(!p.m_Normals.empty())
				strided_copy (p.m_Normals.begin (), p.m_Normals.end(), m.GetNormalBegin () + part.vertexOffset);
			if(!p.m_UV.empty())
				strided_copy (p.m_UV.begin (), p.m_UV.end (), m.GetUvBegin (0) + part.vertexOffset);
			if(!p.m_UV1.empty())
				strided_copy (p.m_UV1.begin (), p.m_UV1.end (), m.GetUvBegin (1) + part.vertexOffset);
			if(!p.m_Tangents.empty())
				strided_copy (p.m_Tangents.begin (), p.m_Tangents.end (), m.GetTangentBegin () + part.vertexOffset);
			if(!p.m_Colors.empty())
				strided_copy (p.m_Colors.begin (), p.m_Colors.end (), m.GetColorBegin() + part.vertexOffset);
			if(!p.m_Skin.empty())
				m.GetSkin().insert(m.GetSkin().end(), p.m_Skin.begin(), p.m_Skin.end());
		}

		// Indices stay partition-relative here; DEBUG_PARTITIONING rebases them
		// to absolute vertex offsets for visual inspection.
		std::vector<T> indices;
		for(int s=0;s<partInfo.partitionCount;s++)
		{
			const MeshPartition& p = m.m_Partitions[partInfo.submeshStart+s];
			const TempPartition<T>& tp = seg.m_Partitions[s];
			for(int i=0;i<p.indexCount;i++)
			{
				int index = tp.indexBuffer[i];
				AssertBreak( (index>=0) && (index < (p.vertexCount)));
				#if DEBUG_PARTITIONING
				index += p.vertexOffset;
				#endif
				indices.push_back(index);
			}
		}
		m.SetIndices (&indices[0], indices.size(), submesh, kPrimitiveTriangles);
	}
}
+
+void PartitionMesh(Mesh* m)
+{
+ PartitionSubmeshes(*m);
+}
+
+#endif //UNITY_EDITOR
diff --git a/Runtime/Filters/Mesh/MeshPartitioner.h b/Runtime/Filters/Mesh/MeshPartitioner.h
new file mode 100644
index 0000000..95a0d98
--- /dev/null
+++ b/Runtime/Filters/Mesh/MeshPartitioner.h
@@ -0,0 +1,5 @@
+#pragma once
+
+#define DEBUG_PARTITIONING 0
+class Mesh;
+void PartitionMesh(Mesh* m);
diff --git a/Runtime/Filters/Mesh/MeshRenderer.cpp b/Runtime/Filters/Mesh/MeshRenderer.cpp
new file mode 100644
index 0000000..08dfbae
--- /dev/null
+++ b/Runtime/Filters/Mesh/MeshRenderer.cpp
@@ -0,0 +1,664 @@
+#include "UnityPrefix.h"
+#include "MeshRenderer.h"
+#include "Runtime/Graphics/Transform.h"
+#include "LodMesh.h"
+#include "Runtime/Filters/Mesh/MeshUtility.h"
+#include "Runtime/Graphics/DrawUtil.h"
+#include "Runtime/GfxDevice/BatchRendering.h"
+#include "Runtime/Math/Vector3.h"
+#include "Runtime/Shaders/Material.h"
+#include "Runtime/Profiler/Profiler.h"
+#include "Runtime/Profiler/ExternalGraphicsProfiler.h"
+#include "Runtime/Utilities/BitUtility.h"
+#include "Runtime/GfxDevice/GfxDevice.h"
+
+#include "Runtime/GfxDevice/ChannelAssigns.h"
+#include "External/shaderlab/Library/properties.h"
+#include "External/shaderlab/Library/shaderlab.h"
+
+#include "Runtime/Camera/Renderqueue.h"
+#include "Runtime/Camera/RenderLoops/BuiltinShaderParamUtility.h"
+#include "Runtime/GfxDevice/BatchRendering.h"
+
+#include "Runtime/Profiler/TimeHelper.h"
+#include "Runtime/GfxDevice/GfxDeviceStats.h"
+#include "Runtime/Misc/BuildSettings.h"
+
+
+PROFILER_INFORMATION(gMeshRenderProfile, "MeshRenderer.Render", kProfilerRender)
+PROFILER_INFORMATION(gMeshRenderScaledProfile, "MeshRenderer.ComputeScaledMesh", kProfilerRender)
+PROFILER_INFORMATION(gMeshRenderStaticBatch, "MeshRenderer.RenderStaticBatch", kProfilerRender)
+PROFILER_INFORMATION(gMeshRenderDynamicBatch, "MeshRenderer.RenderDynamicBatch", kProfilerRender)
+
+
+#if UNITY_EDITOR
+#define SET_CACHED_SURFACE_AREA_DIRTY() m_CachedSurfaceArea = -1.0f;
+#else
+#define SET_CACHED_SURFACE_AREA_DIRTY() //do nothing
+#endif
+
+IMPLEMENT_CLASS_INIT_ONLY (MeshRenderer)
+
// Constructs a renderer with no mesh attached. The scaled-mesh state starts
// dirty so the first GetMeshUsedForRendering() evaluates the transform's scale.
MeshRenderer::MeshRenderer (MemLabelId label, ObjectCreationMode mode)
: Super(kRendererMesh, label, mode)
, m_MeshNode (this)	// list node linking this renderer into its mesh's user list
{
	m_ScaledMeshDirty = true;
	m_MeshWasModified = false;

	m_CachedMesh = NULL;
	m_ScaledMesh = NULL;
	SET_CACHED_SURFACE_AREA_DIRTY();
}
+
// Releases the internally allocated scaled mesh copy (if any).
MeshRenderer::~MeshRenderer ()
{
	FreeScaledMesh ();
}

// Refreshes the cached mesh pointer after serialized state has been loaded.
void MeshRenderer::AwakeFromLoad (AwakeFromLoadMode awakeMode)
{
	Super::AwakeFromLoad (awakeMode);
	UpdateCachedMesh ();
}

// Frees the scaled mesh on deactivation; it is rebuilt lazily if the
// renderer is used again.
void MeshRenderer::Deactivate (DeactivateOperation operation)
{
	Super::Deactivate (operation);
	FreeScaledMesh ();
}

// Registers the messages this component reacts to (transform and mesh changes).
void MeshRenderer::InitializeClass ()
{
	REGISTER_MESSAGE (MeshRenderer, kTransformChanged, TransformChanged, int);

	REGISTER_MESSAGE_VOID(MeshRenderer, kDidModifyBounds, DidModifyMeshBounds);
	REGISTER_MESSAGE_VOID(MeshRenderer, kDidDeleteMesh, DidDeleteMesh);
	REGISTER_MESSAGE_VOID(MeshRenderer, kDidModifyMesh, DidModifyMesh);
}

// A scale change invalidates the cached surface area (editor only) and the
// non-uniformly scaled mesh copy; other transform changes pass straight through.
void MeshRenderer::TransformChanged (int changeMask)
{
	if (changeMask & Transform::kScaleChanged)
	{
		SET_CACHED_SURFACE_AREA_DIRTY();
		m_ScaledMeshDirty = true;
	}
	Super::TransformChanged (changeMask);
}
+
// Recomputes the renderer's local-space bounding box. For statically batched
// renderers (subset indices present) the box encapsulates the bounds of each
// rendered subset; otherwise the whole mesh's bounds are used. With no mesh,
// the box collapses to a point at the origin.
void MeshRenderer::UpdateLocalAABB()
{
	DebugAssertIf( m_CachedMesh != m_Mesh );
	if( m_CachedMesh )
	{
		if (HasSubsetIndices())
		{
			if (GetMaterialCount() == 1)
				m_TransformInfo.localAABB = m_CachedMesh->GetBounds(GetSubsetIndex(0));
			else
			{
				// NOTE(review): assumes MinMaxAABB default-constructs to an
				// empty/inverted box that Encapsulate grows — verify.
				MinMaxAABB minMaxAABB;
				for (int m = 0; m < GetMaterialCount(); ++m)
					minMaxAABB.Encapsulate(m_CachedMesh->GetBounds(GetSubsetIndex(m)));
				m_TransformInfo.localAABB = minMaxAABB;
			}
		}
		else
		{
			m_TransformInfo.localAABB = m_CachedMesh->GetBounds();
		}
	}
	else
		m_TransformInfo.localAABB.SetCenterAndExtent( Vector3f::zero, Vector3f::zero );
}
+
// Forwards to the base implementation, then drops any scaled mesh copy.
void MeshRenderer::SetSubsetIndex(int subsetIndex, int index)
{
	Renderer::SetSubsetIndex(subsetIndex, index);

	// Reset scaled mesh if this renderer is now statically batched.
	// Mesh scaling should never be used with static batching (case 551504).
	FreeScaledMesh();
}

// Virtual accessor for the static batch index (see GetMeshStaticBatchIndex).
int MeshRenderer::GetStaticBatchIndex() const
{
	// Wrap non-virtual version in a virtual call
	return GetMeshStaticBatchIndex();
}
+
+int MeshRenderer::GetMeshStaticBatchIndex() const
+{
+ return IsPartOfStaticBatch() ? m_CachedMesh->GetInstanceID(): 0;
+}
+
// Returns the mesh's small internal ID (used for batching keys), or 0 if no mesh.
UInt32 MeshRenderer::GetMeshIDSmall() const
{
	return m_CachedMesh ? m_CachedMesh->GetInternalMeshID(): 0;
}


// Returns the cached raw mesh pointer (always mirrors m_Mesh); may be NULL.
Mesh* MeshRenderer::GetCachedMesh ()
{
	DebugAssertIf(m_CachedMesh != m_Mesh);
	return m_CachedMesh;
}
+
+
// Returns the mesh that should actually be drawn: the shared mesh itself, or
// a lazily (re)built copy with the transform's non-uniform scale baked into
// the vertices. Statically batched renderers always use the shared mesh
// (scale is already baked in at batch build time). Returns NULL with no mesh.
Mesh* MeshRenderer::GetMeshUsedForRendering ()
{
	Mesh* cachedMesh = GetCachedMesh ();

	if (cachedMesh != NULL)
	{
		// NOTE: staticaly batched geometry already has scale applied
		// therefore we skip mesh scaling
		if (!m_ScaledMeshDirty || IsPartOfStaticBatch())
			return m_ScaledMesh == NULL ? cachedMesh : m_ScaledMesh->mesh;

		m_ScaledMeshDirty = false;

		float unused2;
		Matrix4x4f unused;
		Matrix4x4f scalematrix;
		TransformType type = GetTransform().CalculateTransformMatrixDisableNonUniformScale (unused, scalematrix, unused2);
		// Check if no scale is needed or we can't access vertices anyway to transform them correctly
		DebugAssert(!IsNonUniformScaleTransform(type) || cachedMesh->HasVertexData());
		if (!IsNonUniformScaleTransform(type) || !cachedMesh->HasVertexData())
		{
			// Cleanup scaled mesh
			FreeScaledMesh();
			m_MeshWasModified = false;

			return cachedMesh;
		}
		// Need scaled mesh
		else
		{
			// Early out if the mesh scale hasn't actually changed
			if (m_ScaledMesh != NULL && CompareApproximately(scalematrix, m_ScaledMesh->matrix) && !m_MeshWasModified)
				return m_ScaledMesh->mesh;

			// Scale has changed, maybe generated a new scaled mesh
			PROFILER_AUTO(gMeshRenderScaledProfile, this)

			// Allocate scaled mesh (hidden, not saved with the scene)
			if (m_ScaledMesh == NULL)
			{
				m_ScaledMesh = new ScaledMesh ();
				m_ScaledMesh->mesh = NEW_OBJECT (Mesh);
				m_ScaledMesh->mesh->Reset();
				m_ScaledMesh->mesh->AwakeFromLoad(kInstantiateOrCreateFromCodeAwakeFromLoad);
				m_ScaledMesh->mesh->SetHideFlags(kHideAndDontSave);
			}

			m_MeshWasModified = false;

			// Rescale mesh: copy the shared mesh with the scale matrix applied
			m_ScaledMesh->matrix = scalematrix;
			m_ScaledMesh->mesh->CopyTransformed(*cachedMesh, scalematrix);
			return m_ScaledMesh->mesh;
		}
	}
	else
	{
		return NULL;
	}
}
+
+static SubMesh const& GetSubMesh(Mesh& mesh, int subsetIndex)
+{
+ const int subMeshCount = mesh.GetSubMeshCount()? mesh.GetSubMeshCount()-1 : 0;
+ const int subMeshIndex = std::min<unsigned int>(subsetIndex, subMeshCount);
+ return mesh.GetSubMeshFast(subMeshIndex);
+}
+
+
// Draws one subset of the (possibly scale-baked) mesh with the given channel
// bindings. Applies per-renderer material property overrides first, if any.
void MeshRenderer::Render (int subsetIndex, const ChannelAssigns& channels)
{
	PROFILER_AUTO(gMeshRenderProfile, this);

	Mesh* mesh = GetMeshUsedForRendering ();
	if (!mesh)
		return;
	if (m_CustomProperties)
		GetGfxDevice().SetMaterialProperties (*m_CustomProperties);
	DrawUtil::DrawMeshRaw (channels, *mesh, subsetIndex);
}
+
+
+#if UNITY_EDITOR
+
+// Editor-only: accumulates triangle/vertex/submesh counts over all materials
+// of the shared mesh into renderStats (zeroed first).
+void MeshRenderer::GetRenderStats (RenderStats& renderStats)
+{
+	///@TODO: This does not work with static batching, fix it.
+	memset(&renderStats, 0, sizeof(renderStats));
+
+	Mesh* mesh = m_Mesh;
+	if (mesh)
+	{
+		// One submesh is counted per material slot; GetSubMesh clamps the
+		// subset index if there are more materials than submeshes.
+		for (int i=0;i<GetMaterialCount();i++)
+		{
+			const SubMesh& submesh = GetSubMesh (*mesh, GetSubsetIndex(i));
+
+			renderStats.triangleCount += GetPrimitiveCount(submesh.indexCount, submesh.topology, false);
+			renderStats.vertexCount += submesh.vertexCount;
+			renderStats.submeshCount++;
+		}
+	}
+}
+
+// Editor-only: returns the world-space surface area of the cached mesh,
+// computed lazily and memoized in m_CachedSurfaceArea (negative = dirty).
+// Falls back to 1.0 when there is no mesh.
+float MeshRenderer::GetCachedSurfaceArea ()
+{
+	if (m_CachedSurfaceArea >= 0.0f)
+		return m_CachedSurfaceArea;
+
+	Mesh* mesh = GetCachedMesh ();
+	if (!mesh)
+	{
+		m_CachedSurfaceArea = 1.0f;
+		return m_CachedSurfaceArea;
+	}
+
+	Matrix4x4f objectToWorld;
+	GetComponent (Transform).CalculateTransformMatrix (objectToWorld);
+
+	Mesh::TemporaryIndexContainer triangles;
+	mesh->GetTriangles (triangles);
+
+	dynamic_array<Vector3f> vertices (mesh->GetVertexCount(), kMemTempAlloc);
+	mesh->ExtractVertexArray (vertices.begin ());
+
+	m_CachedSurfaceArea = CalculateSurfaceArea (objectToWorld, triangles, vertices);
+
+	return m_CachedSurfaceArea;
+}
+#endif
+
+// Called when only the mesh bounds changed: invalidates the cached surface
+// area and transform, and notifies the renderer that bounds moved.
+void MeshRenderer::DidModifyMeshBounds ()
+{
+	SET_CACHED_SURFACE_AREA_DIRTY();
+	m_TransformDirty = true;
+	BoundsChanged ();
+}
+
+// Called when mesh data itself changed: marks the scaled-mesh copy stale so
+// GetMeshUsedForRendering regenerates it, and refreshes bounds.
+void MeshRenderer::DidModifyMesh ()
+{
+	m_MeshWasModified = true;
+	m_ScaledMeshDirty = true;
+	m_TransformDirty = true;
+	BoundsChanged();
+}
+
+// Called when the mesh object is destroyed; drops the raw cached pointer so
+// it cannot dangle.
+void MeshRenderer::DidDeleteMesh ()
+{
+	m_CachedMesh = NULL;
+}
+
+// Assigns a new shared mesh and refreshes the cached mesh pointer / dirty
+// state via UpdateCachedMesh.
+void MeshRenderer::SetSharedMesh (PPtr<Mesh> mesh)
+{
+	SET_CACHED_SURFACE_AREA_DIRTY();
+	m_Mesh = mesh;
+	UpdateCachedMesh ();
+}
+
+// Returns the shared mesh reference (may be a null PPtr).
+PPtr<Mesh> MeshRenderer::GetSharedMesh ()
+{
+	return m_Mesh;
+}
+
+// Re-resolves m_Mesh and, if it changed, resets subset indices, marks the
+// scaled mesh dirty, refreshes bounds, and re-registers this renderer as a
+// user of the new mesh (so mesh deletion/modification callbacks reach us).
+void MeshRenderer::UpdateCachedMesh ()
+{
+	Mesh* mesh = m_Mesh;
+	if (mesh != m_CachedMesh)
+	{
+		// In order to make sure we are not using old subset indices referring to the previous mesh
+		// we clear them here, assuming that the correct subset indices will be set subsequently.
+		// We only do this if there was a previous mesh that the new mesh is replacing, since some
+		// code paths are transferring in the values and then call this function. In that case we do
+		// not want to mess with the indices.
+		if (m_CachedMesh) ClearSubsetIndices();
+		m_ScaledMeshDirty = true;
+		m_MeshWasModified = true;
+		m_CachedMesh = mesh;
+		m_TransformDirty = true;
+		BoundsChanged();
+		// Move our list node from the old mesh's user list to the new mesh's.
+		m_MeshNode.RemoveFromList();
+		if (m_CachedMesh)
+			m_CachedMesh->AddObjectUser( m_MeshNode );
+	}
+}
+
+// Destroys the privately owned scaled-mesh copy (both the Mesh object and
+// the wrapper struct) and clears the dirty flag.
+void MeshRenderer::FreeScaledMesh ()
+{
+	if (m_ScaledMesh)
+	{
+		DestroySingleObject (m_ScaledMesh->mesh);
+		delete m_ScaledMesh;
+		m_ScaledMesh = NULL;
+		m_ScaledMeshDirty = false;
+	}
+}
+
+#if GFX_ENABLE_DRAW_CALL_BATCHING
+
+PROFILER_INFORMATION(gDrawStaticBatchProfile, "Batch.DrawStatic", kProfilerRender)
+PROFILER_INFORMATION(gDrawDynamicBatchProfile, "Batch.DrawDynamic", kProfilerRender)
+
+// Issues one static-batched draw for `count` instances that share the same
+// combined mesh/VBO. Returns false (caller falls back to per-instance draws)
+// when there is nothing to batch (count <= 1) or no index data.
+// All instances are expected to share topology and transform type (asserted).
+static bool RenderStaticBatch (Mesh& mesh, VBO& vbo,
+	BatchInstanceData const* instances, size_t count, const ChannelAssigns& channels)
+{
+	if (count <= 1)
+		return false;
+	IndexBufferData indexBuffer;
+	mesh.GetIndexBufferData (indexBuffer);
+	if (!indexBuffer.indices)
+		return false;
+
+	PROFILER_AUTO(gMeshRenderStaticBatch, &mesh)
+
+	// Topology and transform are taken from the first instance; the loop
+	// below asserts every other instance matches.
+	const SubMesh& firstSubmesh = GetSubMesh (mesh, instances[0].subsetIndex);
+	GfxPrimitiveType topology = firstSubmesh.topology;
+	const Matrix4x4f& xform = instances[0].xform;
+	int xformType = instances[0].xformType;
+
+	GfxDevice& device = GetGfxDevice();
+	device.BeginStaticBatching(channels, topology);
+
+	// Concat SubMeshes
+	for (BatchInstanceData const* it = instances; it < instances + count; ++it)
+	{
+		const SubMesh& submesh = GetSubMesh (mesh, it->subsetIndex);
+		device.StaticBatchMesh(submesh.firstVertex, submesh.vertexCount, indexBuffer, submesh.firstByte, submesh.indexCount);
+
+		Assert(topology == submesh.topology);
+		Assert(xformType == it->xformType);
+	}
+
+	device.EndStaticBatching(vbo, xform, TransformType(xformType), mesh.GetChannelsInVBO());
+	GPU_TIMESTAMP();
+
+#if ENABLE_MULTITHREADED_CODE
+	// Make sure renderer is done before mesh is changed or deleted
+	UInt32 cpuFence = device.InsertCPUFence();
+	mesh.SetCurrentCPUFence(cpuFence);
+#endif
+
+	return true;
+}
+
+// CPU-transforms `count` small meshes into world space and submits them as a
+// single dynamic batch. Returns false to make the caller fall back to
+// individual draws (count <= 1, or a driver workaround applies).
+static bool RenderDynamicBatch (BatchInstanceData const* instances, size_t count, size_t maxVertices, size_t maxIndices, const ChannelAssigns& shaderChannels, UInt32 availableChannels, GfxPrimitiveType topology)
+{
+	if (count <= 1)
+		return false;
+
+	// Driver workaround: skip batching when tangents are requested on GPUs
+	// with broken dynamic VBO + tangent support.
+	if (gGraphicsCaps.buggyDynamicVBOWithTangents && (shaderChannels.GetSourceMap() & (1<<kShaderChannelTangent)))
+		return false;
+
+	PROFILER_AUTO(gMeshRenderDynamicBatch, NULL)
+
+	// NOTE(review): topology is an enum compared against -1 here and ~0UL
+	// below — both rely on implicit conversions; confirm the intended
+	// "invalid" sentinel value.
+	DebugAssert (topology != -1);
+
+	GfxDevice& device = GetGfxDevice();
+	UInt32 expectedFence = device.GetNextCPUFence();
+	device.BeginDynamicBatching(shaderChannels, availableChannels, maxVertices, maxIndices, topology);
+
+	// Transform on CPU
+	int xformType = -1;
+
+
+	for (BatchInstanceData const* it = instances; it < instances + count; ++it)
+	{
+		Assert(it->renderer);
+		Assert(it->renderer->GetRendererType() == kRendererMesh);
+		MeshRenderer* meshRenderer = (MeshRenderer*)it->renderer;
+		Mesh* mesh = meshRenderer->GetMeshUsedForRendering();
+		if (!mesh)
+			continue;
+
+		SubMesh const& submesh = GetSubMesh (*mesh, it->subsetIndex);
+
+		Assert(topology == ~0UL || topology == submesh.topology);
+		Assert(xformType == -1 || xformType == it->xformType);
+		xformType = it->xformType;
+
+		VertexBufferData vbData;
+		mesh->GetVertexBufferData(vbData, availableChannels);
+		IndexBufferData ibData;
+		mesh->GetIndexBufferData(ibData);
+
+		// Make sure renderer is done before mesh is changed or deleted
+#if ENABLE_MULTITHREADED_CODE
+		mesh->SetCurrentCPUFence(expectedFence);
+#endif
+
+		device.DynamicBatchMesh(it->xform, vbData, submesh.firstVertex, submesh.vertexCount, ibData, submesh.firstByte, submesh.indexCount);
+	}
+
+	// Draw
+	Assert(xformType != -1);
+	Assert(topology != ~0UL);
+
+	// We transformed all geometry into the world (Identity) space already.
+	// However, we did not normalize the normals.
+	// In fixed function, most GfxDevices (e.g. OpenGL & D3D) will try to figure out uniform
+	// scale directly from the matrix, and hence will not scale our normals.
+	// Therefore we upgrade normalization mode to "full normalize" to make them transform properly.
+	if (xformType & kUniformScaleTransform)
+	{
+		xformType &= ~kUniformScaleTransform;
+		xformType |= kNonUniformScaleTransform;
+	}
+
+	// Caveat: we do pass identity matrix when batching
+	// currently normals handling in vprog is:
+	// xform * (normalize(normal) * unity_Scale.w);
+	// as we pass identity matrix (no scale) we need NOT apply inv_scale
+	device.SetInverseScale(1.0f);
+	device.EndDynamicBatching(TransformType(xformType));
+
+	// Insert fence after batching is complete
+	UInt32 fence = device.InsertCPUFence();
+	Assert(fence == expectedFence);
+
+	GPU_TIMESTAMP();
+
+	return true;
+}
+
+// Renders a run of mesh instances, greedily grouping consecutive compatible
+// instances into static batches (pre-combined meshes sharing one VBO) or
+// dynamic batches (small meshes CPU-transformed together). Instances that
+// cannot be batched — or whole groups when a batch submission fails — are
+// drawn individually at the end of each iteration.
+void MeshRenderer::RenderMultiple (BatchInstanceData const* instances, size_t count, const ChannelAssigns& channels)
+{
+	Assert(count > 0);
+
+	GfxDevice& device = GetGfxDevice();
+	// Remember the current inverse scale so it can be restored for any
+	// non-batched draws (batched paths force it to 1.0).
+	const float invScale = device.GetBuiltinParamValues().GetInstanceVectorParam(kShaderInstanceVecScale).w;
+
+	const MaterialPropertyBlock* customProps = instances[0].renderer->GetCustomProperties();
+	if (customProps)
+		device.SetMaterialProperties (*customProps);
+
+	const UInt32 wantedChannels = channels.GetSourceMap();
+	const bool enableDynamicBatching = GetBuildSettings().enableDynamicBatching;
+
+	BatchInstanceData const* instancesEnd = instances + count;
+	for (BatchInstanceData const* iBatchBegin = instances; iBatchBegin != instancesEnd; )
+	{
+		Assert(iBatchBegin->renderer->GetRendererType() == kRendererMesh);
+		MeshRenderer* meshRenderer = (MeshRenderer*)iBatchBegin->renderer;
+		Mesh* mesh = meshRenderer->GetMeshUsedForRendering ();
+		VBO* vbo = mesh ? mesh->GetSharedVBO (wantedChannels) : NULL;
+		if (!vbo)
+		{
+			// Skip mesh
+			++iBatchBegin;
+			continue;
+		}
+
+		const UInt32 availableChannels = mesh->GetChannelsInVBO() & wantedChannels;
+		const int staticBatchIndex = meshRenderer->GetMeshStaticBatchIndex ();
+		const int xformType = iBatchBegin->xformType;
+
+		const SubMesh& firstSubMesh = GetSubMesh(*mesh, iBatchBegin->subsetIndex);
+		const GfxPrimitiveType topology = firstSubMesh.topology;
+		size_t batchVertexCount = firstSubMesh.vertexCount;
+		size_t batchIndexCount = firstSubMesh.indexCount;
+
+		// For first strip take 1 connecting (degenerate) triangles into account
+		if (topology == kPrimitiveTriangleStripDeprecated)
+			batchIndexCount += 1;
+
+		BatchInstanceData const* iBatchEnd = iBatchBegin + 1;
+
+		// static batching
+		if (staticBatchIndex != 0)
+		{
+			Assert(topology == kPrimitiveTriangles || topology == kPrimitiveTriangleStripDeprecated);
+			// NOTE(review): requiredIndexCount (UInt32) is compared against
+			// maxIndices (int) below — mixed signed/unsigned comparison;
+			// presumably maxIndices is always non-negative, confirm.
+			const int maxIndices = GetGfxDevice().GetMaxStaticBatchIndices();
+
+			// Extend the batch while instances share transform type, static
+			// batch, topology, VBO and channels, and fit the index budget.
+			for (; iBatchEnd != instancesEnd; ++iBatchEnd)
+			{
+				if (xformType != iBatchEnd->xformType)
+					break;
+
+				Assert(iBatchEnd->renderer->GetRendererType() == kRendererMesh);
+				MeshRenderer* meshRenderer = (MeshRenderer*)iBatchEnd->renderer;
+				if (staticBatchIndex != meshRenderer->GetMeshStaticBatchIndex())
+					break;
+
+				Mesh* nextMesh = meshRenderer->GetMeshUsedForRendering ();
+				if (!nextMesh)
+					break;
+
+				const SubMesh& submesh = GetSubMesh(*nextMesh, iBatchEnd->subsetIndex);
+				if (submesh.topology != topology)
+					break;
+
+				VBO* nextVbo = nextMesh->GetSharedVBO (wantedChannels);
+				if (nextVbo != vbo) // also a NULL check since vbo is non-NULL
+					break;
+
+				UInt32 nextAvailableChannels = nextMesh->GetChannelsInVBO() & wantedChannels;
+				if (availableChannels != nextAvailableChannels)
+					break;
+
+				UInt32 requiredIndexCount = batchIndexCount + submesh.indexCount;
+				if (topology == kPrimitiveTriangleStripDeprecated)
+					requiredIndexCount += 3; // take 3 connecting (degenerate) triangles into account
+
+				if (requiredIndexCount > maxIndices)
+					break;
+
+				batchIndexCount = requiredIndexCount;
+			}
+
+			if (mesh && vbo)
+				if (RenderStaticBatch (*mesh, *vbo, iBatchBegin, iBatchEnd - iBatchBegin, channels))
+					iBatchBegin = iBatchEnd;
+		}
+		else if (vbo && enableDynamicBatching)
+		// dynamic batching
+		{
+			const int firstVertexCount = batchVertexCount;
+			const int firstIndexCount = batchIndexCount;
+
+			// after moving to fully strided meshes we were hit by the issue that we might have different channels
+			// in src and dst data, so our optimized asm routines doesn't quite work.
+			// we will move to support vertex streams (this will solve lots of issues after skinning/batching asm rewrite ;-))
+			// but for now let just play safe
+
+			if (CanUseDynamicBatching(*mesh, wantedChannels, firstVertexCount) &&
+				firstIndexCount < kDynamicBatchingIndicesThreshold &&
+				topology != kPrimitiveLineStrip)
+			{
+				// Extend the batch while instances stay small enough and
+				// share transform type, topology, and VBO channels.
+				for (; iBatchEnd != instancesEnd; ++iBatchEnd)
+				{
+					if (xformType != iBatchEnd->xformType)
+						break;
+
+					Assert(iBatchEnd->renderer->GetRendererType() == kRendererMesh);
+					MeshRenderer* meshRenderer = (MeshRenderer*)iBatchEnd->renderer;
+					if (meshRenderer->IsPartOfStaticBatch())
+						break;
+
+					Mesh* nextMesh = meshRenderer->GetMeshUsedForRendering ();
+					if (!nextMesh)
+						break;
+
+					const SubMesh& submesh = GetSubMesh(*nextMesh, iBatchEnd->subsetIndex);
+					if (submesh.topology != topology)
+						break;
+
+					if (!CanUseDynamicBatching(*nextMesh, wantedChannels, submesh.vertexCount))
+						break;
+
+					UInt32 requiredVertexCount = batchVertexCount + submesh.vertexCount;
+					UInt32 requiredIndexCount = batchIndexCount + submesh.indexCount;
+					if (topology == kPrimitiveTriangleStripDeprecated)
+						requiredIndexCount += 3; // take 3 connecting (degenerate) triangles into account
+
+					// 16-bit index limit for the combined vertex range.
+					if (requiredVertexCount > 0xffff)
+						break;
+
+					if (requiredIndexCount > kDynamicBatchingIndicesThreshold)
+						break;
+
+					VBO* nextVbo = nextMesh->GetSharedVBO (wantedChannels);
+					if (!nextVbo)
+						break;
+
+					const UInt32 nextAvailableChannels = nextMesh->GetChannelsInVBO() & wantedChannels;
+					if (availableChannels != nextAvailableChannels)
+						break;
+
+					batchVertexCount = requiredVertexCount;
+					batchIndexCount = requiredIndexCount;
+				}
+
+				// Skip batch if batchVertexCount == 0 or batchIndexCount == 0
+				if (batchVertexCount == 0 || batchIndexCount == 0 || RenderDynamicBatch (iBatchBegin, iBatchEnd - iBatchBegin, batchVertexCount, batchIndexCount, channels, availableChannels, topology))
+					iBatchBegin = iBatchEnd;
+			}
+		}
+
+		// old-school rendering for anything left
+		for (; iBatchBegin != iBatchEnd; ++iBatchBegin)
+		{
+			BatchInstanceData const* it = iBatchBegin;
+			Assert(iBatchBegin->renderer->GetRendererType() == kRendererMesh);
+			MeshRenderer* meshRenderer = (MeshRenderer*)iBatchBegin->renderer;
+			Mesh* mesh = meshRenderer->GetMeshUsedForRendering ();
+			if (!mesh)
+				continue;
+
+			VBO* vbo = mesh->GetSharedVBO (wantedChannels);
+			if (!vbo)
+				continue;
+
+			if (customProps)
+				device.SetMaterialProperties (*customProps);
+
+			// Batched rendering above will have set inverse scale to 1.0 (since everything is transformed
+			// to identity). For remaining meshes that aren't batched, we have to setup the original scale
+			// back.
+			device.SetInverseScale(invScale);
+			SetupObjectMatrix (it->xform, it->xformType);
+			DrawUtil::DrawVBOMeshRaw (*vbo, *mesh, channels, it->subsetIndex);
+		}
+
+		Assert(iBatchBegin == iBatchEnd); // everything was rendered successfully
+	}
+}
+
+// Returns true if a mesh is eligible for dynamic batching: uncompressed
+// vertex streams, a non-empty index buffer, and a vertex count under both
+// the absolute threshold and the per-channel work threshold.
+bool MeshRenderer::CanUseDynamicBatching(const Mesh& mesh, UInt32 wantedChannels, int vertexCount)
+{
+	if (mesh.GetStreamCompression() != Mesh::kStreamCompressionDefault ||
+		mesh.GetIndexBuffer().empty() ||
+		vertexCount > kDynamicBatchingVerticesThreshold ||
+		vertexCount * BitsInMask(wantedChannels) > kDynamicBatchingVertsByChannelThreshold)
+		return false;
+	return true;
+}
+
+#endif // #if GFX_ENABLE_DRAW_CALL_BATCHING
+
diff --git a/Runtime/Filters/Mesh/MeshRenderer.h b/Runtime/Filters/Mesh/MeshRenderer.h
new file mode 100644
index 0000000..d42c22e
--- /dev/null
+++ b/Runtime/Filters/Mesh/MeshRenderer.h
@@ -0,0 +1,87 @@
+#ifndef MESHRENDERER_H
+#define MESHRENDERER_H
+
+#include "Runtime/Filters/Renderer.h"
+
+class Mesh;
+
+
+
+// Renderer component that draws a Mesh, optionally via static/dynamic
+// batching, keeping a privately owned rescaled copy of the mesh when the
+// transform carries a non-uniform scale.
+class MeshRenderer : public Renderer {
+  public:
+	MeshRenderer (MemLabelId label, ObjectCreationMode mode);
+	// ~MeshRenderer (); declared-by-macro
+	REGISTER_DERIVED_CLASS (MeshRenderer, Renderer)
+	static void InitializeClass ();
+
+	// Tag class as sealed, this makes QueryComponent faster.
+	static bool IsSealedClass () { return true; }
+
+	static void RenderMultiple (const BatchInstanceData* instances, size_t count, const ChannelAssigns& channels);
+	virtual void Render (int materialIndex, const ChannelAssigns& channels);
+
+	virtual void UpdateLocalAABB();
+
+	virtual void SetSubsetIndex(int subsetIndex, int index);
+
+	virtual int GetStaticBatchIndex() const;
+	virtual UInt32 GetMeshIDSmall() const;
+	int GetMeshStaticBatchIndex() const;
+
+	void TransformChanged (int changeMask);
+	void AwakeFromLoad(AwakeFromLoadMode mode);
+	virtual void Deactivate (DeactivateOperation operation);
+
+	void SetSharedMesh (PPtr<Mesh> mesh);
+	PPtr<Mesh> GetSharedMesh ();
+
+	Mesh& GetInstantiatedMesh ();
+	void SetInstantiatedMesh (Mesh* mesh);
+
+	// Returns the mesh to actually draw (may be the rescaled private copy).
+	Mesh* GetMeshUsedForRendering();
+
+	// Change-notification hooks invoked by the Mesh this renderer uses.
+	void DidModifyMeshBounds ();
+	void DidModifyMeshValidity ();
+	void DidModifyMesh ();
+	void DidDeleteMesh ();
+	#if UNITY_EDITOR
+	float GetCachedSurfaceArea ();
+	virtual void GetRenderStats (RenderStats& renderStats);
+	#endif
+
+	static bool CanUseDynamicBatching(const Mesh& mesh, UInt32 wantedChannels, int vertexCount);
+
+  private:
+
+	Mesh* GetCachedMesh ();
+
+	// Node in the mesh's user list; lets the mesh notify us on changes.
+	ListNode<Object> m_MeshNode;
+	void UpdateCachedMesh ();
+
+	void FreeScaledMesh ();
+
+	Mesh* m_CachedMesh;
+	PPtr<Mesh> m_Mesh;
+
+	// Privately owned copy of the mesh transformed by a scale matrix,
+	// together with the matrix it was generated for.
+	struct ScaledMesh
+	{
+		Matrix4x4f matrix;
+		Mesh* mesh;
+	};
+
+	ScaledMesh* m_ScaledMesh;
+
+	// as we have padding anyway, we can add more flags here
+	UInt8 m_ScaledMeshDirty;
+	// set in response to mesh-change events to properly handle vertices changing on non-uniform scale
+	UInt8 m_MeshWasModified;
+	// for future
+	UInt16 m_Padding16;
+
+	#if UNITY_EDITOR
+	// Memoized world-space surface area; negative means "recompute".
+	float m_CachedSurfaceArea;
+	#endif
+
+};
+
+#endif
diff --git a/Runtime/Filters/Mesh/MeshSkinning.cpp b/Runtime/Filters/Mesh/MeshSkinning.cpp
new file mode 100644
index 0000000..7d01667
--- /dev/null
+++ b/Runtime/Filters/Mesh/MeshSkinning.cpp
@@ -0,0 +1,165 @@
+#include "UnityPrefix.h"
+#include "MeshSkinning.h"
+#if UNITY_OSX
+#include <alloca.h> // this is really deprecated and should be exchanged for stdlib.h
+#else
+#include <stdlib.h>
+#endif
+#include "Runtime/Utilities/Utility.h"
+#include "Runtime/Utilities/LogAssert.h"
+#include "Runtime/Utilities/OptimizationUtility.h"
+#include "Runtime/Misc/Allocator.h"
+#include "Runtime/Utilities/Prefetch.h"
+#include "Runtime/Profiler/TimeHelper.h"
+#include "Runtime/Profiler/Profiler.h"
+#include "Runtime/Misc/CPUInfo.h"
+#include "Runtime/Allocator/MemoryMacros.h"
+#include "Runtime/Filters/Mesh/LodMesh.h"
+
+PROFILER_INFORMATION(gMeshSkinningProfile, "MeshSkinning.Skin", kProfilerRender)
+PROFILER_INFORMATION(gMeshSkinningSlowpath, "MeshSkinning.SlowPath", kProfilerRender)
+
+#include "MeshSkinningMobile.h"
+#include "MeshSkinningSSE2.h"
+#include "SkinGeneric.h"
+#include "MeshBlendShaping.h"
+
+
+//===========================================================================================================================================
+
+
+// Generic (non-asm) skinning entry point: selects the SkinGeneric template
+// instantiation matching the bones-per-vertex count and the normal/tangent
++// flags in `info`. Only 1, 2 and 4 bones per vertex are handled.
+void SkinMesh(SkinMeshInfo& info)
+{
+	const TransformInstruction NormalizeTransformInstruction =
+#if (UNITY_SUPPORTS_NEON && !UNITY_DISABLE_NEON_SKINNING) || UNITY_SUPPORTS_VFP
+	// NOTE: optimized NEON/VFP routines do not do any normalization
+	// instead we rely on GPU to do that
+	kNoNormalize;
+#else
+	//@TODO: fix that "Fast" & "Fastest" crap. Right now "Fastest" is actually a win on PC (1ms saved in Dark Unity)
+	// so I'm leaving it there for now.
+	kNormalizeFastest;
+#endif
+
+	// Instantiates the right skinning template depending on the bone per vertex count
+	#define PERMUTE_BONES(skinNormal,skinTangent) {	\
+		if (info.bonesPerVertex == 1)	\
+			SkinGeneric<NormalizeTransformInstruction, 1, skinNormal, skinTangent> (info);	\
+		else if (info.bonesPerVertex == 2)	\
+			SkinGeneric<NormalizeTransformInstruction, 2, skinNormal, skinTangent> (info);	\
+		else if (info.bonesPerVertex == 4)	\
+			SkinGeneric<NormalizeTransformInstruction, 4, skinNormal, skinTangent> (info);	\
+	}
+
+	if (info.skinNormals && info.skinTangents)
+		PERMUTE_BONES(true, true)
+	else if (info.skinNormals)
+		PERMUTE_BONES(true, false)
+	else
+		PERMUTE_BONES(false, false)
+}
+
+
+// Dispatches skinning to the fastest available implementation: Wii-specific,
+// then optimized mobile (NEON/VFP), then SSE2, and finally the generic
+// template fallback.
+static void ApplyMeshSkinning (SkinMeshInfo& info)
+{
+	#if UNITY_WII
+	SkinMeshWii(info);
+	#else
+
+	PROFILER_AUTO(gMeshSkinningProfile, NULL);
+
+	if (SkinMeshOptimizedMobile(info))
+		return;
+
+	if (SkinMeshOptimizedSSE2(info))
+		return;
+
+	// fallback to slow generic implementation
+	{
+		PROFILER_AUTO(gMeshSkinningSlowpath, NULL);
+		SkinMesh(info);
+	}
+	#endif
+}
+
+// Applies blend shapes and/or bone skinning from info.inVertices into
+// info.outVertices. With neither, the vertex stream is copied through
+// unchanged.
+void DeformSkinnedMesh (SkinMeshInfo& info)
+{
+	const bool hasBlendShapes = info.blendshapeCount != 0;
+	const bool hasSkin = info.boneCount != 0;
+
+	// No actual skinning can be done. Just copy vertex stream.
+	// TODO: This code can be removed if we render the undeformed mesh in SkinnedMeshRenderer
+	// when there is no skin and no active blend shapes. See case 557165.
+	if (!hasBlendShapes && !hasSkin)
+	{
+		// NOTE(review): copy size uses inStride — assumes inStride ==
+		// outStride on this path; confirm with callers.
+		memcpy (info.outVertices, info.inVertices, info.inStride * info.vertexCount);
+		return;
+	}
+
+	UInt8* tmpBlendShapes = NULL;
+
+	// blend shapes
+	if (hasBlendShapes)
+	{
+		// The final destination might be write-combined memory which is insanely slow to read
+		// or randomly access, so always allocate a temp buffer for blend shapes (case 554830).
+		// Skinning can write directly to a VB since it always writes sequentially to memory.
+		size_t bufferSize = info.inStride * info.vertexCount;
+		tmpBlendShapes = ALLOC_TEMP_MANUAL(UInt8, bufferSize);
+
+		ApplyBlendShapes (info, tmpBlendShapes);
+
+		// When skinning follows, it reads the blend-shaped buffer as its
+		// input; otherwise the buffer is the final result and is copied out.
+		if (hasSkin)
+			info.inVertices = tmpBlendShapes;
+		else
+			memcpy(info.outVertices, tmpBlendShapes, bufferSize);
+	}
+
+	// skinning
+	if (hasSkin)
+		ApplyMeshSkinning (info);
+
+	if (tmpBlendShapes)
+		FREE_TEMP_MANUAL(tmpBlendShapes);
+}
+
+
+// Job-system adapter: unpacks the SkinMeshInfo payload and runs
+// DeformSkinnedMesh. Always returns NULL.
+void* DeformSkinnedMeshJob (void* rawData)
+{
+	SkinMeshInfo* data = reinterpret_cast<SkinMeshInfo*>(rawData);
+	DeformSkinnedMesh (*data);
+	return NULL;
+}
+
+
+// Zero-initializes every field. NOTE(review): memset-on-this is only valid
+// while SkinMeshInfo stays a plain-old-data struct — keep it free of
+// virtuals and non-trivial members.
+SkinMeshInfo::SkinMeshInfo()
+{
+	memset(this, 0, sizeof(SkinMeshInfo));
+}
+
+// Allocates one 64-byte-aligned buffer holding the cached bone pose matrices
+// followed by the blend-shape weights, and points cachedPose /
+// blendshapeWeights into it. No-op when both counts are zero.
+void SkinMeshInfo::Allocate()
+{
+	size_t size = boneCount * sizeof(Matrix4x4f) + sizeof(float) * blendshapeCount;
+	if (size == 0)
+		return;
+
+	allocatedBuffer = (UInt8*)UNITY_MALLOC_ALIGNED(kMemSkinning, size, 64);
+
+	UInt8* head = allocatedBuffer;
+	if (boneCount != 0)
+	{
+		cachedPose = reinterpret_cast<Matrix4x4f*> (head);
+		head += sizeof(Matrix4x4f) * boneCount;
+	}
+
+	if (blendshapeCount != 0)
+{
+		blendshapeWeights = reinterpret_cast<float*> (head);
+	}
+}
+
+// Frees the buffer created by Allocate(). Safe to call when nothing was
+// allocated. NOTE(review): does not null allocatedBuffer (it is const), so
+// a second Release would double-free — confirm callers release once.
+void SkinMeshInfo::Release() const
+{
+	if (allocatedBuffer)
+		UNITY_FREE(kMemSkinning, allocatedBuffer);
+}
diff --git a/Runtime/Filters/Mesh/MeshSkinning.h b/Runtime/Filters/Mesh/MeshSkinning.h
new file mode 100644
index 0000000..b56efa9
--- /dev/null
+++ b/Runtime/Filters/Mesh/MeshSkinning.h
@@ -0,0 +1,64 @@
+#ifndef MESHSKINNING_H
+#define MESHSKINNING_H
+
+#include "Runtime/Math/Vector3.h"
+#include "Runtime/Math/Quaternion.h"
+#include "Mesh.h"
+#include "Runtime/Geometry/AABB.h"
+#include "Runtime/GfxDevice/GfxDeviceTypes.h"
+#include <vector>
+#include <list>
+
+class GPUSkinningInfo;
+
+typedef std::vector<BoneInfluence> CompactSkin;
+struct BlendShapeData;
+
+enum TransformInstruction { kNormalizeFastest = 0, kNormalizeFast = 1, kNoNormalize = 3 };
+class VertexData;
+
+// Plain-data job description for skinning/blend-shape deformation: input and
+// output vertex streams, per-vertex bone influences, the cached bone pose,
+// and blend-shape weights. Allocate()/Release() manage the single buffer
+// that backs cachedPose and blendshapeWeights.
+struct SkinMeshInfo
+{
+	int bonesPerVertex;
+
+	// Bone influence array; element layout depends on bonesPerVertex.
+	void* compactSkin;
+	int boneCount;
+
+	const void* inVertices;
+	void* outVertices;
+	int inStride;
+	int outStride;
+
+	// Byte offsets of normal/tangent within a vertex; used when the
+	// corresponding skin flag below is set.
+	int normalOffset;
+	int tangentOffset;
+	bool skinNormals;
+	bool skinTangents;
+
+	int vertexCount;
+
+	// This is instance data and must be double buffered so the render thread can work in parallel.
+	UInt8* allocatedBuffer;
+	Matrix4x4f* cachedPose;
+	float* blendshapeWeights;
+
+	int blendshapeCount;
+	const BlendShapeData* blendshapes;
+
+	bool memExport; // Is set up for memexport (Xbox) or streamout (DX11)
+
+#if UNITY_PS3
+	const VertexData* vertexData;
+#endif
+
+	GPUSkinningInfo *mei;
+
+	SkinMeshInfo();
+
+	void Allocate();
+	void Release () const;
+};
+
+void DeformSkinnedMesh (SkinMeshInfo& info);
+void* DeformSkinnedMeshJob (void* rawData);
+
+#endif
diff --git a/Runtime/Filters/Mesh/MeshSkinningGenericSIMD.h b/Runtime/Filters/Mesh/MeshSkinningGenericSIMD.h
new file mode 100644
index 0000000..0b17b42
--- /dev/null
+++ b/Runtime/Filters/Mesh/MeshSkinningGenericSIMD.h
@@ -0,0 +1,212 @@
+#if 0
+
+/*
+ mircea@INFO: this doesn't do normalization.
+ */
+
+#include "Runtime/Math/Simd/Matrix4x4Simd.h"
+
+template<TransformInstruction transformInstruction, int bonesPerVertexCount,
+bool skinNormal, bool skinTangent, bool copy8BytesAt24Offset>
+void SkinGenericSimd (SkinMeshInfo& info)
+{
+ DebugAssertIf( copy8BytesAt24Offset && (!info.skinNormals || info.normalOffset != 12) );
+ const int* influence1 = reinterpret_cast<const int*> (info.compactSkin);
+ const BoneInfluence2* influence2 = reinterpret_cast<const BoneInfluence2*> (info.compactSkin);
+ const BoneInfluence* influence4 = reinterpret_cast<const BoneInfluence*> (info.compactSkin);
+
+ const Matrix4x4f* bones4x4 = info.cachedPose;
+
+ const int inStride = info.inStride;
+ int outStride = info.outStride;
+ int count = info.vertexCount;
+
+ const int normalOffset = (copy8BytesAt24Offset ? 12 : info.normalOffset) >> 2;
+ const int tangentOffset = info.tangentOffset >> 2;
+
+ const UInt8* inputVertex = (const UInt8*)info.inVertices;
+ UInt8* outputVertex = (UInt8*)info.outVertices;
+
+ Simd128 pose0, pose1, pose2, pose3;
+
+ for( int v = 0; v < count; v++ )
+ {
+ ALIGN_LOOP_OPTIMIZATION
+
+ // Blend the matrices first, then transform everything with this
+ // blended matrix. Gives a small speed boost on XCode/Intel (11.3 to 12.00 FPS
+ // in skin4 bench), and a good boost on MSVC/Windows (9.6 to 12.4 FPS).
+ if (bonesPerVertexCount == 1)
+ {
+ const float* maddr = bones4x4[*influence1].m_Data;
+
+ Prefetch(maddr);
+
+ pose0 = V4LoadUnaligned( maddr, 0x0 );
+ pose1 = V4LoadUnaligned( maddr, 0x4 );
+ pose2 = V4LoadUnaligned( maddr, 0x8 );
+ pose3 = V4LoadUnaligned( maddr, 0xC );
+ }
+ else if (bonesPerVertexCount == 2)
+ {
+ Prefetch(influence2);
+
+ Simd128 weights = {influence2->weight[0], influence2->weight[1], 0, 0};
+
+ const float* maddr0 = bones4x4[influence2->boneIndex[0]].m_Data;
+ const float* maddr1 = bones4x4[influence2->boneIndex[1]].m_Data;
+
+ Prefetch(maddr0);
+ Prefetch(maddr1);
+
+ Simd128 weight0 = V4Splat(weights, 0);
+ Simd128 weight1 = V4Splat(weights, 1);
+
+ Simd128 mat00 = V4LoadUnaligned( maddr0, 0x0 );
+ Simd128 mat01 = V4LoadUnaligned( maddr0, 0x4 );
+ Simd128 mat02 = V4LoadUnaligned( maddr0, 0x8 );
+ Simd128 mat03 = V4LoadUnaligned( maddr0, 0xC );
+
+ Simd128 mat10 = V4LoadUnaligned( maddr1, 0x0 );
+ Simd128 mat11 = V4LoadUnaligned( maddr1, 0x4 );
+ Simd128 mat12 = V4LoadUnaligned( maddr1, 0x8 );
+ Simd128 mat13 = V4LoadUnaligned( maddr1, 0xC );
+
+ pose0 = V4Mul(mat00, weight0);
+ pose1 = V4Mul(mat01, weight0);
+ pose2 = V4Mul(mat02, weight0);
+ pose3 = V4Mul(mat03, weight0);
+
+ pose0 = V4MulAdd(mat10, weight1, pose0);
+ pose1 = V4MulAdd(mat11, weight1, pose1);
+ pose2 = V4MulAdd(mat12, weight1, pose2);
+ pose3 = V4MulAdd(mat13, weight1, pose3);
+ }
+ else if (bonesPerVertexCount == 4)
+ {
+ Prefetch(influence4);
+
+ Simd128 weights = {influence4->weight[0], influence4->weight[1], influence4->weight[2], influence4->weight[3]};
+
+ const float* maddr0 = bones4x4[influence4->boneIndex[0]].m_Data;
+ const float* maddr1 = bones4x4[influence4->boneIndex[1]].m_Data;
+ const float* maddr2 = bones4x4[influence4->boneIndex[2]].m_Data;
+ const float* maddr3 = bones4x4[influence4->boneIndex[3]].m_Data;
+
+ Prefetch(maddr0);
+ Prefetch(maddr1);
+ Prefetch(maddr2);
+ Prefetch(maddr3);
+
+ Simd128 weight0 = V4Splat(weights, 0);
+ Simd128 weight1 = V4Splat(weights, 1);
+ Simd128 weight2 = V4Splat(weights, 2);
+ Simd128 weight3 = V4Splat(weights, 3);
+
+ Simd128 mat00 = V4LoadUnaligned( maddr0, 0x0 );
+ Simd128 mat01 = V4LoadUnaligned( maddr0, 0x4 );
+ Simd128 mat02 = V4LoadUnaligned( maddr0, 0x8 );
+ Simd128 mat03 = V4LoadUnaligned( maddr0, 0xC );
+
+ Simd128 mat10 = V4LoadUnaligned( maddr1, 0x0 );
+ Simd128 mat11 = V4LoadUnaligned( maddr1, 0x4 );
+ Simd128 mat12 = V4LoadUnaligned( maddr1, 0x8 );
+ Simd128 mat13 = V4LoadUnaligned( maddr1, 0xC );
+
+ Simd128 mat20 = V4LoadUnaligned( maddr2, 0x0 );
+ Simd128 mat21 = V4LoadUnaligned( maddr2, 0x4 );
+ Simd128 mat22 = V4LoadUnaligned( maddr2, 0x8 );
+ Simd128 mat23 = V4LoadUnaligned( maddr2, 0xC );
+
+ Simd128 mat30 = V4LoadUnaligned( maddr3, 0x0 );
+ Simd128 mat31 = V4LoadUnaligned( maddr3, 0x4 );
+ Simd128 mat32 = V4LoadUnaligned( maddr3, 0x8 );
+ Simd128 mat33 = V4LoadUnaligned( maddr3, 0xC );
+
+ pose0 = V4Mul(mat00, weight0);
+ pose1 = V4Mul(mat01, weight0);
+ pose2 = V4Mul(mat02, weight0);
+ pose3 = V4Mul(mat03, weight0);
+
+ pose0 = V4MulAdd(mat10, weight1, pose0);
+ pose1 = V4MulAdd(mat11, weight1, pose1);
+ pose2 = V4MulAdd(mat12, weight1, pose2);
+ pose3 = V4MulAdd(mat13, weight1, pose3);
+
+ pose0 = V4MulAdd(mat20, weight2, pose0);
+ pose1 = V4MulAdd(mat21, weight2, pose1);
+ pose2 = V4MulAdd(mat22, weight2, pose2);
+ pose3 = V4MulAdd(mat23, weight2, pose3);
+
+ pose0 = V4MulAdd(mat30, weight3, pose0);
+ pose1 = V4MulAdd(mat31, weight3, pose1);
+ pose2 = V4MulAdd(mat32, weight3, pose2);
+ pose3 = V4MulAdd(mat33, weight3, pose3);
+ }
+
+ Prefetch(inputVertex);
+
+ Simd128 vpos = V4LoadUnaligned((const float*)inputVertex, 0);
+ TransformPoint3NATIVE(pose0, pose1, pose2, pose3, vpos, vpos);
+
+ Simd128 vnor, vtan, ndot, tdot;
+
+ // remember... this is a template and skinNormal & skinTangent are consts
+ if(skinNormal || skinTangent)
+ {
+ Simd128 vlen;
+ if( skinNormal )
+ {
+ vnor = V4LoadUnaligned((const float*)inputVertex, normalOffset);
+ TransformVector3NATIVE(pose0, pose1, pose2, pose3, vnor, vnor);
+ ndot = V3Dot(vnor, vnor);
+ }
+ else
+ {
+ ndot = V4Zero();
+ }
+
+ if( skinTangent )
+ {
+ vtan = V4LoadUnaligned((const float*)inputVertex, tangentOffset);
+ TransformVector3NATIVE(pose0, pose1, pose2, pose3, vtan, vtan);
+ tdot = V3Dot(vtan, vtan);
+ }
+ else
+ {
+ tdot = V4Zero();
+ }
+
+ vlen = V4MergeH(ndot, tdot);
+ vlen = V4Rsqrt(vlen);
+
+ if(skinNormal) {
+ vnor = V4Mul(vnor, V4Splat(vlen, 0));
+ V3StoreUnaligned(vnor, (float*)outputVertex, normalOffset);
+ }
+
+ if(skinTangent) {
+ vtan = V4Mul(vtan, V4Splat(vlen, 1));
+ V3StoreUnaligned(vtan, (float*)outputVertex, tangentOffset);
+ }
+ }
+
+ V3StoreUnaligned(vpos, (float*)outputVertex, 0);
+
+ if( skinTangent )
+ {
+ *reinterpret_cast<float*>( outputVertex + (tangentOffset<<2) + sizeof(Vector3f) ) = *reinterpret_cast<const float*>( inputVertex + (tangentOffset<<2) + sizeof(Vector3f) );
+ }
+
+ outputVertex += outStride;
+ inputVertex += inStride;
+
+ if (bonesPerVertexCount == 1)
+ influence1++;
+ else if (bonesPerVertexCount == 2)
+ influence2++;
+ if (bonesPerVertexCount == 4)
+ influence4++;
+ }
+}
+#endif
diff --git a/Runtime/Filters/Mesh/MeshSkinningMobile.h b/Runtime/Filters/Mesh/MeshSkinningMobile.h
new file mode 100644
index 0000000..f6efc54
--- /dev/null
+++ b/Runtime/Filters/Mesh/MeshSkinningMobile.h
@@ -0,0 +1,160 @@
+#if UNITY_SUPPORTS_VFP
+
+#if UNITY_ANDROID || UNITY_BB10 || UNITY_TIZEN
+#define s_SkinVertices_VFP _s_SkinVertices_VFP  // on these toolchains map the C names onto the '_'-prefixed labels the assembly files define
+#define s_SkinVertices_NoNormals_VFP _s_SkinVertices_NoNormals_VFP
+#define s_SkinVertices_Tangents_VFP _s_SkinVertices_Tangents_VFP
+
+#define s_SkinVertices2Bones_VFP _s_SkinVertices2Bones_VFP
+#define s_SkinVertices2Bones_NoNormals_VFP _s_SkinVertices2Bones_NoNormals_VFP
+#define s_SkinVertices2Bones_Tangents_VFP _s_SkinVertices2Bones_Tangents_VFP
+
+#define s_SkinVertices4Bones_VFP _s_SkinVertices4Bones_VFP
+#define s_SkinVertices4Bones_NoNormals_VFP _s_SkinVertices4Bones_NoNormals_VFP
+#define s_SkinVertices4Bones_Tangents_VFP _s_SkinVertices4Bones_Tangents_VFP
+#endif // UNITY_ANDROID || UNITY_BB10 || UNITY_TIZEN
+
+extern "C"
+{
+ void s_SkinVertices_VFP(const Matrix4x4f* bones4x4, const void* srcVertData, const void* srcVertDataEnd, const void* srcBoneInfluence1, void* dstVertData);  // args: bone palette, vertex range [src, srcEnd), per-vertex influence stream, destination buffer
+ void s_SkinVertices_NoNormals_VFP(const Matrix4x4f* bones4x4, const void* srcVertData, const void* srcVertDataEnd, const void* srcBoneInfluence1, void* dstVertData);
+ void s_SkinVertices_Tangents_VFP(const Matrix4x4f* bones4x4, const void* srcVertData, const void* srcVertDataEnd, const void* srcBoneInfluence1, void* dstVertData);
+
+ void s_SkinVertices2Bones_VFP(const Matrix4x4f* bones4x4, const void* srcVertData, const void* srcVertDataEnd, const void* srcBoneInfluence2, void* dstVertData);
+ void s_SkinVertices2Bones_NoNormals_VFP(const Matrix4x4f* bones4x4, const void* srcVertData, const void* srcVertDataEnd, const void* srcBoneInfluence2, void* dstVertData);
+ void s_SkinVertices2Bones_Tangents_VFP(const Matrix4x4f* bones4x4, const void* srcVertData, const void* srcVertDataEnd, const void* srcBoneInfluence2, void* dstVertData);
+
+ void s_SkinVertices4Bones_VFP(const Matrix4x4f* bones4x4, const void* srcVertData, const void* srcVertDataEnd, const void* srcBoneInfluence4, void* dstVertData);
+ void s_SkinVertices4Bones_NoNormals_VFP(const Matrix4x4f* bones4x4, const void* srcVertData, const void* srcVertDataEnd, const void* srcBoneInfluence4, void* dstVertData);
+ void s_SkinVertices4Bones_Tangents_VFP(const Matrix4x4f* bones4x4, const void* srcVertData, const void* srcVertDataEnd, const void* srcBoneInfluence4, void* dstVertData);
+}
+#endif
+
+#if (UNITY_SUPPORTS_NEON && !UNITY_DISABLE_NEON_SKINNING)
+
+#if UNITY_ANDROID || UNITY_WINRT || UNITY_BB10 || UNITY_TIZEN
+#define s_SkinVertices_NEON _s_SkinVertices_NEON  // same '_'-prefix mapping as the VFP group above
+#define s_SkinVertices_NoNormals_NEON _s_SkinVertices_NoNormals_NEON
+#define s_SkinVertices_Tangents_NEON _s_SkinVertices_Tangents_NEON
+
+#define s_SkinVertices2Bones_NEON _s_SkinVertices2Bones_NEON
+#define s_SkinVertices2Bones_NoNormals_NEON _s_SkinVertices2Bones_NoNormals_NEON
+#define s_SkinVertices2Bones_Tangents_NEON _s_SkinVertices2Bones_Tangents_NEON
+
+#define s_SkinVertices4Bones_NEON _s_SkinVertices4Bones_NEON
+#define s_SkinVertices4Bones_NoNormals_NEON _s_SkinVertices4Bones_NoNormals_NEON
+#define s_SkinVertices4Bones_Tangents_NEON _s_SkinVertices4Bones_Tangents_NEON
+
+#endif // UNITY_ANDROID || UNITY_WINRT || UNITY_BB10 || UNITY_TIZEN
+
+extern "C"
+{
+ void s_SkinVertices_NEON(const Matrix4x4f* bones4x4, const void* srcVertData, const void* srcVertDataEnd, const int* srcBoneInfluence1, void* dstVertData);  // 1 bone: influence stream is a flat array of bone indices
+ void s_SkinVertices_NoNormals_NEON(const Matrix4x4f* bones4x4, const void* srcVertData, const void* srcVertDataEnd, const int* srcBoneInfluence1, void* dstVertData);
+ void s_SkinVertices_Tangents_NEON(const Matrix4x4f* bones4x4, const void* srcVertData, const void* srcVertDataEnd, const int* srcBoneInfluence1, void* dstVertData);
+
+ void s_SkinVertices2Bones_NEON(const Matrix4x4f* bones4x4, const void* srcVertData, const void* srcVertDataEnd, const BoneInfluence2* srcBoneInfluence2, void* dstVertData);  // 2 bones: {weight[2], index[2]} records
+ void s_SkinVertices2Bones_NoNormals_NEON(const Matrix4x4f* bones4x4, const void* srcVertData, const void* srcVertDataEnd, const BoneInfluence2* srcBoneInfluence2, void* dstVertData);
+ void s_SkinVertices2Bones_Tangents_NEON(const Matrix4x4f* bones4x4, const void* srcVertData, const void* srcVertDataEnd, const BoneInfluence2* srcBoneInfluence2, void* dstVertData);
+
+ void s_SkinVertices4Bones_NEON(const Matrix4x4f* bones4x4, const void* srcVertData, const void* srcVertDataEnd, const BoneInfluence* srcBoneInfluences, void* dstVertData);  // 4 bones: {weight[4], index[4]} records
+ void s_SkinVertices4Bones_NoNormals_NEON(const Matrix4x4f* bones4x4, const void* srcVertData, const void* srcVertDataEnd, const BoneInfluence* srcBoneInfluences, void* dstVertData);
+ void s_SkinVertices4Bones_Tangents_NEON(const Matrix4x4f* bones4x4, const void* srcVertData, const void* srcVertDataEnd, const BoneInfluence* srcBoneInfluences, void* dstVertData);
+}
+#endif
+
+#if UNITY_SUPPORTS_VFP || (UNITY_SUPPORTS_NEON && !UNITY_DISABLE_NEON_SKINNING)
+
+bool SkinMeshOptimizedMobile(SkinMeshInfo& info)  // Dispatch to the hand-written VFP/NEON skinning loops declared above. Returns false when no assembly variant applies, so the caller must fall back to the generic skinner.
+{
+ static const size_t kPrefetchSizeBones = 4096;  // cap on how much of the bone palette to prefetch up front
+ static const size_t kPrefetchSizeVertex = 512;  // cap on how much vertex data to prefetch up front
+
+ const int bonesPerVertexCount = info.bonesPerVertex;
+ const bool skinNormal = info.skinNormals;
+ const bool skinTangent = info.skinTangents;
+
+ const int* influence1 = reinterpret_cast<const int*> (info.compactSkin);  // one buffer, three views: record layout depends on bonesPerVertex
+ const BoneInfluence2* influence2 = reinterpret_cast<const BoneInfluence2*> (info.compactSkin);
+ const BoneInfluence* influence4 = reinterpret_cast<const BoneInfluence*> (info.compactSkin);
+
+ const Matrix4x4f* bones4x4 = info.cachedPose;
+
+ const int inStride = info.inStride;
+ int count = info.vertexCount;
+
+ const UInt8* inputVertex = (const UInt8*)info.inVertices;
+ UInt8* outputVertex = (UInt8*)info.outVertices;
+
+ if (skinTangent && !skinNormal)  // no assembly variant skins tangents without normals
+ return false;
+
+ if( !UNITY_SUPPORTS_VFP && !CPUInfo::HasNEONSupport() )  // compile-time VFP flag combined with a runtime NEON probe: nothing usable on this CPU
+ {
+ ErrorString("non-NEON path not enabled!");
+ return false;
+ }
+
+#if !ENABLE_MULTITHREADED_SKINNING
+ PROFILER_AUTO_THREAD_SAFE(gMeshSkinningOptimized, NULL);
+#endif
+
+ Prefetch(bones4x4, std::min<size_t>(info.boneCount * sizeof(Matrix4x4f), kPrefetchSizeBones));
+ Prefetch(inputVertex + inStride, std::min<size_t>(inStride * (count-1), kPrefetchSizeVertex));  // NOTE(review): if count == 0 the int product goes negative before widening to size_t; min() then picks the cap, so this only stays harmless if a speculative prefetch past the buffer is acceptable -- confirm count >= 1 at the call site
+
+#if UNITY_SUPPORTS_NEON && UNITY_SUPPORTS_VFP  // both compiled in: choose NEON vs VFP per-CPU at runtime
+#define CALL_SKIN_FUNC( name, influence ) \
+do \
+{ \
+if (CPUInfo::HasNEONSupport()) \
+ name##_NEON(bones4x4, inputVertex, (UInt8*)inputVertex + (inStride * count), influence, outputVertex); \
+else \
+ name##_VFP(bones4x4, inputVertex, (UInt8*)inputVertex + (inStride * count), influence, outputVertex); \
+} \
+while(0)
+#endif
+#if UNITY_SUPPORTS_NEON && !UNITY_SUPPORTS_VFP
+#define CALL_SKIN_FUNC( name, influence ) name##_NEON(bones4x4, inputVertex, (UInt8*)inputVertex + (inStride * count), influence, outputVertex)
+#endif
+#if UNITY_SUPPORTS_VFP && !UNITY_SUPPORTS_NEON
+#define CALL_SKIN_FUNC( name, influence ) name##_VFP(bones4x4, inputVertex, (UInt8*)inputVertex + (inStride * count), influence, outputVertex)
+#endif
+
+ if (bonesPerVertexCount == 1 )  // select the variant matching the vertex layout (pos / pos+nrm / pos+nrm+tan)
+ {
+ if (skinNormal && skinTangent)
+ CALL_SKIN_FUNC(s_SkinVertices_Tangents, influence1);
+ else if( skinNormal )
+ CALL_SKIN_FUNC(s_SkinVertices, influence1);
+ else
+ CALL_SKIN_FUNC(s_SkinVertices_NoNormals, influence1);
+ }
+ else if (bonesPerVertexCount == 2)
+ {
+ if (skinNormal && skinTangent)
+ CALL_SKIN_FUNC(s_SkinVertices2Bones_Tangents, influence2);
+ else if( skinNormal )
+ CALL_SKIN_FUNC(s_SkinVertices2Bones, influence2);
+ else
+ CALL_SKIN_FUNC(s_SkinVertices2Bones_NoNormals, influence2);
+ }
+ else if (bonesPerVertexCount == 4)
+ {
+ if (skinNormal && skinTangent)
+ CALL_SKIN_FUNC(s_SkinVertices4Bones_Tangents, influence4);
+ else if (skinNormal)
+ CALL_SKIN_FUNC(s_SkinVertices4Bones, influence4);
+ else
+ CALL_SKIN_FUNC(s_SkinVertices4Bones_NoNormals, influence4);
+ }
+
+ return true;  // NOTE(review): bonesPerVertex values other than 1/2/4 also return true without skinning anything -- confirm upstream guarantees
+}
+#else
+bool SkinMeshOptimizedMobile(SkinMeshInfo& info)  // no VFP/NEON compiled in: always fall back to the generic path
+{
+ return false;
+}
+#endif // UNITY_SUPPORTS_VFP || UNITY_SUPPORTS_NEON
+
+
diff --git a/Runtime/Filters/Mesh/MeshSkinningNEON.asm b/Runtime/Filters/Mesh/MeshSkinningNEON.asm
new file mode 100644
index 0000000..494b397
--- /dev/null
+++ b/Runtime/Filters/Mesh/MeshSkinningNEON.asm
@@ -0,0 +1,527 @@
+ AREA .text, CODE
+
+ EXPORT _s_SkinVertices_NEON
+ EXPORT _s_SkinVertices_NoNormals_NEON
+ EXPORT _s_SkinVertices_Tangents_NEON
+ EXPORT _s_SkinVertices2Bones_NEON
+ EXPORT _s_SkinVertices2Bones_NoNormals_NEON
+ EXPORT _s_SkinVertices2Bones_Tangents_NEON
+ EXPORT _s_SkinVertices4Bones_NEON
+ EXPORT _s_SkinVertices4Bones_NoNormals_NEON
+ EXPORT _s_SkinVertices4Bones_Tangents_NEON
+
+|_s_SkinVertices_NEON| PROC ; 1 bone/vertex, pos+normal (24B vertices). r0=matrices (64B each), r1=src, r2=srcEnd, r3=bone indices, [sp]=dst
+ mov ip, sp ; ip -> caller stack (5th arg) before the pushes below
+ vpush {d8-d10} ; AAPCS: d8-d15 are callee-saved
+ stmdb sp!, {r4, r5, r6, r7, r8}
+ ldr.w r4, [ip] ; r4 = dstVertData (stack-passed 5th argument)
+ mov.w r8, #12 ; post-increment stride = sizeof(Vector3f)
+ ldr.w r5, [r3], #4 ; first bone index
+ add.w r7, r0, r5, lsl #6 ; r7 = &bones4x4[idx] (1 << 6 = sizeof(Matrix4x4f))
+
+|_s_SkinVertices_NEON_loop|
+ vld1.32 {d24-d27}, [r7@128]! ; matrix rows 0-1 -> q12,q13
+ vld1.32 {d28-d31}, [r7@128] ; matrix rows 2-3 -> q14,q15
+ vld1.32 {d6-d8}, [r1@64]! ; pos.xyz (d6,d7[0]) + nrm.xyz (d7[1],d8)
+ vmul.f32 q0, q12, d6[0] ; pos: x * row0
+ vmul.f32 q1, q12, d7[1] ; nrm: x * row0
+ cmp r1, r2 ; reached srcEnd?
+ pld [r1, #256] ; 0x100
+ vmla.f32 q0, q13, d6[1] ; pos += y * row1
+ vmla.f32 q1, q13, d8[0] ; nrm += y * row1
+ it cc
+ ldrcc.w r5, [r3], #4 ; fetch next bone index only while vertices remain
+ add.w r7, r0, r5, lsl #6
+ vmla.f32 q0, q14, d7[0] ; pos += z * row2
+ vmla.f32 q1, q14, d8[1] ; nrm += z * row2
+ pld [r7] ; warm the next bone matrix
+ vadd.f32 q0, q0, q15 ; pos += translation row (normals deliberately skip it)
+ vst1.32 {d0-d1}, [r4], r8 ; 16B store, 12B advance: lane w is overwritten by the next store
+ vst1.32 {d2-d3}, [r4], r8 ; NOTE(review): on the final vertex this writes 4B past the 24B vertex; the .s version guards this with an epilogue -- confirm dst slack
+ bcc.w |_s_SkinVertices_NEON_loop| ; loop while src < srcEnd
+ ldmia.w sp!, {r4, r5, r6, r7, r8}
+ vpop {d8-d10}
+ bx lr
+ ENDP
+
+
+|_s_SkinVertices_NoNormals_NEON| PROC ; 1 bone/vertex, position only (12B vertices). Same arguments as _s_SkinVertices_NEON
+ mov ip, sp ; ip -> caller stack (5th arg)
+ vpush {d8-d10}
+ stmdb sp!, {r4, r5, r6, r7, r8}
+ ldr.w r4, [ip] ; r4 = dstVertData
+ mov.w r8, #12 ; load/store stride = sizeof(Vector3f)
+ ldr.w r5, [r3], #4 ; first bone index
+ add.w r7, r0, r5, lsl #6 ; r7 = &bones4x4[idx] (64B matrices)
+
+|_s_SkinVertices_NoNormals_NEON_loop|
+ vld1.32 {d24-d27}, [r7@128]! ; matrix rows 0-1 -> q12,q13
+ vld1.32 {d28-d31}, [r7@128] ; matrix rows 2-3 -> q14,q15
+ vld1.32 {d6-d7}, [r1], r8 ; pos.xyz (16B read, 12B advance)
+ vmul.f32 q0, q12, d6[0] ; x * row0
+ cmp r1, r2 ; reached srcEnd?
+ pld [r1, #256] ; 0x100
+ vmla.f32 q0, q13, d6[1] ; += y * row1
+ it cc
+ ldrcc.w r5, [r3], #4 ; next bone index only while vertices remain
+ add.w r7, r0, r5, lsl #6
+ vmla.f32 q0, q14, d7[0] ; += z * row2
+ pld [r7]
+ vadd.f32 q0, q0, q15 ; += translation row
+ vst1.32 {d0-d1}, [r4], r8 ; NOTE(review): final vertex writes 4B past the 12B vertex -- confirm dst slack
+ bcc.w |_s_SkinVertices_NoNormals_NEON_loop| ; while src < srcEnd
+ ldmia.w sp!, {r4, r5, r6, r7, r8}
+ vpop {d8-d10}
+ bx lr
+ ENDP
+
+
+|_s_SkinVertices_Tangents_NEON| PROC ; 1 bone/vertex, pos+normal+tangent (40B vertices). Same arguments as _s_SkinVertices_NEON
+ mov ip, sp ; ip -> caller stack (5th arg)
+ vpush {d8-d10}
+ stmdb sp!, {r4, r5, r6, r7, r8}
+ ldr.w r4, [ip] ; r4 = dstVertData
+ mov.w r8, #12 ; store stride = sizeof(Vector3f)
+ ldr.w r5, [r3], #4 ; first bone index
+ add.w r7, r0, r5, lsl #6 ; r7 = &bones4x4[idx] (64B matrices)
+
+|_s_SkinVertices_Tangents_NEON_loop|
+ vld1.32 {d24-d27}, [r7@128]! ; matrix rows 0-1 -> q12,q13
+ vld1.32 {d28-d31}, [r7@128] ; matrix rows 2-3 -> q14,q15
+ vld1.32 {d6-d8}, [r1@64]! ; pos.xyz + nrm.xyz
+ vld1.32 {d9-d10}, [r1@64]! ; tan.xyzw
+ vmul.f32 q0, q12, d6[0] ; pos: x * row0
+ vmul.f32 q1, q12, d7[1] ; nrm: x * row0
+ vmul.f32 q2, q12, d9[0] ; tan: x * row0
+ cmp r1, r2 ; reached srcEnd?
+ pld [r1, #256] ; 0x100
+ vmla.f32 q0, q13, d6[1]
+ vmla.f32 q1, q13, d8[0]
+ vmla.f32 q2, q13, d9[1]
+ it cc
+ ldrcc.w r5, [r3], #4 ; next bone index only while vertices remain
+ add.w r7, r0, r5, lsl #6
+ vmla.f32 q0, q14, d7[0]
+ vmla.f32 q1, q14, d8[1]
+ vmla.f32 q2, q14, d10[0]
+ pld [r7]
+ vadd.f32 q0, q0, q15 ; only the position gets the translation row
+ vmov.f32 s11, s21 ; tangent.w passes through unskinned (s21 = input tan.w, s11 = output q2 lane 3)
+ vst1.32 {d0-d1}, [r4], r8 ; pos (16B store, 12B advance)
+ vst1.32 {d2-d3}, [r4], r8 ; nrm
+ vst1.32 {d4-d5}, [r4]! ; tan.xyzw (full 16B)
+ bcc.w |_s_SkinVertices_Tangents_NEON_loop| ; while src < srcEnd
+ ldmia.w sp!, {r4, r5, r6, r7, r8}
+ vpop {d8-d10}
+ bx lr
+ ENDP
+
+
+|_s_SkinVertices2Bones_NEON| PROC ; 2 bones/vertex, pos+normal (24B vertices). r3 = {w0,w1,i0,i1} records (16B); software-pipelined: blend for vertex N overlaps skinning of vertex N-1
+ mov ip, sp ; ip -> caller stack (5th arg)
+ vpush {d8-d11}
+ stmdb sp!, {r4, r5, r6, r7, r8, sl}
+ ldr.w r4, [ip] ; r4 = dstVertData
+ vld1.32 {d11}, [r3]! ; d11 = {w0, w1} for the first vertex
+ ldmia r3!, {r5, r6} ; bone indices i0, i1
+ add.w r7, r0, r5, lsl #6 ; &bones[i0] (64B matrices)
+ vld1.32 {d16-d19}, [r7@128]! ; M0 rows 0-1 -> q8,q9
+ vmul.f32 q12, q8, d11[0] ; blended row = w0 * M0 row
+ vmul.f32 q13, q9, d11[0]
+ vld1.32 {d20-d23}, [r7@128] ; M0 rows 2-3 -> q10,q11
+ add.w r7, r0, r6, lsl #6 ; &bones[i1]
+ vmul.f32 q14, q10, d11[0]
+ vmul.f32 q15, q11, d11[0]
+ vld1.32 {d16-d19}, [r7@128]!
+ vmla.f32 q12, q8, d11[1] ; blended row += w1 * M1 row
+ vmla.f32 q13, q9, d11[1]
+ ldr r5, [r3, #8] ; peek next record's i0 (pipelining)
+ mov.w r8, #12 ; store stride = sizeof(Vector3f)
+ sub.w sl, r2, #24 ; sl = address of the last 24B vertex: boundary for influence prefetch
+ vld1.32 {d20-d23}, [r7@128]
+ vmla.f32 q14, q10, d11[1]
+ nop
+
+|_s_SkinVertices2Bones_NEON_loop|
+ cmp r1, sl ; at the last vertex? (guards the conditional influence loads)
+ add.w r7, r0, r5, lsl #6 ; next vertex's M0
+ it cc
+ ldrcc r6, [r3, #12] ; next i1, only while not at the last vertex
+ vld1.32 {d6-d8}, [r1@64]! ; pos.xyz + nrm.xyz
+ vmla.f32 q15, q11, d11[1] ; finish the current blended matrix
+ vmul.f32 q0, q12, d6[0] ; pos: x * blended row0
+ vld1.32 {d16-d19}, [r7@128]! ; start loading next M0
+ cmp r1, sl
+ vmul.f32 q1, q12, d7[1] ; nrm: x * blended row0
+ vld1.32 {d11}, [r3] ; next vertex's weights
+ vmul.f32 q12, q8, d11[0] ; begin next blend
+ pld [r1, #256] ; 0x100
+ vmla.f32 q0, q13, d6[1]
+ vld1.32 {d20-d23}, [r7@128]
+ add.w r7, r0, r6, lsl #6 ; next vertex's M1
+ vmla.f32 q1, q13, d8[0]
+ it cc
+ ldrcc r5, [r3, #24] ; i0 of the vertex after next
+ vmul.f32 q13, q9, d11[0]
+ vmla.f32 q0, q14, d7[0]
+ cmp r1, r2 ; loop-exit test (flags consumed by bcc below)
+ vmla.f32 q1, q14, d8[1]
+ vld1.32 {d16-d19}, [r7@128]!
+ vmul.f32 q14, q10, d11[0]
+ vadd.f32 q0, q0, q15 ; pos += blended translation row
+ vmul.f32 q15, q11, d11[0]
+ vld1.32 {d20-d23}, [r7@128]
+ vmla.f32 q12, q8, d11[1]
+ vst1.32 {d0-d1}, [r4], r8 ; store pos (16B write, 12B advance)
+ vmla.f32 q13, q9, d11[1]
+ vst1.32 {d2-d3}, [r4], r8 ; store nrm; NOTE(review): final vertex overruns by 4B -- confirm dst slack
+ add.w r3, r3, #16 ; advance one BoneInfluence2 record (2 floats + 2 ints)
+ vmla.f32 q14, q10, d11[1]
+ bcc.w |_s_SkinVertices2Bones_NEON_loop| ; while src < srcEnd
+ ldmia.w sp!, {r4, r5, r6, r7, r8, sl}
+ vpop {d8-d11}
+ bx lr
+ ENDP
+
+
+|_s_SkinVertices2Bones_NoNormals_NEON| PROC ; 2 bones/vertex, position only (12B vertices). Same pipelined structure as _s_SkinVertices2Bones_NEON
+ mov ip, sp ; ip -> caller stack (5th arg)
+ vpush {d8-d11}
+ stmdb sp!, {r4, r5, r6, r7, r8, sl}
+ ldr.w r4, [ip] ; r4 = dstVertData
+ vld1.32 {d11}, [r3]! ; d11 = {w0, w1}
+ ldmia r3!, {r5, r6} ; bone indices i0, i1
+ add.w r7, r0, r5, lsl #6 ; &bones[i0] (64B matrices)
+ vld1.32 {d16-d19}, [r7@128]!
+ vmul.f32 q12, q8, d11[0] ; blended = w0 * M0
+ vmul.f32 q13, q9, d11[0]
+ vld1.32 {d20-d23}, [r7@128]
+ add.w r7, r0, r6, lsl #6 ; &bones[i1]
+ vmul.f32 q14, q10, d11[0]
+ vmul.f32 q15, q11, d11[0]
+ vld1.32 {d16-d19}, [r7@128]!
+ vmla.f32 q12, q8, d11[1] ; blended += w1 * M1
+ vmla.f32 q13, q9, d11[1]
+ ldr r5, [r3, #8] ; peek next record's i0
+ mov.w r8, #12 ; sizeof(Vector3f)
+ sub.w sl, r2, #12 ; sl = address of the last 12B vertex
+ vld1.32 {d20-d23}, [r7@128]
+ vmla.f32 q14, q10, d11[1]
+ nop
+ nop.w
+
+|_s_SkinVertices2Bones_NoNormals_NEON_loop|
+ cmp r1, sl ; at the last vertex?
+ add.w r7, r0, r5, lsl #6 ; next vertex's M0
+ it cc
+ ldrcc r6, [r3, #12] ; next i1, only while vertices remain
+ vld1.32 {d6-d7}, [r1], r8 ; pos.xyz (16B read, 12B advance)
+ vmla.f32 q15, q11, d11[1] ; finish current blend
+ vmul.f32 q0, q12, d6[0] ; x * blended row0
+ vld1.32 {d16-d19}, [r7@128]!
+ cmp r1, sl
+ vld1.32 {d11}, [r3] ; next weights
+ vmul.f32 q12, q8, d11[0] ; begin next blend
+ pld [r1, #256] ; 0x100
+ vmla.f32 q0, q13, d6[1]
+ vld1.32 {d20-d23}, [r7@128]
+ add.w r7, r0, r6, lsl #6 ; next vertex's M1
+ it cc
+ ldrcc r5, [r3, #24] ; i0 of the vertex after next
+ vmul.f32 q13, q9, d11[0]
+ vmla.f32 q0, q14, d7[0]
+ cmp r1, r2 ; loop-exit test
+ vld1.32 {d16-d19}, [r7@128]!
+ vmul.f32 q14, q10, d11[0]
+ vadd.f32 q0, q0, q15 ; += blended translation row
+ vmul.f32 q15, q11, d11[0]
+ vld1.32 {d20-d23}, [r7@128]
+ vmla.f32 q12, q8, d11[1]
+ vst1.32 {d0-d1}, [r4], r8 ; NOTE(review): final vertex overruns by 4B -- confirm dst slack
+ vmla.f32 q13, q9, d11[1]
+ add.w r3, r3, #16 ; advance one BoneInfluence2 record
+ vmla.f32 q14, q10, d11[1]
+ bcc.w |_s_SkinVertices2Bones_NoNormals_NEON_loop| ; while src < srcEnd
+ ldmia.w sp!, {r4, r5, r6, r7, r8, sl}
+ vpop {d8-d11}
+ bx lr
+ ENDP
+
+
+|_s_SkinVertices2Bones_Tangents_NEON| PROC ; 2 bones/vertex, pos+normal+tangent (40B vertices). Same pipelined structure as _s_SkinVertices2Bones_NEON
+ mov ip, sp ; ip -> caller stack (5th arg)
+ vpush {d8-d11}
+ stmdb sp!, {r4, r5, r6, r7, r8, sl}
+ ldr.w r4, [ip] ; r4 = dstVertData
+ vld1.32 {d11}, [r3]! ; d11 = {w0, w1}
+ ldmia r3!, {r5, r6} ; bone indices i0, i1
+ add.w r7, r0, r5, lsl #6 ; &bones[i0] (64B matrices)
+ vld1.32 {d16-d19}, [r7@128]!
+ vmul.f32 q12, q8, d11[0] ; blended = w0 * M0
+ vmul.f32 q13, q9, d11[0]
+ vld1.32 {d20-d23}, [r7@128]
+ add.w r7, r0, r6, lsl #6 ; &bones[i1]
+ vmul.f32 q14, q10, d11[0]
+ vmul.f32 q15, q11, d11[0]
+ vld1.32 {d16-d19}, [r7@128]!
+ vmla.f32 q12, q8, d11[1] ; blended += w1 * M1
+ vmla.f32 q13, q9, d11[1]
+ ldr r5, [r3, #8] ; peek next record's i0
+ mov.w r8, #12 ; store stride = sizeof(Vector3f)
+ sub.w sl, r2, #40 ; 0x28 = address of the last 40B vertex
+ vld1.32 {d20-d23}, [r7@128]
+ vmla.f32 q14, q10, d11[1]
+ nop
+ nop.w
+
+|_s_SkinVertices2Bones_Tangents_NEON_loop|
+ cmp r1, sl ; at the last vertex?
+ add.w r7, r0, r5, lsl #6 ; next vertex's M0
+ it cc
+ ldrcc r6, [r3, #12] ; next i1, only while vertices remain
+ vld1.32 {d6-d8}, [r1@64]! ; pos.xyz + nrm.xyz
+ vmla.f32 q15, q11, d11[1] ; finish current blend
+ vld1.32 {d9-d10}, [r1@64]! ; tan.xyzw
+ vmul.f32 q0, q12, d6[0] ; pos: x * blended row0
+ vld1.32 {d16-d19}, [r7@128]!
+ cmp r1, sl
+ vmul.f32 q1, q12, d7[1] ; nrm: x * blended row0
+ vmul.f32 q2, q12, d9[0] ; tan: x * blended row0
+ vld1.32 {d11}, [r3] ; next weights
+ vmul.f32 q12, q8, d11[0] ; begin next blend
+ pld [r1, #256] ; 0x100
+ vmla.f32 q0, q13, d6[1]
+ vld1.32 {d20-d23}, [r7@128]
+ add.w r7, r0, r6, lsl #6 ; next vertex's M1
+ vmla.f32 q1, q13, d8[0]
+ vmla.f32 q2, q13, d9[1]
+ it cc
+ ldrcc r5, [r3, #24] ; i0 of the vertex after next
+ vmul.f32 q13, q9, d11[0]
+ vmla.f32 q0, q14, d7[0]
+ cmp r1, r2 ; loop-exit test
+ vmla.f32 q1, q14, d8[1]
+ vmla.f32 q2, q14, d10[0]
+ vld1.32 {d16-d19}, [r7@128]!
+ vmul.f32 q14, q10, d11[0]
+ vadd.f32 q0, q0, q15 ; pos += blended translation row
+ vmov.f32 s11, s21 ; tangent.w passes through unskinned
+ vmul.f32 q15, q11, d11[0]
+ vld1.32 {d20-d23}, [r7@128]
+ vmla.f32 q12, q8, d11[1]
+ vst1.32 {d0-d1}, [r4], r8 ; pos
+ vmla.f32 q13, q9, d11[1]
+ vst1.32 {d2-d3}, [r4], r8 ; nrm
+ add.w r3, r3, #16 ; advance one BoneInfluence2 record
+ vmla.f32 q14, q10, d11[1]
+ vst1.32 {d4-d5}, [r4]! ; tan.xyzw (full 16B)
+ bcc.w |_s_SkinVertices2Bones_Tangents_NEON_loop| ; while src < srcEnd
+ ldmia.w sp!, {r4, r5, r6, r7, r8, sl}
+ vpop {d8-d11}
+ bx lr
+ ENDP
+
+
+|_s_SkinVertices4Bones_NEON| PROC ; 4 bones/vertex, pos+normal (24B vertices). r3 = {weight[4], index[4]} records (32B)
+ mov ip, sp ; ip -> caller stack (5th arg)
+ vpush {d8-d12}
+ stmdb sp!, {r4, r5, r6, r7, r8}
+ ldr.w r4, [ip] ; r4 = dstVertData
+ vld1.32 {d11-d12}, [r3]! ; d11,d12 = 4 weights
+ ldmia r3!, {r5, r6} ; bone indices i0, i1
+ add.w r7, r0, r5, lsl #6 ; &bones[i0] (64B matrices)
+ vld1.32 {d16-d19}, [r7@128]! ; M0 rows 0-1
+ vld1.32 {d20-d23}, [r7@128] ; M0 rows 2-3
+ mov.w r8, #12 ; store stride = sizeof(Vector3f)
+ nop.w
+ nop.w
+ nop.w
+
+|_s_SkinVertices4Bones_NEON_loop|
+ vmul.f32 q12, q8, d11[0] ; blended = w0 * M0
+ vld1.32 {d6-d8}, [r1@64]! ; pos.xyz + nrm.xyz
+ vmul.f32 q13, q9, d11[0]
+ add.w r7, r0, r6, lsl #6 ; &bones[i1]
+ vmul.f32 q14, q10, d11[0]
+ vld1.32 {d16-d19}, [r7@128]!
+ vmul.f32 q15, q11, d11[0]
+ vld1.32 {d20-d23}, [r7@128]
+ vmla.f32 q12, q8, d11[1] ; += w1 * M1
+ ldmia r3!, {r5, r6} ; indices i2, i3
+ vmla.f32 q13, q9, d11[1]
+ add.w r7, r0, r5, lsl #6 ; &bones[i2]
+ cmp r1, r2 ; loop-exit test (flags consumed by the conditional loads and bcc)
+ vmla.f32 q14, q10, d11[1]
+ vld1.32 {d16-d19}, [r7@128]!
+ vmla.f32 q15, q11, d11[1]
+ pld [r3, #256] ; 0x100
+ vld1.32 {d20-d23}, [r7@128]
+ vmla.f32 q12, q8, d12[0] ; += w2 * M2
+ add.w r7, r0, r6, lsl #6 ; &bones[i3]
+ vmla.f32 q13, q9, d12[0]
+ vmla.f32 q14, q10, d12[0]
+ vld1.32 {d16-d19}, [r7@128]!
+ vmla.f32 q15, q11, d12[0]
+ vld1.32 {d20-d23}, [r7@128]
+ vmla.f32 q12, q8, d12[1] ; += w3 * M3
+ vmla.f32 q13, q9, d12[1]
+ vmla.f32 q14, q10, d12[1]
+ vmla.f32 q15, q11, d12[1]
+ pld [r1, #256] ; 0x100
+ vmul.f32 q0, q12, d6[0] ; pos: x * blended row0
+ vld1.32 {d11-d12}, [r3]! ; next vertex's weights
+ vmul.f32 q1, q12, d7[1] ; nrm: x * blended row0
+ it cc
+ ldmiacc r3!, {r5, r6} ; next i0,i1 only while vertices remain
+ vmla.f32 q0, q13, d6[1]
+ add.w r7, r0, r5, lsl #6 ; next vertex's M0
+ vmla.f32 q1, q13, d8[0]
+ vldmia r7, {d16-d23} ; whole 64B matrix in one load
+ vmla.f32 q0, q14, d7[0]
+ vmla.f32 q1, q14, d8[1]
+ vadd.f32 q0, q0, q15 ; pos += blended translation row
+ vst1.32 {d0-d1}, [r4], r8 ; pos (16B write, 12B advance)
+ vst1.32 {d2-d3}, [r4], r8 ; nrm; NOTE(review): final vertex overruns by 4B -- confirm dst slack
+ bcc.w |_s_SkinVertices4Bones_NEON_loop| ; while src < srcEnd
+ ldmia.w sp!, {r4, r5, r6, r7, r8}
+ vpop {d8-d12}
+ bx lr
+ ENDP
+
+
+|_s_SkinVertices4Bones_NoNormals_NEON| PROC ; 4 bones/vertex, position only (12B vertices). Same structure as _s_SkinVertices4Bones_NEON
+ mov ip, sp ; ip -> caller stack (5th arg)
+ vpush {d8-d12}
+ stmdb sp!, {r4, r5, r6, r7, r8}
+ ldr.w r4, [ip] ; r4 = dstVertData
+ vld1.32 {d11-d12}, [r3]! ; 4 weights
+ ldmia r3!, {r5, r6} ; bone indices i0, i1
+ add.w r7, r0, r5, lsl #6 ; &bones[i0]
+ vld1.32 {d16-d19}, [r7@128]!
+ vld1.32 {d20-d23}, [r7@128]
+ mov.w r8, #12 ; sizeof(Vector3f)
+ nop
+ nop.w
+
+|_s_SkinVertices4Bones_NoNormals_NEON_loop|
+ vmul.f32 q12, q8, d11[0] ; blended = w0 * M0
+ vld1.32 {d6-d7}, [r1], r8 ; pos.xyz (16B read, 12B advance)
+ vmul.f32 q13, q9, d11[0]
+ add.w r7, r0, r6, lsl #6 ; &bones[i1]
+ vmul.f32 q14, q10, d11[0]
+ vld1.32 {d16-d19}, [r7@128]!
+ vmul.f32 q15, q11, d11[0]
+ vld1.32 {d20-d23}, [r7@128]
+ vmla.f32 q12, q8, d11[1] ; += w1 * M1
+ ldmia r3!, {r5, r6} ; indices i2, i3
+ vmla.f32 q13, q9, d11[1]
+ add.w r7, r0, r5, lsl #6 ; &bones[i2]
+ cmp r1, r2 ; loop-exit test
+ vmla.f32 q14, q10, d11[1]
+ vld1.32 {d16-d19}, [r7@128]!
+ vmla.f32 q15, q11, d11[1]
+ pld [r3, #256] ; 0x100
+ vld1.32 {d20-d23}, [r7@128]
+ vmla.f32 q12, q8, d12[0] ; += w2 * M2
+ add.w r7, r0, r6, lsl #6 ; &bones[i3]
+ vmla.f32 q13, q9, d12[0]
+ vmla.f32 q14, q10, d12[0]
+ vld1.32 {d16-d19}, [r7@128]!
+ vmla.f32 q15, q11, d12[0]
+ vld1.32 {d20-d23}, [r7@128]
+ vmla.f32 q12, q8, d12[1] ; += w3 * M3
+ vmla.f32 q13, q9, d12[1]
+ vmla.f32 q14, q10, d12[1]
+ vmla.f32 q15, q11, d12[1]
+ pld [r1, #256] ; 0x100
+ vmul.f32 q0, q12, d6[0] ; x * blended row0
+ vld1.32 {d11-d12}, [r3]! ; next weights
+ it cc
+ ldmiacc r3!, {r5, r6} ; next indices only while vertices remain
+ vmla.f32 q0, q13, d6[1]
+ add.w r7, r0, r5, lsl #6 ; next vertex's M0
+ vldmia r7, {d16-d23} ; whole 64B matrix
+ vmla.f32 q0, q14, d7[0]
+ vadd.f32 q0, q0, q15 ; += blended translation row
+ vst1.32 {d0-d1}, [r4], r8 ; NOTE(review): final vertex overruns by 4B -- confirm dst slack
+ bcc.w |_s_SkinVertices4Bones_NoNormals_NEON_loop| ; while src < srcEnd
+ ldmia.w sp!, {r4, r5, r6, r7, r8}
+ vpop {d8-d12}
+ bx lr
+ ENDP
+
+
+|_s_SkinVertices4Bones_Tangents_NEON| PROC ; 4 bones/vertex, pos+normal+tangent (40B vertices). Same structure as _s_SkinVertices4Bones_NEON
+ mov ip, sp ; ip -> caller stack (5th arg)
+ vpush {d8-d12}
+ stmdb sp!, {r4, r5, r6, r7, r8}
+ ldr.w r4, [ip] ; r4 = dstVertData
+ vld1.32 {d11-d12}, [r3]! ; 4 weights
+ ldmia r3!, {r5, r6} ; bone indices i0, i1
+ add.w r7, r0, r5, lsl #6 ; &bones[i0]
+ vld1.32 {d16-d19}, [r7@128]!
+ vld1.32 {d20-d23}, [r7@128]
+ mov.w r8, #12 ; store stride = sizeof(Vector3f)
+ nop
+ nop.w
+
+|_s_SkinVertices4Bones_Tangents_NEON_loop|
+ vmul.f32 q12, q8, d11[0] ; blended = w0 * M0
+ vld1.32 {d6-d8}, [r1@64]! ; pos.xyz + nrm.xyz
+ vmul.f32 q13, q9, d11[0]
+ vld1.32 {d9-d10}, [r1@64]! ; tan.xyzw
+ add.w r7, r0, r6, lsl #6 ; &bones[i1]
+ vmul.f32 q14, q10, d11[0]
+ vld1.32 {d16-d19}, [r7@128]!
+ vmul.f32 q15, q11, d11[0]
+ vld1.32 {d20-d23}, [r7@128]
+ vmla.f32 q12, q8, d11[1] ; += w1 * M1
+ ldmia r3!, {r5, r6} ; indices i2, i3
+ vmla.f32 q13, q9, d11[1]
+ add.w r7, r0, r5, lsl #6 ; &bones[i2]
+ cmp r1, r2 ; loop-exit test
+ vmla.f32 q14, q10, d11[1]
+ vld1.32 {d16-d19}, [r7@128]!
+ vmla.f32 q15, q11, d11[1]
+ pld [r3, #256] ; 0x100
+ vld1.32 {d20-d23}, [r7@128]
+ vmla.f32 q12, q8, d12[0] ; += w2 * M2
+ add.w r7, r0, r6, lsl #6 ; &bones[i3]
+ vmla.f32 q13, q9, d12[0]
+ vmla.f32 q14, q10, d12[0]
+ vld1.32 {d16-d19}, [r7@128]!
+ vmla.f32 q15, q11, d12[0]
+ vld1.32 {d20-d23}, [r7@128]
+ vmla.f32 q12, q8, d12[1] ; += w3 * M3
+ vmla.f32 q13, q9, d12[1]
+ vmla.f32 q14, q10, d12[1]
+ vmla.f32 q15, q11, d12[1]
+ pld [r1, #256] ; 0x100
+ vmul.f32 q0, q12, d6[0] ; pos: x * blended row0
+ vld1.32 {d11-d12}, [r3]! ; next weights
+ vmul.f32 q1, q12, d7[1] ; nrm: x * blended row0
+ vmul.f32 q2, q12, d9[0] ; tan: x * blended row0
+ it cc
+ ldmiacc r3!, {r5, r6} ; next indices only while vertices remain
+ vmla.f32 q0, q13, d6[1]
+ add.w r7, r0, r5, lsl #6 ; next vertex's M0
+ vmla.f32 q1, q13, d8[0]
+ vmla.f32 q2, q13, d9[1]
+ vldmia r7, {d16-d23} ; whole 64B matrix
+ vmla.f32 q0, q14, d7[0]
+ vmla.f32 q1, q14, d8[1]
+ vmla.f32 q2, q14, d10[0]
+ vadd.f32 q0, q0, q15 ; only the position gets the translation row
+ vmov.f32 s11, s21 ; tangent.w passes through unskinned
+ vst1.32 {d0-d1}, [r4], r8 ; pos
+ vst1.32 {d2-d3}, [r4], r8 ; nrm
+ vst1.32 {d4-d5}, [r4]! ; tan.xyzw (full 16B)
+ bcc.w |_s_SkinVertices4Bones_Tangents_NEON_loop| ; while src < srcEnd
+ ldmia.w sp!, {r4, r5, r6, r7, r8}
+ vpop {d8-d12}
+ bx lr
+ nop
+ ENDP
+
+
+ END
diff --git a/Runtime/Filters/Mesh/MeshSkinningNEON.s b/Runtime/Filters/Mesh/MeshSkinningNEON.s
new file mode 100644
index 0000000..e94542d
--- /dev/null
+++ b/Runtime/Filters/Mesh/MeshSkinningNEON.s
@@ -0,0 +1,183 @@
+#define UNITY_ASSEMBLER
+#include "Configuration/PrefixConfigure.h"
+
+#if (UNITY_SUPPORTS_NEON && !UNITY_DISABLE_NEON_SKINNING)
+
+.set device,0 // default: not ARM
+.set device,__arm__ // non-zero only when the assembler targets ARM
+
+.if device // skip the whole file when not building for ARM
+
+//.code32
+
+.globl _s_SkinVertices_NEON
+.globl _s_SkinVertices_NoNormals_NEON
+.globl _s_SkinVertices_Tangents_NEON
+
+.globl _s_SkinVertices2Bones_NEON
+.globl _s_SkinVertices2Bones_NoNormals_NEON
+.globl _s_SkinVertices2Bones_Tangents_NEON
+
+.globl _s_SkinVertices4Bones_NEON
+.globl _s_SkinVertices4Bones_NoNormals_NEON
+.globl _s_SkinVertices4Bones_Tangents_NEON
+
+#if UNITY_ANDROID // keep the symbols out of the shared library's dynamic symbol table
+.hidden _s_SkinVertices_NEON
+.hidden _s_SkinVertices_NoNormals_NEON
+.hidden _s_SkinVertices_Tangents_NEON
+
+.hidden _s_SkinVertices2Bones_NEON
+.hidden _s_SkinVertices2Bones_NoNormals_NEON
+.hidden _s_SkinVertices2Bones_Tangents_NEON
+
+.hidden _s_SkinVertices4Bones_NEON
+.hidden _s_SkinVertices4Bones_NoNormals_NEON
+.hidden _s_SkinVertices4Bones_Tangents_NEON
+#endif
+
+
+//===========================================================================================================================================
+
+#define SKIN_POS 1 // position only
+#define SKIN_POS_NRM 2 // position + normal
+#define SKIN_POS_NRM_TAN 3 // position + normal + tangent
+
+
+#define SKIN_2BONES 0 // 1-bone group: other variants disabled
+#define SKIN_4BONES 0
+
+_s_SkinVertices_NEON: // each stanza configures the shared template, then expands it via #include
+
+#define SKIN_1BONE SKIN_POS_NRM
+#define VERTEX_SZ 24 // 12B pos + 12B normal
+#define LOOP_NAME _s_SkinVertices_NEON_loop
+
+#include "MeshSkinningNeon_Loop.h"
+
+#undef LOOP_NAME
+#undef VERTEX_SZ
+#undef SKIN_1BONE
+
+_s_SkinVertices_NoNormals_NEON:
+
+#define SKIN_1BONE SKIN_POS
+#define VERTEX_SZ 12 // pos only
+#define LOOP_NAME _s_SkinVertices_NoNormals_NEON_loop
+
+#include "MeshSkinningNeon_Loop.h"
+
+#undef LOOP_NAME
+#undef VERTEX_SZ
+#undef SKIN_1BONE
+
+_s_SkinVertices_Tangents_NEON:
+
+#define SKIN_1BONE SKIN_POS_NRM_TAN
+#define VERTEX_SZ 40 // pos + normal + 16B tangent
+#define LOOP_NAME _s_SkinVertices_Tangents_NEON_loop
+
+#include "MeshSkinningNeon_Loop.h"
+
+#undef LOOP_NAME
+#undef VERTEX_SZ
+#undef SKIN_1BONE
+
+#undef SKIN_4BONES
+#undef SKIN_2BONES
+
+//===========================================================================================================================================
+
+#define SKIN_1BONE 0 // 2-bone group
+#define SKIN_4BONES 0
+
+_s_SkinVertices2Bones_NEON:
+
+#define SKIN_2BONES SKIN_POS_NRM
+#define VERTEX_SZ 24
+#define LOOP_NAME _s_SkinVertices2Bones_NEON_loop
+
+#include "MeshSkinningNeon_Loop.h"
+
+#undef LOOP_NAME
+#undef VERTEX_SZ
+#undef SKIN_2BONES
+
+_s_SkinVertices2Bones_NoNormals_NEON:
+
+#define SKIN_2BONES SKIN_POS
+#define VERTEX_SZ 12
+#define LOOP_NAME _s_SkinVertices2Bones_NoNormals_NEON_loop
+
+#include "MeshSkinningNeon_Loop.h"
+
+#undef LOOP_NAME
+#undef VERTEX_SZ
+#undef SKIN_2BONES
+
+_s_SkinVertices2Bones_Tangents_NEON:
+
+#define SKIN_2BONES SKIN_POS_NRM_TAN
+#define VERTEX_SZ 40
+#define LOOP_NAME _s_SkinVertices2Bones_Tangents_NEON_loop
+
+#include "MeshSkinningNeon_Loop.h"
+
+#undef LOOP_NAME
+#undef VERTEX_SZ
+#undef SKIN_2BONES
+
+#undef SKIN_4BONES
+#undef SKIN_1BONE
+
+
+//===========================================================================================================================================
+
+#define SKIN_1BONE 0 // 4-bone group
+#define SKIN_2BONES 0
+
+_s_SkinVertices4Bones_NEON:
+
+#define SKIN_4BONES SKIN_POS_NRM
+#define VERTEX_SZ 24
+#define LOOP_NAME _s_SkinVertices4Bones_NEON_loop
+
+#include "MeshSkinningNeon_Loop.h"
+
+#undef LOOP_NAME
+#undef VERTEX_SZ
+#undef SKIN_4BONES
+
+_s_SkinVertices4Bones_NoNormals_NEON:
+
+#define SKIN_4BONES SKIN_POS
+#define VERTEX_SZ 12
+#define LOOP_NAME _s_SkinVertices4Bones_NoNormals_NEON_loop
+
+#include "MeshSkinningNeon_Loop.h"
+
+#undef LOOP_NAME
+#undef VERTEX_SZ
+#undef SKIN_4BONES
+
+_s_SkinVertices4Bones_Tangents_NEON:
+
+#define SKIN_4BONES SKIN_POS_NRM_TAN
+#define VERTEX_SZ 40
+#define LOOP_NAME _s_SkinVertices4Bones_Tangents_NEON_loop
+
+#include "MeshSkinningNeon_Loop.h"
+
+#undef LOOP_NAME
+#undef VERTEX_SZ
+#undef SKIN_4BONES
+
+
+#undef SKIN_2BONES
+#undef SKIN_1BONE
+
+//===========================================================================================================================================
+
+.endif
+
+#endif
diff --git a/Runtime/Filters/Mesh/MeshSkinningNeon_Loop.h b/Runtime/Filters/Mesh/MeshSkinningNeon_Loop.h
new file mode 100644
index 0000000..8e584da
--- /dev/null
+++ b/Runtime/Filters/Mesh/MeshSkinningNeon_Loop.h
@@ -0,0 +1,487 @@
+
+// defines
+// SKIN_1BONE
+// SKIN_2BONES
+// SKIN_4BONES
+// LOOP_NAME
+// VERTEX_SZ
+
+// skin types
+// SKIN_POS
+// SKIN_POS_NRM
+// SKIN_POS_NRM_TAN
+
+
+
+//r0: const void* bones4x4
+//r1: const void* srcVertData
+//r2: const void* srcVertDataEnd
+//r3: const BoneInfluence4* srcBoneInfluence4
+//[sp+0] -> r4: const void* dstVertData
+
+// r5, r6: index
+// r7: matrix address
+// r8: 12 (offset for vector3)
+
+// q0 <- output: pos
+// q1 <- output: nrm
+// q2 <- output: tan
+// q3 <- input: pos
+// q4 <- input: nrm
+// q5 <- input: tan
+// d11,d12 <- weights
+// q12-q15 (blended matrix)
+// q8-q11 (cur matrix)
+
+
+// input:
+// d6[0], d6[1], d7[0] <- pos
+// d7[1], d8[0], d8[1] <- nrm
+// d9[0], d9[1], d10[0], d10[1] <- tan
+// q3 <- pos.x, pos.y, pos.z, nrm.x
+// q4 <- nrm.y, nrm.z, tan.x, tan.y
+// q5 <- tan.z, tan.w, w0, w1
+
+
+//===========================================================================================================================================
+//
+// Common
+
+#define CALC_POS_1 vmul.f32 q0, q12, d6[0] // pos = x * blended row0
+#define CALC_POS_2 vmla.f32 q0, q13, d6[1] // pos += y * row1
+#define CALC_POS_3 vmla.f32 q0, q14, d7[0] // pos += z * row2
+#define CALC_POS_4 vadd.f32 q0, q15 // pos += translation row (vectors skip this)
+
+#define STORE_POS vst1.32 {d0, d1}, [r4], r8 // 16B store, 12B advance: lane w overwritten by the next store
+
+#if (SKIN_1BONE == SKIN_POS_NRM) || (SKIN_1BONE == SKIN_POS_NRM_TAN) \
+ || (SKIN_2BONES == SKIN_POS_NRM) || (SKIN_2BONES == SKIN_POS_NRM_TAN) \
+ || (SKIN_4BONES == SKIN_POS_NRM) || (SKIN_4BONES == SKIN_POS_NRM_TAN)
+
+ #define LOAD_POS_NRM vld1.32 {d6, d7, d8}, [r1, :64]! // pos.xyz + nrm.xyz (24B)
+ #define STORE_NRM vst1.32 {d2, d3}, [r4], r8
+ #define CALC_NRM_1 vmul.f32 q1, q12, d7[1] // nrm = x * blended row0
+ #define CALC_NRM_2 vmla.f32 q1, q13, d8[0]
+ #define CALC_NRM_3 vmla.f32 q1, q14, d8[1]
+#else
+ #define LOAD_POS_NRM vld1.32 {d6, d7}, [r1], r8 // pos.xyz only (16B read, 12B advance)
+ #define STORE_NRM
+ #define CALC_NRM_1
+ #define CALC_NRM_2
+ #define CALC_NRM_3
+#endif
+
+#if (SKIN_1BONE == SKIN_POS_NRM_TAN) || (SKIN_2BONES == SKIN_POS_NRM_TAN) || (SKIN_4BONES == SKIN_POS_NRM_TAN)
+ #define LOAD_TAN vld1.32 {d9, d10}, [r1, :64]! // tan.xyzw (16B)
+ #define STORE_TAN vst1.32 {d4, d5}, [r4]!
+ #define CALC_TAN_1 vmul.f32 q2, q12, d9[0]
+ #define CALC_TAN_2 vmla.f32 q2, q13, d9[1]
+ #define CALC_TAN_3 vmla.f32 q2, q14, d10[0]
+ #define CALC_TAN_4 vmov.f32 s11, s21 // tangent.w passes through unskinned (s21 = input tan.w)
+#else
+ #define LOAD_TAN
+ #define STORE_TAN
+ #define CALC_TAN_1
+ #define CALC_TAN_2
+ #define CALC_TAN_3
+ #define CALC_TAN_4
+#endif
+
+// The copy-only data stream sits immediately after the vertex data, so the stores for the LAST vertex must write exactly VERTEX_SZ bytes and not run past it.
+#if (SKIN_1BONE == SKIN_POS) || (SKIN_2BONES == SKIN_POS) || (SKIN_4BONES == SKIN_POS)
+#define STORE_POS_LAST1 vst1.32 {d0}, [r4]! // last vertex: 8B + 4B stores = exactly 12B
+#define STORE_POS_LAST2 vst1.32 {d1[0]}, [r4]!
+#else
+#define STORE_POS_LAST1 STORE_POS // a following store covers the overrun, so the plain 16B store is safe
+#define STORE_POS_LAST2
+#endif
+
+#if (SKIN_1BONE == SKIN_POS_NRM) || (SKIN_2BONES == SKIN_POS_NRM) || (SKIN_4BONES == SKIN_POS_NRM)
+#define STORE_NRM_LAST1 vst1.32 {d2}, [r4]! // last vertex: normal is the final field -> 8B + 4B stores
+#define STORE_NRM_LAST2 vst1.32 {d3[0]}, [r4]!
+#else
+#define STORE_NRM_LAST1 STORE_NRM
+#define STORE_NRM_LAST2
+#endif
+
+#define __NAME_EPILOGUE(x) x ## EPILOGUE // expands to <LOOP_NAME>EPILOGUE
+#define _NAME_EPILOGUE(x) __NAME_EPILOGUE(x)
+#define LOOP_EPILOGUE _NAME_EPILOGUE(LOOP_NAME)
+
+
+
+#if (SKIN_1BONE == SKIN_POS) || (SKIN_1BONE == SKIN_POS_NRM) || (SKIN_1BONE == SKIN_POS_NRM_TAN)
+ #define LOAD_M_12 vld1.32 {q12,q13}, [r7,:128]! // single bone: load straight into the "blended" registers
+ #define LOAD_M_34 vld1.32 {q14,q15}, [r7,:128]
+#else
+ #define LOAD_M_12 vld1.32 {q8,q9}, [r7,:128]! // multi-bone: load into the scratch matrix, blend below
+ #define LOAD_M_34 vld1.32 {q10,q11}, [r7,:128]
+#endif
+
+#define WEIGHT_MATRIX_1(op,r) op.f32 q12, q8, r // q12..q15 = blended matrix, q8..q11 = current bone matrix
+#define WEIGHT_MATRIX_2(op,r) op.f32 q13, q9, r
+#define WEIGHT_MATRIX_3(op,r) op.f32 q14, q10, r
+#define WEIGHT_MATRIX_4(op,r) op.f32 q15, q11, r
+
+#define WEIGHT_M0_1 WEIGHT_MATRIX_1(vmul, d11[0]) // blended = w0 * M0
+#define WEIGHT_M0_2 WEIGHT_MATRIX_2(vmul, d11[0])
+#define WEIGHT_M0_3 WEIGHT_MATRIX_3(vmul, d11[0])
+#define WEIGHT_M0_4 WEIGHT_MATRIX_4(vmul, d11[0])
+
+#define WEIGHT_M1_1 WEIGHT_MATRIX_1(vmla, d11[1]) // blended += w1 * M1
+#define WEIGHT_M1_2 WEIGHT_MATRIX_2(vmla, d11[1])
+#define WEIGHT_M1_3 WEIGHT_MATRIX_3(vmla, d11[1])
+#define WEIGHT_M1_4 WEIGHT_MATRIX_4(vmla, d11[1])
+
+#define WEIGHT_M2_1 WEIGHT_MATRIX_1(vmla, d12[0]) // blended += w2 * M2
+#define WEIGHT_M2_2 WEIGHT_MATRIX_2(vmla, d12[0])
+#define WEIGHT_M2_3 WEIGHT_MATRIX_3(vmla, d12[0])
+#define WEIGHT_M2_4 WEIGHT_MATRIX_4(vmla, d12[0])
+
+#define WEIGHT_M3_1 WEIGHT_MATRIX_1(vmla, d12[1]) // blended += w3 * M3
+#define WEIGHT_M3_2 WEIGHT_MATRIX_2(vmla, d12[1])
+#define WEIGHT_M3_3 WEIGHT_MATRIX_3(vmla, d12[1])
+#define WEIGHT_M3_4 WEIGHT_MATRIX_4(vmla, d12[1])
+
+
+//===========================================================================================================================================
+//
+// 1 bone skinning
+
+#if (SKIN_1BONE == SKIN_POS) || (SKIN_1BONE == SKIN_POS_NRM) || (SKIN_1BONE == SKIN_POS_NRM_TAN)
+
+mov ip, sp // ip -> caller stack (5th arg) before the pushes
+
+vpush {d8-d10}
+stmfd sp!, {r4-r8}
+
+ldr r4, [ip, #0] // r4 = dstVertData
+mov r8, #12 // store stride = sizeof(Vector3f)
+
+ ldr r5, [r3], #4 // first bone index
+ add r7, r0, r5, lsl #6 // &bones[idx] (64B matrices)
+
+LOOP_NAME:
+
+
+
+LOAD_M_12
+LOAD_M_34
+
+
+LOAD_POS_NRM
+LOAD_TAN
+
+CALC_POS_1
+CALC_NRM_1
+CALC_TAN_1
+
+ cmp r1, r2 // reached srcEnd? (flags live through the NEON ops below)
+ pld [r1, #256]
+
+CALC_POS_2
+CALC_NRM_2
+CALC_TAN_2
+
+ ldrcc r5, [r3], #4 // next bone index only while vertices remain
+ add r7, r0, r5, lsl #6
+
+CALC_POS_3
+CALC_NRM_3
+CALC_TAN_3
+
+ pld [r7] // warm the next bone matrix
+
+CALC_POS_4
+CALC_TAN_4
+
+beq LOOP_EPILOGUE // exactly at the last vertex: take the careful-store tail
+
+STORE_POS
+STORE_NRM
+STORE_TAN
+
+bcc LOOP_NAME
+
+LOOP_EPILOGUE: // last vertex: stores write exactly VERTEX_SZ bytes
+STORE_POS_LAST1
+STORE_POS_LAST2
+STORE_NRM_LAST1
+STORE_NRM_LAST2
+STORE_TAN
+
+
+ldmfd sp!, {r4-r8}
+vpop {d8-d10}
+
+bx lr
+
+
+//===========================================================================================================================================
+//
+// 2 bones skinning
+
+#elif (SKIN_2BONES == SKIN_POS || SKIN_2BONES == SKIN_POS_NRM || SKIN_2BONES == SKIN_POS_NRM_TAN)
+
+mov ip, sp // ip -> caller stack (5th arg)
+
+vpush {d8-d11}
+stmfd sp!, {r4,r5,r6,r7,r8,r10}
+
+ldr r4, [ip, #0] // r4 = dstVertData
+
+vld1.32 {d11}, [r3,:64]! // wgt ->
+ldmia r3!, {r5-r6} // idx ->
+
+add r7, r0, r5, lsl #6 // M0 ..
+LOAD_M_12 // M0
+WEIGHT_M0_1
+WEIGHT_M0_2
+
+LOAD_M_34 // M0
+add r7, r0, r6, lsl #6 // M1 ..
+WEIGHT_M0_3
+WEIGHT_M0_4
+
+LOAD_M_12 // M1
+WEIGHT_M1_1
+WEIGHT_M1_2
+
+ldr r5, [r3, #8] // idx0 (peek next record: software pipelining)
+
+mov r8, #12 // store stride = sizeof(Vector3f)
+sub r10, r2, #VERTEX_SZ // r10 = address of the last vertex (guards influence prefetch)
+
+LOAD_M_34 // M1
+
+WEIGHT_M1_3
+
+.align 4
+LOOP_NAME:
+
+ cmp r1, r10 // at the last vertex?
+
+ add r7, r0, r5, lsl #6 // M0 ..
+ ldrcc r6, [r3, #12] // idx1, only while vertices remain
+LOAD_POS_NRM
+
+WEIGHT_M1_4
+
+LOAD_TAN
+
+CALC_POS_1
+LOAD_M_12 // M0
+ cmp r1, r10
+CALC_NRM_1
+CALC_TAN_1
+vld1.32 {d11}, [r3,:64] // wgt ->
+
+WEIGHT_M0_1
+ pld [r1,#256]
+
+CALC_POS_2
+LOAD_M_34 // M0
+ add r7, r0, r6, lsl #6 // M1 ..
+CALC_NRM_2
+CALC_TAN_2
+ ldrcc r5, [r3, #24] // idx0 of the vertex after next
+WEIGHT_M0_2
+CALC_POS_3
+
+ cmp r1, r2 // loop-exit test (flags consumed by beq/bcc)
+CALC_NRM_3
+CALC_TAN_3
+LOAD_M_12 // M1
+
+
+WEIGHT_M0_3
+
+CALC_POS_4
+CALC_TAN_4
+
+WEIGHT_M0_4
+LOAD_M_34 // M1
+
+beq LOOP_EPILOGUE // last vertex: careful-store tail
+
+WEIGHT_M1_1
+STORE_POS
+
+WEIGHT_M1_2
+STORE_NRM
+ add r3, r3, #16 // advance one BoneInfluence2 record (2 floats + 2 ints)
+WEIGHT_M1_3
+STORE_TAN
+
+bcc LOOP_NAME
+
+LOOP_EPILOGUE: // last vertex: stores write exactly VERTEX_SZ bytes
+STORE_POS_LAST1
+STORE_POS_LAST2
+STORE_NRM_LAST1
+STORE_NRM_LAST2
+STORE_TAN
+
+
+ldmfd sp!, {r4,r5,r6,r7,r8,r10}
+vpop {d8-d11}
+bx lr
+
+
+//===========================================================================================================================================
+//
+// 4 bones skinning
+
+#elif (SKIN_4BONES == SKIN_POS || SKIN_4BONES == SKIN_POS_NRM || SKIN_4BONES == SKIN_POS_NRM_TAN)
+
+
+mov ip, sp // ip -> caller stack (5th arg)
+
+vpush {d8-d12}
+stmfd sp!, {r4-r8}
+
+ldr r4, [ip, #0] // r4 = dstVertData
+
+vld1.32 {d11,d12}, [r3,:128]! // wgt -> (4 weights)
+ldmia r3!, {r5-r6} // idx' -> (indices 0,1)
+
+add r7, r0, r5, lsl #6 // M0 ..
+LOAD_M_12 // M0
+LOAD_M_34 // M0
+
+mov r8, #12 // store stride = sizeof(Vector3f)
+
+.align 4
+LOOP_NAME:
+
+WEIGHT_M0_1
+LOAD_POS_NRM
+
+WEIGHT_M0_2
+LOAD_TAN
+ add r7, r0, r6, lsl #6 // M1 ..
+
+
+WEIGHT_M0_3
+LOAD_M_12 // M1
+
+WEIGHT_M0_4
+LOAD_M_34 // M1
+
+WEIGHT_M1_1
+ ldmia r3!, {r5-r6} // idx'' -> (indices 2,3)
+
+WEIGHT_M1_2
+ add r7, r0, r5, lsl #6 // M2 ..
+ cmp r1, r2 // loop-exit test
+
+WEIGHT_M1_3
+LOAD_M_12 // M2
+
+WEIGHT_M1_4
+ pld [r3, #256]
+LOAD_M_34 // M2
+
+WEIGHT_M2_1
+ add r7, r0, r6, lsl #6 // M3 ..
+WEIGHT_M2_2
+WEIGHT_M2_3
+LOAD_M_12 // M3
+WEIGHT_M2_4
+
+LOAD_M_34 // M3
+WEIGHT_M3_1
+WEIGHT_M3_2
+WEIGHT_M3_3
+WEIGHT_M3_4
+ pld [r1, #256]
+
+CALC_POS_1
+vld1.32 {d11,d12}, [r3,:128]! // wgt -> (next vertex's weights)
+
+CALC_NRM_1
+CALC_TAN_1
+ ldmcc r3!, {r5-r6} // idx -> (next indices, only while vertices remain)
+
+CALC_POS_2
+ add r7, r0, r5, lsl #6 // M0 ..
+CALC_NRM_2
+CALC_TAN_2
+vldmia r7, {q8-q11} // M0 -> (whole 64B matrix in one load)
+
+CALC_POS_3
+CALC_NRM_3
+CALC_TAN_3
+
+CALC_POS_4
+CALC_TAN_4
+
+beq LOOP_EPILOGUE // last vertex: careful-store tail
+
+STORE_POS
+STORE_NRM
+STORE_TAN
+
+bcc LOOP_NAME
+
+LOOP_EPILOGUE: // last vertex: stores write exactly VERTEX_SZ bytes
+STORE_POS_LAST1
+STORE_POS_LAST2
+STORE_NRM_LAST1
+STORE_NRM_LAST2
+STORE_TAN
+
+
+ldmfd sp!, {r4-r8}
+vpop {d8-d12}
+bx lr
+
+
+//===========================================================================================================================================
+
+#endif
+
+#undef __NAME_EPILOGUE
+#undef _NAME_EPILOGUE
+#undef LOOP_EPILOGUE
+#undef CALC_POS_1
+#undef CALC_POS_2
+#undef CALC_POS_3 // NOTE(review): CALC_POS_4 is never #undef'd -- benign only because every re-#define is identical
+#undef STORE_POS
+#undef STORE_POS_LAST1
+#undef STORE_POS_LAST2
+#undef LOAD_POS_NRM
+#undef STORE_NRM
+#undef STORE_NRM_LAST1
+#undef STORE_NRM_LAST2
+#undef CALC_NRM_1
+#undef CALC_NRM_2
+#undef CALC_NRM_3
+#undef LOAD_TAN
+#undef STORE_TAN
+#undef CALC_TAN_1
+#undef CALC_TAN_2
+#undef CALC_TAN_3
+#undef CALC_TAN_4
+#undef LOAD_M_12
+#undef LOAD_M_34
+#undef WEIGHT_MATRIX_1
+#undef WEIGHT_MATRIX_2
+#undef WEIGHT_MATRIX_3
+#undef WEIGHT_MATRIX_4
+#undef WEIGHT_M0_1
+#undef WEIGHT_M0_2
+#undef WEIGHT_M0_3
+#undef WEIGHT_M0_4
+#undef WEIGHT_M1_1
+#undef WEIGHT_M1_2
+#undef WEIGHT_M1_3
+#undef WEIGHT_M1_4
+#undef WEIGHT_M2_1
+#undef WEIGHT_M2_2
+#undef WEIGHT_M2_3
+#undef WEIGHT_M2_4
+#undef WEIGHT_M3_1
+#undef WEIGHT_M3_2
+#undef WEIGHT_M3_3
+#undef WEIGHT_M3_4
diff --git a/Runtime/Filters/Mesh/MeshSkinningSSE2.asm b/Runtime/Filters/Mesh/MeshSkinningSSE2.asm
new file mode 100644
index 0000000..395bf16
--- /dev/null
+++ b/Runtime/Filters/Mesh/MeshSkinningSSE2.asm
@@ -0,0 +1,323 @@
+;; MeshSkinningSSE2.asm
+;;
+;; Created by Kaspar Daugaard on 1/12/11.
+;; Copyright 2011 Unity Technologies. All rights reserved.
+
+bits 32
+
+section .text align=32
+
+%define normalOffset 12
+%define tangentOffset 24
+
+; Emits one specialized SSE2 vertex-skinning loop at assembly time.
+; %1 selects the bone count (1/2/4), %2/%3 toggle normal/tangent skinning.
+; Bone matrices are 4x4 floats (64 bytes each, 16-byte aligned); in the
+; influence stream, all %1 weights precede the %1 indices (no weights for
+; the single-bone case).
+%macro SkinSSE2_Generic 3
+	; %1 numBones
+	; %2 hasNormals
+	; %3 hasTangents
+	; [ebp + 8] inVertices
+	; [ebp + 12] outVertices
+	; [ebp + 16] numVertices
+	; [ebp + 20] boneMatrices
+	; [ebp + 24] weightsAndIndices
+	; [ebp + 28] inputStride
+	; [ebp + 32] outputStride
+
+	push ebp
+	mov ebp, esp
+	pushad
+
+	; Local variables (32 byte aligned)
+	; [esp + 0] MaskW
+	; [esp + 16] MaskVec3
+	; [esp + 32] savedEcx
+	sub esp, 16*3
+	and esp, ~31
+
+	; Create bitmasks on stack
+	; MaskW = {0,0,0,~0}, MaskVec3 = {~0,~0,~0,0}
+	sub eax, eax
+	mov [esp + 0], eax ; MaskW
+	mov [esp + 4], eax
+	mov [esp + 8], eax
+	dec eax
+	mov [esp + 12], eax
+	mov [esp + 16], eax ; MaskVec3
+	mov [esp + 20], eax
+	mov [esp + 24], eax
+	inc eax
+	mov [esp + 28], eax
+
+	mov esi, [ebp + 8] ; inVertices
+	mov edi, [ebp + 12] ; outVertices
+	mov ecx, [ebp + 16] ; numVertices
+	mov edx, [ebp + 24] ; weightsAndIndices
+
+	; Prefetch vertices
+	prefetchnta [edx]
+	prefetchnta [esi]
+	prefetchnta [esi + 32]
+
+	align 32
+
+%%SkinSSE2_loop:
+	prefetchnta [esi + 64]
+
+	mov ebx, [ebp + 20] ; boneMatrices
+	mov [esp + 32], ecx ; savedEcx
+
+	; Load first bone index
+%if %1 == 1
+	; Single bone, no weight
+	mov eax, [edx]
+	shl eax, 6
+%else
+	; Indices come after weights
+	mov eax, [edx + %1*4]
+	shl eax, 6
+	prefetchnta [ebx + eax]
+	prefetchnta [ebx + eax + 32]
+
+	; Load second bone index
+	mov ecx, [edx + %1*4 + 4]
+	shl ecx, 6
+	prefetchnta [ebx + ecx]
+	prefetchnta [ebx + ecx + 32]
+
+	; Load all weights to xmm0
+	movups xmm0, [edx]
+%endif
+
+	; Load first matrix to xmm4-xmm7
+	movaps xmm4, [ebx + eax]
+	movaps xmm5, [ebx + eax + 16]
+	movaps xmm6, [ebx + eax + 32]
+	movaps xmm7, [ebx + eax + 48]
+
+%if %1 >= 2
+	; Multiply first matrix with first weight
+	movaps xmm1, xmm0
+	shufps xmm1, xmm1, 0x00
+	mulps xmm4, xmm1
+	mulps xmm5, xmm1
+	mulps xmm6, xmm1
+	mulps xmm7, xmm1
+%endif
+
+%if %1 >= 3
+	; Load third bone index
+	mov eax, [edx + %1*4 + 8]
+	shl eax, 6
+	prefetchnta [ebx + eax]
+	prefetchnta [ebx + eax + 32]
+%endif
+
+%if %1 >= 2
+	; Load first two rows of the second matrix to xmm2-xmm3
+	movaps xmm2, [ebx + ecx]
+	movaps xmm3, [ebx + ecx + 16]
+	; Shuffle second weight to all elements of xmm1
+	movaps xmm1, xmm0
+	shufps xmm1, xmm1, 0x55
+	; Multiply two first rows of second matrix with second weight
+	mulps xmm2, xmm1
+	mulps xmm3, xmm1
+	; Add
+	addps xmm4, xmm2
+	addps xmm5, xmm3
+
+	; Load last two rows of the second matrix to xmm2-xmm3
+	movaps xmm2, [ebx + ecx + 32]
+	movaps xmm3, [ebx + ecx + 48]
+	; Multiply two last rows of the second matrix with second weight
+	mulps xmm2, xmm1
+	mulps xmm3, xmm1
+	; Add
+	addps xmm6, xmm2
+	addps xmm7, xmm3
+%endif
+
+%if %1 >= 4
+	; Load fourth bone index
+	mov ecx, [edx + %1*4 + 12]
+	shl ecx, 6
+	prefetchnta [ebx + ecx]
+	prefetchnta [ebx + ecx + 32]
+%endif
+
+%if %1 >= 3
+	; Load first two rows of the third matrix to xmm2-xmm3
+	movaps xmm2, [ebx + eax]
+	movaps xmm3, [ebx + eax + 16]
+	; Shuffle third weight to all elements of xmm1
+	movaps xmm1, xmm0
+	shufps xmm1, xmm1, 0xaa
+	; Multiply first two rows of third matrix with third weight
+	mulps xmm2, xmm1
+	mulps xmm3, xmm1
+	; Add
+	addps xmm4, xmm2
+	addps xmm5, xmm3
+
+	; Load last two rows of the third matrix to xmm2-xmm3
+	movaps xmm2, [ebx + eax + 32]
+	movaps xmm3, [ebx + eax + 48]
+	; Multiply last two rows of third matrix with third weight
+	mulps xmm2, xmm1
+	mulps xmm3, xmm1
+	; Add
+	addps xmm6, xmm2
+	addps xmm7, xmm3
+%endif
+
+%if %1 >= 4
+	; Load first two rows of the fourth matrix into xmm2-xmm3
+	movaps xmm2, [ebx + ecx]
+	movaps xmm3, [ebx + ecx + 16]
+	; Shuffle fourth weight to all elements of xmm1
+	movaps xmm1, xmm0
+	shufps xmm1, xmm1, 0xff
+	; Multiply first two rows of the fourth matrix with fourth weight
+	mulps xmm2, xmm1
+	mulps xmm3, xmm1
+	; Add
+	addps xmm4, xmm2
+	addps xmm5, xmm3
+
+	; Load last two rows of the fourth matrix to xmm2-xmm3
+	movaps xmm2, [ebx + ecx + 32]
+	movaps xmm3, [ebx + ecx + 48]
+	; Multiply last two rows of the fourth matrix with fourth weight
+	mulps xmm2, xmm1
+	mulps xmm3, xmm1
+	; Add
+	addps xmm6, xmm2
+	addps xmm7, xmm3
+%endif
+
+	; Matrix is in xmm4-xmm7
+	; Transform position by 4x4 matrix in xmm4-xmm7
+	movups xmm0, [esi]
+	movaps xmm1, xmm0
+	movaps xmm2, xmm0
+	shufps xmm1, xmm1, 0x55
+	shufps xmm2, xmm2, 0xaa
+	shufps xmm0, xmm0, 0x00
+	mulps xmm1, xmm5
+	mulps xmm2, xmm6
+	mulps xmm0, xmm4
+	addps xmm1, xmm2
+	addps xmm0, xmm7
+	addps xmm0, xmm1
+	; Store vertex position in outvert
+	; maskmovdqu stores to [edi] implicitly and writes only the bytes
+	; whose mask bit is set, so the 4th lane never touches memory.
+	movaps xmm7, [esp + 16] ; MaskVec3
+	maskmovdqu xmm0, xmm7
+
+%if %2 ; Has normal
+	; Transform vector by 3x3 matrix in xmm4-xmm6
+	movups xmm0, [esi + normalOffset]
+	movaps xmm1, xmm0
+	movaps xmm2, xmm0
+	shufps xmm1, xmm1, 0x55
+	shufps xmm2, xmm2, 0xaa
+	shufps xmm0, xmm0, 0x00
+	mulps xmm1, xmm5
+	mulps xmm2, xmm6
+	mulps xmm0, xmm4
+	addps xmm1, xmm2
+	addps xmm0, xmm1
+%endif
+
+%if %3 ; Has tangent
+	; Transform vector by 3x3 matrix in xmm4-xmm6
+	movups xmm1, [esi + tangentOffset]
+	movaps xmm2, xmm1
+	movaps xmm3, xmm1
+	shufps xmm2, xmm2, 0x55
+	shufps xmm3, xmm3, 0xaa
+	mulps xmm2, xmm5
+	mulps xmm3, xmm6
+	movaps xmm6, xmm1 ; Save original tangent's W in xmm6
+	shufps xmm1, xmm1, 0x00
+	andps xmm6, [esp + 0] ; MaskW
+	mulps xmm1, xmm4
+	addps xmm2, xmm3
+	addps xmm1, xmm2
+%endif
+
+%if %2 || %3 ; Has normal or tangent
+	; Calculate lengths and normalize
+	; (normal in xmm0 and tangent-xyz in xmm1 are normalized together;
+	; rcpps after sqrtps is an approximate reciprocal, ~12-bit precision)
+	movaps xmm2, xmm0
+	movaps xmm5, xmm1
+	mulps xmm2, xmm2
+	mulps xmm5, xmm5
+	movaps xmm3, xmm2
+	movaps xmm4, xmm2
+	shufps xmm3, xmm5, 0x55
+	shufps xmm4, xmm5, 0xaa
+	shufps xmm2, xmm5, 0x00
+	addps xmm3, xmm4
+	addps xmm2, xmm3
+	sqrtps xmm2, xmm2
+	rcpps xmm2, xmm2
+	movaps xmm3, xmm2
+	shufps xmm2, xmm2, 0x00
+	shufps xmm3, xmm3, 0xaa
+	mulps xmm0, xmm2
+	mulps xmm1, xmm3
+%endif
+
+%if %2 ; Write normal
+	; maskmovdqu's implicit destination is [edi], hence the temporary bump
+	add edi, normalOffset
+	maskmovdqu xmm0, xmm7 ; MaskVec3
+	sub edi, normalOffset
+%endif
+
+%if %3 ; Write tangent
+	andps xmm1, xmm7 ; MaskVec3
+	orps xmm1, xmm6 ; Restore original W
+	movups [edi + tangentOffset], xmm1
+%endif
+
+%if %1 == 1
+	; Indices only
+	add edx, 4
+%else
+	; Indices and weights
+	add edx, %1 * 8
+%endif
+
+	add esi, [ebp + 28] ; inputStride
+	add edi, [ebp + 32] ; outputStride
+	mov ecx, [esp + 32] ; savedEcx
+	dec ecx
+	jnz %%SkinSSE2_loop
+
+	; Remove local variables from stack
+	; (pushad stored 8 registers = 32 bytes right below the saved ebp)
+	lea esp, [ebp-32]
+
+	popad
+	pop ebp
+	ret
+	align 16
+%endmacro
+
+
+; Exported entry points: one specialization per (bone count, vertex layout).
+global SkinSSE2_1Bone_Pos
+global SkinSSE2_2Bones_Pos
+global SkinSSE2_4Bones_Pos
+global SkinSSE2_1Bone_PosNormal
+global SkinSSE2_2Bones_PosNormal
+global SkinSSE2_4Bones_PosNormal
+global SkinSSE2_1Bone_PosNormalTan
+global SkinSSE2_2Bones_PosNormalTan
+global SkinSSE2_4Bones_PosNormalTan
+
+
+; SkinSSE2_Generic numBones, hasNormals, hasTangents
+SkinSSE2_1Bone_Pos: SkinSSE2_Generic 1, 0, 0
+SkinSSE2_2Bones_Pos: SkinSSE2_Generic 2, 0, 0
+SkinSSE2_4Bones_Pos: SkinSSE2_Generic 4, 0, 0
+SkinSSE2_1Bone_PosNormal: SkinSSE2_Generic 1, 1, 0
+SkinSSE2_2Bones_PosNormal: SkinSSE2_Generic 2, 1, 0
+SkinSSE2_4Bones_PosNormal: SkinSSE2_Generic 4, 1, 0
+SkinSSE2_1Bone_PosNormalTan: SkinSSE2_Generic 1, 1, 1
+SkinSSE2_2Bones_PosNormalTan: SkinSSE2_Generic 2, 1, 1
+SkinSSE2_4Bones_PosNormalTan: SkinSSE2_Generic 4, 1, 1
diff --git a/Runtime/Filters/Mesh/MeshSkinningSSE2.h b/Runtime/Filters/Mesh/MeshSkinningSSE2.h
new file mode 100644
index 0000000..c085309
--- /dev/null
+++ b/Runtime/Filters/Mesh/MeshSkinningSSE2.h
@@ -0,0 +1,129 @@
+#if UNITY_SUPPORTS_SSE && !UNITY_64
+
+#if UNITY_OSX || UNITY_LINUX
+#define __cdecl
+#endif
+
+// Argument list shared by all assembly entry points. The order must match
+// the [ebp + N] offsets read in MeshSkinningSSE2.asm.
+#define SKIN_SSE2_PARAMS \
+	const void* inVertices, \
+	void* outVertices, \
+	int numVertices, \
+	const void* boneMatrices, \
+	const void* weightsAndIndices, \
+	int inputStride, \
+	int outputStride
+
+// Pointer type used to select a specialization at runtime.
+typedef void (__cdecl *SkinSSE2_Function)(SKIN_SSE2_PARAMS);
+
+extern "C"
+{
+	// Implemented in MeshSkinningSSE2.asm (one per bone-count/layout combo).
+	void __cdecl SkinSSE2_1Bone_Pos(SKIN_SSE2_PARAMS);
+	void __cdecl SkinSSE2_2Bones_Pos(SKIN_SSE2_PARAMS);
+	void __cdecl SkinSSE2_4Bones_Pos(SKIN_SSE2_PARAMS);
+	void __cdecl SkinSSE2_1Bone_PosNormal(SKIN_SSE2_PARAMS);
+	void __cdecl SkinSSE2_2Bones_PosNormal(SKIN_SSE2_PARAMS);
+	void __cdecl SkinSSE2_4Bones_PosNormal(SKIN_SSE2_PARAMS);
+	void __cdecl SkinSSE2_1Bone_PosNormalTan(SKIN_SSE2_PARAMS);
+	void __cdecl SkinSSE2_2Bones_PosNormalTan(SKIN_SSE2_PARAMS);
+	void __cdecl SkinSSE2_4Bones_PosNormalTan(SKIN_SSE2_PARAMS);
+}
+
+
+// Skins a mesh with the specialized SSE2 assembly routines above.
+// Returns false when the optimized path cannot run (no SSE2 support, or an
+// unsupported bonesPerVertex/layout combination such as 3 bones per vertex);
+// the caller is then expected to fall back to the generic skinning code.
+bool SkinMeshOptimizedSSE2(SkinMeshInfo& info)
+{
+	if (!CPUInfo::HasSSE2Support())
+	{
+		return false;
+	}
+
+	SkinSSE2_Function skinFunc = NULL;
+
+	if (!info.skinNormals && !info.skinTangents)
+	{
+		// Position only.
+		// Fix: this assert used to sit inside the switch before the first
+		// case label, where it could never execute.
+		DebugAssert(info.inStride == sizeof(Vector3f));
+		switch (info.bonesPerVertex)
+		{
+		case 1:
+			skinFunc = &SkinSSE2_1Bone_Pos;
+			break;
+		case 2:
+			skinFunc = &SkinSSE2_2Bones_Pos;
+			break;
+		case 4:
+			skinFunc = &SkinSSE2_4Bones_Pos;
+			break;
+		}
+	}
+	else if (info.skinNormals && !info.skinTangents)
+	{
+		// Position + normal.
+		DebugAssert(info.inStride == sizeof(Vector3f) + sizeof(Vector3f));
+		switch (info.bonesPerVertex)
+		{
+		case 1:
+			skinFunc = &SkinSSE2_1Bone_PosNormal;
+			break;
+		case 2:
+			skinFunc = &SkinSSE2_2Bones_PosNormal;
+			break;
+		case 4:
+			skinFunc = &SkinSSE2_4Bones_PosNormal;
+			break;
+		}
+	}
+	else if (info.skinNormals && info.skinTangents)
+	{
+		// Position + normal + tangent (tangent stored as a Vector4f).
+		DebugAssert(info.inStride == sizeof(Vector3f) + sizeof(Vector3f) + sizeof(Vector4f));
+		switch (info.bonesPerVertex)
+		{
+		case 1:
+			skinFunc = &SkinSSE2_1Bone_PosNormalTan;
+			break;
+		case 2:
+			skinFunc = &SkinSSE2_2Bones_PosNormalTan;
+			break;
+		case 4:
+			skinFunc = &SkinSSE2_4Bones_PosNormalTan;
+			break;
+		}
+	}
+
+	if (skinFunc == NULL)
+		return false;
+
+	// Skin all vertices apart from last one! The assembly loads each
+	// attribute with 16-byte movups, so the final vertex could read past
+	// the end of the input buffer.
+	if (info.vertexCount > 1)
+	{
+		(*skinFunc)(info.inVertices, info.outVertices, info.vertexCount - 1, info.cachedPose, info.compactSkin, info.inStride, info.outStride);
+	}
+	// Copy last vertex to stack to avoid reading/writing past end of buffer
+	if (info.vertexCount > 0)
+	{
+		const int maxStride = 2 * sizeof(Vector3f) + sizeof(Vector4f) + 4;
+		Assert(info.inStride <= maxStride && info.outStride <= maxStride);
+		// Need 4 bytes padding to access Vec3 as Vec4
+		char vertexCopyIn[maxStride + 4];
+		char vertexCopyOut[maxStride + 4];
+		int skinStride = (info.bonesPerVertex == 4) ? sizeof(BoneInfluence) :
+		                 (info.bonesPerVertex == 2) ? sizeof(BoneInfluence2) :
+		                 (info.bonesPerVertex == 1) ? sizeof(int) : 0;
+		Assert(skinStride != 0);
+		int index = info.vertexCount - 1;
+		const char* compactSkin = static_cast<const char*>(info.compactSkin) + index * skinStride;
+		const char* inVertex = static_cast<const char*>(info.inVertices) + index * info.inStride;
+		char* outVertex = static_cast<char*>(info.outVertices) + index * info.outStride;
+		memcpy(vertexCopyIn, inVertex, info.inStride);
+		(*skinFunc)(vertexCopyIn, vertexCopyOut, 1, info.cachedPose, compactSkin, info.inStride, info.outStride);
+		memcpy(outVertex, vertexCopyOut, info.outStride);
+	}
+
+	return true;
+}
+#else
+// Stub for builds without x86 SSE2 support: reports that no optimized
+// path ran, so callers always use the generic skinning code.
+inline bool SkinMeshOptimizedSSE2(SkinMeshInfo& info)
+{
+	return false;
+}
+#endif
diff --git a/Runtime/Filters/Mesh/MeshSkinningTests.cpp b/Runtime/Filters/Mesh/MeshSkinningTests.cpp
new file mode 100644
index 0000000..407729b
--- /dev/null
+++ b/Runtime/Filters/Mesh/MeshSkinningTests.cpp
@@ -0,0 +1,228 @@
+#include "UnityPrefix.h"
+#include "Configuration/UnityConfigure.h"
+
+#if ENABLE_UNIT_TESTS && UNITY_SUPPORTS_SSE && !UNITY_64
+
+#include "Runtime/Filters/Mesh/MeshSkinning.h"
+#include "External/UnitTest++/src/UnitTest++.h"
+#include "Runtime/Allocator/MemoryMacros.h"
+#include "Runtime/Math/Random/rand.h"
+#include "Runtime/Math/Matrix4x4.h"
+
+bool SkinMeshOptimizedSSE2(SkinMeshInfo& info);
+void SkinMesh(SkinMeshInfo& info);
+
+// Returns a vector whose components are each drawn uniformly from [-1, 1]
+// (the "unit box"). NOTE: the evaluation order of the three RNG calls is
+// unspecified in C++, so which draw lands in which component is
+// compiler-dependent -- only the distribution matters to the test below.
+Vector3f RandomVector3InUnitBox(Rand& rnd)
+{
+	return Vector3f(rnd.GetSignedFloat(),
+		rnd.GetSignedFloat(),
+		rnd.GetSignedFloat());
+}
+
+SUITE (MeshSkinningTests)
+{
+// Cross-checks the SSE2-optimized skinning path against the generic C++
+// reference (SkinMesh) for every supported combination of bones per vertex
+// (1/2/4) and vertex layout (pos / pos+nrm / pos+nrm+tan), and verifies
+// that neither path writes past the end of its output buffer.
+TEST(MeshSkinning_AllFeatures)
+{
+	int failedPositions = 0;
+	int failedNormals = 0;
+	int failedTangents = 0;
+	int failedTangentSigns = 0;
+
+	const int minVertices = 1;
+	const int maxVertices = 100;
+	const int positionSize = 3*sizeof(float);
+	const int normalSize = 3*sizeof(float);
+	const int tangentSize = 4*sizeof(float);
+	const int maxStride = positionSize + normalSize + tangentSize;
+	const int trailingBytes = 128;
+
+	UInt8 inVertices[maxVertices * maxStride];
+	UInt8 outVerticesRef[maxVertices * maxStride + trailingBytes];
+	UInt8 outVerticesSimd[maxVertices * maxStride + trailingBytes];
+
+	SkinMeshInfo info;
+	memset(&info, 0, sizeof(info));
+	info.inVertices = inVertices;
+	info.vertexCount = minVertices;
+	info.normalOffset = positionSize;
+	info.tangentOffset = positionSize + normalSize;
+
+	// Try a large offset so AABBs don't contain (0,0,0)
+	Vector3f posOffset(-2000, 0, 2000);
+
+	// Build a deterministic pose of distinct bone matrices
+	// (non-uniform scale + translation).
+	const int numBones = 64;
+	Matrix4x4f *cachedPose;
+	ALLOC_TEMP_ALIGNED(cachedPose, Matrix4x4f, numBones, 32);
+	info.cachedPose = cachedPose;
+	for (int i = 0; i < numBones; i++)
+	{
+		Matrix4x4f mat;
+		mat.SetScale(Vector3f(1.0 + 0.5f*sin(i*0.3f),
+			1.0 + 0.5f*sin(i*0.5f),
+			1.0 + 0.5f*sin(i*0.7f)));
+		mat.SetPosition(Vector3f(100.0f*sin(i*1.0f),
+			100.0f*sin(i*2.5f),
+			100.0f*sin(i*3.3f)) + posOffset);
+		cachedPose[i] = mat;
+	}
+	info.boneCount = numBones;
+
+	Rand rnd(123);
+
+	// Influence data for the 1-, 2- and 4-bone variants; the weights of
+	// each vertex sum to 1.
+	int boneIndices[maxVertices];
+	BoneInfluence2 boneInfl2[maxVertices];
+	BoneInfluence boneInfl4[maxVertices];
+	for (int i = 0; i < maxVertices; i++)
+	{
+		boneIndices[i] = i%numBones;
+
+		BoneInfluence2& b2 = boneInfl2[i];
+		b2.boneIndex[0] = (i)%numBones;
+		b2.boneIndex[1] = (i/2+10)%numBones;
+		b2.weight[0] = rnd.GetFloat();
+		b2.weight[1] = 1.0f - b2.weight[0];
+
+		BoneInfluence& b4 = boneInfl4[i];
+		b4.boneIndex[0] = (i)%numBones;
+		b4.boneIndex[1] = (i/2+10)%numBones;
+		b4.boneIndex[2] = (i/3+20)%numBones;
+		b4.boneIndex[3] = (i/4+30)%numBones;
+		float weightLeft = 1.0f;
+		for (int j=0; j<3; j++)
+		{
+			b4.weight[j] = weightLeft * rnd.GetFloat();
+			weightLeft -= b4.weight[j];
+		}
+		b4.weight[3] = weightLeft;
+	}
+
+	for (info.bonesPerVertex = 1; info.bonesPerVertex <= 4; info.bonesPerVertex++)
+	{
+		// There is no optimized 3-bone path.
+		if (info.bonesPerVertex == 3) continue;
+
+		switch (info.bonesPerVertex)
+		{
+		case 1:
+			info.compactSkin = boneIndices;
+			break;
+		case 2:
+			info.compactSkin = boneInfl2;
+			break;
+		case 4:
+			info.compactSkin = boneInfl4;
+			break;
+		}
+
+		for (int skinNormals = 0; skinNormals <= 1; skinNormals++)
+		{
+			info.skinNormals = (skinNormals != 0);
+
+			for (int skinTangents = 0; skinTangents <= 1; skinTangents++)
+			{
+				// Tangents without normals is not a supported layout.
+				if (!skinNormals && skinTangents) continue;
+				info.skinTangents = (skinTangents != 0);
+
+				// Randomize vertex count and stride
+				info.vertexCount += 7;
+				while (info.vertexCount > maxVertices) info.vertexCount -= (maxVertices - minVertices);
+				info.inStride = positionSize;
+				info.inStride += skinNormals ? normalSize : 0;
+				info.inStride += skinTangents ? tangentSize : 0;
+				info.outStride = info.inStride;
+
+				// Fill the input stream with deterministic pseudo-random data.
+				UInt8* inVert = inVertices;
+				for (int i = 0; i < info.vertexCount; i++)
+				{
+					Vector3f* nextVec = (Vector3f*)inVert;
+					Vector3f pos = RandomVector3InUnitBox(rnd);
+					pos *= 1000.0f;
+					*nextVec++ = pos;
+					if (info.skinNormals)
+					{
+						Vector3f normal = RandomVector3InUnitBox(rnd);
+						normal = NormalizeSafe(normal);
+						*nextVec++ = normal;
+					}
+
+					if (info.skinTangents)
+					{
+						Vector3f tangent = RandomVector3InUnitBox(rnd);
+						tangent = NormalizeSafe(tangent);
+						*nextVec++ = tangent;
+						float* tangentSign = (float*)nextVec;
+						*tangentSign = (rnd.GetSignedFloat() < 0.0f) ? -1.0f : 1.0f;
+					}
+					inVert += info.inStride;
+				}
+
+				// Distinct fill patterns let us detect overruns per buffer.
+				int outSize = info.vertexCount * info.outStride;
+				memset(outVerticesRef, 0xcc, outSize + trailingBytes);
+				memset(outVerticesSimd, 0xdd, outSize + trailingBytes);
+
+				info.outVertices = outVerticesRef;
+				SkinMesh(info);
+
+				info.outVertices = outVerticesSimd;
+				bool successSimd = SkinMeshOptimizedSSE2(info);
+				CHECK(successSimd);
+
+				// Check if we wrote past end of buffer
+				for (int i = 0; i < trailingBytes; i++)
+				{
+					CHECK_EQUAL(0xcc, outVerticesRef[outSize + i]);
+					CHECK_EQUAL(0xdd, outVerticesSimd[outSize + i]);
+				}
+
+				// Compare the two outputs vertex by vertex.
+				// Fix: the *Simd pointers used to be derived from vertRef,
+				// so the reference buffer was compared against itself and
+				// every comparison passed vacuously.
+				UInt8* vertRef = outVerticesRef;
+				UInt8* vertSimd = outVerticesSimd;
+				for (int i = 0; i < info.vertexCount; i++)
+				{
+					Vector3f* posRef = (Vector3f*)vertRef;
+					Vector3f* posSimd = (Vector3f*)vertSimd;
+					if (!CompareApproximately(*posRef, *posSimd))
+					{
+						failedPositions++;
+					}
+					if (info.skinNormals)
+					{
+						Vector3f* normalRef = (Vector3f*)(vertRef + info.normalOffset);
+						Vector3f* normalSimd = (Vector3f*)(vertSimd + info.normalOffset);
+						if (!CompareApproximately(*normalRef, *normalSimd))
+						{
+							failedNormals++;
+						}
+					}
+					if (info.skinTangents)
+					{
+						Vector3f* tangentRef = (Vector3f*)(vertRef + info.tangentOffset);
+						Vector3f* tangentSimd = (Vector3f*)(vertSimd + info.tangentOffset);
+						if (!CompareApproximately(*tangentRef, *tangentSimd))
+						{
+							failedTangents++;
+						}
+						float* tangentSignRef = (float*)(vertRef + info.tangentOffset + sizeof(Vector3f));
+						float* tangentSignSimd = (float*)(vertSimd + info.tangentOffset + sizeof(Vector3f));
+						if (*tangentSignRef != *tangentSignSimd)
+						{
+							failedTangentSigns++;
+						}
+					}
+
+					vertRef += info.outStride;
+					vertSimd += info.outStride;
+				}
+			}
+		}
+	}
+
+	CHECK_EQUAL(0, failedPositions);
+	CHECK_EQUAL(0, failedNormals);
+	CHECK_EQUAL(0, failedTangents);
+	CHECK_EQUAL(0, failedTangentSigns);
+}
+}
+#endif
diff --git a/Runtime/Filters/Mesh/MeshSkinningVFP.s b/Runtime/Filters/Mesh/MeshSkinningVFP.s
new file mode 100644
index 0000000..8829981
--- /dev/null
+++ b/Runtime/Filters/Mesh/MeshSkinningVFP.s
@@ -0,0 +1,187 @@
+#define UNITY_ASSEMBLER
+#include "Configuration/PrefixConfigure.h"
+#include "Runtime/Utilities/VFPUtility.h"
+
+#if UNITY_SUPPORTS_VFP
+
+.syntax unified
+
+.set device,0
+.set device,__arm__
+
+.if device
+
+//.code32
+.globl _s_SkinVertices_VFP
+.globl _s_SkinVertices_NoNormals_VFP
+.globl _s_SkinVertices_Tangents_VFP
+
+.globl _s_SkinVertices2Bones_VFP
+.globl _s_SkinVertices2Bones_NoNormals_VFP
+.globl _s_SkinVertices2Bones_Tangents_VFP
+
+.globl _s_SkinVertices4Bones_VFP
+.globl _s_SkinVertices4Bones_Copy4Ints_VFP
+.globl _s_SkinVertices4Bones_NoNormals_VFP
+.globl _s_SkinVertices4Bones_NoNormals_Copy4Ints_VFP
+.globl _s_SkinVertices4Bones_Tangents_VFP
+.globl _s_SkinVertices4Bones_Tangents_Copy4Ints_VFP
+
+// NOTE(review): the *_Copy4Ints_VFP symbols are declared .globl above but
+// no matching labels are defined in this file, and they are also missing
+// from the .hidden list below -- confirm they are provided elsewhere.
+#if UNITY_ANDROID
+// Keep the entry points out of the shared object's dynamic symbol table.
+.hidden _s_SkinVertices_VFP
+.hidden _s_SkinVertices_NoNormals_VFP
+.hidden _s_SkinVertices_Tangents_VFP
+
+.hidden _s_SkinVertices2Bones_VFP
+.hidden _s_SkinVertices2Bones_NoNormals_VFP
+.hidden _s_SkinVertices2Bones_Tangents_VFP
+
+.hidden _s_SkinVertices4Bones_VFP
+.hidden _s_SkinVertices4Bones_NoNormals_VFP
+.hidden _s_SkinVertices4Bones_Tangents_VFP
+#endif
+
+
+//===========================================================================================================================================
+
+
+// Skin-type selectors consumed by MeshSkinningVFP_Loop.h.
+#define SKIN_POS 1
+#define SKIN_POS_NRM 2
+#define SKIN_POS_NRM_TAN 3
+
+
+// Each entry point below #defines exactly one of SKIN_1BONE/SKIN_2BONES/
+// SKIN_4BONES to a skin type (the other two stay 0) and includes the
+// shared loop body. VERTEX_SZ: pos=12, pos+nrm=24, pos+nrm+tan=40 bytes.
+#define SKIN_2BONES 0
+#define SKIN_4BONES 0
+
+// 1 bone, position + normal
+_s_SkinVertices_VFP:
+
+#define SKIN_1BONE SKIN_POS_NRM
+#define VERTEX_SZ 24
+#define LOOP_NAME _s_SkinVertices_VFP_loop
+
+#include "MeshSkinningVFP_Loop.h"
+
+#undef LOOP_NAME
+#undef VERTEX_SZ
+#undef SKIN_1BONE
+
+// 1 bone, position only
+_s_SkinVertices_NoNormals_VFP:
+
+#define SKIN_1BONE SKIN_POS
+#define VERTEX_SZ 12
+#define LOOP_NAME _s_SkinVertices_NoNormals_VFP_loop
+
+#include "MeshSkinningVFP_Loop.h"
+
+#undef LOOP_NAME
+#undef VERTEX_SZ
+#undef SKIN_1BONE
+
+// 1 bone, position + normal + tangent
+_s_SkinVertices_Tangents_VFP:
+
+#define SKIN_1BONE SKIN_POS_NRM_TAN
+#define VERTEX_SZ 40
+#define LOOP_NAME _s_SkinVertices_Tangents_VFP_loop
+
+#include "MeshSkinningVFP_Loop.h"
+
+#undef LOOP_NAME
+#undef VERTEX_SZ
+#undef SKIN_1BONE
+
+#undef SKIN_4BONES
+#undef SKIN_2BONES
+
+
+//===========================================================================================================================================
+
+#define SKIN_1BONE 0
+#define SKIN_4BONES 0
+
+// 2 bones, position + normal
+// NOTE(review): some LOOP_NAMEs below use "_Loop", others "_loop"; the
+// labels are only branch targets so this is harmless, but inconsistent.
+_s_SkinVertices2Bones_VFP:
+
+#define SKIN_2BONES SKIN_POS_NRM
+#define VERTEX_SZ 24
+#define LOOP_NAME _s_SkinVertices2Bones_VFP_Loop
+
+#include "MeshSkinningVFP_Loop.h"
+
+#undef LOOP_NAME
+#undef VERTEX_SZ
+#undef SKIN_2BONES
+
+// 2 bones, position only
+_s_SkinVertices2Bones_NoNormals_VFP:
+
+#define SKIN_2BONES SKIN_POS
+#define VERTEX_SZ 12
+#define LOOP_NAME _s_SkinVertices2Bones_NoNormals_VFP_Loop
+
+#include "MeshSkinningVFP_Loop.h"
+
+#undef LOOP_NAME
+#undef VERTEX_SZ
+#undef SKIN_2BONES
+
+// 2 bones, position + normal + tangent
+_s_SkinVertices2Bones_Tangents_VFP:
+
+#define SKIN_2BONES SKIN_POS_NRM_TAN
+#define VERTEX_SZ 40
+#define LOOP_NAME _s_SkinVertices2Bones_Tangents_VFP_loop
+
+#include "MeshSkinningVFP_Loop.h"
+
+#undef LOOP_NAME
+#undef VERTEX_SZ
+#undef SKIN_2BONES
+
+#undef SKIN_4BONES
+#undef SKIN_1BONE
+
+//===========================================================================================================================================
+
+#define SKIN_1BONE 0
+#define SKIN_2BONES 0
+
+// 4 bones, position + normal
+_s_SkinVertices4Bones_VFP:
+
+#define SKIN_4BONES SKIN_POS_NRM
+#define VERTEX_SZ 24
+#define LOOP_NAME _s_SkinVertices4Bones_VFP_loop
+
+#include "MeshSkinningVFP_Loop.h"
+
+#undef LOOP_NAME
+#undef VERTEX_SZ
+#undef SKIN_4BONES
+
+// 4 bones, position only
+_s_SkinVertices4Bones_NoNormals_VFP:
+
+#define SKIN_4BONES SKIN_POS
+#define VERTEX_SZ 12
+#define LOOP_NAME _s_SkinVertices4Bones_NoNormals_VFP_loop
+
+#include "MeshSkinningVFP_Loop.h"
+
+#undef LOOP_NAME
+#undef VERTEX_SZ
+#undef SKIN_4BONES
+
+// 4 bones, position + normal + tangent
+_s_SkinVertices4Bones_Tangents_VFP:
+
+#define SKIN_4BONES SKIN_POS_NRM_TAN
+#define VERTEX_SZ 40
+#define LOOP_NAME _s_SkinVertices4Bones_Tangents_VFP_loop
+
+#include "MeshSkinningVFP_Loop.h"
+
+#undef LOOP_NAME
+#undef VERTEX_SZ
+#undef SKIN_4BONES
+
+#undef SKIN_2BONES
+#undef SKIN_1BONE
+
+//===========================================================================================================================================
+
+.endif
+#endif
diff --git a/Runtime/Filters/Mesh/MeshSkinningVFP_Loop.h b/Runtime/Filters/Mesh/MeshSkinningVFP_Loop.h
new file mode 100644
index 0000000..3b7400f
--- /dev/null
+++ b/Runtime/Filters/Mesh/MeshSkinningVFP_Loop.h
@@ -0,0 +1,335 @@
+
+// defines
+// SKIN_1BONE
+// SKIN_2BONES
+// SKIN_4BONES
+// LOOP_NAME
+// VERTEX_SZ
+
+// skin types
+// SKIN_POS
+// SKIN_POS_NRM
+// SKIN_POS_NRM_TAN
+
+//r0: const void* bones4x4
+//r1: const void* srcVertData
+//r2: const void* srcVertDataEnd
+//r3: const BoneInfluence4* srcBoneInfluence4
+//[sp+0] -> r4: const void* dstVertData
+
+// s0,s1,s2 <- output: pos
+// s3,s4,s5 <- output: nrm
+// s6,s7,s8,s9 <- output: tan
+// s10,s11,s12 <- input: pos
+// s13,s14,s15 <- input: nrm
+// s16,s17,s18,s19 <- input: tan
+// s20-s31 <- matrix [3x4] last row loaded directly to output pos
+
+//===========================================================================================================================================
+//
+// Common
+
+// Row-N contribution of the (blended) bone matrix to the output position.
+// s0-s2 already hold the matrix's translation part (loaded/blended by the
+// surrounding loop), so there is no CALC_POS_1.
+#define CALC_POS_2 FMACS3 (0,1,2, 20,21,22, 10,10,10)
+#define CALC_POS_3 FMACS3 (0,1,2, 24,25,26, 11,11,11)
+#define CALC_POS_4 FMACS3 (0,1,2, 28,29,30, 12,12,12)
+
+
+// Normals are skinned (and pos+nrm loaded/stored together) only for the
+// POS_NRM and POS_NRM_TAN layouts; otherwise the macros collapse to the
+// position-only forms / nothing.
+#if (SKIN_1BONE == SKIN_POS_NRM) || (SKIN_1BONE == SKIN_POS_NRM_TAN) \
+ || (SKIN_2BONES == SKIN_POS_NRM) || (SKIN_2BONES == SKIN_POS_NRM_TAN) \
+ || (SKIN_4BONES == SKIN_POS_NRM) || (SKIN_4BONES == SKIN_POS_NRM_TAN)
+
+	#define LOAD_POS_NRM vldmia.32 r1!, {s10-s15}
+	#define STORE_POS_NRM vstmia.32 r4!, {s0-s5}
+	#define CALC_NRM_1 FMULS3 (3,4,5, 20,21,22, 13,13,13)
+	#define CALC_NRM_2 FMACS3 (3,4,5, 24,25,26, 14,14,14)
+	#define CALC_NRM_3 FMACS3 (3,4,5, 28,29,30, 15,15,15)
+#else
+	#define LOAD_POS_NRM vldmia.32 r1!, {s10-s12}
+	#define STORE_POS_NRM vstmia.32 r4!, {s0-s2}
+	#define CALC_NRM_1
+	#define CALC_NRM_2
+	#define CALC_NRM_3
+#endif
+
+#if (SKIN_1BONE == SKIN_POS_NRM_TAN) || (SKIN_2BONES == SKIN_POS_NRM_TAN) || (SKIN_4BONES == SKIN_POS_NRM_TAN)
+	// Tangent xyz is rotated like the normal; w (s19) is copied through.
+	#define LOAD_TAN vldmia.32 r1!, {s16-s19}
+	#define STORE_TAN vstmia.32 r4!, {s6-s9}
+	#define CALC_TAN_1 FMULS3 (6,7,8, 20,21,22, 16,16,16)
+	#define CALC_TAN_2 FMACS3 (6,7,8, 24,25,26, 17,17,17)
+	#define CALC_TAN_3 FMACS3 (6,7,8, 28,29,30, 18,18,18)
+	#define CALC_TAN_4 fcpys s9, s19
+#else
+	#define LOAD_TAN
+	#define STORE_TAN
+	#define CALC_TAN_1
+	#define CALC_TAN_2
+	#define CALC_TAN_3
+	#define CALC_TAN_4
+#endif
+
+
+
+
+//===========================================================================================================================================
+//
+// 1 bone skinning
+
+#if (SKIN_1BONE == SKIN_POS) || (SKIN_1BONE == SKIN_POS_NRM) || (SKIN_1BONE == SKIN_POS_NRM_TAN)
+
+// 1-bone path: the influence stream (r3) is a plain int index per vertex.
+// Software-pipelined: loads of the NEXT vertex's matrix are predicated on
+// cc ("more vertices left", set by cmp r1, r2) and overlap the current
+// vertex's arithmetic.
+mov ip, sp
+vpush {d7-d15}
+stmfd sp!, {r4,r5,r6,r7,r8,r10,r11}
+
+ldr r4, [ip, #0] // dstVertData (5th argument, passed on the stack)
+
+// Prime the pipeline: first matrix (translation into s0-s2, rows into s20-s27).
+ldr r5, [r3], #4
+add r5, r0, r5, lsl #6
+add r6, r5, #48
+
+vldmia.32 r6, {s0-s2}
+vldmia.32 r5!, {s20-s23}
+vldmia.32 r5!, {s24-s27}
+
+.align 4
+LOOP_NAME:
+
+LOAD_POS_NRM
+
+CALC_POS_2
+CALC_NRM_1
+	ldr r6, [r3], #4 // next matrix index
+vldmia.32 r5, {s28-s30} // bone matrix
+	add r5, r0, r6, lsl #6 // next matrix addr
+
+
+CALC_POS_3
+CALC_NRM_2
+
+LOAD_TAN
+	add r6, r5, #48
+	cmp r1, r2
+
+CALC_TAN_1
+	vldmiacc.32 r5!, {s20-s23} // next bone matrix
+
+
+CALC_POS_4
+
+CALC_TAN_2
+CALC_NRM_3
+	vldmiacc.32 r5!, {s24-s27} // next bone matrix
+
+CALC_TAN_3
+CALC_TAN_4
+
+	pld [r1, #1024]
+
+
+STORE_POS_NRM
+STORE_TAN
+
+	vldmiacc.32 r6, {s0-s2} // next translation -> output pos accumulator
+
+bcc LOOP_NAME
+
+ldmfd sp!, {r4,r5,r6,r7,r8,r10,r11}
+vpop {d7-d15}
+bx lr
+
+
+//===========================================================================================================================================
+
+#elif (SKIN_2BONES == SKIN_POS) || (SKIN_2BONES == SKIN_POS_NRM) || (SKIN_2BONES == SKIN_POS_NRM_TAN)
+
+// 2-bone path: influence stream (r3) = {float w0, w1; int idx0, idx1}.
+// The two matrices are blended row by row (rows -> s20-s30, translation
+// -> s0-s2) and the vertex is then transformed by the blended matrix.
+mov ip, sp
+vpush {d7-d15}
+stmfd sp!, {r4,r5,r6,r7,r8,r10,r11}
+
+ldr r4, [ip, #0] // dstVertData (5th argument, passed on the stack)
+
+
+.align 4
+LOOP_NAME:
+
+vldmia.32 r3!, {s3,s4} // w
+	ldmia r3!, {r5-r6} // idx
+
+	add r5, r0, r5, lsl #6 // M0
+	add r6, r0, r6, lsl #6 // M1
+
+
+vldmia.64 r5!, {d4,d5} // M0[0]
+
+vldmia.64 r6!, {d6,d7} // M1[0]
+FMULS3 (20,21,22, 8,9,10, 3,3,3) // M0[0] * w
+
+vldmia.64 r5!, {d4,d5} // M0[1]
+FMACS3 (20,21,22, 12,13,14, 4,4,4) // + M1[0] * w
+
+vldmia.64 r6!, {d6,d7} // M1[1]
+FMULS3 (24,25,26, 8,9,10, 3,3,3) // M0[1] * w
+
+vldmia.64 r5!, {d4,d5} // M0[2]
+FMACS3 (24,25,26, 12,13,14, 4,4,4) // + M1[1] * w
+
+vldmia.64 r6!, {d6,d7} // M1[2]
+FMULS3 (28,29,30, 8,9,10, 3,3,3) // M0[2] * w
+
+vldmia.64 r5!, {d4,d5} // M0[3]
+FMACS3 (28,29,30, 12,13,14, 4,4,4) // + M1[2] * w
+
+vldmia.64 r6!, {d6,d7} // M1[3]
+FMULS3 (0,1,2, 8,9,10, 3,3,3) // M0[3] * w
+
+FMACS3 (0,1,2, 12,13,14, 4,4,4) // + M1[3] * w
+
+
+LOAD_POS_NRM
+LOAD_TAN
+
+CALC_POS_2
+CALC_NRM_1
+CALC_TAN_1
+
+CALC_POS_3
+CALC_NRM_2
+CALC_TAN_2
+	pld [r1, #1024]
+	cmp r1, r2
+CALC_POS_4
+CALC_NRM_3
+CALC_TAN_3
+
+CALC_TAN_4
+
+
+STORE_POS_NRM
+STORE_TAN
+
+bcc LOOP_NAME
+
+ldmfd sp!, {r4,r5,r6,r7,r8,r10,r11}
+vpop {d7-d15}
+bx lr
+
+
+
+//===========================================================================================================================================
+
+#elif (SKIN_4BONES == SKIN_POS) || (SKIN_4BONES == SKIN_POS_NRM) || (SKIN_4BONES == SKIN_POS_NRM_TAN)
+
+// 4-bone path: influence stream (r3) = {float w[4]; int idx[4]}.
+// All four matrices are blended row by row (rows -> s20-s30, translation
+// -> s0-s2) and the vertex is then transformed by the blended matrix.
+mov ip, sp
+vpush {d7-d15}
+stmfd sp!, {r4,r5,r6,r7,r8}
+
+ldr r4, [ip, #0] // dstVertData (5th argument, passed on the stack)
+
+
+.align 4
+LOOP_NAME:
+
+vldmia.32 r3!, {s3-s6} // w
+	ldmia r3!, {r5-r8} // idx
+
+	add r5, r0, r5, lsl #6 // M0
+	add r6, r0, r6, lsl #6 // M1
+	add r7, r0, r7, lsl #6 // M2
+	add r8, r0, r8, lsl #6 // M3
+
+
+vldmia.64 r5!, {d4,d5} // M0[0]
+
+vldmia.64 r6!, {d6,d7} // M1[0]
+FMULS3 (20,21,22, 8,9,10, 3,3,3) // M0[0] * w
+
+vldmia.64 r7!, {d4,d5} // M2[0]
+FMACS3 (20,21,22, 12,13,14, 4,4,4) // + M1[0] * w
+
+vldmia.64 r8!, {d6,d7} // M3[0]
+FMACS3 (20,21,22, 8,9,10, 5,5,5) // + M2[0] * w
+
+vldmia.64 r5!, {d4,d5} // M0[1]
+FMACS3 (20,21,22, 12,13,14, 6,6,6) // + M3[0] * w
+
+vldmia.64 r6!, {d6,d7} // M1[1]
+FMULS3 (24,25,26, 8,9,10, 3,3,3) // M0[1] * w
+
+vldmia.64 r7!, {d4,d5} // M2[1]
+FMACS3 (24,25,26, 12,13,14, 4,4,4) // + M1[1] * w
+
+vldmia.64 r8!, {d6,d7} // M3[1]
+FMACS3 (24,25,26, 8,9,10, 5,5,5) // + M2[1] * w
+
+vldmia.64 r5!, {d4,d5} // M0[2]
+FMACS3 (24,25,26, 12,13,14, 6,6,6) // + M3[1] * w
+
+vldmia.64 r6!, {d6,d7} // M1[2]
+FMULS3 (28,29,30, 8,9,10, 3,3,3) // M0[2] * w
+
+vldmia.64 r7!, {d4,d5} // M2[2]
+FMACS3 (28,29,30, 12,13,14, 4,4,4) // + M1[2] * w
+
+vldmia.64 r8!, {d6,d7} // M3[2]
+FMACS3 (28,29,30, 8,9,10, 5,5,5) // + M2[2] * w
+
+vldmia.64 r5!, {d4,d5} // M0[3]
+FMACS3 (28,29,30, 12,13,14, 6,6,6) // + M3[2] * w
+
+vldmia.64 r6!, {d6,d7} // M1[3]
+FMULS3 (0,1,2, 8,9,10, 3,3,3) // M0[3] * w
+
+vldmia.64 r7!, {d4,d5} // M2[3]
+FMACS3 (0,1,2, 12,13,14, 4,4,4) // + M1[3] * w
+
+vldmia.64 r8!, {d6,d7} // M3[3]
+FMACS3 (0,1,2, 8,9,10, 5,5,5) // + M2[3] * w
+
+FMACS3 (0,1,2, 12,13,14, 6,6,6) // + M3[3] * w
+
+
+LOAD_POS_NRM
+LOAD_TAN
+
+CALC_POS_2
+CALC_NRM_1
+CALC_TAN_1
+
+CALC_POS_3
+CALC_NRM_2
+CALC_TAN_2
+	pld [r1, #1024]
+	cmp r1, r2
+CALC_POS_4
+CALC_NRM_3
+CALC_TAN_3
+
+CALC_TAN_4
+
+
+STORE_POS_NRM
+STORE_TAN
+
+bcc LOOP_NAME
+
+ldmfd sp!, {r4,r5,r6,r7,r8}
+vpop {d7-d15}
+bx lr
+
+#endif
+
+//===========================================================================================================================================
+
+// Clean up all per-include macros so the next instantiation starts fresh.
+// (CALC_POS_1 is never defined in this header; the extra #undef is harmless.)
+#undef CALC_POS_1
+#undef CALC_POS_2
+#undef CALC_POS_3
+#undef STORE_POS_NRM
+#undef LOAD_POS_NRM
+#undef CALC_NRM_1
+#undef CALC_NRM_2
+#undef CALC_NRM_3
+#undef LOAD_TAN
+#undef STORE_TAN
+#undef CALC_TAN_1
+#undef CALC_TAN_2
+#undef CALC_TAN_3
+#undef CALC_TAN_4
diff --git a/Runtime/Filters/Mesh/MeshUtility.cpp b/Runtime/Filters/Mesh/MeshUtility.cpp
new file mode 100644
index 0000000..75d8e7f
--- /dev/null
+++ b/Runtime/Filters/Mesh/MeshUtility.cpp
@@ -0,0 +1,58 @@
+#include "UnityPrefix.h"
+#include "MeshUtility.h"
+#include "Runtime/Geometry/Plane.h"
+#include "Mesh.h"
+
+using namespace std;
+
+// Computes smooth per-vertex normals: every triangle's raw (unnormalized)
+// face normal is accumulated into each of its three corners, and the sums
+// are renormalized at the end. 'indices' holds 3 entries per triangle.
+void CalculateNormals (StrideIterator<Vector3f> verts, const UInt32* indices, int vertexCount, int triangleCount, StrideIterator<Vector3f> outNormals)
+{
+	std::fill_n (outNormals, vertexCount, Vector3f(0,0,0));
+
+	// Accumulate one face normal into each of the triangle's corners.
+	for (int tri = 0; tri < triangleCount; ++tri)
+	{
+		const UInt32* corner = indices + 3 * tri;
+		const UInt32 ia = corner[0];
+		const UInt32 ib = corner[1];
+		const UInt32 ic = corner[2];
+		Vector3f faceNormal = CalcRawNormalFromTriangle( verts[ia], verts[ib], verts[ic] );
+		outNormals[ia] += faceNormal;
+		outNormals[ib] += faceNormal;
+		outNormals[ic] += faceNormal;
+	}
+
+	// Renormalize the accumulated sums (fast, approximate length).
+	for (StrideIterator<Vector3f> end = outNormals + vertexCount; outNormals != end; ++outNormals )
+	{
+		*outNormals = NormalizeFast (*outNormals);
+	}
+}
+
+
+// Returns the total world-space surface area of the given triangle list.
+// NOTE: 'vertices' is a scratch copy -- it is transformed into world space
+// in place and left that way.
+float CalculateSurfaceArea (
+	const Matrix4x4f& objectToWorld,
+	const Mesh::TemporaryIndexContainer& triangles,
+	dynamic_array<Vector3f>& vertices)
+{
+	// Bring the vertices into world space first (in place, they are a copy).
+	for (int i = 0; i < vertices.size (); i++)
+		vertices[i] = objectToWorld.MultiplyPoint3 (vertices[i]);
+
+	// Sum the triangle areas: |(b-a) x (c-a)| / 2 per triangle.
+	float totalArea = 0;
+	const int triangleCount = triangles.size () / 3;
+	for (int t = 0; t < triangleCount; t++)
+	{
+		DebugAssert (triangles[3 * t] < vertices.size ());
+		DebugAssert (triangles[3 * t + 1] < vertices.size ());
+		DebugAssert (triangles[3 * t + 2] < vertices.size ());
+		const Vector3f& a = vertices[triangles[3 * t]];
+		const Vector3f& b = vertices[triangles[3 * t + 1]];
+		const Vector3f& c = vertices[triangles[3 * t + 2]];
+		totalArea += Magnitude (Cross (b - a, c - a)) * 0.5f;
+	}
+
+	return totalArea;
+}
diff --git a/Runtime/Filters/Mesh/MeshUtility.h b/Runtime/Filters/Mesh/MeshUtility.h
new file mode 100644
index 0000000..748c874
--- /dev/null
+++ b/Runtime/Filters/Mesh/MeshUtility.h
@@ -0,0 +1,42 @@
+#ifndef MESHUTILITY_H
+#define MESHUTILITY_H
+
+#include "Runtime/Math/Vector3.h"
+#include "Runtime/Math/Matrix4x4.h"
+#include "Runtime/Math/Quaternion.h"
+#include "Runtime/Filters/Mesh/LodMesh.h"
+#include "Runtime/Utilities/StrideIterator.h"
+#include "Runtime/Utilities/dynamic_array.h"
+
+struct Tangent;
+
+// Calculate normals for the mesh, given vertex array and triangle list (3 indices per triangle).
+void CalculateNormals( StrideIterator<Vector3f> verts, const UInt32* indices, int vertexCount, int triangleCount, StrideIterator<Vector3f> outNormals );
+
+float CalculateSurfaceArea (const Matrix4x4f& objectToWorld, const Mesh::TemporaryIndexContainer& triangles, dynamic_array<Vector3f>& vertices);
+
+// Use this to generate a normal from an tangent basis quickly
+inline Vector3f NormalFromQuatTangentBasis (const Quaternionf& lhs)
+{
+ float x = lhs.x * 2.0F;
+ float y = lhs.y * 2.0F;
+ float z = lhs.z * 2.0F;
+ float xx = lhs.x * x;
+ float yy = lhs.y * y;
+ float xz = lhs.x * z;
+ float yz = lhs.y * z;
+ float wx = lhs.w * x;
+ float wy = lhs.w * y;
+
+ Vector3f res;
+ res.x = xz - wy;
+ res.y = yz + wx;
+ res.z = 1.0f - xx - yy;
+ AssertIf (!CompareApproximately (res, RotateVectorByQuat(Inverse (lhs), Vector3f::zAxis)));
+ return res;
+}
+
+//bool HasDegenerateTriangles (const Vector3f* verts, const MeshData &meshData, float degenerateArea = 0.0001);
+
+
+#endif
diff --git a/Runtime/Filters/Mesh/SkinGeneric.h b/Runtime/Filters/Mesh/SkinGeneric.h
new file mode 100644
index 0000000..ef30d81
--- /dev/null
+++ b/Runtime/Filters/Mesh/SkinGeneric.h
@@ -0,0 +1,338 @@
+#ifndef SKINGENERIC_H
+#define SKINGENERIC_H
+
+#include "Runtime/Filters/Mesh/VertexData.h"
+
+#if UNITY_PS3
// PS3-only skinning path for multi-stream vertex layouts: positions, normals
// and tangents live in separate streams (0, 1, 2), while stream 3 holds
// per-vertex data that is not skinned and is copied through verbatim at the
// end. Template parameters are compile-time switches -- normalization mode,
// bone influences per vertex (1, 2 or 4), and whether normals / tangents are
// skinned -- so every branch on them folds away in each instantiation.
template<TransformInstruction transformInstruction, int bonesPerVertexCount,
bool skinNormal, bool skinTangent>
void SkinGenericStreamed (SkinMeshInfo& info)
{
	// The three influence pointers alias the same compact buffer; only the
	// one matching bonesPerVertexCount is ever read or advanced.
	const int* influence1 = reinterpret_cast<const int*> (info.compactSkin);
	const BoneInfluence2* influence2 = reinterpret_cast<const BoneInfluence2*> (info.compactSkin);
	const BoneInfluence* influence4 = reinterpret_cast<const BoneInfluence*> (info.compactSkin);

	const Matrix4x4f* bones4x4 = info.cachedPose;

	int count = info.vertexCount;

	// Per-stream byte offsets; the mutable ones advance by their stream's
	// stride each vertex. NOTE(review): input and output buffers are assumed
	// to share the same stream layout -- confirm against the caller.
	int vertexOffset = info.vertexData->GetStream(0).offset;
	const int vertexStride = info.vertexData->GetStream(0).stride;

	int normalOffset = info.vertexData->GetStream(1).offset;
	const int normalStride = info.vertexData->GetStream(1).stride;

	int tangentOffset = info.vertexData->GetStream(2).offset;
	const int tangentStride = info.vertexData->GetStream(2).stride;

	// Stream 3: opaque per-vertex data (not skinned), copied once after the loop.
	const int copyDataOffset = info.vertexData->GetStream(3).offset;
	const int copyDataSize = info.vertexData->GetStream(3).stride * info.vertexCount;

	const UInt8* inputVertex = (const UInt8*)info.inVertices;
	UInt8* outputVertex = (UInt8*)info.outVertices;

	Matrix4x4f poseBlended;
	const Matrix4x4f* poseToUse;

	for( int v = 0; v < count; v++ )
	{
		ALIGN_LOOP_OPTIMIZATION

		Prefetch(inputVertex + 256);

		// Blend the matrices first, then transform everything with this
		// blended matrix. Gives a small speed boost on XCode/Intel (11.3 to 12.00 FPS
		// in skin4 bench), and a good boost on MSVC/Windows (9.6 to 12.4 FPS).
		if (bonesPerVertexCount == 1)
		{
			// Single influence: use the bone matrix directly, no blending needed.
			poseToUse = &bones4x4[*influence1];
		}
		else if (bonesPerVertexCount == 2)
		{
			float weight0 = influence2->weight[0];
			float weight1 = influence2->weight[1];
			const float* b4x40 = bones4x4[influence2->boneIndex[0]].m_Data;
			const float* b4x41 = bones4x4[influence2->boneIndex[1]].m_Data;
			// we need only 12 components of the matrix
			// (the last row stays whatever poseBlended held; only the affine
			// 3x4 part is read by MultiplyPoint3/MultiplyVector3 below)
			poseBlended.m_Data[ 0] = b4x40[ 0] * weight0 + b4x41[ 0] * weight1;
			poseBlended.m_Data[ 1] = b4x40[ 1] * weight0 + b4x41[ 1] * weight1;
			poseBlended.m_Data[ 2] = b4x40[ 2] * weight0 + b4x41[ 2] * weight1;
			poseBlended.m_Data[ 4] = b4x40[ 4] * weight0 + b4x41[ 4] * weight1;
			poseBlended.m_Data[ 5] = b4x40[ 5] * weight0 + b4x41[ 5] * weight1;
			poseBlended.m_Data[ 6] = b4x40[ 6] * weight0 + b4x41[ 6] * weight1;
			poseBlended.m_Data[ 8] = b4x40[ 8] * weight0 + b4x41[ 8] * weight1;
			poseBlended.m_Data[ 9] = b4x40[ 9] * weight0 + b4x41[ 9] * weight1;
			poseBlended.m_Data[10] = b4x40[10] * weight0 + b4x41[10] * weight1;
			poseBlended.m_Data[12] = b4x40[12] * weight0 + b4x41[12] * weight1;
			poseBlended.m_Data[13] = b4x40[13] * weight0 + b4x41[13] * weight1;
			poseBlended.m_Data[14] = b4x40[14] * weight0 + b4x41[14] * weight1;
			poseToUse = &poseBlended;
		}
		else if (bonesPerVertexCount == 4)
		{
			float weight0 = influence4->weight[0];
			float weight1 = influence4->weight[1];
			float weight2 = influence4->weight[2];
			float weight3 = influence4->weight[3];

			const float* b4x40 = bones4x4[influence4->boneIndex[0]].m_Data;
			const float* b4x41 = bones4x4[influence4->boneIndex[1]].m_Data;
			const float* b4x42 = bones4x4[influence4->boneIndex[2]].m_Data;
			const float* b4x43 = bones4x4[influence4->boneIndex[3]].m_Data;
			// we need only 12 components of the matrix, so unroll
			poseBlended.m_Data[ 0] = b4x40[ 0] * weight0 + b4x41[ 0] * weight1 + b4x42[ 0] * weight2 + b4x43[ 0] * weight3;
			poseBlended.m_Data[ 1] = b4x40[ 1] * weight0 + b4x41[ 1] * weight1 + b4x42[ 1] * weight2 + b4x43[ 1] * weight3;
			poseBlended.m_Data[ 2] = b4x40[ 2] * weight0 + b4x41[ 2] * weight1 + b4x42[ 2] * weight2 + b4x43[ 2] * weight3;
			poseBlended.m_Data[ 4] = b4x40[ 4] * weight0 + b4x41[ 4] * weight1 + b4x42[ 4] * weight2 + b4x43[ 4] * weight3;
			poseBlended.m_Data[ 5] = b4x40[ 5] * weight0 + b4x41[ 5] * weight1 + b4x42[ 5] * weight2 + b4x43[ 5] * weight3;
			poseBlended.m_Data[ 6] = b4x40[ 6] * weight0 + b4x41[ 6] * weight1 + b4x42[ 6] * weight2 + b4x43[ 6] * weight3;
			poseBlended.m_Data[ 8] = b4x40[ 8] * weight0 + b4x41[ 8] * weight1 + b4x42[ 8] * weight2 + b4x43[ 8] * weight3;
			poseBlended.m_Data[ 9] = b4x40[ 9] * weight0 + b4x41[ 9] * weight1 + b4x42[ 9] * weight2 + b4x43[ 9] * weight3;
			poseBlended.m_Data[10] = b4x40[10] * weight0 + b4x41[10] * weight1 + b4x42[10] * weight2 + b4x43[10] * weight3;
			poseBlended.m_Data[12] = b4x40[12] * weight0 + b4x41[12] * weight1 + b4x42[12] * weight2 + b4x43[12] * weight3;
			poseBlended.m_Data[13] = b4x40[13] * weight0 + b4x41[13] * weight1 + b4x42[13] * weight2 + b4x43[13] * weight3;
			poseBlended.m_Data[14] = b4x40[14] * weight0 + b4x41[14] * weight1 + b4x42[14] * weight2 + b4x43[14] * weight3;
			poseToUse = &poseBlended;
		}

		// skin components
		Vector3f outVertex, outNormal, outTangent;
		const Vector3f* vertex = reinterpret_cast<const Vector3f*>( inputVertex + vertexOffset);
		const Vector3f* normal = reinterpret_cast<const Vector3f*>( inputVertex + normalOffset );
		const Vector3f* tangent = reinterpret_cast<const Vector3f*>( inputVertex + tangentOffset );
		poseToUse->MultiplyPoint3( *vertex, outVertex );
		if( skinNormal )
		{
			poseToUse->MultiplyVector3( *normal, outNormal );
			// Optionally renormalize: the blended matrix may include scale.
			if (transformInstruction == kNormalizeFastest)
			{
				float sqr1 = SqrMagnitude( outNormal );
				float invsqrt1 = FastestInvSqrt (sqr1);
				outNormal *= invsqrt1;
			}
			else if (transformInstruction == kNormalizeFast)
			{
				float sqr1 = SqrMagnitude( outNormal );
				float invsqrt1 = FastInvSqrt (sqr1);
				outNormal *= invsqrt1;
			}
		}
		if( skinTangent )
		{
			poseToUse->MultiplyVector3( *tangent, outTangent );
			if (transformInstruction == kNormalizeFastest)
			{
				float sqr1 = SqrMagnitude( outTangent );
				float invsqrt1 = FastestInvSqrt (sqr1);
				outTangent *= invsqrt1;
			}
			else if (transformInstruction == kNormalizeFast)
			{
				float sqr1 = SqrMagnitude( outTangent );
				float invsqrt1 = FastInvSqrt (sqr1);
				outTangent *= invsqrt1;
			}
		}

		// write data out
		*reinterpret_cast<Vector3f*> (outputVertex + vertexOffset) = outVertex;
		if( skinNormal )
		{
			*reinterpret_cast<Vector3f*>( outputVertex + normalOffset ) = outNormal;
		}
		if( skinTangent )
		{
			*reinterpret_cast<Vector3f*>( outputVertex + tangentOffset ) = outTangent;
			// Tangent w (handedness) is not skinned; copy it through unchanged.
			*reinterpret_cast<float*>( outputVertex + tangentOffset + sizeof(Vector3f) ) = *reinterpret_cast<const float*>( inputVertex + tangentOffset + sizeof(Vector3f) );
		}

		// Advance per-stream offsets (base pointers stay fixed in this variant).
		vertexOffset += vertexStride;
		normalOffset += normalStride;
		tangentOffset += tangentStride;

		// Advance whichever influence pointer is active. bonesPerVertexCount is
		// a compile-time constant, so the missing `else` before the ==4 check
		// has no effect.
		if (bonesPerVertexCount == 1)
			influence1++;
		else if (bonesPerVertexCount == 2)
			influence2++;
		if (bonesPerVertexCount == 4)
			influence4++;
	}

	// copy the unskinned stream-3 data through in one block
	const UInt8* copyDataSrc = inputVertex + copyDataOffset;
	UInt8* copyDataDst = outputVertex + copyDataOffset;
	memcpy(copyDataDst, copyDataSrc, copyDataSize);
}
+#endif
+
// Reference (non-SIMD) skinning loop for interleaved vertex buffers.
// Template parameters are compile-time switches -- normalization mode for
// normals/tangents, bone influences per vertex (1, 2 or 4), and whether
// normals / tangents are skinned -- so each instantiation compiles to a
// branch-free inner loop. (Forward declaration kept immediately before the
// definition, as in the original layout.)
template<TransformInstruction transformInstruction, int bonesPerVertexCount,
	bool skinNormal, bool skinTangent>
void SkinGeneric (SkinMeshInfo& info);

template<TransformInstruction transformInstruction, int bonesPerVertexCount,
	bool skinNormal, bool skinTangent>
void SkinGeneric (SkinMeshInfo& info)
{
#if UNITY_PS3
	// PS3 splits vertex data across streams; hand off to the streamed variant
	// when more than two streams are active.
	if(info.vertexData && (info.vertexData->GetActiveStreamCount() > 2))
		return SkinGenericStreamed<transformInstruction, bonesPerVertexCount, skinNormal, skinTangent>(info);
#endif
	// The three influence pointers alias the same compact buffer; only the
	// one matching bonesPerVertexCount is ever read or advanced.
	const int* influence1 = reinterpret_cast<const int*> (info.compactSkin);
	const BoneInfluence2* influence2 = reinterpret_cast<const BoneInfluence2*> (info.compactSkin);
	const BoneInfluence* influence4 = reinterpret_cast<const BoneInfluence*> (info.compactSkin);

	const Matrix4x4f* bones4x4 = info.cachedPose;

	const int inStride = info.inStride;
	int outStride = info.outStride;
	int count = info.vertexCount;

	// Byte offsets of normal/tangent within each interleaved vertex;
	// position is at offset 0.
	const int normalOffset = info.normalOffset;
	const int tangentOffset = info.tangentOffset;

	const UInt8* inputVertex = (const UInt8*)info.inVertices;
	UInt8* outputVertex = (UInt8*)info.outVertices;

	Matrix4x4f poseBlended;
	const Matrix4x4f* poseToUse;


#if !ENABLE_MULTITHREADED_SKINNING
	PROFILER_AUTO(gMeshSkinningSlowpath, NULL);
#endif

	//;;printf_console("bonesPerVertexCount: %d, skinNormal: %d, normalOffset: %d, inStride: %d, copyDataSizeInts: %d, count: %d, boneCount: %d, outputVertex: %d\n",
	//	bonesPerVertexCount, (int)skinNormal, normalOffset, inStride, copyDataSizeInts, count, info.boneCount, (int)outputVertex);
	//;;uint64_t delta = mach_absolute_time();

	for( int v = 0; v < count; v++ )
	{
		ALIGN_LOOP_OPTIMIZATION

		Prefetch(inputVertex + 256);

		// Blend the matrices first, then transform everything with this
		// blended matrix. Gives a small speed boost on XCode/Intel (11.3 to 12.00 FPS
		// in skin4 bench), and a good boost on MSVC/Windows (9.6 to 12.4 FPS).
		if (bonesPerVertexCount == 1)
		{
			// Single influence: use the bone matrix directly, no blending needed.
			poseToUse = &bones4x4[*influence1];
		}
		else if (bonesPerVertexCount == 2)
		{
			float weight0 = influence2->weight[0];
			float weight1 = influence2->weight[1];
			const float* b4x40 = bones4x4[influence2->boneIndex[0]].m_Data;
			const float* b4x41 = bones4x4[influence2->boneIndex[1]].m_Data;
			// we need only 12 components of the matrix
			// (only the affine 3x4 part is read by MultiplyPoint3/MultiplyVector3)
			poseBlended.m_Data[ 0] = b4x40[ 0] * weight0 + b4x41[ 0] * weight1;
			poseBlended.m_Data[ 1] = b4x40[ 1] * weight0 + b4x41[ 1] * weight1;
			poseBlended.m_Data[ 2] = b4x40[ 2] * weight0 + b4x41[ 2] * weight1;
			poseBlended.m_Data[ 4] = b4x40[ 4] * weight0 + b4x41[ 4] * weight1;
			poseBlended.m_Data[ 5] = b4x40[ 5] * weight0 + b4x41[ 5] * weight1;
			poseBlended.m_Data[ 6] = b4x40[ 6] * weight0 + b4x41[ 6] * weight1;
			poseBlended.m_Data[ 8] = b4x40[ 8] * weight0 + b4x41[ 8] * weight1;
			poseBlended.m_Data[ 9] = b4x40[ 9] * weight0 + b4x41[ 9] * weight1;
			poseBlended.m_Data[10] = b4x40[10] * weight0 + b4x41[10] * weight1;
			poseBlended.m_Data[12] = b4x40[12] * weight0 + b4x41[12] * weight1;
			poseBlended.m_Data[13] = b4x40[13] * weight0 + b4x41[13] * weight1;
			poseBlended.m_Data[14] = b4x40[14] * weight0 + b4x41[14] * weight1;
			poseToUse = &poseBlended;
		}
		else if (bonesPerVertexCount == 4)
		{
			float weight0 = influence4->weight[0];
			float weight1 = influence4->weight[1];
			float weight2 = influence4->weight[2];
			float weight3 = influence4->weight[3];

			const float* b4x40 = bones4x4[influence4->boneIndex[0]].m_Data;
			const float* b4x41 = bones4x4[influence4->boneIndex[1]].m_Data;
			const float* b4x42 = bones4x4[influence4->boneIndex[2]].m_Data;
			const float* b4x43 = bones4x4[influence4->boneIndex[3]].m_Data;
			// we need only 12 components of the matrix, so unroll
			poseBlended.m_Data[ 0] = b4x40[ 0] * weight0 + b4x41[ 0] * weight1 + b4x42[ 0] * weight2 + b4x43[ 0] * weight3;
			poseBlended.m_Data[ 1] = b4x40[ 1] * weight0 + b4x41[ 1] * weight1 + b4x42[ 1] * weight2 + b4x43[ 1] * weight3;
			poseBlended.m_Data[ 2] = b4x40[ 2] * weight0 + b4x41[ 2] * weight1 + b4x42[ 2] * weight2 + b4x43[ 2] * weight3;
			poseBlended.m_Data[ 4] = b4x40[ 4] * weight0 + b4x41[ 4] * weight1 + b4x42[ 4] * weight2 + b4x43[ 4] * weight3;
			poseBlended.m_Data[ 5] = b4x40[ 5] * weight0 + b4x41[ 5] * weight1 + b4x42[ 5] * weight2 + b4x43[ 5] * weight3;
			poseBlended.m_Data[ 6] = b4x40[ 6] * weight0 + b4x41[ 6] * weight1 + b4x42[ 6] * weight2 + b4x43[ 6] * weight3;
			poseBlended.m_Data[ 8] = b4x40[ 8] * weight0 + b4x41[ 8] * weight1 + b4x42[ 8] * weight2 + b4x43[ 8] * weight3;
			poseBlended.m_Data[ 9] = b4x40[ 9] * weight0 + b4x41[ 9] * weight1 + b4x42[ 9] * weight2 + b4x43[ 9] * weight3;
			poseBlended.m_Data[10] = b4x40[10] * weight0 + b4x41[10] * weight1 + b4x42[10] * weight2 + b4x43[10] * weight3;
			poseBlended.m_Data[12] = b4x40[12] * weight0 + b4x41[12] * weight1 + b4x42[12] * weight2 + b4x43[12] * weight3;
			poseBlended.m_Data[13] = b4x40[13] * weight0 + b4x41[13] * weight1 + b4x42[13] * weight2 + b4x43[13] * weight3;
			poseBlended.m_Data[14] = b4x40[14] * weight0 + b4x41[14] * weight1 + b4x42[14] * weight2 + b4x43[14] * weight3;
			poseToUse = &poseBlended;
		}

		// skin components
		Vector3f outVertex, outNormal, outTangent;
		const Vector3f* vertex = reinterpret_cast<const Vector3f*>( inputVertex );
		const Vector3f* normal = reinterpret_cast<const Vector3f*>( inputVertex + normalOffset );
		const Vector3f* tangent = reinterpret_cast<const Vector3f*>( inputVertex + tangentOffset );
		poseToUse->MultiplyPoint3( *vertex, outVertex );
		if( skinNormal )
		{
			poseToUse->MultiplyVector3( *normal, outNormal );
			// Optionally renormalize: the blended matrix may include scale.
			if (transformInstruction == kNormalizeFastest)
			{
				float sqr1 = SqrMagnitude( outNormal );
				float invsqrt1 = FastestInvSqrt (sqr1);
				outNormal *= invsqrt1;
			}
			else if (transformInstruction == kNormalizeFast)
			{
				float sqr1 = SqrMagnitude( outNormal );
				float invsqrt1 = FastInvSqrt (sqr1);
				outNormal *= invsqrt1;
			}
		}
		if( skinTangent )
		{
			poseToUse->MultiplyVector3( *tangent, outTangent );
			if (transformInstruction == kNormalizeFastest)
			{
				float sqr1 = SqrMagnitude( outTangent );
				float invsqrt1 = FastestInvSqrt (sqr1);
				outTangent *= invsqrt1;
			}
			else if (transformInstruction == kNormalizeFast)
			{
				float sqr1 = SqrMagnitude( outTangent );
				float invsqrt1 = FastInvSqrt (sqr1);
				outTangent *= invsqrt1;
			}
		}

		// write data out
		*reinterpret_cast<Vector3f*> (outputVertex) = outVertex;
		if( skinNormal )
		{
			*reinterpret_cast<Vector3f*>( outputVertex + normalOffset ) = outNormal;
		}

		if( skinTangent )
		{
			*reinterpret_cast<Vector3f*>( outputVertex + tangentOffset ) = outTangent;
			// Tangent w (handedness) is not skinned; copy it through unchanged.
			*reinterpret_cast<float*>( outputVertex + tangentOffset + sizeof(Vector3f) ) = *reinterpret_cast<const float*>( inputVertex + tangentOffset + sizeof(Vector3f) );
		}

		outputVertex += outStride;
		inputVertex += inStride;

		// Advance whichever influence pointer is active. bonesPerVertexCount is
		// a compile-time constant, so the missing `else` before the ==4 check
		// has no effect.
		if (bonesPerVertexCount == 1)
			influence1++;
		else if (bonesPerVertexCount == 2)
			influence2++;
		if (bonesPerVertexCount == 4)
			influence4++;
	}

	//;;static int frameCount = 0; frameCount++;
	//delta = mach_absolute_time() - delta;
	//;;static uint64_t deltaAccum = 0; deltaAccum += (int)(delta);
	//;;printf_console("skin-c: %d %d\n", (int)(deltaAccum / frameCount), (int)delta);
}
+
+#endif
diff --git a/Runtime/Filters/Mesh/SpriteRenderer.cpp b/Runtime/Filters/Mesh/SpriteRenderer.cpp
new file mode 100644
index 0000000..4ce85a1
--- /dev/null
+++ b/Runtime/Filters/Mesh/SpriteRenderer.cpp
@@ -0,0 +1,338 @@
+#include "UnityPrefix.h"
+#include "SpriteRenderer.h"
+
+#if ENABLE_SPRITES
+
+#include "Runtime/Serialize/TransferFunctions/SerializeTransfer.h"
+#include "Runtime/Graphics/SpriteFrame.h"
+#include "Runtime/Graphics/Texture.h"
+#include "Runtime/Graphics/Texture2D.h"
+#include "Runtime/GfxDevice/GfxDevice.h"
+#include "Runtime/Profiler/Profiler.h"
+#include "Runtime/Profiler/ExternalGraphicsProfiler.h"
+#include "Runtime/Shaders/Material.h"
+#include "Runtime/Shaders/ShaderNameRegistry.h"
+#include "Runtime/Shaders/VBO.h"
+#include "Runtime/Filters/Mesh/TransformVertex.h"
+#include "Runtime/GfxDevice/BatchRendering.h"
+#include "Runtime/Math/Color.h"
+#include "Runtime/Core/Callbacks/GlobalCallbacks.h"
+#include "Runtime/Misc/ResourceManager.h"
+#include "Runtime/BaseClasses/Tags.h"
+#include "SpriteRendererAnimationBinding.h"
+
+
+PROFILER_INFORMATION(gSpriteRenderSingleProfile, "SpriteRenderer.RenderSingle", kProfilerRender)
+PROFILER_INFORMATION(gSpriteRenderBatchProfile, "SpriteRenderer.RenderBatch", kProfilerRender)
+PROFILER_INFORMATION(gSpriteRenderSubmitVBO, "Mesh.SubmitVBO", kProfilerRender)
+
+const float kSpriteScaleEpsilon = 0.0001f;
+#define kMaxNumSpriteTrianglesPerBatch (2*1024)
+
+static const char* const kDefaultSpriteShader = "Sprites/Default";
+static const char* const kDefaultSpriteMaterial = "Sprites-Default.mat";
+
+static SHADERPROP (MainTex);
+static SHADERPROP (MainTex_TexelSize);
+static Material* gSpriteDefaultMaterial = NULL;
+
+static void InitDefaultSpriteMaterial()
+{
+ Assert(gSpriteDefaultMaterial == NULL);
+ gSpriteDefaultMaterial = GetBuiltinResource<Material>(kDefaultSpriteMaterial);
+}
+
+IMPLEMENT_CLASS_HAS_INIT (SpriteRenderer)
+IMPLEMENT_OBJECT_SERIALIZE (SpriteRenderer)
+
+SpriteRenderer::SpriteRenderer (MemLabelId label, ObjectCreationMode mode)
+: Super(kRendererSprite, label, mode)
+, m_Color(1.0F, 1.0F, 1.0F, 1.0F)
+{
+ m_CastShadows = false;
+ m_ReceiveShadows = false;
+}
+
// Nothing to release explicitly; members clean up via their own destructors.
SpriteRenderer::~SpriteRenderer ()
{
}
+
+inline ColorRGBA32 GetDeviceColor (const ColorRGBAf& color, GfxDevice& device)
+{
+ if (GetActiveColorSpace () == kLinearColorSpace)
+ return device.ConvertToDeviceVertexColor(GammaToActiveColorSpace(color));
+ else
+ return device.ConvertToDeviceVertexColor(color);
+}
+
// Registers the default-material lookup to run once graphics is initialized
// (via the initializedEngineGraphics callback) and sets up the animation
// binding that lets clips animate m_Sprite.
void SpriteRenderer::InitializeClass ()
{
	REGISTER_GLOBAL_CALLBACK(initializedEngineGraphics, InitDefaultSpriteMaterial());
	InitializeSpriteRendererAnimationBindingInterface();
}
+
void SpriteRenderer::CleanupClass ()
{
	CleanupSpriteRendererAnimationBindingInterface ();
	// The material came from GetBuiltinResource, so only the cached pointer
	// is dropped here -- it is not deleted.
	gSpriteDefaultMaterial = NULL;
}
+
// Serializes the renderer: base Renderer state plus the sprite reference and
// the tint color.
template<class TransferFunction>
void SpriteRenderer::Transfer(TransferFunction& transfer)
{
	Super::Transfer (transfer);
	TRANSFER (m_Sprite);
	TRANSFER (m_Color);
}
+
+void SpriteRenderer::UpdateLocalAABB ()
+{
+ if (m_Sprite.IsValid())
+ {
+ //TODO: calculate AABB from RenderData.
+ m_TransformInfo.localAABB = m_Sprite->GetBounds();
+ }
+ else
+ {
+ m_TransformInfo.localAABB.SetCenterAndExtent(Vector3f::zero, Vector3f::zero);
+ }
+}
+
// Refreshes the cached world matrix / transform type (when the transform is
// dirty) and local bounds (when bounds are dirty), then always re-derives the
// world-space AABB from the cached local AABB and matrix.
void SpriteRenderer::UpdateTransformInfo ()
{
	Transform const& transform = GetTransform();
	if (m_TransformDirty)
	{
		// will return a cached matrix most of the time
		TransformType type = transform.CalculateTransformMatrix (m_TransformInfo.worldMatrix);

		// Always treat sprites as having a non-uniform scale. Will make them batch better
		// (since we break batches on transform type changes). And does not have any negative effects
		// since uniform vs. non-uniform scale only affects fixed function vertex normals, which
		// aren't relevant here.
		type &= ~kUniformScaleTransform;
		type |= kNonUniformScaleTransform;
		m_TransformInfo.transformType = type;

		// Likewise, treat inverse scale as always being 1.
		m_TransformInfo.invScale = 1.0f;
	}

	if (m_BoundsDirty)
		UpdateLocalAABB();

	TransformAABBSlow(m_TransformInfo.localAABB, m_TransformInfo.worldMatrix, m_TransformInfo.worldAABB);
}
+
+void SpriteRenderer::SetSprite(PPtr<Sprite> sprite)
+{
+ if (m_Sprite != sprite)
+ {
+ m_Sprite = sprite;
+ BoundsChanged();
+ SetupMaterialProperties();
+
+ SetDirty();
+ }
+}
+
// After (de)serialization: recompute bounds and re-patch the material
// property block, since both are derived from the loaded sprite.
void SpriteRenderer::AwakeFromLoad (AwakeFromLoadMode awakeMode)
{
	Super::AwakeFromLoad(awakeMode);
	BoundsChanged();
	SetupMaterialProperties();
}
+
// Reset state: a single material slot filled with the shared default
// sprite material.
void SpriteRenderer::SmartReset ()
{
	SetMaterialCount(1);
	SetMaterial(GetDefaultSpriteMaterial(), 0);
}
+
// Pushes the current sprite's texture (and texel size) into this renderer's
// material property block and refreshes the properties hash.
void SpriteRenderer::SetupMaterialProperties()
{
	if (m_Sprite.IsNull())
		return;

	// Patch sprite texture and apply material property block
	MaterialPropertyBlock& block = GetPropertyBlockRememberToUpdateHash ();
	SetupMaterialPropertyBlock(block, GetSpriteRenderDataInContext(m_Sprite)->texture);
	ComputeCustomPropertiesHash ();
}
+
+void SpriteRenderer::SetupMaterialPropertyBlock(MaterialPropertyBlock& block, const Texture2D* spriteTexture)
+{
+ const TextureID id = spriteTexture ? spriteTexture->GetTextureID() : TextureID(0);
+ const Vector4f texelSize = spriteTexture ? Vector4f(spriteTexture->GetTexelSizeX(), spriteTexture->GetTexelSizeY(), spriteTexture->GetGLWidth(), spriteTexture->GetGLHeight()) : Vector4f(0, 0, 0, 0);
+
+ block.ReplacePropertyTexture(kSLPropMainTex, kTexDim2D, id);
+ block.ReplacePropertyVector(kSLPropMainTex_TexelSize, texelSize);
+}
+
// Resolves the render data to draw for the given sprite. Currently just
// forwards to the sprite's play-mode render data.
const SpriteRenderData* SpriteRenderer::GetSpriteRenderDataInContext(const PPtr<Sprite>& frame)
{
	//@Note: this is here for a possible contextual atlas implementation.
	return &frame->GetRenderDataForPlayMode();
}
+
// Non-batched draw path: fills a dynamic VBO chunk with the sprite's
// transformed, tinted geometry and submits it through the gfx device.
void SpriteRenderer::Render (int materialIndex, const ChannelAssigns& channels)
{
	GfxDevice& device = GetGfxDevice();

	// Sprites always use a single material slot.
	Assert(materialIndex == 0);
	if (m_Sprite.IsNull())
		return;

	const SpriteRenderData* rd = GetSpriteRenderDataInContext(m_Sprite);
	Assert(rd->texture.IsValid());

	PROFILER_AUTO_GFX(gSpriteRenderSingleProfile, this);

	// Get VBO chunk for a rectangle or mesh
	UInt32 numIndices, numVertices;
	GetGeometrySize(numIndices, numVertices);
	if (!numIndices)
		return;

	// Sprite vertices carry position, one UV set and a vertex color.
	const UInt32 channelMask = (1<<kShaderChannelVertex) | (1<<kShaderChannelTexCoord0) | (1<<kShaderChannelColor);

	DynamicVBO& vbo = device.GetDynamicVBO();
	UInt8* __restrict vbPtr;
	UInt16* __restrict ibPtr;
	if ( !vbo.GetChunk(channelMask, numVertices, numIndices, DynamicVBO::kDrawIndexedTriangles, (void**)&vbPtr, (void**)&ibPtr) )
		return;

	// Write the sprite's geometry (tinted with the device-converted color) into the chunk.
	TransformSprite (vbPtr, ibPtr, NULL, rd, GetDeviceColor (m_Color, device), 0);
	vbo.ReleaseChunk(numVertices, numIndices);

	// Draw
	if (m_CustomProperties)
		device.SetMaterialProperties(*m_CustomProperties);

	PROFILER_BEGIN(gSpriteRenderSubmitVBO, this)
	vbo.DrawChunk(channels);
	GPU_TIMESTAMP();
	PROFILER_END
}
+
+void SpriteRenderer::GetGeometrySize(UInt32& indexCount, UInt32& vertexCount)
+{
+ if (m_Sprite.IsValid())
+ {
+ const SpriteRenderData* rd = GetSpriteRenderDataInContext(m_Sprite);
+ if (rd->indices.size() > 0)
+ {
+ indexCount = rd->indices.size();
+ vertexCount = rd->vertices.size();
+ return;
+ }
+ }
+
+ indexCount = 0;
+ vertexCount = 0;
+}
+
+#if GFX_ENABLE_DRAW_CALL_BATCHING
+void SpriteRenderer::RenderBatch (const BatchInstanceData* instances, size_t count, size_t numIndices, size_t numVertices, const ChannelAssigns& channels)
+{
+ DebugAssert(numIndices);
+ DebugAssert(numVertices);
+ PROFILER_AUTO_GFX(gSpriteRenderBatchProfile, 0);
+
+ GfxDevice& device = GetGfxDevice();
+ const MaterialPropertyBlock* customProps = count > 0 ? instances[0].renderer->GetCustomProperties() : NULL;
+ if (customProps)
+ device.SetMaterialProperties (*customProps);
+
+ UInt32 expectedFence = device.GetNextCPUFence();
+ const UInt32 channelMask = (1<<kShaderChannelVertex) | (1<<kShaderChannelTexCoord0) | (1<<kShaderChannelColor);;
+ device.BeginDynamicBatching(channels, channelMask, numVertices, numIndices, kPrimitiveTriangles);
+
+ for (BatchInstanceData const* it = instances; it < instances + count; ++it)
+ {
+ UInt32 numIndices, numVertices;
+
+ Assert(it->renderer);
+ Assert(it->renderer->GetRendererType() == kRendererSprite);
+ SpriteRenderer* renderer = (SpriteRenderer*)it->renderer;
+ renderer->GetGeometrySize(numIndices, numVertices);
+ if (!numIndices)
+ continue;
+
+ const SpriteRenderData *rd = renderer->GetSpriteRenderDataInContext(renderer->m_Sprite);
+ Assert(rd->texture.IsValid());
+
+#if ENABLE_MULTITHREADED_CODE
+ renderer->m_Sprite->SetCurrentCPUFence(expectedFence);
+#endif
+ device.DynamicBatchSprite(&it->xform, rd, GetDeviceColor(renderer->m_Color, device));
+ }
+ device.SetInverseScale(1.0f);
+ device.EndDynamicBatching(TransformType(kNoScaleTransform));
+
+ // Insert fence after batching is complete
+ UInt32 fence = device.InsertCPUFence();
+ Assert(fence == expectedFence);
+ GPU_TIMESTAMP();
+}
+
// Splits the instance list into batches whose accumulated size fits
// kMaxNumSpriteTrianglesPerBatch and renders each via RenderBatch.
// NOTE(review): the limit is compared against the accumulated *index* count,
// not the triangle count -- confirm the constant's intent. A sprite too large
// for any batch is drawn as a batch of one.
void SpriteRenderer::RenderMultiple (const BatchInstanceData* instances, size_t count, const ChannelAssigns& channels)
{
	size_t numIndicesBatch = 0;
	size_t numVerticesBatch = 0;

	BatchInstanceData const* instancesEnd = instances + count;
	BatchInstanceData const* iBatchBegin = instances;
	BatchInstanceData const* iBatchEnd = instances;
	while (iBatchEnd != instancesEnd)
	{
		Assert(iBatchEnd->renderer->GetRendererType() == kRendererSprite);
		SpriteRenderer* renderer = (SpriteRenderer*)iBatchEnd->renderer;

		// Renderers without a sprite contribute no geometry; skip them.
		if (renderer->GetSprite().IsNull())
		{
			iBatchEnd++;
			continue;
		}

		UInt32 numIndices, numVertices;
		renderer->GetGeometrySize(numIndices, numVertices);

		if ((numIndicesBatch + numIndices) <= kMaxNumSpriteTrianglesPerBatch)
		{
			// Still fits: extend the current batch.
			numIndicesBatch += numIndices;
			numVerticesBatch += numVertices;
			iBatchEnd++;
		}
		else
		{
			if (numIndicesBatch)
			{
				// Flush the accumulated batch; the current sprite is retried
				// against an empty batch on the next iteration.
				RenderBatch(iBatchBegin, iBatchEnd - iBatchBegin, numIndicesBatch, numVerticesBatch, channels);
				numIndicesBatch = 0;
				numVerticesBatch = 0;
				iBatchBegin = iBatchEnd;
			}
			else // Can't fit in one draw call
			{
				RenderBatch(iBatchEnd, 1, numIndices, numVertices, channels);
				iBatchEnd++;
				iBatchBegin = iBatchEnd;
			}
		}
	}

	// Flush the trailing partial batch, if any.
	if ((iBatchBegin != iBatchEnd) && numIndicesBatch)
	{
		RenderBatch(iBatchBegin, iBatchEnd - iBatchBegin, numIndicesBatch, numVerticesBatch, channels);
	}
}
+#endif
+
+Material* SpriteRenderer::GetDefaultSpriteMaterial ()
+{
+ Assert(gSpriteDefaultMaterial);
+ return gSpriteDefaultMaterial;
+}
+
+#endif // ENABLE_SPRITES
diff --git a/Runtime/Filters/Mesh/SpriteRenderer.h b/Runtime/Filters/Mesh/SpriteRenderer.h
new file mode 100644
index 0000000..0bf47b9
--- /dev/null
+++ b/Runtime/Filters/Mesh/SpriteRenderer.h
@@ -0,0 +1,60 @@
+#ifndef SPRITERENDERER_H
+#define SPRITERENDERER_H
+#include "Configuration/UnityConfigure.h"
+
+#if ENABLE_SPRITES
+
+#include "Runtime/GfxDevice/ChannelAssigns.h"
+#include "Runtime/Filters/Renderer.h"
+#include "Runtime/Shaders/Material.h"
+#include "Runtime/Graphics/SpriteFrame.h"
+
// Renderer that draws a single Sprite asset with a per-renderer tint color.
// Geometry comes from the sprite's render data; drawing goes through the gfx
// device's dynamic VBO and participates in dynamic batching with other
// sprites when GFX_ENABLE_DRAW_CALL_BATCHING is enabled.
class SpriteRenderer : public Renderer
{
public:
	REGISTER_DERIVED_CLASS (SpriteRenderer, Renderer)
	DECLARE_OBJECT_SERIALIZE (SpriteRenderer)

	SpriteRenderer (MemLabelId label, ObjectCreationMode mode);
	// ~SpriteRenderer (); declared-by-macro

	static bool IsSealedClass () { return true; }
	static void InitializeClass ();
	static void CleanupClass ();

	virtual void AwakeFromLoad (AwakeFromLoadMode awakeMode);
	virtual void SmartReset ();

	virtual void UpdateTransformInfo();
	virtual void UpdateLocalAABB ();
	virtual void Render (int materialIndex, const ChannelAssigns& channels);
#if GFX_ENABLE_DRAW_CALL_BATCHING
	// Batch entry point: splits `instances` into fitting sub-batches and draws them.
	static void RenderMultiple (const BatchInstanceData* instances, size_t count, const ChannelAssigns& channels);
#endif
	// Sprite to display; assigning a new one refreshes bounds and material properties.
	PPtr<Sprite> GetSprite() const { return m_Sprite; }
	void SetSprite(PPtr<Sprite> sprite);

	// Tint color, applied as vertex color when geometry is generated.
	ColorRGBAf GetColor() const { return m_Color; }
	void SetColor(const ColorRGBAf& color) { m_Color = color; }

	// Patches the sprite texture and its texel size into a material property block.
	static void SetupMaterialPropertyBlock(MaterialPropertyBlock& block, const Texture2D* spriteTexture);

	static Material* GetDefaultSpriteMaterial();

private:
	PPtr<Sprite> m_Sprite;     // displayed sprite (may be null)
	ColorRGBAf m_Color;        // tint applied to the sprite's vertices

	void SetupMaterialProperties();
	// Outputs zero counts when no sprite (or no indices) is present.
	void GetGeometrySize(UInt32& indexCount, UInt32& vertexCount);

#if GFX_ENABLE_DRAW_CALL_BATCHING
	static void RenderBatch (const BatchInstanceData* instances, size_t count, size_t numIndices, size_t numVertices, const ChannelAssigns& channels);
#endif
	// Context
	const SpriteRenderData* GetSpriteRenderDataInContext(const PPtr<Sprite>& frame);
};
+
+#endif //ENABLE_SPRITES
+
+#endif
diff --git a/Runtime/Filters/Mesh/SpriteRendererAnimationBinding.cpp b/Runtime/Filters/Mesh/SpriteRendererAnimationBinding.cpp
new file mode 100644
index 0000000..a36406f
--- /dev/null
+++ b/Runtime/Filters/Mesh/SpriteRendererAnimationBinding.cpp
@@ -0,0 +1,68 @@
+#include "UnityPrefix.h"
+#include "Runtime/Animation/GenericAnimationBindingCache.h"
+#include "Runtime/Animation/AnimationClipBindings.h"
+#include "SpriteRenderer.h"
+#include "Runtime/Interfaces/IAnimationBinding.h"
+
+#if ENABLE_SPRITES
+
+static const char* kSpriteFrame = "m_Sprite";
+
// Animation binding that lets animation clips drive SpriteRenderer's
// m_Sprite reference as a PPtr curve (sprite flip-book animation).
// Float curves are not supported -- only the PPtr accessors do real work.
class SpriteRendererAnimationBinding : public IAnimationBinding
{
public:

#if UNITY_EDITOR
	virtual void GetAllAnimatableProperties (Object& targetObject, std::vector<EditorCurveBinding>& outProperties) const
	{
		// Expose the sprite reference as the single animatable PPtr property.
		AddPPtrBinding (outProperties, ClassID(SpriteRenderer), kSpriteFrame);
	}
#endif

	// Float curves do not apply to this binding; get returns 0, set is a no-op.
	virtual float GetFloatValue (const UnityEngine::Animation::BoundCurve& bind) const { return 0.0F; }
	virtual void SetFloatValue (const UnityEngine::Animation::BoundCurve& bind, float value) const { }

	// Assigns the animated sprite (given by instance id) to the bound renderer.
	virtual void SetPPtrValue (const UnityEngine::Animation::BoundCurve& bound, SInt32 value) const
	{
		SpriteRenderer* renderer = reinterpret_cast<SpriteRenderer*>(bound.targetObject);
		renderer->SetSprite(PPtr<Sprite> (value));
	}

	// Reads back the currently assigned sprite's instance id.
	virtual SInt32 GetPPtrValue (const UnityEngine::Animation::BoundCurve& bound) const
	{
		SpriteRenderer* renderer = reinterpret_cast<SpriteRenderer*>(bound.targetObject);
		return renderer->GetSprite().GetInstanceID();
	}

	// Accepts only the "m_Sprite" attribute, and only as a PPtr curve.
	virtual bool GenerateBinding (const UnityStr& attribute, bool pptrCurve, UnityEngine::Animation::GenericBinding& outputBinding) const
	{
		if (attribute == kSpriteFrame && pptrCurve)
		{
			outputBinding.attribute = 0;
			return true;
		}

		return false;
	}

	// Bound values are Sprite references.
	virtual ClassIDType BindValue (Object& target, const UnityEngine::Animation::GenericBinding& inputBinding, UnityEngine::Animation::BoundCurve& bound) const
	{
		return ClassID(Sprite);
	}
};
+
+static SpriteRendererAnimationBinding* gSpriteRendererBinding = NULL;
+
// Creates the singleton binding object and registers it with the generic
// animation binding cache for the SpriteRenderer class.
void InitializeSpriteRendererAnimationBindingInterface ()
{
	Assert(gSpriteRendererBinding == NULL);
	gSpriteRendererBinding = UNITY_NEW (SpriteRendererAnimationBinding, kMemAnimation);
	UnityEngine::Animation::GetGenericAnimationBindingCache ().RegisterIAnimationBinding (ClassID(SpriteRenderer), UnityEngine::Animation::kSpriteRendererPPtrBinding, gSpriteRendererBinding);
}
+
+void CleanupSpriteRendererAnimationBindingInterface ()
+{
+ UNITY_DELETE (gSpriteRendererBinding, kMemAnimation);
+}
+
+#endif \ No newline at end of file
diff --git a/Runtime/Filters/Mesh/SpriteRendererAnimationBinding.h b/Runtime/Filters/Mesh/SpriteRendererAnimationBinding.h
new file mode 100644
index 0000000..63e2731
--- /dev/null
+++ b/Runtime/Filters/Mesh/SpriteRendererAnimationBinding.h
@@ -0,0 +1,2 @@
+void InitializeSpriteRendererAnimationBindingInterface ();
+void CleanupSpriteRendererAnimationBindingInterface (); \ No newline at end of file
diff --git a/Runtime/Filters/Mesh/TransformVertex.cpp b/Runtime/Filters/Mesh/TransformVertex.cpp
new file mode 100644
index 0000000..e9bebc1
--- /dev/null
+++ b/Runtime/Filters/Mesh/TransformVertex.cpp
@@ -0,0 +1,205 @@
+#include "UnityPrefix.h"
+#include "TransformVertex.h"
+
+#include "Runtime/Math/Matrix4x4.h"
+#include "Runtime/Math/Vector4.h"
+#include "Runtime/Math/Vector3.h"
+#include "Runtime/Math/Vector2.h"
+#include "Runtime/Math/Color.h"
+
+#include "Runtime/Misc/CPUInfo.h"
+
// Reference (scalar) implementation of the strided vertex transform used by
// dynamic batching: transforms positions (and, when present, normals and
// tangents) by 'm' and writes them, interleaved with pass-through color/UV
// data, into the tightly packed destination buffer 'dstData'.
// Optional attributes are signalled by a NULL iterator pointer.
// 'multiStream' is accepted for signature parity with the ARM path but is not
// used here; the output is always one interleaved stream.
void
TransformVerticesStridedREF( StrideIterator<Vector3f> inPos, StrideIterator<Vector3f> inNormal,
							StrideIterator<ColorRGBA32> inColor, StrideIterator<Vector2f> inTexCoord0, StrideIterator<Vector2f> inTexCoord1,
							StrideIterator<Vector4f> inTangent,
							UInt8* dstData, const Matrix4x4f& m, unsigned vertexCount, bool multiStream )
{
	// NOTE: kill this code once all shaders normalize normals & tangents!
	//
	// We batch uniformly scaled objects, so derive the "normal matrix" here by scaling world matrix axes.
	// On reference code seems much cheaper than full normalization of normal/tangent vectors.
	// Test with scene of 200k vertices on Core i7 2600K: no handling of scale 3.77ms, normalization 8.00ms,
	// using scaled normal matrix 3.80ms.
	//
	// Note that ARM NEON/VFP transformation code does not handle this, but it's not needed on GLES platforms
	// since shaders always normalize normal & tangent. Might be needed on WinRT+ARM though (or just disable
	// dynamic batching with tangents there).
	Matrix4x4f nm;
	CopyMatrix(m.GetPtr(), nm.GetPtr());
	// Only the X axis length is measured; assumes uniform scale (see note above).
	const float axisLen = Magnitude (m.GetAxisX());
	float scale = axisLen > 1.0e-6f ? 1.0f / axisLen : 1.0f;
	// Divide the upper-left 3x3 by the axis length; translation is irrelevant
	// for direction transforms below.
	nm.Get (0, 0) *= scale;
	nm.Get (1, 0) *= scale;
	nm.Get (2, 0) *= scale;
	nm.Get (0, 1) *= scale;
	nm.Get (1, 1) *= scale;
	nm.Get (2, 1) *= scale;
	nm.Get (0, 2) *= scale;
	nm.Get (1, 2) *= scale;
	nm.Get (2, 2) *= scale;

	while (vertexCount --> 0)
	{
		// Position: full affine transform.
		Vector3f* outPos = reinterpret_cast<Vector3f*> (dstData);
		m.MultiplyPoint3(*inPos, *outPos);
		dstData += sizeof(Vector3f);
		++inPos;

		if (inNormal.GetPointer())
		{
			// Normal: direction transform by the scale-compensated matrix;
			// the result is intentionally not re-normalized (see note above).
			Vector3f* outNormal = reinterpret_cast<Vector3f*> (dstData);
			nm.MultiplyVector3(*inNormal, *outNormal);
			dstData += sizeof(Vector3f);
			++inNormal;
		}

		if (inColor.GetPointer())
		{
			// Color and UVs are passed through untransformed.
			memcpy(dstData, inColor.GetPointer(), sizeof(ColorRGBA32));
			dstData += sizeof(ColorRGBA32);
			++inColor;
		}

		if (inTexCoord0.GetPointer())
		{
			memcpy(dstData, inTexCoord0.GetPointer(), sizeof(Vector2f));
			dstData += sizeof(Vector2f);
			++inTexCoord0;
		}

		if (inTexCoord1.GetPointer())
		{
			memcpy(dstData, inTexCoord1.GetPointer(), sizeof(Vector2f));
			dstData += sizeof(Vector2f);
			++inTexCoord1;
		}

		if (inTangent.GetPointer())
		{
			// Tangent xyz is rotated like a normal; w (handedness sign) is
			// copied through unchanged. w is read before any aliasing could
			// matter since input and output buffers are distinct.
			Vector4f* outTangent = reinterpret_cast<Vector4f*> (dstData);
			Vector3f* outTangentXYZ = reinterpret_cast<Vector3f*> (outTangent);
			nm.MultiplyVector3(reinterpret_cast<const Vector3f&>(*inTangent), *outTangentXYZ);
			outTangent->w = inTangent->w;
			dstData += sizeof(Vector4f);
			++inTangent;
		}
	}
}
+
+
+
+#if (UNITY_SUPPORTS_NEON && !UNITY_DISABLE_NEON_SKINNING) || UNITY_SUPPORTS_VFP
+
+typedef void (*TransformFunc)( const void*, const void*, const void*, const float*, void*, int );
+typedef void (*TransformFuncWithTangents)( const void*, const void*, const void*, const float*, void*, int, const void* );
+
+
+#if UNITY_SUPPORTS_NEON
// Dispatch tables for the NEON assembly transform loops.
namespace TransformNEON
{
	// Expands to the assembly routine name: prefix = vertex layout
	// (XYZ / XYZN / XYZNT), addData = number of extra 32-bit words of
	// pass-through data per vertex (color = 1 word, each UV = 2 words).
	#define TRANSFORM_FUNC(prefix, addData) s_TransformVertices_Strided_##prefix##_##addData##_NEON

	// Each table is indexed by the pass-through word count (0..5).
	TransformFunc TransformXYZ[] =
	{
		TRANSFORM_FUNC(XYZ,0), TRANSFORM_FUNC(XYZ,1), TRANSFORM_FUNC(XYZ,2), TRANSFORM_FUNC(XYZ,3), TRANSFORM_FUNC(XYZ,4), TRANSFORM_FUNC(XYZ,5)
	};

	TransformFunc TransformXYZN[] =
	{
		TRANSFORM_FUNC(XYZN,0), TRANSFORM_FUNC(XYZN,1), TRANSFORM_FUNC(XYZN,2), TRANSFORM_FUNC(XYZN,3), TRANSFORM_FUNC(XYZN,4), TRANSFORM_FUNC(XYZN,5)
	};

	TransformFuncWithTangents TransformXYZNT[] =
	{
		TRANSFORM_FUNC(XYZNT,0), TRANSFORM_FUNC(XYZNT,1), TRANSFORM_FUNC(XYZNT,2), TRANSFORM_FUNC(XYZNT,3), TRANSFORM_FUNC(XYZNT,4), TRANSFORM_FUNC(XYZNT,5)
	};

	#undef TRANSFORM_FUNC
}
+#endif // UNITY_SUPPORTS_NEON
+
+
+#if UNITY_SUPPORTS_VFP
// Dispatch tables for the VFP assembly transform loops (fallback when NEON is
// unavailable at runtime). Layout mirrors TransformNEON above.
namespace TransformVFP
{
	// prefix = vertex layout (XYZ / XYZN / XYZNT), addData = extra 32-bit
	// words of pass-through data per vertex (color = 1 word, each UV = 2).
	#define TRANSFORM_FUNC(prefix, addData) s_TransformVertices_Strided_##prefix##_##addData##_VFP

	// Each table is indexed by the pass-through word count (0..5).
	TransformFunc TransformXYZ[] =
	{
		TRANSFORM_FUNC(XYZ,0), TRANSFORM_FUNC(XYZ,1), TRANSFORM_FUNC(XYZ,2), TRANSFORM_FUNC(XYZ,3), TRANSFORM_FUNC(XYZ,4), TRANSFORM_FUNC(XYZ,5)
	};

	TransformFunc TransformXYZN[] =
	{
		TRANSFORM_FUNC(XYZN,0), TRANSFORM_FUNC(XYZN,1), TRANSFORM_FUNC(XYZN,2), TRANSFORM_FUNC(XYZN,3), TRANSFORM_FUNC(XYZN,4), TRANSFORM_FUNC(XYZN,5)
	};

	TransformFuncWithTangents TransformXYZNT[] =
	{
		TRANSFORM_FUNC(XYZNT,0), TRANSFORM_FUNC(XYZNT,1), TRANSFORM_FUNC(XYZNT,2), TRANSFORM_FUNC(XYZNT,3), TRANSFORM_FUNC(XYZNT,4), TRANSFORM_FUNC(XYZNT,5)
	};

	#undef TRANSFORM_FUNC
}
+#endif // UNITY_SUPPORTS_VFP
+
// ARM-optimized strided vertex transform. Dispatches to the NEON assembly
// loops when the CPU supports NEON, else to the VFP loops. Layouts the
// assembly cannot express fall back to TransformVerticesStridedREF.
void
TransformVerticesStridedARM( StrideIterator<Vector3f> inPos, StrideIterator<Vector3f> inNormal,
							StrideIterator<ColorRGBA32> inColor, StrideIterator<Vector2f> inTexCoord0, StrideIterator<Vector2f> inTexCoord1,
							StrideIterator<Vector4f> inTangent,
							UInt8* dstData, const Matrix4x4f& m, unsigned vertexCount, bool multiStream )
{
	// Extra pass-through data per vertex, counted in 32-bit words:
	// color = 1 word, each UV set = 2 words. Max is 5 (tables have 6 entries).
	int addDataSize = 0;
	if( inColor.GetPointer() ) addDataSize += 1;
	if( inTexCoord0.GetPointer() ) addDataSize += 2;
	if( inTexCoord1.GetPointer() ) addDataSize += 2;

	// The assembly copies addDataSize consecutive words starting at the first
	// present attribute — presumably color/UV0/UV1 are contiguous in the
	// interleaved vertex layout (TODO confirm against the batching layout).
	const void* addDataSrc = 0;
	if( inColor.GetPointer() ) addDataSrc = inColor.GetPointer();
	else if( inTexCoord0.GetPointer() ) addDataSrc = inTexCoord0.GetPointer();
	else if( inTexCoord1.GetPointer() ) addDataSrc = inTexCoord1.GetPointer();

	// slow path determination
	// Cases the assembly cannot handle: a gap in the pass-through run
	// (color + UV1 but no UV0), tangents without normals, or data split
	// across multiple vertex streams.
	if( (inColor.GetPointer() && inTexCoord1.GetPointer() && !inTexCoord0.GetPointer())
	   || (inTangent.GetPointer() && !inNormal.GetPointer()) || multiStream )
	{
		TransformVerticesStridedREF(inPos, inNormal, inColor, inTexCoord0, inTexCoord1, inTangent, dstData, m, vertexCount, multiStream);
		return;
	}

	// All attributes are assumed to share the position stream's stride
	// (single interleaved stream; the multiStream case bailed out above).
	int stride = inPos.GetStride();
	const UInt8* inDataBegin = static_cast<const UInt8*>(inPos.GetPointer());
	const UInt8* inDataEnd = inDataBegin + vertexCount * stride;

#if UNITY_SUPPORTS_NEON
	if (CPUInfo::HasNEONSupport())
	{
		using namespace TransformNEON;
		if( inNormal.GetPointer() && inTangent.GetPointer() )
			TransformXYZNT[addDataSize]( inDataBegin, inDataEnd, addDataSrc, m.m_Data, dstData, stride, inTangent.GetPointer() );
		else if( inNormal.GetPointer() )
			TransformXYZN[addDataSize]( inDataBegin, inDataEnd, addDataSrc, m.m_Data, dstData, stride );
		else
			TransformXYZ[addDataSize]( inDataBegin, inDataEnd, addDataSrc, m.m_Data, dstData, stride );
	}
	else
#endif
#if UNITY_SUPPORTS_VFP
	{
		using namespace TransformVFP;
		if( inNormal.GetPointer() && inTangent.GetPointer() )
			TransformXYZNT[addDataSize]( inDataBegin, inDataEnd, addDataSrc, m.m_Data, dstData, stride, inTangent.GetPointer() );
		else if( inNormal.GetPointer() )
			TransformXYZN[addDataSize]( inDataBegin, inDataEnd, addDataSrc, m.m_Data, dstData, stride );
		else
			TransformXYZ[addDataSize]( inDataBegin, inDataEnd, addDataSrc, m.m_Data, dstData, stride );
	}
#else
	{
		// NEON-only build and the CPU reported no NEON support.
		ErrorString("non-NEON path not enabled!");
	}
#endif
}
+#endif
+
diff --git a/Runtime/Filters/Mesh/TransformVertex.h b/Runtime/Filters/Mesh/TransformVertex.h
new file mode 100644
index 0000000..fe7aa77
--- /dev/null
+++ b/Runtime/Filters/Mesh/TransformVertex.h
@@ -0,0 +1,175 @@
+#ifndef TRANSFORM_VERTEX_H_
+#define TRANSFORM_VERTEX_H_
+
+#include "Configuration/PrefixConfigure.h"
+#include "Runtime/Utilities/StrideIterator.h"
+#include "Runtime/Math/Vector2.h"
+#include "Runtime/Math/Vector3.h"
+#include "Runtime/Math/Vector4.h"
+#include "Runtime/Math/Color.h"
+
+class Matrix4x4f;
+
+
+//==============================================================================
+
+#define DECL_TRANSFORM_VERTICES_STRIDED(code, num, postfix) \
+ void s_TransformVertices_Strided_##code##_##num##_##postfix( const void* srcData, const void* srcDataEnd, const void* addData, \
+ const float* xform, void* outData, int stride \
+ );
+
+#define DECL_TRANSFORM_VERTICES_STRIDED_TANGENTS(num, postfix) \
+ void s_TransformVertices_Strided_XYZNT_##num##_##postfix( const void* srcData, const void* srcDataEnd, const void* addData, \
+ const float* xform, void* outData, int stride, const void* srcTangent \
+ );
+
+
+#if UNITY_SUPPORTS_NEON && !UNITY_DISABLE_NEON_SKINNING
+
+extern "C"
+{
+#if UNITY_ANDROID || UNITY_WINRT || UNITY_BB10 || UNITY_TIZEN
+ #define s_TransformVertices_Strided_XYZ_0_NEON _s_TransformVertices_Strided_XYZ_0_NEON
+ #define s_TransformVertices_Strided_XYZ_1_NEON _s_TransformVertices_Strided_XYZ_1_NEON
+ #define s_TransformVertices_Strided_XYZ_2_NEON _s_TransformVertices_Strided_XYZ_2_NEON
+ #define s_TransformVertices_Strided_XYZ_3_NEON _s_TransformVertices_Strided_XYZ_3_NEON
+ #define s_TransformVertices_Strided_XYZ_4_NEON _s_TransformVertices_Strided_XYZ_4_NEON
+ #define s_TransformVertices_Strided_XYZ_5_NEON _s_TransformVertices_Strided_XYZ_5_NEON
+
+ #define s_TransformVertices_Strided_XYZN_0_NEON _s_TransformVertices_Strided_XYZN_0_NEON
+ #define s_TransformVertices_Strided_XYZN_1_NEON _s_TransformVertices_Strided_XYZN_1_NEON
+ #define s_TransformVertices_Strided_XYZN_2_NEON _s_TransformVertices_Strided_XYZN_2_NEON
+ #define s_TransformVertices_Strided_XYZN_3_NEON _s_TransformVertices_Strided_XYZN_3_NEON
+ #define s_TransformVertices_Strided_XYZN_4_NEON _s_TransformVertices_Strided_XYZN_4_NEON
+ #define s_TransformVertices_Strided_XYZN_5_NEON _s_TransformVertices_Strided_XYZN_5_NEON
+
+ #define s_TransformVertices_Strided_XYZNT_0_NEON _s_TransformVertices_Strided_XYZNT_0_NEON
+ #define s_TransformVertices_Strided_XYZNT_1_NEON _s_TransformVertices_Strided_XYZNT_1_NEON
+ #define s_TransformVertices_Strided_XYZNT_2_NEON _s_TransformVertices_Strided_XYZNT_2_NEON
+ #define s_TransformVertices_Strided_XYZNT_3_NEON _s_TransformVertices_Strided_XYZNT_3_NEON
+ #define s_TransformVertices_Strided_XYZNT_4_NEON _s_TransformVertices_Strided_XYZNT_4_NEON
+ #define s_TransformVertices_Strided_XYZNT_5_NEON _s_TransformVertices_Strided_XYZNT_5_NEON
+#if ENABLE_SPRITES
+#define s_TransformVertices_Sprite_NEON _s_TransformVertices_Sprite_NEON
+#endif
+
+#endif // UNITY_ANDROID || UNITY_WINRT || UNITY_BB10 || UNITY_TIZEN
+
+ DECL_TRANSFORM_VERTICES_STRIDED(XYZ,0,NEON);
+ DECL_TRANSFORM_VERTICES_STRIDED(XYZ,1,NEON);
+ DECL_TRANSFORM_VERTICES_STRIDED(XYZ,2,NEON);
+ DECL_TRANSFORM_VERTICES_STRIDED(XYZ,3,NEON);
+ DECL_TRANSFORM_VERTICES_STRIDED(XYZ,4,NEON);
+ DECL_TRANSFORM_VERTICES_STRIDED(XYZ,5,NEON);
+
+ DECL_TRANSFORM_VERTICES_STRIDED(XYZN,0,NEON);
+ DECL_TRANSFORM_VERTICES_STRIDED(XYZN,1,NEON);
+ DECL_TRANSFORM_VERTICES_STRIDED(XYZN,2,NEON);
+ DECL_TRANSFORM_VERTICES_STRIDED(XYZN,3,NEON);
+ DECL_TRANSFORM_VERTICES_STRIDED(XYZN,4,NEON);
+ DECL_TRANSFORM_VERTICES_STRIDED(XYZN,5,NEON);
+
+ DECL_TRANSFORM_VERTICES_STRIDED_TANGENTS(0,NEON);
+ DECL_TRANSFORM_VERTICES_STRIDED_TANGENTS(1,NEON);
+ DECL_TRANSFORM_VERTICES_STRIDED_TANGENTS(2,NEON);
+ DECL_TRANSFORM_VERTICES_STRIDED_TANGENTS(3,NEON);
+ DECL_TRANSFORM_VERTICES_STRIDED_TANGENTS(4,NEON);
+ DECL_TRANSFORM_VERTICES_STRIDED_TANGENTS(5,NEON);
+#if ENABLE_SPRITES
+ void s_TransformVertices_Sprite_NEON(const void* srcData, const void* srcDataEnd, const void* addData, const float* xform, void* outData, int stride, unsigned int color);
+#endif
+}
+
+#endif
+
+
+#if UNITY_SUPPORTS_VFP
+
+extern "C"
+{
+#if UNITY_ANDROID || UNITY_BB10 || UNITY_TIZEN
+ #define s_TransformVertices_Strided_XYZ_0_VFP _s_TransformVertices_Strided_XYZ_0_VFP
+ #define s_TransformVertices_Strided_XYZ_1_VFP _s_TransformVertices_Strided_XYZ_1_VFP
+ #define s_TransformVertices_Strided_XYZ_2_VFP _s_TransformVertices_Strided_XYZ_2_VFP
+ #define s_TransformVertices_Strided_XYZ_3_VFP _s_TransformVertices_Strided_XYZ_3_VFP
+ #define s_TransformVertices_Strided_XYZ_4_VFP _s_TransformVertices_Strided_XYZ_4_VFP
+ #define s_TransformVertices_Strided_XYZ_5_VFP _s_TransformVertices_Strided_XYZ_5_VFP
+
+ #define s_TransformVertices_Strided_XYZN_0_VFP _s_TransformVertices_Strided_XYZN_0_VFP
+ #define s_TransformVertices_Strided_XYZN_1_VFP _s_TransformVertices_Strided_XYZN_1_VFP
+ #define s_TransformVertices_Strided_XYZN_2_VFP _s_TransformVertices_Strided_XYZN_2_VFP
+ #define s_TransformVertices_Strided_XYZN_3_VFP _s_TransformVertices_Strided_XYZN_3_VFP
+ #define s_TransformVertices_Strided_XYZN_4_VFP _s_TransformVertices_Strided_XYZN_4_VFP
+ #define s_TransformVertices_Strided_XYZN_5_VFP _s_TransformVertices_Strided_XYZN_5_VFP
+
+ #define s_TransformVertices_Strided_XYZNT_0_VFP _s_TransformVertices_Strided_XYZNT_0_VFP
+ #define s_TransformVertices_Strided_XYZNT_1_VFP _s_TransformVertices_Strided_XYZNT_1_VFP
+ #define s_TransformVertices_Strided_XYZNT_2_VFP _s_TransformVertices_Strided_XYZNT_2_VFP
+ #define s_TransformVertices_Strided_XYZNT_3_VFP _s_TransformVertices_Strided_XYZNT_3_VFP
+ #define s_TransformVertices_Strided_XYZNT_4_VFP _s_TransformVertices_Strided_XYZNT_4_VFP
+ #define s_TransformVertices_Strided_XYZNT_5_VFP _s_TransformVertices_Strided_XYZNT_5_VFP
+#if ENABLE_SPRITES
+ #define s_TransformVertices_Sprite_VFP _s_TransformVertices_Sprite_VFP
+#endif
+#endif // UNITY_ANDROID || UNITY_BB10 || UNITY_TIZEN
+
+
+ DECL_TRANSFORM_VERTICES_STRIDED(XYZ,0,VFP);
+ DECL_TRANSFORM_VERTICES_STRIDED(XYZ,1,VFP);
+ DECL_TRANSFORM_VERTICES_STRIDED(XYZ,2,VFP);
+ DECL_TRANSFORM_VERTICES_STRIDED(XYZ,3,VFP);
+ DECL_TRANSFORM_VERTICES_STRIDED(XYZ,4,VFP);
+ DECL_TRANSFORM_VERTICES_STRIDED(XYZ,5,VFP);
+
+ DECL_TRANSFORM_VERTICES_STRIDED(XYZN,0,VFP);
+ DECL_TRANSFORM_VERTICES_STRIDED(XYZN,1,VFP);
+ DECL_TRANSFORM_VERTICES_STRIDED(XYZN,2,VFP);
+ DECL_TRANSFORM_VERTICES_STRIDED(XYZN,3,VFP);
+ DECL_TRANSFORM_VERTICES_STRIDED(XYZN,4,VFP);
+ DECL_TRANSFORM_VERTICES_STRIDED(XYZN,5,VFP);
+
+ DECL_TRANSFORM_VERTICES_STRIDED_TANGENTS(0,VFP);
+ DECL_TRANSFORM_VERTICES_STRIDED_TANGENTS(1,VFP);
+ DECL_TRANSFORM_VERTICES_STRIDED_TANGENTS(2,VFP);
+ DECL_TRANSFORM_VERTICES_STRIDED_TANGENTS(3,VFP);
+ DECL_TRANSFORM_VERTICES_STRIDED_TANGENTS(4,VFP);
+ DECL_TRANSFORM_VERTICES_STRIDED_TANGENTS(5,VFP);
+#if ENABLE_SPRITES
+ void s_TransformVertices_Sprite_VFP (const void* srcData, const void* srcDataEnd, const void* addData, const float* xform, void* outData, int stride, unsigned int color);
+#endif
+}
+
+#endif
+
+
+#undef DECL_TRANSFORM_VERTICES_STRIDED_TANGENTS
+#undef DECL_TRANSFORM_VERTICES_STRIDED
+
+
+//==============================================================================
+
+void
+TransformVerticesStridedREF( StrideIterator<Vector3f> inPos, StrideIterator<Vector3f> inNormal,
+ StrideIterator<ColorRGBA32> inColor, StrideIterator<Vector2f> inTexCoord0, StrideIterator<Vector2f> inTexCoord1,
+ StrideIterator<Vector4f> inTangent,
+ UInt8* dstData, const Matrix4x4f& m, unsigned vertexCount, bool multiStream );
+
+#if (UNITY_SUPPORTS_NEON && !UNITY_DISABLE_NEON_SKINNING) || UNITY_SUPPORTS_VFP
+void
+TransformVerticesStridedARM( StrideIterator<Vector3f> inPos, StrideIterator<Vector3f> inNormal,
+ StrideIterator<ColorRGBA32> inColor, StrideIterator<Vector2f> inTexCoord0, StrideIterator<Vector2f> inTexCoord1,
+ StrideIterator<Vector4f> inTangent,
+ UInt8* dstData, const Matrix4x4f& m, unsigned vertexCount, bool multiStream );
+#endif
+
+
+#if (UNITY_SUPPORTS_NEON && !UNITY_DISABLE_NEON_SKINNING) || UNITY_SUPPORTS_VFP
+ #define TransformVerticesStrided TransformVerticesStridedARM
+#else
+ #define TransformVerticesStrided TransformVerticesStridedREF
+#endif
+
+
+//==============================================================================
+
+#endif // TRANSFORM_VERTEX_H_
diff --git a/Runtime/Filters/Mesh/TransformVertexNEON.asm b/Runtime/Filters/Mesh/TransformVertexNEON.asm
new file mode 100644
index 0000000..7db462b
--- /dev/null
+++ b/Runtime/Filters/Mesh/TransformVertexNEON.asm
@@ -0,0 +1,694 @@
+ AREA .text, CODE
+
+ EXPORT _s_TransformVertices_Strided_XYZ_0_NEON
+ EXPORT _s_TransformVertices_Strided_XYZ_1_NEON
+ EXPORT _s_TransformVertices_Strided_XYZ_2_NEON
+ EXPORT _s_TransformVertices_Strided_XYZ_3_NEON
+ EXPORT _s_TransformVertices_Strided_XYZ_4_NEON
+ EXPORT _s_TransformVertices_Strided_XYZ_5_NEON
+ EXPORT _s_TransformVertices_Strided_XYZN_0_NEON
+ EXPORT _s_TransformVertices_Strided_XYZN_1_NEON
+ EXPORT _s_TransformVertices_Strided_XYZN_2_NEON
+ EXPORT _s_TransformVertices_Strided_XYZN_3_NEON
+ EXPORT _s_TransformVertices_Strided_XYZN_4_NEON
+ EXPORT _s_TransformVertices_Strided_XYZN_5_NEON
+ EXPORT _s_TransformVertices_Strided_XYZNT_0_NEON
+ EXPORT _s_TransformVertices_Strided_XYZNT_1_NEON
+ EXPORT _s_TransformVertices_Strided_XYZNT_2_NEON
+ EXPORT _s_TransformVertices_Strided_XYZNT_3_NEON
+ EXPORT _s_TransformVertices_Strided_XYZNT_4_NEON
+ EXPORT _s_TransformVertices_Strided_XYZNT_5_NEON
+
; Transform packed float3 positions (no extra data) by the 4x4 matrix at r3.
; Args: r0 = src, r1 = src end, r2 = unused, r3 = matrix, [sp] = dst,
; [sp+4] = src stride. Matrix columns live in q12-q15; q15 (translation)
; seeds the accumulator each iteration. Output is packed float3 (r6 = 12).
|_s_TransformVertices_Strided_XYZ_0_NEON| PROC
        mov ip, sp
        vpush {s0-s15}
        stmdb sp!, {r4, r5, r6, r7, r8, r9, sl, fp}
        vldmia r3!, {d24-d31}
        mov.w r6, #12
        ldr.w r3, [ip]
        ldr.w r4, [ip, #4]
        vorr q0, q15, q15
        nop ; presumably loop-entry alignment padding

|TransformVertices_Strided_XYZ_0_Loop|
        pld [r0, #512] ; 0x200
        vld1.32 {d6-d7}, [r0], r4
        vmla.f32 q0, q12, d6[0]
        vmul.f32 q1, q13, d6[1]
        vmul.f32 q2, q14, d7[0]
        vadd.f32 q0, q0, q1
        vadd.f32 q0, q0, q2
        cmp r0, r1
        vst1.32 {d0-d1}, [r3], r6
        vorr q0, q15, q15
        bcc.w |TransformVertices_Strided_XYZ_0_Loop|
        ldmia.w sp!, {r4, r5, r6, r7, r8, r9, sl, fp}
        vpop {s0-s15}
        bx lr
        ENDP
+
+
; XYZ + 1 pass-through word (e.g. color): as XYZ_0, plus one 32-bit word
; copied per vertex from r2 (same stride as positions).
; NOTE(review): clobbers d9 (callee-saved under AAPCS) while saving
; caller-saved s0-s15 — confirm the register preservation set is intended.
|_s_TransformVertices_Strided_XYZ_1_NEON| PROC
        mov ip, sp
        vpush {s0-s15}
        stmdb sp!, {r4, r5, r6, r7, r8, r9, sl, fp}
        vldmia r3!, {d24-d31}
        mov.w r6, #12
        ldr.w r3, [ip]
        ldr.w r4, [ip, #4]
        vorr q0, q15, q15
        nop
        nop.w
        nop.w
        nop.w

|TransformVertices_Strided_XYZ_1_Loop|
        pld [r0, #512] ; 0x200
        vld1.32 {d6-d7}, [r0], r4
        vmla.f32 q0, q12, d6[0]
        vmul.f32 q1, q13, d6[1]
        vmul.f32 q2, q14, d7[0]
        vadd.f32 q0, q0, q1
        vld1.32 {d9}, [r2], r4
        vadd.f32 q0, q0, q2
        cmp r0, r1
        vst1.32 {d0-d1}, [r3], r6
        vorr q0, q15, q15
        vst1.32 {d9[0]}, [r3]!
        bcc.w |TransformVertices_Strided_XYZ_1_Loop|
        ldmia.w sp!, {r4, r5, r6, r7, r8, r9, sl, fp}
        vpop {s0-s15}
        bx lr
        ENDP
+
+
; XYZ + 2 pass-through words (e.g. one UV set) copied per vertex from r2.
; NOTE(review): clobbers d9 (callee-saved under AAPCS) — see XYZ_1 note.
|_s_TransformVertices_Strided_XYZ_2_NEON| PROC
        mov ip, sp
        vpush {s0-s15}
        stmdb sp!, {r4, r5, r6, r7, r8, r9, sl, fp}
        vldmia r3!, {d24-d31}
        mov.w r6, #12
        ldr.w r3, [ip]
        ldr.w r4, [ip, #4]
        vorr q0, q15, q15
        nop
        nop.w

|TransformVertices_Strided_XYZ_2_Loop|
        pld [r0, #512] ; 0x200
        vld1.32 {d6-d7}, [r0], r4
        vmla.f32 q0, q12, d6[0]
        vmul.f32 q1, q13, d6[1]
        vmul.f32 q2, q14, d7[0]
        vadd.f32 q0, q0, q1
        vld1.32 {d9}, [r2], r4
        vadd.f32 q0, q0, q2
        cmp r0, r1
        vst1.32 {d0-d1}, [r3], r6
        vorr q0, q15, q15
        vst1.32 {d9}, [r3]!
        bcc.w |TransformVertices_Strided_XYZ_2_Loop|
        ldmia.w sp!, {r4, r5, r6, r7, r8, r9, sl, fp}
        vpop {s0-s15}
        bx lr
        ENDP
+
+
; XYZ + 3 pass-through words (e.g. color + one UV set) copied from r2.
; NOTE(review): clobbers d9-d10 (callee-saved under AAPCS) — see XYZ_1 note.
|_s_TransformVertices_Strided_XYZ_3_NEON| PROC
        mov ip, sp
        vpush {s0-s15}
        stmdb sp!, {r4, r5, r6, r7, r8, r9, sl, fp}
        vldmia r3!, {d24-d31}
        mov.w r6, #12
        ldr.w r3, [ip]
        ldr.w r4, [ip, #4]
        vorr q0, q15, q15
        nop
        nop.w

|TransformVertices_Strided_XYZ_3_Loop|
        pld [r0, #512] ; 0x200
        vld1.32 {d6-d7}, [r0], r4
        vmla.f32 q0, q12, d6[0]
        vmul.f32 q1, q13, d6[1]
        vmul.f32 q2, q14, d7[0]
        vadd.f32 q0, q0, q1
        vld1.32 {d9-d10}, [r2], r4
        vadd.f32 q0, q0, q2
        cmp r0, r1
        vst1.32 {d0-d1}, [r3], r6
        vorr q0, q15, q15
        vst1.32 {d9}, [r3]!
        vst1.32 {d10[0]}, [r3]!
        bcc.w |TransformVertices_Strided_XYZ_3_Loop|
        ldmia.w sp!, {r4, r5, r6, r7, r8, r9, sl, fp}
        vpop {s0-s15}
        bx lr
        ENDP
+
+
; XYZ + 4 pass-through words (e.g. two UV sets) copied from r2.
; NOTE(review): clobbers d9-d10 (callee-saved under AAPCS) — see XYZ_1 note.
|_s_TransformVertices_Strided_XYZ_4_NEON| PROC
        mov ip, sp
        vpush {s0-s15}
        stmdb sp!, {r4, r5, r6, r7, r8, r9, sl, fp}
        vldmia r3!, {d24-d31}
        mov.w r6, #12
        ldr.w r3, [ip]
        ldr.w r4, [ip, #4]
        vorr q0, q15, q15
        nop

|TransformVertices_Strided_XYZ_4_Loop|
        pld [r0, #512] ; 0x200
        vld1.32 {d6-d7}, [r0], r4
        vmla.f32 q0, q12, d6[0]
        vmul.f32 q1, q13, d6[1]
        vmul.f32 q2, q14, d7[0]
        vadd.f32 q0, q0, q1
        vld1.32 {d9-d10}, [r2], r4
        vadd.f32 q0, q0, q2
        cmp r0, r1
        vst1.32 {d0-d1}, [r3], r6
        vorr q0, q15, q15
        vst1.32 {d9-d10}, [r3]!
        bcc.w |TransformVertices_Strided_XYZ_4_Loop|
        ldmia.w sp!, {r4, r5, r6, r7, r8, r9, sl, fp}
        vpop {s0-s15}
        bx lr
        ENDP
+
+
; XYZ + 5 pass-through words (e.g. color + two UV sets) copied from r2.
; NOTE(review): clobbers d9-d11 (callee-saved under AAPCS) — see XYZ_1 note.
|_s_TransformVertices_Strided_XYZ_5_NEON| PROC
        mov ip, sp
        vpush {s0-s15}
        stmdb sp!, {r4, r5, r6, r7, r8, r9, sl, fp}
        vldmia r3!, {d24-d31}
        mov.w r6, #12
        ldr.w r3, [ip]
        ldr.w r4, [ip, #4]
        vorr q0, q15, q15
        nop
        nop.w

|TransformVertices_Strided_XYZ_5_Loop|
        pld [r0, #512] ; 0x200
        vld1.32 {d6-d7}, [r0], r4
        vmla.f32 q0, q12, d6[0]
        vmul.f32 q1, q13, d6[1]
        vmul.f32 q2, q14, d7[0]
        vadd.f32 q0, q0, q1
        vld1.32 {d9-d11}, [r2], r4
        vadd.f32 q0, q0, q2
        cmp r0, r1
        vst1.32 {d0-d1}, [r3], r6
        vorr q0, q15, q15
        vst1.32 {d9-d10}, [r3]!
        vst1.32 {d11[0]}, [r3]!
        bcc.w |TransformVertices_Strided_XYZ_5_Loop|
        ldmia.w sp!, {r4, r5, r6, r7, r8, r9, sl, fp}
        vpop {s0-s15}
        bx lr
        ENDP
+
+
; Position + normal, no extra data. Loads float3 pos + float3 normal (d4-d6),
; point-transforms the position (q0, seeded with translation q15) and
; direction-transforms the normal (q1, no translation term), both stored as
; packed float3 (r6 = 12).
|_s_TransformVertices_Strided_XYZN_0_NEON| PROC
        mov ip, sp
        vpush {s0-s15}
        stmdb sp!, {r4, r5, r6, r7, r8, r9, sl, fp}
        vldmia r3!, {d24-d31}
        mov.w r6, #12
        ldr.w r3, [ip]
        ldr.w r4, [ip, #4]
        vorr q0, q15, q15
        nop

|TransformVertices_Strided_XYZN_0_Loop|
        pld [r0, #512] ; 0x200
        vld1.32 {d4-d6}, [r0], r4
        vmla.f32 q0, q12, d4[0]
        vmul.f32 q1, q12, d5[1]
        vmla.f32 q0, q13, d4[1]
        vmla.f32 q1, q13, d6[0]
        vmla.f32 q0, q14, d5[0]
        vmla.f32 q1, q14, d6[1]
        vst1.32 {d0-d1}, [r3], r6
        cmp r0, r1
        vorr q0, q15, q15
        vst1.32 {d2-d3}, [r3], r6
        bcc.w |TransformVertices_Strided_XYZN_0_Loop|
        ldmia.w sp!, {r4, r5, r6, r7, r8, r9, sl, fp}
        vpop {s0-s15}
        bx lr
        ENDP
+
+
; Position + normal + 1 pass-through word copied from r2.
; NOTE(review): clobbers d9 (callee-saved under AAPCS) while saving
; caller-saved s0-s15 — confirm the register preservation set is intended.
|_s_TransformVertices_Strided_XYZN_1_NEON| PROC
        mov ip, sp
        vpush {s0-s15}
        stmdb sp!, {r4, r5, r6, r7, r8, r9, sl, fp}
        vldmia r3!, {d24-d31}
        mov.w r6, #12
        ldr.w r3, [ip]
        ldr.w r4, [ip, #4]
        vorr q0, q15, q15
        nop
        nop.w

|TransformVertices_Strided_XYZN_1_Loop|
        pld [r0, #512] ; 0x200
        vld1.32 {d4-d6}, [r0], r4
        vmla.f32 q0, q12, d4[0]
        vmul.f32 q1, q12, d5[1]
        vld1.32 {d9}, [r2], r4
        vmla.f32 q0, q13, d4[1]
        vmla.f32 q1, q13, d6[0]
        vmla.f32 q0, q14, d5[0]
        vmla.f32 q1, q14, d6[1]
        vst1.32 {d0-d1}, [r3], r6
        cmp r0, r1
        vorr q0, q15, q15
        vst1.32 {d2-d3}, [r3], r6
        vst1.32 {d9[0]}, [r3]!
        bcc.w |TransformVertices_Strided_XYZN_1_Loop|
        ldmia.w sp!, {r4, r5, r6, r7, r8, r9, sl, fp}
        vpop {s0-s15}
        bx lr
        ENDP
+
+
; Position + normal + 2 pass-through words copied from r2.
; NOTE(review): clobbers d9 (callee-saved under AAPCS) — see XYZN_1 note.
|_s_TransformVertices_Strided_XYZN_2_NEON| PROC
        mov ip, sp
        vpush {s0-s15}
        stmdb sp!, {r4, r5, r6, r7, r8, r9, sl, fp}
        vldmia r3!, {d24-d31}
        mov.w r6, #12
        ldr.w r3, [ip]
        ldr.w r4, [ip, #4]
        vorr q0, q15, q15
        nop
        nop.w
        nop.w
        nop.w

|TransformVertices_Strided_XYZN_2_Loop|
        pld [r0, #512] ; 0x200
        vld1.32 {d4-d6}, [r0], r4
        vmla.f32 q0, q12, d4[0]
        vmul.f32 q1, q12, d5[1]
        vld1.32 {d9}, [r2], r4
        vmla.f32 q0, q13, d4[1]
        vmla.f32 q1, q13, d6[0]
        vmla.f32 q0, q14, d5[0]
        vmla.f32 q1, q14, d6[1]
        vst1.32 {d0-d1}, [r3], r6
        cmp r0, r1
        vorr q0, q15, q15
        vst1.32 {d2-d3}, [r3], r6
        vst1.32 {d9}, [r3]!
        bcc.w |TransformVertices_Strided_XYZN_2_Loop|
        ldmia.w sp!, {r4, r5, r6, r7, r8, r9, sl, fp}
        vpop {s0-s15}
        bx lr
        ENDP
+
+
; Position + normal + 3 pass-through words copied from r2.
; NOTE(review): clobbers d9-d10 (callee-saved under AAPCS) — see XYZN_1 note.
|_s_TransformVertices_Strided_XYZN_3_NEON| PROC
        mov ip, sp
        vpush {s0-s15}
        stmdb sp!, {r4, r5, r6, r7, r8, r9, sl, fp}
        vldmia r3!, {d24-d31}
        mov.w r6, #12
        ldr.w r3, [ip]
        ldr.w r4, [ip, #4]
        vorr q0, q15, q15
        nop
        nop.w
        nop.w
        nop.w

|TransformVertices_Strided_XYZN_3_Loop|
        pld [r0, #512] ; 0x200
        vld1.32 {d4-d6}, [r0], r4
        vmla.f32 q0, q12, d4[0]
        vmul.f32 q1, q12, d5[1]
        vld1.32 {d9-d10}, [r2], r4
        vmla.f32 q0, q13, d4[1]
        vmla.f32 q1, q13, d6[0]
        vmla.f32 q0, q14, d5[0]
        vmla.f32 q1, q14, d6[1]
        vst1.32 {d0-d1}, [r3], r6
        cmp r0, r1
        vorr q0, q15, q15
        vst1.32 {d2-d3}, [r3], r6
        vst1.32 {d9}, [r3]!
        vst1.32 {d10[0]}, [r3]!
        bcc.w |TransformVertices_Strided_XYZN_3_Loop|
        ldmia.w sp!, {r4, r5, r6, r7, r8, r9, sl, fp}
        vpop {s0-s15}
        bx lr
        ENDP
+
+
; Position + normal + 4 pass-through words copied from r2.
; NOTE(review): clobbers d9-d10 (callee-saved under AAPCS) — see XYZN_1 note.
|_s_TransformVertices_Strided_XYZN_4_NEON| PROC
        mov ip, sp
        vpush {s0-s15}
        stmdb sp!, {r4, r5, r6, r7, r8, r9, sl, fp}
        vldmia r3!, {d24-d31}
        mov.w r6, #12
        ldr.w r3, [ip]
        ldr.w r4, [ip, #4]
        vorr q0, q15, q15
        nop
        nop.w
        nop.w

|TransformVertices_Strided_XYZN_4_Loop|
        pld [r0, #512] ; 0x200
        vld1.32 {d4-d6}, [r0], r4
        vmla.f32 q0, q12, d4[0]
        vmul.f32 q1, q12, d5[1]
        vld1.32 {d9-d10}, [r2], r4
        vmla.f32 q0, q13, d4[1]
        vmla.f32 q1, q13, d6[0]
        vmla.f32 q0, q14, d5[0]
        vmla.f32 q1, q14, d6[1]
        vst1.32 {d0-d1}, [r3], r6
        cmp r0, r1
        vorr q0, q15, q15
        vst1.32 {d2-d3}, [r3], r6
        vst1.32 {d9-d10}, [r3]!
        bcc.w |TransformVertices_Strided_XYZN_4_Loop|
        ldmia.w sp!, {r4, r5, r6, r7, r8, r9, sl, fp}
        vpop {s0-s15}
        bx lr
        ENDP
+
+
; Position + normal + 5 pass-through words copied from r2.
; NOTE(review): clobbers d9-d11 (callee-saved under AAPCS) — see XYZN_1 note.
|_s_TransformVertices_Strided_XYZN_5_NEON| PROC
        mov ip, sp
        vpush {s0-s15}
        stmdb sp!, {r4, r5, r6, r7, r8, r9, sl, fp}
        vldmia r3!, {d24-d31}
        mov.w r6, #12
        ldr.w r3, [ip]
        ldr.w r4, [ip, #4]
        vorr q0, q15, q15
        nop
        nop.w
        nop.w
        nop.w

|TransformVertices_Strided_XYZN_5_Loop|
        pld [r0, #512] ; 0x200
        vld1.32 {d4-d6}, [r0], r4
        vmla.f32 q0, q12, d4[0]
        vmul.f32 q1, q12, d5[1]
        vld1.32 {d9-d11}, [r2], r4
        vmla.f32 q0, q13, d4[1]
        vmla.f32 q1, q13, d6[0]
        vmla.f32 q0, q14, d5[0]
        vmla.f32 q1, q14, d6[1]
        vst1.32 {d0-d1}, [r3], r6
        cmp r0, r1
        vorr q0, q15, q15
        vst1.32 {d2-d3}, [r3], r6
        vst1.32 {d9-d10}, [r3]!
        vst1.32 {d11[0]}, [r3]!
        bcc.w |TransformVertices_Strided_XYZN_5_Loop|
        ldmia.w sp!, {r4, r5, r6, r7, r8, r9, sl, fp}
        vpop {s0-s15}
        bx lr
        ENDP
+
+
; Position + normal + tangent, no extra data. Tangents (float4) come from a
; separate pointer at [sp+8] (r8). Tangent xyz is direction-transformed into
; q11; vtrn.32 d8,d7 moves the untouched w (handedness) into d7[0] so it is
; stored unchanged after the rotated xyz (r9 = 12, sl = 4 byte steps).
; NOTE(review): clobbers d8 (callee-saved under AAPCS) while saving
; caller-saved s0-s15 — confirm the register preservation set is intended.
|_s_TransformVertices_Strided_XYZNT_0_NEON| PROC
        mov ip, sp
        vpush {s0-s15}
        stmdb sp!, {r4, r5, r6, r7, r8, r9, sl, fp}
        vldmia r3!, {d24-d31}
        mov.w r6, #12
        ldr.w r3, [ip]
        ldr.w r4, [ip, #4]
        vorr q0, q15, q15
        ldr.w r8, [ip, #8]
        mov.w r9, #12
        mov.w sl, #4
        nop
        nop.w
        nop.w
        nop.w

|TransformVertices_Strided_XYZNT_0_Loop|
        pld [r0, #512] ; 0x200
        vld1.32 {d4-d6}, [r0], r4
        vld1.32 {d7-d8}, [r8], r4
        vmla.f32 q0, q12, d4[0]
        vmul.f32 q1, q12, d5[1]
        vmul.f32 q11, q12, d7[0]
        vmla.f32 q0, q13, d4[1]
        vmla.f32 q1, q13, d6[0]
        vmla.f32 q11, q13, d7[1]
        vmla.f32 q0, q14, d5[0]
        vmla.f32 q1, q14, d6[1]
        vmla.f32 q11, q14, d8[0]
        vst1.32 {d0-d1}, [r3], r6
        cmp r0, r1
        vorr q0, q15, q15
        vst1.32 {d2-d3}, [r3], r6
        vtrn.32 d8, d7
        vst1.32 {d22-d23}, [r3], r9
        vst1.32 {d7[0]}, [r3], sl
        bcc.w |TransformVertices_Strided_XYZNT_0_Loop|
        ldmia.w sp!, {r4, r5, r6, r7, r8, r9, sl, fp}
        vpop {s0-s15}
        bx lr
        ENDP
+
+
; Position + normal + tangent + 1 pass-through word copied from r2.
; NOTE(review): clobbers d8-d9 (callee-saved under AAPCS) — see XYZNT_0 note.
|_s_TransformVertices_Strided_XYZNT_1_NEON| PROC
        mov ip, sp
        vpush {s0-s15}
        stmdb sp!, {r4, r5, r6, r7, r8, r9, sl, fp}
        vldmia r3!, {d24-d31}
        mov.w r6, #12
        ldr.w r3, [ip]
        ldr.w r4, [ip, #4]
        vorr q0, q15, q15
        ldr.w r8, [ip, #8]
        mov.w r9, #12
        mov.w sl, #4
        nop
        nop.w
        nop.w
        nop.w

|TransformVertices_Strided_XYZNT_1_Loop|
        pld [r0, #512] ; 0x200
        vld1.32 {d4-d6}, [r0], r4
        vld1.32 {d7-d8}, [r8], r4
        vmla.f32 q0, q12, d4[0]
        vmul.f32 q1, q12, d5[1]
        vmul.f32 q11, q12, d7[0]
        vld1.32 {d9}, [r2], r4
        vmla.f32 q0, q13, d4[1]
        vmla.f32 q1, q13, d6[0]
        vmla.f32 q11, q13, d7[1]
        vmla.f32 q0, q14, d5[0]
        vmla.f32 q1, q14, d6[1]
        vmla.f32 q11, q14, d8[0]
        vst1.32 {d0-d1}, [r3], r6
        cmp r0, r1
        vorr q0, q15, q15
        vst1.32 {d2-d3}, [r3], r6
        vst1.32 {d9[0]}, [r3]!
        vtrn.32 d8, d7
        vst1.32 {d22-d23}, [r3], r9
        vst1.32 {d7[0]}, [r3], sl
        bcc.w |TransformVertices_Strided_XYZNT_1_Loop|
        ldmia.w sp!, {r4, r5, r6, r7, r8, r9, sl, fp}
        vpop {s0-s15}
        bx lr
        ENDP
+
+
; Position + normal + tangent + 2 pass-through words copied from r2.
; NOTE(review): clobbers d8-d9 (callee-saved under AAPCS) — see XYZNT_0 note.
|_s_TransformVertices_Strided_XYZNT_2_NEON| PROC
        mov ip, sp
        vpush {s0-s15}
        stmdb sp!, {r4, r5, r6, r7, r8, r9, sl, fp}
        vldmia r3!, {d24-d31}
        mov.w r6, #12
        ldr.w r3, [ip]
        ldr.w r4, [ip, #4]
        vorr q0, q15, q15
        ldr.w r8, [ip, #8]
        mov.w r9, #12
        mov.w sl, #4
        nop
        nop.w

|TransformVertices_Strided_XYZNT_2_Loop|
        pld [r0, #512] ; 0x200
        vld1.32 {d4-d6}, [r0], r4
        vld1.32 {d7-d8}, [r8], r4
        vmla.f32 q0, q12, d4[0]
        vmul.f32 q1, q12, d5[1]
        vmul.f32 q11, q12, d7[0]
        vld1.32 {d9}, [r2], r4
        vmla.f32 q0, q13, d4[1]
        vmla.f32 q1, q13, d6[0]
        vmla.f32 q11, q13, d7[1]
        vmla.f32 q0, q14, d5[0]
        vmla.f32 q1, q14, d6[1]
        vmla.f32 q11, q14, d8[0]
        vst1.32 {d0-d1}, [r3], r6
        cmp r0, r1
        vorr q0, q15, q15
        vst1.32 {d2-d3}, [r3], r6
        vst1.32 {d9}, [r3]!
        vtrn.32 d8, d7
        vst1.32 {d22-d23}, [r3], r9
        vst1.32 {d7[0]}, [r3], sl
        bcc.w |TransformVertices_Strided_XYZNT_2_Loop|
        ldmia.w sp!, {r4, r5, r6, r7, r8, r9, sl, fp}
        vpop {s0-s15}
        bx lr
        ENDP
+
+
; Position + normal + tangent + 3 pass-through words copied from r2.
; NOTE(review): clobbers d8-d10 (callee-saved under AAPCS) — see XYZNT_0 note.
|_s_TransformVertices_Strided_XYZNT_3_NEON| PROC
        mov ip, sp
        vpush {s0-s15}
        stmdb sp!, {r4, r5, r6, r7, r8, r9, sl, fp}
        vldmia r3!, {d24-d31}
        mov.w r6, #12
        ldr.w r3, [ip]
        ldr.w r4, [ip, #4]
        vorr q0, q15, q15
        ldr.w r8, [ip, #8]
        mov.w r9, #12
        mov.w sl, #4
        nop
        nop.w

|TransformVertices_Strided_XYZNT_3_Loop|
        pld [r0, #512] ; 0x200
        vld1.32 {d4-d6}, [r0], r4
        vld1.32 {d7-d8}, [r8], r4
        vmla.f32 q0, q12, d4[0]
        vmul.f32 q1, q12, d5[1]
        vmul.f32 q11, q12, d7[0]
        vld1.32 {d9-d10}, [r2], r4
        vmla.f32 q0, q13, d4[1]
        vmla.f32 q1, q13, d6[0]
        vmla.f32 q11, q13, d7[1]
        vmla.f32 q0, q14, d5[0]
        vmla.f32 q1, q14, d6[1]
        vmla.f32 q11, q14, d8[0]
        vst1.32 {d0-d1}, [r3], r6
        cmp r0, r1
        vorr q0, q15, q15
        vst1.32 {d2-d3}, [r3], r6
        vst1.32 {d9}, [r3]!
        vst1.32 {d10[0]}, [r3]!
        vtrn.32 d8, d7
        vst1.32 {d22-d23}, [r3], r9
        vst1.32 {d7[0]}, [r3], sl
        bcc.w |TransformVertices_Strided_XYZNT_3_Loop|
        ldmia.w sp!, {r4, r5, r6, r7, r8, r9, sl, fp}
        vpop {s0-s15}
        bx lr
        ENDP
+
+
; Position + normal + tangent + 4 pass-through words copied from r2.
; NOTE(review): clobbers d8-d10 (callee-saved under AAPCS) — see XYZNT_0 note.
|_s_TransformVertices_Strided_XYZNT_4_NEON| PROC
        mov ip, sp
        vpush {s0-s15}
        stmdb sp!, {r4, r5, r6, r7, r8, r9, sl, fp}
        vldmia r3!, {d24-d31}
        mov.w r6, #12
        ldr.w r3, [ip]
        ldr.w r4, [ip, #4]
        vorr q0, q15, q15
        ldr.w r8, [ip, #8]
        mov.w r9, #12
        mov.w sl, #4
        nop

|TransformVertices_Strided_XYZNT_4_Loop|
        pld [r0, #512] ; 0x200
        vld1.32 {d4-d6}, [r0], r4
        vld1.32 {d7-d8}, [r8], r4
        vmla.f32 q0, q12, d4[0]
        vmul.f32 q1, q12, d5[1]
        vmul.f32 q11, q12, d7[0]
        vld1.32 {d9-d10}, [r2], r4
        vmla.f32 q0, q13, d4[1]
        vmla.f32 q1, q13, d6[0]
        vmla.f32 q11, q13, d7[1]
        vmla.f32 q0, q14, d5[0]
        vmla.f32 q1, q14, d6[1]
        vmla.f32 q11, q14, d8[0]
        vst1.32 {d0-d1}, [r3], r6
        cmp r0, r1
        vorr q0, q15, q15
        vst1.32 {d2-d3}, [r3], r6
        vst1.32 {d9-d10}, [r3]!
        vtrn.32 d8, d7
        vst1.32 {d22-d23}, [r3], r9
        vst1.32 {d7[0]}, [r3], sl
        bcc.w |TransformVertices_Strided_XYZNT_4_Loop|
        ldmia.w sp!, {r4, r5, r6, r7, r8, r9, sl, fp}
        vpop {s0-s15}
        bx lr
        ENDP
+
+
; Position + normal + tangent + 5 pass-through words copied from r2.
; NOTE(review): clobbers d8-d11 (callee-saved under AAPCS) — see XYZNT_0 note.
|_s_TransformVertices_Strided_XYZNT_5_NEON| PROC
        mov ip, sp
        vpush {s0-s15}
        stmdb sp!, {r4, r5, r6, r7, r8, r9, sl, fp}
        vldmia r3!, {d24-d31}
        mov.w r6, #12
        ldr.w r3, [ip]
        ldr.w r4, [ip, #4]
        vorr q0, q15, q15
        ldr.w r8, [ip, #8]
        mov.w r9, #12
        mov.w sl, #4
        nop
        nop.w

|TransformVertices_Strided_XYZNT_5_Loop|
        pld [r0, #512] ; 0x200
        vld1.32 {d4-d6}, [r0], r4
        vld1.32 {d7-d8}, [r8], r4
        vmla.f32 q0, q12, d4[0]
        vmul.f32 q1, q12, d5[1]
        vmul.f32 q11, q12, d7[0]
        vld1.32 {d9-d11}, [r2], r4
        vmla.f32 q0, q13, d4[1]
        vmla.f32 q1, q13, d6[0]
        vmla.f32 q11, q13, d7[1]
        vmla.f32 q0, q14, d5[0]
        vmla.f32 q1, q14, d6[1]
        vmla.f32 q11, q14, d8[0]
        vst1.32 {d0-d1}, [r3], r6
        cmp r0, r1
        vorr q0, q15, q15
        vst1.32 {d2-d3}, [r3], r6
        vst1.32 {d9-d10}, [r3]!
        vst1.32 {d11[0]}, [r3]!
        vtrn.32 d8, d7
        vst1.32 {d22-d23}, [r3], r9
        vst1.32 {d7[0]}, [r3], sl
        bcc.w |TransformVertices_Strided_XYZNT_5_Loop|
        ldmia.w sp!, {r4, r5, r6, r7, r8, r9, sl, fp}
        vpop {s0-s15}
        bx lr
        nop.w
        nop.w
        nop.w
        ENDP
+
+
+ END
diff --git a/Runtime/Filters/Mesh/TransformVertexNEON.s b/Runtime/Filters/Mesh/TransformVertexNEON.s
new file mode 100644
index 0000000..e21a554
--- /dev/null
+++ b/Runtime/Filters/Mesh/TransformVertexNEON.s
@@ -0,0 +1,224 @@
+#define UNITY_ASSEMBLER
+#include "Configuration/PrefixConfigure.h"
+
+#if UNITY_SUPPORTS_NEON
+
+.set device,0
+.set device,__arm__
+
+.if device
+
+//.code32
+
+
+.globl _s_TransformVertices_Strided_XYZ_0_NEON
+.globl _s_TransformVertices_Strided_XYZ_1_NEON
+.globl _s_TransformVertices_Strided_XYZ_2_NEON
+.globl _s_TransformVertices_Strided_XYZ_3_NEON
+.globl _s_TransformVertices_Strided_XYZ_4_NEON
+.globl _s_TransformVertices_Strided_XYZ_5_NEON
+
+.globl _s_TransformVertices_Strided_XYZN_0_NEON
+.globl _s_TransformVertices_Strided_XYZN_1_NEON
+.globl _s_TransformVertices_Strided_XYZN_2_NEON
+.globl _s_TransformVertices_Strided_XYZN_3_NEON
+.globl _s_TransformVertices_Strided_XYZN_4_NEON
+.globl _s_TransformVertices_Strided_XYZN_5_NEON
+
+.globl _s_TransformVertices_Strided_XYZNT_0_NEON
+.globl _s_TransformVertices_Strided_XYZNT_1_NEON
+.globl _s_TransformVertices_Strided_XYZNT_2_NEON
+.globl _s_TransformVertices_Strided_XYZNT_3_NEON
+.globl _s_TransformVertices_Strided_XYZNT_4_NEON
+.globl _s_TransformVertices_Strided_XYZNT_5_NEON
+
+.globl _s_TransformVertices_Sprite_NEON
+
+
+#define STRIDED_INPUT 1
+
+
+#define LOOP_XYZ 1
+#define LOOP_XYZN 0
+#define LOOP_XYZNT 0
+#define LOOP_SPRITE 0
+
+
+_s_TransformVertices_Strided_XYZ_0_NEON:
+#define COPY_DATA_SZ 0
+#define LOOP_NAME TransformVertices_Strided_XYZ_0_Loop
+#include "TransformVertexNEON_Loop.h"
+#undef COPY_DATA_SZ
+#undef LOOP_NAME
+
+_s_TransformVertices_Strided_XYZ_1_NEON:
+#define COPY_DATA_SZ 1
+#define LOOP_NAME TransformVertices_Strided_XYZ_1_Loop
+#include "TransformVertexNEON_Loop.h"
+#undef COPY_DATA_SZ
+#undef LOOP_NAME
+
+_s_TransformVertices_Strided_XYZ_2_NEON:
+#define COPY_DATA_SZ 2
+#define LOOP_NAME TransformVertices_Strided_XYZ_2_Loop
+#include "TransformVertexNEON_Loop.h"
+#undef COPY_DATA_SZ
+#undef LOOP_NAME
+
+_s_TransformVertices_Strided_XYZ_3_NEON:
+#define COPY_DATA_SZ 3
+#define LOOP_NAME TransformVertices_Strided_XYZ_3_Loop
+#include "TransformVertexNEON_Loop.h"
+#undef COPY_DATA_SZ
+#undef LOOP_NAME
+
+_s_TransformVertices_Strided_XYZ_4_NEON:
+#define COPY_DATA_SZ 4
+#define LOOP_NAME TransformVertices_Strided_XYZ_4_Loop
+#include "TransformVertexNEON_Loop.h"
+#undef COPY_DATA_SZ
+#undef LOOP_NAME
+
+_s_TransformVertices_Strided_XYZ_5_NEON:
+#define COPY_DATA_SZ 5
+#define LOOP_NAME TransformVertices_Strided_XYZ_5_Loop
+#include "TransformVertexNEON_Loop.h"
+#undef COPY_DATA_SZ
+#undef LOOP_NAME
+
+
+#undef LOOP_XYZ
+#undef LOOP_XYZN
+#undef LOOP_XYZNT
+#undef LOOP_SPRITE
+
+
+#define LOOP_XYZ 0
+#define LOOP_XYZN 1
+#define LOOP_XYZNT 0
+#define LOOP_SPRITE 0
+
+
+_s_TransformVertices_Strided_XYZN_0_NEON:
+#define COPY_DATA_SZ 0
+#define LOOP_NAME TransformVertices_Strided_XYZN_0_Loop
+#include "TransformVertexNEON_Loop.h"
+#undef COPY_DATA_SZ
+#undef LOOP_NAME
+
+_s_TransformVertices_Strided_XYZN_1_NEON:
+#define COPY_DATA_SZ 1
+#define LOOP_NAME TransformVertices_Strided_XYZN_1_Loop
+#include "TransformVertexNEON_Loop.h"
+#undef COPY_DATA_SZ
+#undef LOOP_NAME
+
+_s_TransformVertices_Strided_XYZN_2_NEON:
+#define COPY_DATA_SZ 2
+#define LOOP_NAME TransformVertices_Strided_XYZN_2_Loop
+#include "TransformVertexNEON_Loop.h"
+#undef COPY_DATA_SZ
+#undef LOOP_NAME
+
+_s_TransformVertices_Strided_XYZN_3_NEON:
+#define COPY_DATA_SZ 3
+#define LOOP_NAME TransformVertices_Strided_XYZN_3_Loop
+#include "TransformVertexNEON_Loop.h"
+#undef COPY_DATA_SZ
+#undef LOOP_NAME
+
+_s_TransformVertices_Strided_XYZN_4_NEON:
+#define COPY_DATA_SZ 4
+#define LOOP_NAME TransformVertices_Strided_XYZN_4_Loop
+#include "TransformVertexNEON_Loop.h"
+#undef COPY_DATA_SZ
+#undef LOOP_NAME
+
+_s_TransformVertices_Strided_XYZN_5_NEON:
+#define COPY_DATA_SZ 5
+#define LOOP_NAME TransformVertices_Strided_XYZN_5_Loop
+#include "TransformVertexNEON_Loop.h"
+#undef COPY_DATA_SZ
+#undef LOOP_NAME
+
+
+#undef LOOP_XYZ
+#undef LOOP_XYZN
+#undef LOOP_XYZNT
+#undef LOOP_SPRITE
+
+
+#define LOOP_XYZ 0
+#define LOOP_XYZN 0
+#define LOOP_XYZNT 1
+#define LOOP_SPRITE 0
+
+
+_s_TransformVertices_Strided_XYZNT_0_NEON:
+#define COPY_DATA_SZ 0
+#define LOOP_NAME TransformVertices_Strided_XYZNT_0_Loop
+#include "TransformVertexNEON_Loop.h"
+#undef COPY_DATA_SZ
+#undef LOOP_NAME
+
+_s_TransformVertices_Strided_XYZNT_1_NEON:
+#define COPY_DATA_SZ 1
+#define LOOP_NAME TransformVertices_Strided_XYZNT_1_Loop
+#include "TransformVertexNEON_Loop.h"
+#undef COPY_DATA_SZ
+#undef LOOP_NAME
+
+_s_TransformVertices_Strided_XYZNT_2_NEON:
+#define COPY_DATA_SZ 2
+#define LOOP_NAME TransformVertices_Strided_XYZNT_2_Loop
+#include "TransformVertexNEON_Loop.h"
+#undef COPY_DATA_SZ
+#undef LOOP_NAME
+
+_s_TransformVertices_Strided_XYZNT_3_NEON:
+#define COPY_DATA_SZ 3
+#define LOOP_NAME TransformVertices_Strided_XYZNT_3_Loop
+#include "TransformVertexNEON_Loop.h"
+#undef COPY_DATA_SZ
+#undef LOOP_NAME
+
+_s_TransformVertices_Strided_XYZNT_4_NEON:
+#define COPY_DATA_SZ 4
+#define LOOP_NAME TransformVertices_Strided_XYZNT_4_Loop
+#include "TransformVertexNEON_Loop.h"
+#undef COPY_DATA_SZ
+#undef LOOP_NAME
+
+_s_TransformVertices_Strided_XYZNT_5_NEON:
+#define COPY_DATA_SZ 5
+#define LOOP_NAME TransformVertices_Strided_XYZNT_5_Loop
+#include "TransformVertexNEON_Loop.h"
+#undef COPY_DATA_SZ
+#undef LOOP_NAME
+
+
+#undef LOOP_XYZ
+#undef LOOP_XYZN
+#undef LOOP_XYZNT
+#undef LOOP_SPRITE
+
+#define LOOP_XYZ 0
+#define LOOP_XYZN 0
+#define LOOP_XYZNT 0
+#define LOOP_SPRITE 1
+
+_s_TransformVertices_Sprite_NEON:
+#define LOOP_NAME TransformVertices_Sprite_Loop
+#include "TransformVertexNEON_Loop.h"
+#undef LOOP_NAME
+
+#undef LOOP_XYZ
+#undef LOOP_XYZN
+#undef LOOP_XYZNT
+#undef LOOP_SPRITE
+
+#undef STRIDED_INPUT
+
+.endif
+
+#endif \ No newline at end of file
diff --git a/Runtime/Filters/Mesh/TransformVertexNEON_Loop.h b/Runtime/Filters/Mesh/TransformVertexNEON_Loop.h
new file mode 100644
index 0000000..d84a516
--- /dev/null
+++ b/Runtime/Filters/Mesh/TransformVertexNEON_Loop.h
@@ -0,0 +1,254 @@
+// TODO: SOA
+
+// defines
+// LOOP_XYZ
+// LOOP_XYZN
+// LOOP_XYZNT
+// LOOP_NAME
+// COPY_DATA_SZ
+// STRIDED_INPUT
+
+#if STRIDED_INPUT
+
+//r0: const void* srcData
+//r1: const void* srcDataEnd
+//r2: const void* addData
+//r3: const void* xform
+//[sp+0]: void* dstData
+//[sp+4]: const int stride
+
+mov ip, sp
+
+vpush {d0-d15}
+stmfd sp!, {r4-r11}
+
+vldmia r3!, {q12-q15}
+
+// r3:dstData
+// r4: stride
+// r6: proper offset for out ptr (pos, normal)
+
+mov r6, #12
+
+ldr r3, [ip, #0]
+ldr r4, [ip, #4]
+
+// overlap calculation
+
+vmov.32 q0, q15 // pos.w (1.0)
+
+
+#if LOOP_XYZ
+
+.align 4
+LOOP_NAME:
+
+pld [r0, #512] // prefetch
+
+vld1.32 {d6,d7}, [r0], r4 // load pos
+
+vmla.f32 q0, q12, d6[0] // pos.x
+vmul.f32 q1, q13, d6[1] // pos.y
+vmul.f32 q2, q14, d7[0] // pos.z
+
+vadd.f32 q0, q0, q1
+ // load additional data
+#if COPY_DATA_SZ == 1
+vld1.32 {d9}, [r2], r4
+#elif COPY_DATA_SZ == 2
+vld1.32 {d9}, [r2], r4
+#elif COPY_DATA_SZ == 3
+vld1.32 {d9,d10}, [r2], r4
+#elif COPY_DATA_SZ == 4
+vld1.32 {d9,d10}, [r2], r4
+#elif COPY_DATA_SZ == 5
+vld1.32 {d9,d10,d11}, [r2], r4
+#endif
+
+vadd.f32 q0, q0, q2
+cmp r0, r1 // check cycle
+
+vst1.32 {d0,d1}, [r3], r6
+
+vmov.32 q0, q15 // pos.w (1.0)
+ // save additional data
+#if COPY_DATA_SZ == 1
+vst1.32 {d9[0]}, [r3]!
+#elif COPY_DATA_SZ == 2
+vst1.32 {d9}, [r3]!
+#elif COPY_DATA_SZ == 3
+vst1.32 {d9}, [r3]!
+vst1.32 {d10[0]}, [r3]!
+#elif COPY_DATA_SZ == 4
+vst1.32 {d9,d10}, [r3]!
+#elif COPY_DATA_SZ == 5
+vst1.32 {d9,d10}, [r3]!
+vst1.32 {d11[0]}, [r3]!
+#endif
+
+bcc LOOP_NAME
+
+
+#elif LOOP_XYZN
+
+
+.align 4
+LOOP_NAME:
+
+pld [r0, #512] // prefetch
+
+vld1.32 {d4,d5,d6}, [r0], r4 // load pos + normal
+
+vmla.f32 q0, q12, d4[0] // pos.x
+vmul.f32 q1, q12, d5[1] // normal.x
+
+ // load additional data
+#if COPY_DATA_SZ == 1
+vld1.32 {d9}, [r2], r4
+#elif COPY_DATA_SZ == 2
+vld1.32 {d9}, [r2], r4
+#elif COPY_DATA_SZ == 3
+vld1.32 {d9,d10}, [r2], r4
+#elif COPY_DATA_SZ == 4
+vld1.32 {d9,d10}, [r2], r4
+#elif COPY_DATA_SZ == 5
+vld1.32 {d9,d10,d11}, [r2], r4
+#endif
+
+vmla.f32 q0, q13, d4[1] // pos.y
+vmla.f32 q1, q13, d6[0] // normal.y
+
+vmla.f32 q0, q14, d5[0] // pos.z
+vmla.f32 q1, q14, d6[1] // normal.z
+
+vst1.32 {d0,d1}, [r3], r6
+
+cmp r0, r1 // check cycle
+vmov.32 q0, q15 // pos.w (1.0)
+vst1.32 {d2,d3}, [r3], r6
+ // save additional data
+#if COPY_DATA_SZ == 1
+vst1.32 {d9[0]}, [r3]!
+#elif COPY_DATA_SZ == 2
+vst1.32 {d9}, [r3]!
+#elif COPY_DATA_SZ == 3
+vst1.32 {d9}, [r3]!
+vst1.32 {d10[0]}, [r3]!
+#elif COPY_DATA_SZ == 4
+vst1.32 {d9,d10}, [r3]!
+#elif COPY_DATA_SZ == 5
+vst1.32 {d9,d10}, [r3]!
+vst1.32 {d11[0]}, [r3]!
+#endif
+
+
+bcc LOOP_NAME
+
+
+#elif LOOP_XYZNT
+
+//[sp+8]: const void* tangent
+//r8: tangent
+
+ldr r8, [ip, #8]
+
+mov r9, #12
+mov r10, #4
+
+.align 4
+LOOP_NAME:
+
+pld [r0, #512] // prefetch
+
+vld1.32 {d4,d5,d6}, [r0], r4 // load pos + normal
+vld1.32 {d7,d8}, [r8], r4 // load tangent
+
+vmla.f32 q0, q12, d4[0] // pos.x
+vmul.f32 q1, q12, d5[1] // normal.x
+vmul.f32 q11, q12, d7[0] // tangent.x
+
+ // load additional data
+#if COPY_DATA_SZ == 1
+vld1.32 {d9}, [r2], r4
+#elif COPY_DATA_SZ == 2
+vld1.32 {d9}, [r2], r4
+#elif COPY_DATA_SZ == 3
+vld1.32 {d9,d10}, [r2], r4
+#elif COPY_DATA_SZ == 4
+vld1.32 {d9,d10}, [r2], r4
+#elif COPY_DATA_SZ == 5
+vld1.32 {d9,d10,d11}, [r2], r4
+#endif
+
+vmla.f32 q0, q13, d4[1] // pos.y
+vmla.f32 q1, q13, d6[0] // normal.y
+vmla.f32 q11, q13, d7[1] // tangent.y
+
+vmla.f32 q0, q14, d5[0] // pos.z
+vmla.f32 q1, q14, d6[1] // normal.z
+vmla.f32 q11, q14, d8[0] // tangent.z
+
+vst1.32 {d0,d1}, [r3], r6
+
+cmp r0, r1 // check cycle
+vmov.32 q0, q15 // pos.w (1.0)
+vst1.32 {d2,d3}, [r3], r6
+ // save additional data
+#if COPY_DATA_SZ == 1
+vst1.32 {d9[0]}, [r3]!
+#elif COPY_DATA_SZ == 2
+vst1.32 {d9}, [r3]!
+#elif COPY_DATA_SZ == 3
+vst1.32 {d9}, [r3]!
+vst1.32 {d10[0]}, [r3]!
+#elif COPY_DATA_SZ == 4
+vst1.32 {d9,d10}, [r3]!
+#elif COPY_DATA_SZ == 5
+vst1.32 {d9,d10}, [r3]!
+vst1.32 {d11[0]}, [r3]!
+#endif
+
+
+// TODO: less stupid way
+
+vtrn.32 d8, d7
+vst1.32 {d22,d23}, [r3], r9
+vst1.32 {d7[0]}, [r3], r10
+
+bcc LOOP_NAME
+#elif LOOP_SPRITE
+.align 4
+ldr r7, [ip, #8] // load color32
+vmov.32 d10[0], r7
+LOOP_NAME:
+
+pld [r0, #512] // prefetch
+
+vld1.32 {d6,d7}, [r0], r4 // load pos
+
+vmla.f32 q0, q12, d6[0] // pos.x
+vmul.f32 q1, q13, d6[1] // pos.y
+vmul.f32 q2, q14, d7[0] // pos.z
+vadd.f32 q0, q0, q1
+// load data
+vld1.32 {d9}, [r2], r4
+
+vadd.f32 q0, q0, q2
+cmp r0, r1 // check cycle
+
+vst1.32 {d0,d1}, [r3], r6
+
+vmov.32 q0, q15 // pos.w (1.0)
+// save data
+vst1.32 {d10[0]}, [r3]!
+vst1.32 {d9}, [r3]!
+
+
+bcc LOOP_NAME
+#endif
+
+ldmfd sp!, {r4-r11}
+vpop {d0-d15}
+bx lr
+
+#endif
diff --git a/Runtime/Filters/Mesh/TransformVertexVFP.s b/Runtime/Filters/Mesh/TransformVertexVFP.s
new file mode 100644
index 0000000..114afc6
--- /dev/null
+++ b/Runtime/Filters/Mesh/TransformVertexVFP.s
@@ -0,0 +1,250 @@
+#define UNITY_ASSEMBLER
+#include "Configuration/PrefixConfigure.h"
+#include "Runtime/Utilities/VFPUtility.h"
+
+#if UNITY_SUPPORTS_VFP
+
+.syntax unified
+
+.set device,0
+.set device,__arm__
+
+.if device
+
+//.code32
+
+
+.globl _s_TransformVertices_Strided_XYZ_0_VFP
+.globl _s_TransformVertices_Strided_XYZ_1_VFP
+.globl _s_TransformVertices_Strided_XYZ_2_VFP
+.globl _s_TransformVertices_Strided_XYZ_3_VFP
+.globl _s_TransformVertices_Strided_XYZ_4_VFP
+.globl _s_TransformVertices_Strided_XYZ_5_VFP
+
+.globl _s_TransformVertices_Strided_XYZN_0_VFP
+.globl _s_TransformVertices_Strided_XYZN_1_VFP
+.globl _s_TransformVertices_Strided_XYZN_2_VFP
+.globl _s_TransformVertices_Strided_XYZN_3_VFP
+.globl _s_TransformVertices_Strided_XYZN_4_VFP
+.globl _s_TransformVertices_Strided_XYZN_5_VFP
+
+.globl _s_TransformVertices_Strided_XYZNT_0_VFP
+.globl _s_TransformVertices_Strided_XYZNT_1_VFP
+.globl _s_TransformVertices_Strided_XYZNT_2_VFP
+.globl _s_TransformVertices_Strided_XYZNT_3_VFP
+.globl _s_TransformVertices_Strided_XYZNT_4_VFP
+.globl _s_TransformVertices_Strided_XYZNT_5_VFP
+
+.globl _s_TransformVertices_Sprite_VFP
+
+
+#if UNITY_ANDROID
+.hidden _s_TransformVertices_Strided_XYZ_0_VFP
+.hidden _s_TransformVertices_Strided_XYZ_1_VFP
+.hidden _s_TransformVertices_Strided_XYZ_2_VFP
+.hidden _s_TransformVertices_Strided_XYZ_3_VFP
+.hidden _s_TransformVertices_Strided_XYZ_4_VFP
+.hidden _s_TransformVertices_Strided_XYZ_5_VFP
+
+.hidden _s_TransformVertices_Strided_XYZN_0_VFP
+.hidden _s_TransformVertices_Strided_XYZN_1_VFP
+.hidden _s_TransformVertices_Strided_XYZN_2_VFP
+.hidden _s_TransformVertices_Strided_XYZN_3_VFP
+.hidden _s_TransformVertices_Strided_XYZN_4_VFP
+.hidden _s_TransformVertices_Strided_XYZN_5_VFP
+
+.hidden _s_TransformVertices_Strided_XYZNT_0_VFP
+.hidden _s_TransformVertices_Strided_XYZNT_1_VFP
+.hidden _s_TransformVertices_Strided_XYZNT_2_VFP
+.hidden _s_TransformVertices_Strided_XYZNT_3_VFP
+.hidden _s_TransformVertices_Strided_XYZNT_4_VFP
+.hidden _s_TransformVertices_Strided_XYZNT_5_VFP
+
+.hidden _s_TransformVertices_Sprite_VFP
+#endif
+
+#define STRIDED_INPUT 1
+
+
+#define LOOP_XYZ 1
+#define LOOP_XYZN 0
+#define LOOP_XYZNT 0
+#define LOOP_SPRITE 0
+
+_s_TransformVertices_Strided_XYZ_0_VFP:
+#define COPY_DATA_SZ 0
+#define LOOP_NAME TransformVertices_Strided_XYZ_0_Loop
+#include "TransformVertexVFP_Loop.h"
+#undef COPY_DATA_SZ
+#undef LOOP_NAME
+
+_s_TransformVertices_Strided_XYZ_1_VFP:
+#define COPY_DATA_SZ 1
+#define LOOP_NAME TransformVertices_Strided_XYZ_1_Loop
+#include "TransformVertexVFP_Loop.h"
+#undef COPY_DATA_SZ
+#undef LOOP_NAME
+
+_s_TransformVertices_Strided_XYZ_2_VFP:
+#define COPY_DATA_SZ 2
+#define LOOP_NAME TransformVertices_Strided_XYZ_2_Loop
+#include "TransformVertexVFP_Loop.h"
+#undef COPY_DATA_SZ
+#undef LOOP_NAME
+
+_s_TransformVertices_Strided_XYZ_3_VFP:
+#define COPY_DATA_SZ 3
+#define LOOP_NAME TransformVertices_Strided_XYZ_3_Loop
+#include "TransformVertexVFP_Loop.h"
+#undef COPY_DATA_SZ
+#undef LOOP_NAME
+
+_s_TransformVertices_Strided_XYZ_4_VFP:
+#define COPY_DATA_SZ 4
+#define LOOP_NAME TransformVertices_Strided_XYZ_4_Loop
+#include "TransformVertexVFP_Loop.h"
+#undef COPY_DATA_SZ
+#undef LOOP_NAME
+
+_s_TransformVertices_Strided_XYZ_5_VFP:
+#define COPY_DATA_SZ 5
+#define LOOP_NAME TransformVertices_Strided_XYZ_5_Loop
+#include "TransformVertexVFP_Loop.h"
+#undef COPY_DATA_SZ
+#undef LOOP_NAME
+
+
+#undef LOOP_XYZ
+#undef LOOP_XYZN
+#undef LOOP_XYZNT
+#undef LOOP_SPRITE
+
+
+#define LOOP_XYZ 0
+#define LOOP_XYZN 1
+#define LOOP_XYZNT 0
+#define LOOP_SPRITE 0
+
+
+_s_TransformVertices_Strided_XYZN_0_VFP:
+#define COPY_DATA_SZ 0
+#define LOOP_NAME TransformVertices_Strided_XYZN_0_Loop
+#include "TransformVertexVFP_Loop.h"
+#undef COPY_DATA_SZ
+#undef LOOP_NAME
+
+_s_TransformVertices_Strided_XYZN_1_VFP:
+#define COPY_DATA_SZ 1
+#define LOOP_NAME TransformVertices_Strided_XYZN_1_Loop
+#include "TransformVertexVFP_Loop.h"
+#undef COPY_DATA_SZ
+#undef LOOP_NAME
+
+_s_TransformVertices_Strided_XYZN_2_VFP:
+#define COPY_DATA_SZ 2
+#define LOOP_NAME TransformVertices_Strided_XYZN_2_Loop
+#include "TransformVertexVFP_Loop.h"
+#undef COPY_DATA_SZ
+#undef LOOP_NAME
+
+_s_TransformVertices_Strided_XYZN_3_VFP:
+#define COPY_DATA_SZ 3
+#define LOOP_NAME TransformVertices_Strided_XYZN_3_Loop
+#include "TransformVertexVFP_Loop.h"
+#undef COPY_DATA_SZ
+#undef LOOP_NAME
+
+_s_TransformVertices_Strided_XYZN_4_VFP:
+#define COPY_DATA_SZ 4
+#define LOOP_NAME TransformVertices_Strided_XYZN_4_Loop
+#include "TransformVertexVFP_Loop.h"
+#undef COPY_DATA_SZ
+#undef LOOP_NAME
+
+_s_TransformVertices_Strided_XYZN_5_VFP:
+#define COPY_DATA_SZ 5
+#define LOOP_NAME TransformVertices_Strided_XYZN_5_Loop
+#include "TransformVertexVFP_Loop.h"
+#undef COPY_DATA_SZ
+#undef LOOP_NAME
+
+
+#undef LOOP_XYZ
+#undef LOOP_XYZN
+#undef LOOP_XYZNT
+#undef LOOP_SPRITE
+
+
+#define LOOP_XYZ 0
+#define LOOP_XYZN 0
+#define LOOP_XYZNT 1
+#define LOOP_SPRITE 0
+
+
+_s_TransformVertices_Strided_XYZNT_0_VFP:
+#define COPY_DATA_SZ 0
+#define LOOP_NAME TransformVertices_Strided_XYZNT_0_Loop
+#include "TransformVertexVFP_Loop.h"
+#undef COPY_DATA_SZ
+#undef LOOP_NAME
+
+_s_TransformVertices_Strided_XYZNT_1_VFP:
+#define COPY_DATA_SZ 1
+#define LOOP_NAME TransformVertices_Strided_XYZNT_1_Loop
+#include "TransformVertexVFP_Loop.h"
+#undef COPY_DATA_SZ
+#undef LOOP_NAME
+
+_s_TransformVertices_Strided_XYZNT_2_VFP:
+#define COPY_DATA_SZ 2
+#define LOOP_NAME TransformVertices_Strided_XYZNT_2_Loop
+#include "TransformVertexVFP_Loop.h"
+#undef COPY_DATA_SZ
+#undef LOOP_NAME
+
+_s_TransformVertices_Strided_XYZNT_3_VFP:
+#define COPY_DATA_SZ 3
+#define LOOP_NAME TransformVertices_Strided_XYZNT_3_Loop
+#include "TransformVertexVFP_Loop.h"
+#undef COPY_DATA_SZ
+#undef LOOP_NAME
+
+_s_TransformVertices_Strided_XYZNT_4_VFP:
+#define COPY_DATA_SZ 4
+#define LOOP_NAME TransformVertices_Strided_XYZNT_4_Loop
+#include "TransformVertexVFP_Loop.h"
+#undef COPY_DATA_SZ
+#undef LOOP_NAME
+
+_s_TransformVertices_Strided_XYZNT_5_VFP:
+#define COPY_DATA_SZ 5
+#define LOOP_NAME TransformVertices_Strided_XYZNT_5_Loop
+#include "TransformVertexVFP_Loop.h"
+#undef COPY_DATA_SZ
+#undef LOOP_NAME
+
+#undef LOOP_XYZ
+#undef LOOP_XYZN
+#undef LOOP_XYZNT
+#undef LOOP_SPRITE
+
+#define LOOP_XYZ 0
+#define LOOP_XYZN 0
+#define LOOP_XYZNT 0
+#define LOOP_SPRITE 1
+
+_s_TransformVertices_Sprite_VFP:
+#define LOOP_NAME TransformVertices_Sprite_Loop
+#include "TransformVertexVFP_Loop.h"
+#undef LOOP_NAME
+
+#undef LOOP_XYZ
+#undef LOOP_XYZN
+#undef LOOP_XYZNT
+#undef LOOP_SPRITE
+
+#undef STRIDED_INPUT
+
+.endif
+
+#endif
diff --git a/Runtime/Filters/Mesh/TransformVertexVFP_Loop.h b/Runtime/Filters/Mesh/TransformVertexVFP_Loop.h
new file mode 100644
index 0000000..48193c8
--- /dev/null
+++ b/Runtime/Filters/Mesh/TransformVertexVFP_Loop.h
@@ -0,0 +1,252 @@
+// defines
+// LOOP_XYZ
+// LOOP_XYZN
+// LOOP_XYZNT
+// LOOP_SPRITE
+// LOOP_NAME
+// COPY_DATA_SZ
+// STRIDED_INPUT
+
+#if STRIDED_INPUT
+
+//r0: const void* srcData
+//r1: const void* srcDataEnd
+//r2: const void* addData
+//r3: const void* xform
+//[sp+0]: void* dstData
+//[sp+4]: const int stride
+//[sp+8]: const void* tangent
+
+mov ip, sp
+
+vpush {d0-d15}
+stmfd sp!, {r4-r11}
+
+// {s16-s31} xform
+
+vldmia.32 r3!, {s16-s31}
+
+// r3: dstData
+// r4: stride
+//r11: tangent
+ldr r3, [ip, #0]
+ldr r4, [ip, #4]
+
+#if LOOP_XYZNT
+ldr r11, [ip, #8]
+#endif
+
+#if LOOP_SPRITE
+//r6: color
+ldr r6, [ip, #8]
+#endif
+
+
+mov ip, r0
+// VFP_VECTOR_LENGTH(3)
+mov r0, ip
+
+
+#if LOOP_XYZ
+
+.align 4
+LOOP_NAME:
+
+mov r5, r0
+pld [r0, #512] // prefetch
+
+vldmia.32 r5!, {s0-s2} // load pos
+FCPYS4 (8,9,10,11, 28,29,30,31) // pos.w
+
+FMACS4 (8,9,10,11, 16,17,18,19, 0,0,0,0) // pos.x
+#if COPY_DATA_SZ == 1
+ldmia r2, {r6} // load additional data
+#elif COPY_DATA_SZ == 2
+ldmia r2, {r6-r7} // load additional data
+#elif COPY_DATA_SZ == 3
+ldmia r2, {r6-r8} // load additional data
+#elif COPY_DATA_SZ == 4
+ldmia r2, {r6-r9} // load additional data
+#elif COPY_DATA_SZ == 5
+ldmia r2, {r6-r10} // load additional data
+#endif
+
+FMACS4 (8,9,10,11, 20,21,22,23, 1,1,1,1) // pos.y
+add r0, r0, r4 // inc srcData
+
+FMACS4 (8,9,10,11, 24,25,26,27, 2,2,2,2) // pos.z
+add r2, r2, r4 // inc srcAddData
+
+vstmia.32 r3!, {s8-s10} // store pos
+cmp r0, r1 // check cycle
+
+#if COPY_DATA_SZ == 1
+stmia r3!, {r6} // save additional data
+#elif COPY_DATA_SZ == 2
+stmia r3!, {r6-r7} // save additional data
+#elif COPY_DATA_SZ == 3
+stmia r3!, {r6-r8} // save additional data
+#elif COPY_DATA_SZ == 4
+stmia r3!, {r6-r9} // save additional data
+#elif COPY_DATA_SZ == 5
+stmia r3!, {r6-r10} // save additional data
+#endif
+
+bcc LOOP_NAME
+
+
+#elif LOOP_XYZN
+
+.align 4
+LOOP_NAME:
+
+mov r5, r0
+pld [r0, #512] // prefetch
+
+vldmia.32 r5!, {s0-s2} // load pos
+FCPYS4 (8,9,10,11, 28,29,30,31) // pos.w
+
+vldmia.32 r5!, {s3-s5} // load normal
+FMACS4 (8,9,10,11, 16,17,18,19, 0,0,0,0) // pos.x
+
+FMULS4 (12,13,14,15, 16,17,18,19, 3,3,3,3) // normal.x
+FMACS4 (8,9,10,11, 20,21,22,23, 1,1,1,1) // pos.y
+
+#if COPY_DATA_SZ == 1
+ldmia r2, {r6} // load additional data
+#elif COPY_DATA_SZ == 2
+ldmia r2, {r6-r7} // load additional data
+#elif COPY_DATA_SZ == 3
+ldmia r2, {r6-r8} // load additional data
+#elif COPY_DATA_SZ == 4
+ldmia r2, {r6-r9} // load additional data
+#elif COPY_DATA_SZ == 5
+ldmia r2, {r6-r10} // load additional data
+#endif
+FMACS4 (8,9,10,11, 24,25,26,27, 2,2,2,2) // pos.z
+
+FMACS4 (12,13,14,15, 20,21,22,23, 4,4,4,4) // normal.y
+vstmia.32 r3!, {s8-s10} // store pos
+
+FMACS4 (12,13,14,15, 24,25,26,27, 5,5,5,5) // normal.z
+add r0, r0, r4 // inc srcData
+
+vstmia.32 r3!, {s12-s14} // store normal
+add r2, r2, r4 // inc srcAddData
+
+cmp r0, r1 // check cycle
+#if COPY_DATA_SZ == 1
+stmia r3!, {r6} // save additional data
+#elif COPY_DATA_SZ == 2
+stmia r3!, {r6-r7} // save additional data
+#elif COPY_DATA_SZ == 3
+stmia r3!, {r6-r8} // save additional data
+#elif COPY_DATA_SZ == 4
+stmia r3!, {r6-r9} // save additional data
+#elif COPY_DATA_SZ == 5
+stmia r3!, {r6-r10} // save additional data
+#endif
+
+bcc LOOP_NAME
+
+#elif LOOP_XYZNT
+
+.align 4
+LOOP_NAME:
+
+mov r5, r0
+pld [r0, #512] // prefetch
+
+vldmia.32 r5!, {s0-s2} // load pos
+FCPYS4 (8,9,10,11, 28,29,30,31) // pos.w
+
+vldmia.32 r5!, {s3-s5} // load normal
+FMACS4 (8,9,10,11, 16,17,18,19, 0,0,0,0) // pos.x
+
+FMULS4 (12,13,14,15, 16,17,18,19, 3,3,3,3) // normal.x
+FMACS4 (8,9,10,11, 20,21,22,23, 1,1,1,1) // pos.y
+
+#if COPY_DATA_SZ == 1
+ldmia r2, {r6} // load additional data
+#elif COPY_DATA_SZ == 2
+ldmia r2, {r6-r7} // load additional data
+#elif COPY_DATA_SZ == 3
+ldmia r2, {r6-r8} // load additional data
+#elif COPY_DATA_SZ == 4
+ldmia r2, {r6-r9} // load additional data
+#elif COPY_DATA_SZ == 5
+ldmia r2, {r6-r10} // load additional data
+#endif
+FMACS4 (8,9,10,11, 24,25,26,27, 2,2,2,2) // pos.z
+
+FMACS4 (12,13,14,15, 20,21,22,23, 4,4,4,4) // normal.y
+vstmia.32 r3!, {s8-s10} // store pos
+
+FMACS4 (12,13,14,15, 24,25,26,27, 5,5,5,5) // normal.z
+vldmia.32 r11, {s0-s3} // load tangent
+
+add r0, r0, r4 // inc srcData
+FMULS4 (8,9,10,11, 16,17,18,19, 0,0,0,0) // tangent.x
+
+vstmia.32 r3!, {s12-s14} // store normal
+FMACS4 (8,9,10,11, 20,21,22,23, 1,1,1,1) // tangent.y
+
+cmp r0, r1 // check cycle
+FMACS4 (8,9,10,11, 24,25,26,27, 2,2,2,2) // tangent.z
+
+#if COPY_DATA_SZ == 1
+stmia r3!, {r6} // save additional data
+#elif COPY_DATA_SZ == 2
+stmia r3!, {r6-r7} // save additional data
+#elif COPY_DATA_SZ == 3
+stmia r3!, {r6-r8} // save additional data
+#elif COPY_DATA_SZ == 4
+stmia r3!, {r6-r9} // save additional data
+#elif COPY_DATA_SZ == 5
+stmia r3!, {r6-r10} // save additional data
+#endif
+fcpys s11, s3 // copy tangent.w
+
+vstmia.32 r3!, {s8-s11} // store tangent
+add r2, r2, r4 // inc srcAddData
+
+add r11, r11, r4 // inc srcTangent
+bcc LOOP_NAME
+
+#elif LOOP_SPRITE
+
+.align 4
+LOOP_NAME:
+
+mov r5, r0
+pld [r0, #512] // prefetch
+
+vldmia.32 r5!, {s0-s2} // load pos
+FCPYS4 (8,9,10,11, 28,29,30,31) // pos.w
+
+FMACS4 (8,9,10,11, 16,17,18,19, 0,0,0,0) // pos.x
+
+
+ldmia r2, {r7-r8} // load uv
+
+FMACS4 (8,9,10,11, 20,21,22,23, 1,1,1,1) // pos.y
+add r0, r0, r4 // inc srcData
+
+FMACS4 (8,9,10,11, 24,25,26,27, 2,2,2,2) // pos.z
+add r2, r2, r4 // inc srcAddData
+
+vstmia.32 r3!, {s8-s10} // store pos
+cmp r0, r1 // check cycle
+
+stmia r3!, {r6-r8} // save color and uv
+
+bcc LOOP_NAME
+#endif
+
+// VFP_VECTOR_LENGTH_ZERO
+
+ldmfd sp!, {r4-r11}
+vpop {d0-d15}
+bx lr
+
+#endif // STRIDED_INPUT
diff --git a/Runtime/Filters/Mesh/VertexData.cpp b/Runtime/Filters/Mesh/VertexData.cpp
new file mode 100644
index 0000000..b922805
--- /dev/null
+++ b/Runtime/Filters/Mesh/VertexData.cpp
@@ -0,0 +1,559 @@
+#include "UnityPrefix.h"
+#include "Configuration/UnityConfigure.h"
+#include "VertexData.h"
+#include "Runtime/Shaders/VBO.h"
+#include "Runtime/Serialize/TransferFunctions/SerializeTransfer.h"
+#include "Runtime/Serialize/TransferUtility.h"
+#include "Runtime/Serialize/SwapEndianArray.h"
+#include <algorithm>
+
+/*
+ On most platforms, for skinning/non-uniform-scaling of meshes you would want to split your data into
+ a hot data stream (position, normal and tangent) and a cold data stream (diffuse and uvs) in order to maximize CPU cache access patterns and
+ reduce bandwidth and computation ( you won't need to copy the cold data )
+*/
+
+VertexStreamsLayout VertexDataInfo::kVertexStreamsDefault = {{ kShaderChannelsAll, 0, 0, 0 }};
+#if UNITY_PS3
+ VertexStreamsLayout VertexDataInfo::kVertexStreamsSkinnedHotColdSplit = {{ VERTEX_FORMAT1(Vertex), VERTEX_FORMAT1(Normal), VERTEX_FORMAT1(Tangent), kShaderChannelsCold }};
+#else
+ VertexStreamsLayout VertexDataInfo::kVertexStreamsSkinnedHotColdSplit = {{ kShaderChannelsHot, kShaderChannelsCold, 0, 0 }};
+# if UNITY_EDITOR
+ VertexStreamsLayout VertexDataInfo::kVertexStreamsSkinnedHotColdSplitPS3 = {{ VERTEX_FORMAT1(Vertex), VERTEX_FORMAT1(Normal), VERTEX_FORMAT1(Tangent), kShaderChannelsCold }};
+# endif
+#endif
+
+#define MAKE_CHANNEL(fmt, dim) VertexChannelsLayout::Channel(kChannelFormat##fmt, dim)
+VertexChannelsLayout VertexDataInfo::kVertexChannelsDefault =
+{{ // Array wrapped by struct requires double braces
+ MAKE_CHANNEL(Float, 3), // position
+ MAKE_CHANNEL(Float, 3), // normal
+ MAKE_CHANNEL(Color, 1), // color
+ MAKE_CHANNEL(Float, 2), // texcoord0
+ MAKE_CHANNEL(Float, 2), // texcoord1
+ MAKE_CHANNEL(Float, 4) // tangent
+}};
+VertexChannelsLayout VertexDataInfo::kVertexChannelsCompressed =
+{{ // Array wrapped by struct requires double braces
+ MAKE_CHANNEL(Float, 3), // position
+ MAKE_CHANNEL(Float16, 4), // normal
+ MAKE_CHANNEL(Color, 1), // color
+ MAKE_CHANNEL(Float16, 2), // texcoord0
+ MAKE_CHANNEL(Float16, 2), // texcoord1
+ MAKE_CHANNEL(Float16, 4) // tangent
+}};
+VertexChannelsLayout VertexDataInfo::kVertexChannelsCompressedAggressive =
+{{ // Array wrapped by struct requires double braces
+ MAKE_CHANNEL(Float, 3), // position
+ MAKE_CHANNEL(Byte, 4), // normal
+ MAKE_CHANNEL(Color, 1), // color
+ MAKE_CHANNEL(Float16, 2), // texcoord0
+ MAKE_CHANNEL(Float16, 2), // texcoord1
+ MAKE_CHANNEL(Byte, 4) // tangent
+}};
+#undef MAKE_CHANNEL
+
+static const UInt8 kVertexChannelFormatSizes[kChannelFormatCount] = {
+ 4, // kChannelFormatFloat
+ 2, // kChannelFormatFloat16
+ 4, // kChannelFormatColor
+ 1 // kChannelFormatByte
+};
+
+size_t GetChannelFormatSize(UInt8 format)
+{
+ Assert (format < kChannelFormatCount);
+ return kVertexChannelFormatSizes[format];
+}
+
+static bool operator == (const VertexStreamsLayout& lhs, const VertexStreamsLayout& rhs)
+{
+ return CompareArrays(lhs.channelMasks, rhs.channelMasks, kMaxVertexStreams);
+}
+
+template<class TransferFunction>
+void VertexData::Transfer (TransferFunction& transfer)
+{
+ #if SUPPORT_SERIALIZED_TYPETREES
+ if (transfer.GetFlags() & kWorkaround35MeshSerializationFuckup)
+ {
+ TransferWorkaround35SerializationFuckup (transfer);
+ return;
+ }
+ #endif
+
+ transfer.Transfer (m_CurrentChannels, "m_CurrentChannels", kHideInEditorMask);
+ transfer.Transfer (m_VertexCount, "m_VertexCount", kHideInEditorMask);
+
+ dynamic_array<ChannelInfo> channels;
+ dynamic_array<StreamInfo> streams;
+ if (transfer.IsWriting ())
+ {
+ channels.resize_uninitialized (kShaderChannelCount);
+ streams.resize_uninitialized (kMaxVertexStreams);
+ std::copy (m_Channels, m_Channels + kShaderChannelCount, channels.begin ());
+ std::copy (m_Streams, m_Streams + kMaxVertexStreams, streams.begin ());
+ }
+ transfer.Transfer (channels, "m_Channels", kHideInEditorMask);
+ transfer.Transfer (streams, "m_Streams", kHideInEditorMask);
+
+ if (transfer.IsReading ())
+ {
+ // For compatibility do this even if channels/streams info didn't exist (case 558604)
+ // In the past there was only a channels mask, UpdateStreams() generates the info from that
+ if (channels.size () == kShaderChannelCount)
+ std::copy (channels.begin (), channels.begin () + kShaderChannelCount, m_Channels);
+ if (streams.size () == kMaxVertexStreams)
+ std::copy (streams.begin (), streams.begin () + kMaxVertexStreams, m_Streams);
+ else
+ std::fill (m_Streams, m_Streams + kMaxVertexStreams, StreamInfo());
+
+ UInt32 channelsInStreams = 0;
+ for (int i = 0; i < kMaxVertexStreams ; i++)
+ channelsInStreams |= m_Streams[i].channelMask;
+ if (channelsInStreams)
+ UpdateStreams(channelsInStreams, m_VertexCount, GetStreamsLayout (), GetChannelsLayout ());
+ else
+ UpdateStreams(m_CurrentChannels, m_VertexCount, kVertexStreamsDefault, kVertexChannelsDefault);
+ }
+
+ transfer.TransferTypeless (&m_DataSize, "m_DataSize", kHideInEditorMask);
+ if (transfer.DidReadLastProperty ())
+ {
+ if (m_Data)
+ UNITY_FREE (kMemVertexData, m_Data);
+ m_Data = (UInt8*)UNITY_MALLOC_ALIGNED (kMemVertexData, VertexData::GetAllocateDataSize (m_DataSize), kVertexDataAlign);
+ }
+
+ transfer.TransferTypelessData (m_DataSize, m_Data);
+}
+
+#if SUPPORT_SERIALIZED_TYPETREES
+template<class TransferFunction>
+void VertexData::TransferWorkaround35SerializationFuckup (TransferFunction& transfer)
+{
+ UInt32 currentChannels = m_CurrentChannels;
+ transfer.Transfer (currentChannels, "m_CurrentChannels", kHideInEditorMask);
+ transfer.Transfer (m_VertexCount, "m_VertexCount", kHideInEditorMask);
+
+ TRANSFER(m_Streams[0]);
+ TRANSFER(m_Streams[1]);
+ TRANSFER(m_Streams[2]);
+ TRANSFER(m_Streams[3]);
+
+ if (transfer.IsReading ())
+ {
+ if(m_VertexCount && (currentChannels == 0))
+ {
+ for(int i=0;i<kMaxVertexStreams;i++)
+ currentChannels |= m_Streams[i].channelMask;
+ }
+ UpdateStreams(currentChannels, m_VertexCount);
+ //GetComponentInfo(m_Components, currentChannels);
+ m_CurrentChannels = currentChannels;
+ }
+
+ transfer.TransferTypeless (&m_DataSize, "m_DataSize", kHideInEditorMask);
+
+ if (transfer.IsReading ())
+ {
+ if (m_Data)
+ UNITY_FREE (kMemVertexData, m_Data);
+ m_Data = (UInt8*)UNITY_MALLOC_ALIGNED (kMemVertexData, VertexData::GetAllocateDataSize (m_DataSize), kVertexDataAlign);
+ }
+
+ transfer.TransferTypelessData (m_DataSize, m_Data);
+}
+#endif
+
+INSTANTIATE_TEMPLATE_TRANSFER(VertexData)
+
+void VertexDataInfo::UpdateStreams(unsigned newChannelMask, size_t newVertexCount, const VertexStreamsLayout& streams, const VertexChannelsLayout& channels)
+{
+ m_VertexCount = newVertexCount;
+ m_CurrentChannels = 0;
+ m_VertexSize = 0;
+ size_t streamOffset = 0;
+ for (int s = 0; s < kMaxVertexStreams; s++)
+ {
+ StreamInfo& stream = m_Streams[s];
+ m_Streams[s].Reset();
+ stream.channelMask = streams.channelMasks[s] & newChannelMask;
+ if (stream.channelMask == 0)
+ continue;
+ m_CurrentChannels |= stream.channelMask;
+ for (int c = 0; c < kShaderChannelCount; c++)
+ {
+ if (stream.channelMask & (1 << c))
+ {
+ ChannelInfo& channel = m_Channels[c];
+ const VertexChannelsLayout::Channel& srcChannel = channels.channels[c];
+ channel.stream = s;
+ channel.offset = stream.stride;
+ channel.format = srcChannel.format;
+ channel.dimension = srcChannel.dimension;
+ stream.stride += channel.dimension * GetChannelFormatSize(channel.format);
+ }
+ }
+ streamOffset = AlignStreamSize(streamOffset);
+ stream.offset = streamOffset;
+ streamOffset += stream.stride * newVertexCount;
+ m_VertexSize += stream.stride;
+ }
+ for (int c = 0; c < kShaderChannelCount; c++)
+ {
+ // Reset channels that were removed
+ if (!(m_CurrentChannels & (1 << c)))
+ m_Channels[c].Reset();
+ }
+ m_DataSize = streamOffset;
+}
+
+size_t VertexDataInfo::GetActiveStreamCount() const
+{
+ size_t activeStreamCount = 0;
+ for (int i=0; i<kMaxVertexStreams; i++)
+ {
+ if(m_Streams[i].channelMask != 0)
+ activeStreamCount++;
+ }
+ return activeStreamCount;
+}
+
+size_t VertexDataInfo::GetStreamIndex(ShaderChannel channel) const
+{
+ UInt32 channelMask = 1 << channel;
+ for (int i=0; i<kMaxVertexStreams; i++)
+ {
+ if(m_Streams[i].channelMask & channelMask)
+ return i;
+ }
+ return -1;
+}
+
+VertexStreamsLayout VertexDataInfo::GetStreamsLayout() const
+{
+ VertexStreamsLayout result;
+ for (int i = 0; i < kMaxVertexStreams; i++)
+ result.channelMasks[i] = m_Streams[i].channelMask;
+ return result;
+}
+
+VertexChannelsLayout VertexDataInfo::GetChannelsLayout() const
+{
+ VertexChannelsLayout result;
+ for (int i = 0; i < kShaderChannelCount; i++)
+ {
+ result.channels[i] = VertexChannelsLayout::Channel(m_Channels[i].format, m_Channels[i].dimension);
+ }
+ return result;
+}
+
+bool VertexDataInfo::ConformsToStreamsLayout(const VertexStreamsLayout& streams) const
+{
+ for (int i = 0; i < kMaxVertexStreams; i++)
+ {
+ // Fail if we have a channel that's not in the layout
+ if (m_Streams[i].channelMask & ~streams.channelMasks[i])
+ return false;
+ }
+ return true;
+}
+
+bool VertexDataInfo::ConformsToChannelsLayout(const VertexChannelsLayout& channels) const
+{
+ for (int i = 0; i < kShaderChannelCount; i++)
+ {
+ if (m_Channels[i].IsValid())
+ {
+ const VertexChannelsLayout::Channel& channel = channels.channels[i];
+ if (m_Channels[i].format != channel.format ||
+ m_Channels[i].dimension != channel.dimension)
+ return false;
+ }
+ }
+ return true;
+}
+
// Encodes a float in [-1,1] as a signed normalized byte using the
// OpenGL-style mapping b = (f*255 - 1) / 2, so -1.0 -> -128 and 1.0 -> 127.
signed char f32_to_s8(float fval)
{
	return ((fval * 255.0f) - 1.0f) / 2.0f;
}

// Decodes a signed normalized byte back to float: f = (2*b + 1) / 255.
// This is the exact inverse of f32_to_s8. The previous implementation
// computed 2*(val/255) - 1, which maps the byte range onto roughly [-2, 0]
// and therefore did not round-trip through f32_to_s8 at all.
float s8_to_f32(signed char val)
{
	return (2*val + 1.0f) / 255.0f;
}
+
// Copies one vertex channel between two interleaved buffers while converting
// the component format. Supported pairs: half->float and snorm-byte->float
// (decompression, always available), float->half and float->snorm-byte
// (compression, editor builds only). When the destination has more components
// than the source, the extra components are zero-filled; extra source
// components are dropped. Unsupported pairs log an error and copy nothing.
static void ConvertCopyChannel(size_t vertexCount,
	const UInt8* srcPtr, UInt8 srcStride, UInt8 srcType, UInt8 srcDim,
	UInt8* dstPtr, UInt8 dstStride, UInt8 dstType, UInt8 dstDim)
{
	UInt8 minDim = std::min(srcDim, dstDim);
	if (srcType == kChannelFormatFloat16 && dstType == kChannelFormatFloat)
	{
		// decompressing
		for (size_t i = 0; i < vertexCount; i++)
		{
			UInt8 comp = 0;
			for ( ; comp < minDim; comp++)
				HalfToFloat(reinterpret_cast<const UInt16*>(srcPtr)[comp], reinterpret_cast<float*>(dstPtr)[comp]);
			for ( ; comp < dstDim; comp++)
				reinterpret_cast<float*>(dstPtr)[comp] = 0.0f;
			srcPtr += srcStride;
			dstPtr += dstStride;
		}
	}
	else if (srcType == kChannelFormatByte && dstType == kChannelFormatFloat)
	{
		// decompressing
		for (size_t i = 0; i < vertexCount; i++)
		{
			UInt8 comp = 0;
			for ( ; comp < minDim; comp++)
				reinterpret_cast<float*>(dstPtr)[comp] = s8_to_f32(reinterpret_cast<const SInt8*>(srcPtr)[comp]);
			for ( ; comp < dstDim; comp++)
				reinterpret_cast<float*>(dstPtr)[comp] = 0.0f;
			srcPtr += srcStride;
			dstPtr += dstStride;
		}
	}
#if UNITY_EDITOR
	else if (srcType == kChannelFormatFloat && dstType == kChannelFormatFloat16)
	{
		// compressing
		for (size_t i = 0; i < vertexCount; i++)
		{
			UInt8 comp = 0;
			for ( ; comp < minDim; comp++)
				g_FloatToHalf.Convert(reinterpret_cast<const float*>(srcPtr)[comp], reinterpret_cast<UInt16*>(dstPtr)[comp]);
			for ( ; comp < dstDim; comp++)
				reinterpret_cast<UInt16*>(dstPtr)[comp] = 0;
			srcPtr += srcStride;
			dstPtr += dstStride;
		}
	}
	else if (srcType == kChannelFormatFloat && dstType == kChannelFormatByte)
	{
		// compressing
		for (size_t i = 0; i < vertexCount; i++)
		{
			UInt8 comp = 0;
			for ( ; comp < minDim; comp++)
				reinterpret_cast<SInt8*>(dstPtr)[comp] = f32_to_s8(reinterpret_cast<const float*>(srcPtr)[comp]);
			for ( ; comp < dstDim; comp++)
				reinterpret_cast<SInt8*>(dstPtr)[comp] = 0;
			srcPtr += srcStride;
			dstPtr += dstStride;
		}
	}
#endif
	else
		ErrorString("Unsupported conversion of vertex formats");
}
+
// Copies the channels selected by copyChannels from one vertex buffer to
// another. When source and destination formats match, data is copied verbatim
// with unrolled fast paths for the common 4/8/12-byte channel sizes; when they
// differ, ConvertCopyChannel performs the format conversion per vertex.
static void CopyChannels (size_t vertexCount, unsigned copyChannels,
	const StreamInfoArray srcStreams, const ChannelInfoArray srcChannels, const UInt8* srcData,
	const StreamInfoArray dstStreams, const ChannelInfoArray dstChannels, UInt8* dstData)
{
	// Walk the mask bit by bit; the loop ends early once no selected bits remain
	for (unsigned chan = copyChannels, i = 0; chan && (i < kShaderChannelCount); i++, chan >>= 1)
	{
		if (0 == (chan & 1))
			continue;

		const ChannelInfo& srcChannel = srcChannels[i];
		const ChannelInfo& dstChannel = dstChannels[i];

		const UInt8* srcPtr = srcData + srcChannel.CalcOffset(srcStreams);
		UInt8* dstPtr = dstData + dstChannel.CalcOffset(dstStreams);
		UInt8 srcStride = srcChannel.CalcStride(srcStreams);
		UInt8 dstStride = dstChannel.CalcStride(dstStreams);

		if(srcChannel.format == dstChannel.format)
		{
			size_t copySize = srcChannel.dimension * GetChannelFormatSize(srcChannel.format);
			// NOTE: the per-vertex counters below shadow the channel index 'i';
			// that is benign since the channel index is not used inside the cases
			switch (copySize)
			{
				case 4:
				{
					for (size_t i=0; i<vertexCount; ++i)
					{
						*(reinterpret_cast<UInt32*> (dstPtr) + 0) = *(reinterpret_cast<const UInt32*> (srcPtr) + 0);
						srcPtr += srcStride;
						dstPtr += dstStride;
					}
					break;
				}
				case 8:
				{
					for (size_t i=0; i<vertexCount; ++i)
					{
						*(reinterpret_cast<UInt32*> (dstPtr) + 0) = *(reinterpret_cast<const UInt32*> (srcPtr) + 0);
						*(reinterpret_cast<UInt32*> (dstPtr) + 1) = *(reinterpret_cast<const UInt32*> (srcPtr) + 1);
						srcPtr += srcStride;
						dstPtr += dstStride;
					}
					break;
				}
				case 12:
				{
					for (size_t i=0; i<vertexCount; ++i)
					{
						*(reinterpret_cast<UInt32*> (dstPtr) + 0) = *(reinterpret_cast<const UInt32*> (srcPtr) + 0);
						*(reinterpret_cast<UInt32*> (dstPtr) + 1) = *(reinterpret_cast<const UInt32*> (srcPtr) + 1);
						*(reinterpret_cast<UInt32*> (dstPtr) + 2) = *(reinterpret_cast<const UInt32*> (srcPtr) + 2);
						srcPtr += srcStride;
						dstPtr += dstStride;
					}
					break;
				}
				default:
				{
					// Fallback for any other channel size
					for (size_t i=0; i<vertexCount; ++i)
					{
						memcpy (dstPtr, srcPtr, copySize);
						srcPtr += srcStride;
						dstPtr += dstStride;
					}
					break;
				}
			}
		}
		else
		{
			ConvertCopyChannel(vertexCount, srcPtr, srcStride, srcChannel.format, srcChannel.dimension, dstPtr, dstStride, dstChannel.format, dstChannel.dimension);
		}
	}
}
+
+VertexDataInfo::VertexDataInfo ()
+: m_Data(NULL)
+, m_DataSize(0)
+, m_VertexCount(0)
+, m_VertexSize(0)
+, m_CurrentChannels(0)
+{
+ // Channels and streams have default constructors
+}
+
// Builds a new vertex buffer holding the requested subset of src's channels,
// laid out according to the given streams/channels layouts. Uses a plain
// memcpy when the resulting layout is byte-identical to the source, and a
// per-channel copy/conversion otherwise.
VertexData::VertexData (VertexData const& src, unsigned copyChannels, const VertexStreamsLayout& streams, const VertexChannelsLayout& channels)
{
	// We do not support inserting new channels that are not present in the source
	Assert ((copyChannels & src.GetChannelMask()) == copyChannels);

	UpdateStreams(copyChannels, src.m_VertexCount, streams, channels);
	// Allocation includes kVertexDataPadding extra bytes (GetAllocateDataSize)
	m_Data = (UInt8*) UNITY_MALLOC_ALIGNED (kMemVertexData, VertexData::GetAllocateDataSize (m_DataSize), kVertexDataAlign);

	const VertexData& dest = *this;
	if (m_DataSize == src.m_DataSize &&
		copyChannels == src.GetChannelMask() &&
		CompareMemory(dest.m_Channels, src.m_Channels) &&
		CompareMemory(dest.m_Streams, src.m_Streams))
	{
		// Simple copy if the format didn't change
		memcpy (m_Data, src.m_Data, m_DataSize);
	}
	else
		CopyChannels (m_VertexCount, copyChannels, src.m_Streams, src.m_Channels, src.m_Data, m_Streams, m_Channels, m_Data);
}
+
// Releases the owned vertex buffer (if any).
VertexData::~VertexData ()
{
	Deallocate();
}
+
+void VertexData::Deallocate ()
+{
+ if (m_Data)
+ UNITY_FREE(kMemVertexData, m_Data);
+ m_Data = NULL;
+}
+
// Changes the vertex count and/or channel layout while preserving as much of
// the existing data as possible. If the resulting layout is unchanged, the
// buffer is simply reallocated; otherwise a new buffer is allocated and the
// channels present both before and after are copied (and converted) for the
// overlapping vertex range, then the old buffer is freed.
void VertexData::Resize (size_t vertexCount, unsigned channelMask, const VertexStreamsLayout& streams, const VertexChannelsLayout& channels)
{
	// Snapshot the old layout and buffer before UpdateStreams overwrites them
	ChannelInfoArray srcChannels;
	StreamInfoArray srcStreams;
	memcpy(srcChannels, m_Channels, sizeof(srcChannels));
	memcpy(srcStreams, m_Streams, sizeof(srcStreams));
	UInt32 srcChannelMask = m_CurrentChannels;
	UInt32 srcVertexCount = m_VertexCount;
	UInt8* srcData = m_Data;

	UpdateStreams(channelMask, vertexCount, streams, channels);

	// In case the streams and channels don't change, simply reallocate the buffer and return
	// Note that this will rarely be true with multiple streams since the stream offsets change
	if (m_Data && CompareMemory(srcChannels, m_Channels) && CompareMemory(srcStreams, m_Streams))
	{
		m_Data = (UInt8*)UNITY_REALLOC_ALIGNED(kMemVertexData, m_Data, VertexData::GetAllocateDataSize(m_DataSize), kVertexDataAlign);
		return;
	}

	m_Data = (UInt8*)UNITY_MALLOC_ALIGNED(kMemVertexData, VertexData::GetAllocateDataSize(m_DataSize), kVertexDataAlign);
	// copy over the old data
	if (srcData)
	{
		// Only channels surviving the layout change can be copied
		unsigned copyChannels = srcChannelMask & m_CurrentChannels;
		size_t toCopyCount = std::min<size_t>(srcVertexCount, m_VertexCount);
		CopyChannels(toCopyCount, copyChannels, srcStreams, srcChannels, srcData, m_Streams, m_Channels, m_Data);
		UNITY_FREE(kMemVertexData, srcData);
	}
}
+
+
// Endian-swaps the vertex components in place, stream by stream. Only the
// channels in kChannelSwapMask are swapped (Color is excluded - presumably
// because its components are single bytes; confirm against the channel
// formats in use); skipped channels still advance the per-vertex offset.
void VertexData::SwapEndianess ()
{
	unsigned const kChannelSwapMask = VERTEX_FORMAT5(Vertex, Normal, TexCoord0, TexCoord1, Tangent);
	for (int s = 0; s < kMaxVertexStreams; s++)
	{
		if (m_Streams[s].stride)
		{
			StreamInfo& stream = m_Streams[s];
			size_t stride = stream.stride;
			UInt8* dataStart = m_Data + stream.offset;
			UInt8* dataEnd = dataStart + stream.stride * m_VertexCount;
			UInt32 channelMask = stream.channelMask;
			// Walk every vertex of the stream
			for (UInt8* p = dataStart, *end = dataEnd; p != end; p += stride)
			{
				// counting from LSb, 1 denotes that a value should be endian-swapped
				int localOffset = 0;
				for (unsigned i=0, chan = channelMask, swap = kChannelSwapMask; i<kShaderChannelCount; ++i, chan >>= 1, swap >>= 1)
				{
					if (chan & 1)
					{
						size_t componentCount = m_Channels[i].dimension;
						size_t componentSize = GetChannelFormatSize(m_Channels[i].format);
						if(swap & 1)
						{
							Assert (m_Channels [i].IsValid());
							SwapEndianArray (p + localOffset, componentSize, componentCount);
						}
						// Channels not swapped still occupy space inside the vertex
						localOffset += componentCount * componentSize;
					}
				}
			}
		}
	}
}
+
+void swap (VertexData& a, VertexData& b)
+{
+ std::swap_ranges (a.m_Channels, a.m_Channels + kShaderChannelCount, b.m_Channels);
+ std::swap_ranges (a.m_Streams, a.m_Streams + kMaxVertexStreams, b.m_Streams);
+ std::swap (a.m_CurrentChannels, b.m_CurrentChannels);
+ std::swap (a.m_VertexSize, b.m_VertexSize);
+ std::swap (a.m_VertexCount, b.m_VertexCount);
+ std::swap (a.m_DataSize, b.m_DataSize);
+ std::swap (a.m_Data, b.m_Data);
+}
+
+void CopyVertexDataChannels (size_t vertexCount, unsigned copyChannels, const VertexData& srcData, VertexData& dstData)
+{
+ Assert (vertexCount <= srcData.GetVertexCount() && vertexCount <= dstData.GetVertexCount());
+ Assert ((srcData.GetChannelMask() & copyChannels) == copyChannels);
+ Assert ((dstData.GetChannelMask() & copyChannels) == copyChannels);
+ CopyChannels (vertexCount, copyChannels,
+ srcData.GetStreams(), srcData.GetChannels(), srcData.GetDataPtr(),
+ dstData.GetStreams(), dstData.GetChannels(), dstData.GetDataPtr());
+}
+
diff --git a/Runtime/Filters/Mesh/VertexData.h b/Runtime/Filters/Mesh/VertexData.h
new file mode 100644
index 0000000..7cc6c98
--- /dev/null
+++ b/Runtime/Filters/Mesh/VertexData.h
@@ -0,0 +1,253 @@
+#ifndef VERTEX_DATA_H_
+#define VERTEX_DATA_H_
+
+#include "Runtime/Utilities/StrideIterator.h"
+#include "Runtime/GfxDevice/GfxDeviceTypes.h"
+#include "Runtime/BaseClasses/ObjectDefines.h"
+#include "Runtime/Serialize/SerializeUtility.h"
+#include "Runtime/Serialize/TransferFunctionFwd.h"
+
+class VertexData;
+
+void swap (VertexData& a, VertexData& b);
+
// Describes one interleaved vertex stream: which shader channels it carries,
// its byte offset within the vertex buffer, and its per-vertex stride.
// Also declares StreamInfoArray, a fixed array covering all supported streams.
typedef struct StreamInfo
{
	enum { kDividerOpDivide=0, kDividerOpModulo };

	UInt32 channelMask;	// bitmask of ShaderChannel bits carried by this stream
	UInt32 offset;		// byte offset of the stream within the vertex buffer
	UInt16 frequency;	// NOTE(review): presumably a stream frequency divider - not written by UpdateStreams; confirm with GfxDevice usage
	UInt8 stride;		// bytes per vertex in this stream
	UInt8 dividerOp;	// kDividerOp* value applied together with frequency

	// We use default constructors instead of memset()
	StreamInfo() : channelMask(0), offset(0), frequency(0), stride(0), dividerOp(kDividerOpDivide) {}
	void Reset() { *this = StreamInfo(); }

	bool operator == (const StreamInfo& rhs) const { return (channelMask == rhs.channelMask) && (offset == rhs.offset) && (frequency == rhs.frequency) && (stride == rhs.stride) && (dividerOp == rhs.dividerOp); }
	bool operator != (const StreamInfo& rhs) const { return !(*this == rhs); }

	DECLARE_SERIALIZE_NO_PPTR (StreamInfo);

#if SUPPORT_SERIALIZED_TYPETREES
	// Reads the legacy Unity 3.5 serialized layout (32-bit stride + dropped 'align' field)
	template<class TransferFunction>
	void TransferWorkaround35SerializationFuckup (TransferFunction& transfer);
#endif

} StreamInfoArray [kMaxVertexStreams];
+
// Assignment of shader channels to vertex streams: one channel bitmask per stream.
struct VertexStreamsLayout
{
	UInt32 channelMasks[kMaxVertexStreams];
};
+
// Describes where a single shader channel lives inside the vertex data: which
// stream, the byte offset within one vertex of that stream, and the component
// format/count. Also declares ChannelInfoArray covering all shader channels.
typedef struct ALIGN_TYPE(4) ChannelInfo
{
	UInt8 stream;		// index into StreamInfoArray
	UInt8 offset;		// byte offset inside one vertex of the stream
	UInt8 format;		// component format (kChannelFormat* value)
	UInt8 dimension;	// component count; kInvalidDimension marks an unused channel

	enum { kInvalidDimension = 0 };

	// We use default constructors instead of memset()
	ChannelInfo() : stream(0), offset(0), format(0), dimension(kInvalidDimension) {}

	// Absolute byte offset of this channel within the whole vertex buffer
	UInt32 CalcOffset(const StreamInfoArray streams) const { return streams[stream].offset + offset; }
	// Distance in bytes between consecutive vertices of this channel
	UInt32 CalcStride(const StreamInfoArray streams) const { return streams[stream].stride; }
	bool IsValid() const { return (kInvalidDimension != dimension); }
	void Reset() { *this = ChannelInfo(); }

	bool operator == (const ChannelInfo& rhs) const { return (stream == rhs.stream) && (offset == rhs.offset) && (format == rhs.format) && (dimension == rhs.dimension); }
	bool operator != (const ChannelInfo& rhs) const { return !(*this == rhs); }

	DECLARE_SERIALIZE_NO_PPTR (ChannelInfo);

} ChannelInfoArray [kShaderChannelCount];
+
// Per-channel format and dimension description, used as the "recipe" when
// (re)building a vertex layout in VertexDataInfo::UpdateStreams.
struct VertexChannelsLayout
{
	struct Channel
	{
		Channel(UInt8 fmt, UInt8 dim) : format(fmt), dimension(dim) {}
		Channel() : format(0), dimension(0) {}
		UInt8 format;		// component format (kChannelFormat* value)
		UInt8 dimension;	// component count; 0 means the channel is unused
	};
	Channel channels[kShaderChannelCount];
};
+
+
// Serializes a StreamInfo. The field order below defines the serialized
// layout and must stay stable.
template<class TransferFunc>
void StreamInfo::Transfer (TransferFunc& transfer)
{
	#if SUPPORT_SERIALIZED_TYPETREES
	// Data written with the broken 3.5 layout must be read with the legacy field set
	if (transfer.GetFlags() & kWorkaround35MeshSerializationFuckup)
	{
		TransferWorkaround35SerializationFuckup (transfer);
		return;
	}
	#endif

	transfer.Transfer (channelMask, "channelMask", kHideInEditorMask);
	transfer.Transfer (offset, "offset", kHideInEditorMask);
	transfer.Transfer (stride, "stride", kHideInEditorMask);
	transfer.Transfer (dividerOp, "dividerOp", kHideInEditorMask);
	transfer.Transfer (frequency, "frequency", kHideInEditorMask);
}
+
#if SUPPORT_SERIALIZED_TYPETREES
// Reads the legacy Unity 3.5 serialized layout: stride was stored as 32 bits
// and followed by an 'align' field. 'align' is read into a temporary and
// deliberately discarded; stride is narrowed back to the in-memory 8 bits.
template<class TransferFunc>
void StreamInfo::TransferWorkaround35SerializationFuckup (TransferFunc& transfer)
{
	transfer.Transfer (channelMask, "channelMask", kHideInEditorMask);
	transfer.Transfer (offset, "offset", kHideInEditorMask);

	UInt32 align;
	UInt32 stride32bit;
	transfer.Transfer (stride32bit, "stride", kHideInEditorMask);
	transfer.Transfer (align, "align", kHideInEditorMask);

	stride = (UInt8) stride32bit;
}
#endif
+
// Serializes a ChannelInfo; the field order defines the serialized layout.
template<class TransferFunc>
void ChannelInfo::Transfer (TransferFunc& transfer)
{
	transfer.Transfer (stream, "stream", kHideInEditorMask);
	transfer.Transfer (offset, "offset", kHideInEditorMask);
	transfer.Transfer (format, "format", kHideInEditorMask);
	transfer.Transfer (dimension, "dimension", kHideInEditorMask);
}
+
// Information about all vertex data, but does not own the memory
class VertexDataInfo
{
public:
	enum
	{
		kVertexDataAlign = 32,		// alignment of the vertex buffer allocation
		kVertexStreamAlign = 16,	// alignment of each stream's byte offset
		kVertexDataPadding = 16		// extra bytes allocated past the accessible size (see VertexData::GetAllocateDataSize)
	};

	// Predefined stream/channel layout presets (defined in the .cpp)
	static VertexStreamsLayout kVertexStreamsDefault;
	static VertexStreamsLayout kVertexStreamsSkinnedHotColdSplit;
	static VertexChannelsLayout kVertexChannelsDefault;
	static VertexChannelsLayout kVertexChannelsCompressed;
	static VertexChannelsLayout kVertexChannelsCompressedAggressive;
#if UNITY_EDITOR
	static VertexStreamsLayout kVertexStreamsSkinnedHotColdSplitPS3;
#endif

	// Rounds a byte size up to the next kVertexStreamAlign boundary
	static size_t AlignStreamSize (size_t size) { return (size + (kVertexStreamAlign-1)) & ~(kVertexStreamAlign-1); }

	friend void ::swap (VertexData& a, VertexData& b);

	VertexDataInfo ();

	// True if the channel is present; must agree with the m_CurrentChannels bit
	bool HasChannel (ShaderChannel shaderChannelIndex) const
	{
		Assert ((m_Channels[shaderChannelIndex].dimension != 0) == (((m_CurrentChannels & (1 << shaderChannelIndex)) != 0)));
		return m_Channels[shaderChannelIndex].dimension != 0;
	}

	// Recomputes stream/channel layout for the given mask and vertex count (bookkeeping only, no memory)
	void UpdateStreams(unsigned newChannelMask, size_t newVertexCount, const VertexStreamsLayout& streams = kVertexStreamsDefault, const VertexChannelsLayout& channels = kVertexChannelsDefault);

	size_t GetActiveStreamCount() const ;	// number of streams carrying at least one channel
	size_t GetStreamIndex(ShaderChannel channel) const ;	// stream carrying the channel, or (size_t)-1 if absent
	const StreamInfo* GetStreams() const { return m_Streams; }
	const StreamInfo& GetStream(int index) const { return m_Streams[index]; }

	const ChannelInfo* GetChannels() const { return m_Channels; }
	const ChannelInfo& GetChannel(int index) const { return m_Channels[index]; }

	// Snapshot the current layout as value types
	VertexStreamsLayout GetStreamsLayout() const;
	VertexChannelsLayout GetChannelsLayout() const;

	// Subset/equality checks against prescribed layouts
	bool ConformsToStreamsLayout(const VertexStreamsLayout& streams) const;
	bool ConformsToChannelsLayout(const VertexChannelsLayout& channels) const;

	unsigned GetChannelMask () const { return m_CurrentChannels; }
	size_t GetDataSize () const { return m_DataSize; }
	size_t GetVertexSize () const { return m_VertexSize; }	// sum of all active stream strides
	size_t GetVertexCount () const { return m_VertexCount; }
	size_t GetChannelOffset (unsigned channel) const { return m_Channels[channel].CalcOffset(m_Streams); }
	size_t GetChannelStride (unsigned channel) const { return m_Channels[channel].CalcStride(m_Streams); }
	UInt8* GetDataPtr () const { return m_Data; }

	// Begin iterator over one channel; yields a NULL-based iterator when the channel is absent
	template<class T>
	StrideIterator<T> MakeStrideIterator (ShaderChannel shaderChannelIndex) const
	{
		Assert (shaderChannelIndex < kShaderChannelCount);
		void* p = m_Data + GetChannelOffset(shaderChannelIndex);
		return HasChannel (shaderChannelIndex) ? StrideIterator<T> (p, GetChannelStride (shaderChannelIndex)) : StrideIterator<T> (NULL, GetChannelStride (shaderChannelIndex));
	}

	// End iterator matching MakeStrideIterator for the same channel
	template<class T>
	StrideIterator<T> MakeEndIterator (ShaderChannel shaderChannelIndex) const
	{
		T* end = GetEndPointer<T> (shaderChannelIndex);
		return StrideIterator<T> (end, GetChannelStride (shaderChannelIndex));
	}

	// One-past-the-last element of a channel, or NULL when the channel is absent
	template<class T>
	T* GetEndPointer (ShaderChannel shaderChannelIndex) const
	{
		Assert (shaderChannelIndex < kShaderChannelCount);
		void* p = HasChannel (shaderChannelIndex) ? (m_Data + GetChannelOffset(shaderChannelIndex) + m_VertexCount * GetChannelStride (shaderChannelIndex)) : NULL;
		return reinterpret_cast<T*> (p);
	}

protected:
	ChannelInfoArray m_Channels;
	StreamInfoArray m_Streams;

	size_t m_VertexSize; // must match m_CurrentChannels
	UInt8* m_Data;	// not owned here; ownership lives in VertexData

	// The following are being serialized. Their size must match in both 32 and 64 bit platforms
	UInt32 m_CurrentChannels; // kShaderChannel bitmask
	UInt32 m_VertexCount;
	unsigned m_DataSize;
};
+
+
// Owns the vertex memory
class VertexData : public VertexDataInfo
{
public:

	DECLARE_SERIALIZE (VertexData)

	VertexData () : VertexDataInfo() { }
	// Copies the selected channels of src into a freshly allocated buffer laid out per streams/channels
	VertexData (VertexData const& src, unsigned copyChannels, const VertexStreamsLayout& streams = kVertexStreamsDefault, const VertexChannelsLayout& channels = kVertexChannelsDefault);
	~VertexData ();

	// Actual allocation size: the accessible bytes plus trailing padding
	static size_t GetAllocateDataSize (size_t accesibleBufferSize) { return accesibleBufferSize + kVertexDataPadding; }

	void Deallocate ();
	// Reallocates/relayouts the buffer, preserving overlapping channels and vertices
	void Resize (size_t vertexCount, unsigned channelMask, const VertexStreamsLayout& streams = kVertexStreamsDefault, const VertexChannelsLayout& channels = kVertexChannelsDefault);
	void SwapEndianess ();

private:
	// Non-copyable: use the explicit channel-copying constructor instead
	VertexData (const VertexData& o);
	void operator= (const VertexData& o);
	VertexData (const VertexDataInfo& o);
	void operator= (const VertexDataInfo& o);

#if SUPPORT_SERIALIZED_TYPETREES
	template<class TransferFunction>
	void TransferWorkaround35SerializationFuckup (TransferFunction& transfer);
#endif
};
+
+
+void CopyVertexDataChannels (size_t vertexCount, unsigned copyChannels, const VertexData& srcData, VertexData& dstData);
+size_t GetChannelFormatSize(UInt8 format);
+
+
+
+#endif