diff options
Diffstat (limited to 'Runtime/Filters/Mesh')
47 files changed, 13319 insertions, 0 deletions
diff --git a/Runtime/Filters/Mesh/CompressedMesh.cpp b/Runtime/Filters/Mesh/CompressedMesh.cpp new file mode 100644 index 0000000..02cc74c --- /dev/null +++ b/Runtime/Filters/Mesh/CompressedMesh.cpp @@ -0,0 +1,755 @@ +#include "UnityPrefix.h" +#include "CompressedMesh.h" +#include "LodMesh.h" +#include "Runtime/Animation/AnimationCurveUtility.h" + + +#define sqr(x) ((x)*(x)) + +void PackedFloatVector::PackFloats(float *data, int itemCountInChunk, int chunkStride, int numChunks, int bitSize, bool adjustBitSize) +{ + float maxf = -std::numeric_limits<float>::infinity(); + float minf = std::numeric_limits<float>::infinity(); + float* end = Stride (data, numChunks * chunkStride); + for(float* it = data; it != end; it = Stride (it, chunkStride)) + { + for (int i=0; i<itemCountInChunk; ++i) + { + if(maxf < it[i]) + maxf = it[i]; + if(minf > it[i]) + minf = it[i]; + } + } + + m_Range = maxf-minf; + + if(adjustBitSize) + bitSize += int(ceilf(Log2(m_Range))); + if(bitSize > 32) + bitSize = 32; + + m_Start = minf; + m_NumItems = numChunks * itemCountInChunk; + m_BitSize = bitSize; + m_Data.resize((m_NumItems * bitSize + 7)/8, 0); + + + float scale = 1.0/m_Range; + + int indexPos = 0; + int bitPos = 0; + + for(float* it = data; it != end; it = Stride (it, chunkStride)) + { + for(int i=0; i<itemCountInChunk; ++i) + { + float scaled = (it[i] - m_Start) * scale; + if(scaled < 0) scaled = 0; + if(scaled > 1) scaled = 1; + + UInt32 x = UInt32(scaled * ((1 << (m_BitSize)) - 1)); + + int bits = 0; + while(bits < m_BitSize) + { + m_Data[indexPos] |= (x >> bits) << bitPos; + int num = std::min( m_BitSize-bits, 8-bitPos); + bitPos += num; + bits += num; + if(bitPos == 8) + { + indexPos++; + bitPos = 0; + } + } + } + } +} + +void PackedFloatVector::UnpackFloats(float *data, int itemCountInChunk, int chunkStride, int start, int numChunks) +{ + int bitPos = m_BitSize*start; + int indexPos = bitPos/8; + bitPos %= 8; + + float scale = 1.0/m_Range; + if (numChunks == -1) + numChunks = 
m_NumItems / itemCountInChunk; + + for(float* end = Stride (data, chunkStride * numChunks); data != end; data = Stride (data, chunkStride)) + { + for (int i=0; i<itemCountInChunk; ++i) + { + UInt32 x = 0; + + int bits = 0; + while(bits < m_BitSize) + { + x |= (m_Data[indexPos] >> bitPos) << bits; + int num = std::min( m_BitSize-bits, 8-bitPos); + bitPos += num; + bits += num; + if(bitPos == 8) + { + indexPos++; + bitPos = 0; + } + } + x &= (1 << m_BitSize) - 1; + data[i] = (x / (scale * ((1 << (m_BitSize)) - 1))) + m_Start; + } + } +} + +template <class IntSize> void PackedIntVector::PackInts(IntSize *data, int numItems) +{ + // make sure that the intsize is an unsigned type + Assert( (IntSize)0 < (IntSize)-1 ); + + UInt32 maxi = 0; + for(int i=0; i<numItems; i++) + if(maxi < data[i]) + maxi = data[i]; + + m_NumItems = numItems; + //Prevent overflow + m_BitSize = UInt8(maxi == 0xFFFFFFFF ? 32 : ceilf(Log2(maxi+1))); + m_Data.resize((numItems * m_BitSize + 7)/8, 0); + + + int indexPos = 0; + int bitPos = 0; + for(int i=0; i<numItems; i++) + { + int bits = 0; + while(bits < m_BitSize) + { + m_Data[indexPos] |= (data[i] >> bits) << bitPos; + int num = std::min( m_BitSize-bits, 8-bitPos); + bitPos += num; + bits += num; + if(bitPos == 8) + { + indexPos++; + bitPos = 0; + } + } + } +} + +template <class IntSize> void PackedIntVector::UnpackInts(IntSize *data) +{ + int indexPos = 0; + int bitPos = 0; + for(int i=0; i<m_NumItems; i++) + { + int bits = 0; + data[i] = 0; + while(bits < m_BitSize) + { + data[i] |= (m_Data[indexPos] >> bitPos) << bits; + int num = std::min( m_BitSize-bits, 8-bitPos); + bitPos += num; + bits += num; + if(bitPos == 8) + { + indexPos++; + bitPos = 0; + } + } + data[i] &= (1ULL << m_BitSize) - 1; + } +} + + +void PackedQuatVector::PackQuats(Quaternionf *data, int numItems) +{ + m_NumItems = numItems; + m_Data.resize(numItems * (32/8), 0); + + int indexPos = 0; + int bitPos = 0; + + for(int i=0; i<numItems; i++) + { + Quaternionf &q = data[i]; + 
UInt8 flags = q.x<0? 4:0; + + float max=fabs(q.x); + if(fabs(q.y) > max) + { + max = fabs(q.y); + flags = 1; + if(q.y<0) + flags |= 4; + } + if(fabs(q.z) > max) + { + max = fabs(q.z); + flags = 2; + if(q.z<0) + flags |= 4; + } + if(fabs(q.w) > max) + { + max = fabs(q.w); + flags = 3; + if(q.w<0) + flags |= 4; + } + int bits = 0; + while(bits < 3) + { + m_Data[indexPos] |= (flags >> bits) << bitPos; + int num = std::min( 3-bits, 8-bitPos); + bitPos += num; + bits += num; + if(bitPos == 8) + { + indexPos++; + bitPos = 0; + } + } + for(int j=0;j<4;j++) + { + if((flags&3) != j) + { + int bitSize = (((flags&3)+1)%4 == j)?9:10; + float scaled = (q[j] + 1) * 0.5; + if(scaled < 0) scaled = 0; + if(scaled > 1) scaled = 1; + + UInt32 x = UInt32(scaled * ((1 << bitSize) - 1)); + + bits = 0; + while(bits < bitSize) + { + m_Data[indexPos] |= (x >> bits) << bitPos; + int num = std::min( bitSize-bits, 8-bitPos); + bitPos += num; + bits += num; + if(bitPos == 8) + { + indexPos++; + bitPos = 0; + } + } + } + } + } +} + +void PackedQuatVector::UnpackQuats(Quaternionf *data) +{ + int indexPos = 0; + int bitPos = 0; + + for(int i=0; i<m_NumItems; i++) + { + UInt32 flags = 0; + + int bits = 0; + while(bits < 3) + { + flags |= (m_Data[indexPos] >> bitPos) << bits; + int num = std::min( 3-bits, 8-bitPos); + bitPos += num; + bits += num; + if(bitPos == 8) + { + indexPos++; + bitPos = 0; + } + } + flags &= 7; + + + Quaternionf &q = data[i]; + float sum = 0; + for(int j=0;j<4;j++) + { + if((flags&3) != j) + { + int bitSize = (((flags&3)+1)%4 == j)?9:10; + UInt32 x = 0; + + bits = 0; + while(bits < bitSize) + { + x |= (m_Data[indexPos] >> bitPos) << bits; + int num = std::min( bitSize-bits, 8-bitPos); + bitPos += num; + bits += num; + if(bitPos == 8) + { + indexPos++; + bitPos = 0; + } + } + x &= (1 << bitSize) - 1; + q[j] = (x / (0.5 * ((1 << (bitSize)) - 1))) - 1; + sum += sqr(q[j]); + } + } + + int lastComponent = flags&3; + q[lastComponent] = FastSqrt(1 - sum); + if(flags & 4) + 
q[lastComponent] = -q[lastComponent]; + } +} + +void CompressedMesh::Compress(Mesh &src, int compression) +{ + int numVertices = src.GetVertexCount(); + + int vertexBits = 0; + switch(compression) + { + case kMeshCompressionHigh: vertexBits = 10; break; + case kMeshCompressionMed: vertexBits = 16; break; + case kMeshCompressionLow: vertexBits = 20; break; + } + m_Vertices.PackFloats((float*)src.GetChannelPointer(kShaderChannelVertex), 3, src.GetStride (kShaderChannelVertex), numVertices, vertexBits, false); + + //Possible optimization: use Edgebreaker algorithm + //for 1.8 bits per triangle connectivity information + //http://www.gvu.gatech.edu/~jarek/edgebreaker/eb/ + + int numIndices = src.m_IndexBuffer.size(); + numIndices/=2; + + m_Triangles.PackInts<UInt16>((UInt16*)&src.m_IndexBuffer[0],numIndices); + + if(src.IsAvailable(kShaderChannelTexCoord0)) + { + int uvBits = 0; + switch(compression) + { + case kMeshCompressionHigh: uvBits = 8; break; + case kMeshCompressionMed: uvBits = 10; break; + case kMeshCompressionLow: uvBits = 16; break; + } + if(src.IsAvailable(kShaderChannelTexCoord1)) + { + Vector2f *uv12 = new Vector2f[numVertices*2]; + src.ExtractUvArray(0, uv12); + src.ExtractUvArray(1, uv12 + numVertices); + m_UV.PackFloats(&uv12->x, 2, sizeof(Vector2f), numVertices*2, uvBits, true); + delete[] uv12; + } + else + m_UV.PackFloats((float*)src.GetChannelPointer (kShaderChannelTexCoord0), 2, src.GetStride (kShaderChannelTexCoord0), numVertices, uvBits, true); + } + else if(src.IsAvailable(kShaderChannelTexCoord1)) + ErrorString( "Mesh compression doesn't work on Meshes wich only have a UV1 channel but no UV0 channel. UVs will be dropped." 
); + + if(src.IsAvailable (kShaderChannelNormal)) + { + int normalBits = 0; + switch(compression) + { + case kMeshCompressionHigh: normalBits = 6; break; + case kMeshCompressionMed: normalBits = 8; break; + case kMeshCompressionLow: normalBits = 8; break; + } + + float *normals = new float[numVertices*2]; + UInt32 *signs = new UInt32[numVertices]; + StrideIterator<Vector3f> n = src.GetNormalBegin (); + for(int i=0;i<numVertices; ++i, ++n) + { + normals[i*2+0] = n->x; + normals[i*2+1] = n->y; + signs[i] = n->z>0?1:0; + } + m_Normals.PackFloats(normals, 2, sizeof (float) * 2, numVertices, normalBits, false); + m_NormalSigns.PackInts(signs, numVertices); + delete[] normals; + delete[] signs; + } + + if(src.IsAvailable (kShaderChannelTangent)) + { + int normalBits = 0; + switch(compression) + { + case kMeshCompressionHigh: normalBits = 6; break; + case kMeshCompressionMed: normalBits = 8; break; + case kMeshCompressionLow: normalBits = 8; break; + } + + float *tangents = new float[numVertices*2]; + UInt32 *signs = new UInt32[numVertices*2]; + StrideIterator<Vector4f> t = src.GetTangentBegin (); + for(int i=0;i<numVertices; ++i, ++t) + { + tangents[i*2+0] = t->x; + tangents[i*2+1] = t->y; + signs[i*2+0] = t->z>0?1:0; + signs[i*2+1] = t->w>0?1:0; + } + m_Tangents.PackFloats(tangents, 2, sizeof (float) * 2, numVertices, normalBits, false); + m_TangentSigns.PackInts(signs, numVertices*2); + delete[] tangents; + delete[] signs; + } + + // TODO: do an actual compression + if(src.IsAvailable (kShaderChannelColor)) + { + dynamic_array<UInt32> tempColors (numVertices, kMemTempAlloc); + std::transform (src.GetColorBegin (), src.GetColorEnd (), tempColors.begin (), OpColorRGBA32ToUInt32()); + m_Colors.PackInts<UInt32> (tempColors.data (), tempColors.size ()); + } + + BoneInfluence* influence = src.GetBoneWeights(); + if(influence) + { + UInt32 *weights = new UInt32[numVertices*3]; + UInt32 *indices = new UInt32[numVertices*4]; + int weightPos = 0; + int boneIndexPos = 0; + 
for(int i=0;i<numVertices;i++) + { + int j; + int sum = 0; + + //As all four bone weights always add up to 1, we can always calculate the fourth one + // by subtracting the other three from 1. So we don't need to store it. + + //Furthermore, once the weights we stored add up to 1, we don't need to store further + //weights or indices, as these will necessarily be zero. This is often the case, as many + //vertices have only the first weight set to one, and all others to zero. + + //find last non-zero entry -- we don't need to store those after this. + int lastNonZero; + for(lastNonZero=3;lastNonZero>0&&influence[i].weight[lastNonZero]==0;lastNonZero--) + {} + + + for(j=0;j<3 && j<=lastNonZero && sum<31;j++) + { + weights[weightPos] = UInt32(influence[i].weight[j] * 31); + indices[boneIndexPos++] = influence[i].boneIndex[j]; + sum += weights[weightPos++]; + } + if(lastNonZero<3) + { + //we stored less then 3 weights, but they don't add up to one, due to quantization + //inprecision. + //Add the difference, so the math works out on decompression. + if(sum<31) + weights[weightPos-1] += 31-sum; + } + + //we stored three weights, but they don't add up to one. we don't need to store the fourth weight + //(as it can be calculated from the other three), but we need the bone index. 
+ else if(sum<31) + indices[boneIndexPos++] = influence[i].boneIndex[j]; + } + + m_Weights.PackInts(weights, weightPos); + m_BoneIndices.PackInts(indices, boneIndexPos); + + delete[] weights; + delete[] indices; + } +} + +void CompressedMesh::Decompress(Mesh &src) +{ + int numIndices = m_Triangles.Count(); + src.m_IndexBuffer.resize(numIndices * 2); + m_Triangles.UnpackInts<UInt16>((UInt16*)&src.m_IndexBuffer[0]); + + int numVertices = m_Vertices.Count()/3; + unsigned decompressedFormat = 0; + if (m_Vertices.Count ()) decompressedFormat |= VERTEX_FORMAT1(Vertex); + if (m_Normals.Count()) decompressedFormat |= VERTEX_FORMAT1(Normal); + if (m_UV.Count()) decompressedFormat |= VERTEX_FORMAT1(TexCoord0); + if (m_UV.Count() == numVertices * 4) decompressedFormat |= VERTEX_FORMAT1(TexCoord1); + if (m_Tangents.Count()) decompressedFormat |= VERTEX_FORMAT1(Tangent); + if (m_Colors.Count()) decompressedFormat |= VERTEX_FORMAT1(Color); + + src.ResizeVertices(numVertices, decompressedFormat); + Assert (src.GetVertexCount () == numVertices); + + m_Vertices.UnpackFloats((float*)src.GetChannelPointer (kShaderChannelVertex), 3, src.GetStride (kShaderChannelVertex)); + + if(m_UV.Count()) + { + m_UV.UnpackFloats((float*)src.GetChannelPointer (kShaderChannelTexCoord0), 2, src.GetStride (kShaderChannelTexCoord0), 0, numVertices); + + if(m_UV.Count()==numVertices * 4) + { + m_UV.UnpackFloats((float*)src.GetChannelPointer (kShaderChannelTexCoord1), 2, src.GetStride (kShaderChannelTexCoord1), numVertices*2, numVertices); + } + } + + // TODO: This never gets written. Unity 3.4 and 3.5 never wrote this data. + // Most likely no version ever did. Remove code and bindpose serialization. 
+ if(m_BindPoses.Count()) + { + src.m_Bindpose.resize_initialized(m_BindPoses.Count()/16); + m_BindPoses.UnpackFloats(src.m_Bindpose[0].m_Data, 16, sizeof(float) * 16); + } + + if(m_Normals.Count()) + { + float *normalData = new float[m_Normals.Count()]; + UInt32 *signs = new UInt32[m_NormalSigns.Count()]; + + m_Normals.UnpackFloats(normalData, 2, sizeof(float) * 2); + m_NormalSigns.UnpackInts(signs); + + StrideIterator<Vector3f> n = src.GetNormalBegin (); + for(int i=0;i<m_Normals.Count()/2; ++i, ++n) + { + n->x = normalData[i*2+0]; + n->y = normalData[i*2+1]; + float zsqr = 1 - sqr(n->x) - sqr(n->y); + if(zsqr >= 0) + n->z = FastSqrt( zsqr ); + else + { + n->z = 0; + *n = Normalize(*n); + } + if(signs[i]==0) + n->z = -n->z; + } + + delete[] normalData; + delete[] signs; + } + + if(m_Tangents.Count()) + { + float *tangentData = new float[m_Tangents.Count()]; + UInt32 *signs = new UInt32[m_TangentSigns.Count()]; + + m_Tangents.UnpackFloats(tangentData, 2, sizeof(float) * 2); + m_TangentSigns.UnpackInts(signs); + + StrideIterator<Vector4f> t = src.GetTangentBegin (); + for(int i=0;i<m_Tangents.Count()/2; ++i, ++t) + { + t->x = tangentData[i*2+0]; + t->y = tangentData[i*2+1]; + float zsqr = 1-sqr(tangentData[i*2+0])-sqr(tangentData[i*2+1]); + if(zsqr >= 0.0f) + t->z = FastSqrt( zsqr ); + else + { + t->z = 0; + *(Vector3f*)(&*t) = Normalize(*(Vector3f*)(&*t)); + } + if(signs[i*2+0]==0) + t->z = -t->z; + + t->w = signs[i*2+1]?1.0:-1.0; + } + + delete[] tangentData; + delete[] signs; + } + + // TODO: do an actual compression + if (m_Colors.Count()) + { + dynamic_array<UInt32> tempColors (m_Colors.Count (), kMemTempAlloc); + m_Colors.UnpackInts<UInt32> (tempColors.data ()); + Assert (tempColors.size () == src.GetVertexCount ()); + strided_copy ((ColorRGBA32*)tempColors.begin (), (ColorRGBA32*)tempColors.end (), src.GetColorBegin ()); + } + + if(m_Weights.Count()) + { + UInt32 *weights = new UInt32[m_Weights.Count()]; + m_Weights.UnpackInts(weights); + UInt32 *boneIndices 
= new UInt32[m_BoneIndices.Count()]; + m_BoneIndices.UnpackInts(boneIndices); + src.m_Skin.resize_uninitialized(numVertices); + int bonePos = 0; + int boneIndexPos = 0; + int j=0; + int sum = 0; + + for(int i=0;i<m_Weights.Count();i++) + { + //read bone index and weight. + src.m_Skin[bonePos].weight[j] = weights[i]/31.0; + src.m_Skin[bonePos].boneIndex[j] = boneIndices[boneIndexPos++]; + j++; + sum += weights[i]; + + //the weights add up to one. fill the rest for this vertex with zero, and continue with next one. + if(sum >= 31) + { + for(;j<4;j++) + { + src.m_Skin[bonePos].weight[j] = 0; + src.m_Skin[bonePos].boneIndex[j] = 0; + } + bonePos++; + j = 0; + sum = 0; + } + //we read three weights, but they don't add up to one. calculate the fourth one, and read + //missing bone index. continue with next vertex. + else if(j==3) + { + src.m_Skin[bonePos].weight[j] = (31-sum)/31.0; + src.m_Skin[bonePos].boneIndex[j] = boneIndices[boneIndexPos++]; + bonePos++; + j = 0; + sum = 0; + } + } + + delete[] weights; + delete[] boneIndices; + } +} + +template <class T> void CompressedAnimationCurve::CompressTimeKeys(AnimationCurveTpl<T> &src) +{ + int numKeys = src.GetKeyCount(); + + float minTime=0; + for(int i=0;i<numKeys;i++) + { + float t = src.GetKey(i).time; + if(t < minTime) + { + //negative time key. offset all keys by this, so math doesn't break - but it's still wrong. 
+ minTime = t; + } + } + + + UInt32 *times = new UInt32[numKeys]; + UInt32 t=0; + for(int i=0;i<numKeys;i++) + { + times[i] = UInt32((src.GetKey(i).time - minTime) * 100); + times[i] -= t; + t += times[i]; + } + + m_Times.PackInts(times, numKeys); + + delete[] times; +} + +template <class T> void CompressedAnimationCurve::DecompressTimeKeys(AnimationCurveTpl<T> &src) +{ + int numKeys = m_Times.Count(); + UInt32 *times = new UInt32[numKeys]; + m_Times.UnpackInts(times); + + UInt32 t=0; + + src.ResizeUninitialized(numKeys); + + for(int i=0;i<numKeys;i++) + { + t+=times[i]; + src.GetKey(i).time = t*0.01; + } + delete[] times; +} + +void CompressedAnimationCurve::CompressQuatCurve(AnimationClip::QuaternionCurve &src) +{ + CompressTimeKeys(src.curve); + int numKeys = src.curve.GetKeyCount(); + + Quaternionf *qkeys = new Quaternionf[numKeys]; + for(int i=0;i<numKeys;i++) + qkeys[i] = src.curve.GetKey(i).value; + m_Values.PackQuats(qkeys, numKeys); + + delete[] qkeys; + + bool same = true; + + for(int i=0;i<numKeys && same;i++) + { + Quaternionf &q1 = src.curve.GetKey(i).inSlope; + Quaternionf &q2 = src.curve.GetKey(i).inSlope; + if(q1.x!=q2.x) + same = false; + if(q1.y!=q2.y) + same = false; + if(q1.z!=q2.z) + same = false; + if(q1.w!=q2.w) + same = false; + } + + float *keys = new float[numKeys*8]; + for(int i=0;i<numKeys;i++) + { + Quaternionf q = src.curve.GetKey(i).inSlope; + keys[i*4+0] = q.x; + keys[i*4+1] = q.y; + keys[i*4+2] = q.z; + keys[i*4+3] = q.w; + q = src.curve.GetKey(i).outSlope; + keys[(i+numKeys)*4+0] = q.x; + keys[(i+numKeys)*4+1] = q.y; + keys[(i+numKeys)*4+2] = q.z; + keys[(i+numKeys)*4+3] = q.w; + } + + //if in and out slopes are all the same, pack only the first of the two. 
+ if(same) + m_Slopes.PackFloats(keys, 1, sizeof(float), numKeys * 4, 6, false); + else + m_Slopes.PackFloats(keys, 1, sizeof(float), numKeys * 8, 6, false); + + delete[] keys; + + m_PreInfinity = src.curve.GetPreInfinityInternal(); + m_PostInfinity = src.curve.GetPostInfinityInternal(); + m_Path = src.path; +} + +void CompressedAnimationCurve::DecompressQuatCurve(AnimationClip::QuaternionCurve &src) +{ + DecompressTimeKeys(src.curve); + int numKeys = m_Values.Count(); + + Quaternionf *qkeys = new Quaternionf[numKeys]; + m_Values.UnpackQuats(qkeys); + for(int i=0;i<numKeys;i++) + src.curve.GetKey(i).value = qkeys[i]; + delete[] qkeys; + + float *keys = new float[numKeys*8]; + m_Slopes.UnpackFloats(keys, 1, sizeof(float)); + + //are there seperate in and out slopes? + int offs = 0; + if(m_Slopes.Count() == numKeys*8) + offs = numKeys; + for(int i=0;i<numKeys;i++) + { + src.curve.GetKey(i).inSlope.x = keys[i*4+0]; + src.curve.GetKey(i).inSlope.y = keys[i*4+1]; + src.curve.GetKey(i).inSlope.z = keys[i*4+2]; + src.curve.GetKey(i).inSlope.w = keys[i*4+3]; + src.curve.GetKey(i).outSlope.x = keys[(i+offs)*4+0]; + src.curve.GetKey(i).outSlope.y = keys[(i+offs)*4+1]; + src.curve.GetKey(i).outSlope.z = keys[(i+offs)*4+2]; + src.curve.GetKey(i).outSlope.w = keys[(i+offs)*4+3]; + } + delete[] keys; + + src.curve.SetPreInfinityInternal( m_PreInfinity ); + src.curve.SetPostInfinityInternal( m_PostInfinity ); + src.path = m_Path; +} diff --git a/Runtime/Filters/Mesh/CompressedMesh.h b/Runtime/Filters/Mesh/CompressedMesh.h new file mode 100644 index 0000000..cf2f01c --- /dev/null +++ b/Runtime/Filters/Mesh/CompressedMesh.h @@ -0,0 +1,175 @@ +#ifndef COMPRESSEDMESH_H +#define COMPRESSEDMESH_H + +#include "Runtime/Serialize/SerializeUtility.h" +#include "Runtime/Animation/AnimationClip.h" +class Mesh; +class AnimationClip; + +enum +{ + kMeshCompressionOff = 0, + kMeshCompressionLow = 1, + kMeshCompressionMed = 2, + kMeshCompressionHigh = 3, +}; + +typedef std::vector<UInt8> 
DataVector; + +class PackedFloatVector +{ +public: + DECLARE_SERIALIZE (PackedBitVector) + + PackedFloatVector() { m_NumItems = 0; m_Range = 0; m_Start = 0; m_BitSize = 0; } + + void PackFloats(float *data, int chunkSize, int chunkStride, int chunkCount, int bitSize, bool adjustBitSize); + void UnpackFloats(float *data, int chunkSize, int chunkStride, int start = 0, int count = -1); + int Count() {return m_NumItems;} + +private: + UInt32 m_NumItems; + float m_Range; + float m_Start; + UInt8 m_BitSize; + std::vector<UInt8> m_Data; +}; + +class PackedIntVector +{ +public: + DECLARE_SERIALIZE (PackedBitVector) + + PackedIntVector() { m_NumItems = 0; m_BitSize = 0; } + + template <class IntSize> void PackInts(IntSize *data, int numItems); + template <class IntSize> void UnpackInts(IntSize *data); + int Count() {return m_NumItems;} + +private: + UInt32 m_NumItems; + UInt8 m_BitSize; + std::vector<UInt8> m_Data; +}; + +class PackedQuatVector +{ +public: + DECLARE_SERIALIZE (PackedBitVector) + + PackedQuatVector() {m_NumItems = 0;} + + void PackQuats(Quaternionf *data, int numItems); + void UnpackQuats(Quaternionf *data); + int Count() {return m_NumItems;} + +private: + UInt32 m_NumItems; + std::vector<UInt8> m_Data; +}; + +class CompressedMesh +{ +public: + DECLARE_SERIALIZE (CompressedMesh) + + void Compress(Mesh &src, int quality); + void Decompress(Mesh &src); + +private: + PackedFloatVector m_Vertices; + PackedFloatVector m_UV; + + // TODO: This never gets written. Unity 3.4 and 3.5 never wrote this data. + // Most likely no version ever did. Remove code and bindpose serialization. 
+ PackedFloatVector m_BindPoses; + + PackedFloatVector m_Normals; + PackedIntVector m_NormalSigns; + PackedFloatVector m_Tangents; + PackedIntVector m_TangentSigns; + PackedIntVector m_Weights; + PackedIntVector m_BoneIndices; + PackedIntVector m_Triangles; + PackedIntVector m_Colors; +}; + +template<class TransferFunc> +void PackedFloatVector::Transfer (TransferFunc& transfer) { + TRANSFER ( m_NumItems ); + TRANSFER( m_Range ); + TRANSFER( m_Start ); + TRANSFER( m_Data ); + TRANSFER( m_BitSize ); + transfer.Align(); +} + +template<class TransferFunc> +void PackedIntVector::Transfer (TransferFunc& transfer) { + TRANSFER( m_NumItems ); + TRANSFER( m_Data ); + TRANSFER( m_BitSize ); + transfer.Align(); +} + +template<class TransferFunc> +void PackedQuatVector::Transfer (TransferFunc& transfer) { + TRANSFER( m_NumItems ); + TRANSFER( m_Data ); + transfer.Align(); +} + +template<class TransferFunc> +void CompressedMesh::Transfer (TransferFunc& transfer) { + TRANSFER( m_Vertices ); + TRANSFER( m_UV ); + TRANSFER( m_BindPoses ); + TRANSFER( m_Normals ); + TRANSFER( m_Tangents ); + TRANSFER( m_Weights ); + TRANSFER( m_NormalSigns ); + TRANSFER( m_TangentSigns ); + TRANSFER( m_BoneIndices ); + TRANSFER( m_Triangles ); + TRANSFER( m_Colors ); +} + +class CompressedAnimationCurve +{ +public: + DECLARE_SERIALIZE (CompressedAnimationCurve) + + CompressedAnimationCurve() { m_PreInfinity = 0; m_PostInfinity = 0; } + + void CompressQuatCurve(AnimationClip::QuaternionCurve &src); + void DecompressQuatCurve(AnimationClip::QuaternionCurve &src); + +private: + + template <class T> void CompressTimeKeys(AnimationCurveTpl<T> &src); + template <class T> void DecompressTimeKeys(AnimationCurveTpl<T> &src); + + PackedIntVector m_Times; + PackedQuatVector m_Values; + PackedFloatVector m_Slopes; + + int m_PreInfinity; + int m_PostInfinity; + + UnityStr m_Path; +}; + +template<class TransferFunc> +void CompressedAnimationCurve::Transfer (TransferFunc& transfer) { + + TRANSFER( m_Path ); + + 
TRANSFER( m_Times ); + TRANSFER( m_Values ); + TRANSFER( m_Slopes ); + + TRANSFER( m_PreInfinity ); + TRANSFER( m_PostInfinity ); +} + +#endif diff --git a/Runtime/Filters/Mesh/LodMesh.cpp b/Runtime/Filters/Mesh/LodMesh.cpp new file mode 100644 index 0000000..fc5dca8 --- /dev/null +++ b/Runtime/Filters/Mesh/LodMesh.cpp @@ -0,0 +1,2344 @@ +#include "UnityPrefix.h" +#include "Configuration/UnityConfigure.h" +#include "LodMesh.h" +#include "Runtime/Utilities/vector_utility.h" +#include "Runtime/Utilities/Utility.h" +#include "Runtime/Math/FloatConversion.h" +#include "Runtime/Serialize/TransferFunctions/SerializeTransfer.h" +#include "Runtime/Serialize/PersistentManager.h" +#include "Runtime/Graphics/TriStripper.h" +#include "MeshUtility.h" +#include "Runtime/Geometry/TangentSpaceCalculation.h" +#include "Runtime/BaseClasses/GameObject.h" +#include "Runtime/Shaders/VBO.h" +#include "Runtime/Serialize/TransferUtility.h" +#include "Runtime/Serialize/SwapEndianArray.h" +#include "Runtime/GfxDevice/GfxDevice.h" +#include "Runtime/BaseClasses/IsPlaying.h" +#include "Runtime/Camera/IntermediateRenderer.h" +#include "Runtime/Filters/Mesh/MeshRenderer.h" +#include "Runtime/Allocator/MemoryMacros.h" +#include "Runtime/Misc/Allocator.h" +#include "Runtime/Profiler/Profiler.h" +#include "Runtime/Camera/Camera.h" +#include "Runtime/Camera/RenderManager.h" +#include "Runtime/Threads/Thread.h" +#include "Runtime/Misc/BuildSettings.h" +#include "Runtime/Utilities/UniqueIDGenerator.h" +#if UNITY_XENON +#include "PlatformDependent/Xbox360/Source/GfxDevice/GfxXenonVBO.h" +#endif +#include "Runtime/GfxDevice/GfxDeviceConfigure.h" + +#if UNITY_FLASH +#include <limits.h> +#define FLT_MAX __FLT_MAX__ +#define FLT_MIN __FLT_MIN__ +#endif + +#if UNITY_EDITOR +# include "Editor/Src/BuildPipeline/PrepareMeshDataForBuildTarget.h" +# include "Runtime/Camera/RenderLoops/RenderLoopPrivate.h" +# include "Runtime/Misc/Player.h" +#endif + + +///* Checkbox in mesh importer that allows you have mesh 
access (Done) +///* Default for new importers is to have mesh access enabled (done) +///* Error Messages when acessing data although you shouldn't be allowed (--) +///* MeshColliders / SkinnedMeshes / non-uniform scale. Forces meshes to be non-readable. (Done) + + +///* MeshCollider with no-access allowed. Does it work / no errors +///* MeshCollider with no-access allowed, mesh is assigned from script. Does it give an error in editor & player +///* MeshCollider with no-access allowed, mesh is scaled at runtime does it give an error +///* MeshCollider with no-access allowed, mesh is scaled in scene. Does it work without errors. +///* Mesh data accessed from script, does it give an error. + + + +static char const* kMeshAPIErrorMessage = +"Mesh.%s is out of bounds. The supplied array needs to be the same size as the Mesh.vertices array."; + + +static UniqueIDGenerator s_MeshIDGenerator; + + +// The Mesh class contains one of these for every Material that is bound to it. +struct DeprecatedMeshData +{ + std::vector<Face> faces; // Indices for specific faces + std::vector <unsigned short> strips; // A list of triangle strips + int triangleCount; + DECLARE_SERIALIZE_NO_PPTR (MeshData) +}; + +template<class TransferFunc> +void DeprecatedMeshData::Transfer (TransferFunc& transfer) +{ + TRANSFER (faces); + TRANSFER (strips); + TRANSFER(triangleCount); +} + +struct DeprecatedLOD +{ + vector<DeprecatedMeshData> m_MeshData; + + DECLARE_SERIALIZE (LOD) +}; + +template<class TransferFunction> +void DeprecatedLOD::Transfer (TransferFunction& transfer) +{ + TRANSFER (m_MeshData); +} + +static void LoadDeprecatedMeshData (Mesh& mesh, vector<DeprecatedLOD> &lods) +{ + mesh.GetIndexBuffer().clear(); + mesh.GetSubMeshes().clear(); + + if (lods.empty()) + return; + + DeprecatedLOD& lod = lods.front(); + + mesh.SetSubMeshCount(lod.m_MeshData.size()); + for (int i=0;i<lod.m_MeshData.size();i++) + { + DeprecatedMeshData& oldMeshData = lod.m_MeshData[i]; + if (oldMeshData.faces.size()) + 
mesh.SetIndicesComplex (&oldMeshData.faces[0].v1, oldMeshData.faces.size()*3, i, kPrimitiveTriangles, Mesh::k16BitIndices); + else + { + UNITY_TEMP_VECTOR(UInt16) triangles; + Destripify(&oldMeshData.strips[0], oldMeshData.strips.size(), triangles); + mesh.SetIndicesComplex (&triangles[0], triangles.size(), i, kPrimitiveTriangles, Mesh::k16BitIndices); + } + } +} + + +using namespace std; + +Mesh::Mesh (MemLabelId label, ObjectCreationMode mode) +: Super(label, mode) +, m_ChannelsInVBO(0) +, m_VerticesDirty(true) +, m_IndicesDirty(true) +, m_IsDynamic(false) +, m_HideFromRuntimeStats(false) +, m_VertexColorsSwizzled(false) +, m_MeshUsageFlags(0) +, m_LocalAABB(Vector3f::zero, Vector3f::zero) +, m_VBO(NULL) +, m_InternalMeshID (0) +, m_Skin (label) +, m_CachedSkin2 (label) +, m_CachedSkin1 (label) +, m_CachedBonesAABB(label) +, m_Bindpose(label) +, m_BonePathHashes(label) +, m_RootBonePathHash(0) +{ + m_MaxBoneIndex = -1; + SubMesh sub; + m_SubMeshes.push_back(sub); + + m_MeshCompression = kMeshCompressionOff; + m_StreamCompression = kStreamCompressionDefault; + m_IsReadable = true; + m_KeepVertices = false; + m_KeepIndices = false; + +#if UNITY_EDITOR + m_MeshOptimized = false; +#endif + +#if ENABLE_MULTITHREADED_CODE + m_CurrentCPUFence = 0; + m_WaitOnCPUFence = false; +#endif + + m_InternalMeshID = 0; +} + +Mesh::~Mesh () +{ + MainThreadCleanup (); +} + +bool Mesh::MainThreadCleanup () +{ + WaitOnRenderThreadUse(); + NotifyObjectUsers( kDidDeleteMesh ); + m_IntermediateUsers.Notify( kImNotifyAssetDeleted ); + + m_CollisionMesh.Cleanup(); + + if (m_VBO) + { + GetGfxDevice().DeleteVBO(m_VBO); + m_VBO = NULL; + } + + if (m_InternalMeshID != 0) + { + s_MeshIDGenerator.RemoveID (m_InternalMeshID); + m_InternalMeshID = 0; + } + + return true; +} + +void Mesh::LoadDeprecatedTangentData (Mesh& mesh, DeprecatedTangentsArray &inTangents) +{ + int count = inTangents.size(); + unsigned needChannels = m_VertexData.GetChannelMask () | VERTEX_FORMAT2(Normal, Tangent); + if 
(count != GetVertexCount () || m_VertexData.GetChannelMask () != needChannels) + ResizeVertices (count, needChannels); + + Assert (GetVertexCount () == count); + + StrideIterator<Vector3f> normals = GetNormalBegin (); + StrideIterator<Vector4f> tangents = GetTangentBegin (); + + for(int i=0;i<count; ++i, ++normals, ++tangents) + { + *normals = inTangents[i].normal; + *tangents = Vector4f(inTangents[i].tangent.x,inTangents[i].tangent.y,inTangents[i].tangent.z,inTangents[i].handedness); + } +} + +void Mesh::SwizzleVertexColorsIfNeeded () +{ + // Early out if color are already in the right format + if (gGraphicsCaps.needsToSwizzleVertexColors == m_VertexColorsSwizzled) + return; + + // Due to runtime GfxDevice switching we might need to unswizzle vertex colors (case 562695) + if (m_VertexColorsSwizzled) + { + std::transform(GetColorBegin(), GetColorEnd(), GetColorBegin(), UnswizzleColorForPlatform); + m_VertexColorsSwizzled = false; + } + else + { + std::transform(GetColorBegin(), GetColorEnd(), GetColorBegin(), SwizzleColorForPlatform); + m_VertexColorsSwizzled = true; + } +} + +void Mesh::ExtractVertexArray (Vector3f* destination) const +{ + StrideIterator<Vector3f> v = GetVertexBegin (); + for (Vector3f* end = destination + GetVertexCount(); destination != end; ++v, ++destination) + *destination = *v; +} + +void Mesh::ExtractNormalArray (Vector3f* destination) const +{ + StrideIterator<Vector3f> n = GetNormalBegin (); + for (Vector3f* end = destination + GetVertexCount(); destination != end; ++n, ++destination) + *destination = *n; +} + +void Mesh::ExtractColorArray (ColorRGBA32* destination) const +{ + if (m_VertexColorsSwizzled) + std::transform(GetColorBegin(), GetColorEnd(), destination, UnswizzleColorForPlatform); + else + std::copy(GetColorBegin(), GetColorEnd(), destination); +} + +void Mesh::ExtractColorArrayConverting (ColorRGBAf* destination) const +{ + if (m_VertexColorsSwizzled) + std::transform(GetColorBegin(), GetColorEnd(), destination, 
UnswizzleColorForPlatform); + else + std::copy(GetColorBegin(), GetColorEnd(), destination); +} + +void Mesh::ExtractUvArray (int uvIndex, Vector2f* destination) const +{ + StrideIterator<Vector2f> uv = GetUvBegin (uvIndex); + for (Vector2f* end = destination + GetVertexCount(); destination != end; ++uv, ++destination) + *destination = *uv; +} + +void Mesh::ExtractTangentArray (Vector4f* destination) const +{ + StrideIterator<Vector4f> t = GetTangentBegin (); + for (Vector4f* end = destination + GetVertexCount(); destination != end; ++t, ++destination) + *destination = *t; +} + + +UInt32 Mesh::ResizeVertices (size_t count, UInt32 shaderChannels, const VertexStreamsLayout& streams, const VertexChannelsLayout& channels) +{ + Assert (count <= std::numeric_limits<UInt16>::max()); + + UInt32 prevChannels = m_VertexData.GetChannelMask(); + + if (m_VertexData.GetVertexCount() != count || + m_VertexData.GetChannelMask() != shaderChannels || + !m_VertexData.ConformsToStreamsLayout(streams) || + !m_VertexData.ConformsToChannelsLayout(channels)) + { + WaitOnRenderThreadUse(); + + SET_ALLOC_OWNER(this); + m_VertexData.Resize(count, shaderChannels, streams, channels); + + if (!m_Skin.empty ()) + m_Skin.resize_initialized (count, BoneInfluence()); + } + + return m_VertexData.GetChannelMask() & ~prevChannels; +} + + +UInt32 Mesh::FormatVertices (UInt32 shaderChannels) +{ + return ResizeVertices(GetVertexCount(), shaderChannels); +} + +void Mesh::InitChannelsToDefault (unsigned begin, unsigned count, unsigned shaderChannels) +{ + if (shaderChannels & VERTEX_FORMAT1(Vertex)) + std::fill (GetVertexBegin () + begin, GetVertexBegin () + begin + count, Vector3f (0,0,0)); + if (shaderChannels & VERTEX_FORMAT1(Normal)) + std::fill (GetNormalBegin () + begin, GetNormalBegin () + begin + count, Vector3f (0,0,0)); + if (shaderChannels & VERTEX_FORMAT1(Color)) + std::fill (GetColorBegin () + begin, GetColorBegin () + begin + count, ColorRGBA32 (0xffffffff)); + if (shaderChannels & 
VERTEX_FORMAT1(TexCoord0)) + std::fill (GetUvBegin (0) + begin, GetUvBegin (0) + begin + count, Vector2f (0,0)); + if (shaderChannels & VERTEX_FORMAT1(Tangent)) + std::fill (GetTangentBegin () + begin, GetTangentBegin () + begin + count, Vector4f (0,0,0,0)); + + if (shaderChannels & VERTEX_FORMAT1(TexCoord1)) + { + if( GetAvailableChannels () & VERTEX_FORMAT1(TexCoord0) ) + std::copy (GetUvBegin (0) + begin, GetUvBegin (0) + begin + count, GetUvBegin (1) + begin); + else + std::fill (GetUvBegin (1) + begin, GetUvBegin (1) + begin + count, Vector2f (0,0)); + } +} + +namespace +{ + bool IsStripValid(const Mesh::TemporaryIndexContainer& triangles, const Mesh::TemporaryIndexContainer& newStrip) + { + int invalidTriangleCount = 0; + for (int j = 0; j < triangles.size(); j += 3) + { + int i0 = triangles[j + 0]; + int i1 = triangles[j + 1]; + int i2 = triangles[j + 2]; + + bool found = false; + for (int k = 0; k < newStrip.size() - 2; ++k) + { + int s0 = newStrip[k + 0]; + int s1 = newStrip[k + 1]; + int s2 = newStrip[k + 2]; + + if (k&1) + std::swap(s1, s2); + + if ((s0 == i0 && s1 == i1 && s2 == i2) || + (s0 == i1 && s1 == i2 && s2 == i0) || + (s0 == i2 && s1 == i0 && s2 == i1)) + { + found = true; + break; + } + } + + if (!found) + ++invalidTriangleCount; + } + + AssertMsg(invalidTriangleCount == 0, "Mesh strip is missing %d triangles", invalidTriangleCount); + return invalidTriangleCount == 0; + } +} + +void Mesh::RecalculateBoundsInternal () +{ + MinMaxAABB minmax; + minmax.Init (); + for (StrideIterator<Vector3f> it = GetVertexBegin (), end = GetVertexEnd (); it != end; ++it) + minmax.Encapsulate (*it); + + // Apply all blendshape targets to bounding volumes + if (!m_Shapes.vertices.empty()) + { + StrideIterator<Vector3f> verts = GetVertexBegin (); + + for (int i=0;i<m_Shapes.vertices.size();i++) + { + Vector3f pos = verts[m_Shapes.vertices[i].index] + m_Shapes.vertices[i].vertex; + minmax.Encapsulate (pos); + } + } + + AABB aabb; + if (GetVertexCount ()) + aabb = 
minmax; + else + aabb = AABB (Vector3f::zero, Vector3f::zero); + + m_LocalAABB = aabb; + + for (int submesh = 0; submesh < m_SubMeshes.size(); ++submesh) + RecalculateSubmeshBoundsInternal (submesh); +} + +void Mesh::RecalculateSubmeshBoundsInternal (unsigned submesh) +{ + MinMaxAABB minmax; + minmax.Init (); + + const UInt16* indices = GetSubMeshBuffer16(submesh); + StrideIterator<Vector3f> vertices = GetVertexBegin (); + for (unsigned int i = 0; i < GetSubMeshFast(submesh).indexCount; i++) + minmax.Encapsulate (vertices[indices[i]]); + + AABB aabb; + if (GetSubMeshFast(submesh).indexCount > 0) + aabb = minmax; + else + aabb = AABB (Vector3f::zero, Vector3f::zero); + + GetSubMeshFast(submesh).localAABB = aabb; +} + + +void Mesh::RecalculateBounds () +{ + RecalculateBoundsInternal (); + + SetDirty(); + NotifyObjectUsers( kDidModifyBounds ); + m_IntermediateUsers.Notify( kImNotifyBoundsChanged ); +} + +void Mesh::RecalculateSubmeshBounds (unsigned submesh) +{ + RecalculateSubmeshBoundsInternal (submesh); + + SetDirty(); + NotifyObjectUsers( kDidModifyBounds ); + m_IntermediateUsers.Notify( kImNotifyBoundsChanged ); +} + + +void Mesh::Clear (bool keepVertexLayout) +{ + WaitOnRenderThreadUse(); + + m_SubMeshes.clear(); + SubMesh sub; + m_SubMeshes.push_back(sub); + + ClearBlendShapes (m_Shapes); + + m_IndexBuffer.clear(); +#if UNITY_EDITOR + m_MeshOptimized = false; +#endif + +#if UNITY_PS3 || UNITY_EDITOR + m_PartitionInfos.clear(); + m_Partitions.clear(); +#endif + + unsigned prevFormat = m_VertexData.GetChannelMask(); + + if (m_VertexData.GetVertexCount() > 0) + { + // keepVertexLayout added in Unity 3.5.3; keep previous behaviour + // for older content for safety. 
+ if (keepVertexLayout && IS_CONTENT_NEWER_OR_SAME (kUnityVersion3_5_3_a1)) + { + ResizeVertices (0, prevFormat); + } + else + { + VertexData tempVD; + swap (tempVD, m_VertexData); + } + } + + if (!m_Skin.empty()) + { + m_Skin.clear(); + } + + m_VertexColorsSwizzled = false; + ClearSkinCache(); + + SetChannelsDirty( prevFormat, true ); +} + +IMPLEMENT_CLASS (Mesh) +IMPLEMENT_OBJECT_SERIALIZE (Mesh) + +template <typename Index> +static void GetVertexBufferRange(const Index* indices, int indexCount, UInt32& fromVertex, UInt32& toVertex) +{ + Index a = Index(INT_MAX); + Index b = 0; + const Index* indicesEnd = indices + indexCount; + for (const Index* index = indices; index < indicesEnd; ++index) + { + a = std::min(a, *index); + b = std::max(b, *index); + } + fromVertex = a; + toVertex = b; +} + +void Mesh::ByteSwapIndices () +{ + SwapEndianArray (&m_IndexBuffer[0], kVBOIndexSize, GetTotalndexCount()); +} + +template<class T> +bool ShouldSerializeForBigEndian (T& transfer) +{ + bool bigEndian = UNITY_BIG_ENDIAN; + if (transfer.ConvertEndianess()) + bigEndian = !bigEndian; + return bigEndian; +} + +void Mesh::DestripifyIndices () +{ + if (m_IndexBuffer.empty() || m_SubMeshes.empty()) + return; + + int submeshCount = m_SubMeshes.size(); + bool anyStripped = false; + for (size_t i = 0; i < submeshCount; ++i) + { + if (m_SubMeshes[i].topology == kPrimitiveTriangleStripDeprecated) + { + anyStripped = true; + break; + } + } + if(!anyStripped) + return; + + // destripify the stripped submeshes + typedef UNITY_TEMP_VECTOR(UInt16) TemporaryIndexContainer; + + std::vector<TemporaryIndexContainer> submeshIndices; + submeshIndices.resize(submeshCount); + for(int i=0;i<submeshCount;i++) + { + SubMesh& sm = m_SubMeshes[i]; + if (sm.topology == kPrimitiveTriangleStripDeprecated) + Destripify (GetSubMeshBuffer16(i), sm.indexCount, submeshIndices[i]); + else + { + submeshIndices[i].resize(sm.indexCount); + memcpy(&submeshIndices[i][0], GetSubMeshBuffer16(i), sm.indexCount << 1); + } + 
} + + SetSubMeshCount(0); + SetSubMeshCount(submeshCount); + + for(int i=0;i<submeshCount;i++) + SetIndices(&submeshIndices[i][0], submeshIndices[i].size(), i, kPrimitiveTriangles); +} + +bool Mesh::CanAccessFromScript() const +{ +#if UNITY_EDITOR + // Allow editor scripts access even if not allowed in runtime + if (!IsInsidePlayerLoop() && !IsInsideRenderLoop()) + return true; +#endif + return m_IsReadable; +} + + +template<class TransferFunction> +void Mesh::Transfer (TransferFunction& transfer) +{ + #if SUPPORT_SERIALIZED_TYPETREES + // See TransferWorkaround35SerializeFuckup below for comments. + // Remove when we can break backwards-compatiblity. + if (transfer.GetFlags() & kWorkaround35MeshSerializationFuckup) + { + TransferWorkaround35SerializeFuckup (transfer); + return; + } + #endif + + Super::Transfer (transfer); + transfer.SetVersion (8); + + #if UNITY_EDITOR + const UInt32 supportedChannels = transfer.IsWritingGameReleaseData() ? transfer.GetBuildUsage().meshSupportedChannels : 0; + const UInt32 meshUsageFlags = transfer.IsWritingGameReleaseData() ? 
// Main serialization routine (stream version 8).
// NOTE: the compressed and uncompressed codepaths below must transfer the same
// property names in the same order, otherwise SafeBinaryRead will crash when
// reading with a mismatched type tree.
template<class TransferFunction>
void Mesh::Transfer (TransferFunction& transfer)
{
	#if SUPPORT_SERIALIZED_TYPETREES
	// See TransferWorkaround35SerializeFuckup below for comments.
	// Remove when we can break backwards-compatiblity.
	if (transfer.GetFlags() & kWorkaround35MeshSerializationFuckup)
	{
		TransferWorkaround35SerializeFuckup (transfer);
		return;
	}
	#endif

	Super::Transfer (transfer);
	transfer.SetVersion (8);

	#if UNITY_EDITOR
	const UInt32 supportedChannels = transfer.IsWritingGameReleaseData() ? transfer.GetBuildUsage().meshSupportedChannels : 0;
	const UInt32 meshUsageFlags = transfer.IsWritingGameReleaseData() ? transfer.GetBuildUsage().meshUsageFlags : 0;
	// Scoped helper: strips/transforms mesh data for the target platform while
	// writing build data, restored when it goes out of scope.
	PrepareMeshDataForBuildTarget prepareMesh(*this, transfer.GetBuildingTarget().platform, supportedChannels, meshUsageFlags);
	#endif

	bool reswizzleColors = false;
	if (m_VertexColorsSwizzled)
	{
		// Unswizzle colors before serializing
		std::transform(GetColorBegin(), GetColorEnd(), GetColorBegin(), UnswizzleColorForPlatform);
		m_VertexColorsSwizzled = false;
		reswizzleColors = true;
	}

	transfer.Transfer (m_SubMeshes, "m_SubMeshes", kHideInEditorMask);
	transfer.Transfer (m_Shapes, "m_Shapes", kHideInEditorMask);
	transfer.Transfer (m_Bindpose, "m_BindPose", kHideInEditorMask);
	transfer.Transfer (m_BonePathHashes, "m_BoneNameHashes", kHideInEditorMask);
	transfer.Transfer (m_RootBonePathHash, "m_RootBoneNameHash", kHideInEditorMask);

	transfer.Transfer (m_MeshCompression, "m_MeshCompression", kHideInEditorMask);
	transfer.Transfer (m_StreamCompression, "m_StreamCompression", kHideInEditorMask);
	transfer.Transfer (m_IsReadable, "m_IsReadable", kHideInEditorMask);
	transfer.Transfer (m_KeepVertices, "m_KeepVertices", kHideInEditorMask);
	transfer.Transfer (m_KeepIndices, "m_KeepIndices", kHideInEditorMask);
	transfer.Align();

	// Notice the two codepaths for serialization here.
	// It is very important to keep both codepaths in sync, otherwise SafeBinaryRead serialization will crash.
	// Look at kSerializeForPrefabSystem to disable compression when using Transfer to instantiate a Mesh.
	// Changes to compression can break web content if we recompress at runtime. (case 546159)
	bool doCompression = m_MeshCompression && !(transfer.GetFlags() & kSerializeForPrefabSystem);
	if (!doCompression)
	{
		if (transfer.ConvertEndianess() && transfer.IsWriting ())
			ByteSwapIndices();

		transfer.Transfer (m_IndexBuffer, "m_IndexBuffer", kHideInEditorMask);

		// Swap back after writing (and after reading foreign-endian data).
		if (transfer.ConvertEndianess() && (transfer.IsWriting () || transfer.IsReading ()))
			ByteSwapIndices();

		transfer.Transfer (m_Skin, "m_Skin", kHideInEditorMask);

		if (transfer.IsVersionSmallerOrEqual (5))
		{
			// Pre-version-6 content stored separate per-channel arrays;
			// read them and repack into the interleaved vertex buffer.
			dynamic_array<Vector4f> tangents;
			dynamic_array<Vector3f> vertices, normals;
			dynamic_array<Vector2f> uvs, uvs1;
			dynamic_array<ColorRGBA32> colors;


			transfer.Transfer (vertices, "m_Vertices", kHideInEditorMask);
			transfer.Transfer (uvs, "m_UV", kHideInEditorMask);
			transfer.Transfer (uvs1, "m_UV1", kHideInEditorMask);
			transfer.Transfer (tangents, "m_Tangents", kHideInEditorMask);
			transfer.Transfer (normals, "m_Normals", kHideInEditorMask);
			transfer.Transfer (colors, "m_Colors", kHideInEditorMask);

			unsigned format = 0;
			if (!vertices.empty ()) format |= VERTEX_FORMAT1(Vertex);
			if (!tangents.empty ()) format |= VERTEX_FORMAT1(Tangent);
			if (!normals.empty ()) format |= VERTEX_FORMAT1(Normal);
			if (!uvs.empty ()) format |= VERTEX_FORMAT1(TexCoord0);
			if (!uvs1.empty ()) format |= VERTEX_FORMAT1(TexCoord1);
			if (!colors.empty ()) format |= VERTEX_FORMAT1(Color);

			size_t vertexCount = vertices.size ();
			if (GetVertexCount () != vertexCount || GetAvailableChannels () != format)
				ResizeVertices (vertexCount, format);

			strided_copy (vertices.begin (), vertices.begin () + std::min (vertices.size (), vertexCount), GetVertexBegin ());
			strided_copy (normals.begin (), normals.begin () + std::min (normals.size (), vertexCount), GetNormalBegin ());
			strided_copy (uvs.begin (), uvs.begin () + std::min (uvs.size (), vertexCount), GetUvBegin (0));
			strided_copy (uvs1.begin (), uvs1.begin () + std::min (uvs1.size (), vertexCount), GetUvBegin (1));
			strided_copy (tangents.begin (), tangents.begin () + std::min (tangents.size (), vertexCount), GetTangentBegin ());
			strided_copy (colors.begin (), colors.begin () + std::min (colors.size (), vertexCount), GetColorBegin ());
		}
		else
		{
			// version 6 introduces interleaved buffer
			if (transfer.ConvertEndianess() && transfer.IsWriting ())
				m_VertexData.SwapEndianess ();

			transfer.Transfer (m_VertexData, "m_VertexData", kHideInEditorMask);

			if (transfer.ConvertEndianess() && (transfer.IsWriting () || transfer.IsReading ()))
				m_VertexData.SwapEndianess ();
		}
	}
	// Notice the two codepaths for serialization here.
	// It is very important to keep both codepaths in sync, otherwise SafeBinaryRead serialization will crash.
	else
	{
		// Compressed path: transfer empty placeholders under the same property
		// names so the type tree stays identical to the uncompressed path.
		BoneInfluenceContainer dummySkin;
		VertexData dummyVertexData;
		IndexContainer dummyIndexContainer;

		transfer.Transfer (dummyIndexContainer, "m_IndexBuffer", kHideInEditorMask);
		transfer.Transfer (dummySkin, "m_Skin", kHideInEditorMask);
		transfer.Transfer (dummyVertexData, "m_VertexData", kHideInEditorMask);
	}

	{
		// only keep the compressed mesh in memory while needed
		CompressedMesh m_CompressedMesh;
		transfer.Align();
		// Check both IsWriting() and IsReading() since both are true when reading with SafeBinaryRead
		if (doCompression && transfer.IsWriting())
			m_CompressedMesh.Compress(*this, m_MeshCompression);

		transfer.Transfer (m_CompressedMesh, "m_CompressedMesh", kHideInEditorMask);

		if (doCompression && transfer.DidReadLastProperty ())
			m_CompressedMesh.Decompress(*this);
	}

	#if !GFX_SUPPORTS_TRISTRIPS
	if (transfer.IsReading())
		DestripifyIndices ();
	#endif

	// Reswizzle colors after serializing
	if (reswizzleColors)
	{
		std::transform(GetColorBegin(), GetColorEnd(), GetColorBegin(), SwizzleColorForPlatform);
		m_VertexColorsSwizzled = true;
	}

	transfer.Transfer (m_LocalAABB, "m_LocalAABB", kHideInEditorMask);

	#if UNITY_EDITOR
	// When building player we precalcuate mesh usage based on who uses the different MeshColliders in different scenes.
	if (transfer.IsWritingGameReleaseData())
	{
		int buildMeshUsageFlags = transfer.GetBuildUsage().meshUsageFlags;
		transfer.Transfer (buildMeshUsageFlags, "m_MeshUsageFlags", kHideInEditorMask);
	}
	else
		transfer.Transfer (m_MeshUsageFlags, "m_MeshUsageFlags", kHideInEditorMask);
	#else
	transfer.Transfer (m_MeshUsageFlags, "m_MeshUsageFlags", kHideInEditorMask);
	#endif

	m_CollisionMesh.Transfer(transfer, *this);

	// Version upgrades for legacy content below.
	if (transfer.IsOldVersion(1))
	{
		vector<DeprecatedLOD> lod;
		transfer.Transfer (lod, "m_LODData", kHideInEditorMask);
		LoadDeprecatedMeshData(*this, lod);
	}

	if (transfer.IsVersionSmallerOrEqual(4))
	{
		// Versions <= 4 did not store per-submesh vertex ranges/bounds.
		for (int sm = 0; sm < m_SubMeshes.size(); ++sm)
		{
			UpdateSubMeshVertexRange (sm);
			RecalculateSubmeshBoundsInternal (sm);
		}
	}

	if (transfer.IsOldVersion(2) || transfer.IsOldVersion(1))
	{
		DeprecatedTangentsArray m_TangentSpace;
		transfer.Transfer (m_TangentSpace, "m_TangentSpace", kHideInEditorMask);
		if(transfer.IsReading())
			LoadDeprecatedTangentData(*this,m_TangentSpace);
	}

	if (transfer.IsVersionSmallerOrEqual(7))
	{
		// Version 8 dropped triangle-strip submeshes.
		DestripifySubmeshOnTransferInternal();
	}
	TRANSFER_EDITOR_ONLY_HIDDEN(m_MeshOptimized);

#if UNITY_EDITOR || UNITY_PS3
	TransferPS3Data(transfer);
#endif
}
//
// By having the exact same transfer path, we end up with identical type trees compared to version
// 3.5.0 and thus automatically end up on the StreamedBinaryRead codepath. Also, as long as this
// separate path here is preserved, we can read the faulty 3.5.0 streams without having to worry
// about it in the normal transfer path.
//
// NOTE(review): this function is a deliberately frozen replica of the 3.5.0
// Transfer — do not "fix" or modernize it, or 3.5.0 streams become unreadable.
template<class TransferFunction>
void Mesh::TransferWorkaround35SerializeFuckup (TransferFunction& transfer)
{
	Super::Transfer (transfer);
	transfer.SetVersion (6);

	if (m_VertexColorsSwizzled)
	{
		// Unswizzle colors before serializing
		std::transform(GetColorBegin(), GetColorEnd(), GetColorBegin(), UnswizzleColorForPlatform);
		m_VertexColorsSwizzled = false;
	}

	transfer.Transfer (m_SubMeshes, "m_SubMeshes", kHideInEditorMask);

	if (!transfer.IsVersionSmallerOrEqual(3))
		transfer.Transfer (m_MeshCompression, "m_MeshCompression", kHideInEditorMask);
	else
		m_MeshCompression = kMeshCompressionOff;

	transfer.Align();
	if (m_MeshCompression == kMeshCompressionOff)
	{
		if (transfer.ConvertEndianess() && transfer.IsWriting ())
			ByteSwapIndices();

		transfer.Transfer (m_IndexBuffer, "m_IndexBuffer", kHideInEditorMask);

		if (transfer.ConvertEndianess() && (transfer.IsWriting () || transfer.IsReading ()))
			ByteSwapIndices();

		transfer.Transfer (m_Skin, "m_Skin", kHideInEditorMask);
		transfer.Transfer (m_Bindpose, "m_BindPose", kHideInEditorMask);

		if (transfer.IsVersionSmallerOrEqual (5))
		{
			// Pre-version-6: separate per-channel arrays, repacked below.
			dynamic_array<Vector4f> tangents;
			dynamic_array<Vector3f> vertices, normals;
			dynamic_array<Vector2f> uvs, uvs1;
			dynamic_array<ColorRGBA32> colors;


			transfer.Transfer (vertices, "m_Vertices", kHideInEditorMask);
			transfer.Transfer (uvs, "m_UV", kHideInEditorMask);
			transfer.Transfer (uvs1, "m_UV1", kHideInEditorMask);
			transfer.Transfer (tangents, "m_Tangents", kHideInEditorMask);
			transfer.Transfer (normals, "m_Normals", kHideInEditorMask);
			transfer.Transfer (colors, "m_Colors", kHideInEditorMask);

			unsigned format = 0;
			if (!vertices.empty ()) format |= VERTEX_FORMAT1(Vertex);
			if (!tangents.empty ()) format |= VERTEX_FORMAT1(Tangent);
			if (!normals.empty ()) format |= VERTEX_FORMAT1(Normal);
			if (!uvs.empty ()) format |= VERTEX_FORMAT1(TexCoord0);
			if (!uvs1.empty ()) format |= VERTEX_FORMAT1(TexCoord1);
			if (!colors.empty ()) format |= VERTEX_FORMAT1(Color);

			size_t vertexCount = vertices.size ();
			if (GetVertexCount () != vertexCount || GetAvailableChannels () != format)
				ResizeVertices (vertexCount, format);

			strided_copy (vertices.begin (), vertices.begin () + std::min (vertices.size (), vertexCount), GetVertexBegin ());
			strided_copy (normals.begin (), normals.begin () + std::min (normals.size (), vertexCount), GetNormalBegin ());
			strided_copy (uvs.begin (), uvs.begin () + std::min (uvs.size (), vertexCount), GetUvBegin (0));
			strided_copy (uvs1.begin (), uvs1.begin () + std::min (uvs1.size (), vertexCount), GetUvBegin (1));
			strided_copy (tangents.begin (), tangents.begin () + std::min (tangents.size (), vertexCount), GetTangentBegin ());
			strided_copy (colors.begin (), colors.begin () + std::min (colors.size (), vertexCount), GetColorBegin ());
		}
		else
		{
			// version 6 introduces interleaved buffer
			if (transfer.ConvertEndianess() && transfer.IsWriting ())
				m_VertexData.SwapEndianess ();

			transfer.Transfer (m_VertexData, "m_VertexData", kHideInEditorMask);

			if (transfer.ConvertEndianess() && (transfer.IsWriting () || transfer.IsReading ()))
				m_VertexData.SwapEndianess ();
		}
	}
	else
	{
		// Compressed path: empty placeholders keep the type tree identical to
		// the uncompressed path (the 3.5.0 bug this workaround preserves).
		vector<Vector4f> emptyVector4;
		vector<Vector3f> emptyVector3;
		vector<Vector2f> emptyVector2;
		vector<BoneInfluence> emptyBones;
		vector<UInt8> emptyIndices;
		vector<ColorRGBA32> emptyColors;

		transfer.Transfer (emptyIndices, "m_IndexBuffer", kHideInEditorMask);
		transfer.Transfer (emptyVector3, "m_Vertices", kHideInEditorMask);
		transfer.Transfer (emptyBones, "m_Skin", kHideInEditorMask);
		transfer.Transfer (m_Bindpose, "m_BindPose", kHideInEditorMask);
		transfer.Transfer (emptyVector2, "m_UV", kHideInEditorMask);
		transfer.Transfer (emptyVector2, "m_UV1", kHideInEditorMask);
		transfer.Transfer (emptyVector4, "m_Tangents", kHideInEditorMask);
		transfer.Transfer (emptyVector3, "m_Normals", kHideInEditorMask);
		transfer.Transfer (emptyColors, "m_Colors", kHideInEditorMask);
	}

	CompressedMesh m_CompressedMesh;
	transfer.Align();
	if (transfer.IsWriting() && m_MeshCompression)
		m_CompressedMesh.Compress(*this, m_MeshCompression);

	// NOTE(review): this logs unconditionally (also when writing) — looks like
	// debug leftover from 3.5.0, kept to avoid touching the frozen path.
	printf_console( "Reading compressed mesh...\n" );
	transfer.Transfer (m_CompressedMesh, "m_CompressedMesh", kHideInEditorMask);

	if (transfer.DidReadLastProperty () && m_MeshCompression)
		m_CompressedMesh.Decompress(*this);


#if !GFX_SUPPORTS_TRISTRIPS
	if (transfer.IsReading())
		DestripifyIndices ();
#endif

	transfer.Transfer (m_LocalAABB, "m_LocalAABB", kHideInEditorMask);
	transfer.Transfer (m_MeshUsageFlags, "m_MeshUsageFlags", kHideInEditorMask);

	m_CollisionMesh.Transfer(transfer, *this);

	if (transfer.IsOldVersion(1))
	{
		vector<DeprecatedLOD> lod;
		transfer.Transfer (lod, "m_LODData", kHideInEditorMask);
		LoadDeprecatedMeshData(*this, lod);
	}

	if (transfer.IsVersionSmallerOrEqual(4))
	{
		for (int sm = 0; sm < m_SubMeshes.size(); ++sm)
		{
			UpdateSubMeshVertexRange (sm);
			RecalculateSubmeshBoundsInternal (sm);
		}
	}

	if (transfer.IsOldVersion(2) || transfer.IsOldVersion(1))
	{
		DeprecatedTangentsArray m_TangentSpace;
		transfer.Transfer (m_TangentSpace, "m_TangentSpace", kHideInEditorMask);
		if(transfer.IsReading())
			LoadDeprecatedTangentData(*this,m_TangentSpace);
	}

	if (transfer.IsReading())
		DestripifySubmeshOnTransferInternal();
}
#endif
transfer.Transfer(m_Partitions, "m_Partitions", kHideInEditorMask); + transfer.Transfer(m_PartitionInfos, "m_PartitionInfos", kHideInEditorMask); + } +} +#endif + + +void Mesh::UpdateSubMeshVertexRange (int index) +{ + SubMesh& submesh = m_SubMeshes[index]; + if (submesh.indexCount > 0) + { + UInt32 lastVertex = 0; + GetVertexBufferRange(GetSubMeshBuffer16(index), submesh.indexCount, submesh.firstVertex, lastVertex); + Assert(lastVertex < GetVertexCount ()); + Assert(submesh.firstVertex <= lastVertex); + submesh.vertexCount = lastVertex - submesh.firstVertex + 1; + } + else + { + submesh.firstVertex = 0; + submesh.vertexCount = 0; + } +} + +static bool CheckOutOfBounds (unsigned max, const UInt16* p, unsigned count) +{ + for (int i=0;i<count;i++) + { + if (p[i] >= max) + return false; + } + return true; +} + +static bool CheckOutOfBounds (unsigned max, const UInt32* p, unsigned count) +{ + for (int i=0;i<count;i++) + { + if (p[i] >= max) + return false; + } + return true; +} + +bool Mesh::ValidateVertexCount (unsigned newVertexCount, const void* newTriangles, unsigned indexCount) +{ + if (newTriangles) + { + return CheckOutOfBounds (newVertexCount, reinterpret_cast<const UInt16*>(newTriangles), indexCount); + } + else + { + return CheckOutOfBounds(newVertexCount, reinterpret_cast<const UInt16*>(&m_IndexBuffer[0]), GetTotalndexCount()); + } +} + +int Mesh::GetTotalndexCount () const +{ + return m_IndexBuffer.size () / kVBOIndexSize; +} + +void Mesh::SetVertices (Vector3f const* data, size_t count) +{ + if (m_StreamCompression) + return; + + if (count > std::numeric_limits<UInt16>::max()) + { + ErrorString("Mesh.vertices is too large. A mesh may not have more than 65000 vertices."); + return; + } + + size_t prevCount = GetVertexCount (); + if (IS_CONTENT_NEWER_OR_SAME (kUnityVersion3_5_3_a1) && count < prevCount && !ValidateVertexCount(count, NULL, 0)) + { + ErrorString("Mesh.vertices is too small. 
// Replace the position channel. Handles growing/shrinking the vertex buffer,
// validating that shrinking does not orphan existing triangle indices, and
// recalculating bounds when the vertex count changes.
void Mesh::SetVertices (Vector3f const* data, size_t count)
{
	// Compressed-stream meshes are immutable from script.
	if (m_StreamCompression)
		return;

	if (count > std::numeric_limits<UInt16>::max())
	{
		ErrorString("Mesh.vertices is too large. A mesh may not have more than 65000 vertices.");
		return;
	}

	size_t prevCount = GetVertexCount ();
	// Only enforced for 3.5.3+ content to keep older content working.
	if (IS_CONTENT_NEWER_OR_SAME (kUnityVersion3_5_3_a1) && count < prevCount && !ValidateVertexCount(count, NULL, 0))
	{
		ErrorString("Mesh.vertices is too small. The supplied vertex array has less vertices than are referenced by the triangles array.");
		return;
	}

	WaitOnRenderThreadUse();

#if UNITY_PS3
	if(m_Skin.empty() || (!(m_Skin.empty() || m_PartitionInfos.empty())))
	{
		// mircea@info: sadly for us GPU renders from pointers, so we need to create a new instance when something changes....(fixes nasty bug #434226)
		SET_ALLOC_OWNER(this);
		VertexData vertexData(m_VertexData, GetAvailableChannels(), GetStreamsLayout(), GetChannelsLayout());
		swap(vertexData, m_VertexData);
	}
#endif

	if (prevCount != count)
	{
		unsigned prevChannels = GetAvailableChannels ();
		ResizeVertices (count, prevChannels | VERTEX_FORMAT1(Vertex));

		// In case there were other channels present, initialize the newly created values of
		// the expanded buffer to something meaningful.
		if (prevCount != 0 && count > prevCount && (prevChannels & ~VERTEX_FORMAT1(Vertex)))
		{
			InitChannelsToDefault (prevCount, count - prevCount, prevChannels & ~VERTEX_FORMAT1(Vertex));
		}
	}

	// Make sure we'll not be overrunning the buffer
	if (GetVertexCount () < count)
		count = GetVertexCount ();

	strided_copy (data, data + count, GetVertexBegin ());
	SetChannelsDirty (VERTEX_FORMAT1(Vertex), false);

	// We do not recalc the bounds automatically when re-writing existing vertices
	if (prevCount != count)
		RecalculateBounds ();
}

// Replace the normal channel. Passing NULL/0 removes the channel;
// count must otherwise match the current vertex count.
void Mesh::SetNormals (Vector3f const* data, size_t count)
{
	if (m_StreamCompression)
		return;
	WaitOnRenderThreadUse();

	if (count == 0 || !data)
	{
		// Remove the channel entirely.
		FormatVertices (GetAvailableChannels () & ~VERTEX_FORMAT1(Normal));
		SetChannelsDirty (VERTEX_FORMAT1(Normal), false);
		return;
	}

	if (count != GetVertexCount ())
	{
		ErrorStringMsg(kMeshAPIErrorMessage, "normals");
		return;
	}

	if (!IsAvailable (kShaderChannelNormal))
		FormatVertices (GetAvailableChannels () | VERTEX_FORMAT1(Normal));

	strided_copy (data, data + count, GetNormalBegin ());

	SetChannelsDirty (VERTEX_FORMAT1(Normal), false);
}

// Replace the tangent channel. Passing NULL/0 removes the channel;
// count must otherwise match the current vertex count.
void Mesh::SetTangents (Vector4f const* data, size_t count)
{
	if (m_StreamCompression)
		return;
	WaitOnRenderThreadUse();

	if (count == 0 || !data)
	{
		// Remove the channel entirely.
		FormatVertices (GetAvailableChannels () & ~VERTEX_FORMAT1(Tangent));
		SetChannelsDirty (VERTEX_FORMAT1(Tangent), false);
		return;
	}

	if (count != GetVertexCount ())
	{
		ErrorStringMsg(kMeshAPIErrorMessage, "tangents");
		return;
	}

	if (!IsAvailable (kShaderChannelTangent))
		FormatVertices (GetAvailableChannels () | VERTEX_FORMAT1(Tangent));

	strided_copy (data, data + count, GetTangentBegin ());
	SetChannelsDirty( VERTEX_FORMAT1(Tangent), false );
}
"uv2" : "uv"; + ErrorStringMsg(kMeshAPIErrorMessage, uvName); + return; + } + + if (!IsAvailable (texCoordChannel)) + FormatVertices (GetAvailableChannels () | texCoordMask); + + strided_copy (data, data + count, GetUvBegin (uvIndex)); + SetChannelsDirty (texCoordMask, false); +} + +void Mesh::SetColors (ColorRGBA32 const* data, size_t count) +{ + if (m_StreamCompression) + return; + WaitOnRenderThreadUse(); + + if (count == 0 || !data) + { + FormatVertices (GetAvailableChannels () & ~VERTEX_FORMAT1(Color)); + SetChannelsDirty( VERTEX_FORMAT1(Color), false ); + return; + } + + if (count != GetVertexCount ()) + { + ErrorStringMsg(kMeshAPIErrorMessage, "colors"); + return; + } + + if (!IsAvailable (kShaderChannelColor)) + { + FormatVertices (GetAvailableChannels () | VERTEX_FORMAT1(Color)); + } + m_VertexColorsSwizzled = gGraphicsCaps.needsToSwizzleVertexColors; + + if (m_VertexColorsSwizzled) + std::transform(data, data + count, GetColorBegin(), SwizzleColorForPlatform); + else + std::copy(data, data + count, GetColorBegin()); + + SetChannelsDirty( VERTEX_FORMAT1(Color), false ); +} + +void Mesh::SetColorsConverting (ColorRGBAf const* data, size_t count) +{ + if (m_StreamCompression) + return; + WaitOnRenderThreadUse(); + + if (count == 0 || !data) + { + FormatVertices (GetAvailableChannels () & ~VERTEX_FORMAT1(Color)); + SetChannelsDirty( VERTEX_FORMAT1(Color), false ); + return; + } + + if (count != GetVertexCount ()) + { + ErrorStringMsg(kMeshAPIErrorMessage, "colors"); + return; + } + + if (!IsAvailable (kShaderChannelColor)) + { + FormatVertices (GetAvailableChannels () | VERTEX_FORMAT1(Color)); + } + m_VertexColorsSwizzled = gGraphicsCaps.needsToSwizzleVertexColors; + + if (m_VertexColorsSwizzled) + std::transform(data, data + count, GetColorBegin(), SwizzleColorForPlatform); + else + strided_copy_convert(data, data + count, GetColorBegin()); + + SetChannelsDirty( VERTEX_FORMAT1(Color), false ); +} + + +void Mesh::GetTriangles (Mesh::TemporaryIndexContainer& 
triangles) const +{ + triangles.clear(); + for (unsigned m=0;m<GetSubMeshCount();m++) + AppendTriangles(triangles, m); +} + +void Mesh::GetTriangles (Mesh::TemporaryIndexContainer& triangles, unsigned submesh) const +{ + triangles.clear(); + AppendTriangles(triangles, submesh); +} + +void QuadsToTriangles(const UInt16* quads, const int indexCount, Mesh::TemporaryIndexContainer& triangles) +{ + DebugAssert (indexCount%4 == 0); + triangles.resize((indexCount/2)*3); + for (int q = 0, t = 0; q < indexCount; q += 4, t +=6) + { + triangles[t] = quads[q]; + triangles[t + 1] = quads[q + 1]; + triangles[t + 2] = quads[q + 2]; + + triangles[t + 3] = quads[q]; + triangles[t + 4] = quads[q + 2]; + triangles[t + 5] = quads[q + 3]; + } +} + + +void Mesh::AppendTriangles (Mesh::TemporaryIndexContainer& triangles, unsigned submesh) const +{ + if (submesh >= GetSubMeshCount()) + { + ErrorString("Failed getting triangles. Submesh index is out of bounds."); + return; + } + + int topology = GetSubMeshFast(submesh).topology; + if (topology == kPrimitiveTriangleStripDeprecated) + Destripify(GetSubMeshBuffer16(submesh), GetSubMeshFast(submesh).indexCount, triangles); + else if (topology == kPrimitiveQuads) + QuadsToTriangles (GetSubMeshBuffer16 (submesh), GetSubMeshFast (submesh).indexCount, triangles); + else if (topology == kPrimitiveTriangles) + triangles.insert(triangles.end(), GetSubMeshBuffer16(submesh), GetSubMeshBuffer16(submesh) + GetSubMeshFast(submesh).indexCount); + else + ErrorString("Failed getting triangles. Submesh topology is lines or points."); +} + +void Mesh::GetStrips (Mesh::TemporaryIndexContainer& triangles, unsigned submesh) const +{ + triangles.clear(); + if (submesh >= GetSubMeshCount()) + { + ErrorString("Failed getting triangles. 
// Return the raw strip indices of a submesh, or nothing when the submesh is
// not a (deprecated) triangle strip.
void Mesh::GetStrips (Mesh::TemporaryIndexContainer& triangles, unsigned submesh) const
{
	triangles.clear();
	if (submesh >= GetSubMeshCount())
	{
		ErrorString("Failed getting triangles. Submesh index is out of bounds.");
		return;
	}

	if (GetSubMeshFast(submesh).topology != kPrimitiveTriangleStripDeprecated)
		return;

	triangles.assign(GetSubMeshBuffer16(submesh), GetSubMeshBuffer16(submesh) + GetSubMeshFast(submesh).indexCount);
}

// Return a submesh's raw index list regardless of topology.
void Mesh::GetIndices (TemporaryIndexContainer& triangles, unsigned submesh) const
{
	triangles.clear();
	if (submesh >= GetSubMeshCount())
	{
		ErrorString("Failed getting indices. Submesh index is out of bounds.");
		return;
	}
	triangles.assign(GetSubMeshBuffer16(submesh), GetSubMeshBuffer16(submesh) + GetSubMeshFast(submesh).indexCount);
}


// Replace a submesh's indices from 32-bit source data (narrowed to 16-bit storage).
bool Mesh::SetIndices (const UInt32* indices, unsigned count, unsigned submesh, GfxPrimitiveType topology)
{
	int mask = kRebuildCollisionTriangles;
	return SetIndicesComplex (indices, count, submesh, topology, mask);
}

// Replace a submesh's indices from 16-bit source data.
bool Mesh::SetIndices (const UInt16* indices, unsigned count, unsigned submesh, GfxPrimitiveType topology)
{
	int mask = kRebuildCollisionTriangles | k16BitIndices;
	return SetIndicesComplex (indices, count, submesh, topology, mask);
}


// Core index-assignment routine: validates arguments and index bounds, writes
// the data, then refreshes the submesh vertex range/bounds and (optionally)
// the collision triangles. Returns false on validation failure.
bool Mesh::SetIndicesComplex (const void* indices, unsigned count, unsigned submesh, GfxPrimitiveType topology, int mode)
{
	WaitOnRenderThreadUse();

	if (indices == NULL && count != 0 && (mode & kDontAssignIndices) == 0)
	{
		ErrorString("failed setting triangles. triangles is NULL");
		return false;
	}

	if (submesh >= GetSubMeshCount())
	{
		ErrorString("Failed setting triangles. Submesh index is out of bounds.");
		return false;
	}

	if ((topology == kPrimitiveTriangles) && (count % 3 != 0))
	{
		ErrorString("Failed setting triangles. The number of supplied triangle indices must be a multiple of 3.");
		return false;
	}

	if ((mode & kDontAssignIndices) == 0)
	{
		// Reject indices referencing vertices beyond the current vertex count.
		bool valid;
		if (mode & k16BitIndices)
			valid = CheckOutOfBounds (GetVertexCount(), reinterpret_cast<const UInt16*>(indices), count);
		else
			valid = CheckOutOfBounds (GetVertexCount(), reinterpret_cast<const UInt32*>(indices), count);

		if (!valid)
		{
			ErrorString("Failed setting triangles. Some indices are referencing out of bounds vertices.");
			return false;
		}
	}

	SetIndexData(submesh, count, indices, topology, mode);

	if (mode & Mesh::kDontSupportSubMeshVertexRanges)
	{
		// Single-submesh fast path: the whole vertex buffer is the range.
		Assert(m_SubMeshes.size () == 1);
		m_SubMeshes[0].firstVertex = 0;
		m_SubMeshes[0].vertexCount = GetVertexCount();
		m_SubMeshes[0].localAABB = m_LocalAABB;
	}
	else
	{
		// Update vertex range
		UpdateSubMeshVertexRange (submesh);
		RecalculateSubmeshBounds(submesh);
	}

	if (mode & kRebuildCollisionTriangles)
		RebuildCollisionTriangles();

	SetChannelsDirty( 0, true );

	return true;
}
+ + for (size_t i = 0; i < submeshCount; ++i) + { + SubMesh& sm = m_SubMeshes[i]; + if (sm.topology == kPrimitiveTriangleStripDeprecated) + { + Destripify (GetSubMeshBuffer16(i), sm.indexCount, submeshIndices[i]); + } + else + { + submeshIndices[i].resize(sm.indexCount); + memcpy(&submeshIndices[i][0], GetSubMeshBuffer16(i), sm.indexCount << 1); + } + } + + for(size_t i = 0; i < submeshCount; ++i) + { + SetIndexData(i, submeshIndices[i].size(), &submeshIndices[i][0], kPrimitiveTriangles, kRebuildCollisionTriangles | k16BitIndices); + } +} + +void Mesh::SetIndexData(int submeshIndex, int indexCount, const void* indices, GfxPrimitiveType topology, int mode) +{ + int newByteSize = indexCount * kVBOIndexSize; + int oldSubmeshSize = GetSubMeshBufferByteSize (submeshIndex); + int insertedBytes = newByteSize - GetSubMeshBufferByteSize (submeshIndex); + int oldFirstByte = m_SubMeshes[submeshIndex].firstByte; + // Growing the buffer + if (insertedBytes > 0) + { + m_IndexBuffer.insert(m_IndexBuffer.begin() + oldFirstByte + oldSubmeshSize, insertedBytes, 0); + } + // Shrinking the buffer + else + { + m_IndexBuffer.erase(m_IndexBuffer.begin() + oldFirstByte, m_IndexBuffer.begin() + oldFirstByte - insertedBytes); + } + +#if UNITY_PS3 + + // mircea@info: sadly for us GPU renders from pointers, so we need to create a new instance when something changes....(fixes nasty bug #434226) + IndexContainer newIndexContainer; + newIndexContainer.resize(m_IndexBuffer.size()); + m_IndexBuffer.swap(newIndexContainer); + +#endif + + // Update the sub mesh + m_SubMeshes[submeshIndex].indexCount = indexCount; + m_SubMeshes[submeshIndex].topology = topology; + + // Synchronize subsequent sub meshes + for (int i=submeshIndex+1;i<m_SubMeshes.size();i++) + { + m_SubMeshes[i].firstByte = m_SubMeshes[i-1].firstByte + m_SubMeshes[i-1].indexCount * kVBOIndexSize; + } + + // Write indices into the allocated data + if ((mode & kDontAssignIndices) == 0) + { + if (mode & k16BitIndices) + { + const UInt16* 
src = reinterpret_cast<const UInt16*>(indices); + UInt16* dst = GetSubMeshBuffer16(submeshIndex); + for (int i=0;i<indexCount;i++) + dst[i] = src[i]; + } + else + { + const UInt32* src = reinterpret_cast<const UInt32*>(indices); + UInt16* dst = GetSubMeshBuffer16(submeshIndex); + for (int i=0;i<indexCount;i++) + dst[i] = src[i]; + } + } + + return; +} + +const UInt16* Mesh::GetSubMeshBuffer16 (int submesh) const +{ + return m_IndexBuffer.size() > 0 && m_SubMeshes[submesh].firstByte < m_IndexBuffer.size() ? reinterpret_cast<const UInt16*> (&m_IndexBuffer[m_SubMeshes[submesh].firstByte]) : NULL; +} +UInt16* Mesh::GetSubMeshBuffer16 (int submesh) +{ + return m_IndexBuffer.size() > 0 && m_SubMeshes[submesh].firstByte < m_IndexBuffer.size() ? reinterpret_cast<UInt16*> (&m_IndexBuffer[m_SubMeshes[submesh].firstByte]) : NULL; +} + +void Mesh::SetBindposes (const Matrix4x4f* bindposes, int count) +{ + m_Bindpose.assign(bindposes, bindposes + count); + SetDirty(); +} + +void Mesh::SetBounds (const AABB& aabb) +{ + m_LocalAABB = aabb; + SetDirty(); + NotifyObjectUsers( kDidModifyBounds ); + m_IntermediateUsers.Notify( kImNotifyBoundsChanged ); +} + +void Mesh::SetBounds (unsigned submesh, const AABB& aabb) +{ + GetSubMeshFast(submesh).localAABB = aabb; + SetDirty(); + NotifyObjectUsers( kDidModifyBounds ); + m_IntermediateUsers.Notify( kImNotifyBoundsChanged ); +} + +void Mesh::NotifyObjectUsers(const MessageIdentifier& msg) +{ + ASSERT_RUNNING_ON_MAIN_THREAD; + + MessageData data; + data.SetData (this, ClassID (Mesh)); + + ObjectList::iterator next; + for( ObjectList::iterator i = m_ObjectUsers.begin(); i != m_ObjectUsers.end(); i=next ) + { + next = i; + ++next; + Object& target = **i; + SendMessageDirect(target, msg, data); + } +} + +void Mesh::WaitOnRenderThreadUse() +{ +#if ENABLE_MULTITHREADED_CODE + if (m_WaitOnCPUFence) + { + GetGfxDevice().WaitOnCPUFence(m_CurrentCPUFence); + m_WaitOnCPUFence = false; + } +#endif +} + +void Mesh::RebuildCollisionTriangles() +{ + 
m_CollisionMesh.VertexDataHasChanged (); +} + +PROFILER_INFORMATION(gRecalculateNormals, "Mesh.RecalculateNormals", kProfilerOther) + +void Mesh::RecalculateNormals() +{ + if (m_StreamCompression) + return; + WaitOnRenderThreadUse(); + + PROFILER_AUTO(gRecalculateNormals, this); + + if (int vertexCount = GetVertexCount()) + { + unsigned newChannels = m_VertexData.GetChannelMask () | VERTEX_FORMAT1(Normal); + if (newChannels != m_VertexData.GetChannelMask ()) + FormatVertices (newChannels); + + TemporaryIndexContainer triangles; + GetTriangles (triangles); + + CalculateNormals( GetVertexBegin (), &triangles[0], vertexCount, triangles.size()/3, GetNormalBegin () ); + } + + SetChannelsDirty( VERTEX_FORMAT1(Normal), false ); +} + + +void Mesh::SetSubMeshCount (unsigned int count) +{ + WaitOnRenderThreadUse(); + + if (count == 0) + { + m_IndexBuffer.clear(); + m_SubMeshes.clear(); + return; + } + + // Remove elements + if (count < m_SubMeshes.size ()) + { + m_IndexBuffer.resize(m_SubMeshes[count].firstByte); + m_SubMeshes.resize(count); + } + // Append elements + else if (count > m_SubMeshes.size ()) + { + SubMesh data; + data.firstByte = m_IndexBuffer.size(); + data.indexCount = 0; + data.topology = kPrimitiveTriangles; + data.firstVertex = 0; + data.vertexCount = 0; + data.localAABB = AABB (Vector3f::zero, Vector3f::zero); + m_SubMeshes.resize(count, data); + RecalculateBounds(); + } +} + +size_t Mesh::GetSubMeshCount () const +{ + return m_SubMeshes.size(); +} + +int Mesh::GetPrimitiveCount() const +{ + int submeshes = GetSubMeshCount(); + int count = 0; + for( int m = 0; m < submeshes; ++m ) { + const SubMesh& sub = m_SubMeshes[m]; + count += ::GetPrimitiveCount(sub.indexCount, sub.topology, false); + } + return count; +} + +int Mesh::CalculateTriangleCount() const +{ + int submeshes = GetSubMeshCount(); + int count = 0; + for( int m = 0; m < submeshes; ++m ) + { + const SubMesh& sub = m_SubMeshes[m]; + if (sub.topology == kPrimitiveTriangleStripDeprecated) + { + 
const UInt16* indices = GetSubMeshBuffer16(m); + int triCount = CountTrianglesInStrip (indices, sub.indexCount); + count += triCount; + } + else if (sub.topology == kPrimitiveTriangles) + { + count += sub.indexCount / 3; + } + } + return count; +} + +Mesh& Mesh::GetInstantiatedMesh (Mesh* mesh, Object& owner) +{ + if (NULL != mesh && mesh->m_Owner == PPtr<Object> (&owner)) + return *mesh; + + if (!IsWorldPlaying()) + ErrorStringObject("Instantiating mesh due to calling MeshFilter.mesh during edit mode. This will leak meshes. Please use MeshFilter.sharedMesh instead.", &owner); + + if (mesh == NULL || !mesh->HasVertexData ()) + { + if (!mesh) + mesh = NEW_OBJECT (Mesh); + mesh->Reset(); + + mesh->SetName(owner.GetName()); + mesh->m_Owner = &owner; + + mesh->AwakeFromLoad(kInstantiateOrCreateFromCodeAwakeFromLoad); + return *mesh; + } + + Mesh* instance = NEW_OBJECT (Mesh); + CopySerialized(*mesh, *instance); + instance->SetNameCpp (Append (mesh->GetName (), " Instance")); + instance->m_Owner = &owner; + return *instance; +} + +const VertexStreamsLayout& Mesh::GetStreamsLayout() const +{ + if (!m_Skin.empty() || GetBlendShapeChannelCount() != 0) + return VertexDataInfo::kVertexStreamsSkinnedHotColdSplit; + else + return VertexDataInfo::kVertexStreamsDefault; +} + +const VertexChannelsLayout& Mesh::GetChannelsLayout() const +{ + UInt8 compressed = m_StreamCompression; +#if !UNITY_EDITOR + // Editor only does build step for compression and never draws float16 vertices + if (!gGraphicsCaps.has16BitFloatVertex) + { + compressed = kStreamCompressionDefault; + } +#endif + switch (compressed) + { + default: // fall through + case kStreamCompressionDefault: + return VertexDataInfo::kVertexChannelsDefault; + case kStreamCompressionCompressed: + return VertexDataInfo::kVertexChannelsCompressed; + case kStreamCompressionCompressedAggressive: + return VertexDataInfo::kVertexChannelsCompressedAggressive; + } +} + +void Mesh::InitVertexBufferData( UInt32 wantedChannels ) +{ +#if 
GFX_CAN_UNLOAD_MESH_DATA + // If data was uploaded and freed we cannot update it. + if (!HasVertexData()) + return; +#endif + UInt32 presentChannels = GetAvailableChannels (); + + // Modify the vertex buffer before fetching any channel pointers, as modifying the format reallocates the buffer and pointers + // are invalidated. Due to possible format changes, also fetch the stride sizes only after buffer reformatting. + unsigned initChannels = 0; + + // Silently create an all-white color array if shader wants colors, but mesh does not have them. + // On D3D, some runtime/driver combinations will crash if a vertex shader wants colors but does not + // have them (e.g. Vista drivers for Intel 965). In other cases it will default to white for fixed function + // pipe, and to undefined value for vertex shaders, which is not good either. + if( (wantedChannels & VERTEX_FORMAT1(Color)) && !(presentChannels & VERTEX_FORMAT1(Color)) ) + initChannels |= VERTEX_FORMAT1(Color); + +#if UNITY_PEPPER + // Pepper OpenGL implementation fails to draw anything if any channel is missing. 
// Fills 'buffer' with channel/stream descriptors and the raw vertex memory to
// be uploaded to a VBO. Missing texcoord channels are aliased to the last
// valid one (presumably so shaders sampling them read defined data — the
// exact driver motivation is not visible here).
void Mesh::GetVertexBufferData( VertexBufferData& buffer, UInt32 wantedChannels )
{
	InitVertexBufferData(wantedChannels);

	for (int i = 0; i < kShaderChannelCount; i++)
		buffer.channels[i] = m_VertexData.GetChannel(i);

	for (int i = 0; i < kMaxVertexStreams; i++)
		buffer.streams[i] = m_VertexData.GetStream(i);

	int srcTexcoord = kShaderChannelNone;
	for (int i = kShaderChannelTexCoord0; i <= kShaderChannelTexCoord1; i++)
	{
		if (buffer.channels[i].IsValid())
		{
			// We have a valid texcoord
			srcTexcoord = i;
			continue;
		}
		UInt32 channelMask = 1 << i;
		if (srcTexcoord != kShaderChannelNone)
		{
			// Replicate last valid texture coord
			const ChannelInfo& srcChannel = buffer.channels[srcTexcoord];
			buffer.channels[i] = srcChannel;
			buffer.streams[srcChannel.stream].channelMask |= channelMask;
		}
	}

	// Data pointer can be NULL if we are only updating declaration of uploaded VBO
	buffer.buffer = m_VertexData.GetDataPtr();
	buffer.bufferSize = m_VertexData.GetDataSize();
	buffer.vertexCount = GetVertexCount();

#if UNITY_EDITOR
	#define LogStringObjectEditor(x) LogStringObject(Format(x, GetName()),this)

	// Editor-only diagnostics: warn when the shader requests a channel the
	// mesh cannot supply.
	if (Camera::ShouldShowChannelErrors(GetCurrentCameraPtr()))
	{
		const ChannelInfo* channels = buffer.channels;

		if ((wantedChannels & VERTEX_FORMAT1(Tangent)) && !channels[kShaderChannelTangent].IsValid())
			LogStringObjectEditor ("Shader wants tangents, but the mesh %s doesn't have them");

		if ((wantedChannels & VERTEX_FORMAT1(Normal)) && !channels[kShaderChannelNormal].IsValid())
			LogStringObjectEditor ("Shader wants normals, but the mesh %s doesn't have them");

		if ((wantedChannels & VERTEX_FORMAT1(TexCoord0)) && !channels[kShaderChannelTexCoord0].IsValid())
			LogStringObjectEditor ("Shader wants texture coordinates, but the mesh %s doesn't have them");

		if ((wantedChannels & VERTEX_FORMAT1(TexCoord1)) && !channels[kShaderChannelTexCoord1].IsValid())
			LogStringObjectEditor ("Shader wants secondary texture coordinates, but the mesh %s doesn't have any");

		if ((wantedChannels & VERTEX_FORMAT1(Color)) && !channels[kShaderChannelColor].IsValid())
			LogStringObjectEditor ("Shader wants vertex colors, and failed to create a vertex color array");
	}
	#undef LogStringObjectEditor
#endif

#if UNITY_PS3
	// With no precomputed partitions, synthesize one partition per sub mesh.
	if(m_PartitionInfos.empty())
	{
		int submeshCount = m_SubMeshes.size();
		for (int submesh=0; submesh<submeshCount; submesh++)
		{
			SubMesh& sm = GetSubMeshFast(submesh);

			MeshPartitionInfo partInfo;
			partInfo.submeshStart = submesh;
			partInfo.partitionCount = 1;
			buffer.partInfo.push_back(partInfo);

			MeshPartition part;
			part.vertexCount = sm.vertexCount;
			part.vertexOffset = 0;
			part.indexCount = sm.indexCount;
			part.indexByteOffset = sm.firstByte;
			buffer.partitions.push_back(part);;
		}
	}
	else
	{
		buffer.partInfo = m_PartitionInfos;
		buffer.partitions = m_Partitions;
	}

#endif

	// NOTE(review): redundant — vertexCount was already assigned above.
	buffer.vertexCount = GetVertexCount ();
}
NULL : (void*)&m_IndexBuffer[0]; + + ///@TODO: HACK for now to get index buffers working, without changing a lot of vbo code + // We should be passing the byte size not the number of indices + buffer.count = GetTotalndexCount(); + buffer.hasTopologies = 0; + for (size_t i = 0, n = m_SubMeshes.size(); i < n; ++i) + { + buffer.hasTopologies |= (1<<m_SubMeshes[i].topology); + } +} + +PROFILER_INFORMATION(gCreateVBOProfile, "Mesh.CreateVBO", kProfilerRender); +PROFILER_INFORMATION(gAwakeFromLoadMesh, "Mesh.AwakeFromLoad", kProfilerLoading); +PROFILER_INFORMATION(gUploadMeshDataMesh, "Mesh.UploadMeshData", kProfilerLoading); + +VBO* Mesh::GetSharedVBO( UInt32 wantedChannels ) +{ + // Some badly written shaders have no Bind statements in the vertex shaders parts; + // and only happened to work before by accident. If requiredChannels turns out to be + // zero, let's pretend it did request at least position. + if (wantedChannels == 0) + wantedChannels = (1<<kShaderChannelVertex); + + UInt32 newChannels = wantedChannels | m_ChannelsInVBO; + bool addedChannels = newChannels != m_ChannelsInVBO; + +#if GFX_CAN_UNLOAD_MESH_DATA + if (!m_IsReadable && !m_KeepVertices && m_VBO) + { + // Everything is already prepared, just return VBO + return m_VBO; + } +#endif + + if ((GFX_ALL_BUFFERS_CAN_BECOME_LOST || m_IsDynamic) && m_VBO && m_VBO->IsVertexBufferLost()) + m_VerticesDirty = true; + if (GFX_ALL_BUFFERS_CAN_BECOME_LOST && m_VBO && m_VBO->IsIndexBufferLost()) + m_IndicesDirty = true; + + if (addedChannels || m_VerticesDirty || m_IndicesDirty) + CreateSharedVBO(wantedChannels); + + return m_VBO; +} + +void Mesh::CreateSharedVBO( UInt32 wantedChannels ) +{ + if (m_IndexBuffer.empty()) + { + if (m_VBO) + { + GetGfxDevice().DeleteVBO(m_VBO); + m_VBO = NULL; + } + return; + } + + PROFILER_BEGIN(gCreateVBOProfile, this) + SET_ALLOC_OWNER(this); + + if (!m_VBO) + { + m_VBO = GetGfxDevice().CreateVBO(); + m_VBO->SetHideFromRuntimeStats(m_HideFromRuntimeStats); + } + + UInt32 newChannels 
= wantedChannels | m_ChannelsInVBO; + if (m_VerticesDirty || newChannels != m_ChannelsInVBO) + { + if (m_IsDynamic) + m_VBO->SetVertexStreamMode(0, VBO::kStreamModeDynamic); + + VertexBufferData vertexBuffer; + GetVertexBufferData (vertexBuffer, newChannels); + m_VBO->UpdateVertexData (vertexBuffer); + } + + if (m_IndicesDirty) + { + // TODO: probably add separate script access to set vertex/index dynamic + if (m_IsDynamic) + m_VBO->SetIndicesDynamic(true); + + IndexBufferData indexBuffer; + GetIndexBufferData (indexBuffer); + m_VBO->UpdateIndexData (indexBuffer); + } + + m_VerticesDirty = false; + m_IndicesDirty = false; + m_ChannelsInVBO = newChannels; + + PROFILER_END +} + +bool Mesh::CopyToVBO ( UInt32 wantedChannels, VBO& vbo ) +{ + if( m_IndexBuffer.empty() ) + return false; + + PROFILER_BEGIN(gCreateVBOProfile, this) + + VertexBufferData vertexBuffer; + GetVertexBufferData( vertexBuffer, wantedChannels ); + vbo.UpdateVertexData( vertexBuffer ); + + IndexBufferData indexBuffer; + GetIndexBufferData (indexBuffer); + vbo.UpdateIndexData (indexBuffer); +#if UNITY_XENON + if( m_VBO ) + vbo.CopyExtraUvChannels( m_VBO ); +#endif + PROFILER_END + + return true; +} + + +void Mesh::UnloadVBOFromGfxDevice() +{ + if (m_VBO) + { + WaitOnRenderThreadUse(); + GetGfxDevice().DeleteVBO (m_VBO); + } + m_VBO = NULL; + m_ChannelsInVBO = 0; + m_VerticesDirty = m_IndicesDirty = true; +#if ENABLE_MULTITHREADED_CODE + m_CurrentCPUFence = 0; + m_WaitOnCPUFence = false; +#endif +} + +void Mesh::ReloadVBOToGfxDevice() +{ + const bool needReloadFromDisk = (!m_IsReadable && !HasVertexData()); + if (needReloadFromDisk) + { + GetPersistentManager().ReloadFromDisk(this); + } + else + { + m_ChannelsInVBO = 0; + m_VerticesDirty = m_IndicesDirty = true; + } + SwizzleVertexColorsIfNeeded(); +} + + +bool Mesh::ExtractTriangle (UInt32 face, UInt32* indices) const +{ + ///@TODO: OPTIMIZE this away + TemporaryIndexContainer triangles; + GetTriangles(triangles); + if (face * 3 > triangles.size ()) 
+ return false; + + indices[0] = triangles[face * 3 + 0]; + indices[1] = triangles[face * 3 + 1]; + indices[2] = triangles[face * 3 + 2]; + return true; +} + +static void TransformNormals (const Matrix3x3f& invTranspose, StrideIterator<Vector3f> inNormals, StrideIterator<Vector3f> inNormalsEnd, StrideIterator<Vector3f> outNormals) +{ + for (; inNormals != inNormalsEnd; ++inNormals, ++outNormals) + *outNormals = NormalizeSafe (invTranspose.MultiplyVector3 (*inNormals)); +} + +static void TransformTangents (const Matrix3x3f& invTranspose, StrideIterator<Vector4f> inTangents, StrideIterator<Vector4f> inTangentsEnd, StrideIterator<Vector4f> outTangents) +{ + for ( ; inTangents != inTangentsEnd; ++inTangents, ++outTangents) + { + Vector3f tangent = Vector3f(inTangents->x,inTangents->y,inTangents->z); + Vector3f normalized = NormalizeSafe (invTranspose.MultiplyVector3 (tangent)); + *outTangents = Vector4f(normalized.x, normalized.y ,normalized.z, inTangents->w); + } +} + +void Mesh::CopyTransformed (const Mesh& mesh, const Matrix4x4f& transform) +{ + int vertexCount = mesh.GetVertexCount(); + unsigned outVertexFormat = mesh.GetAvailableChannelsForRendering (); + + ResizeVertices(mesh.GetVertexCount (), outVertexFormat); + + if (outVertexFormat & VERTEX_FORMAT1(Vertex)) + TransformPoints3x4 (transform, + (Vector3f*)mesh.GetChannelPointer (kShaderChannelVertex), mesh.GetStride (kShaderChannelVertex), + (Vector3f*)GetChannelPointer (kShaderChannelVertex), GetStride (kShaderChannelVertex), + vertexCount); + + Matrix3x3f invTranspose3x3 = Matrix3x3f(transform); invTranspose3x3.InvertTranspose (); + + if (outVertexFormat & VERTEX_FORMAT1(Normal)) + TransformNormals (invTranspose3x3, mesh.GetNormalBegin (), mesh.GetNormalEnd (), GetNormalBegin ()); + if (outVertexFormat & VERTEX_FORMAT1(Tangent)) + TransformTangents (invTranspose3x3, mesh.GetTangentBegin (), mesh.GetTangentEnd (), GetTangentBegin ()); + + m_IndexBuffer = mesh.m_IndexBuffer; + m_SubMeshes = mesh.m_SubMeshes; + 
m_Skin = mesh.m_Skin; + if (outVertexFormat & VERTEX_FORMAT1(TexCoord0)) + strided_copy (mesh.GetUvBegin (0), mesh.GetUvEnd (0), GetUvBegin (0)); + if (outVertexFormat & VERTEX_FORMAT1(TexCoord1)) + strided_copy (mesh.GetUvBegin (1), mesh.GetUvEnd (1), GetUvBegin (1)); + if (outVertexFormat & VERTEX_FORMAT1(Color)) + strided_copy (mesh.GetColorBegin (), mesh.GetColorEnd (), GetColorBegin ()); + m_VertexColorsSwizzled = mesh.m_VertexColorsSwizzled; + m_LocalAABB = mesh.m_LocalAABB; + + SetChannelsDirty( outVertexFormat, true ); + ClearSkinCache(); +} + + +void Mesh::SetChannelsDirty (unsigned vertexChannelsChanged, bool indices) +{ + SetDirty(); + + m_VerticesDirty |= vertexChannelsChanged != 0; + m_IndicesDirty |= indices; + + // We should regenreate physics mesh only if verex data have changed + if ((vertexChannelsChanged & VERTEX_FORMAT1(Vertex)) || indices) + { + m_CollisionMesh.VertexDataHasChanged(); + m_CachedBonesAABB.clear(); + } + NotifyObjectUsers( kDidModifyMesh ); +} + +bool Mesh::SetBoneWeights (const BoneInfluence* v, int count) +{ + WaitOnRenderThreadUse(); + ClearSkinCache(); + if (count == 0) + { + m_Skin.clear(); + UpdateVertexFormat(); + return true; + } + + if (count != GetVertexCount ()) + { + ErrorString("Mesh.boneWeights is out of bounds. 
The supplied array needs to be the same size as the Mesh.vertices array."); + return false; + } + m_Skin.assign(v, v + count); + SetChannelsDirty (0, false); + UpdateVertexFormat(); + + return true; +} + +static void ComputeBoneBindPoseAABB (const Matrix4x4f* bindPoses, size_t bindPoseCount, const StrideIterator<Vector3f> vertices, const BoneInfluence* influences, size_t vertexCount, const BlendShapeVertices& blendShapeVertices, MinMaxAABB* outputBounds) +{ + if (blendShapeVertices.empty()) + { + for(int v=0;v<vertexCount;v++) + { + const Vector3f& vert = vertices[v]; + for (int i = 0; i < 4; i++) + { + if(influences[v].weight[i] > 0.0f) + { + const UInt32 boneIndex = influences[v].boneIndex[i]; + + outputBounds[boneIndex].Encapsulate(bindPoses[boneIndex].MultiplyPoint3(vert)); + } + } + } + } + else + { + Vector3f* minVertices; + ALLOC_TEMP(minVertices, Vector3f, vertexCount); + Vector3f* maxVertices; + ALLOC_TEMP(maxVertices, Vector3f, vertexCount); + + strided_copy(vertices, vertices + vertexCount, minVertices); + strided_copy(vertices, vertices + vertexCount, maxVertices); + + for (int i=0;i<blendShapeVertices.size();i++) + { + int index = blendShapeVertices[i].index; + Vector3f pos = blendShapeVertices[i].vertex + vertices[index]; + maxVertices[index] = max (maxVertices[index], pos); + minVertices[index] = min (minVertices[index], pos); + } + + for(int v=0;v<vertexCount;v++) + { + for (int i = 0; i < 4; i++) + { + if(influences[v].weight[i] > 0.0f) + { + const UInt32 boneIndex = influences[v].boneIndex[i]; + outputBounds[boneIndex].Encapsulate(bindPoses[boneIndex].MultiplyPoint3(minVertices[v])); + outputBounds[boneIndex].Encapsulate(bindPoses[boneIndex].MultiplyPoint3(maxVertices[v])); + } + } + } + } +} + +const Mesh::AABBContainer& Mesh::GetCachedBonesBounds() +{ + // Use cached result if it has the correct size (including empty) + if (m_CachedBonesAABB.size() == m_Bindpose.size()) + return m_CachedBonesAABB; + + Assert(GetMaxBoneIndex() < 
m_Bindpose.size()); + + m_CachedBonesAABB.resize_initialized(m_Bindpose.size(), MinMaxAABB()); + + ComputeBoneBindPoseAABB (GetBindposes(), m_CachedBonesAABB.size(), GetVertexBegin(), m_Skin.begin(), GetVertexCount(), m_Shapes.vertices, &m_CachedBonesAABB[0]); + + return m_CachedBonesAABB; +} + +void Mesh::ClearSkinCache () +{ + m_CachedBonesAABB.clear(); + m_CachedSkin2.clear(); + m_CachedSkin1.clear(); + m_MaxBoneIndex = -1; +} + +int Mesh::GetMaxBoneIndex () +{ + if (m_MaxBoneIndex != -1) + return m_MaxBoneIndex; + + m_MaxBoneIndex = 0; + for (int i=0;i<m_Skin.size();i++) + { + m_MaxBoneIndex = max(m_MaxBoneIndex, m_Skin[i].boneIndex[0]); + m_MaxBoneIndex = max(m_MaxBoneIndex, m_Skin[i].boneIndex[1]); + m_MaxBoneIndex = max(m_MaxBoneIndex, m_Skin[i].boneIndex[2]); + m_MaxBoneIndex = max(m_MaxBoneIndex, m_Skin[i].boneIndex[3]); + } + + return m_MaxBoneIndex; +} + +void* Mesh::GetSkinInfluence (int count) +{ + if (!m_Skin.empty()) + { + BoneInfluence* bones4 = &m_Skin[0]; + if (count == 1) + { + if (!m_CachedSkin1.empty()) + return &m_CachedSkin1[0]; + + // Cache 1 bone skin weights + int size = m_Skin.size(); + m_CachedSkin1.resize_uninitialized(size); + + int* bones1 = &m_CachedSkin1[0]; + for (int i=0;i<size;i++) + bones1[i] = bones4[i].boneIndex[0]; + return bones1; + + } + else if (count == 2) + { + if (!m_CachedSkin2.empty ()) + return &m_CachedSkin2[0]; + + // Cache 2 bone skin weights + int size = m_Skin.size(); + m_CachedSkin2.resize_uninitialized(size); + + BoneInfluence2* bones2 = &m_CachedSkin2[0]; + for (int i=0;i<size;i++) + { + bones2[i].boneIndex[0] = bones4[i].boneIndex[0]; + bones2[i].boneIndex[1] = bones4[i].boneIndex[1]; + + float invSum = 1.0F / (bones4[i].weight[0] + bones4[i].weight[1]); + bones2[i].weight[0] = bones4[i].weight[0] * invSum; + bones2[i].weight[1] = bones4[i].weight[1] * invSum; + } + return bones2; + } + else if (count == 4) + { + return bones4; + } + else + { + return NULL; + } + } + else + { + return NULL; + } +} + + +int 
Mesh::GetRuntimeMemorySize () const +{ + int size = Super::GetRuntimeMemorySize(); + + #if ENABLE_PROFILER + if (m_VBO) + size += m_VBO->GetRuntimeMemorySize(); + #endif + + return size; +} + + +void* Mesh::GetSharedNxMesh () +{ + return m_CollisionMesh.GetSharedNxMesh (*this); +} + +void* Mesh::GetSharedNxConvexMesh () +{ + return m_CollisionMesh.GetSharedNxConvexMesh (*this); +} + +void Mesh::UploadMeshData(bool markNoLongerReadable) +{ + if(markNoLongerReadable) + m_IsReadable = false; + + ClearSkinCache(); + UpdateVertexFormat(); + + // prepare VBO + UInt32 channelMask = GetAvailableChannelsForRendering(); + + // Create color channel in case it's needed by shader (and we can't patch it) +#if GFX_CAN_UNLOAD_MESH_DATA + bool unloadData = !m_IsReadable && m_Skin.empty(); + if (unloadData && !m_KeepVertices) + channelMask |= VERTEX_FORMAT1(Color); +#endif + + // Shared VBO is not required for skinned meshes (unless used as non-skinned) + if (m_Skin.empty()) + CreateSharedVBO(channelMask); + +#if GFX_CAN_UNLOAD_MESH_DATA + if (unloadData) + { + if (!m_KeepVertices && m_VBO && !m_VBO->IsUsingSourceVertices()) + { + Assert(m_Skin.empty()); + m_VertexData.Deallocate(); + m_VBO->UnloadSourceVertices(); + } + if (!m_KeepIndices && m_VBO && !m_VBO->IsUsingSourceIndices()) + { +#if UNITY_METRO + m_IndexBuffer.clear(); + m_IndexBuffer.shrink_to_fit(); +#else + // On Metro this throws "Expression: vector containers incompatible for swap" when compiling in VS 2013, works okay if compiling in VS 2012 + // Case 568418 + IndexContainer emptyIndices; + m_IndexBuffer.swap(emptyIndices); +#endif + } + } +#endif +} + +void Mesh::AwakeFromLoad(AwakeFromLoadMode awakeMode) +{ + PROFILER_AUTO(gAwakeFromLoadMesh, this) + + Super::AwakeFromLoad(awakeMode); + m_CollisionMesh.AwakeFromLoad(awakeMode); + + UploadMeshData(!m_IsReadable); + + if (m_InternalMeshID == 0) + m_InternalMeshID = s_MeshIDGenerator.AllocateID (); +} + +void Mesh::AwakeFromLoadThreaded() +{ + 
Super::AwakeFromLoadThreaded(); + m_CollisionMesh.AwakeFromLoadThreaded(*this); +} + +void Mesh::MarkDynamic() +{ + // Optimize for frequent updates + m_IsDynamic = true; +} + +void Mesh::UpdateVertexFormat() +{ + // Make sure vertex streams are in the format we want for rendering + // This will also handle decompression of unsupported vertex formats + FormatVertices(GetAvailableChannels()); + SwizzleVertexColorsIfNeeded(); +} + +bool Mesh::ShouldIgnoreInGarbageDependencyTracking () +{ + return true; +} + +UInt32 Mesh::GetAvailableChannels() const +{ + return m_VertexData.GetChannelMask (); +} + +UInt32 Mesh::GetAvailableChannelsForRendering() const +{ + unsigned availChannels = m_VertexData.GetChannelMask (); + return availChannels; +} + +bool Mesh::IsSuitableSizeForDynamicBatching () const +{ + // If any submesh has too many vertices, don't keep mesh data for batching + for (size_t i = 0; i < GetSubMeshCount(); i++) + { + if (m_SubMeshes[i].vertexCount > kDynamicBatchingVerticesThreshold) + return false; + } + return true; +} + +void Mesh::CheckConsistency() +{ + Super::CheckConsistency(); + + for (int i = 0; i < m_SubMeshes.size(); ++i) + { + Assert(m_SubMeshes[i].topology != kPrimitiveTriangleStripDeprecated); + } +} + +void Mesh::SwapBlendShapeData (BlendShapeData& shapes) +{ + WaitOnRenderThreadUse(); + +// swap (m_Shapes, shapes); + m_Shapes = shapes; + + NotifyObjectUsers( kDidModifyMesh ); +} diff --git a/Runtime/Filters/Mesh/LodMesh.h b/Runtime/Filters/Mesh/LodMesh.h new file mode 100644 index 0000000..41fcf74 --- /dev/null +++ b/Runtime/Filters/Mesh/LodMesh.h @@ -0,0 +1,509 @@ +#ifndef LODMESH_H +#define LODMESH_H + +#include "Runtime/BaseClasses/NamedObject.h" +#include "Runtime/Geometry/AABB.h" +#include "Runtime/Math/Vector2.h" +#include "Runtime/Math/Vector4.h" +#include "Mesh.h" +#include "Runtime/Math/Color.h" +#include <string> +#include <vector> +#include "Runtime/BaseClasses/MessageIdentifier.h" +#include "Runtime/Shaders/VBO.h" +#include 
"CompressedMesh.h" +#include "VertexData.h" +#include "Runtime/Dynamics/CollisionMeshData.h" +#include "MeshBlendShape.h" +#include "Runtime/Misc/Allocator.h" +#include "Runtime/Camera/IntermediateUsers.h" + +class IntermediateRenderer; + +struct SubMesh +{ + UInt32 firstByte; + UInt32 indexCount; + GfxPrimitiveType topology; + + UInt32 firstVertex; + UInt32 vertexCount; + AABB localAABB; + + SubMesh () + { + firstByte = 0; + indexCount = 0; + topology = kPrimitiveTriangles; + firstVertex = 0; + vertexCount = 0; + localAABB = AABB (Vector3f::zero, Vector3f::zero); + } + + DECLARE_SERIALIZE_NO_PPTR (SubMesh) + +#if SUPPORT_SERIALIZED_TYPETREES + template<class TransferFunction> + void TransferWorkaround35SerializationFuckup (TransferFunction& transfer); +#endif +}; + +/// typedef for tangent space lighting rotations +typedef std::vector<DeprecatedTangent, STL_ALLOCATOR(kMemGeometry, DeprecatedTangent) > DeprecatedTangentsArray; + +template<class TransferFunc> +void SubMesh::Transfer (TransferFunc& transfer) +{ + #if SUPPORT_SERIALIZED_TYPETREES + if (transfer.GetFlags() & kWorkaround35MeshSerializationFuckup) + { + TransferWorkaround35SerializationFuckup (transfer); + return; + } + #endif + + transfer.SetVersion (2); + TRANSFER(firstByte); + TRANSFER(indexCount); + TRANSFER_ENUM(topology); + TRANSFER(firstVertex); + TRANSFER(vertexCount); + TRANSFER(localAABB); + if (transfer.IsOldVersion(1)) + { + UInt32 triStrip; + transfer.Transfer (triStrip, "isTriStrip"); + topology = triStrip ? kPrimitiveTriangleStripDeprecated : kPrimitiveTriangles; + } +} + +#if SUPPORT_SERIALIZED_TYPETREES +template<class TransferFunc> +void SubMesh::TransferWorkaround35SerializationFuckup (TransferFunc& transfer) +{ + TRANSFER(firstByte); + TRANSFER(indexCount); + + UInt32 triStrip; + transfer.Transfer (triStrip, "isTriStrip"); + topology = triStrip ? 
kPrimitiveTriangleStripDeprecated : kPrimitiveTriangles; + + UInt32 triangleCount; + transfer.Transfer (triangleCount, "triangleCount"); + + TRANSFER(firstVertex); + TRANSFER(vertexCount); + TRANSFER(localAABB); +} +#endif + +template<class TransferFunc> +void MeshPartition::Transfer (TransferFunc& transfer) +{ + TRANSFER(vertexCount); + TRANSFER(vertexOffset); + TRANSFER(indexCount); + TRANSFER(indexByteOffset); +} + +template<class TransferFunc> +void MeshPartitionInfo::Transfer (TransferFunc& transfer) +{ + TRANSFER(submeshStart); + TRANSFER(partitionCount); +} + +class EXPORT_COREMODULE Mesh : public NamedObject +{ +public: + enum + { + #if UNITY_IPHONE || UNITY_ANDROID || UNITY_BB10 || UNITY_TIZEN + alignBoneContainer = 16, + #else + alignBoneContainer = kDefaultMemoryAlignment, + #endif + }; + + //mircea@INFO PS3 doesn't render from VBOs hence m_VertexData and m_IndexBuffer *have* to be allocated with kMemVertexData. + typedef UNITY_VECTOR(kMemVertexData, UInt8) IndexContainer; + typedef UNITY_VECTOR(kMemGeometry, SubMesh) SubMeshContainer; + typedef dynamic_array<Matrix4x4f> MatrixContainer; + typedef dynamic_array<int> SkinContainer; + typedef UNITY_VECTOR(kMemGeometry, UInt32) CollisionTriangleContainer; + typedef dynamic_array<MinMaxAABB> AABBContainer; + + typedef dynamic_array<BoneInfluence, alignBoneContainer> BoneInfluenceContainer; + typedef dynamic_array<BoneInfluence2, alignBoneContainer> BoneInfluence2Container; + + typedef UNITY_TEMP_VECTOR(UInt32) TemporaryIndexContainer; + +#if UNITY_PS3 || UNITY_EDITOR + typedef UNITY_VECTOR(kMemVertexData, MeshPartition) MeshPartitionContainer; + typedef UNITY_VECTOR(kMemVertexData, MeshPartitionInfo) MeshPartitionInfoContainer; +#endif + + REGISTER_DERIVED_CLASS (Mesh, NamedObject) + DECLARE_OBJECT_SERIALIZE (Mesh) + + Mesh (MemLabelId label, ObjectCreationMode mode); + // ~Mesh (); declared-by-macro + +public: + + virtual int GetRuntimeMemorySize () const; + + VBO* GetSharedVBO( UInt32 wantedChannels ); + 
bool CopyToVBO ( UInt32 wantedChannels, VBO& vbo ); + void InitVertexBufferData ( UInt32 wantedChannels ); + void GetVertexBufferData ( VertexBufferData& buffer, UInt32 wantedChannels ); + void GetIndexBufferData (IndexBufferData& buffer); + void UnloadVBOFromGfxDevice(); + void ReloadVBOToGfxDevice(); + + + void AwakeFromLoad(AwakeFromLoadMode mode); + void AwakeFromLoadThreaded(); + void UploadMeshData(bool markNoLongerReadable); + + virtual bool MainThreadCleanup (); + + void MarkDynamic(); + void UpdateVertexFormat(); + + void SetBounds (const AABB& aabb ); + const AABB& GetBounds () const { return m_LocalAABB; } + + void SetBounds (unsigned submesh, const AABB& aabb ); + const AABB& GetBounds (unsigned submesh) const + { + DebugAssertIf(submesh >= m_SubMeshes.size()); + return m_SubMeshes[submesh].localAABB; + } + + void Clear (bool keepVertexLayout); + + /// Recalculate the bounding volume + void RecalculateBounds (); + void RecalculateSubmeshBounds (unsigned submesh); + + // Recalculate normals + void RecalculateNormals(); + void RecalculateNormalsWithHardAngle( float hardAngle ); + + // Validate that there are no out of bounds indices in the triangles + bool ValidateVertexCount (unsigned newVertexCount, const void* newTriangles, unsigned indexCount); + + int GetVertexCount () const { return m_VertexData.GetVertexCount (); } + + // Gets count in all submeshes. + int GetPrimitiveCount() const; + int CalculateTriangleCount() const; // ignores degenerates in strips + + // NOTE: make sure to call SetChannelDirty and RecalculateBounds when changing the geometry! 
+ StrideIterator<Vector3f> GetVertexBegin () const { return m_VertexData.MakeStrideIterator<Vector3f> (kShaderChannelVertex); } + StrideIterator<Vector3f> GetVertexEnd () const { return m_VertexData.MakeEndIterator<Vector3f> (kShaderChannelVertex); } + + StrideIterator<Vector3f> GetNormalBegin () const { return m_VertexData.MakeStrideIterator<Vector3f> (kShaderChannelNormal); } + StrideIterator<Vector3f> GetNormalEnd () const { return m_VertexData.MakeEndIterator<Vector3f> (kShaderChannelNormal); } + + StrideIterator<ColorRGBA32> GetColorBegin () const { return m_VertexData.MakeStrideIterator<ColorRGBA32> (kShaderChannelColor); } + StrideIterator<ColorRGBA32> GetColorEnd () const { return m_VertexData.MakeEndIterator<ColorRGBA32> (kShaderChannelColor); } + + StrideIterator<Vector2f> GetUvBegin (int uvIndex = 0) const { return m_VertexData.MakeStrideIterator<Vector2f> ((ShaderChannel)(kShaderChannelTexCoord0 + uvIndex)); } + StrideIterator<Vector2f> GetUvEnd (int uvIndex = 0) const { return m_VertexData.MakeEndIterator<Vector2f> ((ShaderChannel)(kShaderChannelTexCoord0 + uvIndex)); } + + StrideIterator<Vector4f> GetTangentBegin () const { return m_VertexData.MakeStrideIterator<Vector4f> (kShaderChannelTangent); } + StrideIterator<Vector4f> GetTangentEnd () const { return m_VertexData.MakeEndIterator<Vector4f> (kShaderChannelTangent); } + + void ExtractVertexArray (Vector3f* destination) const; + void ExtractNormalArray (Vector3f* destination) const; + void ExtractColorArray (ColorRGBA32* destination) const; + void ExtractColorArrayConverting (ColorRGBAf* destination) const; + void ExtractUvArray (int uvIndex, Vector2f* destination) const; + void ExtractTangentArray (Vector4f* destination) const; + + void SetVertices (Vector3f const* data, size_t count); + void SetNormals (Vector3f const* data, size_t count); + void SetTangents (Vector4f const* data, size_t count); + void SetUv (int uvIndex, Vector2f const* data, size_t count); + void SetColors (ColorRGBA32 const* 
data, size_t count); + void SetColorsConverting (ColorRGBAf const* data, size_t count); + + bool GetVertexColorsSwizzled() const { return m_VertexColorsSwizzled; } + void SetVertexColorsSwizzled(bool flag) { m_VertexColorsSwizzled = flag; } + bool HasVertexData () const { return m_VertexData.GetDataPtr () != NULL; } + void* GetVertexDataPointer () const { return m_VertexData.GetDataPtr (); } + size_t GetVertexDataSize () const { return m_VertexData.GetDataSize (); } + size_t GetVertexSize () const { return m_VertexData.GetVertexSize(); } + + const void* GetChannelPointer (ShaderChannel channel) const { return m_VertexData.GetDataPtr () + m_VertexData.GetChannelOffset (channel); } + void* GetChannelPointer (ShaderChannel channel) { return m_VertexData.GetDataPtr () + m_VertexData.GetChannelOffset (channel); } + void* GetChannelPointer (ShaderChannel channel, size_t offsetInElements) { return m_VertexData.GetDataPtr () + m_VertexData.GetChannelOffset (channel) + offsetInElements * m_VertexData.GetChannelStride(channel); } + size_t GetStride (ShaderChannel channel) const { return m_VertexData.GetChannelStride(channel); } + + bool IsAvailable (ShaderChannel channel) const { return m_VertexData.HasChannel (channel); } + // returns a bitmask of a newly created channels + UInt32 ResizeVertices (size_t count, UInt32 shaderChannels, const VertexStreamsLayout& streams, const VertexChannelsLayout& channels); + UInt32 ResizeVertices (size_t count, UInt32 shaderChannels) { return ResizeVertices(count, shaderChannels, GetStreamsLayout(), GetChannelsLayout()); } + + // returns a bitmask of a newly created channels + UInt32 FormatVertices (UInt32 shaderChannels); + // initializes the specified channels to default values + void InitChannelsToDefault (unsigned begin, unsigned count, unsigned shaderChannels); + + bool SetBoneWeights (const BoneInfluence* v, int count); + const BoneInfluence* GetBoneWeights () const { return m_Skin.empty() ? 
NULL : &m_Skin[0]; } + BoneInfluence* GetBoneWeights () { return m_Skin.empty() ? NULL : &m_Skin[0]; } + void ClearSkinCache (); + int GetMaxBoneIndex (); + + const Matrix4x4f* GetBindposes () const { return m_Bindpose.empty() ? NULL : &m_Bindpose[0]; } + int GetBindposeCount () const { return m_Bindpose.size(); } + void SetBindposes (const Matrix4x4f* bindposes, int count); + + bool SetIndices (const UInt32* indices, unsigned count, unsigned submesh, GfxPrimitiveType topology); + bool SetIndices (const UInt16* indices, unsigned count, unsigned submesh, GfxPrimitiveType topology); + + void GetTriangles (TemporaryIndexContainer& triangles, unsigned submesh) const; + void GetTriangles (TemporaryIndexContainer& triangles) const; + void AppendTriangles (TemporaryIndexContainer& triangles, unsigned submesh) const; + void GetStrips (TemporaryIndexContainer& triangles, unsigned submesh) const; + void GetIndices (TemporaryIndexContainer& triangles, unsigned submesh) const; + + enum { + k16BitIndices = 1 << 0, + kRebuildCollisionTriangles = 1 << 2, + kDontAssignIndices = 1 << 3, + kDontSupportSubMeshVertexRanges = 1 << 4 + }; + bool SetIndicesComplex (const void* indices, unsigned count, unsigned submesh, GfxPrimitiveType topology, int mode); + + bool ExtractTriangle (UInt32 face, UInt32* indices) const; + + void SetSubMeshCount (unsigned int count); + size_t GetSubMeshCount () const; + + void UpdateSubMeshVertexRange (int index); + + void AddObjectUser( ListNode<Object>& node ) { m_ObjectUsers.push_back(node); } + void AddIntermediateUser( ListNode<IntermediateRenderer>& node ) { m_IntermediateUsers.AddUser(node); } + + const BlendShapeData& GetBlendShapeData() const { return m_Shapes; } + size_t GetBlendShapeChannelCount() const { return m_Shapes.channels.size(); } + void SwapBlendShapeData (BlendShapeData& shapes); + + + BlendShapeData& GetWriteBlendShapeDataInternal() { return m_Shapes; } + + + void CheckConsistency(); + +#if ENABLE_MULTITHREADED_CODE + void 
SetCurrentCPUFence( UInt32 fence ) { m_CurrentCPUFence = fence; m_WaitOnCPUFence = true; } +#endif + + void WaitOnRenderThreadUse(); + + static Mesh& GetInstantiatedMesh (Mesh* mesh, Object& owner); + + void CopyTransformed (const Mesh& mesh, const Matrix4x4f& transform); + + void SetChannelsDirty (unsigned vertexChannelsChanged, bool indices); + + void* GetSharedNxMesh (); + void* GetSharedNxConvexMesh (); + + void RebuildCollisionTriangles(); + + const SubMesh& GetSubMeshFast (unsigned int submesh) const + { + DebugAssertIf(submesh >= m_SubMeshes.size()); + return m_SubMeshes[submesh]; + } + SubMesh& GetSubMeshFast (unsigned int submesh) + { + DebugAssertIf(submesh >= m_SubMeshes.size()); + return m_SubMeshes[submesh]; + } + + const UInt16* GetSubMeshBuffer16 (int submesh) const; + UInt16* GetSubMeshBuffer16 (int submesh); + + int GetSubMeshBufferByteSize (int submesh) const { return kVBOIndexSize * m_SubMeshes[submesh].indexCount; } + + // The number of indices contained in the index buffer (all submeshes) + int GetTotalndexCount () const; + + void ByteSwapIndices (); + + /// 4, 2, 1 bone influence (BoneInfluence, BoneInfluence2, int) + void* GetSkinInfluence (int count); + + int GetMeshUsageFlags () const { return m_MeshUsageFlags; } + + virtual bool ShouldIgnoreInGarbageDependencyTracking (); + + UInt32 GetAvailableChannels() const; + // May return only a subset of channels that are present in the mesh + UInt32 GetAvailableChannelsForRendering() const; + UInt32 GetChannelsInVBO() const { return m_ChannelsInVBO; } + + bool IsSuitableSizeForDynamicBatching () const; + + // Calculate cached bone bounds per bone by calculating the bounding volume in bind pose space. + // This is used by the SkinnedMeshRenderer to compute an accurate world space bounding volume quickly. 
+ const AABBContainer& GetCachedBonesBounds(); + + void DestripifyIndices (); + void SetHideFromRuntimeStats(bool flag) { m_HideFromRuntimeStats = flag; } + + bool IsSharedPhysicsMeshDirty () { return m_CollisionMesh.IsSharedPhysicsMeshDirty(); } + + bool CanAccessFromScript() const; + + const VertexData& GetVertexData() const { return m_VertexData; } + VertexData& GetVertexData() { return m_VertexData; } + + UInt8 GetMeshCompression() const { return m_MeshCompression; } + void SetMeshCompression(UInt8 mc) { m_MeshCompression = mc; } + + enum + { + kStreamCompressionDefault = 0, + kStreamCompressionCompressed, + kStreamCompressionCompressedAggressive + }; + + UInt8 GetStreamCompression() const { return m_StreamCompression; } + void SetStreamCompression(UInt8 cs) { m_StreamCompression = cs; } + bool GetIsReadable() const { return m_IsReadable; } + void SetIsReadable(bool readable) { m_IsReadable = readable; } + + + bool GetKeepVertices() const { return m_KeepVertices; } + void SetKeepVertices(bool keep) { m_KeepVertices = keep; } + + bool GetKeepIndices() const { return m_KeepIndices; } + void SetKeepIndices(bool keep) { m_KeepIndices = keep; } + + const IndexContainer& GetIndexBuffer() const { return m_IndexBuffer; } + IndexContainer& GetIndexBuffer() { return m_IndexBuffer; } + + const SubMeshContainer& GetSubMeshes() const { return m_SubMeshes; } + SubMeshContainer& GetSubMeshes() { return m_SubMeshes; } + + const MatrixContainer& GetBindpose() const { return m_Bindpose; } + MatrixContainer& GetBindpose() { return m_Bindpose; } + + const dynamic_array<BindingHash>& GetBonePathHashes() const { return m_BonePathHashes; } + dynamic_array<BindingHash>& GetBonePathHashes() { return m_BonePathHashes; } + BindingHash GetRootBonePathHash() const { return m_RootBonePathHash; } + void SetRootBonePathHash(BindingHash val) { m_RootBonePathHash = val; } + + const BoneInfluenceContainer& GetSkin() const { return m_Skin; } + BoneInfluenceContainer& GetSkin() { return m_Skin; } 
+ + const AABB& GetLocalAABB() const { return m_LocalAABB; } + void SetLocalAABB(const AABB& aabb) { m_LocalAABB = aabb; } + +#if UNITY_PS3 || UNITY_EDITOR + MeshPartitionContainer m_Partitions; + MeshPartitionInfoContainer m_PartitionInfos; +#endif + + +#if UNITY_EDITOR + void SetMeshOptimized(bool meshOptimized) { m_MeshOptimized = meshOptimized; } + bool GetMeshOptimized() const { return m_MeshOptimized; } +#endif + + UInt32 GetInternalMeshID() const { Assert(m_InternalMeshID); return m_InternalMeshID; } + +private: + void CreateSharedVBO( UInt32 wantedChannels ); + void NotifyObjectUsers( const MessageIdentifier& msg ); + void RecalculateSubmeshBoundsInternal (unsigned submesh); + void RecalculateBoundsInternal (); + void LoadDeprecatedTangentData (Mesh& mesh, DeprecatedTangentsArray &tangents); + void SwizzleVertexColorsIfNeeded (); + + const VertexStreamsLayout& GetStreamsLayout() const; + const VertexChannelsLayout& GetChannelsLayout() const; + + void DestripifySubmeshOnTransferInternal(); + void SetIndexData(int submeshIndex, int indexCount, const void* indices, GfxPrimitiveType topology, int mode); + +#if SUPPORT_SERIALIZED_TYPETREES + template<class TransferFunction> + void TransferWorkaround35SerializeFuckup (TransferFunction& transfer); +#endif + +#if UNITY_EDITOR || UNITY_PS3 + template<class TransferFunction> + void TransferPS3Data (TransferFunction& transfer); +#endif +#if UNITY_EDITOR + bool m_MeshOptimized; +#endif + + VertexData m_VertexData; + + UInt8 m_MeshCompression; + UInt8 m_StreamCompression; + bool m_IsReadable; + bool m_KeepVertices; + bool m_KeepIndices; + UInt32 m_InternalMeshID; + + int m_MeshUsageFlags; + + IndexContainer m_IndexBuffer; + SubMeshContainer m_SubMeshes; + MatrixContainer m_Bindpose; + BlendShapeData m_Shapes; + + dynamic_array<BindingHash> m_BonePathHashes; + BindingHash m_RootBonePathHash; + + AABBContainer m_CachedBonesAABB; + + BoneInfluenceContainer m_Skin; + BoneInfluence2Container m_CachedSkin2; + SkinContainer 
m_CachedSkin1; + + int m_MaxBoneIndex; + + AABB m_LocalAABB; + + CollisionMeshData m_CollisionMesh; + + typedef List< ListNode<Object> > ObjectList; + ObjectList m_ObjectUsers; // Object-derived users of this mesh + + IntermediateUsers m_IntermediateUsers; // IntermediateRenderer users of this mesh + + #if ENABLE_MULTITHREADED_CODE + UInt32 m_CurrentCPUFence; + bool m_WaitOnCPUFence; + #endif + + PPtr<Object> m_Owner; + VBO* m_VBO; + + + UInt32 m_ChannelsInVBO; + bool m_VerticesDirty; + bool m_IndicesDirty; + bool m_IsDynamic; + bool m_HideFromRuntimeStats; + bool m_VertexColorsSwizzled; + + friend class MeshFilter; + friend class ClothAnimator; + friend class CompressedMesh; + friend void PartitionSubmeshes (Mesh& m); + friend void OptimizeReorderVertexBuffer (Mesh& mesh); +}; + +#endif diff --git a/Runtime/Filters/Mesh/LodMeshFilter.cpp b/Runtime/Filters/Mesh/LodMeshFilter.cpp new file mode 100644 index 0000000..512f153 --- /dev/null +++ b/Runtime/Filters/Mesh/LodMeshFilter.cpp @@ -0,0 +1,96 @@ +#include "UnityPrefix.h" +#include "LodMeshFilter.h" +#include "LodMesh.h" +#include "MeshRenderer.h" +#include "Runtime/Filters/Particles/MeshParticleEmitter.h" +#include "Runtime/Serialize/TransferFunctions/SerializeTransfer.h" +#include "Runtime/Serialize/TransferFunctions/TransferNameConversions.h" + +MeshFilter::MeshFilter (MemLabelId label, ObjectCreationMode mode) +: Super(label, mode) +{ + m_Mesh = NULL; +} + +MeshFilter::~MeshFilter () +{ +} + +void MeshFilter::OnDidAddMesh () +{ + AssignMeshToRenderer (); +} + +void MeshFilter::AssignMeshToRenderer () +{ + if (GetGameObjectPtr()) + { + MeshRenderer* renderer = QueryComponent(MeshRenderer); + if (renderer && renderer->GetSharedMesh() != m_Mesh) + renderer->SetSharedMesh(m_Mesh); + + MeshParticleEmitter* emitter = QueryComponent(MeshParticleEmitter); + if (emitter && emitter->GetMesh() != m_Mesh) + emitter->SetMesh(m_Mesh); + } +} + +void MeshFilter::SetSharedMesh (PPtr<Mesh> mesh) +{ + m_Mesh = mesh; + + 
MeshRenderer* renderer = QueryComponent(MeshRenderer);
+	if (renderer)
+		renderer->SetSharedMesh(m_Mesh);
+
+	MeshParticleEmitter* emitter = QueryComponent(MeshParticleEmitter);
+	if (emitter)
+		emitter->SetMesh(m_Mesh);
+
+	SetDirty ();
+}
+
+PPtr<Mesh> MeshFilter::GetSharedMesh ()
+{
+	return m_Mesh;
+}
+
+// Returns a mesh owned by this filter; if the currently assigned mesh is not
+// the instantiated copy (see Mesh::GetInstantiatedMesh), the copy becomes the
+// new shared mesh of this filter.
+Mesh* MeshFilter::GetInstantiatedMesh ()
+{
+	Mesh* instantiated = &Mesh::GetInstantiatedMesh (m_Mesh, *this);
+	if (PPtr<Mesh> (instantiated) != m_Mesh)
+	{
+		SetSharedMesh(instantiated);
+	}
+
+	return instantiated;
+}
+
+void MeshFilter::SetInstantiatedMesh (Mesh* mesh)
+{
+	SetSharedMesh(mesh);
+}
+
+IMPLEMENT_CLASS_HAS_INIT (MeshFilter)
+IMPLEMENT_OBJECT_SERIALIZE (MeshFilter)
+
+template<class TransferFunction> inline
+void MeshFilter::Transfer (TransferFunction& transfer)
+{
+	Super::Transfer (transfer);
+	transfer.Transfer (m_Mesh, "m_Mesh", kSimpleEditorMask);
+}
+
+// Registers name conversions so data serialized under the legacy
+// m_LodMesh / PPtr<LodMesh> names still loads into m_Mesh / PPtr<Mesh>.
+void MeshFilter::InitializeClass ()
+{
+	RegisterAllowNameConversion(GetClassStringStatic(), "m_LodMesh", "m_Mesh");
+	RegisterAllowTypeNameConversion ("PPtr<LodMesh>", "PPtr<Mesh>");
+
+	REGISTER_MESSAGE_VOID(MeshFilter, kDidAddComponent, OnDidAddMesh);
+}
+
+void MeshFilter::AwakeFromLoad (AwakeFromLoadMode awakeMode)
+{
+	Super::AwakeFromLoad (awakeMode);
+	AssignMeshToRenderer ();
+}
diff --git a/Runtime/Filters/Mesh/LodMeshFilter.h b/Runtime/Filters/Mesh/LodMeshFilter.h
new file mode 100644
index 0000000..ff6273b
--- /dev/null
+++ b/Runtime/Filters/Mesh/LodMeshFilter.h
@@ -0,0 +1,38 @@
+#ifndef LODMESHFILTER_H
+#define LODMESHFILTER_H
+
+#include "Runtime/BaseClasses/GameObject.h"
+#include "Runtime/Modules/ExportModules.h"
+
+class Mesh;
+
+// Component that holds the PPtr<Mesh> for a GameObject and assigns it to the
+// MeshRenderer / MeshParticleEmitter siblings (see LodMeshFilter.cpp).
+class EXPORT_COREMODULE MeshFilter : public Unity::Component
+{
+public:
+	REGISTER_DERIVED_CLASS (MeshFilter, Unity::Component)
+	DECLARE_OBJECT_SERIALIZE (MeshFilter)
+
+	MeshFilter (MemLabelId label, ObjectCreationMode mode);
+
+	void SetSharedMesh (PPtr<Mesh> mesh);
+	PPtr<Mesh> GetSharedMesh ();
+
+	Mesh* GetInstantiatedMesh ();
+	void SetInstantiatedMesh (Mesh* mesh);
+
+	static void InitializeClass ();
+	static void CleanupClass () {}
+
+	void OnDidAddMesh ();
+
+protected:
+	virtual void AwakeFromLoad (AwakeFromLoadMode awakeMode);
+
+
+private:
+	void AssignMeshToRenderer ();
+
+	PPtr<Mesh> m_Mesh;
+};
+
+#endif
diff --git a/Runtime/Filters/Mesh/Mesh.h b/Runtime/Filters/Mesh/Mesh.h
new file mode 100644
index 0000000..e6b58dc
--- /dev/null
+++ b/Runtime/Filters/Mesh/Mesh.h
@@ -0,0 +1,76 @@
+#ifndef MESH_H
+#define MESH_H
+
+#include <vector>
+#include "Runtime/Serialize/SerializeUtility.h"
+#include "Runtime/Math/Vector3.h"
+#include "Runtime/Misc/Allocator.h"
+
+class Quaternionf;
+
+/// A face in the mesh.
+// Three 16-bit vertex indices; operator[] views v1..v3 as a contiguous array.
+struct Face {
+	UInt16 v1, v2, v3;
+	Face (UInt16 vert1, UInt16 vert2, UInt16 vert3)
+	{v1 = vert1; v2 = vert2; v3 = vert3;}
+	Face () {}
+
+	UInt16 &operator[] (int i) { return (&v1)[i]; }
+	UInt16 operator[] (int i) const { return (&v1)[i]; }
+
+	DECLARE_SERIALIZE_OPTIMIZE_TRANSFER (Face)
+};
+
+template<class TransferFunc>
+void Face::Transfer (TransferFunc& transfer)
+{
+	TRANSFER (v1);
+	TRANSFER (v2);
+	TRANSFER (v3);
+}
+
+// Legacy serialized tangent-space representation (normal + tangent + handedness);
+// kept only so old data can still be read (see Mesh::LoadDeprecatedTangentData).
+struct DeprecatedTangent
+{
+	Vector3f normal;
+	Vector3f tangent;
+	float handedness;
+	DECLARE_SERIALIZE_OPTIMIZE_TRANSFER (Tangent)
+};
+
+template<class TransferFunc>
+void DeprecatedTangent::Transfer (TransferFunc& transfer)
+{
+	TRANSFER (normal);
+	TRANSFER (tangent);
+	TRANSFER (handedness);
+}
+
+// Four bone weights/indices per vertex (full-quality skinning path).
+struct BoneInfluence
+{
+	float weight[4];
+	int boneIndex[4];
+
+	DECLARE_SERIALIZE_OPTIMIZE_TRANSFER (BoneInfluence)
+};
+
+// Two bone weights/indices per vertex (reduced-influence variant; not serialized).
+struct BoneInfluence2
+{
+	float weight[2];
+	int boneIndex[2];
+};
+
+template<class TransferFunc>
+void BoneInfluence::Transfer (TransferFunc& transfer)
+{
+	TRANSFER (weight[0]);
+	TRANSFER (weight[1]);
+	TRANSFER (weight[2]);
+	TRANSFER (weight[3]);
+
+	TRANSFER (boneIndex[0]);
+	TRANSFER (boneIndex[1]);
+	TRANSFER (boneIndex[2]);
+	TRANSFER (boneIndex[3]);
+}
+
+#endif
diff --git a/Runtime/Filters/Mesh/MeshBlendShape.cpp
b/Runtime/Filters/Mesh/MeshBlendShape.cpp
new file mode 100644
index 0000000..c7588e2
--- /dev/null
+++ b/Runtime/Filters/Mesh/MeshBlendShape.cpp
@@ -0,0 +1,234 @@
+#include "UnityPrefix.h"
+#include "Configuration/UnityConfigure.h"
+#include "MeshBlendShape.h"
+#include "Runtime/mecanim/generic/crc32.h"
+
+// Deltas with magnitude at or below these thresholds are treated as zero when
+// converting dense delta arrays into the sparse representation below.
+static const float kVertexDeltaEpsilon = 1e-5f;
+static const float kNormalDeltaEpsilon = 1e-5f;
+
+// Converts dense per-vertex delta arrays into one sparse frame: only vertices
+// whose position, normal or tangent delta exceeds the epsilons are appended to
+// sharedSparceVertices, and frame records the [firstVertex, vertexCount) window
+// plus whether any normal/tangent deltas were present.
+// deltaNormals/deltaTangents may be empty; otherwise they must match
+// deltaVertices in size (asserted).
+// NOTE(review): loop index j is a signed int compared against size() — fine
+// for meshes below 2^31 vertices.
+void SetBlendShapeVertices(const std::vector<Vector3f>& deltaVertices, const std::vector<Vector3f>& deltaNormals, const std::vector<Vector3f>& deltaTangents, BlendShapeVertices& sharedSparceVertices, BlendShape& frame)
+{
+	Assert(deltaNormals.empty() || deltaVertices.size() == deltaNormals.size());
+	Assert(deltaTangents.empty() || deltaVertices.size() == deltaTangents.size());
+
+	frame.firstVertex = sharedSparceVertices.size();
+
+	// Converting blend shape in to sparse blend shape
+	sharedSparceVertices.reserve(sharedSparceVertices.size() + deltaVertices.size());
+
+	frame.hasNormals = frame.hasTangents = false;
+
+	for (int j = 0; j < deltaVertices.size(); ++j)
+	{
+		const bool vertexHasNormal = (!deltaNormals.empty() && Magnitude(deltaNormals[j]) > kNormalDeltaEpsilon);
+		const bool vertexHasTangent = (!deltaTangents.empty() && Magnitude(deltaTangents[j]) > kNormalDeltaEpsilon);
+
+		frame.hasNormals = frame.hasNormals || vertexHasNormal;
+		frame.hasTangents = frame.hasTangents || vertexHasTangent;
+
+		if (Magnitude(deltaVertices[j]) > kVertexDeltaEpsilon || vertexHasNormal || vertexHasTangent)
+		{
+			BlendShapeVertex v;
+
+			v.vertex = deltaVertices[j];
+			if (!deltaNormals.empty())
+				v.normal = deltaNormals[j];
+			if (!deltaTangents.empty())
+				v.tangent = deltaTangents[j];
+
+			v.index = j;
+			sharedSparceVertices.push_back(v);
+		}
+	}
+
+	frame.vertexCount = sharedSparceVertices.size() - frame.firstVertex;
+}
+
+// Recomputes hasNormals/hasTangents by scanning this shape's window of the
+// shared sparse vertex array against kNormalDeltaEpsilon.
+void BlendShape::UpdateFlags(const BlendShapeVertices& sharedSparceVertices)
+{
+	hasNormals = hasTangents = false;
+
+	for (int j = 0; j < vertexCount; ++j)
+	{
+		const BlendShapeVertex& v = sharedSparceVertices[firstVertex + j];
+		const bool vertexHasNormal = Magnitude(v.normal) > kNormalDeltaEpsilon;
+		const bool vertexHasTangent = Magnitude(v.tangent) > kNormalDeltaEpsilon;
+
+		hasNormals = hasNormals || vertexHasNormal;
+		hasTangents = hasTangents || vertexHasTangent;
+	}
+}
+
+// Fills in a channel: stores the name, its CRC32 hash (used for fast lookup
+// by GetChannelIndex(BindingHash)), and the frame window it owns.
+void InitializeChannel (const UnityStr& inName, int frameIndex, int frameCount, BlendShapeChannel& channel)
+{
+	channel.name.assign(inName.c_str(), kMemGeometry);
+	channel.nameHash = mecanim::processCRC32(inName.c_str());
+	channel.frameIndex = frameIndex;
+	channel.frameCount = frameCount;
+}
+
+const char* GetChannelName (const BlendShapeData& data, int index)
+{
+	return data.channels[index].name.c_str();
+}
+
+// Linear search by channel name; returns -1 when not found.
+// NOTE(review): 'name == data.channels[i].name' relies on ConstantString
+// providing a string-comparing operator== against const char* — confirm it is
+// not a pointer comparison.
+int GetChannelIndex (const BlendShapeData& data, const char* name)
+{
+	for (int i=0;i<data.channels.size();i++)
+	{
+		if (name == data.channels[i].name)
+			return i;
+	}
+	return -1;
+}
+
+// Linear search by precomputed name hash; returns -1 when not found.
+int GetChannelIndex (const BlendShapeData& data, BindingHash name)
+{
+	for (int i=0;i<data.channels.size();i++)
+	{
+		if (name == data.channels[i].nameHash)
+			return i;
+	}
+	return -1;
+}
+
+void ClearBlendShapes (BlendShapeData& data)
+{
+	data.vertices.clear();
+	data.shapes.clear();
+	data.channels.clear();
+	data.fullWeights.clear();
+}
+
+/*
+
+STRUCT BlendShapeChannel
+
+// BlendShape vertex class.
+STRUCT Vertex
+// Vertex delta.
+CSRAW public Vector3 vertex;
+
+// Normal delta.
+CSRAW public Vector3 normal;
+
+// Tangent delta.
+CSRAW public Vector3 tangent;
+
+// Index to [[Mesh]] vertex data.
+CSRAW public int index;
+END
+
+// A class representing a single BlendShape (also called morph-target).
+STRUCT BlendShape
+
+// The weight of the frame
+CSRAW public float weight;
+
+// Sparse vertex data.
+CSRAW public Vertex[] vertices;
+END
+
+// Name of the BlendShape.
+CSRAW public string name;
+
+// The frames making up a blendshape animation.
+// Each frame has a weight, based on the weight of the BlendShape in the SkinnedMeshRenderer, Unity will apply 1 or 2 frames.
+CSRAW public BlendShape[] shapes;
+END
+
+
+C++RAW
+/*
+	struct MonoMeshBlendShape
+	{
+		ScriptingStringPtr name;
+		ScriptingArrayPtr vertices;
+	};
+
+	void BlendShapeVertexToMono (const BlendShapeVertex &src, MonoBlendShapeVertex &dest) {
+		dest.vertex = src.vertex;
+		dest.normal = src.normal;
+		dest.tangent = src.tangent;
+		dest.index = src.index;
+	}
+	void BlendShapeVertexToCpp (const MonoBlendShapeVertex &src, BlendShapeVertex &dest) {
+		dest.vertex = src.vertex;
+		dest.normal = src.normal;
+		dest.tangent = src.tangent;
+		dest.index = src.index;
+	}
+
+	class MeshBlendShapeToMono
+	{
+	public:
+		MeshBlendShapeToMono(const BlendShapeVertices& sharedVertices_) : sharedVertices(sharedVertices_) {}
+
+		void operator() (const MeshBlendShape &src, MonoMeshBlendShape &dest)
+		{
+			dest.name = scripting_string_new(src.m_Name);
+			const BlendShapeVertices vertices(sharedVertices.begin() + src.firstVertex, sharedVertices.begin() + src.firstVertex + src.vertexCount);
+
+			ScriptingTypePtr classVertex = GetScriptingTypeRegistry().GetType("UnityEngine", "BlendShapeVertex");
+			dest.vertices = VectorToScriptingStructArray<BlendShapeVertex, MonoBlendShapeVertex>(vertices, classVertex, BlendShapeVertexToMono);
+		}
+
+	private:
+		const BlendShapeVertices& sharedVertices;
+	};
+
+	class MeshBlendShapeToCpp
+	{
+	public:
+		MeshBlendShapeToCpp(int meshVertexCount_, BlendShapeVertices& sharedVertices_) : meshVertexCount(meshVertexCount_), sharedVertices(sharedVertices_) {}
+
+		void operator() (MonoMeshBlendShape &src, MeshBlendShape &dest)
+		{
+			dest.weight = src.weight;
+
+			const BlendShapeVertex* vertices = Scripting::GetScriptingArrayStart<BlendShapeVertex> (src.vertices);
+			sharedVertices.insert(sharedVertices.end(), vertices, vertices + GetScriptingArraySize(src.vertices));
+
+			for (BlendShapeVertices::iterator it = vertices.begin(), end = vertices.end(); it != end; ++it)
+			{
+				BlendShapeVertex& v = *it;
+				if (v.index < 0 || v.index >= meshVertexCount)
+				{
+					ErrorStringMsg("Value (%d) of BlendShapeVertex.index #%d is out of bounds (Mesh vertex count: %d) on BlendShape '%s'. It will be reset to 0.", v.index, it - vertices.begin(), meshVertexCount, dest.m_Name.c_str());
+					v.index = 0;
+				}
+			}
+
+			dest.firstVertex = sharedVertices.size();
+			dest.vertexCount = vertices.size();
+
+			sharedVertices.insert(sharedVertices.end(), vertices.begin(), vertices.end());
+			dest.UpdateFlags(sharedVertices);
+		}
+
+	private:
+		int meshVertexCount;
+		BlendShapeVertices& sharedVertices;
+	};
+
+
+
+	----------------
+
+	// BlendShapes for this mesh.
+	CUSTOM_PROP BlendShapeChannel[] blendShapes
+	{
+		// ScriptingTypePtr classBlendShape = GetScriptingTypeRegistry().GetType("UnityEngine", "MeshBlendShape");
+		// return VectorToScriptingStructArray<MeshBlendShape, MonoMeshBlendShape>(self->GetShapesVector(), classBlendShape, MeshBlendShapeToMono(self->GetShapeVertexVector()));
+		return SCRIPTING_NULL;
+	}
+	{
+		// Mesh::MeshBlendShapeContainer shapes;
+		// self->GetShapeVertexVector().clear();
+		// ScriptingStructArrayToVector<MeshBlendShape, MonoMeshBlendShape>(value, shapes, MeshBlendShapeToCpp(self->GetVertexCount(), self->GetShapeVertexVector()));
+		// self->SwapShapesVector(shapes);
+	}
+
+
+
+	*/
diff --git a/Runtime/Filters/Mesh/MeshBlendShape.h b/Runtime/Filters/Mesh/MeshBlendShape.h
new file mode 100644
index 0000000..d4d0f41
--- /dev/null
+++ b/Runtime/Filters/Mesh/MeshBlendShape.h
@@ -0,0 +1,115 @@
+#ifndef MESHBLENDSHAPES_H
+#define MESHBLENDSHAPES_H
+
+#include "Runtime/Geometry/AABB.h"
+#include "Runtime/Math/Vector3.h"
+#include "Runtime/Serialize/SerializeUtility.h"
+#include "Runtime/Utilities/dynamic_array.h"
+#include "Runtime/Containers/ConstantString.h"
+#include "Runtime/Containers/ConstantStringSerialization.h"
+
+typedef UInt32 BindingHash;
+
+// One sparse blend-shape entry: 'index' selects the mesh vertex the deltas apply to.
+struct BlendShapeVertex
+{
+	// vertex, normal & tangent are stored as deltas
+	Vector3f vertex;
+	Vector3f normal;
+	Vector3f tangent;
+	UInt32 index;
+
+	BlendShapeVertex() : vertex(Vector3f::zero), normal(Vector3f::zero), tangent(Vector3f::zero), index(0) {}
+
+	DECLARE_SERIALIZE_NO_PPTR (BlendShapeVertex)
+};
+typedef dynamic_array<BlendShapeVertex> BlendShapeVertices;
+
+// A named blend-shape channel owning a run of frames [frameIndex, frameIndex+frameCount)
+// in BlendShapeData::shapes / fullWeights; nameHash is the CRC32 of name.
+struct BlendShapeChannel
+{
+	ConstantString name;
+	BindingHash nameHash;
+
+	int frameIndex;
+	int frameCount;
+
+	DECLARE_SERIALIZE_NO_PPTR(MeshBlendShapeChannel)
+};
+
+// One frame of a channel: a window [firstVertex, firstVertex+vertexCount) into
+// the shared sparse vertex array, plus flags for which delta kinds it carries.
+struct BlendShape
+{
+	BlendShape() : firstVertex(0), vertexCount(0), hasNormals(false), hasTangents(false) {}
+
+	UInt32 firstVertex;
+	UInt32 vertexCount;
+
+	bool hasNormals;
+	bool hasTangents;
+
+
+	///@TODO: MOve
+	// updates hasNormals and hasTangents based on data in vertices
+	void UpdateFlags(const BlendShapeVertices& sharedSparceVertices);
+
+	DECLARE_SERIALIZE_NO_PPTR (MeshBlendShape)
+};
+
+// All blend-shape data of a Mesh: the shared sparse vertices, the frames, the
+// named channels, and the per-frame full weights.
+struct BlendShapeData
+{
+	BlendShapeVertices vertices;
+	dynamic_array<BlendShape> shapes;
+	std::vector<BlendShapeChannel> channels;
+	dynamic_array<float> fullWeights;
+
+	DECLARE_SERIALIZE_NO_PPTR(BlendShapeData)
+};
+
+
+// Convert between blendshape name and index
+const char* GetChannelName (const BlendShapeData& data, int index);
+inline size_t GetBlendShapeChannelCount (const BlendShapeData& data) { return data.channels.size(); }
+int GetChannelIndex (const BlendShapeData& data, const char* name);
+int GetChannelIndex (const BlendShapeData& data, BindingHash name);
+
+// data is passed as non-sparce arrays, i.e.
deltaVertices.size() has to be the same as vertex count on the Mesh
+void SetBlendShapeVertices(const std::vector<Vector3f>& deltaVertices, const std::vector<Vector3f>& deltaNormals, const std::vector<Vector3f>& deltaTangents, BlendShapeVertices& sharedSparceVertices, BlendShape& frame);
+void InitializeChannel (const UnityStr& inName, int frameIndex, int frameCount, BlendShapeChannel& channel);
+void ClearBlendShapes (BlendShapeData& data);
+
+template<class TransferFunc>
+void BlendShape::Transfer (TransferFunc& transfer)
+{
+	TRANSFER(firstVertex);
+	TRANSFER(vertexCount);
+	TRANSFER(hasNormals);
+	TRANSFER(hasTangents);
+	transfer.Align();
+}
+
+template<class TransferFunc>
+void BlendShapeData::Transfer (TransferFunc& transfer)
+{
+	TRANSFER (vertices);
+	TRANSFER (shapes);
+	TRANSFER (channels);
+	TRANSFER (fullWeights);
+}
+
+template<class TransferFunc>
+void BlendShapeVertex::Transfer (TransferFunc& transfer)
+{
+	TRANSFER(vertex);
+	TRANSFER(normal);
+	TRANSFER(tangent);
+	TRANSFER(index);
+}
+
+template<class TransferFunc>
+void BlendShapeChannel::Transfer (TransferFunc& transfer)
+{
+	TransferConstantString (name, "name", kNoTransferFlags, kMemGeometry, transfer);
+	TRANSFER (nameHash);
+	TRANSFER (frameIndex);
+	TRANSFER (frameCount);
+}
+
+#endif
diff --git a/Runtime/Filters/Mesh/MeshBlendShaping.cpp b/Runtime/Filters/Mesh/MeshBlendShaping.cpp
new file mode 100644
index 0000000..a86a24d
--- /dev/null
+++ b/Runtime/Filters/Mesh/MeshBlendShaping.cpp
@@ -0,0 +1,184 @@
+#include "UnityPrefix.h"
+#include "MeshBlendShaping.h"
+#include "MeshSkinning.h"
+#include "MeshBlendShape.h"
+
+// Accumulates 'weight'-scaled sparse deltas into the interleaved vertex stream
+// at dst (position always; normal/tangent only when the compile-time flags are
+// set, so the unused branches fold away).
+template<bool skinNormal, bool skinTangent>
+void ApplyBlendShapeTmpl (const BlendShapeVertex* vertices, size_t vertexCount, size_t dstVertexCount, float weight, int normalOffset, int tangentOffset, int inStride, UInt8* dst)
+{
+	for (int i = 0; i < vertexCount; ++i)
+	{
+		const BlendShapeVertex& blendShapeVertex = vertices[i];
+
+		int offset = inStride * blendShapeVertex.index;
+
+		*reinterpret_cast<Vector3f*>(dst + offset) += blendShapeVertex.vertex * weight;
+		if (skinNormal)
+		{
+			DebugAssert (offset + normalOffset < inStride * dstVertexCount);
+			*reinterpret_cast<Vector3f*>(dst + offset + normalOffset) += blendShapeVertex.normal * weight;
+		}
+		if (skinTangent)
+		{
+			DebugAssert (offset + tangentOffset < inStride * dstVertexCount);
+			*reinterpret_cast<Vector3f*>(dst + offset + tangentOffset) += blendShapeVertex.tangent * weight;
+		}
+	}
+}
+
+
+// Applies one frame's sparse deltas to dst, clamping the blend factor to 1 and
+// dispatching to the template variant matching what both the skin info and the
+// frame actually carry (normals/tangents).
+void ApplyBlendShape (const BlendShape& target, const BlendShapeVertices& vertices, float weight, const SkinMeshInfo& info, UInt8* dst)
+{
+	if (!HasValidWeight(weight))
+		return;
+
+	weight = std::min(weight, 1.0F);
+
+	const BlendShapeVertex* v = vertices.begin() + target.firstVertex;
+
+	if (info.skinNormals && info.skinTangents && target.hasNormals && target.hasTangents)
+		ApplyBlendShapeTmpl<true, true> (v, target.vertexCount, info.vertexCount, weight, info.normalOffset, info.tangentOffset, info.inStride, dst);
+	else if (info.skinNormals && target.hasNormals)
+		ApplyBlendShapeTmpl<true, false> (v, target.vertexCount, info.vertexCount, weight, info.normalOffset, info.tangentOffset, info.inStride, dst);
+	else
+		ApplyBlendShapeTmpl<false, false> (v, target.vertexCount, info.vertexCount, weight, info.normalOffset, info.tangentOffset, info.inStride, dst);
+}
+
+// Returns the left ("lhs") frame index whose weight interval contains targetWeight.
+// NOTE(review): 'frame < count-1' mixes signed int with size_t — if count were 0
+// the subtraction wraps (callers assert frameCount != 0). Also, when
+// targetWeight exceeds weights[count-1] this returns count-1 and the caller
+// then reads weights[frame+1], one past the end — confirm target weights are
+// clamped to the last full weight upstream.
+static int FindFrame (const float* weights, size_t count, float targetWeight)
+{
+	// Find frame (left index)
+	int frame = 0;
+	while (frame < count-1 && targetWeight > weights[frame+1])
+		frame++;
+
+	return frame;
+}
+
+// Copies the base vertex stream into dst, then for every channel with a valid
+// weight either fades in the first frame (weight below its full weight, or a
+// single-frame channel) or linearly blends the two frames bracketing the
+// target weight.
+void ApplyBlendShapes (SkinMeshInfo& info, UInt8* dst)
+{
+	DebugAssert (info.blendshapeCount != 0);
+	Assert (info.inStride == info.outStride);
+	const int inStride = info.inStride;
+	const int count = info.vertexCount;
+
+	Assert (dst);
+	memcpy (dst, info.inVertices, inStride * count);
+
+	const BlendShapeData& blendShapeData = *info.blendshapes;
+
+	for (int c = 0; c < info.blendshapeCount; ++c)
+	{
+		const float targetWeight = info.blendshapeWeights[c];
+
+		if (!HasValidWeight (targetWeight))
+			continue;
+
+		const BlendShapeChannel& channel = blendShapeData.channels[c];
+		Assert(channel.frameCount != 0);
+
+		const BlendShape* blendShapeFrames = &blendShapeData.shapes[channel.frameIndex];
+		const float* weights = &blendShapeData.fullWeights[channel.frameIndex];
+
+		// The first blendshape does not need to do any blending. Just fade it in.
+		if (targetWeight < weights[0] || channel.frameCount == 1)
+		{
+			float lhsShapeWeight = weights[0];
+			ApplyBlendShape (blendShapeFrames[0], blendShapeData.vertices, targetWeight / lhsShapeWeight, info, dst);
+		}
+		// We are blending with two frames
+		else
+		{
+			// Find the frame we are blending with
+			int frame = FindFrame(weights, channel.frameCount, targetWeight);
+
+			float lhsShapeWeight = weights[frame + 0];
+			float rhsShapeWeight = weights[frame + 1];
+
+			float relativeWeight = (targetWeight - lhsShapeWeight) / (rhsShapeWeight - lhsShapeWeight);
+
+			ApplyBlendShape (blendShapeFrames[frame + 0], blendShapeData.vertices, 1.0F - relativeWeight, info, dst);
+			ApplyBlendShape (blendShapeFrames[frame + 1], blendShapeData.vertices, relativeWeight, info, dst);
+		}
+	}
+}
+
+///@TODO: How do we deal with resizing vertex count once mesh blendshapes have been created???
+ +/* + template<bool skinNormal, bool skinTangent> + static void ApplyBlendShapesTmpl (SkinMeshInfo& info, UInt8* dst) + { + DebugAssert (info.blendshapeCount != 0); + Assert (info.inStride == info.outStride); + const int inStride = info.inStride; + const int count = info.vertexCount; + + Assert (dst); + memcpy (dst, info.inVertices, inStride * count); + + const int normalOffset = info.normalOffset; + const int tangentOffset = info.tangentOffset; + + #if BLEND_DIRECT_NORMALS + if (skinNormal) + { // figure out how what fraction of original normal should be used + float totalBlendshapeWeight = 0.0f; + for (int i = 0; i < info.blendshapeCount; ++i) + totalBlendshapeWeight += info.blendshapeWeights[i]; + Assert (totalBlendshapeWeight >= 0.0f); + if (totalBlendshapeWeight > 0.0f) + { + for (int i = 0; i < count; ++i) + *reinterpret_cast<Vector3f*>(dst + i*inStride + normalOffset) *= max(0.0f, (1.0f - totalBlendshapeWeight)); + } + } + + bool atLeastOneSparseBlendshape = false; + #endif + for (int bs = 0; bs < info.blendshapeCount; ++bs) + { + const float w = info.blendshapeWeights[bs]; + + if (HasWeight(w)) + { + const MeshBlendShape& blendShape = info.blendshapes[bs]; + + const BlendShapeVertex* vertices = info.blendshapesVertices + blendShape.firstVertex; + for (int i = 0; i < blendShape.vertexCount; ++i) + { + const BlendShapeVertex& blendShapeVertex = vertices[i]; + + int offset = inStride * blendShapeVertex.index; + Assert (offset < inStride * count); + *reinterpret_cast<Vector3f*>(dst + offset) += blendShapeVertex.vertex * w; + if (skinNormal) + { + Assert (offset + normalOffset < inStride * count); + *reinterpret_cast<Vector3f*>(dst + offset + normalOffset) += blendShapeVertex.normal * w; + } + if (skinTangent) + { + Assert (offset + tangentOffset < inStride * count); + *reinterpret_cast<Vector3f*>(dst + offset + tangentOffset) += blendShapeVertex.tangent * w; + } + } + + #if BLEND_DIRECT_NORMALS + if (vertices.size () < count) + atLeastOneSparseBlendshape = 
true; + #endif + } + } + + #if BLEND_DIRECT_NORMALS + if (atLeastOneSparseBlendshape && skinNormal) // we might need to take larger fraction from original normal + for (int i = 0; i < count; ++i) + { + Vector3f const& srcNormal = *reinterpret_cast<Vector3f*>((UInt8*)info.inVertices + i*inStride + normalOffset); + Vector3f* dstNormal = reinterpret_cast<Vector3f*>(dst + i*inStride + normalOffset); + const float missingFractionOfNormal = max (0.0f, 1.0f - Magnitude (*dstNormal)); + *dstNormal += srcNormal * missingFractionOfNormal; + } + #endif + } +*/
\ No newline at end of file diff --git a/Runtime/Filters/Mesh/MeshBlendShaping.h b/Runtime/Filters/Mesh/MeshBlendShaping.h new file mode 100644 index 0000000..7b39f26 --- /dev/null +++ b/Runtime/Filters/Mesh/MeshBlendShaping.h @@ -0,0 +1,12 @@ +#pragma once + +struct SkinMeshInfo; + +// Does "mesh skinning" logic for BlendShapes +void ApplyBlendShapes (SkinMeshInfo& info, UInt8* dst); + +inline bool HasValidWeight(const float w) +{ + const float kWeightEpsilon = 1e-4f; + return w > kWeightEpsilon; +} diff --git a/Runtime/Filters/Mesh/MeshCombiner.cpp b/Runtime/Filters/Mesh/MeshCombiner.cpp new file mode 100644 index 0000000..1bf93e5 --- /dev/null +++ b/Runtime/Filters/Mesh/MeshCombiner.cpp @@ -0,0 +1,502 @@ +#include "UnityPrefix.h" +#include "MeshCombiner.h" +#include "Runtime/Graphics/TriStripper.h" +#include "Runtime/Shaders/GraphicsCaps.h" +#include "Runtime/Profiler/Profiler.h" +#include <limits> + + +#define sqr(x) ((x)*(x)) + +PROFILER_INFORMATION(gCombineMeshesProfile, "CombineMeshes", kProfilerRender) +PROFILER_INFORMATION(gCombineVerticesProfile, "CombineVertices", kProfilerRender) +PROFILER_INFORMATION(gCombineIndicesProfile, "CombineIndices", kProfilerRender) + +static void CombineBoneSkinning (const CombineInstances &in, Mesh& outCombinedMesh); + + +size_t ExtractMeshIndices(Mesh::TemporaryIndexContainer& srcIndices, const CombineInstance& in, bool useVertexOffsets, size_t& inoutTotalVertexOffset, UInt16* dstIndices) +{ + srcIndices.clear(); + + if (in.subMeshIndex < 0 || in.subMeshIndex >= in.mesh->GetSubMeshCount()) + return 0; + + const int subMeshIndex = in.subMeshIndex; + const int vertexOffset = useVertexOffsets ? 
in.vertexOffset : inoutTotalVertexOffset; + inoutTotalVertexOffset += in.mesh->GetVertexCount(); + + in.mesh->GetTriangles( srcIndices, subMeshIndex ); + + size_t numIndices = srcIndices.size(); + if (Dot (Cross(in.transform.GetAxisX(), in.transform.GetAxisY()), in.transform.GetAxisZ()) >= 0) + { + for ( size_t k=0; k!=numIndices; ++k ) + dstIndices[k] = srcIndices[k] + vertexOffset; + } + else + { + // if trilist, then + // reverse Cull order by reversing indices + for ( size_t k=0; k!=numIndices; ++k ) + dstIndices[k] = srcIndices[numIndices-k-1] + vertexOffset; + } + + return numIndices; +} + +static bool IsMeshBatchable (const Mesh* mesh, int subMeshIndex) +{ + return mesh && mesh->HasVertexData() && subMeshIndex >= 0 && subMeshIndex < mesh->GetSubMeshCount(); +} + + +void CombineMeshIndicesForStaticBatching(const CombineInstances& in, Mesh& inoutMesh, bool mergeSubMeshes, bool useVertexOffsets) +{ + PROFILER_AUTO(gCombineIndicesProfile, &inoutMesh); + + size_t size = in.size(); + + UInt32 maxIndices = 0; + for ( size_t i=0; i!=size; ++i ) + { + if (IsMeshBatchable(in[i].mesh, in[i].subMeshIndex)) + { + const UInt32 numTris = in[i].mesh->GetSubMeshFast( in[i].subMeshIndex ).indexCount; + if (mergeSubMeshes) + maxIndices += numTris; + else + maxIndices = std::max( maxIndices, numTris ); + } + } + + UInt16* dstIndices = new UInt16[maxIndices+1]; + Mesh::TemporaryIndexContainer srcIndices; + srcIndices.reserve( maxIndices+1 ); + + size_t totalVertexOffset = 0; + if (mergeSubMeshes) + { + inoutMesh.SetSubMeshCount( 1 ); + size_t totalNumIndices = 0; + for ( size_t s=0; s!=size; ++s ) + { + if (in[s].mesh) + { + size_t numIndices = ExtractMeshIndices (srcIndices, in[s], useVertexOffsets, totalVertexOffset, dstIndices+totalNumIndices); + + totalNumIndices += numIndices; + Assert(totalNumIndices <= (maxIndices+1)); + } + } + int mask = Mesh::k16BitIndices; + inoutMesh.SetIndicesComplex (dstIndices, totalNumIndices, 0, kPrimitiveTriangles, mask); + } + else + { + 
inoutMesh.SetSubMeshCount( in.size() ); + for ( size_t s=0; s!=size; ++s ) + { + if (in[s].mesh) + { + size_t numIndices = ExtractMeshIndices (srcIndices, in[s], useVertexOffsets, totalVertexOffset, dstIndices); + Assert(numIndices <= (maxIndices+1)); + + int mask = Mesh::k16BitIndices; + inoutMesh.SetIndicesComplex (dstIndices, numIndices, s, kPrimitiveTriangles, mask); + } + } + } + + delete []dstIndices; +} + +void CombineMeshVerticesForStaticBatching ( const CombineInstances& in, const string& combinedMeshName, Mesh& outCombinedMesh, bool useTransforms ) +{ + PROFILER_AUTO(gCombineVerticesProfile, &outCombinedMesh); + + int vertexCount = 0; + size_t size = in.size(); + for( size_t i=0; i!=size; ++i ) + { + if (IsMeshBatchable(in[i].mesh, in[i].subMeshIndex)) + vertexCount += in[i].mesh->GetVertexCount(); + } + + bool hasNormals = false; + bool hasTangents = false; + bool hasUV0 = false; + bool hasUV1 = false; + bool hasColors = false; + bool hasSkin = false; + int bindposeCount = 0; + + for( size_t i=0; i!=size; ++i ) + { + if (IsMeshBatchable(in[i].mesh, in[i].subMeshIndex)) + { + const Mesh* mesh = in[i].mesh; + const UInt32 channels = mesh->GetAvailableChannels(); + hasNormals |= (channels & (1<<kShaderChannelNormal)) != 0; + hasTangents |= (channels & (1<<kShaderChannelTangent)) != 0; + hasUV0 |= (channels & (1<<kShaderChannelTexCoord0)) != 0; + hasUV1 |= (channels & (1<<kShaderChannelTexCoord1)) != 0 || (in[i].lightmapTilingOffset != Vector4f(1, 1, 0, 0)); + hasColors |= (channels & (1<<kShaderChannelColor)) != 0; + hasSkin |= mesh->GetSkin().size() && mesh->GetBindpose().size(); + bindposeCount += mesh->GetBindpose().size(); + } + } + + UInt32 channels = 1<<kShaderChannelVertex; + if ( hasNormals ) channels |= 1<<kShaderChannelNormal; + if ( hasTangents ) channels |= 1<<kShaderChannelTangent; + if ( hasUV0 ) channels |= 1<<kShaderChannelTexCoord0; + if ( hasUV1 ) channels |= 1<<kShaderChannelTexCoord1; + if ( hasColors ) channels |= 
1<<kShaderChannelColor; + + outCombinedMesh.Clear(true); + outCombinedMesh.ResizeVertices( vertexCount, channels ); + outCombinedMesh.SetName( combinedMeshName.c_str() ); + // Input meshes are already swizzled correctly, so we can copy colors directly + outCombinedMesh.SetVertexColorsSwizzled(gGraphicsCaps.needsToSwizzleVertexColors); + + if ( hasSkin ) + { + outCombinedMesh.GetSkin().resize_initialized(vertexCount); + outCombinedMesh.GetBindpose().resize_initialized(bindposeCount); + outCombinedMesh.GetBonePathHashes().resize_uninitialized(bindposeCount); + } + + // avoid doing twice (in worst case) + Matrix4x4f* normalMatrices; + bool* isNonUniformScaleTransform; + ALLOC_TEMP (normalMatrices, Matrix4x4f, size); + ALLOC_TEMP (isNonUniformScaleTransform, bool, size); + if ( hasNormals || hasTangents ) + { + for( size_t i=0; i!=size; ++i ) + { + float uniformScale; + TransformType type = ComputeTransformType(in[i].transform, uniformScale); + Matrix4x4f m; + isNonUniformScaleTransform[i] = IsNonUniformScaleTransform(type); + if (isNonUniformScaleTransform[i]) + { + Matrix4x4f::Invert_General3D( in[i].transform, normalMatrices[i] ); + normalMatrices[i].Transpose(); + } + else + { + normalMatrices[i] = Matrix3x3f(in[i].transform); + // Scale matrix to keep normals normalized + normalMatrices[i].Scale(Vector3f::one * (1.0f/uniformScale)); + } + } + } + + int offset = 0; + for( size_t i=0; i!=size; ++i ) + { + if (IsMeshBatchable(in[i].mesh, in[i].subMeshIndex)) + { + const Matrix4x4f& transform = in[i].transform; + const Mesh* mesh = in[i].mesh; + if (useTransforms) + TransformPoints3x4 (transform, + (Vector3f const*)mesh->GetChannelPointer (kShaderChannelVertex), + mesh->GetStride (kShaderChannelVertex), + (Vector3f*)outCombinedMesh.GetChannelPointer (kShaderChannelVertex, offset), + outCombinedMesh.GetStride (kShaderChannelVertex), + mesh->GetVertexCount()); + else + strided_copy (mesh->GetVertexBegin (), mesh->GetVertexEnd (), outCombinedMesh.GetVertexBegin () + 
offset); + offset += mesh->GetVertexCount(); + } + } + + if ( hasNormals ) + { + offset = 0; + for( size_t i=0; i!=size; ++i ) + { + if (IsMeshBatchable(in[i].mesh, in[i].subMeshIndex)) + { + const Mesh* mesh = in[i].mesh; + int vertexCount = mesh->GetVertexCount (); + if (!mesh->IsAvailable (kShaderChannelNormal)) + std::fill(outCombinedMesh.GetNormalBegin () + offset, outCombinedMesh.GetNormalBegin () + offset + vertexCount, Vector3f(0.0f,1.0f,0.0f)); + else + { + const Matrix4x4f& transform = normalMatrices[i]; + + StrideIterator<Vector3f> outNormal = outCombinedMesh.GetNormalBegin () + offset; + if (useTransforms) + { + if (isNonUniformScaleTransform[i]) + { + for (StrideIterator<Vector3f> it = mesh->GetNormalBegin (), end = mesh->GetNormalEnd (); it != end; ++it, ++outNormal) + *outNormal = Normalize( transform.MultiplyVector3( *it) ); + } + else + { + for (StrideIterator<Vector3f> it = mesh->GetNormalBegin (), end = mesh->GetNormalEnd (); it != end; ++it, ++outNormal) + *outNormal = transform.MultiplyVector3( *it); + } + } + else + strided_copy (mesh->GetNormalBegin (), mesh->GetNormalEnd (), outCombinedMesh.GetNormalBegin () + offset); + } + offset += vertexCount; + } + } + } + + if ( hasTangents ) + { + offset = 0; + for ( size_t i=0; i!=size; ++i ) + { + if (IsMeshBatchable(in[i].mesh, in[i].subMeshIndex)) + { + const Mesh* mesh = in[i].mesh; + int vertexCount = mesh->GetVertexCount (); + if (!mesh->IsAvailable (kShaderChannelTangent)) + std::fill(outCombinedMesh.GetTangentBegin () + offset, outCombinedMesh.GetTangentBegin () + offset + vertexCount, Vector4f(1.0f,0.0f,0.0f,1.0f)); + else + { + const Matrix4x4f& transform = normalMatrices[i]; + + StrideIterator<Vector4f> outTanget = outCombinedMesh.GetTangentBegin () + offset; + if (useTransforms) + { + if (isNonUniformScaleTransform[i]) + { + for (StrideIterator<Vector4f> it = mesh->GetTangentBegin (), end = mesh->GetTangentEnd (); it != end; ++it, ++outTanget) + { + Vector3f t3 = 
Normalize(transform.MultiplyVector3(Vector3f(it->x, it->y, it->z))); + *outTanget = Vector4f(t3.x,t3.y,t3.z,it->w); + } + } + else + { + for (StrideIterator<Vector4f> it = mesh->GetTangentBegin (), end = mesh->GetTangentEnd (); it != end; ++it, ++outTanget) + { + Vector3f t3 = transform.MultiplyVector3(Vector3f(it->x, it->y, it->z)); + *outTanget = Vector4f(t3.x,t3.y,t3.z,it->w); + } + } + } + else + strided_copy (mesh->GetTangentBegin (), mesh->GetTangentEnd (), outCombinedMesh.GetTangentBegin () + offset); + } + offset += vertexCount; + } + } + } + + if ( hasUV0 ) + { + offset = 0; + for ( size_t i=0; i!=size; ++i ) + { + if (IsMeshBatchable(in[i].mesh, in[i].subMeshIndex)) + { + const Mesh* mesh = in[i].mesh; + int vertexCount = mesh->GetVertexCount (); + if (!mesh->IsAvailable (kShaderChannelTexCoord0)) + std::fill (outCombinedMesh.GetUvBegin (0) + offset, outCombinedMesh.GetUvBegin (0) + offset + vertexCount, Vector2f(0.0f,0.0f)); + else + strided_copy (mesh->GetUvBegin (0), mesh->GetUvEnd (0), outCombinedMesh.GetUvBegin (0) + offset); + offset += vertexCount; + } + } + } + + if ( hasUV1 ) + { + offset = 0; + for ( size_t i=0; i!=size; ++i ) + { + if (IsMeshBatchable(in[i].mesh, in[i].subMeshIndex)) + { + const Mesh* mesh = in[i].mesh; + const int uvIndex = (mesh->GetAvailableChannels() & (1<<kShaderChannelTexCoord1))!=0? 
1 : 0; + StrideIterator<Vector2f> it = in[i].mesh->GetUvBegin( uvIndex ); + StrideIterator<Vector2f> end = in[i].mesh->GetUvEnd( uvIndex ); + + int vertexCount = mesh->GetVertexCount (); + if ( it == end) + std::fill (outCombinedMesh.GetUvBegin (1) + offset, outCombinedMesh.GetUvBegin (1) + offset + vertexCount, Vector2f(0.0f,0.0f)); + else + { + // we have to apply lightmap UV scale and offset factors + // callee is responsible to reset lightmapTilingOffset on the Renderer afterwards + const Vector4f uvScaleOffset = in[i].lightmapTilingOffset; + if ( uvScaleOffset != Vector4f(1, 1, 0, 0) ) + { + StrideIterator<Vector2f> outUV = outCombinedMesh.GetUvBegin (1) + offset; + for (; it != end; ++it, ++outUV) + { + outUV->x = it->x * uvScaleOffset.x + uvScaleOffset.z; + outUV->y = it->y * uvScaleOffset.y + uvScaleOffset.w; + } + } + else + strided_copy (it, end, outCombinedMesh.GetUvBegin (1) + offset); + } + offset += vertexCount; + } + } + } + + if ( hasColors ) + { + offset = 0; + for ( size_t i=0; i!=size; ++i ) + { + if (IsMeshBatchable(in[i].mesh, in[i].subMeshIndex)) + { + const Mesh* mesh = in[i].mesh; + int vertexCount = mesh->GetVertexCount (); + if (!mesh->IsAvailable (kShaderChannelColor)) + std::fill (outCombinedMesh.GetColorBegin () + offset, outCombinedMesh.GetColorBegin () + offset + vertexCount, ColorRGBA32(255,255,255,255)); + else + { + DebugAssert(mesh->GetVertexColorsSwizzled() == outCombinedMesh.GetVertexColorsSwizzled()); + strided_copy (mesh->GetColorBegin (), mesh->GetColorEnd (), outCombinedMesh.GetColorBegin () + offset); + } + offset += vertexCount; + } + } + } + + if ( hasSkin ) + { + CombineBoneSkinning (in, outCombinedMesh); + } +} + +static void CalculateRootBonePathHash (const CombineInstances &in, Mesh& outCombinedMesh) +{ + // We always pick the root bone path hash of the first combine instance. 
+ // This is because anything else gives unpredictable behaviour and makes it impossible for the user + // to setup the skinned mesh renderer T/R/S correctly. + outCombinedMesh.SetRootBonePathHash(in[0].mesh->GetRootBonePathHash()); + + // If we made it so that the skinnedmeshrenderer always used the default pose from the Avatar + // Then it would be possible to pick the root bone from the mesh with the most bones instead. +#if 0 + size_t size = in.size(); + + BindingHash rootBonePathHash = 0; + int boneCount = 0; + for (size_t i=0; i<size; ++i) + { + } + } + if (rootBonePathHash) + outCombinedMesh.SetRootBonePathHash(rootBonePathHash); +#endif +} + +static void CombineBoneSkinning (const CombineInstances &in, Mesh& outCombinedMesh) +{ + size_t size = in.size(); + + int boneOffset = 0; + int offset = 0; + for ( size_t i=0; i!=size; ++i ) + { + if (!IsMeshBatchable(in[i].mesh, in[i].subMeshIndex)) + continue; + + const Mesh* mesh = in[i].mesh; + Mesh::BoneInfluenceContainer& outSkin = outCombinedMesh.GetSkin(); + const Mesh::BoneInfluenceContainer& inSkin = mesh->GetSkin(); + int vertexCount = mesh->GetVertexCount (); + if (inSkin.empty()) + { + for(int i=0; i<vertexCount;i++) + { + outSkin[offset+i].weight[0] = 0; + outSkin[offset+i].weight[1] = 0; + outSkin[offset+i].weight[2] = 0; + outSkin[offset+i].weight[3] = 0; + outSkin[offset+i].boneIndex[0] = 0; + outSkin[offset+i].boneIndex[1] = 0; + outSkin[offset+i].boneIndex[2] = 0; + outSkin[offset+i].boneIndex[3] = 0; + } + } + else + { + for(int i=0; i<vertexCount;i++) + { + outSkin[offset+i].weight[0] = inSkin[i].weight[0]; + outSkin[offset+i].weight[1] = inSkin[i].weight[1]; + outSkin[offset+i].weight[2] = inSkin[i].weight[2]; + outSkin[offset+i].weight[3] = inSkin[i].weight[3]; + outSkin[offset+i].boneIndex[0] = inSkin[i].boneIndex[0]+boneOffset; + outSkin[offset+i].boneIndex[1] = inSkin[i].boneIndex[1]+boneOffset; + outSkin[offset+i].boneIndex[2] = inSkin[i].boneIndex[2]+boneOffset; + 
outSkin[offset+i].boneIndex[3] = inSkin[i].boneIndex[3]+boneOffset; + } + } + + offset += vertexCount; + + int poseCount = mesh->GetBindpose().size(); + int bindingHashCount = mesh->GetBonePathHashes().size(); + + memcpy(outCombinedMesh.GetBindpose().begin() + boneOffset, mesh->GetBindpose().begin(), poseCount*sizeof(Matrix4x4f)); + + // Old asset bundles might not have bindingHashCount in sync with bind poses. + if (poseCount == bindingHashCount) + memcpy(outCombinedMesh.GetBonePathHashes().begin () + boneOffset, mesh->GetBonePathHashes().begin(), poseCount*sizeof(BindingHash)); + else + memset(outCombinedMesh.GetBonePathHashes().begin () + boneOffset, 0, poseCount*sizeof(BindingHash)); + + boneOffset += poseCount; + } + + CalculateRootBonePathHash (in, outCombinedMesh); +} + + +void CombineMeshes (const CombineInstances &in, Mesh& out, bool mergeSubMeshes, bool useTransforms) +{ + if (!out.CanAccessFromScript()) + { + ErrorStringMsg("Cannot combine into mesh that does not allow access: %s", out.GetName()); + return; + } + for (size_t i = 0; i < in.size(); ++i) + { + Mesh* mesh = in[i].mesh; + if (!mesh) + { + WarningStringMsg("Combine mesh instance %" PRINTF_SIZET_FORMAT " is null.", i); + } + if (mesh && (in[i].subMeshIndex < 0 || in[i].subMeshIndex >= mesh->GetSubMeshCount())) + { + WarningStringMsg("Submesh index %d is invalid for mesh %s.", in[i].subMeshIndex, mesh->GetName()); + } + if (mesh && !mesh->CanAccessFromScript()) + { + ErrorStringMsg("Cannot combine mesh that does not allow access: %s", mesh->GetName()); + return; + } + if (mesh == &out) + { + ErrorStringMsg("Cannot combine into a mesh that is also in the CombineInstances input: %s", mesh->GetName()); + return; + } + } + + CombineMeshVerticesForStaticBatching (in, out.GetName(), out, useTransforms); + CombineMeshIndicesForStaticBatching (in, out, mergeSubMeshes, false); + + out.RecalculateBounds(); + out.UpdateVertexFormat(); +} + diff --git a/Runtime/Filters/Mesh/MeshCombiner.h 
b/Runtime/Filters/Mesh/MeshCombiner.h new file mode 100644 index 0000000..a6975a9 --- /dev/null +++ b/Runtime/Filters/Mesh/MeshCombiner.h @@ -0,0 +1,33 @@ +#ifndef MESHCOMBINER_H +#define MESHCOMBINER_H + +#include "LodMesh.h" + +class Renderer; + +struct CombineInstance +{ + Mesh *mesh; + int subMeshIndex; + Matrix4x4f transform; + + Vector4f lightmapTilingOffset; + int vertexOffset; + + CombineInstance() : + mesh(NULL), + subMeshIndex(0), + lightmapTilingOffset(1, 1, 0, 0), + vertexOffset(0) + {} +}; + +typedef std::vector<CombineInstance> CombineInstances; + +void CombineMeshes (const CombineInstances &in, Mesh& out, bool mergeSubMeshes, bool useTransforms); +// takes an array of meshes(their vertex data) and merges them into 1 combined mesh. +void CombineMeshVerticesForStaticBatching ( const CombineInstances& in, const string& combinedMeshName, Mesh& outCombinedMesh, bool useTransforms = true ); +// takes an array of meshes(their indices) and merges them in 1 mesh (setups subsets) +void CombineMeshIndicesForStaticBatching (const CombineInstances& in, Mesh& inoutMesh, bool mergeSubMeshes, bool useVertexOffsets); + +#endif diff --git a/Runtime/Filters/Mesh/MeshOptimizer.cpp b/Runtime/Filters/Mesh/MeshOptimizer.cpp new file mode 100644 index 0000000..068dc53 --- /dev/null +++ b/Runtime/Filters/Mesh/MeshOptimizer.cpp @@ -0,0 +1,359 @@ +#include "UnityPrefix.h" +#include "MeshOptimizer.h" +#include <vector> + +//@TODO: + +// Step 1 + +//* bool ExtractCollisionData (Mesh& mesh, UNITY_TEMP_VECTOR(kMemGeometry, Vector3f)& vertices, UNITY_TEMP_VECTOR(kMemGeometry, UInt32)& triangles); +// -> make it return welded vertices and triangle array +//* Enable Deformablemesh code and make it work with welding code and check that cloth works visually... + +// Testing: +//* Check mesh collision detection code to work visually correct. +// * run functional test suite +// * run lightmapper tests in the integration test suite. 
They have a complete test for the lightmap uv coordinates picking up lightmap values... + + +// Step 2: +//* Verify vertex cache performance on iPad1 / Wii / intel integrated graphics +//* Switch to default gpu optimized mode and update all model importer templates + + + +template<typename T, const int CACHE_SIZE> +class VertexCacheOptimizer +{ + UInt32* m_cacheEntries; + UInt32 m_cacheSize; + + mutable UInt32 m_cacheMisses; + mutable UInt32 m_cacheHits; + + UInt32 GetInCache(UInt32 lIndex, const char* vertexInCache) const + { + return vertexInCache[lIndex] ? 1 : 0; + } + + void AddToCache(UInt32 lIndex, char* vertexInCache) + { + if(m_cacheEntries[0]!=-1) + vertexInCache[m_cacheEntries[0]]=0; + + for(UInt32 i=0; i<m_cacheSize-1; i++) + m_cacheEntries[i]=m_cacheEntries[i+1]; + + m_cacheEntries[m_cacheSize-1]=lIndex; + vertexInCache[lIndex]=1; + } + +public: + + VertexCacheOptimizer () : m_cacheSize(CACHE_SIZE) + { + m_cacheEntries=new UInt32 [m_cacheSize]; + + m_cacheHits = m_cacheMisses = 0; + for(UInt32 i=0; i<m_cacheSize; i++) + m_cacheEntries[i]=(UInt32)-1; + } + + ~VertexCacheOptimizer() { delete m_cacheEntries; } + + UInt32 GetCacheMisses() { return m_cacheMisses; } + UInt32 GetCacheHits() { return m_cacheHits; } + + void OptimizeTriangles(T* pdstTris, UInt32 numVertices, const T* srcTris, UInt32 numTriangles) + { + UInt32 cachedVerts=0; + char* triangleUsed=new char [numTriangles]; + char* vertexInCache=new char [numVertices]; + memset(triangleUsed,0,numTriangles); + memset(vertexInCache,0,numVertices); + + bool foundTriangle=true; + while (foundTriangle) + { + foundTriangle=false; + UInt32 bestCandidate=0; + UInt32 bestCacheValue=0; + for (UInt32 i = 0; i < numTriangles; i++) + { + if (triangleUsed[i]) + continue; + + foundTriangle=true; + UInt32 i1=srcTris[i*3+0]; + UInt32 i2=srcTris[i*3+1]; + UInt32 i3=srcTris[i*3+2]; + + UInt32 lCacheValue=GetInCache(i1,vertexInCache)+GetInCache(i2,vertexInCache)+GetInCache(i3,vertexInCache)+1; + if (lCacheValue > 
bestCacheValue) + { + bestCandidate=i; + bestCacheValue=lCacheValue; + if (bestCacheValue == 4) + break; + } + } + if(foundTriangle) + { + triangleUsed[bestCandidate]=1; + UInt32 i1=srcTris[bestCandidate*3+0]; + UInt32 i2=srcTris[bestCandidate*3+1]; + UInt32 i3=srcTris[bestCandidate*3+2]; + *pdstTris++=(T)i1; + *pdstTris++=(T)i2; + *pdstTris++=(T)i3; + if (!GetInCache(i1,vertexInCache)) { AddToCache(i1,vertexInCache); cachedVerts++; m_cacheMisses++; } else m_cacheHits++; + if (!GetInCache(i2,vertexInCache)) { AddToCache(i2,vertexInCache); cachedVerts++; m_cacheMisses++; } else m_cacheHits++; + if (!GetInCache(i3,vertexInCache)) { AddToCache(i3,vertexInCache); cachedVerts++; m_cacheMisses++; } else m_cacheHits++; + } + } + delete[] triangleUsed; + delete[] vertexInCache; + } +}; + +inline bool CompareBlendShapeVertexIndex (const BlendShapeVertex& lhs, const BlendShapeVertex& rhs) +{ + return lhs.index < rhs.index; +} + +void OptimizeReorderVertexBuffer (Mesh& mesh) +{ + const int submeshCount = mesh.GetSubMeshCount(); + const int vertexCount = mesh.GetVertexCount(); + + // backup required data + VertexData backupVertexData(mesh.m_VertexData, mesh.GetAvailableChannels(), mesh.GetVertexData().GetStreamsLayout(), mesh.GetVertexData().GetChannelsLayout()); + + Mesh::BoneInfluenceContainer backupSkin; + if (!mesh.m_Skin.empty()) + backupSkin.swap(mesh.m_Skin); + + // reorder the vertices so they come in increasing order + dynamic_array<UInt32> oldToNew; + dynamic_array<UInt32> newToOld; + newToOld.resize_initialized(vertexCount, 0xFFFFFFFF); + oldToNew.resize_initialized(vertexCount, 0xFFFFFFFF); + + Mesh::TemporaryIndexContainer dstIndices; + int newVertexCount = 0; + for (int submesh = 0; submesh < submeshCount; submesh++) + { + Mesh::TemporaryIndexContainer indices; + mesh.GetTriangles (indices, submesh); + + const int indexCount = indices.size(); + dstIndices.resize(indexCount); + for (int index=0; index < indexCount; index++) + { + int vertex = indices[index]; + 
AssertBreak(vertex >= 0); + AssertBreak(vertex < vertexCount); + + if (oldToNew[vertex] == 0xFFFFFFFF) + { + oldToNew[vertex]=newVertexCount; + newToOld[newVertexCount]=vertex; + newVertexCount++; + } + dstIndices[index] = oldToNew[vertex]; + } + + mesh.SetIndices (&dstIndices[0], dstIndices.size(), submesh, kPrimitiveTriangles); + } + + mesh.ResizeVertices(newVertexCount, backupVertexData.GetChannelMask()); + + if (!backupSkin.empty()) + mesh.m_Skin.resize_initialized(newVertexCount); + + for (int vertex=0; vertex < newVertexCount; vertex++) + { + UInt32 remapNew = newToOld[vertex]; + Assert(remapNew != 0xFFFFFFFF); + + if (!backupSkin.empty()) + mesh.m_Skin[vertex] = backupSkin[remapNew]; + + mesh.GetVertexBegin()[vertex] = backupVertexData.MakeStrideIterator<Vector3f> (kShaderChannelVertex)[remapNew]; + + if (backupVertexData.HasChannel(kShaderChannelNormal)) + mesh.GetNormalBegin()[vertex] = backupVertexData.MakeStrideIterator<Vector3f> (kShaderChannelNormal)[remapNew]; + + if (backupVertexData.HasChannel(kShaderChannelColor)) + mesh.GetColorBegin()[vertex] = backupVertexData.MakeStrideIterator<ColorRGBA32> (kShaderChannelColor)[remapNew]; + + if (backupVertexData.HasChannel(kShaderChannelTexCoord0)) + mesh.GetUvBegin(0)[vertex] = backupVertexData.MakeStrideIterator<Vector2f> (kShaderChannelTexCoord0)[remapNew]; + + if (backupVertexData.HasChannel(kShaderChannelTexCoord1)) + mesh.GetUvBegin(1)[vertex] = backupVertexData.MakeStrideIterator<Vector2f> (kShaderChannelTexCoord1)[remapNew]; + + if (backupVertexData.HasChannel(kShaderChannelTangent)) + mesh.GetTangentBegin()[vertex] = backupVertexData.MakeStrideIterator<Vector4f> (kShaderChannelTangent)[remapNew]; + } + + // Remap vertex indices stored in blend shapes + BlendShapeData& blendShapeData = mesh.GetWriteBlendShapeDataInternal(); + BlendShapeVertices& blendShapeVertices = blendShapeData.vertices; + for (BlendShapeVertices::iterator itv = blendShapeVertices.begin(), endv = blendShapeVertices.end(); itv != 
endv; ++itv) + { + BlendShapeVertex& bsv = *itv; + bsv.index = oldToNew[bsv.index]; + } + + // Sort each shape's vertices by index so the blending writes to memory as linearly as possible + for (int shapeIndex = 0; shapeIndex < blendShapeData.shapes.size(); shapeIndex++) + { + const BlendShape& shape = blendShapeData.shapes[shapeIndex]; + BlendShapeVertex* vertices = &blendShapeVertices[shape.firstVertex]; + std::sort(vertices, vertices + shape.vertexCount, CompareBlendShapeVertexIndex); + } + + mesh.SetChannelsDirty(mesh.GetAvailableChannels(), true); +} + +void OptimizeIndexBuffers (Mesh& mesh) +{ + const int submeshCount = mesh.GetSubMeshCount(); + const int vertexCount = mesh.GetVertexCount(); + + // first optimize the indices for each submesh + for (int submesh = 0; submesh < submeshCount; submesh++) + { + Mesh::TemporaryIndexContainer unoptimizedIndices; + mesh.GetTriangles (unoptimizedIndices, submesh); + + Mesh::TemporaryIndexContainer optimizedIndices; + optimizedIndices.resize(unoptimizedIndices.size()); + + VertexCacheOptimizer<UInt32, 16> vertexCacheOptimizer; + vertexCacheOptimizer.OptimizeTriangles(&optimizedIndices[0], vertexCount, &unoptimizedIndices[0], unoptimizedIndices.size() / 3); + // LogString(Format("[Optimize] mesh: %s: submesh: %d hits: %d misses: %d\n", mesh.GetName(), submesh, vertexCacheOptimizer.GetCacheHits(), vertexCacheOptimizer.GetCacheMisses())); + + mesh.SetIndices (&optimizedIndices[0], optimizedIndices.size(), submesh, kPrimitiveTriangles); + } +} + + +template<typename T, const int CACHE_SIZE> +class VertexCacheDeOptimizer +{ + UInt32* m_cacheEntries; + UInt32 m_cacheSize; + + mutable UInt32 m_cacheMisses; + mutable UInt32 m_cacheHits; + + UInt32 GetInCache(UInt32 lIndex, const char* vertexInCache) const + { + return vertexInCache[lIndex] ? 
1 : 0; + } + + void AddToCache(UInt32 lIndex, char* vertexInCache) + { + if(m_cacheEntries[0]!=-1) + vertexInCache[m_cacheEntries[0]]=0; + + for(UInt32 i=0; i<m_cacheSize-1; i++) + m_cacheEntries[i]=m_cacheEntries[i+1]; + + m_cacheEntries[m_cacheSize-1]=lIndex; + vertexInCache[lIndex]=1; + } + +public: + + VertexCacheDeOptimizer () : m_cacheSize(CACHE_SIZE) + { + m_cacheEntries=new UInt32 [m_cacheSize]; + + m_cacheHits = m_cacheMisses = 0; + for(UInt32 i=0; i<m_cacheSize; i++) + m_cacheEntries[i]=(UInt32)-1; + } + + ~VertexCacheDeOptimizer() { delete m_cacheEntries; } + + UInt32 GetCacheMisses() { return m_cacheMisses; } + UInt32 GetCacheHits() { return m_cacheHits; } + + void DeOptimizeTriangles(T* pdstTris, UInt32 numVertices, const T* srcTris, UInt32 numTriangles) + { + UInt32 cachedVerts=0; + char* triangleUsed=new char [numTriangles]; + char* vertexInCache=new char [numVertices]; + memset(triangleUsed,0,numTriangles); + memset(vertexInCache,0,numVertices); + + bool foundTriangle=true; + while (foundTriangle) + { + foundTriangle=false; + UInt32 bestCandidate=0; + UInt32 bestCacheValue=4; + for (UInt32 i = 0; i < numTriangles; i++) + { + if (triangleUsed[i]) + continue; + + foundTriangle=true; + UInt32 i1=srcTris[i*3+0]; + UInt32 i2=srcTris[i*3+1]; + UInt32 i3=srcTris[i*3+2]; + + UInt32 lCacheValue=GetInCache(i1,vertexInCache)+GetInCache(i2,vertexInCache)+GetInCache(i3,vertexInCache)+1; + if (lCacheValue <= bestCacheValue) + { + bestCandidate=i; + bestCacheValue=lCacheValue; + if (bestCacheValue == 1) + break; + } + } + if(foundTriangle) + { + triangleUsed[bestCandidate]=1; + UInt32 i1=srcTris[bestCandidate*3+0]; + UInt32 i2=srcTris[bestCandidate*3+1]; + UInt32 i3=srcTris[bestCandidate*3+2]; + *pdstTris++=(T)i1; + *pdstTris++=(T)i2; + *pdstTris++=(T)i3; + if (!GetInCache(i1,vertexInCache)) { AddToCache(i1,vertexInCache); cachedVerts++; m_cacheMisses++; } else m_cacheHits++; + if (!GetInCache(i2,vertexInCache)) { AddToCache(i2,vertexInCache); cachedVerts++; 
m_cacheMisses++; } else m_cacheHits++; + if (!GetInCache(i3,vertexInCache)) { AddToCache(i3,vertexInCache); cachedVerts++; m_cacheMisses++; } else m_cacheHits++; + } + } + delete triangleUsed; + delete vertexInCache; + } +}; + +void DeOptimizeIndexBuffers (Mesh& mesh) +{ + const int submeshCount = mesh.GetSubMeshCount(); + const int vertexCount = mesh.GetVertexCount(); + + // first optimize the indices for each submesh + for (int submesh = 0; submesh < submeshCount; submesh++) + { + Mesh::TemporaryIndexContainer unoptimizedIndices; + mesh.GetTriangles (unoptimizedIndices, submesh); + + Mesh::TemporaryIndexContainer deOptimizedIndices; + deOptimizedIndices.resize(unoptimizedIndices.size()); + + VertexCacheDeOptimizer<UInt32, 16> vertexCacheDeOptimizer; + vertexCacheDeOptimizer.DeOptimizeTriangles(&deOptimizedIndices[0], vertexCount, &unoptimizedIndices[0], unoptimizedIndices.size() / 3); + + //LogString(Format("[Deoptimize] mesh: %s: submesh: %d hits: %d misses: %d\n", mesh.GetName(), submesh, vertexCacheDeOptimizer.GetCacheHits(), vertexCacheDeOptimizer.GetCacheMisses())); + + mesh.SetIndices (&deOptimizedIndices[0], deOptimizedIndices.size(), submesh, kPrimitiveTriangles); + } +} + diff --git a/Runtime/Filters/Mesh/MeshOptimizer.h b/Runtime/Filters/Mesh/MeshOptimizer.h new file mode 100644 index 0000000..8964edf --- /dev/null +++ b/Runtime/Filters/Mesh/MeshOptimizer.h @@ -0,0 +1,13 @@ +#pragma once + +#ifndef __importmeshoptimizer_h_included__ +#define __importmeshoptimizer_h_included__ + +#include "Runtime/Filters/Mesh/LodMesh.h" + +void DeOptimizeIndexBuffers (Mesh& mesh); +void OptimizeIndexBuffers (Mesh& mesh); +void OptimizeReorderVertexBuffer (Mesh& mesh); + + +#endif //__importmeshoptimizer_h_included__ diff --git a/Runtime/Filters/Mesh/MeshPartitioner.cpp b/Runtime/Filters/Mesh/MeshPartitioner.cpp new file mode 100644 index 0000000..9ec9f87 --- /dev/null +++ b/Runtime/Filters/Mesh/MeshPartitioner.cpp @@ -0,0 +1,346 @@ + +#include "UnityPrefix.h" +#include 
"MeshPartitioner.h" +#include "Runtime/Filters/Mesh/LodMesh.h" + +#if UNITY_EDITOR + +static const UInt32 ComponentStride[] = { 12, 12, 4, 8, 8, 16, sizeof(BoneInfluence) }; + +static int CalcDMABatchSize(int totalVerts, int stride, const int sizeRestriction, bool padded) +{ + const int alignmentRestriction = 16; // DMA transfers address must be a multiple of 16 + int a = alignmentRestriction; + + if(a>stride) + { + if(a % stride == 0) + return sizeRestriction; + while(a % stride) { a+=alignmentRestriction; } + } + else + { + if(stride % a == 0) + return sizeRestriction; + while(stride % a) { a+=alignmentRestriction; } + } + + int batchMultiple = a / stride; + totalVerts = (totalVerts < sizeRestriction) ? totalVerts : sizeRestriction; + if(padded) + totalVerts += batchMultiple - 1; + totalVerts /= batchMultiple; + totalVerts *= batchMultiple; + return totalVerts; +}; + +static int CalcBestFitBatchSize(const UInt32 availableChannels, int vertexCount, int maxVerts, bool padded = false) +{ + int bestFit = INT_MAX; + for(int i=0;i<=kShaderChannelCount;i++) + { + if (availableChannels & (1<<i)) + { + int maxVCount = CalcDMABatchSize(vertexCount, ComponentStride[i], maxVerts, padded); + bestFit = (bestFit > maxVCount) ? 
maxVCount : bestFit; + } + } + return bestFit; +} + +template<typename T> +struct TempPartition +{ + dynamic_array<Vector3f> m_Vertices; + dynamic_array<Vector2f> m_UV; + dynamic_array<Vector2f> m_UV1; + dynamic_array<ColorRGBA32> m_Colors; + dynamic_array<Vector3f> m_Normals; + dynamic_array<Vector4f> m_Tangents; + dynamic_array<BoneInfluence> m_Skin; + dynamic_array<T> indexBuffer; + dynamic_array<T> newToOld; + int vertexCount; + // + void InitRemapping(int numVertices) + { + newToOld.resize_uninitialized(numVertices); + memset(&newToOld[0],(T)-1,numVertices*sizeof(T)); + } + void RemapVertices(Mesh& mesh, int actualVertexCount) + { + m_Vertices.resize_uninitialized(vertexCount); + const UInt32 channels = mesh.GetAvailableChannels(); + if(channels&(1<<kShaderChannelNormal)) + m_Normals.resize_uninitialized(vertexCount); + if(channels&(1<<kShaderChannelTexCoord0)) + m_UV.resize_uninitialized(vertexCount); + if(channels&(1<<kShaderChannelTexCoord1)) + m_UV1.resize_uninitialized(vertexCount); + if(channels&(1<<kShaderChannelTangent)) + m_Tangents.resize_uninitialized(vertexCount); + if(channels&(1<<kShaderChannelColor)) + m_Colors.resize_uninitialized(vertexCount); + if(!mesh.GetSkin().empty()) + m_Skin.resize_uninitialized(vertexCount); + + T remapNew = 0; + for(int vertex=0; vertex<vertexCount; vertex++) + { + if((T)-1 != newToOld[vertex]) + remapNew = newToOld[vertex]; + m_Vertices[vertex]=mesh.GetVertexBegin()[remapNew]; + if(channels&(1<<kShaderChannelNormal)) + m_Normals[vertex]=mesh.GetNormalBegin()[remapNew]; + if(channels&(1<<kShaderChannelTexCoord0)) + m_UV[vertex]=mesh.GetUvBegin(0)[remapNew]; + if(channels&(1<<kShaderChannelTexCoord1)) + m_UV1[vertex]=mesh.GetUvBegin(1)[remapNew]; + if(channels&(1<<kShaderChannelTangent)) + m_Tangents[vertex]=mesh.GetTangentBegin()[remapNew]; + if(channels&(1<<kShaderChannelColor)) + m_Colors[vertex]=mesh.GetColorBegin()[remapNew]; + if(!mesh.GetSkin().empty()) + m_Skin[vertex]=mesh.GetSkin()[remapNew]; + } + } +}; + 
// A partitioned submesh: the list of TempPartitions it was split into.
template<typename T>
struct SegmentedMesh
{
	std::vector<TempPartition<T> > m_Partitions;
	void Clear() { m_Partitions.clear(); }
};

// Splits one submesh into DMA-sized partitions. Triangles are consumed in
// order; a partition is closed (and the last triangle undone) as soon as its
// unique-vertex count would exceed batchSize. Results are appended to
// segments[submesh].
template<typename T>
static void CreateFromSubMesh(std::vector< SegmentedMesh<T> >& segments, Mesh& mesh, int submesh)
{
	SubMesh& sm = mesh.GetSubMeshFast(submesh);

	T vertexCount = 0;
	const int numIndices = sm.indexCount;
	const int numTriangles = numIndices / 3;

	AssertBreak((numTriangles * 3) == numIndices);

	// widest channel present decides the max vertices per 16K DMA transfer
	UInt32 maxComponentStride = 0;
	const UInt32 availableChannels = mesh.GetAvailableChannels() | (mesh.GetSkin().empty() ? 0 : (1<<kShaderChannelCount));
	for(int i=0;i<=kShaderChannelCount;i++)
	{
		if(availableChannels & (1<<i))
		{
			if(maxComponentStride < ComponentStride[i])
				maxComponentStride = ComponentStride[i];
		}
	}

	const UInt32 maxDMATransferSize = 16 * 1024;
	const UInt32 numVerts = (numIndices + 15) & (~15);   // round up to multiple of 16
	const UInt32 maxVerts = std::min(numVerts, maxDMATransferSize / maxComponentStride);
	const UInt32 batchSize = CalcBestFitBatchSize(availableChannels, numVerts, maxVerts);

	const int maxPartitions = (numIndices + batchSize-1) / batchSize;
	const int numVertices = (sm.indexCount + 2*maxPartitions);

	const T* srcIndices = reinterpret_cast<const T*> (&mesh.GetIndexBuffer()[sm.firstByte]);

	int startTriangle = 0;
	int startVertex = 0;
	std::vector<T> oldToNew;   // source index -> partition-local index; (T)-1 = not yet seen
	oldToNew.resize(mesh.GetVertexCount());
	std::vector<TempPartition<T> > & partitions = segments[submesh].m_Partitions;
	while(startTriangle != numTriangles)
	{
		TempPartition<T> p;
		p.indexBuffer.clear();
		p.vertexCount = 0;
		p.InitRemapping(batchSize+3);
		dynamic_array<T>& dstIndices = p.indexBuffer;
		memset(&oldToNew[0],(T)-1,oldToNew.size()*sizeof(T));
		for(int i=startTriangle; i<numTriangles; i++)
		{
			// assume we consume everything; overwritten below if we overflow
			startTriangle = numTriangles;
			T lastVertexCount = vertexCount; // undo stack
			for(int j=0;j<3;j++)
			{
				int index = i*3+j;
				int vertex = srcIndices[index];
				AssertBreak(vertex >= 0);
				AssertBreak(vertex < mesh.GetVertexCount());
				AssertBreak(lastVertexCount-startVertex+j < p.newToOld.size());
				AssertBreak(p.newToOld[lastVertexCount-startVertex+j] == (T)-1);
				if(oldToNew[vertex]==(T)-1)
				{
					AssertBreak(vertexCount < numVertices);
					oldToNew[vertex]=vertexCount-startVertex;
					p.newToOld[vertexCount-startVertex]=vertex;
					vertexCount++;
				}
				dstIndices.push_back(oldToNew[vertex]);
			}
			if((vertexCount-startVertex) > batchSize)
			{
				//undo the last one in the partition
				for(int j=0;j<3;j++)
				{
					p.newToOld[lastVertexCount-startVertex+j] = -1;;
					dstIndices.pop_back();
				}
				startTriangle = i;
				vertexCount = lastVertexCount;
				break;
			}
		}
		const int actualVertexCount = vertexCount - startVertex;
		p.vertexCount = maxVerts;//CalcBestFitBatchSize(availableChannels, actualVertexCount, maxVerts, true); // FIXME!!! This needs to find the next "best fit" that will still keep alignment restrictions..
		p.RemapVertices(mesh, actualVertexCount);
		partitions.push_back(p);
		startVertex = vertexCount;
	}
	oldToNew.clear();
}

// mircea: todo: this would be awesome!!!
// spuInOut:
// m_Vertices
// m_Normals
// m_Tangents
// spuIn:
// m_Skin

// rsxDirect
// m_UV
// m_UV1
// m_Colors
// m_IndexBuffer

// Partitions every submesh of an optimized, skinned mesh into DMA-friendly
// chunks (UInt16 indices), then rebuilds the mesh's vertex/index buffers from
// the partitions and records MeshPartition/MeshPartitionInfo metadata.
// No-op for meshes that are not optimized or not skinned.
void PartitionSubmeshes(Mesh& m)
{
	typedef UInt16 T;

	const int submeshCount = m.m_SubMeshes.size();

	m.m_PartitionInfos.clear();
	m.m_Partitions.clear();

	// skinned meshes cannot be partitioned if the optimization flag is not set because partitioning changes the vertex/index buffers
	if (!m.GetMeshOptimized() || m.GetSkin().empty())
		return;

	// destripify if needed
	m.DestripifyIndices ();

	// need to fixup the indices first so they are not relative to the partition start anymore.
	// NOTE(review): m_PartitionInfos was cleared a few lines above, so this
	// fixup loop iterates over an empty container and does nothing — confirm
	// whether it should run before the clear() calls.
	Mesh::MeshPartitionInfoContainer& partInfos = m.m_PartitionInfos;
	for(int pi=0; pi<partInfos.size(); pi++)
	{
		const MeshPartitionInfo& partInfo = m.m_PartitionInfos[pi];

		for(int s=0; s<partInfo.partitionCount; s++)
		{
			const MeshPartition& p = m.m_Partitions[partInfo.submeshStart + s];
			IndexBufferData indexBufferData;
			m.GetIndexBufferData(indexBufferData);
			UInt16* indices = (UInt16*)(&m.m_IndexBuffer[0] + p.indexByteOffset);
			for(int i=0;i<p.indexCount;i++)
				indices[i] += p.vertexOffset;
		}
	}

	// make a segment for each submesh
	std::vector< SegmentedMesh<T> > segments;
	segments.resize(submeshCount);
	for(int submesh=0;submesh<submeshCount;submesh++)
		CreateFromSubMesh<T>(segments, m, submesh);

	///////////////////////////////////////////////////////////////////////////////
	// combine the segments to get the script accessible buffers

	UInt32 availableChannels = m.GetAvailableChannels();

	m.Clear(false);
	m.SetMeshOptimized(true); //mircea@ m.Clear will set the optimized mesh to false. Being here means we are partitioning an optimized mesh so restore the flag.
	m.SetSubMeshCount(submeshCount);

	UInt32 vertexOffset = 0;
	UInt32 indexOffset = 0;

	// First pass: lay out partition records (offsets/counts) without copying data.
	for(int submesh=0;submesh<submeshCount;submesh++)
	{
		int indexCount = 0;
		SegmentedMesh<T>& seg = segments[submesh];

		MeshPartitionInfo partInfo;
		partInfo.submeshStart = m.m_Partitions.size();
		partInfo.partitionCount = seg.m_Partitions.size();
		m.m_PartitionInfos.push_back(partInfo);

		// create partitions & build the mesh buffers
		for(int s=0;s<seg.m_Partitions.size();s++)
		{
			MeshPartition part;
			TempPartition<T>& p = seg.m_Partitions[s];
			part.vertexCount = p.vertexCount;
			part.vertexOffset = vertexOffset;
			part.indexCount = p.indexBuffer.size();
			part.indexByteOffset = indexOffset;
			AssertBreak(0 == (part.vertexOffset & 15));
			m.m_Partitions.push_back(part);;
			indexCount += part.indexCount;
			indexOffset += p.indexBuffer.size() * sizeof(T);
			vertexOffset += p.vertexCount;
		}
	}

	// fill in the partitioned data back into the mesh.
	m.ResizeVertices(vertexOffset, availableChannels);

	// Second pass: copy per-partition vertex data and rebuild per-submesh indices.
	for(int submesh=0;submesh<submeshCount;submesh++)
	{
		const SegmentedMesh<T>& seg = segments[submesh];
		const MeshPartitionInfo& partInfo = m.m_PartitionInfos[submesh];
		for(int s=0;s<seg.m_Partitions.size();s++)
		{
			const TempPartition<T>& p = seg.m_Partitions[s];
			const MeshPartition& part = m.m_Partitions[partInfo.submeshStart + s];
			strided_copy (p.m_Vertices.begin (), p.m_Vertices.end(), m.GetVertexBegin () + part.vertexOffset);
			if(!p.m_Normals.empty())
				strided_copy (p.m_Normals.begin (), p.m_Normals.end(), m.GetNormalBegin () + part.vertexOffset);
			if(!p.m_UV.empty())
				strided_copy (p.m_UV.begin (), p.m_UV.end (), m.GetUvBegin (0) + part.vertexOffset);
			if(!p.m_UV1.empty())
				strided_copy (p.m_UV1.begin (), p.m_UV1.end (), m.GetUvBegin (1) + part.vertexOffset);
			if(!p.m_Tangents.empty())
				strided_copy (p.m_Tangents.begin (), p.m_Tangents.end (), m.GetTangentBegin () + part.vertexOffset);
			if(!p.m_Colors.empty())
				strided_copy (p.m_Colors.begin (), p.m_Colors.end (), m.GetColorBegin() + part.vertexOffset);
			if(!p.m_Skin.empty())
				m.GetSkin().insert(m.GetSkin().end(), p.m_Skin.begin(), p.m_Skin.end());
		}

		// indices stay partition-relative unless DEBUG_PARTITIONING rebases them
		std::vector<T> indices;
		for(int s=0;s<partInfo.partitionCount;s++)
		{
			const MeshPartition& p = m.m_Partitions[partInfo.submeshStart+s];
			const TempPartition<T>& tp = seg.m_Partitions[s];
			for(int i=0;i<p.indexCount;i++)
			{
				int index = tp.indexBuffer[i];
				AssertBreak( (index>=0) && (index < (p.vertexCount)));
#if DEBUG_PARTITIONING
				index += p.vertexOffset;
#endif
				indices.push_back(index);
			}
		}
		m.SetIndices (&indices[0], indices.size(), submesh, kPrimitiveTriangles);
	}
}

// Public entry point (see MeshPartitioner.h).
void PartitionMesh(Mesh* m)
{
	PartitionSubmeshes(*m);
}

#endif //UNITY_EDITOR
diff --git a/Runtime/Filters/Mesh/MeshPartitioner.h b/Runtime/Filters/Mesh/MeshPartitioner.h
new file mode 100644
index 0000000..95a0d98
--- /dev/null
+++ b/Runtime/Filters/Mesh/MeshPartitioner.h
@@ -0,0 +1,5 @@
#pragma once

#define DEBUG_PARTITIONING 0
class Mesh;
void PartitionMesh(Mesh* m);
diff --git a/Runtime/Filters/Mesh/MeshRenderer.cpp b/Runtime/Filters/Mesh/MeshRenderer.cpp
new file mode 100644
index 0000000..08dfbae
--- /dev/null
+++ b/Runtime/Filters/Mesh/MeshRenderer.cpp
@@ -0,0 +1,664 @@
#include "UnityPrefix.h"
#include "MeshRenderer.h"
#include "Runtime/Graphics/Transform.h"
#include "LodMesh.h"
#include "Runtime/Filters/Mesh/MeshUtility.h"
#include "Runtime/Graphics/DrawUtil.h"
#include "Runtime/GfxDevice/BatchRendering.h"
#include "Runtime/Math/Vector3.h"
#include "Runtime/Shaders/Material.h"
#include "Runtime/Profiler/Profiler.h"
#include "Runtime/Profiler/ExternalGraphicsProfiler.h"
#include "Runtime/Utilities/BitUtility.h"
#include "Runtime/GfxDevice/GfxDevice.h"

#include "Runtime/GfxDevice/ChannelAssigns.h"
#include "External/shaderlab/Library/properties.h"
#include "External/shaderlab/Library/shaderlab.h"

#include "Runtime/Camera/Renderqueue.h"
#include
"Runtime/Camera/RenderLoops/BuiltinShaderParamUtility.h" +#include "Runtime/GfxDevice/BatchRendering.h" + +#include "Runtime/Profiler/TimeHelper.h" +#include "Runtime/GfxDevice/GfxDeviceStats.h" +#include "Runtime/Misc/BuildSettings.h" + + +PROFILER_INFORMATION(gMeshRenderProfile, "MeshRenderer.Render", kProfilerRender) +PROFILER_INFORMATION(gMeshRenderScaledProfile, "MeshRenderer.ComputeScaledMesh", kProfilerRender) +PROFILER_INFORMATION(gMeshRenderStaticBatch, "MeshRenderer.RenderStaticBatch", kProfilerRender) +PROFILER_INFORMATION(gMeshRenderDynamicBatch, "MeshRenderer.RenderDynamicBatch", kProfilerRender) + + +#if UNITY_EDITOR +#define SET_CACHED_SURFACE_AREA_DIRTY() m_CachedSurfaceArea = -1.0f; +#else +#define SET_CACHED_SURFACE_AREA_DIRTY() //do nothing +#endif + +IMPLEMENT_CLASS_INIT_ONLY (MeshRenderer) + +MeshRenderer::MeshRenderer (MemLabelId label, ObjectCreationMode mode) +: Super(kRendererMesh, label, mode) +, m_MeshNode (this) +{ + m_ScaledMeshDirty = true; + m_MeshWasModified = false; + + m_CachedMesh = NULL; + m_ScaledMesh = NULL; + SET_CACHED_SURFACE_AREA_DIRTY(); +} + +MeshRenderer::~MeshRenderer () +{ + FreeScaledMesh (); +} + +void MeshRenderer::AwakeFromLoad (AwakeFromLoadMode awakeMode) +{ + Super::AwakeFromLoad (awakeMode); + UpdateCachedMesh (); +} + +void MeshRenderer::Deactivate (DeactivateOperation operation) +{ + Super::Deactivate (operation); + FreeScaledMesh (); +} + +void MeshRenderer::InitializeClass () +{ + REGISTER_MESSAGE (MeshRenderer, kTransformChanged, TransformChanged, int); + + REGISTER_MESSAGE_VOID(MeshRenderer, kDidModifyBounds, DidModifyMeshBounds); + REGISTER_MESSAGE_VOID(MeshRenderer, kDidDeleteMesh, DidDeleteMesh); + REGISTER_MESSAGE_VOID(MeshRenderer, kDidModifyMesh, DidModifyMesh); +} + +void MeshRenderer::TransformChanged (int changeMask) +{ + if (changeMask & Transform::kScaleChanged) + { + SET_CACHED_SURFACE_AREA_DIRTY(); + m_ScaledMeshDirty = true; + } + Super::TransformChanged (changeMask); +} + +void 
MeshRenderer::UpdateLocalAABB() +{ + DebugAssertIf( m_CachedMesh != m_Mesh ); + if( m_CachedMesh ) + { + if (HasSubsetIndices()) + { + if (GetMaterialCount() == 1) + m_TransformInfo.localAABB = m_CachedMesh->GetBounds(GetSubsetIndex(0)); + else + { + MinMaxAABB minMaxAABB; + for (int m = 0; m < GetMaterialCount(); ++m) + minMaxAABB.Encapsulate(m_CachedMesh->GetBounds(GetSubsetIndex(m))); + m_TransformInfo.localAABB = minMaxAABB; + } + } + else + { + m_TransformInfo.localAABB = m_CachedMesh->GetBounds(); + } + } + else + m_TransformInfo.localAABB.SetCenterAndExtent( Vector3f::zero, Vector3f::zero ); +} + +void MeshRenderer::SetSubsetIndex(int subsetIndex, int index) +{ + Renderer::SetSubsetIndex(subsetIndex, index); + + // Reset scaled mesh if this renderer is now statically batched. + // Mesh scaling should never be used with static batching (case 551504). + FreeScaledMesh(); +} + +int MeshRenderer::GetStaticBatchIndex() const +{ + // Wrap non-virtual version in a virtual call + return GetMeshStaticBatchIndex(); +} + +int MeshRenderer::GetMeshStaticBatchIndex() const +{ + return IsPartOfStaticBatch() ? m_CachedMesh->GetInstanceID(): 0; +} + +UInt32 MeshRenderer::GetMeshIDSmall() const +{ + return m_CachedMesh ? m_CachedMesh->GetInternalMeshID(): 0; +} + + +Mesh* MeshRenderer::GetCachedMesh () +{ + DebugAssertIf(m_CachedMesh != m_Mesh); + return m_CachedMesh; +} + + +Mesh* MeshRenderer::GetMeshUsedForRendering () +{ + Mesh* cachedMesh = GetCachedMesh (); + + if (cachedMesh != NULL) + { + // NOTE: staticaly batched geometry already has scale applied + // therefore we skip mesh scaling + if (!m_ScaledMeshDirty || IsPartOfStaticBatch()) + return m_ScaledMesh == NULL ? 
cachedMesh : m_ScaledMesh->mesh; + + m_ScaledMeshDirty = false; + + float unused2; + Matrix4x4f unused; + Matrix4x4f scalematrix; + TransformType type = GetTransform().CalculateTransformMatrixDisableNonUniformScale (unused, scalematrix, unused2); + // Check if no scale is needed or we can't access vertices anyway to transform them correctly + DebugAssert(!IsNonUniformScaleTransform(type) || cachedMesh->HasVertexData()); + if (!IsNonUniformScaleTransform(type) || !cachedMesh->HasVertexData()) + { + // Cleanup scaled mesh + FreeScaledMesh(); + m_MeshWasModified = false; + + return cachedMesh; + } + // Need scaled mesh + else + { + // Early out if the mesh scale hasn't actually changed + if (m_ScaledMesh != NULL && CompareApproximately(scalematrix, m_ScaledMesh->matrix) && !m_MeshWasModified) + return m_ScaledMesh->mesh; + + // Scale has changed, maybe generated a new scaled mesh + PROFILER_AUTO(gMeshRenderScaledProfile, this) + + // Allocate scaled mesh + if (m_ScaledMesh == NULL) + { + m_ScaledMesh = new ScaledMesh (); + m_ScaledMesh->mesh = NEW_OBJECT (Mesh); + m_ScaledMesh->mesh->Reset(); + m_ScaledMesh->mesh->AwakeFromLoad(kInstantiateOrCreateFromCodeAwakeFromLoad); + m_ScaledMesh->mesh->SetHideFlags(kHideAndDontSave); + } + + m_MeshWasModified = false; + + // Rescale mesh + m_ScaledMesh->matrix = scalematrix; + m_ScaledMesh->mesh->CopyTransformed(*cachedMesh, scalematrix); + return m_ScaledMesh->mesh; + } + } + else + { + return NULL; + } +} + +static SubMesh const& GetSubMesh(Mesh& mesh, int subsetIndex) +{ + const int subMeshCount = mesh.GetSubMeshCount()? 
mesh.GetSubMeshCount()-1 : 0; + const int subMeshIndex = std::min<unsigned int>(subsetIndex, subMeshCount); + return mesh.GetSubMeshFast(subMeshIndex); +} + + +void MeshRenderer::Render (int subsetIndex, const ChannelAssigns& channels) +{ + PROFILER_AUTO(gMeshRenderProfile, this); + + Mesh* mesh = GetMeshUsedForRendering (); + if (!mesh) + return; + if (m_CustomProperties) + GetGfxDevice().SetMaterialProperties (*m_CustomProperties); + DrawUtil::DrawMeshRaw (channels, *mesh, subsetIndex); +} + + +#if UNITY_EDITOR + +void MeshRenderer::GetRenderStats (RenderStats& renderStats) +{ + ///@TODO: This does not work with static batching fixor it. + memset(&renderStats, 0, sizeof(renderStats)); + + Mesh* mesh = m_Mesh; + if (mesh) + { + for (int i=0;i<GetMaterialCount();i++) + { + const SubMesh& submesh = GetSubMesh (*mesh, GetSubsetIndex(i)); + + renderStats.triangleCount += GetPrimitiveCount(submesh.indexCount, submesh.topology, false); + renderStats.vertexCount += submesh.vertexCount; + renderStats.submeshCount++; + } + } +} + +float MeshRenderer::GetCachedSurfaceArea () +{ + if (m_CachedSurfaceArea >= 0.0f) + return m_CachedSurfaceArea; + + Mesh* mesh = GetCachedMesh (); + if (!mesh) + { + m_CachedSurfaceArea = 1.0f; + return m_CachedSurfaceArea; + } + + Matrix4x4f objectToWorld; + GetComponent (Transform).CalculateTransformMatrix (objectToWorld); + + Mesh::TemporaryIndexContainer triangles; + mesh->GetTriangles (triangles); + + dynamic_array<Vector3f> vertices (mesh->GetVertexCount(), kMemTempAlloc); + mesh->ExtractVertexArray (vertices.begin ()); + + m_CachedSurfaceArea = CalculateSurfaceArea (objectToWorld, triangles, vertices); + + return m_CachedSurfaceArea; +} +#endif + +void MeshRenderer::DidModifyMeshBounds () +{ + SET_CACHED_SURFACE_AREA_DIRTY(); + m_TransformDirty = true; + BoundsChanged (); +} + +void MeshRenderer::DidModifyMesh () +{ + m_MeshWasModified = true; + m_ScaledMeshDirty = true; + m_TransformDirty = true; + BoundsChanged(); +} + +void 
MeshRenderer::DidDeleteMesh () +{ + m_CachedMesh = NULL; +} + +void MeshRenderer::SetSharedMesh (PPtr<Mesh> mesh) +{ + SET_CACHED_SURFACE_AREA_DIRTY(); + m_Mesh = mesh; + UpdateCachedMesh (); +} + +PPtr<Mesh> MeshRenderer::GetSharedMesh () +{ + return m_Mesh; +} + +void MeshRenderer::UpdateCachedMesh () +{ + Mesh* mesh = m_Mesh; + if (mesh != m_CachedMesh) + { + // In order to make sure we are not using old subset indices referring to the previous mesh + // we clear them here, assuming that the correct subset indices will be set subsequently. + // We only do this if there was a previous mesh that the new mesh is replacing, since some + // code paths are transferring in the values and then call this function. In that case we do + // not want to mess with the indices. + if (m_CachedMesh) ClearSubsetIndices(); + m_ScaledMeshDirty = true; + m_MeshWasModified = true; + m_CachedMesh = mesh; + m_TransformDirty = true; + BoundsChanged(); + m_MeshNode.RemoveFromList(); + if (m_CachedMesh) + m_CachedMesh->AddObjectUser( m_MeshNode ); + } +} + +void MeshRenderer::FreeScaledMesh () +{ + if (m_ScaledMesh) + { + DestroySingleObject (m_ScaledMesh->mesh); + delete m_ScaledMesh; + m_ScaledMesh = NULL; + m_ScaledMeshDirty = false; + } +} + +#if GFX_ENABLE_DRAW_CALL_BATCHING + +PROFILER_INFORMATION(gDrawStaticBatchProfile, "Batch.DrawStatic", kProfilerRender) +PROFILER_INFORMATION(gDrawDynamicBatchProfile, "Batch.DrawDynamic", kProfilerRender) + +static bool RenderStaticBatch (Mesh& mesh, VBO& vbo, + BatchInstanceData const* instances, size_t count, const ChannelAssigns& channels) +{ + if (count <= 1) + return false; + IndexBufferData indexBuffer; + mesh.GetIndexBufferData (indexBuffer); + if (!indexBuffer.indices) + return false; + + PROFILER_AUTO(gMeshRenderStaticBatch, &mesh) + + const SubMesh& firstSubmesh = GetSubMesh (mesh, instances[0].subsetIndex); + GfxPrimitiveType topology = firstSubmesh.topology; + const Matrix4x4f& xform = instances[0].xform; + int xformType = 
instances[0].xformType; + + GfxDevice& device = GetGfxDevice(); + device.BeginStaticBatching(channels, topology); + + // Concat SubMeshes + for (BatchInstanceData const* it = instances; it < instances + count; ++it) + { + const SubMesh& submesh = GetSubMesh (mesh, it->subsetIndex); + device.StaticBatchMesh(submesh.firstVertex, submesh.vertexCount, indexBuffer, submesh.firstByte, submesh.indexCount); + + Assert(topology == submesh.topology); + Assert(xformType == it->xformType); + } + + device.EndStaticBatching(vbo, xform, TransformType(xformType), mesh.GetChannelsInVBO()); + GPU_TIMESTAMP(); + +#if ENABLE_MULTITHREADED_CODE + // Make sure renderer is done before mesh is changed or deleted + UInt32 cpuFence = device.InsertCPUFence(); + mesh.SetCurrentCPUFence(cpuFence); +#endif + + return true; +} + +static bool RenderDynamicBatch (BatchInstanceData const* instances, size_t count, size_t maxVertices, size_t maxIndices, const ChannelAssigns& shaderChannels, UInt32 availableChannels, GfxPrimitiveType topology) +{ + if (count <= 1) + return false; + + if (gGraphicsCaps.buggyDynamicVBOWithTangents && (shaderChannels.GetSourceMap() & (1<<kShaderChannelTangent))) + return false; + + PROFILER_AUTO(gMeshRenderDynamicBatch, NULL) + + DebugAssert (topology != -1); + + GfxDevice& device = GetGfxDevice(); + UInt32 expectedFence = device.GetNextCPUFence(); + device.BeginDynamicBatching(shaderChannels, availableChannels, maxVertices, maxIndices, topology); + + // Transform on CPU + int xformType = -1; + + + for (BatchInstanceData const* it = instances; it < instances + count; ++it) + { + Assert(it->renderer); + Assert(it->renderer->GetRendererType() == kRendererMesh); + MeshRenderer* meshRenderer = (MeshRenderer*)it->renderer; + Mesh* mesh = meshRenderer->GetMeshUsedForRendering(); + if (!mesh) + continue; + + SubMesh const& submesh = GetSubMesh (*mesh, it->subsetIndex); + + Assert(topology == ~0UL || topology == submesh.topology); + Assert(xformType == -1 || xformType == 
it->xformType); + xformType = it->xformType; + + VertexBufferData vbData; + mesh->GetVertexBufferData(vbData, availableChannels); + IndexBufferData ibData; + mesh->GetIndexBufferData(ibData); + + // Make sure renderer is done before mesh is changed or deleted +#if ENABLE_MULTITHREADED_CODE + mesh->SetCurrentCPUFence(expectedFence); +#endif + + device.DynamicBatchMesh(it->xform, vbData, submesh.firstVertex, submesh.vertexCount, ibData, submesh.firstByte, submesh.indexCount); + } + + // Draw + Assert(xformType != -1); + Assert(topology != ~0UL); + + // We transformed all geometry into the world (Identity) space already. + // However, we did not normalize the normals. + // In fixed function, most GfxDevices (e.g. OpenGL & D3D) will try to figure out uniform + // scale directly from the matrix, and hence will not scale our normals. + // Therefore we upgrade normalization mode to "full normalize" to make them transform properly. + if (xformType & kUniformScaleTransform) + { + xformType &= ~kUniformScaleTransform; + xformType |= kNonUniformScaleTransform; + } + + // Caveat: we do pass identity matrix when batching + // currently normals handling in vprog is: + // xform * (normalize(normal) * unity_Scale.w); + // as we pass identity matrix (no scale) we need NOT apply inv_scale + device.SetInverseScale(1.0f); + device.EndDynamicBatching(TransformType(xformType)); + + // Insert fence after batching is complete + UInt32 fence = device.InsertCPUFence(); + Assert(fence == expectedFence); + + GPU_TIMESTAMP(); + + return true; +} + +void MeshRenderer::RenderMultiple (BatchInstanceData const* instances, size_t count, const ChannelAssigns& channels) +{ + Assert(count > 0); + + GfxDevice& device = GetGfxDevice(); + const float invScale = device.GetBuiltinParamValues().GetInstanceVectorParam(kShaderInstanceVecScale).w; + + const MaterialPropertyBlock* customProps = instances[0].renderer->GetCustomProperties(); + if (customProps) + device.SetMaterialProperties (*customProps); + + 
	const UInt32 wantedChannels = channels.GetSourceMap();
	const bool enableDynamicBatching = GetBuildSettings().enableDynamicBatching;

	// Walk the instance list; each outer iteration consumes one batch
	// (iBatchBegin..iBatchEnd) or, on failure, draws those instances one by one.
	BatchInstanceData const* instancesEnd = instances + count;
	for (BatchInstanceData const* iBatchBegin = instances; iBatchBegin != instancesEnd; )
	{
		Assert(iBatchBegin->renderer->GetRendererType() == kRendererMesh);
		MeshRenderer* meshRenderer = (MeshRenderer*)iBatchBegin->renderer;
		Mesh* mesh = meshRenderer->GetMeshUsedForRendering ();
		VBO* vbo = mesh ? mesh->GetSharedVBO (wantedChannels) : NULL;
		if (!vbo)
		{
			// Skip mesh
			++iBatchBegin;
			continue;
		}

		const UInt32 availableChannels = mesh->GetChannelsInVBO() & wantedChannels;
		const int staticBatchIndex = meshRenderer->GetMeshStaticBatchIndex ();
		const int xformType = iBatchBegin->xformType;

		const SubMesh& firstSubMesh = GetSubMesh(*mesh, iBatchBegin->subsetIndex);
		const GfxPrimitiveType topology = firstSubMesh.topology;
		size_t batchVertexCount = firstSubMesh.vertexCount;
		size_t batchIndexCount = firstSubMesh.indexCount;

		// For first strip take 1 connecting (degenerate) triangles into account
		if (topology == kPrimitiveTriangleStripDeprecated)
			batchIndexCount += 1;

		BatchInstanceData const* iBatchEnd = iBatchBegin + 1;

		// static batching
		if (staticBatchIndex != 0)
		{
			Assert(topology == kPrimitiveTriangles || topology == kPrimitiveTriangleStripDeprecated);
			const int maxIndices = GetGfxDevice().GetMaxStaticBatchIndices();

			// Greedily extend the batch while the next instance shares transform
			// type, static batch, topology, VBO and channels, and the combined
			// index count stays within device limits.
			for (; iBatchEnd != instancesEnd; ++iBatchEnd)
			{
				if (xformType != iBatchEnd->xformType)
					break;

				Assert(iBatchEnd->renderer->GetRendererType() == kRendererMesh);
				MeshRenderer* meshRenderer = (MeshRenderer*)iBatchEnd->renderer;
				if (staticBatchIndex != meshRenderer->GetMeshStaticBatchIndex())
					break;

				Mesh* nextMesh = meshRenderer->GetMeshUsedForRendering ();
				if (!nextMesh)
					break;

				const SubMesh& submesh = GetSubMesh(*nextMesh, iBatchEnd->subsetIndex);
				if (submesh.topology != topology)
					break;

				VBO* nextVbo = nextMesh->GetSharedVBO (wantedChannels);
				if (nextVbo != vbo) // also a NULL check since vbo is non-NULL
					break;

				UInt32 nextAvailableChannels = nextMesh->GetChannelsInVBO() & wantedChannels;
				if (availableChannels != nextAvailableChannels)
					break;

				UInt32 requiredIndexCount = batchIndexCount + submesh.indexCount;
				if (topology == kPrimitiveTriangleStripDeprecated)
					requiredIndexCount += 3; // take 3 connecting (degenerate) triangles into account

				// NOTE(review): signed/unsigned comparison — maxIndices is int,
				// requiredIndexCount is UInt32; fine while maxIndices >= 0.
				if (requiredIndexCount > maxIndices)
					break;

				batchIndexCount = requiredIndexCount;
			}

			// Only advance past the batch when it actually rendered; otherwise
			// the fallback loop below draws each instance individually.
			if (mesh && vbo)
				if (RenderStaticBatch (*mesh, *vbo, iBatchBegin, iBatchEnd - iBatchBegin, channels))
					iBatchBegin = iBatchEnd;
		}
		else if (vbo && enableDynamicBatching)
		// dynamic batching
		{
			const int firstVertexCount = batchVertexCount;
			const int firstIndexCount = batchIndexCount;

			// after moving to fully strided meshes we were hit by the issue that we might have different channels
			// in src and dst data, so our optimized asm routines doesn't quite work.
			// we will move to support vertex streams (this will solve lots of issues after skinning/batching asm rewrite ;-))
			// but for now let just play safe

			if (CanUseDynamicBatching(*mesh, wantedChannels, firstVertexCount) &&
				firstIndexCount < kDynamicBatchingIndicesThreshold &&
				topology != kPrimitiveLineStrip)
			{
				// Greedy extension, same idea as the static case, but additionally
				// bounded by 16-bit vertex indices (0xffff) and the dynamic
				// batching thresholds.
				for (; iBatchEnd != instancesEnd; ++iBatchEnd)
				{
					if (xformType != iBatchEnd->xformType)
						break;

					Assert(iBatchEnd->renderer->GetRendererType() == kRendererMesh);
					MeshRenderer* meshRenderer = (MeshRenderer*)iBatchEnd->renderer;
					if (meshRenderer->IsPartOfStaticBatch())
						break;

					Mesh* nextMesh = meshRenderer->GetMeshUsedForRendering ();
					if (!nextMesh)
						break;

					const SubMesh& submesh = GetSubMesh(*nextMesh, iBatchEnd->subsetIndex);
					if (submesh.topology != topology)
						break;

					if (!CanUseDynamicBatching(*nextMesh, wantedChannels, submesh.vertexCount))
						break;

					UInt32 requiredVertexCount = batchVertexCount + submesh.vertexCount;
					UInt32 requiredIndexCount = batchIndexCount + submesh.indexCount;
					if (topology == kPrimitiveTriangleStripDeprecated)
						requiredIndexCount += 3; // take 3 connecting (degenerate) triangles into account

					if (requiredVertexCount > 0xffff)
						break;

					if (requiredIndexCount > kDynamicBatchingIndicesThreshold)
						break;

					VBO* nextVbo = nextMesh->GetSharedVBO (wantedChannels);
					if (!nextVbo)
						break;

					const UInt32 nextAvailableChannels = nextMesh->GetChannelsInVBO() & wantedChannels;
					if (availableChannels != nextAvailableChannels)
						break;

					batchVertexCount = requiredVertexCount;
					batchIndexCount = requiredIndexCount;
				}

				// Skip batch if batchVertexCount == 0 or batchIndexCount == 0
				if (batchVertexCount == 0 || batchIndexCount == 0 || RenderDynamicBatch (iBatchBegin, iBatchEnd - iBatchBegin, batchVertexCount, batchIndexCount, channels, availableChannels, topology))
					iBatchBegin = iBatchEnd;
			}
		}

		// old-school rendering for anything left
		for (; iBatchBegin != iBatchEnd; ++iBatchBegin)
		{
			BatchInstanceData const* it = iBatchBegin;
			Assert(iBatchBegin->renderer->GetRendererType() == kRendererMesh);
			MeshRenderer* meshRenderer = (MeshRenderer*)iBatchBegin->renderer;
			Mesh* mesh = meshRenderer->GetMeshUsedForRendering ();
			if (!mesh)
				continue;

			VBO* vbo = mesh->GetSharedVBO (wantedChannels);
			if (!vbo)
				continue;

			if (customProps)
				device.SetMaterialProperties (*customProps);

			// Batched rendering above will have set inverse scale to 1.0 (since everything is transformed
			// to identity). For remaining meshes that aren't batched, we have to setup the original scale
			// back.
			device.SetInverseScale(invScale);
			SetupObjectMatrix (it->xform, it->xformType);
			DrawUtil::DrawVBOMeshRaw (*vbo, *mesh, channels, it->subsetIndex);
		}

		Assert(iBatchBegin == iBatchEnd); // everything was rendered successfully
	}
}

// Returns true when a mesh is eligible for CPU-side dynamic batching:
// uncompressed vertex stream, a non-empty index buffer, and vertex counts
// below the dynamic-batching thresholds (total and per-channel-weighted).
bool MeshRenderer::CanUseDynamicBatching(const Mesh& mesh, UInt32 wantedChannels, int vertexCount)
{
	if (mesh.GetStreamCompression() != Mesh::kStreamCompressionDefault ||
		mesh.GetIndexBuffer().empty() ||
		vertexCount > kDynamicBatchingVerticesThreshold ||
		vertexCount * BitsInMask(wantedChannels) > kDynamicBatchingVertsByChannelThreshold)
		return false;
	return true;
}

#endif // #if GFX_ENABLE_DRAW_CALL_BATCHING

diff --git a/Runtime/Filters/Mesh/MeshRenderer.h b/Runtime/Filters/Mesh/MeshRenderer.h new file mode 100644 index 0000000..d42c22e --- /dev/null +++ b/Runtime/Filters/Mesh/MeshRenderer.h @@ -0,0 +1,87 @@
#ifndef MESHRENDERER_H
#define MESHRENDERER_H

#include "Runtime/Filters/Renderer.h"

class Mesh;



// Renderer component that draws a (shared or per-instance) Mesh,
// participating in static and dynamic draw-call batching.
class MeshRenderer : public Renderer {
 public:
	MeshRenderer (MemLabelId label, ObjectCreationMode mode);
	// ~MeshRenderer (); declared-by-macro
	REGISTER_DERIVED_CLASS (MeshRenderer, Renderer)
	static void InitializeClass ();

	// Tag class as sealed, this makes QueryComponent faster.
+ static bool IsSealedClass () { return true; } + + static void RenderMultiple (const BatchInstanceData* instances, size_t count, const ChannelAssigns& channels); + virtual void Render (int materialIndex, const ChannelAssigns& channels); + + virtual void UpdateLocalAABB(); + + virtual void SetSubsetIndex(int subsetIndex, int index); + + virtual int GetStaticBatchIndex() const; + virtual UInt32 GetMeshIDSmall() const; + int GetMeshStaticBatchIndex() const; + + void TransformChanged (int changeMask); + void AwakeFromLoad(AwakeFromLoadMode mode); + virtual void Deactivate (DeactivateOperation operation); + + void SetSharedMesh (PPtr<Mesh> mesh); + PPtr<Mesh> GetSharedMesh (); + + Mesh& GetInstantiatedMesh (); + void SetInstantiatedMesh (Mesh* mesh); + + Mesh* GetMeshUsedForRendering(); + + void DidModifyMeshBounds (); + void DidModifyMeshValidity (); + void DidModifyMesh (); + void DidDeleteMesh (); + #if UNITY_EDITOR + float GetCachedSurfaceArea (); + virtual void GetRenderStats (RenderStats& renderStats); + #endif + + static bool CanUseDynamicBatching(const Mesh& mesh, UInt32 wantedChannels, int vertexCount); + + private: + + Mesh* GetCachedMesh (); + + ListNode<Object> m_MeshNode; + void UpdateCachedMesh (); + + void FreeScaledMesh (); + + Mesh* m_CachedMesh; + PPtr<Mesh> m_Mesh; + + struct ScaledMesh + { + Matrix4x4f matrix; + Mesh* mesh; + }; + + ScaledMesh* m_ScaledMesh; + + // as we have padding anyway, we can add more flags here + UInt8 m_ScaledMeshDirty; + // setted on responce to event to properly handle vertices changing on non-uniform scale + UInt8 m_MeshWasModified; + // for future + UInt16 m_Padding16; + + #if UNITY_EDITOR + float m_CachedSurfaceArea; + #endif + +}; + +#endif diff --git a/Runtime/Filters/Mesh/MeshSkinning.cpp b/Runtime/Filters/Mesh/MeshSkinning.cpp new file mode 100644 index 0000000..7d01667 --- /dev/null +++ b/Runtime/Filters/Mesh/MeshSkinning.cpp @@ -0,0 +1,165 @@ +#include "UnityPrefix.h" +#include "MeshSkinning.h" +#if UNITY_OSX 
#include <alloca.h> // this is really deprecated and should be exchanged for stdlib.h
#else
#include <stdlib.h>
#endif
#include "Runtime/Utilities/Utility.h"
#include "Runtime/Utilities/LogAssert.h"
#include "Runtime/Utilities/OptimizationUtility.h"
#include "Runtime/Misc/Allocator.h"
#include "Runtime/Utilities/Prefetch.h"
#include "Runtime/Profiler/TimeHelper.h"
#include "Runtime/Profiler/Profiler.h"
#include "Runtime/Misc/CPUInfo.h"
#include "Runtime/Allocator/MemoryMacros.h"
#include "Runtime/Filters/Mesh/LodMesh.h"

PROFILER_INFORMATION(gMeshSkinningProfile, "MeshSkinning.Skin", kProfilerRender)
PROFILER_INFORMATION(gMeshSkinningSlowpath, "MeshSkinning.SlowPath", kProfilerRender)

#include "MeshSkinningMobile.h"
#include "MeshSkinningSSE2.h"
#include "SkinGeneric.h"
#include "MeshBlendShaping.h"


//===========================================================================================================================================


// Generic (non-SIMD) skinning entry point: dispatches to the SkinGeneric
// template instantiation matching bones-per-vertex (1/2/4) and whether
// normals/tangents need skinning.
void SkinMesh(SkinMeshInfo& info)
{
	const TransformInstruction NormalizeTransformInstruction =
#if (UNITY_SUPPORTS_NEON && !UNITY_DISABLE_NEON_SKINNING) || UNITY_SUPPORTS_VFP
		// NOTE: optimized NEON/VFP routines do not do any normalization
		// instead we rely on GPU to do that
		kNoNormalize;
#else
		//@TODO: fix that "Fast" & "Fastest" crap. Right now "Fastest" is actually a win on PC (1ms saved in Dark Unity)
		// so I'm leaving it there for now.
		kNormalizeFastest;
#endif

	// Instantiates the right skinning template depending on the bone per vertex count
	// (bonesPerVertex values other than 1/2/4 are silently ignored here).
	#define PERMUTE_BONES(skinNormal,skinTangent) { \
		if (info.bonesPerVertex == 1) \
			SkinGeneric<NormalizeTransformInstruction, 1, skinNormal, skinTangent> (info); \
		else if (info.bonesPerVertex == 2) \
			SkinGeneric<NormalizeTransformInstruction, 2, skinNormal, skinTangent> (info); \
		else if (info.bonesPerVertex == 4) \
			SkinGeneric<NormalizeTransformInstruction, 4, skinNormal, skinTangent> (info); \
	}

	if (info.skinNormals && info.skinTangents)
		PERMUTE_BONES(true, true)
	else if (info.skinNormals)
		PERMUTE_BONES(true, false)
	else
		PERMUTE_BONES(false, false)
}


// Picks the fastest available skinning implementation for this platform:
// Wii-specific, optimized mobile (NEON/VFP), SSE2, then the generic fallback.
static void ApplyMeshSkinning (SkinMeshInfo& info)
{
	#if UNITY_WII
	SkinMeshWii(info);
	#else

	PROFILER_AUTO(gMeshSkinningProfile, NULL);

	if (SkinMeshOptimizedMobile(info))
		return;

	if (SkinMeshOptimizedSSE2(info))
		return;

	// fallback to slow generic implementation
	{
		PROFILER_AUTO(gMeshSkinningSlowpath, NULL);
		SkinMesh(info);
	}
	#endif
}

// Applies blend shapes (into a temp buffer) and then bone skinning, writing
// the deformed vertex stream to info.outVertices. With neither active it
// degenerates to a plain copy of the input stream.
void DeformSkinnedMesh (SkinMeshInfo& info)
{
	const bool hasBlendShapes = info.blendshapeCount != 0;
	const bool hasSkin = info.boneCount != 0;

	// No actual skinning can be done. Just copy vertex stream.
	// TODO: This code can be removed if we render the undeformed mesh in SkinnedMeshRenderer
	// when there is no skin and no active blend shapes. See case 557165.
	if (!hasBlendShapes && !hasSkin)
	{
		memcpy (info.outVertices, info.inVertices, info.inStride * info.vertexCount);
		return;
	}

	UInt8* tmpBlendShapes = NULL;

	// blend shapes
	if (hasBlendShapes)
	{
		// The final destination might be write-combined memory which is insanely slow to read
		// or randomly access, so always allocate a temp buffer for blend shapes (case 554830).
		// Skinning can write directly to a VB since it always writes sequentially to memory.
		size_t bufferSize = info.inStride * info.vertexCount;
		tmpBlendShapes = ALLOC_TEMP_MANUAL(UInt8, bufferSize);

		ApplyBlendShapes (info, tmpBlendShapes);

		// When skinning follows, feed it the blend-shaped vertices; otherwise
		// the blend-shape result is the final output.
		if (hasSkin)
			info.inVertices = tmpBlendShapes;
		else
			memcpy(info.outVertices, tmpBlendShapes, bufferSize);
	}

	// skinning
	if (hasSkin)
		ApplyMeshSkinning (info);

	if (tmpBlendShapes)
		FREE_TEMP_MANUAL(tmpBlendShapes);
}


// Thread-pool/job adapter: unwraps the SkinMeshInfo* payload and runs
// DeformSkinnedMesh on it.
void* DeformSkinnedMeshJob (void* rawData)
{
	SkinMeshInfo* data = reinterpret_cast<SkinMeshInfo*>(rawData);
	DeformSkinnedMesh (*data);
	return NULL;
}


// Zero-initializes all fields (the struct is POD-like; see MeshSkinning.h).
SkinMeshInfo::SkinMeshInfo()
{
	memset(this, 0, sizeof(SkinMeshInfo));
}

// Allocates one aligned buffer holding the cached bone pose matrices followed
// by the blend-shape weights; both pointers alias into allocatedBuffer.
void SkinMeshInfo::Allocate()
{
	size_t size = boneCount * sizeof(Matrix4x4f) + sizeof(float) * blendshapeCount;
	if (size == 0)
		return;

	// 64-byte alignment: matrices are consumed by SIMD/asm skinning paths.
	allocatedBuffer = (UInt8*)UNITY_MALLOC_ALIGNED(kMemSkinning, size, 64);

	UInt8* head = allocatedBuffer;
	if (boneCount != 0)
	{
		cachedPose = reinterpret_cast<Matrix4x4f*> (head);
		head += sizeof(Matrix4x4f) * boneCount;
	}

	if (blendshapeCount != 0)
{
		blendshapeWeights = reinterpret_cast<float*> (head);
	}
}

// Frees the buffer created by Allocate(); safe to call when nothing was allocated.
void SkinMeshInfo::Release() const
{
	if (allocatedBuffer)
		UNITY_FREE(kMemSkinning, allocatedBuffer);
}
diff --git a/Runtime/Filters/Mesh/MeshSkinning.h b/Runtime/Filters/Mesh/MeshSkinning.h new file mode 100644 index 0000000..b56efa9 --- /dev/null +++ b/Runtime/Filters/Mesh/MeshSkinning.h @@ -0,0 +1,64 @@
#ifndef MESHSKINNING_H
#define MESHSKINNING_H

#include "Runtime/Math/Vector3.h"
#include "Runtime/Math/Quaternion.h"
#include "Mesh.h"
#include "Runtime/Geometry/AABB.h"
#include "Runtime/GfxDevice/GfxDeviceTypes.h"
#include <vector>
#include <list>

class GPUSkinningInfo;

typedef std::vector<BoneInfluence> CompactSkin;
struct BlendShapeData;

// How skinned normals/tangents are (re)normalized by the skinning routines.
enum TransformInstruction { kNormalizeFastest = 0, kNormalizeFast = 1, kNoNormalize = 3 };
class VertexData;

// All inputs/outputs of one skinning job: source and destination vertex
// streams, bone influences, cached pose matrices and blend-shape data.
struct SkinMeshInfo
{
	int bonesPerVertex;

	// Packed influence array; interpreted as int*/BoneInfluence2*/BoneInfluence*
	// depending on bonesPerVertex (see the skinning implementations).
	void* compactSkin;
	int boneCount;
+ + const void* inVertices; + void* outVertices; + int inStride; + int outStride; + + int normalOffset; + int tangentOffset; + bool skinNormals; + bool skinTangents; + + int vertexCount; + + // This is instance data and must be double buffered so the render thread can work in paralell. + UInt8* allocatedBuffer; + Matrix4x4f* cachedPose; + float* blendshapeWeights; + + int blendshapeCount; + const BlendShapeData* blendshapes; + + bool memExport; // Is set up for memexport (Xbox) or streamout (DX11) + +#if UNITY_PS3 + const VertexData* vertexData; +#endif + + GPUSkinningInfo *mei; + + SkinMeshInfo(); + + void Allocate(); + void Release () const; +}; + +void DeformSkinnedMesh (SkinMeshInfo& info); +void* DeformSkinnedMeshJob (void* rawData); + +#endif diff --git a/Runtime/Filters/Mesh/MeshSkinningGenericSIMD.h b/Runtime/Filters/Mesh/MeshSkinningGenericSIMD.h new file mode 100644 index 0000000..0b17b42 --- /dev/null +++ b/Runtime/Filters/Mesh/MeshSkinningGenericSIMD.h @@ -0,0 +1,212 @@ +#if 0 + +/* + mircea@INFO: this doesn't do normalization. + */ + +#include "Runtime/Math/Simd/Matrix4x4Simd.h" + +template<TransformInstruction transformInstruction, int bonesPerVertexCount, +bool skinNormal, bool skinTangent, bool copy8BytesAt24Offset> +void SkinGenericSimd (SkinMeshInfo& info) +{ + DebugAssertIf( copy8BytesAt24Offset && (!info.skinNormals || info.normalOffset != 12) ); + const int* influence1 = reinterpret_cast<const int*> (info.compactSkin); + const BoneInfluence2* influence2 = reinterpret_cast<const BoneInfluence2*> (info.compactSkin); + const BoneInfluence* influence4 = reinterpret_cast<const BoneInfluence*> (info.compactSkin); + + const Matrix4x4f* bones4x4 = info.cachedPose; + + const int inStride = info.inStride; + int outStride = info.outStride; + int count = info.vertexCount; + + const int normalOffset = (copy8BytesAt24Offset ? 
12 : info.normalOffset) >> 2;
	const int tangentOffset = info.tangentOffset >> 2;

	const UInt8* inputVertex = (const UInt8*)info.inVertices;
	UInt8* outputVertex = (UInt8*)info.outVertices;

	Simd128 pose0, pose1, pose2, pose3;

	for( int v = 0; v < count; v++ )
	{
		ALIGN_LOOP_OPTIMIZATION

		// Blend the matrices first, then transform everything with this
		// blended matrix. Gives a small speed boost on XCode/Intel (11.3 to 12.00 FPS
		// in skin4 bench), and a good boost on MSVC/Windows (9.6 to 12.4 FPS).
		if (bonesPerVertexCount == 1)
		{
			// Single bone: the "blended" matrix is just that bone's matrix.
			const float* maddr = bones4x4[*influence1].m_Data;

			Prefetch(maddr);

			pose0 = V4LoadUnaligned( maddr, 0x0 );
			pose1 = V4LoadUnaligned( maddr, 0x4 );
			pose2 = V4LoadUnaligned( maddr, 0x8 );
			pose3 = V4LoadUnaligned( maddr, 0xC );
		}
		else if (bonesPerVertexCount == 2)
		{
			Prefetch(influence2);

			Simd128 weights = {influence2->weight[0], influence2->weight[1], 0, 0};

			const float* maddr0 = bones4x4[influence2->boneIndex[0]].m_Data;
			const float* maddr1 = bones4x4[influence2->boneIndex[1]].m_Data;

			Prefetch(maddr0);
			Prefetch(maddr1);

			Simd128 weight0 = V4Splat(weights, 0);
			Simd128 weight1 = V4Splat(weights, 1);

			Simd128 mat00 = V4LoadUnaligned( maddr0, 0x0 );
			Simd128 mat01 = V4LoadUnaligned( maddr0, 0x4 );
			Simd128 mat02 = V4LoadUnaligned( maddr0, 0x8 );
			Simd128 mat03 = V4LoadUnaligned( maddr0, 0xC );

			Simd128 mat10 = V4LoadUnaligned( maddr1, 0x0 );
			Simd128 mat11 = V4LoadUnaligned( maddr1, 0x4 );
			Simd128 mat12 = V4LoadUnaligned( maddr1, 0x8 );
			Simd128 mat13 = V4LoadUnaligned( maddr1, 0xC );

			// pose = mat0*w0 + mat1*w1, column by column.
			pose0 = V4Mul(mat00, weight0);
			pose1 = V4Mul(mat01, weight0);
			pose2 = V4Mul(mat02, weight0);
			pose3 = V4Mul(mat03, weight0);

			pose0 = V4MulAdd(mat10, weight1, pose0);
			pose1 = V4MulAdd(mat11, weight1, pose1);
			pose2 = V4MulAdd(mat12, weight1, pose2);
			pose3 = V4MulAdd(mat13, weight1, pose3);
		}
		else if (bonesPerVertexCount == 4)
		{
			Prefetch(influence4);

			Simd128 weights = {influence4->weight[0], influence4->weight[1], influence4->weight[2], influence4->weight[3]};

			const float* maddr0 = bones4x4[influence4->boneIndex[0]].m_Data;
			const float* maddr1 = bones4x4[influence4->boneIndex[1]].m_Data;
			const float* maddr2 = bones4x4[influence4->boneIndex[2]].m_Data;
			const float* maddr3 = bones4x4[influence4->boneIndex[3]].m_Data;

			Prefetch(maddr0);
			Prefetch(maddr1);
			Prefetch(maddr2);
			Prefetch(maddr3);

			Simd128 weight0 = V4Splat(weights, 0);
			Simd128 weight1 = V4Splat(weights, 1);
			Simd128 weight2 = V4Splat(weights, 2);
			Simd128 weight3 = V4Splat(weights, 3);

			Simd128 mat00 = V4LoadUnaligned( maddr0, 0x0 );
			Simd128 mat01 = V4LoadUnaligned( maddr0, 0x4 );
			Simd128 mat02 = V4LoadUnaligned( maddr0, 0x8 );
			Simd128 mat03 = V4LoadUnaligned( maddr0, 0xC );

			Simd128 mat10 = V4LoadUnaligned( maddr1, 0x0 );
			Simd128 mat11 = V4LoadUnaligned( maddr1, 0x4 );
			Simd128 mat12 = V4LoadUnaligned( maddr1, 0x8 );
			Simd128 mat13 = V4LoadUnaligned( maddr1, 0xC );

			Simd128 mat20 = V4LoadUnaligned( maddr2, 0x0 );
			Simd128 mat21 = V4LoadUnaligned( maddr2, 0x4 );
			Simd128 mat22 = V4LoadUnaligned( maddr2, 0x8 );
			Simd128 mat23 = V4LoadUnaligned( maddr2, 0xC );

			Simd128 mat30 = V4LoadUnaligned( maddr3, 0x0 );
			Simd128 mat31 = V4LoadUnaligned( maddr3, 0x4 );
			Simd128 mat32 = V4LoadUnaligned( maddr3, 0x8 );
			Simd128 mat33 = V4LoadUnaligned( maddr3, 0xC );

			// pose = sum of the four weighted matrices, column by column.
			pose0 = V4Mul(mat00, weight0);
			pose1 = V4Mul(mat01, weight0);
			pose2 = V4Mul(mat02, weight0);
			pose3 = V4Mul(mat03, weight0);

			pose0 = V4MulAdd(mat10, weight1, pose0);
			pose1 = V4MulAdd(mat11, weight1, pose1);
			pose2 = V4MulAdd(mat12, weight1, pose2);
			pose3 = V4MulAdd(mat13, weight1, pose3);

			pose0 = V4MulAdd(mat20, weight2, pose0);
			pose1 = V4MulAdd(mat21, weight2, pose1);
			pose2 = V4MulAdd(mat22, weight2, pose2);
			pose3 = V4MulAdd(mat23, weight2, pose3);

			pose0 = V4MulAdd(mat30, weight3, pose0);
			pose1 = V4MulAdd(mat31, weight3, pose1);
			pose2 = V4MulAdd(mat32, weight3, pose2);
			pose3 = V4MulAdd(mat33, weight3, pose3);
		}

		Prefetch(inputVertex);

		Simd128 vpos = V4LoadUnaligned((const float*)inputVertex, 0);
		TransformPoint3NATIVE(pose0, pose1, pose2, pose3, vpos, vpos);

		Simd128 vnor, vtan, ndot, tdot;

		// remember... this is a template and skinNormal & skinTangent are consts
		if(skinNormal || skinTangent)
		{
			Simd128 vlen;
			if( skinNormal )
			{
				vnor = V4LoadUnaligned((const float*)inputVertex, normalOffset);
				TransformVector3NATIVE(pose0, pose1, pose2, pose3, vnor, vnor);
				ndot = V3Dot(vnor, vnor);
			}
			else
			{
				ndot = V4Zero();
			}

			if( skinTangent )
			{
				vtan = V4LoadUnaligned((const float*)inputVertex, tangentOffset);
				TransformVector3NATIVE(pose0, pose1, pose2, pose3, vtan, vtan);
				tdot = V3Dot(vtan, vtan);
			}
			else
			{
				tdot = V4Zero();
			}

			// Normalize normal and tangent together: one reciprocal-sqrt over
			// the pair of squared lengths.
			vlen = V4MergeH(ndot, tdot);
			vlen = V4Rsqrt(vlen);

			if(skinNormal) {
				vnor = V4Mul(vnor, V4Splat(vlen, 0));
				V3StoreUnaligned(vnor, (float*)outputVertex, normalOffset);
			}

			if(skinTangent) {
				vtan = V4Mul(vtan, V4Splat(vlen, 1));
				V3StoreUnaligned(vtan, (float*)outputVertex, tangentOffset);
			}
		}

		V3StoreUnaligned(vpos, (float*)outputVertex, 0);

		if( skinTangent )
		{
			// Copy the tangent's w component (handedness) through untransformed.
			*reinterpret_cast<float*>( outputVertex + (tangentOffset<<2) + sizeof(Vector3f) ) = *reinterpret_cast<const float*>( inputVertex + (tangentOffset<<2) + sizeof(Vector3f) );
		}

		outputVertex += outStride;
		inputVertex += inStride;

		if (bonesPerVertexCount == 1)
			influence1++;
		else if (bonesPerVertexCount == 2)
			influence2++;
		if (bonesPerVertexCount == 4)
			influence4++;
	}
}
#endif
diff --git a/Runtime/Filters/Mesh/MeshSkinningMobile.h b/Runtime/Filters/Mesh/MeshSkinningMobile.h new file mode 100644 index 0000000..f6efc54 --- /dev/null +++ b/Runtime/Filters/Mesh/MeshSkinningMobile.h @@ -0,0 +1,160 @@
#if UNITY_SUPPORTS_VFP

// On these platforms the asm symbols carry a leading underscore; alias the
// plain names so C++ code can call them uniformly.
#if UNITY_ANDROID || UNITY_BB10 || UNITY_TIZEN
#define s_SkinVertices_VFP
 _s_SkinVertices_VFP
#define s_SkinVertices_NoNormals_VFP _s_SkinVertices_NoNormals_VFP
#define s_SkinVertices_Tangents_VFP _s_SkinVertices_Tangents_VFP

#define s_SkinVertices2Bones_VFP _s_SkinVertices2Bones_VFP
#define s_SkinVertices2Bones_NoNormals_VFP _s_SkinVertices2Bones_NoNormals_VFP
#define s_SkinVertices2Bones_Tangents_VFP _s_SkinVertices2Bones_Tangents_VFP

#define s_SkinVertices4Bones_VFP _s_SkinVertices4Bones_VFP
#define s_SkinVertices4Bones_NoNormals_VFP _s_SkinVertices4Bones_NoNormals_VFP
#define s_SkinVertices4Bones_Tangents_VFP _s_SkinVertices4Bones_Tangents_VFP
#endif // UNITY_ANDROID || UNITY_BB10 || UNITY_TIZEN

// Hand-written VFP skinning routines (see MeshSkinningVFP.asm — not in this
// excerpt). Each walks srcVertData..srcVertDataEnd, consuming one influence
// record per vertex and writing skinned vertices to dstVertData.
extern "C"
{
	void s_SkinVertices_VFP(const Matrix4x4f* bones4x4, const void* srcVertData, const void* srcVertDataEnd, const void* srcBoneInfluence1, void* dstVertData);
	void s_SkinVertices_NoNormals_VFP(const Matrix4x4f* bones4x4, const void* srcVertData, const void* srcVertDataEnd, const void* srcBoneInfluence1, void* dstVertData);
	void s_SkinVertices_Tangents_VFP(const Matrix4x4f* bones4x4, const void* srcVertData, const void* srcVertDataEnd, const void* srcBoneInfluence1, void* dstVertData);

	void s_SkinVertices2Bones_VFP(const Matrix4x4f* bones4x4, const void* srcVertData, const void* srcVertDataEnd, const void* srcBoneInfluence2, void* dstVertData);
	void s_SkinVertices2Bones_NoNormals_VFP(const Matrix4x4f* bones4x4, const void* srcVertData, const void* srcVertDataEnd, const void* srcBoneInfluence2, void* dstVertData);
	void s_SkinVertices2Bones_Tangents_VFP(const Matrix4x4f* bones4x4, const void* srcVertData, const void* srcVertDataEnd, const void* srcBoneInfluence2, void* dstVertData);

	void s_SkinVertices4Bones_VFP(const Matrix4x4f* bones4x4, const void* srcVertData, const void* srcVertDataEnd, const void* srcBoneInfluence4, void* dstVertData);
	void s_SkinVertices4Bones_NoNormals_VFP(const Matrix4x4f* bones4x4, const void* srcVertData, const void* srcVertDataEnd, const void* srcBoneInfluence4, void* dstVertData);
	void s_SkinVertices4Bones_Tangents_VFP(const Matrix4x4f* bones4x4, const void* srcVertData, const void* srcVertDataEnd, const void* srcBoneInfluence4, void* dstVertData);
}
#endif

#if (UNITY_SUPPORTS_NEON && !UNITY_DISABLE_NEON_SKINNING)

#if UNITY_ANDROID || UNITY_WINRT || UNITY_BB10 || UNITY_TIZEN
#define s_SkinVertices_NEON _s_SkinVertices_NEON
#define s_SkinVertices_NoNormals_NEON _s_SkinVertices_NoNormals_NEON
#define s_SkinVertices_Tangents_NEON _s_SkinVertices_Tangents_NEON

#define s_SkinVertices2Bones_NEON _s_SkinVertices2Bones_NEON
#define s_SkinVertices2Bones_NoNormals_NEON _s_SkinVertices2Bones_NoNormals_NEON
#define s_SkinVertices2Bones_Tangents_NEON _s_SkinVertices2Bones_Tangents_NEON

#define s_SkinVertices4Bones_NEON _s_SkinVertices4Bones_NEON
#define s_SkinVertices4Bones_NoNormals_NEON _s_SkinVertices4Bones_NoNormals_NEON
#define s_SkinVertices4Bones_Tangents_NEON _s_SkinVertices4Bones_Tangents_NEON

#endif // UNITY_ANDROID || UNITY_WINRT || UNITY_BB10 || UNITY_TIZEN

// Hand-written NEON skinning routines (MeshSkinningNEON.asm); same contract
// as the VFP set above, with typed influence pointers.
extern "C"
{
	void s_SkinVertices_NEON(const Matrix4x4f* bones4x4, const void* srcVertData, const void* srcVertDataEnd, const int* srcBoneInfluence1, void* dstVertData);
	void s_SkinVertices_NoNormals_NEON(const Matrix4x4f* bones4x4, const void* srcVertData, const void* srcVertDataEnd, const int* srcBoneInfluence1, void* dstVertData);
	void s_SkinVertices_Tangents_NEON(const Matrix4x4f* bones4x4, const void* srcVertData, const void* srcVertDataEnd, const int* srcBoneInfluence1, void* dstVertData);

	void s_SkinVertices2Bones_NEON(const Matrix4x4f* bones4x4, const void* srcVertData, const void* srcVertDataEnd, const BoneInfluence2* srcBoneInfluence2, void* dstVertData);
	void s_SkinVertices2Bones_NoNormals_NEON(const Matrix4x4f* bones4x4, const void* srcVertData, const void* srcVertDataEnd, const BoneInfluence2* srcBoneInfluence2, void* dstVertData);
	void s_SkinVertices2Bones_Tangents_NEON(const Matrix4x4f* bones4x4, const void* srcVertData, const void* srcVertDataEnd, const BoneInfluence2* srcBoneInfluence2, void* dstVertData);

	void s_SkinVertices4Bones_NEON(const Matrix4x4f* bones4x4, const void* srcVertData, const void* srcVertDataEnd, const BoneInfluence* srcBoneInfluences, void* dstVertData);
	void s_SkinVertices4Bones_NoNormals_NEON(const Matrix4x4f* bones4x4, const void* srcVertData, const void* srcVertDataEnd, const BoneInfluence* srcBoneInfluences, void* dstVertData);
	void s_SkinVertices4Bones_Tangents_NEON(const Matrix4x4f* bones4x4, const void* srcVertData, const void* srcVertDataEnd, const BoneInfluence* srcBoneInfluences, void* dstVertData);
}
#endif

#if UNITY_SUPPORTS_VFP || (UNITY_SUPPORTS_NEON && !UNITY_DISABLE_NEON_SKINNING)

// Dispatches one skinning job to the matching NEON/VFP asm routine.
// Returns false when no optimized routine applies (tangents without normals,
// or no NEON support at runtime on a NEON-only build) so the caller can fall
// back to the generic C++ path.
bool SkinMeshOptimizedMobile(SkinMeshInfo& info)
{
	static const size_t kPrefetchSizeBones = 4096;
	static const size_t kPrefetchSizeVertex = 512;

	const int bonesPerVertexCount = info.bonesPerVertex;
	const bool skinNormal = info.skinNormals;
	const bool skinTangent = info.skinTangents;

	// Aliasing views of the influence stream; the asm routine picks the layout
	// matching bonesPerVertexCount.
	const int* influence1 = reinterpret_cast<const int*> (info.compactSkin);
	const BoneInfluence2* influence2 = reinterpret_cast<const BoneInfluence2*> (info.compactSkin);
	const BoneInfluence* influence4 = reinterpret_cast<const BoneInfluence*> (info.compactSkin);

	const Matrix4x4f* bones4x4 = info.cachedPose;

	const int inStride = info.inStride;
	int count = info.vertexCount;

	const UInt8* inputVertex = (const UInt8*)info.inVertices;
	UInt8* outputVertex = (UInt8*)info.outVertices;

	// The asm routines have no tangents-without-normals variant.
	if (skinTangent && !skinNormal)
		return false;

	if( !UNITY_SUPPORTS_VFP && !CPUInfo::HasNEONSupport() )
	{
		ErrorString("non-NEON path not enabled!");
		return false;
	}

#if !ENABLE_MULTITHREADED_SKINNING
	PROFILER_AUTO_THREAD_SAFE(gMeshSkinningOptimized, NULL);
#endif

	// Warm the caches with the pose matrices and the first vertices (clamped
	// to fixed prefetch windows).
	Prefetch(bones4x4, std::min<size_t>(info.boneCount * sizeof(Matrix4x4f), kPrefetchSizeBones));
	Prefetch(inputVertex + inStride, std::min<size_t>(inStride * (count-1), kPrefetchSizeVertex));

#if UNITY_SUPPORTS_NEON && UNITY_SUPPORTS_VFP
// Both ISAs compiled in: choose NEON at runtime, else VFP.
#define CALL_SKIN_FUNC( name, influence ) \
do \
{ \
if (CPUInfo::HasNEONSupport()) \
	name##_NEON(bones4x4, inputVertex, (UInt8*)inputVertex + (inStride * count), influence, outputVertex); \
else \
	name##_VFP(bones4x4, inputVertex, (UInt8*)inputVertex + (inStride * count), influence, outputVertex); \
} \
while(0)
#endif
#if UNITY_SUPPORTS_NEON && !UNITY_SUPPORTS_VFP
#define CALL_SKIN_FUNC( name, influence ) name##_NEON(bones4x4, inputVertex, (UInt8*)inputVertex + (inStride * count), influence, outputVertex)
#endif
#if UNITY_SUPPORTS_VFP && !UNITY_SUPPORTS_NEON
#define CALL_SKIN_FUNC( name, influence ) name##_VFP(bones4x4, inputVertex, (UInt8*)inputVertex + (inStride * count), influence, outputVertex)
#endif

	if (bonesPerVertexCount == 1 )
	{
		if (skinNormal && skinTangent)
			CALL_SKIN_FUNC(s_SkinVertices_Tangents, influence1);
		else if( skinNormal )
			CALL_SKIN_FUNC(s_SkinVertices, influence1);
		else
			CALL_SKIN_FUNC(s_SkinVertices_NoNormals, influence1);
	}
	else if (bonesPerVertexCount == 2)
	{
		if (skinNormal && skinTangent)
			CALL_SKIN_FUNC(s_SkinVertices2Bones_Tangents, influence2);
		else if( skinNormal )
			CALL_SKIN_FUNC(s_SkinVertices2Bones, influence2);
		else
			CALL_SKIN_FUNC(s_SkinVertices2Bones_NoNormals, influence2);
	}
	else if (bonesPerVertexCount == 4)
	{
		if (skinNormal && skinTangent)
			CALL_SKIN_FUNC(s_SkinVertices4Bones_Tangents, influence4);
		else if (skinNormal)
			CALL_SKIN_FUNC(s_SkinVertices4Bones, influence4);
		else
			CALL_SKIN_FUNC(s_SkinVertices4Bones_NoNormals, influence4);
	}

	// NOTE(review): bonesPerVertexCount values other than 1/2/4 fall through
	// and still return true without skinning — presumably unreachable; verify.
	return true;
}
#else
// Stub for platforms without VFP/NEON: report "not handled" so the generic
// skinning path runs.
bool SkinMeshOptimizedMobile(SkinMeshInfo& info)
{
	return false;
}
#endif // UNITY_SUPPORTS_VFP || UNITY_SUPPORTS_NEON


diff --git a/Runtime/Filters/Mesh/MeshSkinningNEON.asm b/Runtime/Filters/Mesh/MeshSkinningNEON.asm new file mode 100644 index 0000000..494b397 --- /dev/null +++
b/Runtime/Filters/Mesh/MeshSkinningNEON.asm @@ -0,0 +1,527 @@
	AREA .text, CODE

	EXPORT _s_SkinVertices_NEON
	EXPORT _s_SkinVertices_NoNormals_NEON
	EXPORT _s_SkinVertices_Tangents_NEON
	EXPORT _s_SkinVertices2Bones_NEON
	EXPORT _s_SkinVertices2Bones_NoNormals_NEON
	EXPORT _s_SkinVertices2Bones_Tangents_NEON
	EXPORT _s_SkinVertices4Bones_NEON
	EXPORT _s_SkinVertices4Bones_NoNormals_NEON
	EXPORT _s_SkinVertices4Bones_Tangents_NEON

; NOTE(review): arguments follow the extern "C" declarations in
; MeshSkinningMobile.h / AAPCS: r0 = bones4x4, r1 = srcVertData,
; r2 = srcVertDataEnd, r3 = influence stream, [sp] (read via ip) = dstVertData.
; Bone matrices are 64 bytes, hence the "lsl #6" when indexing by bone.

; 1 bone per vertex, position + normal (12-byte stride per attribute).
|_s_SkinVertices_NEON| PROC
	mov ip, sp
	vpush {d8-d10}
	stmdb sp!, {r4, r5, r6, r7, r8}
	ldr.w r4, [ip]                  ; r4 = dstVertData (5th arg, on stack)
	mov.w r8, #12                   ; r8 = attribute stride in bytes
	ldr.w r5, [r3], #4              ; first bone index
	add.w r7, r0, r5, lsl #6        ; r7 = &bones4x4[index] (64-byte matrices)

|_s_SkinVertices_NEON_loop|
	vld1.32 {d24-d27}, [r7@128]!    ; q12,q13 = matrix columns 0,1
	vld1.32 {d28-d31}, [r7@128]     ; q14,q15 = matrix columns 2,3
	vld1.32 {d6-d8}, [r1@64]!       ; load position + normal
	vmul.f32 q0, q12, d6[0]
	vmul.f32 q1, q12, d7[1]
	cmp r1, r2                      ; reached srcVertDataEnd?
	pld [r1, #256] ; 0x100
	vmla.f32 q0, q13, d6[1]
	vmla.f32 q1, q13, d8[0]
	it cc
	ldrcc.w r5, [r3], #4            ; preload next bone index while FP pipe is busy
	add.w r7, r0, r5, lsl #6
	vmla.f32 q0, q14, d7[0]
	vmla.f32 q1, q14, d8[1]
	pld [r7]
	vadd.f32 q0, q0, q15            ; + translation column
	vst1.32 {d0-d1}, [r4], r8       ; store skinned position
	vst1.32 {d2-d3}, [r4], r8       ; store skinned normal (not normalized)
	bcc.w |_s_SkinVertices_NEON_loop|
	ldmia.w sp!, {r4, r5, r6, r7, r8}
	vpop {d8-d10}
	bx lr
	ENDP


; 1 bone per vertex, position only.
|_s_SkinVertices_NoNormals_NEON| PROC
	mov ip, sp
	vpush {d8-d10}
	stmdb sp!, {r4, r5, r6, r7, r8}
	ldr.w r4, [ip]
	mov.w r8, #12
	ldr.w r5, [r3], #4
	add.w r7, r0, r5, lsl #6

|_s_SkinVertices_NoNormals_NEON_loop|
	vld1.32 {d24-d27}, [r7@128]!
	vld1.32 {d28-d31}, [r7@128]
	vld1.32 {d6-d7}, [r1], r8
	vmul.f32 q0, q12, d6[0]
	cmp r1, r2
	pld [r1, #256] ; 0x100
	vmla.f32 q0, q13, d6[1]
	it cc
	ldrcc.w r5, [r3], #4
	add.w r7, r0, r5, lsl #6
	vmla.f32 q0, q14, d7[0]
	pld [r7]
	vadd.f32 q0, q0, q15
	vst1.32 {d0-d1}, [r4], r8
	bcc.w |_s_SkinVertices_NoNormals_NEON_loop|
	ldmia.w sp!, {r4, r5, r6, r7, r8}
	vpop {d8-d10}
	bx lr
	ENDP


; 1 bone per vertex, position + normal + tangent (tangent w passed through
; via the "vmov.f32 s11, s21" lane copy).
|_s_SkinVertices_Tangents_NEON| PROC
	mov ip, sp
	vpush {d8-d10}
	stmdb sp!, {r4, r5, r6, r7, r8}
	ldr.w r4, [ip]
	mov.w r8, #12
	ldr.w r5, [r3], #4
	add.w r7, r0, r5, lsl #6

|_s_SkinVertices_Tangents_NEON_loop|
	vld1.32 {d24-d27}, [r7@128]!
	vld1.32 {d28-d31}, [r7@128]
	vld1.32 {d6-d8}, [r1@64]!
	vld1.32 {d9-d10}, [r1@64]!
	vmul.f32 q0, q12, d6[0]
	vmul.f32 q1, q12, d7[1]
	vmul.f32 q2, q12, d9[0]
	cmp r1, r2
	pld [r1, #256] ; 0x100
	vmla.f32 q0, q13, d6[1]
	vmla.f32 q1, q13, d8[0]
	vmla.f32 q2, q13, d9[1]
	it cc
	ldrcc.w r5, [r3], #4
	add.w r7, r0, r5, lsl #6
	vmla.f32 q0, q14, d7[0]
	vmla.f32 q1, q14, d8[1]
	vmla.f32 q2, q14, d10[0]
	pld [r7]
	vadd.f32 q0, q0, q15
	vmov.f32 s11, s21               ; tangent.w copied from source, untransformed
	vst1.32 {d0-d1}, [r4], r8
	vst1.32 {d2-d3}, [r4], r8
	vst1.32 {d4-d5}, [r4]!          ; full 16-byte tangent store
	bcc.w |_s_SkinVertices_Tangents_NEON_loop|
	ldmia.w sp!, {r4, r5, r6, r7, r8}
	vpop {d8-d10}
	bx lr
	ENDP


; 2 bones per vertex, position + normal. The matrix blend
; (pose = m0*w0 + m1*w1) for the NEXT vertex is software-pipelined across the
; loop: the preamble blends the first pose before entering the loop.
|_s_SkinVertices2Bones_NEON| PROC
	mov ip, sp
	vpush {d8-d11}
	stmdb sp!, {r4, r5, r6, r7, r8, sl}
	ldr.w r4, [ip]                  ; r4 = dstVertData
	vld1.32 {d11}, [r3]!            ; d11 = two bone weights
	ldmia r3!, {r5, r6}             ; r5,r6 = two bone indices
	add.w r7, r0, r5, lsl #6
	vld1.32 {d16-d19}, [r7@128]!
	vmul.f32 q12, q8, d11[0]
	vmul.f32 q13, q9, d11[0]
	vld1.32 {d20-d23}, [r7@128]
	add.w r7, r0, r6, lsl #6
	vmul.f32 q14, q10, d11[0]
	vmul.f32 q15, q11, d11[0]
	vld1.32 {d16-d19}, [r7@128]!
	vmla.f32 q12, q8, d11[1]
	vmla.f32 q13, q9, d11[1]
	ldr r5, [r3, #8]
	mov.w r8, #12
	sub.w sl, r2, #24               ; sl = end - one vertex (pipelining guard)
	vld1.32 {d20-d23}, [r7@128]
	vmla.f32 q14, q10, d11[1]
	nop                             ; alignment padding for the loop head

|_s_SkinVertices2Bones_NEON_loop|
	cmp r1, sl
	add.w r7, r0, r5, lsl #6
	it cc
	ldrcc r6, [r3, #12]
	vld1.32 {d6-d8}, [r1@64]!
	vmla.f32 q15, q11, d11[1]
	vmul.f32 q0, q12, d6[0]
	vld1.32 {d16-d19}, [r7@128]!
	cmp r1, sl
	vmul.f32 q1, q12, d7[1]
	vld1.32 {d11}, [r3]
	vmul.f32 q12, q8, d11[0]
	pld [r1, #256] ; 0x100
	vmla.f32 q0, q13, d6[1]
	vld1.32 {d20-d23}, [r7@128]
	add.w r7, r0, r6, lsl #6
	vmla.f32 q1, q13, d8[0]
	it cc
	ldrcc r5, [r3, #24]
	vmul.f32 q13, q9, d11[0]
	vmla.f32 q0, q14, d7[0]
	cmp r1, r2
	vmla.f32 q1, q14, d8[1]
	vld1.32 {d16-d19}, [r7@128]!
	vmul.f32 q14, q10, d11[0]
	vadd.f32 q0, q0, q15
	vmul.f32 q15, q11, d11[0]
	vld1.32 {d20-d23}, [r7@128]
	vmla.f32 q12, q8, d11[1]
	vst1.32 {d0-d1}, [r4], r8
	vmla.f32 q13, q9, d11[1]
	vst1.32 {d2-d3}, [r4], r8
	add.w r3, r3, #16               ; advance to next BoneInfluence2 record
	vmla.f32 q14, q10, d11[1]
	bcc.w |_s_SkinVertices2Bones_NEON_loop|
	ldmia.w sp!, {r4, r5, r6, r7, r8, sl}
	vpop {d8-d11}
	bx lr
	ENDP


; 2 bones per vertex, position only. Same software-pipelined structure as
; above, with a 12-byte (position-only) vertex.
|_s_SkinVertices2Bones_NoNormals_NEON| PROC
	mov ip, sp
	vpush {d8-d11}
	stmdb sp!, {r4, r5, r6, r7, r8, sl}
	ldr.w r4, [ip]
	vld1.32 {d11}, [r3]!
	ldmia r3!, {r5, r6}
	add.w r7, r0, r5, lsl #6
	vld1.32 {d16-d19}, [r7@128]!
	vmul.f32 q12, q8, d11[0]
	vmul.f32 q13, q9, d11[0]
	vld1.32 {d20-d23}, [r7@128]
	add.w r7, r0, r6, lsl #6
	vmul.f32 q14, q10, d11[0]
	vmul.f32 q15, q11, d11[0]
	vld1.32 {d16-d19}, [r7@128]!
	vmla.f32 q12, q8, d11[1]
	vmla.f32 q13, q9, d11[1]
	ldr r5, [r3, #8]
	mov.w r8, #12
	sub.w sl, r2, #12
	vld1.32 {d20-d23}, [r7@128]
	vmla.f32 q14, q10, d11[1]
	nop
	nop.w

|_s_SkinVertices2Bones_NoNormals_NEON_loop|
	cmp r1, sl
	add.w r7, r0, r5, lsl #6
	it cc
	ldrcc r6, [r3, #12]
	vld1.32 {d6-d7}, [r1], r8
	vmla.f32 q15, q11, d11[1]
	vmul.f32 q0, q12, d6[0]
	vld1.32 {d16-d19}, [r7@128]!
	cmp r1, sl
	vld1.32 {d11}, [r3]
	vmul.f32 q12, q8, d11[0]
	pld [r1, #256] ; 0x100
	vmla.f32 q0, q13, d6[1]
	vld1.32 {d20-d23}, [r7@128]
	add.w r7, r0, r6, lsl #6
	it cc
	ldrcc r5, [r3, #24]
	vmul.f32 q13, q9, d11[0]
	vmla.f32 q0, q14, d7[0]
	cmp r1, r2
	vld1.32 {d16-d19}, [r7@128]!
	vmul.f32 q14, q10, d11[0]
	vadd.f32 q0, q0, q15
	vmul.f32 q15, q11, d11[0]
	vld1.32 {d20-d23}, [r7@128]
	vmla.f32 q12, q8, d11[1]
	vst1.32 {d0-d1}, [r4], r8
	vmla.f32 q13, q9, d11[1]
	add.w r3, r3, #16
	vmla.f32 q14, q10, d11[1]
	bcc.w |_s_SkinVertices2Bones_NoNormals_NEON_loop|
	ldmia.w sp!, {r4, r5, r6, r7, r8, sl}
	vpop {d8-d11}
	bx lr
	ENDP


; 2 bones per vertex, position + normal + tangent.
; NOTE: this routine continues past the end of this excerpt.
|_s_SkinVertices2Bones_Tangents_NEON| PROC
	mov ip, sp
	vpush {d8-d11}
	stmdb sp!, {r4, r5, r6, r7, r8, sl}
	ldr.w r4, [ip]
	vld1.32 {d11}, [r3]!
	ldmia r3!, {r5, r6}
	add.w r7, r0, r5, lsl #6
	vld1.32 {d16-d19}, [r7@128]!
	vmul.f32 q12, q8, d11[0]
	vmul.f32 q13, q9, d11[0]
	vld1.32 {d20-d23}, [r7@128]
	add.w r7, r0, r6, lsl #6
	vmul.f32 q14, q10, d11[0]
	vmul.f32 q15, q11, d11[0]
	vld1.32 {d16-d19}, [r7@128]!
	vmla.f32 q12, q8, d11[1]
	vmla.f32 q13, q9, d11[1]
	ldr r5, [r3, #8]
	mov.w r8, #12
	sub.w sl, r2, #40 ; 0x28
	vld1.32 {d20-d23}, [r7@128]
	vmla.f32 q14, q10, d11[1]
	nop
	nop.w

|_s_SkinVertices2Bones_Tangents_NEON_loop|
	cmp r1, sl
	add.w r7, r0, r5, lsl #6
	it cc
	ldrcc r6, [r3, #12]
	vld1.32 {d6-d8}, [r1@64]!
	vmla.f32 q15, q11, d11[1]
	vld1.32 {d9-d10}, [r1@64]!
	vmul.f32 q0, q12, d6[0]
	vld1.32 {d16-d19}, [r7@128]!
	cmp r1, sl
	vmul.f32 q1, q12, d7[1]
	vmul.f32 q2, q12, d9[0]
	vld1.32 {d11}, [r3]
	vmul.f32 q12, q8, d11[0]
	pld [r1, #256] ; 0x100
	vmla.f32 q0, q13, d6[1]
	vld1.32 {d20-d23}, [r7@128]
	add.w r7, r0, r6, lsl #6
	vmla.f32 q1, q13, d8[0]
	vmla.f32 q2, q13, d9[1]
	it cc
	ldrcc r5, [r3, #24]
	vmul.f32 q13, q9, d11[0]
	vmla.f32 q0, q14, d7[0]
	cmp r1, r2
	vmla.f32 q1, q14, d8[1]
	vmla.f32 q2, q14, d10[0]
	vld1.32 {d16-d19}, [r7@128]!
+ vmul.f32 q14, q10, d11[0] + vadd.f32 q0, q0, q15 + vmov.f32 s11, s21 + vmul.f32 q15, q11, d11[0] + vld1.32 {d20-d23}, [r7@128] + vmla.f32 q12, q8, d11[1] + vst1.32 {d0-d1}, [r4], r8 + vmla.f32 q13, q9, d11[1] + vst1.32 {d2-d3}, [r4], r8 + add.w r3, r3, #16 + vmla.f32 q14, q10, d11[1] + vst1.32 {d4-d5}, [r4]! + bcc.w |_s_SkinVertices2Bones_Tangents_NEON_loop| + ldmia.w sp!, {r4, r5, r6, r7, r8, sl} + vpop {d8-d11} + bx lr + ENDP + + +|_s_SkinVertices4Bones_NEON| PROC + mov ip, sp + vpush {d8-d12} + stmdb sp!, {r4, r5, r6, r7, r8} + ldr.w r4, [ip] + vld1.32 {d11-d12}, [r3]! + ldmia r3!, {r5, r6} + add.w r7, r0, r5, lsl #6 + vld1.32 {d16-d19}, [r7@128]! + vld1.32 {d20-d23}, [r7@128] + mov.w r8, #12 + nop.w + nop.w + nop.w + +|_s_SkinVertices4Bones_NEON_loop| + vmul.f32 q12, q8, d11[0] + vld1.32 {d6-d8}, [r1@64]! + vmul.f32 q13, q9, d11[0] + add.w r7, r0, r6, lsl #6 + vmul.f32 q14, q10, d11[0] + vld1.32 {d16-d19}, [r7@128]! + vmul.f32 q15, q11, d11[0] + vld1.32 {d20-d23}, [r7@128] + vmla.f32 q12, q8, d11[1] + ldmia r3!, {r5, r6} + vmla.f32 q13, q9, d11[1] + add.w r7, r0, r5, lsl #6 + cmp r1, r2 + vmla.f32 q14, q10, d11[1] + vld1.32 {d16-d19}, [r7@128]! + vmla.f32 q15, q11, d11[1] + pld [r3, #256] ; 0x100 + vld1.32 {d20-d23}, [r7@128] + vmla.f32 q12, q8, d12[0] + add.w r7, r0, r6, lsl #6 + vmla.f32 q13, q9, d12[0] + vmla.f32 q14, q10, d12[0] + vld1.32 {d16-d19}, [r7@128]! + vmla.f32 q15, q11, d12[0] + vld1.32 {d20-d23}, [r7@128] + vmla.f32 q12, q8, d12[1] + vmla.f32 q13, q9, d12[1] + vmla.f32 q14, q10, d12[1] + vmla.f32 q15, q11, d12[1] + pld [r1, #256] ; 0x100 + vmul.f32 q0, q12, d6[0] + vld1.32 {d11-d12}, [r3]! 
+ vmul.f32 q1, q12, d7[1] + it cc + ldmiacc r3!, {r5, r6} + vmla.f32 q0, q13, d6[1] + add.w r7, r0, r5, lsl #6 + vmla.f32 q1, q13, d8[0] + vldmia r7, {d16-d23} + vmla.f32 q0, q14, d7[0] + vmla.f32 q1, q14, d8[1] + vadd.f32 q0, q0, q15 + vst1.32 {d0-d1}, [r4], r8 + vst1.32 {d2-d3}, [r4], r8 + bcc.w |_s_SkinVertices4Bones_NEON_loop| + ldmia.w sp!, {r4, r5, r6, r7, r8} + vpop {d8-d12} + bx lr + ENDP + + +|_s_SkinVertices4Bones_NoNormals_NEON| PROC + mov ip, sp + vpush {d8-d12} + stmdb sp!, {r4, r5, r6, r7, r8} + ldr.w r4, [ip] + vld1.32 {d11-d12}, [r3]! + ldmia r3!, {r5, r6} + add.w r7, r0, r5, lsl #6 + vld1.32 {d16-d19}, [r7@128]! + vld1.32 {d20-d23}, [r7@128] + mov.w r8, #12 + nop + nop.w + +|_s_SkinVertices4Bones_NoNormals_NEON_loop| + vmul.f32 q12, q8, d11[0] + vld1.32 {d6-d7}, [r1], r8 + vmul.f32 q13, q9, d11[0] + add.w r7, r0, r6, lsl #6 + vmul.f32 q14, q10, d11[0] + vld1.32 {d16-d19}, [r7@128]! + vmul.f32 q15, q11, d11[0] + vld1.32 {d20-d23}, [r7@128] + vmla.f32 q12, q8, d11[1] + ldmia r3!, {r5, r6} + vmla.f32 q13, q9, d11[1] + add.w r7, r0, r5, lsl #6 + cmp r1, r2 + vmla.f32 q14, q10, d11[1] + vld1.32 {d16-d19}, [r7@128]! + vmla.f32 q15, q11, d11[1] + pld [r3, #256] ; 0x100 + vld1.32 {d20-d23}, [r7@128] + vmla.f32 q12, q8, d12[0] + add.w r7, r0, r6, lsl #6 + vmla.f32 q13, q9, d12[0] + vmla.f32 q14, q10, d12[0] + vld1.32 {d16-d19}, [r7@128]! + vmla.f32 q15, q11, d12[0] + vld1.32 {d20-d23}, [r7@128] + vmla.f32 q12, q8, d12[1] + vmla.f32 q13, q9, d12[1] + vmla.f32 q14, q10, d12[1] + vmla.f32 q15, q11, d12[1] + pld [r1, #256] ; 0x100 + vmul.f32 q0, q12, d6[0] + vld1.32 {d11-d12}, [r3]! 
+ it cc + ldmiacc r3!, {r5, r6} + vmla.f32 q0, q13, d6[1] + add.w r7, r0, r5, lsl #6 + vldmia r7, {d16-d23} + vmla.f32 q0, q14, d7[0] + vadd.f32 q0, q0, q15 + vst1.32 {d0-d1}, [r4], r8 + bcc.w |_s_SkinVertices4Bones_NoNormals_NEON_loop| + ldmia.w sp!, {r4, r5, r6, r7, r8} + vpop {d8-d12} + bx lr + ENDP + + +|_s_SkinVertices4Bones_Tangents_NEON| PROC + mov ip, sp + vpush {d8-d12} + stmdb sp!, {r4, r5, r6, r7, r8} + ldr.w r4, [ip] + vld1.32 {d11-d12}, [r3]! + ldmia r3!, {r5, r6} + add.w r7, r0, r5, lsl #6 + vld1.32 {d16-d19}, [r7@128]! + vld1.32 {d20-d23}, [r7@128] + mov.w r8, #12 + nop + nop.w + +|_s_SkinVertices4Bones_Tangents_NEON_loop| + vmul.f32 q12, q8, d11[0] + vld1.32 {d6-d8}, [r1@64]! + vmul.f32 q13, q9, d11[0] + vld1.32 {d9-d10}, [r1@64]! + add.w r7, r0, r6, lsl #6 + vmul.f32 q14, q10, d11[0] + vld1.32 {d16-d19}, [r7@128]! + vmul.f32 q15, q11, d11[0] + vld1.32 {d20-d23}, [r7@128] + vmla.f32 q12, q8, d11[1] + ldmia r3!, {r5, r6} + vmla.f32 q13, q9, d11[1] + add.w r7, r0, r5, lsl #6 + cmp r1, r2 + vmla.f32 q14, q10, d11[1] + vld1.32 {d16-d19}, [r7@128]! + vmla.f32 q15, q11, d11[1] + pld [r3, #256] ; 0x100 + vld1.32 {d20-d23}, [r7@128] + vmla.f32 q12, q8, d12[0] + add.w r7, r0, r6, lsl #6 + vmla.f32 q13, q9, d12[0] + vmla.f32 q14, q10, d12[0] + vld1.32 {d16-d19}, [r7@128]! + vmla.f32 q15, q11, d12[0] + vld1.32 {d20-d23}, [r7@128] + vmla.f32 q12, q8, d12[1] + vmla.f32 q13, q9, d12[1] + vmla.f32 q14, q10, d12[1] + vmla.f32 q15, q11, d12[1] + pld [r1, #256] ; 0x100 + vmul.f32 q0, q12, d6[0] + vld1.32 {d11-d12}, [r3]! + vmul.f32 q1, q12, d7[1] + vmul.f32 q2, q12, d9[0] + it cc + ldmiacc r3!, {r5, r6} + vmla.f32 q0, q13, d6[1] + add.w r7, r0, r5, lsl #6 + vmla.f32 q1, q13, d8[0] + vmla.f32 q2, q13, d9[1] + vldmia r7, {d16-d23} + vmla.f32 q0, q14, d7[0] + vmla.f32 q1, q14, d8[1] + vmla.f32 q2, q14, d10[0] + vadd.f32 q0, q0, q15 + vmov.f32 s11, s21 + vst1.32 {d0-d1}, [r4], r8 + vst1.32 {d2-d3}, [r4], r8 + vst1.32 {d4-d5}, [r4]! 
+ bcc.w |_s_SkinVertices4Bones_Tangents_NEON_loop| + ldmia.w sp!, {r4, r5, r6, r7, r8} + vpop {d8-d12} + bx lr + nop + ENDP + + + END diff --git a/Runtime/Filters/Mesh/MeshSkinningNEON.s b/Runtime/Filters/Mesh/MeshSkinningNEON.s new file mode 100644 index 0000000..e94542d --- /dev/null +++ b/Runtime/Filters/Mesh/MeshSkinningNEON.s @@ -0,0 +1,183 @@ +#define UNITY_ASSEMBLER +#include "Configuration/PrefixConfigure.h" + +#if (UNITY_SUPPORTS_NEON && !UNITY_DISABLE_NEON_SKINNING) + +.set device,0 +.set device,__arm__ + +.if device + +//.code32 + +.globl _s_SkinVertices_NEON +.globl _s_SkinVertices_NoNormals_NEON +.globl _s_SkinVertices_Tangents_NEON + +.globl _s_SkinVertices2Bones_NEON +.globl _s_SkinVertices2Bones_NoNormals_NEON +.globl _s_SkinVertices2Bones_Tangents_NEON + +.globl _s_SkinVertices4Bones_NEON +.globl _s_SkinVertices4Bones_NoNormals_NEON +.globl _s_SkinVertices4Bones_Tangents_NEON + +#if UNITY_ANDROID +.hidden _s_SkinVertices_NEON +.hidden _s_SkinVertices_NoNormals_NEON +.hidden _s_SkinVertices_Tangents_NEON + +.hidden _s_SkinVertices2Bones_NEON +.hidden _s_SkinVertices2Bones_NoNormals_NEON +.hidden _s_SkinVertices2Bones_Tangents_NEON + +.hidden _s_SkinVertices4Bones_NEON +.hidden _s_SkinVertices4Bones_NoNormals_NEON +.hidden _s_SkinVertices4Bones_Tangents_NEON +#endif + + +//=========================================================================================================================================== + +#define SKIN_POS 1 +#define SKIN_POS_NRM 2 +#define SKIN_POS_NRM_TAN 3 + + +#define SKIN_2BONES 0 +#define SKIN_4BONES 0 + +_s_SkinVertices_NEON: + +#define SKIN_1BONE SKIN_POS_NRM +#define VERTEX_SZ 24 +#define LOOP_NAME _s_SkinVertices_NEON_loop + +#include "MeshSkinningNeon_Loop.h" + +#undef LOOP_NAME +#undef VERTEX_SZ +#undef SKIN_1BONE + +_s_SkinVertices_NoNormals_NEON: + +#define SKIN_1BONE SKIN_POS +#define VERTEX_SZ 12 +#define LOOP_NAME _s_SkinVertices_NoNormals_NEON_loop + +#include "MeshSkinningNeon_Loop.h" + +#undef LOOP_NAME 
+#undef VERTEX_SZ +#undef SKIN_1BONE + +_s_SkinVertices_Tangents_NEON: + +#define SKIN_1BONE SKIN_POS_NRM_TAN +#define VERTEX_SZ 40 +#define LOOP_NAME _s_SkinVertices_Tangents_NEON_loop + +#include "MeshSkinningNeon_Loop.h" + +#undef LOOP_NAME +#undef VERTEX_SZ +#undef SKIN_1BONE + +#undef SKIN_4BONES +#undef SKIN_2BONES + +//=========================================================================================================================================== + +#define SKIN_1BONE 0 +#define SKIN_4BONES 0 + +_s_SkinVertices2Bones_NEON: + +#define SKIN_2BONES SKIN_POS_NRM +#define VERTEX_SZ 24 +#define LOOP_NAME _s_SkinVertices2Bones_NEON_loop + +#include "MeshSkinningNeon_Loop.h" + +#undef LOOP_NAME +#undef VERTEX_SZ +#undef SKIN_2BONES + +_s_SkinVertices2Bones_NoNormals_NEON: + +#define SKIN_2BONES SKIN_POS +#define VERTEX_SZ 12 +#define LOOP_NAME _s_SkinVertices2Bones_NoNormals_NEON_loop + +#include "MeshSkinningNeon_Loop.h" + +#undef LOOP_NAME +#undef VERTEX_SZ +#undef SKIN_2BONES + +_s_SkinVertices2Bones_Tangents_NEON: + +#define SKIN_2BONES SKIN_POS_NRM_TAN +#define VERTEX_SZ 40 +#define LOOP_NAME _s_SkinVertices2Bones_Tangents_NEON_loop + +#include "MeshSkinningNeon_Loop.h" + +#undef LOOP_NAME +#undef VERTEX_SZ +#undef SKIN_2BONES + +#undef SKIN_4BONES +#undef SKIN_1BONE + + +//=========================================================================================================================================== + +#define SKIN_1BONE 0 +#define SKIN_2BONES 0 + +_s_SkinVertices4Bones_NEON: + +#define SKIN_4BONES SKIN_POS_NRM +#define VERTEX_SZ 24 +#define LOOP_NAME _s_SkinVertices4Bones_NEON_loop + +#include "MeshSkinningNeon_Loop.h" + +#undef LOOP_NAME +#undef VERTEX_SZ +#undef SKIN_4BONES + +_s_SkinVertices4Bones_NoNormals_NEON: + +#define SKIN_4BONES SKIN_POS +#define VERTEX_SZ 12 +#define LOOP_NAME _s_SkinVertices4Bones_NoNormals_NEON_loop + +#include "MeshSkinningNeon_Loop.h" + +#undef LOOP_NAME +#undef VERTEX_SZ +#undef SKIN_4BONES + 
+_s_SkinVertices4Bones_Tangents_NEON: + +#define SKIN_4BONES SKIN_POS_NRM_TAN +#define VERTEX_SZ 40 +#define LOOP_NAME _s_SkinVertices4Bones_Tangents_NEON_loop + +#include "MeshSkinningNeon_Loop.h" + +#undef LOOP_NAME +#undef VERTEX_SZ +#undef SKIN_4BONES + + +#undef SKIN_2BONES +#undef SKIN_1BONE + +//=========================================================================================================================================== + +.endif + +#endif diff --git a/Runtime/Filters/Mesh/MeshSkinningNeon_Loop.h b/Runtime/Filters/Mesh/MeshSkinningNeon_Loop.h new file mode 100644 index 0000000..8e584da --- /dev/null +++ b/Runtime/Filters/Mesh/MeshSkinningNeon_Loop.h @@ -0,0 +1,487 @@ + +// defines +// SKIN_1BONE +// SKIN_2BONES +// SKIN_4BONES +// LOOP_NAME +// VERTEX_SZ + +// skin types +// SKIN_POS +// SKIN_POS_NRM +// SKIN_POS_NRM_TAN + + + +//r0: const void* bones4x4 +//r1: const void* srcVertData +//r2: const void* srcVertDataEnd +//r3: const BoneInfluence4* srcBoneInfluence4 +//[sp+0] -> r4: const void* dstVertData + +// r5, r6: index +// r7: matrix address +// r8: 12 (offset for vector3) + +// q0 <- output: pos +// q1 <- output: nrm +// q2 <- output: tan +// q3 <- input: pos +// q4 <- input: nrm +// q5 <- input: tan +// d11,d12 <- weights +// q12-q15 (blended matrix) +// q8-q11 (cur matrix) + + +// input: +// d6[0], d6[1], d7[0] <- pos +// d7[1], d8[0], d8[1] <- nrm +// d9[0], d9[1], d10[0], d10[1] <- tan +// q3 <- pos.x, pos.y, pos.z, nrm.x +// q4 <- nrm.y, nrm.z, tan.x, tan.y +// q5 <- tan.z, tan.w, w0, w1 + + +//=========================================================================================================================================== +// +// Common + +#define CALC_POS_1 vmul.f32 q0, q12, d6[0] +#define CALC_POS_2 vmla.f32 q0, q13, d6[1] +#define CALC_POS_3 vmla.f32 q0, q14, d7[0] +#define CALC_POS_4 vadd.f32 q0, q15 + +#define STORE_POS vst1.32 {d0, d1}, [r4], r8 + +#if (SKIN_1BONE == SKIN_POS_NRM) || (SKIN_1BONE == SKIN_POS_NRM_TAN) \ 
+ || (SKIN_2BONES == SKIN_POS_NRM) || (SKIN_2BONES == SKIN_POS_NRM_TAN) \ + || (SKIN_4BONES == SKIN_POS_NRM) || (SKIN_4BONES == SKIN_POS_NRM_TAN) + + #define LOAD_POS_NRM vld1.32 {d6, d7, d8}, [r1, :64]! + #define STORE_NRM vst1.32 {d2, d3}, [r4], r8 + #define CALC_NRM_1 vmul.f32 q1, q12, d7[1] + #define CALC_NRM_2 vmla.f32 q1, q13, d8[0] + #define CALC_NRM_3 vmla.f32 q1, q14, d8[1] +#else + #define LOAD_POS_NRM vld1.32 {d6, d7}, [r1], r8 + #define STORE_NRM + #define CALC_NRM_1 + #define CALC_NRM_2 + #define CALC_NRM_3 +#endif + +#if (SKIN_1BONE == SKIN_POS_NRM_TAN) || (SKIN_2BONES == SKIN_POS_NRM_TAN) || (SKIN_4BONES == SKIN_POS_NRM_TAN) + #define LOAD_TAN vld1.32 {d9, d10}, [r1, :64]! + #define STORE_TAN vst1.32 {d4, d5}, [r4]! + #define CALC_TAN_1 vmul.f32 q2, q12, d9[0] + #define CALC_TAN_2 vmla.f32 q2, q13, d9[1] + #define CALC_TAN_3 vmla.f32 q2, q14, d10[0] + #define CALC_TAN_4 vmov.f32 s11, s21 +#else + #define LOAD_TAN + #define STORE_TAN + #define CALC_TAN_1 + #define CALC_TAN_2 + #define CALC_TAN_3 + #define CALC_TAN_4 +#endif + +// right after vertex-data will be copy-data stream, so be careful to not overwrite anything +#if (SKIN_1BONE == SKIN_POS) || (SKIN_2BONES == SKIN_POS) || (SKIN_4BONES == SKIN_POS) +#define STORE_POS_LAST1 vst1.32 {d0}, [r4]! +#define STORE_POS_LAST2 vst1.32 {d1[0]}, [r4]! +#else +#define STORE_POS_LAST1 STORE_POS +#define STORE_POS_LAST2 +#endif + +#if (SKIN_1BONE == SKIN_POS_NRM) || (SKIN_2BONES == SKIN_POS_NRM) || (SKIN_4BONES == SKIN_POS_NRM) +#define STORE_NRM_LAST1 vst1.32 {d2}, [r4]! +#define STORE_NRM_LAST2 vst1.32 {d3[0]}, [r4]! +#else +#define STORE_NRM_LAST1 STORE_NRM +#define STORE_NRM_LAST2 +#endif + +#define __NAME_EPILOGUE(x) x ## EPILOGUE +#define _NAME_EPILOGUE(x) __NAME_EPILOGUE(x) +#define LOOP_EPILOGUE _NAME_EPILOGUE(LOOP_NAME) + + + +#if (SKIN_1BONE == SKIN_POS) || (SKIN_1BONE == SKIN_POS_NRM) || (SKIN_1BONE == SKIN_POS_NRM_TAN) + #define LOAD_M_12 vld1.32 {q12,q13}, [r7,:128]! 
+ #define LOAD_M_34 vld1.32 {q14,q15}, [r7,:128] +#else + #define LOAD_M_12 vld1.32 {q8,q9}, [r7,:128]! + #define LOAD_M_34 vld1.32 {q10,q11}, [r7,:128] +#endif + +#define WEIGHT_MATRIX_1(op,r) op.f32 q12, q8, r +#define WEIGHT_MATRIX_2(op,r) op.f32 q13, q9, r +#define WEIGHT_MATRIX_3(op,r) op.f32 q14, q10, r +#define WEIGHT_MATRIX_4(op,r) op.f32 q15, q11, r + +#define WEIGHT_M0_1 WEIGHT_MATRIX_1(vmul, d11[0]) +#define WEIGHT_M0_2 WEIGHT_MATRIX_2(vmul, d11[0]) +#define WEIGHT_M0_3 WEIGHT_MATRIX_3(vmul, d11[0]) +#define WEIGHT_M0_4 WEIGHT_MATRIX_4(vmul, d11[0]) + +#define WEIGHT_M1_1 WEIGHT_MATRIX_1(vmla, d11[1]) +#define WEIGHT_M1_2 WEIGHT_MATRIX_2(vmla, d11[1]) +#define WEIGHT_M1_3 WEIGHT_MATRIX_3(vmla, d11[1]) +#define WEIGHT_M1_4 WEIGHT_MATRIX_4(vmla, d11[1]) + +#define WEIGHT_M2_1 WEIGHT_MATRIX_1(vmla, d12[0]) +#define WEIGHT_M2_2 WEIGHT_MATRIX_2(vmla, d12[0]) +#define WEIGHT_M2_3 WEIGHT_MATRIX_3(vmla, d12[0]) +#define WEIGHT_M2_4 WEIGHT_MATRIX_4(vmla, d12[0]) + +#define WEIGHT_M3_1 WEIGHT_MATRIX_1(vmla, d12[1]) +#define WEIGHT_M3_2 WEIGHT_MATRIX_2(vmla, d12[1]) +#define WEIGHT_M3_3 WEIGHT_MATRIX_3(vmla, d12[1]) +#define WEIGHT_M3_4 WEIGHT_MATRIX_4(vmla, d12[1]) + + +//=========================================================================================================================================== +// +// 1 bone skinning + +#if (SKIN_1BONE == SKIN_POS) || (SKIN_1BONE == SKIN_POS_NRM) || (SKIN_1BONE == SKIN_POS_NRM_TAN) + +mov ip, sp + +vpush {d8-d10} +stmfd sp!, {r4-r8} + +ldr r4, [ip, #0] +mov r8, #12 + + ldr r5, [r3], #4 + add r7, r0, r5, lsl #6 + +LOOP_NAME: + + + +LOAD_M_12 +LOAD_M_34 + + +LOAD_POS_NRM +LOAD_TAN + +CALC_POS_1 +CALC_NRM_1 +CALC_TAN_1 + + cmp r1, r2 + pld [r1, #256] + +CALC_POS_2 +CALC_NRM_2 +CALC_TAN_2 + + ldrcc r5, [r3], #4 + add r7, r0, r5, lsl #6 + +CALC_POS_3 +CALC_NRM_3 +CALC_TAN_3 + + pld [r7] + +CALC_POS_4 +CALC_TAN_4 + +beq LOOP_EPILOGUE + +STORE_POS +STORE_NRM +STORE_TAN + +bcc LOOP_NAME + +LOOP_EPILOGUE: +STORE_POS_LAST1 
+STORE_POS_LAST2 +STORE_NRM_LAST1 +STORE_NRM_LAST2 +STORE_TAN + + +ldmfd sp!, {r4-r8} +vpop {d8-d10} + +bx lr + + +//=========================================================================================================================================== +// +// 2 bones skinning + +#elif (SKIN_2BONES == SKIN_POS || SKIN_2BONES == SKIN_POS_NRM || SKIN_2BONES == SKIN_POS_NRM_TAN) + +mov ip, sp + +vpush {d8-d11} +stmfd sp!, {r4,r5,r6,r7,r8,r10} + +ldr r4, [ip, #0] + +vld1.32 {d11}, [r3,:64]! // wgt -> +ldmia r3!, {r5-r6} // idx -> + +add r7, r0, r5, lsl #6 // M0 .. +LOAD_M_12 // M0 +WEIGHT_M0_1 +WEIGHT_M0_2 + +LOAD_M_34 // M0 +add r7, r0, r6, lsl #6 // M1 .. +WEIGHT_M0_3 +WEIGHT_M0_4 + +LOAD_M_12 // M1 +WEIGHT_M1_1 +WEIGHT_M1_2 + +ldr r5, [r3, #8] // idx0 + +mov r8, #12 +sub r10, r2, #VERTEX_SZ + +LOAD_M_34 // M1 + +WEIGHT_M1_3 + +.align 4 +LOOP_NAME: + + cmp r1, r10 + + add r7, r0, r5, lsl #6 // M0 .. + ldrcc r6, [r3, #12] // idx1 +LOAD_POS_NRM + +WEIGHT_M1_4 + +LOAD_TAN + +CALC_POS_1 +LOAD_M_12 // M0 + cmp r1, r10 +CALC_NRM_1 +CALC_TAN_1 +vld1.32 {d11}, [r3,:64] // wgt -> + +WEIGHT_M0_1 + pld [r1,#256] + +CALC_POS_2 +LOAD_M_34 // M0 + add r7, r0, r6, lsl #6 // M1 .. 
+CALC_NRM_2 +CALC_TAN_2 + ldrcc r5, [r3, #24] // idx0 +WEIGHT_M0_2 +CALC_POS_3 + + cmp r1, r2 +CALC_NRM_3 +CALC_TAN_3 +LOAD_M_12 // M1 + + +WEIGHT_M0_3 + +CALC_POS_4 +CALC_TAN_4 + +WEIGHT_M0_4 +LOAD_M_34 // M1 + +beq LOOP_EPILOGUE + +WEIGHT_M1_1 +STORE_POS + +WEIGHT_M1_2 +STORE_NRM + add r3, r3, #16 +WEIGHT_M1_3 +STORE_TAN + +bcc LOOP_NAME + +LOOP_EPILOGUE: +STORE_POS_LAST1 +STORE_POS_LAST2 +STORE_NRM_LAST1 +STORE_NRM_LAST2 +STORE_TAN + + +ldmfd sp!, {r4,r5,r6,r7,r8,r10} +vpop {d8-d11} +bx lr + + +//=========================================================================================================================================== +// +// 4 bones skinning + +#elif (SKIN_4BONES == SKIN_POS || SKIN_4BONES == SKIN_POS_NRM || SKIN_4BONES == SKIN_POS_NRM_TAN) + + +mov ip, sp + +vpush {d8-d12} +stmfd sp!, {r4-r8} + +ldr r4, [ip, #0] + +vld1.32 {d11,d12}, [r3,:128]! // wgt -> +ldmia r3!, {r5-r6} // idx' -> + +add r7, r0, r5, lsl #6 // M0 .. +LOAD_M_12 // M0 +LOAD_M_34 // M0 + +mov r8, #12 + +.align 4 +LOOP_NAME: + +WEIGHT_M0_1 +LOAD_POS_NRM + +WEIGHT_M0_2 +LOAD_TAN + add r7, r0, r6, lsl #6 // M1 .. + + +WEIGHT_M0_3 +LOAD_M_12 // M1 + +WEIGHT_M0_4 +LOAD_M_34 // M1 + +WEIGHT_M1_1 + ldmia r3!, {r5-r6} // idx'' -> + +WEIGHT_M1_2 + add r7, r0, r5, lsl #6 // M2 .. + cmp r1, r2 + +WEIGHT_M1_3 +LOAD_M_12 // M2 + +WEIGHT_M1_4 + pld [r3, #256] +LOAD_M_34 // M2 + +WEIGHT_M2_1 + add r7, r0, r6, lsl #6 // M3 .. +WEIGHT_M2_2 +WEIGHT_M2_3 +LOAD_M_12 // M3 +WEIGHT_M2_4 + +LOAD_M_34 // M3 +WEIGHT_M3_1 +WEIGHT_M3_2 +WEIGHT_M3_3 +WEIGHT_M3_4 + pld [r1, #256] + +CALC_POS_1 +vld1.32 {d11,d12}, [r3,:128]! // wgt -> + +CALC_NRM_1 +CALC_TAN_1 + ldmcc r3!, {r5-r6} // idx -> + +CALC_POS_2 + add r7, r0, r5, lsl #6 // M0 .. 
+CALC_NRM_2 +CALC_TAN_2 +vldmia r7, {q8-q11} // M0 -> + +CALC_POS_3 +CALC_NRM_3 +CALC_TAN_3 + +CALC_POS_4 +CALC_TAN_4 + +beq LOOP_EPILOGUE + +STORE_POS +STORE_NRM +STORE_TAN + +bcc LOOP_NAME + +LOOP_EPILOGUE: +STORE_POS_LAST1 +STORE_POS_LAST2 +STORE_NRM_LAST1 +STORE_NRM_LAST2 +STORE_TAN + + +ldmfd sp!, {r4-r8} +vpop {d8-d12} +bx lr + + +//=========================================================================================================================================== + +#endif + +#undef __NAME_EPILOGUE +#undef _NAME_EPILOGUE +#undef LOOP_EPILOGUE +#undef CALC_POS_1 +#undef CALC_POS_2 +#undef CALC_POS_3 +#undef STORE_POS +#undef STORE_POS_LAST1 +#undef STORE_POS_LAST2 +#undef LOAD_POS_NRM +#undef STORE_NRM +#undef STORE_NRM_LAST1 +#undef STORE_NRM_LAST2 +#undef CALC_NRM_1 +#undef CALC_NRM_2 +#undef CALC_NRM_3 +#undef LOAD_TAN +#undef STORE_TAN +#undef CALC_TAN_1 +#undef CALC_TAN_2 +#undef CALC_TAN_3 +#undef CALC_TAN_4 +#undef LOAD_M_12 +#undef LOAD_M_34 +#undef WEIGHT_MATRIX_1 +#undef WEIGHT_MATRIX_2 +#undef WEIGHT_MATRIX_3 +#undef WEIGHT_MATRIX_4 +#undef WEIGHT_M0_1 +#undef WEIGHT_M0_2 +#undef WEIGHT_M0_3 +#undef WEIGHT_M0_4 +#undef WEIGHT_M1_1 +#undef WEIGHT_M1_2 +#undef WEIGHT_M1_3 +#undef WEIGHT_M1_4 +#undef WEIGHT_M2_1 +#undef WEIGHT_M2_2 +#undef WEIGHT_M2_3 +#undef WEIGHT_M2_4 +#undef WEIGHT_M3_1 +#undef WEIGHT_M3_2 +#undef WEIGHT_M3_3 +#undef WEIGHT_M3_4 diff --git a/Runtime/Filters/Mesh/MeshSkinningSSE2.asm b/Runtime/Filters/Mesh/MeshSkinningSSE2.asm new file mode 100644 index 0000000..395bf16 --- /dev/null +++ b/Runtime/Filters/Mesh/MeshSkinningSSE2.asm @@ -0,0 +1,323 @@ +;; SkinSSE2.s +;; +;; Created by Kaspar Daugaard on 1/12/11. +;; Copyright 2011 Unity Technologies. All rights reserved. 
+ +bits 32 + +section .text align=32 + +%define normalOffset 12 +%define tangentOffset 24 + +%macro SkinSSE2_Generic 3 + ; %1 numBones + ; %2 hasNormals + ; %3 hasTangents + ; [ebp + 8] inVertices + ; [ebp + 12] outVertices + ; [ebp + 16] numVertices + ; [ebp + 20] boneMatrices + ; [ebp + 24] weightsAndIndices + ; [ebp + 28] inputStride + ; [ebp + 32] outputStride + + push ebp + mov ebp, esp + pushad + + ; Local variables (32 byte aligned) + ; [esp + 0] MaskW + ; [esp + 16] MaskVec3 + ; [esp + 32] savedEcx + sub esp, 16*3 + and esp, ~31 + + ; Create bitmasks on stack + sub eax, eax + mov [esp + 0], eax ; MaskW + mov [esp + 4], eax + mov [esp + 8], eax + dec eax + mov [esp + 12], eax + mov [esp + 16], eax ; MaskVec3 + mov [esp + 20], eax + mov [esp + 24], eax + inc eax + mov [esp + 28], eax + + mov esi, [ebp + 8] ; inVertices + mov edi, [ebp + 12] ; outVertices + mov ecx, [ebp + 16] ; numVertices + mov edx, [ebp + 24] ; weightsAndIndices + + ; Prefetch vertices + prefetchnta [edx] + prefetchnta [esi] + prefetchnta [esi + 32] + + align 32 + +%%SkinSSE2_loop: + prefetchnta [esi + 64] + + mov ebx, [ebp + 20] ; boneMatrices + mov [esp + 32], ecx ; savedEcx + + ; Load first bone index +%if %1 == 1 + ; Single bone, no weight + mov eax, [edx] + shl eax, 6 +%else + ; Indices come after weights + mov eax, [edx + %1*4] + shl eax, 6 + prefetchnta [ebx + eax] + prefetchnta [ebx + eax + 32] + + ; Load second bone index + mov ecx, [edx + %1*4 + 4] + shl ecx, 6 + prefetchnta [ebx + ecx] + prefetchnta [ebx + ecx + 32] + + ; Load all weights to xmm0 + movups xmm0, [edx] +%endif + + ; Load first matrix to xmm4-xmm7 + movaps xmm4, [ebx + eax] + movaps xmm5, [ebx + eax + 16] + movaps xmm6, [ebx + eax + 32] + movaps xmm7, [ebx + eax + 48] + +%if %1 >= 2 + ; Multiply first matrix with first weight + movaps xmm1, xmm0 + shufps xmm1, xmm1, 0x00 + mulps xmm4, xmm1 + mulps xmm5, xmm1 + mulps xmm6, xmm1 + mulps xmm7, xmm1 +%endif + +%if %1 >= 3 + ; Load third bone index + mov eax, [edx + %1*4 
+ 8] + shl eax, 6 + prefetchnta [ebx + eax] + prefetchnta [ebx + eax + 32] +%endif + +%if %1 >= 2 + ; Load first two rows of the second matrix to xmm2-xmm3 + movaps xmm2, [ebx + ecx] + movaps xmm3, [ebx + ecx + 16] + ; Shuffle second weight to all elements of xmm1 + movaps xmm1, xmm0 + shufps xmm1, xmm1, 0x55 + ; Multiply two first rows of second matrix with second weight + mulps xmm2, xmm1 + mulps xmm3, xmm1 + ; Add + addps xmm4, xmm2 + addps xmm5, xmm3 + + ; Load last two rows of the second matrix to xmm2-xmm3 + movaps xmm2, [ebx + ecx + 32] + movaps xmm3, [ebx + ecx + 48] + ; Multiply two last rows of the second matri with second weight + mulps xmm2, xmm1 + mulps xmm3, xmm1 + ; Add + addps xmm6, xmm2 + addps xmm7, xmm3 +%endif + +%if %1 >= 4 + ; Load fourth bone index + mov ecx, [edx + %1*4 + 12] + shl ecx, 6 + prefetchnta [ebx + ecx] + prefetchnta [ebx + ecx + 32] +%endif + +%if %1 >= 3 + ; Load first two rows of the third matrix to xmm2-xmm3 + movaps xmm2, [ebx + eax] + movaps xmm3, [ebx + eax + 16] + ; Shuffle third weight to all elements of xmm1 + movaps xmm1, xmm0 + shufps xmm1, xmm1, 0xaa + ; Multiply first two rows of third matrix with third weight + mulps xmm2, xmm1 + mulps xmm3, xmm1 + ; Add + addps xmm4, xmm2 + addps xmm5, xmm3 + + ; Load last two rows of the third matrix to xmm2-xmm3 + movaps xmm2, [ebx + eax + 32] + movaps xmm3, [ebx + eax + 48] + ; Multiply last two rows of third matrix with third weight + mulps xmm2, xmm1 + mulps xmm3, xmm1 + ; Add + addps xmm6, xmm2 + addps xmm7, xmm3 +%endif + +%if %1 >= 4 + ; Load first two rows of the fourth matrix into xmm2-xmm3 + movaps xmm2, [ebx + ecx] + movaps xmm3, [ebx + ecx + 16] + ; Shuffle fourth weight to all elements of xmm1 + movaps xmm1, xmm0 + shufps xmm1, xmm1, 0xff + ; Multiply first two rows of the fourth matrix with fourth weight + mulps xmm2, xmm1 + mulps xmm3, xmm1 + ; Add + addps xmm4, xmm2 + addps xmm5, xmm3 + + ; Load last two rows of the fourth matrix to xmm2-xmm3 + movaps xmm2, [ebx + 
ecx + 32] + movaps xmm3, [ebx + ecx + 48] + ; Multiply last two rows of the fourth matrix with fourth weight + mulps xmm2, xmm1 + mulps xmm3, xmm1 + ; Add + addps xmm6, xmm2 + addps xmm7, xmm3 +%endif + + ; Matrix is in xmm4-xmm7 + ; Transform position by 4x4 matrix in xmm4-xmm7 + movups xmm0, [esi] + movaps xmm1, xmm0 + movaps xmm2, xmm0 + shufps xmm1, xmm1, 0x55 + shufps xmm2, xmm2, 0xaa + shufps xmm0, xmm0, 0x00 + mulps xmm1, xmm5 + mulps xmm2, xmm6 + mulps xmm0, xmm4 + addps xmm1, xmm2 + addps xmm0, xmm7 + addps xmm0, xmm1 + ; Store vertex position in outvert + movaps xmm7, [esp + 16] ; MaskVec3 + maskmovdqu xmm0, xmm7 + +%if %2 ; Has normal + ; Transform vector by 3x3 matrix in xmm4-xmm6 + movups xmm0, [esi + normalOffset] + movaps xmm1, xmm0 + movaps xmm2, xmm0 + shufps xmm1, xmm1, 0x55 + shufps xmm2, xmm2, 0xaa + shufps xmm0, xmm0, 0x00 + mulps xmm1, xmm5 + mulps xmm2, xmm6 + mulps xmm0, xmm4 + addps xmm1, xmm2 + addps xmm0, xmm1 +%endif + +%if %3 ; Has tangent + ; Transform vector by 3x3 matrix in xmm4-xmm6 + movups xmm1, [esi + tangentOffset] + movaps xmm2, xmm1 + movaps xmm3, xmm1 + shufps xmm2, xmm2, 0x55 + shufps xmm3, xmm3, 0xaa + mulps xmm2, xmm5 + mulps xmm3, xmm6 + movaps xmm6, xmm1 ; Save original tangent's W in xmm6 + shufps xmm1, xmm1, 0x00 + andps xmm6, [esp + 0] ; MaskW + mulps xmm1, xmm4 + addps xmm2, xmm3 + addps xmm1, xmm2 +%endif + +%if %2 || %3 ; Has normal or tangent + ; Calculate lengths and normalize + movaps xmm2, xmm0 + movaps xmm5, xmm1 + mulps xmm2, xmm2 + mulps xmm5, xmm5 + movaps xmm3, xmm2 + movaps xmm4, xmm2 + shufps xmm3, xmm5, 0x55 + shufps xmm4, xmm5, 0xaa + shufps xmm2, xmm5, 0x00 + addps xmm3, xmm4 + addps xmm2, xmm3 + sqrtps xmm2, xmm2 + rcpps xmm2, xmm2 + movaps xmm3, xmm2 + shufps xmm2, xmm2, 0x00 + shufps xmm3, xmm3, 0xaa + mulps xmm0, xmm2 + mulps xmm1, xmm3 +%endif + +%if %2 ; Write normal + add edi, normalOffset + maskmovdqu xmm0, xmm7 ; MaskVec3 + sub edi, normalOffset +%endif + +%if %3 ; Write tangent + andps xmm1, 
xmm7 ; MaskVec3 + orps xmm1, xmm6 ; Restore original W + movups [edi + tangentOffset], xmm1 +%endif + +%if %1 == 1 + ; Indices only + add edx, 4 +%else + ; Indices and weights + add edx, %1 * 8 +%endif + + add esi, [ebp + 28] ; inputStride + add edi, [ebp + 32] ; outputStride + mov ecx, [esp + 32] ; savedEcx + dec ecx + jnz %%SkinSSE2_loop + + ; Remove local variables from stack + lea esp, [ebp-32] + + popad + pop ebp + ret + align 16 +%endmacro + + +global SkinSSE2_1Bone_Pos +global SkinSSE2_2Bones_Pos +global SkinSSE2_4Bones_Pos +global SkinSSE2_1Bone_PosNormal +global SkinSSE2_2Bones_PosNormal +global SkinSSE2_4Bones_PosNormal +global SkinSSE2_1Bone_PosNormalTan +global SkinSSE2_2Bones_PosNormalTan +global SkinSSE2_4Bones_PosNormalTan + + +SkinSSE2_1Bone_Pos: SkinSSE2_Generic 1, 0, 0 +SkinSSE2_2Bones_Pos: SkinSSE2_Generic 2, 0, 0 +SkinSSE2_4Bones_Pos: SkinSSE2_Generic 4, 0, 0 +SkinSSE2_1Bone_PosNormal: SkinSSE2_Generic 1, 1, 0 +SkinSSE2_2Bones_PosNormal: SkinSSE2_Generic 2, 1, 0 +SkinSSE2_4Bones_PosNormal: SkinSSE2_Generic 4, 1, 0 +SkinSSE2_1Bone_PosNormalTan: SkinSSE2_Generic 1, 1, 1 +SkinSSE2_2Bones_PosNormalTan: SkinSSE2_Generic 2, 1, 1 +SkinSSE2_4Bones_PosNormalTan: SkinSSE2_Generic 4, 1, 1 diff --git a/Runtime/Filters/Mesh/MeshSkinningSSE2.h b/Runtime/Filters/Mesh/MeshSkinningSSE2.h new file mode 100644 index 0000000..c085309 --- /dev/null +++ b/Runtime/Filters/Mesh/MeshSkinningSSE2.h @@ -0,0 +1,129 @@ +#if UNITY_SUPPORTS_SSE && !UNITY_64 + +#if UNITY_OSX || UNITY_LINUX +#define __cdecl +#endif + +#define SKIN_SSE2_PARAMS \ + const void* inVertices, \ + void* outVertices, \ + int numVertices, \ + const void* boneMatrices, \ + const void* weightsAndIndices, \ + int inputStride, \ + int outputStride + +typedef void (__cdecl *SkinSSE2_Function)(SKIN_SSE2_PARAMS); + +extern "C" +{ + void __cdecl SkinSSE2_1Bone_Pos(SKIN_SSE2_PARAMS); + void __cdecl SkinSSE2_2Bones_Pos(SKIN_SSE2_PARAMS); + void __cdecl SkinSSE2_4Bones_Pos(SKIN_SSE2_PARAMS); + void __cdecl 
SkinSSE2_1Bone_PosNormal(SKIN_SSE2_PARAMS); + void __cdecl SkinSSE2_2Bones_PosNormal(SKIN_SSE2_PARAMS); + void __cdecl SkinSSE2_4Bones_PosNormal(SKIN_SSE2_PARAMS); + void __cdecl SkinSSE2_1Bone_PosNormalTan(SKIN_SSE2_PARAMS); + void __cdecl SkinSSE2_2Bones_PosNormalTan(SKIN_SSE2_PARAMS); + void __cdecl SkinSSE2_4Bones_PosNormalTan(SKIN_SSE2_PARAMS); +} + + +bool SkinMeshOptimizedSSE2(SkinMeshInfo& info) +{ + if (!CPUInfo::HasSSE2Support()) + { + return false; + } + + SkinSSE2_Function skinFunc = NULL; + + if (!info.skinNormals && !info.skinTangents) + { + switch (info.bonesPerVertex) + { + DebugAssert(info.inStride == sizeof(Vector3f)); + case 1: + skinFunc = &SkinSSE2_1Bone_Pos; + break; + case 2: + skinFunc = &SkinSSE2_2Bones_Pos; + break; + case 4: + skinFunc = &SkinSSE2_4Bones_Pos; + break; + + } + } + else if (info.skinNormals && !info.skinTangents) + { + DebugAssert(info.inStride == sizeof(Vector3f) + sizeof(Vector3f)); + switch (info.bonesPerVertex) + { + case 1: + skinFunc = &SkinSSE2_1Bone_PosNormal; + break; + case 2: + skinFunc = &SkinSSE2_2Bones_PosNormal; + break; + case 4: + skinFunc = &SkinSSE2_4Bones_PosNormal; + break; + + } + } + else if (info.skinNormals && info.skinTangents) + { + DebugAssert(info.inStride == sizeof(Vector3f) + sizeof(Vector3f) + sizeof(Vector4f)); + switch (info.bonesPerVertex) + { + case 1: + skinFunc = &SkinSSE2_1Bone_PosNormalTan; + break; + case 2: + skinFunc = &SkinSSE2_2Bones_PosNormalTan; + break; + case 4: + skinFunc = &SkinSSE2_4Bones_PosNormalTan; + break; + + } + } + + if (skinFunc == NULL) + return false; + + // Skin all vertices apart from last one! 
+ if (info.vertexCount > 1) + { + (*skinFunc)(info.inVertices, info.outVertices, info.vertexCount - 1,info.cachedPose, info.compactSkin, info.inStride, info.outStride); + } + // Copy last vertex to stack to avoid reading/writing past end of buffer + if (info.vertexCount > 0) + { + const int maxStride = 2 * sizeof(Vector3f) + sizeof(Vector4f) + 4; + Assert(info.inStride <= maxStride && info.outStride <= maxStride); + // Need 4 bytes padding to access Vec3 as Vec4 + char vertexCopyIn[maxStride + 4]; + char vertexCopyOut[maxStride + 4]; + int skinStride = (info.bonesPerVertex == 4) ? sizeof(BoneInfluence) : + (info.bonesPerVertex == 2) ? sizeof(BoneInfluence2) : + (info.bonesPerVertex == 1) ? sizeof(int) : 0; + Assert(skinStride != 0); + int index = info.vertexCount - 1; + const char* compactSkin = static_cast<const char*>(info.compactSkin) + index * skinStride; + const char* inVertex = static_cast<const char*>(info.inVertices) + index * info.inStride; + char* outVertex = static_cast<char*>(info.outVertices) + index * info.outStride; + memcpy(vertexCopyIn, inVertex, info.inStride); + (*skinFunc)(vertexCopyIn, vertexCopyOut, 1, info.cachedPose, compactSkin, info.inStride, info.outStride); + memcpy(outVertex, vertexCopyOut, info.outStride); + } + + return true; +} +#else +inline bool SkinMeshOptimizedSSE2(SkinMeshInfo& info) +{ + return false; +} +#endif diff --git a/Runtime/Filters/Mesh/MeshSkinningTests.cpp b/Runtime/Filters/Mesh/MeshSkinningTests.cpp new file mode 100644 index 0000000..407729b --- /dev/null +++ b/Runtime/Filters/Mesh/MeshSkinningTests.cpp @@ -0,0 +1,228 @@ +#include "UnityPrefix.h" +#include "Configuration/UnityConfigure.h" + +#if ENABLE_UNIT_TESTS && UNITY_SUPPORTS_SSE && !UNITY_64 + +#include "Runtime/Filters/Mesh/MeshSkinning.h" +#include "External/UnitTest++/src/UnitTest++.h" +#include "Runtime/Allocator/MemoryMacros.h" +#include "Runtime/Math/Random/rand.h" +#include "Runtime/Math/Matrix4x4.h" + +bool SkinMeshOptimizedSSE2(SkinMeshInfo& info); 
+void SkinMesh(SkinMeshInfo& info); + +Vector3f RandomVector3InUnitBox(Rand& rnd) +{ + return Vector3f(rnd.GetSignedFloat(), + rnd.GetSignedFloat(), + rnd.GetSignedFloat()); +} + +SUITE (MeshSkinningTests) +{ +TEST(MeshSkinning_AllFeatures) +{ + int failedPositions = 0; + int failedNormals = 0; + int failedTangents = 0; + int failedTangentSigns = 0; + int failedVertexCopies = 0; + + const int minVertices = 1; + const int maxVertices = 100; + const int positionSize = 3*sizeof(float); + const int normalSize = 3*sizeof(float); + const int tangentSize = 4*sizeof(float); + const int maxStride = positionSize + normalSize + tangentSize; + const int trailingBytes = 128; + + UInt8 inVertices[maxVertices * maxStride]; + UInt8 outVerticesRef[maxVertices * maxStride + trailingBytes]; + UInt8 outVerticesSimd[maxVertices * maxStride + trailingBytes]; + + SkinMeshInfo info; + memset(&info, 0, sizeof(info)); + info.inVertices = inVertices; + info.vertexCount = minVertices; + info.normalOffset = positionSize; + info.tangentOffset = positionSize + normalSize; + + // Try a large offset so AABBs don't contain (0,0,0) + Vector3f posOffset(-2000, 0, 2000); + + const int numBones = 64; + Matrix4x4f *cachedPose; + ALLOC_TEMP_ALIGNED(cachedPose, Matrix4x4f, numBones, 32); + info.cachedPose = cachedPose; + for (int i = 0; i < numBones; i++) + { + Matrix4x4f mat; + mat.SetScale(Vector3f(1.0 + 0.5f*sin(i*0.3f), + 1.0 + 0.5f*sin(i*0.5f), + 1.0 + 0.5f*sin(i*0.7f))); + mat.SetPosition(Vector3f(100.0f*sin(i*1.0f), + 100.0f*sin(i*2.5f), + 100.0f*sin(i*3.3f)) + posOffset); + cachedPose[i] = mat; + } + info.boneCount = numBones; + + Rand rnd(123); + + int boneIndices[maxVertices]; + BoneInfluence2 boneInfl2[maxVertices]; + BoneInfluence boneInfl4[maxVertices]; + for (int i = 0; i < maxVertices; i++) + { + boneIndices[i] = i%numBones; + + BoneInfluence2& b2 = boneInfl2[i]; + b2.boneIndex[0] = (i)%numBones; + b2.boneIndex[1] = (i/2+10)%numBones; + b2.weight[0] = rnd.GetFloat(); + b2.weight[1] = 1.0f - 
b2.weight[0]; + + BoneInfluence& b4 = boneInfl4[i]; + b4.boneIndex[0] = (i)%numBones; + b4.boneIndex[1] = (i/2+10)%numBones; + b4.boneIndex[2] = (i/3+20)%numBones; + b4.boneIndex[3] = (i/4+30)%numBones; + float weightLeft = 1.0f; + for (int j=0; j<3; j++) + { + b4.weight[j] = weightLeft * rnd.GetFloat(); + weightLeft -= b4.weight[j]; + } + b4.weight[3] = weightLeft; + } + + for (info.bonesPerVertex = 1; info.bonesPerVertex <= 4; info.bonesPerVertex++) + { + if (info.bonesPerVertex == 3) continue; + + switch (info.bonesPerVertex) + { + case 1: + info.compactSkin = boneIndices; + break; + case 2: + info.compactSkin = boneInfl2; + break; + case 4: + info.compactSkin = boneInfl4; + break; + } + + for (int skinNormals = 0; skinNormals <= 1; skinNormals++) + { + info.skinNormals = (skinNormals != 0); + + for (int skinTangents = 0; skinTangents <= 1; skinTangents++) + { + if (!skinNormals && skinTangents) continue; + info.skinTangents = (skinTangents != 0); + + // Randomize vertex count and stride + info.vertexCount += 7; + while (info.vertexCount > maxVertices) info.vertexCount -= (maxVertices - minVertices); + info.inStride = positionSize; + info.inStride += skinNormals ? normalSize : 0; + info.inStride += skinTangents ? tangentSize : 0; + info.outStride = info.inStride; + + UInt8* inVert = inVertices; + for (int i = 0; i < info.vertexCount; i++) + { + Vector3f* nextVec = (Vector3f*)inVert; + Vector3f pos = RandomVector3InUnitBox(rnd); + pos *= 1000.0f; + *nextVec++ = pos; + if (info.skinNormals) + { + Vector3f normal = RandomVector3InUnitBox(rnd); + normal = NormalizeSafe(normal); + *nextVec++ = normal; + } + + if (info.skinTangents) + { + Vector3f tangent = RandomVector3InUnitBox(rnd); + tangent = NormalizeSafe(tangent); + *nextVec++ = tangent; + float* tangentSign = (float*)nextVec; + *tangentSign = (rnd.GetSignedFloat() < 0.0f) ? 
-1.0f : 1.0f; + } + inVert += info.inStride; + } + + int outSize = info.vertexCount * info.outStride; + memset(outVerticesRef, 0xcc, outSize + trailingBytes); + memset(outVerticesSimd, 0xdd, outSize + trailingBytes); + + info.outVertices = outVerticesRef; + SkinMesh(info); + + info.outVertices = outVerticesSimd; + bool successSimd = SkinMeshOptimizedSSE2(info); + CHECK(successSimd); + + // Check if we wrote past end of buffer + for (int i = 0; i < trailingBytes; i++) + { + CHECK_EQUAL(0xcc, outVerticesRef[outSize + i]); + CHECK_EQUAL(0xdd, outVerticesSimd[outSize + i]); + } + + inVert = inVertices; + UInt8* vertRef = outVerticesRef; + UInt8* vertSimd = outVerticesSimd; + for (int i = 0; i < info.vertexCount; i++) + { + Vector3f* posRef = (Vector3f*)vertRef; + Vector3f* posSimd = (Vector3f*)vertRef; + if (!CompareApproximately(*posRef, *posSimd)) + { + failedPositions++; + } + if (info.skinNormals) + { + Vector3f* normalRef = (Vector3f*)(vertRef + info.normalOffset); + Vector3f* normalSimd = (Vector3f*)(vertRef + info.normalOffset); + if (!CompareApproximately(*normalRef, *normalSimd)) + { + failedNormals++; + } + } + if (info.skinTangents) + { + Vector3f* tangentRef = (Vector3f*)(vertRef + info.tangentOffset); + Vector3f* tangentSimd = (Vector3f*)(vertRef + info.tangentOffset); + if (!CompareApproximately(*tangentRef, *tangentSimd)) + { + failedTangents++; + } + float* tangentSignRef = (float*)(vertRef + info.tangentOffset + sizeof(Vector3f)); + float* tangentSignSimd = (float*)(vertRef + info.tangentOffset + sizeof(Vector3f)); + if (*tangentSignRef != *tangentSignSimd) + { + failedTangentSigns++; + } + } + + inVert += info.inStride; + vertRef += info.outStride; + vertSimd += info.outStride; + } + } + } + } + + CHECK_EQUAL(0, failedPositions); + CHECK_EQUAL(0, failedNormals); + CHECK_EQUAL(0, failedTangents); + CHECK_EQUAL(0, failedTangentSigns); + CHECK_EQUAL(0, failedVertexCopies); +} +} +#endif diff --git a/Runtime/Filters/Mesh/MeshSkinningVFP.s 
b/Runtime/Filters/Mesh/MeshSkinningVFP.s new file mode 100644 index 0000000..8829981 --- /dev/null +++ b/Runtime/Filters/Mesh/MeshSkinningVFP.s @@ -0,0 +1,187 @@ +#define UNITY_ASSEMBLER +#include "Configuration/PrefixConfigure.h" +#include "Runtime/Utilities/VFPUtility.h" + +#if UNITY_SUPPORTS_VFP + +.syntax unified + +.set device,0 +.set device,__arm__ + +.if device + +//.code32 +.globl _s_SkinVertices_VFP +.globl _s_SkinVertices_NoNormals_VFP +.globl _s_SkinVertices_Tangents_VFP + +.globl _s_SkinVertices2Bones_VFP +.globl _s_SkinVertices2Bones_NoNormals_VFP +.globl _s_SkinVertices2Bones_Tangents_VFP + +.globl _s_SkinVertices4Bones_VFP +.globl _s_SkinVertices4Bones_Copy4Ints_VFP +.globl _s_SkinVertices4Bones_NoNormals_VFP +.globl _s_SkinVertices4Bones_NoNormals_Copy4Ints_VFP +.globl _s_SkinVertices4Bones_Tangents_VFP +.globl _s_SkinVertices4Bones_Tangents_Copy4Ints_VFP + +#if UNITY_ANDROID +.hidden _s_SkinVertices_VFP +.hidden _s_SkinVertices_NoNormals_VFP +.hidden _s_SkinVertices_Tangents_VFP + +.hidden _s_SkinVertices2Bones_VFP +.hidden _s_SkinVertices2Bones_NoNormals_VFP +.hidden _s_SkinVertices2Bones_Tangents_VFP + +.hidden _s_SkinVertices4Bones_VFP +.hidden _s_SkinVertices4Bones_NoNormals_VFP +.hidden _s_SkinVertices4Bones_Tangents_VFP +#endif + + +//=========================================================================================================================================== + + +#define SKIN_POS 1 +#define SKIN_POS_NRM 2 +#define SKIN_POS_NRM_TAN 3 + + +#define SKIN_2BONES 0 +#define SKIN_4BONES 0 + +_s_SkinVertices_VFP: + +#define SKIN_1BONE SKIN_POS_NRM +#define VERTEX_SZ 24 +#define LOOP_NAME _s_SkinVertices_VFP_loop + +#include "MeshSkinningVFP_Loop.h" + +#undef LOOP_NAME +#undef VERTEX_SZ +#undef SKIN_1BONE + +_s_SkinVertices_NoNormals_VFP: + +#define SKIN_1BONE SKIN_POS +#define VERTEX_SZ 12 +#define LOOP_NAME _s_SkinVertices_NoNormals_VFP_loop + +#include "MeshSkinningVFP_Loop.h" + +#undef LOOP_NAME +#undef VERTEX_SZ +#undef SKIN_1BONE + 
+_s_SkinVertices_Tangents_VFP: + +#define SKIN_1BONE SKIN_POS_NRM_TAN +#define VERTEX_SZ 40 +#define LOOP_NAME _s_SkinVertices_Tangents_VFP_loop + +#include "MeshSkinningVFP_Loop.h" + +#undef LOOP_NAME +#undef VERTEX_SZ +#undef SKIN_1BONE + +#undef SKIN_4BONES +#undef SKIN_2BONES + + +//=========================================================================================================================================== + +#define SKIN_1BONE 0 +#define SKIN_4BONES 0 + +_s_SkinVertices2Bones_VFP: + +#define SKIN_2BONES SKIN_POS_NRM +#define VERTEX_SZ 24 +#define LOOP_NAME _s_SkinVertices2Bones_VFP_Loop + +#include "MeshSkinningVFP_Loop.h" + +#undef LOOP_NAME +#undef VERTEX_SZ +#undef SKIN_2BONES + +_s_SkinVertices2Bones_NoNormals_VFP: + +#define SKIN_2BONES SKIN_POS +#define VERTEX_SZ 12 +#define LOOP_NAME _s_SkinVertices2Bones_NoNormals_VFP_Loop + +#include "MeshSkinningVFP_Loop.h" + +#undef LOOP_NAME +#undef VERTEX_SZ +#undef SKIN_2BONES + +_s_SkinVertices2Bones_Tangents_VFP: + +#define SKIN_2BONES SKIN_POS_NRM_TAN +#define VERTEX_SZ 40 +#define LOOP_NAME _s_SkinVertices2Bones_Tangents_VFP_loop + +#include "MeshSkinningVFP_Loop.h" + +#undef LOOP_NAME +#undef VERTEX_SZ +#undef SKIN_2BONES + +#undef SKIN_4BONES +#undef SKIN_1BONE + +//=========================================================================================================================================== + +#define SKIN_1BONE 0 +#define SKIN_2BONES 0 + +_s_SkinVertices4Bones_VFP: + +#define SKIN_4BONES SKIN_POS_NRM +#define VERTEX_SZ 24 +#define LOOP_NAME _s_SkinVertices4Bones_VFP_loop + +#include "MeshSkinningVFP_Loop.h" + +#undef LOOP_NAME +#undef VERTEX_SZ +#undef SKIN_4BONES + +_s_SkinVertices4Bones_NoNormals_VFP: + +#define SKIN_4BONES SKIN_POS +#define VERTEX_SZ 12 +#define LOOP_NAME _s_SkinVertices4Bones_NoNormals_VFP_loop + +#include "MeshSkinningVFP_Loop.h" + +#undef LOOP_NAME +#undef VERTEX_SZ +#undef SKIN_4BONES + +_s_SkinVertices4Bones_Tangents_VFP: + +#define SKIN_4BONES 
SKIN_POS_NRM_TAN +#define VERTEX_SZ 40 +#define LOOP_NAME _s_SkinVertices4Bones_Tangents_VFP_loop + +#include "MeshSkinningVFP_Loop.h" + +#undef LOOP_NAME +#undef VERTEX_SZ +#undef SKIN_4BONES + +#undef SKIN_2BONES +#undef SKIN_1BONE + +//=========================================================================================================================================== + +.endif +#endif diff --git a/Runtime/Filters/Mesh/MeshSkinningVFP_Loop.h b/Runtime/Filters/Mesh/MeshSkinningVFP_Loop.h new file mode 100644 index 0000000..3b7400f --- /dev/null +++ b/Runtime/Filters/Mesh/MeshSkinningVFP_Loop.h @@ -0,0 +1,335 @@ + +// defines +// SKIN_1BONE +// SKIN_2BONES +// SKIN_4BONES +// LOOP_NAME +// VERTEX_SZ + +// skin types +// SKIN_POS +// SKIN_POS_NRM +// SKIN_POS_NRM_TAN + +//r0: const void* bones4x4 +//r1: const void* srcVertData +//r2: const void* srcVertDataEnd +//r3: const BoneInfluence4* srcBoneInfluence4 +//[sp+0] -> r4: const void* dstVertData + +// s0,s1,s2 <- output: pos +// s3,s4,s5 <- output: nrm +// s6,s7,s8,s9 <- output: tan +// s10,s11,s12 <- input: pos +// s13,s14,s15 <- input: nrm +// s16,s17,s18,s19 <- input: tan +// s20-s31 <- matrix [3x4] last row loaded directly to output pos + +//=========================================================================================================================================== +// +// Common + +#define CALC_POS_2 FMACS3 (0,1,2, 20,21,22, 10,10,10) +#define CALC_POS_3 FMACS3 (0,1,2, 24,25,26, 11,11,11) +#define CALC_POS_4 FMACS3 (0,1,2, 28,29,30, 12,12,12) + + +#if (SKIN_1BONE == SKIN_POS_NRM) || (SKIN_1BONE == SKIN_POS_NRM_TAN) \ + || (SKIN_2BONES == SKIN_POS_NRM) || (SKIN_2BONES == SKIN_POS_NRM_TAN) \ + || (SKIN_4BONES == SKIN_POS_NRM) || (SKIN_4BONES == SKIN_POS_NRM_TAN) + + #define LOAD_POS_NRM vldmia.32 r1!, {s10-s15} + #define STORE_POS_NRM vstmia.32 r4!, {s0-s5} + #define CALC_NRM_1 FMULS3 (3,4,5, 20,21,22, 13,13,13) + #define CALC_NRM_2 FMACS3 (3,4,5, 24,25,26, 14,14,14) + #define CALC_NRM_3 
FMACS3 (3,4,5, 28,29,30, 15,15,15) +#else + #define LOAD_POS_NRM vldmia.32 r1!, {s10-s12} + #define STORE_POS_NRM vstmia.32 r4!, {s0-s2} + #define CALC_NRM_1 + #define CALC_NRM_2 + #define CALC_NRM_3 +#endif + +#if (SKIN_1BONE == SKIN_POS_NRM_TAN) || (SKIN_2BONES == SKIN_POS_NRM_TAN) || (SKIN_4BONES == SKIN_POS_NRM_TAN) + #define LOAD_TAN vldmia.32 r1!, {s16-s19} + #define STORE_TAN vstmia.32 r4!, {s6-s9} + #define CALC_TAN_1 FMULS3 (6,7,8, 20,21,22, 16,16,16) + #define CALC_TAN_2 FMACS3 (6,7,8, 24,25,26, 17,17,17) + #define CALC_TAN_3 FMACS3 (6,7,8, 28,29,30, 18,18,18) + #define CALC_TAN_4 fcpys s9, s19 +#else + #define LOAD_TAN + #define STORE_TAN + #define CALC_TAN_1 + #define CALC_TAN_2 + #define CALC_TAN_3 + #define CALC_TAN_4 +#endif + + + + +//=========================================================================================================================================== +// +// 1 bone skinning + +#if (SKIN_1BONE == SKIN_POS) || (SKIN_1BONE == SKIN_POS_NRM) || (SKIN_1BONE == SKIN_POS_NRM_TAN) + +mov ip, sp +vpush {d7-d15} +stmfd sp!, {r4,r5,r6,r7,r8,r10,r11} + +ldr r4, [ip, #0] + +ldr r5, [r3], #4 +add r5, r0, r5, lsl #6 +add r6, r5, #48 + +vldmia.32 r6, {s0-s2} +vldmia.32 r5!, {s20-s23} +vldmia.32 r5!, {s24-s27} + +.align 4 +LOOP_NAME: + +LOAD_POS_NRM + +CALC_POS_2 +CALC_NRM_1 + ldr r6, [r3], #4 // next matrix index +vldmia.32 r5, {s28-s30} // bone matrix + add r5, r0, r6, lsl #6 // next matrix addr + + +CALC_POS_3 +CALC_NRM_2 + +LOAD_TAN + add r6, r5, #48 + cmp r1, r2 + +CALC_TAN_1 + vldmiacc.32 r5!, {s20-s23} // next bone matrix + + +CALC_POS_4 + +CALC_TAN_2 +CALC_NRM_3 + vldmiacc.32 r5!, {s24-s27} // next bone matrix + +CALC_TAN_3 +CALC_TAN_4 + + pld [r1, #1024] + + +STORE_POS_NRM +STORE_TAN + + vldmiacc.32 r6, {s0-s2} + +bcc LOOP_NAME + +ldmfd sp!, {r4,r5,r6,r7,r8,r10,r11} +vpop {d7-d15} +bx lr + + +//=========================================================================================================================================== + 
+#elif (SKIN_2BONES == SKIN_POS) || (SKIN_2BONES == SKIN_POS_NRM) || (SKIN_2BONES == SKIN_POS_NRM_TAN) + +mov ip, sp +vpush {d7-d15} +stmfd sp!, {r4,r5,r6,r7,r8,r10,r11} + +ldr r4, [ip, #0] + + +.align 4 +LOOP_NAME: + +vldmia.32 r3!, {s3,s4} // w + ldmia r3!, {r5-r6} // idx + + add r5, r0, r5, lsl #6 // M0 + add r6, r0, r6, lsl #6 // M1 + + +vldmia.64 r5!, {d4,d5} // M0[0] + +vldmia.64 r6!, {d6,d7} // M1[0] +FMULS3 (20,21,22, 8,9,10, 3,3,3) // M0[0] * w + +vldmia.64 r5!, {d4,d5} // M0[1] +FMACS3 (20,21,22, 12,13,14, 4,4,4) // + M1[0] * w + +vldmia.64 r6!, {d6,d7} // M1[1] +FMULS3 (24,25,26, 8,9,10, 3,3,3) // M0[1] * w + +vldmia.64 r5!, {d4,d5} // M0[2] +FMACS3 (24,25,26, 12,13,14, 4,4,4) // + M1[1] * w + +vldmia.64 r6!, {d6,d7} // M1[2] +FMULS3 (28,29,30, 8,9,10, 3,3,3) // M0[2] * w + +vldmia.64 r5!, {d4,d5} // M0[3] +FMACS3 (28,29,30, 12,13,14, 4,4,4) // + M1[2] * w + +vldmia.64 r6!, {d6,d7} // M1[3] +FMULS3 (0,1,2, 8,9,10, 3,3,3) // M0[3] * w + +FMACS3 (0,1,2, 12,13,14, 4,4,4) // + M1[3] * w + + +LOAD_POS_NRM +LOAD_TAN + +CALC_POS_2 +CALC_NRM_1 +CALC_TAN_1 + +CALC_POS_3 +CALC_NRM_2 +CALC_TAN_2 + pld [r1, #1024] + cmp r1, r2 +CALC_POS_4 +CALC_NRM_3 +CALC_TAN_3 + +CALC_TAN_4 + + +STORE_POS_NRM +STORE_TAN + +bcc LOOP_NAME + +ldmfd sp!, {r4,r5,r6,r7,r8,r10,r11} +vpop {d7-d15} +bx lr + + + +//=========================================================================================================================================== + +#elif (SKIN_4BONES == SKIN_POS) || (SKIN_4BONES == SKIN_POS_NRM) || (SKIN_4BONES == SKIN_POS_NRM_TAN) + +mov ip, sp +vpush {d7-d15} +stmfd sp!, {r4,r5,r6,r7,r8} + +ldr r4, [ip, #0] + + +.align 4 +LOOP_NAME: + +vldmia.32 r3!, {s3-s6} // w + ldmia r3!, {r5-r8} // idx + + add r5, r0, r5, lsl #6 // M0 + add r6, r0, r6, lsl #6 // M1 + add r7, r0, r7, lsl #6 // M2 + add r8, r0, r8, lsl #6 // M3 + + +vldmia.64 r5!, {d4,d5} // M0[0] + +vldmia.64 r6!, {d6,d7} // M1[0] +FMULS3 (20,21,22, 8,9,10, 3,3,3) // M0[0] * w + +vldmia.64 r7!, {d4,d5} // M2[0] 
+FMACS3 (20,21,22, 12,13,14, 4,4,4) // + M1[0] * w + +vldmia.64 r8!, {d6,d7} // M3[0] +FMACS3 (20,21,22, 8,9,10, 5,5,5) // + M2[0] * w + +vldmia.64 r5!, {d4,d5} // M0[1] +FMACS3 (20,21,22, 12,13,14, 6,6,6) // + M3[0] * w + +vldmia.64 r6!, {d6,d7} // M1[1] +FMULS3 (24,25,26, 8,9,10, 3,3,3) // M0[1] * w + +vldmia.64 r7!, {d4,d5} // M2[1] +FMACS3 (24,25,26, 12,13,14, 4,4,4) // + M1[1] * w + +vldmia.64 r8!, {d6,d7} // M3[1] +FMACS3 (24,25,26, 8,9,10, 5,5,5) // + M2[1] * w + +vldmia.64 r5!, {d4,d5} // M0[2] +FMACS3 (24,25,26, 12,13,14, 6,6,6) // + M3[1] * w + +vldmia.64 r6!, {d6,d7} // M1[2] +FMULS3 (28,29,30, 8,9,10, 3,3,3) // M0[2] * w + +vldmia.64 r7!, {d4,d5} // M2[2] +FMACS3 (28,29,30, 12,13,14, 4,4,4) // + M1[2] * w + +vldmia.64 r8!, {d6,d7} // M3[2] +FMACS3 (28,29,30, 8,9,10, 5,5,5) // + M2[2] * w + +vldmia.64 r5!, {d4,d5} // M0[3] +FMACS3 (28,29,30, 12,13,14, 6,6,6) // + M3[2] * w + +vldmia.64 r6!, {d6,d7} // M1[3] +FMULS3 (0,1,2, 8,9,10, 3,3,3) // M0[3] * w + +vldmia.64 r7!, {d4,d5} // M2[3] +FMACS3 (0,1,2, 12,13,14, 4,4,4) // + M1[3] * w + +vldmia.64 r8!, {d6,d7} // M3[3] +FMACS3 (0,1,2, 8,9,10, 5,5,5) // + M2[3] * w + +FMACS3 (0,1,2, 12,13,14, 6,6,6) // + M3[3] * w + + +LOAD_POS_NRM +LOAD_TAN + +CALC_POS_2 +CALC_NRM_1 +CALC_TAN_1 + +CALC_POS_3 +CALC_NRM_2 +CALC_TAN_2 + pld [r1, #1024] + cmp r1, r2 +CALC_POS_4 +CALC_NRM_3 +CALC_TAN_3 + +CALC_TAN_4 + + +STORE_POS_NRM +STORE_TAN + +bcc LOOP_NAME + +ldmfd sp!, {r4,r5,r6,r7,r8} +vpop {d7-d15} +bx lr + +#endif + +//=========================================================================================================================================== + +#undef CALC_POS_1 +#undef CALC_POS_2 +#undef CALC_POS_3 +#undef STORE_POS_NRM +#undef LOAD_POS_NRM +#undef CALC_NRM_1 +#undef CALC_NRM_2 +#undef CALC_NRM_3 +#undef LOAD_TAN +#undef STORE_TAN +#undef CALC_TAN_1 +#undef CALC_TAN_2 +#undef CALC_TAN_3 +#undef CALC_TAN_4 diff --git a/Runtime/Filters/Mesh/MeshUtility.cpp b/Runtime/Filters/Mesh/MeshUtility.cpp new file 
mode 100644 index 0000000..75d8e7f --- /dev/null +++ b/Runtime/Filters/Mesh/MeshUtility.cpp @@ -0,0 +1,58 @@ +#include "UnityPrefix.h" +#include "MeshUtility.h" +#include "Runtime/Geometry/Plane.h" +#include "Mesh.h" + +using namespace std; + +void CalculateNormals (StrideIterator<Vector3f> verts, const UInt32* indices, int vertexCount, int triangleCount, StrideIterator<Vector3f> outNormals) +{ + std::fill_n (outNormals, vertexCount, Vector3f(0,0,0)); + + // Add normals from faces + int idx = 0; + for( int i = 0; i < triangleCount; ++i ) + { + UInt32 index0 = indices[idx+0]; + UInt32 index1 = indices[idx+1]; + UInt32 index2 = indices[idx+2]; + Vector3f faceNormal = CalcRawNormalFromTriangle( verts[index0], verts[index1], verts[index2] ); + outNormals[index0] += faceNormal; + outNormals[index1] += faceNormal; + outNormals[index2] += faceNormal; + idx += 3; + } + + // Normalize + for (StrideIterator<Vector3f> end = outNormals + vertexCount; outNormals != end; ++outNormals ) + { + *outNormals = NormalizeFast (*outNormals); + } +} + + +float CalculateSurfaceArea ( + const Matrix4x4f& objectToWorld, + const Mesh::TemporaryIndexContainer& triangles, + dynamic_array<Vector3f>& vertices) +{ + // transform the vertices to world space, + // do it in place since they are a copy + for (int i = 0; i < vertices.size (); i++) + vertices[i] = objectToWorld.MultiplyPoint3 (vertices[i]); + + // calculate the area + float cachedSurfaceArea = 0; + for (int i = 0; i < triangles.size () / 3; i++) + { + DebugAssert (triangles[3 * i] < vertices.size ()); + DebugAssert (triangles[3 * i + 1] < vertices.size ()); + DebugAssert (triangles[3 * i + 2] < vertices.size ()); + Vector3f a = vertices[triangles[3 * i]]; + Vector3f b = vertices[triangles[3 * i + 1]]; + Vector3f c = vertices[triangles[3 * i + 2]]; + cachedSurfaceArea += Magnitude (Cross (b - a, c - a)) * 0.5f; + } + + return cachedSurfaceArea; +} diff --git a/Runtime/Filters/Mesh/MeshUtility.h b/Runtime/Filters/Mesh/MeshUtility.h new 
file mode 100644 index 0000000..748c874 --- /dev/null +++ b/Runtime/Filters/Mesh/MeshUtility.h @@ -0,0 +1,42 @@ +#ifndef MESHUTILITY_H +#define MESHUTILITY_H + +#include "Runtime/Math/Vector3.h" +#include "Runtime/Math/Matrix4x4.h" +#include "Runtime/Math/Quaternion.h" +#include "Runtime/Filters/Mesh/LodMesh.h" +#include "Runtime/Utilities/StrideIterator.h" +#include "Runtime/Utilities/dynamic_array.h" + +struct Tangent; + +// Calculate normals for the mesh, given vertex array and triangle list (3 indices per triangle). +void CalculateNormals( StrideIterator<Vector3f> verts, const UInt32* indices, int vertexCount, int triangleCount, StrideIterator<Vector3f> outNormals ); + +float CalculateSurfaceArea (const Matrix4x4f& objectToWorld, const Mesh::TemporaryIndexContainer& triangles, dynamic_array<Vector3f>& vertices); + +// Use this to generate a normal from an tangent basis quickly +inline Vector3f NormalFromQuatTangentBasis (const Quaternionf& lhs) +{ + float x = lhs.x * 2.0F; + float y = lhs.y * 2.0F; + float z = lhs.z * 2.0F; + float xx = lhs.x * x; + float yy = lhs.y * y; + float xz = lhs.x * z; + float yz = lhs.y * z; + float wx = lhs.w * x; + float wy = lhs.w * y; + + Vector3f res; + res.x = xz - wy; + res.y = yz + wx; + res.z = 1.0f - xx - yy; + AssertIf (!CompareApproximately (res, RotateVectorByQuat(Inverse (lhs), Vector3f::zAxis))); + return res; +} + +//bool HasDegenerateTriangles (const Vector3f* verts, const MeshData &meshData, float degenerateArea = 0.0001); + + +#endif diff --git a/Runtime/Filters/Mesh/SkinGeneric.h b/Runtime/Filters/Mesh/SkinGeneric.h new file mode 100644 index 0000000..ef30d81 --- /dev/null +++ b/Runtime/Filters/Mesh/SkinGeneric.h @@ -0,0 +1,338 @@ +#ifndef SKINGENERIC_H +#define SKINGENERIC_H + +#include "Runtime/Filters/Mesh/VertexData.h" + +#if UNITY_PS3 +template<TransformInstruction transformInstruction, int bonesPerVertexCount, +bool skinNormal, bool skinTangent> +void SkinGenericStreamed (SkinMeshInfo& info) +{ + const int* 
influence1 = reinterpret_cast<const int*> (info.compactSkin); + const BoneInfluence2* influence2 = reinterpret_cast<const BoneInfluence2*> (info.compactSkin); + const BoneInfluence* influence4 = reinterpret_cast<const BoneInfluence*> (info.compactSkin); + + const Matrix4x4f* bones4x4 = info.cachedPose; + + int count = info.vertexCount; + + int vertexOffset = info.vertexData->GetStream(0).offset; + const int vertexStride = info.vertexData->GetStream(0).stride; + + int normalOffset = info.vertexData->GetStream(1).offset; + const int normalStride = info.vertexData->GetStream(1).stride; + + int tangentOffset = info.vertexData->GetStream(2).offset; + const int tangentStride = info.vertexData->GetStream(2).stride; + + const int copyDataOffset = info.vertexData->GetStream(3).offset; + const int copyDataSize = info.vertexData->GetStream(3).stride * info.vertexCount; + + const UInt8* inputVertex = (const UInt8*)info.inVertices; + UInt8* outputVertex = (UInt8*)info.outVertices; + + Matrix4x4f poseBlended; + const Matrix4x4f* poseToUse; + + for( int v = 0; v < count; v++ ) + { + ALIGN_LOOP_OPTIMIZATION + + Prefetch(inputVertex + 256); + + // Blend the matrices first, then transform everything with this + // blended matrix. Gives a small speed boost on XCode/Intel (11.3 to 12.00 FPS + // in skin4 bench), and a good boost on MSVC/Windows (9.6 to 12.4 FPS). 
+ if (bonesPerVertexCount == 1) + { + poseToUse = &bones4x4[*influence1]; + } + else if (bonesPerVertexCount == 2) + { + float weight0 = influence2->weight[0]; + float weight1 = influence2->weight[1]; + const float* b4x40 = bones4x4[influence2->boneIndex[0]].m_Data; + const float* b4x41 = bones4x4[influence2->boneIndex[1]].m_Data; + // we need only 12 components of the matrix + poseBlended.m_Data[ 0] = b4x40[ 0] * weight0 + b4x41[ 0] * weight1; + poseBlended.m_Data[ 1] = b4x40[ 1] * weight0 + b4x41[ 1] * weight1; + poseBlended.m_Data[ 2] = b4x40[ 2] * weight0 + b4x41[ 2] * weight1; + poseBlended.m_Data[ 4] = b4x40[ 4] * weight0 + b4x41[ 4] * weight1; + poseBlended.m_Data[ 5] = b4x40[ 5] * weight0 + b4x41[ 5] * weight1; + poseBlended.m_Data[ 6] = b4x40[ 6] * weight0 + b4x41[ 6] * weight1; + poseBlended.m_Data[ 8] = b4x40[ 8] * weight0 + b4x41[ 8] * weight1; + poseBlended.m_Data[ 9] = b4x40[ 9] * weight0 + b4x41[ 9] * weight1; + poseBlended.m_Data[10] = b4x40[10] * weight0 + b4x41[10] * weight1; + poseBlended.m_Data[12] = b4x40[12] * weight0 + b4x41[12] * weight1; + poseBlended.m_Data[13] = b4x40[13] * weight0 + b4x41[13] * weight1; + poseBlended.m_Data[14] = b4x40[14] * weight0 + b4x41[14] * weight1; + poseToUse = &poseBlended; + } + else if (bonesPerVertexCount == 4) + { + float weight0 = influence4->weight[0]; + float weight1 = influence4->weight[1]; + float weight2 = influence4->weight[2]; + float weight3 = influence4->weight[3]; + + const float* b4x40 = bones4x4[influence4->boneIndex[0]].m_Data; + const float* b4x41 = bones4x4[influence4->boneIndex[1]].m_Data; + const float* b4x42 = bones4x4[influence4->boneIndex[2]].m_Data; + const float* b4x43 = bones4x4[influence4->boneIndex[3]].m_Data; + // we need only 12 components of the matrix, so unroll + poseBlended.m_Data[ 0] = b4x40[ 0] * weight0 + b4x41[ 0] * weight1 + b4x42[ 0] * weight2 + b4x43[ 0] * weight3; + poseBlended.m_Data[ 1] = b4x40[ 1] * weight0 + b4x41[ 1] * weight1 + b4x42[ 1] * weight2 + b4x43[ 1] * 
weight3; + poseBlended.m_Data[ 2] = b4x40[ 2] * weight0 + b4x41[ 2] * weight1 + b4x42[ 2] * weight2 + b4x43[ 2] * weight3; + poseBlended.m_Data[ 4] = b4x40[ 4] * weight0 + b4x41[ 4] * weight1 + b4x42[ 4] * weight2 + b4x43[ 4] * weight3; + poseBlended.m_Data[ 5] = b4x40[ 5] * weight0 + b4x41[ 5] * weight1 + b4x42[ 5] * weight2 + b4x43[ 5] * weight3; + poseBlended.m_Data[ 6] = b4x40[ 6] * weight0 + b4x41[ 6] * weight1 + b4x42[ 6] * weight2 + b4x43[ 6] * weight3; + poseBlended.m_Data[ 8] = b4x40[ 8] * weight0 + b4x41[ 8] * weight1 + b4x42[ 8] * weight2 + b4x43[ 8] * weight3; + poseBlended.m_Data[ 9] = b4x40[ 9] * weight0 + b4x41[ 9] * weight1 + b4x42[ 9] * weight2 + b4x43[ 9] * weight3; + poseBlended.m_Data[10] = b4x40[10] * weight0 + b4x41[10] * weight1 + b4x42[10] * weight2 + b4x43[10] * weight3; + poseBlended.m_Data[12] = b4x40[12] * weight0 + b4x41[12] * weight1 + b4x42[12] * weight2 + b4x43[12] * weight3; + poseBlended.m_Data[13] = b4x40[13] * weight0 + b4x41[13] * weight1 + b4x42[13] * weight2 + b4x43[13] * weight3; + poseBlended.m_Data[14] = b4x40[14] * weight0 + b4x41[14] * weight1 + b4x42[14] * weight2 + b4x43[14] * weight3; + poseToUse = &poseBlended; + } + + // skin components + Vector3f outVertex, outNormal, outTangent; + const Vector3f* vertex = reinterpret_cast<const Vector3f*>( inputVertex + vertexOffset); + const Vector3f* normal = reinterpret_cast<const Vector3f*>( inputVertex + normalOffset ); + const Vector3f* tangent = reinterpret_cast<const Vector3f*>( inputVertex + tangentOffset ); + poseToUse->MultiplyPoint3( *vertex, outVertex ); + if( skinNormal ) + { + poseToUse->MultiplyVector3( *normal, outNormal ); + if (transformInstruction == kNormalizeFastest) + { + float sqr1 = SqrMagnitude( outNormal ); + float invsqrt1 = FastestInvSqrt (sqr1); + outNormal *= invsqrt1; + } + else if (transformInstruction == kNormalizeFast) + { + float sqr1 = SqrMagnitude( outNormal ); + float invsqrt1 = FastInvSqrt (sqr1); + outNormal *= invsqrt1; + } + } + if( 
skinTangent )
	{
		poseToUse->MultiplyVector3( *tangent, outTangent );
		if (transformInstruction == kNormalizeFastest)
		{
			float sqr1 = SqrMagnitude( outTangent );
			float invsqrt1 = FastestInvSqrt (sqr1);
			outTangent *= invsqrt1;
		}
		else if (transformInstruction == kNormalizeFast)
		{
			float sqr1 = SqrMagnitude( outTangent );
			float invsqrt1 = FastInvSqrt (sqr1);
			outTangent *= invsqrt1;
		}
	}

	// write data out
	*reinterpret_cast<Vector3f*> (outputVertex + vertexOffset) = outVertex;
	if( skinNormal )
	{
		*reinterpret_cast<Vector3f*>( outputVertex + normalOffset ) = outNormal;
	}
	if( skinTangent )
	{
		*reinterpret_cast<Vector3f*>( outputVertex + tangentOffset ) = outTangent;
		// tangent.w is not transformed; it is copied straight from the source vertex
		*reinterpret_cast<float*>( outputVertex + tangentOffset + sizeof(Vector3f) ) = *reinterpret_cast<const float*>( inputVertex + tangentOffset + sizeof(Vector3f) );
	}

	vertexOffset += vertexStride;
	normalOffset += normalStride;
	tangentOffset += tangentStride;

	// advance only the influence stream matching the compile-time bone count
	if (bonesPerVertexCount == 1)
		influence1++;
	else if (bonesPerVertexCount == 2)
		influence2++;
	if (bonesPerVertexCount == 4)
		influence4++;
	}

	// copy
	const UInt8* copyDataSrc = inputVertex + copyDataOffset;
	UInt8* copyDataDst = outputVertex + copyDataOffset;
	memcpy(copyDataDst, copyDataSrc, copyDataSize);
}
#endif

template<TransformInstruction transformInstruction, int bonesPerVertexCount,
	bool skinNormal, bool skinTangent>
void SkinGeneric (SkinMeshInfo& info);

// Generic CPU skinning loop. All four template parameters are compile-time
// constants, so each instantiation is a specialized inner loop with the
// per-vertex branches folded away. Transforms position (and optionally
// normal/tangent) of every vertex by a per-vertex blend of up to 4 bone
// matrices taken from info.cachedPose.
template<TransformInstruction transformInstruction, int bonesPerVertexCount,
	bool skinNormal, bool skinTangent>
void SkinGeneric (SkinMeshInfo& info)
{
#if UNITY_PS3
	// Vertex data split over more than 2 streams goes through the streamed variant.
	if(info.vertexData && (info.vertexData->GetActiveStreamCount() > 2))
		return SkinGenericStreamed<transformInstruction, bonesPerVertexCount, skinNormal, skinTangent>(info);
#endif
	// The three influence pointers alias the same compact buffer; only the one
	// matching bonesPerVertexCount is ever dereferenced or advanced.
	const int* influence1 = reinterpret_cast<const int*> (info.compactSkin);
	const BoneInfluence2* influence2 = reinterpret_cast<const BoneInfluence2*> (info.compactSkin);
	const BoneInfluence* influence4 = reinterpret_cast<const BoneInfluence*> (info.compactSkin);

	const Matrix4x4f* bones4x4 = info.cachedPose;

	const int inStride = info.inStride;
	int outStride = info.outStride;
	int count = info.vertexCount;

	const int normalOffset = info.normalOffset;
	const int tangentOffset = info.tangentOffset;

	const UInt8* inputVertex = (const UInt8*)info.inVertices;
	UInt8* outputVertex = (UInt8*)info.outVertices;

	Matrix4x4f poseBlended;
	const Matrix4x4f* poseToUse;


#if !ENABLE_MULTITHREADED_SKINNING
	PROFILER_AUTO(gMeshSkinningSlowpath, NULL);
#endif

	//;;printf_console("bonesPerVertexCount: %d, skinNormal: %d, normalOffset: %d, inStride: %d, copyDataSizeInts: %d, count: %d, boneCount: %d, outputVertex: %d\n",
	//	bonesPerVertexCount, (int)skinNormal, normalOffset, inStride, copyDataSizeInts, count, info.boneCount, (int)outputVertex);
	//;;uint64_t delta = mach_absolute_time();

	for( int v = 0; v < count; v++ )
	{
		ALIGN_LOOP_OPTIMIZATION

		Prefetch(inputVertex + 256);

		// Blend the matrices first, then transform everything with this
		// blended matrix. Gives a small speed boost on XCode/Intel (11.3 to 12.00 FPS
		// in skin4 bench), and a good boost on MSVC/Windows (9.6 to 12.4 FPS).
		if (bonesPerVertexCount == 1)
		{
			// single bone: use the pose matrix directly, no blending needed
			poseToUse = &bones4x4[*influence1];
		}
		else if (bonesPerVertexCount == 2)
		{
			float weight0 = influence2->weight[0];
			float weight1 = influence2->weight[1];
			const float* b4x40 = bones4x4[influence2->boneIndex[0]].m_Data;
			const float* b4x41 = bones4x4[influence2->boneIndex[1]].m_Data;
			// we need only 12 components of the matrix
			poseBlended.m_Data[ 0] = b4x40[ 0] * weight0 + b4x41[ 0] * weight1;
			poseBlended.m_Data[ 1] = b4x40[ 1] * weight0 + b4x41[ 1] * weight1;
			poseBlended.m_Data[ 2] = b4x40[ 2] * weight0 + b4x41[ 2] * weight1;
			poseBlended.m_Data[ 4] = b4x40[ 4] * weight0 + b4x41[ 4] * weight1;
			poseBlended.m_Data[ 5] = b4x40[ 5] * weight0 + b4x41[ 5] * weight1;
			poseBlended.m_Data[ 6] = b4x40[ 6] * weight0 + b4x41[ 6] * weight1;
			poseBlended.m_Data[ 8] = b4x40[ 8] * weight0 + b4x41[ 8] * weight1;
			poseBlended.m_Data[ 9] = b4x40[ 9] * weight0 + b4x41[ 9] * weight1;
			poseBlended.m_Data[10] = b4x40[10] * weight0 + b4x41[10] * weight1;
			poseBlended.m_Data[12] = b4x40[12] * weight0 + b4x41[12] * weight1;
			poseBlended.m_Data[13] = b4x40[13] * weight0 + b4x41[13] * weight1;
			poseBlended.m_Data[14] = b4x40[14] * weight0 + b4x41[14] * weight1;
			poseToUse = &poseBlended;
		}
		else if (bonesPerVertexCount == 4)
		{
			float weight0 = influence4->weight[0];
			float weight1 = influence4->weight[1];
			float weight2 = influence4->weight[2];
			float weight3 = influence4->weight[3];

			const float* b4x40 = bones4x4[influence4->boneIndex[0]].m_Data;
			const float* b4x41 = bones4x4[influence4->boneIndex[1]].m_Data;
			const float* b4x42 = bones4x4[influence4->boneIndex[2]].m_Data;
			const float* b4x43 = bones4x4[influence4->boneIndex[3]].m_Data;
			// we need only 12 components of the matrix, so unroll
			poseBlended.m_Data[ 0] = b4x40[ 0] * weight0 + b4x41[ 0] * weight1 + b4x42[ 0] * weight2 + b4x43[ 0] * weight3;
			poseBlended.m_Data[ 1] = b4x40[ 1] * weight0 + b4x41[ 1] * weight1 + b4x42[ 1] * weight2 + b4x43[ 1] * weight3;
			poseBlended.m_Data[ 2] = b4x40[ 2] * weight0 + b4x41[ 2] * weight1 + b4x42[ 2] * weight2 + b4x43[ 2] * weight3;
			poseBlended.m_Data[ 4] = b4x40[ 4] * weight0 + b4x41[ 4] * weight1 + b4x42[ 4] * weight2 + b4x43[ 4] * weight3;
			poseBlended.m_Data[ 5] = b4x40[ 5] * weight0 + b4x41[ 5] * weight1 + b4x42[ 5] * weight2 + b4x43[ 5] * weight3;
			poseBlended.m_Data[ 6] = b4x40[ 6] * weight0 + b4x41[ 6] * weight1 + b4x42[ 6] * weight2 + b4x43[ 6] * weight3;
			poseBlended.m_Data[ 8] = b4x40[ 8] * weight0 + b4x41[ 8] * weight1 + b4x42[ 8] * weight2 + b4x43[ 8] * weight3;
			poseBlended.m_Data[ 9] = b4x40[ 9] * weight0 + b4x41[ 9] * weight1 + b4x42[ 9] * weight2 + b4x43[ 9] * weight3;
			poseBlended.m_Data[10] = b4x40[10] * weight0 + b4x41[10] * weight1 + b4x42[10] * weight2 + b4x43[10] * weight3;
			poseBlended.m_Data[12] = b4x40[12] * weight0 + b4x41[12] * weight1 + b4x42[12] * weight2 + b4x43[12] * weight3;
			poseBlended.m_Data[13] = b4x40[13] * weight0 + b4x41[13] * weight1 + b4x42[13] * weight2 + b4x43[13] * weight3;
			poseBlended.m_Data[14] = b4x40[14] * weight0 + b4x41[14] * weight1 + b4x42[14] * weight2 + b4x43[14] * weight3;
			poseToUse = &poseBlended;
		}

		// skin components
		Vector3f outVertex, outNormal, outTangent;
		const Vector3f* vertex = reinterpret_cast<const Vector3f*>( inputVertex );
		const Vector3f* normal = reinterpret_cast<const Vector3f*>( inputVertex + normalOffset );
		const Vector3f* tangent = reinterpret_cast<const Vector3f*>( inputVertex + tangentOffset );
		poseToUse->MultiplyPoint3( *vertex, outVertex );
		if( skinNormal )
		{
			poseToUse->MultiplyVector3( *normal, outNormal );
			if (transformInstruction == kNormalizeFastest)
			{
				float sqr1 = SqrMagnitude( outNormal );
				float invsqrt1 = FastestInvSqrt (sqr1);
				outNormal *= invsqrt1;
			}
			else if (transformInstruction == kNormalizeFast)
			{
				float sqr1 = SqrMagnitude( outNormal );
				float invsqrt1 = FastInvSqrt (sqr1);
				outNormal *= invsqrt1;
			}
		}
		if( skinTangent )
		{
			poseToUse->MultiplyVector3( *tangent, outTangent );
			if (transformInstruction == kNormalizeFastest)
			{
				float sqr1 = SqrMagnitude( outTangent );
				float invsqrt1 = FastestInvSqrt (sqr1);
				outTangent *= invsqrt1;
			}
			else if (transformInstruction == kNormalizeFast)
			{
				float sqr1 = SqrMagnitude( outTangent );
				float invsqrt1 = FastInvSqrt (sqr1);
				outTangent *= invsqrt1;
			}
		}

		// write data out
		*reinterpret_cast<Vector3f*> (outputVertex) = outVertex;
		if( skinNormal )
		{
			*reinterpret_cast<Vector3f*>( outputVertex + normalOffset ) = outNormal;
		}

		if( skinTangent )
		{
			*reinterpret_cast<Vector3f*>( outputVertex + tangentOffset ) = outTangent;
			// tangent.w is copied through untransformed
			*reinterpret_cast<float*>( outputVertex + tangentOffset + sizeof(Vector3f) ) = *reinterpret_cast<const float*>( inputVertex + tangentOffset + sizeof(Vector3f) );
		}

		outputVertex += outStride;
		inputVertex += inStride;

		// advance only the influence stream matching the compile-time bone count
		if (bonesPerVertexCount == 1)
			influence1++;
		else if (bonesPerVertexCount == 2)
			influence2++;
		if (bonesPerVertexCount == 4)
			influence4++;
	}

	//;;static int frameCount = 0; frameCount++;
	//delta = mach_absolute_time() - delta;
	//;;static uint64_t deltaAccum = 0; deltaAccum += (int)(delta);
	//;;printf_console("skin-c: %d %d\n", (int)(deltaAccum / frameCount), (int)delta);
}

#endif
diff --git a/Runtime/Filters/Mesh/SpriteRenderer.cpp b/Runtime/Filters/Mesh/SpriteRenderer.cpp new file mode 100644 index 0000000..4ce85a1 --- /dev/null +++ b/Runtime/Filters/Mesh/SpriteRenderer.cpp @@ -0,0 +1,338 @@
#include "UnityPrefix.h"
#include "SpriteRenderer.h"

#if ENABLE_SPRITES

#include "Runtime/Serialize/TransferFunctions/SerializeTransfer.h"
#include "Runtime/Graphics/SpriteFrame.h"
#include "Runtime/Graphics/Texture.h"
#include "Runtime/Graphics/Texture2D.h"
#include "Runtime/GfxDevice/GfxDevice.h"
#include "Runtime/Profiler/Profiler.h"
#include "Runtime/Profiler/ExternalGraphicsProfiler.h"
#include "Runtime/Shaders/Material.h"
#include
"Runtime/Shaders/ShaderNameRegistry.h"
#include "Runtime/Shaders/VBO.h"
#include "Runtime/Filters/Mesh/TransformVertex.h"
#include "Runtime/GfxDevice/BatchRendering.h"
#include "Runtime/Math/Color.h"
#include "Runtime/Core/Callbacks/GlobalCallbacks.h"
#include "Runtime/Misc/ResourceManager.h"
#include "Runtime/BaseClasses/Tags.h"
#include "SpriteRendererAnimationBinding.h"


PROFILER_INFORMATION(gSpriteRenderSingleProfile, "SpriteRenderer.RenderSingle", kProfilerRender)
PROFILER_INFORMATION(gSpriteRenderBatchProfile, "SpriteRenderer.RenderBatch", kProfilerRender)
PROFILER_INFORMATION(gSpriteRenderSubmitVBO, "Mesh.SubmitVBO", kProfilerRender)

const float kSpriteScaleEpsilon = 0.0001f;
// NOTE(review): despite the name, RenderMultiple compares this against accumulated
// *index* counts, not triangle counts — confirm intended unit.
#define kMaxNumSpriteTrianglesPerBatch (2*1024)

static const char* const kDefaultSpriteShader = "Sprites/Default";
static const char* const kDefaultSpriteMaterial = "Sprites-Default.mat";

static SHADERPROP (MainTex);
static SHADERPROP (MainTex_TexelSize);
static Material* gSpriteDefaultMaterial = NULL;

// Resolves the built-in default sprite material; run once graphics is initialized
// (registered below via the initializedEngineGraphics callback).
static void InitDefaultSpriteMaterial()
{
	Assert(gSpriteDefaultMaterial == NULL);
	gSpriteDefaultMaterial = GetBuiltinResource<Material>(kDefaultSpriteMaterial);
}

IMPLEMENT_CLASS_HAS_INIT (SpriteRenderer)
IMPLEMENT_OBJECT_SERIALIZE (SpriteRenderer)

SpriteRenderer::SpriteRenderer (MemLabelId label, ObjectCreationMode mode)
:	Super(kRendererSprite, label, mode)
,	m_Color(1.0F, 1.0F, 1.0F, 1.0F)
{
	// Sprites do not participate in the shadow pass.
	m_CastShadows = false;
	m_ReceiveShadows = false;
}

SpriteRenderer::~SpriteRenderer ()
{
}

// Converts a color to the device vertex-color format, routing through the
// gamma -> active color space conversion when rendering in linear space.
inline ColorRGBA32 GetDeviceColor (const ColorRGBAf& color, GfxDevice& device)
{
	if (GetActiveColorSpace () == kLinearColorSpace)
		return device.ConvertToDeviceVertexColor(GammaToActiveColorSpace(color));
	else
		return device.ConvertToDeviceVertexColor(color);
}

void SpriteRenderer::InitializeClass ()
{
	REGISTER_GLOBAL_CALLBACK(initializedEngineGraphics, InitDefaultSpriteMaterial());
	InitializeSpriteRendererAnimationBindingInterface();
}

void SpriteRenderer::CleanupClass ()
{
	CleanupSpriteRendererAnimationBindingInterface ();
	gSpriteDefaultMaterial = NULL;
}

// Serializes the sprite reference and tint color on top of the base Renderer state.
template<class TransferFunction>
void SpriteRenderer::Transfer(TransferFunction& transfer)
{
	Super::Transfer (transfer);
	TRANSFER (m_Sprite);
	TRANSFER (m_Color);
}

void SpriteRenderer::UpdateLocalAABB ()
{
	if (m_Sprite.IsValid())
	{
		//TODO: calculate AABB from RenderData.
		m_TransformInfo.localAABB = m_Sprite->GetBounds();
	}
	else
	{
		// No sprite assigned: collapse bounds to a point at the origin.
		m_TransformInfo.localAABB.SetCenterAndExtent(Vector3f::zero, Vector3f::zero);
	}
}

void SpriteRenderer::UpdateTransformInfo ()
{
	Transform const& transform = GetTransform();
	if (m_TransformDirty)
	{
		// will return a cached matrix most of the time
		TransformType type = transform.CalculateTransformMatrix (m_TransformInfo.worldMatrix);

		// Always treat sprites has having a non-uniform scale. Will make them batch better
		// (since we break batches on transform type changes). And does not have any negative effects
		// since uniform vs. non-uniform scale only affects fixed function vertex normals, which
		// aren't relevant here.
		type &= ~kUniformScaleTransform;
		type |= kNonUniformScaleTransform;
		m_TransformInfo.transformType = type;

		// Likewise, treat inverse scale as always being 1.
		m_TransformInfo.invScale = 1.0f;
	}

	if (m_BoundsDirty)
		UpdateLocalAABB();

	TransformAABBSlow(m_TransformInfo.localAABB, m_TransformInfo.worldMatrix, m_TransformInfo.worldAABB);
}

void SpriteRenderer::SetSprite(PPtr<Sprite> sprite)
{
	if (m_Sprite != sprite)
	{
		m_Sprite = sprite;
		// New sprite means new bounds and a new texture in the property block.
		BoundsChanged();
		SetupMaterialProperties();

		SetDirty();
	}
}

void SpriteRenderer::AwakeFromLoad (AwakeFromLoadMode awakeMode)
{
	Super::AwakeFromLoad(awakeMode);
	BoundsChanged();
	SetupMaterialProperties();
}

void SpriteRenderer::SmartReset ()
{
	SetMaterialCount(1);
	SetMaterial(GetDefaultSpriteMaterial(), 0);
}

void SpriteRenderer::SetupMaterialProperties()
{
	if (m_Sprite.IsNull())
		return;

	// Patch sprite texture and apply material property block
	MaterialPropertyBlock& block = GetPropertyBlockRememberToUpdateHash ();
	SetupMaterialPropertyBlock(block, GetSpriteRenderDataInContext(m_Sprite)->texture);
	ComputeCustomPropertiesHash ();
}

// Writes the sprite's texture and its texel-size vector into the given property
// block; a NULL texture writes TextureID(0) and a zero texel-size vector.
void SpriteRenderer::SetupMaterialPropertyBlock(MaterialPropertyBlock& block, const Texture2D* spriteTexture)
{
	const TextureID id = spriteTexture ? spriteTexture->GetTextureID() : TextureID(0);
	const Vector4f texelSize = spriteTexture ? Vector4f(spriteTexture->GetTexelSizeX(), spriteTexture->GetTexelSizeY(), spriteTexture->GetGLWidth(), spriteTexture->GetGLHeight()) : Vector4f(0, 0, 0, 0);

	block.ReplacePropertyTexture(kSLPropMainTex, kTexDim2D, id);
	block.ReplacePropertyVector(kSLPropMainTex_TexelSize, texelSize);
}

const SpriteRenderData* SpriteRenderer::GetSpriteRenderDataInContext(const PPtr<Sprite>& frame)
{
	//@Note: this is here for a possible contextual atlas implementation.
+ return &frame->GetRenderDataForPlayMode(); +} + +void SpriteRenderer::Render (int materialIndex, const ChannelAssigns& channels) +{ + GfxDevice& device = GetGfxDevice(); + + Assert(materialIndex == 0); + if (m_Sprite.IsNull()) + return; + + const SpriteRenderData* rd = GetSpriteRenderDataInContext(m_Sprite); + Assert(rd->texture.IsValid()); + + PROFILER_AUTO_GFX(gSpriteRenderSingleProfile, this); + + // Get VBO chunk for a rectangle or mesh + UInt32 numIndices, numVertices; + GetGeometrySize(numIndices, numVertices); + if (!numIndices) + return; + + const UInt32 channelMask = (1<<kShaderChannelVertex) | (1<<kShaderChannelTexCoord0) | (1<<kShaderChannelColor); + + DynamicVBO& vbo = device.GetDynamicVBO(); + UInt8* __restrict vbPtr; + UInt16* __restrict ibPtr; + if ( !vbo.GetChunk(channelMask, numVertices, numIndices, DynamicVBO::kDrawIndexedTriangles, (void**)&vbPtr, (void**)&ibPtr) ) + return; + + TransformSprite (vbPtr, ibPtr, NULL, rd, GetDeviceColor (m_Color, device), 0); + vbo.ReleaseChunk(numVertices, numIndices); + + // Draw + if (m_CustomProperties) + device.SetMaterialProperties(*m_CustomProperties); + + PROFILER_BEGIN(gSpriteRenderSubmitVBO, this) + vbo.DrawChunk(channels); + GPU_TIMESTAMP(); + PROFILER_END +} + +void SpriteRenderer::GetGeometrySize(UInt32& indexCount, UInt32& vertexCount) +{ + if (m_Sprite.IsValid()) + { + const SpriteRenderData* rd = GetSpriteRenderDataInContext(m_Sprite); + if (rd->indices.size() > 0) + { + indexCount = rd->indices.size(); + vertexCount = rd->vertices.size(); + return; + } + } + + indexCount = 0; + vertexCount = 0; +} + +#if GFX_ENABLE_DRAW_CALL_BATCHING +void SpriteRenderer::RenderBatch (const BatchInstanceData* instances, size_t count, size_t numIndices, size_t numVertices, const ChannelAssigns& channels) +{ + DebugAssert(numIndices); + DebugAssert(numVertices); + PROFILER_AUTO_GFX(gSpriteRenderBatchProfile, 0); + + GfxDevice& device = GetGfxDevice(); + const MaterialPropertyBlock* customProps = count > 0 ? 
instances[0].renderer->GetCustomProperties() : NULL; + if (customProps) + device.SetMaterialProperties (*customProps); + + UInt32 expectedFence = device.GetNextCPUFence(); + const UInt32 channelMask = (1<<kShaderChannelVertex) | (1<<kShaderChannelTexCoord0) | (1<<kShaderChannelColor);; + device.BeginDynamicBatching(channels, channelMask, numVertices, numIndices, kPrimitiveTriangles); + + for (BatchInstanceData const* it = instances; it < instances + count; ++it) + { + UInt32 numIndices, numVertices; + + Assert(it->renderer); + Assert(it->renderer->GetRendererType() == kRendererSprite); + SpriteRenderer* renderer = (SpriteRenderer*)it->renderer; + renderer->GetGeometrySize(numIndices, numVertices); + if (!numIndices) + continue; + + const SpriteRenderData *rd = renderer->GetSpriteRenderDataInContext(renderer->m_Sprite); + Assert(rd->texture.IsValid()); + +#if ENABLE_MULTITHREADED_CODE + renderer->m_Sprite->SetCurrentCPUFence(expectedFence); +#endif + device.DynamicBatchSprite(&it->xform, rd, GetDeviceColor(renderer->m_Color, device)); + } + device.SetInverseScale(1.0f); + device.EndDynamicBatching(TransformType(kNoScaleTransform)); + + // Insert fence after batching is complete + UInt32 fence = device.InsertCPUFence(); + Assert(fence == expectedFence); + GPU_TIMESTAMP(); +} + +void SpriteRenderer::RenderMultiple (const BatchInstanceData* instances, size_t count, const ChannelAssigns& channels) +{ + size_t numIndicesBatch = 0; + size_t numVerticesBatch = 0; + + BatchInstanceData const* instancesEnd = instances + count; + BatchInstanceData const* iBatchBegin = instances; + BatchInstanceData const* iBatchEnd = instances; + while (iBatchEnd != instancesEnd) + { + Assert(iBatchEnd->renderer->GetRendererType() == kRendererSprite); + SpriteRenderer* renderer = (SpriteRenderer*)iBatchEnd->renderer; + + if (renderer->GetSprite().IsNull()) + { + iBatchEnd++; + continue; + } + + UInt32 numIndices, numVertices; + renderer->GetGeometrySize(numIndices, numVertices); + + if 
((numIndicesBatch + numIndices) <= kMaxNumSpriteTrianglesPerBatch) + { + numIndicesBatch += numIndices; + numVerticesBatch += numVertices; + iBatchEnd++; + } + else + { + if (numIndicesBatch) + { + RenderBatch(iBatchBegin, iBatchEnd - iBatchBegin, numIndicesBatch, numVerticesBatch, channels); + numIndicesBatch = 0; + numVerticesBatch = 0; + iBatchBegin = iBatchEnd; + } + else // Can't fit in one draw call + { + RenderBatch(iBatchEnd, 1, numIndices, numVertices, channels); + iBatchEnd++; + iBatchBegin = iBatchEnd; + } + } + } + + if ((iBatchBegin != iBatchEnd) && numIndicesBatch) + { + RenderBatch(iBatchBegin, iBatchEnd - iBatchBegin, numIndicesBatch, numVerticesBatch, channels); + } +} +#endif + +Material* SpriteRenderer::GetDefaultSpriteMaterial () +{ + Assert(gSpriteDefaultMaterial); + return gSpriteDefaultMaterial; +} + +#endif // ENABLE_SPRITES diff --git a/Runtime/Filters/Mesh/SpriteRenderer.h b/Runtime/Filters/Mesh/SpriteRenderer.h new file mode 100644 index 0000000..0bf47b9 --- /dev/null +++ b/Runtime/Filters/Mesh/SpriteRenderer.h @@ -0,0 +1,60 @@ +#ifndef SPRITERENDERER_H +#define SPRITERENDERER_H +#include "Configuration/UnityConfigure.h" + +#if ENABLE_SPRITES + +#include "Runtime/GfxDevice/ChannelAssigns.h" +#include "Runtime/Filters/Renderer.h" +#include "Runtime/Shaders/Material.h" +#include "Runtime/Graphics/SpriteFrame.h" + +class SpriteRenderer : public Renderer +{ +public: + REGISTER_DERIVED_CLASS (SpriteRenderer, Renderer) + DECLARE_OBJECT_SERIALIZE (SpriteRenderer) + + SpriteRenderer (MemLabelId label, ObjectCreationMode mode); + // ~SpriteRenderer (); declared-by-macro + + static bool IsSealedClass () { return true; } + static void InitializeClass (); + static void CleanupClass (); + + virtual void AwakeFromLoad (AwakeFromLoadMode awakeMode); + virtual void SmartReset (); + + virtual void UpdateTransformInfo(); + virtual void UpdateLocalAABB (); + virtual void Render (int materialIndex, const ChannelAssigns& channels); +#if 
GFX_ENABLE_DRAW_CALL_BATCHING
	static void RenderMultiple (const BatchInstanceData* instances, size_t count, const ChannelAssigns& channels);
#endif
	PPtr<Sprite> GetSprite() const { return m_Sprite; }
	void SetSprite(PPtr<Sprite> sprite);

	ColorRGBAf GetColor() const { return m_Color; }
	void SetColor(const ColorRGBAf& color) { m_Color = color; }

	static void SetupMaterialPropertyBlock(MaterialPropertyBlock& block, const Texture2D* spriteTexture);

	static Material* GetDefaultSpriteMaterial();

private:
	PPtr<Sprite> m_Sprite;
	ColorRGBAf m_Color;

	void SetupMaterialProperties();
	void GetGeometrySize(UInt32& indexCount, UInt32& vertexCount);

#if GFX_ENABLE_DRAW_CALL_BATCHING
	static void RenderBatch (const BatchInstanceData* instances, size_t count, size_t numIndices, size_t numVertices, const ChannelAssigns& channels);
#endif
	// Context
	const SpriteRenderData* GetSpriteRenderDataInContext(const PPtr<Sprite>& frame);
};

#endif //ENABLE_SPRITES

#endif
diff --git a/Runtime/Filters/Mesh/SpriteRendererAnimationBinding.cpp b/Runtime/Filters/Mesh/SpriteRendererAnimationBinding.cpp new file mode 100644 index 0000000..a36406f --- /dev/null +++ b/Runtime/Filters/Mesh/SpriteRendererAnimationBinding.cpp @@ -0,0 +1,68 @@
#include "UnityPrefix.h"
#include "Runtime/Animation/GenericAnimationBindingCache.h"
#include "Runtime/Animation/AnimationClipBindings.h"
#include "SpriteRenderer.h"
#include "Runtime/Interfaces/IAnimationBinding.h"

#if ENABLE_SPRITES

// Serialized property name of SpriteRenderer's sprite reference.
static const char* kSpriteFrame = "m_Sprite";

// Animation binding that drives SpriteRenderer's m_Sprite reference as a
// PPtr curve (float curves are not supported by this binding).
class SpriteRendererAnimationBinding : public IAnimationBinding
{
public:

#if UNITY_EDITOR
	virtual void GetAllAnimatableProperties (Object& targetObject, std::vector<EditorCurveBinding>& outProperties) const
	{
		AddPPtrBinding (outProperties, ClassID(SpriteRenderer), kSpriteFrame);
	}
#endif

	// Only PPtr curves are meaningful here; float accessors are inert stubs.
	virtual float GetFloatValue (const UnityEngine::Animation::BoundCurve& bind) const { return 0.0F; }
	virtual void SetFloatValue (const UnityEngine::Animation::BoundCurve& bind, float value) const { }

	virtual void SetPPtrValue (const UnityEngine::Animation::BoundCurve& bound, SInt32 value) const
	{
		SpriteRenderer* renderer = reinterpret_cast<SpriteRenderer*>(bound.targetObject);
		renderer->SetSprite(PPtr<Sprite> (value));
	}

	virtual SInt32 GetPPtrValue (const UnityEngine::Animation::BoundCurve& bound) const
	{
		SpriteRenderer* renderer = reinterpret_cast<SpriteRenderer*>(bound.targetObject);
		return renderer->GetSprite().GetInstanceID();
	}

	virtual bool GenerateBinding (const UnityStr& attribute, bool pptrCurve, UnityEngine::Animation::GenericBinding& outputBinding) const
	{
		// Only the m_Sprite attribute, and only as a PPtr curve, can be bound.
		if (attribute == kSpriteFrame && pptrCurve)
		{
			outputBinding.attribute = 0;
			return true;
		}

		return false;
	}

	virtual ClassIDType BindValue (Object& target, const UnityEngine::Animation::GenericBinding& inputBinding, UnityEngine::Animation::BoundCurve& bound) const
	{
		return ClassID(Sprite);
	}
};

static SpriteRendererAnimationBinding* gSpriteRendererBinding = NULL;

void InitializeSpriteRendererAnimationBindingInterface ()
{
	Assert(gSpriteRendererBinding == NULL);
	gSpriteRendererBinding = UNITY_NEW (SpriteRendererAnimationBinding, kMemAnimation);
	UnityEngine::Animation::GetGenericAnimationBindingCache ().RegisterIAnimationBinding (ClassID(SpriteRenderer), UnityEngine::Animation::kSpriteRendererPPtrBinding, gSpriteRendererBinding);
}

void CleanupSpriteRendererAnimationBindingInterface ()
{
	// NOTE(review): the pointer is not reset to NULL here, yet Initialize asserts
	// it is NULL — confirm UNITY_DELETE nulls its argument before relying on
	// cleanup/re-init cycles.
	UNITY_DELETE (gSpriteRendererBinding, kMemAnimation);
}

#endif
\ No newline at end of file diff --git a/Runtime/Filters/Mesh/SpriteRendererAnimationBinding.h b/Runtime/Filters/Mesh/SpriteRendererAnimationBinding.h new file mode 100644 index 0000000..63e2731 --- /dev/null +++ b/Runtime/Filters/Mesh/SpriteRendererAnimationBinding.h @@ -0,0 +1,2 @@
// Registers / unregisters the SpriteRenderer sprite-PPtr animation binding
// with the generic animation binding cache (see SpriteRendererAnimationBinding.cpp).
void InitializeSpriteRendererAnimationBindingInterface ();
void CleanupSpriteRendererAnimationBindingInterface ();
\ No newline at end of file diff --git a/Runtime/Filters/Mesh/TransformVertex.cpp b/Runtime/Filters/Mesh/TransformVertex.cpp new file mode 100644 index 0000000..e9bebc1 --- /dev/null +++ b/Runtime/Filters/Mesh/TransformVertex.cpp @@ -0,0 +1,205 @@
#include "UnityPrefix.h"
#include "TransformVertex.h"

#include "Runtime/Math/Matrix4x4.h"
#include "Runtime/Math/Vector4.h"
#include "Runtime/Math/Vector3.h"
#include "Runtime/Math/Vector2.h"
#include "Runtime/Math/Color.h"

#include "Runtime/Misc/CPUInfo.h"

// Reference (portable C++) vertex-transform path: transforms positions by m and
// normals/tangents by a scale-compensated copy of m, copying color/UV channels
// through unchanged into a tightly packed destination buffer. Channels whose
// iterator has a NULL pointer are absent. Note multiStream is unused here;
// only the ARM dispatcher inspects it when choosing this fallback.
void
TransformVerticesStridedREF( StrideIterator<Vector3f> inPos, StrideIterator<Vector3f> inNormal,
	StrideIterator<ColorRGBA32> inColor, StrideIterator<Vector2f> inTexCoord0, StrideIterator<Vector2f> inTexCoord1,
	StrideIterator<Vector4f> inTangent,
	UInt8* dstData, const Matrix4x4f& m, unsigned vertexCount, bool multiStream )
{
	// NOTE: kill this code once all shaders normalize normals & tangents!
	//
	// We batch uniformly scaled objects, so derive the "normal matrix" here by scaling world matrix axes.
	// On reference code seems much cheaper than full normalization of normal/tangent vectors.
	// Test with scene of 200k vertices on Core i7 2600K: no handling of scale 3.77ms, normalization 8.00ms,
	// using scaled normal matrix 3.80ms.
	//
	// Note that ARM NEON/VFP transformation code does not handle this, but it's not needed on GLES platforms
	// since shaders always normalize normal & tangent. Might be needed on WinRT+ARM though (or just disable
	// dynamic batching with tangents there).
	Matrix4x4f nm;
	CopyMatrix(m.GetPtr(), nm.GetPtr());
	const float axisLen = Magnitude (m.GetAxisX());
	// guard against degenerate (near-zero) scale
	float scale = axisLen > 1.0e-6f ? 1.0f / axisLen : 1.0f;
	nm.Get (0, 0) *= scale;
	nm.Get (1, 0) *= scale;
	nm.Get (2, 0) *= scale;
	nm.Get (0, 1) *= scale;
	nm.Get (1, 1) *= scale;
	nm.Get (2, 1) *= scale;
	nm.Get (0, 2) *= scale;
	nm.Get (1, 2) *= scale;
	nm.Get (2, 2) *= scale;

	while (vertexCount --> 0)
	{
		Vector3f* outPos = reinterpret_cast<Vector3f*> (dstData);
		m.MultiplyPoint3(*inPos, *outPos);
		dstData += sizeof(Vector3f);
		++inPos;

		if (inNormal.GetPointer())
		{
			Vector3f* outNormal = reinterpret_cast<Vector3f*> (dstData);
			nm.MultiplyVector3(*inNormal, *outNormal);
			dstData += sizeof(Vector3f);
			++inNormal;
		}

		if (inColor.GetPointer())
		{
			memcpy(dstData, inColor.GetPointer(), sizeof(ColorRGBA32));
			dstData += sizeof(ColorRGBA32);
			++inColor;
		}

		if (inTexCoord0.GetPointer())
		{
			memcpy(dstData, inTexCoord0.GetPointer(), sizeof(Vector2f));
			dstData += sizeof(Vector2f);
			++inTexCoord0;
		}

		if (inTexCoord1.GetPointer())
		{
			memcpy(dstData, inTexCoord1.GetPointer(), sizeof(Vector2f));
			dstData += sizeof(Vector2f);
			++inTexCoord1;
		}

		if (inTangent.GetPointer())
		{
			Vector4f* outTangent = reinterpret_cast<Vector4f*> (dstData);
			Vector3f* outTangentXYZ = reinterpret_cast<Vector3f*> (outTangent);
			// transform only xyz; w (handedness) is copied through
			nm.MultiplyVector3(reinterpret_cast<const Vector3f&>(*inTangent), *outTangentXYZ);
			outTangent->w = inTangent->w;
			dstData += sizeof(Vector4f);
			++inTangent;
		}
	}
}



#if (UNITY_SUPPORTS_NEON && !UNITY_DISABLE_NEON_SKINNING) || UNITY_SUPPORTS_VFP

// Signatures of the assembly transform routines (without / with tangents).
typedef void (*TransformFunc)( const void*, const void*, const void*, const float*, void*, int );
typedef void (*TransformFuncWithTangents)( const void*, const void*, const void*, const float*, void*, int, const void* );


#if UNITY_SUPPORTS_NEON
namespace TransformNEON
{
	#define TRANSFORM_FUNC(prefix, addData) s_TransformVertices_Strided_##prefix##_##addData##_NEON

	// Dispatch tables indexed by the amount of pass-through data (see
	// addDataSize in TransformVerticesStridedARM below).
	TransformFunc TransformXYZ[] =
	{
		TRANSFORM_FUNC(XYZ,0), TRANSFORM_FUNC(XYZ,1), TRANSFORM_FUNC(XYZ,2),
TRANSFORM_FUNC(XYZ,3), TRANSFORM_FUNC(XYZ,4), TRANSFORM_FUNC(XYZ,5)
	};

	TransformFunc TransformXYZN[] =
	{
		TRANSFORM_FUNC(XYZN,0), TRANSFORM_FUNC(XYZN,1), TRANSFORM_FUNC(XYZN,2), TRANSFORM_FUNC(XYZN,3), TRANSFORM_FUNC(XYZN,4), TRANSFORM_FUNC(XYZN,5)
	};

	TransformFuncWithTangents TransformXYZNT[] =
	{
		TRANSFORM_FUNC(XYZNT,0), TRANSFORM_FUNC(XYZNT,1), TRANSFORM_FUNC(XYZNT,2), TRANSFORM_FUNC(XYZNT,3), TRANSFORM_FUNC(XYZNT,4), TRANSFORM_FUNC(XYZNT,5)
	};

	#undef TRANSFORM_FUNC
}
#endif // UNITY_SUPPORTS_NEON


#if UNITY_SUPPORTS_VFP
namespace TransformVFP
{
	#define TRANSFORM_FUNC(prefix, addData) s_TransformVertices_Strided_##prefix##_##addData##_VFP

	// Same tables as TransformNEON, pointing at the VFP assembly routines.
	TransformFunc TransformXYZ[] =
	{
		TRANSFORM_FUNC(XYZ,0), TRANSFORM_FUNC(XYZ,1), TRANSFORM_FUNC(XYZ,2), TRANSFORM_FUNC(XYZ,3), TRANSFORM_FUNC(XYZ,4), TRANSFORM_FUNC(XYZ,5)
	};

	TransformFunc TransformXYZN[] =
	{
		TRANSFORM_FUNC(XYZN,0), TRANSFORM_FUNC(XYZN,1), TRANSFORM_FUNC(XYZN,2), TRANSFORM_FUNC(XYZN,3), TRANSFORM_FUNC(XYZN,4), TRANSFORM_FUNC(XYZN,5)
	};

	TransformFuncWithTangents TransformXYZNT[] =
	{
		TRANSFORM_FUNC(XYZNT,0), TRANSFORM_FUNC(XYZNT,1), TRANSFORM_FUNC(XYZNT,2), TRANSFORM_FUNC(XYZNT,3), TRANSFORM_FUNC(XYZNT,4), TRANSFORM_FUNC(XYZNT,5)
	};

	#undef TRANSFORM_FUNC
}
#endif // UNITY_SUPPORTS_VFP

// ARM entry point: selects a specialized NEON (preferred, when available at
// runtime) or VFP assembly routine based on which channels are present, and
// falls back to the reference implementation for layouts the assembly does
// not handle (or for multi-stream data).
void
TransformVerticesStridedARM( StrideIterator<Vector3f> inPos, StrideIterator<Vector3f> inNormal,
	StrideIterator<ColorRGBA32> inColor, StrideIterator<Vector2f> inTexCoord0, StrideIterator<Vector2f> inTexCoord1,
	StrideIterator<Vector4f> inTangent,
	UInt8* dstData, const Matrix4x4f& m, unsigned vertexCount, bool multiStream )
{
	// Pass-through data size in 4-byte units: color (ColorRGBA32) = 1, each UV (Vector2f) = 2.
	int addDataSize = 0;
	if( inColor.GetPointer() ) addDataSize += 1;
	if( inTexCoord0.GetPointer() ) addDataSize += 2;
	if( inTexCoord1.GetPointer() ) addDataSize += 2;

	// First present pass-through channel; the assembly copies from here.
	const void* addDataSrc = 0;
	if( inColor.GetPointer() ) addDataSrc = inColor.GetPointer();
	else if( inTexCoord0.GetPointer() ) addDataSrc = inTexCoord0.GetPointer();
	else if( inTexCoord1.GetPointer() ) addDataSrc = inTexCoord1.GetPointer();

	// slow path determination
	if( (inColor.GetPointer() && inTexCoord1.GetPointer() && !inTexCoord0.GetPointer())
		|| (inTangent.GetPointer() && !inNormal.GetPointer()) || multiStream )
	{
		TransformVerticesStridedREF(inPos, inNormal, inColor, inTexCoord0, inTexCoord1, inTangent, dstData, m, vertexCount, multiStream);
		return;
	}

	int stride = inPos.GetStride();
	const UInt8* inDataBegin = static_cast<const UInt8*>(inPos.GetPointer());
	const UInt8* inDataEnd = inDataBegin + vertexCount * stride;

#if UNITY_SUPPORTS_NEON
	if (CPUInfo::HasNEONSupport())
	{
		using namespace TransformNEON;
		if( inNormal.GetPointer() && inTangent.GetPointer() )
			TransformXYZNT[addDataSize]( inDataBegin, inDataEnd, addDataSrc, m.m_Data, dstData, stride, inTangent.GetPointer() );
		else if( inNormal.GetPointer() )
			TransformXYZN[addDataSize]( inDataBegin, inDataEnd, addDataSrc, m.m_Data, dstData, stride );
		else
			TransformXYZ[addDataSize]( inDataBegin, inDataEnd, addDataSrc, m.m_Data, dstData, stride );
	}
	else
#endif
#if UNITY_SUPPORTS_VFP
	{
		using namespace TransformVFP;
		if( inNormal.GetPointer() && inTangent.GetPointer() )
			TransformXYZNT[addDataSize]( inDataBegin, inDataEnd, addDataSrc, m.m_Data, dstData, stride, inTangent.GetPointer() );
		else if( inNormal.GetPointer() )
			TransformXYZN[addDataSize]( inDataBegin, inDataEnd, addDataSrc, m.m_Data, dstData, stride );
		else
			TransformXYZ[addDataSize]( inDataBegin, inDataEnd, addDataSrc, m.m_Data, dstData, stride );
	}
#else
	{
		ErrorString("non-NEON path not enabled!");
	}
#endif
}
#endif

diff --git a/Runtime/Filters/Mesh/TransformVertex.h b/Runtime/Filters/Mesh/TransformVertex.h new file mode 100644 index 0000000..fe7aa77 --- /dev/null +++ b/Runtime/Filters/Mesh/TransformVertex.h @@ -0,0 +1,175 @@
#ifndef TRANSFORM_VERTEX_H_
#define TRANSFORM_VERTEX_H_

#include
"Configuration/PrefixConfigure.h"
#include "Runtime/Utilities/StrideIterator.h"
#include "Runtime/Math/Vector2.h"
#include "Runtime/Math/Vector3.h"
#include "Runtime/Math/Vector4.h"
#include "Runtime/Math/Color.h"

class Matrix4x4f;


//==============================================================================

// Declares an assembly vertex-transform routine for layout `code` (XYZ / XYZN)
// carrying `num` units of pass-through data, implemented in `postfix` (NEON/VFP).
#define DECL_TRANSFORM_VERTICES_STRIDED(code, num, postfix) \
	void s_TransformVertices_Strided_##code##_##num##_##postfix( const void* srcData, const void* srcDataEnd, const void* addData, \
		const float* xform, void* outData, int stride \
	);

// Same, for the position+normal+tangent (XYZNT) layout, which takes an extra
// tangent source pointer.
#define DECL_TRANSFORM_VERTICES_STRIDED_TANGENTS(num, postfix) \
	void s_TransformVertices_Strided_XYZNT_##num##_##postfix( const void* srcData, const void* srcDataEnd, const void* addData, \
		const float* xform, void* outData, int stride, const void* srcTangent \
	);


#if UNITY_SUPPORTS_NEON && !UNITY_DISABLE_NEON_SKINNING

extern "C"
{
// On these platforms the assembly exports underscore-prefixed symbols, so map
// the plain C names onto them. (NOTE(review): presumably a symbol-decoration
// difference between the toolchains — confirm against the .s files.)
#if UNITY_ANDROID || UNITY_WINRT || UNITY_BB10 || UNITY_TIZEN
	#define s_TransformVertices_Strided_XYZ_0_NEON _s_TransformVertices_Strided_XYZ_0_NEON
	#define s_TransformVertices_Strided_XYZ_1_NEON _s_TransformVertices_Strided_XYZ_1_NEON
	#define s_TransformVertices_Strided_XYZ_2_NEON _s_TransformVertices_Strided_XYZ_2_NEON
	#define s_TransformVertices_Strided_XYZ_3_NEON _s_TransformVertices_Strided_XYZ_3_NEON
	#define s_TransformVertices_Strided_XYZ_4_NEON _s_TransformVertices_Strided_XYZ_4_NEON
	#define s_TransformVertices_Strided_XYZ_5_NEON _s_TransformVertices_Strided_XYZ_5_NEON

	#define s_TransformVertices_Strided_XYZN_0_NEON _s_TransformVertices_Strided_XYZN_0_NEON
	#define s_TransformVertices_Strided_XYZN_1_NEON _s_TransformVertices_Strided_XYZN_1_NEON
	#define s_TransformVertices_Strided_XYZN_2_NEON _s_TransformVertices_Strided_XYZN_2_NEON
	#define s_TransformVertices_Strided_XYZN_3_NEON _s_TransformVertices_Strided_XYZN_3_NEON
	#define s_TransformVertices_Strided_XYZN_4_NEON _s_TransformVertices_Strided_XYZN_4_NEON
	#define s_TransformVertices_Strided_XYZN_5_NEON _s_TransformVertices_Strided_XYZN_5_NEON

	#define s_TransformVertices_Strided_XYZNT_0_NEON _s_TransformVertices_Strided_XYZNT_0_NEON
	#define s_TransformVertices_Strided_XYZNT_1_NEON _s_TransformVertices_Strided_XYZNT_1_NEON
	#define s_TransformVertices_Strided_XYZNT_2_NEON _s_TransformVertices_Strided_XYZNT_2_NEON
	#define s_TransformVertices_Strided_XYZNT_3_NEON _s_TransformVertices_Strided_XYZNT_3_NEON
	#define s_TransformVertices_Strided_XYZNT_4_NEON _s_TransformVertices_Strided_XYZNT_4_NEON
	#define s_TransformVertices_Strided_XYZNT_5_NEON _s_TransformVertices_Strided_XYZNT_5_NEON
#if ENABLE_SPRITES
#define s_TransformVertices_Sprite_NEON _s_TransformVertices_Sprite_NEON
#endif

#endif // UNITY_ANDROID || UNITY_WINRT || UNITY_BB10 || UNITY_TIZEN

	DECL_TRANSFORM_VERTICES_STRIDED(XYZ,0,NEON);
	DECL_TRANSFORM_VERTICES_STRIDED(XYZ,1,NEON);
	DECL_TRANSFORM_VERTICES_STRIDED(XYZ,2,NEON);
	DECL_TRANSFORM_VERTICES_STRIDED(XYZ,3,NEON);
	DECL_TRANSFORM_VERTICES_STRIDED(XYZ,4,NEON);
	DECL_TRANSFORM_VERTICES_STRIDED(XYZ,5,NEON);

	DECL_TRANSFORM_VERTICES_STRIDED(XYZN,0,NEON);
	DECL_TRANSFORM_VERTICES_STRIDED(XYZN,1,NEON);
	DECL_TRANSFORM_VERTICES_STRIDED(XYZN,2,NEON);
	DECL_TRANSFORM_VERTICES_STRIDED(XYZN,3,NEON);
	DECL_TRANSFORM_VERTICES_STRIDED(XYZN,4,NEON);
	DECL_TRANSFORM_VERTICES_STRIDED(XYZN,5,NEON);

	DECL_TRANSFORM_VERTICES_STRIDED_TANGENTS(0,NEON);
	DECL_TRANSFORM_VERTICES_STRIDED_TANGENTS(1,NEON);
	DECL_TRANSFORM_VERTICES_STRIDED_TANGENTS(2,NEON);
	DECL_TRANSFORM_VERTICES_STRIDED_TANGENTS(3,NEON);
	DECL_TRANSFORM_VERTICES_STRIDED_TANGENTS(4,NEON);
	DECL_TRANSFORM_VERTICES_STRIDED_TANGENTS(5,NEON);
#if ENABLE_SPRITES
	void s_TransformVertices_Sprite_NEON(const void* srcData, const void* srcDataEnd, const void* addData, const float* xform, void* outData, int stride, unsigned int color);
#endif
}

#endif


#if UNITY_SUPPORTS_VFP

extern "C"
{
// Same underscore-prefixed symbol mapping for the VFP variants.
#if UNITY_ANDROID || UNITY_BB10 || UNITY_TIZEN
	#define s_TransformVertices_Strided_XYZ_0_VFP _s_TransformVertices_Strided_XYZ_0_VFP
	#define s_TransformVertices_Strided_XYZ_1_VFP _s_TransformVertices_Strided_XYZ_1_VFP
	#define s_TransformVertices_Strided_XYZ_2_VFP _s_TransformVertices_Strided_XYZ_2_VFP
	#define s_TransformVertices_Strided_XYZ_3_VFP _s_TransformVertices_Strided_XYZ_3_VFP
	#define s_TransformVertices_Strided_XYZ_4_VFP _s_TransformVertices_Strided_XYZ_4_VFP
	#define s_TransformVertices_Strided_XYZ_5_VFP _s_TransformVertices_Strided_XYZ_5_VFP

	#define s_TransformVertices_Strided_XYZN_0_VFP _s_TransformVertices_Strided_XYZN_0_VFP
	#define s_TransformVertices_Strided_XYZN_1_VFP _s_TransformVertices_Strided_XYZN_1_VFP
	#define s_TransformVertices_Strided_XYZN_2_VFP _s_TransformVertices_Strided_XYZN_2_VFP
	#define s_TransformVertices_Strided_XYZN_3_VFP _s_TransformVertices_Strided_XYZN_3_VFP
	#define s_TransformVertices_Strided_XYZN_4_VFP _s_TransformVertices_Strided_XYZN_4_VFP
	#define s_TransformVertices_Strided_XYZN_5_VFP _s_TransformVertices_Strided_XYZN_5_VFP

	#define s_TransformVertices_Strided_XYZNT_0_VFP _s_TransformVertices_Strided_XYZNT_0_VFP
	#define s_TransformVertices_Strided_XYZNT_1_VFP _s_TransformVertices_Strided_XYZNT_1_VFP
	#define s_TransformVertices_Strided_XYZNT_2_VFP _s_TransformVertices_Strided_XYZNT_2_VFP
	#define s_TransformVertices_Strided_XYZNT_3_VFP _s_TransformVertices_Strided_XYZNT_3_VFP
	#define s_TransformVertices_Strided_XYZNT_4_VFP _s_TransformVertices_Strided_XYZNT_4_VFP
	#define s_TransformVertices_Strided_XYZNT_5_VFP _s_TransformVertices_Strided_XYZNT_5_VFP
#if ENABLE_SPRITES
	#define s_TransformVertices_Sprite_VFP _s_TransformVertices_Sprite_VFP
#endif
#endif // UNITY_ANDROID || UNITY_BB10 || UNITY_TIZEN


	DECL_TRANSFORM_VERTICES_STRIDED(XYZ,0,VFP);
	DECL_TRANSFORM_VERTICES_STRIDED(XYZ,1,VFP);
	DECL_TRANSFORM_VERTICES_STRIDED(XYZ,2,VFP);
	DECL_TRANSFORM_VERTICES_STRIDED(XYZ,3,VFP);
	DECL_TRANSFORM_VERTICES_STRIDED(XYZ,4,VFP);
	DECL_TRANSFORM_VERTICES_STRIDED(XYZ,5,VFP);

	DECL_TRANSFORM_VERTICES_STRIDED(XYZN,0,VFP);
	DECL_TRANSFORM_VERTICES_STRIDED(XYZN,1,VFP);
	DECL_TRANSFORM_VERTICES_STRIDED(XYZN,2,VFP);
	DECL_TRANSFORM_VERTICES_STRIDED(XYZN,3,VFP);
	DECL_TRANSFORM_VERTICES_STRIDED(XYZN,4,VFP);
	DECL_TRANSFORM_VERTICES_STRIDED(XYZN,5,VFP);

	DECL_TRANSFORM_VERTICES_STRIDED_TANGENTS(0,VFP);
	DECL_TRANSFORM_VERTICES_STRIDED_TANGENTS(1,VFP);
	DECL_TRANSFORM_VERTICES_STRIDED_TANGENTS(2,VFP);
	DECL_TRANSFORM_VERTICES_STRIDED_TANGENTS(3,VFP);
	DECL_TRANSFORM_VERTICES_STRIDED_TANGENTS(4,VFP);
	DECL_TRANSFORM_VERTICES_STRIDED_TANGENTS(5,VFP);
#if ENABLE_SPRITES
	void s_TransformVertices_Sprite_VFP (const void* srcData, const void* srcDataEnd, const void* addData, const float* xform, void* outData, int stride, unsigned int color);
#endif
}

#endif


#undef DECL_TRANSFORM_VERTICES_STRIDED_TANGENTS
#undef DECL_TRANSFORM_VERTICES_STRIDED


//==============================================================================

// Portable reference implementation (TransformVertex.cpp).
void
TransformVerticesStridedREF( StrideIterator<Vector3f> inPos, StrideIterator<Vector3f> inNormal,
	StrideIterator<ColorRGBA32> inColor, StrideIterator<Vector2f> inTexCoord0, StrideIterator<Vector2f> inTexCoord1,
	StrideIterator<Vector4f> inTangent,
	UInt8* dstData, const Matrix4x4f& m, unsigned vertexCount, bool multiStream );

#if (UNITY_SUPPORTS_NEON && !UNITY_DISABLE_NEON_SKINNING) || UNITY_SUPPORTS_VFP
// ARM dispatcher choosing NEON/VFP assembly, falling back to the REF path.
void
TransformVerticesStridedARM( StrideIterator<Vector3f> inPos, StrideIterator<Vector3f> inNormal,
	StrideIterator<ColorRGBA32> inColor, StrideIterator<Vector2f> inTexCoord0, StrideIterator<Vector2f> inTexCoord1,
	StrideIterator<Vector4f> inTangent,
	UInt8* dstData, const Matrix4x4f& m, unsigned vertexCount, bool multiStream );
#endif


// TransformVerticesStrided resolves to the best implementation for the target.
#if (UNITY_SUPPORTS_NEON && !UNITY_DISABLE_NEON_SKINNING) || UNITY_SUPPORTS_VFP
	#define TransformVerticesStrided TransformVerticesStridedARM
#else
#define TransformVerticesStrided TransformVerticesStridedREF +#endif + + +//============================================================================== + +#endif // TRANSFORM_VERTEX_H_ diff --git a/Runtime/Filters/Mesh/TransformVertexNEON.asm b/Runtime/Filters/Mesh/TransformVertexNEON.asm new file mode 100644 index 0000000..7db462b --- /dev/null +++ b/Runtime/Filters/Mesh/TransformVertexNEON.asm @@ -0,0 +1,694 @@ + AREA .text, CODE + + EXPORT _s_TransformVertices_Strided_XYZ_0_NEON + EXPORT _s_TransformVertices_Strided_XYZ_1_NEON + EXPORT _s_TransformVertices_Strided_XYZ_2_NEON + EXPORT _s_TransformVertices_Strided_XYZ_3_NEON + EXPORT _s_TransformVertices_Strided_XYZ_4_NEON + EXPORT _s_TransformVertices_Strided_XYZ_5_NEON + EXPORT _s_TransformVertices_Strided_XYZN_0_NEON + EXPORT _s_TransformVertices_Strided_XYZN_1_NEON + EXPORT _s_TransformVertices_Strided_XYZN_2_NEON + EXPORT _s_TransformVertices_Strided_XYZN_3_NEON + EXPORT _s_TransformVertices_Strided_XYZN_4_NEON + EXPORT _s_TransformVertices_Strided_XYZN_5_NEON + EXPORT _s_TransformVertices_Strided_XYZNT_0_NEON + EXPORT _s_TransformVertices_Strided_XYZNT_1_NEON + EXPORT _s_TransformVertices_Strided_XYZNT_2_NEON + EXPORT _s_TransformVertices_Strided_XYZNT_3_NEON + EXPORT _s_TransformVertices_Strided_XYZNT_4_NEON + EXPORT _s_TransformVertices_Strided_XYZNT_5_NEON + +|_s_TransformVertices_Strided_XYZ_0_NEON| PROC + mov ip, sp + vpush {s0-s15} + stmdb sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + vldmia r3!, {d24-d31} + mov.w r6, #12 + ldr.w r3, [ip] + ldr.w r4, [ip, #4] + vorr q0, q15, q15 + nop + +|TransformVertices_Strided_XYZ_0_Loop| + pld [r0, #512] ; 0x200 + vld1.32 {d6-d7}, [r0], r4 + vmla.f32 q0, q12, d6[0] + vmul.f32 q1, q13, d6[1] + vmul.f32 q2, q14, d7[0] + vadd.f32 q0, q0, q1 + vadd.f32 q0, q0, q2 + cmp r0, r1 + vst1.32 {d0-d1}, [r3], r6 + vorr q0, q15, q15 + bcc.w |TransformVertices_Strided_XYZ_0_Loop| + ldmia.w sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + vpop {s0-s15} + bx lr + ENDP + + 
+|_s_TransformVertices_Strided_XYZ_1_NEON| PROC + mov ip, sp + vpush {s0-s15} + stmdb sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + vldmia r3!, {d24-d31} + mov.w r6, #12 + ldr.w r3, [ip] + ldr.w r4, [ip, #4] + vorr q0, q15, q15 + nop + nop.w + nop.w + nop.w + +|TransformVertices_Strided_XYZ_1_Loop| + pld [r0, #512] ; 0x200 + vld1.32 {d6-d7}, [r0], r4 + vmla.f32 q0, q12, d6[0] + vmul.f32 q1, q13, d6[1] + vmul.f32 q2, q14, d7[0] + vadd.f32 q0, q0, q1 + vld1.32 {d9}, [r2], r4 + vadd.f32 q0, q0, q2 + cmp r0, r1 + vst1.32 {d0-d1}, [r3], r6 + vorr q0, q15, q15 + vst1.32 {d9[0]}, [r3]! + bcc.w |TransformVertices_Strided_XYZ_1_Loop| + ldmia.w sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + vpop {s0-s15} + bx lr + ENDP + + +|_s_TransformVertices_Strided_XYZ_2_NEON| PROC + mov ip, sp + vpush {s0-s15} + stmdb sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + vldmia r3!, {d24-d31} + mov.w r6, #12 + ldr.w r3, [ip] + ldr.w r4, [ip, #4] + vorr q0, q15, q15 + nop + nop.w + +|TransformVertices_Strided_XYZ_2_Loop| + pld [r0, #512] ; 0x200 + vld1.32 {d6-d7}, [r0], r4 + vmla.f32 q0, q12, d6[0] + vmul.f32 q1, q13, d6[1] + vmul.f32 q2, q14, d7[0] + vadd.f32 q0, q0, q1 + vld1.32 {d9}, [r2], r4 + vadd.f32 q0, q0, q2 + cmp r0, r1 + vst1.32 {d0-d1}, [r3], r6 + vorr q0, q15, q15 + vst1.32 {d9}, [r3]! + bcc.w |TransformVertices_Strided_XYZ_2_Loop| + ldmia.w sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + vpop {s0-s15} + bx lr + ENDP + + +|_s_TransformVertices_Strided_XYZ_3_NEON| PROC + mov ip, sp + vpush {s0-s15} + stmdb sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + vldmia r3!, {d24-d31} + mov.w r6, #12 + ldr.w r3, [ip] + ldr.w r4, [ip, #4] + vorr q0, q15, q15 + nop + nop.w + +|TransformVertices_Strided_XYZ_3_Loop| + pld [r0, #512] ; 0x200 + vld1.32 {d6-d7}, [r0], r4 + vmla.f32 q0, q12, d6[0] + vmul.f32 q1, q13, d6[1] + vmul.f32 q2, q14, d7[0] + vadd.f32 q0, q0, q1 + vld1.32 {d9-d10}, [r2], r4 + vadd.f32 q0, q0, q2 + cmp r0, r1 + vst1.32 {d0-d1}, [r3], r6 + vorr q0, q15, q15 + vst1.32 {d9}, [r3]! + vst1.32 {d10[0]}, [r3]! 
+ bcc.w |TransformVertices_Strided_XYZ_3_Loop| + ldmia.w sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + vpop {s0-s15} + bx lr + ENDP + + +|_s_TransformVertices_Strided_XYZ_4_NEON| PROC + mov ip, sp + vpush {s0-s15} + stmdb sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + vldmia r3!, {d24-d31} + mov.w r6, #12 + ldr.w r3, [ip] + ldr.w r4, [ip, #4] + vorr q0, q15, q15 + nop + +|TransformVertices_Strided_XYZ_4_Loop| + pld [r0, #512] ; 0x200 + vld1.32 {d6-d7}, [r0], r4 + vmla.f32 q0, q12, d6[0] + vmul.f32 q1, q13, d6[1] + vmul.f32 q2, q14, d7[0] + vadd.f32 q0, q0, q1 + vld1.32 {d9-d10}, [r2], r4 + vadd.f32 q0, q0, q2 + cmp r0, r1 + vst1.32 {d0-d1}, [r3], r6 + vorr q0, q15, q15 + vst1.32 {d9-d10}, [r3]! + bcc.w |TransformVertices_Strided_XYZ_4_Loop| + ldmia.w sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + vpop {s0-s15} + bx lr + ENDP + + +|_s_TransformVertices_Strided_XYZ_5_NEON| PROC + mov ip, sp + vpush {s0-s15} + stmdb sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + vldmia r3!, {d24-d31} + mov.w r6, #12 + ldr.w r3, [ip] + ldr.w r4, [ip, #4] + vorr q0, q15, q15 + nop + nop.w + +|TransformVertices_Strided_XYZ_5_Loop| + pld [r0, #512] ; 0x200 + vld1.32 {d6-d7}, [r0], r4 + vmla.f32 q0, q12, d6[0] + vmul.f32 q1, q13, d6[1] + vmul.f32 q2, q14, d7[0] + vadd.f32 q0, q0, q1 + vld1.32 {d9-d11}, [r2], r4 + vadd.f32 q0, q0, q2 + cmp r0, r1 + vst1.32 {d0-d1}, [r3], r6 + vorr q0, q15, q15 + vst1.32 {d9-d10}, [r3]! + vst1.32 {d11[0]}, [r3]! 
+ bcc.w |TransformVertices_Strided_XYZ_5_Loop| + ldmia.w sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + vpop {s0-s15} + bx lr + ENDP + + +|_s_TransformVertices_Strided_XYZN_0_NEON| PROC + mov ip, sp + vpush {s0-s15} + stmdb sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + vldmia r3!, {d24-d31} + mov.w r6, #12 + ldr.w r3, [ip] + ldr.w r4, [ip, #4] + vorr q0, q15, q15 + nop + +|TransformVertices_Strided_XYZN_0_Loop| + pld [r0, #512] ; 0x200 + vld1.32 {d4-d6}, [r0], r4 + vmla.f32 q0, q12, d4[0] + vmul.f32 q1, q12, d5[1] + vmla.f32 q0, q13, d4[1] + vmla.f32 q1, q13, d6[0] + vmla.f32 q0, q14, d5[0] + vmla.f32 q1, q14, d6[1] + vst1.32 {d0-d1}, [r3], r6 + cmp r0, r1 + vorr q0, q15, q15 + vst1.32 {d2-d3}, [r3], r6 + bcc.w |TransformVertices_Strided_XYZN_0_Loop| + ldmia.w sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + vpop {s0-s15} + bx lr + ENDP + + +|_s_TransformVertices_Strided_XYZN_1_NEON| PROC + mov ip, sp + vpush {s0-s15} + stmdb sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + vldmia r3!, {d24-d31} + mov.w r6, #12 + ldr.w r3, [ip] + ldr.w r4, [ip, #4] + vorr q0, q15, q15 + nop + nop.w + +|TransformVertices_Strided_XYZN_1_Loop| + pld [r0, #512] ; 0x200 + vld1.32 {d4-d6}, [r0], r4 + vmla.f32 q0, q12, d4[0] + vmul.f32 q1, q12, d5[1] + vld1.32 {d9}, [r2], r4 + vmla.f32 q0, q13, d4[1] + vmla.f32 q1, q13, d6[0] + vmla.f32 q0, q14, d5[0] + vmla.f32 q1, q14, d6[1] + vst1.32 {d0-d1}, [r3], r6 + cmp r0, r1 + vorr q0, q15, q15 + vst1.32 {d2-d3}, [r3], r6 + vst1.32 {d9[0]}, [r3]! 
+ bcc.w |TransformVertices_Strided_XYZN_1_Loop| + ldmia.w sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + vpop {s0-s15} + bx lr + ENDP + + +|_s_TransformVertices_Strided_XYZN_2_NEON| PROC + mov ip, sp + vpush {s0-s15} + stmdb sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + vldmia r3!, {d24-d31} + mov.w r6, #12 + ldr.w r3, [ip] + ldr.w r4, [ip, #4] + vorr q0, q15, q15 + nop + nop.w + nop.w + nop.w + +|TransformVertices_Strided_XYZN_2_Loop| + pld [r0, #512] ; 0x200 + vld1.32 {d4-d6}, [r0], r4 + vmla.f32 q0, q12, d4[0] + vmul.f32 q1, q12, d5[1] + vld1.32 {d9}, [r2], r4 + vmla.f32 q0, q13, d4[1] + vmla.f32 q1, q13, d6[0] + vmla.f32 q0, q14, d5[0] + vmla.f32 q1, q14, d6[1] + vst1.32 {d0-d1}, [r3], r6 + cmp r0, r1 + vorr q0, q15, q15 + vst1.32 {d2-d3}, [r3], r6 + vst1.32 {d9}, [r3]! + bcc.w |TransformVertices_Strided_XYZN_2_Loop| + ldmia.w sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + vpop {s0-s15} + bx lr + ENDP + + +|_s_TransformVertices_Strided_XYZN_3_NEON| PROC + mov ip, sp + vpush {s0-s15} + stmdb sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + vldmia r3!, {d24-d31} + mov.w r6, #12 + ldr.w r3, [ip] + ldr.w r4, [ip, #4] + vorr q0, q15, q15 + nop + nop.w + nop.w + nop.w + +|TransformVertices_Strided_XYZN_3_Loop| + pld [r0, #512] ; 0x200 + vld1.32 {d4-d6}, [r0], r4 + vmla.f32 q0, q12, d4[0] + vmul.f32 q1, q12, d5[1] + vld1.32 {d9-d10}, [r2], r4 + vmla.f32 q0, q13, d4[1] + vmla.f32 q1, q13, d6[0] + vmla.f32 q0, q14, d5[0] + vmla.f32 q1, q14, d6[1] + vst1.32 {d0-d1}, [r3], r6 + cmp r0, r1 + vorr q0, q15, q15 + vst1.32 {d2-d3}, [r3], r6 + vst1.32 {d9}, [r3]! + vst1.32 {d10[0]}, [r3]! 
+ bcc.w |TransformVertices_Strided_XYZN_3_Loop| + ldmia.w sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + vpop {s0-s15} + bx lr + ENDP + + +|_s_TransformVertices_Strided_XYZN_4_NEON| PROC + mov ip, sp + vpush {s0-s15} + stmdb sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + vldmia r3!, {d24-d31} + mov.w r6, #12 + ldr.w r3, [ip] + ldr.w r4, [ip, #4] + vorr q0, q15, q15 + nop + nop.w + nop.w + +|TransformVertices_Strided_XYZN_4_Loop| + pld [r0, #512] ; 0x200 + vld1.32 {d4-d6}, [r0], r4 + vmla.f32 q0, q12, d4[0] + vmul.f32 q1, q12, d5[1] + vld1.32 {d9-d10}, [r2], r4 + vmla.f32 q0, q13, d4[1] + vmla.f32 q1, q13, d6[0] + vmla.f32 q0, q14, d5[0] + vmla.f32 q1, q14, d6[1] + vst1.32 {d0-d1}, [r3], r6 + cmp r0, r1 + vorr q0, q15, q15 + vst1.32 {d2-d3}, [r3], r6 + vst1.32 {d9-d10}, [r3]! + bcc.w |TransformVertices_Strided_XYZN_4_Loop| + ldmia.w sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + vpop {s0-s15} + bx lr + ENDP + + +|_s_TransformVertices_Strided_XYZN_5_NEON| PROC + mov ip, sp + vpush {s0-s15} + stmdb sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + vldmia r3!, {d24-d31} + mov.w r6, #12 + ldr.w r3, [ip] + ldr.w r4, [ip, #4] + vorr q0, q15, q15 + nop + nop.w + nop.w + nop.w + +|TransformVertices_Strided_XYZN_5_Loop| + pld [r0, #512] ; 0x200 + vld1.32 {d4-d6}, [r0], r4 + vmla.f32 q0, q12, d4[0] + vmul.f32 q1, q12, d5[1] + vld1.32 {d9-d11}, [r2], r4 + vmla.f32 q0, q13, d4[1] + vmla.f32 q1, q13, d6[0] + vmla.f32 q0, q14, d5[0] + vmla.f32 q1, q14, d6[1] + vst1.32 {d0-d1}, [r3], r6 + cmp r0, r1 + vorr q0, q15, q15 + vst1.32 {d2-d3}, [r3], r6 + vst1.32 {d9-d10}, [r3]! + vst1.32 {d11[0]}, [r3]! 
+ bcc.w |TransformVertices_Strided_XYZN_5_Loop| + ldmia.w sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + vpop {s0-s15} + bx lr + ENDP + + +|_s_TransformVertices_Strided_XYZNT_0_NEON| PROC + mov ip, sp + vpush {s0-s15} + stmdb sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + vldmia r3!, {d24-d31} + mov.w r6, #12 + ldr.w r3, [ip] + ldr.w r4, [ip, #4] + vorr q0, q15, q15 + ldr.w r8, [ip, #8] + mov.w r9, #12 + mov.w sl, #4 + nop + nop.w + nop.w + nop.w + +|TransformVertices_Strided_XYZNT_0_Loop| + pld [r0, #512] ; 0x200 + vld1.32 {d4-d6}, [r0], r4 + vld1.32 {d7-d8}, [r8], r4 + vmla.f32 q0, q12, d4[0] + vmul.f32 q1, q12, d5[1] + vmul.f32 q11, q12, d7[0] + vmla.f32 q0, q13, d4[1] + vmla.f32 q1, q13, d6[0] + vmla.f32 q11, q13, d7[1] + vmla.f32 q0, q14, d5[0] + vmla.f32 q1, q14, d6[1] + vmla.f32 q11, q14, d8[0] + vst1.32 {d0-d1}, [r3], r6 + cmp r0, r1 + vorr q0, q15, q15 + vst1.32 {d2-d3}, [r3], r6 + vtrn.32 d8, d7 + vst1.32 {d22-d23}, [r3], r9 + vst1.32 {d7[0]}, [r3], sl + bcc.w |TransformVertices_Strided_XYZNT_0_Loop| + ldmia.w sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + vpop {s0-s15} + bx lr + ENDP + + +|_s_TransformVertices_Strided_XYZNT_1_NEON| PROC + mov ip, sp + vpush {s0-s15} + stmdb sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + vldmia r3!, {d24-d31} + mov.w r6, #12 + ldr.w r3, [ip] + ldr.w r4, [ip, #4] + vorr q0, q15, q15 + ldr.w r8, [ip, #8] + mov.w r9, #12 + mov.w sl, #4 + nop + nop.w + nop.w + nop.w + +|TransformVertices_Strided_XYZNT_1_Loop| + pld [r0, #512] ; 0x200 + vld1.32 {d4-d6}, [r0], r4 + vld1.32 {d7-d8}, [r8], r4 + vmla.f32 q0, q12, d4[0] + vmul.f32 q1, q12, d5[1] + vmul.f32 q11, q12, d7[0] + vld1.32 {d9}, [r2], r4 + vmla.f32 q0, q13, d4[1] + vmla.f32 q1, q13, d6[0] + vmla.f32 q11, q13, d7[1] + vmla.f32 q0, q14, d5[0] + vmla.f32 q1, q14, d6[1] + vmla.f32 q11, q14, d8[0] + vst1.32 {d0-d1}, [r3], r6 + cmp r0, r1 + vorr q0, q15, q15 + vst1.32 {d2-d3}, [r3], r6 + vst1.32 {d9[0]}, [r3]! 
+ vtrn.32 d8, d7 + vst1.32 {d22-d23}, [r3], r9 + vst1.32 {d7[0]}, [r3], sl + bcc.w |TransformVertices_Strided_XYZNT_1_Loop| + ldmia.w sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + vpop {s0-s15} + bx lr + ENDP + + +|_s_TransformVertices_Strided_XYZNT_2_NEON| PROC + mov ip, sp + vpush {s0-s15} + stmdb sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + vldmia r3!, {d24-d31} + mov.w r6, #12 + ldr.w r3, [ip] + ldr.w r4, [ip, #4] + vorr q0, q15, q15 + ldr.w r8, [ip, #8] + mov.w r9, #12 + mov.w sl, #4 + nop + nop.w + +|TransformVertices_Strided_XYZNT_2_Loop| + pld [r0, #512] ; 0x200 + vld1.32 {d4-d6}, [r0], r4 + vld1.32 {d7-d8}, [r8], r4 + vmla.f32 q0, q12, d4[0] + vmul.f32 q1, q12, d5[1] + vmul.f32 q11, q12, d7[0] + vld1.32 {d9}, [r2], r4 + vmla.f32 q0, q13, d4[1] + vmla.f32 q1, q13, d6[0] + vmla.f32 q11, q13, d7[1] + vmla.f32 q0, q14, d5[0] + vmla.f32 q1, q14, d6[1] + vmla.f32 q11, q14, d8[0] + vst1.32 {d0-d1}, [r3], r6 + cmp r0, r1 + vorr q0, q15, q15 + vst1.32 {d2-d3}, [r3], r6 + vst1.32 {d9}, [r3]! + vtrn.32 d8, d7 + vst1.32 {d22-d23}, [r3], r9 + vst1.32 {d7[0]}, [r3], sl + bcc.w |TransformVertices_Strided_XYZNT_2_Loop| + ldmia.w sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + vpop {s0-s15} + bx lr + ENDP + + +|_s_TransformVertices_Strided_XYZNT_3_NEON| PROC + mov ip, sp + vpush {s0-s15} + stmdb sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + vldmia r3!, {d24-d31} + mov.w r6, #12 + ldr.w r3, [ip] + ldr.w r4, [ip, #4] + vorr q0, q15, q15 + ldr.w r8, [ip, #8] + mov.w r9, #12 + mov.w sl, #4 + nop + nop.w + +|TransformVertices_Strided_XYZNT_3_Loop| + pld [r0, #512] ; 0x200 + vld1.32 {d4-d6}, [r0], r4 + vld1.32 {d7-d8}, [r8], r4 + vmla.f32 q0, q12, d4[0] + vmul.f32 q1, q12, d5[1] + vmul.f32 q11, q12, d7[0] + vld1.32 {d9-d10}, [r2], r4 + vmla.f32 q0, q13, d4[1] + vmla.f32 q1, q13, d6[0] + vmla.f32 q11, q13, d7[1] + vmla.f32 q0, q14, d5[0] + vmla.f32 q1, q14, d6[1] + vmla.f32 q11, q14, d8[0] + vst1.32 {d0-d1}, [r3], r6 + cmp r0, r1 + vorr q0, q15, q15 + vst1.32 {d2-d3}, [r3], r6 + vst1.32 {d9}, [r3]! 
+ vst1.32 {d10[0]}, [r3]! + vtrn.32 d8, d7 + vst1.32 {d22-d23}, [r3], r9 + vst1.32 {d7[0]}, [r3], sl + bcc.w |TransformVertices_Strided_XYZNT_3_Loop| + ldmia.w sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + vpop {s0-s15} + bx lr + ENDP + + +|_s_TransformVertices_Strided_XYZNT_4_NEON| PROC + mov ip, sp + vpush {s0-s15} + stmdb sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + vldmia r3!, {d24-d31} + mov.w r6, #12 + ldr.w r3, [ip] + ldr.w r4, [ip, #4] + vorr q0, q15, q15 + ldr.w r8, [ip, #8] + mov.w r9, #12 + mov.w sl, #4 + nop + +|TransformVertices_Strided_XYZNT_4_Loop| + pld [r0, #512] ; 0x200 + vld1.32 {d4-d6}, [r0], r4 + vld1.32 {d7-d8}, [r8], r4 + vmla.f32 q0, q12, d4[0] + vmul.f32 q1, q12, d5[1] + vmul.f32 q11, q12, d7[0] + vld1.32 {d9-d10}, [r2], r4 + vmla.f32 q0, q13, d4[1] + vmla.f32 q1, q13, d6[0] + vmla.f32 q11, q13, d7[1] + vmla.f32 q0, q14, d5[0] + vmla.f32 q1, q14, d6[1] + vmla.f32 q11, q14, d8[0] + vst1.32 {d0-d1}, [r3], r6 + cmp r0, r1 + vorr q0, q15, q15 + vst1.32 {d2-d3}, [r3], r6 + vst1.32 {d9-d10}, [r3]! 
+ vtrn.32 d8, d7 + vst1.32 {d22-d23}, [r3], r9 + vst1.32 {d7[0]}, [r3], sl + bcc.w |TransformVertices_Strided_XYZNT_4_Loop| + ldmia.w sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + vpop {s0-s15} + bx lr + ENDP + + +|_s_TransformVertices_Strided_XYZNT_5_NEON| PROC + mov ip, sp + vpush {s0-s15} + stmdb sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + vldmia r3!, {d24-d31} + mov.w r6, #12 + ldr.w r3, [ip] + ldr.w r4, [ip, #4] + vorr q0, q15, q15 + ldr.w r8, [ip, #8] + mov.w r9, #12 + mov.w sl, #4 + nop + nop.w + +|TransformVertices_Strided_XYZNT_5_Loop| + pld [r0, #512] ; 0x200 + vld1.32 {d4-d6}, [r0], r4 + vld1.32 {d7-d8}, [r8], r4 + vmla.f32 q0, q12, d4[0] + vmul.f32 q1, q12, d5[1] + vmul.f32 q11, q12, d7[0] + vld1.32 {d9-d11}, [r2], r4 + vmla.f32 q0, q13, d4[1] + vmla.f32 q1, q13, d6[0] + vmla.f32 q11, q13, d7[1] + vmla.f32 q0, q14, d5[0] + vmla.f32 q1, q14, d6[1] + vmla.f32 q11, q14, d8[0] + vst1.32 {d0-d1}, [r3], r6 + cmp r0, r1 + vorr q0, q15, q15 + vst1.32 {d2-d3}, [r3], r6 + vst1.32 {d9-d10}, [r3]! + vst1.32 {d11[0]}, [r3]! 
+ vtrn.32 d8, d7 + vst1.32 {d22-d23}, [r3], r9 + vst1.32 {d7[0]}, [r3], sl + bcc.w |TransformVertices_Strided_XYZNT_5_Loop| + ldmia.w sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + vpop {s0-s15} + bx lr + nop.w + nop.w + nop.w + ENDP + + + END diff --git a/Runtime/Filters/Mesh/TransformVertexNEON.s b/Runtime/Filters/Mesh/TransformVertexNEON.s new file mode 100644 index 0000000..e21a554 --- /dev/null +++ b/Runtime/Filters/Mesh/TransformVertexNEON.s @@ -0,0 +1,224 @@ +#define UNITY_ASSEMBLER +#include "Configuration/PrefixConfigure.h" + +#if UNITY_SUPPORTS_NEON + +.set device,0 +.set device,__arm__ + +.if device + +//.code32 + + +.globl _s_TransformVertices_Strided_XYZ_0_NEON +.globl _s_TransformVertices_Strided_XYZ_1_NEON +.globl _s_TransformVertices_Strided_XYZ_2_NEON +.globl _s_TransformVertices_Strided_XYZ_3_NEON +.globl _s_TransformVertices_Strided_XYZ_4_NEON +.globl _s_TransformVertices_Strided_XYZ_5_NEON + +.globl _s_TransformVertices_Strided_XYZN_0_NEON +.globl _s_TransformVertices_Strided_XYZN_1_NEON +.globl _s_TransformVertices_Strided_XYZN_2_NEON +.globl _s_TransformVertices_Strided_XYZN_3_NEON +.globl _s_TransformVertices_Strided_XYZN_4_NEON +.globl _s_TransformVertices_Strided_XYZN_5_NEON + +.globl _s_TransformVertices_Strided_XYZNT_0_NEON +.globl _s_TransformVertices_Strided_XYZNT_1_NEON +.globl _s_TransformVertices_Strided_XYZNT_2_NEON +.globl _s_TransformVertices_Strided_XYZNT_3_NEON +.globl _s_TransformVertices_Strided_XYZNT_4_NEON +.globl _s_TransformVertices_Strided_XYZNT_5_NEON + +.globl _s_TransformVertices_Sprite_NEON + + +#define STRIDED_INPUT 1 + + +#define LOOP_XYZ 1 +#define LOOP_XYZN 0 +#define LOOP_XYZNT 0 +#define LOOP_SPRITE 0 + + +_s_TransformVertices_Strided_XYZ_0_NEON: +#define COPY_DATA_SZ 0 +#define LOOP_NAME TransformVertices_Strided_XYZ_0_Loop +#include "TransformVertexNEON_Loop.h" +#undef COPY_DATA_SZ +#undef LOOP_NAME + +_s_TransformVertices_Strided_XYZ_1_NEON: +#define COPY_DATA_SZ 1 +#define LOOP_NAME 
TransformVertices_Strided_XYZ_1_Loop +#include "TransformVertexNEON_Loop.h" +#undef COPY_DATA_SZ +#undef LOOP_NAME + +_s_TransformVertices_Strided_XYZ_2_NEON: +#define COPY_DATA_SZ 2 +#define LOOP_NAME TransformVertices_Strided_XYZ_2_Loop +#include "TransformVertexNEON_Loop.h" +#undef COPY_DATA_SZ +#undef LOOP_NAME + +_s_TransformVertices_Strided_XYZ_3_NEON: +#define COPY_DATA_SZ 3 +#define LOOP_NAME TransformVertices_Strided_XYZ_3_Loop +#include "TransformVertexNEON_Loop.h" +#undef COPY_DATA_SZ +#undef LOOP_NAME + +_s_TransformVertices_Strided_XYZ_4_NEON: +#define COPY_DATA_SZ 4 +#define LOOP_NAME TransformVertices_Strided_XYZ_4_Loop +#include "TransformVertexNEON_Loop.h" +#undef COPY_DATA_SZ +#undef LOOP_NAME + +_s_TransformVertices_Strided_XYZ_5_NEON: +#define COPY_DATA_SZ 5 +#define LOOP_NAME TransformVertices_Strided_XYZ_5_Loop +#include "TransformVertexNEON_Loop.h" +#undef COPY_DATA_SZ +#undef LOOP_NAME + + +#undef LOOP_XYZ +#undef LOOP_XYZN +#undef LOOP_XYZNT +#undef LOOP_SPRITE + + +#define LOOP_XYZ 0 +#define LOOP_XYZN 1 +#define LOOP_XYZNT 0 +#define LOOP_SPRITE 0 + + +_s_TransformVertices_Strided_XYZN_0_NEON: +#define COPY_DATA_SZ 0 +#define LOOP_NAME TransformVertices_Strided_XYZN_0_Loop +#include "TransformVertexNEON_Loop.h" +#undef COPY_DATA_SZ +#undef LOOP_NAME + +_s_TransformVertices_Strided_XYZN_1_NEON: +#define COPY_DATA_SZ 1 +#define LOOP_NAME TransformVertices_Strided_XYZN_1_Loop +#include "TransformVertexNEON_Loop.h" +#undef COPY_DATA_SZ +#undef LOOP_NAME + +_s_TransformVertices_Strided_XYZN_2_NEON: +#define COPY_DATA_SZ 2 +#define LOOP_NAME TransformVertices_Strided_XYZN_2_Loop +#include "TransformVertexNEON_Loop.h" +#undef COPY_DATA_SZ +#undef LOOP_NAME + +_s_TransformVertices_Strided_XYZN_3_NEON: +#define COPY_DATA_SZ 3 +#define LOOP_NAME TransformVertices_Strided_XYZN_3_Loop +#include "TransformVertexNEON_Loop.h" +#undef COPY_DATA_SZ +#undef LOOP_NAME + +_s_TransformVertices_Strided_XYZN_4_NEON: +#define COPY_DATA_SZ 4 +#define LOOP_NAME 
TransformVertices_Strided_XYZN_4_Loop +#include "TransformVertexNEON_Loop.h" +#undef COPY_DATA_SZ +#undef LOOP_NAME + +_s_TransformVertices_Strided_XYZN_5_NEON: +#define COPY_DATA_SZ 5 +#define LOOP_NAME TransformVertices_Strided_XYZN_5_Loop +#include "TransformVertexNEON_Loop.h" +#undef COPY_DATA_SZ +#undef LOOP_NAME + + +#undef LOOP_XYZ +#undef LOOP_XYZN +#undef LOOP_XYZNT +#undef LOOP_SPRITE + + +#define LOOP_XYZ 0 +#define LOOP_XYZN 0 +#define LOOP_XYZNT 1 +#define LOOP_SPRITE 0 + + +_s_TransformVertices_Strided_XYZNT_0_NEON: +#define COPY_DATA_SZ 0 +#define LOOP_NAME TransformVertices_Strided_XYZNT_0_Loop +#include "TransformVertexNEON_Loop.h" +#undef COPY_DATA_SZ +#undef LOOP_NAME + +_s_TransformVertices_Strided_XYZNT_1_NEON: +#define COPY_DATA_SZ 1 +#define LOOP_NAME TransformVertices_Strided_XYZNT_1_Loop +#include "TransformVertexNEON_Loop.h" +#undef COPY_DATA_SZ +#undef LOOP_NAME + +_s_TransformVertices_Strided_XYZNT_2_NEON: +#define COPY_DATA_SZ 2 +#define LOOP_NAME TransformVertices_Strided_XYZNT_2_Loop +#include "TransformVertexNEON_Loop.h" +#undef COPY_DATA_SZ +#undef LOOP_NAME + +_s_TransformVertices_Strided_XYZNT_3_NEON: +#define COPY_DATA_SZ 3 +#define LOOP_NAME TransformVertices_Strided_XYZNT_3_Loop +#include "TransformVertexNEON_Loop.h" +#undef COPY_DATA_SZ +#undef LOOP_NAME + +_s_TransformVertices_Strided_XYZNT_4_NEON: +#define COPY_DATA_SZ 4 +#define LOOP_NAME TransformVertices_Strided_XYZNT_4_Loop +#include "TransformVertexNEON_Loop.h" +#undef COPY_DATA_SZ +#undef LOOP_NAME + +_s_TransformVertices_Strided_XYZNT_5_NEON: +#define COPY_DATA_SZ 5 +#define LOOP_NAME TransformVertices_Strided_XYZNT_5_Loop +#include "TransformVertexNEON_Loop.h" +#undef COPY_DATA_SZ +#undef LOOP_NAME + + +#undef LOOP_XYZ +#undef LOOP_XYZN +#undef LOOP_XYZNT +#undef LOOP_SPRITE + +#define LOOP_XYZ 0 +#define LOOP_XYZN 0 +#define LOOP_XYZNT 0 +#define LOOP_SPRITE 1 + +_s_TransformVertices_Sprite_NEON: +#define LOOP_NAME TransformVertices_Sprite_Loop +#include 
"TransformVertexNEON_Loop.h" +#undef LOOP_NAME + +#undef LOOP_XYZ +#undef LOOP_XYZN +#undef LOOP_XYZNT +#undef LOOP_SPRITE + +#undef STRIDED_INPUT + +.endif + +#endif
\ No newline at end of file diff --git a/Runtime/Filters/Mesh/TransformVertexNEON_Loop.h b/Runtime/Filters/Mesh/TransformVertexNEON_Loop.h new file mode 100644 index 0000000..d84a516 --- /dev/null +++ b/Runtime/Filters/Mesh/TransformVertexNEON_Loop.h @@ -0,0 +1,254 @@ +// TODO: SOA + +// defines +// LOOP_XYZ +// LOOP_XYZN +// LOOP_XYZNT +// LOOP_NAME +// COPY_DATA_SZ +// STRIDED_INPUT + +#if STRIDED_INPUT + +//r0: const void* srcData +//r1: const void* srcDataEnd +//r2: const void* addData +//r3: const void* xform +//[sp+0]: void* dstData +//[sp+4]: const int stride + +mov ip, sp + +vpush {d0-d15} +stmfd sp!, {r4-r11} + +vldmia r3!, {q12-q15} + +// r3:dstData +// r4: stride +// r6: proper offset for out ptr (pos, normal) + +mov r6, #12 + +ldr r3, [ip, #0] +ldr r4, [ip, #4] + +// overlap calculation + +vmov.32 q0, q15 // pos.w (1.0) + + +#if LOOP_XYZ + +.align 4 +LOOP_NAME: + +pld [r0, #512] // prefetch + +vld1.32 {d6,d7}, [r0], r4 // load pos + +vmla.f32 q0, q12, d6[0] // pos.x +vmul.f32 q1, q13, d6[1] // pos.y +vmul.f32 q2, q14, d7[0] // pos.z + +vadd.f32 q0, q0, q1 + // load additional data +#if COPY_DATA_SZ == 1 +vld1.32 {d9}, [r2], r4 +#elif COPY_DATA_SZ == 2 +vld1.32 {d9}, [r2], r4 +#elif COPY_DATA_SZ == 3 +vld1.32 {d9,d10}, [r2], r4 +#elif COPY_DATA_SZ == 4 +vld1.32 {d9,d10}, [r2], r4 +#elif COPY_DATA_SZ == 5 +vld1.32 {d9,d10,d11}, [r2], r4 +#endif + +vadd.f32 q0, q0, q2 +cmp r0, r1 // check cycle + +vst1.32 {d0,d1}, [r3], r6 + +vmov.32 q0, q15 // pos.w (1.0) + // save additional data +#if COPY_DATA_SZ == 1 +vst1.32 {d9[0]}, [r3]! +#elif COPY_DATA_SZ == 2 +vst1.32 {d9}, [r3]! +#elif COPY_DATA_SZ == 3 +vst1.32 {d9}, [r3]! +vst1.32 {d10[0]}, [r3]! +#elif COPY_DATA_SZ == 4 +vst1.32 {d9,d10}, [r3]! +#elif COPY_DATA_SZ == 5 +vst1.32 {d9,d10}, [r3]! +vst1.32 {d11[0]}, [r3]! 
+#endif + +bcc LOOP_NAME + + +#elif LOOP_XYZN + + +.align 4 +LOOP_NAME: + +pld [r0, #512] // prefetch + +vld1.32 {d4,d5,d6}, [r0], r4 // load pos + normal + +vmla.f32 q0, q12, d4[0] // pos.x +vmul.f32 q1, q12, d5[1] // normal.x + + // load additional data +#if COPY_DATA_SZ == 1 +vld1.32 {d9}, [r2], r4 +#elif COPY_DATA_SZ == 2 +vld1.32 {d9}, [r2], r4 +#elif COPY_DATA_SZ == 3 +vld1.32 {d9,d10}, [r2], r4 +#elif COPY_DATA_SZ == 4 +vld1.32 {d9,d10}, [r2], r4 +#elif COPY_DATA_SZ == 5 +vld1.32 {d9,d10,d11}, [r2], r4 +#endif + +vmla.f32 q0, q13, d4[1] // pos.y +vmla.f32 q1, q13, d6[0] // normal.y + +vmla.f32 q0, q14, d5[0] // pos.z +vmla.f32 q1, q14, d6[1] // normal.z + +vst1.32 {d0,d1}, [r3], r6 + +cmp r0, r1 // check cycle +vmov.32 q0, q15 // pos.w (1.0) +vst1.32 {d2,d3}, [r3], r6 + // save additional data +#if COPY_DATA_SZ == 1 +vst1.32 {d9[0]}, [r3]! +#elif COPY_DATA_SZ == 2 +vst1.32 {d9}, [r3]! +#elif COPY_DATA_SZ == 3 +vst1.32 {d9}, [r3]! +vst1.32 {d10[0]}, [r3]! +#elif COPY_DATA_SZ == 4 +vst1.32 {d9,d10}, [r3]! +#elif COPY_DATA_SZ == 5 +vst1.32 {d9,d10}, [r3]! +vst1.32 {d11[0]}, [r3]! 
+#endif + + +bcc LOOP_NAME + + +#elif LOOP_XYZNT + +//[sp+8]: const void* tangent +//r8: tangent + +ldr r8, [ip, #8] + +mov r9, #12 +mov r10, #4 + +.align 4 +LOOP_NAME: + +pld [r0, #512] // prefetch + +vld1.32 {d4,d5,d6}, [r0], r4 // load pos + normal +vld1.32 {d7,d8}, [r8], r4 // load tangent + +vmla.f32 q0, q12, d4[0] // pos.x +vmul.f32 q1, q12, d5[1] // normal.x +vmul.f32 q11, q12, d7[0] // tangent.x + + // load additional data +#if COPY_DATA_SZ == 1 +vld1.32 {d9}, [r2], r4 +#elif COPY_DATA_SZ == 2 +vld1.32 {d9}, [r2], r4 +#elif COPY_DATA_SZ == 3 +vld1.32 {d9,d10}, [r2], r4 +#elif COPY_DATA_SZ == 4 +vld1.32 {d9,d10}, [r2], r4 +#elif COPY_DATA_SZ == 5 +vld1.32 {d9,d10,d11}, [r2], r4 +#endif + +vmla.f32 q0, q13, d4[1] // pos.y +vmla.f32 q1, q13, d6[0] // normal.y +vmla.f32 q11, q13, d7[1] // tangent.y + +vmla.f32 q0, q14, d5[0] // pos.z +vmla.f32 q1, q14, d6[1] // normal.z +vmla.f32 q11, q14, d8[0] // tangent.z + +vst1.32 {d0,d1}, [r3], r6 + +cmp r0, r1 // check cycle +vmov.32 q0, q15 // pos.w (1.0) +vst1.32 {d2,d3}, [r3], r6 + // save additional data +#if COPY_DATA_SZ == 1 +vst1.32 {d9[0]}, [r3]! +#elif COPY_DATA_SZ == 2 +vst1.32 {d9}, [r3]! +#elif COPY_DATA_SZ == 3 +vst1.32 {d9}, [r3]! +vst1.32 {d10[0]}, [r3]! +#elif COPY_DATA_SZ == 4 +vst1.32 {d9,d10}, [r3]! +#elif COPY_DATA_SZ == 5 +vst1.32 {d9,d10}, [r3]! +vst1.32 {d11[0]}, [r3]! +#endif + + +// TODO: less stupid way + +vtrn.32 d8, d7 +vst1.32 {d22,d23}, [r3], r9 +vst1.32 {d7[0]}, [r3], r10 + +bcc LOOP_NAME +#elif LOOP_SPRITE +.align 4 +ldr r7, [ip, #8] // load color32 +vmov.32 d10[0], r7 +LOOP_NAME: + +pld [r0, #512] // prefetch + +vld1.32 {d6,d7}, [r0], r4 // load pos + +vmla.f32 q0, q12, d6[0] // pos.x +vmul.f32 q1, q13, d6[1] // pos.y +vmul.f32 q2, q14, d7[0] // pos.z +vadd.f32 q0, q0, q1 +// load data +vld1.32 {d9}, [r2], r4 + +vadd.f32 q0, q0, q2 +cmp r0, r1 // check cycle + +vst1.32 {d0,d1}, [r3], r6 + +vmov.32 q0, q15 // pos.w (1.0) +// save data +vst1.32 {d10[0]}, [r3]! +vst1.32 {d9}, [r3]! 
+ + +bcc LOOP_NAME +#endif + +ldmfd sp!, {r4-r11} +vpop {d0-d15} +bx lr + +#endif diff --git a/Runtime/Filters/Mesh/TransformVertexVFP.s b/Runtime/Filters/Mesh/TransformVertexVFP.s new file mode 100644 index 0000000..114afc6 --- /dev/null +++ b/Runtime/Filters/Mesh/TransformVertexVFP.s @@ -0,0 +1,250 @@ +#define UNITY_ASSEMBLER +#include "Configuration/PrefixConfigure.h" +#include "Runtime/Utilities/VFPUtility.h" + +#if UNITY_SUPPORTS_VFP + +.syntax unified + +.set device,0 +.set device,__arm__ + +.if device + +//.code32 + + +.globl _s_TransformVertices_Strided_XYZ_0_VFP +.globl _s_TransformVertices_Strided_XYZ_1_VFP +.globl _s_TransformVertices_Strided_XYZ_2_VFP +.globl _s_TransformVertices_Strided_XYZ_3_VFP +.globl _s_TransformVertices_Strided_XYZ_4_VFP +.globl _s_TransformVertices_Strided_XYZ_5_VFP + +.globl _s_TransformVertices_Strided_XYZN_0_VFP +.globl _s_TransformVertices_Strided_XYZN_1_VFP +.globl _s_TransformVertices_Strided_XYZN_2_VFP +.globl _s_TransformVertices_Strided_XYZN_3_VFP +.globl _s_TransformVertices_Strided_XYZN_4_VFP +.globl _s_TransformVertices_Strided_XYZN_5_VFP + +.globl _s_TransformVertices_Strided_XYZNT_0_VFP +.globl _s_TransformVertices_Strided_XYZNT_1_VFP +.globl _s_TransformVertices_Strided_XYZNT_2_VFP +.globl _s_TransformVertices_Strided_XYZNT_3_VFP +.globl _s_TransformVertices_Strided_XYZNT_4_VFP +.globl _s_TransformVertices_Strided_XYZNT_5_VFP + +.globl _s_TransformVertices_Sprite_VFP + + +#if UNITY_ANDROID +.hidden _s_TransformVertices_Strided_XYZ_0_VFP +.hidden _s_TransformVertices_Strided_XYZ_1_VFP +.hidden _s_TransformVertices_Strided_XYZ_2_VFP +.hidden _s_TransformVertices_Strided_XYZ_3_VFP +.hidden _s_TransformVertices_Strided_XYZ_4_VFP +.hidden _s_TransformVertices_Strided_XYZ_5_VFP + +.hidden _s_TransformVertices_Strided_XYZN_0_VFP +.hidden _s_TransformVertices_Strided_XYZN_1_VFP +.hidden _s_TransformVertices_Strided_XYZN_2_VFP +.hidden _s_TransformVertices_Strided_XYZN_3_VFP +.hidden _s_TransformVertices_Strided_XYZN_4_VFP 
+.hidden _s_TransformVertices_Strided_XYZN_5_VFP + +.hidden _s_TransformVertices_Strided_XYZNT_0_VFP +.hidden _s_TransformVertices_Strided_XYZNT_1_VFP +.hidden _s_TransformVertices_Strided_XYZNT_2_VFP +.hidden _s_TransformVertices_Strided_XYZNT_3_VFP +.hidden _s_TransformVertices_Strided_XYZNT_4_VFP +.hidden _s_TransformVertices_Strided_XYZNT_5_VFP + +.hidden _s_TransformVertices_Sprite_VFP +#endif + +#define STRIDED_INPUT 1 + + +#define LOOP_XYZ 1 +#define LOOP_XYZN 0 +#define LOOP_XYZNT 0 +#define LOOP_SPRITE 0 + +_s_TransformVertices_Strided_XYZ_0_VFP: +#define COPY_DATA_SZ 0 +#define LOOP_NAME TransformVertices_Strided_XYZ_0_Loop +#include "TransformVertexVFP_Loop.h" +#undef COPY_DATA_SZ +#undef LOOP_NAME + +_s_TransformVertices_Strided_XYZ_1_VFP: +#define COPY_DATA_SZ 1 +#define LOOP_NAME TransformVertices_Strided_XYZ_1_Loop +#include "TransformVertexVFP_Loop.h" +#undef COPY_DATA_SZ +#undef LOOP_NAME + +_s_TransformVertices_Strided_XYZ_2_VFP: +#define COPY_DATA_SZ 2 +#define LOOP_NAME TransformVertices_Strided_XYZ_2_Loop +#include "TransformVertexVFP_Loop.h" +#undef COPY_DATA_SZ +#undef LOOP_NAME + +_s_TransformVertices_Strided_XYZ_3_VFP: +#define COPY_DATA_SZ 3 +#define LOOP_NAME TransformVertices_Strided_XYZ_3_Loop +#include "TransformVertexVFP_Loop.h" +#undef COPY_DATA_SZ +#undef LOOP_NAME + +_s_TransformVertices_Strided_XYZ_4_VFP: +#define COPY_DATA_SZ 4 +#define LOOP_NAME TransformVertices_Strided_XYZ_4_Loop +#include "TransformVertexVFP_Loop.h" +#undef COPY_DATA_SZ +#undef LOOP_NAME + +_s_TransformVertices_Strided_XYZ_5_VFP: +#define COPY_DATA_SZ 5 +#define LOOP_NAME TransformVertices_Strided_XYZ_5_Loop +#include "TransformVertexVFP_Loop.h" +#undef COPY_DATA_SZ +#undef LOOP_NAME + + +#undef LOOP_XYZ +#undef LOOP_XYZN +#undef LOOP_XYZNT +#undef LOOP_SPRITE + + +#define LOOP_XYZ 0 +#define LOOP_XYZN 1 +#define LOOP_XYZNT 0 +#define LOOP_SPRITE 0 + + +_s_TransformVertices_Strided_XYZN_0_VFP: +#define COPY_DATA_SZ 0 +#define LOOP_NAME 
TransformVertices_Strided_XYZN_0_Loop +#include "TransformVertexVFP_Loop.h" +#undef COPY_DATA_SZ +#undef LOOP_NAME + +_s_TransformVertices_Strided_XYZN_1_VFP: +#define COPY_DATA_SZ 1 +#define LOOP_NAME TransformVertices_Strided_XYZN_1_Loop +#include "TransformVertexVFP_Loop.h" +#undef COPY_DATA_SZ +#undef LOOP_NAME + +_s_TransformVertices_Strided_XYZN_2_VFP: +#define COPY_DATA_SZ 2 +#define LOOP_NAME TransformVertices_Strided_XYZN_2_Loop +#include "TransformVertexVFP_Loop.h" +#undef COPY_DATA_SZ +#undef LOOP_NAME + +_s_TransformVertices_Strided_XYZN_3_VFP: +#define COPY_DATA_SZ 3 +#define LOOP_NAME TransformVertices_Strided_XYZN_3_Loop +#include "TransformVertexVFP_Loop.h" +#undef COPY_DATA_SZ +#undef LOOP_NAME + +_s_TransformVertices_Strided_XYZN_4_VFP: +#define COPY_DATA_SZ 4 +#define LOOP_NAME TransformVertices_Strided_XYZN_4_Loop +#include "TransformVertexVFP_Loop.h" +#undef COPY_DATA_SZ +#undef LOOP_NAME + +_s_TransformVertices_Strided_XYZN_5_VFP: +#define COPY_DATA_SZ 5 +#define LOOP_NAME TransformVertices_Strided_XYZN_5_Loop +#include "TransformVertexVFP_Loop.h" +#undef COPY_DATA_SZ +#undef LOOP_NAME + + +#undef LOOP_XYZ +#undef LOOP_XYZN +#undef LOOP_XYZNT +#undef LOOP_SPRITE + + +#define LOOP_XYZ 0 +#define LOOP_XYZN 0 +#define LOOP_XYZNT 1 +#define LOOP_SPRITE 0 + + +_s_TransformVertices_Strided_XYZNT_0_VFP: +#define COPY_DATA_SZ 0 +#define LOOP_NAME TransformVertices_Strided_XYZNT_0_Loop +#include "TransformVertexVFP_Loop.h" +#undef COPY_DATA_SZ +#undef LOOP_NAME + +_s_TransformVertices_Strided_XYZNT_1_VFP: +#define COPY_DATA_SZ 1 +#define LOOP_NAME TransformVertices_Strided_XYZNT_1_Loop +#include "TransformVertexVFP_Loop.h" +#undef COPY_DATA_SZ +#undef LOOP_NAME + +_s_TransformVertices_Strided_XYZNT_2_VFP: +#define COPY_DATA_SZ 2 +#define LOOP_NAME TransformVertices_Strided_XYZNT_2_Loop +#include "TransformVertexVFP_Loop.h" +#undef COPY_DATA_SZ +#undef LOOP_NAME + +_s_TransformVertices_Strided_XYZNT_3_VFP: +#define COPY_DATA_SZ 3 +#define LOOP_NAME 
TransformVertices_Strided_XYZNT_3_Loop +#include "TransformVertexVFP_Loop.h" +#undef COPY_DATA_SZ +#undef LOOP_NAME + +_s_TransformVertices_Strided_XYZNT_4_VFP: +#define COPY_DATA_SZ 4 +#define LOOP_NAME TransformVertices_Strided_XYZNT_4_Loop +#include "TransformVertexVFP_Loop.h" +#undef COPY_DATA_SZ +#undef LOOP_NAME + +_s_TransformVertices_Strided_XYZNT_5_VFP: +#define COPY_DATA_SZ 5 +#define LOOP_NAME TransformVertices_Strided_XYZNT_5_Loop +#include "TransformVertexVFP_Loop.h" +#undef COPY_DATA_SZ +#undef LOOP_NAME + +#undef LOOP_XYZ +#undef LOOP_XYZN +#undef LOOP_XYZNT +#undef LOOP_SPRITE + +#define LOOP_XYZ 0 +#define LOOP_XYZN 0 +#define LOOP_XYZNT 0 +#define LOOP_SPRITE 1 + +_s_TransformVertices_Sprite_VFP: +#define LOOP_NAME TransformVerties_Sprite_Loop +#include "TransformVertexVFP_Loop.h" +#undef LOOP_NAME + +#undef LOOP_XYZ +#undef LOOP_XYZN +#undef LOOP_XYZNT +#undef LOOP_SPRITE + +#undef STRIDED_INPUT + +.endif + +#endif diff --git a/Runtime/Filters/Mesh/TransformVertexVFP_Loop.h b/Runtime/Filters/Mesh/TransformVertexVFP_Loop.h new file mode 100644 index 0000000..48193c8 --- /dev/null +++ b/Runtime/Filters/Mesh/TransformVertexVFP_Loop.h @@ -0,0 +1,252 @@ +// defines +// LOOP_XYZ +// LOOP_XYZN +// LOOP_XYZNT +// LOOP_SPRITE +// LOOP_NAME +// COPY_DATA_SZ +// STRIDED_INPUT + +#if STRIDED_INPUT + +//r0: const void* srcData +//r1: const void* srcDataEnd +//r2: const void* addData +//r3: const void* xform +//[sp+0]: void* dstData +//[sp+4]: const int stride +//[sp+8]: const void* tangent + +mov ip, sp + +vpush {d0-d15} +stmfd sp!, {r4-r11} + +// {s16-s31} xform + +vldmia.32 r3!, {s16-s31} + +// r3: dstData +// r4: stride +//r11: tangent +ldr r3, [ip, #0] +ldr r4, [ip, #4] + +#if LOOP_XYZNT +ldr r11, [ip, #8] +#endif + +#if LOOP_SPRITE +//r6: color +ldr r6, [ip, #8] +#endif + + +mov ip, r0 +// VFP_VECTOR_LENGTH(3) +mov r0, ip + + +#if LOOP_XYZ + +.align 4 +LOOP_NAME: + +mov r5, r0 +pld [r0, #512] // prefetch + +vldmia.32 r5!, {s0-s2} // load pos +FCPYS4 
(8,9,10,11, 28,29,30,31) // pos.w + +FMACS4 (8,9,10,11, 16,17,18,19, 0,0,0,0) // pos.x +#if COPY_DATA_SZ == 1 +ldmia r2, {r6} // load additional data +#elif COPY_DATA_SZ == 2 +ldmia r2, {r6-r7} // load additional data +#elif COPY_DATA_SZ == 3 +ldmia r2, {r6-r8} // load additional data +#elif COPY_DATA_SZ == 4 +ldmia r2, {r6-r9} // load additional data +#elif COPY_DATA_SZ == 5 +ldmia r2, {r6-r10} // load additional data +#endif + +FMACS4 (8,9,10,11, 20,21,22,23, 1,1,1,1) // pos.y +add r0, r0, r4 // inc srcData + +FMACS4 (8,9,10,11, 24,25,26,27, 2,2,2,2) // pos.z +add r2, r2, r4 // inc srcAddData + +vstmia.32 r3!, {s8-s10} // store pos +cmp r0, r1 // check cycle + +#if COPY_DATA_SZ == 1 +stmia r3!, {r6} // save additional data +#elif COPY_DATA_SZ == 2 +stmia r3!, {r6-r7} // save additional data +#elif COPY_DATA_SZ == 3 +stmia r3!, {r6-r8} // save additional data +#elif COPY_DATA_SZ == 4 +stmia r3!, {r6-r9} // save additional data +#elif COPY_DATA_SZ == 5 +stmia r3!, {r6-r10} // save additional data +#endif + +bcc LOOP_NAME + + +#elif LOOP_XYZN + +.align 4 +LOOP_NAME: + +mov r5, r0 +pld [r0, #512] // prefetch + +vldmia.32 r5!, {s0-s2} // load pos +FCPYS4 (8,9,10,11, 28,29,30,31) // pos.w + +vldmia.32 r5!, {s3-s5} // load normal +FMACS4 (8,9,10,11, 16,17,18,19, 0,0,0,0) // pos.x + +FMULS4 (12,13,14,15, 16,17,18,19, 3,3,3,3) // normal.x +FMACS4 (8,9,10,11, 20,21,22,23, 1,1,1,1) // pos.y + +#if COPY_DATA_SZ == 1 +ldmia r2, {r6} // load additional data +#elif COPY_DATA_SZ == 2 +ldmia r2, {r6-r7} // load additional data +#elif COPY_DATA_SZ == 3 +ldmia r2, {r6-r8} // load additional data +#elif COPY_DATA_SZ == 4 +ldmia r2, {r6-r9} // load additional data +#elif COPY_DATA_SZ == 5 +ldmia r2, {r6-r10} // load additional data +#endif +FMACS4 (8,9,10,11, 24,25,26,27, 2,2,2,2) // pos.z + +FMACS4 (12,13,14,15, 20,21,22,23, 4,4,4,4) // normal.y +vstmia.32 r3!, {s8-s10} // store pos + +FMACS4 (12,13,14,15, 24,25,26,27, 5,5,5,5) // normal.z +add r0, r0, r4 // inc srcData + +vstmia.32 
r3!, {s12-s14} // store normal +add r2, r2, r4 // inc srcAddData + +cmp r0, r1 // check cycle +#if COPY_DATA_SZ == 1 +stmia r3!, {r6} // save additional data +#elif COPY_DATA_SZ == 2 +stmia r3!, {r6-r7} // save additional data +#elif COPY_DATA_SZ == 3 +stmia r3!, {r6-r8} // save additional data +#elif COPY_DATA_SZ == 4 +stmia r3!, {r6-r9} // save additional data +#elif COPY_DATA_SZ == 5 +stmia r3!, {r6-r10} // save additional data +#endif + +bcc LOOP_NAME + +#elif LOOP_XYZNT + +.align 4 +LOOP_NAME: + +mov r5, r0 +pld [r0, #512] // prefetch + +vldmia.32 r5!, {s0-s2} // load pos +FCPYS4 (8,9,10,11, 28,29,30,31) // pos.w + +vldmia.32 r5!, {s3-s5} // load normal +FMACS4 (8,9,10,11, 16,17,18,19, 0,0,0,0) // pos.x + +FMULS4 (12,13,14,15, 16,17,18,19, 3,3,3,3) // normal.x +FMACS4 (8,9,10,11, 20,21,22,23, 1,1,1,1) // pos.y + +#if COPY_DATA_SZ == 1 +ldmia r2, {r6} // load additional data +#elif COPY_DATA_SZ == 2 +ldmia r2, {r6-r7} // load additional data +#elif COPY_DATA_SZ == 3 +ldmia r2, {r6-r8} // load additional data +#elif COPY_DATA_SZ == 4 +ldmia r2, {r6-r9} // load additional data +#elif COPY_DATA_SZ == 5 +ldmia r2, {r6-r10} // load additional data +#endif +FMACS4 (8,9,10,11, 24,25,26,27, 2,2,2,2) // pos.z + +FMACS4 (12,13,14,15, 20,21,22,23, 4,4,4,4) // normal.y +vstmia.32 r3!, {s8-s10} // store pos + +FMACS4 (12,13,14,15, 24,25,26,27, 5,5,5,5) // normal.z +vldmia.32 r11, {s0-s3} // load tangent + +add r0, r0, r4 // inc srcData +FMULS4 (8,9,10,11, 16,17,18,19, 0,0,0,0) // tangent.x + +vstmia.32 r3!, {s12-s14} // store normal +FMACS4 (8,9,10,11, 20,21,22,23, 1,1,1,1) // tangent.y + +cmp r0, r1 // check cycle +FMACS4 (8,9,10,11, 24,25,26,27, 2,2,2,2) // tangent.z + +#if COPY_DATA_SZ == 1 +stmia r3!, {r6} // save additional data +#elif COPY_DATA_SZ == 2 +stmia r3!, {r6-r7} // save additional data +#elif COPY_DATA_SZ == 3 +stmia r3!, {r6-r8} // save additional data +#elif COPY_DATA_SZ == 4 +stmia r3!, {r6-r9} // save additional data +#elif COPY_DATA_SZ == 5 +stmia r3!, 
{r6-r10} // save additional data +#endif +fcpys s11, s3 // copy tangent.w + +vstmia.32 r3!, {s8-s11} // store tangent +add r2, r2, r4 // inc srcAddData + +add r11, r11, r4 // inc srcTangent +bcc LOOP_NAME + +#elif LOOP_SPRITE + +.align 4 +LOOP_NAME: + +mov r5, r0 +pld [r0, #512] // prefetch + +vldmia.32 r5!, {s0-s2} // load pos +FCPYS4 (8,9,10,11, 28,29,30,31) // pos.w + +FMACS4 (8,9,10,11, 16,17,18,19, 0,0,0,0) // pos.x + + +ldmia r2, {r7-r8} // load uv + +FMACS4 (8,9,10,11, 20,21,22,23, 1,1,1,1) // pos.y +add r0, r0, r4 // inc srcData + +FMACS4 (8,9,10,11, 24,25,26,27, 2,2,2,2) // pos.z +add r2, r2, r4 // inc srcAddData + +vstmia.32 r3!, {s8-s10} // store pos +cmp r0, r1 // check cycle + +stmia r3!, {r6-r8} // save color and uv + +bcc LOOP_NAME +#endif + +// VFP_VECTOR_LENGTH_ZERO + +ldmfd sp!, {r4-r11} +vpop {d0-d15} +bx lr + +#endif // STRIDED_INPUT diff --git a/Runtime/Filters/Mesh/VertexData.cpp b/Runtime/Filters/Mesh/VertexData.cpp new file mode 100644 index 0000000..b922805 --- /dev/null +++ b/Runtime/Filters/Mesh/VertexData.cpp @@ -0,0 +1,559 @@ +#include "UnityPrefix.h" +#include "Configuration/UnityConfigure.h" +#include "VertexData.h" +#include "Runtime/Shaders/VBO.h" +#include "Runtime/Serialize/TransferFunctions/SerializeTransfer.h" +#include "Runtime/Serialize/TransferUtility.h" +#include "Runtime/Serialize/SwapEndianArray.h" +#include <algorithm> + +/* + On most platforms, for skinning/non-uniform-scaling of meshes you would want to split your data into + a hot data stream (position, normal and tangent) and a cold data stream (diffuse and uvs) in order to maximize CPU cache access patterns and + reduce bandwidth and computation ( you won't need to copy the cold data ) +*/ + +VertexStreamsLayout VertexDataInfo::kVertexStreamsDefault = {{ kShaderChannelsAll, 0, 0, 0 }}; +#if UNITY_PS3 + VertexStreamsLayout VertexDataInfo::kVertexStreamsSkinnedHotColdSplit = {{ VERTEX_FORMAT1(Vertex), VERTEX_FORMAT1(Normal), VERTEX_FORMAT1(Tangent), kShaderChannelsCold 
}}; +#else + VertexStreamsLayout VertexDataInfo::kVertexStreamsSkinnedHotColdSplit = {{ kShaderChannelsHot, kShaderChannelsCold, 0, 0 }}; +# if UNITY_EDITOR + VertexStreamsLayout VertexDataInfo::kVertexStreamsSkinnedHotColdSplitPS3 = {{ VERTEX_FORMAT1(Vertex), VERTEX_FORMAT1(Normal), VERTEX_FORMAT1(Tangent), kShaderChannelsCold }}; +# endif +#endif + +#define MAKE_CHANNEL(fmt, dim) VertexChannelsLayout::Channel(kChannelFormat##fmt, dim) +VertexChannelsLayout VertexDataInfo::kVertexChannelsDefault = +{{ // Array wrapped by struct requires double braces + MAKE_CHANNEL(Float, 3), // position + MAKE_CHANNEL(Float, 3), // normal + MAKE_CHANNEL(Color, 1), // color + MAKE_CHANNEL(Float, 2), // texcoord0 + MAKE_CHANNEL(Float, 2), // texcoord1 + MAKE_CHANNEL(Float, 4) // tangent +}}; +VertexChannelsLayout VertexDataInfo::kVertexChannelsCompressed = +{{ // Array wrapped by struct requires double braces + MAKE_CHANNEL(Float, 3), // position + MAKE_CHANNEL(Float16, 4), // normal + MAKE_CHANNEL(Color, 1), // color + MAKE_CHANNEL(Float16, 2), // texcoord0 + MAKE_CHANNEL(Float16, 2), // texcoord1 + MAKE_CHANNEL(Float16, 4) // tangent +}}; +VertexChannelsLayout VertexDataInfo::kVertexChannelsCompressedAggressive = +{{ // Array wrapped by struct requires double braces + MAKE_CHANNEL(Float, 3), // position + MAKE_CHANNEL(Byte, 4), // normal + MAKE_CHANNEL(Color, 1), // color + MAKE_CHANNEL(Float16, 2), // texcoord0 + MAKE_CHANNEL(Float16, 2), // texcoord1 + MAKE_CHANNEL(Byte, 4) // tangent +}}; +#undef MAKE_CHANNEL + +static const UInt8 kVertexChannelFormatSizes[kChannelFormatCount] = { + 4, // kChannelFormatFloat + 2, // kChannelFormatFloat16 + 4, // kChannelFormatColor + 1 // kChannelFormatByte +}; + +size_t GetChannelFormatSize(UInt8 format) +{ + Assert (format < kChannelFormatCount); + return kVertexChannelFormatSizes[format]; +} + +static bool operator == (const VertexStreamsLayout& lhs, const VertexStreamsLayout& rhs) +{ + return CompareArrays(lhs.channelMasks, 
rhs.channelMasks, kMaxVertexStreams); +} + +template<class TransferFunction> +void VertexData::Transfer (TransferFunction& transfer) +{ + #if SUPPORT_SERIALIZED_TYPETREES + if (transfer.GetFlags() & kWorkaround35MeshSerializationFuckup) + { + TransferWorkaround35SerializationFuckup (transfer); + return; + } + #endif + + transfer.Transfer (m_CurrentChannels, "m_CurrentChannels", kHideInEditorMask); + transfer.Transfer (m_VertexCount, "m_VertexCount", kHideInEditorMask); + + dynamic_array<ChannelInfo> channels; + dynamic_array<StreamInfo> streams; + if (transfer.IsWriting ()) + { + channels.resize_uninitialized (kShaderChannelCount); + streams.resize_uninitialized (kMaxVertexStreams); + std::copy (m_Channels, m_Channels + kShaderChannelCount, channels.begin ()); + std::copy (m_Streams, m_Streams + kMaxVertexStreams, streams.begin ()); + } + transfer.Transfer (channels, "m_Channels", kHideInEditorMask); + transfer.Transfer (streams, "m_Streams", kHideInEditorMask); + + if (transfer.IsReading ()) + { + // For compatibility do this even if channels/streams info didn't exist (case 558604) + // In the past there was only a channels mask, UpdateStreams() generates the info from that + if (channels.size () == kShaderChannelCount) + std::copy (channels.begin (), channels.begin () + kShaderChannelCount, m_Channels); + if (streams.size () == kMaxVertexStreams) + std::copy (streams.begin (), streams.begin () + kMaxVertexStreams, m_Streams); + else + std::fill (m_Streams, m_Streams + kMaxVertexStreams, StreamInfo()); + + UInt32 channelsInStreams = 0; + for (int i = 0; i < kMaxVertexStreams ; i++) + channelsInStreams |= m_Streams[i].channelMask; + if (channelsInStreams) + UpdateStreams(channelsInStreams, m_VertexCount, GetStreamsLayout (), GetChannelsLayout ()); + else + UpdateStreams(m_CurrentChannels, m_VertexCount, kVertexStreamsDefault, kVertexChannelsDefault); + } + + transfer.TransferTypeless (&m_DataSize, "m_DataSize", kHideInEditorMask); + if (transfer.DidReadLastProperty 
()) + { + if (m_Data) + UNITY_FREE (kMemVertexData, m_Data); + m_Data = (UInt8*)UNITY_MALLOC_ALIGNED (kMemVertexData, VertexData::GetAllocateDataSize (m_DataSize), kVertexDataAlign); + } + + transfer.TransferTypelessData (m_DataSize, m_Data); +} + +#if SUPPORT_SERIALIZED_TYPETREES +template<class TransferFunction> +void VertexData::TransferWorkaround35SerializationFuckup (TransferFunction& transfer) +{ + UInt32 currentChannels = m_CurrentChannels; + transfer.Transfer (currentChannels, "m_CurrentChannels", kHideInEditorMask); + transfer.Transfer (m_VertexCount, "m_VertexCount", kHideInEditorMask); + + TRANSFER(m_Streams[0]); + TRANSFER(m_Streams[1]); + TRANSFER(m_Streams[2]); + TRANSFER(m_Streams[3]); + + if (transfer.IsReading ()) + { + if(m_VertexCount && (currentChannels == 0)) + { + for(int i=0;i<kMaxVertexStreams;i++) + currentChannels |= m_Streams[i].channelMask; + } + UpdateStreams(currentChannels, m_VertexCount); + //GetComponentInfo(m_Components, currentChannels); + m_CurrentChannels = currentChannels; + } + + transfer.TransferTypeless (&m_DataSize, "m_DataSize", kHideInEditorMask); + + if (transfer.IsReading ()) + { + if (m_Data) + UNITY_FREE (kMemVertexData, m_Data); + m_Data = (UInt8*)UNITY_MALLOC_ALIGNED (kMemVertexData, VertexData::GetAllocateDataSize (m_DataSize), kVertexDataAlign); + } + + transfer.TransferTypelessData (m_DataSize, m_Data); +} +#endif + +INSTANTIATE_TEMPLATE_TRANSFER(VertexData) + +void VertexDataInfo::UpdateStreams(unsigned newChannelMask, size_t newVertexCount, const VertexStreamsLayout& streams, const VertexChannelsLayout& channels) +{ + m_VertexCount = newVertexCount; + m_CurrentChannels = 0; + m_VertexSize = 0; + size_t streamOffset = 0; + for (int s = 0; s < kMaxVertexStreams; s++) + { + StreamInfo& stream = m_Streams[s]; + m_Streams[s].Reset(); + stream.channelMask = streams.channelMasks[s] & newChannelMask; + if (stream.channelMask == 0) + continue; + m_CurrentChannels |= stream.channelMask; + for (int c = 0; c < 
kShaderChannelCount; c++) + { + if (stream.channelMask & (1 << c)) + { + ChannelInfo& channel = m_Channels[c]; + const VertexChannelsLayout::Channel& srcChannel = channels.channels[c]; + channel.stream = s; + channel.offset = stream.stride; + channel.format = srcChannel.format; + channel.dimension = srcChannel.dimension; + stream.stride += channel.dimension * GetChannelFormatSize(channel.format); + } + } + streamOffset = AlignStreamSize(streamOffset); + stream.offset = streamOffset; + streamOffset += stream.stride * newVertexCount; + m_VertexSize += stream.stride; + } + for (int c = 0; c < kShaderChannelCount; c++) + { + // Reset channels that were removed + if (!(m_CurrentChannels & (1 << c))) + m_Channels[c].Reset(); + } + m_DataSize = streamOffset; +} + +size_t VertexDataInfo::GetActiveStreamCount() const +{ + size_t activeStreamCount = 0; + for (int i=0; i<kMaxVertexStreams; i++) + { + if(m_Streams[i].channelMask != 0) + activeStreamCount++; + } + return activeStreamCount; +} + +size_t VertexDataInfo::GetStreamIndex(ShaderChannel channel) const +{ + UInt32 channelMask = 1 << channel; + for (int i=0; i<kMaxVertexStreams; i++) + { + if(m_Streams[i].channelMask & channelMask) + return i; + } + return -1; +} + +VertexStreamsLayout VertexDataInfo::GetStreamsLayout() const +{ + VertexStreamsLayout result; + for (int i = 0; i < kMaxVertexStreams; i++) + result.channelMasks[i] = m_Streams[i].channelMask; + return result; +} + +VertexChannelsLayout VertexDataInfo::GetChannelsLayout() const +{ + VertexChannelsLayout result; + for (int i = 0; i < kShaderChannelCount; i++) + { + result.channels[i] = VertexChannelsLayout::Channel(m_Channels[i].format, m_Channels[i].dimension); + } + return result; +} + +bool VertexDataInfo::ConformsToStreamsLayout(const VertexStreamsLayout& streams) const +{ + for (int i = 0; i < kMaxVertexStreams; i++) + { + // Fail if we have a channel that's not in the layout + if (m_Streams[i].channelMask & ~streams.channelMasks[i]) + return false; + } 
+ return true; +} + +bool VertexDataInfo::ConformsToChannelsLayout(const VertexChannelsLayout& channels) const +{ + for (int i = 0; i < kShaderChannelCount; i++) + { + if (m_Channels[i].IsValid()) + { + const VertexChannelsLayout::Channel& channel = channels.channels[i]; + if (m_Channels[i].format != channel.format || + m_Channels[i].dimension != channel.dimension) + return false; + } + } + return true; +} + +signed char f32_to_s8(float fval) +{ + return ((fval * 255.0f) - 1.0f) / 2.0f; +} + +float s8_to_f32(signed char val) +{ + return (2*(val/255.0f)-1.0f); +} + +static void ConvertCopyChannel(size_t vertexCount, + const UInt8* srcPtr, UInt8 srcStride, UInt8 srcType, UInt8 srcDim, + UInt8* dstPtr, UInt8 dstStride, UInt8 dstType, UInt8 dstDim) +{ + UInt8 minDim = std::min(srcDim, dstDim); + if (srcType == kChannelFormatFloat16 && dstType == kChannelFormatFloat) + { + // decompressing + for (size_t i = 0; i < vertexCount; i++) + { + UInt8 comp = 0; + for ( ; comp < minDim; comp++) + HalfToFloat(reinterpret_cast<const UInt16*>(srcPtr)[comp], reinterpret_cast<float*>(dstPtr)[comp]); + for ( ; comp < dstDim; comp++) + reinterpret_cast<float*>(dstPtr)[comp] = 0.0f; + srcPtr += srcStride; + dstPtr += dstStride; + } + } + else if (srcType == kChannelFormatByte && dstType == kChannelFormatFloat) + { + // decompressing + for (size_t i = 0; i < vertexCount; i++) + { + UInt8 comp = 0; + for ( ; comp < minDim; comp++) + reinterpret_cast<float*>(dstPtr)[comp] = s8_to_f32(reinterpret_cast<const SInt8*>(srcPtr)[comp]); + for ( ; comp < dstDim; comp++) + reinterpret_cast<float*>(dstPtr)[comp] = 0.0f; + srcPtr += srcStride; + dstPtr += dstStride; + } + } +#if UNITY_EDITOR + else if (srcType == kChannelFormatFloat && dstType == kChannelFormatFloat16) + { + // compressing + for (size_t i = 0; i < vertexCount; i++) + { + UInt8 comp = 0; + for ( ; comp < minDim; comp++) + g_FloatToHalf.Convert(reinterpret_cast<const float*>(srcPtr)[comp], reinterpret_cast<UInt16*>(dstPtr)[comp]); + 
for ( ; comp < dstDim; comp++) + reinterpret_cast<UInt16*>(dstPtr)[comp] = 0; + srcPtr += srcStride; + dstPtr += dstStride; + } + } + else if (srcType == kChannelFormatFloat && dstType == kChannelFormatByte) + { + // compressing + for (size_t i = 0; i < vertexCount; i++) + { + UInt8 comp = 0; + for ( ; comp < minDim; comp++) + reinterpret_cast<SInt8*>(dstPtr)[comp] = f32_to_s8(reinterpret_cast<const float*>(srcPtr)[comp]); + for ( ; comp < dstDim; comp++) + reinterpret_cast<SInt8*>(dstPtr)[comp] = 0; + srcPtr += srcStride; + dstPtr += dstStride; + } + } +#endif + else + ErrorString("Unsupported conversion of vertex formats"); +} + +static void CopyChannels (size_t vertexCount, unsigned copyChannels, + const StreamInfoArray srcStreams, const ChannelInfoArray srcChannels, const UInt8* srcData, + const StreamInfoArray dstStreams, const ChannelInfoArray dstChannels, UInt8* dstData) +{ + for (unsigned chan = copyChannels, i = 0; chan && (i < kShaderChannelCount); i++, chan >>= 1) + { + if (0 == (chan & 1)) + continue; + + const ChannelInfo& srcChannel = srcChannels[i]; + const ChannelInfo& dstChannel = dstChannels[i]; + + const UInt8* srcPtr = srcData + srcChannel.CalcOffset(srcStreams); + UInt8* dstPtr = dstData + dstChannel.CalcOffset(dstStreams); + UInt8 srcStride = srcChannel.CalcStride(srcStreams); + UInt8 dstStride = dstChannel.CalcStride(dstStreams); + + if(srcChannel.format == dstChannel.format) + { + size_t copySize = srcChannel.dimension * GetChannelFormatSize(srcChannel.format); + switch (copySize) + { + case 4: + { + for (size_t i=0; i<vertexCount; ++i) + { + *(reinterpret_cast<UInt32*> (dstPtr) + 0) = *(reinterpret_cast<const UInt32*> (srcPtr) + 0); + srcPtr += srcStride; + dstPtr += dstStride; + } + break; + } + case 8: + { + for (size_t i=0; i<vertexCount; ++i) + { + *(reinterpret_cast<UInt32*> (dstPtr) + 0) = *(reinterpret_cast<const UInt32*> (srcPtr) + 0); + *(reinterpret_cast<UInt32*> (dstPtr) + 1) = *(reinterpret_cast<const UInt32*> (srcPtr) + 1); + 
srcPtr += srcStride; + dstPtr += dstStride; + } + break; + } + case 12: + { + for (size_t i=0; i<vertexCount; ++i) + { + *(reinterpret_cast<UInt32*> (dstPtr) + 0) = *(reinterpret_cast<const UInt32*> (srcPtr) + 0); + *(reinterpret_cast<UInt32*> (dstPtr) + 1) = *(reinterpret_cast<const UInt32*> (srcPtr) + 1); + *(reinterpret_cast<UInt32*> (dstPtr) + 2) = *(reinterpret_cast<const UInt32*> (srcPtr) + 2); + srcPtr += srcStride; + dstPtr += dstStride; + } + break; + } + default: + { + for (size_t i=0; i<vertexCount; ++i) + { + memcpy (dstPtr, srcPtr, copySize); + srcPtr += srcStride; + dstPtr += dstStride; + } + break; + } + } + } + else + { + ConvertCopyChannel(vertexCount, srcPtr, srcStride, srcChannel.format, srcChannel.dimension, dstPtr, dstStride, dstChannel.format, dstChannel.dimension); + } + } +} + +VertexDataInfo::VertexDataInfo () +: m_Data(NULL) +, m_DataSize(0) +, m_VertexCount(0) +, m_VertexSize(0) +, m_CurrentChannels(0) +{ + // Channels and streams have default constructors +} + +VertexData::VertexData (VertexData const& src, unsigned copyChannels, const VertexStreamsLayout& streams, const VertexChannelsLayout& channels) +{ + // We do not support inserting new channels that are not present in the source + Assert ((copyChannels & src.GetChannelMask()) == copyChannels); + + UpdateStreams(copyChannels, src.m_VertexCount, streams, channels); + m_Data = (UInt8*) UNITY_MALLOC_ALIGNED (kMemVertexData, VertexData::GetAllocateDataSize (m_DataSize), kVertexDataAlign); + + const VertexData& dest = *this; + if (m_DataSize == src.m_DataSize && + copyChannels == src.GetChannelMask() && + CompareMemory(dest.m_Channels, src.m_Channels) && + CompareMemory(dest.m_Streams, src.m_Streams)) + { + // Simple copy if the format didn't change + memcpy (m_Data, src.m_Data, m_DataSize); + } + else + CopyChannels (m_VertexCount, copyChannels, src.m_Streams, src.m_Channels, src.m_Data, m_Streams, m_Channels, m_Data); +} + +VertexData::~VertexData () +{ + Deallocate(); +} + +void 
VertexData::Deallocate () +{ + if (m_Data) + UNITY_FREE(kMemVertexData, m_Data); + m_Data = NULL; +} + +void VertexData::Resize (size_t vertexCount, unsigned channelMask, const VertexStreamsLayout& streams, const VertexChannelsLayout& channels) +{ + ChannelInfoArray srcChannels; + StreamInfoArray srcStreams; + memcpy(srcChannels, m_Channels, sizeof(srcChannels)); + memcpy(srcStreams, m_Streams, sizeof(srcStreams)); + UInt32 srcChannelMask = m_CurrentChannels; + UInt32 srcVertexCount = m_VertexCount; + UInt8* srcData = m_Data; + + UpdateStreams(channelMask, vertexCount, streams, channels); + + // In case the streams and channels don't change, simply reallocate the buffer and return + // Note that this will rarely be true with multiple streams since the stream offsets change + if (m_Data && CompareMemory(srcChannels, m_Channels) && CompareMemory(srcStreams, m_Streams)) + { + m_Data = (UInt8*)UNITY_REALLOC_ALIGNED(kMemVertexData, m_Data, VertexData::GetAllocateDataSize(m_DataSize), kVertexDataAlign); + return; + } + + m_Data = (UInt8*)UNITY_MALLOC_ALIGNED(kMemVertexData, VertexData::GetAllocateDataSize(m_DataSize), kVertexDataAlign); + // copy over the old data + if (srcData) + { + unsigned copyChannels = srcChannelMask & m_CurrentChannels; + size_t toCopyCount = std::min<size_t>(srcVertexCount, m_VertexCount); + CopyChannels(toCopyCount, copyChannels, srcStreams, srcChannels, srcData, m_Streams, m_Channels, m_Data); + UNITY_FREE(kMemVertexData, srcData); + } +} + + +void VertexData::SwapEndianess () +{ + unsigned const kChannelSwapMask = VERTEX_FORMAT5(Vertex, Normal, TexCoord0, TexCoord1, Tangent); + for (int s = 0; s < kMaxVertexStreams; s++) + { + if (m_Streams[s].stride) + { + StreamInfo& stream = m_Streams[s]; + size_t stride = stream.stride; + UInt8* dataStart = m_Data + stream.offset; + UInt8* dataEnd = dataStart + stream.stride * m_VertexCount; + UInt32 channelMask = stream.channelMask; + for (UInt8* p = dataStart, *end = dataEnd; p != end; p += stride) + { + 
// counting from LSb, 1 denotes that a value should be endian-swapped + int localOffset = 0; + for (unsigned i=0, chan = channelMask, swap = kChannelSwapMask; i<kShaderChannelCount; ++i, chan >>= 1, swap >>= 1) + { + if (chan & 1) + { + size_t componentCount = m_Channels[i].dimension; + size_t componentSize = GetChannelFormatSize(m_Channels[i].format); + if(swap & 1) + { + Assert (m_Channels [i].IsValid()); + SwapEndianArray (p + localOffset, componentSize, componentCount); + } + localOffset += componentCount * componentSize; + } + } + } + } + } +} + +void swap (VertexData& a, VertexData& b) +{ + std::swap_ranges (a.m_Channels, a.m_Channels + kShaderChannelCount, b.m_Channels); + std::swap_ranges (a.m_Streams, a.m_Streams + kMaxVertexStreams, b.m_Streams); + std::swap (a.m_CurrentChannels, b.m_CurrentChannels); + std::swap (a.m_VertexSize, b.m_VertexSize); + std::swap (a.m_VertexCount, b.m_VertexCount); + std::swap (a.m_DataSize, b.m_DataSize); + std::swap (a.m_Data, b.m_Data); +} + +void CopyVertexDataChannels (size_t vertexCount, unsigned copyChannels, const VertexData& srcData, VertexData& dstData) +{ + Assert (vertexCount <= srcData.GetVertexCount() && vertexCount <= dstData.GetVertexCount()); + Assert ((srcData.GetChannelMask() & copyChannels) == copyChannels); + Assert ((dstData.GetChannelMask() & copyChannels) == copyChannels); + CopyChannels (vertexCount, copyChannels, + srcData.GetStreams(), srcData.GetChannels(), srcData.GetDataPtr(), + dstData.GetStreams(), dstData.GetChannels(), dstData.GetDataPtr()); +} + diff --git a/Runtime/Filters/Mesh/VertexData.h b/Runtime/Filters/Mesh/VertexData.h new file mode 100644 index 0000000..7cc6c98 --- /dev/null +++ b/Runtime/Filters/Mesh/VertexData.h @@ -0,0 +1,253 @@ +#ifndef VERTEX_DATA_H_ +#define VERTEX_DATA_H_ + +#include "Runtime/Utilities/StrideIterator.h" +#include "Runtime/GfxDevice/GfxDeviceTypes.h" +#include "Runtime/BaseClasses/ObjectDefines.h" +#include "Runtime/Serialize/SerializeUtility.h" +#include 
"Runtime/Serialize/TransferFunctionFwd.h" + +class VertexData; + +void swap (VertexData& a, VertexData& b); + +typedef struct StreamInfo +{ + enum { kDividerOpDivide=0, kDividerOpModulo }; + + UInt32 channelMask; + UInt32 offset; + UInt16 frequency; + UInt8 stride; + UInt8 dividerOp; + + // We use default constructors instead of memset() + StreamInfo() : channelMask(0), offset(0), frequency(0), stride(0), dividerOp(kDividerOpDivide) {} + void Reset() { *this = StreamInfo(); } + + bool operator == (const StreamInfo& rhs) const { return (channelMask == rhs.channelMask) && (offset == rhs.offset) && (frequency == rhs.frequency) && (stride == rhs.stride) && (dividerOp == rhs.dividerOp); } + bool operator != (const StreamInfo& rhs) const { return !(*this == rhs); } + + DECLARE_SERIALIZE_NO_PPTR (StreamInfo); + +#if SUPPORT_SERIALIZED_TYPETREES + template<class TransferFunction> + void TransferWorkaround35SerializationFuckup (TransferFunction& transfer); +#endif + +} StreamInfoArray [kMaxVertexStreams]; + +struct VertexStreamsLayout +{ + UInt32 channelMasks[kMaxVertexStreams]; +}; + +typedef struct ALIGN_TYPE(4) ChannelInfo +{ + UInt8 stream; + UInt8 offset; + UInt8 format; + UInt8 dimension; + + enum { kInvalidDimension = 0 }; + + // We use default constructors instead of memset() + ChannelInfo() : stream(0), offset(0), format(0), dimension(kInvalidDimension) {} + + UInt32 CalcOffset(const StreamInfoArray streams) const { return streams[stream].offset + offset; } + UInt32 CalcStride(const StreamInfoArray streams) const { return streams[stream].stride; } + bool IsValid() const { return (kInvalidDimension != dimension); } + void Reset() { *this = ChannelInfo(); } + + bool operator == (const ChannelInfo& rhs) const { return (stream == rhs.stream) && (offset == rhs.offset) && (format == rhs.format) && (dimension == rhs.dimension); } + bool operator != (const ChannelInfo& rhs) const { return !(*this == rhs); } + + DECLARE_SERIALIZE_NO_PPTR (ChannelInfo); + +} ChannelInfoArray 
[kShaderChannelCount]; + +struct VertexChannelsLayout +{ + struct Channel + { + Channel(UInt8 fmt, UInt8 dim) : format(fmt), dimension(dim) {} + Channel() : format(0), dimension(0) {} + UInt8 format; + UInt8 dimension; + }; + Channel channels[kShaderChannelCount]; +}; + + +template<class TransferFunc> +void StreamInfo::Transfer (TransferFunc& transfer) +{ + #if SUPPORT_SERIALIZED_TYPETREES + if (transfer.GetFlags() & kWorkaround35MeshSerializationFuckup) + { + TransferWorkaround35SerializationFuckup (transfer); + return; + } + #endif + + transfer.Transfer (channelMask, "channelMask", kHideInEditorMask); + transfer.Transfer (offset, "offset", kHideInEditorMask); + transfer.Transfer (stride, "stride", kHideInEditorMask); + transfer.Transfer (dividerOp, "dividerOp", kHideInEditorMask); + transfer.Transfer (frequency, "frequency", kHideInEditorMask); +} + +#if SUPPORT_SERIALIZED_TYPETREES +template<class TransferFunc> +void StreamInfo::TransferWorkaround35SerializationFuckup (TransferFunc& transfer) +{ + transfer.Transfer (channelMask, "channelMask", kHideInEditorMask); + transfer.Transfer (offset, "offset", kHideInEditorMask); + + UInt32 align; + UInt32 stride32bit; + transfer.Transfer (stride32bit, "stride", kHideInEditorMask); + transfer.Transfer (align, "align", kHideInEditorMask); + + stride = (UInt8) stride32bit; +} +#endif + +template<class TransferFunc> +void ChannelInfo::Transfer (TransferFunc& transfer) +{ + transfer.Transfer (stream, "stream", kHideInEditorMask); + transfer.Transfer (offset, "offset", kHideInEditorMask); + transfer.Transfer (format, "format", kHideInEditorMask); + transfer.Transfer (dimension, "dimension", kHideInEditorMask); +} + +// Information about all vertex data, but does not own the memory +class VertexDataInfo +{ +public: + enum + { + kVertexDataAlign = 32, + kVertexStreamAlign = 16, + kVertexDataPadding = 16 + }; + + static VertexStreamsLayout kVertexStreamsDefault; + static VertexStreamsLayout kVertexStreamsSkinnedHotColdSplit; + 
	// Predefined per-channel format/dimension presets (uncompressed and
	// two levels of compression).
	static VertexChannelsLayout kVertexChannelsDefault;
	static VertexChannelsLayout kVertexChannelsCompressed;
	static VertexChannelsLayout kVertexChannelsCompressedAggressive;
#if UNITY_EDITOR
	static VertexStreamsLayout kVertexStreamsSkinnedHotColdSplitPS3;
#endif

	// Rounds a stream size up to the next kVertexStreamAlign (16 byte) boundary.
	static size_t AlignStreamSize (size_t size) { return (size + (kVertexStreamAlign-1)) & ~(kVertexStreamAlign-1); }

	friend void ::swap (VertexData& a, VertexData& b);

	VertexDataInfo ();

	// A channel is present iff its dimension is non-zero; the Assert checks
	// that m_CurrentChannels stays consistent with the per-channel dimensions.
	bool HasChannel (ShaderChannel shaderChannelIndex) const
	{
		Assert ((m_Channels[shaderChannelIndex].dimension != 0) == (((m_CurrentChannels & (1 << shaderChannelIndex)) != 0)));
		return m_Channels[shaderChannelIndex].dimension != 0;
	}

	// Recomputes stream/channel layout for the given channel mask and vertex
	// count using the supplied layout presets.
	void UpdateStreams(unsigned newChannelMask, size_t newVertexCount, const VertexStreamsLayout& streams = kVertexStreamsDefault, const VertexChannelsLayout& channels = kVertexChannelsDefault);

	size_t GetActiveStreamCount() const ;
	size_t GetStreamIndex(ShaderChannel channel) const ;
	const StreamInfo* GetStreams() const { return m_Streams; }
	const StreamInfo& GetStream(int index) const { return m_Streams[index]; }

	const ChannelInfo* GetChannels() const { return m_Channels; }
	const ChannelInfo& GetChannel(int index) const { return m_Channels[index]; }

	VertexStreamsLayout GetStreamsLayout() const;
	VertexChannelsLayout GetChannelsLayout() const;

	bool ConformsToStreamsLayout(const VertexStreamsLayout& streams) const;
	bool ConformsToChannelsLayout(const VertexChannelsLayout& channels) const;

	unsigned GetChannelMask () const { return m_CurrentChannels; }
	size_t GetDataSize () const { return m_DataSize; }
	size_t GetVertexSize () const { return m_VertexSize; }
	size_t GetVertexCount () const { return m_VertexCount; }
	size_t GetChannelOffset (unsigned channel) const { return m_Channels[channel].CalcOffset(m_Streams); }
	size_t GetChannelStride (unsigned channel) const { return m_Channels[channel].CalcStride(m_Streams); }
	UInt8* GetDataPtr () const { return m_Data; }

	// Strided iterator over one channel's data; yields a NULL-based iterator
	// (with the channel's stride) when the channel is absent.
	template<class T>
	StrideIterator<T> MakeStrideIterator (ShaderChannel shaderChannelIndex) const
	{
		Assert (shaderChannelIndex < kShaderChannelCount);
		void* p = m_Data + GetChannelOffset(shaderChannelIndex);
		return HasChannel (shaderChannelIndex) ? StrideIterator<T> (p, GetChannelStride (shaderChannelIndex)) : StrideIterator<T> (NULL, GetChannelStride (shaderChannelIndex));
	}

	// One-past-the-end iterator matching MakeStrideIterator.
	template<class T>
	StrideIterator<T> MakeEndIterator (ShaderChannel shaderChannelIndex) const
	{
		T* end = GetEndPointer<T> (shaderChannelIndex);
		return StrideIterator<T> (end, GetChannelStride (shaderChannelIndex));
	}

	// Pointer just past the last vertex of the given channel, or NULL when the
	// channel is absent.
	template<class T>
	T* GetEndPointer (ShaderChannel shaderChannelIndex) const
	{
		Assert (shaderChannelIndex < kShaderChannelCount);
		void* p = HasChannel (shaderChannelIndex) ? (m_Data + GetChannelOffset(shaderChannelIndex) + m_VertexCount * GetChannelStride (shaderChannelIndex)) : NULL;
		return reinterpret_cast<T*> (p);
	}

protected:
	ChannelInfoArray m_Channels;
	StreamInfoArray m_Streams;

	size_t m_VertexSize; // must match m_CurrentChannels
	UInt8* m_Data;       // not owned by this class (see VertexData below)

	// The following are being serialized. Their size must match in both 32 and 64 bit platforms
	UInt32 m_CurrentChannels; // kShaderChannel bitmask
	UInt32 m_VertexCount;
	unsigned m_DataSize;
};


// Owns the vertex memory
class VertexData : public VertexDataInfo
{
public:

	DECLARE_SERIALIZE (VertexData)

	VertexData () : VertexDataInfo() { }
	// Copies the selected channels from src into a freshly laid-out buffer.
	VertexData (VertexData const& src, unsigned copyChannels, const VertexStreamsLayout& streams = kVertexStreamsDefault, const VertexChannelsLayout& channels = kVertexChannelsDefault);
	~VertexData ();

	// Allocation size = accessible size plus trailing padding bytes.
	static size_t GetAllocateDataSize (size_t accesibleBufferSize) { return accesibleBufferSize + kVertexDataPadding; }

	void Deallocate ();
	void Resize (size_t vertexCount, unsigned channelMask, const VertexStreamsLayout& streams = kVertexStreamsDefault, const VertexChannelsLayout& channels = kVertexChannelsDefault);
	void SwapEndianess ();

private:
	// Non-copyable: copy operations are declared but not defined.
	VertexData (const VertexData& o);
	void operator= (const VertexData& o);
	VertexData (const VertexDataInfo& o);
	void operator= (const VertexDataInfo& o);

#if SUPPORT_SERIALIZED_TYPETREES
	template<class TransferFunction>
	void TransferWorkaround35SerializationFuckup (TransferFunction& transfer);
#endif
};


// Copies the given channels of vertexCount vertices from srcData to dstData.
void CopyVertexDataChannels (size_t vertexCount, unsigned copyChannels, const VertexData& srcData, VertexData& dstData);
// Size in bytes of a single component of the given vertex attribute format.
size_t GetChannelFormatSize(UInt8 format);



#endif