diff options
Diffstat (limited to 'Runtime/Filters/Mesh')
47 files changed, 13319 insertions, 0 deletions
diff --git a/Runtime/Filters/Mesh/CompressedMesh.cpp b/Runtime/Filters/Mesh/CompressedMesh.cpp new file mode 100644 index 0000000..02cc74c --- /dev/null +++ b/Runtime/Filters/Mesh/CompressedMesh.cpp @@ -0,0 +1,755 @@ +#include "UnityPrefix.h" +#include "CompressedMesh.h" +#include "LodMesh.h" +#include "Runtime/Animation/AnimationCurveUtility.h" + + +#define sqr(x) ((x)*(x)) + +void PackedFloatVector::PackFloats(float *data, int itemCountInChunk, int chunkStride, int numChunks, int bitSize, bool adjustBitSize) +{ + float maxf = -std::numeric_limits<float>::infinity(); + float minf = std::numeric_limits<float>::infinity(); + float* end = Stride (data, numChunks * chunkStride); + for(float* it = data; it != end; it = Stride (it, chunkStride)) + { + for (int i=0; i<itemCountInChunk; ++i) + { + if(maxf < it[i]) + maxf = it[i]; + if(minf > it[i]) + minf = it[i]; + } + } + + m_Range = maxf-minf; + + if(adjustBitSize) + bitSize += int(ceilf(Log2(m_Range))); + if(bitSize > 32) + bitSize = 32; + + m_Start = minf; + m_NumItems = numChunks * itemCountInChunk; + m_BitSize = bitSize; + m_Data.resize((m_NumItems * bitSize + 7)/8, 0); + + + float scale = 1.0/m_Range; + + int indexPos = 0; + int bitPos = 0; + + for(float* it = data; it != end; it = Stride (it, chunkStride)) + { + for(int i=0; i<itemCountInChunk; ++i) + { + float scaled = (it[i] - m_Start) * scale; + if(scaled < 0) scaled = 0; + if(scaled > 1) scaled = 1; + + UInt32 x = UInt32(scaled * ((1 << (m_BitSize)) - 1)); + + int bits = 0; + while(bits < m_BitSize) + { + m_Data[indexPos] |= (x >> bits) << bitPos; + int num = std::min( m_BitSize-bits, 8-bitPos); + bitPos += num; + bits += num; + if(bitPos == 8) + { + indexPos++; + bitPos = 0; + } + } + } + } +} + +void PackedFloatVector::UnpackFloats(float *data, int itemCountInChunk, int chunkStride, int start, int numChunks) +{ + int bitPos = m_BitSize*start; + int indexPos = bitPos/8; + bitPos %= 8; + + float scale = 1.0/m_Range; + if (numChunks == -1) + numChunks = 
m_NumItems / itemCountInChunk; + + for(float* end = Stride (data, chunkStride * numChunks); data != end; data = Stride (data, chunkStride)) + { + for (int i=0; i<itemCountInChunk; ++i) + { + UInt32 x = 0; + + int bits = 0; + while(bits < m_BitSize) + { + x |= (m_Data[indexPos] >> bitPos) << bits; + int num = std::min( m_BitSize-bits, 8-bitPos); + bitPos += num; + bits += num; + if(bitPos == 8) + { + indexPos++; + bitPos = 0; + } + } + x &= (1 << m_BitSize) - 1; + data[i] = (x / (scale * ((1 << (m_BitSize)) - 1))) + m_Start; + } + } +} + +template <class IntSize> void PackedIntVector::PackInts(IntSize *data, int numItems) +{ + // make sure that the intsize is an unsigned type + Assert( (IntSize)0 < (IntSize)-1 ); + + UInt32 maxi = 0; + for(int i=0; i<numItems; i++) + if(maxi < data[i]) + maxi = data[i]; + + m_NumItems = numItems; + //Prevent overflow + m_BitSize = UInt8(maxi == 0xFFFFFFFF ? 32 : ceilf(Log2(maxi+1))); + m_Data.resize((numItems * m_BitSize + 7)/8, 0); + + + int indexPos = 0; + int bitPos = 0; + for(int i=0; i<numItems; i++) + { + int bits = 0; + while(bits < m_BitSize) + { + m_Data[indexPos] |= (data[i] >> bits) << bitPos; + int num = std::min( m_BitSize-bits, 8-bitPos); + bitPos += num; + bits += num; + if(bitPos == 8) + { + indexPos++; + bitPos = 0; + } + } + } +} + +template <class IntSize> void PackedIntVector::UnpackInts(IntSize *data) +{ + int indexPos = 0; + int bitPos = 0; + for(int i=0; i<m_NumItems; i++) + { + int bits = 0; + data[i] = 0; + while(bits < m_BitSize) + { + data[i] |= (m_Data[indexPos] >> bitPos) << bits; + int num = std::min( m_BitSize-bits, 8-bitPos); + bitPos += num; + bits += num; + if(bitPos == 8) + { + indexPos++; + bitPos = 0; + } + } + data[i] &= (1ULL << m_BitSize) - 1; + } +} + + +void PackedQuatVector::PackQuats(Quaternionf *data, int numItems) +{ + m_NumItems = numItems; + m_Data.resize(numItems * (32/8), 0); + + int indexPos = 0; + int bitPos = 0; + + for(int i=0; i<numItems; i++) + { + Quaternionf &q = data[i]; + 
UInt8 flags = q.x<0? 4:0; + + float max=fabs(q.x); + if(fabs(q.y) > max) + { + max = fabs(q.y); + flags = 1; + if(q.y<0) + flags |= 4; + } + if(fabs(q.z) > max) + { + max = fabs(q.z); + flags = 2; + if(q.z<0) + flags |= 4; + } + if(fabs(q.w) > max) + { + max = fabs(q.w); + flags = 3; + if(q.w<0) + flags |= 4; + } + int bits = 0; + while(bits < 3) + { + m_Data[indexPos] |= (flags >> bits) << bitPos; + int num = std::min( 3-bits, 8-bitPos); + bitPos += num; + bits += num; + if(bitPos == 8) + { + indexPos++; + bitPos = 0; + } + } + for(int j=0;j<4;j++) + { + if((flags&3) != j) + { + int bitSize = (((flags&3)+1)%4 == j)?9:10; + float scaled = (q[j] + 1) * 0.5; + if(scaled < 0) scaled = 0; + if(scaled > 1) scaled = 1; + + UInt32 x = UInt32(scaled * ((1 << bitSize) - 1)); + + bits = 0; + while(bits < bitSize) + { + m_Data[indexPos] |= (x >> bits) << bitPos; + int num = std::min( bitSize-bits, 8-bitPos); + bitPos += num; + bits += num; + if(bitPos == 8) + { + indexPos++; + bitPos = 0; + } + } + } + } + } +} + +void PackedQuatVector::UnpackQuats(Quaternionf *data) +{ + int indexPos = 0; + int bitPos = 0; + + for(int i=0; i<m_NumItems; i++) + { + UInt32 flags = 0; + + int bits = 0; + while(bits < 3) + { + flags |= (m_Data[indexPos] >> bitPos) << bits; + int num = std::min( 3-bits, 8-bitPos); + bitPos += num; + bits += num; + if(bitPos == 8) + { + indexPos++; + bitPos = 0; + } + } + flags &= 7; + + + Quaternionf &q = data[i]; + float sum = 0; + for(int j=0;j<4;j++) + { + if((flags&3) != j) + { + int bitSize = (((flags&3)+1)%4 == j)?9:10; + UInt32 x = 0; + + bits = 0; + while(bits < bitSize) + { + x |= (m_Data[indexPos] >> bitPos) << bits; + int num = std::min( bitSize-bits, 8-bitPos); + bitPos += num; + bits += num; + if(bitPos == 8) + { + indexPos++; + bitPos = 0; + } + } + x &= (1 << bitSize) - 1; + q[j] = (x / (0.5 * ((1 << (bitSize)) - 1))) - 1; + sum += sqr(q[j]); + } + } + + int lastComponent = flags&3; + q[lastComponent] = FastSqrt(1 - sum); + if(flags & 4) + 
q[lastComponent] = -q[lastComponent]; + } +} + +void CompressedMesh::Compress(Mesh &src, int compression) +{ + int numVertices = src.GetVertexCount(); + + int vertexBits = 0; + switch(compression) + { + case kMeshCompressionHigh: vertexBits = 10; break; + case kMeshCompressionMed: vertexBits = 16; break; + case kMeshCompressionLow: vertexBits = 20; break; + } + m_Vertices.PackFloats((float*)src.GetChannelPointer(kShaderChannelVertex), 3, src.GetStride (kShaderChannelVertex), numVertices, vertexBits, false); + + //Possible optimization: use Edgebreaker algorithm + //for 1.8 bits per triangle connectivity information + //http://www.gvu.gatech.edu/~jarek/edgebreaker/eb/ + + int numIndices = src.m_IndexBuffer.size(); + numIndices/=2; + + m_Triangles.PackInts<UInt16>((UInt16*)&src.m_IndexBuffer[0],numIndices); + + if(src.IsAvailable(kShaderChannelTexCoord0)) + { + int uvBits = 0; + switch(compression) + { + case kMeshCompressionHigh: uvBits = 8; break; + case kMeshCompressionMed: uvBits = 10; break; + case kMeshCompressionLow: uvBits = 16; break; + } + if(src.IsAvailable(kShaderChannelTexCoord1)) + { + Vector2f *uv12 = new Vector2f[numVertices*2]; + src.ExtractUvArray(0, uv12); + src.ExtractUvArray(1, uv12 + numVertices); + m_UV.PackFloats(&uv12->x, 2, sizeof(Vector2f), numVertices*2, uvBits, true); + delete[] uv12; + } + else + m_UV.PackFloats((float*)src.GetChannelPointer (kShaderChannelTexCoord0), 2, src.GetStride (kShaderChannelTexCoord0), numVertices, uvBits, true); + } + else if(src.IsAvailable(kShaderChannelTexCoord1)) + ErrorString( "Mesh compression doesn't work on Meshes wich only have a UV1 channel but no UV0 channel. UVs will be dropped." 
); + + if(src.IsAvailable (kShaderChannelNormal)) + { + int normalBits = 0; + switch(compression) + { + case kMeshCompressionHigh: normalBits = 6; break; + case kMeshCompressionMed: normalBits = 8; break; + case kMeshCompressionLow: normalBits = 8; break; + } + + float *normals = new float[numVertices*2]; + UInt32 *signs = new UInt32[numVertices]; + StrideIterator<Vector3f> n = src.GetNormalBegin (); + for(int i=0;i<numVertices; ++i, ++n) + { + normals[i*2+0] = n->x; + normals[i*2+1] = n->y; + signs[i] = n->z>0?1:0; + } + m_Normals.PackFloats(normals, 2, sizeof (float) * 2, numVertices, normalBits, false); + m_NormalSigns.PackInts(signs, numVertices); + delete[] normals; + delete[] signs; + } + + if(src.IsAvailable (kShaderChannelTangent)) + { + int normalBits = 0; + switch(compression) + { + case kMeshCompressionHigh: normalBits = 6; break; + case kMeshCompressionMed: normalBits = 8; break; + case kMeshCompressionLow: normalBits = 8; break; + } + + float *tangents = new float[numVertices*2]; + UInt32 *signs = new UInt32[numVertices*2]; + StrideIterator<Vector4f> t = src.GetTangentBegin (); + for(int i=0;i<numVertices; ++i, ++t) + { + tangents[i*2+0] = t->x; + tangents[i*2+1] = t->y; + signs[i*2+0] = t->z>0?1:0; + signs[i*2+1] = t->w>0?1:0; + } + m_Tangents.PackFloats(tangents, 2, sizeof (float) * 2, numVertices, normalBits, false); + m_TangentSigns.PackInts(signs, numVertices*2); + delete[] tangents; + delete[] signs; + } + + // TODO: do an actual compression + if(src.IsAvailable (kShaderChannelColor)) + { + dynamic_array<UInt32> tempColors (numVertices, kMemTempAlloc); + std::transform (src.GetColorBegin (), src.GetColorEnd (), tempColors.begin (), OpColorRGBA32ToUInt32()); + m_Colors.PackInts<UInt32> (tempColors.data (), tempColors.size ()); + } + + BoneInfluence* influence = src.GetBoneWeights(); + if(influence) + { + UInt32 *weights = new UInt32[numVertices*3]; + UInt32 *indices = new UInt32[numVertices*4]; + int weightPos = 0; + int boneIndexPos = 0; + 
for(int i=0;i<numVertices;i++) + { + int j; + int sum = 0; + + //As all four bone weights always add up to 1, we can always calculate the fourth one + // by subtracting the other three from 1. So we don't need to store it. + + //Furthermore, once the weights we stored add up to 1, we don't need to store further + //weights or indices, as these will necessarily be zero. This is often the case, as many + //vertices have only the first weight set to one, and all others to zero. + + //find last non-zero entry -- we don't need to store those after this. + int lastNonZero; + for(lastNonZero=3;lastNonZero>0&&influence[i].weight[lastNonZero]==0;lastNonZero--) + {} + + + for(j=0;j<3 && j<=lastNonZero && sum<31;j++) + { + weights[weightPos] = UInt32(influence[i].weight[j] * 31); + indices[boneIndexPos++] = influence[i].boneIndex[j]; + sum += weights[weightPos++]; + } + if(lastNonZero<3) + { + //we stored less then 3 weights, but they don't add up to one, due to quantization + //inprecision. + //Add the difference, so the math works out on decompression. + if(sum<31) + weights[weightPos-1] += 31-sum; + } + + //we stored three weights, but they don't add up to one. we don't need to store the fourth weight + //(as it can be calculated from the other three), but we need the bone index. 
+ else if(sum<31) + indices[boneIndexPos++] = influence[i].boneIndex[j]; + } + + m_Weights.PackInts(weights, weightPos); + m_BoneIndices.PackInts(indices, boneIndexPos); + + delete[] weights; + delete[] indices; + } +} + +void CompressedMesh::Decompress(Mesh &src) +{ + int numIndices = m_Triangles.Count(); + src.m_IndexBuffer.resize(numIndices * 2); + m_Triangles.UnpackInts<UInt16>((UInt16*)&src.m_IndexBuffer[0]); + + int numVertices = m_Vertices.Count()/3; + unsigned decompressedFormat = 0; + if (m_Vertices.Count ()) decompressedFormat |= VERTEX_FORMAT1(Vertex); + if (m_Normals.Count()) decompressedFormat |= VERTEX_FORMAT1(Normal); + if (m_UV.Count()) decompressedFormat |= VERTEX_FORMAT1(TexCoord0); + if (m_UV.Count() == numVertices * 4) decompressedFormat |= VERTEX_FORMAT1(TexCoord1); + if (m_Tangents.Count()) decompressedFormat |= VERTEX_FORMAT1(Tangent); + if (m_Colors.Count()) decompressedFormat |= VERTEX_FORMAT1(Color); + + src.ResizeVertices(numVertices, decompressedFormat); + Assert (src.GetVertexCount () == numVertices); + + m_Vertices.UnpackFloats((float*)src.GetChannelPointer (kShaderChannelVertex), 3, src.GetStride (kShaderChannelVertex)); + + if(m_UV.Count()) + { + m_UV.UnpackFloats((float*)src.GetChannelPointer (kShaderChannelTexCoord0), 2, src.GetStride (kShaderChannelTexCoord0), 0, numVertices); + + if(m_UV.Count()==numVertices * 4) + { + m_UV.UnpackFloats((float*)src.GetChannelPointer (kShaderChannelTexCoord1), 2, src.GetStride (kShaderChannelTexCoord1), numVertices*2, numVertices); + } + } + + // TODO: This never gets written. Unity 3.4 and 3.5 never wrote this data. + // Most likely no version ever did. Remove code and bindpose serialization. 
+ if(m_BindPoses.Count()) + { + src.m_Bindpose.resize_initialized(m_BindPoses.Count()/16); + m_BindPoses.UnpackFloats(src.m_Bindpose[0].m_Data, 16, sizeof(float) * 16); + } + + if(m_Normals.Count()) + { + float *normalData = new float[m_Normals.Count()]; + UInt32 *signs = new UInt32[m_NormalSigns.Count()]; + + m_Normals.UnpackFloats(normalData, 2, sizeof(float) * 2); + m_NormalSigns.UnpackInts(signs); + + StrideIterator<Vector3f> n = src.GetNormalBegin (); + for(int i=0;i<m_Normals.Count()/2; ++i, ++n) + { + n->x = normalData[i*2+0]; + n->y = normalData[i*2+1]; + float zsqr = 1 - sqr(n->x) - sqr(n->y); + if(zsqr >= 0) + n->z = FastSqrt( zsqr ); + else + { + n->z = 0; + *n = Normalize(*n); + } + if(signs[i]==0) + n->z = -n->z; + } + + delete[] normalData; + delete[] signs; + } + + if(m_Tangents.Count()) + { + float *tangentData = new float[m_Tangents.Count()]; + UInt32 *signs = new UInt32[m_TangentSigns.Count()]; + + m_Tangents.UnpackFloats(tangentData, 2, sizeof(float) * 2); + m_TangentSigns.UnpackInts(signs); + + StrideIterator<Vector4f> t = src.GetTangentBegin (); + for(int i=0;i<m_Tangents.Count()/2; ++i, ++t) + { + t->x = tangentData[i*2+0]; + t->y = tangentData[i*2+1]; + float zsqr = 1-sqr(tangentData[i*2+0])-sqr(tangentData[i*2+1]); + if(zsqr >= 0.0f) + t->z = FastSqrt( zsqr ); + else + { + t->z = 0; + *(Vector3f*)(&*t) = Normalize(*(Vector3f*)(&*t)); + } + if(signs[i*2+0]==0) + t->z = -t->z; + + t->w = signs[i*2+1]?1.0:-1.0; + } + + delete[] tangentData; + delete[] signs; + } + + // TODO: do an actual compression + if (m_Colors.Count()) + { + dynamic_array<UInt32> tempColors (m_Colors.Count (), kMemTempAlloc); + m_Colors.UnpackInts<UInt32> (tempColors.data ()); + Assert (tempColors.size () == src.GetVertexCount ()); + strided_copy ((ColorRGBA32*)tempColors.begin (), (ColorRGBA32*)tempColors.end (), src.GetColorBegin ()); + } + + if(m_Weights.Count()) + { + UInt32 *weights = new UInt32[m_Weights.Count()]; + m_Weights.UnpackInts(weights); + UInt32 *boneIndices 
= new UInt32[m_BoneIndices.Count()]; + m_BoneIndices.UnpackInts(boneIndices); + src.m_Skin.resize_uninitialized(numVertices); + int bonePos = 0; + int boneIndexPos = 0; + int j=0; + int sum = 0; + + for(int i=0;i<m_Weights.Count();i++) + { + //read bone index and weight. + src.m_Skin[bonePos].weight[j] = weights[i]/31.0; + src.m_Skin[bonePos].boneIndex[j] = boneIndices[boneIndexPos++]; + j++; + sum += weights[i]; + + //the weights add up to one. fill the rest for this vertex with zero, and continue with next one. + if(sum >= 31) + { + for(;j<4;j++) + { + src.m_Skin[bonePos].weight[j] = 0; + src.m_Skin[bonePos].boneIndex[j] = 0; + } + bonePos++; + j = 0; + sum = 0; + } + //we read three weights, but they don't add up to one. calculate the fourth one, and read + //missing bone index. continue with next vertex. + else if(j==3) + { + src.m_Skin[bonePos].weight[j] = (31-sum)/31.0; + src.m_Skin[bonePos].boneIndex[j] = boneIndices[boneIndexPos++]; + bonePos++; + j = 0; + sum = 0; + } + } + + delete[] weights; + delete[] boneIndices; + } +} + +template <class T> void CompressedAnimationCurve::CompressTimeKeys(AnimationCurveTpl<T> &src) +{ + int numKeys = src.GetKeyCount(); + + float minTime=0; + for(int i=0;i<numKeys;i++) + { + float t = src.GetKey(i).time; + if(t < minTime) + { + //negative time key. offset all keys by this, so math doesn't break - but it's still wrong. 
+ minTime = t; + } + } + + + UInt32 *times = new UInt32[numKeys]; + UInt32 t=0; + for(int i=0;i<numKeys;i++) + { + times[i] = UInt32((src.GetKey(i).time - minTime) * 100); + times[i] -= t; + t += times[i]; + } + + m_Times.PackInts(times, numKeys); + + delete[] times; +} + +template <class T> void CompressedAnimationCurve::DecompressTimeKeys(AnimationCurveTpl<T> &src) +{ + int numKeys = m_Times.Count(); + UInt32 *times = new UInt32[numKeys]; + m_Times.UnpackInts(times); + + UInt32 t=0; + + src.ResizeUninitialized(numKeys); + + for(int i=0;i<numKeys;i++) + { + t+=times[i]; + src.GetKey(i).time = t*0.01; + } + delete[] times; +} + +void CompressedAnimationCurve::CompressQuatCurve(AnimationClip::QuaternionCurve &src) +{ + CompressTimeKeys(src.curve); + int numKeys = src.curve.GetKeyCount(); + + Quaternionf *qkeys = new Quaternionf[numKeys]; + for(int i=0;i<numKeys;i++) + qkeys[i] = src.curve.GetKey(i).value; + m_Values.PackQuats(qkeys, numKeys); + + delete[] qkeys; + + bool same = true; + + for(int i=0;i<numKeys && same;i++) + { + Quaternionf &q1 = src.curve.GetKey(i).inSlope; + Quaternionf &q2 = src.curve.GetKey(i).inSlope; + if(q1.x!=q2.x) + same = false; + if(q1.y!=q2.y) + same = false; + if(q1.z!=q2.z) + same = false; + if(q1.w!=q2.w) + same = false; + } + + float *keys = new float[numKeys*8]; + for(int i=0;i<numKeys;i++) + { + Quaternionf q = src.curve.GetKey(i).inSlope; + keys[i*4+0] = q.x; + keys[i*4+1] = q.y; + keys[i*4+2] = q.z; + keys[i*4+3] = q.w; + q = src.curve.GetKey(i).outSlope; + keys[(i+numKeys)*4+0] = q.x; + keys[(i+numKeys)*4+1] = q.y; + keys[(i+numKeys)*4+2] = q.z; + keys[(i+numKeys)*4+3] = q.w; + } + + //if in and out slopes are all the same, pack only the first of the two. 
+ if(same) + m_Slopes.PackFloats(keys, 1, sizeof(float), numKeys * 4, 6, false); + else + m_Slopes.PackFloats(keys, 1, sizeof(float), numKeys * 8, 6, false); + + delete[] keys; + + m_PreInfinity = src.curve.GetPreInfinityInternal(); + m_PostInfinity = src.curve.GetPostInfinityInternal(); + m_Path = src.path; +} + +void CompressedAnimationCurve::DecompressQuatCurve(AnimationClip::QuaternionCurve &src) +{ + DecompressTimeKeys(src.curve); + int numKeys = m_Values.Count(); + + Quaternionf *qkeys = new Quaternionf[numKeys]; + m_Values.UnpackQuats(qkeys); + for(int i=0;i<numKeys;i++) + src.curve.GetKey(i).value = qkeys[i]; + delete[] qkeys; + + float *keys = new float[numKeys*8]; + m_Slopes.UnpackFloats(keys, 1, sizeof(float)); + + //are there seperate in and out slopes? + int offs = 0; + if(m_Slopes.Count() == numKeys*8) + offs = numKeys; + for(int i=0;i<numKeys;i++) + { + src.curve.GetKey(i).inSlope.x = keys[i*4+0]; + src.curve.GetKey(i).inSlope.y = keys[i*4+1]; + src.curve.GetKey(i).inSlope.z = keys[i*4+2]; + src.curve.GetKey(i).inSlope.w = keys[i*4+3]; + src.curve.GetKey(i).outSlope.x = keys[(i+offs)*4+0]; + src.curve.GetKey(i).outSlope.y = keys[(i+offs)*4+1]; + src.curve.GetKey(i).outSlope.z = keys[(i+offs)*4+2]; + src.curve.GetKey(i).outSlope.w = keys[(i+offs)*4+3]; + } + delete[] keys; + + src.curve.SetPreInfinityInternal( m_PreInfinity ); + src.curve.SetPostInfinityInternal( m_PostInfinity ); + src.path = m_Path; +} diff --git a/Runtime/Filters/Mesh/CompressedMesh.h b/Runtime/Filters/Mesh/CompressedMesh.h new file mode 100644 index 0000000..cf2f01c --- /dev/null +++ b/Runtime/Filters/Mesh/CompressedMesh.h @@ -0,0 +1,175 @@ +#ifndef COMPRESSEDMESH_H +#define COMPRESSEDMESH_H + +#include "Runtime/Serialize/SerializeUtility.h" +#include "Runtime/Animation/AnimationClip.h" +class Mesh; +class AnimationClip; + +enum +{ + kMeshCompressionOff = 0, + kMeshCompressionLow = 1, + kMeshCompressionMed = 2, + kMeshCompressionHigh = 3, +}; + +typedef std::vector<UInt8> 
DataVector; + +class PackedFloatVector +{ +public: + DECLARE_SERIALIZE (PackedBitVector) + + PackedFloatVector() { m_NumItems = 0; m_Range = 0; m_Start = 0; m_BitSize = 0; } + + void PackFloats(float *data, int chunkSize, int chunkStride, int chunkCount, int bitSize, bool adjustBitSize); + void UnpackFloats(float *data, int chunkSize, int chunkStride, int start = 0, int count = -1); + int Count() {return m_NumItems;} + +private: + UInt32 m_NumItems; + float m_Range; + float m_Start; + UInt8 m_BitSize; + std::vector<UInt8> m_Data; +}; + +class PackedIntVector +{ +public: + DECLARE_SERIALIZE (PackedBitVector) + + PackedIntVector() { m_NumItems = 0; m_BitSize = 0; } + + template <class IntSize> void PackInts(IntSize *data, int numItems); + template <class IntSize> void UnpackInts(IntSize *data); + int Count() {return m_NumItems;} + +private: + UInt32 m_NumItems; + UInt8 m_BitSize; + std::vector<UInt8> m_Data; +}; + +class PackedQuatVector +{ +public: + DECLARE_SERIALIZE (PackedBitVector) + + PackedQuatVector() {m_NumItems = 0;} + + void PackQuats(Quaternionf *data, int numItems); + void UnpackQuats(Quaternionf *data); + int Count() {return m_NumItems;} + +private: + UInt32 m_NumItems; + std::vector<UInt8> m_Data; +}; + +class CompressedMesh +{ +public: + DECLARE_SERIALIZE (CompressedMesh) + + void Compress(Mesh &src, int quality); + void Decompress(Mesh &src); + +private: + PackedFloatVector m_Vertices; + PackedFloatVector m_UV; + + // TODO: This never gets written. Unity 3.4 and 3.5 never wrote this data. + // Most likely no version ever did. Remove code and bindpose serialization. 
+ PackedFloatVector m_BindPoses; + + PackedFloatVector m_Normals; + PackedIntVector m_NormalSigns; + PackedFloatVector m_Tangents; + PackedIntVector m_TangentSigns; + PackedIntVector m_Weights; + PackedIntVector m_BoneIndices; + PackedIntVector m_Triangles; + PackedIntVector m_Colors; +}; + +template<class TransferFunc> +void PackedFloatVector::Transfer (TransferFunc& transfer) { + TRANSFER ( m_NumItems ); + TRANSFER( m_Range ); + TRANSFER( m_Start ); + TRANSFER( m_Data ); + TRANSFER( m_BitSize ); + transfer.Align(); +} + +template<class TransferFunc> +void PackedIntVector::Transfer (TransferFunc& transfer) { + TRANSFER( m_NumItems ); + TRANSFER( m_Data ); + TRANSFER( m_BitSize ); + transfer.Align(); +} + +template<class TransferFunc> +void PackedQuatVector::Transfer (TransferFunc& transfer) { + TRANSFER( m_NumItems ); + TRANSFER( m_Data ); + transfer.Align(); +} + +template<class TransferFunc> +void CompressedMesh::Transfer (TransferFunc& transfer) { + TRANSFER( m_Vertices ); + TRANSFER( m_UV ); + TRANSFER( m_BindPoses ); + TRANSFER( m_Normals ); + TRANSFER( m_Tangents ); + TRANSFER( m_Weights ); + TRANSFER( m_NormalSigns ); + TRANSFER( m_TangentSigns ); + TRANSFER( m_BoneIndices ); + TRANSFER( m_Triangles ); + TRANSFER( m_Colors ); +} + +class CompressedAnimationCurve +{ +public: + DECLARE_SERIALIZE (CompressedAnimationCurve) + + CompressedAnimationCurve() { m_PreInfinity = 0; m_PostInfinity = 0; } + + void CompressQuatCurve(AnimationClip::QuaternionCurve &src); + void DecompressQuatCurve(AnimationClip::QuaternionCurve &src); + +private: + + template <class T> void CompressTimeKeys(AnimationCurveTpl<T> &src); + template <class T> void DecompressTimeKeys(AnimationCurveTpl<T> &src); + + PackedIntVector m_Times; + PackedQuatVector m_Values; + PackedFloatVector m_Slopes; + + int m_PreInfinity; + int m_PostInfinity; + + UnityStr m_Path; +}; + +template<class TransferFunc> +void CompressedAnimationCurve::Transfer (TransferFunc& transfer) { + + TRANSFER( m_Path ); + + 
TRANSFER( m_Times ); + TRANSFER( m_Values ); + TRANSFER( m_Slopes ); + + TRANSFER( m_PreInfinity ); + TRANSFER( m_PostInfinity ); +} + +#endif diff --git a/Runtime/Filters/Mesh/LodMesh.cpp b/Runtime/Filters/Mesh/LodMesh.cpp new file mode 100644 index 0000000..fc5dca8 --- /dev/null +++ b/Runtime/Filters/Mesh/LodMesh.cpp @@ -0,0 +1,2344 @@ +#include "UnityPrefix.h" +#include "Configuration/UnityConfigure.h" +#include "LodMesh.h" +#include "Runtime/Utilities/vector_utility.h" +#include "Runtime/Utilities/Utility.h" +#include "Runtime/Math/FloatConversion.h" +#include "Runtime/Serialize/TransferFunctions/SerializeTransfer.h" +#include "Runtime/Serialize/PersistentManager.h" +#include "Runtime/Graphics/TriStripper.h" +#include "MeshUtility.h" +#include "Runtime/Geometry/TangentSpaceCalculation.h" +#include "Runtime/BaseClasses/GameObject.h" +#include "Runtime/Shaders/VBO.h" +#include "Runtime/Serialize/TransferUtility.h" +#include "Runtime/Serialize/SwapEndianArray.h" +#include "Runtime/GfxDevice/GfxDevice.h" +#include "Runtime/BaseClasses/IsPlaying.h" +#include "Runtime/Camera/IntermediateRenderer.h" +#include "Runtime/Filters/Mesh/MeshRenderer.h" +#include "Runtime/Allocator/MemoryMacros.h" +#include "Runtime/Misc/Allocator.h" +#include "Runtime/Profiler/Profiler.h" +#include "Runtime/Camera/Camera.h" +#include "Runtime/Camera/RenderManager.h" +#include "Runtime/Threads/Thread.h" +#include "Runtime/Misc/BuildSettings.h" +#include "Runtime/Utilities/UniqueIDGenerator.h" +#if UNITY_XENON +#include "PlatformDependent/Xbox360/Source/GfxDevice/GfxXenonVBO.h" +#endif +#include "Runtime/GfxDevice/GfxDeviceConfigure.h" + +#if UNITY_FLASH +#include <limits.h> +#define FLT_MAX __FLT_MAX__ +#define FLT_MIN __FLT_MIN__ +#endif + +#if UNITY_EDITOR +# include "Editor/Src/BuildPipeline/PrepareMeshDataForBuildTarget.h" +# include "Runtime/Camera/RenderLoops/RenderLoopPrivate.h" +# include "Runtime/Misc/Player.h" +#endif + + +///* Checkbox in mesh importer that allows you have mesh 
access (Done) +///* Default for new importers is to have mesh access enabled (done) +///* Error Messages when acessing data although you shouldn't be allowed (--) +///* MeshColliders / SkinnedMeshes / non-uniform scale. Forces meshes to be non-readable. (Done) + + +///* MeshCollider with no-access allowed. Does it work / no errors +///* MeshCollider with no-access allowed, mesh is assigned from script. Does it give an error in editor & player +///* MeshCollider with no-access allowed, mesh is scaled at runtime does it give an error +///* MeshCollider with no-access allowed, mesh is scaled in scene. Does it work without errors. +///* Mesh data accessed from script, does it give an error. + + + +static char const* kMeshAPIErrorMessage = +"Mesh.%s is out of bounds. The supplied array needs to be the same size as the Mesh.vertices array."; + + +static UniqueIDGenerator s_MeshIDGenerator; + + +// The Mesh class contains one of these for every Material that is bound to it. +struct DeprecatedMeshData +{ + std::vector<Face> faces; // Indices for specific faces + std::vector <unsigned short> strips; // A list of triangle strips + int triangleCount; + DECLARE_SERIALIZE_NO_PPTR (MeshData) +}; + +template<class TransferFunc> +void DeprecatedMeshData::Transfer (TransferFunc& transfer) +{ + TRANSFER (faces); + TRANSFER (strips); + TRANSFER(triangleCount); +} + +struct DeprecatedLOD +{ + vector<DeprecatedMeshData> m_MeshData; + + DECLARE_SERIALIZE (LOD) +}; + +template<class TransferFunction> +void DeprecatedLOD::Transfer (TransferFunction& transfer) +{ + TRANSFER (m_MeshData); +} + +static void LoadDeprecatedMeshData (Mesh& mesh, vector<DeprecatedLOD> &lods) +{ + mesh.GetIndexBuffer().clear(); + mesh.GetSubMeshes().clear(); + + if (lods.empty()) + return; + + DeprecatedLOD& lod = lods.front(); + + mesh.SetSubMeshCount(lod.m_MeshData.size()); + for (int i=0;i<lod.m_MeshData.size();i++) + { + DeprecatedMeshData& oldMeshData = lod.m_MeshData[i]; + if (oldMeshData.faces.size()) + 
mesh.SetIndicesComplex (&oldMeshData.faces[0].v1, oldMeshData.faces.size()*3, i, kPrimitiveTriangles, Mesh::k16BitIndices); + else + { + UNITY_TEMP_VECTOR(UInt16) triangles; + Destripify(&oldMeshData.strips[0], oldMeshData.strips.size(), triangles); + mesh.SetIndicesComplex (&triangles[0], triangles.size(), i, kPrimitiveTriangles, Mesh::k16BitIndices); + } + } +} + + +using namespace std; + +Mesh::Mesh (MemLabelId label, ObjectCreationMode mode) +: Super(label, mode) +, m_ChannelsInVBO(0) +, m_VerticesDirty(true) +, m_IndicesDirty(true) +, m_IsDynamic(false) +, m_HideFromRuntimeStats(false) +, m_VertexColorsSwizzled(false) +, m_MeshUsageFlags(0) +, m_LocalAABB(Vector3f::zero, Vector3f::zero) +, m_VBO(NULL) +, m_InternalMeshID (0) +, m_Skin (label) +, m_CachedSkin2 (label) +, m_CachedSkin1 (label) +, m_CachedBonesAABB(label) +, m_Bindpose(label) +, m_BonePathHashes(label) +, m_RootBonePathHash(0) +{ + m_MaxBoneIndex = -1; + SubMesh sub; + m_SubMeshes.push_back(sub); + + m_MeshCompression = kMeshCompressionOff; + m_StreamCompression = kStreamCompressionDefault; + m_IsReadable = true; + m_KeepVertices = false; + m_KeepIndices = false; + +#if UNITY_EDITOR + m_MeshOptimized = false; +#endif + +#if ENABLE_MULTITHREADED_CODE + m_CurrentCPUFence = 0; + m_WaitOnCPUFence = false; +#endif + + m_InternalMeshID = 0; +} + +Mesh::~Mesh () +{ + MainThreadCleanup (); +} + +bool Mesh::MainThreadCleanup () +{ + WaitOnRenderThreadUse(); + NotifyObjectUsers( kDidDeleteMesh ); + m_IntermediateUsers.Notify( kImNotifyAssetDeleted ); + + m_CollisionMesh.Cleanup(); + + if (m_VBO) + { + GetGfxDevice().DeleteVBO(m_VBO); + m_VBO = NULL; + } + + if (m_InternalMeshID != 0) + { + s_MeshIDGenerator.RemoveID (m_InternalMeshID); + m_InternalMeshID = 0; + } + + return true; +} + +void Mesh::LoadDeprecatedTangentData (Mesh& mesh, DeprecatedTangentsArray &inTangents) +{ + int count = inTangents.size(); + unsigned needChannels = m_VertexData.GetChannelMask () | VERTEX_FORMAT2(Normal, Tangent); + if 
(count != GetVertexCount () || m_VertexData.GetChannelMask () != needChannels) + ResizeVertices (count, needChannels); + + Assert (GetVertexCount () == count); + + StrideIterator<Vector3f> normals = GetNormalBegin (); + StrideIterator<Vector4f> tangents = GetTangentBegin (); + + for(int i=0;i<count; ++i, ++normals, ++tangents) + { + *normals = inTangents[i].normal; + *tangents = Vector4f(inTangents[i].tangent.x,inTangents[i].tangent.y,inTangents[i].tangent.z,inTangents[i].handedness); + } +} + +void Mesh::SwizzleVertexColorsIfNeeded () +{ + // Early out if color are already in the right format + if (gGraphicsCaps.needsToSwizzleVertexColors == m_VertexColorsSwizzled) + return; + + // Due to runtime GfxDevice switching we might need to unswizzle vertex colors (case 562695) + if (m_VertexColorsSwizzled) + { + std::transform(GetColorBegin(), GetColorEnd(), GetColorBegin(), UnswizzleColorForPlatform); + m_VertexColorsSwizzled = false; + } + else + { + std::transform(GetColorBegin(), GetColorEnd(), GetColorBegin(), SwizzleColorForPlatform); + m_VertexColorsSwizzled = true; + } +} + +void Mesh::ExtractVertexArray (Vector3f* destination) const +{ + StrideIterator<Vector3f> v = GetVertexBegin (); + for (Vector3f* end = destination + GetVertexCount(); destination != end; ++v, ++destination) + *destination = *v; +} + +void Mesh::ExtractNormalArray (Vector3f* destination) const +{ + StrideIterator<Vector3f> n = GetNormalBegin (); + for (Vector3f* end = destination + GetVertexCount(); destination != end; ++n, ++destination) + *destination = *n; +} + +void Mesh::ExtractColorArray (ColorRGBA32* destination) const +{ + if (m_VertexColorsSwizzled) + std::transform(GetColorBegin(), GetColorEnd(), destination, UnswizzleColorForPlatform); + else + std::copy(GetColorBegin(), GetColorEnd(), destination); +} + +void Mesh::ExtractColorArrayConverting (ColorRGBAf* destination) const +{ + if (m_VertexColorsSwizzled) + std::transform(GetColorBegin(), GetColorEnd(), destination, 
UnswizzleColorForPlatform); + else + std::copy(GetColorBegin(), GetColorEnd(), destination); +} + +void Mesh::ExtractUvArray (int uvIndex, Vector2f* destination) const +{ + StrideIterator<Vector2f> uv = GetUvBegin (uvIndex); + for (Vector2f* end = destination + GetVertexCount(); destination != end; ++uv, ++destination) + *destination = *uv; +} + +void Mesh::ExtractTangentArray (Vector4f* destination) const +{ + StrideIterator<Vector4f> t = GetTangentBegin (); + for (Vector4f* end = destination + GetVertexCount(); destination != end; ++t, ++destination) + *destination = *t; +} + + +UInt32 Mesh::ResizeVertices (size_t count, UInt32 shaderChannels, const VertexStreamsLayout& streams, const VertexChannelsLayout& channels) +{ + Assert (count <= std::numeric_limits<UInt16>::max()); + + UInt32 prevChannels = m_VertexData.GetChannelMask(); + + if (m_VertexData.GetVertexCount() != count || + m_VertexData.GetChannelMask() != shaderChannels || + !m_VertexData.ConformsToStreamsLayout(streams) || + !m_VertexData.ConformsToChannelsLayout(channels)) + { + WaitOnRenderThreadUse(); + + SET_ALLOC_OWNER(this); + m_VertexData.Resize(count, shaderChannels, streams, channels); + + if (!m_Skin.empty ()) + m_Skin.resize_initialized (count, BoneInfluence()); + } + + return m_VertexData.GetChannelMask() & ~prevChannels; +} + + +UInt32 Mesh::FormatVertices (UInt32 shaderChannels) +{ + return ResizeVertices(GetVertexCount(), shaderChannels); +} + +void Mesh::InitChannelsToDefault (unsigned begin, unsigned count, unsigned shaderChannels) +{ + if (shaderChannels & VERTEX_FORMAT1(Vertex)) + std::fill (GetVertexBegin () + begin, GetVertexBegin () + begin + count, Vector3f (0,0,0)); + if (shaderChannels & VERTEX_FORMAT1(Normal)) + std::fill (GetNormalBegin () + begin, GetNormalBegin () + begin + count, Vector3f (0,0,0)); + if (shaderChannels & VERTEX_FORMAT1(Color)) + std::fill (GetColorBegin () + begin, GetColorBegin () + begin + count, ColorRGBA32 (0xffffffff)); + if (shaderChannels & 
VERTEX_FORMAT1(TexCoord0)) + std::fill (GetUvBegin (0) + begin, GetUvBegin (0) + begin + count, Vector2f (0,0)); + if (shaderChannels & VERTEX_FORMAT1(Tangent)) + std::fill (GetTangentBegin () + begin, GetTangentBegin () + begin + count, Vector4f (0,0,0,0)); + + if (shaderChannels & VERTEX_FORMAT1(TexCoord1)) + { + if( GetAvailableChannels () & VERTEX_FORMAT1(TexCoord0) ) + std::copy (GetUvBegin (0) + begin, GetUvBegin (0) + begin + count, GetUvBegin (1) + begin); + else + std::fill (GetUvBegin (1) + begin, GetUvBegin (1) + begin + count, Vector2f (0,0)); + } +} + +namespace +{ + bool IsStripValid(const Mesh::TemporaryIndexContainer& triangles, const Mesh::TemporaryIndexContainer& newStrip) + { + int invalidTriangleCount = 0; + for (int j = 0; j < triangles.size(); j += 3) + { + int i0 = triangles[j + 0]; + int i1 = triangles[j + 1]; + int i2 = triangles[j + 2]; + + bool found = false; + for (int k = 0; k < newStrip.size() - 2; ++k) + { + int s0 = newStrip[k + 0]; + int s1 = newStrip[k + 1]; + int s2 = newStrip[k + 2]; + + if (k&1) + std::swap(s1, s2); + + if ((s0 == i0 && s1 == i1 && s2 == i2) || + (s0 == i1 && s1 == i2 && s2 == i0) || + (s0 == i2 && s1 == i0 && s2 == i1)) + { + found = true; + break; + } + } + + if (!found) + ++invalidTriangleCount; + } + + AssertMsg(invalidTriangleCount == 0, "Mesh strip is missing %d triangles", invalidTriangleCount); + return invalidTriangleCount == 0; + } +} + +void Mesh::RecalculateBoundsInternal () +{ + MinMaxAABB minmax; + minmax.Init (); + for (StrideIterator<Vector3f> it = GetVertexBegin (), end = GetVertexEnd (); it != end; ++it) + minmax.Encapsulate (*it); + + // Apply all blendshape targets to bounding volumes + if (!m_Shapes.vertices.empty()) + { + StrideIterator<Vector3f> verts = GetVertexBegin (); + + for (int i=0;i<m_Shapes.vertices.size();i++) + { + Vector3f pos = verts[m_Shapes.vertices[i].index] + m_Shapes.vertices[i].vertex; + minmax.Encapsulate (pos); + } + } + + AABB aabb; + if (GetVertexCount ()) + aabb = 
minmax; + else + aabb = AABB (Vector3f::zero, Vector3f::zero); + + m_LocalAABB = aabb; + + for (int submesh = 0; submesh < m_SubMeshes.size(); ++submesh) + RecalculateSubmeshBoundsInternal (submesh); +} + +void Mesh::RecalculateSubmeshBoundsInternal (unsigned submesh) +{ + MinMaxAABB minmax; + minmax.Init (); + + const UInt16* indices = GetSubMeshBuffer16(submesh); + StrideIterator<Vector3f> vertices = GetVertexBegin (); + for (unsigned int i = 0; i < GetSubMeshFast(submesh).indexCount; i++) + minmax.Encapsulate (vertices[indices[i]]); + + AABB aabb; + if (GetSubMeshFast(submesh).indexCount > 0) + aabb = minmax; + else + aabb = AABB (Vector3f::zero, Vector3f::zero); + + GetSubMeshFast(submesh).localAABB = aabb; +} + + +void Mesh::RecalculateBounds () +{ + RecalculateBoundsInternal (); + + SetDirty(); + NotifyObjectUsers( kDidModifyBounds ); + m_IntermediateUsers.Notify( kImNotifyBoundsChanged ); +} + +void Mesh::RecalculateSubmeshBounds (unsigned submesh) +{ + RecalculateSubmeshBoundsInternal (submesh); + + SetDirty(); + NotifyObjectUsers( kDidModifyBounds ); + m_IntermediateUsers.Notify( kImNotifyBoundsChanged ); +} + + +void Mesh::Clear (bool keepVertexLayout) +{ + WaitOnRenderThreadUse(); + + m_SubMeshes.clear(); + SubMesh sub; + m_SubMeshes.push_back(sub); + + ClearBlendShapes (m_Shapes); + + m_IndexBuffer.clear(); +#if UNITY_EDITOR + m_MeshOptimized = false; +#endif + +#if UNITY_PS3 || UNITY_EDITOR + m_PartitionInfos.clear(); + m_Partitions.clear(); +#endif + + unsigned prevFormat = m_VertexData.GetChannelMask(); + + if (m_VertexData.GetVertexCount() > 0) + { + // keepVertexLayout added in Unity 3.5.3; keep previous behaviour + // for older content for safety. 
+ if (keepVertexLayout && IS_CONTENT_NEWER_OR_SAME (kUnityVersion3_5_3_a1)) + { + ResizeVertices (0, prevFormat); + } + else + { + VertexData tempVD; + swap (tempVD, m_VertexData); + } + } + + if (!m_Skin.empty()) + { + m_Skin.clear(); + } + + m_VertexColorsSwizzled = false; + ClearSkinCache(); + + SetChannelsDirty( prevFormat, true ); +} + +IMPLEMENT_CLASS (Mesh) +IMPLEMENT_OBJECT_SERIALIZE (Mesh) + +template <typename Index> +static void GetVertexBufferRange(const Index* indices, int indexCount, UInt32& fromVertex, UInt32& toVertex) +{ + Index a = Index(INT_MAX); + Index b = 0; + const Index* indicesEnd = indices + indexCount; + for (const Index* index = indices; index < indicesEnd; ++index) + { + a = std::min(a, *index); + b = std::max(b, *index); + } + fromVertex = a; + toVertex = b; +} + +void Mesh::ByteSwapIndices () +{ + SwapEndianArray (&m_IndexBuffer[0], kVBOIndexSize, GetTotalndexCount()); +} + +template<class T> +bool ShouldSerializeForBigEndian (T& transfer) +{ + bool bigEndian = UNITY_BIG_ENDIAN; + if (transfer.ConvertEndianess()) + bigEndian = !bigEndian; + return bigEndian; +} + +void Mesh::DestripifyIndices () +{ + if (m_IndexBuffer.empty() || m_SubMeshes.empty()) + return; + + int submeshCount = m_SubMeshes.size(); + bool anyStripped = false; + for (size_t i = 0; i < submeshCount; ++i) + { + if (m_SubMeshes[i].topology == kPrimitiveTriangleStripDeprecated) + { + anyStripped = true; + break; + } + } + if(!anyStripped) + return; + + // destripify the stripped submeshes + typedef UNITY_TEMP_VECTOR(UInt16) TemporaryIndexContainer; + + std::vector<TemporaryIndexContainer> submeshIndices; + submeshIndices.resize(submeshCount); + for(int i=0;i<submeshCount;i++) + { + SubMesh& sm = m_SubMeshes[i]; + if (sm.topology == kPrimitiveTriangleStripDeprecated) + Destripify (GetSubMeshBuffer16(i), sm.indexCount, submeshIndices[i]); + else + { + submeshIndices[i].resize(sm.indexCount); + memcpy(&submeshIndices[i][0], GetSubMeshBuffer16(i), sm.indexCount << 1); + } + 
} + + SetSubMeshCount(0); + SetSubMeshCount(submeshCount); + + for(int i=0;i<submeshCount;i++) + SetIndices(&submeshIndices[i][0], submeshIndices[i].size(), i, kPrimitiveTriangles); +} + +bool Mesh::CanAccessFromScript() const +{ +#if UNITY_EDITOR + // Allow editor scripts access even if not allowed in runtime + if (!IsInsidePlayerLoop() && !IsInsideRenderLoop()) + return true; +#endif + return m_IsReadable; +} + + +template<class TransferFunction> +void Mesh::Transfer (TransferFunction& transfer) +{ + #if SUPPORT_SERIALIZED_TYPETREES + // See TransferWorkaround35SerializeFuckup below for comments. + // Remove when we can break backwards-compatiblity. + if (transfer.GetFlags() & kWorkaround35MeshSerializationFuckup) + { + TransferWorkaround35SerializeFuckup (transfer); + return; + } + #endif + + Super::Transfer (transfer); + transfer.SetVersion (8); + + #if UNITY_EDITOR + const UInt32 supportedChannels = transfer.IsWritingGameReleaseData() ? transfer.GetBuildUsage().meshSupportedChannels : 0; + const UInt32 meshUsageFlags = transfer.IsWritingGameReleaseData() ? 
// Main serialization routine (stream version 8).
// NOTE: the compressed and uncompressed codepaths below must transfer the same
// property names in the same order, otherwise SafeBinaryRead will crash when
// reading with a mismatched type tree.
template<class TransferFunction>
void Mesh::Transfer (TransferFunction& transfer)
{
	#if SUPPORT_SERIALIZED_TYPETREES
	// See TransferWorkaround35SerializeFuckup below for comments.
	// Remove when we can break backwards-compatiblity.
	if (transfer.GetFlags() & kWorkaround35MeshSerializationFuckup)
	{
		TransferWorkaround35SerializeFuckup (transfer);
		return;
	}
	#endif

	Super::Transfer (transfer);
	transfer.SetVersion (8);

	#if UNITY_EDITOR
	const UInt32 supportedChannels = transfer.IsWritingGameReleaseData() ? transfer.GetBuildUsage().meshSupportedChannels : 0;
	const UInt32 meshUsageFlags = transfer.IsWritingGameReleaseData() ? transfer.GetBuildUsage().meshUsageFlags : 0;
	// Scoped helper: strips/transforms mesh data for the target platform while
	// writing build data, restored when it goes out of scope.
	PrepareMeshDataForBuildTarget prepareMesh(*this, transfer.GetBuildingTarget().platform, supportedChannels, meshUsageFlags);
	#endif

	bool reswizzleColors = false;
	if (m_VertexColorsSwizzled)
	{
		// Unswizzle colors before serializing
		std::transform(GetColorBegin(), GetColorEnd(), GetColorBegin(), UnswizzleColorForPlatform);
		m_VertexColorsSwizzled = false;
		reswizzleColors = true;
	}

	transfer.Transfer (m_SubMeshes, "m_SubMeshes", kHideInEditorMask);
	transfer.Transfer (m_Shapes, "m_Shapes", kHideInEditorMask);
	transfer.Transfer (m_Bindpose, "m_BindPose", kHideInEditorMask);
	transfer.Transfer (m_BonePathHashes, "m_BoneNameHashes", kHideInEditorMask);
	transfer.Transfer (m_RootBonePathHash, "m_RootBoneNameHash", kHideInEditorMask);

	transfer.Transfer (m_MeshCompression, "m_MeshCompression", kHideInEditorMask);
	transfer.Transfer (m_StreamCompression, "m_StreamCompression", kHideInEditorMask);
	transfer.Transfer (m_IsReadable, "m_IsReadable", kHideInEditorMask);
	transfer.Transfer (m_KeepVertices, "m_KeepVertices", kHideInEditorMask);
	transfer.Transfer (m_KeepIndices, "m_KeepIndices", kHideInEditorMask);
	transfer.Align();

	// Notice the two codepaths for serialization here.
	// It is very important to keep both codepaths in sync, otherwise SafeBinaryRead serialization will crash.
	// Look at kSerializeForPrefabSystem to disable compression when using Transfer to instantiate a Mesh.
	// Changes to compression can break web content if we recompress at runtime. (case 546159)
	bool doCompression = m_MeshCompression && !(transfer.GetFlags() & kSerializeForPrefabSystem);
	if (!doCompression)
	{
		if (transfer.ConvertEndianess() && transfer.IsWriting ())
			ByteSwapIndices();

		transfer.Transfer (m_IndexBuffer, "m_IndexBuffer", kHideInEditorMask);

		// Swap back after writing (and after reading foreign-endian data).
		if (transfer.ConvertEndianess() && (transfer.IsWriting () || transfer.IsReading ()))
			ByteSwapIndices();

		transfer.Transfer (m_Skin, "m_Skin", kHideInEditorMask);

		if (transfer.IsVersionSmallerOrEqual (5))
		{
			// Pre-version-6 content stored separate per-channel arrays;
			// read them and repack into the interleaved vertex buffer.
			dynamic_array<Vector4f> tangents;
			dynamic_array<Vector3f> vertices, normals;
			dynamic_array<Vector2f> uvs, uvs1;
			dynamic_array<ColorRGBA32> colors;


			transfer.Transfer (vertices, "m_Vertices", kHideInEditorMask);
			transfer.Transfer (uvs, "m_UV", kHideInEditorMask);
			transfer.Transfer (uvs1, "m_UV1", kHideInEditorMask);
			transfer.Transfer (tangents, "m_Tangents", kHideInEditorMask);
			transfer.Transfer (normals, "m_Normals", kHideInEditorMask);
			transfer.Transfer (colors, "m_Colors", kHideInEditorMask);

			unsigned format = 0;
			if (!vertices.empty ()) format |= VERTEX_FORMAT1(Vertex);
			if (!tangents.empty ()) format |= VERTEX_FORMAT1(Tangent);
			if (!normals.empty ()) format |= VERTEX_FORMAT1(Normal);
			if (!uvs.empty ()) format |= VERTEX_FORMAT1(TexCoord0);
			if (!uvs1.empty ()) format |= VERTEX_FORMAT1(TexCoord1);
			if (!colors.empty ()) format |= VERTEX_FORMAT1(Color);

			size_t vertexCount = vertices.size ();
			if (GetVertexCount () != vertexCount || GetAvailableChannels () != format)
				ResizeVertices (vertexCount, format);

			strided_copy (vertices.begin (), vertices.begin () + std::min (vertices.size (), vertexCount), GetVertexBegin ());
			strided_copy (normals.begin (), normals.begin () + std::min (normals.size (), vertexCount), GetNormalBegin ());
			strided_copy (uvs.begin (), uvs.begin () + std::min (uvs.size (), vertexCount), GetUvBegin (0));
			strided_copy (uvs1.begin (), uvs1.begin () + std::min (uvs1.size (), vertexCount), GetUvBegin (1));
			strided_copy (tangents.begin (), tangents.begin () + std::min (tangents.size (), vertexCount), GetTangentBegin ());
			strided_copy (colors.begin (), colors.begin () + std::min (colors.size (), vertexCount), GetColorBegin ());
		}
		else
		{
			// version 6 introduces interleaved buffer
			if (transfer.ConvertEndianess() && transfer.IsWriting ())
				m_VertexData.SwapEndianess ();

			transfer.Transfer (m_VertexData, "m_VertexData", kHideInEditorMask);

			if (transfer.ConvertEndianess() && (transfer.IsWriting () || transfer.IsReading ()))
				m_VertexData.SwapEndianess ();
		}
	}
	// Notice the two codepaths for serialization here.
	// It is very important to keep both codepaths in sync, otherwise SafeBinaryRead serialization will crash.
	else
	{
		// Compressed path: transfer empty placeholders under the same property
		// names so the type tree stays identical to the uncompressed path.
		BoneInfluenceContainer dummySkin;
		VertexData dummyVertexData;
		IndexContainer dummyIndexContainer;

		transfer.Transfer (dummyIndexContainer, "m_IndexBuffer", kHideInEditorMask);
		transfer.Transfer (dummySkin, "m_Skin", kHideInEditorMask);
		transfer.Transfer (dummyVertexData, "m_VertexData", kHideInEditorMask);
	}

	{
		// only keep the compressed mesh in memory while needed
		CompressedMesh m_CompressedMesh;
		transfer.Align();
		// Check both IsWriting() and IsReading() since both are true when reading with SafeBinaryRead
		if (doCompression && transfer.IsWriting())
			m_CompressedMesh.Compress(*this, m_MeshCompression);

		transfer.Transfer (m_CompressedMesh, "m_CompressedMesh", kHideInEditorMask);

		if (doCompression && transfer.DidReadLastProperty ())
			m_CompressedMesh.Decompress(*this);
	}

	#if !GFX_SUPPORTS_TRISTRIPS
	if (transfer.IsReading())
		DestripifyIndices ();
	#endif

	// Reswizzle colors after serializing
	if (reswizzleColors)
	{
		std::transform(GetColorBegin(), GetColorEnd(), GetColorBegin(), SwizzleColorForPlatform);
		m_VertexColorsSwizzled = true;
	}

	transfer.Transfer (m_LocalAABB, "m_LocalAABB", kHideInEditorMask);

	#if UNITY_EDITOR
	// When building player we precalcuate mesh usage based on who uses the different MeshColliders in different scenes.
	if (transfer.IsWritingGameReleaseData())
	{
		int buildMeshUsageFlags = transfer.GetBuildUsage().meshUsageFlags;
		transfer.Transfer (buildMeshUsageFlags, "m_MeshUsageFlags", kHideInEditorMask);
	}
	else
		transfer.Transfer (m_MeshUsageFlags, "m_MeshUsageFlags", kHideInEditorMask);
	#else
	transfer.Transfer (m_MeshUsageFlags, "m_MeshUsageFlags", kHideInEditorMask);
	#endif

	m_CollisionMesh.Transfer(transfer, *this);

	// Version upgrades for legacy content below.
	if (transfer.IsOldVersion(1))
	{
		vector<DeprecatedLOD> lod;
		transfer.Transfer (lod, "m_LODData", kHideInEditorMask);
		LoadDeprecatedMeshData(*this, lod);
	}

	if (transfer.IsVersionSmallerOrEqual(4))
	{
		// Versions <= 4 did not store per-submesh vertex ranges/bounds.
		for (int sm = 0; sm < m_SubMeshes.size(); ++sm)
		{
			UpdateSubMeshVertexRange (sm);
			RecalculateSubmeshBoundsInternal (sm);
		}
	}

	if (transfer.IsOldVersion(2) || transfer.IsOldVersion(1))
	{
		DeprecatedTangentsArray m_TangentSpace;
		transfer.Transfer (m_TangentSpace, "m_TangentSpace", kHideInEditorMask);
		if(transfer.IsReading())
			LoadDeprecatedTangentData(*this,m_TangentSpace);
	}

	if (transfer.IsVersionSmallerOrEqual(7))
	{
		// Version 8 dropped triangle-strip submeshes.
		DestripifySubmeshOnTransferInternal();
	}
	TRANSFER_EDITOR_ONLY_HIDDEN(m_MeshOptimized);

#if UNITY_EDITOR || UNITY_PS3
	TransferPS3Data(transfer);
#endif
}
//
// By having the exact same transfer path, we end up with identical type trees compared to version
// 3.5.0 and thus automatically end up on the StreamedBinaryRead codepath. Also, as long as this
// separate path here is preserved, we can read the faulty 3.5.0 streams without having to worry
// about it in the normal transfer path.
//
// NOTE(review): this function is a deliberately frozen replica of the 3.5.0
// Transfer — do not "fix" or modernize it, or 3.5.0 streams become unreadable.
template<class TransferFunction>
void Mesh::TransferWorkaround35SerializeFuckup (TransferFunction& transfer)
{
	Super::Transfer (transfer);
	transfer.SetVersion (6);

	if (m_VertexColorsSwizzled)
	{
		// Unswizzle colors before serializing
		std::transform(GetColorBegin(), GetColorEnd(), GetColorBegin(), UnswizzleColorForPlatform);
		m_VertexColorsSwizzled = false;
	}

	transfer.Transfer (m_SubMeshes, "m_SubMeshes", kHideInEditorMask);

	if (!transfer.IsVersionSmallerOrEqual(3))
		transfer.Transfer (m_MeshCompression, "m_MeshCompression", kHideInEditorMask);
	else
		m_MeshCompression = kMeshCompressionOff;

	transfer.Align();
	if (m_MeshCompression == kMeshCompressionOff)
	{
		if (transfer.ConvertEndianess() && transfer.IsWriting ())
			ByteSwapIndices();

		transfer.Transfer (m_IndexBuffer, "m_IndexBuffer", kHideInEditorMask);

		if (transfer.ConvertEndianess() && (transfer.IsWriting () || transfer.IsReading ()))
			ByteSwapIndices();

		transfer.Transfer (m_Skin, "m_Skin", kHideInEditorMask);
		transfer.Transfer (m_Bindpose, "m_BindPose", kHideInEditorMask);

		if (transfer.IsVersionSmallerOrEqual (5))
		{
			// Pre-version-6: separate per-channel arrays, repacked below.
			dynamic_array<Vector4f> tangents;
			dynamic_array<Vector3f> vertices, normals;
			dynamic_array<Vector2f> uvs, uvs1;
			dynamic_array<ColorRGBA32> colors;


			transfer.Transfer (vertices, "m_Vertices", kHideInEditorMask);
			transfer.Transfer (uvs, "m_UV", kHideInEditorMask);
			transfer.Transfer (uvs1, "m_UV1", kHideInEditorMask);
			transfer.Transfer (tangents, "m_Tangents", kHideInEditorMask);
			transfer.Transfer (normals, "m_Normals", kHideInEditorMask);
			transfer.Transfer (colors, "m_Colors", kHideInEditorMask);

			unsigned format = 0;
			if (!vertices.empty ()) format |= VERTEX_FORMAT1(Vertex);
			if (!tangents.empty ()) format |= VERTEX_FORMAT1(Tangent);
			if (!normals.empty ()) format |= VERTEX_FORMAT1(Normal);
			if (!uvs.empty ()) format |= VERTEX_FORMAT1(TexCoord0);
			if (!uvs1.empty ()) format |= VERTEX_FORMAT1(TexCoord1);
			if (!colors.empty ()) format |= VERTEX_FORMAT1(Color);

			size_t vertexCount = vertices.size ();
			if (GetVertexCount () != vertexCount || GetAvailableChannels () != format)
				ResizeVertices (vertexCount, format);

			strided_copy (vertices.begin (), vertices.begin () + std::min (vertices.size (), vertexCount), GetVertexBegin ());
			strided_copy (normals.begin (), normals.begin () + std::min (normals.size (), vertexCount), GetNormalBegin ());
			strided_copy (uvs.begin (), uvs.begin () + std::min (uvs.size (), vertexCount), GetUvBegin (0));
			strided_copy (uvs1.begin (), uvs1.begin () + std::min (uvs1.size (), vertexCount), GetUvBegin (1));
			strided_copy (tangents.begin (), tangents.begin () + std::min (tangents.size (), vertexCount), GetTangentBegin ());
			strided_copy (colors.begin (), colors.begin () + std::min (colors.size (), vertexCount), GetColorBegin ());
		}
		else
		{
			// version 6 introduces interleaved buffer
			if (transfer.ConvertEndianess() && transfer.IsWriting ())
				m_VertexData.SwapEndianess ();

			transfer.Transfer (m_VertexData, "m_VertexData", kHideInEditorMask);

			if (transfer.ConvertEndianess() && (transfer.IsWriting () || transfer.IsReading ()))
				m_VertexData.SwapEndianess ();
		}
	}
	else
	{
		// Compressed path: empty placeholders keep the type tree identical to
		// the uncompressed path (the 3.5.0 bug this workaround preserves).
		vector<Vector4f> emptyVector4;
		vector<Vector3f> emptyVector3;
		vector<Vector2f> emptyVector2;
		vector<BoneInfluence> emptyBones;
		vector<UInt8> emptyIndices;
		vector<ColorRGBA32> emptyColors;

		transfer.Transfer (emptyIndices, "m_IndexBuffer", kHideInEditorMask);
		transfer.Transfer (emptyVector3, "m_Vertices", kHideInEditorMask);
		transfer.Transfer (emptyBones, "m_Skin", kHideInEditorMask);
		transfer.Transfer (m_Bindpose, "m_BindPose", kHideInEditorMask);
		transfer.Transfer (emptyVector2, "m_UV", kHideInEditorMask);
		transfer.Transfer (emptyVector2, "m_UV1", kHideInEditorMask);
		transfer.Transfer (emptyVector4, "m_Tangents", kHideInEditorMask);
		transfer.Transfer (emptyVector3, "m_Normals", kHideInEditorMask);
		transfer.Transfer (emptyColors, "m_Colors", kHideInEditorMask);
	}

	CompressedMesh m_CompressedMesh;
	transfer.Align();
	if (transfer.IsWriting() && m_MeshCompression)
		m_CompressedMesh.Compress(*this, m_MeshCompression);

	// NOTE(review): this logs unconditionally (also when writing) — looks like
	// debug leftover from 3.5.0, kept to avoid touching the frozen path.
	printf_console( "Reading compressed mesh...\n" );
	transfer.Transfer (m_CompressedMesh, "m_CompressedMesh", kHideInEditorMask);

	if (transfer.DidReadLastProperty () && m_MeshCompression)
		m_CompressedMesh.Decompress(*this);


#if !GFX_SUPPORTS_TRISTRIPS
	if (transfer.IsReading())
		DestripifyIndices ();
#endif

	transfer.Transfer (m_LocalAABB, "m_LocalAABB", kHideInEditorMask);
	transfer.Transfer (m_MeshUsageFlags, "m_MeshUsageFlags", kHideInEditorMask);

	m_CollisionMesh.Transfer(transfer, *this);

	if (transfer.IsOldVersion(1))
	{
		vector<DeprecatedLOD> lod;
		transfer.Transfer (lod, "m_LODData", kHideInEditorMask);
		LoadDeprecatedMeshData(*this, lod);
	}

	if (transfer.IsVersionSmallerOrEqual(4))
	{
		for (int sm = 0; sm < m_SubMeshes.size(); ++sm)
		{
			UpdateSubMeshVertexRange (sm);
			RecalculateSubmeshBoundsInternal (sm);
		}
	}

	if (transfer.IsOldVersion(2) || transfer.IsOldVersion(1))
	{
		DeprecatedTangentsArray m_TangentSpace;
		transfer.Transfer (m_TangentSpace, "m_TangentSpace", kHideInEditorMask);
		if(transfer.IsReading())
			LoadDeprecatedTangentData(*this,m_TangentSpace);
	}

	if (transfer.IsReading())
		DestripifySubmeshOnTransferInternal();
}
#endif
transfer.Transfer(m_Partitions, "m_Partitions", kHideInEditorMask); + transfer.Transfer(m_PartitionInfos, "m_PartitionInfos", kHideInEditorMask); + } +} +#endif + + +void Mesh::UpdateSubMeshVertexRange (int index) +{ + SubMesh& submesh = m_SubMeshes[index]; + if (submesh.indexCount > 0) + { + UInt32 lastVertex = 0; + GetVertexBufferRange(GetSubMeshBuffer16(index), submesh.indexCount, submesh.firstVertex, lastVertex); + Assert(lastVertex < GetVertexCount ()); + Assert(submesh.firstVertex <= lastVertex); + submesh.vertexCount = lastVertex - submesh.firstVertex + 1; + } + else + { + submesh.firstVertex = 0; + submesh.vertexCount = 0; + } +} + +static bool CheckOutOfBounds (unsigned max, const UInt16* p, unsigned count) +{ + for (int i=0;i<count;i++) + { + if (p[i] >= max) + return false; + } + return true; +} + +static bool CheckOutOfBounds (unsigned max, const UInt32* p, unsigned count) +{ + for (int i=0;i<count;i++) + { + if (p[i] >= max) + return false; + } + return true; +} + +bool Mesh::ValidateVertexCount (unsigned newVertexCount, const void* newTriangles, unsigned indexCount) +{ + if (newTriangles) + { + return CheckOutOfBounds (newVertexCount, reinterpret_cast<const UInt16*>(newTriangles), indexCount); + } + else + { + return CheckOutOfBounds(newVertexCount, reinterpret_cast<const UInt16*>(&m_IndexBuffer[0]), GetTotalndexCount()); + } +} + +int Mesh::GetTotalndexCount () const +{ + return m_IndexBuffer.size () / kVBOIndexSize; +} + +void Mesh::SetVertices (Vector3f const* data, size_t count) +{ + if (m_StreamCompression) + return; + + if (count > std::numeric_limits<UInt16>::max()) + { + ErrorString("Mesh.vertices is too large. A mesh may not have more than 65000 vertices."); + return; + } + + size_t prevCount = GetVertexCount (); + if (IS_CONTENT_NEWER_OR_SAME (kUnityVersion3_5_3_a1) && count < prevCount && !ValidateVertexCount(count, NULL, 0)) + { + ErrorString("Mesh.vertices is too small. 
// Replace the position channel. Handles growing/shrinking the vertex buffer,
// validating that shrinking does not orphan existing triangle indices, and
// recalculating bounds when the vertex count changes.
void Mesh::SetVertices (Vector3f const* data, size_t count)
{
	// Compressed-stream meshes are immutable from script.
	if (m_StreamCompression)
		return;

	if (count > std::numeric_limits<UInt16>::max())
	{
		ErrorString("Mesh.vertices is too large. A mesh may not have more than 65000 vertices.");
		return;
	}

	size_t prevCount = GetVertexCount ();
	// Only enforced for 3.5.3+ content to keep older content working.
	if (IS_CONTENT_NEWER_OR_SAME (kUnityVersion3_5_3_a1) && count < prevCount && !ValidateVertexCount(count, NULL, 0))
	{
		ErrorString("Mesh.vertices is too small. The supplied vertex array has less vertices than are referenced by the triangles array.");
		return;
	}

	WaitOnRenderThreadUse();

#if UNITY_PS3
	if(m_Skin.empty() || (!(m_Skin.empty() || m_PartitionInfos.empty())))
	{
		// mircea@info: sadly for us GPU renders from pointers, so we need to create a new instance when something changes....(fixes nasty bug #434226)
		SET_ALLOC_OWNER(this);
		VertexData vertexData(m_VertexData, GetAvailableChannels(), GetStreamsLayout(), GetChannelsLayout());
		swap(vertexData, m_VertexData);
	}
#endif

	if (prevCount != count)
	{
		unsigned prevChannels = GetAvailableChannels ();
		ResizeVertices (count, prevChannels | VERTEX_FORMAT1(Vertex));

		// In case there were other channels present, initialize the newly created values of
		// the expanded buffer to something meaningful.
		if (prevCount != 0 && count > prevCount && (prevChannels & ~VERTEX_FORMAT1(Vertex)))
		{
			InitChannelsToDefault (prevCount, count - prevCount, prevChannels & ~VERTEX_FORMAT1(Vertex));
		}
	}

	// Make sure we'll not be overrunning the buffer
	if (GetVertexCount () < count)
		count = GetVertexCount ();

	strided_copy (data, data + count, GetVertexBegin ());
	SetChannelsDirty (VERTEX_FORMAT1(Vertex), false);

	// We do not recalc the bounds automatically when re-writing existing vertices
	if (prevCount != count)
		RecalculateBounds ();
}

// Replace the normal channel. Passing NULL/0 removes the channel;
// count must otherwise match the current vertex count.
void Mesh::SetNormals (Vector3f const* data, size_t count)
{
	if (m_StreamCompression)
		return;
	WaitOnRenderThreadUse();

	if (count == 0 || !data)
	{
		// Remove the channel entirely.
		FormatVertices (GetAvailableChannels () & ~VERTEX_FORMAT1(Normal));
		SetChannelsDirty (VERTEX_FORMAT1(Normal), false);
		return;
	}

	if (count != GetVertexCount ())
	{
		ErrorStringMsg(kMeshAPIErrorMessage, "normals");
		return;
	}

	if (!IsAvailable (kShaderChannelNormal))
		FormatVertices (GetAvailableChannels () | VERTEX_FORMAT1(Normal));

	strided_copy (data, data + count, GetNormalBegin ());

	SetChannelsDirty (VERTEX_FORMAT1(Normal), false);
}

// Replace the tangent channel. Passing NULL/0 removes the channel;
// count must otherwise match the current vertex count.
void Mesh::SetTangents (Vector4f const* data, size_t count)
{
	if (m_StreamCompression)
		return;
	WaitOnRenderThreadUse();

	if (count == 0 || !data)
	{
		// Remove the channel entirely.
		FormatVertices (GetAvailableChannels () & ~VERTEX_FORMAT1(Tangent));
		SetChannelsDirty (VERTEX_FORMAT1(Tangent), false);
		return;
	}

	if (count != GetVertexCount ())
	{
		ErrorStringMsg(kMeshAPIErrorMessage, "tangents");
		return;
	}

	if (!IsAvailable (kShaderChannelTangent))
		FormatVertices (GetAvailableChannels () | VERTEX_FORMAT1(Tangent));

	strided_copy (data, data + count, GetTangentBegin ());
	SetChannelsDirty( VERTEX_FORMAT1(Tangent), false );
}
"uv2" : "uv"; + ErrorStringMsg(kMeshAPIErrorMessage, uvName); + return; + } + + if (!IsAvailable (texCoordChannel)) + FormatVertices (GetAvailableChannels () | texCoordMask); + + strided_copy (data, data + count, GetUvBegin (uvIndex)); + SetChannelsDirty (texCoordMask, false); +} + +void Mesh::SetColors (ColorRGBA32 const* data, size_t count) +{ + if (m_StreamCompression) + return; + WaitOnRenderThreadUse(); + + if (count == 0 || !data) + { + FormatVertices (GetAvailableChannels () & ~VERTEX_FORMAT1(Color)); + SetChannelsDirty( VERTEX_FORMAT1(Color), false ); + return; + } + + if (count != GetVertexCount ()) + { + ErrorStringMsg(kMeshAPIErrorMessage, "colors"); + return; + } + + if (!IsAvailable (kShaderChannelColor)) + { + FormatVertices (GetAvailableChannels () | VERTEX_FORMAT1(Color)); + } + m_VertexColorsSwizzled = gGraphicsCaps.needsToSwizzleVertexColors; + + if (m_VertexColorsSwizzled) + std::transform(data, data + count, GetColorBegin(), SwizzleColorForPlatform); + else + std::copy(data, data + count, GetColorBegin()); + + SetChannelsDirty( VERTEX_FORMAT1(Color), false ); +} + +void Mesh::SetColorsConverting (ColorRGBAf const* data, size_t count) +{ + if (m_StreamCompression) + return; + WaitOnRenderThreadUse(); + + if (count == 0 || !data) + { + FormatVertices (GetAvailableChannels () & ~VERTEX_FORMAT1(Color)); + SetChannelsDirty( VERTEX_FORMAT1(Color), false ); + return; + } + + if (count != GetVertexCount ()) + { + ErrorStringMsg(kMeshAPIErrorMessage, "colors"); + return; + } + + if (!IsAvailable (kShaderChannelColor)) + { + FormatVertices (GetAvailableChannels () | VERTEX_FORMAT1(Color)); + } + m_VertexColorsSwizzled = gGraphicsCaps.needsToSwizzleVertexColors; + + if (m_VertexColorsSwizzled) + std::transform(data, data + count, GetColorBegin(), SwizzleColorForPlatform); + else + strided_copy_convert(data, data + count, GetColorBegin()); + + SetChannelsDirty( VERTEX_FORMAT1(Color), false ); +} + + +void Mesh::GetTriangles (Mesh::TemporaryIndexContainer& 
triangles) const +{ + triangles.clear(); + for (unsigned m=0;m<GetSubMeshCount();m++) + AppendTriangles(triangles, m); +} + +void Mesh::GetTriangles (Mesh::TemporaryIndexContainer& triangles, unsigned submesh) const +{ + triangles.clear(); + AppendTriangles(triangles, submesh); +} + +void QuadsToTriangles(const UInt16* quads, const int indexCount, Mesh::TemporaryIndexContainer& triangles) +{ + DebugAssert (indexCount%4 == 0); + triangles.resize((indexCount/2)*3); + for (int q = 0, t = 0; q < indexCount; q += 4, t +=6) + { + triangles[t] = quads[q]; + triangles[t + 1] = quads[q + 1]; + triangles[t + 2] = quads[q + 2]; + + triangles[t + 3] = quads[q]; + triangles[t + 4] = quads[q + 2]; + triangles[t + 5] = quads[q + 3]; + } +} + + +void Mesh::AppendTriangles (Mesh::TemporaryIndexContainer& triangles, unsigned submesh) const +{ + if (submesh >= GetSubMeshCount()) + { + ErrorString("Failed getting triangles. Submesh index is out of bounds."); + return; + } + + int topology = GetSubMeshFast(submesh).topology; + if (topology == kPrimitiveTriangleStripDeprecated) + Destripify(GetSubMeshBuffer16(submesh), GetSubMeshFast(submesh).indexCount, triangles); + else if (topology == kPrimitiveQuads) + QuadsToTriangles (GetSubMeshBuffer16 (submesh), GetSubMeshFast (submesh).indexCount, triangles); + else if (topology == kPrimitiveTriangles) + triangles.insert(triangles.end(), GetSubMeshBuffer16(submesh), GetSubMeshBuffer16(submesh) + GetSubMeshFast(submesh).indexCount); + else + ErrorString("Failed getting triangles. Submesh topology is lines or points."); +} + +void Mesh::GetStrips (Mesh::TemporaryIndexContainer& triangles, unsigned submesh) const +{ + triangles.clear(); + if (submesh >= GetSubMeshCount()) + { + ErrorString("Failed getting triangles. 
// Return the raw strip indices of a submesh, or nothing when the submesh is
// not a (deprecated) triangle strip.
void Mesh::GetStrips (Mesh::TemporaryIndexContainer& triangles, unsigned submesh) const
{
	triangles.clear();
	if (submesh >= GetSubMeshCount())
	{
		ErrorString("Failed getting triangles. Submesh index is out of bounds.");
		return;
	}

	if (GetSubMeshFast(submesh).topology != kPrimitiveTriangleStripDeprecated)
		return;

	triangles.assign(GetSubMeshBuffer16(submesh), GetSubMeshBuffer16(submesh) + GetSubMeshFast(submesh).indexCount);
}

// Return a submesh's raw index list regardless of topology.
void Mesh::GetIndices (TemporaryIndexContainer& triangles, unsigned submesh) const
{
	triangles.clear();
	if (submesh >= GetSubMeshCount())
	{
		ErrorString("Failed getting indices. Submesh index is out of bounds.");
		return;
	}
	triangles.assign(GetSubMeshBuffer16(submesh), GetSubMeshBuffer16(submesh) + GetSubMeshFast(submesh).indexCount);
}


// Replace a submesh's indices from 32-bit source data (narrowed to 16-bit storage).
bool Mesh::SetIndices (const UInt32* indices, unsigned count, unsigned submesh, GfxPrimitiveType topology)
{
	int mask = kRebuildCollisionTriangles;
	return SetIndicesComplex (indices, count, submesh, topology, mask);
}

// Replace a submesh's indices from 16-bit source data.
bool Mesh::SetIndices (const UInt16* indices, unsigned count, unsigned submesh, GfxPrimitiveType topology)
{
	int mask = kRebuildCollisionTriangles | k16BitIndices;
	return SetIndicesComplex (indices, count, submesh, topology, mask);
}


// Core index-assignment routine: validates arguments and index bounds, writes
// the data, then refreshes the submesh vertex range/bounds and (optionally)
// the collision triangles. Returns false on validation failure.
bool Mesh::SetIndicesComplex (const void* indices, unsigned count, unsigned submesh, GfxPrimitiveType topology, int mode)
{
	WaitOnRenderThreadUse();

	if (indices == NULL && count != 0 && (mode & kDontAssignIndices) == 0)
	{
		ErrorString("failed setting triangles. triangles is NULL");
		return false;
	}

	if (submesh >= GetSubMeshCount())
	{
		ErrorString("Failed setting triangles. Submesh index is out of bounds.");
		return false;
	}

	if ((topology == kPrimitiveTriangles) && (count % 3 != 0))
	{
		ErrorString("Failed setting triangles. The number of supplied triangle indices must be a multiple of 3.");
		return false;
	}

	if ((mode & kDontAssignIndices) == 0)
	{
		// Reject indices referencing vertices beyond the current vertex count.
		bool valid;
		if (mode & k16BitIndices)
			valid = CheckOutOfBounds (GetVertexCount(), reinterpret_cast<const UInt16*>(indices), count);
		else
			valid = CheckOutOfBounds (GetVertexCount(), reinterpret_cast<const UInt32*>(indices), count);

		if (!valid)
		{
			ErrorString("Failed setting triangles. Some indices are referencing out of bounds vertices.");
			return false;
		}
	}

	SetIndexData(submesh, count, indices, topology, mode);

	if (mode & Mesh::kDontSupportSubMeshVertexRanges)
	{
		// Single-submesh fast path: the whole vertex buffer is the range.
		Assert(m_SubMeshes.size () == 1);
		m_SubMeshes[0].firstVertex = 0;
		m_SubMeshes[0].vertexCount = GetVertexCount();
		m_SubMeshes[0].localAABB = m_LocalAABB;
	}
	else
	{
		// Update vertex range
		UpdateSubMeshVertexRange (submesh);
		RecalculateSubmeshBounds(submesh);
	}

	if (mode & kRebuildCollisionTriangles)
		RebuildCollisionTriangles();

	SetChannelsDirty( 0, true );

	return true;
}
+ + for (size_t i = 0; i < submeshCount; ++i) + { + SubMesh& sm = m_SubMeshes[i]; + if (sm.topology == kPrimitiveTriangleStripDeprecated) + { + Destripify (GetSubMeshBuffer16(i), sm.indexCount, submeshIndices[i]); + } + else + { + submeshIndices[i].resize(sm.indexCount); + memcpy(&submeshIndices[i][0], GetSubMeshBuffer16(i), sm.indexCount << 1); + } + } + + for(size_t i = 0; i < submeshCount; ++i) + { + SetIndexData(i, submeshIndices[i].size(), &submeshIndices[i][0], kPrimitiveTriangles, kRebuildCollisionTriangles | k16BitIndices); + } +} + +void Mesh::SetIndexData(int submeshIndex, int indexCount, const void* indices, GfxPrimitiveType topology, int mode) +{ + int newByteSize = indexCount * kVBOIndexSize; + int oldSubmeshSize = GetSubMeshBufferByteSize (submeshIndex); + int insertedBytes = newByteSize - GetSubMeshBufferByteSize (submeshIndex); + int oldFirstByte = m_SubMeshes[submeshIndex].firstByte; + // Growing the buffer + if (insertedBytes > 0) + { + m_IndexBuffer.insert(m_IndexBuffer.begin() + oldFirstByte + oldSubmeshSize, insertedBytes, 0); + } + // Shrinking the buffer + else + { + m_IndexBuffer.erase(m_IndexBuffer.begin() + oldFirstByte, m_IndexBuffer.begin() + oldFirstByte - insertedBytes); + } + +#if UNITY_PS3 + + // mircea@info: sadly for us GPU renders from pointers, so we need to create a new instance when something changes....(fixes nasty bug #434226) + IndexContainer newIndexContainer; + newIndexContainer.resize(m_IndexBuffer.size()); + m_IndexBuffer.swap(newIndexContainer); + +#endif + + // Update the sub mesh + m_SubMeshes[submeshIndex].indexCount = indexCount; + m_SubMeshes[submeshIndex].topology = topology; + + // Synchronize subsequent sub meshes + for (int i=submeshIndex+1;i<m_SubMeshes.size();i++) + { + m_SubMeshes[i].firstByte = m_SubMeshes[i-1].firstByte + m_SubMeshes[i-1].indexCount * kVBOIndexSize; + } + + // Write indices into the allocated data + if ((mode & kDontAssignIndices) == 0) + { + if (mode & k16BitIndices) + { + const UInt16* 
src = reinterpret_cast<const UInt16*>(indices); + UInt16* dst = GetSubMeshBuffer16(submeshIndex); + for (int i=0;i<indexCount;i++) + dst[i] = src[i]; + } + else + { + const UInt32* src = reinterpret_cast<const UInt32*>(indices); + UInt16* dst = GetSubMeshBuffer16(submeshIndex); + for (int i=0;i<indexCount;i++) + dst[i] = src[i]; + } + } + + return; +} + +const UInt16* Mesh::GetSubMeshBuffer16 (int submesh) const +{ + return m_IndexBuffer.size() > 0 && m_SubMeshes[submesh].firstByte < m_IndexBuffer.size() ? reinterpret_cast<const UInt16*> (&m_IndexBuffer[m_SubMeshes[submesh].firstByte]) : NULL; +} +UInt16* Mesh::GetSubMeshBuffer16 (int submesh) +{ + return m_IndexBuffer.size() > 0 && m_SubMeshes[submesh].firstByte < m_IndexBuffer.size() ? reinterpret_cast<UInt16*> (&m_IndexBuffer[m_SubMeshes[submesh].firstByte]) : NULL; +} + +void Mesh::SetBindposes (const Matrix4x4f* bindposes, int count) +{ + m_Bindpose.assign(bindposes, bindposes + count); + SetDirty(); +} + +void Mesh::SetBounds (const AABB& aabb) +{ + m_LocalAABB = aabb; + SetDirty(); + NotifyObjectUsers( kDidModifyBounds ); + m_IntermediateUsers.Notify( kImNotifyBoundsChanged ); +} + +void Mesh::SetBounds (unsigned submesh, const AABB& aabb) +{ + GetSubMeshFast(submesh).localAABB = aabb; + SetDirty(); + NotifyObjectUsers( kDidModifyBounds ); + m_IntermediateUsers.Notify( kImNotifyBoundsChanged ); +} + +void Mesh::NotifyObjectUsers(const MessageIdentifier& msg) +{ + ASSERT_RUNNING_ON_MAIN_THREAD; + + MessageData data; + data.SetData (this, ClassID (Mesh)); + + ObjectList::iterator next; + for( ObjectList::iterator i = m_ObjectUsers.begin(); i != m_ObjectUsers.end(); i=next ) + { + next = i; + ++next; + Object& target = **i; + SendMessageDirect(target, msg, data); + } +} + +void Mesh::WaitOnRenderThreadUse() +{ +#if ENABLE_MULTITHREADED_CODE + if (m_WaitOnCPUFence) + { + GetGfxDevice().WaitOnCPUFence(m_CurrentCPUFence); + m_WaitOnCPUFence = false; + } +#endif +} + +void Mesh::RebuildCollisionTriangles() +{ + 
m_CollisionMesh.VertexDataHasChanged (); +} + +PROFILER_INFORMATION(gRecalculateNormals, "Mesh.RecalculateNormals", kProfilerOther) + +void Mesh::RecalculateNormals() +{ + if (m_StreamCompression) + return; + WaitOnRenderThreadUse(); + + PROFILER_AUTO(gRecalculateNormals, this); + + if (int vertexCount = GetVertexCount()) + { + unsigned newChannels = m_VertexData.GetChannelMask () | VERTEX_FORMAT1(Normal); + if (newChannels != m_VertexData.GetChannelMask ()) + FormatVertices (newChannels); + + TemporaryIndexContainer triangles; + GetTriangles (triangles); + + CalculateNormals( GetVertexBegin (), &triangles[0], vertexCount, triangles.size()/3, GetNormalBegin () ); + } + + SetChannelsDirty( VERTEX_FORMAT1(Normal), false ); +} + + +void Mesh::SetSubMeshCount (unsigned int count) +{ + WaitOnRenderThreadUse(); + + if (count == 0) + { + m_IndexBuffer.clear(); + m_SubMeshes.clear(); + return; + } + + // Remove elements + if (count < m_SubMeshes.size ()) + { + m_IndexBuffer.resize(m_SubMeshes[count].firstByte); + m_SubMeshes.resize(count); + } + // Append elements + else if (count > m_SubMeshes.size ()) + { + SubMesh data; + data.firstByte = m_IndexBuffer.size(); + data.indexCount = 0; + data.topology = kPrimitiveTriangles; + data.firstVertex = 0; + data.vertexCount = 0; + data.localAABB = AABB (Vector3f::zero, Vector3f::zero); + m_SubMeshes.resize(count, data); + RecalculateBounds(); + } +} + +size_t Mesh::GetSubMeshCount () const +{ + return m_SubMeshes.size(); +} + +int Mesh::GetPrimitiveCount() const +{ + int submeshes = GetSubMeshCount(); + int count = 0; + for( int m = 0; m < submeshes; ++m ) { + const SubMesh& sub = m_SubMeshes[m]; + count += ::GetPrimitiveCount(sub.indexCount, sub.topology, false); + } + return count; +} + +int Mesh::CalculateTriangleCount() const +{ + int submeshes = GetSubMeshCount(); + int count = 0; + for( int m = 0; m < submeshes; ++m ) + { + const SubMesh& sub = m_SubMeshes[m]; + if (sub.topology == kPrimitiveTriangleStripDeprecated) + { + 
const UInt16* indices = GetSubMeshBuffer16(m); + int triCount = CountTrianglesInStrip (indices, sub.indexCount); + count += triCount; + } + else if (sub.topology == kPrimitiveTriangles) + { + count += sub.indexCount / 3; + } + } + return count; +} + +Mesh& Mesh::GetInstantiatedMesh (Mesh* mesh, Object& owner) +{ + if (NULL != mesh && mesh->m_Owner == PPtr<Object> (&owner)) + return *mesh; + + if (!IsWorldPlaying()) + ErrorStringObject("Instantiating mesh due to calling MeshFilter.mesh during edit mode. This will leak meshes. Please use MeshFilter.sharedMesh instead.", &owner); + + if (mesh == NULL || !mesh->HasVertexData ()) + { + if (!mesh) + mesh = NEW_OBJECT (Mesh); + mesh->Reset(); + + mesh->SetName(owner.GetName()); + mesh->m_Owner = &owner; + + mesh->AwakeFromLoad(kInstantiateOrCreateFromCodeAwakeFromLoad); + return *mesh; + } + + Mesh* instance = NEW_OBJECT (Mesh); + CopySerialized(*mesh, *instance); + instance->SetNameCpp (Append (mesh->GetName (), " Instance")); + instance->m_Owner = &owner; + return *instance; +} + +const VertexStreamsLayout& Mesh::GetStreamsLayout() const +{ + if (!m_Skin.empty() || GetBlendShapeChannelCount() != 0) + return VertexDataInfo::kVertexStreamsSkinnedHotColdSplit; + else + return VertexDataInfo::kVertexStreamsDefault; +} + +const VertexChannelsLayout& Mesh::GetChannelsLayout() const +{ + UInt8 compressed = m_StreamCompression; +#if !UNITY_EDITOR + // Editor only does build step for compression and never draws float16 vertices + if (!gGraphicsCaps.has16BitFloatVertex) + { + compressed = kStreamCompressionDefault; + } +#endif + switch (compressed) + { + default: // fall through + case kStreamCompressionDefault: + return VertexDataInfo::kVertexChannelsDefault; + case kStreamCompressionCompressed: + return VertexDataInfo::kVertexChannelsCompressed; + case kStreamCompressionCompressedAggressive: + return VertexDataInfo::kVertexChannelsCompressedAggressive; + } +} + +void Mesh::InitVertexBufferData( UInt32 wantedChannels ) +{ +#if 
GFX_CAN_UNLOAD_MESH_DATA + // If data was uploaded and freed we cannot update it. + if (!HasVertexData()) + return; +#endif + UInt32 presentChannels = GetAvailableChannels (); + + // Modify the vertex buffer before fetching any channel pointers, as modifying the format reallocates the buffer and pointers + // are invalidated. Due to possible format changes, also fetch the stride sizes only after buffer reformatting. + unsigned initChannels = 0; + + // Silently create an all-white color array if shader wants colors, but mesh does not have them. + // On D3D, some runtime/driver combinations will crash if a vertex shader wants colors but does not + // have them (e.g. Vista drivers for Intel 965). In other cases it will default to white for fixed function + // pipe, and to undefined value for vertex shaders, which is not good either. + if( (wantedChannels & VERTEX_FORMAT1(Color)) && !(presentChannels & VERTEX_FORMAT1(Color)) ) + initChannels |= VERTEX_FORMAT1(Color); + +#if UNITY_PEPPER + // Pepper OpenGL implementation fails to draw anything if any channel is missing. 
// Fills 'buffer' with channel/stream descriptors and the raw vertex memory to
// be uploaded to a VBO. Missing texcoord channels are aliased to the last
// valid one (presumably so shaders sampling them read defined data — the
// exact driver motivation is not visible here).
void Mesh::GetVertexBufferData( VertexBufferData& buffer, UInt32 wantedChannels )
{
	InitVertexBufferData(wantedChannels);

	for (int i = 0; i < kShaderChannelCount; i++)
		buffer.channels[i] = m_VertexData.GetChannel(i);

	for (int i = 0; i < kMaxVertexStreams; i++)
		buffer.streams[i] = m_VertexData.GetStream(i);

	int srcTexcoord = kShaderChannelNone;
	for (int i = kShaderChannelTexCoord0; i <= kShaderChannelTexCoord1; i++)
	{
		if (buffer.channels[i].IsValid())
		{
			// We have a valid texcoord
			srcTexcoord = i;
			continue;
		}
		UInt32 channelMask = 1 << i;
		if (srcTexcoord != kShaderChannelNone)
		{
			// Replicate last valid texture coord
			const ChannelInfo& srcChannel = buffer.channels[srcTexcoord];
			buffer.channels[i] = srcChannel;
			buffer.streams[srcChannel.stream].channelMask |= channelMask;
		}
	}

	// Data pointer can be NULL if we are only updating declaration of uploaded VBO
	buffer.buffer = m_VertexData.GetDataPtr();
	buffer.bufferSize = m_VertexData.GetDataSize();
	buffer.vertexCount = GetVertexCount();

#if UNITY_EDITOR
	#define LogStringObjectEditor(x) LogStringObject(Format(x, GetName()),this)

	// Editor-only diagnostics: warn when the shader requests a channel the
	// mesh cannot supply.
	if (Camera::ShouldShowChannelErrors(GetCurrentCameraPtr()))
	{
		const ChannelInfo* channels = buffer.channels;

		if ((wantedChannels & VERTEX_FORMAT1(Tangent)) && !channels[kShaderChannelTangent].IsValid())
			LogStringObjectEditor ("Shader wants tangents, but the mesh %s doesn't have them");

		if ((wantedChannels & VERTEX_FORMAT1(Normal)) && !channels[kShaderChannelNormal].IsValid())
			LogStringObjectEditor ("Shader wants normals, but the mesh %s doesn't have them");

		if ((wantedChannels & VERTEX_FORMAT1(TexCoord0)) && !channels[kShaderChannelTexCoord0].IsValid())
			LogStringObjectEditor ("Shader wants texture coordinates, but the mesh %s doesn't have them");

		if ((wantedChannels & VERTEX_FORMAT1(TexCoord1)) && !channels[kShaderChannelTexCoord1].IsValid())
			LogStringObjectEditor ("Shader wants secondary texture coordinates, but the mesh %s doesn't have any");

		if ((wantedChannels & VERTEX_FORMAT1(Color)) && !channels[kShaderChannelColor].IsValid())
			LogStringObjectEditor ("Shader wants vertex colors, and failed to create a vertex color array");
	}
	#undef LogStringObjectEditor
#endif

#if UNITY_PS3
	// With no precomputed partitions, synthesize one partition per sub mesh.
	if(m_PartitionInfos.empty())
	{
		int submeshCount = m_SubMeshes.size();
		for (int submesh=0; submesh<submeshCount; submesh++)
		{
			SubMesh& sm = GetSubMeshFast(submesh);

			MeshPartitionInfo partInfo;
			partInfo.submeshStart = submesh;
			partInfo.partitionCount = 1;
			buffer.partInfo.push_back(partInfo);

			MeshPartition part;
			part.vertexCount = sm.vertexCount;
			part.vertexOffset = 0;
			part.indexCount = sm.indexCount;
			part.indexByteOffset = sm.firstByte;
			buffer.partitions.push_back(part);;
		}
	}
	else
	{
		buffer.partInfo = m_PartitionInfos;
		buffer.partitions = m_Partitions;
	}

#endif

	// NOTE(review): redundant — vertexCount was already assigned above.
	buffer.vertexCount = GetVertexCount ();
}
NULL : (void*)&m_IndexBuffer[0]; + + ///@TODO: HACK for now to get index buffers working, without changing a lot of vbo code + // We should be passing the byte size not the number of indices + buffer.count = GetTotalndexCount(); + buffer.hasTopologies = 0; + for (size_t i = 0, n = m_SubMeshes.size(); i < n; ++i) + { + buffer.hasTopologies |= (1<<m_SubMeshes[i].topology); + } +} + +PROFILER_INFORMATION(gCreateVBOProfile, "Mesh.CreateVBO", kProfilerRender); +PROFILER_INFORMATION(gAwakeFromLoadMesh, "Mesh.AwakeFromLoad", kProfilerLoading); +PROFILER_INFORMATION(gUploadMeshDataMesh, "Mesh.UploadMeshData", kProfilerLoading); + +VBO* Mesh::GetSharedVBO( UInt32 wantedChannels ) +{ + // Some badly written shaders have no Bind statements in the vertex shaders parts; + // and only happened to work before by accident. If requiredChannels turns out to be + // zero, let's pretend it did request at least position. + if (wantedChannels == 0) + wantedChannels = (1<<kShaderChannelVertex); + + UInt32 newChannels = wantedChannels | m_ChannelsInVBO; + bool addedChannels = newChannels != m_ChannelsInVBO; + +#if GFX_CAN_UNLOAD_MESH_DATA + if (!m_IsReadable && !m_KeepVertices && m_VBO) + { + // Everything is already prepared, just return VBO + return m_VBO; + } +#endif + + if ((GFX_ALL_BUFFERS_CAN_BECOME_LOST || m_IsDynamic) && m_VBO && m_VBO->IsVertexBufferLost()) + m_VerticesDirty = true; + if (GFX_ALL_BUFFERS_CAN_BECOME_LOST && m_VBO && m_VBO->IsIndexBufferLost()) + m_IndicesDirty = true; + + if (addedChannels || m_VerticesDirty || m_IndicesDirty) + CreateSharedVBO(wantedChannels); + + return m_VBO; +} + +void Mesh::CreateSharedVBO( UInt32 wantedChannels ) +{ + if (m_IndexBuffer.empty()) + { + if (m_VBO) + { + GetGfxDevice().DeleteVBO(m_VBO); + m_VBO = NULL; + } + return; + } + + PROFILER_BEGIN(gCreateVBOProfile, this) + SET_ALLOC_OWNER(this); + + if (!m_VBO) + { + m_VBO = GetGfxDevice().CreateVBO(); + m_VBO->SetHideFromRuntimeStats(m_HideFromRuntimeStats); + } + + UInt32 newChannels 
= wantedChannels | m_ChannelsInVBO; + if (m_VerticesDirty || newChannels != m_ChannelsInVBO) + { + if (m_IsDynamic) + m_VBO->SetVertexStreamMode(0, VBO::kStreamModeDynamic); + + VertexBufferData vertexBuffer; + GetVertexBufferData (vertexBuffer, newChannels); + m_VBO->UpdateVertexData (vertexBuffer); + } + + if (m_IndicesDirty) + { + // TODO: probably add separate script access to set vertex/index dynamic + if (m_IsDynamic) + m_VBO->SetIndicesDynamic(true); + + IndexBufferData indexBuffer; + GetIndexBufferData (indexBuffer); + m_VBO->UpdateIndexData (indexBuffer); + } + + m_VerticesDirty = false; + m_IndicesDirty = false; + m_ChannelsInVBO = newChannels; + + PROFILER_END +} + +bool Mesh::CopyToVBO ( UInt32 wantedChannels, VBO& vbo ) +{ + if( m_IndexBuffer.empty() ) + return false; + + PROFILER_BEGIN(gCreateVBOProfile, this) + + VertexBufferData vertexBuffer; + GetVertexBufferData( vertexBuffer, wantedChannels ); + vbo.UpdateVertexData( vertexBuffer ); + + IndexBufferData indexBuffer; + GetIndexBufferData (indexBuffer); + vbo.UpdateIndexData (indexBuffer); +#if UNITY_XENON + if( m_VBO ) + vbo.CopyExtraUvChannels( m_VBO ); +#endif + PROFILER_END + + return true; +} + + +void Mesh::UnloadVBOFromGfxDevice() +{ + if (m_VBO) + { + WaitOnRenderThreadUse(); + GetGfxDevice().DeleteVBO (m_VBO); + } + m_VBO = NULL; + m_ChannelsInVBO = 0; + m_VerticesDirty = m_IndicesDirty = true; +#if ENABLE_MULTITHREADED_CODE + m_CurrentCPUFence = 0; + m_WaitOnCPUFence = false; +#endif +} + +void Mesh::ReloadVBOToGfxDevice() +{ + const bool needReloadFromDisk = (!m_IsReadable && !HasVertexData()); + if (needReloadFromDisk) + { + GetPersistentManager().ReloadFromDisk(this); + } + else + { + m_ChannelsInVBO = 0; + m_VerticesDirty = m_IndicesDirty = true; + } + SwizzleVertexColorsIfNeeded(); +} + + +bool Mesh::ExtractTriangle (UInt32 face, UInt32* indices) const +{ + ///@TODO: OPTIMIZE this away + TemporaryIndexContainer triangles; + GetTriangles(triangles); + if (face * 3 > triangles.size ()) 
+ return false; + + indices[0] = triangles[face * 3 + 0]; + indices[1] = triangles[face * 3 + 1]; + indices[2] = triangles[face * 3 + 2]; + return true; +} + +static void TransformNormals (const Matrix3x3f& invTranspose, StrideIterator<Vector3f> inNormals, StrideIterator<Vector3f> inNormalsEnd, StrideIterator<Vector3f> outNormals) +{ + for (; inNormals != inNormalsEnd; ++inNormals, ++outNormals) + *outNormals = NormalizeSafe (invTranspose.MultiplyVector3 (*inNormals)); +} + +static void TransformTangents (const Matrix3x3f& invTranspose, StrideIterator<Vector4f> inTangents, StrideIterator<Vector4f> inTangentsEnd, StrideIterator<Vector4f> outTangents) +{ + for ( ; inTangents != inTangentsEnd; ++inTangents, ++outTangents) + { + Vector3f tangent = Vector3f(inTangents->x,inTangents->y,inTangents->z); + Vector3f normalized = NormalizeSafe (invTranspose.MultiplyVector3 (tangent)); + *outTangents = Vector4f(normalized.x, normalized.y ,normalized.z, inTangents->w); + } +} + +void Mesh::CopyTransformed (const Mesh& mesh, const Matrix4x4f& transform) +{ + int vertexCount = mesh.GetVertexCount(); + unsigned outVertexFormat = mesh.GetAvailableChannelsForRendering (); + + ResizeVertices(mesh.GetVertexCount (), outVertexFormat); + + if (outVertexFormat & VERTEX_FORMAT1(Vertex)) + TransformPoints3x4 (transform, + (Vector3f*)mesh.GetChannelPointer (kShaderChannelVertex), mesh.GetStride (kShaderChannelVertex), + (Vector3f*)GetChannelPointer (kShaderChannelVertex), GetStride (kShaderChannelVertex), + vertexCount); + + Matrix3x3f invTranspose3x3 = Matrix3x3f(transform); invTranspose3x3.InvertTranspose (); + + if (outVertexFormat & VERTEX_FORMAT1(Normal)) + TransformNormals (invTranspose3x3, mesh.GetNormalBegin (), mesh.GetNormalEnd (), GetNormalBegin ()); + if (outVertexFormat & VERTEX_FORMAT1(Tangent)) + TransformTangents (invTranspose3x3, mesh.GetTangentBegin (), mesh.GetTangentEnd (), GetTangentBegin ()); + + m_IndexBuffer = mesh.m_IndexBuffer; + m_SubMeshes = mesh.m_SubMeshes; + 
m_Skin = mesh.m_Skin; + if (outVertexFormat & VERTEX_FORMAT1(TexCoord0)) + strided_copy (mesh.GetUvBegin (0), mesh.GetUvEnd (0), GetUvBegin (0)); + if (outVertexFormat & VERTEX_FORMAT1(TexCoord1)) + strided_copy (mesh.GetUvBegin (1), mesh.GetUvEnd (1), GetUvBegin (1)); + if (outVertexFormat & VERTEX_FORMAT1(Color)) + strided_copy (mesh.GetColorBegin (), mesh.GetColorEnd (), GetColorBegin ()); + m_VertexColorsSwizzled = mesh.m_VertexColorsSwizzled; + m_LocalAABB = mesh.m_LocalAABB; + + SetChannelsDirty( outVertexFormat, true ); + ClearSkinCache(); +} + + +void Mesh::SetChannelsDirty (unsigned vertexChannelsChanged, bool indices) +{ + SetDirty(); + + m_VerticesDirty |= vertexChannelsChanged != 0; + m_IndicesDirty |= indices; + + // We should regenreate physics mesh only if verex data have changed + if ((vertexChannelsChanged & VERTEX_FORMAT1(Vertex)) || indices) + { + m_CollisionMesh.VertexDataHasChanged(); + m_CachedBonesAABB.clear(); + } + NotifyObjectUsers( kDidModifyMesh ); +} + +bool Mesh::SetBoneWeights (const BoneInfluence* v, int count) +{ + WaitOnRenderThreadUse(); + ClearSkinCache(); + if (count == 0) + { + m_Skin.clear(); + UpdateVertexFormat(); + return true; + } + + if (count != GetVertexCount ()) + { + ErrorString("Mesh.boneWeights is out of bounds. 
The supplied array needs to be the same size as the Mesh.vertices array."); + return false; + } + m_Skin.assign(v, v + count); + SetChannelsDirty (0, false); + UpdateVertexFormat(); + + return true; +} + +static void ComputeBoneBindPoseAABB (const Matrix4x4f* bindPoses, size_t bindPoseCount, const StrideIterator<Vector3f> vertices, const BoneInfluence* influences, size_t vertexCount, const BlendShapeVertices& blendShapeVertices, MinMaxAABB* outputBounds) +{ + if (blendShapeVertices.empty()) + { + for(int v=0;v<vertexCount;v++) + { + const Vector3f& vert = vertices[v]; + for (int i = 0; i < 4; i++) + { + if(influences[v].weight[i] > 0.0f) + { + const UInt32 boneIndex = influences[v].boneIndex[i]; + + outputBounds[boneIndex].Encapsulate(bindPoses[boneIndex].MultiplyPoint3(vert)); + } + } + } + } + else + { + Vector3f* minVertices; + ALLOC_TEMP(minVertices, Vector3f, vertexCount); + Vector3f* maxVertices; + ALLOC_TEMP(maxVertices, Vector3f, vertexCount); + + strided_copy(vertices, vertices + vertexCount, minVertices); + strided_copy(vertices, vertices + vertexCount, maxVertices); + + for (int i=0;i<blendShapeVertices.size();i++) + { + int index = blendShapeVertices[i].index; + Vector3f pos = blendShapeVertices[i].vertex + vertices[index]; + maxVertices[index] = max (maxVertices[index], pos); + minVertices[index] = min (minVertices[index], pos); + } + + for(int v=0;v<vertexCount;v++) + { + for (int i = 0; i < 4; i++) + { + if(influences[v].weight[i] > 0.0f) + { + const UInt32 boneIndex = influences[v].boneIndex[i]; + outputBounds[boneIndex].Encapsulate(bindPoses[boneIndex].MultiplyPoint3(minVertices[v])); + outputBounds[boneIndex].Encapsulate(bindPoses[boneIndex].MultiplyPoint3(maxVertices[v])); + } + } + } + } +} + +const Mesh::AABBContainer& Mesh::GetCachedBonesBounds() +{ + // Use cached result if it has the correct size (including empty) + if (m_CachedBonesAABB.size() == m_Bindpose.size()) + return m_CachedBonesAABB; + + Assert(GetMaxBoneIndex() < 
m_Bindpose.size()); + + m_CachedBonesAABB.resize_initialized(m_Bindpose.size(), MinMaxAABB()); + + ComputeBoneBindPoseAABB (GetBindposes(), m_CachedBonesAABB.size(), GetVertexBegin(), m_Skin.begin(), GetVertexCount(), m_Shapes.vertices, &m_CachedBonesAABB[0]); + + return m_CachedBonesAABB; +} + +void Mesh::ClearSkinCache () +{ + m_CachedBonesAABB.clear(); + m_CachedSkin2.clear(); + m_CachedSkin1.clear(); + m_MaxBoneIndex = -1; +} + +int Mesh::GetMaxBoneIndex () +{ + if (m_MaxBoneIndex != -1) + return m_MaxBoneIndex; + + m_MaxBoneIndex = 0; + for (int i=0;i<m_Skin.size();i++) + { + m_MaxBoneIndex = max(m_MaxBoneIndex, m_Skin[i].boneIndex[0]); + m_MaxBoneIndex = max(m_MaxBoneIndex, m_Skin[i].boneIndex[1]); + m_MaxBoneIndex = max(m_MaxBoneIndex, m_Skin[i].boneIndex[2]); + m_MaxBoneIndex = max(m_MaxBoneIndex, m_Skin[i].boneIndex[3]); + } + + return m_MaxBoneIndex; +} + +void* Mesh::GetSkinInfluence (int count) +{ + if (!m_Skin.empty()) + { + BoneInfluence* bones4 = &m_Skin[0]; + if (count == 1) + { + if (!m_CachedSkin1.empty()) + return &m_CachedSkin1[0]; + + // Cache 1 bone skin weights + int size = m_Skin.size(); + m_CachedSkin1.resize_uninitialized(size); + + int* bones1 = &m_CachedSkin1[0]; + for (int i=0;i<size;i++) + bones1[i] = bones4[i].boneIndex[0]; + return bones1; + + } + else if (count == 2) + { + if (!m_CachedSkin2.empty ()) + return &m_CachedSkin2[0]; + + // Cache 2 bone skin weights + int size = m_Skin.size(); + m_CachedSkin2.resize_uninitialized(size); + + BoneInfluence2* bones2 = &m_CachedSkin2[0]; + for (int i=0;i<size;i++) + { + bones2[i].boneIndex[0] = bones4[i].boneIndex[0]; + bones2[i].boneIndex[1] = bones4[i].boneIndex[1]; + + float invSum = 1.0F / (bones4[i].weight[0] + bones4[i].weight[1]); + bones2[i].weight[0] = bones4[i].weight[0] * invSum; + bones2[i].weight[1] = bones4[i].weight[1] * invSum; + } + return bones2; + } + else if (count == 4) + { + return bones4; + } + else + { + return NULL; + } + } + else + { + return NULL; + } +} + + +int 
Mesh::GetRuntimeMemorySize () const +{ + int size = Super::GetRuntimeMemorySize(); + + #if ENABLE_PROFILER + if (m_VBO) + size += m_VBO->GetRuntimeMemorySize(); + #endif + + return size; +} + + +void* Mesh::GetSharedNxMesh () +{ + return m_CollisionMesh.GetSharedNxMesh (*this); +} + +void* Mesh::GetSharedNxConvexMesh () +{ + return m_CollisionMesh.GetSharedNxConvexMesh (*this); +} + +void Mesh::UploadMeshData(bool markNoLongerReadable) +{ + if(markNoLongerReadable) + m_IsReadable = false; + + ClearSkinCache(); + UpdateVertexFormat(); + + // prepare VBO + UInt32 channelMask = GetAvailableChannelsForRendering(); + + // Create color channel in case it's needed by shader (and we can't patch it) +#if GFX_CAN_UNLOAD_MESH_DATA + bool unloadData = !m_IsReadable && m_Skin.empty(); + if (unloadData && !m_KeepVertices) + channelMask |= VERTEX_FORMAT1(Color); +#endif + + // Shared VBO is not required for skinned meshes (unless used as non-skinned) + if (m_Skin.empty()) + CreateSharedVBO(channelMask); + +#if GFX_CAN_UNLOAD_MESH_DATA + if (unloadData) + { + if (!m_KeepVertices && m_VBO && !m_VBO->IsUsingSourceVertices()) + { + Assert(m_Skin.empty()); + m_VertexData.Deallocate(); + m_VBO->UnloadSourceVertices(); + } + if (!m_KeepIndices && m_VBO && !m_VBO->IsUsingSourceIndices()) + { +#if UNITY_METRO + m_IndexBuffer.clear(); + m_IndexBuffer.shrink_to_fit(); +#else + // On Metro this throws "Expression: vector containers incompatible for swap" when compiling in VS 2013, works okay if compiling in VS 2012 + // Case 568418 + IndexContainer emptyIndices; + m_IndexBuffer.swap(emptyIndices); +#endif + } + } +#endif +} + +void Mesh::AwakeFromLoad(AwakeFromLoadMode awakeMode) +{ + PROFILER_AUTO(gAwakeFromLoadMesh, this) + + Super::AwakeFromLoad(awakeMode); + m_CollisionMesh.AwakeFromLoad(awakeMode); + + UploadMeshData(!m_IsReadable); + + if (m_InternalMeshID == 0) + m_InternalMeshID = s_MeshIDGenerator.AllocateID (); +} + +void Mesh::AwakeFromLoadThreaded() +{ + 
Super::AwakeFromLoadThreaded(); + m_CollisionMesh.AwakeFromLoadThreaded(*this); +} + +void Mesh::MarkDynamic() +{ + // Optimize for frequent updates + m_IsDynamic = true; +} + +void Mesh::UpdateVertexFormat() +{ + // Make sure vertex streams are in the format we want for rendering + // This will also handle decompression of unsupported vertex formats + FormatVertices(GetAvailableChannels()); + SwizzleVertexColorsIfNeeded(); +} + +bool Mesh::ShouldIgnoreInGarbageDependencyTracking () +{ + return true; +} + +UInt32 Mesh::GetAvailableChannels() const +{ + return m_VertexData.GetChannelMask (); +} + +UInt32 Mesh::GetAvailableChannelsForRendering() const +{ + unsigned availChannels = m_VertexData.GetChannelMask (); + return availChannels; +} + +bool Mesh::IsSuitableSizeForDynamicBatching () const +{ + // If any submesh has too many vertices, don't keep mesh data for batching + for (size_t i = 0; i < GetSubMeshCount(); i++) + { + if (m_SubMeshes[i].vertexCount > kDynamicBatchingVerticesThreshold) + return false; + } + return true; +} + +void Mesh::CheckConsistency() +{ + Super::CheckConsistency(); + + for (int i = 0; i < m_SubMeshes.size(); ++i) + { + Assert(m_SubMeshes[i].topology != kPrimitiveTriangleStripDeprecated); + } +} + +void Mesh::SwapBlendShapeData (BlendShapeData& shapes) +{ + WaitOnRenderThreadUse(); + +// swap (m_Shapes, shapes); + m_Shapes = shapes; + + NotifyObjectUsers( kDidModifyMesh ); +} diff --git a/Runtime/Filters/Mesh/LodMesh.h b/Runtime/Filters/Mesh/LodMesh.h new file mode 100644 index 0000000..41fcf74 --- /dev/null +++ b/Runtime/Filters/Mesh/LodMesh.h @@ -0,0 +1,509 @@ +#ifndef LODMESH_H +#define LODMESH_H + +#include "Runtime/BaseClasses/NamedObject.h" +#include "Runtime/Geometry/AABB.h" +#include "Runtime/Math/Vector2.h" +#include "Runtime/Math/Vector4.h" +#include "Mesh.h" +#include "Runtime/Math/Color.h" +#include <string> +#include <vector> +#include "Runtime/BaseClasses/MessageIdentifier.h" +#include "Runtime/Shaders/VBO.h" +#include 
"CompressedMesh.h" +#include "VertexData.h" +#include "Runtime/Dynamics/CollisionMeshData.h" +#include "MeshBlendShape.h" +#include "Runtime/Misc/Allocator.h" +#include "Runtime/Camera/IntermediateUsers.h" + +class IntermediateRenderer; + +struct SubMesh +{ + UInt32 firstByte; + UInt32 indexCount; + GfxPrimitiveType topology; + + UInt32 firstVertex; + UInt32 vertexCount; + AABB localAABB; + + SubMesh () + { + firstByte = 0; + indexCount = 0; + topology = kPrimitiveTriangles; + firstVertex = 0; + vertexCount = 0; + localAABB = AABB (Vector3f::zero, Vector3f::zero); + } + + DECLARE_SERIALIZE_NO_PPTR (SubMesh) + +#if SUPPORT_SERIALIZED_TYPETREES + template<class TransferFunction> + void TransferWorkaround35SerializationFuckup (TransferFunction& transfer); +#endif +}; + +/// typedef for tangent space lighting rotations +typedef std::vector<DeprecatedTangent, STL_ALLOCATOR(kMemGeometry, DeprecatedTangent) > DeprecatedTangentsArray; + +template<class TransferFunc> +void SubMesh::Transfer (TransferFunc& transfer) +{ + #if SUPPORT_SERIALIZED_TYPETREES + if (transfer.GetFlags() & kWorkaround35MeshSerializationFuckup) + { + TransferWorkaround35SerializationFuckup (transfer); + return; + } + #endif + + transfer.SetVersion (2); + TRANSFER(firstByte); + TRANSFER(indexCount); + TRANSFER_ENUM(topology); + TRANSFER(firstVertex); + TRANSFER(vertexCount); + TRANSFER(localAABB); + if (transfer.IsOldVersion(1)) + { + UInt32 triStrip; + transfer.Transfer (triStrip, "isTriStrip"); + topology = triStrip ? kPrimitiveTriangleStripDeprecated : kPrimitiveTriangles; + } +} + +#if SUPPORT_SERIALIZED_TYPETREES +template<class TransferFunc> +void SubMesh::TransferWorkaround35SerializationFuckup (TransferFunc& transfer) +{ + TRANSFER(firstByte); + TRANSFER(indexCount); + + UInt32 triStrip; + transfer.Transfer (triStrip, "isTriStrip"); + topology = triStrip ? 
kPrimitiveTriangleStripDeprecated : kPrimitiveTriangles; + + UInt32 triangleCount; + transfer.Transfer (triangleCount, "triangleCount"); + + TRANSFER(firstVertex); + TRANSFER(vertexCount); + TRANSFER(localAABB); +} +#endif + +template<class TransferFunc> +void MeshPartition::Transfer (TransferFunc& transfer) +{ + TRANSFER(vertexCount); + TRANSFER(vertexOffset); + TRANSFER(indexCount); + TRANSFER(indexByteOffset); +} + +template<class TransferFunc> +void MeshPartitionInfo::Transfer (TransferFunc& transfer) +{ + TRANSFER(submeshStart); + TRANSFER(partitionCount); +} + +class EXPORT_COREMODULE Mesh : public NamedObject +{ +public: + enum + { + #if UNITY_IPHONE || UNITY_ANDROID || UNITY_BB10 || UNITY_TIZEN + alignBoneContainer = 16, + #else + alignBoneContainer = kDefaultMemoryAlignment, + #endif + }; + + //mircea@INFO PS3 doesn't render from VBOs hence m_VertexData and m_IndexBuffer *have* to be allocated with kMemVertexData. + typedef UNITY_VECTOR(kMemVertexData, UInt8) IndexContainer; + typedef UNITY_VECTOR(kMemGeometry, SubMesh) SubMeshContainer; + typedef dynamic_array<Matrix4x4f> MatrixContainer; + typedef dynamic_array<int> SkinContainer; + typedef UNITY_VECTOR(kMemGeometry, UInt32) CollisionTriangleContainer; + typedef dynamic_array<MinMaxAABB> AABBContainer; + + typedef dynamic_array<BoneInfluence, alignBoneContainer> BoneInfluenceContainer; + typedef dynamic_array<BoneInfluence2, alignBoneContainer> BoneInfluence2Container; + + typedef UNITY_TEMP_VECTOR(UInt32) TemporaryIndexContainer; + +#if UNITY_PS3 || UNITY_EDITOR + typedef UNITY_VECTOR(kMemVertexData, MeshPartition) MeshPartitionContainer; + typedef UNITY_VECTOR(kMemVertexData, MeshPartitionInfo) MeshPartitionInfoContainer; +#endif + + REGISTER_DERIVED_CLASS (Mesh, NamedObject) + DECLARE_OBJECT_SERIALIZE (Mesh) + + Mesh (MemLabelId label, ObjectCreationMode mode); + // ~Mesh (); declared-by-macro + +public: + + virtual int GetRuntimeMemorySize () const; + + VBO* GetSharedVBO( UInt32 wantedChannels ); + 
bool CopyToVBO ( UInt32 wantedChannels, VBO& vbo ); + void InitVertexBufferData ( UInt32 wantedChannels ); + void GetVertexBufferData ( VertexBufferData& buffer, UInt32 wantedChannels ); + void GetIndexBufferData (IndexBufferData& buffer); + void UnloadVBOFromGfxDevice(); + void ReloadVBOToGfxDevice(); + + + void AwakeFromLoad(AwakeFromLoadMode mode); + void AwakeFromLoadThreaded(); + void UploadMeshData(bool markNoLongerReadable); + + virtual bool MainThreadCleanup (); + + void MarkDynamic(); + void UpdateVertexFormat(); + + void SetBounds (const AABB& aabb ); + const AABB& GetBounds () const { return m_LocalAABB; } + + void SetBounds (unsigned submesh, const AABB& aabb ); + const AABB& GetBounds (unsigned submesh) const + { + DebugAssertIf(submesh >= m_SubMeshes.size()); + return m_SubMeshes[submesh].localAABB; + } + + void Clear (bool keepVertexLayout); + + /// Recalculate the bounding volume + void RecalculateBounds (); + void RecalculateSubmeshBounds (unsigned submesh); + + // Recalculate normals + void RecalculateNormals(); + void RecalculateNormalsWithHardAngle( float hardAngle ); + + // Validate that there are no out of bounds indices in the triangles + bool ValidateVertexCount (unsigned newVertexCount, const void* newTriangles, unsigned indexCount); + + int GetVertexCount () const { return m_VertexData.GetVertexCount (); } + + // Gets count in all submeshes. + int GetPrimitiveCount() const; + int CalculateTriangleCount() const; // ignores degenerates in strips + + // NOTE: make sure to call SetChannelDirty and RecalculateBounds when changing the geometry! 
+ StrideIterator<Vector3f> GetVertexBegin () const { return m_VertexData.MakeStrideIterator<Vector3f> (kShaderChannelVertex); } + StrideIterator<Vector3f> GetVertexEnd () const { return m_VertexData.MakeEndIterator<Vector3f> (kShaderChannelVertex); } + + StrideIterator<Vector3f> GetNormalBegin () const { return m_VertexData.MakeStrideIterator<Vector3f> (kShaderChannelNormal); } + StrideIterator<Vector3f> GetNormalEnd () const { return m_VertexData.MakeEndIterator<Vector3f> (kShaderChannelNormal); } + + StrideIterator<ColorRGBA32> GetColorBegin () const { return m_VertexData.MakeStrideIterator<ColorRGBA32> (kShaderChannelColor); } + StrideIterator<ColorRGBA32> GetColorEnd () const { return m_VertexData.MakeEndIterator<ColorRGBA32> (kShaderChannelColor); } + + StrideIterator<Vector2f> GetUvBegin (int uvIndex = 0) const { return m_VertexData.MakeStrideIterator<Vector2f> ((ShaderChannel)(kShaderChannelTexCoord0 + uvIndex)); } + StrideIterator<Vector2f> GetUvEnd (int uvIndex = 0) const { return m_VertexData.MakeEndIterator<Vector2f> ((ShaderChannel)(kShaderChannelTexCoord0 + uvIndex)); } + + StrideIterator<Vector4f> GetTangentBegin () const { return m_VertexData.MakeStrideIterator<Vector4f> (kShaderChannelTangent); } + StrideIterator<Vector4f> GetTangentEnd () const { return m_VertexData.MakeEndIterator<Vector4f> (kShaderChannelTangent); } + + void ExtractVertexArray (Vector3f* destination) const; + void ExtractNormalArray (Vector3f* destination) const; + void ExtractColorArray (ColorRGBA32* destination) const; + void ExtractColorArrayConverting (ColorRGBAf* destination) const; + void ExtractUvArray (int uvIndex, Vector2f* destination) const; + void ExtractTangentArray (Vector4f* destination) const; + + void SetVertices (Vector3f const* data, size_t count); + void SetNormals (Vector3f const* data, size_t count); + void SetTangents (Vector4f const* data, size_t count); + void SetUv (int uvIndex, Vector2f const* data, size_t count); + void SetColors (ColorRGBA32 const* 
data, size_t count); + void SetColorsConverting (ColorRGBAf const* data, size_t count); + + bool GetVertexColorsSwizzled() const { return m_VertexColorsSwizzled; } + void SetVertexColorsSwizzled(bool flag) { m_VertexColorsSwizzled = flag; } + bool HasVertexData () const { return m_VertexData.GetDataPtr () != NULL; } + void* GetVertexDataPointer () const { return m_VertexData.GetDataPtr (); } + size_t GetVertexDataSize () const { return m_VertexData.GetDataSize (); } + size_t GetVertexSize () const { return m_VertexData.GetVertexSize(); } + + const void* GetChannelPointer (ShaderChannel channel) const { return m_VertexData.GetDataPtr () + m_VertexData.GetChannelOffset (channel); } + void* GetChannelPointer (ShaderChannel channel) { return m_VertexData.GetDataPtr () + m_VertexData.GetChannelOffset (channel); } + void* GetChannelPointer (ShaderChannel channel, size_t offsetInElements) { return m_VertexData.GetDataPtr () + m_VertexData.GetChannelOffset (channel) + offsetInElements * m_VertexData.GetChannelStride(channel); } + size_t GetStride (ShaderChannel channel) const { return m_VertexData.GetChannelStride(channel); } + + bool IsAvailable (ShaderChannel channel) const { return m_VertexData.HasChannel (channel); } + // returns a bitmask of a newly created channels + UInt32 ResizeVertices (size_t count, UInt32 shaderChannels, const VertexStreamsLayout& streams, const VertexChannelsLayout& channels); + UInt32 ResizeVertices (size_t count, UInt32 shaderChannels) { return ResizeVertices(count, shaderChannels, GetStreamsLayout(), GetChannelsLayout()); } + + // returns a bitmask of a newly created channels + UInt32 FormatVertices (UInt32 shaderChannels); + // initializes the specified channels to default values + void InitChannelsToDefault (unsigned begin, unsigned count, unsigned shaderChannels); + + bool SetBoneWeights (const BoneInfluence* v, int count); + const BoneInfluence* GetBoneWeights () const { return m_Skin.empty() ? 
NULL : &m_Skin[0]; } + BoneInfluence* GetBoneWeights () { return m_Skin.empty() ? NULL : &m_Skin[0]; } + void ClearSkinCache (); + int GetMaxBoneIndex (); + + const Matrix4x4f* GetBindposes () const { return m_Bindpose.empty() ? NULL : &m_Bindpose[0]; } + int GetBindposeCount () const { return m_Bindpose.size(); } + void SetBindposes (const Matrix4x4f* bindposes, int count); + + bool SetIndices (const UInt32* indices, unsigned count, unsigned submesh, GfxPrimitiveType topology); + bool SetIndices (const UInt16* indices, unsigned count, unsigned submesh, GfxPrimitiveType topology); + + void GetTriangles (TemporaryIndexContainer& triangles, unsigned submesh) const; + void GetTriangles (TemporaryIndexContainer& triangles) const; + void AppendTriangles (TemporaryIndexContainer& triangles, unsigned submesh) const; + void GetStrips (TemporaryIndexContainer& triangles, unsigned submesh) const; + void GetIndices (TemporaryIndexContainer& triangles, unsigned submesh) const; + + enum { + k16BitIndices = 1 << 0, + kRebuildCollisionTriangles = 1 << 2, + kDontAssignIndices = 1 << 3, + kDontSupportSubMeshVertexRanges = 1 << 4 + }; + bool SetIndicesComplex (const void* indices, unsigned count, unsigned submesh, GfxPrimitiveType topology, int mode); + + bool ExtractTriangle (UInt32 face, UInt32* indices) const; + + void SetSubMeshCount (unsigned int count); + size_t GetSubMeshCount () const; + + void UpdateSubMeshVertexRange (int index); + + void AddObjectUser( ListNode<Object>& node ) { m_ObjectUsers.push_back(node); } + void AddIntermediateUser( ListNode<IntermediateRenderer>& node ) { m_IntermediateUsers.AddUser(node); } + + const BlendShapeData& GetBlendShapeData() const { return m_Shapes; } + size_t GetBlendShapeChannelCount() const { return m_Shapes.channels.size(); } + void SwapBlendShapeData (BlendShapeData& shapes); + + + BlendShapeData& GetWriteBlendShapeDataInternal() { return m_Shapes; } + + + void CheckConsistency(); + +#if ENABLE_MULTITHREADED_CODE + void 
SetCurrentCPUFence( UInt32 fence ) { m_CurrentCPUFence = fence; m_WaitOnCPUFence = true; } +#endif + + void WaitOnRenderThreadUse(); + + static Mesh& GetInstantiatedMesh (Mesh* mesh, Object& owner); + + void CopyTransformed (const Mesh& mesh, const Matrix4x4f& transform); + + void SetChannelsDirty (unsigned vertexChannelsChanged, bool indices); + + void* GetSharedNxMesh (); + void* GetSharedNxConvexMesh (); + + void RebuildCollisionTriangles(); + + const SubMesh& GetSubMeshFast (unsigned int submesh) const + { + DebugAssertIf(submesh >= m_SubMeshes.size()); + return m_SubMeshes[submesh]; + } + SubMesh& GetSubMeshFast (unsigned int submesh) + { + DebugAssertIf(submesh >= m_SubMeshes.size()); + return m_SubMeshes[submesh]; + } + + const UInt16* GetSubMeshBuffer16 (int submesh) const; + UInt16* GetSubMeshBuffer16 (int submesh); + + int GetSubMeshBufferByteSize (int submesh) const { return kVBOIndexSize * m_SubMeshes[submesh].indexCount; } + + // The number of indices contained in the index buffer (all submeshes) + int GetTotalndexCount () const; + + void ByteSwapIndices (); + + /// 4, 2, 1 bone influence (BoneInfluence, BoneInfluence2, int) + void* GetSkinInfluence (int count); + + int GetMeshUsageFlags () const { return m_MeshUsageFlags; } + + virtual bool ShouldIgnoreInGarbageDependencyTracking (); + + UInt32 GetAvailableChannels() const; + // May return only a subset of channels that are present in the mesh + UInt32 GetAvailableChannelsForRendering() const; + UInt32 GetChannelsInVBO() const { return m_ChannelsInVBO; } + + bool IsSuitableSizeForDynamicBatching () const; + + // Calculate cached bone bounds per bone by calculating the bounding volume in bind pose space. + // This is used by the SkinnedMeshRenderer to compute an accurate world space bounding volume quickly. 
+ const AABBContainer& GetCachedBonesBounds(); + + void DestripifyIndices (); + void SetHideFromRuntimeStats(bool flag) { m_HideFromRuntimeStats = flag; } + + bool IsSharedPhysicsMeshDirty () { return m_CollisionMesh.IsSharedPhysicsMeshDirty(); } + + bool CanAccessFromScript() const; + + const VertexData& GetVertexData() const { return m_VertexData; } + VertexData& GetVertexData() { return m_VertexData; } + + UInt8 GetMeshCompression() const { return m_MeshCompression; } + void SetMeshCompression(UInt8 mc) { m_MeshCompression = mc; } + + enum + { + kStreamCompressionDefault = 0, + kStreamCompressionCompressed, + kStreamCompressionCompressedAggressive + }; + + UInt8 GetStreamCompression() const { return m_StreamCompression; } + void SetStreamCompression(UInt8 cs) { m_StreamCompression = cs; } + bool GetIsReadable() const { return m_IsReadable; } + void SetIsReadable(bool readable) { m_IsReadable = readable; } + + + bool GetKeepVertices() const { return m_KeepVertices; } + void SetKeepVertices(bool keep) { m_KeepVertices = keep; } + + bool GetKeepIndices() const { return m_KeepIndices; } + void SetKeepIndices(bool keep) { m_KeepIndices = keep; } + + const IndexContainer& GetIndexBuffer() const { return m_IndexBuffer; } + IndexContainer& GetIndexBuffer() { return m_IndexBuffer; } + + const SubMeshContainer& GetSubMeshes() const { return m_SubMeshes; } + SubMeshContainer& GetSubMeshes() { return m_SubMeshes; } + + const MatrixContainer& GetBindpose() const { return m_Bindpose; } + MatrixContainer& GetBindpose() { return m_Bindpose; } + + const dynamic_array<BindingHash>& GetBonePathHashes() const { return m_BonePathHashes; } + dynamic_array<BindingHash>& GetBonePathHashes() { return m_BonePathHashes; } + BindingHash GetRootBonePathHash() const { return m_RootBonePathHash; } + void SetRootBonePathHash(BindingHash val) { m_RootBonePathHash = val; } + + const BoneInfluenceContainer& GetSkin() const { return m_Skin; } + BoneInfluenceContainer& GetSkin() { return m_Skin; } 
+ + const AABB& GetLocalAABB() const { return m_LocalAABB; } + void SetLocalAABB(const AABB& aabb) { m_LocalAABB = aabb; } + +#if UNITY_PS3 || UNITY_EDITOR + MeshPartitionContainer m_Partitions; + MeshPartitionInfoContainer m_PartitionInfos; +#endif + + +#if UNITY_EDITOR + void SetMeshOptimized(bool meshOptimized) { m_MeshOptimized = meshOptimized; } + bool GetMeshOptimized() const { return m_MeshOptimized; } +#endif + + UInt32 GetInternalMeshID() const { Assert(m_InternalMeshID); return m_InternalMeshID; } + +private: + void CreateSharedVBO( UInt32 wantedChannels ); + void NotifyObjectUsers( const MessageIdentifier& msg ); + void RecalculateSubmeshBoundsInternal (unsigned submesh); + void RecalculateBoundsInternal (); + void LoadDeprecatedTangentData (Mesh& mesh, DeprecatedTangentsArray &tangents); + void SwizzleVertexColorsIfNeeded (); + + const VertexStreamsLayout& GetStreamsLayout() const; + const VertexChannelsLayout& GetChannelsLayout() const; + + void DestripifySubmeshOnTransferInternal(); + void SetIndexData(int submeshIndex, int indexCount, const void* indices, GfxPrimitiveType topology, int mode); + +#if SUPPORT_SERIALIZED_TYPETREES + template<class TransferFunction> + void TransferWorkaround35SerializeFuckup (TransferFunction& transfer); +#endif + +#if UNITY_EDITOR || UNITY_PS3 + template<class TransferFunction> + void TransferPS3Data (TransferFunction& transfer); +#endif +#if UNITY_EDITOR + bool m_MeshOptimized; +#endif + + VertexData m_VertexData; + + UInt8 m_MeshCompression; + UInt8 m_StreamCompression; + bool m_IsReadable; + bool m_KeepVertices; + bool m_KeepIndices; + UInt32 m_InternalMeshID; + + int m_MeshUsageFlags; + + IndexContainer m_IndexBuffer; + SubMeshContainer m_SubMeshes; + MatrixContainer m_Bindpose; + BlendShapeData m_Shapes; + + dynamic_array<BindingHash> m_BonePathHashes; + BindingHash m_RootBonePathHash; + + AABBContainer m_CachedBonesAABB; + + BoneInfluenceContainer m_Skin; + BoneInfluence2Container m_CachedSkin2; + SkinContainer 
m_CachedSkin1; + + int m_MaxBoneIndex; + + AABB m_LocalAABB; + + CollisionMeshData m_CollisionMesh; + + typedef List< ListNode<Object> > ObjectList; + ObjectList m_ObjectUsers; // Object-derived users of this mesh + + IntermediateUsers m_IntermediateUsers; // IntermediateRenderer users of this mesh + + #if ENABLE_MULTITHREADED_CODE + UInt32 m_CurrentCPUFence; + bool m_WaitOnCPUFence; + #endif + + PPtr<Object> m_Owner; + VBO* m_VBO; + + + UInt32 m_ChannelsInVBO; + bool m_VerticesDirty; + bool m_IndicesDirty; + bool m_IsDynamic; + bool m_HideFromRuntimeStats; + bool m_VertexColorsSwizzled; + + friend class MeshFilter; + friend class ClothAnimator; + friend class CompressedMesh; + friend void PartitionSubmeshes (Mesh& m); + friend void OptimizeReorderVertexBuffer (Mesh& mesh); +}; + +#endif diff --git a/Runtime/Filters/Mesh/LodMeshFilter.cpp b/Runtime/Filters/Mesh/LodMeshFilter.cpp new file mode 100644 index 0000000..512f153 --- /dev/null +++ b/Runtime/Filters/Mesh/LodMeshFilter.cpp @@ -0,0 +1,96 @@ +#include "UnityPrefix.h" +#include "LodMeshFilter.h" +#include "LodMesh.h" +#include "MeshRenderer.h" +#include "Runtime/Filters/Particles/MeshParticleEmitter.h" +#include "Runtime/Serialize/TransferFunctions/SerializeTransfer.h" +#include "Runtime/Serialize/TransferFunctions/TransferNameConversions.h" + +MeshFilter::MeshFilter (MemLabelId label, ObjectCreationMode mode) +: Super(label, mode) +{ + m_Mesh = NULL; +} + +MeshFilter::~MeshFilter () +{ +} + +void MeshFilter::OnDidAddMesh () +{ + AssignMeshToRenderer (); +} + +void MeshFilter::AssignMeshToRenderer () +{ + if (GetGameObjectPtr()) + { + MeshRenderer* renderer = QueryComponent(MeshRenderer); + if (renderer && renderer->GetSharedMesh() != m_Mesh) + renderer->SetSharedMesh(m_Mesh); + + MeshParticleEmitter* emitter = QueryComponent(MeshParticleEmitter); + if (emitter && emitter->GetMesh() != m_Mesh) + emitter->SetMesh(m_Mesh); + } +} + +void MeshFilter::SetSharedMesh (PPtr<Mesh> mesh) +{ + m_Mesh = mesh; + + 
MeshRenderer* renderer = QueryComponent(MeshRenderer);
+	if (renderer)
+		renderer->SetSharedMesh(m_Mesh);
+
+	MeshParticleEmitter* emitter = QueryComponent(MeshParticleEmitter);
+	if (emitter)
+		emitter->SetMesh(m_Mesh);
+
+	SetDirty ();
+}
+
+PPtr<Mesh> MeshFilter::GetSharedMesh ()
+{
+	return m_Mesh;
+}
+
+// Returns a mesh owned by this filter; if the currently assigned mesh is not
+// the instantiated copy (see Mesh::GetInstantiatedMesh), the copy becomes the
+// new shared mesh of this filter.
+Mesh* MeshFilter::GetInstantiatedMesh ()
+{
+	Mesh* instantiated = &Mesh::GetInstantiatedMesh (m_Mesh, *this);
+	if (PPtr<Mesh> (instantiated) != m_Mesh)
+	{
+		SetSharedMesh(instantiated);
+	}
+
+	return instantiated;
+}
+
+void MeshFilter::SetInstantiatedMesh (Mesh* mesh)
+{
+	SetSharedMesh(mesh);
+}
+
+IMPLEMENT_CLASS_HAS_INIT (MeshFilter)
+IMPLEMENT_OBJECT_SERIALIZE (MeshFilter)
+
+template<class TransferFunction> inline
+void MeshFilter::Transfer (TransferFunction& transfer)
+{
+	Super::Transfer (transfer);
+	transfer.Transfer (m_Mesh, "m_Mesh", kSimpleEditorMask);
+}
+
+// Registers name conversions so data serialized under the legacy
+// m_LodMesh / PPtr<LodMesh> names still loads into m_Mesh / PPtr<Mesh>.
+void MeshFilter::InitializeClass ()
+{
+	RegisterAllowNameConversion(GetClassStringStatic(), "m_LodMesh", "m_Mesh");
+	RegisterAllowTypeNameConversion ("PPtr<LodMesh>", "PPtr<Mesh>");
+
+	REGISTER_MESSAGE_VOID(MeshFilter, kDidAddComponent, OnDidAddMesh);
+}
+
+void MeshFilter::AwakeFromLoad (AwakeFromLoadMode awakeMode)
+{
+	Super::AwakeFromLoad (awakeMode);
+	AssignMeshToRenderer ();
+}
diff --git a/Runtime/Filters/Mesh/LodMeshFilter.h b/Runtime/Filters/Mesh/LodMeshFilter.h
new file mode 100644
index 0000000..ff6273b
--- /dev/null
+++ b/Runtime/Filters/Mesh/LodMeshFilter.h
@@ -0,0 +1,38 @@
+#ifndef LODMESHFILTER_H
+#define LODMESHFILTER_H
+
+#include "Runtime/BaseClasses/GameObject.h"
+#include "Runtime/Modules/ExportModules.h"
+
+class Mesh;
+
+// Component that holds the PPtr<Mesh> for a GameObject and assigns it to the
+// MeshRenderer / MeshParticleEmitter siblings (see LodMeshFilter.cpp).
+class EXPORT_COREMODULE MeshFilter : public Unity::Component
+{
+public:
+	REGISTER_DERIVED_CLASS (MeshFilter, Unity::Component)
+	DECLARE_OBJECT_SERIALIZE (MeshFilter)
+
+	MeshFilter (MemLabelId label, ObjectCreationMode mode);
+
+	void SetSharedMesh (PPtr<Mesh> mesh);
+	PPtr<Mesh> GetSharedMesh ();
+
+	Mesh* GetInstantiatedMesh ();
+	void SetInstantiatedMesh (Mesh* mesh);
+
+	static void InitializeClass ();
+	static void CleanupClass () {}
+
+	void OnDidAddMesh ();
+
+protected:
+	virtual void AwakeFromLoad (AwakeFromLoadMode awakeMode);
+
+
+private:
+	void AssignMeshToRenderer ();
+
+	PPtr<Mesh> m_Mesh;
+};
+
+#endif
diff --git a/Runtime/Filters/Mesh/Mesh.h b/Runtime/Filters/Mesh/Mesh.h
new file mode 100644
index 0000000..e6b58dc
--- /dev/null
+++ b/Runtime/Filters/Mesh/Mesh.h
@@ -0,0 +1,76 @@
+#ifndef MESH_H
+#define MESH_H
+
+#include <vector>
+#include "Runtime/Serialize/SerializeUtility.h"
+#include "Runtime/Math/Vector3.h"
+#include "Runtime/Misc/Allocator.h"
+
+class Quaternionf;
+
+/// A face in the mesh.
+// Three 16-bit vertex indices; operator[] views v1..v3 as a contiguous array.
+struct Face {
+	UInt16 v1, v2, v3;
+	Face (UInt16 vert1, UInt16 vert2, UInt16 vert3)
+	{v1 = vert1; v2 = vert2; v3 = vert3;}
+	Face () {}
+
+	UInt16 &operator[] (int i) { return (&v1)[i]; }
+	UInt16 operator[] (int i) const { return (&v1)[i]; }
+
+	DECLARE_SERIALIZE_OPTIMIZE_TRANSFER (Face)
+};
+
+template<class TransferFunc>
+void Face::Transfer (TransferFunc& transfer)
+{
+	TRANSFER (v1);
+	TRANSFER (v2);
+	TRANSFER (v3);
+}
+
+// Legacy serialized tangent-space representation (normal + tangent + handedness);
+// kept only so old data can still be read (see Mesh::LoadDeprecatedTangentData).
+struct DeprecatedTangent
+{
+	Vector3f normal;
+	Vector3f tangent;
+	float handedness;
+	DECLARE_SERIALIZE_OPTIMIZE_TRANSFER (Tangent)
+};
+
+template<class TransferFunc>
+void DeprecatedTangent::Transfer (TransferFunc& transfer)
+{
+	TRANSFER (normal);
+	TRANSFER (tangent);
+	TRANSFER (handedness);
+}
+
+// Four bone weights/indices per vertex (full-quality skinning path).
+struct BoneInfluence
+{
+	float weight[4];
+	int boneIndex[4];
+
+	DECLARE_SERIALIZE_OPTIMIZE_TRANSFER (BoneInfluence)
+};
+
+// Two bone weights/indices per vertex (reduced-influence variant; not serialized).
+struct BoneInfluence2
+{
+	float weight[2];
+	int boneIndex[2];
+};
+
+template<class TransferFunc>
+void BoneInfluence::Transfer (TransferFunc& transfer)
+{
+	TRANSFER (weight[0]);
+	TRANSFER (weight[1]);
+	TRANSFER (weight[2]);
+	TRANSFER (weight[3]);
+
+	TRANSFER (boneIndex[0]);
+	TRANSFER (boneIndex[1]);
+	TRANSFER (boneIndex[2]);
+	TRANSFER (boneIndex[3]);
+}
+
+#endif
diff --git a/Runtime/Filters/Mesh/MeshBlendShape.cpp
b/Runtime/Filters/Mesh/MeshBlendShape.cpp
new file mode 100644
index 0000000..c7588e2
--- /dev/null
+++ b/Runtime/Filters/Mesh/MeshBlendShape.cpp
@@ -0,0 +1,234 @@
+#include "UnityPrefix.h"
+#include "Configuration/UnityConfigure.h"
+#include "MeshBlendShape.h"
+#include "Runtime/mecanim/generic/crc32.h"
+
+// Deltas with magnitude at or below these thresholds are treated as zero when
+// converting dense delta arrays into the sparse representation below.
+static const float kVertexDeltaEpsilon = 1e-5f;
+static const float kNormalDeltaEpsilon = 1e-5f;
+
+// Converts dense per-vertex delta arrays into one sparse frame: only vertices
+// whose position, normal or tangent delta exceeds the epsilons are appended to
+// sharedSparceVertices, and frame records the [firstVertex, vertexCount) window
+// plus whether any normal/tangent deltas were present.
+// deltaNormals/deltaTangents may be empty; otherwise they must match
+// deltaVertices in size (asserted).
+// NOTE(review): loop index j is a signed int compared against size() — fine
+// for meshes below 2^31 vertices.
+void SetBlendShapeVertices(const std::vector<Vector3f>& deltaVertices, const std::vector<Vector3f>& deltaNormals, const std::vector<Vector3f>& deltaTangents, BlendShapeVertices& sharedSparceVertices, BlendShape& frame)
+{
+	Assert(deltaNormals.empty() || deltaVertices.size() == deltaNormals.size());
+	Assert(deltaTangents.empty() || deltaVertices.size() == deltaTangents.size());
+
+	frame.firstVertex = sharedSparceVertices.size();
+
+	// Converting blend shape in to sparse blend shape
+	sharedSparceVertices.reserve(sharedSparceVertices.size() + deltaVertices.size());
+
+	frame.hasNormals = frame.hasTangents = false;
+
+	for (int j = 0; j < deltaVertices.size(); ++j)
+	{
+		const bool vertexHasNormal = (!deltaNormals.empty() && Magnitude(deltaNormals[j]) > kNormalDeltaEpsilon);
+		const bool vertexHasTangent = (!deltaTangents.empty() && Magnitude(deltaTangents[j]) > kNormalDeltaEpsilon);
+
+		frame.hasNormals = frame.hasNormals || vertexHasNormal;
+		frame.hasTangents = frame.hasTangents || vertexHasTangent;
+
+		if (Magnitude(deltaVertices[j]) > kVertexDeltaEpsilon || vertexHasNormal || vertexHasTangent)
+		{
+			BlendShapeVertex v;
+
+			v.vertex = deltaVertices[j];
+			if (!deltaNormals.empty())
+				v.normal = deltaNormals[j];
+			if (!deltaTangents.empty())
+				v.tangent = deltaTangents[j];
+
+			v.index = j;
+			sharedSparceVertices.push_back(v);
+		}
+	}
+
+	frame.vertexCount = sharedSparceVertices.size() - frame.firstVertex;
+}
+
+// Recomputes hasNormals/hasTangents by scanning this shape's window of the
+// shared sparse vertex array against kNormalDeltaEpsilon.
+void BlendShape::UpdateFlags(const BlendShapeVertices& sharedSparceVertices)
+{
+	hasNormals = hasTangents = false;
+
+	for (int j = 0; j < vertexCount; ++j)
+	{
+		const BlendShapeVertex& v = sharedSparceVertices[firstVertex + j];
+		const bool vertexHasNormal = Magnitude(v.normal) > kNormalDeltaEpsilon;
+		const bool vertexHasTangent = Magnitude(v.tangent) > kNormalDeltaEpsilon;
+
+		hasNormals = hasNormals || vertexHasNormal;
+		hasTangents = hasTangents || vertexHasTangent;
+	}
+}
+
+// Fills in a channel: stores the name, its CRC32 hash (used for fast lookup
+// by GetChannelIndex(BindingHash)), and the frame window it owns.
+void InitializeChannel (const UnityStr& inName, int frameIndex, int frameCount, BlendShapeChannel& channel)
+{
+	channel.name.assign(inName.c_str(), kMemGeometry);
+	channel.nameHash = mecanim::processCRC32(inName.c_str());
+	channel.frameIndex = frameIndex;
+	channel.frameCount = frameCount;
+}
+
+const char* GetChannelName (const BlendShapeData& data, int index)
+{
+	return data.channels[index].name.c_str();
+}
+
+// Linear search by channel name; returns -1 when not found.
+// NOTE(review): 'name == data.channels[i].name' relies on ConstantString
+// providing a string-comparing operator== against const char* — confirm it is
+// not a pointer comparison.
+int GetChannelIndex (const BlendShapeData& data, const char* name)
+{
+	for (int i=0;i<data.channels.size();i++)
+	{
+		if (name == data.channels[i].name)
+			return i;
+	}
+	return -1;
+}
+
+// Linear search by precomputed name hash; returns -1 when not found.
+int GetChannelIndex (const BlendShapeData& data, BindingHash name)
+{
+	for (int i=0;i<data.channels.size();i++)
+	{
+		if (name == data.channels[i].nameHash)
+			return i;
+	}
+	return -1;
+}
+
+void ClearBlendShapes (BlendShapeData& data)
+{
+	data.vertices.clear();
+	data.shapes.clear();
+	data.channels.clear();
+	data.fullWeights.clear();
+}
+
+/*
+
+STRUCT BlendShapeChannel
+
+// BlendShape vertex class.
+STRUCT Vertex
+// Vertex delta.
+CSRAW public Vector3 vertex;
+
+// Normal delta.
+CSRAW public Vector3 normal;
+
+// Tangent delta.
+CSRAW public Vector3 tangent;
+
+// Index to [[Mesh]] vertex data.
+CSRAW public int index;
+END
+
+// A class representing a single BlendShape (also called morph-target).
+STRUCT BlendShape
+
+// The weight of the frame
+CSRAW public float weight;
+
+// Sparse vertex data.
+CSRAW public Vertex[] vertices;
+END
+
+// Name of the BlendShape.
+CSRAW public string name;
+
+// The frames making up a blendshape animation.
+// Each frame has a weight, based on the weight of the BlendShape in the SkinnedMeshRenderer, Unity will apply 1 or 2 frames.
+CSRAW public BlendShape[] shapes;
+END
+
+
+C++RAW
+/*
+	struct MonoMeshBlendShape
+	{
+		ScriptingStringPtr name;
+		ScriptingArrayPtr vertices;
+	};
+
+	void BlendShapeVertexToMono (const BlendShapeVertex &src, MonoBlendShapeVertex &dest) {
+		dest.vertex = src.vertex;
+		dest.normal = src.normal;
+		dest.tangent = src.tangent;
+		dest.index = src.index;
+	}
+	void BlendShapeVertexToCpp (const MonoBlendShapeVertex &src, BlendShapeVertex &dest) {
+		dest.vertex = src.vertex;
+		dest.normal = src.normal;
+		dest.tangent = src.tangent;
+		dest.index = src.index;
+	}
+
+	class MeshBlendShapeToMono
+	{
+	public:
+		MeshBlendShapeToMono(const BlendShapeVertices& sharedVertices_) : sharedVertices(sharedVertices_) {}
+
+		void operator() (const MeshBlendShape &src, MonoMeshBlendShape &dest)
+		{
+			dest.name = scripting_string_new(src.m_Name);
+			const BlendShapeVertices vertices(sharedVertices.begin() + src.firstVertex, sharedVertices.begin() + src.firstVertex + src.vertexCount);
+
+			ScriptingTypePtr classVertex = GetScriptingTypeRegistry().GetType("UnityEngine", "BlendShapeVertex");
+			dest.vertices = VectorToScriptingStructArray<BlendShapeVertex, MonoBlendShapeVertex>(vertices, classVertex, BlendShapeVertexToMono);
+		}
+
+	private:
+		const BlendShapeVertices& sharedVertices;
+	};
+
+	class MeshBlendShapeToCpp
+	{
+	public:
+		MeshBlendShapeToCpp(int meshVertexCount_, BlendShapeVertices& sharedVertices_) : meshVertexCount(meshVertexCount_), sharedVertices(sharedVertices_) {}
+
+		void operator() (MonoMeshBlendShape &src, MeshBlendShape &dest)
+		{
+			dest.weight = src.weight;
+
+			const BlendShapeVertex* vertices = Scripting::GetScriptingArrayStart<BlendShapeVertex> (src.vertices);
+			sharedVertices.insert(sharedVertices.end(), vertices, vertices + GetScriptingArraySize(src.vertices));
+
+			for (BlendShapeVertices::iterator it = vertices.begin(), end = vertices.end(); it != end; ++it)
+			{
+				BlendShapeVertex& v = *it;
+				if (v.index < 0 || v.index >= meshVertexCount)
+				{
+					ErrorStringMsg("Value (%d) of BlendShapeVertex.index #%d is out of bounds (Mesh vertex count: %d) on BlendShape '%s'. It will be reset to 0.", v.index, it - vertices.begin(), meshVertexCount, dest.m_Name.c_str());
+					v.index = 0;
+				}
+			}
+
+			dest.firstVertex = sharedVertices.size();
+			dest.vertexCount = vertices.size();
+
+			sharedVertices.insert(sharedVertices.end(), vertices.begin(), vertices.end());
+			dest.UpdateFlags(sharedVertices);
+		}
+
+	private:
+		int meshVertexCount;
+		BlendShapeVertices& sharedVertices;
+	};
+
+
+
+	----------------
+
+	// BlendShapes for this mesh.
+	CUSTOM_PROP BlendShapeChannel[] blendShapes
+	{
+		// ScriptingTypePtr classBlendShape = GetScriptingTypeRegistry().GetType("UnityEngine", "MeshBlendShape");
+		// return VectorToScriptingStructArray<MeshBlendShape, MonoMeshBlendShape>(self->GetShapesVector(), classBlendShape, MeshBlendShapeToMono(self->GetShapeVertexVector()));
+		return SCRIPTING_NULL;
+	}
+	{
+		// Mesh::MeshBlendShapeContainer shapes;
+		// self->GetShapeVertexVector().clear();
+		// ScriptingStructArrayToVector<MeshBlendShape, MonoMeshBlendShape>(value, shapes, MeshBlendShapeToCpp(self->GetVertexCount(), self->GetShapeVertexVector()));
+		// self->SwapShapesVector(shapes);
+	}
+
+
+
+	*/
diff --git a/Runtime/Filters/Mesh/MeshBlendShape.h b/Runtime/Filters/Mesh/MeshBlendShape.h
new file mode 100644
index 0000000..d4d0f41
--- /dev/null
+++ b/Runtime/Filters/Mesh/MeshBlendShape.h
@@ -0,0 +1,115 @@
+#ifndef MESHBLENDSHAPES_H
+#define MESHBLENDSHAPES_H
+
+#include "Runtime/Geometry/AABB.h"
+#include "Runtime/Math/Vector3.h"
+#include "Runtime/Serialize/SerializeUtility.h"
+#include "Runtime/Utilities/dynamic_array.h"
+#include "Runtime/Containers/ConstantString.h"
+#include "Runtime/Containers/ConstantStringSerialization.h"
+
+typedef UInt32 BindingHash;
+
+// One sparse blend-shape entry: 'index' selects the mesh vertex the deltas apply to.
+struct BlendShapeVertex
+{
+	// vertex, normal & tangent are stored as deltas
+	Vector3f vertex;
+	Vector3f normal;
+	Vector3f tangent;
+	UInt32 index;
+
+	BlendShapeVertex() : vertex(Vector3f::zero), normal(Vector3f::zero), tangent(Vector3f::zero), index(0) {}
+
+	DECLARE_SERIALIZE_NO_PPTR (BlendShapeVertex)
+};
+typedef dynamic_array<BlendShapeVertex> BlendShapeVertices;
+
+// A named blend-shape channel owning a run of frames [frameIndex, frameIndex+frameCount)
+// in BlendShapeData::shapes / fullWeights; nameHash is the CRC32 of name.
+struct BlendShapeChannel
+{
+	ConstantString name;
+	BindingHash nameHash;
+
+	int frameIndex;
+	int frameCount;
+
+	DECLARE_SERIALIZE_NO_PPTR(MeshBlendShapeChannel)
+};
+
+// One frame of a channel: a window [firstVertex, firstVertex+vertexCount) into
+// the shared sparse vertex array, plus flags for which delta kinds it carries.
+struct BlendShape
+{
+	BlendShape() : firstVertex(0), vertexCount(0), hasNormals(false), hasTangents(false) {}
+
+	UInt32 firstVertex;
+	UInt32 vertexCount;
+
+	bool hasNormals;
+	bool hasTangents;
+
+
+	///@TODO: MOve
+	// updates hasNormals and hasTangents based on data in vertices
+	void UpdateFlags(const BlendShapeVertices& sharedSparceVertices);
+
+	DECLARE_SERIALIZE_NO_PPTR (MeshBlendShape)
+};
+
+// All blend-shape data of a Mesh: the shared sparse vertices, the frames, the
+// named channels, and the per-frame full weights.
+struct BlendShapeData
+{
+	BlendShapeVertices vertices;
+	dynamic_array<BlendShape> shapes;
+	std::vector<BlendShapeChannel> channels;
+	dynamic_array<float> fullWeights;
+
+	DECLARE_SERIALIZE_NO_PPTR(BlendShapeData)
+};
+
+
+// Convert between blendshape name and index
+const char* GetChannelName (const BlendShapeData& data, int index);
+inline size_t GetBlendShapeChannelCount (const BlendShapeData& data) { return data.channels.size(); }
+int GetChannelIndex (const BlendShapeData& data, const char* name);
+int GetChannelIndex (const BlendShapeData& data, BindingHash name);
+
+// data is passed as non-sparce arrays, i.e.
deltaVertices.size() has to be the same as vertex count on the Mesh
+void SetBlendShapeVertices(const std::vector<Vector3f>& deltaVertices, const std::vector<Vector3f>& deltaNormals, const std::vector<Vector3f>& deltaTangents, BlendShapeVertices& sharedSparceVertices, BlendShape& frame);
+void InitializeChannel (const UnityStr& inName, int frameIndex, int frameCount, BlendShapeChannel& channel);
+void ClearBlendShapes (BlendShapeData& data);
+
+template<class TransferFunc>
+void BlendShape::Transfer (TransferFunc& transfer)
+{
+	TRANSFER(firstVertex);
+	TRANSFER(vertexCount);
+	TRANSFER(hasNormals);
+	TRANSFER(hasTangents);
+	transfer.Align();
+}
+
+template<class TransferFunc>
+void BlendShapeData::Transfer (TransferFunc& transfer)
+{
+	TRANSFER (vertices);
+	TRANSFER (shapes);
+	TRANSFER (channels);
+	TRANSFER (fullWeights);
+}
+
+template<class TransferFunc>
+void BlendShapeVertex::Transfer (TransferFunc& transfer)
+{
+	TRANSFER(vertex);
+	TRANSFER(normal);
+	TRANSFER(tangent);
+	TRANSFER(index);
+}
+
+template<class TransferFunc>
+void BlendShapeChannel::Transfer (TransferFunc& transfer)
+{
+	TransferConstantString (name, "name", kNoTransferFlags, kMemGeometry, transfer);
+	TRANSFER (nameHash);
+	TRANSFER (frameIndex);
+	TRANSFER (frameCount);
+}
+
+#endif
diff --git a/Runtime/Filters/Mesh/MeshBlendShaping.cpp b/Runtime/Filters/Mesh/MeshBlendShaping.cpp
new file mode 100644
index 0000000..a86a24d
--- /dev/null
+++ b/Runtime/Filters/Mesh/MeshBlendShaping.cpp
@@ -0,0 +1,184 @@
+#include "UnityPrefix.h"
+#include "MeshBlendShaping.h"
+#include "MeshSkinning.h"
+#include "MeshBlendShape.h"
+
+// Accumulates 'weight'-scaled sparse deltas into the interleaved vertex stream
+// at dst (position always; normal/tangent only when the compile-time flags are
+// set, so the unused branches fold away).
+template<bool skinNormal, bool skinTangent>
+void ApplyBlendShapeTmpl (const BlendShapeVertex* vertices, size_t vertexCount, size_t dstVertexCount, float weight, int normalOffset, int tangentOffset, int inStride, UInt8* dst)
+{
+	for (int i = 0; i < vertexCount; ++i)
+	{
+		const BlendShapeVertex& blendShapeVertex = vertices[i];
+
+		int offset = inStride * blendShapeVertex.index;
+
+		*reinterpret_cast<Vector3f*>(dst + offset) += blendShapeVertex.vertex * weight;
+		if (skinNormal)
+		{
+			DebugAssert (offset + normalOffset < inStride * dstVertexCount);
+			*reinterpret_cast<Vector3f*>(dst + offset + normalOffset) += blendShapeVertex.normal * weight;
+		}
+		if (skinTangent)
+		{
+			DebugAssert (offset + tangentOffset < inStride * dstVertexCount);
+			*reinterpret_cast<Vector3f*>(dst + offset + tangentOffset) += blendShapeVertex.tangent * weight;
+		}
+	}
+}
+
+
+// Applies one frame's sparse deltas to dst, clamping the blend factor to 1 and
+// dispatching to the template variant matching what both the skin info and the
+// frame actually carry (normals/tangents).
+void ApplyBlendShape (const BlendShape& target, const BlendShapeVertices& vertices, float weight, const SkinMeshInfo& info, UInt8* dst)
+{
+	if (!HasValidWeight(weight))
+		return;
+
+	weight = std::min(weight, 1.0F);
+
+	const BlendShapeVertex* v = vertices.begin() + target.firstVertex;
+
+	if (info.skinNormals && info.skinTangents && target.hasNormals && target.hasTangents)
+		ApplyBlendShapeTmpl<true, true> (v, target.vertexCount, info.vertexCount, weight, info.normalOffset, info.tangentOffset, info.inStride, dst);
+	else if (info.skinNormals && target.hasNormals)
+		ApplyBlendShapeTmpl<true, false> (v, target.vertexCount, info.vertexCount, weight, info.normalOffset, info.tangentOffset, info.inStride, dst);
+	else
+		ApplyBlendShapeTmpl<false, false> (v, target.vertexCount, info.vertexCount, weight, info.normalOffset, info.tangentOffset, info.inStride, dst);
+}
+
+// Returns the left ("lhs") frame index whose weight interval contains targetWeight.
+// NOTE(review): 'frame < count-1' mixes signed int with size_t — if count were 0
+// the subtraction wraps (callers assert frameCount != 0). Also, when
+// targetWeight exceeds weights[count-1] this returns count-1 and the caller
+// then reads weights[frame+1], one past the end — confirm target weights are
+// clamped to the last full weight upstream.
+static int FindFrame (const float* weights, size_t count, float targetWeight)
+{
+	// Find frame (left index)
+	int frame = 0;
+	while (frame < count-1 && targetWeight > weights[frame+1])
+		frame++;
+
+	return frame;
+}
+
+// Copies the base vertex stream into dst, then for every channel with a valid
+// weight either fades in the first frame (weight below its full weight, or a
+// single-frame channel) or linearly blends the two frames bracketing the
+// target weight.
+void ApplyBlendShapes (SkinMeshInfo& info, UInt8* dst)
+{
+	DebugAssert (info.blendshapeCount != 0);
+	Assert (info.inStride == info.outStride);
+	const int inStride = info.inStride;
+	const int count = info.vertexCount;
+
+	Assert (dst);
+	memcpy (dst, info.inVertices, inStride * count);
+
+	const BlendShapeData& blendShapeData = *info.blendshapes;
+
+	for (int c = 0; c < info.blendshapeCount; ++c)
+	{
+		const float targetWeight = info.blendshapeWeights[c];
+
+		if (!HasValidWeight (targetWeight))
+			continue;
+
+		const BlendShapeChannel& channel = blendShapeData.channels[c];
+		Assert(channel.frameCount != 0);
+
+		const BlendShape* blendShapeFrames = &blendShapeData.shapes[channel.frameIndex];
+		const float* weights = &blendShapeData.fullWeights[channel.frameIndex];
+
+		// The first blendshape does not need to do any blending. Just fade it in.
+		if (targetWeight < weights[0] || channel.frameCount == 1)
+		{
+			float lhsShapeWeight = weights[0];
+			ApplyBlendShape (blendShapeFrames[0], blendShapeData.vertices, targetWeight / lhsShapeWeight, info, dst);
+		}
+		// We are blending with two frames
+		else
+		{
+			// Find the frame we are blending with
+			int frame = FindFrame(weights, channel.frameCount, targetWeight);
+
+			float lhsShapeWeight = weights[frame + 0];
+			float rhsShapeWeight = weights[frame + 1];
+
+			float relativeWeight = (targetWeight - lhsShapeWeight) / (rhsShapeWeight - lhsShapeWeight);
+
+			ApplyBlendShape (blendShapeFrames[frame + 0], blendShapeData.vertices, 1.0F - relativeWeight, info, dst);
+			ApplyBlendShape (blendShapeFrames[frame + 1], blendShapeData.vertices, relativeWeight, info, dst);
+		}
+	}
+}
+
+///@TODO: How do we deal with resizing vertex count once mesh blendshapes have been created???
+ +/* + template<bool skinNormal, bool skinTangent> + static void ApplyBlendShapesTmpl (SkinMeshInfo& info, UInt8* dst) + { + DebugAssert (info.blendshapeCount != 0); + Assert (info.inStride == info.outStride); + const int inStride = info.inStride; + const int count = info.vertexCount; + + Assert (dst); + memcpy (dst, info.inVertices, inStride * count); + + const int normalOffset = info.normalOffset; + const int tangentOffset = info.tangentOffset; + + #if BLEND_DIRECT_NORMALS + if (skinNormal) + { // figure out how what fraction of original normal should be used + float totalBlendshapeWeight = 0.0f; + for (int i = 0; i < info.blendshapeCount; ++i) + totalBlendshapeWeight += info.blendshapeWeights[i]; + Assert (totalBlendshapeWeight >= 0.0f); + if (totalBlendshapeWeight > 0.0f) + { + for (int i = 0; i < count; ++i) + *reinterpret_cast<Vector3f*>(dst + i*inStride + normalOffset) *= max(0.0f, (1.0f - totalBlendshapeWeight)); + } + } + + bool atLeastOneSparseBlendshape = false; + #endif + for (int bs = 0; bs < info.blendshapeCount; ++bs) + { + const float w = info.blendshapeWeights[bs]; + + if (HasWeight(w)) + { + const MeshBlendShape& blendShape = info.blendshapes[bs]; + + const BlendShapeVertex* vertices = info.blendshapesVertices + blendShape.firstVertex; + for (int i = 0; i < blendShape.vertexCount; ++i) + { + const BlendShapeVertex& blendShapeVertex = vertices[i]; + + int offset = inStride * blendShapeVertex.index; + Assert (offset < inStride * count); + *reinterpret_cast<Vector3f*>(dst + offset) += blendShapeVertex.vertex * w; + if (skinNormal) + { + Assert (offset + normalOffset < inStride * count); + *reinterpret_cast<Vector3f*>(dst + offset + normalOffset) += blendShapeVertex.normal * w; + } + if (skinTangent) + { + Assert (offset + tangentOffset < inStride * count); + *reinterpret_cast<Vector3f*>(dst + offset + tangentOffset) += blendShapeVertex.tangent * w; + } + } + + #if BLEND_DIRECT_NORMALS + if (vertices.size () < count) + atLeastOneSparseBlendshape = 
true; + #endif + } + } + + #if BLEND_DIRECT_NORMALS + if (atLeastOneSparseBlendshape && skinNormal) // we might need to take larger fraction from original normal + for (int i = 0; i < count; ++i) + { + Vector3f const& srcNormal = *reinterpret_cast<Vector3f*>((UInt8*)info.inVertices + i*inStride + normalOffset); + Vector3f* dstNormal = reinterpret_cast<Vector3f*>(dst + i*inStride + normalOffset); + const float missingFractionOfNormal = max (0.0f, 1.0f - Magnitude (*dstNormal)); + *dstNormal += srcNormal * missingFractionOfNormal; + } + #endif + } +*/
\ No newline at end of file diff --git a/Runtime/Filters/Mesh/MeshBlendShaping.h b/Runtime/Filters/Mesh/MeshBlendShaping.h new file mode 100644 index 0000000..7b39f26 --- /dev/null +++ b/Runtime/Filters/Mesh/MeshBlendShaping.h @@ -0,0 +1,12 @@ +#pragma once + +struct SkinMeshInfo; + +// Does "mesh skinning" logic for BlendShapes +void ApplyBlendShapes (SkinMeshInfo& info, UInt8* dst); + +inline bool HasValidWeight(const float w) +{ + const float kWeightEpsilon = 1e-4f; + return w > kWeightEpsilon; +} diff --git a/Runtime/Filters/Mesh/MeshCombiner.cpp b/Runtime/Filters/Mesh/MeshCombiner.cpp new file mode 100644 index 0000000..1bf93e5 --- /dev/null +++ b/Runtime/Filters/Mesh/MeshCombiner.cpp @@ -0,0 +1,502 @@ +#include "UnityPrefix.h" +#include "MeshCombiner.h" +#include "Runtime/Graphics/TriStripper.h" +#include "Runtime/Shaders/GraphicsCaps.h" +#include "Runtime/Profiler/Profiler.h" +#include <limits> + + +#define sqr(x) ((x)*(x)) + +PROFILER_INFORMATION(gCombineMeshesProfile, "CombineMeshes", kProfilerRender) +PROFILER_INFORMATION(gCombineVerticesProfile, "CombineVertices", kProfilerRender) +PROFILER_INFORMATION(gCombineIndicesProfile, "CombineIndices", kProfilerRender) + +static void CombineBoneSkinning (const CombineInstances &in, Mesh& outCombinedMesh); + + +size_t ExtractMeshIndices(Mesh::TemporaryIndexContainer& srcIndices, const CombineInstance& in, bool useVertexOffsets, size_t& inoutTotalVertexOffset, UInt16* dstIndices) +{ + srcIndices.clear(); + + if (in.subMeshIndex < 0 || in.subMeshIndex >= in.mesh->GetSubMeshCount()) + return 0; + + const int subMeshIndex = in.subMeshIndex; + const int vertexOffset = useVertexOffsets ? 
in.vertexOffset : inoutTotalVertexOffset; + inoutTotalVertexOffset += in.mesh->GetVertexCount(); + + in.mesh->GetTriangles( srcIndices, subMeshIndex ); + + size_t numIndices = srcIndices.size(); + if (Dot (Cross(in.transform.GetAxisX(), in.transform.GetAxisY()), in.transform.GetAxisZ()) >= 0) + { + for ( size_t k=0; k!=numIndices; ++k ) + dstIndices[k] = srcIndices[k] + vertexOffset; + } + else + { + // if trilist, then + // reverse Cull order by reversing indices + for ( size_t k=0; k!=numIndices; ++k ) + dstIndices[k] = srcIndices[numIndices-k-1] + vertexOffset; + } + + return numIndices; +} + +static bool IsMeshBatchable (const Mesh* mesh, int subMeshIndex) +{ + return mesh && mesh->HasVertexData() && subMeshIndex >= 0 && subMeshIndex < mesh->GetSubMeshCount(); +} + + +void CombineMeshIndicesForStaticBatching(const CombineInstances& in, Mesh& inoutMesh, bool mergeSubMeshes, bool useVertexOffsets) +{ + PROFILER_AUTO(gCombineIndicesProfile, &inoutMesh); + + size_t size = in.size(); + + UInt32 maxIndices = 0; + for ( size_t i=0; i!=size; ++i ) + { + if (IsMeshBatchable(in[i].mesh, in[i].subMeshIndex)) + { + const UInt32 numTris = in[i].mesh->GetSubMeshFast( in[i].subMeshIndex ).indexCount; + if (mergeSubMeshes) + maxIndices += numTris; + else + maxIndices = std::max( maxIndices, numTris ); + } + } + + UInt16* dstIndices = new UInt16[maxIndices+1]; + Mesh::TemporaryIndexContainer srcIndices; + srcIndices.reserve( maxIndices+1 ); + + size_t totalVertexOffset = 0; + if (mergeSubMeshes) + { + inoutMesh.SetSubMeshCount( 1 ); + size_t totalNumIndices = 0; + for ( size_t s=0; s!=size; ++s ) + { + if (in[s].mesh) + { + size_t numIndices = ExtractMeshIndices (srcIndices, in[s], useVertexOffsets, totalVertexOffset, dstIndices+totalNumIndices); + + totalNumIndices += numIndices; + Assert(totalNumIndices <= (maxIndices+1)); + } + } + int mask = Mesh::k16BitIndices; + inoutMesh.SetIndicesComplex (dstIndices, totalNumIndices, 0, kPrimitiveTriangles, mask); + } + else + { + 
inoutMesh.SetSubMeshCount( in.size() ); + for ( size_t s=0; s!=size; ++s ) + { + if (in[s].mesh) + { + size_t numIndices = ExtractMeshIndices (srcIndices, in[s], useVertexOffsets, totalVertexOffset, dstIndices); + Assert(numIndices <= (maxIndices+1)); + + int mask = Mesh::k16BitIndices; + inoutMesh.SetIndicesComplex (dstIndices, numIndices, s, kPrimitiveTriangles, mask); + } + } + } + + delete []dstIndices; +} + +void CombineMeshVerticesForStaticBatching ( const CombineInstances& in, const string& combinedMeshName, Mesh& outCombinedMesh, bool useTransforms ) +{ + PROFILER_AUTO(gCombineVerticesProfile, &outCombinedMesh); + + int vertexCount = 0; + size_t size = in.size(); + for( size_t i=0; i!=size; ++i ) + { + if (IsMeshBatchable(in[i].mesh, in[i].subMeshIndex)) + vertexCount += in[i].mesh->GetVertexCount(); + } + + bool hasNormals = false; + bool hasTangents = false; + bool hasUV0 = false; + bool hasUV1 = false; + bool hasColors = false; + bool hasSkin = false; + int bindposeCount = 0; + + for( size_t i=0; i!=size; ++i ) + { + if (IsMeshBatchable(in[i].mesh, in[i].subMeshIndex)) + { + const Mesh* mesh = in[i].mesh; + const UInt32 channels = mesh->GetAvailableChannels(); + hasNormals |= (channels & (1<<kShaderChannelNormal)) != 0; + hasTangents |= (channels & (1<<kShaderChannelTangent)) != 0; + hasUV0 |= (channels & (1<<kShaderChannelTexCoord0)) != 0; + hasUV1 |= (channels & (1<<kShaderChannelTexCoord1)) != 0 || (in[i].lightmapTilingOffset != Vector4f(1, 1, 0, 0)); + hasColors |= (channels & (1<<kShaderChannelColor)) != 0; + hasSkin |= mesh->GetSkin().size() && mesh->GetBindpose().size(); + bindposeCount += mesh->GetBindpose().size(); + } + } + + UInt32 channels = 1<<kShaderChannelVertex; + if ( hasNormals ) channels |= 1<<kShaderChannelNormal; + if ( hasTangents ) channels |= 1<<kShaderChannelTangent; + if ( hasUV0 ) channels |= 1<<kShaderChannelTexCoord0; + if ( hasUV1 ) channels |= 1<<kShaderChannelTexCoord1; + if ( hasColors ) channels |= 
1<<kShaderChannelColor; + + outCombinedMesh.Clear(true); + outCombinedMesh.ResizeVertices( vertexCount, channels ); + outCombinedMesh.SetName( combinedMeshName.c_str() ); + // Input meshes are already swizzled correctly, so we can copy colors directly + outCombinedMesh.SetVertexColorsSwizzled(gGraphicsCaps.needsToSwizzleVertexColors); + + if ( hasSkin ) + { + outCombinedMesh.GetSkin().resize_initialized(vertexCount); + outCombinedMesh.GetBindpose().resize_initialized(bindposeCount); + outCombinedMesh.GetBonePathHashes().resize_uninitialized(bindposeCount); + } + + // avoid doing twice (in worst case) + Matrix4x4f* normalMatrices; + bool* isNonUniformScaleTransform; + ALLOC_TEMP (normalMatrices, Matrix4x4f, size); + ALLOC_TEMP (isNonUniformScaleTransform, bool, size); + if ( hasNormals || hasTangents ) + { + for( size_t i=0; i!=size; ++i ) + { + float uniformScale; + TransformType type = ComputeTransformType(in[i].transform, uniformScale); + Matrix4x4f m; + isNonUniformScaleTransform[i] = IsNonUniformScaleTransform(type); + if (isNonUniformScaleTransform[i]) + { + Matrix4x4f::Invert_General3D( in[i].transform, normalMatrices[i] ); + normalMatrices[i].Transpose(); + } + else + { + normalMatrices[i] = Matrix3x3f(in[i].transform); + // Scale matrix to keep normals normalized + normalMatrices[i].Scale(Vector3f::one * (1.0f/uniformScale)); + } + } + } + + int offset = 0; + for( size_t i=0; i!=size; ++i ) + { + if (IsMeshBatchable(in[i].mesh, in[i].subMeshIndex)) + { + const Matrix4x4f& transform = in[i].transform; + const Mesh* mesh = in[i].mesh; + if (useTransforms) + TransformPoints3x4 (transform, + (Vector3f const*)mesh->GetChannelPointer (kShaderChannelVertex), + mesh->GetStride (kShaderChannelVertex), + (Vector3f*)outCombinedMesh.GetChannelPointer (kShaderChannelVertex, offset), + outCombinedMesh.GetStride (kShaderChannelVertex), + mesh->GetVertexCount()); + else + strided_copy (mesh->GetVertexBegin (), mesh->GetVertexEnd (), outCombinedMesh.GetVertexBegin () + 
offset); + offset += mesh->GetVertexCount(); + } + } + + if ( hasNormals ) + { + offset = 0; + for( size_t i=0; i!=size; ++i ) + { + if (IsMeshBatchable(in[i].mesh, in[i].subMeshIndex)) + { + const Mesh* mesh = in[i].mesh; + int vertexCount = mesh->GetVertexCount (); + if (!mesh->IsAvailable (kShaderChannelNormal)) + std::fill(outCombinedMesh.GetNormalBegin () + offset, outCombinedMesh.GetNormalBegin () + offset + vertexCount, Vector3f(0.0f,1.0f,0.0f)); + else + { + const Matrix4x4f& transform = normalMatrices[i]; + + StrideIterator<Vector3f> outNormal = outCombinedMesh.GetNormalBegin () + offset; + if (useTransforms) + { + if (isNonUniformScaleTransform[i]) + { + for (StrideIterator<Vector3f> it = mesh->GetNormalBegin (), end = mesh->GetNormalEnd (); it != end; ++it, ++outNormal) + *outNormal = Normalize( transform.MultiplyVector3( *it) ); + } + else + { + for (StrideIterator<Vector3f> it = mesh->GetNormalBegin (), end = mesh->GetNormalEnd (); it != end; ++it, ++outNormal) + *outNormal = transform.MultiplyVector3( *it); + } + } + else + strided_copy (mesh->GetNormalBegin (), mesh->GetNormalEnd (), outCombinedMesh.GetNormalBegin () + offset); + } + offset += vertexCount; + } + } + } + + if ( hasTangents ) + { + offset = 0; + for ( size_t i=0; i!=size; ++i ) + { + if (IsMeshBatchable(in[i].mesh, in[i].subMeshIndex)) + { + const Mesh* mesh = in[i].mesh; + int vertexCount = mesh->GetVertexCount (); + if (!mesh->IsAvailable (kShaderChannelTangent)) + std::fill(outCombinedMesh.GetTangentBegin () + offset, outCombinedMesh.GetTangentBegin () + offset + vertexCount, Vector4f(1.0f,0.0f,0.0f,1.0f)); + else + { + const Matrix4x4f& transform = normalMatrices[i]; + + StrideIterator<Vector4f> outTanget = outCombinedMesh.GetTangentBegin () + offset; + if (useTransforms) + { + if (isNonUniformScaleTransform[i]) + { + for (StrideIterator<Vector4f> it = mesh->GetTangentBegin (), end = mesh->GetTangentEnd (); it != end; ++it, ++outTanget) + { + Vector3f t3 = 
Normalize(transform.MultiplyVector3(Vector3f(it->x, it->y, it->z))); + *outTanget = Vector4f(t3.x,t3.y,t3.z,it->w); + } + } + else + { + for (StrideIterator<Vector4f> it = mesh->GetTangentBegin (), end = mesh->GetTangentEnd (); it != end; ++it, ++outTanget) + { + Vector3f t3 = transform.MultiplyVector3(Vector3f(it->x, it->y, it->z)); + *outTanget = Vector4f(t3.x,t3.y,t3.z,it->w); + } + } + } + else + strided_copy (mesh->GetTangentBegin (), mesh->GetTangentEnd (), outCombinedMesh.GetTangentBegin () + offset); + } + offset += vertexCount; + } + } + } + + if ( hasUV0 ) + { + offset = 0; + for ( size_t i=0; i!=size; ++i ) + { + if (IsMeshBatchable(in[i].mesh, in[i].subMeshIndex)) + { + const Mesh* mesh = in[i].mesh; + int vertexCount = mesh->GetVertexCount (); + if (!mesh->IsAvailable (kShaderChannelTexCoord0)) + std::fill (outCombinedMesh.GetUvBegin (0) + offset, outCombinedMesh.GetUvBegin (0) + offset + vertexCount, Vector2f(0.0f,0.0f)); + else + strided_copy (mesh->GetUvBegin (0), mesh->GetUvEnd (0), outCombinedMesh.GetUvBegin (0) + offset); + offset += vertexCount; + } + } + } + + if ( hasUV1 ) + { + offset = 0; + for ( size_t i=0; i!=size; ++i ) + { + if (IsMeshBatchable(in[i].mesh, in[i].subMeshIndex)) + { + const Mesh* mesh = in[i].mesh; + const int uvIndex = (mesh->GetAvailableChannels() & (1<<kShaderChannelTexCoord1))!=0? 
1 : 0; + StrideIterator<Vector2f> it = in[i].mesh->GetUvBegin( uvIndex ); + StrideIterator<Vector2f> end = in[i].mesh->GetUvEnd( uvIndex ); + + int vertexCount = mesh->GetVertexCount (); + if ( it == end) + std::fill (outCombinedMesh.GetUvBegin (1) + offset, outCombinedMesh.GetUvBegin (1) + offset + vertexCount, Vector2f(0.0f,0.0f)); + else + { + // we have to apply lightmap UV scale and offset factors + // callee is responsible to reset lightmapTilingOffset on the Renderer afterwards + const Vector4f uvScaleOffset = in[i].lightmapTilingOffset; + if ( uvScaleOffset != Vector4f(1, 1, 0, 0) ) + { + StrideIterator<Vector2f> outUV = outCombinedMesh.GetUvBegin (1) + offset; + for (; it != end; ++it, ++outUV) + { + outUV->x = it->x * uvScaleOffset.x + uvScaleOffset.z; + outUV->y = it->y * uvScaleOffset.y + uvScaleOffset.w; + } + } + else + strided_copy (it, end, outCombinedMesh.GetUvBegin (1) + offset); + } + offset += vertexCount; + } + } + } + + if ( hasColors ) + { + offset = 0; + for ( size_t i=0; i!=size; ++i ) + { + if (IsMeshBatchable(in[i].mesh, in[i].subMeshIndex)) + { + const Mesh* mesh = in[i].mesh; + int vertexCount = mesh->GetVertexCount (); + if (!mesh->IsAvailable (kShaderChannelColor)) + std::fill (outCombinedMesh.GetColorBegin () + offset, outCombinedMesh.GetColorBegin () + offset + vertexCount, ColorRGBA32(255,255,255,255)); + else + { + DebugAssert(mesh->GetVertexColorsSwizzled() == outCombinedMesh.GetVertexColorsSwizzled()); + strided_copy (mesh->GetColorBegin (), mesh->GetColorEnd (), outCombinedMesh.GetColorBegin () + offset); + } + offset += vertexCount; + } + } + } + + if ( hasSkin ) + { + CombineBoneSkinning (in, outCombinedMesh); + } +} + +static void CalculateRootBonePathHash (const CombineInstances &in, Mesh& outCombinedMesh) +{ + // We always pick the root bone path hash of the first combine instance. 
+ // This is because anything else gives unpredictable behaviour and makes it impossible for the user + // to setup the skinned mesh renderer T/R/S correctly. + outCombinedMesh.SetRootBonePathHash(in[0].mesh->GetRootBonePathHash()); + + // If we made it so that the skinnedmeshrenderer always used the default pose from the Avatar + // Then it would be possible to pick the root bone from the mesh with the most bones instead. +#if 0 + size_t size = in.size(); + + BindingHash rootBonePathHash = 0; + int boneCount = 0; + for (size_t i=0; i<size; ++i) + { + } + } + if (rootBonePathHash) + outCombinedMesh.SetRootBonePathHash(rootBonePathHash); +#endif +} + +static void CombineBoneSkinning (const CombineInstances &in, Mesh& outCombinedMesh) +{ + size_t size = in.size(); + + int boneOffset = 0; + int offset = 0; + for ( size_t i=0; i!=size; ++i ) + { + if (!IsMeshBatchable(in[i].mesh, in[i].subMeshIndex)) + continue; + + const Mesh* mesh = in[i].mesh; + Mesh::BoneInfluenceContainer& outSkin = outCombinedMesh.GetSkin(); + const Mesh::BoneInfluenceContainer& inSkin = mesh->GetSkin(); + int vertexCount = mesh->GetVertexCount (); + if (inSkin.empty()) + { + for(int i=0; i<vertexCount;i++) + { + outSkin[offset+i].weight[0] = 0; + outSkin[offset+i].weight[1] = 0; + outSkin[offset+i].weight[2] = 0; + outSkin[offset+i].weight[3] = 0; + outSkin[offset+i].boneIndex[0] = 0; + outSkin[offset+i].boneIndex[1] = 0; + outSkin[offset+i].boneIndex[2] = 0; + outSkin[offset+i].boneIndex[3] = 0; + } + } + else + { + for(int i=0; i<vertexCount;i++) + { + outSkin[offset+i].weight[0] = inSkin[i].weight[0]; + outSkin[offset+i].weight[1] = inSkin[i].weight[1]; + outSkin[offset+i].weight[2] = inSkin[i].weight[2]; + outSkin[offset+i].weight[3] = inSkin[i].weight[3]; + outSkin[offset+i].boneIndex[0] = inSkin[i].boneIndex[0]+boneOffset; + outSkin[offset+i].boneIndex[1] = inSkin[i].boneIndex[1]+boneOffset; + outSkin[offset+i].boneIndex[2] = inSkin[i].boneIndex[2]+boneOffset; + 
outSkin[offset+i].boneIndex[3] = inSkin[i].boneIndex[3]+boneOffset; + } + } + + offset += vertexCount; + + int poseCount = mesh->GetBindpose().size(); + int bindingHashCount = mesh->GetBonePathHashes().size(); + + memcpy(outCombinedMesh.GetBindpose().begin() + boneOffset, mesh->GetBindpose().begin(), poseCount*sizeof(Matrix4x4f)); + + // Old asset bundles might not have bindingHashCount in sync with bind poses. + if (poseCount == bindingHashCount) + memcpy(outCombinedMesh.GetBonePathHashes().begin () + boneOffset, mesh->GetBonePathHashes().begin(), poseCount*sizeof(BindingHash)); + else + memset(outCombinedMesh.GetBonePathHashes().begin () + boneOffset, 0, poseCount*sizeof(BindingHash)); + + boneOffset += poseCount; + } + + CalculateRootBonePathHash (in, outCombinedMesh); +} + + +void CombineMeshes (const CombineInstances &in, Mesh& out, bool mergeSubMeshes, bool useTransforms) +{ + if (!out.CanAccessFromScript()) + { + ErrorStringMsg("Cannot combine into mesh that does not allow access: %s", out.GetName()); + return; + } + for (size_t i = 0; i < in.size(); ++i) + { + Mesh* mesh = in[i].mesh; + if (!mesh) + { + WarningStringMsg("Combine mesh instance %" PRINTF_SIZET_FORMAT " is null.", i); + } + if (mesh && (in[i].subMeshIndex < 0 || in[i].subMeshIndex >= mesh->GetSubMeshCount())) + { + WarningStringMsg("Submesh index %d is invalid for mesh %s.", in[i].subMeshIndex, mesh->GetName()); + } + if (mesh && !mesh->CanAccessFromScript()) + { + ErrorStringMsg("Cannot combine mesh that does not allow access: %s", mesh->GetName()); + return; + } + if (mesh == &out) + { + ErrorStringMsg("Cannot combine into a mesh that is also in the CombineInstances input: %s", mesh->GetName()); + return; + } + } + + CombineMeshVerticesForStaticBatching (in, out.GetName(), out, useTransforms); + CombineMeshIndicesForStaticBatching (in, out, mergeSubMeshes, false); + + out.RecalculateBounds(); + out.UpdateVertexFormat(); +} + diff --git a/Runtime/Filters/Mesh/MeshCombiner.h 
b/Runtime/Filters/Mesh/MeshCombiner.h new file mode 100644 index 0000000..a6975a9 --- /dev/null +++ b/Runtime/Filters/Mesh/MeshCombiner.h @@ -0,0 +1,33 @@ +#ifndef MESHCOMBINER_H +#define MESHCOMBINER_H + +#include "LodMesh.h" + +class Renderer; + +struct CombineInstance +{ + Mesh *mesh; + int subMeshIndex; + Matrix4x4f transform; + + Vector4f lightmapTilingOffset; + int vertexOffset; + + CombineInstance() : + mesh(NULL), + subMeshIndex(0), + lightmapTilingOffset(1, 1, 0, 0), + vertexOffset(0) + {} +}; + +typedef std::vector<CombineInstance> CombineInstances; + +void CombineMeshes (const CombineInstances &in, Mesh& out, bool mergeSubMeshes, bool useTransforms); +// takes an array of meshes(their vertex data) and merges them into 1 combined mesh. +void CombineMeshVerticesForStaticBatching ( const CombineInstances& in, const string& combinedMeshName, Mesh& outCombinedMesh, bool useTransforms = true ); +// takes an array of meshes(their indices) and merges them in 1 mesh (setups subsets) +void CombineMeshIndicesForStaticBatching (const CombineInstances& in, Mesh& inoutMesh, bool mergeSubMeshes, bool useVertexOffsets); + +#endif diff --git a/Runtime/Filters/Mesh/MeshOptimizer.cpp b/Runtime/Filters/Mesh/MeshOptimizer.cpp new file mode 100644 index 0000000..068dc53 --- /dev/null +++ b/Runtime/Filters/Mesh/MeshOptimizer.cpp @@ -0,0 +1,359 @@ +#include "UnityPrefix.h" +#include "MeshOptimizer.h" +#include <vector> + +//@TODO: + +// Step 1 + +//* bool ExtractCollisionData (Mesh& mesh, UNITY_TEMP_VECTOR(kMemGeometry, Vector3f)& vertices, UNITY_TEMP_VECTOR(kMemGeometry, UInt32)& triangles); +// -> make it return welded vertices and triangle array +//* Enable Deformablemesh code and make it work with welding code and check that cloth works visually... + +// Testing: +//* Check mesh collision detection code to work visually correct. +// * run functional test suite +// * run lightmapper tests in the integration test suite. 
They have a complete test for the lightmap uv coordinates picking up lightmap values... + + +// Step 2: +//* Verify vertex cache performance on iPad1 / Wii / intel integrated graphics +//* Switch to default gpu optimized mode and update all model importer templates + + + +template<typename T, const int CACHE_SIZE> +class VertexCacheOptimizer +{ + UInt32* m_cacheEntries; + UInt32 m_cacheSize; + + mutable UInt32 m_cacheMisses; + mutable UInt32 m_cacheHits; + + UInt32 GetInCache(UInt32 lIndex, const char* vertexInCache) const + { + return vertexInCache[lIndex] ? 1 : 0; + } + + void AddToCache(UInt32 lIndex, char* vertexInCache) + { + if(m_cacheEntries[0]!=-1) + vertexInCache[m_cacheEntries[0]]=0; + + for(UInt32 i=0; i<m_cacheSize-1; i++) + m_cacheEntries[i]=m_cacheEntries[i+1]; + + m_cacheEntries[m_cacheSize-1]=lIndex; + vertexInCache[lIndex]=1; + } + +public: + + VertexCacheOptimizer () : m_cacheSize(CACHE_SIZE) + { + m_cacheEntries=new UInt32 [m_cacheSize]; + + m_cacheHits = m_cacheMisses = 0; + for(UInt32 i=0; i<m_cacheSize; i++) + m_cacheEntries[i]=(UInt32)-1; + } + + ~VertexCacheOptimizer() { delete m_cacheEntries; } + + UInt32 GetCacheMisses() { return m_cacheMisses; } + UInt32 GetCacheHits() { return m_cacheHits; } + + void OptimizeTriangles(T* pdstTris, UInt32 numVertices, const T* srcTris, UInt32 numTriangles) + { + UInt32 cachedVerts=0; + char* triangleUsed=new char [numTriangles]; + char* vertexInCache=new char [numVertices]; + memset(triangleUsed,0,numTriangles); + memset(vertexInCache,0,numVertices); + + bool foundTriangle=true; + while (foundTriangle) + { + foundTriangle=false; + UInt32 bestCandidate=0; + UInt32 bestCacheValue=0; + for (UInt32 i = 0; i < numTriangles; i++) + { + if (triangleUsed[i]) + continue; + + foundTriangle=true; + UInt32 i1=srcTris[i*3+0]; + UInt32 i2=srcTris[i*3+1]; + UInt32 i3=srcTris[i*3+2]; + + UInt32 lCacheValue=GetInCache(i1,vertexInCache)+GetInCache(i2,vertexInCache)+GetInCache(i3,vertexInCache)+1; + if (lCacheValue > 
bestCacheValue) + { + bestCandidate=i; + bestCacheValue=lCacheValue; + if (bestCacheValue == 4) + break; + } + } + if(foundTriangle) + { + triangleUsed[bestCandidate]=1; + UInt32 i1=srcTris[bestCandidate*3+0]; + UInt32 i2=srcTris[bestCandidate*3+1]; + UInt32 i3=srcTris[bestCandidate*3+2]; + *pdstTris++=(T)i1; + *pdstTris++=(T)i2; + *pdstTris++=(T)i3; + if (!GetInCache(i1,vertexInCache)) { AddToCache(i1,vertexInCache); cachedVerts++; m_cacheMisses++; } else m_cacheHits++; + if (!GetInCache(i2,vertexInCache)) { AddToCache(i2,vertexInCache); cachedVerts++; m_cacheMisses++; } else m_cacheHits++; + if (!GetInCache(i3,vertexInCache)) { AddToCache(i3,vertexInCache); cachedVerts++; m_cacheMisses++; } else m_cacheHits++; + } + } + delete[] triangleUsed; + delete[] vertexInCache; + } +}; + +inline bool CompareBlendShapeVertexIndex (const BlendShapeVertex& lhs, const BlendShapeVertex& rhs) +{ + return lhs.index < rhs.index; +} + +void OptimizeReorderVertexBuffer (Mesh& mesh) +{ + const int submeshCount = mesh.GetSubMeshCount(); + const int vertexCount = mesh.GetVertexCount(); + + // backup required data + VertexData backupVertexData(mesh.m_VertexData, mesh.GetAvailableChannels(), mesh.GetVertexData().GetStreamsLayout(), mesh.GetVertexData().GetChannelsLayout()); + + Mesh::BoneInfluenceContainer backupSkin; + if (!mesh.m_Skin.empty()) + backupSkin.swap(mesh.m_Skin); + + // reorder the vertices so they come in increasing order + dynamic_array<UInt32> oldToNew; + dynamic_array<UInt32> newToOld; + newToOld.resize_initialized(vertexCount, 0xFFFFFFFF); + oldToNew.resize_initialized(vertexCount, 0xFFFFFFFF); + + Mesh::TemporaryIndexContainer dstIndices; + int newVertexCount = 0; + for (int submesh = 0; submesh < submeshCount; submesh++) + { + Mesh::TemporaryIndexContainer indices; + mesh.GetTriangles (indices, submesh); + + const int indexCount = indices.size(); + dstIndices.resize(indexCount); + for (int index=0; index < indexCount; index++) + { + int vertex = indices[index]; + 
AssertBreak(vertex >= 0); + AssertBreak(vertex < vertexCount); + + if (oldToNew[vertex] == 0xFFFFFFFF) + { + oldToNew[vertex]=newVertexCount; + newToOld[newVertexCount]=vertex; + newVertexCount++; + } + dstIndices[index] = oldToNew[vertex]; + } + + mesh.SetIndices (&dstIndices[0], dstIndices.size(), submesh, kPrimitiveTriangles); + } + + mesh.ResizeVertices(newVertexCount, backupVertexData.GetChannelMask()); + + if (!backupSkin.empty()) + mesh.m_Skin.resize_initialized(newVertexCount); + + for (int vertex=0; vertex < newVertexCount; vertex++) + { + UInt32 remapNew = newToOld[vertex]; + Assert(remapNew != 0xFFFFFFFF); + + if (!backupSkin.empty()) + mesh.m_Skin[vertex] = backupSkin[remapNew]; + + mesh.GetVertexBegin()[vertex] = backupVertexData.MakeStrideIterator<Vector3f> (kShaderChannelVertex)[remapNew]; + + if (backupVertexData.HasChannel(kShaderChannelNormal)) + mesh.GetNormalBegin()[vertex] = backupVertexData.MakeStrideIterator<Vector3f> (kShaderChannelNormal)[remapNew]; + + if (backupVertexData.HasChannel(kShaderChannelColor)) + mesh.GetColorBegin()[vertex] = backupVertexData.MakeStrideIterator<ColorRGBA32> (kShaderChannelColor)[remapNew]; + + if (backupVertexData.HasChannel(kShaderChannelTexCoord0)) + mesh.GetUvBegin(0)[vertex] = backupVertexData.MakeStrideIterator<Vector2f> (kShaderChannelTexCoord0)[remapNew]; + + if (backupVertexData.HasChannel(kShaderChannelTexCoord1)) + mesh.GetUvBegin(1)[vertex] = backupVertexData.MakeStrideIterator<Vector2f> (kShaderChannelTexCoord1)[remapNew]; + + if (backupVertexData.HasChannel(kShaderChannelTangent)) + mesh.GetTangentBegin()[vertex] = backupVertexData.MakeStrideIterator<Vector4f> (kShaderChannelTangent)[remapNew]; + } + + // Remap vertex indices stored in blend shapes + BlendShapeData& blendShapeData = mesh.GetWriteBlendShapeDataInternal(); + BlendShapeVertices& blendShapeVertices = blendShapeData.vertices; + for (BlendShapeVertices::iterator itv = blendShapeVertices.begin(), endv = blendShapeVertices.end(); itv != 
endv; ++itv) + { + BlendShapeVertex& bsv = *itv; + bsv.index = oldToNew[bsv.index]; + } + + // Sort each shape's vertices by index so the blending writes to memory as linearly as possible + for (int shapeIndex = 0; shapeIndex < blendShapeData.shapes.size(); shapeIndex++) + { + const BlendShape& shape = blendShapeData.shapes[shapeIndex]; + BlendShapeVertex* vertices = &blendShapeVertices[shape.firstVertex]; + std::sort(vertices, vertices + shape.vertexCount, CompareBlendShapeVertexIndex); + } + + mesh.SetChannelsDirty(mesh.GetAvailableChannels(), true); +} + +void OptimizeIndexBuffers (Mesh& mesh) +{ + const int submeshCount = mesh.GetSubMeshCount(); + const int vertexCount = mesh.GetVertexCount(); + + // first optimize the indices for each submesh + for (int submesh = 0; submesh < submeshCount; submesh++) + { + Mesh::TemporaryIndexContainer unoptimizedIndices; + mesh.GetTriangles (unoptimizedIndices, submesh); + + Mesh::TemporaryIndexContainer optimizedIndices; + optimizedIndices.resize(unoptimizedIndices.size()); + + VertexCacheOptimizer<UInt32, 16> vertexCacheOptimizer; + vertexCacheOptimizer.OptimizeTriangles(&optimizedIndices[0], vertexCount, &unoptimizedIndices[0], unoptimizedIndices.size() / 3); + // LogString(Format("[Optimize] mesh: %s: submesh: %d hits: %d misses: %d\n", mesh.GetName(), submesh, vertexCacheOptimizer.GetCacheHits(), vertexCacheOptimizer.GetCacheMisses())); + + mesh.SetIndices (&optimizedIndices[0], optimizedIndices.size(), submesh, kPrimitiveTriangles); + } +} + + +template<typename T, const int CACHE_SIZE> +class VertexCacheDeOptimizer +{ + UInt32* m_cacheEntries; + UInt32 m_cacheSize; + + mutable UInt32 m_cacheMisses; + mutable UInt32 m_cacheHits; + + UInt32 GetInCache(UInt32 lIndex, const char* vertexInCache) const + { + return vertexInCache[lIndex] ? 
1 : 0; + } + + void AddToCache(UInt32 lIndex, char* vertexInCache) + { + if(m_cacheEntries[0]!=-1) + vertexInCache[m_cacheEntries[0]]=0; + + for(UInt32 i=0; i<m_cacheSize-1; i++) + m_cacheEntries[i]=m_cacheEntries[i+1]; + + m_cacheEntries[m_cacheSize-1]=lIndex; + vertexInCache[lIndex]=1; + } + +public: + + VertexCacheDeOptimizer () : m_cacheSize(CACHE_SIZE) + { + m_cacheEntries=new UInt32 [m_cacheSize]; + + m_cacheHits = m_cacheMisses = 0; + for(UInt32 i=0; i<m_cacheSize; i++) + m_cacheEntries[i]=(UInt32)-1; + } + + ~VertexCacheDeOptimizer() { delete m_cacheEntries; } + + UInt32 GetCacheMisses() { return m_cacheMisses; } + UInt32 GetCacheHits() { return m_cacheHits; } + + void DeOptimizeTriangles(T* pdstTris, UInt32 numVertices, const T* srcTris, UInt32 numTriangles) + { + UInt32 cachedVerts=0; + char* triangleUsed=new char [numTriangles]; + char* vertexInCache=new char [numVertices]; + memset(triangleUsed,0,numTriangles); + memset(vertexInCache,0,numVertices); + + bool foundTriangle=true; + while (foundTriangle) + { + foundTriangle=false; + UInt32 bestCandidate=0; + UInt32 bestCacheValue=4; + for (UInt32 i = 0; i < numTriangles; i++) + { + if (triangleUsed[i]) + continue; + + foundTriangle=true; + UInt32 i1=srcTris[i*3+0]; + UInt32 i2=srcTris[i*3+1]; + UInt32 i3=srcTris[i*3+2]; + + UInt32 lCacheValue=GetInCache(i1,vertexInCache)+GetInCache(i2,vertexInCache)+GetInCache(i3,vertexInCache)+1; + if (lCacheValue <= bestCacheValue) + { + bestCandidate=i; + bestCacheValue=lCacheValue; + if (bestCacheValue == 1) + break; + } + } + if(foundTriangle) + { + triangleUsed[bestCandidate]=1; + UInt32 i1=srcTris[bestCandidate*3+0]; + UInt32 i2=srcTris[bestCandidate*3+1]; + UInt32 i3=srcTris[bestCandidate*3+2]; + *pdstTris++=(T)i1; + *pdstTris++=(T)i2; + *pdstTris++=(T)i3; + if (!GetInCache(i1,vertexInCache)) { AddToCache(i1,vertexInCache); cachedVerts++; m_cacheMisses++; } else m_cacheHits++; + if (!GetInCache(i2,vertexInCache)) { AddToCache(i2,vertexInCache); cachedVerts++; 
m_cacheMisses++; } else m_cacheHits++; + if (!GetInCache(i3,vertexInCache)) { AddToCache(i3,vertexInCache); cachedVerts++; m_cacheMisses++; } else m_cacheHits++; + } + } + delete triangleUsed; + delete vertexInCache; + } +}; + +void DeOptimizeIndexBuffers (Mesh& mesh) +{ + const int submeshCount = mesh.GetSubMeshCount(); + const int vertexCount = mesh.GetVertexCount(); + + // first optimize the indices for each submesh + for (int submesh = 0; submesh < submeshCount; submesh++) + { + Mesh::TemporaryIndexContainer unoptimizedIndices; + mesh.GetTriangles (unoptimizedIndices, submesh); + + Mesh::TemporaryIndexContainer deOptimizedIndices; + deOptimizedIndices.resize(unoptimizedIndices.size()); + + VertexCacheDeOptimizer<UInt32, 16> vertexCacheDeOptimizer; + vertexCacheDeOptimizer.DeOptimizeTriangles(&deOptimizedIndices[0], vertexCount, &unoptimizedIndices[0], unoptimizedIndices.size() / 3); + + //LogString(Format("[Deoptimize] mesh: %s: submesh: %d hits: %d misses: %d\n", mesh.GetName(), submesh, vertexCacheDeOptimizer.GetCacheHits(), vertexCacheDeOptimizer.GetCacheMisses())); + + mesh.SetIndices (&deOptimizedIndices[0], deOptimizedIndices.size(), submesh, kPrimitiveTriangles); + } +} + diff --git a/Runtime/Filters/Mesh/MeshOptimizer.h b/Runtime/Filters/Mesh/MeshOptimizer.h new file mode 100644 index 0000000..8964edf --- /dev/null +++ b/Runtime/Filters/Mesh/MeshOptimizer.h @@ -0,0 +1,13 @@ +#pragma once + +#ifndef __importmeshoptimizer_h_included__ +#define __importmeshoptimizer_h_included__ + +#include "Runtime/Filters/Mesh/LodMesh.h" + +void DeOptimizeIndexBuffers (Mesh& mesh); +void OptimizeIndexBuffers (Mesh& mesh); +void OptimizeReorderVertexBuffer (Mesh& mesh); + + +#endif //__importmeshoptimizer_h_included__ diff --git a/Runtime/Filters/Mesh/MeshPartitioner.cpp b/Runtime/Filters/Mesh/MeshPartitioner.cpp new file mode 100644 index 0000000..9ec9f87 --- /dev/null +++ b/Runtime/Filters/Mesh/MeshPartitioner.cpp @@ -0,0 +1,346 @@ + +#include "UnityPrefix.h" +#include 
"MeshPartitioner.h" +#include "Runtime/Filters/Mesh/LodMesh.h" + +#if UNITY_EDITOR + +static const UInt32 ComponentStride[] = { 12, 12, 4, 8, 8, 16, sizeof(BoneInfluence) }; + +static int CalcDMABatchSize(int totalVerts, int stride, const int sizeRestriction, bool padded) +{ + const int alignmentRestriction = 16; // DMA transfers address must be a multiple of 16 + int a = alignmentRestriction; + + if(a>stride) + { + if(a % stride == 0) + return sizeRestriction; + while(a % stride) { a+=alignmentRestriction; } + } + else + { + if(stride % a == 0) + return sizeRestriction; + while(stride % a) { a+=alignmentRestriction; } + } + + int batchMultiple = a / stride; + totalVerts = (totalVerts < sizeRestriction) ? totalVerts : sizeRestriction; + if(padded) + totalVerts += batchMultiple - 1; + totalVerts /= batchMultiple; + totalVerts *= batchMultiple; + return totalVerts; +}; + +static int CalcBestFitBatchSize(const UInt32 availableChannels, int vertexCount, int maxVerts, bool padded = false) +{ + int bestFit = INT_MAX; + for(int i=0;i<=kShaderChannelCount;i++) + { + if (availableChannels & (1<<i)) + { + int maxVCount = CalcDMABatchSize(vertexCount, ComponentStride[i], maxVerts, padded); + bestFit = (bestFit > maxVCount) ? 
maxVCount : bestFit; + } + } + return bestFit; +} + +template<typename T> +struct TempPartition +{ + dynamic_array<Vector3f> m_Vertices; + dynamic_array<Vector2f> m_UV; + dynamic_array<Vector2f> m_UV1; + dynamic_array<ColorRGBA32> m_Colors; + dynamic_array<Vector3f> m_Normals; + dynamic_array<Vector4f> m_Tangents; + dynamic_array<BoneInfluence> m_Skin; + dynamic_array<T> indexBuffer; + dynamic_array<T> newToOld; + int vertexCount; + // + void InitRemapping(int numVertices) + { + newToOld.resize_uninitialized(numVertices); + memset(&newToOld[0],(T)-1,numVertices*sizeof(T)); + } + void RemapVertices(Mesh& mesh, int actualVertexCount) + { + m_Vertices.resize_uninitialized(vertexCount); + const UInt32 channels = mesh.GetAvailableChannels(); + if(channels&(1<<kShaderChannelNormal)) + m_Normals.resize_uninitialized(vertexCount); + if(channels&(1<<kShaderChannelTexCoord0)) + m_UV.resize_uninitialized(vertexCount); + if(channels&(1<<kShaderChannelTexCoord1)) + m_UV1.resize_uninitialized(vertexCount); + if(channels&(1<<kShaderChannelTangent)) + m_Tangents.resize_uninitialized(vertexCount); + if(channels&(1<<kShaderChannelColor)) + m_Colors.resize_uninitialized(vertexCount); + if(!mesh.GetSkin().empty()) + m_Skin.resize_uninitialized(vertexCount); + + T remapNew = 0; + for(int vertex=0; vertex<vertexCount; vertex++) + { + if((T)-1 != newToOld[vertex]) + remapNew = newToOld[vertex]; + m_Vertices[vertex]=mesh.GetVertexBegin()[remapNew]; + if(channels&(1<<kShaderChannelNormal)) + m_Normals[vertex]=mesh.GetNormalBegin()[remapNew]; + if(channels&(1<<kShaderChannelTexCoord0)) + m_UV[vertex]=mesh.GetUvBegin(0)[remapNew]; + if(channels&(1<<kShaderChannelTexCoord1)) + m_UV1[vertex]=mesh.GetUvBegin(1)[remapNew]; + if(channels&(1<<kShaderChannelTangent)) + m_Tangents[vertex]=mesh.GetTangentBegin()[remapNew]; + if(channels&(1<<kShaderChannelColor)) + m_Colors[vertex]=mesh.GetColorBegin()[remapNew]; + if(!mesh.GetSkin().empty()) + m_Skin[vertex]=mesh.GetSkin()[remapNew]; + } + } +}; + 
// A partitioned submesh: the list of TempPartitions it was split into.
template<typename T>
struct SegmentedMesh
{
	std::vector<TempPartition<T> > m_Partitions;
	void Clear() { m_Partitions.clear(); }
};

// Splits one submesh into DMA-sized partitions. Triangles are consumed in
// order; a partition is closed (and the last triangle undone) as soon as its
// unique-vertex count would exceed batchSize. Results are appended to
// segments[submesh].
template<typename T>
static void CreateFromSubMesh(std::vector< SegmentedMesh<T> >& segments, Mesh& mesh, int submesh)
{
	SubMesh& sm = mesh.GetSubMeshFast(submesh);

	T vertexCount = 0;
	const int numIndices = sm.indexCount;
	const int numTriangles = numIndices / 3;

	AssertBreak((numTriangles * 3) == numIndices);

	// widest channel present decides the max vertices per 16K DMA transfer
	UInt32 maxComponentStride = 0;
	const UInt32 availableChannels = mesh.GetAvailableChannels() | (mesh.GetSkin().empty() ? 0 : (1<<kShaderChannelCount));
	for(int i=0;i<=kShaderChannelCount;i++)
	{
		if(availableChannels & (1<<i))
		{
			if(maxComponentStride < ComponentStride[i])
				maxComponentStride = ComponentStride[i];
		}
	}

	const UInt32 maxDMATransferSize = 16 * 1024;
	const UInt32 numVerts = (numIndices + 15) & (~15);   // round up to multiple of 16
	const UInt32 maxVerts = std::min(numVerts, maxDMATransferSize / maxComponentStride);
	const UInt32 batchSize = CalcBestFitBatchSize(availableChannels, numVerts, maxVerts);

	const int maxPartitions = (numIndices + batchSize-1) / batchSize;
	const int numVertices = (sm.indexCount + 2*maxPartitions);

	const T* srcIndices = reinterpret_cast<const T*> (&mesh.GetIndexBuffer()[sm.firstByte]);

	int startTriangle = 0;
	int startVertex = 0;
	std::vector<T> oldToNew;   // source index -> partition-local index; (T)-1 = not yet seen
	oldToNew.resize(mesh.GetVertexCount());
	std::vector<TempPartition<T> > & partitions = segments[submesh].m_Partitions;
	while(startTriangle != numTriangles)
	{
		TempPartition<T> p;
		p.indexBuffer.clear();
		p.vertexCount = 0;
		p.InitRemapping(batchSize+3);
		dynamic_array<T>& dstIndices = p.indexBuffer;
		memset(&oldToNew[0],(T)-1,oldToNew.size()*sizeof(T));
		for(int i=startTriangle; i<numTriangles; i++)
		{
			// assume we consume everything; overwritten below if we overflow
			startTriangle = numTriangles;
			T lastVertexCount = vertexCount; // undo stack
			for(int j=0;j<3;j++)
			{
				int index = i*3+j;
				int vertex = srcIndices[index];
				AssertBreak(vertex >= 0);
				AssertBreak(vertex < mesh.GetVertexCount());
				AssertBreak(lastVertexCount-startVertex+j < p.newToOld.size());
				AssertBreak(p.newToOld[lastVertexCount-startVertex+j] == (T)-1);
				if(oldToNew[vertex]==(T)-1)
				{
					AssertBreak(vertexCount < numVertices);
					oldToNew[vertex]=vertexCount-startVertex;
					p.newToOld[vertexCount-startVertex]=vertex;
					vertexCount++;
				}
				dstIndices.push_back(oldToNew[vertex]);
			}
			if((vertexCount-startVertex) > batchSize)
			{
				//undo the last one in the partition
				for(int j=0;j<3;j++)
				{
					p.newToOld[lastVertexCount-startVertex+j] = -1;;
					dstIndices.pop_back();
				}
				startTriangle = i;
				vertexCount = lastVertexCount;
				break;
			}
		}
		const int actualVertexCount = vertexCount - startVertex;
		p.vertexCount = maxVerts;//CalcBestFitBatchSize(availableChannels, actualVertexCount, maxVerts, true); // FIXME!!! This needs to find the next "best fit" that will still keep alignment restrictions..
		p.RemapVertices(mesh, actualVertexCount);
		partitions.push_back(p);
		startVertex = vertexCount;
	}
	oldToNew.clear();
}

// mircea: todo: this would be awesome!!!
// spuInOut:
// m_Vertices
// m_Normals
// m_Tangents
// spuIn:
// m_Skin

// rsxDirect
// m_UV
// m_UV1
// m_Colors
// m_IndexBuffer

// Partitions every submesh of an optimized, skinned mesh into DMA-friendly
// chunks (UInt16 indices), then rebuilds the mesh's vertex/index buffers from
// the partitions and records MeshPartition/MeshPartitionInfo metadata.
// No-op for meshes that are not optimized or not skinned.
void PartitionSubmeshes(Mesh& m)
{
	typedef UInt16 T;

	const int submeshCount = m.m_SubMeshes.size();

	m.m_PartitionInfos.clear();
	m.m_Partitions.clear();

	// skinned meshes cannot be partitioned if the optimization flag is not set because partitioning changes the vertex/index buffers
	if (!m.GetMeshOptimized() || m.GetSkin().empty())
		return;

	// destripify if needed
	m.DestripifyIndices ();

	// need to fixup the indices first so they are not relative to the partition start anymore.
	// NOTE(review): m_PartitionInfos was cleared a few lines above, so this
	// fixup loop iterates over an empty container and does nothing — confirm
	// whether it should run before the clear() calls.
	Mesh::MeshPartitionInfoContainer& partInfos = m.m_PartitionInfos;
	for(int pi=0; pi<partInfos.size(); pi++)
	{
		const MeshPartitionInfo& partInfo = m.m_PartitionInfos[pi];

		for(int s=0; s<partInfo.partitionCount; s++)
		{
			const MeshPartition& p = m.m_Partitions[partInfo.submeshStart + s];
			IndexBufferData indexBufferData;
			m.GetIndexBufferData(indexBufferData);
			UInt16* indices = (UInt16*)(&m.m_IndexBuffer[0] + p.indexByteOffset);
			for(int i=0;i<p.indexCount;i++)
				indices[i] += p.vertexOffset;
		}
	}

	// make a segment for each submesh
	std::vector< SegmentedMesh<T> > segments;
	segments.resize(submeshCount);
	for(int submesh=0;submesh<submeshCount;submesh++)
		CreateFromSubMesh<T>(segments, m, submesh);

	///////////////////////////////////////////////////////////////////////////////
	// combine the segments to get the script accessible buffers

	UInt32 availableChannels = m.GetAvailableChannels();

	m.Clear(false);
	m.SetMeshOptimized(true); //mircea@ m.Clear will set the optimized mesh to false. Being here means we are partitioning an optimized mesh so restore the flag.
	m.SetSubMeshCount(submeshCount);

	UInt32 vertexOffset = 0;
	UInt32 indexOffset = 0;

	// First pass: lay out partition records (offsets/counts) without copying data.
	for(int submesh=0;submesh<submeshCount;submesh++)
	{
		int indexCount = 0;
		SegmentedMesh<T>& seg = segments[submesh];

		MeshPartitionInfo partInfo;
		partInfo.submeshStart = m.m_Partitions.size();
		partInfo.partitionCount = seg.m_Partitions.size();
		m.m_PartitionInfos.push_back(partInfo);

		// create partitions & build the mesh buffers
		for(int s=0;s<seg.m_Partitions.size();s++)
		{
			MeshPartition part;
			TempPartition<T>& p = seg.m_Partitions[s];
			part.vertexCount = p.vertexCount;
			part.vertexOffset = vertexOffset;
			part.indexCount = p.indexBuffer.size();
			part.indexByteOffset = indexOffset;
			AssertBreak(0 == (part.vertexOffset & 15));
			m.m_Partitions.push_back(part);;
			indexCount += part.indexCount;
			indexOffset += p.indexBuffer.size() * sizeof(T);
			vertexOffset += p.vertexCount;
		}
	}

	// fill in the partitioned data back into the mesh.
	m.ResizeVertices(vertexOffset, availableChannels);

	// Second pass: copy per-partition vertex data and rebuild per-submesh indices.
	for(int submesh=0;submesh<submeshCount;submesh++)
	{
		const SegmentedMesh<T>& seg = segments[submesh];
		const MeshPartitionInfo& partInfo = m.m_PartitionInfos[submesh];
		for(int s=0;s<seg.m_Partitions.size();s++)
		{
			const TempPartition<T>& p = seg.m_Partitions[s];
			const MeshPartition& part = m.m_Partitions[partInfo.submeshStart + s];
			strided_copy (p.m_Vertices.begin (), p.m_Vertices.end(), m.GetVertexBegin () + part.vertexOffset);
			if(!p.m_Normals.empty())
				strided_copy (p.m_Normals.begin (), p.m_Normals.end(), m.GetNormalBegin () + part.vertexOffset);
			if(!p.m_UV.empty())
				strided_copy (p.m_UV.begin (), p.m_UV.end (), m.GetUvBegin (0) + part.vertexOffset);
			if(!p.m_UV1.empty())
				strided_copy (p.m_UV1.begin (), p.m_UV1.end (), m.GetUvBegin (1) + part.vertexOffset);
			if(!p.m_Tangents.empty())
				strided_copy (p.m_Tangents.begin (), p.m_Tangents.end (), m.GetTangentBegin () + part.vertexOffset);
			if(!p.m_Colors.empty())
				strided_copy (p.m_Colors.begin (), p.m_Colors.end (), m.GetColorBegin() + part.vertexOffset);
			if(!p.m_Skin.empty())
				m.GetSkin().insert(m.GetSkin().end(), p.m_Skin.begin(), p.m_Skin.end());
		}

		// indices stay partition-relative unless DEBUG_PARTITIONING rebases them
		std::vector<T> indices;
		for(int s=0;s<partInfo.partitionCount;s++)
		{
			const MeshPartition& p = m.m_Partitions[partInfo.submeshStart+s];
			const TempPartition<T>& tp = seg.m_Partitions[s];
			for(int i=0;i<p.indexCount;i++)
			{
				int index = tp.indexBuffer[i];
				AssertBreak( (index>=0) && (index < (p.vertexCount)));
#if DEBUG_PARTITIONING
				index += p.vertexOffset;
#endif
				indices.push_back(index);
			}
		}
		m.SetIndices (&indices[0], indices.size(), submesh, kPrimitiveTriangles);
	}
}

// Public entry point (see MeshPartitioner.h).
void PartitionMesh(Mesh* m)
{
	PartitionSubmeshes(*m);
}

#endif //UNITY_EDITOR
diff --git a/Runtime/Filters/Mesh/MeshPartitioner.h b/Runtime/Filters/Mesh/MeshPartitioner.h
new file mode 100644
index 0000000..95a0d98
--- /dev/null
+++ b/Runtime/Filters/Mesh/MeshPartitioner.h
@@ -0,0 +1,5 @@
#pragma once

#define DEBUG_PARTITIONING 0
class Mesh;
void PartitionMesh(Mesh* m);
diff --git a/Runtime/Filters/Mesh/MeshRenderer.cpp b/Runtime/Filters/Mesh/MeshRenderer.cpp
new file mode 100644
index 0000000..08dfbae
--- /dev/null
+++ b/Runtime/Filters/Mesh/MeshRenderer.cpp
@@ -0,0 +1,664 @@
#include "UnityPrefix.h"
#include "MeshRenderer.h"
#include "Runtime/Graphics/Transform.h"
#include "LodMesh.h"
#include "Runtime/Filters/Mesh/MeshUtility.h"
#include "Runtime/Graphics/DrawUtil.h"
#include "Runtime/GfxDevice/BatchRendering.h"
#include "Runtime/Math/Vector3.h"
#include "Runtime/Shaders/Material.h"
#include "Runtime/Profiler/Profiler.h"
#include "Runtime/Profiler/ExternalGraphicsProfiler.h"
#include "Runtime/Utilities/BitUtility.h"
#include "Runtime/GfxDevice/GfxDevice.h"

#include "Runtime/GfxDevice/ChannelAssigns.h"
#include "External/shaderlab/Library/properties.h"
#include "External/shaderlab/Library/shaderlab.h"

#include "Runtime/Camera/Renderqueue.h"
#include
"Runtime/Camera/RenderLoops/BuiltinShaderParamUtility.h" +#include "Runtime/GfxDevice/BatchRendering.h" + +#include "Runtime/Profiler/TimeHelper.h" +#include "Runtime/GfxDevice/GfxDeviceStats.h" +#include "Runtime/Misc/BuildSettings.h" + + +PROFILER_INFORMATION(gMeshRenderProfile, "MeshRenderer.Render", kProfilerRender) +PROFILER_INFORMATION(gMeshRenderScaledProfile, "MeshRenderer.ComputeScaledMesh", kProfilerRender) +PROFILER_INFORMATION(gMeshRenderStaticBatch, "MeshRenderer.RenderStaticBatch", kProfilerRender) +PROFILER_INFORMATION(gMeshRenderDynamicBatch, "MeshRenderer.RenderDynamicBatch", kProfilerRender) + + +#if UNITY_EDITOR +#define SET_CACHED_SURFACE_AREA_DIRTY() m_CachedSurfaceArea = -1.0f; +#else +#define SET_CACHED_SURFACE_AREA_DIRTY() //do nothing +#endif + +IMPLEMENT_CLASS_INIT_ONLY (MeshRenderer) + +MeshRenderer::MeshRenderer (MemLabelId label, ObjectCreationMode mode) +: Super(kRendererMesh, label, mode) +, m_MeshNode (this) +{ + m_ScaledMeshDirty = true; + m_MeshWasModified = false; + + m_CachedMesh = NULL; + m_ScaledMesh = NULL; + SET_CACHED_SURFACE_AREA_DIRTY(); +} + +MeshRenderer::~MeshRenderer () +{ + FreeScaledMesh (); +} + +void MeshRenderer::AwakeFromLoad (AwakeFromLoadMode awakeMode) +{ + Super::AwakeFromLoad (awakeMode); + UpdateCachedMesh (); +} + +void MeshRenderer::Deactivate (DeactivateOperation operation) +{ + Super::Deactivate (operation); + FreeScaledMesh (); +} + +void MeshRenderer::InitializeClass () +{ + REGISTER_MESSAGE (MeshRenderer, kTransformChanged, TransformChanged, int); + + REGISTER_MESSAGE_VOID(MeshRenderer, kDidModifyBounds, DidModifyMeshBounds); + REGISTER_MESSAGE_VOID(MeshRenderer, kDidDeleteMesh, DidDeleteMesh); + REGISTER_MESSAGE_VOID(MeshRenderer, kDidModifyMesh, DidModifyMesh); +} + +void MeshRenderer::TransformChanged (int changeMask) +{ + if (changeMask & Transform::kScaleChanged) + { + SET_CACHED_SURFACE_AREA_DIRTY(); + m_ScaledMeshDirty = true; + } + Super::TransformChanged (changeMask); +} + +void 
MeshRenderer::UpdateLocalAABB() +{ + DebugAssertIf( m_CachedMesh != m_Mesh ); + if( m_CachedMesh ) + { + if (HasSubsetIndices()) + { + if (GetMaterialCount() == 1) + m_TransformInfo.localAABB = m_CachedMesh->GetBounds(GetSubsetIndex(0)); + else + { + MinMaxAABB minMaxAABB; + for (int m = 0; m < GetMaterialCount(); ++m) + minMaxAABB.Encapsulate(m_CachedMesh->GetBounds(GetSubsetIndex(m))); + m_TransformInfo.localAABB = minMaxAABB; + } + } + else + { + m_TransformInfo.localAABB = m_CachedMesh->GetBounds(); + } + } + else + m_TransformInfo.localAABB.SetCenterAndExtent( Vector3f::zero, Vector3f::zero ); +} + +void MeshRenderer::SetSubsetIndex(int subsetIndex, int index) +{ + Renderer::SetSubsetIndex(subsetIndex, index); + + // Reset scaled mesh if this renderer is now statically batched. + // Mesh scaling should never be used with static batching (case 551504). + FreeScaledMesh(); +} + +int MeshRenderer::GetStaticBatchIndex() const +{ + // Wrap non-virtual version in a virtual call + return GetMeshStaticBatchIndex(); +} + +int MeshRenderer::GetMeshStaticBatchIndex() const +{ + return IsPartOfStaticBatch() ? m_CachedMesh->GetInstanceID(): 0; +} + +UInt32 MeshRenderer::GetMeshIDSmall() const +{ + return m_CachedMesh ? m_CachedMesh->GetInternalMeshID(): 0; +} + + +Mesh* MeshRenderer::GetCachedMesh () +{ + DebugAssertIf(m_CachedMesh != m_Mesh); + return m_CachedMesh; +} + + +Mesh* MeshRenderer::GetMeshUsedForRendering () +{ + Mesh* cachedMesh = GetCachedMesh (); + + if (cachedMesh != NULL) + { + // NOTE: staticaly batched geometry already has scale applied + // therefore we skip mesh scaling + if (!m_ScaledMeshDirty || IsPartOfStaticBatch()) + return m_ScaledMesh == NULL ? 
cachedMesh : m_ScaledMesh->mesh; + + m_ScaledMeshDirty = false; + + float unused2; + Matrix4x4f unused; + Matrix4x4f scalematrix; + TransformType type = GetTransform().CalculateTransformMatrixDisableNonUniformScale (unused, scalematrix, unused2); + // Check if no scale is needed or we can't access vertices anyway to transform them correctly + DebugAssert(!IsNonUniformScaleTransform(type) || cachedMesh->HasVertexData()); + if (!IsNonUniformScaleTransform(type) || !cachedMesh->HasVertexData()) + { + // Cleanup scaled mesh + FreeScaledMesh(); + m_MeshWasModified = false; + + return cachedMesh; + } + // Need scaled mesh + else + { + // Early out if the mesh scale hasn't actually changed + if (m_ScaledMesh != NULL && CompareApproximately(scalematrix, m_ScaledMesh->matrix) && !m_MeshWasModified) + return m_ScaledMesh->mesh; + + // Scale has changed, maybe generated a new scaled mesh + PROFILER_AUTO(gMeshRenderScaledProfile, this) + + // Allocate scaled mesh + if (m_ScaledMesh == NULL) + { + m_ScaledMesh = new ScaledMesh (); + m_ScaledMesh->mesh = NEW_OBJECT (Mesh); + m_ScaledMesh->mesh->Reset(); + m_ScaledMesh->mesh->AwakeFromLoad(kInstantiateOrCreateFromCodeAwakeFromLoad); + m_ScaledMesh->mesh->SetHideFlags(kHideAndDontSave); + } + + m_MeshWasModified = false; + + // Rescale mesh + m_ScaledMesh->matrix = scalematrix; + m_ScaledMesh->mesh->CopyTransformed(*cachedMesh, scalematrix); + return m_ScaledMesh->mesh; + } + } + else + { + return NULL; + } +} + +static SubMesh const& GetSubMesh(Mesh& mesh, int subsetIndex) +{ + const int subMeshCount = mesh.GetSubMeshCount()? 
mesh.GetSubMeshCount()-1 : 0; + const int subMeshIndex = std::min<unsigned int>(subsetIndex, subMeshCount); + return mesh.GetSubMeshFast(subMeshIndex); +} + + +void MeshRenderer::Render (int subsetIndex, const ChannelAssigns& channels) +{ + PROFILER_AUTO(gMeshRenderProfile, this); + + Mesh* mesh = GetMeshUsedForRendering (); + if (!mesh) + return; + if (m_CustomProperties) + GetGfxDevice().SetMaterialProperties (*m_CustomProperties); + DrawUtil::DrawMeshRaw (channels, *mesh, subsetIndex); +} + + +#if UNITY_EDITOR + +void MeshRenderer::GetRenderStats (RenderStats& renderStats) +{ + ///@TODO: This does not work with static batching fixor it. + memset(&renderStats, 0, sizeof(renderStats)); + + Mesh* mesh = m_Mesh; + if (mesh) + { + for (int i=0;i<GetMaterialCount();i++) + { + const SubMesh& submesh = GetSubMesh (*mesh, GetSubsetIndex(i)); + + renderStats.triangleCount += GetPrimitiveCount(submesh.indexCount, submesh.topology, false); + renderStats.vertexCount += submesh.vertexCount; + renderStats.submeshCount++; + } + } +} + +float MeshRenderer::GetCachedSurfaceArea () +{ + if (m_CachedSurfaceArea >= 0.0f) + return m_CachedSurfaceArea; + + Mesh* mesh = GetCachedMesh (); + if (!mesh) + { + m_CachedSurfaceArea = 1.0f; + return m_CachedSurfaceArea; + } + + Matrix4x4f objectToWorld; + GetComponent (Transform).CalculateTransformMatrix (objectToWorld); + + Mesh::TemporaryIndexContainer triangles; + mesh->GetTriangles (triangles); + + dynamic_array<Vector3f> vertices (mesh->GetVertexCount(), kMemTempAlloc); + mesh->ExtractVertexArray (vertices.begin ()); + + m_CachedSurfaceArea = CalculateSurfaceArea (objectToWorld, triangles, vertices); + + return m_CachedSurfaceArea; +} +#endif + +void MeshRenderer::DidModifyMeshBounds () +{ + SET_CACHED_SURFACE_AREA_DIRTY(); + m_TransformDirty = true; + BoundsChanged (); +} + +void MeshRenderer::DidModifyMesh () +{ + m_MeshWasModified = true; + m_ScaledMeshDirty = true; + m_TransformDirty = true; + BoundsChanged(); +} + +void 
MeshRenderer::DidDeleteMesh () +{ + m_CachedMesh = NULL; +} + +void MeshRenderer::SetSharedMesh (PPtr<Mesh> mesh) +{ + SET_CACHED_SURFACE_AREA_DIRTY(); + m_Mesh = mesh; + UpdateCachedMesh (); +} + +PPtr<Mesh> MeshRenderer::GetSharedMesh () +{ + return m_Mesh; +} + +void MeshRenderer::UpdateCachedMesh () +{ + Mesh* mesh = m_Mesh; + if (mesh != m_CachedMesh) + { + // In order to make sure we are not using old subset indices referring to the previous mesh + // we clear them here, assuming that the correct subset indices will be set subsequently. + // We only do this if there was a previous mesh that the new mesh is replacing, since some + // code paths are transferring in the values and then call this function. In that case we do + // not want to mess with the indices. + if (m_CachedMesh) ClearSubsetIndices(); + m_ScaledMeshDirty = true; + m_MeshWasModified = true; + m_CachedMesh = mesh; + m_TransformDirty = true; + BoundsChanged(); + m_MeshNode.RemoveFromList(); + if (m_CachedMesh) + m_CachedMesh->AddObjectUser( m_MeshNode ); + } +} + +void MeshRenderer::FreeScaledMesh () +{ + if (m_ScaledMesh) + { + DestroySingleObject (m_ScaledMesh->mesh); + delete m_ScaledMesh; + m_ScaledMesh = NULL; + m_ScaledMeshDirty = false; + } +} + +#if GFX_ENABLE_DRAW_CALL_BATCHING + +PROFILER_INFORMATION(gDrawStaticBatchProfile, "Batch.DrawStatic", kProfilerRender) +PROFILER_INFORMATION(gDrawDynamicBatchProfile, "Batch.DrawDynamic", kProfilerRender) + +static bool RenderStaticBatch (Mesh& mesh, VBO& vbo, + BatchInstanceData const* instances, size_t count, const ChannelAssigns& channels) +{ + if (count <= 1) + return false; + IndexBufferData indexBuffer; + mesh.GetIndexBufferData (indexBuffer); + if (!indexBuffer.indices) + return false; + + PROFILER_AUTO(gMeshRenderStaticBatch, &mesh) + + const SubMesh& firstSubmesh = GetSubMesh (mesh, instances[0].subsetIndex); + GfxPrimitiveType topology = firstSubmesh.topology; + const Matrix4x4f& xform = instances[0].xform; + int xformType = 
instances[0].xformType; + + GfxDevice& device = GetGfxDevice(); + device.BeginStaticBatching(channels, topology); + + // Concat SubMeshes + for (BatchInstanceData const* it = instances; it < instances + count; ++it) + { + const SubMesh& submesh = GetSubMesh (mesh, it->subsetIndex); + device.StaticBatchMesh(submesh.firstVertex, submesh.vertexCount, indexBuffer, submesh.firstByte, submesh.indexCount); + + Assert(topology == submesh.topology); + Assert(xformType == it->xformType); + } + + device.EndStaticBatching(vbo, xform, TransformType(xformType), mesh.GetChannelsInVBO()); + GPU_TIMESTAMP(); + +#if ENABLE_MULTITHREADED_CODE + // Make sure renderer is done before mesh is changed or deleted + UInt32 cpuFence = device.InsertCPUFence(); + mesh.SetCurrentCPUFence(cpuFence); +#endif + + return true; +} + +static bool RenderDynamicBatch (BatchInstanceData const* instances, size_t count, size_t maxVertices, size_t maxIndices, const ChannelAssigns& shaderChannels, UInt32 availableChannels, GfxPrimitiveType topology) +{ + if (count <= 1) + return false; + + if (gGraphicsCaps.buggyDynamicVBOWithTangents && (shaderChannels.GetSourceMap() & (1<<kShaderChannelTangent))) + return false; + + PROFILER_AUTO(gMeshRenderDynamicBatch, NULL) + + DebugAssert (topology != -1); + + GfxDevice& device = GetGfxDevice(); + UInt32 expectedFence = device.GetNextCPUFence(); + device.BeginDynamicBatching(shaderChannels, availableChannels, maxVertices, maxIndices, topology); + + // Transform on CPU + int xformType = -1; + + + for (BatchInstanceData const* it = instances; it < instances + count; ++it) + { + Assert(it->renderer); + Assert(it->renderer->GetRendererType() == kRendererMesh); + MeshRenderer* meshRenderer = (MeshRenderer*)it->renderer; + Mesh* mesh = meshRenderer->GetMeshUsedForRendering(); + if (!mesh) + continue; + + SubMesh const& submesh = GetSubMesh (*mesh, it->subsetIndex); + + Assert(topology == ~0UL || topology == submesh.topology); + Assert(xformType == -1 || xformType == 
it->xformType); + xformType = it->xformType; + + VertexBufferData vbData; + mesh->GetVertexBufferData(vbData, availableChannels); + IndexBufferData ibData; + mesh->GetIndexBufferData(ibData); + + // Make sure renderer is done before mesh is changed or deleted +#if ENABLE_MULTITHREADED_CODE + mesh->SetCurrentCPUFence(expectedFence); +#endif + + device.DynamicBatchMesh(it->xform, vbData, submesh.firstVertex, submesh.vertexCount, ibData, submesh.firstByte, submesh.indexCount); + } + + // Draw + Assert(xformType != -1); + Assert(topology != ~0UL); + + // We transformed all geometry into the world (Identity) space already. + // However, we did not normalize the normals. + // In fixed function, most GfxDevices (e.g. OpenGL & D3D) will try to figure out uniform + // scale directly from the matrix, and hence will not scale our normals. + // Therefore we upgrade normalization mode to "full normalize" to make them transform properly. + if (xformType & kUniformScaleTransform) + { + xformType &= ~kUniformScaleTransform; + xformType |= kNonUniformScaleTransform; + } + + // Caveat: we do pass identity matrix when batching + // currently normals handling in vprog is: + // xform * (normalize(normal) * unity_Scale.w); + // as we pass identity matrix (no scale) we need NOT apply inv_scale + device.SetInverseScale(1.0f); + device.EndDynamicBatching(TransformType(xformType)); + + // Insert fence after batching is complete + UInt32 fence = device.InsertCPUFence(); + Assert(fence == expectedFence); + + GPU_TIMESTAMP(); + + return true; +} + +void MeshRenderer::RenderMultiple (BatchInstanceData const* instances, size_t count, const ChannelAssigns& channels) +{ + Assert(count > 0); + + GfxDevice& device = GetGfxDevice(); + const float invScale = device.GetBuiltinParamValues().GetInstanceVectorParam(kShaderInstanceVecScale).w; + + const MaterialPropertyBlock* customProps = instances[0].renderer->GetCustomProperties(); + if (customProps) + device.SetMaterialProperties (*customProps); + + 
	const UInt32 wantedChannels = channels.GetSourceMap();
	const bool enableDynamicBatching = GetBuildSettings().enableDynamicBatching;

	// Walk the instance list; each outer iteration consumes one batch
	// (iBatchBegin..iBatchEnd) or, on failure, draws those instances one by one.
	BatchInstanceData const* instancesEnd = instances + count;
	for (BatchInstanceData const* iBatchBegin = instances; iBatchBegin != instancesEnd; )
	{
		Assert(iBatchBegin->renderer->GetRendererType() == kRendererMesh);
		MeshRenderer* meshRenderer = (MeshRenderer*)iBatchBegin->renderer;
		Mesh* mesh = meshRenderer->GetMeshUsedForRendering ();
		VBO* vbo = mesh ? mesh->GetSharedVBO (wantedChannels) : NULL;
		if (!vbo)
		{
			// Skip mesh
			++iBatchBegin;
			continue;
		}

		const UInt32 availableChannels = mesh->GetChannelsInVBO() & wantedChannels;
		const int staticBatchIndex = meshRenderer->GetMeshStaticBatchIndex ();
		const int xformType = iBatchBegin->xformType;

		const SubMesh& firstSubMesh = GetSubMesh(*mesh, iBatchBegin->subsetIndex);
		const GfxPrimitiveType topology = firstSubMesh.topology;
		size_t batchVertexCount = firstSubMesh.vertexCount;
		size_t batchIndexCount = firstSubMesh.indexCount;

		// For first strip take 1 connecting (degenerate) triangles into account
		if (topology == kPrimitiveTriangleStripDeprecated)
			batchIndexCount += 1;

		BatchInstanceData const* iBatchEnd = iBatchBegin + 1;

		// static batching
		if (staticBatchIndex != 0)
		{
			Assert(topology == kPrimitiveTriangles || topology == kPrimitiveTriangleStripDeprecated);
			const int maxIndices = GetGfxDevice().GetMaxStaticBatchIndices();

			// Greedily extend the batch while the next instance shares transform
			// type, static batch, topology, VBO and channels, and the combined
			// index count stays within device limits.
			for (; iBatchEnd != instancesEnd; ++iBatchEnd)
			{
				if (xformType != iBatchEnd->xformType)
					break;

				Assert(iBatchEnd->renderer->GetRendererType() == kRendererMesh);
				MeshRenderer* meshRenderer = (MeshRenderer*)iBatchEnd->renderer;
				if (staticBatchIndex != meshRenderer->GetMeshStaticBatchIndex())
					break;

				Mesh* nextMesh = meshRenderer->GetMeshUsedForRendering ();
				if (!nextMesh)
					break;

				const SubMesh& submesh = GetSubMesh(*nextMesh, iBatchEnd->subsetIndex);
				if (submesh.topology != topology)
					break;

				VBO* nextVbo = nextMesh->GetSharedVBO (wantedChannels);
				if (nextVbo != vbo) // also a NULL check since vbo is non-NULL
					break;

				UInt32 nextAvailableChannels = nextMesh->GetChannelsInVBO() & wantedChannels;
				if (availableChannels != nextAvailableChannels)
					break;

				UInt32 requiredIndexCount = batchIndexCount + submesh.indexCount;
				if (topology == kPrimitiveTriangleStripDeprecated)
					requiredIndexCount += 3; // take 3 connecting (degenerate) triangles into account

				// NOTE(review): signed/unsigned comparison — maxIndices is int,
				// requiredIndexCount is UInt32; fine while maxIndices >= 0.
				if (requiredIndexCount > maxIndices)
					break;

				batchIndexCount = requiredIndexCount;
			}

			// Only advance past the batch when it actually rendered; otherwise
			// the fallback loop below draws each instance individually.
			if (mesh && vbo)
				if (RenderStaticBatch (*mesh, *vbo, iBatchBegin, iBatchEnd - iBatchBegin, channels))
					iBatchBegin = iBatchEnd;
		}
		else if (vbo && enableDynamicBatching)
		// dynamic batching
		{
			const int firstVertexCount = batchVertexCount;
			const int firstIndexCount = batchIndexCount;

			// after moving to fully strided meshes we were hit by the issue that we might have different channels
			// in src and dst data, so our optimized asm routines doesn't quite work.
			// we will move to support vertex streams (this will solve lots of issues after skinning/batching asm rewrite ;-))
			// but for now let just play safe

			if (CanUseDynamicBatching(*mesh, wantedChannels, firstVertexCount) &&
				firstIndexCount < kDynamicBatchingIndicesThreshold &&
				topology != kPrimitiveLineStrip)
			{
				// Greedy extension, same idea as the static case, but additionally
				// bounded by 16-bit vertex indices (0xffff) and the dynamic
				// batching thresholds.
				for (; iBatchEnd != instancesEnd; ++iBatchEnd)
				{
					if (xformType != iBatchEnd->xformType)
						break;

					Assert(iBatchEnd->renderer->GetRendererType() == kRendererMesh);
					MeshRenderer* meshRenderer = (MeshRenderer*)iBatchEnd->renderer;
					if (meshRenderer->IsPartOfStaticBatch())
						break;

					Mesh* nextMesh = meshRenderer->GetMeshUsedForRendering ();
					if (!nextMesh)
						break;

					const SubMesh& submesh = GetSubMesh(*nextMesh, iBatchEnd->subsetIndex);
					if (submesh.topology != topology)
						break;

					if (!CanUseDynamicBatching(*nextMesh, wantedChannels, submesh.vertexCount))
						break;

					UInt32 requiredVertexCount = batchVertexCount + submesh.vertexCount;
					UInt32 requiredIndexCount = batchIndexCount + submesh.indexCount;
					if (topology == kPrimitiveTriangleStripDeprecated)
						requiredIndexCount += 3; // take 3 connecting (degenerate) triangles into account

					if (requiredVertexCount > 0xffff)
						break;

					if (requiredIndexCount > kDynamicBatchingIndicesThreshold)
						break;

					VBO* nextVbo = nextMesh->GetSharedVBO (wantedChannels);
					if (!nextVbo)
						break;

					const UInt32 nextAvailableChannels = nextMesh->GetChannelsInVBO() & wantedChannels;
					if (availableChannels != nextAvailableChannels)
						break;

					batchVertexCount = requiredVertexCount;
					batchIndexCount = requiredIndexCount;
				}

				// Skip batch if batchVertexCount == 0 or batchIndexCount == 0
				if (batchVertexCount == 0 || batchIndexCount == 0 || RenderDynamicBatch (iBatchBegin, iBatchEnd - iBatchBegin, batchVertexCount, batchIndexCount, channels, availableChannels, topology))
					iBatchBegin = iBatchEnd;
			}
		}

		// old-school rendering for anything left
		for (; iBatchBegin != iBatchEnd; ++iBatchBegin)
		{
			BatchInstanceData const* it = iBatchBegin;
			Assert(iBatchBegin->renderer->GetRendererType() == kRendererMesh);
			MeshRenderer* meshRenderer = (MeshRenderer*)iBatchBegin->renderer;
			Mesh* mesh = meshRenderer->GetMeshUsedForRendering ();
			if (!mesh)
				continue;

			VBO* vbo = mesh->GetSharedVBO (wantedChannels);
			if (!vbo)
				continue;

			if (customProps)
				device.SetMaterialProperties (*customProps);

			// Batched rendering above will have set inverse scale to 1.0 (since everything is transformed
			// to identity). For remaining meshes that aren't batched, we have to setup the original scale
			// back.
			device.SetInverseScale(invScale);
			SetupObjectMatrix (it->xform, it->xformType);
			DrawUtil::DrawVBOMeshRaw (*vbo, *mesh, channels, it->subsetIndex);
		}

		Assert(iBatchBegin == iBatchEnd); // everything was rendered successfully
	}
}

// Returns true when a mesh is eligible for CPU-side dynamic batching:
// uncompressed vertex stream, a non-empty index buffer, and vertex counts
// below the dynamic-batching thresholds (total and per-channel-weighted).
bool MeshRenderer::CanUseDynamicBatching(const Mesh& mesh, UInt32 wantedChannels, int vertexCount)
{
	if (mesh.GetStreamCompression() != Mesh::kStreamCompressionDefault ||
		mesh.GetIndexBuffer().empty() ||
		vertexCount > kDynamicBatchingVerticesThreshold ||
		vertexCount * BitsInMask(wantedChannels) > kDynamicBatchingVertsByChannelThreshold)
		return false;
	return true;
}

#endif // #if GFX_ENABLE_DRAW_CALL_BATCHING

diff --git a/Runtime/Filters/Mesh/MeshRenderer.h b/Runtime/Filters/Mesh/MeshRenderer.h new file mode 100644 index 0000000..d42c22e --- /dev/null +++ b/Runtime/Filters/Mesh/MeshRenderer.h @@ -0,0 +1,87 @@
#ifndef MESHRENDERER_H
#define MESHRENDERER_H

#include "Runtime/Filters/Renderer.h"

class Mesh;



// Renderer component that draws a (shared or per-instance) Mesh,
// participating in static and dynamic draw-call batching.
class MeshRenderer : public Renderer {
 public:
	MeshRenderer (MemLabelId label, ObjectCreationMode mode);
	// ~MeshRenderer (); declared-by-macro
	REGISTER_DERIVED_CLASS (MeshRenderer, Renderer)
	static void InitializeClass ();

	// Tag class as sealed, this makes QueryComponent faster.
+ static bool IsSealedClass () { return true; } + + static void RenderMultiple (const BatchInstanceData* instances, size_t count, const ChannelAssigns& channels); + virtual void Render (int materialIndex, const ChannelAssigns& channels); + + virtual void UpdateLocalAABB(); + + virtual void SetSubsetIndex(int subsetIndex, int index); + + virtual int GetStaticBatchIndex() const; + virtual UInt32 GetMeshIDSmall() const; + int GetMeshStaticBatchIndex() const; + + void TransformChanged (int changeMask); + void AwakeFromLoad(AwakeFromLoadMode mode); + virtual void Deactivate (DeactivateOperation operation); + + void SetSharedMesh (PPtr<Mesh> mesh); + PPtr<Mesh> GetSharedMesh (); + + Mesh& GetInstantiatedMesh (); + void SetInstantiatedMesh (Mesh* mesh); + + Mesh* GetMeshUsedForRendering(); + + void DidModifyMeshBounds (); + void DidModifyMeshValidity (); + void DidModifyMesh (); + void DidDeleteMesh (); + #if UNITY_EDITOR + float GetCachedSurfaceArea (); + virtual void GetRenderStats (RenderStats& renderStats); + #endif + + static bool CanUseDynamicBatching(const Mesh& mesh, UInt32 wantedChannels, int vertexCount); + + private: + + Mesh* GetCachedMesh (); + + ListNode<Object> m_MeshNode; + void UpdateCachedMesh (); + + void FreeScaledMesh (); + + Mesh* m_CachedMesh; + PPtr<Mesh> m_Mesh; + + struct ScaledMesh + { + Matrix4x4f matrix; + Mesh* mesh; + }; + + ScaledMesh* m_ScaledMesh; + + // as we have padding anyway, we can add more flags here + UInt8 m_ScaledMeshDirty; + // setted on responce to event to properly handle vertices changing on non-uniform scale + UInt8 m_MeshWasModified; + // for future + UInt16 m_Padding16; + + #if UNITY_EDITOR + float m_CachedSurfaceArea; + #endif + +}; + +#endif diff --git a/Runtime/Filters/Mesh/MeshSkinning.cpp b/Runtime/Filters/Mesh/MeshSkinning.cpp new file mode 100644 index 0000000..7d01667 --- /dev/null +++ b/Runtime/Filters/Mesh/MeshSkinning.cpp @@ -0,0 +1,165 @@ +#include "UnityPrefix.h" +#include "MeshSkinning.h" +#if UNITY_OSX 
#include <alloca.h> // this is really deprecated and should be exchanged for stdlib.h
#else
#include <stdlib.h>
#endif
#include "Runtime/Utilities/Utility.h"
#include "Runtime/Utilities/LogAssert.h"
#include "Runtime/Utilities/OptimizationUtility.h"
#include "Runtime/Misc/Allocator.h"
#include "Runtime/Utilities/Prefetch.h"
#include "Runtime/Profiler/TimeHelper.h"
#include "Runtime/Profiler/Profiler.h"
#include "Runtime/Misc/CPUInfo.h"
#include "Runtime/Allocator/MemoryMacros.h"
#include "Runtime/Filters/Mesh/LodMesh.h"

PROFILER_INFORMATION(gMeshSkinningProfile, "MeshSkinning.Skin", kProfilerRender)
PROFILER_INFORMATION(gMeshSkinningSlowpath, "MeshSkinning.SlowPath", kProfilerRender)

#include "MeshSkinningMobile.h"
#include "MeshSkinningSSE2.h"
#include "SkinGeneric.h"
#include "MeshBlendShaping.h"


//===========================================================================================================================================


// Generic (non-SIMD) skinning entry point: dispatches to the SkinGeneric
// template instantiation matching bones-per-vertex (1/2/4) and whether
// normals/tangents need skinning.
void SkinMesh(SkinMeshInfo& info)
{
	const TransformInstruction NormalizeTransformInstruction =
#if (UNITY_SUPPORTS_NEON && !UNITY_DISABLE_NEON_SKINNING) || UNITY_SUPPORTS_VFP
		// NOTE: optimized NEON/VFP routines do not do any normalization
		// instead we rely on GPU to do that
		kNoNormalize;
#else
		//@TODO: fix that "Fast" & "Fastest" crap. Right now "Fastest" is actually a win on PC (1ms saved in Dark Unity)
		// so I'm leaving it there for now.
		kNormalizeFastest;
#endif

	// Instantiates the right skinning template depending on the bone per vertex count
	// (bonesPerVertex values other than 1/2/4 are silently ignored here).
	#define PERMUTE_BONES(skinNormal,skinTangent) { \
		if (info.bonesPerVertex == 1) \
			SkinGeneric<NormalizeTransformInstruction, 1, skinNormal, skinTangent> (info); \
		else if (info.bonesPerVertex == 2) \
			SkinGeneric<NormalizeTransformInstruction, 2, skinNormal, skinTangent> (info); \
		else if (info.bonesPerVertex == 4) \
			SkinGeneric<NormalizeTransformInstruction, 4, skinNormal, skinTangent> (info); \
	}

	if (info.skinNormals && info.skinTangents)
		PERMUTE_BONES(true, true)
	else if (info.skinNormals)
		PERMUTE_BONES(true, false)
	else
		PERMUTE_BONES(false, false)
}


// Picks the fastest available skinning implementation for this platform:
// Wii-specific, optimized mobile (NEON/VFP), SSE2, then the generic fallback.
static void ApplyMeshSkinning (SkinMeshInfo& info)
{
	#if UNITY_WII
	SkinMeshWii(info);
	#else

	PROFILER_AUTO(gMeshSkinningProfile, NULL);

	if (SkinMeshOptimizedMobile(info))
		return;

	if (SkinMeshOptimizedSSE2(info))
		return;

	// fallback to slow generic implementation
	{
		PROFILER_AUTO(gMeshSkinningSlowpath, NULL);
		SkinMesh(info);
	}
	#endif
}

// Applies blend shapes (into a temp buffer) and then bone skinning, writing
// the deformed vertex stream to info.outVertices. With neither active it
// degenerates to a plain copy of the input stream.
void DeformSkinnedMesh (SkinMeshInfo& info)
{
	const bool hasBlendShapes = info.blendshapeCount != 0;
	const bool hasSkin = info.boneCount != 0;

	// No actual skinning can be done. Just copy vertex stream.
	// TODO: This code can be removed if we render the undeformed mesh in SkinnedMeshRenderer
	// when there is no skin and no active blend shapes. See case 557165.
	if (!hasBlendShapes && !hasSkin)
	{
		memcpy (info.outVertices, info.inVertices, info.inStride * info.vertexCount);
		return;
	}

	UInt8* tmpBlendShapes = NULL;

	// blend shapes
	if (hasBlendShapes)
	{
		// The final destination might be write-combined memory which is insanely slow to read
		// or randomly access, so always allocate a temp buffer for blend shapes (case 554830).
		// Skinning can write directly to a VB since it always writes sequentially to memory.
		size_t bufferSize = info.inStride * info.vertexCount;
		tmpBlendShapes = ALLOC_TEMP_MANUAL(UInt8, bufferSize);

		ApplyBlendShapes (info, tmpBlendShapes);

		// When skinning follows, feed it the blend-shaped vertices; otherwise
		// the blend-shape result is the final output.
		if (hasSkin)
			info.inVertices = tmpBlendShapes;
		else
			memcpy(info.outVertices, tmpBlendShapes, bufferSize);
	}

	// skinning
	if (hasSkin)
		ApplyMeshSkinning (info);

	if (tmpBlendShapes)
		FREE_TEMP_MANUAL(tmpBlendShapes);
}


// Thread-pool/job adapter: unwraps the SkinMeshInfo* payload and runs
// DeformSkinnedMesh on it.
void* DeformSkinnedMeshJob (void* rawData)
{
	SkinMeshInfo* data = reinterpret_cast<SkinMeshInfo*>(rawData);
	DeformSkinnedMesh (*data);
	return NULL;
}


// Zero-initializes all fields (the struct is POD-like; see MeshSkinning.h).
SkinMeshInfo::SkinMeshInfo()
{
	memset(this, 0, sizeof(SkinMeshInfo));
}

// Allocates one aligned buffer holding the cached bone pose matrices followed
// by the blend-shape weights; both pointers alias into allocatedBuffer.
void SkinMeshInfo::Allocate()
{
	size_t size = boneCount * sizeof(Matrix4x4f) + sizeof(float) * blendshapeCount;
	if (size == 0)
		return;

	// 64-byte alignment: matrices are consumed by SIMD/asm skinning paths.
	allocatedBuffer = (UInt8*)UNITY_MALLOC_ALIGNED(kMemSkinning, size, 64);

	UInt8* head = allocatedBuffer;
	if (boneCount != 0)
	{
		cachedPose = reinterpret_cast<Matrix4x4f*> (head);
		head += sizeof(Matrix4x4f) * boneCount;
	}

	if (blendshapeCount != 0)
{
		blendshapeWeights = reinterpret_cast<float*> (head);
	}
}

// Frees the buffer created by Allocate(); safe to call when nothing was allocated.
void SkinMeshInfo::Release() const
{
	if (allocatedBuffer)
		UNITY_FREE(kMemSkinning, allocatedBuffer);
}
diff --git a/Runtime/Filters/Mesh/MeshSkinning.h b/Runtime/Filters/Mesh/MeshSkinning.h new file mode 100644 index 0000000..b56efa9 --- /dev/null +++ b/Runtime/Filters/Mesh/MeshSkinning.h @@ -0,0 +1,64 @@
#ifndef MESHSKINNING_H
#define MESHSKINNING_H

#include "Runtime/Math/Vector3.h"
#include "Runtime/Math/Quaternion.h"
#include "Mesh.h"
#include "Runtime/Geometry/AABB.h"
#include "Runtime/GfxDevice/GfxDeviceTypes.h"
#include <vector>
#include <list>

class GPUSkinningInfo;

typedef std::vector<BoneInfluence> CompactSkin;
struct BlendShapeData;

// How skinned normals/tangents are (re)normalized by the skinning routines.
enum TransformInstruction { kNormalizeFastest = 0, kNormalizeFast = 1, kNoNormalize = 3 };
class VertexData;

// All inputs/outputs of one skinning job: source and destination vertex
// streams, bone influences, cached pose matrices and blend-shape data.
struct SkinMeshInfo
{
	int bonesPerVertex;

	// Packed influence array; interpreted as int*/BoneInfluence2*/BoneInfluence*
	// depending on bonesPerVertex (see the skinning implementations).
	void* compactSkin;
	int boneCount;
+ + const void* inVertices; + void* outVertices; + int inStride; + int outStride; + + int normalOffset; + int tangentOffset; + bool skinNormals; + bool skinTangents; + + int vertexCount; + + // This is instance data and must be double buffered so the render thread can work in paralell. + UInt8* allocatedBuffer; + Matrix4x4f* cachedPose; + float* blendshapeWeights; + + int blendshapeCount; + const BlendShapeData* blendshapes; + + bool memExport; // Is set up for memexport (Xbox) or streamout (DX11) + +#if UNITY_PS3 + const VertexData* vertexData; +#endif + + GPUSkinningInfo *mei; + + SkinMeshInfo(); + + void Allocate(); + void Release () const; +}; + +void DeformSkinnedMesh (SkinMeshInfo& info); +void* DeformSkinnedMeshJob (void* rawData); + +#endif diff --git a/Runtime/Filters/Mesh/MeshSkinningGenericSIMD.h b/Runtime/Filters/Mesh/MeshSkinningGenericSIMD.h new file mode 100644 index 0000000..0b17b42 --- /dev/null +++ b/Runtime/Filters/Mesh/MeshSkinningGenericSIMD.h @@ -0,0 +1,212 @@ +#if 0 + +/* + mircea@INFO: this doesn't do normalization. + */ + +#include "Runtime/Math/Simd/Matrix4x4Simd.h" + +template<TransformInstruction transformInstruction, int bonesPerVertexCount, +bool skinNormal, bool skinTangent, bool copy8BytesAt24Offset> +void SkinGenericSimd (SkinMeshInfo& info) +{ + DebugAssertIf( copy8BytesAt24Offset && (!info.skinNormals || info.normalOffset != 12) ); + const int* influence1 = reinterpret_cast<const int*> (info.compactSkin); + const BoneInfluence2* influence2 = reinterpret_cast<const BoneInfluence2*> (info.compactSkin); + const BoneInfluence* influence4 = reinterpret_cast<const BoneInfluence*> (info.compactSkin); + + const Matrix4x4f* bones4x4 = info.cachedPose; + + const int inStride = info.inStride; + int outStride = info.outStride; + int count = info.vertexCount; + + const int normalOffset = (copy8BytesAt24Offset ? 
12 : info.normalOffset) >> 2;
	const int tangentOffset = info.tangentOffset >> 2;

	const UInt8* inputVertex = (const UInt8*)info.inVertices;
	UInt8* outputVertex = (UInt8*)info.outVertices;

	Simd128 pose0, pose1, pose2, pose3;

	for( int v = 0; v < count; v++ )
	{
		ALIGN_LOOP_OPTIMIZATION

		// Blend the matrices first, then transform everything with this
		// blended matrix. Gives a small speed boost on XCode/Intel (11.3 to 12.00 FPS
		// in skin4 bench), and a good boost on MSVC/Windows (9.6 to 12.4 FPS).
		if (bonesPerVertexCount == 1)
		{
			// Single bone: the "blended" matrix is just that bone's matrix.
			const float* maddr = bones4x4[*influence1].m_Data;

			Prefetch(maddr);

			pose0 = V4LoadUnaligned( maddr, 0x0 );
			pose1 = V4LoadUnaligned( maddr, 0x4 );
			pose2 = V4LoadUnaligned( maddr, 0x8 );
			pose3 = V4LoadUnaligned( maddr, 0xC );
		}
		else if (bonesPerVertexCount == 2)
		{
			Prefetch(influence2);

			Simd128 weights = {influence2->weight[0], influence2->weight[1], 0, 0};

			const float* maddr0 = bones4x4[influence2->boneIndex[0]].m_Data;
			const float* maddr1 = bones4x4[influence2->boneIndex[1]].m_Data;

			Prefetch(maddr0);
			Prefetch(maddr1);

			Simd128 weight0 = V4Splat(weights, 0);
			Simd128 weight1 = V4Splat(weights, 1);

			Simd128 mat00 = V4LoadUnaligned( maddr0, 0x0 );
			Simd128 mat01 = V4LoadUnaligned( maddr0, 0x4 );
			Simd128 mat02 = V4LoadUnaligned( maddr0, 0x8 );
			Simd128 mat03 = V4LoadUnaligned( maddr0, 0xC );

			Simd128 mat10 = V4LoadUnaligned( maddr1, 0x0 );
			Simd128 mat11 = V4LoadUnaligned( maddr1, 0x4 );
			Simd128 mat12 = V4LoadUnaligned( maddr1, 0x8 );
			Simd128 mat13 = V4LoadUnaligned( maddr1, 0xC );

			// pose = mat0*w0 + mat1*w1, column by column.
			pose0 = V4Mul(mat00, weight0);
			pose1 = V4Mul(mat01, weight0);
			pose2 = V4Mul(mat02, weight0);
			pose3 = V4Mul(mat03, weight0);

			pose0 = V4MulAdd(mat10, weight1, pose0);
			pose1 = V4MulAdd(mat11, weight1, pose1);
			pose2 = V4MulAdd(mat12, weight1, pose2);
			pose3 = V4MulAdd(mat13, weight1, pose3);
		}
		else if (bonesPerVertexCount == 4)
		{
			Prefetch(influence4);

			Simd128 weights = {influence4->weight[0], influence4->weight[1], influence4->weight[2], influence4->weight[3]};

			const float* maddr0 = bones4x4[influence4->boneIndex[0]].m_Data;
			const float* maddr1 = bones4x4[influence4->boneIndex[1]].m_Data;
			const float* maddr2 = bones4x4[influence4->boneIndex[2]].m_Data;
			const float* maddr3 = bones4x4[influence4->boneIndex[3]].m_Data;

			Prefetch(maddr0);
			Prefetch(maddr1);
			Prefetch(maddr2);
			Prefetch(maddr3);

			Simd128 weight0 = V4Splat(weights, 0);
			Simd128 weight1 = V4Splat(weights, 1);
			Simd128 weight2 = V4Splat(weights, 2);
			Simd128 weight3 = V4Splat(weights, 3);

			Simd128 mat00 = V4LoadUnaligned( maddr0, 0x0 );
			Simd128 mat01 = V4LoadUnaligned( maddr0, 0x4 );
			Simd128 mat02 = V4LoadUnaligned( maddr0, 0x8 );
			Simd128 mat03 = V4LoadUnaligned( maddr0, 0xC );

			Simd128 mat10 = V4LoadUnaligned( maddr1, 0x0 );
			Simd128 mat11 = V4LoadUnaligned( maddr1, 0x4 );
			Simd128 mat12 = V4LoadUnaligned( maddr1, 0x8 );
			Simd128 mat13 = V4LoadUnaligned( maddr1, 0xC );

			Simd128 mat20 = V4LoadUnaligned( maddr2, 0x0 );
			Simd128 mat21 = V4LoadUnaligned( maddr2, 0x4 );
			Simd128 mat22 = V4LoadUnaligned( maddr2, 0x8 );
			Simd128 mat23 = V4LoadUnaligned( maddr2, 0xC );

			Simd128 mat30 = V4LoadUnaligned( maddr3, 0x0 );
			Simd128 mat31 = V4LoadUnaligned( maddr3, 0x4 );
			Simd128 mat32 = V4LoadUnaligned( maddr3, 0x8 );
			Simd128 mat33 = V4LoadUnaligned( maddr3, 0xC );

			// pose = sum of the four weighted matrices, column by column.
			pose0 = V4Mul(mat00, weight0);
			pose1 = V4Mul(mat01, weight0);
			pose2 = V4Mul(mat02, weight0);
			pose3 = V4Mul(mat03, weight0);

			pose0 = V4MulAdd(mat10, weight1, pose0);
			pose1 = V4MulAdd(mat11, weight1, pose1);
			pose2 = V4MulAdd(mat12, weight1, pose2);
			pose3 = V4MulAdd(mat13, weight1, pose3);

			pose0 = V4MulAdd(mat20, weight2, pose0);
			pose1 = V4MulAdd(mat21, weight2, pose1);
			pose2 = V4MulAdd(mat22, weight2, pose2);
			pose3 = V4MulAdd(mat23, weight2, pose3);

			pose0 = V4MulAdd(mat30, weight3, pose0);
			pose1 = V4MulAdd(mat31, weight3, pose1);
			pose2 = V4MulAdd(mat32, weight3, pose2);
			pose3 = V4MulAdd(mat33, weight3, pose3);
		}

		Prefetch(inputVertex);

		Simd128 vpos = V4LoadUnaligned((const float*)inputVertex, 0);
		TransformPoint3NATIVE(pose0, pose1, pose2, pose3, vpos, vpos);

		Simd128 vnor, vtan, ndot, tdot;

		// remember... this is a template and skinNormal & skinTangent are consts
		if(skinNormal || skinTangent)
		{
			Simd128 vlen;
			if( skinNormal )
			{
				vnor = V4LoadUnaligned((const float*)inputVertex, normalOffset);
				TransformVector3NATIVE(pose0, pose1, pose2, pose3, vnor, vnor);
				ndot = V3Dot(vnor, vnor);
			}
			else
			{
				ndot = V4Zero();
			}

			if( skinTangent )
			{
				vtan = V4LoadUnaligned((const float*)inputVertex, tangentOffset);
				TransformVector3NATIVE(pose0, pose1, pose2, pose3, vtan, vtan);
				tdot = V3Dot(vtan, vtan);
			}
			else
			{
				tdot = V4Zero();
			}

			// Normalize normal and tangent together: one reciprocal-sqrt over
			// the pair of squared lengths.
			vlen = V4MergeH(ndot, tdot);
			vlen = V4Rsqrt(vlen);

			if(skinNormal) {
				vnor = V4Mul(vnor, V4Splat(vlen, 0));
				V3StoreUnaligned(vnor, (float*)outputVertex, normalOffset);
			}

			if(skinTangent) {
				vtan = V4Mul(vtan, V4Splat(vlen, 1));
				V3StoreUnaligned(vtan, (float*)outputVertex, tangentOffset);
			}
		}

		V3StoreUnaligned(vpos, (float*)outputVertex, 0);

		if( skinTangent )
		{
			// Copy the tangent's w component (handedness) through untransformed.
			*reinterpret_cast<float*>( outputVertex + (tangentOffset<<2) + sizeof(Vector3f) ) = *reinterpret_cast<const float*>( inputVertex + (tangentOffset<<2) + sizeof(Vector3f) );
		}

		outputVertex += outStride;
		inputVertex += inStride;

		if (bonesPerVertexCount == 1)
			influence1++;
		else if (bonesPerVertexCount == 2)
			influence2++;
		if (bonesPerVertexCount == 4)
			influence4++;
	}
}
#endif
diff --git a/Runtime/Filters/Mesh/MeshSkinningMobile.h b/Runtime/Filters/Mesh/MeshSkinningMobile.h new file mode 100644 index 0000000..f6efc54 --- /dev/null +++ b/Runtime/Filters/Mesh/MeshSkinningMobile.h @@ -0,0 +1,160 @@
#if UNITY_SUPPORTS_VFP

// On these platforms the asm symbols carry a leading underscore; alias the
// plain names so C++ code can call them uniformly.
#if UNITY_ANDROID || UNITY_BB10 || UNITY_TIZEN
#define s_SkinVertices_VFP
 _s_SkinVertices_VFP
#define s_SkinVertices_NoNormals_VFP _s_SkinVertices_NoNormals_VFP
#define s_SkinVertices_Tangents_VFP _s_SkinVertices_Tangents_VFP

#define s_SkinVertices2Bones_VFP _s_SkinVertices2Bones_VFP
#define s_SkinVertices2Bones_NoNormals_VFP _s_SkinVertices2Bones_NoNormals_VFP
#define s_SkinVertices2Bones_Tangents_VFP _s_SkinVertices2Bones_Tangents_VFP

#define s_SkinVertices4Bones_VFP _s_SkinVertices4Bones_VFP
#define s_SkinVertices4Bones_NoNormals_VFP _s_SkinVertices4Bones_NoNormals_VFP
#define s_SkinVertices4Bones_Tangents_VFP _s_SkinVertices4Bones_Tangents_VFP
#endif // UNITY_ANDROID || UNITY_BB10 || UNITY_TIZEN

// Hand-written VFP skinning routines (see MeshSkinningVFP.asm — not in this
// excerpt). Each walks srcVertData..srcVertDataEnd, consuming one influence
// record per vertex and writing skinned vertices to dstVertData.
extern "C"
{
	void s_SkinVertices_VFP(const Matrix4x4f* bones4x4, const void* srcVertData, const void* srcVertDataEnd, const void* srcBoneInfluence1, void* dstVertData);
	void s_SkinVertices_NoNormals_VFP(const Matrix4x4f* bones4x4, const void* srcVertData, const void* srcVertDataEnd, const void* srcBoneInfluence1, void* dstVertData);
	void s_SkinVertices_Tangents_VFP(const Matrix4x4f* bones4x4, const void* srcVertData, const void* srcVertDataEnd, const void* srcBoneInfluence1, void* dstVertData);

	void s_SkinVertices2Bones_VFP(const Matrix4x4f* bones4x4, const void* srcVertData, const void* srcVertDataEnd, const void* srcBoneInfluence2, void* dstVertData);
	void s_SkinVertices2Bones_NoNormals_VFP(const Matrix4x4f* bones4x4, const void* srcVertData, const void* srcVertDataEnd, const void* srcBoneInfluence2, void* dstVertData);
	void s_SkinVertices2Bones_Tangents_VFP(const Matrix4x4f* bones4x4, const void* srcVertData, const void* srcVertDataEnd, const void* srcBoneInfluence2, void* dstVertData);

	void s_SkinVertices4Bones_VFP(const Matrix4x4f* bones4x4, const void* srcVertData, const void* srcVertDataEnd, const void* srcBoneInfluence4, void* dstVertData);
	void s_SkinVertices4Bones_NoNormals_VFP(const Matrix4x4f* bones4x4, const void* srcVertData, const void* srcVertDataEnd, const void* srcBoneInfluence4, void* dstVertData);
	void s_SkinVertices4Bones_Tangents_VFP(const Matrix4x4f* bones4x4, const void* srcVertData, const void* srcVertDataEnd, const void* srcBoneInfluence4, void* dstVertData);
}
#endif

#if (UNITY_SUPPORTS_NEON && !UNITY_DISABLE_NEON_SKINNING)

#if UNITY_ANDROID || UNITY_WINRT || UNITY_BB10 || UNITY_TIZEN
#define s_SkinVertices_NEON _s_SkinVertices_NEON
#define s_SkinVertices_NoNormals_NEON _s_SkinVertices_NoNormals_NEON
#define s_SkinVertices_Tangents_NEON _s_SkinVertices_Tangents_NEON

#define s_SkinVertices2Bones_NEON _s_SkinVertices2Bones_NEON
#define s_SkinVertices2Bones_NoNormals_NEON _s_SkinVertices2Bones_NoNormals_NEON
#define s_SkinVertices2Bones_Tangents_NEON _s_SkinVertices2Bones_Tangents_NEON

#define s_SkinVertices4Bones_NEON _s_SkinVertices4Bones_NEON
#define s_SkinVertices4Bones_NoNormals_NEON _s_SkinVertices4Bones_NoNormals_NEON
#define s_SkinVertices4Bones_Tangents_NEON _s_SkinVertices4Bones_Tangents_NEON

#endif // UNITY_ANDROID || UNITY_WINRT || UNITY_BB10 || UNITY_TIZEN

// Hand-written NEON skinning routines (MeshSkinningNEON.asm); same contract
// as the VFP set above, with typed influence pointers.
extern "C"
{
	void s_SkinVertices_NEON(const Matrix4x4f* bones4x4, const void* srcVertData, const void* srcVertDataEnd, const int* srcBoneInfluence1, void* dstVertData);
	void s_SkinVertices_NoNormals_NEON(const Matrix4x4f* bones4x4, const void* srcVertData, const void* srcVertDataEnd, const int* srcBoneInfluence1, void* dstVertData);
	void s_SkinVertices_Tangents_NEON(const Matrix4x4f* bones4x4, const void* srcVertData, const void* srcVertDataEnd, const int* srcBoneInfluence1, void* dstVertData);

	void s_SkinVertices2Bones_NEON(const Matrix4x4f* bones4x4, const void* srcVertData, const void* srcVertDataEnd, const BoneInfluence2* srcBoneInfluence2, void* dstVertData);
	void s_SkinVertices2Bones_NoNormals_NEON(const Matrix4x4f* bones4x4, const void* srcVertData, const void* srcVertDataEnd, const BoneInfluence2* srcBoneInfluence2, void* dstVertData);
	void s_SkinVertices2Bones_Tangents_NEON(const Matrix4x4f* bones4x4, const void* srcVertData, const void* srcVertDataEnd, const BoneInfluence2* srcBoneInfluence2, void* dstVertData);

	void s_SkinVertices4Bones_NEON(const Matrix4x4f* bones4x4, const void* srcVertData, const void* srcVertDataEnd, const BoneInfluence* srcBoneInfluences, void* dstVertData);
	void s_SkinVertices4Bones_NoNormals_NEON(const Matrix4x4f* bones4x4, const void* srcVertData, const void* srcVertDataEnd, const BoneInfluence* srcBoneInfluences, void* dstVertData);
	void s_SkinVertices4Bones_Tangents_NEON(const Matrix4x4f* bones4x4, const void* srcVertData, const void* srcVertDataEnd, const BoneInfluence* srcBoneInfluences, void* dstVertData);
}
#endif

#if UNITY_SUPPORTS_VFP || (UNITY_SUPPORTS_NEON && !UNITY_DISABLE_NEON_SKINNING)

// Dispatches one skinning job to the matching NEON/VFP asm routine.
// Returns false when no optimized routine applies (tangents without normals,
// or no NEON support at runtime on a NEON-only build) so the caller can fall
// back to the generic C++ path.
bool SkinMeshOptimizedMobile(SkinMeshInfo& info)
{
	static const size_t kPrefetchSizeBones = 4096;
	static const size_t kPrefetchSizeVertex = 512;

	const int bonesPerVertexCount = info.bonesPerVertex;
	const bool skinNormal = info.skinNormals;
	const bool skinTangent = info.skinTangents;

	// Aliasing views of the influence stream; the asm routine picks the layout
	// matching bonesPerVertexCount.
	const int* influence1 = reinterpret_cast<const int*> (info.compactSkin);
	const BoneInfluence2* influence2 = reinterpret_cast<const BoneInfluence2*> (info.compactSkin);
	const BoneInfluence* influence4 = reinterpret_cast<const BoneInfluence*> (info.compactSkin);

	const Matrix4x4f* bones4x4 = info.cachedPose;

	const int inStride = info.inStride;
	int count = info.vertexCount;

	const UInt8* inputVertex = (const UInt8*)info.inVertices;
	UInt8* outputVertex = (UInt8*)info.outVertices;

	// The asm routines have no tangents-without-normals variant.
	if (skinTangent && !skinNormal)
		return false;

	if( !UNITY_SUPPORTS_VFP && !CPUInfo::HasNEONSupport() )
	{
		ErrorString("non-NEON path not enabled!");
		return false;
	}

#if !ENABLE_MULTITHREADED_SKINNING
	PROFILER_AUTO_THREAD_SAFE(gMeshSkinningOptimized, NULL);
#endif

	// Warm the caches with the pose matrices and the first vertices (clamped
	// to fixed prefetch windows).
	Prefetch(bones4x4, std::min<size_t>(info.boneCount * sizeof(Matrix4x4f), kPrefetchSizeBones));
	Prefetch(inputVertex + inStride, std::min<size_t>(inStride * (count-1), kPrefetchSizeVertex));

#if UNITY_SUPPORTS_NEON && UNITY_SUPPORTS_VFP
// Both ISAs compiled in: choose NEON at runtime, else VFP.
#define CALL_SKIN_FUNC( name, influence ) \
do \
{ \
if (CPUInfo::HasNEONSupport()) \
	name##_NEON(bones4x4, inputVertex, (UInt8*)inputVertex + (inStride * count), influence, outputVertex); \
else \
	name##_VFP(bones4x4, inputVertex, (UInt8*)inputVertex + (inStride * count), influence, outputVertex); \
} \
while(0)
#endif
#if UNITY_SUPPORTS_NEON && !UNITY_SUPPORTS_VFP
#define CALL_SKIN_FUNC( name, influence ) name##_NEON(bones4x4, inputVertex, (UInt8*)inputVertex + (inStride * count), influence, outputVertex)
#endif
#if UNITY_SUPPORTS_VFP && !UNITY_SUPPORTS_NEON
#define CALL_SKIN_FUNC( name, influence ) name##_VFP(bones4x4, inputVertex, (UInt8*)inputVertex + (inStride * count), influence, outputVertex)
#endif

	if (bonesPerVertexCount == 1 )
	{
		if (skinNormal && skinTangent)
			CALL_SKIN_FUNC(s_SkinVertices_Tangents, influence1);
		else if( skinNormal )
			CALL_SKIN_FUNC(s_SkinVertices, influence1);
		else
			CALL_SKIN_FUNC(s_SkinVertices_NoNormals, influence1);
	}
	else if (bonesPerVertexCount == 2)
	{
		if (skinNormal && skinTangent)
			CALL_SKIN_FUNC(s_SkinVertices2Bones_Tangents, influence2);
		else if( skinNormal )
			CALL_SKIN_FUNC(s_SkinVertices2Bones, influence2);
		else
			CALL_SKIN_FUNC(s_SkinVertices2Bones_NoNormals, influence2);
	}
	else if (bonesPerVertexCount == 4)
	{
		if (skinNormal && skinTangent)
			CALL_SKIN_FUNC(s_SkinVertices4Bones_Tangents, influence4);
		else if (skinNormal)
			CALL_SKIN_FUNC(s_SkinVertices4Bones, influence4);
		else
			CALL_SKIN_FUNC(s_SkinVertices4Bones_NoNormals, influence4);
	}

	// NOTE(review): bonesPerVertexCount values other than 1/2/4 fall through
	// and still return true without skinning — presumably unreachable; verify.
	return true;
}
#else
// Stub for platforms without VFP/NEON: report "not handled" so the generic
// skinning path runs.
bool SkinMeshOptimizedMobile(SkinMeshInfo& info)
{
	return false;
}
#endif // UNITY_SUPPORTS_VFP || UNITY_SUPPORTS_NEON


diff --git a/Runtime/Filters/Mesh/MeshSkinningNEON.asm b/Runtime/Filters/Mesh/MeshSkinningNEON.asm new file mode 100644 index 0000000..494b397 --- /dev/null +++
b/Runtime/Filters/Mesh/MeshSkinningNEON.asm @@ -0,0 +1,527 @@
	AREA .text, CODE

	EXPORT _s_SkinVertices_NEON
	EXPORT _s_SkinVertices_NoNormals_NEON
	EXPORT _s_SkinVertices_Tangents_NEON
	EXPORT _s_SkinVertices2Bones_NEON
	EXPORT _s_SkinVertices2Bones_NoNormals_NEON
	EXPORT _s_SkinVertices2Bones_Tangents_NEON
	EXPORT _s_SkinVertices4Bones_NEON
	EXPORT _s_SkinVertices4Bones_NoNormals_NEON
	EXPORT _s_SkinVertices4Bones_Tangents_NEON

; NOTE(review): arguments follow the extern "C" declarations in
; MeshSkinningMobile.h / AAPCS: r0 = bones4x4, r1 = srcVertData,
; r2 = srcVertDataEnd, r3 = influence stream, [sp] (read via ip) = dstVertData.
; Bone matrices are 64 bytes, hence the "lsl #6" when indexing by bone.

; 1 bone per vertex, position + normal (12-byte stride per attribute).
|_s_SkinVertices_NEON| PROC
	mov ip, sp
	vpush {d8-d10}
	stmdb sp!, {r4, r5, r6, r7, r8}
	ldr.w r4, [ip]                  ; r4 = dstVertData (5th arg, on stack)
	mov.w r8, #12                   ; r8 = attribute stride in bytes
	ldr.w r5, [r3], #4              ; first bone index
	add.w r7, r0, r5, lsl #6        ; r7 = &bones4x4[index] (64-byte matrices)

|_s_SkinVertices_NEON_loop|
	vld1.32 {d24-d27}, [r7@128]!    ; q12,q13 = matrix columns 0,1
	vld1.32 {d28-d31}, [r7@128]     ; q14,q15 = matrix columns 2,3
	vld1.32 {d6-d8}, [r1@64]!       ; load position + normal
	vmul.f32 q0, q12, d6[0]
	vmul.f32 q1, q12, d7[1]
	cmp r1, r2                      ; reached srcVertDataEnd?
	pld [r1, #256] ; 0x100
	vmla.f32 q0, q13, d6[1]
	vmla.f32 q1, q13, d8[0]
	it cc
	ldrcc.w r5, [r3], #4            ; preload next bone index while FP pipe is busy
	add.w r7, r0, r5, lsl #6
	vmla.f32 q0, q14, d7[0]
	vmla.f32 q1, q14, d8[1]
	pld [r7]
	vadd.f32 q0, q0, q15            ; + translation column
	vst1.32 {d0-d1}, [r4], r8       ; store skinned position
	vst1.32 {d2-d3}, [r4], r8       ; store skinned normal (not normalized)
	bcc.w |_s_SkinVertices_NEON_loop|
	ldmia.w sp!, {r4, r5, r6, r7, r8}
	vpop {d8-d10}
	bx lr
	ENDP


; 1 bone per vertex, position only.
|_s_SkinVertices_NoNormals_NEON| PROC
	mov ip, sp
	vpush {d8-d10}
	stmdb sp!, {r4, r5, r6, r7, r8}
	ldr.w r4, [ip]
	mov.w r8, #12
	ldr.w r5, [r3], #4
	add.w r7, r0, r5, lsl #6

|_s_SkinVertices_NoNormals_NEON_loop|
	vld1.32 {d24-d27}, [r7@128]!
	vld1.32 {d28-d31}, [r7@128]
	vld1.32 {d6-d7}, [r1], r8
	vmul.f32 q0, q12, d6[0]
	cmp r1, r2
	pld [r1, #256] ; 0x100
	vmla.f32 q0, q13, d6[1]
	it cc
	ldrcc.w r5, [r3], #4
	add.w r7, r0, r5, lsl #6
	vmla.f32 q0, q14, d7[0]
	pld [r7]
	vadd.f32 q0, q0, q15
	vst1.32 {d0-d1}, [r4], r8
	bcc.w |_s_SkinVertices_NoNormals_NEON_loop|
	ldmia.w sp!, {r4, r5, r6, r7, r8}
	vpop {d8-d10}
	bx lr
	ENDP


; 1 bone per vertex, position + normal + tangent (tangent w passed through
; via the "vmov.f32 s11, s21" lane copy).
|_s_SkinVertices_Tangents_NEON| PROC
	mov ip, sp
	vpush {d8-d10}
	stmdb sp!, {r4, r5, r6, r7, r8}
	ldr.w r4, [ip]
	mov.w r8, #12
	ldr.w r5, [r3], #4
	add.w r7, r0, r5, lsl #6

|_s_SkinVertices_Tangents_NEON_loop|
	vld1.32 {d24-d27}, [r7@128]!
	vld1.32 {d28-d31}, [r7@128]
	vld1.32 {d6-d8}, [r1@64]!
	vld1.32 {d9-d10}, [r1@64]!
	vmul.f32 q0, q12, d6[0]
	vmul.f32 q1, q12, d7[1]
	vmul.f32 q2, q12, d9[0]
	cmp r1, r2
	pld [r1, #256] ; 0x100
	vmla.f32 q0, q13, d6[1]
	vmla.f32 q1, q13, d8[0]
	vmla.f32 q2, q13, d9[1]
	it cc
	ldrcc.w r5, [r3], #4
	add.w r7, r0, r5, lsl #6
	vmla.f32 q0, q14, d7[0]
	vmla.f32 q1, q14, d8[1]
	vmla.f32 q2, q14, d10[0]
	pld [r7]
	vadd.f32 q0, q0, q15
	vmov.f32 s11, s21               ; tangent.w copied from source, untransformed
	vst1.32 {d0-d1}, [r4], r8
	vst1.32 {d2-d3}, [r4], r8
	vst1.32 {d4-d5}, [r4]!          ; full 16-byte tangent store
	bcc.w |_s_SkinVertices_Tangents_NEON_loop|
	ldmia.w sp!, {r4, r5, r6, r7, r8}
	vpop {d8-d10}
	bx lr
	ENDP


; 2 bones per vertex, position + normal. The matrix blend
; (pose = m0*w0 + m1*w1) for the NEXT vertex is software-pipelined across the
; loop: the preamble blends the first pose before entering the loop.
|_s_SkinVertices2Bones_NEON| PROC
	mov ip, sp
	vpush {d8-d11}
	stmdb sp!, {r4, r5, r6, r7, r8, sl}
	ldr.w r4, [ip]                  ; r4 = dstVertData
	vld1.32 {d11}, [r3]!            ; d11 = two bone weights
	ldmia r3!, {r5, r6}             ; r5,r6 = two bone indices
	add.w r7, r0, r5, lsl #6
	vld1.32 {d16-d19}, [r7@128]!
	vmul.f32 q12, q8, d11[0]
	vmul.f32 q13, q9, d11[0]
	vld1.32 {d20-d23}, [r7@128]
	add.w r7, r0, r6, lsl #6
	vmul.f32 q14, q10, d11[0]
	vmul.f32 q15, q11, d11[0]
	vld1.32 {d16-d19}, [r7@128]!
	vmla.f32 q12, q8, d11[1]
	vmla.f32 q13, q9, d11[1]
	ldr r5, [r3, #8]
	mov.w r8, #12
	sub.w sl, r2, #24               ; sl = end - one vertex (pipelining guard)
	vld1.32 {d20-d23}, [r7@128]
	vmla.f32 q14, q10, d11[1]
	nop                             ; alignment padding for the loop head

|_s_SkinVertices2Bones_NEON_loop|
	cmp r1, sl
	add.w r7, r0, r5, lsl #6
	it cc
	ldrcc r6, [r3, #12]
	vld1.32 {d6-d8}, [r1@64]!
	vmla.f32 q15, q11, d11[1]
	vmul.f32 q0, q12, d6[0]
	vld1.32 {d16-d19}, [r7@128]!
	cmp r1, sl
	vmul.f32 q1, q12, d7[1]
	vld1.32 {d11}, [r3]
	vmul.f32 q12, q8, d11[0]
	pld [r1, #256] ; 0x100
	vmla.f32 q0, q13, d6[1]
	vld1.32 {d20-d23}, [r7@128]
	add.w r7, r0, r6, lsl #6
	vmla.f32 q1, q13, d8[0]
	it cc
	ldrcc r5, [r3, #24]
	vmul.f32 q13, q9, d11[0]
	vmla.f32 q0, q14, d7[0]
	cmp r1, r2
	vmla.f32 q1, q14, d8[1]
	vld1.32 {d16-d19}, [r7@128]!
	vmul.f32 q14, q10, d11[0]
	vadd.f32 q0, q0, q15
	vmul.f32 q15, q11, d11[0]
	vld1.32 {d20-d23}, [r7@128]
	vmla.f32 q12, q8, d11[1]
	vst1.32 {d0-d1}, [r4], r8
	vmla.f32 q13, q9, d11[1]
	vst1.32 {d2-d3}, [r4], r8
	add.w r3, r3, #16               ; advance to next BoneInfluence2 record
	vmla.f32 q14, q10, d11[1]
	bcc.w |_s_SkinVertices2Bones_NEON_loop|
	ldmia.w sp!, {r4, r5, r6, r7, r8, sl}
	vpop {d8-d11}
	bx lr
	ENDP


; 2 bones per vertex, position only. Same software-pipelined structure as
; above, with a 12-byte (position-only) vertex.
|_s_SkinVertices2Bones_NoNormals_NEON| PROC
	mov ip, sp
	vpush {d8-d11}
	stmdb sp!, {r4, r5, r6, r7, r8, sl}
	ldr.w r4, [ip]
	vld1.32 {d11}, [r3]!
	ldmia r3!, {r5, r6}
	add.w r7, r0, r5, lsl #6
	vld1.32 {d16-d19}, [r7@128]!
	vmul.f32 q12, q8, d11[0]
	vmul.f32 q13, q9, d11[0]
	vld1.32 {d20-d23}, [r7@128]
	add.w r7, r0, r6, lsl #6
	vmul.f32 q14, q10, d11[0]
	vmul.f32 q15, q11, d11[0]
	vld1.32 {d16-d19}, [r7@128]!
	vmla.f32 q12, q8, d11[1]
	vmla.f32 q13, q9, d11[1]
	ldr r5, [r3, #8]
	mov.w r8, #12
	sub.w sl, r2, #12
	vld1.32 {d20-d23}, [r7@128]
	vmla.f32 q14, q10, d11[1]
	nop
	nop.w

|_s_SkinVertices2Bones_NoNormals_NEON_loop|
	cmp r1, sl
	add.w r7, r0, r5, lsl #6
	it cc
	ldrcc r6, [r3, #12]
	vld1.32 {d6-d7}, [r1], r8
	vmla.f32 q15, q11, d11[1]
	vmul.f32 q0, q12, d6[0]
	vld1.32 {d16-d19}, [r7@128]!
	cmp r1, sl
	vld1.32 {d11}, [r3]
	vmul.f32 q12, q8, d11[0]
	pld [r1, #256] ; 0x100
	vmla.f32 q0, q13, d6[1]
	vld1.32 {d20-d23}, [r7@128]
	add.w r7, r0, r6, lsl #6
	it cc
	ldrcc r5, [r3, #24]
	vmul.f32 q13, q9, d11[0]
	vmla.f32 q0, q14, d7[0]
	cmp r1, r2
	vld1.32 {d16-d19}, [r7@128]!
	vmul.f32 q14, q10, d11[0]
	vadd.f32 q0, q0, q15
	vmul.f32 q15, q11, d11[0]
	vld1.32 {d20-d23}, [r7@128]
	vmla.f32 q12, q8, d11[1]
	vst1.32 {d0-d1}, [r4], r8
	vmla.f32 q13, q9, d11[1]
	add.w r3, r3, #16
	vmla.f32 q14, q10, d11[1]
	bcc.w |_s_SkinVertices2Bones_NoNormals_NEON_loop|
	ldmia.w sp!, {r4, r5, r6, r7, r8, sl}
	vpop {d8-d11}
	bx lr
	ENDP


; 2 bones per vertex, position + normal + tangent.
; NOTE: this routine continues past the end of this excerpt.
|_s_SkinVertices2Bones_Tangents_NEON| PROC
	mov ip, sp
	vpush {d8-d11}
	stmdb sp!, {r4, r5, r6, r7, r8, sl}
	ldr.w r4, [ip]
	vld1.32 {d11}, [r3]!
	ldmia r3!, {r5, r6}
	add.w r7, r0, r5, lsl #6
	vld1.32 {d16-d19}, [r7@128]!
	vmul.f32 q12, q8, d11[0]
	vmul.f32 q13, q9, d11[0]
	vld1.32 {d20-d23}, [r7@128]
	add.w r7, r0, r6, lsl #6
	vmul.f32 q14, q10, d11[0]
	vmul.f32 q15, q11, d11[0]
	vld1.32 {d16-d19}, [r7@128]!
	vmla.f32 q12, q8, d11[1]
	vmla.f32 q13, q9, d11[1]
	ldr r5, [r3, #8]
	mov.w r8, #12
	sub.w sl, r2, #40 ; 0x28
	vld1.32 {d20-d23}, [r7@128]
	vmla.f32 q14, q10, d11[1]
	nop
	nop.w

|_s_SkinVertices2Bones_Tangents_NEON_loop|
	cmp r1, sl
	add.w r7, r0, r5, lsl #6
	it cc
	ldrcc r6, [r3, #12]
	vld1.32 {d6-d8}, [r1@64]!
	vmla.f32 q15, q11, d11[1]
	vld1.32 {d9-d10}, [r1@64]!
	vmul.f32 q0, q12, d6[0]
	vld1.32 {d16-d19}, [r7@128]!
	cmp r1, sl
	vmul.f32 q1, q12, d7[1]
	vmul.f32 q2, q12, d9[0]
	vld1.32 {d11}, [r3]
	vmul.f32 q12, q8, d11[0]
	pld [r1, #256] ; 0x100
	vmla.f32 q0, q13, d6[1]
	vld1.32 {d20-d23}, [r7@128]
	add.w r7, r0, r6, lsl #6
	vmla.f32 q1, q13, d8[0]
	vmla.f32 q2, q13, d9[1]
	it cc
	ldrcc r5, [r3, #24]
	vmul.f32 q13, q9, d11[0]
	vmla.f32 q0, q14, d7[0]
	cmp r1, r2
	vmla.f32 q1, q14, d8[1]
	vmla.f32 q2, q14, d10[0]
	vld1.32 {d16-d19}, [r7@128]!
+ vmul.f32 q14, q10, d11[0] + vadd.f32 q0, q0, q15 + vmov.f32 s11, s21 + vmul.f32 q15, q11, d11[0] + vld1.32 {d20-d23}, [r7@128] + vmla.f32 q12, q8, d11[1] + vst1.32 {d0-d1}, [r4], r8 + vmla.f32 q13, q9, d11[1] + vst1.32 {d2-d3}, [r4], r8 + add.w r3, r3, #16 + vmla.f32 q14, q10, d11[1] + vst1.32 {d4-d5}, [r4]! + bcc.w |_s_SkinVertices2Bones_Tangents_NEON_loop| + ldmia.w sp!, {r4, r5, r6, r7, r8, sl} + vpop {d8-d11} + bx lr + ENDP + + +|_s_SkinVertices4Bones_NEON| PROC + mov ip, sp + vpush {d8-d12} + stmdb sp!, {r4, r5, r6, r7, r8} + ldr.w r4, [ip] + vld1.32 {d11-d12}, [r3]! + ldmia r3!, {r5, r6} + add.w r7, r0, r5, lsl #6 + vld1.32 {d16-d19}, [r7@128]! + vld1.32 {d20-d23}, [r7@128] + mov.w r8, #12 + nop.w + nop.w + nop.w + +|_s_SkinVertices4Bones_NEON_loop| + vmul.f32 q12, q8, d11[0] + vld1.32 {d6-d8}, [r1@64]! + vmul.f32 q13, q9, d11[0] + add.w r7, r0, r6, lsl #6 + vmul.f32 q14, q10, d11[0] + vld1.32 {d16-d19}, [r7@128]! + vmul.f32 q15, q11, d11[0] + vld1.32 {d20-d23}, [r7@128] + vmla.f32 q12, q8, d11[1] + ldmia r3!, {r5, r6} + vmla.f32 q13, q9, d11[1] + add.w r7, r0, r5, lsl #6 + cmp r1, r2 + vmla.f32 q14, q10, d11[1] + vld1.32 {d16-d19}, [r7@128]! + vmla.f32 q15, q11, d11[1] + pld [r3, #256] ; 0x100 + vld1.32 {d20-d23}, [r7@128] + vmla.f32 q12, q8, d12[0] + add.w r7, r0, r6, lsl #6 + vmla.f32 q13, q9, d12[0] + vmla.f32 q14, q10, d12[0] + vld1.32 {d16-d19}, [r7@128]! + vmla.f32 q15, q11, d12[0] + vld1.32 {d20-d23}, [r7@128] + vmla.f32 q12, q8, d12[1] + vmla.f32 q13, q9, d12[1] + vmla.f32 q14, q10, d12[1] + vmla.f32 q15, q11, d12[1] + pld [r1, #256] ; 0x100 + vmul.f32 q0, q12, d6[0] + vld1.32 {d11-d12}, [r3]! 
+ vmul.f32 q1, q12, d7[1] + it cc + ldmiacc r3!, {r5, r6} + vmla.f32 q0, q13, d6[1] + add.w r7, r0, r5, lsl #6 + vmla.f32 q1, q13, d8[0] + vldmia r7, {d16-d23} + vmla.f32 q0, q14, d7[0] + vmla.f32 q1, q14, d8[1] + vadd.f32 q0, q0, q15 + vst1.32 {d0-d1}, [r4], r8 + vst1.32 {d2-d3}, [r4], r8 + bcc.w |_s_SkinVertices4Bones_NEON_loop| + ldmia.w sp!, {r4, r5, r6, r7, r8} + vpop {d8-d12} + bx lr + ENDP + + +|_s_SkinVertices4Bones_NoNormals_NEON| PROC + mov ip, sp + vpush {d8-d12} + stmdb sp!, {r4, r5, r6, r7, r8} + ldr.w r4, [ip] + vld1.32 {d11-d12}, [r3]! + ldmia r3!, {r5, r6} + add.w r7, r0, r5, lsl #6 + vld1.32 {d16-d19}, [r7@128]! + vld1.32 {d20-d23}, [r7@128] + mov.w r8, #12 + nop + nop.w + +|_s_SkinVertices4Bones_NoNormals_NEON_loop| + vmul.f32 q12, q8, d11[0] + vld1.32 {d6-d7}, [r1], r8 + vmul.f32 q13, q9, d11[0] + add.w r7, r0, r6, lsl #6 + vmul.f32 q14, q10, d11[0] + vld1.32 {d16-d19}, [r7@128]! + vmul.f32 q15, q11, d11[0] + vld1.32 {d20-d23}, [r7@128] + vmla.f32 q12, q8, d11[1] + ldmia r3!, {r5, r6} + vmla.f32 q13, q9, d11[1] + add.w r7, r0, r5, lsl #6 + cmp r1, r2 + vmla.f32 q14, q10, d11[1] + vld1.32 {d16-d19}, [r7@128]! + vmla.f32 q15, q11, d11[1] + pld [r3, #256] ; 0x100 + vld1.32 {d20-d23}, [r7@128] + vmla.f32 q12, q8, d12[0] + add.w r7, r0, r6, lsl #6 + vmla.f32 q13, q9, d12[0] + vmla.f32 q14, q10, d12[0] + vld1.32 {d16-d19}, [r7@128]! + vmla.f32 q15, q11, d12[0] + vld1.32 {d20-d23}, [r7@128] + vmla.f32 q12, q8, d12[1] + vmla.f32 q13, q9, d12[1] + vmla.f32 q14, q10, d12[1] + vmla.f32 q15, q11, d12[1] + pld [r1, #256] ; 0x100 + vmul.f32 q0, q12, d6[0] + vld1.32 {d11-d12}, [r3]! 
+ it cc + ldmiacc r3!, {r5, r6} + vmla.f32 q0, q13, d6[1] + add.w r7, r0, r5, lsl #6 + vldmia r7, {d16-d23} + vmla.f32 q0, q14, d7[0] + vadd.f32 q0, q0, q15 + vst1.32 {d0-d1}, [r4], r8 + bcc.w |_s_SkinVertices4Bones_NoNormals_NEON_loop| + ldmia.w sp!, {r4, r5, r6, r7, r8} + vpop {d8-d12} + bx lr + ENDP + + +|_s_SkinVertices4Bones_Tangents_NEON| PROC + mov ip, sp + vpush {d8-d12} + stmdb sp!, {r4, r5, r6, r7, r8} + ldr.w r4, [ip] + vld1.32 {d11-d12}, [r3]! + ldmia r3!, {r5, r6} + add.w r7, r0, r5, lsl #6 + vld1.32 {d16-d19}, [r7@128]! + vld1.32 {d20-d23}, [r7@128] + mov.w r8, #12 + nop + nop.w + +|_s_SkinVertices4Bones_Tangents_NEON_loop| + vmul.f32 q12, q8, d11[0] + vld1.32 {d6-d8}, [r1@64]! + vmul.f32 q13, q9, d11[0] + vld1.32 {d9-d10}, [r1@64]! + add.w r7, r0, r6, lsl #6 + vmul.f32 q14, q10, d11[0] + vld1.32 {d16-d19}, [r7@128]! + vmul.f32 q15, q11, d11[0] + vld1.32 {d20-d23}, [r7@128] + vmla.f32 q12, q8, d11[1] + ldmia r3!, {r5, r6} + vmla.f32 q13, q9, d11[1] + add.w r7, r0, r5, lsl #6 + cmp r1, r2 + vmla.f32 q14, q10, d11[1] + vld1.32 {d16-d19}, [r7@128]! + vmla.f32 q15, q11, d11[1] + pld [r3, #256] ; 0x100 + vld1.32 {d20-d23}, [r7@128] + vmla.f32 q12, q8, d12[0] + add.w r7, r0, r6, lsl #6 + vmla.f32 q13, q9, d12[0] + vmla.f32 q14, q10, d12[0] + vld1.32 {d16-d19}, [r7@128]! + vmla.f32 q15, q11, d12[0] + vld1.32 {d20-d23}, [r7@128] + vmla.f32 q12, q8, d12[1] + vmla.f32 q13, q9, d12[1] + vmla.f32 q14, q10, d12[1] + vmla.f32 q15, q11, d12[1] + pld [r1, #256] ; 0x100 + vmul.f32 q0, q12, d6[0] + vld1.32 {d11-d12}, [r3]! + vmul.f32 q1, q12, d7[1] + vmul.f32 q2, q12, d9[0] + it cc + ldmiacc r3!, {r5, r6} + vmla.f32 q0, q13, d6[1] + add.w r7, r0, r5, lsl #6 + vmla.f32 q1, q13, d8[0] + vmla.f32 q2, q13, d9[1] + vldmia r7, {d16-d23} + vmla.f32 q0, q14, d7[0] + vmla.f32 q1, q14, d8[1] + vmla.f32 q2, q14, d10[0] + vadd.f32 q0, q0, q15 + vmov.f32 s11, s21 + vst1.32 {d0-d1}, [r4], r8 + vst1.32 {d2-d3}, [r4], r8 + vst1.32 {d4-d5}, [r4]! 
+ bcc.w |_s_SkinVertices4Bones_Tangents_NEON_loop| + ldmia.w sp!, {r4, r5, r6, r7, r8} + vpop {d8-d12} + bx lr + nop + ENDP + + + END diff --git a/Runtime/Filters/Mesh/MeshSkinningNEON.s b/Runtime/Filters/Mesh/MeshSkinningNEON.s new file mode 100644 index 0000000..e94542d --- /dev/null +++ b/Runtime/Filters/Mesh/MeshSkinningNEON.s @@ -0,0 +1,183 @@ +#define UNITY_ASSEMBLER +#include "Configuration/PrefixConfigure.h" + +#if (UNITY_SUPPORTS_NEON && !UNITY_DISABLE_NEON_SKINNING) + +.set device,0 +.set device,__arm__ + +.if device + +//.code32 + +.globl _s_SkinVertices_NEON +.globl _s_SkinVertices_NoNormals_NEON +.globl _s_SkinVertices_Tangents_NEON + +.globl _s_SkinVertices2Bones_NEON +.globl _s_SkinVertices2Bones_NoNormals_NEON +.globl _s_SkinVertices2Bones_Tangents_NEON + +.globl _s_SkinVertices4Bones_NEON +.globl _s_SkinVertices4Bones_NoNormals_NEON +.globl _s_SkinVertices4Bones_Tangents_NEON + +#if UNITY_ANDROID +.hidden _s_SkinVertices_NEON +.hidden _s_SkinVertices_NoNormals_NEON +.hidden _s_SkinVertices_Tangents_NEON + +.hidden _s_SkinVertices2Bones_NEON +.hidden _s_SkinVertices2Bones_NoNormals_NEON +.hidden _s_SkinVertices2Bones_Tangents_NEON + +.hidden _s_SkinVertices4Bones_NEON +.hidden _s_SkinVertices4Bones_NoNormals_NEON +.hidden _s_SkinVertices4Bones_Tangents_NEON +#endif + + +//=========================================================================================================================================== + +#define SKIN_POS 1 +#define SKIN_POS_NRM 2 +#define SKIN_POS_NRM_TAN 3 + + +#define SKIN_2BONES 0 +#define SKIN_4BONES 0 + +_s_SkinVertices_NEON: + +#define SKIN_1BONE SKIN_POS_NRM +#define VERTEX_SZ 24 +#define LOOP_NAME _s_SkinVertices_NEON_loop + +#include "MeshSkinningNeon_Loop.h" + +#undef LOOP_NAME +#undef VERTEX_SZ +#undef SKIN_1BONE + +_s_SkinVertices_NoNormals_NEON: + +#define SKIN_1BONE SKIN_POS +#define VERTEX_SZ 12 +#define LOOP_NAME _s_SkinVertices_NoNormals_NEON_loop + +#include "MeshSkinningNeon_Loop.h" + +#undef LOOP_NAME 
+#undef VERTEX_SZ +#undef SKIN_1BONE + +_s_SkinVertices_Tangents_NEON: + +#define SKIN_1BONE SKIN_POS_NRM_TAN +#define VERTEX_SZ 40 +#define LOOP_NAME _s_SkinVertices_Tangents_NEON_loop + +#include "MeshSkinningNeon_Loop.h" + +#undef LOOP_NAME +#undef VERTEX_SZ +#undef SKIN_1BONE + +#undef SKIN_4BONES +#undef SKIN_2BONES + +//=========================================================================================================================================== + +#define SKIN_1BONE 0 +#define SKIN_4BONES 0 + +_s_SkinVertices2Bones_NEON: + +#define SKIN_2BONES SKIN_POS_NRM +#define VERTEX_SZ 24 +#define LOOP_NAME _s_SkinVertices2Bones_NEON_loop + +#include "MeshSkinningNeon_Loop.h" + +#undef LOOP_NAME +#undef VERTEX_SZ +#undef SKIN_2BONES + +_s_SkinVertices2Bones_NoNormals_NEON: + +#define SKIN_2BONES SKIN_POS +#define VERTEX_SZ 12 +#define LOOP_NAME _s_SkinVertices2Bones_NoNormals_NEON_loop + +#include "MeshSkinningNeon_Loop.h" + +#undef LOOP_NAME +#undef VERTEX_SZ +#undef SKIN_2BONES + +_s_SkinVertices2Bones_Tangents_NEON: + +#define SKIN_2BONES SKIN_POS_NRM_TAN +#define VERTEX_SZ 40 +#define LOOP_NAME _s_SkinVertices2Bones_Tangents_NEON_loop + +#include "MeshSkinningNeon_Loop.h" + +#undef LOOP_NAME +#undef VERTEX_SZ +#undef SKIN_2BONES + +#undef SKIN_4BONES +#undef SKIN_1BONE + + +//=========================================================================================================================================== + +#define SKIN_1BONE 0 +#define SKIN_2BONES 0 + +_s_SkinVertices4Bones_NEON: + +#define SKIN_4BONES SKIN_POS_NRM +#define VERTEX_SZ 24 +#define LOOP_NAME _s_SkinVertices4Bones_NEON_loop + +#include "MeshSkinningNeon_Loop.h" + +#undef LOOP_NAME +#undef VERTEX_SZ +#undef SKIN_4BONES + +_s_SkinVertices4Bones_NoNormals_NEON: + +#define SKIN_4BONES SKIN_POS +#define VERTEX_SZ 12 +#define LOOP_NAME _s_SkinVertices4Bones_NoNormals_NEON_loop + +#include "MeshSkinningNeon_Loop.h" + +#undef LOOP_NAME +#undef VERTEX_SZ +#undef SKIN_4BONES + 
+_s_SkinVertices4Bones_Tangents_NEON: + +#define SKIN_4BONES SKIN_POS_NRM_TAN +#define VERTEX_SZ 40 +#define LOOP_NAME _s_SkinVertices4Bones_Tangents_NEON_loop + +#include "MeshSkinningNeon_Loop.h" + +#undef LOOP_NAME +#undef VERTEX_SZ +#undef SKIN_4BONES + + +#undef SKIN_2BONES +#undef SKIN_1BONE + +//=========================================================================================================================================== + +.endif + +#endif diff --git a/Runtime/Filters/Mesh/MeshSkinningNeon_Loop.h b/Runtime/Filters/Mesh/MeshSkinningNeon_Loop.h new file mode 100644 index 0000000..8e584da --- /dev/null +++ b/Runtime/Filters/Mesh/MeshSkinningNeon_Loop.h @@ -0,0 +1,487 @@ + +// defines +// SKIN_1BONE +// SKIN_2BONES +// SKIN_4BONES +// LOOP_NAME +// VERTEX_SZ + +// skin types +// SKIN_POS +// SKIN_POS_NRM +// SKIN_POS_NRM_TAN + + + +//r0: const void* bones4x4 +//r1: const void* srcVertData +//r2: const void* srcVertDataEnd +//r3: const BoneInfluence4* srcBoneInfluence4 +//[sp+0] -> r4: const void* dstVertData + +// r5, r6: index +// r7: matrix address +// r8: 12 (offset for vector3) + +// q0 <- output: pos +// q1 <- output: nrm +// q2 <- output: tan +// q3 <- input: pos +// q4 <- input: nrm +// q5 <- input: tan +// d11,d12 <- weights +// q12-q15 (blended matrix) +// q8-q11 (cur matrix) + + +// input: +// d6[0], d6[1], d7[0] <- pos +// d7[1], d8[0], d8[1] <- nrm +// d9[0], d9[1], d10[0], d10[1] <- tan +// q3 <- pos.x, pos.y, pos.z, nrm.x +// q4 <- nrm.y, nrm.z, tan.x, tan.y +// q5 <- tan.z, tan.w, w0, w1 + + +//=========================================================================================================================================== +// +// Common + +#define CALC_POS_1 vmul.f32 q0, q12, d6[0] +#define CALC_POS_2 vmla.f32 q0, q13, d6[1] +#define CALC_POS_3 vmla.f32 q0, q14, d7[0] +#define CALC_POS_4 vadd.f32 q0, q15 + +#define STORE_POS vst1.32 {d0, d1}, [r4], r8 + +#if (SKIN_1BONE == SKIN_POS_NRM) || (SKIN_1BONE == SKIN_POS_NRM_TAN) \ 
+ || (SKIN_2BONES == SKIN_POS_NRM) || (SKIN_2BONES == SKIN_POS_NRM_TAN) \ + || (SKIN_4BONES == SKIN_POS_NRM) || (SKIN_4BONES == SKIN_POS_NRM_TAN) + + #define LOAD_POS_NRM vld1.32 {d6, d7, d8}, [r1, :64]! + #define STORE_NRM vst1.32 {d2, d3}, [r4], r8 + #define CALC_NRM_1 vmul.f32 q1, q12, d7[1] + #define CALC_NRM_2 vmla.f32 q1, q13, d8[0] + #define CALC_NRM_3 vmla.f32 q1, q14, d8[1] +#else + #define LOAD_POS_NRM vld1.32 {d6, d7}, [r1], r8 + #define STORE_NRM + #define CALC_NRM_1 + #define CALC_NRM_2 + #define CALC_NRM_3 +#endif + +#if (SKIN_1BONE == SKIN_POS_NRM_TAN) || (SKIN_2BONES == SKIN_POS_NRM_TAN) || (SKIN_4BONES == SKIN_POS_NRM_TAN) + #define LOAD_TAN vld1.32 {d9, d10}, [r1, :64]! + #define STORE_TAN vst1.32 {d4, d5}, [r4]! + #define CALC_TAN_1 vmul.f32 q2, q12, d9[0] + #define CALC_TAN_2 vmla.f32 q2, q13, d9[1] + #define CALC_TAN_3 vmla.f32 q2, q14, d10[0] + #define CALC_TAN_4 vmov.f32 s11, s21 +#else + #define LOAD_TAN + #define STORE_TAN + #define CALC_TAN_1 + #define CALC_TAN_2 + #define CALC_TAN_3 + #define CALC_TAN_4 +#endif + +// right after vertex-data will be copy-data stream, so be careful to not overwrite anything +#if (SKIN_1BONE == SKIN_POS) || (SKIN_2BONES == SKIN_POS) || (SKIN_4BONES == SKIN_POS) +#define STORE_POS_LAST1 vst1.32 {d0}, [r4]! +#define STORE_POS_LAST2 vst1.32 {d1[0]}, [r4]! +#else +#define STORE_POS_LAST1 STORE_POS +#define STORE_POS_LAST2 +#endif + +#if (SKIN_1BONE == SKIN_POS_NRM) || (SKIN_2BONES == SKIN_POS_NRM) || (SKIN_4BONES == SKIN_POS_NRM) +#define STORE_NRM_LAST1 vst1.32 {d2}, [r4]! +#define STORE_NRM_LAST2 vst1.32 {d3[0]}, [r4]! +#else +#define STORE_NRM_LAST1 STORE_NRM +#define STORE_NRM_LAST2 +#endif + +#define __NAME_EPILOGUE(x) x ## EPILOGUE +#define _NAME_EPILOGUE(x) __NAME_EPILOGUE(x) +#define LOOP_EPILOGUE _NAME_EPILOGUE(LOOP_NAME) + + + +#if (SKIN_1BONE == SKIN_POS) || (SKIN_1BONE == SKIN_POS_NRM) || (SKIN_1BONE == SKIN_POS_NRM_TAN) + #define LOAD_M_12 vld1.32 {q12,q13}, [r7,:128]! 
+ #define LOAD_M_34 vld1.32 {q14,q15}, [r7,:128] +#else + #define LOAD_M_12 vld1.32 {q8,q9}, [r7,:128]! + #define LOAD_M_34 vld1.32 {q10,q11}, [r7,:128] +#endif + +#define WEIGHT_MATRIX_1(op,r) op.f32 q12, q8, r +#define WEIGHT_MATRIX_2(op,r) op.f32 q13, q9, r +#define WEIGHT_MATRIX_3(op,r) op.f32 q14, q10, r +#define WEIGHT_MATRIX_4(op,r) op.f32 q15, q11, r + +#define WEIGHT_M0_1 WEIGHT_MATRIX_1(vmul, d11[0]) +#define WEIGHT_M0_2 WEIGHT_MATRIX_2(vmul, d11[0]) +#define WEIGHT_M0_3 WEIGHT_MATRIX_3(vmul, d11[0]) +#define WEIGHT_M0_4 WEIGHT_MATRIX_4(vmul, d11[0]) + +#define WEIGHT_M1_1 WEIGHT_MATRIX_1(vmla, d11[1]) +#define WEIGHT_M1_2 WEIGHT_MATRIX_2(vmla, d11[1]) +#define WEIGHT_M1_3 WEIGHT_MATRIX_3(vmla, d11[1]) +#define WEIGHT_M1_4 WEIGHT_MATRIX_4(vmla, d11[1]) + +#define WEIGHT_M2_1 WEIGHT_MATRIX_1(vmla, d12[0]) +#define WEIGHT_M2_2 WEIGHT_MATRIX_2(vmla, d12[0]) +#define WEIGHT_M2_3 WEIGHT_MATRIX_3(vmla, d12[0]) +#define WEIGHT_M2_4 WEIGHT_MATRIX_4(vmla, d12[0]) + +#define WEIGHT_M3_1 WEIGHT_MATRIX_1(vmla, d12[1]) +#define WEIGHT_M3_2 WEIGHT_MATRIX_2(vmla, d12[1]) +#define WEIGHT_M3_3 WEIGHT_MATRIX_3(vmla, d12[1]) +#define WEIGHT_M3_4 WEIGHT_MATRIX_4(vmla, d12[1]) + + +//=========================================================================================================================================== +// +// 1 bone skinning + +#if (SKIN_1BONE == SKIN_POS) || (SKIN_1BONE == SKIN_POS_NRM) || (SKIN_1BONE == SKIN_POS_NRM_TAN) + +mov ip, sp + +vpush {d8-d10} +stmfd sp!, {r4-r8} + +ldr r4, [ip, #0] +mov r8, #12 + + ldr r5, [r3], #4 + add r7, r0, r5, lsl #6 + +LOOP_NAME: + + + +LOAD_M_12 +LOAD_M_34 + + +LOAD_POS_NRM +LOAD_TAN + +CALC_POS_1 +CALC_NRM_1 +CALC_TAN_1 + + cmp r1, r2 + pld [r1, #256] + +CALC_POS_2 +CALC_NRM_2 +CALC_TAN_2 + + ldrcc r5, [r3], #4 + add r7, r0, r5, lsl #6 + +CALC_POS_3 +CALC_NRM_3 +CALC_TAN_3 + + pld [r7] + +CALC_POS_4 +CALC_TAN_4 + +beq LOOP_EPILOGUE + +STORE_POS +STORE_NRM +STORE_TAN + +bcc LOOP_NAME + +LOOP_EPILOGUE: +STORE_POS_LAST1 
+STORE_POS_LAST2 +STORE_NRM_LAST1 +STORE_NRM_LAST2 +STORE_TAN + + +ldmfd sp!, {r4-r8} +vpop {d8-d10} + +bx lr + + +//=========================================================================================================================================== +// +// 2 bones skinning + +#elif (SKIN_2BONES == SKIN_POS || SKIN_2BONES == SKIN_POS_NRM || SKIN_2BONES == SKIN_POS_NRM_TAN) + +mov ip, sp + +vpush {d8-d11} +stmfd sp!, {r4,r5,r6,r7,r8,r10} + +ldr r4, [ip, #0] + +vld1.32 {d11}, [r3,:64]! // wgt -> +ldmia r3!, {r5-r6} // idx -> + +add r7, r0, r5, lsl #6 // M0 .. +LOAD_M_12 // M0 +WEIGHT_M0_1 +WEIGHT_M0_2 + +LOAD_M_34 // M0 +add r7, r0, r6, lsl #6 // M1 .. +WEIGHT_M0_3 +WEIGHT_M0_4 + +LOAD_M_12 // M1 +WEIGHT_M1_1 +WEIGHT_M1_2 + +ldr r5, [r3, #8] // idx0 + +mov r8, #12 +sub r10, r2, #VERTEX_SZ + +LOAD_M_34 // M1 + +WEIGHT_M1_3 + +.align 4 +LOOP_NAME: + + cmp r1, r10 + + add r7, r0, r5, lsl #6 // M0 .. + ldrcc r6, [r3, #12] // idx1 +LOAD_POS_NRM + +WEIGHT_M1_4 + +LOAD_TAN + +CALC_POS_1 +LOAD_M_12 // M0 + cmp r1, r10 +CALC_NRM_1 +CALC_TAN_1 +vld1.32 {d11}, [r3,:64] // wgt -> + +WEIGHT_M0_1 + pld [r1,#256] + +CALC_POS_2 +LOAD_M_34 // M0 + add r7, r0, r6, lsl #6 // M1 .. 
+CALC_NRM_2 +CALC_TAN_2 + ldrcc r5, [r3, #24] // idx0 +WEIGHT_M0_2 +CALC_POS_3 + + cmp r1, r2 +CALC_NRM_3 +CALC_TAN_3 +LOAD_M_12 // M1 + + +WEIGHT_M0_3 + +CALC_POS_4 +CALC_TAN_4 + +WEIGHT_M0_4 +LOAD_M_34 // M1 + +beq LOOP_EPILOGUE + +WEIGHT_M1_1 +STORE_POS + +WEIGHT_M1_2 +STORE_NRM + add r3, r3, #16 +WEIGHT_M1_3 +STORE_TAN + +bcc LOOP_NAME + +LOOP_EPILOGUE: +STORE_POS_LAST1 +STORE_POS_LAST2 +STORE_NRM_LAST1 +STORE_NRM_LAST2 +STORE_TAN + + +ldmfd sp!, {r4,r5,r6,r7,r8,r10} +vpop {d8-d11} +bx lr + + +//=========================================================================================================================================== +// +// 4 bones skinning + +#elif (SKIN_4BONES == SKIN_POS || SKIN_4BONES == SKIN_POS_NRM || SKIN_4BONES == SKIN_POS_NRM_TAN) + + +mov ip, sp + +vpush {d8-d12} +stmfd sp!, {r4-r8} + +ldr r4, [ip, #0] + +vld1.32 {d11,d12}, [r3,:128]! // wgt -> +ldmia r3!, {r5-r6} // idx' -> + +add r7, r0, r5, lsl #6 // M0 .. +LOAD_M_12 // M0 +LOAD_M_34 // M0 + +mov r8, #12 + +.align 4 +LOOP_NAME: + +WEIGHT_M0_1 +LOAD_POS_NRM + +WEIGHT_M0_2 +LOAD_TAN + add r7, r0, r6, lsl #6 // M1 .. + + +WEIGHT_M0_3 +LOAD_M_12 // M1 + +WEIGHT_M0_4 +LOAD_M_34 // M1 + +WEIGHT_M1_1 + ldmia r3!, {r5-r6} // idx'' -> + +WEIGHT_M1_2 + add r7, r0, r5, lsl #6 // M2 .. + cmp r1, r2 + +WEIGHT_M1_3 +LOAD_M_12 // M2 + +WEIGHT_M1_4 + pld [r3, #256] +LOAD_M_34 // M2 + +WEIGHT_M2_1 + add r7, r0, r6, lsl #6 // M3 .. +WEIGHT_M2_2 +WEIGHT_M2_3 +LOAD_M_12 // M3 +WEIGHT_M2_4 + +LOAD_M_34 // M3 +WEIGHT_M3_1 +WEIGHT_M3_2 +WEIGHT_M3_3 +WEIGHT_M3_4 + pld [r1, #256] + +CALC_POS_1 +vld1.32 {d11,d12}, [r3,:128]! // wgt -> + +CALC_NRM_1 +CALC_TAN_1 + ldmcc r3!, {r5-r6} // idx -> + +CALC_POS_2 + add r7, r0, r5, lsl #6 // M0 .. 
+CALC_NRM_2 +CALC_TAN_2 +vldmia r7, {q8-q11} // M0 -> + +CALC_POS_3 +CALC_NRM_3 +CALC_TAN_3 + +CALC_POS_4 +CALC_TAN_4 + +beq LOOP_EPILOGUE + +STORE_POS +STORE_NRM +STORE_TAN + +bcc LOOP_NAME + +LOOP_EPILOGUE: +STORE_POS_LAST1 +STORE_POS_LAST2 +STORE_NRM_LAST1 +STORE_NRM_LAST2 +STORE_TAN + + +ldmfd sp!, {r4-r8} +vpop {d8-d12} +bx lr + + +//=========================================================================================================================================== + +#endif + +#undef __NAME_EPILOGUE +#undef _NAME_EPILOGUE +#undef LOOP_EPILOGUE +#undef CALC_POS_1 +#undef CALC_POS_2 +#undef CALC_POS_3 +#undef STORE_POS +#undef STORE_POS_LAST1 +#undef STORE_POS_LAST2 +#undef LOAD_POS_NRM +#undef STORE_NRM +#undef STORE_NRM_LAST1 +#undef STORE_NRM_LAST2 +#undef CALC_NRM_1 +#undef CALC_NRM_2 +#undef CALC_NRM_3 +#undef LOAD_TAN +#undef STORE_TAN +#undef CALC_TAN_1 +#undef CALC_TAN_2 +#undef CALC_TAN_3 +#undef CALC_TAN_4 +#undef LOAD_M_12 +#undef LOAD_M_34 +#undef WEIGHT_MATRIX_1 +#undef WEIGHT_MATRIX_2 +#undef WEIGHT_MATRIX_3 +#undef WEIGHT_MATRIX_4 +#undef WEIGHT_M0_1 +#undef WEIGHT_M0_2 +#undef WEIGHT_M0_3 +#undef WEIGHT_M0_4 +#undef WEIGHT_M1_1 +#undef WEIGHT_M1_2 +#undef WEIGHT_M1_3 +#undef WEIGHT_M1_4 +#undef WEIGHT_M2_1 +#undef WEIGHT_M2_2 +#undef WEIGHT_M2_3 +#undef WEIGHT_M2_4 +#undef WEIGHT_M3_1 +#undef WEIGHT_M3_2 +#undef WEIGHT_M3_3 +#undef WEIGHT_M3_4 diff --git a/Runtime/Filters/Mesh/MeshSkinningSSE2.asm b/Runtime/Filters/Mesh/MeshSkinningSSE2.asm new file mode 100644 index 0000000..395bf16 --- /dev/null +++ b/Runtime/Filters/Mesh/MeshSkinningSSE2.asm @@ -0,0 +1,323 @@ +;; SkinSSE2.s +;; +;; Created by Kaspar Daugaard on 1/12/11. +;; Copyright 2011 Unity Technologies. All rights reserved. 
+ +bits 32 + +section .text align=32 + +%define normalOffset 12 +%define tangentOffset 24 + +%macro SkinSSE2_Generic 3 + ; %1 numBones + ; %2 hasNormals + ; %3 hasTangents + ; [ebp + 8] inVertices + ; [ebp + 12] outVertices + ; [ebp + 16] numVertices + ; [ebp + 20] boneMatrices + ; [ebp + 24] weightsAndIndices + ; [ebp + 28] inputStride + ; [ebp + 32] outputStride + + push ebp + mov ebp, esp + pushad + + ; Local variables (32 byte aligned) + ; [esp + 0] MaskW + ; [esp + 16] MaskVec3 + ; [esp + 32] savedEcx + sub esp, 16*3 + and esp, ~31 + + ; Create bitmasks on stack + sub eax, eax + mov [esp + 0], eax ; MaskW + mov [esp + 4], eax + mov [esp + 8], eax + dec eax + mov [esp + 12], eax + mov [esp + 16], eax ; MaskVec3 + mov [esp + 20], eax + mov [esp + 24], eax + inc eax + mov [esp + 28], eax + + mov esi, [ebp + 8] ; inVertices + mov edi, [ebp + 12] ; outVertices + mov ecx, [ebp + 16] ; numVertices + mov edx, [ebp + 24] ; weightsAndIndices + + ; Prefetch vertices + prefetchnta [edx] + prefetchnta [esi] + prefetchnta [esi + 32] + + align 32 + +%%SkinSSE2_loop: + prefetchnta [esi + 64] + + mov ebx, [ebp + 20] ; boneMatrices + mov [esp + 32], ecx ; savedEcx + + ; Load first bone index +%if %1 == 1 + ; Single bone, no weight + mov eax, [edx] + shl eax, 6 +%else + ; Indices come after weights + mov eax, [edx + %1*4] + shl eax, 6 + prefetchnta [ebx + eax] + prefetchnta [ebx + eax + 32] + + ; Load second bone index + mov ecx, [edx + %1*4 + 4] + shl ecx, 6 + prefetchnta [ebx + ecx] + prefetchnta [ebx + ecx + 32] + + ; Load all weights to xmm0 + movups xmm0, [edx] +%endif + + ; Load first matrix to xmm4-xmm7 + movaps xmm4, [ebx + eax] + movaps xmm5, [ebx + eax + 16] + movaps xmm6, [ebx + eax + 32] + movaps xmm7, [ebx + eax + 48] + +%if %1 >= 2 + ; Multiply first matrix with first weight + movaps xmm1, xmm0 + shufps xmm1, xmm1, 0x00 + mulps xmm4, xmm1 + mulps xmm5, xmm1 + mulps xmm6, xmm1 + mulps xmm7, xmm1 +%endif + +%if %1 >= 3 + ; Load third bone index + mov eax, [edx + %1*4 
+ 8] + shl eax, 6 + prefetchnta [ebx + eax] + prefetchnta [ebx + eax + 32] +%endif + +%if %1 >= 2 + ; Load first two rows of the second matrix to xmm2-xmm3 + movaps xmm2, [ebx + ecx] + movaps xmm3, [ebx + ecx + 16] + ; Shuffle second weight to all elements of xmm1 + movaps xmm1, xmm0 + shufps xmm1, xmm1, 0x55 + ; Multiply two first rows of second matrix with second weight + mulps xmm2, xmm1 + mulps xmm3, xmm1 + ; Add + addps xmm4, xmm2 + addps xmm5, xmm3 + + ; Load last two rows of the second matrix to xmm2-xmm3 + movaps xmm2, [ebx + ecx + 32] + movaps xmm3, [ebx + ecx + 48] + ; Multiply two last rows of the second matri with second weight + mulps xmm2, xmm1 + mulps xmm3, xmm1 + ; Add + addps xmm6, xmm2 + addps xmm7, xmm3 +%endif + +%if %1 >= 4 + ; Load fourth bone index + mov ecx, [edx + %1*4 + 12] + shl ecx, 6 + prefetchnta [ebx + ecx] + prefetchnta [ebx + ecx + 32] +%endif + +%if %1 >= 3 + ; Load first two rows of the third matrix to xmm2-xmm3 + movaps xmm2, [ebx + eax] + movaps xmm3, [ebx + eax + 16] + ; Shuffle third weight to all elements of xmm1 + movaps xmm1, xmm0 + shufps xmm1, xmm1, 0xaa + ; Multiply first two rows of third matrix with third weight + mulps xmm2, xmm1 + mulps xmm3, xmm1 + ; Add + addps xmm4, xmm2 + addps xmm5, xmm3 + + ; Load last two rows of the third matrix to xmm2-xmm3 + movaps xmm2, [ebx + eax + 32] + movaps xmm3, [ebx + eax + 48] + ; Multiply last two rows of third matrix with third weight + mulps xmm2, xmm1 + mulps xmm3, xmm1 + ; Add + addps xmm6, xmm2 + addps xmm7, xmm3 +%endif + +%if %1 >= 4 + ; Load first two rows of the fourth matrix into xmm2-xmm3 + movaps xmm2, [ebx + ecx] + movaps xmm3, [ebx + ecx + 16] + ; Shuffle fourth weight to all elements of xmm1 + movaps xmm1, xmm0 + shufps xmm1, xmm1, 0xff + ; Multiply first two rows of the fourth matrix with fourth weight + mulps xmm2, xmm1 + mulps xmm3, xmm1 + ; Add + addps xmm4, xmm2 + addps xmm5, xmm3 + + ; Load last two rows of the fourth matrix to xmm2-xmm3 + movaps xmm2, [ebx + 
ecx + 32] + movaps xmm3, [ebx + ecx + 48] + ; Multiply last two rows of the fourth matrix with fourth weight + mulps xmm2, xmm1 + mulps xmm3, xmm1 + ; Add + addps xmm6, xmm2 + addps xmm7, xmm3 +%endif + + ; Matrix is in xmm4-xmm7 + ; Transform position by 4x4 matrix in xmm4-xmm7 + movups xmm0, [esi] + movaps xmm1, xmm0 + movaps xmm2, xmm0 + shufps xmm1, xmm1, 0x55 + shufps xmm2, xmm2, 0xaa + shufps xmm0, xmm0, 0x00 + mulps xmm1, xmm5 + mulps xmm2, xmm6 + mulps xmm0, xmm4 + addps xmm1, xmm2 + addps xmm0, xmm7 + addps xmm0, xmm1 + ; Store vertex position in outvert + movaps xmm7, [esp + 16] ; MaskVec3 + maskmovdqu xmm0, xmm7 + +%if %2 ; Has normal + ; Transform vector by 3x3 matrix in xmm4-xmm6 + movups xmm0, [esi + normalOffset] + movaps xmm1, xmm0 + movaps xmm2, xmm0 + shufps xmm1, xmm1, 0x55 + shufps xmm2, xmm2, 0xaa + shufps xmm0, xmm0, 0x00 + mulps xmm1, xmm5 + mulps xmm2, xmm6 + mulps xmm0, xmm4 + addps xmm1, xmm2 + addps xmm0, xmm1 +%endif + +%if %3 ; Has tangent + ; Transform vector by 3x3 matrix in xmm4-xmm6 + movups xmm1, [esi + tangentOffset] + movaps xmm2, xmm1 + movaps xmm3, xmm1 + shufps xmm2, xmm2, 0x55 + shufps xmm3, xmm3, 0xaa + mulps xmm2, xmm5 + mulps xmm3, xmm6 + movaps xmm6, xmm1 ; Save original tangent's W in xmm6 + shufps xmm1, xmm1, 0x00 + andps xmm6, [esp + 0] ; MaskW + mulps xmm1, xmm4 + addps xmm2, xmm3 + addps xmm1, xmm2 +%endif + +%if %2 || %3 ; Has normal or tangent + ; Calculate lengths and normalize + movaps xmm2, xmm0 + movaps xmm5, xmm1 + mulps xmm2, xmm2 + mulps xmm5, xmm5 + movaps xmm3, xmm2 + movaps xmm4, xmm2 + shufps xmm3, xmm5, 0x55 + shufps xmm4, xmm5, 0xaa + shufps xmm2, xmm5, 0x00 + addps xmm3, xmm4 + addps xmm2, xmm3 + sqrtps xmm2, xmm2 + rcpps xmm2, xmm2 + movaps xmm3, xmm2 + shufps xmm2, xmm2, 0x00 + shufps xmm3, xmm3, 0xaa + mulps xmm0, xmm2 + mulps xmm1, xmm3 +%endif + +%if %2 ; Write normal + add edi, normalOffset + maskmovdqu xmm0, xmm7 ; MaskVec3 + sub edi, normalOffset +%endif + +%if %3 ; Write tangent + andps xmm1, 
xmm7 ; MaskVec3 + orps xmm1, xmm6 ; Restore original W + movups [edi + tangentOffset], xmm1 +%endif + +%if %1 == 1 + ; Indices only + add edx, 4 +%else + ; Indices and weights + add edx, %1 * 8 +%endif + + add esi, [ebp + 28] ; inputStride + add edi, [ebp + 32] ; outputStride + mov ecx, [esp + 32] ; savedEcx + dec ecx + jnz %%SkinSSE2_loop + + ; Remove local variables from stack + lea esp, [ebp-32] + + popad + pop ebp + ret + align 16 +%endmacro + + +global SkinSSE2_1Bone_Pos +global SkinSSE2_2Bones_Pos +global SkinSSE2_4Bones_Pos +global SkinSSE2_1Bone_PosNormal +global SkinSSE2_2Bones_PosNormal +global SkinSSE2_4Bones_PosNormal +global SkinSSE2_1Bone_PosNormalTan +global SkinSSE2_2Bones_PosNormalTan +global SkinSSE2_4Bones_PosNormalTan + + +SkinSSE2_1Bone_Pos: SkinSSE2_Generic 1, 0, 0 +SkinSSE2_2Bones_Pos: SkinSSE2_Generic 2, 0, 0 +SkinSSE2_4Bones_Pos: SkinSSE2_Generic 4, 0, 0 +SkinSSE2_1Bone_PosNormal: SkinSSE2_Generic 1, 1, 0 +SkinSSE2_2Bones_PosNormal: SkinSSE2_Generic 2, 1, 0 +SkinSSE2_4Bones_PosNormal: SkinSSE2_Generic 4, 1, 0 +SkinSSE2_1Bone_PosNormalTan: SkinSSE2_Generic 1, 1, 1 +SkinSSE2_2Bones_PosNormalTan: SkinSSE2_Generic 2, 1, 1 +SkinSSE2_4Bones_PosNormalTan: SkinSSE2_Generic 4, 1, 1 diff --git a/Runtime/Filters/Mesh/MeshSkinningSSE2.h b/Runtime/Filters/Mesh/MeshSkinningSSE2.h new file mode 100644 index 0000000..c085309 --- /dev/null +++ b/Runtime/Filters/Mesh/MeshSkinningSSE2.h @@ -0,0 +1,129 @@ +#if UNITY_SUPPORTS_SSE && !UNITY_64 + +#if UNITY_OSX || UNITY_LINUX +#define __cdecl +#endif + +#define SKIN_SSE2_PARAMS \ + const void* inVertices, \ + void* outVertices, \ + int numVertices, \ + const void* boneMatrices, \ + const void* weightsAndIndices, \ + int inputStride, \ + int outputStride + +typedef void (__cdecl *SkinSSE2_Function)(SKIN_SSE2_PARAMS); + +extern "C" +{ + void __cdecl SkinSSE2_1Bone_Pos(SKIN_SSE2_PARAMS); + void __cdecl SkinSSE2_2Bones_Pos(SKIN_SSE2_PARAMS); + void __cdecl SkinSSE2_4Bones_Pos(SKIN_SSE2_PARAMS); + void __cdecl 
SkinSSE2_1Bone_PosNormal(SKIN_SSE2_PARAMS); + void __cdecl SkinSSE2_2Bones_PosNormal(SKIN_SSE2_PARAMS); + void __cdecl SkinSSE2_4Bones_PosNormal(SKIN_SSE2_PARAMS); + void __cdecl SkinSSE2_1Bone_PosNormalTan(SKIN_SSE2_PARAMS); + void __cdecl SkinSSE2_2Bones_PosNormalTan(SKIN_SSE2_PARAMS); + void __cdecl SkinSSE2_4Bones_PosNormalTan(SKIN_SSE2_PARAMS); +} + + +bool SkinMeshOptimizedSSE2(SkinMeshInfo& info) +{ + if (!CPUInfo::HasSSE2Support()) + { + return false; + } + + SkinSSE2_Function skinFunc = NULL; + + if (!info.skinNormals && !info.skinTangents) + { + switch (info.bonesPerVertex) + { + DebugAssert(info.inStride == sizeof(Vector3f)); + case 1: + skinFunc = &SkinSSE2_1Bone_Pos; + break; + case 2: + skinFunc = &SkinSSE2_2Bones_Pos; + break; + case 4: + skinFunc = &SkinSSE2_4Bones_Pos; + break; + + } + } + else if (info.skinNormals && !info.skinTangents) + { + DebugAssert(info.inStride == sizeof(Vector3f) + sizeof(Vector3f)); + switch (info.bonesPerVertex) + { + case 1: + skinFunc = &SkinSSE2_1Bone_PosNormal; + break; + case 2: + skinFunc = &SkinSSE2_2Bones_PosNormal; + break; + case 4: + skinFunc = &SkinSSE2_4Bones_PosNormal; + break; + + } + } + else if (info.skinNormals && info.skinTangents) + { + DebugAssert(info.inStride == sizeof(Vector3f) + sizeof(Vector3f) + sizeof(Vector4f)); + switch (info.bonesPerVertex) + { + case 1: + skinFunc = &SkinSSE2_1Bone_PosNormalTan; + break; + case 2: + skinFunc = &SkinSSE2_2Bones_PosNormalTan; + break; + case 4: + skinFunc = &SkinSSE2_4Bones_PosNormalTan; + break; + + } + } + + if (skinFunc == NULL) + return false; + + // Skin all vertices apart from last one! 
+ if (info.vertexCount > 1) + { + (*skinFunc)(info.inVertices, info.outVertices, info.vertexCount - 1,info.cachedPose, info.compactSkin, info.inStride, info.outStride); + } + // Copy last vertex to stack to avoid reading/writing past end of buffer + if (info.vertexCount > 0) + { + const int maxStride = 2 * sizeof(Vector3f) + sizeof(Vector4f) + 4; + Assert(info.inStride <= maxStride && info.outStride <= maxStride); + // Need 4 bytes padding to access Vec3 as Vec4 + char vertexCopyIn[maxStride + 4]; + char vertexCopyOut[maxStride + 4]; + int skinStride = (info.bonesPerVertex == 4) ? sizeof(BoneInfluence) : + (info.bonesPerVertex == 2) ? sizeof(BoneInfluence2) : + (info.bonesPerVertex == 1) ? sizeof(int) : 0; + Assert(skinStride != 0); + int index = info.vertexCount - 1; + const char* compactSkin = static_cast<const char*>(info.compactSkin) + index * skinStride; + const char* inVertex = static_cast<const char*>(info.inVertices) + index * info.inStride; + char* outVertex = static_cast<char*>(info.outVertices) + index * info.outStride; + memcpy(vertexCopyIn, inVertex, info.inStride); + (*skinFunc)(vertexCopyIn, vertexCopyOut, 1, info.cachedPose, compactSkin, info.inStride, info.outStride); + memcpy(outVertex, vertexCopyOut, info.outStride); + } + + return true; +} +#else +inline bool SkinMeshOptimizedSSE2(SkinMeshInfo& info) +{ + return false; +} +#endif diff --git a/Runtime/Filters/Mesh/MeshSkinningTests.cpp b/Runtime/Filters/Mesh/MeshSkinningTests.cpp new file mode 100644 index 0000000..407729b --- /dev/null +++ b/Runtime/Filters/Mesh/MeshSkinningTests.cpp @@ -0,0 +1,228 @@ +#include "UnityPrefix.h" +#include "Configuration/UnityConfigure.h" + +#if ENABLE_UNIT_TESTS && UNITY_SUPPORTS_SSE && !UNITY_64 + +#include "Runtime/Filters/Mesh/MeshSkinning.h" +#include "External/UnitTest++/src/UnitTest++.h" +#include "Runtime/Allocator/MemoryMacros.h" +#include "Runtime/Math/Random/rand.h" +#include "Runtime/Math/Matrix4x4.h" + +bool SkinMeshOptimizedSSE2(SkinMeshInfo& info); 
+void SkinMesh(SkinMeshInfo& info); + +Vector3f RandomVector3InUnitBox(Rand& rnd) +{ + return Vector3f(rnd.GetSignedFloat(), + rnd.GetSignedFloat(), + rnd.GetSignedFloat()); +} + +SUITE (MeshSkinningTests) +{ +TEST(MeshSkinning_AllFeatures) +{ + int failedPositions = 0; + int failedNormals = 0; + int failedTangents = 0; + int failedTangentSigns = 0; + int failedVertexCopies = 0; + + const int minVertices = 1; + const int maxVertices = 100; + const int positionSize = 3*sizeof(float); + const int normalSize = 3*sizeof(float); + const int tangentSize = 4*sizeof(float); + const int maxStride = positionSize + normalSize + tangentSize; + const int trailingBytes = 128; + + UInt8 inVertices[maxVertices * maxStride]; + UInt8 outVerticesRef[maxVertices * maxStride + trailingBytes]; + UInt8 outVerticesSimd[maxVertices * maxStride + trailingBytes]; + + SkinMeshInfo info; + memset(&info, 0, sizeof(info)); + info.inVertices = inVertices; + info.vertexCount = minVertices; + info.normalOffset = positionSize; + info.tangentOffset = positionSize + normalSize; + + // Try a large offset so AABBs don't contain (0,0,0) + Vector3f posOffset(-2000, 0, 2000); + + const int numBones = 64; + Matrix4x4f *cachedPose; + ALLOC_TEMP_ALIGNED(cachedPose, Matrix4x4f, numBones, 32); + info.cachedPose = cachedPose; + for (int i = 0; i < numBones; i++) + { + Matrix4x4f mat; + mat.SetScale(Vector3f(1.0 + 0.5f*sin(i*0.3f), + 1.0 + 0.5f*sin(i*0.5f), + 1.0 + 0.5f*sin(i*0.7f))); + mat.SetPosition(Vector3f(100.0f*sin(i*1.0f), + 100.0f*sin(i*2.5f), + 100.0f*sin(i*3.3f)) + posOffset); + cachedPose[i] = mat; + } + info.boneCount = numBones; + + Rand rnd(123); + + int boneIndices[maxVertices]; + BoneInfluence2 boneInfl2[maxVertices]; + BoneInfluence boneInfl4[maxVertices]; + for (int i = 0; i < maxVertices; i++) + { + boneIndices[i] = i%numBones; + + BoneInfluence2& b2 = boneInfl2[i]; + b2.boneIndex[0] = (i)%numBones; + b2.boneIndex[1] = (i/2+10)%numBones; + b2.weight[0] = rnd.GetFloat(); + b2.weight[1] = 1.0f - 
b2.weight[0]; + + BoneInfluence& b4 = boneInfl4[i]; + b4.boneIndex[0] = (i)%numBones; + b4.boneIndex[1] = (i/2+10)%numBones; + b4.boneIndex[2] = (i/3+20)%numBones; + b4.boneIndex[3] = (i/4+30)%numBones; + float weightLeft = 1.0f; + for (int j=0; j<3; j++) + { + b4.weight[j] = weightLeft * rnd.GetFloat(); + weightLeft -= b4.weight[j]; + } + b4.weight[3] = weightLeft; + } + + for (info.bonesPerVertex = 1; info.bonesPerVertex <= 4; info.bonesPerVertex++) + { + if (info.bonesPerVertex == 3) continue; + + switch (info.bonesPerVertex) + { + case 1: + info.compactSkin = boneIndices; + break; + case 2: + info.compactSkin = boneInfl2; + break; + case 4: + info.compactSkin = boneInfl4; + break; + } + + for (int skinNormals = 0; skinNormals <= 1; skinNormals++) + { + info.skinNormals = (skinNormals != 0); + + for (int skinTangents = 0; skinTangents <= 1; skinTangents++) + { + if (!skinNormals && skinTangents) continue; + info.skinTangents = (skinTangents != 0); + + // Randomize vertex count and stride + info.vertexCount += 7; + while (info.vertexCount > maxVertices) info.vertexCount -= (maxVertices - minVertices); + info.inStride = positionSize; + info.inStride += skinNormals ? normalSize : 0; + info.inStride += skinTangents ? tangentSize : 0; + info.outStride = info.inStride; + + UInt8* inVert = inVertices; + for (int i = 0; i < info.vertexCount; i++) + { + Vector3f* nextVec = (Vector3f*)inVert; + Vector3f pos = RandomVector3InUnitBox(rnd); + pos *= 1000.0f; + *nextVec++ = pos; + if (info.skinNormals) + { + Vector3f normal = RandomVector3InUnitBox(rnd); + normal = NormalizeSafe(normal); + *nextVec++ = normal; + } + + if (info.skinTangents) + { + Vector3f tangent = RandomVector3InUnitBox(rnd); + tangent = NormalizeSafe(tangent); + *nextVec++ = tangent; + float* tangentSign = (float*)nextVec; + *tangentSign = (rnd.GetSignedFloat() < 0.0f) ? 
-1.0f : 1.0f; + } + inVert += info.inStride; + } + + int outSize = info.vertexCount * info.outStride; + memset(outVerticesRef, 0xcc, outSize + trailingBytes); + memset(outVerticesSimd, 0xdd, outSize + trailingBytes); + + info.outVertices = outVerticesRef; + SkinMesh(info); + + info.outVertices = outVerticesSimd; + bool successSimd = SkinMeshOptimizedSSE2(info); + CHECK(successSimd); + + // Check if we wrote past end of buffer + for (int i = 0; i < trailingBytes; i++) + { + CHECK_EQUAL(0xcc, outVerticesRef[outSize + i]); + CHECK_EQUAL(0xdd, outVerticesSimd[outSize + i]); + } + + inVert = inVertices; + UInt8* vertRef = outVerticesRef; + UInt8* vertSimd = outVerticesSimd; + for (int i = 0; i < info.vertexCount; i++) + { + Vector3f* posRef = (Vector3f*)vertRef; + Vector3f* posSimd = (Vector3f*)vertRef; + if (!CompareApproximately(*posRef, *posSimd)) + { + failedPositions++; + } + if (info.skinNormals) + { + Vector3f* normalRef = (Vector3f*)(vertRef + info.normalOffset); + Vector3f* normalSimd = (Vector3f*)(vertRef + info.normalOffset); + if (!CompareApproximately(*normalRef, *normalSimd)) + { + failedNormals++; + } + } + if (info.skinTangents) + { + Vector3f* tangentRef = (Vector3f*)(vertRef + info.tangentOffset); + Vector3f* tangentSimd = (Vector3f*)(vertRef + info.tangentOffset); + if (!CompareApproximately(*tangentRef, *tangentSimd)) + { + failedTangents++; + } + float* tangentSignRef = (float*)(vertRef + info.tangentOffset + sizeof(Vector3f)); + float* tangentSignSimd = (float*)(vertRef + info.tangentOffset + sizeof(Vector3f)); + if (*tangentSignRef != *tangentSignSimd) + { + failedTangentSigns++; + } + } + + inVert += info.inStride; + vertRef += info.outStride; + vertSimd += info.outStride; + } + } + } + } + + CHECK_EQUAL(0, failedPositions); + CHECK_EQUAL(0, failedNormals); + CHECK_EQUAL(0, failedTangents); + CHECK_EQUAL(0, failedTangentSigns); + CHECK_EQUAL(0, failedVertexCopies); +} +} +#endif diff --git a/Runtime/Filters/Mesh/MeshSkinningVFP.s 
b/Runtime/Filters/Mesh/MeshSkinningVFP.s new file mode 100644 index 0000000..8829981 --- /dev/null +++ b/Runtime/Filters/Mesh/MeshSkinningVFP.s @@ -0,0 +1,187 @@ +#define UNITY_ASSEMBLER +#include "Configuration/PrefixConfigure.h" +#include "Runtime/Utilities/VFPUtility.h" + +#if UNITY_SUPPORTS_VFP + +.syntax unified + +.set device,0 +.set device,__arm__ + +.if device + +//.code32 +.globl _s_SkinVertices_VFP +.globl _s_SkinVertices_NoNormals_VFP +.globl _s_SkinVertices_Tangents_VFP + +.globl _s_SkinVertices2Bones_VFP +.globl _s_SkinVertices2Bones_NoNormals_VFP +.globl _s_SkinVertices2Bones_Tangents_VFP + +.globl _s_SkinVertices4Bones_VFP +.globl _s_SkinVertices4Bones_Copy4Ints_VFP +.globl _s_SkinVertices4Bones_NoNormals_VFP +.globl _s_SkinVertices4Bones_NoNormals_Copy4Ints_VFP +.globl _s_SkinVertices4Bones_Tangents_VFP +.globl _s_SkinVertices4Bones_Tangents_Copy4Ints_VFP + +#if UNITY_ANDROID +.hidden _s_SkinVertices_VFP +.hidden _s_SkinVertices_NoNormals_VFP +.hidden _s_SkinVertices_Tangents_VFP + +.hidden _s_SkinVertices2Bones_VFP +.hidden _s_SkinVertices2Bones_NoNormals_VFP +.hidden _s_SkinVertices2Bones_Tangents_VFP + +.hidden _s_SkinVertices4Bones_VFP +.hidden _s_SkinVertices4Bones_NoNormals_VFP +.hidden _s_SkinVertices4Bones_Tangents_VFP +#endif + + +//=========================================================================================================================================== + + +#define SKIN_POS 1 +#define SKIN_POS_NRM 2 +#define SKIN_POS_NRM_TAN 3 + + +#define SKIN_2BONES 0 +#define SKIN_4BONES 0 + +_s_SkinVertices_VFP: + +#define SKIN_1BONE SKIN_POS_NRM +#define VERTEX_SZ 24 +#define LOOP_NAME _s_SkinVertices_VFP_loop + +#include "MeshSkinningVFP_Loop.h" + +#undef LOOP_NAME +#undef VERTEX_SZ +#undef SKIN_1BONE + +_s_SkinVertices_NoNormals_VFP: + +#define SKIN_1BONE SKIN_POS +#define VERTEX_SZ 12 +#define LOOP_NAME _s_SkinVertices_NoNormals_VFP_loop + +#include "MeshSkinningVFP_Loop.h" + +#undef LOOP_NAME +#undef VERTEX_SZ +#undef SKIN_1BONE + 
+_s_SkinVertices_Tangents_VFP: + +#define SKIN_1BONE SKIN_POS_NRM_TAN +#define VERTEX_SZ 40 +#define LOOP_NAME _s_SkinVertices_Tangents_VFP_loop + +#include "MeshSkinningVFP_Loop.h" + +#undef LOOP_NAME +#undef VERTEX_SZ +#undef SKIN_1BONE + +#undef SKIN_4BONES +#undef SKIN_2BONES + + +//=========================================================================================================================================== + +#define SKIN_1BONE 0 +#define SKIN_4BONES 0 + +_s_SkinVertices2Bones_VFP: + +#define SKIN_2BONES SKIN_POS_NRM +#define VERTEX_SZ 24 +#define LOOP_NAME _s_SkinVertices2Bones_VFP_Loop + +#include "MeshSkinningVFP_Loop.h" + +#undef LOOP_NAME +#undef VERTEX_SZ +#undef SKIN_2BONES + +_s_SkinVertices2Bones_NoNormals_VFP: + +#define SKIN_2BONES SKIN_POS +#define VERTEX_SZ 12 +#define LOOP_NAME _s_SkinVertices2Bones_NoNormals_VFP_Loop + +#include "MeshSkinningVFP_Loop.h" + +#undef LOOP_NAME +#undef VERTEX_SZ +#undef SKIN_2BONES + +_s_SkinVertices2Bones_Tangents_VFP: + +#define SKIN_2BONES SKIN_POS_NRM_TAN +#define VERTEX_SZ 40 +#define LOOP_NAME _s_SkinVertices2Bones_Tangents_VFP_loop + +#include "MeshSkinningVFP_Loop.h" + +#undef LOOP_NAME +#undef VERTEX_SZ +#undef SKIN_2BONES + +#undef SKIN_4BONES +#undef SKIN_1BONE + +//=========================================================================================================================================== + +#define SKIN_1BONE 0 +#define SKIN_2BONES 0 + +_s_SkinVertices4Bones_VFP: + +#define SKIN_4BONES SKIN_POS_NRM +#define VERTEX_SZ 24 +#define LOOP_NAME _s_SkinVertices4Bones_VFP_loop + +#include "MeshSkinningVFP_Loop.h" + +#undef LOOP_NAME +#undef VERTEX_SZ +#undef SKIN_4BONES + +_s_SkinVertices4Bones_NoNormals_VFP: + +#define SKIN_4BONES SKIN_POS +#define VERTEX_SZ 12 +#define LOOP_NAME _s_SkinVertices4Bones_NoNormals_VFP_loop + +#include "MeshSkinningVFP_Loop.h" + +#undef LOOP_NAME +#undef VERTEX_SZ +#undef SKIN_4BONES + +_s_SkinVertices4Bones_Tangents_VFP: + +#define SKIN_4BONES 
SKIN_POS_NRM_TAN +#define VERTEX_SZ 40 +#define LOOP_NAME _s_SkinVertices4Bones_Tangents_VFP_loop + +#include "MeshSkinningVFP_Loop.h" + +#undef LOOP_NAME +#undef VERTEX_SZ +#undef SKIN_4BONES + +#undef SKIN_2BONES +#undef SKIN_1BONE + +//=========================================================================================================================================== + +.endif +#endif diff --git a/Runtime/Filters/Mesh/MeshSkinningVFP_Loop.h b/Runtime/Filters/Mesh/MeshSkinningVFP_Loop.h new file mode 100644 index 0000000..3b7400f --- /dev/null +++ b/Runtime/Filters/Mesh/MeshSkinningVFP_Loop.h @@ -0,0 +1,335 @@ + +// defines +// SKIN_1BONE +// SKIN_2BONES +// SKIN_4BONES +// LOOP_NAME +// VERTEX_SZ + +// skin types +// SKIN_POS +// SKIN_POS_NRM +// SKIN_POS_NRM_TAN + +//r0: const void* bones4x4 +//r1: const void* srcVertData +//r2: const void* srcVertDataEnd +//r3: const BoneInfluence4* srcBoneInfluence4 +//[sp+0] -> r4: const void* dstVertData + +// s0,s1,s2 <- output: pos +// s3,s4,s5 <- output: nrm +// s6,s7,s8,s9 <- output: tan +// s10,s11,s12 <- input: pos +// s13,s14,s15 <- input: nrm +// s16,s17,s18,s19 <- input: tan +// s20-s31 <- matrix [3x4] last row loaded directly to output pos + +//=========================================================================================================================================== +// +// Common + +#define CALC_POS_2 FMACS3 (0,1,2, 20,21,22, 10,10,10) +#define CALC_POS_3 FMACS3 (0,1,2, 24,25,26, 11,11,11) +#define CALC_POS_4 FMACS3 (0,1,2, 28,29,30, 12,12,12) + + +#if (SKIN_1BONE == SKIN_POS_NRM) || (SKIN_1BONE == SKIN_POS_NRM_TAN) \ + || (SKIN_2BONES == SKIN_POS_NRM) || (SKIN_2BONES == SKIN_POS_NRM_TAN) \ + || (SKIN_4BONES == SKIN_POS_NRM) || (SKIN_4BONES == SKIN_POS_NRM_TAN) + + #define LOAD_POS_NRM vldmia.32 r1!, {s10-s15} + #define STORE_POS_NRM vstmia.32 r4!, {s0-s5} + #define CALC_NRM_1 FMULS3 (3,4,5, 20,21,22, 13,13,13) + #define CALC_NRM_2 FMACS3 (3,4,5, 24,25,26, 14,14,14) + #define CALC_NRM_3 
FMACS3 (3,4,5, 28,29,30, 15,15,15) +#else + #define LOAD_POS_NRM vldmia.32 r1!, {s10-s12} + #define STORE_POS_NRM vstmia.32 r4!, {s0-s2} + #define CALC_NRM_1 + #define CALC_NRM_2 + #define CALC_NRM_3 +#endif + +#if (SKIN_1BONE == SKIN_POS_NRM_TAN) || (SKIN_2BONES == SKIN_POS_NRM_TAN) || (SKIN_4BONES == SKIN_POS_NRM_TAN) + #define LOAD_TAN vldmia.32 r1!, {s16-s19} + #define STORE_TAN vstmia.32 r4!, {s6-s9} + #define CALC_TAN_1 FMULS3 (6,7,8, 20,21,22, 16,16,16) + #define CALC_TAN_2 FMACS3 (6,7,8, 24,25,26, 17,17,17) + #define CALC_TAN_3 FMACS3 (6,7,8, 28,29,30, 18,18,18) + #define CALC_TAN_4 fcpys s9, s19 +#else + #define LOAD_TAN + #define STORE_TAN + #define CALC_TAN_1 + #define CALC_TAN_2 + #define CALC_TAN_3 + #define CALC_TAN_4 +#endif + + + + +//=========================================================================================================================================== +// +// 1 bone skinning + +#if (SKIN_1BONE == SKIN_POS) || (SKIN_1BONE == SKIN_POS_NRM) || (SKIN_1BONE == SKIN_POS_NRM_TAN) + +mov ip, sp +vpush {d7-d15} +stmfd sp!, {r4,r5,r6,r7,r8,r10,r11} + +ldr r4, [ip, #0] + +ldr r5, [r3], #4 +add r5, r0, r5, lsl #6 +add r6, r5, #48 + +vldmia.32 r6, {s0-s2} +vldmia.32 r5!, {s20-s23} +vldmia.32 r5!, {s24-s27} + +.align 4 +LOOP_NAME: + +LOAD_POS_NRM + +CALC_POS_2 +CALC_NRM_1 + ldr r6, [r3], #4 // next matrix index +vldmia.32 r5, {s28-s30} // bone matrix + add r5, r0, r6, lsl #6 // next matrix addr + + +CALC_POS_3 +CALC_NRM_2 + +LOAD_TAN + add r6, r5, #48 + cmp r1, r2 + +CALC_TAN_1 + vldmiacc.32 r5!, {s20-s23} // next bone matrix + + +CALC_POS_4 + +CALC_TAN_2 +CALC_NRM_3 + vldmiacc.32 r5!, {s24-s27} // next bone matrix + +CALC_TAN_3 +CALC_TAN_4 + + pld [r1, #1024] + + +STORE_POS_NRM +STORE_TAN + + vldmiacc.32 r6, {s0-s2} + +bcc LOOP_NAME + +ldmfd sp!, {r4,r5,r6,r7,r8,r10,r11} +vpop {d7-d15} +bx lr + + +//=========================================================================================================================================== + 
+#elif (SKIN_2BONES == SKIN_POS) || (SKIN_2BONES == SKIN_POS_NRM) || (SKIN_2BONES == SKIN_POS_NRM_TAN) + +mov ip, sp +vpush {d7-d15} +stmfd sp!, {r4,r5,r6,r7,r8,r10,r11} + +ldr r4, [ip, #0] + + +.align 4 +LOOP_NAME: + +vldmia.32 r3!, {s3,s4} // w + ldmia r3!, {r5-r6} // idx + + add r5, r0, r5, lsl #6 // M0 + add r6, r0, r6, lsl #6 // M1 + + +vldmia.64 r5!, {d4,d5} // M0[0] + +vldmia.64 r6!, {d6,d7} // M1[0] +FMULS3 (20,21,22, 8,9,10, 3,3,3) // M0[0] * w + +vldmia.64 r5!, {d4,d5} // M0[1] +FMACS3 (20,21,22, 12,13,14, 4,4,4) // + M1[0] * w + +vldmia.64 r6!, {d6,d7} // M1[1] +FMULS3 (24,25,26, 8,9,10, 3,3,3) // M0[1] * w + +vldmia.64 r5!, {d4,d5} // M0[2] +FMACS3 (24,25,26, 12,13,14, 4,4,4) // + M1[1] * w + +vldmia.64 r6!, {d6,d7} // M1[2] +FMULS3 (28,29,30, 8,9,10, 3,3,3) // M0[2] * w + +vldmia.64 r5!, {d4,d5} // M0[3] +FMACS3 (28,29,30, 12,13,14, 4,4,4) // + M1[2] * w + +vldmia.64 r6!, {d6,d7} // M1[3] +FMULS3 (0,1,2, 8,9,10, 3,3,3) // M0[3] * w + +FMACS3 (0,1,2, 12,13,14, 4,4,4) // + M1[3] * w + + +LOAD_POS_NRM +LOAD_TAN + +CALC_POS_2 +CALC_NRM_1 +CALC_TAN_1 + +CALC_POS_3 +CALC_NRM_2 +CALC_TAN_2 + pld [r1, #1024] + cmp r1, r2 +CALC_POS_4 +CALC_NRM_3 +CALC_TAN_3 + +CALC_TAN_4 + + +STORE_POS_NRM +STORE_TAN + +bcc LOOP_NAME + +ldmfd sp!, {r4,r5,r6,r7,r8,r10,r11} +vpop {d7-d15} +bx lr + + + +//=========================================================================================================================================== + +#elif (SKIN_4BONES == SKIN_POS) || (SKIN_4BONES == SKIN_POS_NRM) || (SKIN_4BONES == SKIN_POS_NRM_TAN) + +mov ip, sp +vpush {d7-d15} +stmfd sp!, {r4,r5,r6,r7,r8} + +ldr r4, [ip, #0] + + +.align 4 +LOOP_NAME: + +vldmia.32 r3!, {s3-s6} // w + ldmia r3!, {r5-r8} // idx + + add r5, r0, r5, lsl #6 // M0 + add r6, r0, r6, lsl #6 // M1 + add r7, r0, r7, lsl #6 // M2 + add r8, r0, r8, lsl #6 // M3 + + +vldmia.64 r5!, {d4,d5} // M0[0] + +vldmia.64 r6!, {d6,d7} // M1[0] +FMULS3 (20,21,22, 8,9,10, 3,3,3) // M0[0] * w + +vldmia.64 r7!, {d4,d5} // M2[0] 
+FMACS3 (20,21,22, 12,13,14, 4,4,4) // + M1[0] * w + +vldmia.64 r8!, {d6,d7} // M3[0] +FMACS3 (20,21,22, 8,9,10, 5,5,5) // + M2[0] * w + +vldmia.64 r5!, {d4,d5} // M0[1] +FMACS3 (20,21,22, 12,13,14, 6,6,6) // + M3[0] * w + +vldmia.64 r6!, {d6,d7} // M1[1] +FMULS3 (24,25,26, 8,9,10, 3,3,3) // M0[1] * w + +vldmia.64 r7!, {d4,d5} // M2[1] +FMACS3 (24,25,26, 12,13,14, 4,4,4) // + M1[1] * w + +vldmia.64 r8!, {d6,d7} // M3[1] +FMACS3 (24,25,26, 8,9,10, 5,5,5) // + M2[1] * w + +vldmia.64 r5!, {d4,d5} // M0[2] +FMACS3 (24,25,26, 12,13,14, 6,6,6) // + M3[1] * w + +vldmia.64 r6!, {d6,d7} // M1[2] +FMULS3 (28,29,30, 8,9,10, 3,3,3) // M0[2] * w + +vldmia.64 r7!, {d4,d5} // M2[2] +FMACS3 (28,29,30, 12,13,14, 4,4,4) // + M1[2] * w + +vldmia.64 r8!, {d6,d7} // M3[2] +FMACS3 (28,29,30, 8,9,10, 5,5,5) // + M2[2] * w + +vldmia.64 r5!, {d4,d5} // M0[3] +FMACS3 (28,29,30, 12,13,14, 6,6,6) // + M3[2] * w + +vldmia.64 r6!, {d6,d7} // M1[3] +FMULS3 (0,1,2, 8,9,10, 3,3,3) // M0[3] * w + +vldmia.64 r7!, {d4,d5} // M2[3] +FMACS3 (0,1,2, 12,13,14, 4,4,4) // + M1[3] * w + +vldmia.64 r8!, {d6,d7} // M3[3] +FMACS3 (0,1,2, 8,9,10, 5,5,5) // + M2[3] * w + +FMACS3 (0,1,2, 12,13,14, 6,6,6) // + M3[3] * w + + +LOAD_POS_NRM +LOAD_TAN + +CALC_POS_2 +CALC_NRM_1 +CALC_TAN_1 + +CALC_POS_3 +CALC_NRM_2 +CALC_TAN_2 + pld [r1, #1024] + cmp r1, r2 +CALC_POS_4 +CALC_NRM_3 +CALC_TAN_3 + +CALC_TAN_4 + + +STORE_POS_NRM +STORE_TAN + +bcc LOOP_NAME + +ldmfd sp!, {r4,r5,r6,r7,r8} +vpop {d7-d15} +bx lr + +#endif + +//=========================================================================================================================================== + +#undef CALC_POS_1 +#undef CALC_POS_2 +#undef CALC_POS_3 +#undef STORE_POS_NRM +#undef LOAD_POS_NRM +#undef CALC_NRM_1 +#undef CALC_NRM_2 +#undef CALC_NRM_3 +#undef LOAD_TAN +#undef STORE_TAN +#undef CALC_TAN_1 +#undef CALC_TAN_2 +#undef CALC_TAN_3 +#undef CALC_TAN_4 diff --git a/Runtime/Filters/Mesh/MeshUtility.cpp b/Runtime/Filters/Mesh/MeshUtility.cpp new file 
mode 100644 index 0000000..75d8e7f --- /dev/null +++ b/Runtime/Filters/Mesh/MeshUtility.cpp @@ -0,0 +1,58 @@ +#include "UnityPrefix.h" +#include "MeshUtility.h" +#include "Runtime/Geometry/Plane.h" +#include "Mesh.h" + +using namespace std; + +void CalculateNormals (StrideIterator<Vector3f> verts, const UInt32* indices, int vertexCount, int triangleCount, StrideIterator<Vector3f> outNormals) +{ + std::fill_n (outNormals, vertexCount, Vector3f(0,0,0)); + + // Add normals from faces + int idx = 0; + for( int i = 0; i < triangleCount; ++i ) + { + UInt32 index0 = indices[idx+0]; + UInt32 index1 = indices[idx+1]; + UInt32 index2 = indices[idx+2]; + Vector3f faceNormal = CalcRawNormalFromTriangle( verts[index0], verts[index1], verts[index2] ); + outNormals[index0] += faceNormal; + outNormals[index1] += faceNormal; + outNormals[index2] += faceNormal; + idx += 3; + } + + // Normalize + for (StrideIterator<Vector3f> end = outNormals + vertexCount; outNormals != end; ++outNormals ) + { + *outNormals = NormalizeFast (*outNormals); + } +} + + +float CalculateSurfaceArea ( + const Matrix4x4f& objectToWorld, + const Mesh::TemporaryIndexContainer& triangles, + dynamic_array<Vector3f>& vertices) +{ + // transform the vertices to world space, + // do it in place since they are a copy + for (int i = 0; i < vertices.size (); i++) + vertices[i] = objectToWorld.MultiplyPoint3 (vertices[i]); + + // calculate the area + float cachedSurfaceArea = 0; + for (int i = 0; i < triangles.size () / 3; i++) + { + DebugAssert (triangles[3 * i] < vertices.size ()); + DebugAssert (triangles[3 * i + 1] < vertices.size ()); + DebugAssert (triangles[3 * i + 2] < vertices.size ()); + Vector3f a = vertices[triangles[3 * i]]; + Vector3f b = vertices[triangles[3 * i + 1]]; + Vector3f c = vertices[triangles[3 * i + 2]]; + cachedSurfaceArea += Magnitude (Cross (b - a, c - a)) * 0.5f; + } + + return cachedSurfaceArea; +} diff --git a/Runtime/Filters/Mesh/MeshUtility.h b/Runtime/Filters/Mesh/MeshUtility.h new 
file mode 100644 index 0000000..748c874 --- /dev/null +++ b/Runtime/Filters/Mesh/MeshUtility.h @@ -0,0 +1,42 @@ +#ifndef MESHUTILITY_H +#define MESHUTILITY_H + +#include "Runtime/Math/Vector3.h" +#include "Runtime/Math/Matrix4x4.h" +#include "Runtime/Math/Quaternion.h" +#include "Runtime/Filters/Mesh/LodMesh.h" +#include "Runtime/Utilities/StrideIterator.h" +#include "Runtime/Utilities/dynamic_array.h" + +struct Tangent; + +// Calculate normals for the mesh, given vertex array and triangle list (3 indices per triangle). +void CalculateNormals( StrideIterator<Vector3f> verts, const UInt32* indices, int vertexCount, int triangleCount, StrideIterator<Vector3f> outNormals ); + +float CalculateSurfaceArea (const Matrix4x4f& objectToWorld, const Mesh::TemporaryIndexContainer& triangles, dynamic_array<Vector3f>& vertices); + +// Use this to generate a normal from an tangent basis quickly +inline Vector3f NormalFromQuatTangentBasis (const Quaternionf& lhs) +{ + float x = lhs.x * 2.0F; + float y = lhs.y * 2.0F; + float z = lhs.z * 2.0F; + float xx = lhs.x * x; + float yy = lhs.y * y; + float xz = lhs.x * z; + float yz = lhs.y * z; + float wx = lhs.w * x; + float wy = lhs.w * y; + + Vector3f res; + res.x = xz - wy; + res.y = yz + wx; + res.z = 1.0f - xx - yy; + AssertIf (!CompareApproximately (res, RotateVectorByQuat(Inverse (lhs), Vector3f::zAxis))); + return res; +} + +//bool HasDegenerateTriangles (const Vector3f* verts, const MeshData &meshData, float degenerateArea = 0.0001); + + +#endif diff --git a/Runtime/Filters/Mesh/SkinGeneric.h b/Runtime/Filters/Mesh/SkinGeneric.h new file mode 100644 index 0000000..ef30d81 --- /dev/null +++ b/Runtime/Filters/Mesh/SkinGeneric.h @@ -0,0 +1,338 @@ +#ifndef SKINGENERIC_H +#define SKINGENERIC_H + +#include "Runtime/Filters/Mesh/VertexData.h" + +#if UNITY_PS3 +template<TransformInstruction transformInstruction, int bonesPerVertexCount, +bool skinNormal, bool skinTangent> +void SkinGenericStreamed (SkinMeshInfo& info) +{ + const int* 
influence1 = reinterpret_cast<const int*> (info.compactSkin); + const BoneInfluence2* influence2 = reinterpret_cast<const BoneInfluence2*> (info.compactSkin); + const BoneInfluence* influence4 = reinterpret_cast<const BoneInfluence*> (info.compactSkin); + + const Matrix4x4f* bones4x4 = info.cachedPose; + + int count = info.vertexCount; + + int vertexOffset = info.vertexData->GetStream(0).offset; + const int vertexStride = info.vertexData->GetStream(0).stride; + + int normalOffset = info.vertexData->GetStream(1).offset; + const int normalStride = info.vertexData->GetStream(1).stride; + + int tangentOffset = info.vertexData->GetStream(2).offset; + const int tangentStride = info.vertexData->GetStream(2).stride; + + const int copyDataOffset = info.vertexData->GetStream(3).offset; + const int copyDataSize = info.vertexData->GetStream(3).stride * info.vertexCount; + + const UInt8* inputVertex = (const UInt8*)info.inVertices; + UInt8* outputVertex = (UInt8*)info.outVertices; + + Matrix4x4f poseBlended; + const Matrix4x4f* poseToUse; + + for( int v = 0; v < count; v++ ) + { + ALIGN_LOOP_OPTIMIZATION + + Prefetch(inputVertex + 256); + + // Blend the matrices first, then transform everything with this + // blended matrix. Gives a small speed boost on XCode/Intel (11.3 to 12.00 FPS + // in skin4 bench), and a good boost on MSVC/Windows (9.6 to 12.4 FPS). 
+ if (bonesPerVertexCount == 1) + { + poseToUse = &bones4x4[*influence1]; + } + else if (bonesPerVertexCount == 2) + { + float weight0 = influence2->weight[0]; + float weight1 = influence2->weight[1]; + const float* b4x40 = bones4x4[influence2->boneIndex[0]].m_Data; + const float* b4x41 = bones4x4[influence2->boneIndex[1]].m_Data; + // we need only 12 components of the matrix + poseBlended.m_Data[ 0] = b4x40[ 0] * weight0 + b4x41[ 0] * weight1; + poseBlended.m_Data[ 1] = b4x40[ 1] * weight0 + b4x41[ 1] * weight1; + poseBlended.m_Data[ 2] = b4x40[ 2] * weight0 + b4x41[ 2] * weight1; + poseBlended.m_Data[ 4] = b4x40[ 4] * weight0 + b4x41[ 4] * weight1; + poseBlended.m_Data[ 5] = b4x40[ 5] * weight0 + b4x41[ 5] * weight1; + poseBlended.m_Data[ 6] = b4x40[ 6] * weight0 + b4x41[ 6] * weight1; + poseBlended.m_Data[ 8] = b4x40[ 8] * weight0 + b4x41[ 8] * weight1; + poseBlended.m_Data[ 9] = b4x40[ 9] * weight0 + b4x41[ 9] * weight1; + poseBlended.m_Data[10] = b4x40[10] * weight0 + b4x41[10] * weight1; + poseBlended.m_Data[12] = b4x40[12] * weight0 + b4x41[12] * weight1; + poseBlended.m_Data[13] = b4x40[13] * weight0 + b4x41[13] * weight1; + poseBlended.m_Data[14] = b4x40[14] * weight0 + b4x41[14] * weight1; + poseToUse = &poseBlended; + } + else if (bonesPerVertexCount == 4) + { + float weight0 = influence4->weight[0]; + float weight1 = influence4->weight[1]; + float weight2 = influence4->weight[2]; + float weight3 = influence4->weight[3]; + + const float* b4x40 = bones4x4[influence4->boneIndex[0]].m_Data; + const float* b4x41 = bones4x4[influence4->boneIndex[1]].m_Data; + const float* b4x42 = bones4x4[influence4->boneIndex[2]].m_Data; + const float* b4x43 = bones4x4[influence4->boneIndex[3]].m_Data; + // we need only 12 components of the matrix, so unroll + poseBlended.m_Data[ 0] = b4x40[ 0] * weight0 + b4x41[ 0] * weight1 + b4x42[ 0] * weight2 + b4x43[ 0] * weight3; + poseBlended.m_Data[ 1] = b4x40[ 1] * weight0 + b4x41[ 1] * weight1 + b4x42[ 1] * weight2 + b4x43[ 1] * 
weight3; + poseBlended.m_Data[ 2] = b4x40[ 2] * weight0 + b4x41[ 2] * weight1 + b4x42[ 2] * weight2 + b4x43[ 2] * weight3; + poseBlended.m_Data[ 4] = b4x40[ 4] * weight0 + b4x41[ 4] * weight1 + b4x42[ 4] * weight2 + b4x43[ 4] * weight3; + poseBlended.m_Data[ 5] = b4x40[ 5] * weight0 + b4x41[ 5] * weight1 + b4x42[ 5] * weight2 + b4x43[ 5] * weight3; + poseBlended.m_Data[ 6] = b4x40[ 6] * weight0 + b4x41[ 6] * weight1 + b4x42[ 6] * weight2 + b4x43[ 6] * weight3; + poseBlended.m_Data[ 8] = b4x40[ 8] * weight0 + b4x41[ 8] * weight1 + b4x42[ 8] * weight2 + b4x43[ 8] * weight3; + poseBlended.m_Data[ 9] = b4x40[ 9] * weight0 + b4x41[ 9] * weight1 + b4x42[ 9] * weight2 + b4x43[ 9] * weight3; + poseBlended.m_Data[10] = b4x40[10] * weight0 + b4x41[10] * weight1 + b4x42[10] * weight2 + b4x43[10] * weight3; + poseBlended.m_Data[12] = b4x40[12] * weight0 + b4x41[12] * weight1 + b4x42[12] * weight2 + b4x43[12] * weight3; + poseBlended.m_Data[13] = b4x40[13] * weight0 + b4x41[13] * weight1 + b4x42[13] * weight2 + b4x43[13] * weight3; + poseBlended.m_Data[14] = b4x40[14] * weight0 + b4x41[14] * weight1 + b4x42[14] * weight2 + b4x43[14] * weight3; + poseToUse = &poseBlended; + } + + // skin components + Vector3f outVertex, outNormal, outTangent; + const Vector3f* vertex = reinterpret_cast<const Vector3f*>( inputVertex + vertexOffset); + const Vector3f* normal = reinterpret_cast<const Vector3f*>( inputVertex + normalOffset ); + const Vector3f* tangent = reinterpret_cast<const Vector3f*>( inputVertex + tangentOffset ); + poseToUse->MultiplyPoint3( *vertex, outVertex ); + if( skinNormal ) + { + poseToUse->MultiplyVector3( *normal, outNormal ); + if (transformInstruction == kNormalizeFastest) + { + float sqr1 = SqrMagnitude( outNormal ); + float invsqrt1 = FastestInvSqrt (sqr1); + outNormal *= invsqrt1; + } + else if (transformInstruction == kNormalizeFast) + { + float sqr1 = SqrMagnitude( outNormal ); + float invsqrt1 = FastInvSqrt (sqr1); + outNormal *= invsqrt1; + } + } + if( 
skinTangent )
	{
		poseToUse->MultiplyVector3( *tangent, outTangent );
		if (transformInstruction == kNormalizeFastest)
		{
			float sqr1 = SqrMagnitude( outTangent );
			float invsqrt1 = FastestInvSqrt (sqr1);
			outTangent *= invsqrt1;
		}
		else if (transformInstruction == kNormalizeFast)
		{
			float sqr1 = SqrMagnitude( outTangent );
			float invsqrt1 = FastInvSqrt (sqr1);
			outTangent *= invsqrt1;
		}
	}

	// write data out
	*reinterpret_cast<Vector3f*> (outputVertex + vertexOffset) = outVertex;
	if( skinNormal )
	{
		*reinterpret_cast<Vector3f*>( outputVertex + normalOffset ) = outNormal;
	}
	if( skinTangent )
	{
		*reinterpret_cast<Vector3f*>( outputVertex + tangentOffset ) = outTangent;
		// tangent.w is not transformed; it is copied straight from the source vertex
		*reinterpret_cast<float*>( outputVertex + tangentOffset + sizeof(Vector3f) ) = *reinterpret_cast<const float*>( inputVertex + tangentOffset + sizeof(Vector3f) );
	}

	vertexOffset += vertexStride;
	normalOffset += normalStride;
	tangentOffset += tangentStride;

	// advance only the influence stream matching the compile-time bone count
	if (bonesPerVertexCount == 1)
		influence1++;
	else if (bonesPerVertexCount == 2)
		influence2++;
	if (bonesPerVertexCount == 4)
		influence4++;
	}

	// copy
	const UInt8* copyDataSrc = inputVertex + copyDataOffset;
	UInt8* copyDataDst = outputVertex + copyDataOffset;
	memcpy(copyDataDst, copyDataSrc, copyDataSize);
}
#endif

template<TransformInstruction transformInstruction, int bonesPerVertexCount,
	bool skinNormal, bool skinTangent>
void SkinGeneric (SkinMeshInfo& info);

// Generic CPU skinning loop. All four template parameters are compile-time
// constants, so each instantiation is a specialized inner loop with the
// per-vertex branches folded away. Transforms position (and optionally
// normal/tangent) of every vertex by a per-vertex blend of up to 4 bone
// matrices taken from info.cachedPose.
template<TransformInstruction transformInstruction, int bonesPerVertexCount,
	bool skinNormal, bool skinTangent>
void SkinGeneric (SkinMeshInfo& info)
{
#if UNITY_PS3
	// Vertex data split over more than 2 streams goes through the streamed variant.
	if(info.vertexData && (info.vertexData->GetActiveStreamCount() > 2))
		return SkinGenericStreamed<transformInstruction, bonesPerVertexCount, skinNormal, skinTangent>(info);
#endif
	// The three influence pointers alias the same compact buffer; only the one
	// matching bonesPerVertexCount is ever dereferenced or advanced.
	const int* influence1 = reinterpret_cast<const int*> (info.compactSkin);
	const BoneInfluence2* influence2 = reinterpret_cast<const BoneInfluence2*> (info.compactSkin);
	const BoneInfluence* influence4 = reinterpret_cast<const BoneInfluence*> (info.compactSkin);

	const Matrix4x4f* bones4x4 = info.cachedPose;

	const int inStride = info.inStride;
	int outStride = info.outStride;
	int count = info.vertexCount;

	const int normalOffset = info.normalOffset;
	const int tangentOffset = info.tangentOffset;

	const UInt8* inputVertex = (const UInt8*)info.inVertices;
	UInt8* outputVertex = (UInt8*)info.outVertices;

	Matrix4x4f poseBlended;
	const Matrix4x4f* poseToUse;


#if !ENABLE_MULTITHREADED_SKINNING
	PROFILER_AUTO(gMeshSkinningSlowpath, NULL);
#endif

	//;;printf_console("bonesPerVertexCount: %d, skinNormal: %d, normalOffset: %d, inStride: %d, copyDataSizeInts: %d, count: %d, boneCount: %d, outputVertex: %d\n",
	//	bonesPerVertexCount, (int)skinNormal, normalOffset, inStride, copyDataSizeInts, count, info.boneCount, (int)outputVertex);
	//;;uint64_t delta = mach_absolute_time();

	for( int v = 0; v < count; v++ )
	{
		ALIGN_LOOP_OPTIMIZATION

		Prefetch(inputVertex + 256);

		// Blend the matrices first, then transform everything with this
		// blended matrix. Gives a small speed boost on XCode/Intel (11.3 to 12.00 FPS
		// in skin4 bench), and a good boost on MSVC/Windows (9.6 to 12.4 FPS).
		if (bonesPerVertexCount == 1)
		{
			// single bone: use the pose matrix directly, no blending needed
			poseToUse = &bones4x4[*influence1];
		}
		else if (bonesPerVertexCount == 2)
		{
			float weight0 = influence2->weight[0];
			float weight1 = influence2->weight[1];
			const float* b4x40 = bones4x4[influence2->boneIndex[0]].m_Data;
			const float* b4x41 = bones4x4[influence2->boneIndex[1]].m_Data;
			// we need only 12 components of the matrix
			poseBlended.m_Data[ 0] = b4x40[ 0] * weight0 + b4x41[ 0] * weight1;
			poseBlended.m_Data[ 1] = b4x40[ 1] * weight0 + b4x41[ 1] * weight1;
			poseBlended.m_Data[ 2] = b4x40[ 2] * weight0 + b4x41[ 2] * weight1;
			poseBlended.m_Data[ 4] = b4x40[ 4] * weight0 + b4x41[ 4] * weight1;
			poseBlended.m_Data[ 5] = b4x40[ 5] * weight0 + b4x41[ 5] * weight1;
			poseBlended.m_Data[ 6] = b4x40[ 6] * weight0 + b4x41[ 6] * weight1;
			poseBlended.m_Data[ 8] = b4x40[ 8] * weight0 + b4x41[ 8] * weight1;
			poseBlended.m_Data[ 9] = b4x40[ 9] * weight0 + b4x41[ 9] * weight1;
			poseBlended.m_Data[10] = b4x40[10] * weight0 + b4x41[10] * weight1;
			poseBlended.m_Data[12] = b4x40[12] * weight0 + b4x41[12] * weight1;
			poseBlended.m_Data[13] = b4x40[13] * weight0 + b4x41[13] * weight1;
			poseBlended.m_Data[14] = b4x40[14] * weight0 + b4x41[14] * weight1;
			poseToUse = &poseBlended;
		}
		else if (bonesPerVertexCount == 4)
		{
			float weight0 = influence4->weight[0];
			float weight1 = influence4->weight[1];
			float weight2 = influence4->weight[2];
			float weight3 = influence4->weight[3];

			const float* b4x40 = bones4x4[influence4->boneIndex[0]].m_Data;
			const float* b4x41 = bones4x4[influence4->boneIndex[1]].m_Data;
			const float* b4x42 = bones4x4[influence4->boneIndex[2]].m_Data;
			const float* b4x43 = bones4x4[influence4->boneIndex[3]].m_Data;
			// we need only 12 components of the matrix, so unroll
			poseBlended.m_Data[ 0] = b4x40[ 0] * weight0 + b4x41[ 0] * weight1 + b4x42[ 0] * weight2 + b4x43[ 0] * weight3;
			poseBlended.m_Data[ 1] = b4x40[ 1] * weight0 + b4x41[ 1] * weight1 + b4x42[ 1] * weight2 + b4x43[ 1] * weight3;
			poseBlended.m_Data[ 2] = b4x40[ 2] * weight0 + b4x41[ 2] * weight1 + b4x42[ 2] * weight2 + b4x43[ 2] * weight3;
			poseBlended.m_Data[ 4] = b4x40[ 4] * weight0 + b4x41[ 4] * weight1 + b4x42[ 4] * weight2 + b4x43[ 4] * weight3;
			poseBlended.m_Data[ 5] = b4x40[ 5] * weight0 + b4x41[ 5] * weight1 + b4x42[ 5] * weight2 + b4x43[ 5] * weight3;
			poseBlended.m_Data[ 6] = b4x40[ 6] * weight0 + b4x41[ 6] * weight1 + b4x42[ 6] * weight2 + b4x43[ 6] * weight3;
			poseBlended.m_Data[ 8] = b4x40[ 8] * weight0 + b4x41[ 8] * weight1 + b4x42[ 8] * weight2 + b4x43[ 8] * weight3;
			poseBlended.m_Data[ 9] = b4x40[ 9] * weight0 + b4x41[ 9] * weight1 + b4x42[ 9] * weight2 + b4x43[ 9] * weight3;
			poseBlended.m_Data[10] = b4x40[10] * weight0 + b4x41[10] * weight1 + b4x42[10] * weight2 + b4x43[10] * weight3;
			poseBlended.m_Data[12] = b4x40[12] * weight0 + b4x41[12] * weight1 + b4x42[12] * weight2 + b4x43[12] * weight3;
			poseBlended.m_Data[13] = b4x40[13] * weight0 + b4x41[13] * weight1 + b4x42[13] * weight2 + b4x43[13] * weight3;
			poseBlended.m_Data[14] = b4x40[14] * weight0 + b4x41[14] * weight1 + b4x42[14] * weight2 + b4x43[14] * weight3;
			poseToUse = &poseBlended;
		}

		// skin components
		Vector3f outVertex, outNormal, outTangent;
		const Vector3f* vertex = reinterpret_cast<const Vector3f*>( inputVertex );
		const Vector3f* normal = reinterpret_cast<const Vector3f*>( inputVertex + normalOffset );
		const Vector3f* tangent = reinterpret_cast<const Vector3f*>( inputVertex + tangentOffset );
		poseToUse->MultiplyPoint3( *vertex, outVertex );
		if( skinNormal )
		{
			poseToUse->MultiplyVector3( *normal, outNormal );
			if (transformInstruction == kNormalizeFastest)
			{
				float sqr1 = SqrMagnitude( outNormal );
				float invsqrt1 = FastestInvSqrt (sqr1);
				outNormal *= invsqrt1;
			}
			else if (transformInstruction == kNormalizeFast)
			{
				float sqr1 = SqrMagnitude( outNormal );
				float invsqrt1 = FastInvSqrt (sqr1);
				outNormal *= invsqrt1;
			}
		}
		if( skinTangent )
		{
			poseToUse->MultiplyVector3( *tangent, outTangent );
			if (transformInstruction == kNormalizeFastest)
			{
				float sqr1 = SqrMagnitude( outTangent );
				float invsqrt1 = FastestInvSqrt (sqr1);
				outTangent *= invsqrt1;
			}
			else if (transformInstruction == kNormalizeFast)
			{
				float sqr1 = SqrMagnitude( outTangent );
				float invsqrt1 = FastInvSqrt (sqr1);
				outTangent *= invsqrt1;
			}
		}

		// write data out
		*reinterpret_cast<Vector3f*> (outputVertex) = outVertex;
		if( skinNormal )
		{
			*reinterpret_cast<Vector3f*>( outputVertex + normalOffset ) = outNormal;
		}

		if( skinTangent )
		{
			*reinterpret_cast<Vector3f*>( outputVertex + tangentOffset ) = outTangent;
			// tangent.w is copied through untransformed
			*reinterpret_cast<float*>( outputVertex + tangentOffset + sizeof(Vector3f) ) = *reinterpret_cast<const float*>( inputVertex + tangentOffset + sizeof(Vector3f) );
		}

		outputVertex += outStride;
		inputVertex += inStride;

		// advance only the influence stream matching the compile-time bone count
		if (bonesPerVertexCount == 1)
			influence1++;
		else if (bonesPerVertexCount == 2)
			influence2++;
		if (bonesPerVertexCount == 4)
			influence4++;
	}

	//;;static int frameCount = 0; frameCount++;
	//delta = mach_absolute_time() - delta;
	//;;static uint64_t deltaAccum = 0; deltaAccum += (int)(delta);
	//;;printf_console("skin-c: %d %d\n", (int)(deltaAccum / frameCount), (int)delta);
}

#endif
diff --git a/Runtime/Filters/Mesh/SpriteRenderer.cpp b/Runtime/Filters/Mesh/SpriteRenderer.cpp new file mode 100644 index 0000000..4ce85a1 --- /dev/null +++ b/Runtime/Filters/Mesh/SpriteRenderer.cpp @@ -0,0 +1,338 @@
#include "UnityPrefix.h"
#include "SpriteRenderer.h"

#if ENABLE_SPRITES

#include "Runtime/Serialize/TransferFunctions/SerializeTransfer.h"
#include "Runtime/Graphics/SpriteFrame.h"
#include "Runtime/Graphics/Texture.h"
#include "Runtime/Graphics/Texture2D.h"
#include "Runtime/GfxDevice/GfxDevice.h"
#include "Runtime/Profiler/Profiler.h"
#include "Runtime/Profiler/ExternalGraphicsProfiler.h"
#include "Runtime/Shaders/Material.h"
#include
"Runtime/Shaders/ShaderNameRegistry.h"
#include "Runtime/Shaders/VBO.h"
#include "Runtime/Filters/Mesh/TransformVertex.h"
#include "Runtime/GfxDevice/BatchRendering.h"
#include "Runtime/Math/Color.h"
#include "Runtime/Core/Callbacks/GlobalCallbacks.h"
#include "Runtime/Misc/ResourceManager.h"
#include "Runtime/BaseClasses/Tags.h"
#include "SpriteRendererAnimationBinding.h"


PROFILER_INFORMATION(gSpriteRenderSingleProfile, "SpriteRenderer.RenderSingle", kProfilerRender)
PROFILER_INFORMATION(gSpriteRenderBatchProfile, "SpriteRenderer.RenderBatch", kProfilerRender)
PROFILER_INFORMATION(gSpriteRenderSubmitVBO, "Mesh.SubmitVBO", kProfilerRender)

const float kSpriteScaleEpsilon = 0.0001f;
// NOTE(review): despite the name, RenderMultiple compares this against accumulated
// *index* counts, not triangle counts — confirm intended unit.
#define kMaxNumSpriteTrianglesPerBatch (2*1024)

static const char* const kDefaultSpriteShader = "Sprites/Default";
static const char* const kDefaultSpriteMaterial = "Sprites-Default.mat";

static SHADERPROP (MainTex);
static SHADERPROP (MainTex_TexelSize);
static Material* gSpriteDefaultMaterial = NULL;

// Resolves the built-in default sprite material; run once graphics is initialized
// (registered below via the initializedEngineGraphics callback).
static void InitDefaultSpriteMaterial()
{
	Assert(gSpriteDefaultMaterial == NULL);
	gSpriteDefaultMaterial = GetBuiltinResource<Material>(kDefaultSpriteMaterial);
}

IMPLEMENT_CLASS_HAS_INIT (SpriteRenderer)
IMPLEMENT_OBJECT_SERIALIZE (SpriteRenderer)

SpriteRenderer::SpriteRenderer (MemLabelId label, ObjectCreationMode mode)
:	Super(kRendererSprite, label, mode)
,	m_Color(1.0F, 1.0F, 1.0F, 1.0F)
{
	// Sprites do not participate in the shadow pass.
	m_CastShadows = false;
	m_ReceiveShadows = false;
}

SpriteRenderer::~SpriteRenderer ()
{
}

// Converts a color to the device vertex-color format, routing through the
// gamma -> active color space conversion when rendering in linear space.
inline ColorRGBA32 GetDeviceColor (const ColorRGBAf& color, GfxDevice& device)
{
	if (GetActiveColorSpace () == kLinearColorSpace)
		return device.ConvertToDeviceVertexColor(GammaToActiveColorSpace(color));
	else
		return device.ConvertToDeviceVertexColor(color);
}

void SpriteRenderer::InitializeClass ()
{
	REGISTER_GLOBAL_CALLBACK(initializedEngineGraphics, InitDefaultSpriteMaterial());
	InitializeSpriteRendererAnimationBindingInterface();
}

void SpriteRenderer::CleanupClass ()
{
	CleanupSpriteRendererAnimationBindingInterface ();
	gSpriteDefaultMaterial = NULL;
}

// Serializes the sprite reference and tint color on top of the base Renderer state.
template<class TransferFunction>
void SpriteRenderer::Transfer(TransferFunction& transfer)
{
	Super::Transfer (transfer);
	TRANSFER (m_Sprite);
	TRANSFER (m_Color);
}

void SpriteRenderer::UpdateLocalAABB ()
{
	if (m_Sprite.IsValid())
	{
		//TODO: calculate AABB from RenderData.
		m_TransformInfo.localAABB = m_Sprite->GetBounds();
	}
	else
	{
		// No sprite assigned: collapse bounds to a point at the origin.
		m_TransformInfo.localAABB.SetCenterAndExtent(Vector3f::zero, Vector3f::zero);
	}
}

void SpriteRenderer::UpdateTransformInfo ()
{
	Transform const& transform = GetTransform();
	if (m_TransformDirty)
	{
		// will return a cached matrix most of the time
		TransformType type = transform.CalculateTransformMatrix (m_TransformInfo.worldMatrix);

		// Always treat sprites has having a non-uniform scale. Will make them batch better
		// (since we break batches on transform type changes). And does not have any negative effects
		// since uniform vs. non-uniform scale only affects fixed function vertex normals, which
		// aren't relevant here.
		type &= ~kUniformScaleTransform;
		type |= kNonUniformScaleTransform;
		m_TransformInfo.transformType = type;

		// Likewise, treat inverse scale as always being 1.
		m_TransformInfo.invScale = 1.0f;
	}

	if (m_BoundsDirty)
		UpdateLocalAABB();

	TransformAABBSlow(m_TransformInfo.localAABB, m_TransformInfo.worldMatrix, m_TransformInfo.worldAABB);
}

void SpriteRenderer::SetSprite(PPtr<Sprite> sprite)
{
	if (m_Sprite != sprite)
	{
		m_Sprite = sprite;
		// New sprite means new bounds and a new texture in the property block.
		BoundsChanged();
		SetupMaterialProperties();

		SetDirty();
	}
}

void SpriteRenderer::AwakeFromLoad (AwakeFromLoadMode awakeMode)
{
	Super::AwakeFromLoad(awakeMode);
	BoundsChanged();
	SetupMaterialProperties();
}

void SpriteRenderer::SmartReset ()
{
	SetMaterialCount(1);
	SetMaterial(GetDefaultSpriteMaterial(), 0);
}

void SpriteRenderer::SetupMaterialProperties()
{
	if (m_Sprite.IsNull())
		return;

	// Patch sprite texture and apply material property block
	MaterialPropertyBlock& block = GetPropertyBlockRememberToUpdateHash ();
	SetupMaterialPropertyBlock(block, GetSpriteRenderDataInContext(m_Sprite)->texture);
	ComputeCustomPropertiesHash ();
}

// Writes the sprite's texture and its texel-size vector into the given property
// block; a NULL texture writes TextureID(0) and a zero texel-size vector.
void SpriteRenderer::SetupMaterialPropertyBlock(MaterialPropertyBlock& block, const Texture2D* spriteTexture)
{
	const TextureID id = spriteTexture ? spriteTexture->GetTextureID() : TextureID(0);
	const Vector4f texelSize = spriteTexture ? Vector4f(spriteTexture->GetTexelSizeX(), spriteTexture->GetTexelSizeY(), spriteTexture->GetGLWidth(), spriteTexture->GetGLHeight()) : Vector4f(0, 0, 0, 0);

	block.ReplacePropertyTexture(kSLPropMainTex, kTexDim2D, id);
	block.ReplacePropertyVector(kSLPropMainTex_TexelSize, texelSize);
}

const SpriteRenderData* SpriteRenderer::GetSpriteRenderDataInContext(const PPtr<Sprite>& frame)
{
	//@Note: this is here for a possible contextual atlas implementation.
+ return &frame->GetRenderDataForPlayMode(); +} + +void SpriteRenderer::Render (int materialIndex, const ChannelAssigns& channels) +{ + GfxDevice& device = GetGfxDevice(); + + Assert(materialIndex == 0); + if (m_Sprite.IsNull()) + return; + + const SpriteRenderData* rd = GetSpriteRenderDataInContext(m_Sprite); + Assert(rd->texture.IsValid()); + + PROFILER_AUTO_GFX(gSpriteRenderSingleProfile, this); + + // Get VBO chunk for a rectangle or mesh + UInt32 numIndices, numVertices; + GetGeometrySize(numIndices, numVertices); + if (!numIndices) + return; + + const UInt32 channelMask = (1<<kShaderChannelVertex) | (1<<kShaderChannelTexCoord0) | (1<<kShaderChannelColor); + + DynamicVBO& vbo = device.GetDynamicVBO(); + UInt8* __restrict vbPtr; + UInt16* __restrict ibPtr; + if ( !vbo.GetChunk(channelMask, numVertices, numIndices, DynamicVBO::kDrawIndexedTriangles, (void**)&vbPtr, (void**)&ibPtr) ) + return; + + TransformSprite (vbPtr, ibPtr, NULL, rd, GetDeviceColor (m_Color, device), 0); + vbo.ReleaseChunk(numVertices, numIndices); + + // Draw + if (m_CustomProperties) + device.SetMaterialProperties(*m_CustomProperties); + + PROFILER_BEGIN(gSpriteRenderSubmitVBO, this) + vbo.DrawChunk(channels); + GPU_TIMESTAMP(); + PROFILER_END +} + +void SpriteRenderer::GetGeometrySize(UInt32& indexCount, UInt32& vertexCount) +{ + if (m_Sprite.IsValid()) + { + const SpriteRenderData* rd = GetSpriteRenderDataInContext(m_Sprite); + if (rd->indices.size() > 0) + { + indexCount = rd->indices.size(); + vertexCount = rd->vertices.size(); + return; + } + } + + indexCount = 0; + vertexCount = 0; +} + +#if GFX_ENABLE_DRAW_CALL_BATCHING +void SpriteRenderer::RenderBatch (const BatchInstanceData* instances, size_t count, size_t numIndices, size_t numVertices, const ChannelAssigns& channels) +{ + DebugAssert(numIndices); + DebugAssert(numVertices); + PROFILER_AUTO_GFX(gSpriteRenderBatchProfile, 0); + + GfxDevice& device = GetGfxDevice(); + const MaterialPropertyBlock* customProps = count > 0 ? 
instances[0].renderer->GetCustomProperties() : NULL; + if (customProps) + device.SetMaterialProperties (*customProps); + + UInt32 expectedFence = device.GetNextCPUFence(); + const UInt32 channelMask = (1<<kShaderChannelVertex) | (1<<kShaderChannelTexCoord0) | (1<<kShaderChannelColor);; + device.BeginDynamicBatching(channels, channelMask, numVertices, numIndices, kPrimitiveTriangles); + + for (BatchInstanceData const* it = instances; it < instances + count; ++it) + { + UInt32 numIndices, numVertices; + + Assert(it->renderer); + Assert(it->renderer->GetRendererType() == kRendererSprite); + SpriteRenderer* renderer = (SpriteRenderer*)it->renderer; + renderer->GetGeometrySize(numIndices, numVertices); + if (!numIndices) + continue; + + const SpriteRenderData *rd = renderer->GetSpriteRenderDataInContext(renderer->m_Sprite); + Assert(rd->texture.IsValid()); + +#if ENABLE_MULTITHREADED_CODE + renderer->m_Sprite->SetCurrentCPUFence(expectedFence); +#endif + device.DynamicBatchSprite(&it->xform, rd, GetDeviceColor(renderer->m_Color, device)); + } + device.SetInverseScale(1.0f); + device.EndDynamicBatching(TransformType(kNoScaleTransform)); + + // Insert fence after batching is complete + UInt32 fence = device.InsertCPUFence(); + Assert(fence == expectedFence); + GPU_TIMESTAMP(); +} + +void SpriteRenderer::RenderMultiple (const BatchInstanceData* instances, size_t count, const ChannelAssigns& channels) +{ + size_t numIndicesBatch = 0; + size_t numVerticesBatch = 0; + + BatchInstanceData const* instancesEnd = instances + count; + BatchInstanceData const* iBatchBegin = instances; + BatchInstanceData const* iBatchEnd = instances; + while (iBatchEnd != instancesEnd) + { + Assert(iBatchEnd->renderer->GetRendererType() == kRendererSprite); + SpriteRenderer* renderer = (SpriteRenderer*)iBatchEnd->renderer; + + if (renderer->GetSprite().IsNull()) + { + iBatchEnd++; + continue; + } + + UInt32 numIndices, numVertices; + renderer->GetGeometrySize(numIndices, numVertices); + + if 
((numIndicesBatch + numIndices) <= kMaxNumSpriteTrianglesPerBatch) + { + numIndicesBatch += numIndices; + numVerticesBatch += numVertices; + iBatchEnd++; + } + else + { + if (numIndicesBatch) + { + RenderBatch(iBatchBegin, iBatchEnd - iBatchBegin, numIndicesBatch, numVerticesBatch, channels); + numIndicesBatch = 0; + numVerticesBatch = 0; + iBatchBegin = iBatchEnd; + } + else // Can't fit in one draw call + { + RenderBatch(iBatchEnd, 1, numIndices, numVertices, channels); + iBatchEnd++; + iBatchBegin = iBatchEnd; + } + } + } + + if ((iBatchBegin != iBatchEnd) && numIndicesBatch) + { + RenderBatch(iBatchBegin, iBatchEnd - iBatchBegin, numIndicesBatch, numVerticesBatch, channels); + } +} +#endif + +Material* SpriteRenderer::GetDefaultSpriteMaterial () +{ + Assert(gSpriteDefaultMaterial); + return gSpriteDefaultMaterial; +} + +#endif // ENABLE_SPRITES diff --git a/Runtime/Filters/Mesh/SpriteRenderer.h b/Runtime/Filters/Mesh/SpriteRenderer.h new file mode 100644 index 0000000..0bf47b9 --- /dev/null +++ b/Runtime/Filters/Mesh/SpriteRenderer.h @@ -0,0 +1,60 @@ +#ifndef SPRITERENDERER_H +#define SPRITERENDERER_H +#include "Configuration/UnityConfigure.h" + +#if ENABLE_SPRITES + +#include "Runtime/GfxDevice/ChannelAssigns.h" +#include "Runtime/Filters/Renderer.h" +#include "Runtime/Shaders/Material.h" +#include "Runtime/Graphics/SpriteFrame.h" + +class SpriteRenderer : public Renderer +{ +public: + REGISTER_DERIVED_CLASS (SpriteRenderer, Renderer) + DECLARE_OBJECT_SERIALIZE (SpriteRenderer) + + SpriteRenderer (MemLabelId label, ObjectCreationMode mode); + // ~SpriteRenderer (); declared-by-macro + + static bool IsSealedClass () { return true; } + static void InitializeClass (); + static void CleanupClass (); + + virtual void AwakeFromLoad (AwakeFromLoadMode awakeMode); + virtual void SmartReset (); + + virtual void UpdateTransformInfo(); + virtual void UpdateLocalAABB (); + virtual void Render (int materialIndex, const ChannelAssigns& channels); +#if 
GFX_ENABLE_DRAW_CALL_BATCHING
	static void RenderMultiple (const BatchInstanceData* instances, size_t count, const ChannelAssigns& channels);
#endif
	PPtr<Sprite> GetSprite() const { return m_Sprite; }
	void SetSprite(PPtr<Sprite> sprite);

	ColorRGBAf GetColor() const { return m_Color; }
	void SetColor(const ColorRGBAf& color) { m_Color = color; }

	static void SetupMaterialPropertyBlock(MaterialPropertyBlock& block, const Texture2D* spriteTexture);

	static Material* GetDefaultSpriteMaterial();

private:
	PPtr<Sprite> m_Sprite;
	ColorRGBAf m_Color;

	void SetupMaterialProperties();
	void GetGeometrySize(UInt32& indexCount, UInt32& vertexCount);

#if GFX_ENABLE_DRAW_CALL_BATCHING
	static void RenderBatch (const BatchInstanceData* instances, size_t count, size_t numIndices, size_t numVertices, const ChannelAssigns& channels);
#endif
	// Context
	const SpriteRenderData* GetSpriteRenderDataInContext(const PPtr<Sprite>& frame);
};

#endif //ENABLE_SPRITES

#endif
diff --git a/Runtime/Filters/Mesh/SpriteRendererAnimationBinding.cpp b/Runtime/Filters/Mesh/SpriteRendererAnimationBinding.cpp new file mode 100644 index 0000000..a36406f --- /dev/null +++ b/Runtime/Filters/Mesh/SpriteRendererAnimationBinding.cpp @@ -0,0 +1,68 @@
#include "UnityPrefix.h"
#include "Runtime/Animation/GenericAnimationBindingCache.h"
#include "Runtime/Animation/AnimationClipBindings.h"
#include "SpriteRenderer.h"
#include "Runtime/Interfaces/IAnimationBinding.h"

#if ENABLE_SPRITES

// Serialized property name of SpriteRenderer's sprite reference.
static const char* kSpriteFrame = "m_Sprite";

// Animation binding that drives SpriteRenderer's m_Sprite reference as a
// PPtr curve (float curves are not supported by this binding).
class SpriteRendererAnimationBinding : public IAnimationBinding
{
public:

#if UNITY_EDITOR
	virtual void GetAllAnimatableProperties (Object& targetObject, std::vector<EditorCurveBinding>& outProperties) const
	{
		AddPPtrBinding (outProperties, ClassID(SpriteRenderer), kSpriteFrame);
	}
#endif

	// Only PPtr curves are meaningful here; float accessors are inert stubs.
	virtual float GetFloatValue (const UnityEngine::Animation::BoundCurve& bind) const { return 0.0F; }
	virtual void SetFloatValue (const UnityEngine::Animation::BoundCurve& bind, float value) const { }

	virtual void SetPPtrValue (const UnityEngine::Animation::BoundCurve& bound, SInt32 value) const
	{
		SpriteRenderer* renderer = reinterpret_cast<SpriteRenderer*>(bound.targetObject);
		renderer->SetSprite(PPtr<Sprite> (value));
	}

	virtual SInt32 GetPPtrValue (const UnityEngine::Animation::BoundCurve& bound) const
	{
		SpriteRenderer* renderer = reinterpret_cast<SpriteRenderer*>(bound.targetObject);
		return renderer->GetSprite().GetInstanceID();
	}

	virtual bool GenerateBinding (const UnityStr& attribute, bool pptrCurve, UnityEngine::Animation::GenericBinding& outputBinding) const
	{
		// Only the m_Sprite attribute, and only as a PPtr curve, can be bound.
		if (attribute == kSpriteFrame && pptrCurve)
		{
			outputBinding.attribute = 0;
			return true;
		}

		return false;
	}

	virtual ClassIDType BindValue (Object& target, const UnityEngine::Animation::GenericBinding& inputBinding, UnityEngine::Animation::BoundCurve& bound) const
	{
		return ClassID(Sprite);
	}
};

static SpriteRendererAnimationBinding* gSpriteRendererBinding = NULL;

void InitializeSpriteRendererAnimationBindingInterface ()
{
	Assert(gSpriteRendererBinding == NULL);
	gSpriteRendererBinding = UNITY_NEW (SpriteRendererAnimationBinding, kMemAnimation);
	UnityEngine::Animation::GetGenericAnimationBindingCache ().RegisterIAnimationBinding (ClassID(SpriteRenderer), UnityEngine::Animation::kSpriteRendererPPtrBinding, gSpriteRendererBinding);
}

void CleanupSpriteRendererAnimationBindingInterface ()
{
	// NOTE(review): the pointer is not reset to NULL here, yet Initialize asserts
	// it is NULL — confirm UNITY_DELETE nulls its argument before relying on
	// cleanup/re-init cycles.
	UNITY_DELETE (gSpriteRendererBinding, kMemAnimation);
}

#endif
\ No newline at end of file diff --git a/Runtime/Filters/Mesh/SpriteRendererAnimationBinding.h b/Runtime/Filters/Mesh/SpriteRendererAnimationBinding.h new file mode 100644 index 0000000..63e2731 --- /dev/null +++ b/Runtime/Filters/Mesh/SpriteRendererAnimationBinding.h @@ -0,0 +1,2 @@
// Registers / unregisters the SpriteRenderer sprite-PPtr animation binding
// with the generic animation binding cache (see SpriteRendererAnimationBinding.cpp).
void InitializeSpriteRendererAnimationBindingInterface ();
void CleanupSpriteRendererAnimationBindingInterface ();
\ No newline at end of file diff --git a/Runtime/Filters/Mesh/TransformVertex.cpp b/Runtime/Filters/Mesh/TransformVertex.cpp new file mode 100644 index 0000000..e9bebc1 --- /dev/null +++ b/Runtime/Filters/Mesh/TransformVertex.cpp @@ -0,0 +1,205 @@
#include "UnityPrefix.h"
#include "TransformVertex.h"

#include "Runtime/Math/Matrix4x4.h"
#include "Runtime/Math/Vector4.h"
#include "Runtime/Math/Vector3.h"
#include "Runtime/Math/Vector2.h"
#include "Runtime/Math/Color.h"

#include "Runtime/Misc/CPUInfo.h"

// Reference (portable C++) vertex-transform path: transforms positions by m and
// normals/tangents by a scale-compensated copy of m, copying color/UV channels
// through unchanged into a tightly packed destination buffer. Channels whose
// iterator has a NULL pointer are absent. Note multiStream is unused here;
// only the ARM dispatcher inspects it when choosing this fallback.
void
TransformVerticesStridedREF( StrideIterator<Vector3f> inPos, StrideIterator<Vector3f> inNormal,
	StrideIterator<ColorRGBA32> inColor, StrideIterator<Vector2f> inTexCoord0, StrideIterator<Vector2f> inTexCoord1,
	StrideIterator<Vector4f> inTangent,
	UInt8* dstData, const Matrix4x4f& m, unsigned vertexCount, bool multiStream )
{
	// NOTE: kill this code once all shaders normalize normals & tangents!
	//
	// We batch uniformly scaled objects, so derive the "normal matrix" here by scaling world matrix axes.
	// On reference code seems much cheaper than full normalization of normal/tangent vectors.
	// Test with scene of 200k vertices on Core i7 2600K: no handling of scale 3.77ms, normalization 8.00ms,
	// using scaled normal matrix 3.80ms.
	//
	// Note that ARM NEON/VFP transformation code does not handle this, but it's not needed on GLES platforms
	// since shaders always normalize normal & tangent. Might be needed on WinRT+ARM though (or just disable
	// dynamic batching with tangents there).
	Matrix4x4f nm;
	CopyMatrix(m.GetPtr(), nm.GetPtr());
	const float axisLen = Magnitude (m.GetAxisX());
	// guard against degenerate (near-zero) scale
	float scale = axisLen > 1.0e-6f ? 1.0f / axisLen : 1.0f;
	nm.Get (0, 0) *= scale;
	nm.Get (1, 0) *= scale;
	nm.Get (2, 0) *= scale;
	nm.Get (0, 1) *= scale;
	nm.Get (1, 1) *= scale;
	nm.Get (2, 1) *= scale;
	nm.Get (0, 2) *= scale;
	nm.Get (1, 2) *= scale;
	nm.Get (2, 2) *= scale;

	while (vertexCount --> 0)
	{
		Vector3f* outPos = reinterpret_cast<Vector3f*> (dstData);
		m.MultiplyPoint3(*inPos, *outPos);
		dstData += sizeof(Vector3f);
		++inPos;

		if (inNormal.GetPointer())
		{
			Vector3f* outNormal = reinterpret_cast<Vector3f*> (dstData);
			nm.MultiplyVector3(*inNormal, *outNormal);
			dstData += sizeof(Vector3f);
			++inNormal;
		}

		if (inColor.GetPointer())
		{
			memcpy(dstData, inColor.GetPointer(), sizeof(ColorRGBA32));
			dstData += sizeof(ColorRGBA32);
			++inColor;
		}

		if (inTexCoord0.GetPointer())
		{
			memcpy(dstData, inTexCoord0.GetPointer(), sizeof(Vector2f));
			dstData += sizeof(Vector2f);
			++inTexCoord0;
		}

		if (inTexCoord1.GetPointer())
		{
			memcpy(dstData, inTexCoord1.GetPointer(), sizeof(Vector2f));
			dstData += sizeof(Vector2f);
			++inTexCoord1;
		}

		if (inTangent.GetPointer())
		{
			Vector4f* outTangent = reinterpret_cast<Vector4f*> (dstData);
			Vector3f* outTangentXYZ = reinterpret_cast<Vector3f*> (outTangent);
			// transform only xyz; w (handedness) is copied through
			nm.MultiplyVector3(reinterpret_cast<const Vector3f&>(*inTangent), *outTangentXYZ);
			outTangent->w = inTangent->w;
			dstData += sizeof(Vector4f);
			++inTangent;
		}
	}
}



#if (UNITY_SUPPORTS_NEON && !UNITY_DISABLE_NEON_SKINNING) || UNITY_SUPPORTS_VFP

// Signatures of the assembly transform routines (without / with tangents).
typedef void (*TransformFunc)( const void*, const void*, const void*, const float*, void*, int );
typedef void (*TransformFuncWithTangents)( const void*, const void*, const void*, const float*, void*, int, const void* );


#if UNITY_SUPPORTS_NEON
namespace TransformNEON
{
	#define TRANSFORM_FUNC(prefix, addData) s_TransformVertices_Strided_##prefix##_##addData##_NEON

	// Dispatch tables indexed by the amount of pass-through data (see
	// addDataSize in TransformVerticesStridedARM below).
	TransformFunc TransformXYZ[] =
	{
		TRANSFORM_FUNC(XYZ,0), TRANSFORM_FUNC(XYZ,1), TRANSFORM_FUNC(XYZ,2),
TRANSFORM_FUNC(XYZ,3), TRANSFORM_FUNC(XYZ,4), TRANSFORM_FUNC(XYZ,5)
	};

	TransformFunc TransformXYZN[] =
	{
		TRANSFORM_FUNC(XYZN,0), TRANSFORM_FUNC(XYZN,1), TRANSFORM_FUNC(XYZN,2), TRANSFORM_FUNC(XYZN,3), TRANSFORM_FUNC(XYZN,4), TRANSFORM_FUNC(XYZN,5)
	};

	TransformFuncWithTangents TransformXYZNT[] =
	{
		TRANSFORM_FUNC(XYZNT,0), TRANSFORM_FUNC(XYZNT,1), TRANSFORM_FUNC(XYZNT,2), TRANSFORM_FUNC(XYZNT,3), TRANSFORM_FUNC(XYZNT,4), TRANSFORM_FUNC(XYZNT,5)
	};

	#undef TRANSFORM_FUNC
}
#endif // UNITY_SUPPORTS_NEON


#if UNITY_SUPPORTS_VFP
namespace TransformVFP
{
	#define TRANSFORM_FUNC(prefix, addData) s_TransformVertices_Strided_##prefix##_##addData##_VFP

	// Same tables as TransformNEON, pointing at the VFP assembly routines.
	TransformFunc TransformXYZ[] =
	{
		TRANSFORM_FUNC(XYZ,0), TRANSFORM_FUNC(XYZ,1), TRANSFORM_FUNC(XYZ,2), TRANSFORM_FUNC(XYZ,3), TRANSFORM_FUNC(XYZ,4), TRANSFORM_FUNC(XYZ,5)
	};

	TransformFunc TransformXYZN[] =
	{
		TRANSFORM_FUNC(XYZN,0), TRANSFORM_FUNC(XYZN,1), TRANSFORM_FUNC(XYZN,2), TRANSFORM_FUNC(XYZN,3), TRANSFORM_FUNC(XYZN,4), TRANSFORM_FUNC(XYZN,5)
	};

	TransformFuncWithTangents TransformXYZNT[] =
	{
		TRANSFORM_FUNC(XYZNT,0), TRANSFORM_FUNC(XYZNT,1), TRANSFORM_FUNC(XYZNT,2), TRANSFORM_FUNC(XYZNT,3), TRANSFORM_FUNC(XYZNT,4), TRANSFORM_FUNC(XYZNT,5)
	};

	#undef TRANSFORM_FUNC
}
#endif // UNITY_SUPPORTS_VFP

// ARM entry point: selects a specialized NEON (preferred, when available at
// runtime) or VFP assembly routine based on which channels are present, and
// falls back to the reference implementation for layouts the assembly does
// not handle (or for multi-stream data).
void
TransformVerticesStridedARM( StrideIterator<Vector3f> inPos, StrideIterator<Vector3f> inNormal,
	StrideIterator<ColorRGBA32> inColor, StrideIterator<Vector2f> inTexCoord0, StrideIterator<Vector2f> inTexCoord1,
	StrideIterator<Vector4f> inTangent,
	UInt8* dstData, const Matrix4x4f& m, unsigned vertexCount, bool multiStream )
{
	// Pass-through data size in 4-byte units: color (ColorRGBA32) = 1, each UV (Vector2f) = 2.
	int addDataSize = 0;
	if( inColor.GetPointer() ) addDataSize += 1;
	if( inTexCoord0.GetPointer() ) addDataSize += 2;
	if( inTexCoord1.GetPointer() ) addDataSize += 2;

	// First present pass-through channel; the assembly copies from here.
	const void* addDataSrc = 0;
	if( inColor.GetPointer() ) addDataSrc = inColor.GetPointer();
	else if( inTexCoord0.GetPointer() ) addDataSrc = inTexCoord0.GetPointer();
	else if( inTexCoord1.GetPointer() ) addDataSrc = inTexCoord1.GetPointer();

	// slow path determination
	if( (inColor.GetPointer() && inTexCoord1.GetPointer() && !inTexCoord0.GetPointer())
		|| (inTangent.GetPointer() && !inNormal.GetPointer()) || multiStream )
	{
		TransformVerticesStridedREF(inPos, inNormal, inColor, inTexCoord0, inTexCoord1, inTangent, dstData, m, vertexCount, multiStream);
		return;
	}

	int stride = inPos.GetStride();
	const UInt8* inDataBegin = static_cast<const UInt8*>(inPos.GetPointer());
	const UInt8* inDataEnd = inDataBegin + vertexCount * stride;

#if UNITY_SUPPORTS_NEON
	if (CPUInfo::HasNEONSupport())
	{
		using namespace TransformNEON;
		if( inNormal.GetPointer() && inTangent.GetPointer() )
			TransformXYZNT[addDataSize]( inDataBegin, inDataEnd, addDataSrc, m.m_Data, dstData, stride, inTangent.GetPointer() );
		else if( inNormal.GetPointer() )
			TransformXYZN[addDataSize]( inDataBegin, inDataEnd, addDataSrc, m.m_Data, dstData, stride );
		else
			TransformXYZ[addDataSize]( inDataBegin, inDataEnd, addDataSrc, m.m_Data, dstData, stride );
	}
	else
#endif
#if UNITY_SUPPORTS_VFP
	{
		using namespace TransformVFP;
		if( inNormal.GetPointer() && inTangent.GetPointer() )
			TransformXYZNT[addDataSize]( inDataBegin, inDataEnd, addDataSrc, m.m_Data, dstData, stride, inTangent.GetPointer() );
		else if( inNormal.GetPointer() )
			TransformXYZN[addDataSize]( inDataBegin, inDataEnd, addDataSrc, m.m_Data, dstData, stride );
		else
			TransformXYZ[addDataSize]( inDataBegin, inDataEnd, addDataSrc, m.m_Data, dstData, stride );
	}
#else
	{
		ErrorString("non-NEON path not enabled!");
	}
#endif
}
#endif

diff --git a/Runtime/Filters/Mesh/TransformVertex.h b/Runtime/Filters/Mesh/TransformVertex.h new file mode 100644 index 0000000..fe7aa77 --- /dev/null +++ b/Runtime/Filters/Mesh/TransformVertex.h @@ -0,0 +1,175 @@
#ifndef TRANSFORM_VERTEX_H_
#define TRANSFORM_VERTEX_H_

#include
"Configuration/PrefixConfigure.h"
#include "Runtime/Utilities/StrideIterator.h"
#include "Runtime/Math/Vector2.h"
#include "Runtime/Math/Vector3.h"
#include "Runtime/Math/Vector4.h"
#include "Runtime/Math/Color.h"

class Matrix4x4f;


//==============================================================================

// Declares an assembly vertex-transform routine for layout `code` (XYZ / XYZN)
// carrying `num` units of pass-through data, implemented in `postfix` (NEON/VFP).
#define DECL_TRANSFORM_VERTICES_STRIDED(code, num, postfix) \
	void s_TransformVertices_Strided_##code##_##num##_##postfix( const void* srcData, const void* srcDataEnd, const void* addData, \
		const float* xform, void* outData, int stride \
	);

// Same, for the position+normal+tangent (XYZNT) layout, which takes an extra
// tangent source pointer.
#define DECL_TRANSFORM_VERTICES_STRIDED_TANGENTS(num, postfix) \
	void s_TransformVertices_Strided_XYZNT_##num##_##postfix( const void* srcData, const void* srcDataEnd, const void* addData, \
		const float* xform, void* outData, int stride, const void* srcTangent \
	);


#if UNITY_SUPPORTS_NEON && !UNITY_DISABLE_NEON_SKINNING

extern "C"
{
// On these platforms the assembly exports underscore-prefixed symbols, so map
// the plain C names onto them. (NOTE(review): presumably a symbol-decoration
// difference between the toolchains — confirm against the .s files.)
#if UNITY_ANDROID || UNITY_WINRT || UNITY_BB10 || UNITY_TIZEN
	#define s_TransformVertices_Strided_XYZ_0_NEON _s_TransformVertices_Strided_XYZ_0_NEON
	#define s_TransformVertices_Strided_XYZ_1_NEON _s_TransformVertices_Strided_XYZ_1_NEON
	#define s_TransformVertices_Strided_XYZ_2_NEON _s_TransformVertices_Strided_XYZ_2_NEON
	#define s_TransformVertices_Strided_XYZ_3_NEON _s_TransformVertices_Strided_XYZ_3_NEON
	#define s_TransformVertices_Strided_XYZ_4_NEON _s_TransformVertices_Strided_XYZ_4_NEON
	#define s_TransformVertices_Strided_XYZ_5_NEON _s_TransformVertices_Strided_XYZ_5_NEON

	#define s_TransformVertices_Strided_XYZN_0_NEON _s_TransformVertices_Strided_XYZN_0_NEON
	#define s_TransformVertices_Strided_XYZN_1_NEON _s_TransformVertices_Strided_XYZN_1_NEON
	#define s_TransformVertices_Strided_XYZN_2_NEON _s_TransformVertices_Strided_XYZN_2_NEON
	#define s_TransformVertices_Strided_XYZN_3_NEON _s_TransformVertices_Strided_XYZN_3_NEON
	#define s_TransformVertices_Strided_XYZN_4_NEON _s_TransformVertices_Strided_XYZN_4_NEON
	#define s_TransformVertices_Strided_XYZN_5_NEON _s_TransformVertices_Strided_XYZN_5_NEON

	#define s_TransformVertices_Strided_XYZNT_0_NEON _s_TransformVertices_Strided_XYZNT_0_NEON
	#define s_TransformVertices_Strided_XYZNT_1_NEON _s_TransformVertices_Strided_XYZNT_1_NEON
	#define s_TransformVertices_Strided_XYZNT_2_NEON _s_TransformVertices_Strided_XYZNT_2_NEON
	#define s_TransformVertices_Strided_XYZNT_3_NEON _s_TransformVertices_Strided_XYZNT_3_NEON
	#define s_TransformVertices_Strided_XYZNT_4_NEON _s_TransformVertices_Strided_XYZNT_4_NEON
	#define s_TransformVertices_Strided_XYZNT_5_NEON _s_TransformVertices_Strided_XYZNT_5_NEON
#if ENABLE_SPRITES
#define s_TransformVertices_Sprite_NEON _s_TransformVertices_Sprite_NEON
#endif

#endif // UNITY_ANDROID || UNITY_WINRT || UNITY_BB10 || UNITY_TIZEN

	DECL_TRANSFORM_VERTICES_STRIDED(XYZ,0,NEON);
	DECL_TRANSFORM_VERTICES_STRIDED(XYZ,1,NEON);
	DECL_TRANSFORM_VERTICES_STRIDED(XYZ,2,NEON);
	DECL_TRANSFORM_VERTICES_STRIDED(XYZ,3,NEON);
	DECL_TRANSFORM_VERTICES_STRIDED(XYZ,4,NEON);
	DECL_TRANSFORM_VERTICES_STRIDED(XYZ,5,NEON);

	DECL_TRANSFORM_VERTICES_STRIDED(XYZN,0,NEON);
	DECL_TRANSFORM_VERTICES_STRIDED(XYZN,1,NEON);
	DECL_TRANSFORM_VERTICES_STRIDED(XYZN,2,NEON);
	DECL_TRANSFORM_VERTICES_STRIDED(XYZN,3,NEON);
	DECL_TRANSFORM_VERTICES_STRIDED(XYZN,4,NEON);
	DECL_TRANSFORM_VERTICES_STRIDED(XYZN,5,NEON);

	DECL_TRANSFORM_VERTICES_STRIDED_TANGENTS(0,NEON);
	DECL_TRANSFORM_VERTICES_STRIDED_TANGENTS(1,NEON);
	DECL_TRANSFORM_VERTICES_STRIDED_TANGENTS(2,NEON);
	DECL_TRANSFORM_VERTICES_STRIDED_TANGENTS(3,NEON);
	DECL_TRANSFORM_VERTICES_STRIDED_TANGENTS(4,NEON);
	DECL_TRANSFORM_VERTICES_STRIDED_TANGENTS(5,NEON);
#if ENABLE_SPRITES
	void s_TransformVertices_Sprite_NEON(const void* srcData, const void* srcDataEnd, const void* addData, const float* xform, void* outData, int stride, unsigned int color);
#endif
}

#endif


#if UNITY_SUPPORTS_VFP

extern "C"
{
// Same underscore-prefixed symbol mapping for the VFP variants.
#if UNITY_ANDROID || UNITY_BB10 || UNITY_TIZEN
	#define s_TransformVertices_Strided_XYZ_0_VFP _s_TransformVertices_Strided_XYZ_0_VFP
	#define s_TransformVertices_Strided_XYZ_1_VFP _s_TransformVertices_Strided_XYZ_1_VFP
	#define s_TransformVertices_Strided_XYZ_2_VFP _s_TransformVertices_Strided_XYZ_2_VFP
	#define s_TransformVertices_Strided_XYZ_3_VFP _s_TransformVertices_Strided_XYZ_3_VFP
	#define s_TransformVertices_Strided_XYZ_4_VFP _s_TransformVertices_Strided_XYZ_4_VFP
	#define s_TransformVertices_Strided_XYZ_5_VFP _s_TransformVertices_Strided_XYZ_5_VFP

	#define s_TransformVertices_Strided_XYZN_0_VFP _s_TransformVertices_Strided_XYZN_0_VFP
	#define s_TransformVertices_Strided_XYZN_1_VFP _s_TransformVertices_Strided_XYZN_1_VFP
	#define s_TransformVertices_Strided_XYZN_2_VFP _s_TransformVertices_Strided_XYZN_2_VFP
	#define s_TransformVertices_Strided_XYZN_3_VFP _s_TransformVertices_Strided_XYZN_3_VFP
	#define s_TransformVertices_Strided_XYZN_4_VFP _s_TransformVertices_Strided_XYZN_4_VFP
	#define s_TransformVertices_Strided_XYZN_5_VFP _s_TransformVertices_Strided_XYZN_5_VFP

	#define s_TransformVertices_Strided_XYZNT_0_VFP _s_TransformVertices_Strided_XYZNT_0_VFP
	#define s_TransformVertices_Strided_XYZNT_1_VFP _s_TransformVertices_Strided_XYZNT_1_VFP
	#define s_TransformVertices_Strided_XYZNT_2_VFP _s_TransformVertices_Strided_XYZNT_2_VFP
	#define s_TransformVertices_Strided_XYZNT_3_VFP _s_TransformVertices_Strided_XYZNT_3_VFP
	#define s_TransformVertices_Strided_XYZNT_4_VFP _s_TransformVertices_Strided_XYZNT_4_VFP
	#define s_TransformVertices_Strided_XYZNT_5_VFP _s_TransformVertices_Strided_XYZNT_5_VFP
#if ENABLE_SPRITES
	#define s_TransformVertices_Sprite_VFP _s_TransformVertices_Sprite_VFP
#endif
#endif // UNITY_ANDROID || UNITY_BB10 || UNITY_TIZEN


	DECL_TRANSFORM_VERTICES_STRIDED(XYZ,0,VFP);
	DECL_TRANSFORM_VERTICES_STRIDED(XYZ,1,VFP);
	DECL_TRANSFORM_VERTICES_STRIDED(XYZ,2,VFP);
	DECL_TRANSFORM_VERTICES_STRIDED(XYZ,3,VFP);
	DECL_TRANSFORM_VERTICES_STRIDED(XYZ,4,VFP);
	DECL_TRANSFORM_VERTICES_STRIDED(XYZ,5,VFP);

	DECL_TRANSFORM_VERTICES_STRIDED(XYZN,0,VFP);
	DECL_TRANSFORM_VERTICES_STRIDED(XYZN,1,VFP);
	DECL_TRANSFORM_VERTICES_STRIDED(XYZN,2,VFP);
	DECL_TRANSFORM_VERTICES_STRIDED(XYZN,3,VFP);
	DECL_TRANSFORM_VERTICES_STRIDED(XYZN,4,VFP);
	DECL_TRANSFORM_VERTICES_STRIDED(XYZN,5,VFP);

	DECL_TRANSFORM_VERTICES_STRIDED_TANGENTS(0,VFP);
	DECL_TRANSFORM_VERTICES_STRIDED_TANGENTS(1,VFP);
	DECL_TRANSFORM_VERTICES_STRIDED_TANGENTS(2,VFP);
	DECL_TRANSFORM_VERTICES_STRIDED_TANGENTS(3,VFP);
	DECL_TRANSFORM_VERTICES_STRIDED_TANGENTS(4,VFP);
	DECL_TRANSFORM_VERTICES_STRIDED_TANGENTS(5,VFP);
#if ENABLE_SPRITES
	void s_TransformVertices_Sprite_VFP (const void* srcData, const void* srcDataEnd, const void* addData, const float* xform, void* outData, int stride, unsigned int color);
#endif
}

#endif


#undef DECL_TRANSFORM_VERTICES_STRIDED_TANGENTS
#undef DECL_TRANSFORM_VERTICES_STRIDED


//==============================================================================

// Portable reference implementation (TransformVertex.cpp).
void
TransformVerticesStridedREF( StrideIterator<Vector3f> inPos, StrideIterator<Vector3f> inNormal,
	StrideIterator<ColorRGBA32> inColor, StrideIterator<Vector2f> inTexCoord0, StrideIterator<Vector2f> inTexCoord1,
	StrideIterator<Vector4f> inTangent,
	UInt8* dstData, const Matrix4x4f& m, unsigned vertexCount, bool multiStream );

#if (UNITY_SUPPORTS_NEON && !UNITY_DISABLE_NEON_SKINNING) || UNITY_SUPPORTS_VFP
// ARM dispatcher choosing NEON/VFP assembly, falling back to the REF path.
void
TransformVerticesStridedARM( StrideIterator<Vector3f> inPos, StrideIterator<Vector3f> inNormal,
	StrideIterator<ColorRGBA32> inColor, StrideIterator<Vector2f> inTexCoord0, StrideIterator<Vector2f> inTexCoord1,
	StrideIterator<Vector4f> inTangent,
	UInt8* dstData, const Matrix4x4f& m, unsigned vertexCount, bool multiStream );
#endif


// TransformVerticesStrided resolves to the best implementation for the target.
#if (UNITY_SUPPORTS_NEON && !UNITY_DISABLE_NEON_SKINNING) || UNITY_SUPPORTS_VFP
	#define TransformVerticesStrided TransformVerticesStridedARM
#else
#define TransformVerticesStrided TransformVerticesStridedREF +#endif + + +//============================================================================== + +#endif // TRANSFORM_VERTEX_H_ diff --git a/Runtime/Filters/Mesh/TransformVertexNEON.asm b/Runtime/Filters/Mesh/TransformVertexNEON.asm new file mode 100644 index 0000000..7db462b --- /dev/null +++ b/Runtime/Filters/Mesh/TransformVertexNEON.asm @@ -0,0 +1,694 @@ + AREA .text, CODE + + EXPORT _s_TransformVertices_Strided_XYZ_0_NEON + EXPORT _s_TransformVertices_Strided_XYZ_1_NEON + EXPORT _s_TransformVertices_Strided_XYZ_2_NEON + EXPORT _s_TransformVertices_Strided_XYZ_3_NEON + EXPORT _s_TransformVertices_Strided_XYZ_4_NEON + EXPORT _s_TransformVertices_Strided_XYZ_5_NEON + EXPORT _s_TransformVertices_Strided_XYZN_0_NEON + EXPORT _s_TransformVertices_Strided_XYZN_1_NEON + EXPORT _s_TransformVertices_Strided_XYZN_2_NEON + EXPORT _s_TransformVertices_Strided_XYZN_3_NEON + EXPORT _s_TransformVertices_Strided_XYZN_4_NEON + EXPORT _s_TransformVertices_Strided_XYZN_5_NEON + EXPORT _s_TransformVertices_Strided_XYZNT_0_NEON + EXPORT _s_TransformVertices_Strided_XYZNT_1_NEON + EXPORT _s_TransformVertices_Strided_XYZNT_2_NEON + EXPORT _s_TransformVertices_Strided_XYZNT_3_NEON + EXPORT _s_TransformVertices_Strided_XYZNT_4_NEON + EXPORT _s_TransformVertices_Strided_XYZNT_5_NEON + +|_s_TransformVertices_Strided_XYZ_0_NEON| PROC + mov ip, sp + vpush {s0-s15} + stmdb sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + vldmia r3!, {d24-d31} + mov.w r6, #12 + ldr.w r3, [ip] + ldr.w r4, [ip, #4] + vorr q0, q15, q15 + nop + +|TransformVertices_Strided_XYZ_0_Loop| + pld [r0, #512] ; 0x200 + vld1.32 {d6-d7}, [r0], r4 + vmla.f32 q0, q12, d6[0] + vmul.f32 q1, q13, d6[1] + vmul.f32 q2, q14, d7[0] + vadd.f32 q0, q0, q1 + vadd.f32 q0, q0, q2 + cmp r0, r1 + vst1.32 {d0-d1}, [r3], r6 + vorr q0, q15, q15 + bcc.w |TransformVertices_Strided_XYZ_0_Loop| + ldmia.w sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + vpop {s0-s15} + bx lr + ENDP + + 
+|_s_TransformVertices_Strided_XYZ_1_NEON| PROC + mov ip, sp + vpush {s0-s15} + stmdb sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + vldmia r3!, {d24-d31} + mov.w r6, #12 + ldr.w r3, [ip] + ldr.w r4, [ip, #4] + vorr q0, q15, q15 + nop + nop.w + nop.w + nop.w + +|TransformVertices_Strided_XYZ_1_Loop| + pld [r0, #512] ; 0x200 + vld1.32 {d6-d7}, [r0], r4 + vmla.f32 q0, q12, d6[0] + vmul.f32 q1, q13, d6[1] + vmul.f32 q2, q14, d7[0] + vadd.f32 q0, q0, q1 + vld1.32 {d9}, [r2], r4 + vadd.f32 q0, q0, q2 + cmp r0, r1 + vst1.32 {d0-d1}, [r3], r6 + vorr q0, q15, q15 + vst1.32 {d9[0]}, [r3]! + bcc.w |TransformVertices_Strided_XYZ_1_Loop| + ldmia.w sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + vpop {s0-s15} + bx lr + ENDP + + +|_s_TransformVertices_Strided_XYZ_2_NEON| PROC + mov ip, sp + vpush {s0-s15} + stmdb sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + vldmia r3!, {d24-d31} + mov.w r6, #12 + ldr.w r3, [ip] + ldr.w r4, [ip, #4] + vorr q0, q15, q15 + nop + nop.w + +|TransformVertices_Strided_XYZ_2_Loop| + pld [r0, #512] ; 0x200 + vld1.32 {d6-d7}, [r0], r4 + vmla.f32 q0, q12, d6[0] + vmul.f32 q1, q13, d6[1] + vmul.f32 q2, q14, d7[0] + vadd.f32 q0, q0, q1 + vld1.32 {d9}, [r2], r4 + vadd.f32 q0, q0, q2 + cmp r0, r1 + vst1.32 {d0-d1}, [r3], r6 + vorr q0, q15, q15 + vst1.32 {d9}, [r3]! + bcc.w |TransformVertices_Strided_XYZ_2_Loop| + ldmia.w sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + vpop {s0-s15} + bx lr + ENDP + + +|_s_TransformVertices_Strided_XYZ_3_NEON| PROC + mov ip, sp + vpush {s0-s15} + stmdb sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + vldmia r3!, {d24-d31} + mov.w r6, #12 + ldr.w r3, [ip] + ldr.w r4, [ip, #4] + vorr q0, q15, q15 + nop + nop.w + +|TransformVertices_Strided_XYZ_3_Loop| + pld [r0, #512] ; 0x200 + vld1.32 {d6-d7}, [r0], r4 + vmla.f32 q0, q12, d6[0] + vmul.f32 q1, q13, d6[1] + vmul.f32 q2, q14, d7[0] + vadd.f32 q0, q0, q1 + vld1.32 {d9-d10}, [r2], r4 + vadd.f32 q0, q0, q2 + cmp r0, r1 + vst1.32 {d0-d1}, [r3], r6 + vorr q0, q15, q15 + vst1.32 {d9}, [r3]! + vst1.32 {d10[0]}, [r3]! 
+ bcc.w |TransformVertices_Strided_XYZ_3_Loop| + ldmia.w sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + vpop {s0-s15} + bx lr + ENDP + + +|_s_TransformVertices_Strided_XYZ_4_NEON| PROC + mov ip, sp + vpush {s0-s15} + stmdb sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + vldmia r3!, {d24-d31} + mov.w r6, #12 + ldr.w r3, [ip] + ldr.w r4, [ip, #4] + vorr q0, q15, q15 + nop + +|TransformVertices_Strided_XYZ_4_Loop| + pld [r0, #512] ; 0x200 + vld1.32 {d6-d7}, [r0], r4 + vmla.f32 q0, q12, d6[0] + vmul.f32 q1, q13, d6[1] + vmul.f32 q2, q14, d7[0] + vadd.f32 q0, q0, q1 + vld1.32 {d9-d10}, [r2], r4 + vadd.f32 q0, q0, q2 + cmp r0, r1 + vst1.32 {d0-d1}, [r3], r6 + vorr q0, q15, q15 + vst1.32 {d9-d10}, [r3]! + bcc.w |TransformVertices_Strided_XYZ_4_Loop| + ldmia.w sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + vpop {s0-s15} + bx lr + ENDP + + +|_s_TransformVertices_Strided_XYZ_5_NEON| PROC + mov ip, sp + vpush {s0-s15} + stmdb sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + vldmia r3!, {d24-d31} + mov.w r6, #12 + ldr.w r3, [ip] + ldr.w r4, [ip, #4] + vorr q0, q15, q15 + nop + nop.w + +|TransformVertices_Strided_XYZ_5_Loop| + pld [r0, #512] ; 0x200 + vld1.32 {d6-d7}, [r0], r4 + vmla.f32 q0, q12, d6[0] + vmul.f32 q1, q13, d6[1] + vmul.f32 q2, q14, d7[0] + vadd.f32 q0, q0, q1 + vld1.32 {d9-d11}, [r2], r4 + vadd.f32 q0, q0, q2 + cmp r0, r1 + vst1.32 {d0-d1}, [r3], r6 + vorr q0, q15, q15 + vst1.32 {d9-d10}, [r3]! + vst1.32 {d11[0]}, [r3]! 
+ bcc.w |TransformVertices_Strided_XYZ_5_Loop| + ldmia.w sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + vpop {s0-s15} + bx lr + ENDP + + +|_s_TransformVertices_Strided_XYZN_0_NEON| PROC + mov ip, sp + vpush {s0-s15} + stmdb sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + vldmia r3!, {d24-d31} + mov.w r6, #12 + ldr.w r3, [ip] + ldr.w r4, [ip, #4] + vorr q0, q15, q15 + nop + +|TransformVertices_Strided_XYZN_0_Loop| + pld [r0, #512] ; 0x200 + vld1.32 {d4-d6}, [r0], r4 + vmla.f32 q0, q12, d4[0] + vmul.f32 q1, q12, d5[1] + vmla.f32 q0, q13, d4[1] + vmla.f32 q1, q13, d6[0] + vmla.f32 q0, q14, d5[0] + vmla.f32 q1, q14, d6[1] + vst1.32 {d0-d1}, [r3], r6 + cmp r0, r1 + vorr q0, q15, q15 + vst1.32 {d2-d3}, [r3], r6 + bcc.w |TransformVertices_Strided_XYZN_0_Loop| + ldmia.w sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + vpop {s0-s15} + bx lr + ENDP + + +|_s_TransformVertices_Strided_XYZN_1_NEON| PROC + mov ip, sp + vpush {s0-s15} + stmdb sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + vldmia r3!, {d24-d31} + mov.w r6, #12 + ldr.w r3, [ip] + ldr.w r4, [ip, #4] + vorr q0, q15, q15 + nop + nop.w + +|TransformVertices_Strided_XYZN_1_Loop| + pld [r0, #512] ; 0x200 + vld1.32 {d4-d6}, [r0], r4 + vmla.f32 q0, q12, d4[0] + vmul.f32 q1, q12, d5[1] + vld1.32 {d9}, [r2], r4 + vmla.f32 q0, q13, d4[1] + vmla.f32 q1, q13, d6[0] + vmla.f32 q0, q14, d5[0] + vmla.f32 q1, q14, d6[1] + vst1.32 {d0-d1}, [r3], r6 + cmp r0, r1 + vorr q0, q15, q15 + vst1.32 {d2-d3}, [r3], r6 + vst1.32 {d9[0]}, [r3]! 
+ bcc.w |TransformVertices_Strided_XYZN_1_Loop| + ldmia.w sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + vpop {s0-s15} + bx lr + ENDP + + +|_s_TransformVertices_Strided_XYZN_2_NEON| PROC + mov ip, sp + vpush {s0-s15} + stmdb sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + vldmia r3!, {d24-d31} + mov.w r6, #12 + ldr.w r3, [ip] + ldr.w r4, [ip, #4] + vorr q0, q15, q15 + nop + nop.w + nop.w + nop.w + +|TransformVertices_Strided_XYZN_2_Loop| + pld [r0, #512] ; 0x200 + vld1.32 {d4-d6}, [r0], r4 + vmla.f32 q0, q12, d4[0] + vmul.f32 q1, q12, d5[1] + vld1.32 {d9}, [r2], r4 + vmla.f32 q0, q13, d4[1] + vmla.f32 q1, q13, d6[0] + vmla.f32 q0, q14, d5[0] + vmla.f32 q1, q14, d6[1] + vst1.32 {d0-d1}, [r3], r6 + cmp r0, r1 + vorr q0, q15, q15 + vst1.32 {d2-d3}, [r3], r6 + vst1.32 {d9}, [r3]! + bcc.w |TransformVertices_Strided_XYZN_2_Loop| + ldmia.w sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + vpop {s0-s15} + bx lr + ENDP + + +|_s_TransformVertices_Strided_XYZN_3_NEON| PROC + mov ip, sp + vpush {s0-s15} + stmdb sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + vldmia r3!, {d24-d31} + mov.w r6, #12 + ldr.w r3, [ip] + ldr.w r4, [ip, #4] + vorr q0, q15, q15 + nop + nop.w + nop.w + nop.w + +|TransformVertices_Strided_XYZN_3_Loop| + pld [r0, #512] ; 0x200 + vld1.32 {d4-d6}, [r0], r4 + vmla.f32 q0, q12, d4[0] + vmul.f32 q1, q12, d5[1] + vld1.32 {d9-d10}, [r2], r4 + vmla.f32 q0, q13, d4[1] + vmla.f32 q1, q13, d6[0] + vmla.f32 q0, q14, d5[0] + vmla.f32 q1, q14, d6[1] + vst1.32 {d0-d1}, [r3], r6 + cmp r0, r1 + vorr q0, q15, q15 + vst1.32 {d2-d3}, [r3], r6 + vst1.32 {d9}, [r3]! + vst1.32 {d10[0]}, [r3]! 
+ bcc.w |TransformVertices_Strided_XYZN_3_Loop| + ldmia.w sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + vpop {s0-s15} + bx lr + ENDP + + +|_s_TransformVertices_Strided_XYZN_4_NEON| PROC + mov ip, sp + vpush {s0-s15} + stmdb sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + vldmia r3!, {d24-d31} + mov.w r6, #12 + ldr.w r3, [ip] + ldr.w r4, [ip, #4] + vorr q0, q15, q15 + nop + nop.w + nop.w + +|TransformVertices_Strided_XYZN_4_Loop| + pld [r0, #512] ; 0x200 + vld1.32 {d4-d6}, [r0], r4 + vmla.f32 q0, q12, d4[0] + vmul.f32 q1, q12, d5[1] + vld1.32 {d9-d10}, [r2], r4 + vmla.f32 q0, q13, d4[1] + vmla.f32 q1, q13, d6[0] + vmla.f32 q0, q14, d5[0] + vmla.f32 q1, q14, d6[1] + vst1.32 {d0-d1}, [r3], r6 + cmp r0, r1 + vorr q0, q15, q15 + vst1.32 {d2-d3}, [r3], r6 + vst1.32 {d9-d10}, [r3]! + bcc.w |TransformVertices_Strided_XYZN_4_Loop| + ldmia.w sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + vpop {s0-s15} + bx lr + ENDP + + +|_s_TransformVertices_Strided_XYZN_5_NEON| PROC + mov ip, sp + vpush {s0-s15} + stmdb sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + vldmia r3!, {d24-d31} + mov.w r6, #12 + ldr.w r3, [ip] + ldr.w r4, [ip, #4] + vorr q0, q15, q15 + nop + nop.w + nop.w + nop.w + +|TransformVertices_Strided_XYZN_5_Loop| + pld [r0, #512] ; 0x200 + vld1.32 {d4-d6}, [r0], r4 + vmla.f32 q0, q12, d4[0] + vmul.f32 q1, q12, d5[1] + vld1.32 {d9-d11}, [r2], r4 + vmla.f32 q0, q13, d4[1] + vmla.f32 q1, q13, d6[0] + vmla.f32 q0, q14, d5[0] + vmla.f32 q1, q14, d6[1] + vst1.32 {d0-d1}, [r3], r6 + cmp r0, r1 + vorr q0, q15, q15 + vst1.32 {d2-d3}, [r3], r6 + vst1.32 {d9-d10}, [r3]! + vst1.32 {d11[0]}, [r3]! 
+ bcc.w |TransformVertices_Strided_XYZN_5_Loop| + ldmia.w sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + vpop {s0-s15} + bx lr + ENDP + + +|_s_TransformVertices_Strided_XYZNT_0_NEON| PROC + mov ip, sp + vpush {s0-s15} + stmdb sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + vldmia r3!, {d24-d31} + mov.w r6, #12 + ldr.w r3, [ip] + ldr.w r4, [ip, #4] + vorr q0, q15, q15 + ldr.w r8, [ip, #8] + mov.w r9, #12 + mov.w sl, #4 + nop + nop.w + nop.w + nop.w + +|TransformVertices_Strided_XYZNT_0_Loop| + pld [r0, #512] ; 0x200 + vld1.32 {d4-d6}, [r0], r4 + vld1.32 {d7-d8}, [r8], r4 + vmla.f32 q0, q12, d4[0] + vmul.f32 q1, q12, d5[1] + vmul.f32 q11, q12, d7[0] + vmla.f32 q0, q13, d4[1] + vmla.f32 q1, q13, d6[0] + vmla.f32 q11, q13, d7[1] + vmla.f32 q0, q14, d5[0] + vmla.f32 q1, q14, d6[1] + vmla.f32 q11, q14, d8[0] + vst1.32 {d0-d1}, [r3], r6 + cmp r0, r1 + vorr q0, q15, q15 + vst1.32 {d2-d3}, [r3], r6 + vtrn.32 d8, d7 + vst1.32 {d22-d23}, [r3], r9 + vst1.32 {d7[0]}, [r3], sl + bcc.w |TransformVertices_Strided_XYZNT_0_Loop| + ldmia.w sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + vpop {s0-s15} + bx lr + ENDP + + +|_s_TransformVertices_Strided_XYZNT_1_NEON| PROC + mov ip, sp + vpush {s0-s15} + stmdb sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + vldmia r3!, {d24-d31} + mov.w r6, #12 + ldr.w r3, [ip] + ldr.w r4, [ip, #4] + vorr q0, q15, q15 + ldr.w r8, [ip, #8] + mov.w r9, #12 + mov.w sl, #4 + nop + nop.w + nop.w + nop.w + +|TransformVertices_Strided_XYZNT_1_Loop| + pld [r0, #512] ; 0x200 + vld1.32 {d4-d6}, [r0], r4 + vld1.32 {d7-d8}, [r8], r4 + vmla.f32 q0, q12, d4[0] + vmul.f32 q1, q12, d5[1] + vmul.f32 q11, q12, d7[0] + vld1.32 {d9}, [r2], r4 + vmla.f32 q0, q13, d4[1] + vmla.f32 q1, q13, d6[0] + vmla.f32 q11, q13, d7[1] + vmla.f32 q0, q14, d5[0] + vmla.f32 q1, q14, d6[1] + vmla.f32 q11, q14, d8[0] + vst1.32 {d0-d1}, [r3], r6 + cmp r0, r1 + vorr q0, q15, q15 + vst1.32 {d2-d3}, [r3], r6 + vst1.32 {d9[0]}, [r3]! 
+ vtrn.32 d8, d7 + vst1.32 {d22-d23}, [r3], r9 + vst1.32 {d7[0]}, [r3], sl + bcc.w |TransformVertices_Strided_XYZNT_1_Loop| + ldmia.w sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + vpop {s0-s15} + bx lr + ENDP + + +|_s_TransformVertices_Strided_XYZNT_2_NEON| PROC + mov ip, sp + vpush {s0-s15} + stmdb sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + vldmia r3!, {d24-d31} + mov.w r6, #12 + ldr.w r3, [ip] + ldr.w r4, [ip, #4] + vorr q0, q15, q15 + ldr.w r8, [ip, #8] + mov.w r9, #12 + mov.w sl, #4 + nop + nop.w + +|TransformVertices_Strided_XYZNT_2_Loop| + pld [r0, #512] ; 0x200 + vld1.32 {d4-d6}, [r0], r4 + vld1.32 {d7-d8}, [r8], r4 + vmla.f32 q0, q12, d4[0] + vmul.f32 q1, q12, d5[1] + vmul.f32 q11, q12, d7[0] + vld1.32 {d9}, [r2], r4 + vmla.f32 q0, q13, d4[1] + vmla.f32 q1, q13, d6[0] + vmla.f32 q11, q13, d7[1] + vmla.f32 q0, q14, d5[0] + vmla.f32 q1, q14, d6[1] + vmla.f32 q11, q14, d8[0] + vst1.32 {d0-d1}, [r3], r6 + cmp r0, r1 + vorr q0, q15, q15 + vst1.32 {d2-d3}, [r3], r6 + vst1.32 {d9}, [r3]! + vtrn.32 d8, d7 + vst1.32 {d22-d23}, [r3], r9 + vst1.32 {d7[0]}, [r3], sl + bcc.w |TransformVertices_Strided_XYZNT_2_Loop| + ldmia.w sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + vpop {s0-s15} + bx lr + ENDP + + +|_s_TransformVertices_Strided_XYZNT_3_NEON| PROC + mov ip, sp + vpush {s0-s15} + stmdb sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + vldmia r3!, {d24-d31} + mov.w r6, #12 + ldr.w r3, [ip] + ldr.w r4, [ip, #4] + vorr q0, q15, q15 + ldr.w r8, [ip, #8] + mov.w r9, #12 + mov.w sl, #4 + nop + nop.w + +|TransformVertices_Strided_XYZNT_3_Loop| + pld [r0, #512] ; 0x200 + vld1.32 {d4-d6}, [r0], r4 + vld1.32 {d7-d8}, [r8], r4 + vmla.f32 q0, q12, d4[0] + vmul.f32 q1, q12, d5[1] + vmul.f32 q11, q12, d7[0] + vld1.32 {d9-d10}, [r2], r4 + vmla.f32 q0, q13, d4[1] + vmla.f32 q1, q13, d6[0] + vmla.f32 q11, q13, d7[1] + vmla.f32 q0, q14, d5[0] + vmla.f32 q1, q14, d6[1] + vmla.f32 q11, q14, d8[0] + vst1.32 {d0-d1}, [r3], r6 + cmp r0, r1 + vorr q0, q15, q15 + vst1.32 {d2-d3}, [r3], r6 + vst1.32 {d9}, [r3]! 
+ vst1.32 {d10[0]}, [r3]! + vtrn.32 d8, d7 + vst1.32 {d22-d23}, [r3], r9 + vst1.32 {d7[0]}, [r3], sl + bcc.w |TransformVertices_Strided_XYZNT_3_Loop| + ldmia.w sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + vpop {s0-s15} + bx lr + ENDP + + +|_s_TransformVertices_Strided_XYZNT_4_NEON| PROC + mov ip, sp + vpush {s0-s15} + stmdb sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + vldmia r3!, {d24-d31} + mov.w r6, #12 + ldr.w r3, [ip] + ldr.w r4, [ip, #4] + vorr q0, q15, q15 + ldr.w r8, [ip, #8] + mov.w r9, #12 + mov.w sl, #4 + nop + +|TransformVertices_Strided_XYZNT_4_Loop| + pld [r0, #512] ; 0x200 + vld1.32 {d4-d6}, [r0], r4 + vld1.32 {d7-d8}, [r8], r4 + vmla.f32 q0, q12, d4[0] + vmul.f32 q1, q12, d5[1] + vmul.f32 q11, q12, d7[0] + vld1.32 {d9-d10}, [r2], r4 + vmla.f32 q0, q13, d4[1] + vmla.f32 q1, q13, d6[0] + vmla.f32 q11, q13, d7[1] + vmla.f32 q0, q14, d5[0] + vmla.f32 q1, q14, d6[1] + vmla.f32 q11, q14, d8[0] + vst1.32 {d0-d1}, [r3], r6 + cmp r0, r1 + vorr q0, q15, q15 + vst1.32 {d2-d3}, [r3], r6 + vst1.32 {d9-d10}, [r3]! 
+ vtrn.32 d8, d7 + vst1.32 {d22-d23}, [r3], r9 + vst1.32 {d7[0]}, [r3], sl + bcc.w |TransformVertices_Strided_XYZNT_4_Loop| + ldmia.w sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + vpop {s0-s15} + bx lr + ENDP + + +|_s_TransformVertices_Strided_XYZNT_5_NEON| PROC + mov ip, sp + vpush {s0-s15} + stmdb sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + vldmia r3!, {d24-d31} + mov.w r6, #12 + ldr.w r3, [ip] + ldr.w r4, [ip, #4] + vorr q0, q15, q15 + ldr.w r8, [ip, #8] + mov.w r9, #12 + mov.w sl, #4 + nop + nop.w + +|TransformVertices_Strided_XYZNT_5_Loop| + pld [r0, #512] ; 0x200 + vld1.32 {d4-d6}, [r0], r4 + vld1.32 {d7-d8}, [r8], r4 + vmla.f32 q0, q12, d4[0] + vmul.f32 q1, q12, d5[1] + vmul.f32 q11, q12, d7[0] + vld1.32 {d9-d11}, [r2], r4 + vmla.f32 q0, q13, d4[1] + vmla.f32 q1, q13, d6[0] + vmla.f32 q11, q13, d7[1] + vmla.f32 q0, q14, d5[0] + vmla.f32 q1, q14, d6[1] + vmla.f32 q11, q14, d8[0] + vst1.32 {d0-d1}, [r3], r6 + cmp r0, r1 + vorr q0, q15, q15 + vst1.32 {d2-d3}, [r3], r6 + vst1.32 {d9-d10}, [r3]! + vst1.32 {d11[0]}, [r3]! 
+ vtrn.32 d8, d7 + vst1.32 {d22-d23}, [r3], r9 + vst1.32 {d7[0]}, [r3], sl + bcc.w |TransformVertices_Strided_XYZNT_5_Loop| + ldmia.w sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + vpop {s0-s15} + bx lr + nop.w + nop.w + nop.w + ENDP + + + END diff --git a/Runtime/Filters/Mesh/TransformVertexNEON.s b/Runtime/Filters/Mesh/TransformVertexNEON.s new file mode 100644 index 0000000..e21a554 --- /dev/null +++ b/Runtime/Filters/Mesh/TransformVertexNEON.s @@ -0,0 +1,224 @@ +#define UNITY_ASSEMBLER +#include "Configuration/PrefixConfigure.h" + +#if UNITY_SUPPORTS_NEON + +.set device,0 +.set device,__arm__ + +.if device + +//.code32 + + +.globl _s_TransformVertices_Strided_XYZ_0_NEON +.globl _s_TransformVertices_Strided_XYZ_1_NEON +.globl _s_TransformVertices_Strided_XYZ_2_NEON +.globl _s_TransformVertices_Strided_XYZ_3_NEON +.globl _s_TransformVertices_Strided_XYZ_4_NEON +.globl _s_TransformVertices_Strided_XYZ_5_NEON + +.globl _s_TransformVertices_Strided_XYZN_0_NEON +.globl _s_TransformVertices_Strided_XYZN_1_NEON +.globl _s_TransformVertices_Strided_XYZN_2_NEON +.globl _s_TransformVertices_Strided_XYZN_3_NEON +.globl _s_TransformVertices_Strided_XYZN_4_NEON +.globl _s_TransformVertices_Strided_XYZN_5_NEON + +.globl _s_TransformVertices_Strided_XYZNT_0_NEON +.globl _s_TransformVertices_Strided_XYZNT_1_NEON +.globl _s_TransformVertices_Strided_XYZNT_2_NEON +.globl _s_TransformVertices_Strided_XYZNT_3_NEON +.globl _s_TransformVertices_Strided_XYZNT_4_NEON +.globl _s_TransformVertices_Strided_XYZNT_5_NEON + +.globl _s_TransformVertices_Sprite_NEON + + +#define STRIDED_INPUT 1 + + +#define LOOP_XYZ 1 +#define LOOP_XYZN 0 +#define LOOP_XYZNT 0 +#define LOOP_SPRITE 0 + + +_s_TransformVertices_Strided_XYZ_0_NEON: +#define COPY_DATA_SZ 0 +#define LOOP_NAME TransformVertices_Strided_XYZ_0_Loop +#include "TransformVertexNEON_Loop.h" +#undef COPY_DATA_SZ +#undef LOOP_NAME + +_s_TransformVertices_Strided_XYZ_1_NEON: +#define COPY_DATA_SZ 1 +#define LOOP_NAME 
TransformVertices_Strided_XYZ_1_Loop +#include "TransformVertexNEON_Loop.h" +#undef COPY_DATA_SZ +#undef LOOP_NAME + +_s_TransformVertices_Strided_XYZ_2_NEON: +#define COPY_DATA_SZ 2 +#define LOOP_NAME TransformVertices_Strided_XYZ_2_Loop +#include "TransformVertexNEON_Loop.h" +#undef COPY_DATA_SZ +#undef LOOP_NAME + +_s_TransformVertices_Strided_XYZ_3_NEON: +#define COPY_DATA_SZ 3 +#define LOOP_NAME TransformVertices_Strided_XYZ_3_Loop +#include "TransformVertexNEON_Loop.h" +#undef COPY_DATA_SZ +#undef LOOP_NAME + +_s_TransformVertices_Strided_XYZ_4_NEON: +#define COPY_DATA_SZ 4 +#define LOOP_NAME TransformVertices_Strided_XYZ_4_Loop +#include "TransformVertexNEON_Loop.h" +#undef COPY_DATA_SZ +#undef LOOP_NAME + +_s_TransformVertices_Strided_XYZ_5_NEON: +#define COPY_DATA_SZ 5 +#define LOOP_NAME TransformVertices_Strided_XYZ_5_Loop +#include "TransformVertexNEON_Loop.h" +#undef COPY_DATA_SZ +#undef LOOP_NAME + + +#undef LOOP_XYZ +#undef LOOP_XYZN +#undef LOOP_XYZNT +#undef LOOP_SPRITE + + +#define LOOP_XYZ 0 +#define LOOP_XYZN 1 +#define LOOP_XYZNT 0 +#define LOOP_SPRITE 0 + + +_s_TransformVertices_Strided_XYZN_0_NEON: +#define COPY_DATA_SZ 0 +#define LOOP_NAME TransformVertices_Strided_XYZN_0_Loop +#include "TransformVertexNEON_Loop.h" +#undef COPY_DATA_SZ +#undef LOOP_NAME + +_s_TransformVertices_Strided_XYZN_1_NEON: +#define COPY_DATA_SZ 1 +#define LOOP_NAME TransformVertices_Strided_XYZN_1_Loop +#include "TransformVertexNEON_Loop.h" +#undef COPY_DATA_SZ +#undef LOOP_NAME + +_s_TransformVertices_Strided_XYZN_2_NEON: +#define COPY_DATA_SZ 2 +#define LOOP_NAME TransformVertices_Strided_XYZN_2_Loop +#include "TransformVertexNEON_Loop.h" +#undef COPY_DATA_SZ +#undef LOOP_NAME + +_s_TransformVertices_Strided_XYZN_3_NEON: +#define COPY_DATA_SZ 3 +#define LOOP_NAME TransformVertices_Strided_XYZN_3_Loop +#include "TransformVertexNEON_Loop.h" +#undef COPY_DATA_SZ +#undef LOOP_NAME + +_s_TransformVertices_Strided_XYZN_4_NEON: +#define COPY_DATA_SZ 4 +#define LOOP_NAME 
TransformVertices_Strided_XYZN_4_Loop +#include "TransformVertexNEON_Loop.h" +#undef COPY_DATA_SZ +#undef LOOP_NAME + +_s_TransformVertices_Strided_XYZN_5_NEON: +#define COPY_DATA_SZ 5 +#define LOOP_NAME TransformVertices_Strided_XYZN_5_Loop +#include "TransformVertexNEON_Loop.h" +#undef COPY_DATA_SZ +#undef LOOP_NAME + + +#undef LOOP_XYZ +#undef LOOP_XYZN +#undef LOOP_XYZNT +#undef LOOP_SPRITE + + +#define LOOP_XYZ 0 +#define LOOP_XYZN 0 +#define LOOP_XYZNT 1 +#define LOOP_SPRITE 0 + + +_s_TransformVertices_Strided_XYZNT_0_NEON: +#define COPY_DATA_SZ 0 +#define LOOP_NAME TransformVertices_Strided_XYZNT_0_Loop +#include "TransformVertexNEON_Loop.h" +#undef COPY_DATA_SZ +#undef LOOP_NAME + +_s_TransformVertices_Strided_XYZNT_1_NEON: +#define COPY_DATA_SZ 1 +#define LOOP_NAME TransformVertices_Strided_XYZNT_1_Loop +#include "TransformVertexNEON_Loop.h" +#undef COPY_DATA_SZ +#undef LOOP_NAME + +_s_TransformVertices_Strided_XYZNT_2_NEON: +#define COPY_DATA_SZ 2 +#define LOOP_NAME TransformVertices_Strided_XYZNT_2_Loop +#include "TransformVertexNEON_Loop.h" +#undef COPY_DATA_SZ +#undef LOOP_NAME + +_s_TransformVertices_Strided_XYZNT_3_NEON: +#define COPY_DATA_SZ 3 +#define LOOP_NAME TransformVertices_Strided_XYZNT_3_Loop +#include "TransformVertexNEON_Loop.h" +#undef COPY_DATA_SZ +#undef LOOP_NAME + +_s_TransformVertices_Strided_XYZNT_4_NEON: +#define COPY_DATA_SZ 4 +#define LOOP_NAME TransformVertices_Strided_XYZNT_4_Loop +#include "TransformVertexNEON_Loop.h" +#undef COPY_DATA_SZ +#undef LOOP_NAME + +_s_TransformVertices_Strided_XYZNT_5_NEON: +#define COPY_DATA_SZ 5 +#define LOOP_NAME TransformVertices_Strided_XYZNT_5_Loop +#include "TransformVertexNEON_Loop.h" +#undef COPY_DATA_SZ +#undef LOOP_NAME + + +#undef LOOP_XYZ +#undef LOOP_XYZN +#undef LOOP_XYZNT +#undef LOOP_SPRITE + +#define LOOP_XYZ 0 +#define LOOP_XYZN 0 +#define LOOP_XYZNT 0 +#define LOOP_SPRITE 1 + +_s_TransformVertices_Sprite_NEON: +#define LOOP_NAME TransformVertices_Sprite_Loop +#include 
"TransformVertexNEON_Loop.h" +#undef LOOP_NAME + +#undef LOOP_XYZ +#undef LOOP_XYZN +#undef LOOP_XYZNT +#undef LOOP_SPRITE + +#undef STRIDED_INPUT + +.endif + +#endif
\ No newline at end of file diff --git a/Runtime/Filters/Mesh/TransformVertexNEON_Loop.h b/Runtime/Filters/Mesh/TransformVertexNEON_Loop.h new file mode 100644 index 0000000..d84a516 --- /dev/null +++ b/Runtime/Filters/Mesh/TransformVertexNEON_Loop.h @@ -0,0 +1,254 @@ +// TODO: SOA + +// defines +// LOOP_XYZ +// LOOP_XYZN +// LOOP_XYZNT +// LOOP_NAME +// COPY_DATA_SZ +// STRIDED_INPUT + +#if STRIDED_INPUT + +//r0: const void* srcData +//r1: const void* srcDataEnd +//r2: const void* addData +//r3: const void* xform +//[sp+0]: void* dstData +//[sp+4]: const int stride + +mov ip, sp + +vpush {d0-d15} +stmfd sp!, {r4-r11} + +vldmia r3!, {q12-q15} + +// r3:dstData +// r4: stride +// r6: proper offset for out ptr (pos, normal) + +mov r6, #12 + +ldr r3, [ip, #0] +ldr r4, [ip, #4] + +// overlap calculation + +vmov.32 q0, q15 // pos.w (1.0) + + +#if LOOP_XYZ + +.align 4 +LOOP_NAME: + +pld [r0, #512] // prefetch + +vld1.32 {d6,d7}, [r0], r4 // load pos + +vmla.f32 q0, q12, d6[0] // pos.x +vmul.f32 q1, q13, d6[1] // pos.y +vmul.f32 q2, q14, d7[0] // pos.z + +vadd.f32 q0, q0, q1 + // load additional data +#if COPY_DATA_SZ == 1 +vld1.32 {d9}, [r2], r4 +#elif COPY_DATA_SZ == 2 +vld1.32 {d9}, [r2], r4 +#elif COPY_DATA_SZ == 3 +vld1.32 {d9,d10}, [r2], r4 +#elif COPY_DATA_SZ == 4 +vld1.32 {d9,d10}, [r2], r4 +#elif COPY_DATA_SZ == 5 +vld1.32 {d9,d10,d11}, [r2], r4 +#endif + +vadd.f32 q0, q0, q2 +cmp r0, r1 // check cycle + +vst1.32 {d0,d1}, [r3], r6 + +vmov.32 q0, q15 // pos.w (1.0) + // save additional data +#if COPY_DATA_SZ == 1 +vst1.32 {d9[0]}, [r3]! +#elif COPY_DATA_SZ == 2 +vst1.32 {d9}, [r3]! +#elif COPY_DATA_SZ == 3 +vst1.32 {d9}, [r3]! +vst1.32 {d10[0]}, [r3]! +#elif COPY_DATA_SZ == 4 +vst1.32 {d9,d10}, [r3]! +#elif COPY_DATA_SZ == 5 +vst1.32 {d9,d10}, [r3]! +vst1.32 {d11[0]}, [r3]! 
+#endif + +bcc LOOP_NAME + + +#elif LOOP_XYZN + + +.align 4 +LOOP_NAME: + +pld [r0, #512] // prefetch + +vld1.32 {d4,d5,d6}, [r0], r4 // load pos + normal + +vmla.f32 q0, q12, d4[0] // pos.x +vmul.f32 q1, q12, d5[1] // normal.x + + // load additional data +#if COPY_DATA_SZ == 1 +vld1.32 {d9}, [r2], r4 +#elif COPY_DATA_SZ == 2 +vld1.32 {d9}, [r2], r4 +#elif COPY_DATA_SZ == 3 +vld1.32 {d9,d10}, [r2], r4 +#elif COPY_DATA_SZ == 4 +vld1.32 {d9,d10}, [r2], r4 +#elif COPY_DATA_SZ == 5 +vld1.32 {d9,d10,d11}, [r2], r4 +#endif + +vmla.f32 q0, q13, d4[1] // pos.y +vmla.f32 q1, q13, d6[0] // normal.y + +vmla.f32 q0, q14, d5[0] // pos.z +vmla.f32 q1, q14, d6[1] // normal.z + +vst1.32 {d0,d1}, [r3], r6 + +cmp r0, r1 // check cycle +vmov.32 q0, q15 // pos.w (1.0) +vst1.32 {d2,d3}, [r3], r6 + // save additional data +#if COPY_DATA_SZ == 1 +vst1.32 {d9[0]}, [r3]! +#elif COPY_DATA_SZ == 2 +vst1.32 {d9}, [r3]! +#elif COPY_DATA_SZ == 3 +vst1.32 {d9}, [r3]! +vst1.32 {d10[0]}, [r3]! +#elif COPY_DATA_SZ == 4 +vst1.32 {d9,d10}, [r3]! +#elif COPY_DATA_SZ == 5 +vst1.32 {d9,d10}, [r3]! +vst1.32 {d11[0]}, [r3]! 
+#endif + + +bcc LOOP_NAME + + +#elif LOOP_XYZNT + +//[sp+8]: const void* tangent +//r8: tangent + +ldr r8, [ip, #8] + +mov r9, #12 +mov r10, #4 + +.align 4 +LOOP_NAME: + +pld [r0, #512] // prefetch + +vld1.32 {d4,d5,d6}, [r0], r4 // load pos + normal +vld1.32 {d7,d8}, [r8], r4 // load tangent + +vmla.f32 q0, q12, d4[0] // pos.x +vmul.f32 q1, q12, d5[1] // normal.x +vmul.f32 q11, q12, d7[0] // tangent.x + + // load additional data +#if COPY_DATA_SZ == 1 +vld1.32 {d9}, [r2], r4 +#elif COPY_DATA_SZ == 2 +vld1.32 {d9}, [r2], r4 +#elif COPY_DATA_SZ == 3 +vld1.32 {d9,d10}, [r2], r4 +#elif COPY_DATA_SZ == 4 +vld1.32 {d9,d10}, [r2], r4 +#elif COPY_DATA_SZ == 5 +vld1.32 {d9,d10,d11}, [r2], r4 +#endif + +vmla.f32 q0, q13, d4[1] // pos.y +vmla.f32 q1, q13, d6[0] // normal.y +vmla.f32 q11, q13, d7[1] // tangent.y + +vmla.f32 q0, q14, d5[0] // pos.z +vmla.f32 q1, q14, d6[1] // normal.z +vmla.f32 q11, q14, d8[0] // tangent.z + +vst1.32 {d0,d1}, [r3], r6 + +cmp r0, r1 // check cycle +vmov.32 q0, q15 // pos.w (1.0) +vst1.32 {d2,d3}, [r3], r6 + // save additional data +#if COPY_DATA_SZ == 1 +vst1.32 {d9[0]}, [r3]! +#elif COPY_DATA_SZ == 2 +vst1.32 {d9}, [r3]! +#elif COPY_DATA_SZ == 3 +vst1.32 {d9}, [r3]! +vst1.32 {d10[0]}, [r3]! +#elif COPY_DATA_SZ == 4 +vst1.32 {d9,d10}, [r3]! +#elif COPY_DATA_SZ == 5 +vst1.32 {d9,d10}, [r3]! +vst1.32 {d11[0]}, [r3]! +#endif + + +// TODO: less stupid way + +vtrn.32 d8, d7 +vst1.32 {d22,d23}, [r3], r9 +vst1.32 {d7[0]}, [r3], r10 + +bcc LOOP_NAME +#elif LOOP_SPRITE +.align 4 +ldr r7, [ip, #8] // load color32 +vmov.32 d10[0], r7 +LOOP_NAME: + +pld [r0, #512] // prefetch + +vld1.32 {d6,d7}, [r0], r4 // load pos + +vmla.f32 q0, q12, d6[0] // pos.x +vmul.f32 q1, q13, d6[1] // pos.y +vmul.f32 q2, q14, d7[0] // pos.z +vadd.f32 q0, q0, q1 +// load data +vld1.32 {d9}, [r2], r4 + +vadd.f32 q0, q0, q2 +cmp r0, r1 // check cycle + +vst1.32 {d0,d1}, [r3], r6 + +vmov.32 q0, q15 // pos.w (1.0) +// save data +vst1.32 {d10[0]}, [r3]! +vst1.32 {d9}, [r3]! 
+ + +bcc LOOP_NAME +#endif + +ldmfd sp!, {r4-r11} +vpop {d0-d15} +bx lr + +#endif diff --git a/Runtime/Filters/Mesh/TransformVertexVFP.s b/Runtime/Filters/Mesh/TransformVertexVFP.s new file mode 100644 index 0000000..114afc6 --- /dev/null +++ b/Runtime/Filters/Mesh/TransformVertexVFP.s @@ -0,0 +1,250 @@ +#define UNITY_ASSEMBLER +#include "Configuration/PrefixConfigure.h" +#include "Runtime/Utilities/VFPUtility.h" + +#if UNITY_SUPPORTS_VFP + +.syntax unified + +.set device,0 +.set device,__arm__ + +.if device + +//.code32 + + +.globl _s_TransformVertices_Strided_XYZ_0_VFP +.globl _s_TransformVertices_Strided_XYZ_1_VFP +.globl _s_TransformVertices_Strided_XYZ_2_VFP +.globl _s_TransformVertices_Strided_XYZ_3_VFP +.globl _s_TransformVertices_Strided_XYZ_4_VFP +.globl _s_TransformVertices_Strided_XYZ_5_VFP + +.globl _s_TransformVertices_Strided_XYZN_0_VFP +.globl _s_TransformVertices_Strided_XYZN_1_VFP +.globl _s_TransformVertices_Strided_XYZN_2_VFP +.globl _s_TransformVertices_Strided_XYZN_3_VFP +.globl _s_TransformVertices_Strided_XYZN_4_VFP +.globl _s_TransformVertices_Strided_XYZN_5_VFP + +.globl _s_TransformVertices_Strided_XYZNT_0_VFP +.globl _s_TransformVertices_Strided_XYZNT_1_VFP +.globl _s_TransformVertices_Strided_XYZNT_2_VFP +.globl _s_TransformVertices_Strided_XYZNT_3_VFP +.globl _s_TransformVertices_Strided_XYZNT_4_VFP +.globl _s_TransformVertices_Strided_XYZNT_5_VFP + +.globl _s_TransformVertices_Sprite_VFP + + +#if UNITY_ANDROID +.hidden _s_TransformVertices_Strided_XYZ_0_VFP +.hidden _s_TransformVertices_Strided_XYZ_1_VFP +.hidden _s_TransformVertices_Strided_XYZ_2_VFP +.hidden _s_TransformVertices_Strided_XYZ_3_VFP +.hidden _s_TransformVertices_Strided_XYZ_4_VFP +.hidden _s_TransformVertices_Strided_XYZ_5_VFP + +.hidden _s_TransformVertices_Strided_XYZN_0_VFP +.hidden _s_TransformVertices_Strided_XYZN_1_VFP +.hidden _s_TransformVertices_Strided_XYZN_2_VFP +.hidden _s_TransformVertices_Strided_XYZN_3_VFP +.hidden _s_TransformVertices_Strided_XYZN_4_VFP 
+.hidden _s_TransformVertices_Strided_XYZN_5_VFP + +.hidden _s_TransformVertices_Strided_XYZNT_0_VFP +.hidden _s_TransformVertices_Strided_XYZNT_1_VFP +.hidden _s_TransformVertices_Strided_XYZNT_2_VFP +.hidden _s_TransformVertices_Strided_XYZNT_3_VFP +.hidden _s_TransformVertices_Strided_XYZNT_4_VFP +.hidden _s_TransformVertices_Strided_XYZNT_5_VFP + +.hidden _s_TransformVertices_Sprite_VFP +#endif + +#define STRIDED_INPUT 1 + + +#define LOOP_XYZ 1 +#define LOOP_XYZN 0 +#define LOOP_XYZNT 0 +#define LOOP_SPRITE 0 + +_s_TransformVertices_Strided_XYZ_0_VFP: +#define COPY_DATA_SZ 0 +#define LOOP_NAME TransformVertices_Strided_XYZ_0_Loop +#include "TransformVertexVFP_Loop.h" +#undef COPY_DATA_SZ +#undef LOOP_NAME + +_s_TransformVertices_Strided_XYZ_1_VFP: +#define COPY_DATA_SZ 1 +#define LOOP_NAME TransformVertices_Strided_XYZ_1_Loop +#include "TransformVertexVFP_Loop.h" +#undef COPY_DATA_SZ +#undef LOOP_NAME + +_s_TransformVertices_Strided_XYZ_2_VFP: +#define COPY_DATA_SZ 2 +#define LOOP_NAME TransformVertices_Strided_XYZ_2_Loop +#include "TransformVertexVFP_Loop.h" +#undef COPY_DATA_SZ +#undef LOOP_NAME + +_s_TransformVertices_Strided_XYZ_3_VFP: +#define COPY_DATA_SZ 3 +#define LOOP_NAME TransformVertices_Strided_XYZ_3_Loop +#include "TransformVertexVFP_Loop.h" +#undef COPY_DATA_SZ +#undef LOOP_NAME + +_s_TransformVertices_Strided_XYZ_4_VFP: +#define COPY_DATA_SZ 4 +#define LOOP_NAME TransformVertices_Strided_XYZ_4_Loop +#include "TransformVertexVFP_Loop.h" +#undef COPY_DATA_SZ +#undef LOOP_NAME + +_s_TransformVertices_Strided_XYZ_5_VFP: +#define COPY_DATA_SZ 5 +#define LOOP_NAME TransformVertices_Strided_XYZ_5_Loop +#include "TransformVertexVFP_Loop.h" +#undef COPY_DATA_SZ +#undef LOOP_NAME + + +#undef LOOP_XYZ +#undef LOOP_XYZN +#undef LOOP_XYZNT +#undef LOOP_SPRITE + + +#define LOOP_XYZ 0 +#define LOOP_XYZN 1 +#define LOOP_XYZNT 0 +#define LOOP_SPRITE 0 + + +_s_TransformVertices_Strided_XYZN_0_VFP: +#define COPY_DATA_SZ 0 +#define LOOP_NAME 
TransformVertices_Strided_XYZN_0_Loop +#include "TransformVertexVFP_Loop.h" +#undef COPY_DATA_SZ +#undef LOOP_NAME + +_s_TransformVertices_Strided_XYZN_1_VFP: +#define COPY_DATA_SZ 1 +#define LOOP_NAME TransformVertices_Strided_XYZN_1_Loop +#include "TransformVertexVFP_Loop.h" +#undef COPY_DATA_SZ +#undef LOOP_NAME + +_s_TransformVertices_Strided_XYZN_2_VFP: +#define COPY_DATA_SZ 2 +#define LOOP_NAME TransformVertices_Strided_XYZN_2_Loop +#include "TransformVertexVFP_Loop.h" +#undef COPY_DATA_SZ +#undef LOOP_NAME + +_s_TransformVertices_Strided_XYZN_3_VFP: +#define COPY_DATA_SZ 3 +#define LOOP_NAME TransformVertices_Strided_XYZN_3_Loop +#include "TransformVertexVFP_Loop.h" +#undef COPY_DATA_SZ +#undef LOOP_NAME + +_s_TransformVertices_Strided_XYZN_4_VFP: +#define COPY_DATA_SZ 4 +#define LOOP_NAME TransformVertices_Strided_XYZN_4_Loop +#include "TransformVertexVFP_Loop.h" +#undef COPY_DATA_SZ +#undef LOOP_NAME + +_s_TransformVertices_Strided_XYZN_5_VFP: +#define COPY_DATA_SZ 5 +#define LOOP_NAME TransformVertices_Strided_XYZN_5_Loop +#include "TransformVertexVFP_Loop.h" +#undef COPY_DATA_SZ +#undef LOOP_NAME + + +#undef LOOP_XYZ +#undef LOOP_XYZN +#undef LOOP_XYZNT +#undef LOOP_SPRITE + + +#define LOOP_XYZ 0 +#define LOOP_XYZN 0 +#define LOOP_XYZNT 1 +#define LOOP_SPRITE 0 + + +_s_TransformVertices_Strided_XYZNT_0_VFP: +#define COPY_DATA_SZ 0 +#define LOOP_NAME TransformVertices_Strided_XYZNT_0_Loop +#include "TransformVertexVFP_Loop.h" +#undef COPY_DATA_SZ +#undef LOOP_NAME + +_s_TransformVertices_Strided_XYZNT_1_VFP: +#define COPY_DATA_SZ 1 +#define LOOP_NAME TransformVertices_Strided_XYZNT_1_Loop +#include "TransformVertexVFP_Loop.h" +#undef COPY_DATA_SZ +#undef LOOP_NAME + +_s_TransformVertices_Strided_XYZNT_2_VFP: +#define COPY_DATA_SZ 2 +#define LOOP_NAME TransformVertices_Strided_XYZNT_2_Loop +#include "TransformVertexVFP_Loop.h" +#undef COPY_DATA_SZ +#undef LOOP_NAME + +_s_TransformVertices_Strided_XYZNT_3_VFP: +#define COPY_DATA_SZ 3 +#define LOOP_NAME 
TransformVertices_Strided_XYZNT_3_Loop +#include "TransformVertexVFP_Loop.h" +#undef COPY_DATA_SZ +#undef LOOP_NAME + +_s_TransformVertices_Strided_XYZNT_4_VFP: +#define COPY_DATA_SZ 4 +#define LOOP_NAME TransformVertices_Strided_XYZNT_4_Loop +#include "TransformVertexVFP_Loop.h" +#undef COPY_DATA_SZ +#undef LOOP_NAME + +_s_TransformVertices_Strided_XYZNT_5_VFP: +#define COPY_DATA_SZ 5 +#define LOOP_NAME TransformVertices_Strided_XYZNT_5_Loop +#include "TransformVertexVFP_Loop.h" +#undef COPY_DATA_SZ +#undef LOOP_NAME + +#undef LOOP_XYZ +#undef LOOP_XYZN +#undef LOOP_XYZNT +#undef LOOP_SPRITE + +#define LOOP_XYZ 0 +#define LOOP_XYZN 0 +#define LOOP_XYZNT 0 +#define LOOP_SPRITE 1 + +_s_TransformVertices_Sprite_VFP: +#define LOOP_NAME TransformVerties_Sprite_Loop +#include "TransformVertexVFP_Loop.h" +#undef LOOP_NAME + +#undef LOOP_XYZ +#undef LOOP_XYZN +#undef LOOP_XYZNT +#undef LOOP_SPRITE + +#undef STRIDED_INPUT + +.endif + +#endif diff --git a/Runtime/Filters/Mesh/TransformVertexVFP_Loop.h b/Runtime/Filters/Mesh/TransformVertexVFP_Loop.h new file mode 100644 index 0000000..48193c8 --- /dev/null +++ b/Runtime/Filters/Mesh/TransformVertexVFP_Loop.h @@ -0,0 +1,252 @@ +// defines +// LOOP_XYZ +// LOOP_XYZN +// LOOP_XYZNT +// LOOP_SPRITE +// LOOP_NAME +// COPY_DATA_SZ +// STRIDED_INPUT + +#if STRIDED_INPUT + +//r0: const void* srcData +//r1: const void* srcDataEnd +//r2: const void* addData +//r3: const void* xform +//[sp+0]: void* dstData +//[sp+4]: const int stride +//[sp+8]: const void* tangent + +mov ip, sp + +vpush {d0-d15} +stmfd sp!, {r4-r11} + +// {s16-s31} xform + +vldmia.32 r3!, {s16-s31} + +// r3: dstData +// r4: stride +//r11: tangent +ldr r3, [ip, #0] +ldr r4, [ip, #4] + +#if LOOP_XYZNT +ldr r11, [ip, #8] +#endif + +#if LOOP_SPRITE +//r6: color +ldr r6, [ip, #8] +#endif + + +mov ip, r0 +// VFP_VECTOR_LENGTH(3) +mov r0, ip + + +#if LOOP_XYZ + +.align 4 +LOOP_NAME: + +mov r5, r0 +pld [r0, #512] // prefetch + +vldmia.32 r5!, {s0-s2} // load pos +FCPYS4 
(8,9,10,11, 28,29,30,31) // pos.w + +FMACS4 (8,9,10,11, 16,17,18,19, 0,0,0,0) // pos.x +#if COPY_DATA_SZ == 1 +ldmia r2, {r6} // load additional data +#elif COPY_DATA_SZ == 2 +ldmia r2, {r6-r7} // load additional data +#elif COPY_DATA_SZ == 3 +ldmia r2, {r6-r8} // load additional data +#elif COPY_DATA_SZ == 4 +ldmia r2, {r6-r9} // load additional data +#elif COPY_DATA_SZ == 5 +ldmia r2, {r6-r10} // load additional data +#endif + +FMACS4 (8,9,10,11, 20,21,22,23, 1,1,1,1) // pos.y +add r0, r0, r4 // inc srcData + +FMACS4 (8,9,10,11, 24,25,26,27, 2,2,2,2) // pos.z +add r2, r2, r4 // inc srcAddData + +vstmia.32 r3!, {s8-s10} // store pos +cmp r0, r1 // check cycle + +#if COPY_DATA_SZ == 1 +stmia r3!, {r6} // save additional data +#elif COPY_DATA_SZ == 2 +stmia r3!, {r6-r7} // save additional data +#elif COPY_DATA_SZ == 3 +stmia r3!, {r6-r8} // save additional data +#elif COPY_DATA_SZ == 4 +stmia r3!, {r6-r9} // save additional data +#elif COPY_DATA_SZ == 5 +stmia r3!, {r6-r10} // save additional data +#endif + +bcc LOOP_NAME + + +#elif LOOP_XYZN + +.align 4 +LOOP_NAME: + +mov r5, r0 +pld [r0, #512] // prefetch + +vldmia.32 r5!, {s0-s2} // load pos +FCPYS4 (8,9,10,11, 28,29,30,31) // pos.w + +vldmia.32 r5!, {s3-s5} // load normal +FMACS4 (8,9,10,11, 16,17,18,19, 0,0,0,0) // pos.x + +FMULS4 (12,13,14,15, 16,17,18,19, 3,3,3,3) // normal.x +FMACS4 (8,9,10,11, 20,21,22,23, 1,1,1,1) // pos.y + +#if COPY_DATA_SZ == 1 +ldmia r2, {r6} // load additional data +#elif COPY_DATA_SZ == 2 +ldmia r2, {r6-r7} // load additional data +#elif COPY_DATA_SZ == 3 +ldmia r2, {r6-r8} // load additional data +#elif COPY_DATA_SZ == 4 +ldmia r2, {r6-r9} // load additional data +#elif COPY_DATA_SZ == 5 +ldmia r2, {r6-r10} // load additional data +#endif +FMACS4 (8,9,10,11, 24,25,26,27, 2,2,2,2) // pos.z + +FMACS4 (12,13,14,15, 20,21,22,23, 4,4,4,4) // normal.y +vstmia.32 r3!, {s8-s10} // store pos + +FMACS4 (12,13,14,15, 24,25,26,27, 5,5,5,5) // normal.z +add r0, r0, r4 // inc srcData + +vstmia.32 
r3!, {s12-s14} // store normal +add r2, r2, r4 // inc srcAddData + +cmp r0, r1 // check cycle +#if COPY_DATA_SZ == 1 +stmia r3!, {r6} // save additional data +#elif COPY_DATA_SZ == 2 +stmia r3!, {r6-r7} // save additional data +#elif COPY_DATA_SZ == 3 +stmia r3!, {r6-r8} // save additional data +#elif COPY_DATA_SZ == 4 +stmia r3!, {r6-r9} // save additional data +#elif COPY_DATA_SZ == 5 +stmia r3!, {r6-r10} // save additional data +#endif + +bcc LOOP_NAME + +#elif LOOP_XYZNT + +.align 4 +LOOP_NAME: + +mov r5, r0 +pld [r0, #512] // prefetch + +vldmia.32 r5!, {s0-s2} // load pos +FCPYS4 (8,9,10,11, 28,29,30,31) // pos.w + +vldmia.32 r5!, {s3-s5} // load normal +FMACS4 (8,9,10,11, 16,17,18,19, 0,0,0,0) // pos.x + +FMULS4 (12,13,14,15, 16,17,18,19, 3,3,3,3) // normal.x +FMACS4 (8,9,10,11, 20,21,22,23, 1,1,1,1) // pos.y + +#if COPY_DATA_SZ == 1 +ldmia r2, {r6} // load additional data +#elif COPY_DATA_SZ == 2 +ldmia r2, {r6-r7} // load additional data +#elif COPY_DATA_SZ == 3 +ldmia r2, {r6-r8} // load additional data +#elif COPY_DATA_SZ == 4 +ldmia r2, {r6-r9} // load additional data +#elif COPY_DATA_SZ == 5 +ldmia r2, {r6-r10} // load additional data +#endif +FMACS4 (8,9,10,11, 24,25,26,27, 2,2,2,2) // pos.z + +FMACS4 (12,13,14,15, 20,21,22,23, 4,4,4,4) // normal.y +vstmia.32 r3!, {s8-s10} // store pos + +FMACS4 (12,13,14,15, 24,25,26,27, 5,5,5,5) // normal.z +vldmia.32 r11, {s0-s3} // load tangent + +add r0, r0, r4 // inc srcData +FMULS4 (8,9,10,11, 16,17,18,19, 0,0,0,0) // tangent.x + +vstmia.32 r3!, {s12-s14} // store normal +FMACS4 (8,9,10,11, 20,21,22,23, 1,1,1,1) // tangent.y + +cmp r0, r1 // check cycle +FMACS4 (8,9,10,11, 24,25,26,27, 2,2,2,2) // tangent.z + +#if COPY_DATA_SZ == 1 +stmia r3!, {r6} // save additional data +#elif COPY_DATA_SZ == 2 +stmia r3!, {r6-r7} // save additional data +#elif COPY_DATA_SZ == 3 +stmia r3!, {r6-r8} // save additional data +#elif COPY_DATA_SZ == 4 +stmia r3!, {r6-r9} // save additional data +#elif COPY_DATA_SZ == 5 +stmia r3!, 
{r6-r10} // save additional data +#endif +fcpys s11, s3 // copy tangent.w + +vstmia.32 r3!, {s8-s11} // store tangent +add r2, r2, r4 // inc srcAddData + +add r11, r11, r4 // inc srcTangent +bcc LOOP_NAME + +#elif LOOP_SPRITE + +.align 4 +LOOP_NAME: + +mov r5, r0 +pld [r0, #512] // prefetch + +vldmia.32 r5!, {s0-s2} // load pos +FCPYS4 (8,9,10,11, 28,29,30,31) // pos.w + +FMACS4 (8,9,10,11, 16,17,18,19, 0,0,0,0) // pos.x + + +ldmia r2, {r7-r8} // load uv + +FMACS4 (8,9,10,11, 20,21,22,23, 1,1,1,1) // pos.y +add r0, r0, r4 // inc srcData + +FMACS4 (8,9,10,11, 24,25,26,27, 2,2,2,2) // pos.z +add r2, r2, r4 // inc srcAddData + +vstmia.32 r3!, {s8-s10} // store pos +cmp r0, r1 // check cycle + +stmia r3!, {r6-r8} // save color and uv + +bcc LOOP_NAME +#endif + +// VFP_VECTOR_LENGTH_ZERO + +ldmfd sp!, {r4-r11} +vpop {d0-d15} +bx lr + +#endif // STRIDED_INPUT diff --git a/Runtime/Filters/Mesh/VertexData.cpp b/Runtime/Filters/Mesh/VertexData.cpp new file mode 100644 index 0000000..b922805 --- /dev/null +++ b/Runtime/Filters/Mesh/VertexData.cpp @@ -0,0 +1,559 @@ +#include "UnityPrefix.h" +#include "Configuration/UnityConfigure.h" +#include "VertexData.h" +#include "Runtime/Shaders/VBO.h" +#include "Runtime/Serialize/TransferFunctions/SerializeTransfer.h" +#include "Runtime/Serialize/TransferUtility.h" +#include "Runtime/Serialize/SwapEndianArray.h" +#include <algorithm> + +/* + On most platforms, for skinning/non-uniform-scaling of meshes you would want to split your data into + a hot data stream (position, normal and tangent) and a cold data stream (diffuse and uvs) in order to maximize CPU cache access patterns and + reduce bandwidth and computation ( you won't need to copy the cold data ) +*/ + +VertexStreamsLayout VertexDataInfo::kVertexStreamsDefault = {{ kShaderChannelsAll, 0, 0, 0 }}; +#if UNITY_PS3 + VertexStreamsLayout VertexDataInfo::kVertexStreamsSkinnedHotColdSplit = {{ VERTEX_FORMAT1(Vertex), VERTEX_FORMAT1(Normal), VERTEX_FORMAT1(Tangent), kShaderChannelsCold 
}}; +#else + VertexStreamsLayout VertexDataInfo::kVertexStreamsSkinnedHotColdSplit = {{ kShaderChannelsHot, kShaderChannelsCold, 0, 0 }}; +# if UNITY_EDITOR + VertexStreamsLayout VertexDataInfo::kVertexStreamsSkinnedHotColdSplitPS3 = {{ VERTEX_FORMAT1(Vertex), VERTEX_FORMAT1(Normal), VERTEX_FORMAT1(Tangent), kShaderChannelsCold }}; +# endif +#endif + +#define MAKE_CHANNEL(fmt, dim) VertexChannelsLayout::Channel(kChannelFormat##fmt, dim) +VertexChannelsLayout VertexDataInfo::kVertexChannelsDefault = +{{ // Array wrapped by struct requires double braces + MAKE_CHANNEL(Float, 3), // position + MAKE_CHANNEL(Float, 3), // normal + MAKE_CHANNEL(Color, 1), // color + MAKE_CHANNEL(Float, 2), // texcoord0 + MAKE_CHANNEL(Float, 2), // texcoord1 + MAKE_CHANNEL(Float, 4) // tangent +}}; +VertexChannelsLayout VertexDataInfo::kVertexChannelsCompressed = +{{ // Array wrapped by struct requires double braces + MAKE_CHANNEL(Float, 3), // position + MAKE_CHANNEL(Float16, 4), // normal + MAKE_CHANNEL(Color, 1), // color + MAKE_CHANNEL(Float16, 2), // texcoord0 + MAKE_CHANNEL(Float16, 2), // texcoord1 + MAKE_CHANNEL(Float16, 4) // tangent +}}; +VertexChannelsLayout VertexDataInfo::kVertexChannelsCompressedAggressive = +{{ // Array wrapped by struct requires double braces + MAKE_CHANNEL(Float, 3), // position + MAKE_CHANNEL(Byte, 4), // normal + MAKE_CHANNEL(Color, 1), // color + MAKE_CHANNEL(Float16, 2), // texcoord0 + MAKE_CHANNEL(Float16, 2), // texcoord1 + MAKE_CHANNEL(Byte, 4) // tangent +}}; +#undef MAKE_CHANNEL + +static const UInt8 kVertexChannelFormatSizes[kChannelFormatCount] = { + 4, // kChannelFormatFloat + 2, // kChannelFormatFloat16 + 4, // kChannelFormatColor + 1 // kChannelFormatByte +}; + +size_t GetChannelFormatSize(UInt8 format) +{ + Assert (format < kChannelFormatCount); + return kVertexChannelFormatSizes[format]; +} + +static bool operator == (const VertexStreamsLayout& lhs, const VertexStreamsLayout& rhs) +{ + return CompareArrays(lhs.channelMasks, 
rhs.channelMasks, kMaxVertexStreams); +} + +template<class TransferFunction> +void VertexData::Transfer (TransferFunction& transfer) +{ + #if SUPPORT_SERIALIZED_TYPETREES + if (transfer.GetFlags() & kWorkaround35MeshSerializationFuckup) + { + TransferWorkaround35SerializationFuckup (transfer); + return; + } + #endif + + transfer.Transfer (m_CurrentChannels, "m_CurrentChannels", kHideInEditorMask); + transfer.Transfer (m_VertexCount, "m_VertexCount", kHideInEditorMask); + + dynamic_array<ChannelInfo> channels; + dynamic_array<StreamInfo> streams; + if (transfer.IsWriting ()) + { + channels.resize_uninitialized (kShaderChannelCount); + streams.resize_uninitialized (kMaxVertexStreams); + std::copy (m_Channels, m_Channels + kShaderChannelCount, channels.begin ()); + std::copy (m_Streams, m_Streams + kMaxVertexStreams, streams.begin ()); + } + transfer.Transfer (channels, "m_Channels", kHideInEditorMask); + transfer.Transfer (streams, "m_Streams", kHideInEditorMask); + + if (transfer.IsReading ()) + { + // For compatibility do this even if channels/streams info didn't exist (case 558604) + // In the past there was only a channels mask, UpdateStreams() generates the info from that + if (channels.size () == kShaderChannelCount) + std::copy (channels.begin (), channels.begin () + kShaderChannelCount, m_Channels); + if (streams.size () == kMaxVertexStreams) + std::copy (streams.begin (), streams.begin () + kMaxVertexStreams, m_Streams); + else + std::fill (m_Streams, m_Streams + kMaxVertexStreams, StreamInfo()); + + UInt32 channelsInStreams = 0; + for (int i = 0; i < kMaxVertexStreams ; i++) + channelsInStreams |= m_Streams[i].channelMask; + if (channelsInStreams) + UpdateStreams(channelsInStreams, m_VertexCount, GetStreamsLayout (), GetChannelsLayout ()); + else + UpdateStreams(m_CurrentChannels, m_VertexCount, kVertexStreamsDefault, kVertexChannelsDefault); + } + + transfer.TransferTypeless (&m_DataSize, "m_DataSize", kHideInEditorMask); + if (transfer.DidReadLastProperty 
()) + { + if (m_Data) + UNITY_FREE (kMemVertexData, m_Data); + m_Data = (UInt8*)UNITY_MALLOC_ALIGNED (kMemVertexData, VertexData::GetAllocateDataSize (m_DataSize), kVertexDataAlign); + } + + transfer.TransferTypelessData (m_DataSize, m_Data); +} + +#if SUPPORT_SERIALIZED_TYPETREES +template<class TransferFunction> +void VertexData::TransferWorkaround35SerializationFuckup (TransferFunction& transfer) +{ + UInt32 currentChannels = m_CurrentChannels; + transfer.Transfer (currentChannels, "m_CurrentChannels", kHideInEditorMask); + transfer.Transfer (m_VertexCount, "m_VertexCount", kHideInEditorMask); + + TRANSFER(m_Streams[0]); + TRANSFER(m_Streams[1]); + TRANSFER(m_Streams[2]); + TRANSFER(m_Streams[3]); + + if (transfer.IsReading ()) + { + if(m_VertexCount && (currentChannels == 0)) + { + for(int i=0;i<kMaxVertexStreams;i++) + currentChannels |= m_Streams[i].channelMask; + } + UpdateStreams(currentChannels, m_VertexCount); + //GetComponentInfo(m_Components, currentChannels); + m_CurrentChannels = currentChannels; + } + + transfer.TransferTypeless (&m_DataSize, "m_DataSize", kHideInEditorMask); + + if (transfer.IsReading ()) + { + if (m_Data) + UNITY_FREE (kMemVertexData, m_Data); + m_Data = (UInt8*)UNITY_MALLOC_ALIGNED (kMemVertexData, VertexData::GetAllocateDataSize (m_DataSize), kVertexDataAlign); + } + + transfer.TransferTypelessData (m_DataSize, m_Data); +} +#endif + +INSTANTIATE_TEMPLATE_TRANSFER(VertexData) + +void VertexDataInfo::UpdateStreams(unsigned newChannelMask, size_t newVertexCount, const VertexStreamsLayout& streams, const VertexChannelsLayout& channels) +{ + m_VertexCount = newVertexCount; + m_CurrentChannels = 0; + m_VertexSize = 0; + size_t streamOffset = 0; + for (int s = 0; s < kMaxVertexStreams; s++) + { + StreamInfo& stream = m_Streams[s]; + m_Streams[s].Reset(); + stream.channelMask = streams.channelMasks[s] & newChannelMask; + if (stream.channelMask == 0) + continue; + m_CurrentChannels |= stream.channelMask; + for (int c = 0; c < 
kShaderChannelCount; c++) + { + if (stream.channelMask & (1 << c)) + { + ChannelInfo& channel = m_Channels[c]; + const VertexChannelsLayout::Channel& srcChannel = channels.channels[c]; + channel.stream = s; + channel.offset = stream.stride; + channel.format = srcChannel.format; + channel.dimension = srcChannel.dimension; + stream.stride += channel.dimension * GetChannelFormatSize(channel.format); + } + } + streamOffset = AlignStreamSize(streamOffset); + stream.offset = streamOffset; + streamOffset += stream.stride * newVertexCount; + m_VertexSize += stream.stride; + } + for (int c = 0; c < kShaderChannelCount; c++) + { + // Reset channels that were removed + if (!(m_CurrentChannels & (1 << c))) + m_Channels[c].Reset(); + } + m_DataSize = streamOffset; +} + +size_t VertexDataInfo::GetActiveStreamCount() const +{ + size_t activeStreamCount = 0; + for (int i=0; i<kMaxVertexStreams; i++) + { + if(m_Streams[i].channelMask != 0) + activeStreamCount++; + } + return activeStreamCount; +} + +size_t VertexDataInfo::GetStreamIndex(ShaderChannel channel) const +{ + UInt32 channelMask = 1 << channel; + for (int i=0; i<kMaxVertexStreams; i++) + { + if(m_Streams[i].channelMask & channelMask) + return i; + } + return -1; +} + +VertexStreamsLayout VertexDataInfo::GetStreamsLayout() const +{ + VertexStreamsLayout result; + for (int i = 0; i < kMaxVertexStreams; i++) + result.channelMasks[i] = m_Streams[i].channelMask; + return result; +} + +VertexChannelsLayout VertexDataInfo::GetChannelsLayout() const +{ + VertexChannelsLayout result; + for (int i = 0; i < kShaderChannelCount; i++) + { + result.channels[i] = VertexChannelsLayout::Channel(m_Channels[i].format, m_Channels[i].dimension); + } + return result; +} + +bool VertexDataInfo::ConformsToStreamsLayout(const VertexStreamsLayout& streams) const +{ + for (int i = 0; i < kMaxVertexStreams; i++) + { + // Fail if we have a channel that's not in the layout + if (m_Streams[i].channelMask & ~streams.channelMasks[i]) + return false; + } 
+ return true; +} + +bool VertexDataInfo::ConformsToChannelsLayout(const VertexChannelsLayout& channels) const +{ + for (int i = 0; i < kShaderChannelCount; i++) + { + if (m_Channels[i].IsValid()) + { + const VertexChannelsLayout::Channel& channel = channels.channels[i]; + if (m_Channels[i].format != channel.format || + m_Channels[i].dimension != channel.dimension) + return false; + } + } + return true; +} + +signed char f32_to_s8(float fval) +{ + return ((fval * 255.0f) - 1.0f) / 2.0f; +} + +float s8_to_f32(signed char val) +{ + return (2*(val/255.0f)-1.0f); +} + +static void ConvertCopyChannel(size_t vertexCount, + const UInt8* srcPtr, UInt8 srcStride, UInt8 srcType, UInt8 srcDim, + UInt8* dstPtr, UInt8 dstStride, UInt8 dstType, UInt8 dstDim) +{ + UInt8 minDim = std::min(srcDim, dstDim); + if (srcType == kChannelFormatFloat16 && dstType == kChannelFormatFloat) + { + // decompressing + for (size_t i = 0; i < vertexCount; i++) + { + UInt8 comp = 0; + for ( ; comp < minDim; comp++) + HalfToFloat(reinterpret_cast<const UInt16*>(srcPtr)[comp], reinterpret_cast<float*>(dstPtr)[comp]); + for ( ; comp < dstDim; comp++) + reinterpret_cast<float*>(dstPtr)[comp] = 0.0f; + srcPtr += srcStride; + dstPtr += dstStride; + } + } + else if (srcType == kChannelFormatByte && dstType == kChannelFormatFloat) + { + // decompressing + for (size_t i = 0; i < vertexCount; i++) + { + UInt8 comp = 0; + for ( ; comp < minDim; comp++) + reinterpret_cast<float*>(dstPtr)[comp] = s8_to_f32(reinterpret_cast<const SInt8*>(srcPtr)[comp]); + for ( ; comp < dstDim; comp++) + reinterpret_cast<float*>(dstPtr)[comp] = 0.0f; + srcPtr += srcStride; + dstPtr += dstStride; + } + } +#if UNITY_EDITOR + else if (srcType == kChannelFormatFloat && dstType == kChannelFormatFloat16) + { + // compressing + for (size_t i = 0; i < vertexCount; i++) + { + UInt8 comp = 0; + for ( ; comp < minDim; comp++) + g_FloatToHalf.Convert(reinterpret_cast<const float*>(srcPtr)[comp], reinterpret_cast<UInt16*>(dstPtr)[comp]); + 
for ( ; comp < dstDim; comp++) + reinterpret_cast<UInt16*>(dstPtr)[comp] = 0; + srcPtr += srcStride; + dstPtr += dstStride; + } + } + else if (srcType == kChannelFormatFloat && dstType == kChannelFormatByte) + { + // compressing + for (size_t i = 0; i < vertexCount; i++) + { + UInt8 comp = 0; + for ( ; comp < minDim; comp++) + reinterpret_cast<SInt8*>(dstPtr)[comp] = f32_to_s8(reinterpret_cast<const float*>(srcPtr)[comp]); + for ( ; comp < dstDim; comp++) + reinterpret_cast<SInt8*>(dstPtr)[comp] = 0; + srcPtr += srcStride; + dstPtr += dstStride; + } + } +#endif + else + ErrorString("Unsupported conversion of vertex formats"); +} + +static void CopyChannels (size_t vertexCount, unsigned copyChannels, + const StreamInfoArray srcStreams, const ChannelInfoArray srcChannels, const UInt8* srcData, + const StreamInfoArray dstStreams, const ChannelInfoArray dstChannels, UInt8* dstData) +{ + for (unsigned chan = copyChannels, i = 0; chan && (i < kShaderChannelCount); i++, chan >>= 1) + { + if (0 == (chan & 1)) + continue; + + const ChannelInfo& srcChannel = srcChannels[i]; + const ChannelInfo& dstChannel = dstChannels[i]; + + const UInt8* srcPtr = srcData + srcChannel.CalcOffset(srcStreams); + UInt8* dstPtr = dstData + dstChannel.CalcOffset(dstStreams); + UInt8 srcStride = srcChannel.CalcStride(srcStreams); + UInt8 dstStride = dstChannel.CalcStride(dstStreams); + + if(srcChannel.format == dstChannel.format) + { + size_t copySize = srcChannel.dimension * GetChannelFormatSize(srcChannel.format); + switch (copySize) + { + case 4: + { + for (size_t i=0; i<vertexCount; ++i) + { + *(reinterpret_cast<UInt32*> (dstPtr) + 0) = *(reinterpret_cast<const UInt32*> (srcPtr) + 0); + srcPtr += srcStride; + dstPtr += dstStride; + } + break; + } + case 8: + { + for (size_t i=0; i<vertexCount; ++i) + { + *(reinterpret_cast<UInt32*> (dstPtr) + 0) = *(reinterpret_cast<const UInt32*> (srcPtr) + 0); + *(reinterpret_cast<UInt32*> (dstPtr) + 1) = *(reinterpret_cast<const UInt32*> (srcPtr) + 1); + 
srcPtr += srcStride; + dstPtr += dstStride; + } + break; + } + case 12: + { + for (size_t i=0; i<vertexCount; ++i) + { + *(reinterpret_cast<UInt32*> (dstPtr) + 0) = *(reinterpret_cast<const UInt32*> (srcPtr) + 0); + *(reinterpret_cast<UInt32*> (dstPtr) + 1) = *(reinterpret_cast<const UInt32*> (srcPtr) + 1); + *(reinterpret_cast<UInt32*> (dstPtr) + 2) = *(reinterpret_cast<const UInt32*> (srcPtr) + 2); + srcPtr += srcStride; + dstPtr += dstStride; + } + break; + } + default: + { + for (size_t i=0; i<vertexCount; ++i) + { + memcpy (dstPtr, srcPtr, copySize); + srcPtr += srcStride; + dstPtr += dstStride; + } + break; + } + } + } + else + { + ConvertCopyChannel(vertexCount, srcPtr, srcStride, srcChannel.format, srcChannel.dimension, dstPtr, dstStride, dstChannel.format, dstChannel.dimension); + } + } +} + +VertexDataInfo::VertexDataInfo () +: m_Data(NULL) +, m_DataSize(0) +, m_VertexCount(0) +, m_VertexSize(0) +, m_CurrentChannels(0) +{ + // Channels and streams have default constructors +} + +VertexData::VertexData (VertexData const& src, unsigned copyChannels, const VertexStreamsLayout& streams, const VertexChannelsLayout& channels) +{ + // We do not support inserting new channels that are not present in the source + Assert ((copyChannels & src.GetChannelMask()) == copyChannels); + + UpdateStreams(copyChannels, src.m_VertexCount, streams, channels); + m_Data = (UInt8*) UNITY_MALLOC_ALIGNED (kMemVertexData, VertexData::GetAllocateDataSize (m_DataSize), kVertexDataAlign); + + const VertexData& dest = *this; + if (m_DataSize == src.m_DataSize && + copyChannels == src.GetChannelMask() && + CompareMemory(dest.m_Channels, src.m_Channels) && + CompareMemory(dest.m_Streams, src.m_Streams)) + { + // Simple copy if the format didn't change + memcpy (m_Data, src.m_Data, m_DataSize); + } + else + CopyChannels (m_VertexCount, copyChannels, src.m_Streams, src.m_Channels, src.m_Data, m_Streams, m_Channels, m_Data); +} + +VertexData::~VertexData () +{ + Deallocate(); +} + +void 
VertexData::Deallocate () +{ + if (m_Data) + UNITY_FREE(kMemVertexData, m_Data); + m_Data = NULL; +} + +void VertexData::Resize (size_t vertexCount, unsigned channelMask, const VertexStreamsLayout& streams, const VertexChannelsLayout& channels) +{ + ChannelInfoArray srcChannels; + StreamInfoArray srcStreams; + memcpy(srcChannels, m_Channels, sizeof(srcChannels)); + memcpy(srcStreams, m_Streams, sizeof(srcStreams)); + UInt32 srcChannelMask = m_CurrentChannels; + UInt32 srcVertexCount = m_VertexCount; + UInt8* srcData = m_Data; + + UpdateStreams(channelMask, vertexCount, streams, channels); + + // In case the streams and channels don't change, simply reallocate the buffer and return + // Note that this will rarely be true with multiple streams since the stream offsets change + if (m_Data && CompareMemory(srcChannels, m_Channels) && CompareMemory(srcStreams, m_Streams)) + { + m_Data = (UInt8*)UNITY_REALLOC_ALIGNED(kMemVertexData, m_Data, VertexData::GetAllocateDataSize(m_DataSize), kVertexDataAlign); + return; + } + + m_Data = (UInt8*)UNITY_MALLOC_ALIGNED(kMemVertexData, VertexData::GetAllocateDataSize(m_DataSize), kVertexDataAlign); + // copy over the old data + if (srcData) + { + unsigned copyChannels = srcChannelMask & m_CurrentChannels; + size_t toCopyCount = std::min<size_t>(srcVertexCount, m_VertexCount); + CopyChannels(toCopyCount, copyChannels, srcStreams, srcChannels, srcData, m_Streams, m_Channels, m_Data); + UNITY_FREE(kMemVertexData, srcData); + } +} + + +void VertexData::SwapEndianess () +{ + unsigned const kChannelSwapMask = VERTEX_FORMAT5(Vertex, Normal, TexCoord0, TexCoord1, Tangent); + for (int s = 0; s < kMaxVertexStreams; s++) + { + if (m_Streams[s].stride) + { + StreamInfo& stream = m_Streams[s]; + size_t stride = stream.stride; + UInt8* dataStart = m_Data + stream.offset; + UInt8* dataEnd = dataStart + stream.stride * m_VertexCount; + UInt32 channelMask = stream.channelMask; + for (UInt8* p = dataStart, *end = dataEnd; p != end; p += stride) + { + 
// counting from LSb, 1 denotes that a value should be endian-swapped + int localOffset = 0; + for (unsigned i=0, chan = channelMask, swap = kChannelSwapMask; i<kShaderChannelCount; ++i, chan >>= 1, swap >>= 1) + { + if (chan & 1) + { + size_t componentCount = m_Channels[i].dimension; + size_t componentSize = GetChannelFormatSize(m_Channels[i].format); + if(swap & 1) + { + Assert (m_Channels [i].IsValid()); + SwapEndianArray (p + localOffset, componentSize, componentCount); + } + localOffset += componentCount * componentSize; + } + } + } + } + } +} + +void swap (VertexData& a, VertexData& b) +{ + std::swap_ranges (a.m_Channels, a.m_Channels + kShaderChannelCount, b.m_Channels); + std::swap_ranges (a.m_Streams, a.m_Streams + kMaxVertexStreams, b.m_Streams); + std::swap (a.m_CurrentChannels, b.m_CurrentChannels); + std::swap (a.m_VertexSize, b.m_VertexSize); + std::swap (a.m_VertexCount, b.m_VertexCount); + std::swap (a.m_DataSize, b.m_DataSize); + std::swap (a.m_Data, b.m_Data); +} + +void CopyVertexDataChannels (size_t vertexCount, unsigned copyChannels, const VertexData& srcData, VertexData& dstData) +{ + Assert (vertexCount <= srcData.GetVertexCount() && vertexCount <= dstData.GetVertexCount()); + Assert ((srcData.GetChannelMask() & copyChannels) == copyChannels); + Assert ((dstData.GetChannelMask() & copyChannels) == copyChannels); + CopyChannels (vertexCount, copyChannels, + srcData.GetStreams(), srcData.GetChannels(), srcData.GetDataPtr(), + dstData.GetStreams(), dstData.GetChannels(), dstData.GetDataPtr()); +} + diff --git a/Runtime/Filters/Mesh/VertexData.h b/Runtime/Filters/Mesh/VertexData.h new file mode 100644 index 0000000..7cc6c98 --- /dev/null +++ b/Runtime/Filters/Mesh/VertexData.h @@ -0,0 +1,253 @@ +#ifndef VERTEX_DATA_H_ +#define VERTEX_DATA_H_ + +#include "Runtime/Utilities/StrideIterator.h" +#include "Runtime/GfxDevice/GfxDeviceTypes.h" +#include "Runtime/BaseClasses/ObjectDefines.h" +#include "Runtime/Serialize/SerializeUtility.h" +#include 
"Runtime/Serialize/TransferFunctionFwd.h" + +class VertexData; + +void swap (VertexData& a, VertexData& b); + +typedef struct StreamInfo +{ + enum { kDividerOpDivide=0, kDividerOpModulo }; + + UInt32 channelMask; + UInt32 offset; + UInt16 frequency; + UInt8 stride; + UInt8 dividerOp; + + // We use default constructors instead of memset() + StreamInfo() : channelMask(0), offset(0), frequency(0), stride(0), dividerOp(kDividerOpDivide) {} + void Reset() { *this = StreamInfo(); } + + bool operator == (const StreamInfo& rhs) const { return (channelMask == rhs.channelMask) && (offset == rhs.offset) && (frequency == rhs.frequency) && (stride == rhs.stride) && (dividerOp == rhs.dividerOp); } + bool operator != (const StreamInfo& rhs) const { return !(*this == rhs); } + + DECLARE_SERIALIZE_NO_PPTR (StreamInfo); + +#if SUPPORT_SERIALIZED_TYPETREES + template<class TransferFunction> + void TransferWorkaround35SerializationFuckup (TransferFunction& transfer); +#endif + +} StreamInfoArray [kMaxVertexStreams]; + +struct VertexStreamsLayout +{ + UInt32 channelMasks[kMaxVertexStreams]; +}; + +typedef struct ALIGN_TYPE(4) ChannelInfo +{ + UInt8 stream; + UInt8 offset; + UInt8 format; + UInt8 dimension; + + enum { kInvalidDimension = 0 }; + + // We use default constructors instead of memset() + ChannelInfo() : stream(0), offset(0), format(0), dimension(kInvalidDimension) {} + + UInt32 CalcOffset(const StreamInfoArray streams) const { return streams[stream].offset + offset; } + UInt32 CalcStride(const StreamInfoArray streams) const { return streams[stream].stride; } + bool IsValid() const { return (kInvalidDimension != dimension); } + void Reset() { *this = ChannelInfo(); } + + bool operator == (const ChannelInfo& rhs) const { return (stream == rhs.stream) && (offset == rhs.offset) && (format == rhs.format) && (dimension == rhs.dimension); } + bool operator != (const ChannelInfo& rhs) const { return !(*this == rhs); } + + DECLARE_SERIALIZE_NO_PPTR (ChannelInfo); + +} ChannelInfoArray 
[kShaderChannelCount]; + +struct VertexChannelsLayout +{ + struct Channel + { + Channel(UInt8 fmt, UInt8 dim) : format(fmt), dimension(dim) {} + Channel() : format(0), dimension(0) {} + UInt8 format; + UInt8 dimension; + }; + Channel channels[kShaderChannelCount]; +}; + + +template<class TransferFunc> +void StreamInfo::Transfer (TransferFunc& transfer) +{ + #if SUPPORT_SERIALIZED_TYPETREES + if (transfer.GetFlags() & kWorkaround35MeshSerializationFuckup) + { + TransferWorkaround35SerializationFuckup (transfer); + return; + } + #endif + + transfer.Transfer (channelMask, "channelMask", kHideInEditorMask); + transfer.Transfer (offset, "offset", kHideInEditorMask); + transfer.Transfer (stride, "stride", kHideInEditorMask); + transfer.Transfer (dividerOp, "dividerOp", kHideInEditorMask); + transfer.Transfer (frequency, "frequency", kHideInEditorMask); +} + +#if SUPPORT_SERIALIZED_TYPETREES +template<class TransferFunc> +void StreamInfo::TransferWorkaround35SerializationFuckup (TransferFunc& transfer) +{ + transfer.Transfer (channelMask, "channelMask", kHideInEditorMask); + transfer.Transfer (offset, "offset", kHideInEditorMask); + + UInt32 align; + UInt32 stride32bit; + transfer.Transfer (stride32bit, "stride", kHideInEditorMask); + transfer.Transfer (align, "align", kHideInEditorMask); + + stride = (UInt8) stride32bit; +} +#endif + +template<class TransferFunc> +void ChannelInfo::Transfer (TransferFunc& transfer) +{ + transfer.Transfer (stream, "stream", kHideInEditorMask); + transfer.Transfer (offset, "offset", kHideInEditorMask); + transfer.Transfer (format, "format", kHideInEditorMask); + transfer.Transfer (dimension, "dimension", kHideInEditorMask); +} + +// Information about all vertex data, but does not own the memory +class VertexDataInfo +{ +public: + enum + { + kVertexDataAlign = 32, + kVertexStreamAlign = 16, + kVertexDataPadding = 16 + }; + + static VertexStreamsLayout kVertexStreamsDefault; + static VertexStreamsLayout kVertexStreamsSkinnedHotColdSplit; + 
	// Predefined per-channel format/dimension presets (uncompressed and
	// two levels of compression).
	static VertexChannelsLayout kVertexChannelsDefault;
	static VertexChannelsLayout kVertexChannelsCompressed;
	static VertexChannelsLayout kVertexChannelsCompressedAggressive;
#if UNITY_EDITOR
	static VertexStreamsLayout kVertexStreamsSkinnedHotColdSplitPS3;
#endif

	// Rounds a stream size up to the next kVertexStreamAlign (16 byte) boundary.
	static size_t AlignStreamSize (size_t size) { return (size + (kVertexStreamAlign-1)) & ~(kVertexStreamAlign-1); }

	friend void ::swap (VertexData& a, VertexData& b);

	VertexDataInfo ();

	// A channel is present iff its dimension is non-zero; the Assert checks
	// that m_CurrentChannels stays consistent with the per-channel dimensions.
	bool HasChannel (ShaderChannel shaderChannelIndex) const
	{
		Assert ((m_Channels[shaderChannelIndex].dimension != 0) == (((m_CurrentChannels & (1 << shaderChannelIndex)) != 0)));
		return m_Channels[shaderChannelIndex].dimension != 0;
	}

	// Recomputes stream/channel layout for the given channel mask and vertex
	// count using the supplied layout presets.
	void UpdateStreams(unsigned newChannelMask, size_t newVertexCount, const VertexStreamsLayout& streams = kVertexStreamsDefault, const VertexChannelsLayout& channels = kVertexChannelsDefault);

	size_t GetActiveStreamCount() const ;
	size_t GetStreamIndex(ShaderChannel channel) const ;
	const StreamInfo* GetStreams() const { return m_Streams; }
	const StreamInfo& GetStream(int index) const { return m_Streams[index]; }

	const ChannelInfo* GetChannels() const { return m_Channels; }
	const ChannelInfo& GetChannel(int index) const { return m_Channels[index]; }

	VertexStreamsLayout GetStreamsLayout() const;
	VertexChannelsLayout GetChannelsLayout() const;

	bool ConformsToStreamsLayout(const VertexStreamsLayout& streams) const;
	bool ConformsToChannelsLayout(const VertexChannelsLayout& channels) const;

	unsigned GetChannelMask () const { return m_CurrentChannels; }
	size_t GetDataSize () const { return m_DataSize; }
	size_t GetVertexSize () const { return m_VertexSize; }
	size_t GetVertexCount () const { return m_VertexCount; }
	size_t GetChannelOffset (unsigned channel) const { return m_Channels[channel].CalcOffset(m_Streams); }
	size_t GetChannelStride (unsigned channel) const { return m_Channels[channel].CalcStride(m_Streams); }
	UInt8* GetDataPtr () const { return m_Data; }

	// Strided iterator over one channel's data; yields a NULL-based iterator
	// (with the channel's stride) when the channel is absent.
	template<class T>
	StrideIterator<T> MakeStrideIterator (ShaderChannel shaderChannelIndex) const
	{
		Assert (shaderChannelIndex < kShaderChannelCount);
		void* p = m_Data + GetChannelOffset(shaderChannelIndex);
		return HasChannel (shaderChannelIndex) ? StrideIterator<T> (p, GetChannelStride (shaderChannelIndex)) : StrideIterator<T> (NULL, GetChannelStride (shaderChannelIndex));
	}

	// One-past-the-end iterator matching MakeStrideIterator.
	template<class T>
	StrideIterator<T> MakeEndIterator (ShaderChannel shaderChannelIndex) const
	{
		T* end = GetEndPointer<T> (shaderChannelIndex);
		return StrideIterator<T> (end, GetChannelStride (shaderChannelIndex));
	}

	// Pointer just past the last vertex of the given channel, or NULL when the
	// channel is absent.
	template<class T>
	T* GetEndPointer (ShaderChannel shaderChannelIndex) const
	{
		Assert (shaderChannelIndex < kShaderChannelCount);
		void* p = HasChannel (shaderChannelIndex) ? (m_Data + GetChannelOffset(shaderChannelIndex) + m_VertexCount * GetChannelStride (shaderChannelIndex)) : NULL;
		return reinterpret_cast<T*> (p);
	}

protected:
	ChannelInfoArray m_Channels;
	StreamInfoArray m_Streams;

	size_t m_VertexSize; // must match m_CurrentChannels
	UInt8* m_Data;       // not owned by this class (see VertexData below)

	// The following are being serialized. Their size must match in both 32 and 64 bit platforms
	UInt32 m_CurrentChannels; // kShaderChannel bitmask
	UInt32 m_VertexCount;
	unsigned m_DataSize;
};


// Owns the vertex memory
class VertexData : public VertexDataInfo
{
public:

	DECLARE_SERIALIZE (VertexData)

	VertexData () : VertexDataInfo() { }
	// Copies the selected channels from src into a freshly laid-out buffer.
	VertexData (VertexData const& src, unsigned copyChannels, const VertexStreamsLayout& streams = kVertexStreamsDefault, const VertexChannelsLayout& channels = kVertexChannelsDefault);
	~VertexData ();

	// Allocation size = accessible size plus trailing padding bytes.
	static size_t GetAllocateDataSize (size_t accesibleBufferSize) { return accesibleBufferSize + kVertexDataPadding; }

	void Deallocate ();
	void Resize (size_t vertexCount, unsigned channelMask, const VertexStreamsLayout& streams = kVertexStreamsDefault, const VertexChannelsLayout& channels = kVertexChannelsDefault);
	void SwapEndianess ();

private:
	// Non-copyable: copy operations are declared but not defined.
	VertexData (const VertexData& o);
	void operator= (const VertexData& o);
	VertexData (const VertexDataInfo& o);
	void operator= (const VertexDataInfo& o);

#if SUPPORT_SERIALIZED_TYPETREES
	template<class TransferFunction>
	void TransferWorkaround35SerializationFuckup (TransferFunction& transfer);
#endif
};


// Copies the given channels of vertexCount vertices from srcData to dstData.
void CopyVertexDataChannels (size_t vertexCount, unsigned copyChannels, const VertexData& srcData, VertexData& dstData);
// Size in bytes of a single component of the given vertex attribute format.
size_t GetChannelFormatSize(UInt8 format);



#endif