summaryrefslogtreecommitdiff
path: root/Runtime/GfxDevice/opengles30/TransformFeedbackSkinnedMesh.cpp
diff options
context:
space:
mode:
authorchai <chaifix@163.com>2019-08-14 22:50:43 +0800
committerchai <chaifix@163.com>2019-08-14 22:50:43 +0800
commit15740faf9fe9fe4be08965098bbf2947e096aeeb (patch)
treea730ec236656cc8cab5b13f088adfaed6bb218fb /Runtime/GfxDevice/opengles30/TransformFeedbackSkinnedMesh.cpp
+Unity Runtime codeHEADmaster
Diffstat (limited to 'Runtime/GfxDevice/opengles30/TransformFeedbackSkinnedMesh.cpp')
-rw-r--r--Runtime/GfxDevice/opengles30/TransformFeedbackSkinnedMesh.cpp762
1 files changed, 762 insertions, 0 deletions
diff --git a/Runtime/GfxDevice/opengles30/TransformFeedbackSkinnedMesh.cpp b/Runtime/GfxDevice/opengles30/TransformFeedbackSkinnedMesh.cpp
new file mode 100644
index 0000000..dc22318
--- /dev/null
+++ b/Runtime/GfxDevice/opengles30/TransformFeedbackSkinnedMesh.cpp
@@ -0,0 +1,762 @@
+ #include "UnityPrefix.h"
+
+#include "Runtime/GfxDevice/opengles30/TransformFeedbackSkinnedMesh.h"
+
+#include "Runtime/Filters/Mesh/Mesh.h"
+#include "Runtime/Filters/Mesh/MeshSkinning.h"
+#include "Runtime/Filters/Renderer.h"
+#include "Runtime/GfxDevice/opengles30/VBOGLES30.h"
+#include "Runtime/GfxDevice/opengles30/AssertGLES30.h"
+#include "Runtime/Shaders/GraphicsCaps.h"
+
+// If 1, bone matrices are passed through a uniform block (shader variants for 32..1024 bones); otherwise a plain uniform array fixed at 82 bones is used
+#define USE_UNIFORM_BLOCK_FOR_BONES 0
+
+// 1 to use glVertexAttribIPointer for bone indices (integer shader inputs), 0 to convert the indices to floats
+#define USE_INT_ATTRIBS 1
+
+//! Attribute array indices; the order matches tfShaderAttribNames and TFShader::attribLocations.
+enum { TFATTRLOC_POS = 0, TFATTRLOC_NORM = 1, TFATTRLOC_TAN = 2, TFATTRLOC_BONEIDX=3, TFATTRLOC_BONEWEIGHT = 4, TFATTRLOC_SIZE = 5 };
+
+// Shader program variants (which channels get skinned); used as the first index into tfShaderSource
+enum { TFSHADER_POS = 0, TFSHADER_POSNORM = 1, TFSHADER_POSNORMTAN = 2, TFSHADER_SIZE = 3 };
+
+// GL objects and cached locations for one transform feedback skinning program.
+struct TFShader
+{
+    // Handles start at 0 (no object). Attribute locations start at -1, the value
+    // glGetAttribLocation reports for an attribute that is not active, so that an
+    // unfilled entry is never mistaken for attribute slot 0 or left indeterminate.
+    TFShader() : program(0), vertShader(0), bonesLocation(0)
+    {
+        for (int i = 0; i < TFATTRLOC_SIZE; ++i)
+            attribLocations[i] = -1;
+    }
+
+    // Not a dtor, we're storing them in a map, so delete manually in CleanupTransformFeedbackShaders
+    void Release()
+    {
+        if(program)
+            glDeleteProgram(program);
+        if(vertShader)
+            glDeleteShader(vertShader);
+    }
+
+    GLuint program;                         // Linked program object
+    GLuint vertShader;                      // Vertex shader attached to 'program'
+    GLint bonesLocation;                    // Uniform location of "bones", or index of the "MtxBlock" uniform block
+    GLint attribLocations[TFATTRLOC_SIZE];  // Indexed by the TFATTRLOC_* enum
+};
+
+// std::swap specialization for TFShader: exchanges every member of the two shaders.
+namespace std
+{
+    template<> void swap(TFShader &lhs, TFShader &rhs)
+    {
+        // Scalar handles and locations
+        swap(lhs.program, rhs.program);
+        swap(lhs.vertShader, rhs.vertShader);
+        swap(lhs.bonesLocation, rhs.bonesLocation);
+
+        // Attribute location table, one entry at a time
+        for (int slot = 0; slot < TFATTRLOC_SIZE; ++slot)
+            swap(lhs.attribLocations[slot], rhs.attribLocations[slot]);
+    }
+}
+
+// Map to store shaders. The key is channelMap + (bonesPerVertex << 16) + (maxBonesBits << 19), as built in GetTransformFeedbackShaderProgram
+typedef std::map<UInt32, TFShader> TFShaderMap;
+
+static TFShaderMap tfShaders; // Cache of linked programs, emptied by CleanupTransformFeedbackShaders
+
+const char *tfShaderAttribNames[TFATTRLOC_SIZE] = {"in_vertex", "in_normal", "in_tangent", "in_boneIndices", "in_boneWeights" }; // Shader input names, in TFATTRLOC_* order
+
+//! Fragment shader, common to all programs. 0 until it has been created.
+static GLuint tfFragShader = 0;
+
+
+#define STRINGIFY(x) #x // Turns its argument into a string literal
+
+#if USE_UNIFORM_BLOCK_FOR_BONES
+ #define MATRIX_DECL "uniform MtxBlock { vec4 bones[max_bone_count*3]; } Matrices; \n"
+ #define BUILD_MATRIX "Matrices.bones[bidx + 0], Matrices.bones[bidx + 1], Matrices.bones[bidx + 2]"
+#else
+ #define MATRIX_DECL "uniform vec4 bones[max_bone_count*3];\n"
+ #define BUILD_MATRIX "bones[bidx + 0], bones[bidx + 1], bones[bidx + 2]"
+#endif
+
+// Macro to build the vertex shader source for one variant. bonecount becomes max_bone_count; indecl/outdecl are the extra input/output declarations; skincalc is the expression assigned to localToWorldMatrix; outcalc is the extra output code. Each bone is three consecutive vec4s; getMatrix() adds a constant (0,0,0,1) fourth vector, and positions are multiplied as row vectors (inpos * matrix).
+#define BUILD_SHADER_2(bonecount, indecl, outdecl, skincalc, outcalc ) \
+ "#version 300 es\n" \
+ "\n" \
+ "const int max_bone_count = " STRINGIFY(bonecount) ";\n" \
+ "in vec3 in_vertex;\n" \
+ indecl \
+ "out vec3 out_pos;\n" \
+ outdecl \
+ "\n" \
+ MATRIX_DECL \
+ "\n" \
+ "mat4 getMatrix(int idx)\n" \
+ "{\n"\
+ " int bidx = idx*3;\n" \
+ " return mat4(" BUILD_MATRIX ", vec4(0.0, 0.0, 0.0, 1.0));\n" \
+ "}\n"\
+ "void main(void)\n" \
+ "{\n" \
+ " vec4 inpos = vec4(in_vertex.xyz, 1.0);\n" \
+ " mat4 localToWorldMatrix = \n" \
+ skincalc \
+ " out_pos = (inpos * localToWorldMatrix).xyz;\n" \
+ " gl_Position = vec4(out_pos.xyz, 1.0);\n" \
+ outcalc \
+ "}"
+
+#if USE_INT_ATTRIBS // GLSL type of in_boneIndices for 1, 2 and 4 bones per vertex
+#define BONEINDEXTYPE1 "int"
+#define BONEINDEXTYPE2 "ivec2"
+#define BONEINDEXTYPE4 "ivec4"
+#else
+#define BONEINDEXTYPE1 "float"
+#define BONEINDEXTYPE2 "vec2"
+#define BONEINDEXTYPE4 "vec4"
+#endif
+
+#if USE_UNIFORM_BLOCK_FOR_BONES
+#define BUILD_SHADER( indecl, outdecl, skincalc, outcalc ) \
+ {\
+ BUILD_SHADER_2(32, indecl, outdecl, skincalc, outcalc), \
+ BUILD_SHADER_2(64, indecl, outdecl, skincalc, outcalc), \
+ BUILD_SHADER_2(128, indecl, outdecl, skincalc, outcalc), \
+ BUILD_SHADER_2(256, indecl, outdecl, skincalc, outcalc), \
+ BUILD_SHADER_2(512, indecl, outdecl, skincalc, outcalc), \
+ BUILD_SHADER_2(1024, indecl, outdecl, skincalc, outcalc) }
+#else
+// Just one bone count (82): store it in the first element and pad the other five slots with empty strings
+#define BUILD_SHADER( indecl, outdecl, skincalc, outcalc ) \
+{\
+ BUILD_SHADER_2(82, indecl, outdecl, skincalc, outcalc), "", "", "", "", ""\
+}
+#endif
+// Shaders for each input type, and for various max bone counts (32, 64, 128, 256, 512 and 1024) and bone-per-vertex counts (1, 2, 4 bones per vertex supported, sparse array so third slot is empty). Indexed as tfShaderSource[TFSHADER_*][bonesPerVertex - 1][maxBonesBits]; without uniform blocks only maxBonesBits == 0 holds a real shader.
+static const char *tfShaderSource[TFSHADER_SIZE][4][6] = {
+ // TFSHADER_POS
+ {
+#define IN_DECL "\n"
+#define OUT_DECL "\n"
+#define OUT_CALC "\n"
+ // 1 bone
+ BUILD_SHADER( "in " BONEINDEXTYPE1 " in_boneIndices;\n" IN_DECL,
+ OUT_DECL,
+ " getMatrix(int(in_boneIndices));\n",
+ OUT_CALC
+ ),
+
+ // 2 bones
+ BUILD_SHADER( "in " BONEINDEXTYPE2 " in_boneIndices;\n in vec2 in_boneWeights;\n" IN_DECL,
+ OUT_DECL,
+ " getMatrix(int(in_boneIndices.x)) * in_boneWeights[0] + \n" \
+ " getMatrix(int(in_boneIndices.y)) * in_boneWeights[1] ;\n ",
+ OUT_CALC
+ ),
+ // 3 bones (unsupported; empty placeholder so that the slot index stays bonesPerVertex - 1)
+ {"", "", "", "", "", ""},
+
+ // 4 bones
+ BUILD_SHADER( "in " BONEINDEXTYPE4 " in_boneIndices;\n in vec4 in_boneWeights;\n" IN_DECL,
+ OUT_DECL,
+ " getMatrix(int(in_boneIndices.x)) * in_boneWeights[0] + \n" \
+ " getMatrix(int(in_boneIndices.y)) * in_boneWeights[1] + \n" \
+ " getMatrix(int(in_boneIndices.z)) * in_boneWeights[2] + \n" \
+ " getMatrix(int(in_boneIndices.w)) * in_boneWeights[3] ;\n",
+ OUT_CALC
+ )
+ }
+ ,
+ // TFSHADER_POSNORM
+ {
+#undef IN_DECL
+#undef OUT_DECL
+#undef OUT_CALC
+#define IN_DECL "in vec3 in_normal;\n"
+#define OUT_DECL "out vec3 out_normal;\n"
+#define OUT_CALC " out_normal = normalize( (vec4(in_normal.xyz, 0.0) * localToWorldMatrix)).xyz;\n"
+ // 1 bone
+ BUILD_SHADER( "in " BONEINDEXTYPE1 " in_boneIndices;\n" IN_DECL,
+ OUT_DECL,
+ " getMatrix(int(in_boneIndices));\n",
+ OUT_CALC
+ ),
+
+ // 2 bones
+ BUILD_SHADER( "in " BONEINDEXTYPE2 " in_boneIndices;\n in vec2 in_boneWeights;\n" IN_DECL,
+ OUT_DECL,
+ " getMatrix(int(in_boneIndices.x)) * in_boneWeights[0] + \n" \
+ " getMatrix(int(in_boneIndices.y)) * in_boneWeights[1] ;\n ",
+ OUT_CALC
+ ),
+ // 3 bones (unsupported; empty placeholder so that the slot index stays bonesPerVertex - 1)
+ {"", "", "", "", "", ""},
+
+ // 4 bones
+ BUILD_SHADER( "in " BONEINDEXTYPE4 " in_boneIndices;\n in vec4 in_boneWeights;\n" IN_DECL,
+ OUT_DECL,
+ " getMatrix(int(in_boneIndices.x)) * in_boneWeights[0] + \n" \
+ " getMatrix(int(in_boneIndices.y)) * in_boneWeights[1] + \n" \
+ " getMatrix(int(in_boneIndices.z)) * in_boneWeights[2] + \n" \
+ " getMatrix(int(in_boneIndices.w)) * in_boneWeights[3] ;\n",
+ OUT_CALC
+ )
+ },
+// TFSHADER_POSNORMTAN
+ {
+#undef IN_DECL
+#undef OUT_DECL
+#undef OUT_CALC
+#define IN_DECL "in vec3 in_normal;\n in vec4 in_tangent;\n"
+#define OUT_DECL "out vec3 out_normal;\n out vec4 out_tangent;\n"
+#define OUT_CALC " out_normal = normalize( ( vec4(in_normal.xyz, 0.0) * localToWorldMatrix)).xyz;\n" \
+ " out_tangent = vec4( normalize( ( vec4(in_tangent.xyz, 0.0) * localToWorldMatrix)).xyz, in_tangent.w);\n"
+ // 1 bone
+ BUILD_SHADER( "in " BONEINDEXTYPE1 " in_boneIndices;\n" IN_DECL,
+ OUT_DECL,
+ " getMatrix(int(in_boneIndices));\n",
+ OUT_CALC
+ ),
+
+ // 2 bones
+ BUILD_SHADER( "in " BONEINDEXTYPE2 " in_boneIndices;\n in vec2 in_boneWeights;\n" IN_DECL,
+ OUT_DECL,
+ " getMatrix(int(in_boneIndices.x)) * in_boneWeights[0] + \n" \
+ " getMatrix(int(in_boneIndices.y)) * in_boneWeights[1] ;\n ",
+ OUT_CALC
+ ),
+ // 3 bones (unsupported; empty placeholder so that the slot index stays bonesPerVertex - 1)
+ {"", "", "", "", "", ""},
+
+ // 4 bones
+ BUILD_SHADER( "in " BONEINDEXTYPE4 " in_boneIndices;\n in vec4 in_boneWeights;\n" IN_DECL,
+ OUT_DECL,
+ " getMatrix(int(in_boneIndices.x)) * in_boneWeights[0] + \n" \
+ " getMatrix(int(in_boneIndices.y)) * in_boneWeights[1] + \n" \
+ " getMatrix(int(in_boneIndices.z)) * in_boneWeights[2] + \n" \
+ " getMatrix(int(in_boneIndices.w)) * in_boneWeights[3] ;\n",
+ OUT_CALC
+ )
+ }
+
+#undef IN_DECL
+#undef OUT_DECL
+#undef OUT_CALC
+
+};
+
+#undef BUILD_SHADER
+#undef BUILD_SHADER_2
+#undef STRINGIFY
+#undef MATRIX_DECL
+#undef BUILD_MATRIX
+
+static const char skinFS[] = // Source compiled into tfFragShader; writes constant white
+ "#version 300 es\n"
+ "\n"
+ "precision lowp float;\n"
+ "out vec4 outcol;\n"
+ "void main(void) { outcol = vec4(1.0, 1.0, 1.0, 1.0); }\n";
+
+enum TfSkinShaderChannel // Values tested as bit masks against the shader channel map
+{
+ kTFC_Position = VERTEX_FORMAT1(Vertex),
+ kTFC_Normal = VERTEX_FORMAT1(Normal),
+ kTFC_Tangent = VERTEX_FORMAT1(Tangent)
+};
+
+// Transform feedback object name; 0 until the first GetTransformFeedbackObject() call.
+static GLuint tfTransformFeedback = 0;
+
+// Returns the transform feedback object name, generating it on first use.
+static GLuint GetTransformFeedbackObject(void)
+{
+    if (tfTransformFeedback == 0)
+    {
+        GLES_CHK(glGenTransformFeedbacks(1, &tfTransformFeedback));
+    }
+    return tfTransformFeedback;
+}
+
+// Note: we might not support all formats all the time.
+// Returns true when the channel map can be skinned here: position is mandatory,
+// and tangents are only accepted together with normals.
+static bool DoesVertexFormatQualifyForTransformFeedback(UInt32 shaderChannelsMap)
+{
+    const bool hasPosition = (shaderChannelsMap & kTFC_Position) != 0;
+    if (!hasPosition)
+        return false;
+
+    const bool hasTangent = (shaderChannelsMap & kTFC_Tangent) != 0;
+    const bool hasNormal = (shaderChannelsMap & kTFC_Normal) != 0;
+    return !hasTangent || hasNormal;
+}
+
+// Rounds 'in' up to the nearest power of two (values that already are a power of two are returned unchanged).
+// http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
+static UInt32 roundUpToNextPowerOf2(UInt32 in)
+{
+    UInt32 value = in - 1;
+
+    // Smear the highest set bit into every lower bit position (shifts of 1, 2, 4, 8, 16).
+    for (UInt32 shift = 1; shift <= 16; shift <<= 1)
+        value |= value >> shift;
+
+    return value + 1;
+}
+// Maps a power-of-two bone count to its shader slot: 32 -> 0, 64 -> 1, and so on.
+// Counts below 32 also map to slot 0. Assumes boneCount is a power of 2.
+static int getBonesBits(UInt32 boneCount)
+{
+    // Integer log2 of a power of two via a De Bruijn multiply and table lookup
+    // http://graphics.stanford.edu/~seander/bithacks.html#IntegerLogDeBruijn
+    static const int kDeBruijnLog2[32] =
+    {
+        0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
+        31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
+    };
+    const UInt32 log2Count = kDeBruijnLog2[(UInt32)(boneCount * 0x077CB531U) >> 27];
+
+    // 32 bones (log2 == 5) is the smallest variant, so shift the range down by 5.
+    return (log2Count < 5) ? 0 : (int)(log2Count - 5);
+}
+
+// Prints 'in' to the console in pieces of at most 200 characters.
+// The text is passed as a "%s" argument, never as the format string itself, so
+// any '%' characters in it are printed literally instead of being interpreted.
+// An empty string still produces one (empty) print call.
+static void print_long_string(const std::string &in)
+{
+    const std::string::size_type split = 200;
+    const std::string::size_type len = in.length();
+    std::string::size_type offs = 0;
+    do
+    {
+        printf_console("%s", in.substr(offs, split).c_str());
+        offs += split;
+    } while (offs < len);
+}
+
+// Compiles 'source' into the already created GL shader object 'shader'.
+// On failure logs the info log, naming the stage with 'stageName', and returns false.
+static bool CompileTransformFeedbackShader(GLuint shader, const char *source, const char *stageName)
+{
+    GLint sourceLen = (GLint)strlen(source);
+    GLES_CHK(glShaderSource(shader, 1, &source, &sourceLen));
+    GLES_CHK(glCompileShader(shader));
+
+    GLint status = 0;
+    glGetShaderiv(shader, GL_COMPILE_STATUS, &status);
+    if(status == GL_TRUE)
+        return true;
+
+    char temp[512] = "";
+    GLint len = 512;
+    glGetShaderInfoLog(shader, 512, &len, temp );
+    printf_console("ERROR: Unable to compile Transform Feedback %s shader!\n Error log:\n%s", stageName, temp);
+    return false;
+}
+
+// Returns the skinning program for the given channel map, bones per vertex and bone
+// count slot, building and caching it in tfShaders on first use.
+// maxBonesBits == Max bone count: 0 = 32, 1 = 64, etc until 5 = 1024
+// Returns 0 when the combination has no shader source or when compiling/linking
+// fails; in that case no GL objects created by this call are left behind.
+static TFShader * GetTransformFeedbackShaderProgram(UInt32 shaderChannelsMap, UInt32 bonesPerVertex, UInt32 maxBonesBits)
+{
+    // Reject indices that fall outside the source table before they are used.
+    const UInt32 bonesPerVertexSlots = sizeof(tfShaderSource[0]) / sizeof(tfShaderSource[0][0]);
+    const UInt32 maxBonesSlots = sizeof(tfShaderSource[0][0]) / sizeof(tfShaderSource[0][0][0]);
+    if(bonesPerVertex < 1 || bonesPerVertex > bonesPerVertexSlots || maxBonesBits >= maxBonesSlots)
+    {
+        printf_console("ERROR: Unsupported Transform Feedback shader configuration (bones per vertex %u, bone count slot %u)\n", (unsigned int)bonesPerVertex, (unsigned int)maxBonesBits);
+        return 0;
+    }
+
+    // Check if already created
+    const UInt32 key = shaderChannelsMap + (bonesPerVertex << 16) + (maxBonesBits << 19);
+    TFShaderMap::iterator itr = tfShaders.find(key);
+    if(itr != tfShaders.end())
+    {
+        return &(itr->second);
+    }
+
+    // There are only 3 different combinations, and they are always in order. We'll just cut the array length at the call site.
+    const char *varyings[] = {"out_pos", "out_normal", "out_tangent"};
+    GLuint varyingCount = 0;
+    int shaderIdx = 0;
+    if(shaderChannelsMap & kTFC_Tangent)
+    {
+        shaderIdx = TFSHADER_POSNORMTAN;
+        varyingCount = 3;
+    }
+    else if(shaderChannelsMap & kTFC_Normal)
+    {
+        shaderIdx = TFSHADER_POSNORM;
+        varyingCount = 2;
+    }
+    else
+    {
+        shaderIdx = TFSHADER_POS;
+        varyingCount = 1;
+    }
+
+    // Placeholder slots in the table (3 bones per vertex, unused bone count slots) hold "".
+    const char *code = tfShaderSource[shaderIdx][bonesPerVertex-1][maxBonesBits];
+    if(code[0] == '\0')
+    {
+        printf_console("ERROR: No Transform Feedback shader source for bones per vertex %u, bone count slot %u\n", (unsigned int)bonesPerVertex, (unsigned int)maxBonesBits);
+        return 0;
+    }
+
+    // Create the fragment shader if it doesn't exist already. The handle is only
+    // published once it compiled, so a failed shader is never reused by later calls.
+    if(tfFragShader == 0)
+    {
+        GLuint fragShader = glCreateShader(GL_FRAGMENT_SHADER);
+        if(!CompileTransformFeedbackShader(fragShader, skinFS, "fragment"))
+        {
+            glDeleteShader(fragShader);
+            return 0;
+        }
+        tfFragShader = fragShader;
+    }
+
+    TFShader res;
+    res.program = glCreateProgram();
+    res.vertShader = glCreateShader(GL_VERTEX_SHADER);
+    if(!CompileTransformFeedbackShader(res.vertShader, code, "vertex"))
+    {
+        print_long_string(code);
+        res.Release();
+        return 0;
+    }
+    GLES_CHK(glAttachShader(res.program, res.vertShader));
+    GLES_CHK(glAttachShader(res.program, tfFragShader));
+
+    // The captured varyings must be declared before linking.
+    GLES_CHK(glTransformFeedbackVaryings(res.program, varyingCount, varyings, GL_INTERLEAVED_ATTRIBS));
+
+    GLES_CHK(glLinkProgram(res.program));
+
+    GLint status = 0;
+    glGetProgramiv(res.program, GL_LINK_STATUS, &status);
+    if(status != GL_TRUE)
+    {
+        char temp[512] = "";
+        GLint len = 512;
+        glGetProgramInfoLog(res.program, 512, &len, temp );
+        printf_console("ERROR: Unable to link Transform Feedback shader! Error: \n%s", temp);
+        print_long_string(code);
+        res.Release();
+        return 0;
+    }
+
+#if USE_UNIFORM_BLOCK_FOR_BONES
+    res.bonesLocation = glGetUniformBlockIndex(res.program, "MtxBlock");
+#else
+    res.bonesLocation = glGetUniformLocation(res.program, "bones");
+#endif
+
+    // Get the attribute locations. Some of these may be missing so clear the glerror afterwards
+    for(int i = 0; i < TFATTRLOC_SIZE; i++)
+    {
+        res.attribLocations[i] = glGetAttribLocation(res.program, tfShaderAttribNames[i]);
+    }
+    // Clear gl error
+    glGetError();
+
+    // Insert into map and return
+    return &(tfShaders.insert(std::make_pair(key, res)).first->second);
+}
+
+// std::for_each callback: deletes the GL objects held by one tfShaders entry.
+// Takes the map's own element type by reference, so the entry is not copied
+// (and converted to a different pair type) just to be released.
+static void ReleaseShader(TFShaderMap::value_type &it)
+{
+    it.second.Release();
+}
+
+// Deletes every cached skinning program, the transform feedback object and the
+// shared fragment shader, and resets the handles so they are recreated on next use.
+void TransformFeedbackSkinningInfo::CleanupTransformFeedbackShaders(void)
+{
+    std::for_each(tfShaders.begin(), tfShaders.end(), ReleaseShader);
+    tfShaders.clear();
+
+    if(tfTransformFeedback)
+    {
+        glDeleteTransformFeedbacks(1, &tfTransformFeedback);
+        tfTransformFeedback = 0; // GLuint handle, not a pointer
+    }
+    if(tfFragShader)
+    {
+        glDeleteShader(tfFragShader);
+        tfFragShader = 0;
+    }
+}
+
+// Deletes the source vertex buffer and releases the matrix buffer, if either exists.
+TransformFeedbackSkinningInfo::~TransformFeedbackSkinningInfo()
+{
+    if (m_SourceVBO != 0)
+    {
+        GLES_CHK(glDeleteBuffers(1, &m_SourceVBO));
+        m_SourceVBO = 0;
+    }
+
+    if (m_MatrixBuffer)
+    {
+        m_MatrixBuffer->Release();
+    }
+}
+
+//! Size of one source vertex in 4-byte units: the vertex stride plus the per-vertex skin data.
+UInt32 TransformFeedbackSkinningInfo::GetVertexSize()
+{
+    // Vertex data size
+    const UInt32 vertexUnits = (GetStride() / 4);
+
+    // Add skin info size
+    switch (GetBonesPerVertex())
+    {
+    case 1:
+        return vertexUnits + 1; // Index
+    case 2:
+        return vertexUnits + 4; // 2 indices, 2 weights
+    default:
+        return vertexUnits + 8; // 4 indices, 4 weights
+    }
+}
+
+// Makes sure the source buffer object exists and has room for all vertices.
+// Returns true when the buffer was created or its storage was reallocated, which
+// means its contents have to be uploaded again.
+bool TransformFeedbackSkinningInfo::EnsureBuffer()
+{
+    bool needsUpload = false;
+
+    if (m_SourceVBO == 0)
+    {
+        GLES_CHK(glGenBuffers(1, &m_SourceVBO));
+        needsUpload = true;
+    }
+
+    GLsizei requiredBytes = GetVertexSize() * GetVertexCount() * sizeof(float);
+    if (m_SourceVBOSize < requiredBytes)
+    {
+        // Storage only ever grows; the new storage is allocated without data.
+        GLES_CHK(glBindBuffer(GL_UNIFORM_BUFFER, m_SourceVBO));
+        GLES_CHK(glBufferData(GL_UNIFORM_BUFFER, requiredBytes, NULL, GL_STATIC_DRAW));
+        m_SourceVBOSize = requiredBytes;
+        needsUpload = true;
+        GLES_CHK(glBindBuffer(GL_UNIFORM_BUFFER, 0));
+    }
+
+    return needsUpload;
+}
+
+
+// Rebuilds the interleaved source buffer read by SkinMesh.
+// Each vertex is written as its GetStride() bytes of vertex data followed by the skin data:
+//   1 bone  : bone index
+//   2 bones : 2 weights, then 2 bone indices
+//   4 bones : 4 weights, then 4 bone indices
+// vertData - source vertices, read as floats with a stride of GetStride() bytes
+// skinData - per-vertex skin data; read as BoneInfluence, BoneInfluence2 or plain
+//            ints depending on GetBonesPerVertex()
+// dirty    - forces the upload; it also happens whenever EnsureBuffer() (re)allocated the buffer
+void TransformFeedbackSkinningInfo::UpdateSourceData(const void *vertData, const BoneInfluence *skinData, bool dirty)
+{
+    dirty |= EnsureBuffer();
+
+    if(!dirty)
+        return;
+
+    std::vector<float> vboData;
+    vboData.resize(GetVertexSize() * GetVertexCount());
+
+    // Nothing to upload; also avoids taking the address of element 0 of an empty vector.
+    if(vboData.empty())
+        return;
+
+    float *dest = &vboData[0];
+    const float *vertsrc = (const float *)vertData;
+    int vertsize = GetStride() / sizeof(float);
+    const BoneInfluence *bonesrc4 = skinData;
+    const BoneInfluence2 *bonesrc2 = (const BoneInfluence2 *)skinData;
+    const int *bonesrc1 = (const int *)skinData;
+
+    for(int i = 0; i < GetVertexCount(); i++)
+    {
+        std::copy(vertsrc, vertsrc+vertsize, dest);
+        dest += vertsize;
+        vertsrc += vertsize;
+        switch(GetBonesPerVertex())
+        {
+        default:
+        case 1:
+#if USE_INT_ATTRIBS
+            // Store the integer bit pattern unchanged in the float-typed staging buffer
+            memcpy(dest, bonesrc1, sizeof(int));
+            dest++;
+            bonesrc1++;
+#else
+            *(dest++) = (float) *(bonesrc1++);
+#endif
+            break;
+        case 2:
+            // Copy weights
+            std::copy(&bonesrc2->weight[0], (&bonesrc2->weight[0])+2, dest);
+            dest += 2;
+#if USE_INT_ATTRIBS
+            memcpy(dest, &bonesrc2->boneIndex[0], sizeof(int)*2);
+            dest+= 2;
+#else
+            *(dest++) = (float) bonesrc2->boneIndex[0];
+            *(dest++) = (float) bonesrc2->boneIndex[1];
+#endif
+            bonesrc2++;
+
+            break;
+        case 4:
+            // Copy weights
+            std::copy(&bonesrc4->weight[0], (&bonesrc4->weight[0])+4, dest);
+            dest += 4;
+#if USE_INT_ATTRIBS
+            memcpy(dest, &bonesrc4->boneIndex[0], sizeof(int)*4);
+            dest+= 4;
+#else
+            *(dest++) = (float) bonesrc4->boneIndex[0];
+            *(dest++) = (float) bonesrc4->boneIndex[1];
+            *(dest++) = (float) bonesrc4->boneIndex[2];
+            *(dest++) = (float) bonesrc4->boneIndex[3];
+#endif
+            bonesrc4++;
+
+            break;
+        }
+    }
+    GLES_CHK(glBindBuffer(GL_UNIFORM_BUFFER, m_SourceVBO));
+    GLES_CHK(glBufferSubData(GL_UNIFORM_BUFFER, 0, vboData.size() * sizeof(float), &vboData[0]));
+    GLES_CHK(glBindBuffer(GL_UNIFORM_BUFFER, 0));
+}
+
+
+// Stores the bone matrices for the next SkinMesh call.
+// Every matrix in cachedPose is transposed and its first 12 floats are kept, which
+// is the three-vec4-per-bone layout the skinning shaders read.
+// boneCount  - number of matrices in cachedPose; bones beyond the shader's capacity are dropped
+// cachedPose - array of boneCount matrices
+void TransformFeedbackSkinningInfo::UpdateSourceBones( const int boneCount, const Matrix4x4f* cachedPose )
+{
+#if USE_UNIFORM_BLOCK_FOR_BONES
+    // Shader variants exist for 32..1024 bones only (getBonesBits maps smaller counts
+    // to the 32 bone variant), so size the buffer for the variant that will be used.
+    UInt32 paddedBoneCount = roundUpToNextPowerOf2(boneCount);
+    if(paddedBoneCount < 32)
+        paddedBoneCount = 32;
+    if(paddedBoneCount > 1024)
+        paddedBoneCount = 1024;
+    m_BoneCount = paddedBoneCount;
+#else
+    m_BoneCount = 82;
+#endif
+
+    UInt32 realBufSize = m_BoneCount * 4 * 3 * sizeof(float);
+
+    // This basically shouldn't happen but just in case (should be released in SkinMesh)
+    if(m_MatrixBuffer)
+    {
+        m_MatrixBuffer->Release();
+        // Forget the released buffer so that it cannot be released a second time.
+        m_MatrixBuffer = NULL;
+    }
+
+    float *dest = NULL;
+
+#if USE_UNIFORM_BLOCK_FOR_BONES
+    m_MatrixBuffer = GetBufferManagerGLES30()->AcquireBuffer(realBufSize, GL_DYNAMIC_DRAW);
+
+    if(gGraphicsCaps.gles30.useMapBuffer)
+    {
+        m_MatrixBuffer->RecreateStorage(realBufSize, GL_DYNAMIC_DRAW);
+        dest = (float *)m_MatrixBuffer->Map(0, realBufSize, GL_MAP_WRITE_BIT|GL_MAP_INVALIDATE_BUFFER_BIT);
+    }
+    else
+#endif
+    {
+        m_CachedPose.resize(realBufSize / sizeof(float));
+        dest = &m_CachedPose[0];
+    }
+
+    // Never write more bones than the destination has room for.
+    int realBoneCount = boneCount;
+    if(boneCount > m_BoneCount)
+        realBoneCount = m_BoneCount;
+
+    for(int i = 0; i < realBoneCount; i++)
+    {
+        Matrix4x4f mat = cachedPose[i];
+        mat.Transpose();
+        float *src = mat.GetPtr();
+        std::copy(src, src+12, dest);
+        dest+=12;
+    }
+
+#if USE_UNIFORM_BLOCK_FOR_BONES
+    if(gGraphicsCaps.gles30.useMapBuffer)
+    {
+        m_MatrixBuffer->Unmap();
+    }
+    else
+    {
+        m_MatrixBuffer->RecreateWithData(realBufSize, GL_DYNAMIC_DRAW, (void *)&m_CachedPose[0]);
+    }
+    m_MatrixBuffer->RecordUpdate();
+#endif
+}
+
+// In GfxDeviceGLES30.cpp
+void GLSLUseProgramGLES30(UInt32 programID);
+
+// Skins the source vertices on the GPU: draws them as points with rasterization
+// discarded and captures the shader outputs into the destination VBO's skinning
+// target buffer through transform feedback.
+// Does nothing when no skinning program is available or (without uniform blocks)
+// when no pose has been stored by UpdateSourceBones.
+// last - not used by this implementation
+void TransformFeedbackSkinningInfo::SkinMesh( bool last )
+{
+    static GLuint s_WorkaroundTFBuf = 0;
+
+    // Qualcomm, srsly?
+    // Small dummy buffer that is left bound to the transform feedback binding point after skinning.
+    if(s_WorkaroundTFBuf == 0)
+    {
+        glGenBuffers(1, &s_WorkaroundTFBuf);
+        glBindBuffer(GL_COPY_WRITE_BUFFER, s_WorkaroundTFBuf);
+        glBufferData(GL_COPY_WRITE_BUFFER, 1024, NULL, GL_DYNAMIC_DRAW);
+        glBindBuffer(GL_COPY_WRITE_BUFFER, 0);
+    }
+
+#if USE_UNIFORM_BLOCK_FOR_BONES
+    TFShader *shd = GetTransformFeedbackShaderProgram(GetChannelMap(), GetBonesPerVertex(), getBonesBits(m_BoneCount));
+#else
+    // Without a stored pose there is nothing to upload (and no element 0 to point at).
+    if(m_CachedPose.empty())
+        return;
+
+    TFShader *shd = GetTransformFeedbackShaderProgram(GetChannelMap(), GetBonesPerVertex(), 0);
+#endif
+
+    Assert(shd);
+    // Shader creation can fail; do not dereference NULL when asserts are compiled out.
+    if(!shd)
+        return;
+
+    // NOTE(review): the transform feedback object is created here but never bound in this function - confirm that is intended.
+    GLuint tf = GetTransformFeedbackObject();
+    (void)tf;
+    GLES3VBO *vbo = static_cast<GLES3VBO *>(GetDestVBO());
+    GLuint glvbo = vbo->GetSkinningTargetVBO();
+
+    GLES_CHK(glBindBufferBase(GL_TRANSFORM_FEEDBACK_BUFFER, 0, glvbo));
+
+    GLSLUseProgramGLES30(shd->program);
+
+#if USE_UNIFORM_BLOCK_FOR_BONES
+    GLES_CHK(glUniformBlockBinding(shd->program, shd->bonesLocation, 0));
+
+    if(m_MatrixBuffer)
+        GLES_CHK(glBindBufferBase(GL_UNIFORM_BUFFER, 0, m_MatrixBuffer->GetBuffer()));
+#else
+    GLES_CHK(glUniform4fv(shd->bonesLocation, m_CachedPose.size() / 4, &m_CachedPose[0]));
+#endif
+
+    // Source layout (see UpdateSourceData): position, [normal], [tangent], [weights], bone indices.
+    GLES_CHK(glBindBuffer(GL_ARRAY_BUFFER, m_SourceVBO));
+    GLuint stride = GetVertexSize() * sizeof(float);
+    GLES_CHK(glVertexAttribPointer(shd->attribLocations[TFATTRLOC_POS], 3, GL_FLOAT, GL_FALSE, stride, 0));
+    GLES_CHK(glEnableVertexAttribArray(shd->attribLocations[TFATTRLOC_POS]));
+
+    // Byte offset of the next attribute; pointer sized so the cast to a pointer below is well formed on 64-bit targets.
+    size_t nextoffs = 12;
+
+    if(GetChannelMap() & kTFC_Normal)
+    {
+        GLES_CHK(glVertexAttribPointer(shd->attribLocations[TFATTRLOC_NORM], 3, GL_FLOAT, GL_FALSE, stride, (const void *)nextoffs));
+        GLES_CHK(glEnableVertexAttribArray(shd->attribLocations[TFATTRLOC_NORM]));
+        nextoffs += 12;
+    }
+    if(GetChannelMap() & kTFC_Tangent)
+    {
+        GLES_CHK(glVertexAttribPointer(shd->attribLocations[TFATTRLOC_TAN], 4, GL_FLOAT, GL_FALSE, stride, (const void *)nextoffs));
+        GLES_CHK(glEnableVertexAttribArray(shd->attribLocations[TFATTRLOC_TAN]));
+        nextoffs += 16;
+    }
+
+    // Number of bone influences stored per vertex; anything but 2 or 4 is treated as 1.
+    GLint influences = 1;
+    switch(GetBonesPerVertex())
+    {
+    case 2:
+        influences = 2;
+        break;
+    case 4:
+        influences = 4;
+        break;
+    default:
+        break;
+    }
+
+    // With more than one influence the weights precede the indices; a single bone has no weights.
+    if(influences > 1)
+    {
+        GLES_CHK(glVertexAttribPointer(shd->attribLocations[TFATTRLOC_BONEWEIGHT], influences, GL_FLOAT, GL_FALSE, stride, (const void *)nextoffs));
+        GLES_CHK(glEnableVertexAttribArray(shd->attribLocations[TFATTRLOC_BONEWEIGHT]));
+        nextoffs += influences * sizeof(float);
+    }
+
+#if USE_INT_ATTRIBS
+    GLES_CHK(glVertexAttribIPointer(shd->attribLocations[TFATTRLOC_BONEIDX], influences, GL_INT, stride, (const void *)nextoffs));
+#else
+    GLES_CHK(glVertexAttribPointer(shd->attribLocations[TFATTRLOC_BONEIDX], influences, GL_FLOAT, GL_FALSE, stride, (const void *)nextoffs));
+#endif
+    GLES_CHK(glEnableVertexAttribArray(shd->attribLocations[TFATTRLOC_BONEIDX]));
+
+    GLES_CHK(glBeginTransformFeedback(GL_POINTS));
+
+    // One point per vertex; nothing is rasterized, only the captured outputs matter.
+    GLES_CHK(glEnable(GL_RASTERIZER_DISCARD));
+    GLES_CHK(glDrawArrays(GL_POINTS, 0, GetVertexCount()));
+    GLES_CHK(glDisable(GL_RASTERIZER_DISCARD));
+
+    GLES_CHK(glEndTransformFeedback());
+
+    // Replace the destination buffer on the transform feedback binding point with the dummy buffer.
+    GLES_CHK(glBindBufferBase(GL_TRANSFORM_FEEDBACK_BUFFER, 0, s_WorkaroundTFBuf));
+    GLES_CHK(glBindBuffer(GL_ARRAY_BUFFER, 0));
+
+#if USE_UNIFORM_BLOCK_FOR_BONES
+
+    GLES_CHK(glBindBufferBase(GL_UNIFORM_BUFFER, 0, 0));
+    if(m_MatrixBuffer)
+    {
+        m_MatrixBuffer->RecordRender();
+        m_MatrixBuffer->Release();
+        m_MatrixBuffer = NULL;
+    }
+#endif
+    InvalidateVertexInputCacheGLES30();
+}