summaryrefslogtreecommitdiff
path: root/Runtime/GfxDevice/d3d/ShaderPatchingD3D9.cpp
diff options
context:
space:
mode:
authorchai <chaifix@163.com>2019-08-14 22:50:43 +0800
committerchai <chaifix@163.com>2019-08-14 22:50:43 +0800
commit15740faf9fe9fe4be08965098bbf2947e096aeeb (patch)
treea730ec236656cc8cab5b13f088adfaed6bb218fb /Runtime/GfxDevice/d3d/ShaderPatchingD3D9.cpp
+Unity Runtime codeHEADmaster
Diffstat (limited to 'Runtime/GfxDevice/d3d/ShaderPatchingD3D9.cpp')
-rw-r--r--Runtime/GfxDevice/d3d/ShaderPatchingD3D9.cpp376
1 files changed, 376 insertions, 0 deletions
diff --git a/Runtime/GfxDevice/d3d/ShaderPatchingD3D9.cpp b/Runtime/GfxDevice/d3d/ShaderPatchingD3D9.cpp
new file mode 100644
index 0000000..87f8e17
--- /dev/null
+++ b/Runtime/GfxDevice/d3d/ShaderPatchingD3D9.cpp
@@ -0,0 +1,376 @@
+#include "UnityPrefix.h"
+#include "ShaderPatchingD3D9.h"
+#include "Runtime/Utilities/BitUtility.h"
+#include "Runtime/Utilities/Word.h"
+
+#define DEBUG_FOG_PATCHING 0
+
+
+// Returns true when c is a line feed or a carriage return.
+static inline bool IsNewline( char c )
+{
+	switch (c)
+	{
+	case '\n':
+	case '\r':
+		return true;
+	default:
+		return false;
+	}
+}
+
+// Scans shader assembly text for "dcl_*" declarations and returns the highest
+// index declared for the given register type. For example, with registerName 'v'
+// the line "dcl_texcoord3 v3.xyz" contributes 3.
+// Returns -1 when no declaration of that register type is present.
+// The register must follow the dcl_* token on the same line and must be
+// immediately followed by its decimal index.
+static int FindMaxUsedDclIndex (const std::string& src, char registerName)
+{
+	const size_t n = src.size();
+	size_t pos = 0;
+	int maxDcl = -1;
+	while ((pos = src.find("dcl_", pos)) != std::string::npos)
+	{
+		// skip "dcl_"
+		pos += 4;
+
+		// skip until end of dcl_*
+		// (cast to unsigned char: handing a negative char to the <ctype.h> functions is undefined)
+		while (pos < n && !isspace(static_cast<unsigned char>(src[pos])))
+			++pos;
+		// skip blanks, but stay on the declaration's own line
+		while (pos < n && (src[pos] == ' ' || src[pos] == '\t'))
+			++pos;
+		// is this the needed register type, directly followed by its index?
+		// (checking for a digit first keeps sscanf from accepting a sign or leading whitespace)
+		if (pos + 1 < n && src[pos] == registerName && isdigit(static_cast<unsigned char>(src[pos + 1])))
+		{
+			int number = -1;
+			sscanf (src.c_str() + pos + 1, "%d", &number);
+			if (number > maxDcl)
+				maxDcl = number;
+		}
+	}
+	return maxDcl;
+}
+
+
+// Inserts a "dcl_fog <registerName>" line at the start of the second line of src,
+// i.e. right after the vs_3_0/ps_3_0 line.
+// Returns false and leaves src unchanged when there is no text after the first line.
+static bool InsertFogDcl (std::string& src, const std::string& registerName)
+{
+	const size_t length = src.size();
+
+	// start past the 6 characters of the "vs_3_0"/"ps_3_0" token
+	size_t insertAt = 6;
+	for (; insertAt < length; ++insertAt) // advance to the end of the first line
+	{
+		if (IsNewline(src[insertAt]))
+			break;
+	}
+	for (; insertAt < length; ++insertAt) // step over the line break character(s)
+	{
+		if (!IsNewline(src[insertAt]))
+			break;
+	}
+	if (insertAt >= length)
+		return false;
+
+	const std::string fogDcl = Format("dcl_fog %s\n", registerName.c_str());
+	src.insert (insertAt, fogDcl);
+	return true;
+}
+
+
+// Patches ps_3_0 assembly text in place so that the shader applies fog to its output color.
+//   src          - shader assembly; anything not starting with "ps_3_0" is left untouched
+//   fog          - fog mode; Exp2, Exp and Linear emit fog math, any other value only
+//                  adds the declaration and the final move
+//   fogColorReg  - index of the constant register (c#) holding the fog color
+//   fogParamsReg - index of the constant register (c#) holding the fog parameters
+//                  (.x used by Exp2, .y by Exp, .z/.w by Linear)
+// Returns true on success or when there is nothing to do; false when input
+// register v9 is already taken or the fog declaration could not be inserted.
+bool PatchPixelShaderFogD3D9 (std::string& src, FogMode fog, int fogColorReg, int fogParamsReg)
+{
+	// only ps_3_0 shaders get patched
+	if (strncmp(src.c_str(), "ps_3_0", 6) != 0)
+		return true; // nothing to do
+
+	#if DEBUG_FOG_PATCHING
+	printf_console ("D3D9 fog patching: original pixel shader:\n%s\n", src.c_str());
+	#endif
+
+	// SM3.0 has 10 input registers (v0..v9); the fog value is passed in the last one,
+	// so it must not be declared by the shader already.
+	const int kFogInputReg = 9;
+	if (FindMaxUsedDclIndex (src, 'v') >= kFogInputReg)
+		return false; // out of registers
+
+	const std::string fogInputName = Format("v%d.x", kFogInputReg);
+	if (!InsertFogDcl (src, fogInputName))
+	{
+		DebugAssert (!"failed to insert fog dcl");
+		return false;
+	}
+
+	// Redirect all writes to oC0 into temporary r30, so the color can be read back
+	// and fogged before the real output is written. r31 holds the fog factor.
+	const int kColorTemp = 30;
+	const int kFactorTemp = 31;
+	replace_string (src, "oC0", "r30");
+
+	// appended instructions must start on their own line
+	const char lastChar = src[src.size()-1];
+	if (!IsNewline(lastChar))
+		src += '\n';
+
+	// compute the fog factor into r31.x; every known mode is followed by the same blend
+	bool emitBlend = true;
+	if (fog == kFogExp2)
+	{
+		// factor = exp(-(density*z)^2)
+		src += Format("mul r%d.x, c%d.x, v%d.x\n", kFactorTemp, fogParamsReg, kFogInputReg); // tmp = (density/sqrt(ln(2))) * fog
+		src += Format("mul r%d.x, r%d.x, r%d.x\n", kFactorTemp, kFactorTemp, kFactorTemp); // tmp = tmp * tmp
+		src += Format("exp_sat r%d.x, -r%d.x\n", kFactorTemp, kFactorTemp); // tmp = saturate (exp2 (-tmp))
+	}
+	else if (fog == kFogExp)
+	{
+		// factor = exp(-density*z)
+		src += Format("mul r%d.x, c%d.y, v%d.x\n", kFactorTemp, fogParamsReg, kFogInputReg); // tmp = (density/ln(2)) * fog
+		src += Format("exp_sat r%d.x, -r%d.x\n", kFactorTemp, kFactorTemp); // tmp = saturate (exp2 (-tmp))
+	}
+	else if (fog == kFogLinear)
+	{
+		// factor = (end-z)/(end-start)
+		src += Format("mad_sat r%d.x, c%d.z, v%d.x, c%d.w\n", kFactorTemp, fogParamsReg, kFogInputReg, fogParamsReg); // tmp = (-1/(end-start)) * fog + (end/(end-start))
+	}
+	else
+	{
+		emitBlend = false;
+	}
+
+	if (emitBlend)
+	{
+		// lrp dst, s0, s1, s2 computes s0*(s1-s2)+s2, so this is
+		// color.rgb = fogColor + tmp * (color - fogColor): tmp==1 keeps the color, tmp==0 is full fog
+		src += Format("lrp r%d.rgb, r%d.x, r%d, c%d\n", kColorTemp, kFactorTemp, kColorTemp, fogColorReg);
+	}
+
+	// append final move into oC0
+	src += Format("mov oC0, r%d\n", kColorTemp);
+
+	#if DEBUG_FOG_PATCHING
+	printf_console ("D3D9 fog patching: after patching, fog mode %d:\n%s\n", fog, src.c_str());
+	#endif
+
+	return true;
+}
+
+
+// Patches vs_3_0 assembly text in place so that the shader also outputs a fog value.
+// A "dcl_fog o9" declaration is added, and the instruction that writes the position
+// (o0 or o0.z) is duplicated right below itself with o9 as destination, so o9
+// receives the same value as position z.
+// Returns true on success or when src is not a vs_3_0 shader (nothing to do).
+// Returns false when output register o9 is already taken, the declaration could
+// not be inserted, or no write to o0 / o0.z was found.
+bool PatchVertexShaderFogD3D9 (std::string& src)
+{
+	const bool isVS3 = !strncmp(src.c_str(), "vs_3_0", 6);
+	if (!isVS3)
+		return true; // nothing to do
+
+	#if DEBUG_FOG_PATCHING
+	printf_console ("D3D9 fog patching: original vertex shader:\n%s\n", src.c_str());
+	#endif
+
+	// SM3.0 has 12 output registers (o0..o11), but the pixel shader only has 10 input ones.
+	// Play it safe and let's assume we only have 10 here.
+
+	const int maxDclReg = FindMaxUsedDclIndex (src, 'o');
+	if (maxDclReg >= 9)
+	{
+		// out of registers
+		return false;
+	}
+	const int fogReg = 9;
+	std::string fogRegName = Format("o%d", fogReg);
+	if (!InsertFogDcl (src, fogRegName))
+	{
+		DebugAssert (!"failed to insert fog dcl");
+		return false;
+	}
+
+	// find write to o0, and do the same for oFog
+	size_t posWrite = src.find ("o0.z,");
+	bool writesFullPos = false;
+	if (posWrite == std::string::npos)
+	{
+		posWrite = src.find ("o0,");
+		if (posWrite == std::string::npos)
+		{
+			DebugAssert (!"couldn't find write to o0");
+			return false;
+		}
+		writesFullPos = true;
+	}
+
+	// get whole line: [lineStart, lineEnd) excludes the line break
+	const size_t n = src.size();
+	size_t lineStart = posWrite;
+	// look at the previous character, so that a line starting at offset 0 keeps its first character
+	while (lineStart > 0 && !IsNewline(src[lineStart - 1]))
+		--lineStart;
+	size_t lineEnd = posWrite;
+	while (lineEnd < n && !IsNewline(src[lineEnd]))
+		++lineEnd;
+
+	std::string instr = src.substr (lineStart, lineEnd - lineStart);
+	if (writesFullPos)
+	{
+		// full position write: retarget to the fog register and take .z of the source
+		replace_string (instr, "o0", fogRegName, 0);
+		instr += ".z";
+	}
+	else
+	{
+		replace_string (instr, "o0.z", fogRegName, 0);
+	}
+	instr += '\n';
+
+	// insert fog code just after write to position
+	if (lineEnd >= n)
+	{
+		// The position write is the last line and has no line break; inserting at
+		// lineEnd+1 would be past the end of the string. Terminate the line and append.
+		src += '\n';
+		src += instr;
+	}
+	else
+	{
+		size_t insertPos = lineEnd + 1;
+		// do not split a CR LF pair
+		if (src[lineEnd] == '\r' && insertPos < n && src[insertPos] == '\n')
+			++insertPos;
+		src.insert (insertPos, instr);
+	}
+
+	#if DEBUG_FOG_PATCHING
+	printf_console ("D3D9 fog patching: after patching:\n%s\n", src.c_str());
+	#endif
+
+	return true;
+}
+
+
+// --------------------------------------------------------------------------
+
+#if ENABLE_UNIT_TESTS
+
+#include "External/UnitTest++/src/UnitTest++.h"
+
+SUITE (ShaderPatchingD3D9Tests)
+{
+
+// Inputs without a complete "dcl_* v<N>" declaration must all yield -1.
+TEST(FindMaxDclIndexNotPresent)
+{
+	static const char* const kSources[] =
+	{
+		"",
+		"foobar",
+		"dcl_",
+		"dcl_foo",
+		"dcl_foo ",
+		"dcl_foo v",
+	};
+	const size_t kCount = sizeof(kSources) / sizeof(kSources[0]);
+	for (size_t i = 0; i < kCount; ++i)
+		CHECK_EQUAL (-1, FindMaxUsedDclIndex(kSources[i], 'v'));
+}
+// A single declaration yields its register index, with or without a swizzle suffix
+// and regardless of surrounding shader lines.
+TEST(FindMaxDclIndexOne)
+{
+	struct Case { int expected; const char* text; };
+	static const Case kCases[] =
+	{
+		{ 0, "dcl_foobar v0" },
+		{ 1, "dcl_foobar v1" },
+		{ 2, "dcl_foobar v2.x" },
+		{ 3, "dcl_foobar v3.rgb" },
+		{ 6, "dcl_foobar v6" },
+		{ 10, "dcl_foobar v10" },
+		{ 0, "ps_3_0\ndcl_foobar v0\nmov oC0, v0" },
+	};
+	const size_t kCount = sizeof(kCases) / sizeof(kCases[0]);
+	for (size_t i = 0; i < kCount; ++i)
+		CHECK_EQUAL (kCases[i].expected, FindMaxUsedDclIndex(kCases[i].text, 'v'));
+}
+// With several declarations the highest index wins, whatever the declaration order.
+TEST(FindMaxDclIndexMultiple)
+{
+	const std::string ascending = "dcl_foobar v0\ndcl_foobar v2";
+	const std::string descending = "dcl_foobar v3\ndcl_foobar v1";
+
+	CHECK_EQUAL (2, FindMaxUsedDclIndex(ascending, 'v'));
+	CHECK_EQUAL (3, FindMaxUsedDclIndex(descending, 'v'));
+}
+
+// A shader that writes only o0.z gets that instruction duplicated with o9 as destination.
+TEST(PatchVSZWrite)
+{
+	std::string shader =
+		"vs_3_0\n"
+		"dcl_position o0\n"
+		"dp4 o0.z, c0, c1\n";
+	const std::string expected =
+		"vs_3_0\n"
+		"dcl_fog o9\n"
+		"dcl_position o0\n"
+		"dp4 o0.z, c0, c1\n"
+		"dp4 o9, c0, c1\n";
+
+	const bool patched = PatchVertexShaderFogD3D9(shader);
+	CHECK (patched);
+	CHECK_EQUAL(expected, shader);
+}
+// A shader that writes the whole o0 register gets a copy that writes o9 from the .z of the source.
+TEST(PatchVSFullWrite)
+{
+	std::string shader =
+		"vs_3_0\n"
+		"dcl_position o0\n"
+		"mov o0, c0\n";
+	const std::string expected =
+		"vs_3_0\n"
+		"dcl_fog o9\n"
+		"dcl_position o0\n"
+		"mov o0, c0\n"
+		"mov o9, c0.z\n";
+
+	const bool patched = PatchVertexShaderFogD3D9(shader);
+	CHECK (patched);
+	CHECK_EQUAL(expected, shader);
+}
+// The fog write goes directly below the position write, ahead of any later instructions.
+TEST(PatchVSWriteNotAtEnd)
+{
+	std::string shader =
+		"vs_3_0\n"
+		"dcl_position o0\n"
+		"mov o0, r0\n"
+		"mov r0, r1\n";
+	const std::string expected =
+		"vs_3_0\n"
+		"dcl_fog o9\n"
+		"dcl_position o0\n"
+		"mov o0, r0\n"
+		"mov o9, r0.z\n"
+		"mov r0, r1\n";
+
+	const bool patched = PatchVertexShaderFogD3D9(shader);
+	CHECK (patched);
+	CHECK_EQUAL(expected, shader);
+}
+TEST(PatchPSDisjointColorAlphaWrite) // pixel shader that writes oC0.xyz and oC0.w in two separate instructions
+{
+ std::string s = // input shader: declares inputs v0..v4, so v9 is free
+ "ps_3_0\n"
+ "; 31 ALU, 2 TEX\n"
+ "dcl_2d s0\n"
+ "dcl_2d s1\n"
+ "def c5, 0.0, 128.0, 2.0, 0\n"
+ "dcl_texcoord0 v0.xy\n"
+ "dcl_texcoord1 v1.xyz\n"
+ "dcl_texcoord2 v2.xyz\n"
+ "dcl_texcoord3 v3.xyz\n"
+ "dcl_texcoord4 v4\n"
+ "texldp r3.x, v4, s1\n"
+ "dp3_pp r0.x, v3, v3\n"
+ "rsq_pp r0.x, r0.x\n"
+ "mad_pp r0.xyz, r0.x, v3, c0\n"
+ "dp3_pp r0.w, r0, r0\n"
+ "rsq_pp r0.w, r0.w\n"
+ "mul_pp r0.xyz, r0.w, r0\n"
+ "mov_pp r0.w, c4.x\n"
+ "dp3_pp r0.x, v1, r0\n"
+ "dp3_pp r2.x, v1, c0\n"
+ "mul_pp r1.y, c5, r0.w\n"
+ "max_pp r1.x, r0, c5\n"
+ "pow r0, r1.x, r1.y\n"
+ "mov r1.x, r0\n"
+ "texld r0, v0, s0\n"
+ "mul r1.w, r0, r1.x\n"
+ "mul_pp r1.xyz, r0, c3\n"
+ "mul_pp r0.xyz, r1, c1\n"
+ "max_pp r2.x, r2, c5\n"
+ "mul_pp r2.xyz, r0, r2.x\n"
+ "mov_pp r0.xyz, c1\n"
+ "mul_pp r0.xyz, c2, r0\n"
+ "mad r0.xyz, r0, r1.w, r2\n"
+ "mul_pp r2.w, r3.x, c5.z\n"
+ "mul r0.xyz, r0, r2.w\n"
+ "mad_pp oC0.xyz, r1, v2, r0\n" // color RGB
+ "mov_pp r2.x, c1.w\n"
+ "mul_pp r0.x, c2.w, r2\n"
+ "mul_pp r0.y, r0.w, c3.w\n"
+ "mul r0.x, r1.w, r0\n"
+ "mad oC0.w, r3.x, r0.x, r0.y\n"; // color A
+ std::string exps = // expected result: same shader with the fog patch applied
+ "ps_3_0\n"
+ "dcl_fog v9.x\n" // fog declaration inserted right after the version line
+ "; 31 ALU, 2 TEX\n"
+ "dcl_2d s0\n"
+ "dcl_2d s1\n"
+ "def c5, 0.0, 128.0, 2.0, 0\n"
+ "dcl_texcoord0 v0.xy\n"
+ "dcl_texcoord1 v1.xyz\n"
+ "dcl_texcoord2 v2.xyz\n"
+ "dcl_texcoord3 v3.xyz\n"
+ "dcl_texcoord4 v4\n"
+ "texldp r3.x, v4, s1\n"
+ "dp3_pp r0.x, v3, v3\n"
+ "rsq_pp r0.x, r0.x\n"
+ "mad_pp r0.xyz, r0.x, v3, c0\n"
+ "dp3_pp r0.w, r0, r0\n"
+ "rsq_pp r0.w, r0.w\n"
+ "mul_pp r0.xyz, r0.w, r0\n"
+ "mov_pp r0.w, c4.x\n"
+ "dp3_pp r0.x, v1, r0\n"
+ "dp3_pp r2.x, v1, c0\n"
+ "mul_pp r1.y, c5, r0.w\n"
+ "max_pp r1.x, r0, c5\n"
+ "pow r0, r1.x, r1.y\n"
+ "mov r1.x, r0\n"
+ "texld r0, v0, s0\n"
+ "mul r1.w, r0, r1.x\n"
+ "mul_pp r1.xyz, r0, c3\n"
+ "mul_pp r0.xyz, r1, c1\n"
+ "max_pp r2.x, r2, c5\n"
+ "mul_pp r2.xyz, r0, r2.x\n"
+ "mov_pp r0.xyz, c1\n"
+ "mul_pp r0.xyz, c2, r0\n"
+ "mad r0.xyz, r0, r1.w, r2\n"
+ "mul_pp r2.w, r3.x, c5.z\n"
+ "mul r0.xyz, r0, r2.w\n"
+ "mad_pp r30.xyz, r1, v2, r0\n" // RGB write redirected from oC0 to r30
+ "mov_pp r2.x, c1.w\n"
+ "mul_pp r0.x, c2.w, r2\n"
+ "mul_pp r0.y, r0.w, c3.w\n"
+ "mul r0.x, r1.w, r0\n"
+ "mad r30.w, r3.x, r0.x, r0.y\n" // alpha write redirected from oC0 to r30
+ "mul r31.x, c7.x, v9.x\n" // appended Exp2 fog code starts here
+ "mul r31.x, r31.x, r31.x\n"
+ "exp_sat r31.x, -r31.x\n"
+ "lrp r30.rgb, r31.x, r30, c6\n"
+ "mov oC0, r30\n"; // final move of the fogged color into the real output
+ CHECK (PatchPixelShaderFogD3D9(s, kFogExp2, 6, 7)); // fog color register c6, fog params register c7
+ CHECK_EQUAL(exps, s);
+}
+
+} // SUITE
+
+#endif // ENABLE_UNIT_TESTS