summaryrefslogtreecommitdiff
path: root/Runtime/GfxDevice/d3d11/ShaderGeneratorD3D11.cpp
diff options
context:
space:
mode:
authorchai <chaifix@163.com>2019-08-14 22:50:43 +0800
committerchai <chaifix@163.com>2019-08-14 22:50:43 +0800
commit15740faf9fe9fe4be08965098bbf2947e096aeeb (patch)
treea730ec236656cc8cab5b13f088adfaed6bb218fb /Runtime/GfxDevice/d3d11/ShaderGeneratorD3D11.cpp
+Unity Runtime codeHEADmaster
Diffstat (limited to 'Runtime/GfxDevice/d3d11/ShaderGeneratorD3D11.cpp')
-rw-r--r--Runtime/GfxDevice/d3d11/ShaderGeneratorD3D11.cpp1504
1 files changed, 1504 insertions, 0 deletions
diff --git a/Runtime/GfxDevice/d3d11/ShaderGeneratorD3D11.cpp b/Runtime/GfxDevice/d3d11/ShaderGeneratorD3D11.cpp
new file mode 100644
index 0000000..4752142
--- /dev/null
+++ b/Runtime/GfxDevice/d3d11/ShaderGeneratorD3D11.cpp
@@ -0,0 +1,1504 @@
+#include "UnityPrefix.h"
+#include "ShaderGeneratorD3D11.h"
+#include "FixedFunctionStateD3D11.h"
+#include "ConstantBuffersD3D11.h"
+#include "D3D11Context.h"
+#include "Runtime/GfxDevice/GpuProgram.h"
+#include "External/shaderlab/Library/TextureBinding.h"
+#include "Runtime/GfxDevice/GfxDevice.h"
+#include "D3D11ByteCode.h"
+
+#define DEBUG_D3D11_FF_SHADERS (!UNITY_RELEASE && 0) // Gates the disassembly/debug helpers below; the trailing "&& 0" keeps it off in every configuration until edited by hand.
+#define DEBUG_D3D11_COMPARE_WITH_HLSL (DEBUG_D3D11_FF_SHADERS && 0) // Gates the HLSL-text mirror of the generated shaders; can only be on when DEBUG_D3D11_FF_SHADERS is on.
+
+ConstantBuffersD3D11& GetD3D11ConstantBuffers (GfxDevice& device); // Forward declaration only; the definition is not in this part of the file.
+
+
+// --- Debugging ---------------------------------------------------------------------------------
+
+
+#if DEBUG_D3D11_FF_SHADERS
+
+#include "Runtime/GfxDevice/d3d11/D3D11Compiler.h"
+#if UNITY_WINRT
+#include "PlatformDependent/MetroPlayer/MetroUtils.h"
+#endif
+
+static D3D11Compiler s_Compiler;
+
+// Loads the D3D compiler DLL on first use and resolves the entry points the debug
+// helpers in this file call (compile, strip, reflect, disassemble, create-blob).
+// Returns s_Compiler.IsValid(), i.e. whether the resolved set is usable.
+// The load is attempted exactly once; later calls only re-check validity.
+static bool HasD3D11Compiler()
+{
+    static bool initialized = false;
+    if (!initialized)
+    {
+        // Latch before doing any work: without this the DLL was re-loaded (and its
+        // module reference count bumped) on every single call.
+        initialized = true;
+
+        //s_Compiler.Initialize (kD3D11CompilerDLL);
+        const char* dllName = kD3D11CompilerDLL;
+        s_Compiler.compileFunc = NULL;
+        s_Compiler.stripShaderFunc = NULL;
+        s_Compiler.reflectFunc = NULL;
+        s_Compiler.disassembleFunc = NULL;
+        s_Compiler.createBlobFunc = NULL;
+
+        #if UNITY_WINRT
+        HMODULE dll = LoadPackagedLibrary (ConvertToWindowsPath(dllName)->Data(), 0);
+        #else
+        HMODULE dll = LoadLibraryA (dllName);
+        #endif
+        if (dll)
+        {
+            // Any export that is missing simply stays NULL; callers test the individual pointers.
+            s_Compiler.compileFunc = (D3D11Compiler::D3DCompileFunc) GetProcAddress (dll, "D3DCompile");
+            s_Compiler.stripShaderFunc = (D3D11Compiler::D3DStripShaderFunc) GetProcAddress (dll, "D3DStripShader");
+            s_Compiler.reflectFunc = (D3D11Compiler::D3DReflectFunc) GetProcAddress (dll, "D3DReflect");
+            s_Compiler.disassembleFunc = (D3D11Compiler::D3DDisassembleFunc) GetProcAddress (dll, "D3DDisassemble");
+            s_Compiler.createBlobFunc = (D3D11Compiler::D3DCreateBlobFunc) GetProcAddress (dll, "D3DCreateBlob");
+        }
+    }
+    return s_Compiler.IsValid();
+}
+
+#endif // #if DEBUG_D3D11_FF_SHADERS
+
+
+#if DEBUG_D3D11_COMPARE_WITH_HLSL
+
+enum D3DCOMPILER_STRIP_FLAGS // Bit flags OR-ed together and passed to s_Compiler.stripShaderFunc; names/values presumably mirror d3dcompiler.h -- confirm.
+{
+ D3DCOMPILER_STRIP_REFLECTION_DATA = 1,
+ D3DCOMPILER_STRIP_DEBUG_INFO = 2,
+ D3DCOMPILER_STRIP_TEST_BLOBS = 4,
+ D3DCOMPILER_STRIP_FORCE_DWORD = 0x7fffffff, // not a real flag; presumably forces a 32-bit underlying type
+};
+
+#define D3D_DISASM_ENABLE_COLOR_CODE 1 // Flag bits for s_Compiler.disassembleFunc; only ..._DEFAULT_VALUE_PRINTS is used in the visible part of this file.
+#define D3D_DISASM_ENABLE_DEFAULT_VALUE_PRINTS 2 // NOTE(review): defined only under DEBUG_D3D11_COMPARE_WITH_HLSL, yet BuildShaderD3D11 uses it under DEBUG_D3D11_FF_SHADERS alone -- confirm D3D11Compiler.h also provides it.
+#define D3D_DISASM_ENABLE_INSTRUCTION_NUMBERING 4
+#define D3D_DISASM_ENABLE_INSTRUCTION_CYCLE 8
+
+// Debug aid: compiles `source` (entry point "main") with the dynamically loaded D3D
+// compiler, strips reflection/debug/test data from the result when the strip entry
+// point is available, and prints the disassembly to the console.
+//   source - complete HLSL program text.
+//   vertex - true selects a vertex shader profile, false a pixel shader profile; the
+//            *_level_9_3 variants are chosen when the device feature level is below 10.0.
+// On compile failure the source and any compiler messages are printed and an assert fires.
+// Nothing is returned; every blob obtained here is released before leaving.
+static void DebugCompileHLSLShaderD3D11 (const std::string& source, bool vertex)
+{
+    if (!HasD3D11Compiler())
+        return;
+
+    ID3D10Blob* shader = NULL;
+    // Start from NULL so the checks and releases below never read an indeterminate
+    // pointer should the compiler leave the out-parameter untouched.
+    ID3D10Blob* errors = NULL;
+    Assert (s_Compiler.compileFunc);
+    HRESULT hr = s_Compiler.compileFunc (
+        source.c_str(),
+        source.size(),
+        "source",
+        NULL,
+        NULL,
+        "main",
+        gGraphicsCaps.d3d11.featureLevel < kDX11Level10_0
+        ? (vertex ? "vs_4_0_level_9_3" : "ps_4_0_level_9_3")
+        : (vertex ? "vs_4_0" : "ps_4_0"),
+        0,
+        0,
+        &shader,
+        &errors);
+
+    if (FAILED(hr))
+    {
+        printf_console ("Failed to compile D3D11 shader:\n%s\n", source.c_str());
+        if (errors)
+        {
+            std::string msg (reinterpret_cast<const char*>(errors->GetBufferPointer()), errors->GetBufferSize());
+            printf_console ("\nErrors:\n%s\n", msg.c_str());
+            errors->Release();
+        }
+        else
+        {
+            printf_console ("\nErrors unknown!\n");
+        }
+        // Normally NULL on failure, but do not leak it if the compiler handed one back anyway.
+        SAFE_RELEASE(shader);
+        AssertString ("Failed to compile fixed function D3D11 shader");
+        return;
+    }
+
+    if (shader && s_Compiler.stripShaderFunc)
+    {
+        ID3D10Blob* strippedShader = NULL;
+
+        hr = s_Compiler.stripShaderFunc (shader->GetBufferPointer(), shader->GetBufferSize(), D3DCOMPILER_STRIP_REFLECTION_DATA | D3DCOMPILER_STRIP_DEBUG_INFO | D3DCOMPILER_STRIP_TEST_BLOBS, &strippedShader);
+        if (SUCCEEDED(hr))
+        {
+            // Swap in the stripped blob; a failed strip keeps the unstripped one.
+            SAFE_RELEASE(shader);
+            shader = strippedShader;
+        }
+    }
+
+    // A successful compile may still have produced a messages blob (e.g. warnings).
+    SAFE_RELEASE(errors);
+
+    if (shader && s_Compiler.disassembleFunc)
+    {
+        ID3D10Blob* disasm = NULL;
+        hr = s_Compiler.disassembleFunc (shader->GetBufferPointer(), shader->GetBufferSize(), D3D_DISASM_ENABLE_DEFAULT_VALUE_PRINTS, NULL, &disasm);
+        if (SUCCEEDED(hr) && disasm)
+        {
+            // GetBufferPointer() yields void*; give %s the character pointer it expects.
+            printf_console ("disasm:\n%s\n\n", reinterpret_cast<const char*>(disasm->GetBufferPointer()));
+        }
+        SAFE_RELEASE(disasm);
+    }
+
+    SAFE_RELEASE(shader);
+}
+
+// Appends `s` to the comma-separated list held in `str`.
+// A ',' is written first whenever `str` already contains something, so the
+// first item never gets a leading separator.
+static inline void AddToStringList (std::string& str, const char* s)
+{
+    const bool needsSeparator = !str.empty();
+    if (needsSeparator)
+        str.push_back(',');
+    str.append(s);
+}
+
+#endif // #if DEBUG_D3D11_COMPARE_WITH_HLSL
+
+
+
+// --- Constant buffers & utilities --------------------------------------------------------------
+
+
+static const char* kD3D11VertexCB = "UnityFFVertex"; // Name under which BuildVertexShaderD3D11 registers the vertex constant buffer.
+static const char* kD3D11PixelCB = "UnityFFPixel"; // Pixel-stage counterpart; not referenced in the visible part of this file.
+
+enum { // Register offsets into the vertex constant buffer; callers multiply by 16 to obtain byte offsets.
+ k11VertexMVP = 0, // 4 registers; bound as kShaderInstanceMatMVP
+ k11VertexMV = 4, // 4 registers; bound as kShaderInstanceMatMV
+ k11VertexColor = 8, // uniform color used instead of the vertex color (kShaderVecFFColor)
+ k11VertexAmbient = 9, // kShaderVecLightModelAmbient
+ k11VertexLightColor = 10, // 8 registers, indexed by light
+ k11VertexLightPos = 18, // 8 registers, indexed by light
+ k11VertexLightAtten = 26, // 8 registers, indexed by light
+ k11VertexLightSpot = 34, // 8 registers, indexed by light
+ k11VertexMatDiffuse = 42,
+ k11VertexMatAmbient = 43,
+ k11VertexMatSpec = 44, // .xyz scales the specular color, .w is the specular exponent
+ k11VertexMatEmission = 45,
+ k11VertexTex = 46, // 16 registers: 4 registers per texture matrix
+ k11VertexFog = 62, // kShaderVecFFFogParams
+ k11VertexSize = 63, // register count used to declare and size the buffer
+ k11VertexPosOffset9x = k11VertexSize+1, // evaluates to 64, i.e. past the k11VertexSize registers declared for the buffer
+};
+// k11VertexPosOffset9x is used as follows (NOTE(review): the example reads c63 while the enum value is 64 -- confirm the intended register):
+// mad oPos.xy, v0.w, c63, v0
+// mov oPos.zw, v0
+
+#if DEBUG_D3D11_COMPARE_WITH_HLSL
+static const char* kD3D11VertexPrefix = // HLSL text declaring the UnityFFVertex cbuffer; the number after each member is its starting register and lines up with the k11Vertex* enum.
+ "cbuffer UnityFFVertex {\n"
+ " float4x4 ff_matrix_mvp;\n" // 0
+ " float4x4 ff_matrix_mv;\n" // 4
+ " float4 ff_vec_color;\n" // 8
+ " float4 ff_vec_ambient;\n" // 9
+ " float4 ff_light_color[8];\n" // 10
+ " float4 ff_light_pos[8];\n" // 18
+ " float4 ff_light_atten[8];\n" // 26
+ " float4 ff_light_spot[8];\n" // 34
+ " float4 ff_mat_diffuse;\n" // 42
+ " float4 ff_mat_ambient;\n" // 43
+ " float4 ff_mat_spec;\n" // 44
+ " float4 ff_mat_emission;\n" // 45
+ " float4x4 ff_matrix_tex[4];\n" // 46 (16 registers, 46..61)
+ " float4 ff_fog;\n" // 62
+ "};\n"; // last register used is 62, giving k11VertexSize = 63 registers in total
+#endif // #if DEBUG_D3D11_COMPARE_WITH_HLSL
+
+
+enum { // Register offsets into the pixel constant buffer; they match the member offsets annotated in kD3D11PixelPrefix.
+ k11PixelColors = 0, // 8 registers (ff_vec_colors[8])
+ k11PixelAlphaRef = 8, // ff_alpha_ref
+ k11PixelFog = 9, // ff_fog
+ k11PixelSize = 10 // one past the last register used (9)
+};
+#if DEBUG_D3D11_COMPARE_WITH_HLSL
+static const char* kD3D11PixelPrefix = // HLSL text: the UnityFFPixel cbuffer followed by the opening of main's parameter list.
+"cbuffer UnityFFPixel {\n"
+" float4 ff_vec_colors[8];\n" // 0
+" float ff_alpha_ref;\n" // 8 (a scalar, yet the next member is annotated as register 9)
+" float4 ff_fog;\n" // 9
+"};\n"
+"float4 main (\n "; // parameter list is left open here; presumably completed by the pixel shader generator, which is not in view
+#endif // # if DEBUG_D3D11_COMPARE_WITH_HLSL
+
+
+// Turns the accumulated builder contents into a DXBC blob and destroys the builder.
+//   builder - must be non-NULL; it is destroyed here and must not be used afterwards.
+//   outSize - receives the blob size as reported by dxb_build.
+// Returns the pointer produced by dxb_build. With DEBUG_D3D11_FF_SHADERS enabled the
+// blob is additionally disassembled and printed to the console.
+static void* BuildShaderD3D11 (DXBCBuilder* builder, size_t& outSize)
+{
+    Assert(builder);
+
+    void* const bytecode = dxb_build (builder, outSize);
+    Assert(bytecode);
+    dxb_destroy (builder);
+
+    #if DEBUG_D3D11_FF_SHADERS
+    if (HasD3D11Compiler())
+    {
+        if (s_Compiler.disassembleFunc)
+        {
+            ID3D10Blob* listing = NULL;
+            const HRESULT result = s_Compiler.disassembleFunc (bytecode, outSize, D3D_DISASM_ENABLE_DEFAULT_VALUE_PRINTS, NULL, &listing);
+            const bool gotListing = SUCCEEDED(result) && listing;
+            if (gotListing)
+                printf_console ("disasm dxbc:\n%s\n\n", listing->GetBufferPointer());
+            // Released whether or not the disassembly call succeeded.
+            SAFE_RELEASE(listing);
+        }
+    }
+    #endif
+
+    return bytecode;
+}
+
+
+// --- VERTEX program ----------------------------------------------------------------------------
+
+
+// Emits the instruction sequence for
+//     dst = c[cbIndex+0]*src.x + c[cbIndex+1]*src.y + c[cbIndex+2]*src.z + c[cbIndex+3]*src.w
+// using r[tmpIndex] as the accumulator for the first three terms.
+//   cbIndex           - first of four consecutive constant registers holding the matrix.
+//   srcType/srcIndex  - register file and index of the source vector.
+//   dstType/dstIndex  - register file and index that receives the result.
+//   tmpIndex          - temporary register; overwritten.
+//   wAlways1          - when true src.w is taken to be 1, so the last term is a plain add
+//                       of c[cbIndex+3] and src.w is never read.
+static void EmitMatrixMul(DXBCBuilderStream& bld, int cbIndex, char srcType, int srcIndex, char dstType, int dstIndex, int tmpIndex, bool wAlways1)
+{
+    const int reg0 = cbIndex + 0;
+    const int reg1 = cbIndex + 1;
+    const int reg2 = cbIndex + 2;
+    const int reg3 = cbIndex + 3;
+
+    // tmp = src.y * c1
+    bld.op(kSM4Op_MUL)
+        .reg('r',tmpIndex)
+        .swz(srcType,srcIndex,kSM4SwzRepY)
+        .swz('c',reg1);
+    // tmp = c0 * src.x + tmp
+    bld.op(kSM4Op_MAD)
+        .reg('r',tmpIndex)
+        .swz('c',reg0)
+        .swz(srcType,srcIndex,kSM4SwzRepX)
+        .swz('r',tmpIndex);
+    // tmp = c2 * src.z + tmp
+    bld.op(kSM4Op_MAD)
+        .reg('r',tmpIndex)
+        .swz('c',reg2)
+        .swz(srcType,srcIndex,kSM4SwzRepZ)
+        .swz('r',tmpIndex);
+
+    if (wAlways1)
+    {
+        // dst = c3 + tmp
+        bld.op(kSM4Op_ADD)
+            .reg(dstType,dstIndex)
+            .swz('c',reg3)
+            .swz('r',tmpIndex);
+        return;
+    }
+
+    // dst = c3 * src.w + tmp
+    bld.op(kSM4Op_MAD)
+        .reg(dstType,dstIndex)
+        .swz('c',reg3)
+        .swz(srcType,srcIndex,kSM4SwzRepW)
+        .swz('r',tmpIndex);
+}
+
+
+void* BuildVertexShaderD3D11 (const FixedFunctionStateD3D11& state, FixedFunctionProgramD3D11::ValueParameters& params, BuiltinShaderParamIndices& matrices, size_t& outSize)
+{
+ ShaderLab::FastPropertyName cbName; cbName.SetName(kD3D11VertexCB);
+ GetD3D11ConstantBuffers(GetRealGfxDevice()).SetCBInfo (cbName.index, k11VertexSize*16);
+ params.m_CBID = cbName.index; params.m_CBSize = k11VertexSize*16;
+
+ DXBCBuilder* builder = dxb_create(4, 0, kSM4Shader_Vertex);
+ DXBCBuilderStream bld(builder);
+
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ std::string helpers, inputs, outputs, code;
+ #endif
+
+ bool hasLights = (state.lightingEnabled && state.lightCount > 0);
+
+ bool eyePositionRequired =
+ hasLights ||
+ (state.fogMode != kFogDisabled);
+ bool eyeNormalRequired = hasLights;
+ bool viewDirRequired = hasLights && state.specularEnabled;
+ bool eyeReflRequired = false;
+ {
+ UInt64 texSources = state.texUnitSources;
+ for (int i = 0; i < state.texUnitCount; i++)
+ {
+ UInt32 uvSource = texSources & 0xF;
+ if (uvSource == kTexSourceEyeLinear)
+ eyePositionRequired = true;
+ if (uvSource == kTexSourceCubeNormal)
+ eyeNormalRequired = true;
+ if (uvSource == kTexSourceCubeReflect || uvSource == kTexSourceSphereMap)
+ eyeReflRequired = viewDirRequired = eyePositionRequired = eyeNormalRequired = true;
+ texSources >>= 4;
+ }
+ }
+ if (eyePositionRequired || eyeNormalRequired || eyeReflRequired)
+ {
+ matrices.mat[kShaderInstanceMatMV].gpuIndex = k11VertexMV*16;
+ matrices.mat[kShaderInstanceMatMV].rows = 4;
+ matrices.mat[kShaderInstanceMatMV].cols = 4;
+ matrices.mat[kShaderInstanceMatMV].cbID = params.m_CBID;
+ }
+
+ dxb_dcl_cb(builder, 0, k11VertexSize);
+
+ int inputRegCounter = 0, outputRegCounter = 0, tempRegCounter = 0;
+ int inPosReg = 0, inColorReg = 0, inNormalReg = 0;
+ int inUVReg[8] = {0};
+ int outColor0Reg = 0, outColor1Reg = 0, outPosReg = 0;
+ int outUVReg[8] = {0};
+ int eyePosReg = 0, eyeNormalReg = 0, viewDirReg = 0, eyeReflReg = 0, lcolorReg = 0, specColorReg = 0;
+
+ dxb_dcl_input(builder, "POSITION", 0, inPosReg = inputRegCounter++);
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ AddToStringList (inputs, "float4 vertex : POSITION");
+ #endif
+
+ // color = Vertex or uniform color
+ char inColorType;
+ if (state.useUniformInsteadOfVertexColor)
+ {
+ params.AddVectorParam (k11VertexColor*16, 4, kShaderVecFFColor);
+ inColorType = 'c';
+ inColorReg = k11VertexColor;
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ code += "float4 color = ff_vec_color;\n";
+ #endif
+ }
+ else
+ {
+ inColorType = 'v';
+ dxb_dcl_input(builder, "COLOR", 0, inColorReg = inputRegCounter++);
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ AddToStringList (inputs, "float4 vertexColor : COLOR");
+ code += "float4 color = vertexColor;\n";
+ #endif
+ }
+
+ // eyePos = eye position
+ if (eyePositionRequired)
+ {
+ eyePosReg = tempRegCounter++;
+ EmitMatrixMul (bld, k11VertexMV, 'v',inPosReg, 'r',eyePosReg, eyePosReg, false);
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ code += "float3 eyePos = mul (ff_matrix_mv, vertex).xyz;\n";
+ #endif
+ }
+
+ // eyeNormal = normalize(normalMatrix * normal)
+ if (eyeNormalRequired)
+ {
+ dxb_dcl_input(builder, "NORMAL", 0, inNormalReg = inputRegCounter++, 0x7);
+ eyeNormalReg = tempRegCounter++;
+ // mul
+ bld.op(kSM4Op_MUL).reg('r',eyeNormalReg,7).swz('v',inNormalReg,kSM4SwzRepY).swz('c',k11VertexMV+1);
+ bld.op(kSM4Op_MAD).reg('r',eyeNormalReg,7).swz('c',k11VertexMV+0).swz('v',inNormalReg,kSM4SwzRepX).swz('r',eyeNormalReg);
+ bld.op(kSM4Op_MAD).reg('r',eyeNormalReg,7).swz('c',k11VertexMV+2).swz('v',inNormalReg,kSM4SwzRepZ).swz('r',eyeNormalReg);
+ // normalize
+ bld.op(kSM4Op_DP3).reg('r',eyeNormalReg,8).swz('r',eyeNormalReg,kSM4SwzNone).swz('r',eyeNormalReg,kSM4SwzNone);
+ bld.op(kSM4Op_RSQ).reg('r',eyeNormalReg,8).swz('r',eyeNormalReg,kSM4SwzRepW);
+ bld.op(kSM4Op_MUL).reg('r',eyeNormalReg,7).swz('r',eyeNormalReg,kSM4SwzRepW).swz('r',eyeNormalReg,kSM4SwzXYZX);
+
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ AddToStringList (inputs, "float3 normal : NORMAL");
+ code += "float3 eyeNormal = normalize (mul ((float3x3)ff_matrix_mv, normal).xyz);\n"; //@TODO: proper normal matrix
+ #endif
+ }
+
+ // view dir
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ code += "float3 viewDir = 0.0;";
+ #endif
+ if (viewDirRequired)
+ {
+ // viewDir = normalized vertex-to-eye
+ viewDirReg = tempRegCounter++;
+ // -normalize
+ bld.op(kSM4Op_DP3).reg('r',viewDirReg,8).swz('r',eyePosReg,kSM4SwzNone).swz('r',eyePosReg,kSM4SwzNone);
+ bld.op(kSM4Op_RSQ).reg('r',viewDirReg,8).swz('r',viewDirReg,kSM4SwzRepW);
+ bld.op(kSM4Op_MUL).reg('r',viewDirReg,7).swz('r',viewDirReg,kSM4SwzRepW).swz('r',eyePosReg,kSM4SwzXYZX,true);
+
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ code += "viewDir = -normalize (eyePos);\n";
+ #endif
+ }
+
+ // eyeRefl
+ if (eyeReflRequired)
+ {
+ DebugAssert (viewDirRequired);
+ // eyeRefl = reflection vector, 2*dot(V,N)*N-V
+ eyeReflReg = tempRegCounter++;
+ bld.op(kSM4Op_DP3).reg('r',eyeReflReg,8).swz('r',viewDirReg,kSM4SwzNone).swz('r',eyeNormalReg,kSM4SwzNone);
+ bld.op(kSM4Op_ADD).reg('r',eyeReflReg,8).swz('r',eyeReflReg,kSM4SwzRepW).swz('r',eyeReflReg,kSM4SwzRepW);
+ bld.op(kSM4Op_MAD).reg('r',eyeReflReg,7).swz('r',eyeReflReg,kSM4SwzRepW).swz('r',eyeNormalReg,kSM4SwzXYZX).swz('r',viewDirReg,kSM4SwzXYZX,true);
+
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ code += "float3 eyeRefl = 2.0f * dot (viewDir, eyeNormal) * eyeNormal - viewDir;\n";
+ #endif
+ }
+
+ // Lighting
+ if (state.lightingEnabled)
+ {
+ char ambientType, diffuseType, emissionType;
+ int ambientReg, diffuseReg, emissionReg;
+ if (state.colorMaterial==kColorMatAmbientAndDiffuse)
+ {
+ ambientType = diffuseType = inColorType;
+ ambientReg = diffuseReg = inColorReg;
+ }
+ else
+ {
+ ambientType = diffuseType = 'c';
+ ambientReg = k11VertexMatAmbient;
+ diffuseReg = k11VertexMatDiffuse;
+ }
+ if (state.colorMaterial==kColorMatEmission)
+ {
+ emissionType = inColorType;
+ emissionReg = inColorReg;
+ }
+ else
+ {
+ emissionType = 'c';
+ emissionReg = k11VertexMatEmission;
+ }
+
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ std::string ambientColor = (state.colorMaterial==kColorMatAmbientAndDiffuse) ? "color" : "ff_mat_ambient";
+ std::string diffuseColor = (state.colorMaterial==kColorMatAmbientAndDiffuse) ? "color" : "ff_mat_diffuse";
+ std::string emissionColor = (state.colorMaterial==kColorMatEmission) ? "color" : "ff_mat_emission";
+ #endif
+
+ params.AddVectorParam (k11VertexAmbient*16, 4, kShaderVecLightModelAmbient);
+ params.AddVectorParam (k11VertexMatAmbient*16, 4, kShaderVecFFMatAmbient);
+ params.AddVectorParam (k11VertexMatDiffuse*16, 4, kShaderVecFFMatDiffuse);
+ params.AddVectorParam (k11VertexMatSpec*16, 4, kShaderVecFFMatSpecular);
+ params.AddVectorParam (k11VertexMatEmission*16, 4, kShaderVecFFMatEmission);
+
+ lcolorReg = tempRegCounter++;
+ bld.op(kSM4Op_MAD).reg('r',lcolorReg,7).swz(ambientType,ambientReg,kSM4SwzXYZX).swz('c',k11VertexAmbient,kSM4SwzXYZX).swz(emissionType,emissionReg,kSM4SwzXYZX);
+
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ code += "float3 lcolor = " + emissionColor + ".rgb + " + ambientColor + ".rgb * ff_vec_ambient.rgb;\n";
+ code += "float3 specColor = 0.0;\n";
+ if (state.lightCount > 0)
+ {
+ helpers += "float3 computeLighting (int idx, float3 dirToLight, float3 eyeNormal, float3 viewDir, float4 diffuseColor, float atten, inout float3 specColor) {\n";
+ helpers += " float NdotL = max(dot(eyeNormal, dirToLight), 0.0);\n";
+ helpers += " float3 color = NdotL * diffuseColor.rgb * ff_light_color[idx].rgb;\n";
+ if (state.specularEnabled)
+ {
+ helpers += " if (NdotL > 0.0) {\n";
+ helpers += " float3 h = normalize(dirToLight + viewDir);\n";
+ helpers += " float HdotN = max(dot(eyeNormal, h), 0.0);\n";
+ helpers += " float sp = saturate(pow(HdotN, ff_mat_spec.w));\n";
+ helpers += " specColor += atten * sp * ff_light_color[idx].rgb;\n";
+ helpers += " }\n";
+ }
+ helpers += " return color * atten;\n";
+ helpers += "}\n";
+
+ helpers += "float3 computeSpotLight(int idx, float3 eyePosition, float3 eyeNormal, float3 viewDir, float4 diffuseColor, inout float3 specColor) {\n";
+ helpers += " float3 dirToLight = ff_light_pos[idx].xyz - eyePosition * ff_light_pos[idx].w;\n";
+ helpers += " float distSqr = dot(dirToLight, dirToLight);\n";
+ helpers += " float att = 1.0 / (1.0 + ff_light_atten[idx].z * distSqr);\n";
+ helpers += " if (ff_light_pos[idx].w != 0 && distSqr > ff_light_atten[idx].w) att = 0.0;\n"; // set to 0 if outside of range
+ helpers += " dirToLight *= rsqrt(distSqr);\n";
+
+ helpers += " float rho = max(dot(dirToLight, ff_light_spot[idx].xyz), 0.0);\n";
+ helpers += " float spotAtt = (rho - ff_light_atten[idx].x) * ff_light_atten[idx].y;\n";
+ helpers += " spotAtt = saturate(spotAtt);\n";
+ helpers += " return min (computeLighting (idx, dirToLight, eyeNormal, viewDir, diffuseColor, att*spotAtt, specColor), 1.0);\n";
+ helpers += "}\n";
+ }
+ #endif // DEBUG_D3D11_COMPARE_WITH_HLSL
+
+ if (state.specularEnabled)
+ {
+ specColorReg = tempRegCounter++;
+ bld.op(kSM4Op_MOV).reg('r',specColorReg,7).float4(0,0,0,0);
+ }
+
+ for (int i = 0; i < state.lightCount; ++i)
+ {
+ params.AddVectorParam ((k11VertexLightPos+i)*16, 4, BuiltinShaderVectorParam(kShaderVecLight0Position+i));
+ params.AddVectorParam ((k11VertexLightAtten+i)*16, 4, BuiltinShaderVectorParam(kShaderVecLight0Atten+i));
+ params.AddVectorParam ((k11VertexLightColor+i)*16, 4, BuiltinShaderVectorParam(kShaderVecLight0Diffuse+i));
+ params.AddVectorParam ((k11VertexLightSpot+i)*16, 4, BuiltinShaderVectorParam(kShaderVecLight0SpotDirection+i));
+
+ Assert(eyePositionRequired);
+ Assert(eyeNormalRequired);
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ code += "lcolor += computeSpotLight(" + IntToString(i) + ", eyePos, eyeNormal, viewDir, " + diffuseColor + ", specColor);\n";
+ #endif
+
+ int ldirReg = tempRegCounter;
+ int miscReg = tempRegCounter+1;
+ int diffReg = tempRegCounter+2;
+
+ //
+ // attenuation
+
+ // float3 dirToLight = ff_light_pos[idx].xyz - eyePosition * ff_light_pos[idx].w;
+ // float distSqr = dot(dirToLight, dirToLight);
+ // float att = 1.0 / (1.0 + ff_light_atten[idx].z * distSqr);
+ // if (ff_light_pos[idx].w != 0 && distSqr > ff_light_atten[idx].w) att = 0.0;
+ // dirToLight *= rsqrt(distSqr);
+ // float rho = max(dot(dirToLight, ff_light_spot[idx].xyz), 0.0);
+ // float spotAtt = (rho - ff_light_atten[idx].x) * ff_light_atten[idx].y;
+ // spotAtt = saturate(spotAtt);
+
+ // dirToLight = ff_light_pos[idx].xyz - eyePosition * ff_light_pos[idx].w
+ bld.op(kSM4Op_MAD).reg('r',ldirReg,7).swz('r',eyePosReg,kSM4SwzXYZX,true).swz('c',k11VertexLightPos+i,kSM4SwzRepW).swz('c',k11VertexLightPos+i,kSM4SwzXYZX);
+ // normalize, distSqr in miscReg.w
+ bld.op(kSM4Op_DP3).reg('r',miscReg,8).swz('r',ldirReg,kSM4SwzNone).swz('r',ldirReg,kSM4SwzNone);
+ bld.op(kSM4Op_RSQ).reg('r',ldirReg,8).swz('r',miscReg,kSM4SwzRepW);
+ bld.op(kSM4Op_MUL).reg('r',ldirReg,7).swz('r',ldirReg,kSM4SwzRepW).swz('r',ldirReg,kSM4SwzXYZX);
+
+ // miscReg.z = float rho = max(dot(dirToLight, ff_light_spot[idx].xyz), 0.0)
+ bld.op(kSM4Op_DP3).reg('r',miscReg,4).swz('r',ldirReg,kSM4SwzNone).swz('c',k11VertexLightSpot+i,kSM4SwzNone);
+ bld.op(kSM4Op_MAX).reg('r',miscReg,4).swz('r',miscReg,kSM4SwzRepZ).float1(0.0f);
+ // miscReg.z = spotAtt = saturate ( (rho - ff_light_atten[idx].x) * ff_light_atten[idx].y )
+ bld.op(kSM4Op_ADD).reg('r',miscReg,4).swz('r',miscReg,kSM4SwzRepZ).swz('c',k11VertexLightAtten+i,kSM4SwzRepX,true);
+ bld.op_sat(kSM4Op_MUL,miscReg).reg('r',miscReg,4).swz('r',miscReg,kSM4SwzRepZ).swz('c',k11VertexLightAtten+i,kSM4SwzRepY);
+
+ // miscReg.y = float att = 1.0 / (1.0 + ff_light_atten[idx].z * distSqr)
+ bld.op(kSM4Op_MAD).reg('r',miscReg,2).swz('c',k11VertexLightAtten+i,kSM4SwzRepZ).swz('r',miscReg,kSM4SwzRepW).float1(1.0f);
+ bld.noAutoSM2();
+ bld.op(kSM4Op_DIV).reg('r',miscReg,2).float4(1,1,1,1).swz('r',miscReg,kSM4SwzRepY);
+ bld.op2(kSM2Op_RCP).reg2('r',miscReg,2).swz2('r',miscReg,kSM4SwzRepY);
+ bld.autoSM2();
+
+ // miscReg.y = att * spotAtt
+ bld.op(kSM4Op_MUL).reg('r',miscReg,2).swz('r',miscReg,kSM4SwzRepY).swz('r',miscReg,kSM4SwzRepZ);
+ // if (ff_light_pos[idx].w != 0 && distSqr > ff_light_atten[idx].w) att = 0.0
+ bld.noAutoSM2();
+ bld.op(kSM4Op_LT).reg('r',miscReg,1).swz('c',k11VertexLightAtten+i,kSM4SwzRepW).swz('r',miscReg,kSM4SwzRepW);
+ bld.op(kSM4Op_NE).reg('r',miscReg,4).swz('c',k11VertexLightPos+i,kSM4SwzRepW).float1(0.0);
+ bld.op(kSM4Op_AND).reg('r',miscReg,1).swz('r',miscReg,kSM4SwzRepX).swz('r',miscReg,kSM4SwzRepZ);
+ bld.op(kSM4Op_MOVC).reg('r',miscReg,2).swz('r',miscReg,kSM4SwzRepX).float1(0.0).swz('r',miscReg,kSM4SwzRepY);
+ //SM2
+ bld.op2(kSM2Op_SLT).reg2('r',miscReg,1).swz2('c',k11VertexLightAtten+i,kSM4SwzRepW).swz2('r',miscReg,kSM4SwzRepW);
+ bld.op2(kSM2Op_MUL).reg2('r',miscReg,4).swz2('c',k11VertexLightPos+i,kSM4SwzRepW).swz2('c',k11VertexLightPos+i,kSM4SwzRepW);
+ bld.op2(kSM2Op_SLT).reg2('r',miscReg,4).swz2('r',miscReg,kSM4SwzRepZ,true).swz2('r',miscReg,kSM4SwzRepZ);
+ bld.op2(kSM2Op_MUL).reg2('r',miscReg,1).swz2('r',miscReg,kSM4SwzRepX).swz2('r',miscReg,kSM4SwzRepZ);
+ bld.op2(kSM2Op_MAD).reg2('r',miscReg,2).swz2('r',miscReg,kSM4SwzRepX).swz2('r',miscReg,kSM4SwzRepY,true).swz2('r',miscReg,kSM4SwzRepY);
+ bld.autoSM2();
+
+ //
+ // diffuse
+
+ // float NdotL = max(dot(eyeNormal, dirToLight), 0.0);
+ // float3 color = NdotL * diffuseColor.rgb * ff_light_color[idx].rgb;
+ // lcolor += color * atten
+
+ // miscReg.z = float NdotL = max(dot(eyeNormal, dirToLight), 0.0)
+ bld.op(kSM4Op_DP3).reg('r',miscReg,4).swz('r',eyeNormalReg,kSM4SwzNone).swz('r',ldirReg,kSM4SwzNone);
+ bld.op(kSM4Op_MAX).reg('r',miscReg,4).swz('r',miscReg,kSM4SwzRepZ).float1(0.0f);
+ // diffReg.xyz = float3 color = NdotL * diffuseColor.rgb * ff_light_color[idx].rgb
+ bld.op(kSM4Op_MUL).reg('r',diffReg,7).swz('r',miscReg,kSM4SwzRepZ).swz(diffuseType,diffuseReg,kSM4SwzXYZX);
+ bld.op(kSM4Op_MUL).reg('r',diffReg,7).swz('r',diffReg,kSM4SwzXYZX).swz('c',k11VertexLightColor+i,kSM4SwzXYZX);
+ // diffReg.xyz = saturate(color*atten, 1)
+ bld.op_sat(kSM4Op_MUL,diffReg).reg('r',diffReg,7).swz('r',diffReg,kSM4SwzXYZX).swz('r',miscReg,kSM4SwzRepY);
+ // lcolor += diffReg
+ bld.op(kSM4Op_ADD).reg('r',lcolorReg,7).swz('r',lcolorReg,kSM4SwzXYZX).swz('r',diffReg,kSM4SwzXYZX);
+
+ //
+ // specular
+
+ if (state.specularEnabled)
+ {
+ // if (NdotL > 0.0) {
+ // float3 h = normalize(dirToLight + viewDir);
+ // float HdotN = max(dot(eyeNormal, h), 0.0);
+ // float sp = saturate(pow(HdotN, ff_mat_spec.w));
+ // specColor += atten * sp * ff_light_color[idx].rgb;
+ // }
+
+ // ldirReg.xyz = h = normalize(dirToLight + viewDir)
+ bld.op(kSM4Op_ADD).reg('r',ldirReg,7).swz('r',ldirReg,kSM4SwzXYZX).swz('r',viewDirReg,kSM4SwzXYZX);
+ bld.op(kSM4Op_DP3).reg('r',ldirReg,8).swz('r',ldirReg,kSM4SwzNone).swz('r',ldirReg,kSM4SwzNone);
+ bld.op(kSM4Op_RSQ).reg('r',ldirReg,8).swz('r',ldirReg,kSM4SwzRepW);
+ bld.op(kSM4Op_MUL).reg('r',ldirReg,7).swz('r',ldirReg,kSM4SwzXYZX).swz('r',ldirReg,kSM4SwzRepW);
+ // ldirReg.w = HdotN = max(dot(eyeNormal,h),0)
+ bld.op(kSM4Op_DP3).reg('r',ldirReg,8).swz('r',ldirReg,kSM4SwzNone).swz('r',eyeNormalReg,kSM4SwzNone);
+ bld.op(kSM4Op_MAX).reg('r',ldirReg,8).swz('r',ldirReg,kSM4SwzRepW).float1(0.0f);
+ // float sp = saturate(pow(HdotN, ff_mat_spec.w))
+ bld.op(kSM4Op_LOG).reg('r',ldirReg,8).swz('r',ldirReg,kSM4SwzRepW);
+ bld.op(kSM4Op_MUL).reg('r',ldirReg,8).swz('r',ldirReg,kSM4SwzRepW).swz('c',k11VertexMatSpec,kSM4SwzRepW);
+ bld.op(kSM4Op_EXP).reg('r',ldirReg,8).swz('r',ldirReg,kSM4SwzRepW);
+ bld.op(kSM4Op_MIN).reg('r',ldirReg,8).swz('r',ldirReg,kSM4SwzRepW).float1(1.0f);
+ // atten * sp * ff_light_color[idx].rgb
+ bld.op(kSM4Op_MUL).reg('r',ldirReg,8).swz('r',ldirReg,kSM4SwzRepW).swz('r',miscReg,kSM4SwzRepY);
+ bld.op(kSM4Op_MUL).reg('r',diffReg,7).swz('r',ldirReg,kSM4SwzRepW).swz('c',k11VertexLightColor+i,kSM4SwzXYZX);
+ // nuke specular if NdotL <= 0
+ bld.op(kSM4Op_LT).reg('r',miscReg,1).float1(0.0).swz('r',miscReg,kSM4SwzRepZ);
+ bld.noAutoSM2();
+ bld.op(kSM4Op_AND).reg('r',diffReg,7).swz('r',diffReg,kSM4SwzXYZX).swz('r',miscReg,kSM4SwzRepX);
+ bld.op2(kSM2Op_MUL).reg2('r',diffReg,7).swz2('r',diffReg,kSM4SwzXYZX).swz2('r',miscReg,kSM4SwzRepX);
+ bld.autoSM2();
+ // specColor += computed spec color
+ bld.op(kSM4Op_ADD).reg('r',specColorReg,7).swz('r',specColorReg,kSM4SwzXYZX).swz('r',diffReg,kSM4SwzXYZX);
+ }
+ }
+
+ bld.op(kSM4Op_MOV).reg('r',lcolorReg,8).swz(diffuseType,diffuseReg,kSM4SwzRepW);
+ inColorReg = lcolorReg;
+ inColorType = 'r';
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ code += "color.rgb = lcolor.rgb;\n";
+ code += "color.a = " + diffuseColor + ".a;\n";
+ #endif
+
+ if (state.specularEnabled)
+ {
+ bld.op(kSM4Op_MUL).reg('r',specColorReg,7).swz('r',specColorReg,kSM4SwzXYZX).swz('c',k11VertexMatSpec,kSM4SwzXYZX);
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ code += "specColor *= ff_mat_spec.rgb;\n";
+ #endif
+ }
+ }
+
+ // Output final color
+ dxb_dcl_output(builder, "COLOR", 0, outColor0Reg = outputRegCounter++);
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ AddToStringList (outputs, "out float4 ocolor : COLOR0");
+ #endif
+
+ bld.op_sat(kSM4Op_MOV,tempRegCounter).reg('o',outColor0Reg).swz(inColorType,inColorReg);
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ code += "ocolor = saturate(color);\n";
+ #endif
+
+ if (state.lightingEnabled && state.specularEnabled)
+ {
+ dxb_dcl_output(builder, "COLOR", 1, outColor1Reg = outputRegCounter++, 0x7);
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ AddToStringList (outputs, "out float3 ospec : COLOR1");
+ #endif
+
+ bld.op_sat(kSM4Op_MOV,tempRegCounter).reg('o',outColor1Reg,7).swz('r',specColorReg,kSM4SwzXYZX);
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ code += "ospec = saturate(specColor);\n";
+ #endif
+ }
+
+ // we don't need temporary registers from lighting calculations anymore after this point
+ if (state.lightingEnabled)
+ --tempRegCounter;
+
+
+ // Pass & transform texture coordinates
+ UInt32 gotInputs = 0;
+ UInt32 gotOutputs = 0;
+ UInt64 texSources = state.texUnitSources;
+ for (int i = 0; i < state.texUnitCount; i++)
+ {
+ matrices.mat[kShaderInstanceMatTexture0+i].gpuIndex = (k11VertexTex+i*4)*16;
+ matrices.mat[kShaderInstanceMatTexture0+i].rows = 4;
+ matrices.mat[kShaderInstanceMatTexture0+i].cols = 4;
+ matrices.mat[kShaderInstanceMatTexture0+i].cbID = params.m_CBID;
+
+ std::string iname = IntToString(i);
+ dxb_dcl_output(builder, "TEXCOORD", i, outUVReg[i] = outputRegCounter++);
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ AddToStringList (outputs, ("out float4 ouv" + iname + " : TEXCOORD" + iname).c_str());
+ #endif
+
+ UInt32 uvSource = texSources & 0xF;
+ if (uvSource >= kTexSourceUV0 && uvSource <= kTexSourceUV7)
+ {
+ unsigned uv = uvSource-kTexSourceUV0;
+ std::string uvStr = IntToString(uv);
+ if (!(gotInputs & (1<<uv)))
+ {
+ dxb_dcl_input(builder, "TEXCOORD", uv, inUVReg[uv] = inputRegCounter++);
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ AddToStringList (inputs, ("float4 uv"+uvStr+" : TEXCOORD"+uvStr).c_str());
+ #endif
+ gotInputs |= (1<<uv);
+ }
+ EmitMatrixMul (bld, k11VertexTex+4*i, 'v',inUVReg[uv], 'o',outUVReg[i], tempRegCounter, false);
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ code += "ouv"+iname + " = mul(ff_matrix_tex["+iname + "], uv"+uvStr+");\n";
+ #endif
+ }
+ else if (uvSource == kTexSourceSphereMap)
+ {
+ // m = 2*sqrt(Rx*Rx + Ry*Ry + (Rz+1)*(Rz+1))
+ // SPHR = Rx/m + 0.5, Ry/m + 0.5
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ code += "ouv"+iname + " = mul(ff_matrix_tex["+iname +"], float4(\n";
+ code += " eyeRefl.xy / (2.0*sqrt(eyeRefl.x*eyeRefl.x + eyeRefl.y*eyeRefl.y + (eyeRefl.z+1)*(eyeRefl.z+1))) + 0.5,\n";
+ code += " 0,1));\n";
+ #endif
+
+ // HLSL generates code like:
+ // dp2 r0.w, r0.xyxx, r0.xyxx
+ // add r0.z, r0.z, l(1.0)
+ // mad r0.z, r0.z, r0.z, r0.w
+ // sqrt r0.z, r0.z
+ // add r0.z, r0.z, r0.z
+ // div r0.xy, r0.xyxx, r0.zzzz
+ // add r0.xy, r0.xyxx, l(0.5, 0.5, 0.0, 0.0)
+#if 0
+ bld.op(kSM4Op_DP2).reg('r',tempRegCounter,8).swz('r',eyeReflReg,kSM4SwzXYXX).swz('r',eyeReflReg,kSM4SwzXYXX);
+ bld.op(kSM4Op_ADD).reg('r',tempRegCounter,4).swz('r',eyeReflReg,kSM4SwzRepZ).float1(1.0);
+ bld.op(kSM4Op_MAD).reg('r',tempRegCounter,4).swz('r',tempRegCounter,kSM4SwzRepZ).swz('r',tempRegCounter,kSM4SwzRepZ).swz('r',tempRegCounter,kSM4SwzRepW);
+ bld.op(kSM4Op_SQRT).reg('r',tempRegCounter,4).swz('r',tempRegCounter,kSM4SwzRepZ);
+ bld.op(kSM4Op_ADD).reg('r',tempRegCounter,4).swz('r',tempRegCounter,kSM4SwzRepZ).swz('r',tempRegCounter,kSM4SwzRepZ);
+ bld.op(kSM4Op_DIV).reg('r',tempRegCounter,3).swz('r',eyeReflReg,kSM4SwzXYXX).swz('r',tempRegCounter,kSM4SwzRepZ);
+ bld.op(kSM4Op_ADD).reg('r',tempRegCounter,3).swz('r',tempRegCounter,kSM4SwzXYXX).float4(0.5f,0.5f,0,0);
+#else
+ //SM2 compatible
+ bld.op(kSM4Op_ADD).reg('r',tempRegCounter,7).swz('r',eyeReflReg,kSM4SwzXYZX).float4(0.0f,0.0f,1.0f,0.0f);
+ bld.op(kSM4Op_DP3).reg('r',tempRegCounter,8).swz('r',tempRegCounter,kSM4SwzNone).swz('r',tempRegCounter,kSM4SwzNone);
+ bld.op(kSM4Op_RSQ).reg('r',tempRegCounter,8).swz('r',tempRegCounter,kSM4SwzRepW);
+ bld.op(kSM4Op_MUL).reg('r',tempRegCounter,8).swz('r',tempRegCounter,kSM4SwzRepW).float4(0.5f,0.5f,0.5f,0.5f);
+ bld.op(kSM4Op_MAD).reg('r',tempRegCounter,3).swz('r',eyeReflReg,kSM4SwzXYXX).swz('r',tempRegCounter,kSM4SwzRepW).float4(0.5f,0.5f,0.5f,0.5f);
+#endif
+ EmitMatrixMul (bld, k11VertexTex+4*i, 'r',tempRegCounter, 'o',outUVReg[i], tempRegCounter+1, true);
+ }
+ else if (uvSource == kTexSourceObject)
+ {
+ EmitMatrixMul (bld, k11VertexTex+4*i, 'v',inPosReg, 'o',outUVReg[i], tempRegCounter, false);
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ code += "ouv"+iname + " = mul(ff_matrix_tex["+iname +"], vertex);\n";
+ #endif
+ }
+ else if (uvSource == kTexSourceEyeLinear)
+ {
+ EmitMatrixMul (bld, k11VertexTex+4*i, 'r',eyePosReg, 'o',outUVReg[i], tempRegCounter, true);
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ code += "ouv"+iname + " = mul(ff_matrix_tex["+iname +"], float4(eyePos,1.0));\n";
+ #endif
+ }
+ else if (uvSource == kTexSourceCubeNormal)
+ {
+ EmitMatrixMul (bld, k11VertexTex+4*i, 'r',eyeNormalReg, 'o',outUVReg[i], tempRegCounter, true);
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ code += "ouv"+iname + " = mul(ff_matrix_tex["+iname +"], float4(eyeNormal,1.0));\n";
+ #endif
+ }
+ else if (uvSource == kTexSourceCubeReflect)
+ {
+ EmitMatrixMul (bld, k11VertexTex+4*i, 'r',eyeReflReg, 'o',outUVReg[i], tempRegCounter, true);
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ code += "ouv"+iname + " = mul(ff_matrix_tex["+iname +"], float4(eyeRefl,1.0));\n";
+ #endif
+ }
+ else
+ {
+ AssertString("Unknown texgen mode");
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ code += "ouv"+iname + " = 0.5;\n";
+ #endif
+ }
+ texSources >>= 4;
+ }
+
+ // fog if we have a spare varying
+ if (state.fogMode != kFogDisabled && outputRegCounter < 8)
+ {
+ Assert(eyePositionRequired);
+ int outFogReg;
+ dxb_dcl_output(builder, "FOG", 0, outFogReg = outputRegCounter++, 0x1);
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ AddToStringList (outputs, "out float ofog : FOG0");
+ #endif
+
+ params.AddVectorParam (k11VertexFog*16, 4, kShaderVecFFFogParams);
+
+ int fogReg = tempRegCounter++;
+
+ // fogCoord = length(eyePosition.xyz), for radial fog
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ code += "float fogCoord = length(eyePos.xyz);\n";
+ #endif
+
+ bld.op(kSM4Op_DP3).reg('r',fogReg,1).swz('r',eyePosReg,kSM4SwzNone).swz('r',eyePosReg,kSM4SwzNone);
+#if 0
+ bld.op(kSM4Op_SQRT).reg('r',fogReg,1).swz('r',fogReg,kSM4SwzRepX);
+#else
+ //SM2 compatible
+ bld.op(kSM4Op_RSQ).reg('r',fogReg,1).swz('r',fogReg,kSM4SwzRepX);
+ bld.op(kSM4Op_RCP).reg('r',fogReg,1).swz('r',fogReg,kSM4SwzRepX);
+#endif
+ if (state.fogMode == kFogLinear)
+ {
+ // fogParams.z * fogCoord + fogParams.w
+ bld.op_sat(kSM4Op_MAD,tempRegCounter).reg('o',outFogReg,1).swz('r',fogReg,kSM4SwzRepX).swz('c',k11VertexFog,kSM4SwzRepZ).swz('c',k11VertexFog,kSM4SwzRepW);
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ code += "ofog = saturate(fogCoord * ff_fog.z + ff_fog.w);\n";
+ #endif
+ }
+ else if (state.fogMode == kFogExp)
+ {
+ // fogArg = fogParams.y * fogCoord
+ // exp2(-fogArg)
+ bld.op(kSM4Op_MUL).reg('r',fogReg,1).swz('r',fogReg,kSM4SwzRepX).swz('c',k11VertexFog,kSM4SwzRepY);
+ bld.op_sat(kSM4Op_EXP,tempRegCounter).reg('o',outFogReg,1).swz('r',fogReg,kSM4SwzRepX,true);
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ code += "ofog = saturate(exp2(-(fogCoord * ff_fog.y)));\n";
+ #endif
+ }
+ else if (state.fogMode == kFogExp2)
+ {
+ // fogArg = fogParams.y * fogCoord
+ // exp2(-fogArg*fogArg)
+ bld.op(kSM4Op_MUL).reg('r',fogReg,1).swz('r',fogReg,kSM4SwzRepX).swz('c',k11VertexFog,kSM4SwzRepY);
+ bld.op(kSM4Op_MUL).reg('r',fogReg,1).swz('r',fogReg,kSM4SwzRepX).swz('r',fogReg,kSM4SwzRepX);
+ bld.op_sat(kSM4Op_EXP,tempRegCounter).reg('o',outFogReg,1).swz('r',fogReg,kSM4SwzRepX,true);
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ code += "fogCoord = fogCoord * ff_fog.y;\n";
+ code += "ofog = saturate(exp2(-fogCoord * fogCoord));\n";
+ #endif
+ }
+ --tempRegCounter;
+ }
+
+ dxb_dcl_output(builder, "SV_POSITION", 0, outPosReg = outputRegCounter++);
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ AddToStringList (outputs, "out float4 overtex : SV_POSITION");
+ #endif
+
+ // Vertex transformation
+ matrices.mat[kShaderInstanceMatMVP].gpuIndex = k11VertexMVP*16;
+ matrices.mat[kShaderInstanceMatMVP].rows = 4;
+ matrices.mat[kShaderInstanceMatMVP].cols = 4;
+ matrices.mat[kShaderInstanceMatMVP].cbID = params.m_CBID;
+ bld.op(kSM4Op_MUL).reg('r',0).swz('v',inPosReg,kSM4SwzRepY).swz('c',k11VertexMVP+1);
+ bld.op(kSM4Op_MAD).reg('r',0).swz('c',k11VertexMVP+0).swz('v',inPosReg,kSM4SwzRepX).swz('r',0);
+ bld.op(kSM4Op_MAD).reg('r',0).swz('c',k11VertexMVP+2).swz('v',inPosReg,kSM4SwzRepZ).swz('r',0);
+ bld.op(kSM4Op_MAD).reg('r',0).swz('c',k11VertexMVP+3).swz('v',inPosReg,kSM4SwzRepW).swz('r',0);
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ code += "overtex = mul (ff_matrix_mvp, vertex);\n";
+ #endif
+
+ //correct output pos with Vertex Shader position offset
+ //mad oPos.xy, v0.w, c63, v0
+ bld.op2(kSM2Op_MAD).reg2('o',outPosReg,3).swz2('r',0,kSM4SwzRepW).swz2('c',k11VertexPosOffset9x).swz2('r',0);
+ //mov oPos.zw, v0
+ bld.op2(kSM2Op_MOV).reg2('o',outPosReg,12).swz2('r',0);
+
+ //copy output pos for sm40
+ bld.noAutoSM2();
+ bld.op(kSM4Op_MOV).reg('o',outPosReg).swz('r',0);
+ bld.autoSM2();
+
+ bld.op(kSM4Op_RET);
+
+
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ std::string src =
+ kD3D11VertexPrefix +
+ helpers + '\n' +
+ "void main (\n " +
+ inputs + ",\n " +
+ outputs + ") {\n" +
+ code + "\n}";
+ printf_console ("d3d11 FF VS HLSL:\n%s\n", src.c_str());
+ DebugCompileHLSLShaderD3D11 (src, true);
+ #endif
+
+
+ void* blob = BuildShaderD3D11 (builder, outSize);
+ return blob;
+}
+
+
+// --- FRAGMENT program ----------------------------------------------------------------------------
+
+enum CombinerWriteMask { kCombWriteRGBA, kCombWriteRGB, kCombWriteA }; // Which channels of the result register a combiner stage writes: all four, rgb only, or alpha only.
+
+static bool EmitCombinerMath11 ( // Emits the ops for one texture combiner stage, result in r0; returns true if the stage reads the constant color source.
+ int stage, // texture stage index; also used as the 'c' register index for the constant source
+ UInt32 combiner, // packed combiner descriptor, unpacked below by DecodeTextureCombinerDescriptor
+ CombinerWriteMask writeMaskMode, // channels of r0 this call is allowed to write
+ int texUnitCount, // only referenced by the range assert below
+ DXBCBuilderStream& bld // instruction stream the ops are appended to
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ , std::string& code // HLSL text mirroring the emitted ops (debug-compare builds only)
+ #endif
+)
+{
+ Assert (texUnitCount < 10 && stage < 10); // the debug path turns stage into a single digit via char('0'+stage)
+
+ combiner::Source sources[3];
+ combiner::Operand operands[3];
+ combiner::Operation op;
+ int scale;
+ combiner::DecodeTextureCombinerDescriptor (combiner, op, sources, operands, scale, true);
+
+ // dot3 and dot3rgba write into RGBA; alpha combiner is always ignored
+ if (op == combiner::kOpDot3RGB || op == combiner::kOpDot3RGBA)
+ {
+ if (writeMaskMode == kCombWriteA)
+ return false; // nothing emitted: the color pass of this stage already wrote alpha
+ writeMaskMode = kCombWriteRGBA;
+ }
+
+ unsigned tmpIdx = 1; // next free temp register; r0 is the stage result, r1 is taken when the texture source is used
+
+ bool usedConstant = false;
+ char regFile[3]; // register file letter of each source: 'v' input, 'r' temp, 'c' constant
+ unsigned regIdx[3]; // register index of each source within its file
+ unsigned regSrcAlphaSwz[3]; // alpha-replicate swizzle; read for the third source of lerp / mul-add style ops
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ std::string reg[3];
+ #endif
+ for (int r = 0; r < 3; ++r)
+ {
+ combiner::Source source = sources[r];
+ regSrcAlphaSwz[r] = kSM4SwzRepW;
+ if (stage == 0 && source == combiner::kSrcPrevious)
+ source = combiner::kSrcPrimaryColor; // first stage, "previous" the same as "primary"
+ switch (source)
+ {
+ case combiner::kSrcPrimaryColor:
+ regFile[r] = 'v'; regIdx[r] = 0; // v0 = interpolated primary color
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ reg[r] = "icolor";
+ #endif
+ break;
+ case combiner::kSrcPrevious:
+ regFile[r] = 'r'; regIdx[r] = 0; // r0 = result of the previous stage
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ reg[r] = "col";
+ #endif
+ break;
+ case combiner::kSrcTexture:
+ regFile[r] = 'r'; regIdx[r] = 1; tmpIdx = 2; // r1 holds the texture sample, so scratch temps start at r2
+ regSrcAlphaSwz[r] = kSM4SwzRepW; // same value as already assigned at the top of the loop
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ reg[r] = "tex";
+ #endif
+ break;
+ case combiner::kSrcConstant:
+ usedConstant |= true; regFile[r] = 'c'; regIdx[r] = stage; // per-stage constant color lives in c[stage]
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ reg[r] = std::string("ff_vec_colors[") + char('0'+stage) + ']';
+ #endif
+ break;
+ default:
+ AssertString("unknown source"); //reg[r] = "foo"; NOTE(review): regFile[r]/regIdx[r] are left unset on this path
+ }
+ }
+
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ const char* writeMask = "";
+ #endif
+ unsigned writeMaskBin = 0xF; // rgba
+ if (writeMaskMode == kCombWriteRGB)
+ {
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ writeMask = ".rgb";
+ #endif
+ writeMaskBin = 0x7; // rgb
+ }
+ else if (writeMaskMode == kCombWriteA)
+ {
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ writeMask = ".a";
+ #endif
+ writeMaskBin = 0x8; // a
+ }
+
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ const char* regSwizzle[3];
+ #endif
+ unsigned regSwizzleBin[3]; // per-source read swizzle: none, or alpha replicated
+ for (int r = 0; r < 3; ++r)
+ {
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ regSwizzle[r] = "";
+ #endif
+ regSwizzleBin[r] = kSM4SwzNone;
+ // 1-x: into tmpN and use that
+ if (operands[r] == combiner::kOperOneMinusSrcColor || operands[r] == combiner::kOperOneMinusSrcAlpha)
+ {
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ code += std::string("tmp")+char('0'+r)+" = 1.0 - " + reg[r]+regSwizzle[r] + ";\n";
+ reg[r] = std::string("tmp")+char('0'+r);
+ #endif
+ bld.op(kSM4Op_ADD).reg('r', tmpIdx, writeMaskBin).swz(regFile[r], regIdx[r], regSwizzleBin[r], true).float1(1.0f); // tmp = -src + 1. NOTE(review): tmp is written with writeMaskBin but a OneMinusSrcAlpha operand is read back as .w below; with an rgb-only mask tmp.w looks unwritten -- confirm
+ regFile[r] = 'r'; // from here on the source is the temp holding 1-x
+ regIdx[r] = tmpIdx;
+ ++tmpIdx;
+ }
+ // replicate alpha swizzle?
+ if (operands[r] == combiner::kOperSrcAlpha || operands[r] == combiner::kOperOneMinusSrcAlpha)
+ {
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ regSwizzle[r] = ".a";
+ #endif
+ regSwizzleBin[r] = kSM4SwzRepW;
+ }
+ }
+ switch (op)
+ {
+ case combiner::kOpReplace:
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ code += std::string("col")+writeMask + " = " + reg[0]+regSwizzle[0] + ";\n";
+ #endif
+ bld.op(kSM4Op_MOV).reg('r', 0, writeMaskBin).swz(regFile[0], regIdx[0], regSwizzleBin[0]); // r0 = src0
+ break;
+ case combiner::kOpModulate:
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ code += std::string("col")+writeMask + " = " + reg[0]+regSwizzle[0] + " * " + reg[1]+regSwizzle[1] + ";\n";
+ #endif
+ bld.op(kSM4Op_MUL); // r0 = src0 * src1
+ bld.reg('r', 0, writeMaskBin);
+ bld.swz(regFile[0], regIdx[0], regSwizzleBin[0]);
+ bld.swz(regFile[1], regIdx[1], regSwizzleBin[1]);
+ break;
+ case combiner::kOpAdd:
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ code += std::string("col")+writeMask + " = " + reg[0]+regSwizzle[0] + " + " + reg[1]+regSwizzle[1] + ";\n";
+ #endif
+ bld.op(kSM4Op_ADD); // r0 = src0 + src1
+ bld.reg('r', 0, writeMaskBin);
+ bld.swz(regFile[0], regIdx[0], regSwizzleBin[0]);
+ bld.swz(regFile[1], regIdx[1], regSwizzleBin[1]);
+ break;
+ case combiner::kOpAddSigned:
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ code += std::string("col")+writeMask + " = " + reg[0]+regSwizzle[0] + " + " + reg[1]+regSwizzle[1] + " - 0.5;\n";
+ #endif
+ bld.op(kSM4Op_ADD); // r0 = src0 + src1
+ bld.reg('r', 0, writeMaskBin);
+ bld.swz(regFile[0], regIdx[0], regSwizzleBin[0]);
+ bld.swz(regFile[1], regIdx[1], regSwizzleBin[1]);
+ bld.op(kSM4Op_ADD); // r0 = r0 - 0.5
+ bld.reg('r', 0, writeMaskBin);
+ bld.swz('r', 0);
+ bld.float4(-.5f,-.5f,-.5f,-.5f);
+ break;
+ case combiner::kOpSubtract:
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ code += std::string("col")+writeMask + " = " + reg[0]+regSwizzle[0] + " - " + reg[1]+regSwizzle[1] + ";\n";
+ #endif
+ bld.op(kSM4Op_ADD); // r0 = src0 + (-src1)
+ bld.reg('r', 0, writeMaskBin);
+ bld.swz(regFile[0], regIdx[0], regSwizzleBin[0]);
+ bld.swz(regFile[1], regIdx[1], regSwizzleBin[1], true); // trailing 'true' requests the negated operand (matches the "-" in the HLSL mirror)
+ break;
+ case combiner::kOpLerp:
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ code += std::string("col")+writeMask + " = lerp (" + reg[1]+regSwizzle[1] + ", " + reg[0]+regSwizzle[0] + ", " + reg[2]+ ".a);\n";
+ #endif
+ // tmp = r0-r1
+ // res = tmp * r2 + r1
+ bld.op(kSM4Op_ADD);
+ bld.reg('r', tmpIdx, writeMaskBin);
+ bld.swz(regFile[0], regIdx[0], regSwizzleBin[0]);
+ bld.swz(regFile[1], regIdx[1], regSwizzleBin[1], true);
+ bld.op(kSM4Op_MAD);
+ bld.reg('r', 0, writeMaskBin);
+ bld.swz('r', tmpIdx);
+ bld.swz(regFile[2], regIdx[2], regSrcAlphaSwz[2]); // interpolation factor is always the alpha of source 2
+ bld.swz(regFile[1], regIdx[1], regSwizzleBin[1]);
+ ++tmpIdx;
+ break;
+ case combiner::kOpDot3RGB:
+ DebugAssert(writeMaskMode == kCombWriteRGBA);
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ code += std::string("col.rgb = 4.0 * dot ((") + reg[0]+regSwizzle[0] + ")-0.5, (" + reg[1]+regSwizzle[1] + ")-0.5);\n";
+ code += std::string("col.a = ") + reg[0]+".a;\n";
+ #endif
+
+ // tmp+0 = r0-0.5
+ bld.op(kSM4Op_ADD);
+ bld.reg('r', tmpIdx+0, writeMaskBin);
+ bld.swz(regFile[0], regIdx[0], regSwizzleBin[0]);
+ bld.float4(-.5f,-.5f,-.5f,-.5f);
+ // tmp+1 = r1-0.5
+ bld.op(kSM4Op_ADD);
+ bld.reg('r', tmpIdx+1, writeMaskBin);
+ bld.swz(regFile[1], regIdx[1], regSwizzleBin[1]);
+ bld.float4(-.5f,-.5f,-.5f,-.5f);
+ // tmp0.rgb = dp3(tmp+0, tmp+1)
+ bld.op(kSM4Op_DP3);
+ bld.reg('r', 0, 0x7);
+ bld.swz('r', tmpIdx+0);
+ bld.swz('r', tmpIdx+1);
+ // tmp0.rgb *= 4
+ bld.op(kSM4Op_MUL);
+ bld.reg('r', 0, 0x7);
+ bld.swz('r', 0);
+ bld.float4(4.0f,4.0f,4.0f,4.0f);
+ // tmp0.a = r0.a
+ bld.op(kSM4Op_MOV);
+ bld.reg('r', 0, 0x8);
+ bld.swz(regFile[0], regIdx[0], kSM4SwzRepW);
+ tmpIdx += 2;
+ break;
+ case combiner::kOpDot3RGBA:
+ DebugAssert(writeMaskMode == kCombWriteRGBA);
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ code += std::string("col")+writeMask + " = 4.0 * dot ((" + reg[0]+regSwizzle[0] + ")-0.5, (" + reg[1]+regSwizzle[1] + ")-0.5);\n";
+ #endif
+ // tmp+0 = r0-0.5
+ bld.op(kSM4Op_ADD);
+ bld.reg('r', tmpIdx+0, writeMaskBin);
+ bld.swz(regFile[0], regIdx[0], regSwizzleBin[0]);
+ bld.float4(-.5f,-.5f,-.5f,-.5f);
+ // tmp+1 = r1-0.5
+ bld.op(kSM4Op_ADD);
+ bld.reg('r', tmpIdx+1, writeMaskBin);
+ bld.swz(regFile[1], regIdx[1], regSwizzleBin[1]);
+ bld.float4(-.5f,-.5f,-.5f,-.5f);
+ // tmp0 = dp3(tmp+0, tmp+1)
+ bld.op(kSM4Op_DP3);
+ bld.reg('r', 0, writeMaskBin);
+ bld.swz('r', tmpIdx+0);
+ bld.swz('r', tmpIdx+1);
+ // tmp0 *= 4
+ bld.op(kSM4Op_MUL);
+ bld.reg('r', 0, writeMaskBin);
+ bld.swz('r', 0);
+ bld.float4(4.0f,4.0f,4.0f,4.0f);
+ tmpIdx += 2;
+ break;
+ case combiner::kOpMulAdd:
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ code += std::string("col")+writeMask + " = " + reg[0]+regSwizzle[0] + " * " + reg[2]+".a + " + reg[1]+regSwizzle[1] + ";\n";
+ #endif
+ bld.op(kSM4Op_MAD); // r0 = src0 * src2.a + src1
+ bld.reg('r', 0, writeMaskBin);
+ bld.swz(regFile[0], regIdx[0], regSwizzleBin[0]);
+ bld.swz(regFile[2], regIdx[2], regSrcAlphaSwz[2]);
+ bld.swz(regFile[1], regIdx[1], regSwizzleBin[1]);
+ break;
+ case combiner::kOpMulSub:
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ code += std::string("col")+writeMask + " = " + reg[0]+regSwizzle[0] + " * " + reg[2]+".a - " + reg[1]+regSwizzle[1] + ";\n";
+ #endif
+ bld.op(kSM4Op_MAD); // r0 = src0 * src2.a + (-src1)
+ bld.reg('r', 0, writeMaskBin);
+ bld.swz(regFile[0], regIdx[0], regSwizzleBin[0]);
+ bld.swz(regFile[2], regIdx[2], regSrcAlphaSwz[2]);
+ bld.swz(regFile[1], regIdx[1], regSwizzleBin[1], true);
+ break;
+ case combiner::kOpMulAddSigned:
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ code += std::string("col")+writeMask + " = " + reg[0]+regSwizzle[0] + " * " + reg[2]+".a + " + reg[1]+regSwizzle[1] + " - 0.5;\n";
+ #endif
+ bld.op(kSM4Op_MAD); // r0 = src0 * src2.a + src1
+ bld.reg('r', 0, writeMaskBin);
+ bld.swz(regFile[0], regIdx[0], regSwizzleBin[0]);
+ bld.swz(regFile[2], regIdx[2], regSrcAlphaSwz[2]);
+ bld.swz(regFile[1], regIdx[1], regSwizzleBin[1]);
+ bld.op(kSM4Op_ADD); // r0 = r0 - 0.5
+ bld.reg('r', 0, writeMaskBin);
+ bld.swz('r', 0);
+ bld.float4(-.5f,-.5f,-.5f,-.5f);
+ break;
+ default:
+ AssertString ("Unknown combiner op!"); // no instruction is emitted on this path, so r0 is left as it was
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ code += std::string("col")+writeMask + " = " + reg[0]+regSwizzle[0] + ";\n";
+ #endif
+ break;
+ }
+
+ // scale
+ if (scale > 1)
+ {
+ DebugAssert (scale == 2 || scale == 4);
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ code += std::string("col *= ") + char('0'+scale) + ".0;\n";
+ #endif
+ if (scale == 2)
+ {
+ bld.op(kSM4Op_ADD); // r0 + r0, i.e. r0 * 2
+ bld.reg('r', 0, writeMaskBin);
+ bld.swz('r', 0);
+ bld.swz('r', 0);
+ }
+ else if (scale == 4)
+ {
+ bld.op(kSM4Op_MUL); // r0 * 4
+ bld.reg('r', 0, writeMaskBin);
+ bld.swz('r', 0);
+ bld.float4(4.0f,4.0f,4.0f,4.0f);
+ }
+ }
+
+ return usedConstant; // caller uses this to decide whether to bind the stage's constant color
+}
+
+void* BuildFragmentShaderD3D11 (const FixedFunctionStateD3D11& state, FixedFunctionProgramD3D11::ValueParameters& params, size_t& outSize) // Builds the fixed-function pixel shader for 'state'; records needed constants in 'params' and returns the blob produced by BuildShaderD3D11 (size in outSize).
+{
+ ShaderLab::FastPropertyName cbName; cbName.SetName(kD3D11PixelCB);
+ GetD3D11ConstantBuffers(GetRealGfxDevice()).SetCBInfo (cbName.index, k11PixelSize*16); // register the pixel constant buffer; size is k11PixelSize*16
+ params.m_CBID = cbName.index; params.m_CBSize = k11PixelSize*16;
+
+ DXBCBuilder* builder = dxb_create(4, 0, kSM4Shader_Pixel);
+ DXBCBuilderStream bld(builder);
+
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ std::string textures, inputs, code; // HLSL text mirroring the emitted bytecode, compiled at the end for comparison
+ #endif
+
+ dxb_dcl_output(builder, "SV_Target", 0, 0); // o0 = final color
+
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ AddToStringList (inputs, "float4 icolor : COLOR0");
+ #endif
+ int inputRegCounter = 0; // next free 'v' input register
+ dxb_dcl_input(builder, "COLOR", 0, inputRegCounter++); // v0 = primary color
+
+ if (state.lightingEnabled && state.specularEnabled)
+ {
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ AddToStringList (inputs, "float3 ispec : COLOR1");
+ #endif
+ dxb_dcl_input(builder, "COLOR", 1, inputRegCounter++, 0x7); // v1.xyz = specular color, added to the result further down
+ }
+
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ code += "float4 col;\n";
+ #endif
+
+ if (state.texUnitCount == 0)
+ {
+ // No combiners is special case: output primary color
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ code += "col = icolor;\n";
+ #endif
+ bld.op(kSM4Op_MOV).reg('r', 0).swz('v', 0); // r0 = v0
+
+ // BUG, using for ex.,
+ // SubShader { Pass { Color (1,0,0,0) } }
+ // produces white color instead of red on IvyBridge UltraBook
+ }
+ else
+ {
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ code += "float4 tex, tmp0, tmp1, tmp2;\n";
+ #endif
+ for (int i = 0; i < state.texUnitCount; i++)
+ {
+ std::string iname = IntToString(i);
+
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ AddToStringList (inputs, ("float4 iuv"+iname + " : TEXCOORD"+iname).c_str());
+ textures += "SamplerState ff_smp"+iname + " : register(s"+iname+");\n";
+ #endif
+
+ // sample the texture into r1 (the "tex" variable of the HLSL mirror)
+ if (state.texUnit3D & (1<<i)) // 3D
+ {
+ dxb_dcl_input(builder, "TEXCOORD", i, inputRegCounter,0x7); // xyz mask
+
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ textures += "Texture3D ff_tex"+iname + " : register(t"+iname+");\n";
+ code += "tex = ff_tex"+iname + ".Sample(ff_smp"+iname + ", iuv"+iname + ".xyz);\n";
+ #endif
+
+ dxb_dcl_tex(builder, i, kSM4Target_TEXTURE3D);
+ bld.op(kSM4Op_SAMPLE).reg('r', 1).swz('v', inputRegCounter, kSM4SwzXYZX).swz('t', i).reg('s', i);
+ }
+ else if (state.texUnitCube & (1<<i)) // cubemap
+ {
+ dxb_dcl_input(builder, "TEXCOORD", i, inputRegCounter,0x7); // xyz mask
+
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ textures += "TextureCube ff_tex"+iname + " : register(t"+iname+");\n";
+ code += "tex = ff_tex"+iname + ".Sample(ff_smp"+iname + ", iuv"+iname + ".xyz);\n";
+ #endif
+
+ dxb_dcl_tex(builder, i, kSM4Target_TEXTURECUBE);
+ bld.op(kSM4Op_SAMPLE).reg('r', 1).swz('v', inputRegCounter, kSM4SwzXYZX).swz('t', i).reg('s', i);
+ }
+ else if (state.texUnitProjected & (1<<i)) // projected sample
+ {
+ dxb_dcl_input(builder, "TEXCOORD", i, inputRegCounter,0xB); // xyw mask
+
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ textures += "Texture2D ff_tex"+iname + " : register(t"+iname+");\n";
+ code += "tex = ff_tex"+iname + ".Sample(ff_smp"+iname + ", iuv"+iname + ".xy / iuv"+iname + ".w);\n";
+ #endif
+
+ dxb_dcl_tex(builder, i, kSM4Target_TEXTURE2D);
+
+ // SM4: use DIV; Intel IvyBridge seems to prefer that
+ bld.noAutoSM2();
+ bld.op(kSM4Op_DIV).reg('r', 1, 0x3).swz('v', inputRegCounter, kSM4SwzXYXX).swz('v', inputRegCounter, kSM4SwzRepW); // r1.xy = uv.xy / uv.w
+ bld.autoSM2();
+
+ // SM2: use RCP+MUL
+ bld.op2(kSM2Op_RCP).reg2('r', 1, 8).swz2('v', inputRegCounter, kSM4SwzRepW); // r1.w = 1 / uv.w
+ bld.op2(kSM2Op_MUL).reg2('r', 1, 0x3).swz2('v', inputRegCounter, kSM4SwzXYXX).swz2('r',1, kSM4SwzRepW); // r1.xy = uv.xy * r1.w
+
+ bld.op(kSM4Op_SAMPLE).reg('r', 1).swz('r', 1, kSM4SwzXYXX).swz('t', i).reg('s', i); // sample with the divided coordinates; r1 is overwritten by the texel
+ }
+ else // regular sample
+ {
+ dxb_dcl_input(builder, "TEXCOORD", i, inputRegCounter,0x3); // xy mask
+
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ textures += "Texture2D ff_tex"+iname + " : register(t"+iname+");\n";
+ code += "tex = ff_tex"+iname + ".Sample(ff_smp"+iname + ", iuv"+iname + ".xy);\n";
+ #endif
+
+ dxb_dcl_tex(builder, i, kSM4Target_TEXTURE2D);
+ bld.op(kSM4Op_SAMPLE).reg('r', 1).swz('v', inputRegCounter, kSM4SwzXYXX).swz('t', i).reg('s', i);
+ }
+
+ // emit color & alpha combiners; result in r0
+ UInt32 colorComb = state.texUnitColorCombiner[i];
+ UInt32 alphaComb = state.texUnitAlphaCombiner[i];
+ bool usedConstant = false;
+ if (colorComb == alphaComb) // identical descriptors: one pass writing all four channels
+ {
+ usedConstant |= EmitCombinerMath11 (i, colorComb, kCombWriteRGBA, state.texUnitCount, bld
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ , code
+ #endif
+ );
+ }
+ else // different descriptors: rgb pass followed by alpha pass
+ {
+ usedConstant |= EmitCombinerMath11 (i, colorComb, kCombWriteRGB, state.texUnitCount, bld
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ , code
+ #endif
+ );
+ usedConstant |= EmitCombinerMath11 (i, alphaComb, kCombWriteA, state.texUnitCount, bld
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ , code
+ #endif
+ );
+ }
+
+ if (usedConstant)
+ params.AddVectorParam ((k11PixelColors+i)*16, 4, BuiltinShaderVectorParam(kShaderVecFFTextureEnvColor0+i)); // bind this stage's constant color only when a combiner reads it
+ ++inputRegCounter; // each texture stage consumes one input register
+ }
+ }
+
+ // alpha test
+ if (state.alphaTest != kFuncDisabled && state.alphaTest != kFuncAlways)
+ {
+ params.AddVectorParam (k11PixelAlphaRef*16, 1, kShaderVecFFAlphaTestRef);
+ if (state.alphaTest == kFuncNever)
+ {
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ code += "discard;\n";
+ #endif
+ bld.op(kSM4Op_DISCARD).float1(-1); // int is not sm20 compatible; old comment: HLSL emits 'l(-1)' for plain discard; with the value being integer -1 (all bits set)
+ }
+ else
+ {
+ // Reverse logic because we're using here 'discard'
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ static const char* kCmpOps[] =
+ {
+ "", // kFuncDisabled
+ "", // kFuncNever
+ ">=", // kFuncLess
+ "!=", // kFuncEqual
+ ">", // kFuncLEqual
+ "<=", // kFuncGreater
+ "==", // kFuncNotEqual
+ "<", // kFuncGEqual
+ "", // kFuncAlways
+ };
+ #endif // #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ static SM4Opcode kCmpOpcodes[] = // SM4 comparison producing the "discard" condition, indexed by state.alphaTest
+ {
+ kSM4Op_ADD, // kFuncDisabled
+ kSM4Op_ADD, // kFuncNever
+ kSM4Op_GE, // kFuncLess
+ kSM4Op_NE, // kFuncEqual
+ kSM4Op_LT, // kFuncLEqual
+ kSM4Op_GE, // kFuncGreater
+ kSM4Op_EQ, // kFuncNotEqual
+ kSM4Op_LT, // kFuncGEqual
+ kSM4Op_ADD, // kFuncAlways
+ };
+ static bool kCmpOrder[] = // true: operands are (alpha, ref); false: (ref, alpha)
+ {
+ false, // kFuncDisabled
+ false, // kFuncNever
+ true, // kFuncLess
+ true, // kFuncEqual
+ false, // kFuncLEqual
+ false, // kFuncGreater
+ true, // kFuncNotEqual
+ true, // kFuncGEqual
+ false, // kFuncAlways
+ };
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ code += std::string("if (col.a ") + kCmpOps[state.alphaTest] + " ff_alpha_ref) discard;\n";
+ #endif
+
+ bld.noAutoSM2(); // the compare + discard pair below is emitted for the SM4 stream only
+ bld.op(kCmpOpcodes[state.alphaTest]).reg('r', 1, 0x1); // r1.x = discard condition
+ if (kCmpOrder[state.alphaTest])
+ {
+ bld.swz('r', 0, kSM4SwzRepW);
+ bld.swz('c', k11PixelAlphaRef, kSM4SwzRepX);
+ }
+ else
+ {
+ bld.swz('c', k11PixelAlphaRef, kSM4SwzRepX);
+ bld.swz('r', 0, kSM4SwzRepW);
+ }
+ bld.op(kSM4Op_DISCARD).reg('r', 1, 1);
+ bld.autoSM2();
+
+ //SM20: rows are indexed by state.alphaTest; the two values are the operands handed to CMP below. NOTE(review): if CMP selects on src0 >= 0, the Less/LEqual and Greater/GEqual rows may differ from the SM4 path when alpha == ref -- confirm
+ static float bConst[][2] =
+ {
+ {0,0},
+ {0,0},
+
+ {0,-1},
+ {0,-1},
+ {-1,0},
+ {0,-1},
+ {-1,0},
+ {-1,0},
+
+ {0,0},
+ };
+ static bool bRefSign[] = // passed as the sign flag of the ref operand of the SM2 ADD; the alpha operand gets the opposite flag
+ {
+ false,
+ false,
+
+ false,
+ true,
+ true,
+ true,
+ true,
+ false,
+ false,
+ };
+
+ bld.op2(kSM2Op_ADD). // r1.x = signed difference between ref and alpha
+ reg2('r',1,1).
+ swz2('c',k11PixelAlphaRef, kSM4SwzRepX,bRefSign[state.alphaTest]).
+ swz2('r', 0, kSM4SwzRepW,!bRefSign[state.alphaTest]);
+ if (state.alphaTest == kFuncEqual || state.alphaTest == kFuncNotEqual)
+ bld.op2(kSM2Op_MUL).reg2('r',1,1).swz2('r',1,kSM4SwzRepX).swz2('r',1,kSM4SwzRepX); // square the difference for the equality tests
+ bld.op2(kSM2Op_CMP).reg2('r',1).
+ swz2('r',1,kSM4SwzRepX,state.alphaTest == kFuncEqual || state.alphaTest == kFuncNotEqual).
+ float1_2(bConst[state.alphaTest][0]).
+ float1_2(bConst[state.alphaTest][1]);
+ bld.op2(kSM2Op_TEXKILL).reg2('r', 1); // SM2 counterpart of the discard above, driven by the CMP result in r1
+ }
+ }
+
+ // add specular
+ if (state.lightingEnabled && state.specularEnabled)
+ {
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ code += "col.rgb += ispec;\n";
+ #endif
+ // add r0.xyz, r0.xyz, v1.xyz
+ bld.op(kSM4Op_ADD).reg('r', 0, 0x7).swz('r', 0, kSM4SwzXYZX).swz('v', 1, kSM4SwzXYZX); // v1 is the COLOR1 input declared near the top under the same condition
+ }
+
+ // fog
+ if (state.fogMode != kFogDisabled && inputRegCounter < 8) // skipped when no input register below index 8 is left
+ {
+ int fogVar = inputRegCounter;
+ dxb_dcl_input(builder, "FOG", 0, fogVar, 0x1); // x-only mask: scalar fog factor
+ params.AddVectorParam (k11PixelFog*16, 4, kShaderVecFFFogColor);
+ // color.rgb = lerp (fogColor.rgb, color.rgb, fogVar) =
+ // (color.rgb-fogColor.rgb) * fogVar + fogColor.rgb
+ bld.op(kSM4Op_ADD).reg('r',0,7).swz('r',0,kSM4SwzXYZX).swz('c',k11PixelFog,kSM4SwzXYZX, true);
+ bld.op(kSM4Op_MAD).reg('r',0,7).swz('r',0,kSM4SwzXYZX).swz('v',fogVar,kSM4SwzRepX).swz('c',k11PixelFog,kSM4SwzXYZX);
+
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ AddToStringList (inputs, "float ifog : FOG");
+ code += "col.rgb = lerp (ff_fog.rgb, col.rgb, ifog);\n";
+ #endif
+ }
+
+ if (params.HasVectorParams())
+ dxb_dcl_cb(builder, 0, k11PixelSize); // declare the constant buffer only when at least one vector param was added
+
+ // mov o0.xyzw, r0.xyzw
+ bld.op(kSM4Op_MOV).reg('o', 0).swz('r', 0);
+ // ret
+ bld.op(kSM4Op_RET);
+
+ #if DEBUG_D3D11_COMPARE_WITH_HLSL
+ code += "return col;\n";
+ std::string src = textures + kD3D11PixelPrefix + inputs + ") : SV_TARGET {\n" + code + "\n}";
+ printf_console ("d3d11 FF PS HLSL:\n%s\n", src.c_str());
+ DebugCompileHLSLShaderD3D11 (src, false);
+ #endif
+
+ void* blob = BuildShaderD3D11 (builder, outSize);
+ return blob;
+}