summaryrefslogtreecommitdiff
path: root/Runtime/GfxDevice/d3d
diff options
context:
space:
mode:
Diffstat (limited to 'Runtime/GfxDevice/d3d')
-rw-r--r--Runtime/GfxDevice/d3d/CombinerD3D.cpp600
-rw-r--r--Runtime/GfxDevice/d3d/CombinerD3D.h37
-rw-r--r--Runtime/GfxDevice/d3d/D3D9Context.cpp629
-rw-r--r--Runtime/GfxDevice/d3d/D3D9Context.h44
-rw-r--r--Runtime/GfxDevice/d3d/D3D9Enumeration.cpp344
-rw-r--r--Runtime/GfxDevice/d3d/D3D9Enumeration.h64
-rw-r--r--Runtime/GfxDevice/d3d/D3D9Includes.h7
-rw-r--r--Runtime/GfxDevice/d3d/D3D9Utils.cpp169
-rw-r--r--Runtime/GfxDevice/d3d/D3D9Utils.h69
-rw-r--r--Runtime/GfxDevice/d3d/D3D9VBO.cpp815
-rw-r--r--Runtime/GfxDevice/d3d/D3D9VBO.h86
-rw-r--r--Runtime/GfxDevice/d3d/D3D9Window.cpp272
-rw-r--r--Runtime/GfxDevice/d3d/D3D9Window.h39
-rw-r--r--Runtime/GfxDevice/d3d/GfxDeviceD3D9.cpp3009
-rw-r--r--Runtime/GfxDevice/d3d/GfxDeviceD3D9.h361
-rw-r--r--Runtime/GfxDevice/d3d/GpuProgramsD3D.cpp474
-rw-r--r--Runtime/GfxDevice/d3d/GpuProgramsD3D.h40
-rw-r--r--Runtime/GfxDevice/d3d/GraphicsCapsD3D9.cpp384
-rw-r--r--Runtime/GfxDevice/d3d/RenderTextureD3D.cpp583
-rw-r--r--Runtime/GfxDevice/d3d/RenderTextureD3D.h17
-rw-r--r--Runtime/GfxDevice/d3d/ShaderGenerator.cpp948
-rw-r--r--Runtime/GfxDevice/d3d/ShaderGenerator.h100
-rw-r--r--Runtime/GfxDevice/d3d/ShaderPatchingD3D9.cpp376
-rw-r--r--Runtime/GfxDevice/d3d/ShaderPatchingD3D9.h7
-rw-r--r--Runtime/GfxDevice/d3d/TexturesD3D9.cpp696
-rw-r--r--Runtime/GfxDevice/d3d/TexturesD3D9.h90
-rw-r--r--Runtime/GfxDevice/d3d/TimerQueryD3D9.cpp196
-rw-r--r--Runtime/GfxDevice/d3d/TimerQueryD3D9.h67
-rw-r--r--Runtime/GfxDevice/d3d/VertexDeclarations.cpp124
-rw-r--r--Runtime/GfxDevice/d3d/VertexDeclarations.h26
-rw-r--r--Runtime/GfxDevice/d3d/VertexPipeD3D9.cpp705
-rw-r--r--Runtime/GfxDevice/d3d/VertexPipeD3D9.h139
32 files changed, 11517 insertions, 0 deletions
diff --git a/Runtime/GfxDevice/d3d/CombinerD3D.cpp b/Runtime/GfxDevice/d3d/CombinerD3D.cpp
new file mode 100644
index 0000000..2be2b47
--- /dev/null
+++ b/Runtime/GfxDevice/d3d/CombinerD3D.cpp
@@ -0,0 +1,600 @@
+#include "UnityPrefix.h"
+#include "CombinerD3D.h"
+#include "External/shaderlab/Library/texenv.h"
+#include "External/shaderlab/Library/pass.h"
+#include "External/shaderlab/Library/TextureBinding.h"
+#include "D3D9Context.h"
+#include "Runtime/Utilities/BitUtility.h"
+#include "Runtime/GfxDevice/GfxDevice.h"
+#include "Runtime/Shaders/GraphicsCaps.h"
+#include "External/DirectX/builds/dx9include/d3dx9.h"
+
+
+// --------------------------------------------------------------------------
+// Combiners to fixed function texture stages
+
+// NOTE: not all GL combiner modes are representable in TSS:
+// * per-stage constants
+// * DOUBLE/QUAD on arbitrary operations
+// * a*b+-c, a*b-c
+// So what we do is: if hardware supports ps_1_1, we generate pixel shaders on the fly, see below.
+
+// Texture stage operation for each [scale index][combiner function] pair.
+// The row is the scale index computed by CombinerToTextureStage (0..2); the names of the
+// entries suggest rows 1 and 2 are the 2x and 4x variants. D3DTOP_DISABLE marks a
+// combination that cannot be expressed as a single texture stage operation.
+// Deliberately not const: InitializeCombinerCapsD3D9() overwrites entries the device
+// does not support with D3DTOP_DISABLE.
+static D3DTEXTUREOP kCombinerFuncTable[3][8] = {
+ { D3DTOP_SELECTARG1, D3DTOP_MODULATE, D3DTOP_ADD, D3DTOP_ADDSIGNED, D3DTOP_SUBTRACT, D3DTOP_LERP, D3DTOP_DOTPRODUCT3, D3DTOP_DOTPRODUCT3 },
+ { D3DTOP_ADD, D3DTOP_MODULATE2X, D3DTOP_DISABLE, D3DTOP_ADDSIGNED2X, D3DTOP_DISABLE, D3DTOP_DISABLE, D3DTOP_DISABLE, D3DTOP_DISABLE },
+ { D3DTOP_DISABLE, D3DTOP_MODULATE4X, D3DTOP_DISABLE, D3DTOP_DISABLE, D3DTOP_DISABLE, D3DTOP_DISABLE, D3DTOP_DISABLE, D3DTOP_DISABLE },
+};
+// Parallel to kCombinerFuncTable: the D3DTEXOPCAPS_* bit that must be set in the device's
+// TextureOpCaps for the corresponding operation to be usable. Zero means "nothing to check".
+static DWORD kCombinerFuncCapsTable[3][8] = {
+ { D3DTEXOPCAPS_SELECTARG1, D3DTEXOPCAPS_MODULATE, D3DTEXOPCAPS_ADD, D3DTEXOPCAPS_ADDSIGNED, D3DTEXOPCAPS_SUBTRACT, D3DTEXOPCAPS_LERP, D3DTEXOPCAPS_DOTPRODUCT3, D3DTEXOPCAPS_DOTPRODUCT3 },
+ { D3DTEXOPCAPS_ADD, D3DTEXOPCAPS_MODULATE2X, 0, D3DTEXOPCAPS_ADDSIGNED2X, 0, 0, 0, 0 },
+ { 0, D3DTEXOPCAPS_MODULATE4X, 0, 0, 0, 0, 0, 0 },
+};
+
+// Walks the combiner function table and disables (D3DTOP_DISABLE) every operation whose
+// required capability bit is missing from the device's TextureOpCaps.
+void InitializeCombinerCapsD3D9()
+{
+    const DWORD supportedOps = gGraphicsCaps.d3d.d3dcaps.TextureOpCaps;
+    for( int scaleIdx = 0; scaleIdx < 3; ++scaleIdx )
+    {
+        for( int funcIdx = 0; funcIdx < 8; ++funcIdx )
+        {
+            const DWORD requiredCap = kCombinerFuncCapsTable[scaleIdx][funcIdx];
+            // a zero entry has no capability bit to test
+            if( requiredCap == 0 )
+                continue;
+            if( (supportedOps & requiredCap) == 0 )
+                kCombinerFuncTable[scaleIdx][funcIdx] = D3DTOP_DISABLE;
+        }
+    }
+}
+
+// Texture stage argument (D3DTA_*) for each combiner source index.
+// Not const: TextureCombinersD3D::Create() rewrites entry [3] ("primary") to either
+// D3DTA_DIFFUSE or D3DTA_TFACTOR before converting combiners.
+static int kCombinerSourceTable[4] = {
+ D3DTA_CURRENT, D3DTA_TEXTURE, D3DTA_TFACTOR, D3DTA_DIFFUSE // TODO: TFACTOR is global, not per-stage!
+};
+// Argument modifier flags OR'ed onto a color argument, indexed by the combiner operand
+// (the source byte shifted right by combiner::kOperandShift).
+static const int kCombinerOperandModTableRGB[4] = {
+ 0, D3DTA_ALPHAREPLICATE, D3DTA_COMPLEMENT, D3DTA_ALPHAREPLICATE | D3DTA_COMPLEMENT
+};
+// Same as above for alpha arguments; alpha replicate is not applied there, only complement.
+static const int kCombinerOperandModTableAlpha[4] = {
+ 0, 0, D3DTA_COMPLEMENT, D3DTA_COMPLEMENT
+};
+
+
+
+// Converts one packed combiner (color or alpha half) into a fixed function texture stage
+// operation plus its three arguments.
+//   comb    - packed combiner: two source bytes, function and scale
+//   outOp   - receives the D3D texture operation
+//   outArgs - receives the three D3DTA_* arguments for that operation
+//   alpha   - true when converting the alpha half (selects the alpha operand modifiers)
+// Returns false when the combiner can not be expressed as a texture stage on this device.
+static bool CombinerToTextureStage( UInt32 comb, D3DTEXTUREOP& outOp, int outArgs[3], bool alpha )
+{
+    const int func = COMBINER_GET_FUNC(comb);
+    const int packedSrc0 = (comb >> combiner::kSrcZeroShift) & 0xFF;
+    const int packedSrc1 = (comb) & 0xFF;
+    int scale = HighestBit( (comb >> combiner::kScaleShift) );
+    AssertIf( scale < 0 || scale > 2 );
+
+    const int* operandMods = alpha ? kCombinerOperandModTableAlpha : kCombinerOperandModTableRGB;
+    const int arg0 = kCombinerSourceTable[packedSrc0 & combiner::kSourceMask] | operandMods[packedSrc0 >> combiner::kOperandShift];
+    const int arg1 = kCombinerSourceTable[packedSrc1 & combiner::kSourceMask] | operandMods[packedSrc1 >> combiner::kOperandShift];
+
+    // Plain function (no blend bits): a straight table lookup.
+    if( (func & combiner::kBlendFuncMask) == 0 )
+    {
+        outOp = kCombinerFuncTable[scale][func];
+        if( outOp == D3DTOP_DISABLE )
+            return false;
+        outArgs[0] = arg0;
+        // we emulate "source double" with "source + source"
+        outArgs[1] = (func == 0) ? arg0 : arg1;
+        outArgs[2] = D3DTA_CURRENT;
+        return true;
+    }
+
+    // Blend function: a third source comes from the function bits themselves.
+    const int blendFunc = COMBINER_GET_BLEND_FUNC_INDEX(func);
+    const int thirdSrc = func & combiner::kSourceMask;
+    const int thirdOperand = ((func & combiner::kOperandTwo) >> combiner::kOperandShift) | 1;
+    const int arg2 = kCombinerSourceTable[thirdSrc] | operandMods[thirdOperand];
+    const DWORD texOpCaps = gGraphicsCaps.d3d.d3dcaps.TextureOpCaps;
+
+    if( blendFunc == 0 )
+    {
+        // src0 lerp(src2 alpha) src1 - only unscaled, and only if the device can lerp
+        if( scale != 0 )
+            return false;
+        if( !(texOpCaps & D3DTEXOPCAPS_LERP) )
+            return false;
+        outOp = D3DTOP_LERP;
+        outArgs[0] = arg0;
+        outArgs[1] = arg1;
+        outArgs[2] = arg2;
+    }
+    else if( blendFunc == 1 )
+    {
+        // src0 * src2 alpha + src1 - needs MULTIPLYADD (TODO: no fallback yet), unscaled only
+        if( !(texOpCaps & D3DTEXOPCAPS_MULTIPLYADD) )
+            return false;
+        if( scale != 0 )
+            return false;
+        outOp = D3DTOP_MULTIPLYADD;
+        outArgs[0] = arg0;
+        outArgs[1] = arg2;
+        outArgs[2] = arg1;
+    }
+    else if( blendFunc == 2 || blendFunc == 3 )
+    {
+        // src0 * src2 alpha +- src1 and src0 * src2 alpha - src1: not supported!
+        return false;
+    }
+    else
+    {
+        AssertString( "Unknown combiner blend function" );
+        return false;
+    }
+
+    AssertIf( outOp <= D3DTOP_DISABLE || outOp > D3DTOP_LERP );
+    return true;
+}
+
+// Converts both halves (color and alpha) of a texture binding's combiner into a texture stage.
+// Sets outTFactorUsed to true when the binding reports that its texture color is used;
+// the flag is left untouched otherwise. Returns false if either half can not be converted.
+static bool CombinerToTextureStage( const ShaderLab::TextureBinding& te, D3DTextureStage& stage, bool& outTFactorUsed )
+{
+    const int colorCombiner = te.m_CombColor;
+    const bool colorConverted = CombinerToTextureStage( colorCombiner, stage.colorOp, stage.colorArgs, false );
+    if( !colorConverted )
+        return false;
+
+    // For DOT3 operation, we have to force using no function on alpha.
+    // However, on some old cards this has no effect; they always replicate DOT3 to all channels
+    // (e.g. GeForce 2). Oh well.
+    int alphaCombiner = te.m_CombAlpha;
+    const bool colorIsDot3 = (COMBINER_GET_FUNC(colorCombiner) == 6);
+    if( colorIsDot3 )
+        alphaCombiner &= ~(0xFF << combiner::kFuncShift);
+
+    const bool alphaConverted = CombinerToTextureStage( alphaCombiner, stage.alphaOp, stage.alphaArgs, true );
+    if( !alphaConverted )
+        return false;
+
+    if( te.IsTexColorUsed() )
+        outTFactorUsed = true;
+    return true;
+}
+
+
+// --------------------------------------------------------------------------
+// Combiners to pixel shader 1.1
+
+
+// Supports up to 4 texture stages.
+// Each stage outputs into r0 register.
+// Per-stage constants are stored in corresponding constant registers [c0..c3].
+// "Color" command (TFACTOR equivalent) should store in c4.
+// r1 is used in some cases to load & process some constants (e.g. where we'd want to do a c4_bias, we first load it into r1 and then do r1_bias)
+
+
+// Cache for generated pixel shaders.
+// One cached combiner setup together with the pixel shader generated for it.
+struct CombinersCacheEntry
+{
+    UInt32 combColor[kMaxD3DTextureStagesForPS];
+    UInt32 combAlpha[kMaxD3DTextureStagesForPS];
+    IDirect3DPixelShader9* pixelShader;
+    bool specular;
+
+    // True when the first 'count' stages of texEnvs carry exactly the combiners stored
+    // in this entry and the specular flag matches as well.
+    bool Equals( int count, const ShaderLab::TextureBinding* texEnvs, bool specular ) const
+    {
+        AssertIf( count > kMaxD3DTextureStagesForPS );
+        bool same = (specular == this->specular);
+        for( int stage = 0; same && stage < count; ++stage )
+        {
+            const ShaderLab::TextureBinding& env = texEnvs[stage];
+            same = (env.m_CombColor == combColor[stage]) && (env.m_CombAlpha == combAlpha[stage]);
+        }
+        return same;
+    }
+};
+// Deliberately a plain vector rather than a map: comparing entries is cheap and
+// we want everything stored in a single block.
+// First index is (stage count - 1); second index is lighting off (0) / lighting on (1).
+static std::vector<CombinersCacheEntry> s_CombinersCache[kMaxD3DTextureStagesForPS][2];
+
+// Releases every cached pixel shader and frees the memory held by all combiner caches.
+void TextureCombinersD3D::CleanupCombinerCache()
+{
+    for( int i = 0; i < kMaxD3DTextureStagesForPS; ++i )
+    {
+        for( int j = 0; j < 2; ++j )
+        {
+            std::vector<CombinersCacheEntry>& cache = s_CombinersCache[i][j];
+            for( size_t k = 0; k < cache.size(); ++k )
+            {
+                // entries for shaders that failed to create hold a NULL pointer
+                IDirect3DPixelShader9* ps = cache[k].pixelShader;
+                if( ps ) {
+                    ULONG refCount = ps->Release();
+                    AssertIf( refCount != 0 );
+                }
+            }
+            // Swap with an empty temporary so the storage is really freed (clear() alone
+            // keeps the capacity). The temporary has to be on the left-hand side: passing
+            // it as the argument would bind a temporary to a non-const reference, which
+            // only compiles as a compiler extension.
+            std::vector<CombinersCacheEntry>().swap( cache );
+        }
+    }
+}
+
+// Destination register text (including an optional result scale modifier) appended
+// right after the instruction mnemonic; indexed by the combiner scale index (0..2).
+// The leading space / modifier is part of the text on purpose: the "_sat" modifier is
+// later spliced in front of the first space of an instruction.
+static const char* kPSDestRegRGBA[3] = { " r0", "_x2 r0", "_x4 r0" };
+static const char* kPSDestRegRGB [3] = { " r0.rgb", "_x2 r0.rgb", "_x4 r0.rgb" };
+static const char* kPSDestRegA [3] = { " r0.a", "_x2 r0.a", "_x4 r0.a" };
+
+// Text placed in front of a source register, indexed by combiner operand:
+// operands 2 and 3 are the "one minus" variants.
+static const char* kPSOperandPrefixTable[4] = {
+ "", "", "1-", "1-",
+};
+// Text placed after a source register in color instructions, indexed by combiner operand:
+// operands 1 and 3 read the alpha channel.
+static const char* kPSOperandSuffixTableRGB[4] = {
+ "", ".a", "", ".a",
+};
+
+// Returns the ps_1_1 register name that a combiner source maps to at the given stage.
+// "Previous" at stage 0 and "primary color" read v0 when lighting is true, c4 otherwise;
+// texture and constant sources use the t#/c# register of the stage.
+static std::string CombinerSrcPS( combiner::Source source, int stage, bool lighting )
+{
+    const char* const primaryReg = lighting ? "v0" : "c4";
+
+    if( source == combiner::kSrcPrevious )
+        return (stage == 0) ? primaryReg : "r0";
+    if( source == combiner::kSrcTexture )
+        return 't' + IntToString(stage);
+    if( source == combiner::kSrcConstant )
+        return 'c' + IntToString(stage);
+    if( source == combiner::kSrcPrimaryColor )
+        return primaryReg;
+
+    AssertString( "Unknown source" );
+    return "";
+}
+
+// Rewrites a source operand that applies a modifier to a constant register, which the
+// callers' comments say ps_1_1 does not allow: the constant is first copied into r1 by an
+// extra instruction appended to outFixup, and 'source' is changed to read r1 instead.
+//   source   - operand text, modified in place when a fixup is needed
+//   outFixup - receives the extra "mov_sat r1..., c#" instruction(s)
+//   operand  - combiner operand index; selects the RGB suffix re-applied to r1
+//   alpha    - true for the alpha instruction (fixup is emitted with a leading '+', writes r1.a)
+//   dot3     - true when the operand will get a _bx2 modifier, so plain constants need the copy too
+static void FixupForConstantModifiers( std::string& source, std::string& outFixup, UInt32 operand, bool alpha, bool dot3 )
+{
+ // "1-c#": one-minus applied to a constant register
+ if( source.size() >= 4 && source[0]=='1' && source[1]=='-' && source[2] == 'c' )
+ {
+ // everything after "1-c", i.e. the register number plus any suffix already present
+ std::string sub = source.substr( 3, source.size()-3 );
+ outFixup += std::string( alpha ? "+mov_sat r1.a, c" : "mov_sat r1.rgb, c" ) + sub + '\n';
+ source = "1-r1";
+ if( !alpha )
+ source += kPSOperandSuffixTableRGB[operand];
+ }
+ // plain "c#" used in a dot3; not taken after the branch above, since source then starts with '1'
+ if( dot3 && source.size() >= 2 && source[0]=='c' )
+ {
+ std::string sub = source.substr( 1, source.size()-1 );
+ outFixup += std::string( alpha ? "+mov_sat r1.a, c" : "mov_sat r1.rgb, c" ) + sub + '\n';
+ source = "r1";
+ if( !alpha )
+ source += kPSOperandSuffixTableRGB[operand];
+ }
+}
+
+// Builds the ps_1_1 assembly text for one half (color or alpha) of one combiner stage.
+//   stage          - zero-based stage index; selects the t#/c# registers used as sources
+//   lighting       - true when "previous" at stage 0 and "primary" read v0, false for constant c4
+//   comb           - packed combiner (two source bytes, function, scale)
+//   alpha          - true to emit the alpha instruction (prefixed with '+'), false for color
+//   outInstruction - the generated instruction followed by '\n' is appended here
+//   outPrevFixup   - helper instructions that load a constant into r1 are appended here
+//   outPrevSat     - set to true when the previous stage's instruction must get a _sat modifier
+//   outSkipAlpha   - set to true when this instruction writes all four channels (DOT3RGBA),
+//                    so no separate alpha instruction must be issued for the stage
+static void CombinerToPixelShaderText( int stage, bool lighting, UInt32 comb, bool alpha, std::string& outInstruction, std::string& outPrevFixup, bool& outPrevSat, bool& outSkipAlpha )
+{
+    outSkipAlpha = false;
+
+    int s0 = (comb >> combiner::kSrcZeroShift) & 0xFF;
+
+    int cf = COMBINER_GET_FUNC(comb);
+    int s1 = (comb) & 0xFF;
+    int scale = HighestBit( (comb >> combiner::kScaleShift) );
+    AssertIf( scale < 0 || scale > 2 );
+
+    combiner::Source src0 = static_cast<combiner::Source>(s0 & combiner::kSourceMask);
+    combiner::Source src1 = static_cast<combiner::Source>(s1 & combiner::kSourceMask);
+    UInt32 oper0 = s0 >> combiner::kOperandShift;
+    UInt32 oper1 = s1 >> combiner::kOperandShift;
+    std::string source0 = kPSOperandPrefixTable[oper0] + CombinerSrcPS( src0, stage, lighting );
+    if( !alpha )
+        source0 += kPSOperandSuffixTableRGB[oper0];
+    // source1 keeps its channel suffix separate, because modifiers such as _bias or _bx2
+    // have to go between the register name and the suffix
+    std::string source1 = kPSOperandPrefixTable[oper1] + CombinerSrcPS( src1, stage, lighting );
+    std::string suffix1 = alpha ? "" : kPSOperandSuffixTableRGB[oper1];
+    const char** kPSDestReg = (cf == 7) ? kPSDestRegRGBA : (alpha ? kPSDestRegA : kPSDestRegRGB);
+    std::string destReg = kPSDestReg[scale];
+
+    std::string text;
+
+    // Some special rules:
+    // * For bias modifier or a lerp, a previous instruction should saturate the result
+    // * For negate modifier, a previous instruction can't saturate the result (hence can't just saturate everything)
+    // * 1-x or x_bias not allowed on constants. So if we detect such case, we try to load it into r1
+    //   with additional instruction.
+    // * Emulating DOT3 requires _bx2 modifier, which is not allowed on constants. So we also detect
+    //   that and load it into r1 with additional instruction.
+
+    bool dot3 = (cf == 6 || cf == 7);
+    FixupForConstantModifiers( source0, outPrevFixup, oper0, alpha, dot3 );
+    FixupForConstantModifiers( source1, outPrevFixup, oper1, alpha, dot3 );
+
+    bool addSatOnPrevious = false; // should we add "saturate" on previous instruction?
+    if( !(cf & combiner::kBlendFuncMask) )
+    {
+        switch( cf )
+        {
+        case 0: text = "mov" + destReg + ", " + source0; break;
+        case 1: text = "mul" + destReg + ", " + source0 + ", " + source1 + suffix1; break;
+        case 2: text = "add" + destReg + ", " + source0 + ", " + source1 + suffix1; break;
+        case 3:
+            // add signed: _bias is not allowed on a constant, so route it through r1
+            if( source1[0] == 'c' )
+            {
+                std::string sub = source1.substr( 1, source1.size()-1 );
+                outPrevFixup += std::string( alpha ? "+mov_sat r1.a, c" : "mov_sat r1.rgb, c" ) + sub + '\n';
+                source1 = "r1";
+            }
+            text = "add" + destReg + ", " + source0 + ", " + source1 + "_bias" + suffix1;
+            if( (s1 & combiner::kSourceMask) == 0 ) // if source1 is "previous", need to saturate previous instruction
+                addSatOnPrevious = true;
+            break;
+        case 4: text = "sub" + destReg + ", " + source0 + ", " + source1 + suffix1; break;
+        case 5: AssertIf(false); break;
+        case 6:
+            // DOT3: for rgb do the dot, for alpha just use source0
+            if( alpha )
+            {
+                text = "mov" + destReg + ", " + source0;
+            }
+            else
+            {
+                text = "dp3" + destReg + ", " + source0 + "_bx2, " + source1 + "_bx2" + suffix1;
+            }
+            break;
+        case 7:
+            // DOT3RGBA: do the dot into all four channels, and do not issue co-issued alpha
+            // instructions for this stage.
+            text = "dp3" + destReg + ", " + source0 + "_bx2, " + source1 + "_bx2" + suffix1;
+            outSkipAlpha = true;
+            break;
+        default:
+            // any other function value produces no instruction text
+            break;
+        }
+    }
+    else
+    {
+        int blendF = COMBINER_GET_BLEND_FUNC_INDEX(cf);
+        combiner::Source src2 = static_cast<combiner::Source>(cf & combiner::kSourceMask);
+        int oper2 = ((cf & combiner::kOperandTwo) >> combiner::kOperandShift) | 1;
+        std::string source2 = kPSOperandPrefixTable[oper2] + CombinerSrcPS(src2, stage, lighting);
+        if( !alpha )
+            source2 += kPSOperandSuffixTableRGB[oper2];
+
+        FixupForConstantModifiers( source2, outPrevFixup, oper2, alpha, false );
+
+        switch( blendF )
+        {
+        case 0:
+            // src0 lerp(src2 alpha) src1
+            text = "lrp" + destReg + ", " + source2 + ", " + source0 + ", " + source1 + suffix1;
+            if( src2 == combiner::kSrcPrevious ) // if src2 is "previous", need to saturate previous instruction
+                addSatOnPrevious = true;
+            break;
+        case 1:
+            // src0 * src2 alpha + src1
+            text = "mad" + destReg + ", " + source0 + ", " + source2 + ", " + source1 + suffix1;
+            break;
+        case 2:
+            // src0 * src2 alpha +- src1
+            if( source1[0] == 'c' )
+            {
+                std::string sub = source1.substr( 1, source1.size()-1 );
+                outPrevFixup += std::string( alpha ? "+mov_sat r1.a, c" : "mov_sat r1.rgb, c" ) + sub + '\n';
+                source1 = "r1";
+            }
+            text = "mad" + destReg + ", " + source0 + ", " + source2 + ", " + source1 + "_bias" + suffix1;
+            if( (s1 & combiner::kSourceMask) == 0 ) // if source1 is "previous", need to saturate previous instruction
+                addSatOnPrevious = true;
+            break;
+        case 3:
+            // src0 * src2 alpha - src1
+            text = "mad" + destReg + ", " + source0 + ", " + source2 + ", -" + source1 + suffix1;
+            break;
+        default:
+            AssertString( "Unknown combiner blend function" );
+            break;
+        }
+    }
+
+    // if we're not the first instruction and we need to modify previous one - tell the caller.
+    outPrevSat = (stage != 0 && addSatOnPrevious);
+
+    // alpha instructions carry a leading '+'
+    if( alpha )
+        outInstruction += '+';
+    outInstruction += text;
+    outInstruction += '\n';
+}
+
+
+// GpuProgramsD3D.cpp
+ID3DXBuffer* AssembleD3DShader( const std::string& source );
+
+
+// Returns a pixel shader implementing the given combiner stages, generating, assembling and
+// caching it on first use.
+//   count       - number of combiner stages; must be at least 1
+//   texEnvs     - array of 'count' texture bindings holding the packed combiners
+//   lighting    - forwarded to the text generator; also selects which cache is used
+//   addSpecular - when true, an instruction adding v1 to the result is appended
+// Returns NULL when count exceeds kMaxD3DTextureStagesForPS or when the shader could not be
+// assembled/created. A failed result is cached as well (with a NULL shader pointer).
+static IDirect3DPixelShader9* CombinersToPixelShader( int count, const ShaderLab::TextureBinding* texEnvs, bool lighting, bool addSpecular )
+{
+ AssertIf( count < 1 );
+
+ // ps_1_1 supports only 4 textures
+ if (count > kMaxD3DTextureStagesForPS)
+ return NULL;
+
+ // look for such combiner setup in cache
+ int lightingIdx = lighting ? 1 : 0;
+ int cacheCount = s_CombinersCache[count-1][lightingIdx].size();
+ for( int i = 0; i < cacheCount; ++i )
+ {
+ CombinersCacheEntry& ce = s_CombinersCache[count-1][lightingIdx][i];
+ if( ce.Equals( count, texEnvs, addSpecular ) )
+ {
+ AssertIf( !ce.pixelShader );
+ return ce.pixelShader;
+ }
+ }
+
+ std::string text = "ps_1_1\n";
+ // sample textures
+ for( int i = 0; i < count; ++i )
+ {
+ text += "tex t" + IntToString(i) + '\n';
+ }
+
+ // do combiner operations
+ CombinersCacheEntry cacheEntry;
+ // Offsets into 'text' where the previous stage's color [0] and alpha [1] instructions
+ // start; used to splice a "_sat" modifier into them after the fact.
+ int previousInstructions[2] = { 0, 0 };
+ for( int i = 0; i < count; ++i )
+ {
+ UInt32 combColor = texEnvs[i].m_CombColor;
+ UInt32 combAlpha = texEnvs[i].m_CombAlpha;
+ cacheEntry.combColor[i] = combColor;
+ cacheEntry.combAlpha[i] = combAlpha;
+ cacheEntry.specular = addSpecular;
+ // 'instruction' collects this stage's color and alpha instructions, 'fixup' the helper
+ // instructions that must be emitted before them
+ std::string instruction, fixup;
+ bool satPrevious, skipAlpha;
+ // color
+ CombinerToPixelShaderText( i, lighting, combColor, false, instruction, fixup, satPrevious, skipAlpha );
+ if( satPrevious ) {
+ // advance to the first space of the previous color instruction, i.e. just past its
+ // mnemonic and any scale modifier, and insert "_sat" there
+ while( text[previousInstructions[0]] != ' ' )
+ ++previousInstructions[0];
+ text.insert( previousInstructions[0], "_sat" );
+ previousInstructions[1] += 4; // move the other pointer forward by _sat length as well
+ }
+ int colorInstructionLength = instruction.size();
+ // alpha
+ if( !skipAlpha )
+ {
+ CombinerToPixelShaderText( i, lighting, combAlpha, true, instruction, fixup, satPrevious, skipAlpha );
+ if( satPrevious ) {
+ while( text[previousInstructions[1]] != ' ' )
+ ++previousInstructions[1];
+ text.insert( previousInstructions[1], "_sat" );
+ previousInstructions[0] += 4; // move the other pointer forward by _sat length as well
+ }
+ }
+
+ // fixups go first, then remember where this stage's instructions begin before appending them
+ text += fixup;
+ previousInstructions[0] = text.size();
+ previousInstructions[1] = text.size() + (skipAlpha ? 0 : colorInstructionLength);
+ text += instruction;
+ }
+
+ // add specular at the end if needed
+ if( addSpecular )
+ {
+ text += "add r0.rgb, r0, v1";
+ }
+
+ // compile pixel shader
+ HRESULT hr;
+ IDirect3DDevice9* dev = GetD3DDevice();
+
+ // assemble shader
+ ID3DXBuffer *compiledShader = AssembleD3DShader( text );
+ IDirect3DPixelShader9* ps = NULL;
+ if( compiledShader )
+ {
+ // create shader
+ hr = dev->CreatePixelShader( (const DWORD*)compiledShader->GetBufferPointer(), &ps );
+ compiledShader->Release();
+ if( FAILED(hr) )
+ {
+ ErrorStringMsg ("D3D9 Combiners: failed to create pixel shader representation: %s", text.c_str());
+ }
+ }
+ AssertIf( !ps );
+
+ // insert into cache (even when ps is NULL, so a failing setup is not re-assembled every time)
+ cacheEntry.pixelShader = ps;
+ s_CombinersCache[count-1][lightingIdx].push_back( cacheEntry );
+
+ return ps;
+}
+
+
+// --------------------------------------------------------------------------
+
+
+
+// Creates the D3D9 representation of a set of texture combiners.
+//   count                    - number of texture bindings; 0 yields a single "use primary color" stage
+//   texEnvs                  - the bindings; the pointer is stored in the result, not copied
+//   props                    - used to validate texgen mode / texture dimension; may be NULL,
+//                              in which case that validation is skipped
+//   hasVertexColorOrLighting - selects whether "primary" means diffuse or texture factor
+//   usesAddSpecular          - forwarded to the pixel shader generator
+// A pixel shader is tried first; if none is available the combiners are converted to fixed
+// function texture stages. Returns a heap-allocated object (allocated with new), or NULL
+// when a binding is unsupported or the combiners can not be converted.
+TextureCombinersD3D* TextureCombinersD3D::Create( int count, const ShaderLab::TextureBinding* texEnvs, const ShaderLab::PropertySheet* props, bool hasVertexColorOrLighting, bool usesAddSpecular )
+{
+ static int uniqueIDCounter = 1;
+
+ // one-time pruning of the combiner function table against the device caps
+ static bool combinerCapsInitialized = false;
+ if( !combinerCapsInitialized )
+ {
+ InitializeCombinerCapsD3D9();
+ combinerCapsInitialized = true;
+ }
+
+ // For threaded rendering this check is done on the client side (and we get NULL here)
+ if (props)
+ {
+ // check texgen modes & texture dimension are supported
+ for( int i = 0; i < count; ++i ) {
+ TextureDimension texDim;
+ TexGenMode texGen;
+ GetTexEnvInfoFromName( texEnvs[i].m_TextureName, texDim, texGen, props );
+ if( !ShaderLab::IsTexEnvSupported( texEnvs[i].m_TextureName, texDim, texGen ) )
+ return NULL;
+ }
+ }
+
+ bool canConvertToStages = true;
+
+ // "primary" in the combiner might refer to diffuse or texture factor, depending on
+ // whether lighting is on or vertex colors are bound
+ // NOTE: this writes a file-level table that the texture stage conversion below reads,
+ // so it has to happen before any conversion.
+ kCombinerSourceTable[3] = hasVertexColorOrLighting ? D3DTA_DIFFUSE : D3DTA_TFACTOR;
+
+ TextureCombinersD3D* d3dte = new TextureCombinersD3D();
+ d3dte->uniqueID = ++uniqueIDCounter;
+ d3dte->envCount = count;
+ d3dte->texEnvs = texEnvs;
+ d3dte->pixelShader = NULL;
+ d3dte->textureFactorIndex = -1;
+
+ // special case: when no SetTextures are present, setup to do { combine primary } equivalent
+ if( count == 0 )
+ {
+ d3dte->stages[0].colorOp = D3DTOP_SELECTARG1;
+ d3dte->stages[0].colorArgs[0] = d3dte->stages[0].colorArgs[1] = d3dte->stages[0].colorArgs[2] = kCombinerSourceTable[3];
+ d3dte->stages[0].alphaOp = D3DTOP_SELECTARG1;
+ d3dte->stages[0].alphaArgs[0] = d3dte->stages[0].alphaArgs[1] = d3dte->stages[0].alphaArgs[2] = kCombinerSourceTable[3];
+ d3dte->stages[1].colorOp = D3DTOP_DISABLE;
+ // envCount stays 0 while stageCount is 1 here
+ d3dte->stageCount = 1;
+ return d3dte;
+ }
+
+ // try to convert to pixel shader and use that if everything is ok
+ d3dte->pixelShader = CombinersToPixelShader( count, texEnvs, hasVertexColorOrLighting, usesAddSpecular );
+
+ // if don't have pixel shader, convert to TSS setup
+ if( !d3dte->pixelShader )
+ {
+ if( count > gGraphicsCaps.d3d.d3dcaps.MaxSimultaneousTextures )
+ {
+ // In theory we could convert more; if most of combiner stages do not actually
+ // use the texture. In practice we just cap it at MaxSimultaneousTextures;
+ // it will match GL behaviour as well.
+ canConvertToStages = false;
+ }
+ else
+ {
+ for( int i = 0; i < count; ++i )
+ {
+ const ShaderLab::TextureBinding& te = texEnvs[i];
+ D3DTextureStage& stage = d3dte->stages[i];
+ bool textureFactorUsed = false;
+ if( !CombinerToTextureStage(te, stage, textureFactorUsed) )
+ {
+ canConvertToStages = false;
+ break;
+ }
+ // remember the stage that uses the texture factor; a later stage overwrites an earlier one
+ if (textureFactorUsed)
+ {
+ d3dte->textureFactorIndex = i;
+ }
+ }
+ // terminate the stage list
+ d3dte->stages[count].colorOp = D3DTOP_DISABLE;
+ }
+
+ // no can't do
+ if( !canConvertToStages )
+ {
+ delete d3dte;
+ return NULL;
+ }
+ }
+
+ d3dte->stageCount = count;
+ return d3dte;
+}
diff --git a/Runtime/GfxDevice/d3d/CombinerD3D.h b/Runtime/GfxDevice/d3d/CombinerD3D.h
new file mode 100644
index 0000000..6f7fb05
--- /dev/null
+++ b/Runtime/GfxDevice/d3d/CombinerD3D.h
@@ -0,0 +1,37 @@
+#pragma once
+
+#include "D3D9Includes.h"
+#include "External/shaderlab/Library/shadertypes.h"
+
+namespace ShaderLab {
+ struct TextureBinding;
+ class TexEnv;
+}
+
+
+const int kMaxD3DTextureStages = 8;
+const int kMaxD3DTextureStagesForPS = 4;
+
+// Fixed function setup of a single texture stage: one operation and three
+// D3DTA_* arguments each for the color and the alpha channel.
+struct D3DTextureStage
+{
+ D3DTEXTUREOP colorOp;
+ int colorArgs[3];
+ D3DTEXTUREOP alphaOp;
+ int alphaArgs[3];
+};
+
+// D3D9 representation of a set of texture combiners: either a generated pixel shader
+// or a list of fixed function texture stages.
+struct TextureCombinersD3D
+{
+ // Builds the representation for 'count' bindings; returns NULL when it can not be built.
+ static TextureCombinersD3D* Create( int count, const ShaderLab::TextureBinding* texEnvs, const ShaderLab::PropertySheet* props, bool hasVertexColorOrLighting, bool usesAddSpecular );
+ // Releases all cached generated pixel shaders.
+ static void CleanupCombinerCache();
+
+ // One extra slot so the stage list can be terminated with a D3DTOP_DISABLE stage.
+ D3DTextureStage stages[kMaxD3DTextureStages+1];
+ int envCount, stageCount; // these might be different!
+ // Generated pixel shader, or NULL when texture stages are used instead.
+ IDirect3DPixelShader9* pixelShader;
+ // The bindings passed to Create(); only the pointer is kept.
+ const ShaderLab::TextureBinding* texEnvs;
+
+ // Index of the stage that uses the texture factor, or -1 if none was recorded.
+ int textureFactorIndex;
+ // NOTE(review): Create() never assigns this member explicitly - confirm who sets it.
+ bool textureFactorUsed;
+
+ int uniqueID;
+};
diff --git a/Runtime/GfxDevice/d3d/D3D9Context.cpp b/Runtime/GfxDevice/d3d/D3D9Context.cpp
new file mode 100644
index 0000000..e192ad8
--- /dev/null
+++ b/Runtime/GfxDevice/d3d/D3D9Context.cpp
@@ -0,0 +1,629 @@
+#include "UnityPrefix.h"
+#include "D3D9Context.h"
+#include "Runtime/Shaders/GraphicsCaps.h"
+#include "D3D9Enumeration.h"
+#include "D3D9Utils.h"
+#include "GfxDeviceD3D9.h"
+#include "TimerQueryD3D9.h"
+#include "PlatformDependent/Win/WinUtils.h"
+#include "Configuration/UnityConfigure.h"
+#include "Runtime/GfxDevice/GfxDevice.h"
+#include "Runtime/Threads/ThreadSharedObject.h"
+#include "Runtime/Misc/Plugins.h"
+#if UNITY_EDITOR
+#include "Runtime/GfxDevice/GfxDeviceSetup.h"
+#include "Runtime/Misc/QualitySettings.h"
+#include "Runtime/Camera/RenderManager.h"
+#include "D3D9Window.h"
+#endif
+
+#if WEBPLUG
+#define ENABLE_NV_PERFHUD 0
+#else
+#define ENABLE_NV_PERFHUD 1
+#endif
+
+#define ENABLE_D3D_WINDOW_LOGGING 1
+
+// Module-level Direct3D state. s_D3D, s_D3DDll and s_FormatCaps are set up by
+// InitializeD3D() and torn down by CleanupD3D().
+static IDirect3D9* s_D3D = NULL;
+static IDirect3DDevice9* s_Device = NULL;
+
+static RenderColorSurfaceD3D9 s_BackBuffer;
+static RenderDepthSurfaceD3D9 s_DepthStencil;
+static HWND s_Window = NULL;
+static HINSTANCE s_D3DDll = NULL; // handle of the dynamically loaded d3d9.dll
+static D3DPRESENT_PARAMETERS s_PresentParams;
+static D3D9FormatCaps* s_FormatCaps = NULL;
+static bool s_CurrentlyWindowed = true;
+// Desktop display mode captured while windowed; reused for format selection while fullscreen.
+static D3DDISPLAYMODE s_LastWindowedMode;
+bool g_D3DUsesMixedVP = false; // true when the device was created with mixed vertex processing
+bool g_D3DHasDepthStencil = true;
+D3DFORMAT g_D3DDepthStencilFormat = D3DFMT_D16;
+D3DDEVTYPE g_D3DDevType; // device type passed to InitializeD3D()
+DWORD g_D3DAdapter = D3DADAPTER_DEFAULT; // adapter ordinal; reset to default if out of range
+
+#if WEBPLUG
+extern bool gInsideFullscreenToggle;
+#endif
+
+typedef IDirect3D9* (WINAPI* Direct3DCreate9Func)(UINT);
+
+GfxDeviceD3D9& GetD3D9GfxDevice();
+void SetD3D9DeviceLost( bool lost ); // GfxDeviceD3D9.cpp
+bool IsD3D9DeviceLost();
+void ResetDynamicResourcesD3D9();
+
+#if ENABLE_PROFILER
+D3DPERF_BeginEventFunc g_D3D9BeginEventFunc;
+D3DPERF_EndEventFunc g_D3D9EndEventFunc;
+#endif
+
+
+// Releases whatever InitializeD3D() has set up so far (format caps, D3D object, d3d9.dll)
+// and resets the corresponding module state, so a failed initialization leaves nothing behind.
+static void ReleasePartialD3DInit()
+{
+    delete s_FormatCaps;
+    s_FormatCaps = NULL;
+    if( s_D3D )
+    {
+        s_D3D->Release();
+        s_D3D = NULL;
+    }
+    if( s_D3DDll )
+    {
+        FreeLibrary( s_D3DDll );
+        s_D3DDll = NULL;
+    }
+}
+
+// Loads d3d9.dll, creates the IDirect3D9 object, verifies the requested device type is
+// usable on the selected adapter and enumerates the available formats.
+//   devtype - device type to use; stored in g_D3DDevType
+// Returns true on success. On any failure a message is logged, everything created so far
+// is released again and false is returned, so the function may be called again later.
+bool InitializeD3D(D3DDEVTYPE devtype)
+{
+    AssertIf( s_D3D || s_Device || s_Window || s_D3DDll || s_FormatCaps );
+    g_D3DDevType = devtype;
+
+    s_D3DDll = LoadLibrary( "d3d9.dll" );
+    if( !s_D3DDll )
+    {
+        printf_console( "d3d: no D3D9 installed\n" );
+        return false; // no d3d9 installed
+    }
+
+    Direct3DCreate9Func createFunc = (Direct3DCreate9Func)GetProcAddress( s_D3DDll, "Direct3DCreate9" );
+    if( !createFunc )
+    {
+        printf_console( "d3d: Direct3DCreate9 not found\n" );
+        ReleasePartialD3DInit();
+        return false; // for some reason Direct3DCreate9 not found
+    }
+
+    #if ENABLE_PROFILER
+    g_D3D9BeginEventFunc = (D3DPERF_BeginEventFunc)GetProcAddress(s_D3DDll, "D3DPERF_BeginEvent");
+    g_D3D9EndEventFunc = (D3DPERF_EndEventFunc)GetProcAddress(s_D3DDll, "D3DPERF_EndEvent");
+    #endif
+
+    // create D3D object
+    s_D3D = createFunc( D3D_SDK_VERSION );
+    if( !s_D3D )
+    {
+        printf_console( "d3d: no 9.0c available\n" );
+        ReleasePartialD3DInit();
+        return false; // D3D initialization failed
+    }
+
+    // validate the adapter ordinal
+    UINT adapterCount = s_D3D->GetAdapterCount();
+    if ( g_D3DAdapter >= adapterCount )
+        g_D3DAdapter = D3DADAPTER_DEFAULT;
+
+    // check whether we have a HAL device
+    D3DDISPLAYMODE mode;
+    HRESULT hr;
+    if (FAILED(hr = s_D3D->GetAdapterDisplayMode(g_D3DAdapter, &mode)))
+    {
+        printf_console ("d3d: failed to get adapter mode (adapter %d error 0x%08x)\n", g_D3DAdapter, hr);
+        ReleasePartialD3DInit();
+        return false; // failed to get adapter mode
+    }
+    if( FAILED( s_D3D->CheckDeviceType( g_D3DAdapter, g_D3DDevType, mode.Format, mode.Format, TRUE ) ) )
+    {
+        printf_console( "d3d: no support for this device type (accelerated/ref)\n" );
+        ReleasePartialD3DInit();
+        return false; // no HAL driver available
+    }
+
+    // enumerate all formats, multi sample types and whatnot
+    s_FormatCaps = new D3D9FormatCaps();
+    if( !s_FormatCaps->Enumerate( *s_D3D ) )
+    {
+        printf_console( "d3d: no video modes available\n" );
+        // Clean up here as well; otherwise the caps object, the D3D object and the DLL
+        // stay alive and a later retry would hit the assertion at the top.
+        ReleasePartialD3DInit();
+        return false;
+    }
+
+    return true;
+}
+
+// Accessor for the module's IDirect3D9 object; asserts that it has been created.
+IDirect3D9* GetD3DObject()
+{
+    AssertIf( !s_D3D );
+    IDirect3D9* const d3dObject = s_D3D;
+    return d3dObject;
+}
+// Accessor for the enumerated format capabilities; asserts that they have been created.
+D3D9FormatCaps* GetD3DFormatCaps()
+{
+    AssertIf( !s_FormatCaps );
+    D3D9FormatCaps* const formatCaps = s_FormatCaps;
+    return formatCaps;
+}
+
+// Tears down what InitializeD3D() created: the format caps, the D3D object and the
+// loaded d3d9.dll. The device and window are expected to be gone already.
+void CleanupD3D()
+{
+    AssertIf( s_Device || s_Window );
+
+    // enumerated format capabilities
+    delete s_FormatCaps;
+    s_FormatCaps = NULL;
+
+    // the D3D object itself
+    if( s_D3D != NULL )
+        s_D3D->Release();
+    s_D3D = NULL;
+
+    // finally unload the library
+    if( s_D3DDll != NULL )
+        FreeLibrary( s_D3DDll );
+    s_D3DDll = NULL;
+}
+
+// Returns the adapter format the format caps object reports for capability checks.
+D3DFORMAT GetD3DFormatForChecks()
+{
+    AssertIf( !s_FormatCaps );
+    const D3DFORMAT adapterFormat = s_FormatCaps->GetAdapterFormatForChecks();
+    return adapterFormat;
+}
+
+// Derives the engine-side depth buffer format from a D3D depth/stencil format and stores
+// it on the given device, and on the global device too if one exists.
+static void SetFramebufferDepthFormat(GfxDevice* realDevice, D3DFORMAT format)
+{
+    // Not the most robust way to figure out the format, but should do:
+    // go by bits per pixel of the D3D format.
+    DepthBufferFormat depthFormat;
+    switch( GetBPPFromD3DFormat(format) )
+    {
+    case 16:
+        depthFormat = kDepthFormat16;
+        break;
+    case 32:
+        depthFormat = kDepthFormat24;
+        break;
+    default:
+        depthFormat = kDepthFormatNone;
+        break;
+    }
+    realDevice->SetFramebufferDepthFormat(depthFormat);
+
+    // Set it on the client device as well, if we're changing resolutions
+    // and the property hasn't been propagated by copying from the real to client device.
+    if (IsGfxDevice())
+    {
+        GetGfxDevice().SetFramebufferDepthFormat(depthFormat);
+    }
+}
+
+bool InitializeOrResetD3DDevice(
+ class GfxDevice* device,
+ HWND window, int width, int height,
+ int refreshRate, bool fullscreen, int vBlankCount, int fsaa,
+ int& outBackbufferBPP, int& outFrontbufferBPP, int& outDepthBPP, int& outFSAA )
+{
+ AssertIf( !s_D3D );
+
+ outBackbufferBPP = 4;
+ outFrontbufferBPP = 4;
+ outDepthBPP = 4;
+ outFSAA = 0;
+
+ width = std::max(width, 1);
+ height = std::max(height, 1);
+
+ D3DDISPLAYMODE mode;
+ if( s_CurrentlyWindowed )
+ {
+ HRESULT hr = s_D3D->GetAdapterDisplayMode( g_D3DAdapter, &mode );
+ if( FAILED( hr ) )
+ {
+ printf_console( "d3d initialize: failed to get adapter display mode [%s]\n", GetD3D9Error(hr) );
+ return false;
+ }
+ s_LastWindowedMode = mode;
+ }
+ else
+ {
+ // If we are fullscreen right now, use last checked Windowed mode format
+ // to choose compatible formats. Otherwise we won't be able to switch to 16 bit
+ // desktop mode after a 32 bit fullscreen one.
+ mode = s_LastWindowedMode;
+ }
+
+ D3DPRESENT_PARAMETERS& pparams = s_PresentParams;
+ ZeroMemory (&pparams, sizeof(D3DPRESENT_PARAMETERS));
+ pparams.BackBufferWidth = width;
+ pparams.BackBufferHeight = height;
+ pparams.BackBufferCount = 1;
+ pparams.hDeviceWindow = window;
+ pparams.FullScreen_RefreshRateInHz = fullscreen ? refreshRate : 0;
+
+ pparams.EnableAutoDepthStencil = FALSE;
+ g_D3DHasDepthStencil = true;
+
+ pparams.Windowed = fullscreen ? FALSE : TRUE;
+ pparams.SwapEffect = D3DSWAPEFFECT_DISCARD;
+
+ // fullscreen FSAA might be buggy
+ if( fullscreen && gGraphicsCaps.buggyFullscreenFSAA )
+ fsaa = 1;
+
+ s_FormatCaps->FindBestPresentationParams( width, height, mode.Format, !fullscreen, vBlankCount, fsaa, pparams );
+
+ outBackbufferBPP = GetBPPFromD3DFormat(pparams.BackBufferFormat)/8;
+ outFrontbufferBPP = GetBPPFromD3DFormat(mode.Format)/8;
+ outDepthBPP = GetBPPFromD3DFormat(pparams.AutoDepthStencilFormat)/8;
+ outFSAA = (pparams.MultiSampleType == D3DMULTISAMPLE_NONMASKABLE) ? pparams.MultiSampleQuality : pparams.MultiSampleType;
+ g_D3DDepthStencilFormat = pparams.AutoDepthStencilFormat;
+ device->SetCurrentTargetSize(pparams.BackBufferWidth, pparams.BackBufferHeight);
+ SetFramebufferDepthFormat(device, pparams.AutoDepthStencilFormat);
+
+ bool deviceInLostState = false;
+ if( !s_Device )
+ {
+ AssertIf( s_Window );
+
+ UINT adapterIndex = g_D3DAdapter;
+ D3DDEVTYPE devType = g_D3DDevType;
+
+ #if ENABLE_NV_PERFHUD
+ UINT adapterCount = s_D3D->GetAdapterCount();
+ D3DADAPTER_IDENTIFIER9 perfHudID;
+ memset( &perfHudID, 0, sizeof(perfHudID) );
+ s_D3D->GetAdapterIdentifier( adapterCount-1, 0, &perfHudID );
+ perfHudID.Description[MAX_DEVICE_IDENTIFIER_STRING-1] = 0;
+ if( strstr( perfHudID.Description, "PerfHUD" ) != NULL )
+ {
+ adapterIndex = adapterCount-1;
+ devType = D3DDEVTYPE_REF;
+ }
+ #endif
+
+ const int kShaderVersion11 = (1 << 8) + 1;
+ bool hasHardwareTL = gGraphicsCaps.d3d.d3dcaps.DevCaps & D3DDEVCAPS_HWTRANSFORMANDLIGHT;
+ bool hasVS11 = LOWORD(gGraphicsCaps.d3d.d3dcaps.VertexShaderVersion) >= kShaderVersion11;
+ DWORD behaviourFlags = D3DCREATE_HARDWARE_VERTEXPROCESSING;
+ if( !hasVS11 )
+ behaviourFlags = D3DCREATE_MIXED_VERTEXPROCESSING;
+ if( !hasHardwareTL )
+ behaviourFlags = D3DCREATE_SOFTWARE_VERTEXPROCESSING;
+ g_D3DUsesMixedVP = (behaviourFlags == D3DCREATE_MIXED_VERTEXPROCESSING);
+
+ if( GetGfxThreadingMode() == kGfxThreadingModeThreaded )
+ behaviourFlags |= D3DCREATE_MULTITHREADED;
+
+ // Preserve FPU mode. Benchmarking both in hardware and software vertex processing does not
+ // reveal any real differences. If FPU mode is not preserved, bad things will happen, like:
+ // * doubles will act like floats
+ // * on Firefox/Safari, some JavaScript libraries will stop working (spect.aculo.us, dojo) - case 17513
+ // * some random funky FPU exceptions will happen
+ HRESULT hr = s_D3D->CreateDevice( adapterIndex, devType, window, behaviourFlags | D3DCREATE_FPU_PRESERVE, &pparams, &s_Device );
+ if( FAILED( hr ) )
+ {
+ printf_console( "d3d: creation params: flags=%x swap=%i vsync=%x w=%i h=%i fmt=%i bbcount=%i dsformat=%i pflags=%x\n",
+ behaviourFlags, pparams.SwapEffect, pparams.PresentationInterval,
+ pparams.BackBufferWidth, pparams.BackBufferHeight, pparams.BackBufferFormat, pparams.BackBufferCount,
+ pparams.AutoDepthStencilFormat, pparams.Flags );
+ printf_console( "d3d: failed to create device [%s]\n", GetD3D9Error(hr) );
+ if (devType == D3DDEVTYPE_REF)
+ {
+ winutils::AddErrorMessage("Reference Rasterizer was requested but is not available.\nPlease make sure you have DirectX SDK installed.");
+ winutils::DisplayErrorMessagesAndQuit ("REFRAST not available");
+ }
+ return false;
+ }
+ s_CurrentlyWindowed = pparams.Windowed ? true : false;
+
+ gGraphicsCaps.hasTimerQuery =
+ (GetD3DDevice()->CreateQuery(D3DQUERYTYPE_TIMESTAMPFREQ, NULL) != D3DERR_NOTAVAILABLE) &&
+ (GetD3DDevice()->CreateQuery(D3DQUERYTYPE_TIMESTAMP,NULL) != D3DERR_NOTAVAILABLE);
+ }
+ else
+ {
+ AssertIf( !s_Window );
+
+ // If we're resetting device mid-frame (e.g. script calls Screen.SetResolution),
+ // we need to end scene, reset and begin scene again.
+ bool wasInsideFrame = GetD3D9GfxDevice().IsInsideFrame();
+ if( wasInsideFrame )
+ {
+ s_Device->EndScene();
+ GetD3D9GfxDevice().SetInsideFrame(false);
+ }
+
+ // cleanup
+ s_BackBuffer.Release();
+ s_DepthStencil.Release();
+
+ PluginsSetGraphicsDevice (s_Device, kGfxRendererD3D9, kGfxDeviceEventBeforeReset);
+
+ D3DPRESENT_PARAMETERS ppcopy = pparams; // copy them, as Reset changes some values
+ HRESULT hr = s_Device->Reset( &ppcopy );
+ if( FAILED(hr) )
+ {
+ if( hr == D3DERR_DEVICELOST )
+ {
+ deviceInLostState = true;
+ SetD3D9DeviceLost( true );
+ }
+ else
+ {
+ ErrorString( Format("D3D device reset failed [%s]", GetD3D9Error(hr)) );
+ return false;
+ }
+ }
+
+ PluginsSetGraphicsDevice (s_Device, kGfxRendererD3D9, kGfxDeviceEventAfterReset);
+
+ s_CurrentlyWindowed = ppcopy.Windowed ? true : false;
+ if( wasInsideFrame && !deviceInLostState )
+ {
+ s_Device->BeginScene();
+ GetD3D9GfxDevice().SetInsideFrame(true);
+ }
+
+#if ENABLE_PROFILER
+ if (gGraphicsCaps.hasTimerQuery)
+ GetD3D9GfxDevice().GetTimerQueries().RecreateAllQueries();
+#endif
+ }
+
+ s_Window = window;
+ if( !deviceInLostState )
+ {
+ s_Device->GetRenderTarget (0, &s_BackBuffer.m_Surface);
+ s_BackBuffer.width = pparams.BackBufferWidth;
+ s_BackBuffer.height = pparams.BackBufferHeight;
+ // create depth stencil
+ D3D9DepthStencilTexture depthStencil = CreateDepthStencilTextureD3D9 (s_Device, pparams.BackBufferWidth, pparams.BackBufferHeight, pparams.AutoDepthStencilFormat, pparams.MultiSampleType, pparams.MultiSampleQuality, TRUE);
+ if (depthStencil.m_Surface)
+ {
+ s_DepthStencil.m_Surface = depthStencil.m_Surface;
+ s_DepthStencil.m_Texture = depthStencil.m_Texture;
+ s_DepthStencil.width = pparams.BackBufferWidth;
+ s_DepthStencil.height = pparams.BackBufferHeight;
+ s_DepthStencil.depthFormat = kDepthFormat16; //@TODO?
+ }
+
+ s_BackBuffer.backBuffer = true;
+ s_DepthStencil.backBuffer = true;
+
+ #if !UNITY_EDITOR
+ RenderSurfaceHandle bbHandle(&s_BackBuffer), dsHandle(&s_DepthStencil);
+ device->SetRenderTargets(1, &bbHandle, dsHandle);
+ #endif
+ s_Device->SetRenderState (D3DRS_ZENABLE, TRUE);
+ }
+
+ return true;
+}
+
+// Re-acquires the back buffer surface from the device.
+// Expects a valid device and an existing depth/stencil surface (both asserted).
+// The depth/stencil surface itself is left untouched here.
+void GetBackBuffersAfterDeviceReset()
+{
+ AssertIf (!s_Device);
+ AssertIf (!s_DepthStencil.m_Surface);
+ // Drop whatever back buffer surface we were holding before fetching the current one.
+ s_BackBuffer.Release();
+ // Render target 0 is stored as the back buffer surface.
+ s_Device->GetRenderTarget (0, &s_BackBuffer.m_Surface);
+ s_BackBuffer.backBuffer = true;
+}
+
+#if UNITY_EDITOR
+// Creates (or resets) the editor's master D3D device on the hidden window.
+// Any failure is fatal: the error is reported and the application quits.
+void EditorInitializeD3D(GfxDevice* device)
+{
+    // 32x32, non-fullscreen, no refresh rate / vsync / FSAA request.
+    // The reported bit depths and FSAA level are not needed by the editor.
+    int unusedOut;
+    const bool deviceReady = InitializeOrResetD3DDevice(
+        device, s_HiddenWindowD3D,
+        32, 32, 0, false, 0, 0,
+        unusedOut, unusedOut, unusedOut, unusedOut );
+    if( !deviceReady )
+    {
+        winutils::AddErrorMessage( "Failed to create master Direct3D window" );
+        DestroyGfxDevice();
+        winutils::DisplayErrorMessagesAndQuit( "Failed to initialize 3D graphics" );
+    }
+
+    // Disable D3D Debug runtime in editor release mode:
+    // VERTEXSTATS query is only available in Debug runtime.
+    #if UNITY_RELEASE
+    const bool usingDebugRuntime = CheckD3D9DebugRuntime(GetD3DDevice());
+    if (usingDebugRuntime)
+    {
+        winutils::AddErrorMessage (
+            "You are using Direct3D Debug Runtime, this is not supported by\r\n"
+            "Unity. Switch to Retail runtime in DirectX Control Panel.");
+        DestroyGfxDevice();
+        winutils::DisplayErrorMessagesAndQuit ("D3D9 Debug Runtime is not supported");
+    }
+    #endif
+}
+#endif
+
+// Destroys dynamic resources, then resets the device.
+// Returns true (and clears the "device lost" flag) only if the reset succeeded.
+bool FullResetD3DDevice()
+{
+    #if ENABLE_D3D_WINDOW_LOGGING
+    printf_console("FullResetD3DDevice\n");
+    #endif
+
+    // destroy dynamic VBO / render textures before the device is reset
+    ResetDynamicResourcesD3D9();
+
+    if( !ResetD3DDevice() )
+        return false;
+
+    SetD3D9DeviceLost( false );
+    return true;
+}
+
+// Polls the device's cooperative level and tries to bring a lost device back.
+// Returns true when the device is usable again (back buffer re-acquired and the
+// "device lost" flag cleared), false while it is still lost or the reset failed.
+bool HandleD3DDeviceLost()
+{
+    #if ENABLE_D3D_WINDOW_LOGGING
+    printf_console("HandleD3DDeviceLost\n");
+    #endif
+
+    const HRESULT coopLevel = s_Device->TestCooperativeLevel();
+    bool deviceUsable = false;
+
+    if( coopLevel == D3D_OK )
+    {
+        // Device is not actually lost
+        deviceUsable = true;
+    }
+    else if( coopLevel == D3DERR_DEVICELOST )
+    {
+        // Still lost: do not render until we get it back
+        #if ENABLE_D3D_WINDOW_LOGGING
+        printf_console(" HandleD3DDeviceLost: still lost\n");
+        #endif
+    }
+    else if( coopLevel == D3DERR_DEVICENOTRESET )
+    {
+        // Device can be reset now, so do that
+        #if ENABLE_D3D_WINDOW_LOGGING
+        printf_console(" HandleD3DDeviceLost: needs reset, doing it\n");
+        #endif
+        deviceUsable = FullResetD3DDevice();
+    }
+    // Any other result leaves the device marked as unusable.
+
+    if( !deviceUsable )
+        return false;
+
+    // device is not lost anymore, proceed
+    #if ENABLE_D3D_WINDOW_LOGGING
+    printf_console("D3Dwindow device not lost anymore\n");
+    #endif
+    GetBackBuffersAfterDeviceReset();
+    SetD3D9DeviceLost( false );
+
+    return true;
+}
+
+// Resets the existing device using a copy of s_PresentParams.
+//
+// Sequence: release the back buffer / depth-stencil wrappers, notify plugins
+// (kGfxDeviceEventBeforeReset), call IDirect3DDevice9::Reset, and on success notify
+// plugins again (kGfxDeviceEventAfterReset), re-acquire the back buffer and recreate
+// the depth/stencil surface.
+//
+// Returns true if the reset succeeded. Returns false if Reset reported
+// D3DERR_DEVICELOST (the device is then flagged as lost) or any other failure
+// (which is also logged via ErrorString).
+// NOTE(review): kGfxDeviceEventAfterReset is only sent on the success path here - confirm
+// that plugins do not need it after a failed reset.
+bool ResetD3DDevice()
+{
+ AssertIf( !s_D3D || !s_Device || !s_Window );
+
+ #if ENABLE_D3D_WINDOW_LOGGING
+ printf_console("ResetD3DDevice\n");
+ #endif
+
+ // cleanup
+ s_BackBuffer.Release();
+ s_DepthStencil.Release();
+
+ #if ENABLE_D3D_WINDOW_LOGGING
+ printf_console("dev->Reset\n");
+ #endif
+
+ D3DPRESENT_PARAMETERS ppcopy = s_PresentParams; // copy them, as Reset changes some values
+
+ #if WEBPLUG
+ // Reset sends WM_ACTIVATE message which makes Web Player exit fullscreen (unless gInsideFullscreenToggle is set).
+ // The previous flag value is saved here and restored right after Reset.
+ bool insideFullscreenToggle = gInsideFullscreenToggle;
+ gInsideFullscreenToggle = true;
+ #endif
+
+ PluginsSetGraphicsDevice (s_Device, kGfxRendererD3D9, kGfxDeviceEventBeforeReset);
+
+ HRESULT hr = s_Device->Reset( &ppcopy );
+
+ #if WEBPLUG
+ gInsideFullscreenToggle = insideFullscreenToggle;
+ #endif
+
+ bool setToLost = false;
+ if( FAILED(hr) )
+ {
+ if( hr == D3DERR_DEVICELOST )
+ {
+ // Device is still lost: remember that and report failure at the end.
+ #if ENABLE_D3D_WINDOW_LOGGING
+ printf_console("set device to lost\n");
+ #endif
+ SetD3D9DeviceLost( true );
+ setToLost = true;
+ }
+ else
+ {
+ // Any other failure is logged and reported immediately.
+ ErrorString( Format("D3D device reset failed [%s]", GetD3D9Error(hr)) );
+ return false;
+ }
+ }
+ else
+ {
+ PluginsSetGraphicsDevice (s_Device, kGfxRendererD3D9, kGfxDeviceEventAfterReset);
+
+ // Re-acquire render target 0 as the back buffer, sized from the values Reset left in ppcopy.
+ s_Device->GetRenderTarget (0, &s_BackBuffer.m_Surface);
+ s_BackBuffer.width = ppcopy.BackBufferWidth;
+ s_BackBuffer.height = ppcopy.BackBufferHeight;
+ // create depth stencil
+ D3D9DepthStencilTexture depthStencil = CreateDepthStencilTextureD3D9 (s_Device, ppcopy.BackBufferWidth, ppcopy.BackBufferHeight, ppcopy.AutoDepthStencilFormat, ppcopy.MultiSampleType, ppcopy.MultiSampleQuality, TRUE);
+ // If creation failed (no surface), s_DepthStencil keeps its released state.
+ if (depthStencil.m_Surface)
+ {
+ s_DepthStencil.m_Surface = depthStencil.m_Surface;
+ s_DepthStencil.m_Texture = depthStencil.m_Texture;
+ s_DepthStencil.width = ppcopy.BackBufferWidth;
+ s_DepthStencil.height = ppcopy.BackBufferHeight;
+ s_DepthStencil.depthFormat = kDepthFormat16; //@TODO?
+ }
+
+ s_BackBuffer.backBuffer = true;
+ s_DepthStencil.backBuffer = true;
+
+ // Outside the editor, bind the fresh back buffer / depth-stencil as the current render targets.
+ #if !UNITY_EDITOR
+ RenderSurfaceHandle bbHandle(&s_BackBuffer), dsHandle(&s_DepthStencil);
+ GetRealGfxDevice().SetRenderTargets(1, &bbHandle, dsHandle);
+ #endif
+ s_Device->SetRenderState (D3DRS_ZENABLE, TRUE);
+ }
+ // Updated on both the success and the device-lost path.
+ s_CurrentlyWindowed = ppcopy.Windowed ? true : false;
+
+ return !setToLost;
+}
+
+// Releases the back buffer / depth-stencil wrappers and the device itself,
+// then forgets the device and the window. Does nothing if either was never set up.
+void DestroyD3DDevice()
+{
+    // This can happen when quiting from screen selector - window is not set up yet
+    const bool deviceWasCreated = ( s_Window && s_Device );
+    if( deviceWasCreated )
+    {
+        // cleanup
+        s_BackBuffer.Release();
+        s_DepthStencil.Release();
+        s_Device->Release();
+
+        s_Device = NULL;
+        s_Window = NULL;
+    }
+}
+
+// Returns the current D3D device; asserts that one exists.
+IDirect3DDevice9* GetD3DDevice()
+{
+ AssertIf( !s_Device );
+ return s_Device;
+}
+
+// Returns the current D3D device without asserting; the result is whatever s_Device
+// currently holds, so callers must be prepared for NULL.
+IDirect3DDevice9* GetD3DDeviceNoAssert()
+{
+ return s_Device;
+}
+
+
+
+#if UNITY_EDITOR
+
+#include "PlatformDependent/Win/WinUtils.h"
+
+// Hidden window used by the editor for its master D3D device; NULL until CreateHiddenWindowD3D succeeds.
+// Despite the s_ prefix this has external linkage (declared extern in D3D9Context.h).
+HWND s_HiddenWindowD3D = NULL;
+
+// Creates the hidden window and stores it in s_HiddenWindowD3D.
+// Must not be called while a hidden window already exists (asserted).
+// Returns false, after queueing an error message, if window creation fails.
+bool CreateHiddenWindowD3D()
+{
+    AssertIf( s_HiddenWindowD3D );
+
+    // Dummy master window is 64x64 in size. Seems that 32x32 is too small for Rage cards (produces internal driver errors in CreateDevice).
+    const int kHiddenWindowSize = 64;
+    const DWORD kHiddenWindowStyle = WS_OVERLAPPEDWINDOW | WS_CLIPSIBLINGS | WS_CLIPCHILDREN;
+
+    s_HiddenWindowD3D = CreateWindowW(
+        L"STATIC", L"UnityHiddenWindow", kHiddenWindowStyle,
+        0, 0, kHiddenWindowSize, kHiddenWindowSize,
+        NULL, NULL, winutils::GetInstanceHandle(), NULL );
+
+    if( s_HiddenWindowD3D )
+        return true;
+
+    winutils::AddErrorMessage( "Failed to create hidden window: %s", WIN_LAST_ERROR_TEXT );
+    return false;
+}
+
+// Destroys the hidden window and clears s_HiddenWindowD3D.
+// Asserts that the window exists.
+void DestroyHiddenWindowD3D()
+{
+ AssertIf( !s_HiddenWindowD3D );
+ DestroyWindow( s_HiddenWindowD3D );
+ s_HiddenWindowD3D = NULL;
+}
+
+#endif
diff --git a/Runtime/GfxDevice/d3d/D3D9Context.h b/Runtime/GfxDevice/d3d/D3D9Context.h
new file mode 100644
index 0000000..370a1c7
--- /dev/null
+++ b/Runtime/GfxDevice/d3d/D3D9Context.h
@@ -0,0 +1,44 @@
+#pragma once
+
+#include "D3D9Includes.h"
+#include "D3D9Enumeration.h"
+
+// Global D3D setup / teardown (implemented in D3D9Context.cpp).
+bool InitializeD3D(D3DDEVTYPE devtype);
+void CleanupD3D();
+// Creates the device if none exists yet, otherwise resets the existing one.
+// The out* parameters receive bytes per pixel of the chosen back buffer, front buffer
+// (desktop mode) and depth buffer formats, and the FSAA level that was actually selected.
+// Returns false if device creation or reset failed.
+bool InitializeOrResetD3DDevice(
+ class GfxDevice* device,
+ HWND window, int width, int height,
+ int refreshRate, bool fullscreen, int vBlankCount, int fsaa,
+ int& outBackbufferBPP, int& outFrontbufferBPP, int& outDepthBPP, int& outFSAA );
+// Re-acquires the back buffer surface from the device.
+void GetBackBuffersAfterDeviceReset();
+// Resets the device with the stored presentation parameters; false on failure or if the device is lost.
+bool ResetD3DDevice();
+#if UNITY_EDITOR
+// Creates the editor's master device on the hidden window; quits the application on failure.
+void EditorInitializeD3D(GfxDevice* device);
+#endif
+// Destroys dynamic resources, then calls ResetD3DDevice.
+bool FullResetD3DDevice();
+// Checks the cooperative level and resets the device if it can be; true when the device is usable again.
+bool HandleD3DDeviceLost();
+// Releases the device and its back buffer / depth-stencil; no-op if the device was never created.
+void DestroyD3DDevice();
+// Adapter index and device type used for all D3D capability checks and device creation.
+extern D3DDEVTYPE g_D3DDevType;
+extern DWORD g_D3DAdapter;
+// True when the device was created with D3DCREATE_MIXED_VERTEXPROCESSING.
+extern bool g_D3DUsesMixedVP;
+extern bool g_D3DHasDepthStencil;
+// Depth/stencil format chosen for the current presentation parameters.
+extern D3DFORMAT g_D3DDepthStencilFormat;
+
+// Device accessors: the first asserts that a device exists, the second may return NULL.
+IDirect3DDevice9* GetD3DDevice();
+IDirect3DDevice9* GetD3DDeviceNoAssert();
+IDirect3D9* GetD3DObject();
+D3D9FormatCaps* GetD3DFormatCaps();
+D3DFORMAT GetD3DFormatForChecks();
+
+// Function pointer types and globals for the D3DPERF begin/end event entry points.
+// NOTE(review): presumably resolved at runtime from d3d9.dll - confirm in D3D9Context.cpp.
+typedef int (WINAPI* D3DPERF_BeginEventFunc)(D3DCOLOR, LPCWSTR);
+typedef int (WINAPI* D3DPERF_EndEventFunc)();
+extern D3DPERF_BeginEventFunc g_D3D9BeginEventFunc;
+extern D3DPERF_EndEventFunc g_D3D9EndEventFunc;
+
+
+#if UNITY_EDITOR
+// Editor-only hidden window that hosts the master D3D device.
+bool CreateHiddenWindowD3D();
+void DestroyHiddenWindowD3D();
+extern HWND s_HiddenWindowD3D;
+#endif
+
diff --git a/Runtime/GfxDevice/d3d/D3D9Enumeration.cpp b/Runtime/GfxDevice/d3d/D3D9Enumeration.cpp
new file mode 100644
index 0000000..b78433e
--- /dev/null
+++ b/Runtime/GfxDevice/d3d/D3D9Enumeration.cpp
@@ -0,0 +1,344 @@
+#include "UnityPrefix.h"
+#include "D3D9Enumeration.h"
+#include "D3D9Utils.h"
+#include "Runtime/GfxDevice/VramLimits.h"
+
+// ---------------------------------------------------------------------------
+
+
+// Filters applied while enumerating: display modes smaller than this, or with fewer
+// color bits per channel, are ignored; back buffer formats need at least kMinAlphaBits of alpha.
+const int kMinDisplayWidth = 512;
+const int kMinDisplayHeight = 384;
+const int kMinColorBits = 4;
+const int kMinAlphaBits = 0;
+
+// Adapter / device type all checks in this file are made against (declared in D3D9Context.h).
+extern D3DDEVTYPE g_D3DDevType;
+extern DWORD g_D3DAdapter;
+
+// ---------------------------------------------------------------------------
+
+// Returns the bit count of the narrowest color (RGB) channel of a format,
+// e.g. 5 for R5G6B5 and 2 for R3G3B2. Formats not listed here yield 0.
+static int GetFormatColorBits( D3DFORMAT fmt ) {
+ switch( fmt ) {
+ case D3DFMT_A2B10G10R10:
+ case D3DFMT_A2R10G10B10: return 10;
+ case D3DFMT_R8G8B8:
+ case D3DFMT_A8R8G8B8:
+ case D3DFMT_X8R8G8B8: return 8;
+ case D3DFMT_R5G6B5:
+ case D3DFMT_X1R5G5B5:
+ case D3DFMT_A1R5G5B5: return 5;
+ case D3DFMT_A4R4G4B4:
+ case D3DFMT_X4R4G4B4: return 4;
+ case D3DFMT_R3G3B2:
+ case D3DFMT_A8R3G3B2: return 2;
+ default: return 0;
+ }
+}
+
+// Returns the number of alpha bits in a format.
+// Formats without an alpha channel, and formats not known here, yield 0.
+static int GetFormatAlphaBits( D3DFORMAT fmt )
+{
+    switch( fmt )
+    {
+    case D3DFMT_A8R8G8B8:
+    case D3DFMT_A8R3G3B2:
+        return 8;
+
+    case D3DFMT_A4R4G4B4:
+        return 4;
+
+    case D3DFMT_A2B10G10R10:
+    case D3DFMT_A2R10G10B10:
+        return 2;
+
+    case D3DFMT_A1R5G5B5:
+        return 1;
+
+    default:
+        // R8G8B8, X8R8G8B8, R5G6B5, X1R5G5B5, R3G3B2, X4R4G4B4 and anything unknown
+        return 0;
+    }
+}
+
+// Returns the number of depth bits (stencil bits not counted) of a depth/stencil format.
+// Formats not listed here yield 0. Not static: visible to other translation units.
+int GetFormatDepthBits( D3DFORMAT fmt ) {
+ switch( fmt ) {
+ case D3DFMT_D16: return 16;
+ case D3DFMT_D15S1: return 15;
+ case D3DFMT_D24X8:
+ case D3DFMT_D24S8:
+ case D3DFMT_D24X4S4: return 24;
+ case D3DFMT_D32: return 32;
+ default: return 0;
+ }
+}
+
+// Maps an X-channel format to its same-layout counterpart with alpha
+// (X8R8G8B8 -> A8R8G8B8, etc.). Every other format is returned unchanged.
+static D3DFORMAT ConvertToAlphaFormat( D3DFORMAT fmt )
+{
+    switch( fmt )
+    {
+    case D3DFMT_X8R8G8B8: return D3DFMT_A8R8G8B8;
+    case D3DFMT_X4R4G4B4: return D3DFMT_A4R4G4B4;
+    case D3DFMT_X1R5G5B5: return D3DFMT_A1R5G5B5;
+    default:              return fmt;
+    }
+}
+
+// -----------------------------------------------------------------------------
+
+
+// Builds a bitmask (bit index = D3DVertexProcessing value) of the vertex
+// processing modes the device caps allow.
+static UInt32 buildVertexProcessings( const D3DCAPS9& caps )
+{
+    // TODO: check vertex shader version
+
+    // Software vertex processing is always part of the mask.
+    UInt32 mask = (1<<kVPSoftware);
+
+    const DWORD devCaps = caps.DevCaps;
+    if( !(devCaps & D3DDEVCAPS_HWTRANSFORMANDLIGHT) )
+        return mask;
+
+    // Hardware T&L present: hardware and mixed modes become available too.
+    mask |= (1<<kVPHardware);
+    mask |= (1<<kVPMixed);
+    if( devCaps & D3DDEVCAPS_PUREDEVICE )
+        mask |= (1<<kVPPureHardware);
+
+    return mask;
+}
+
+
+// Appends to devCombo.depthStencilFormats every candidate depth/stencil format that
+// the device supports for the combo's adapter format and that matches its back buffer
+// format. Candidates are tried, and therefore stored, in the preference order below.
+static void buildDepthStencilFormats( IDirect3D9& d3d, D3DDeviceCombo& devCombo )
+{
+    const D3DFORMAT candidates[] = {
+        D3DFMT_D24S8, D3DFMT_D24X8, D3DFMT_D24X4S4, D3DFMT_D16, D3DFMT_D15S1, D3DFMT_D32,
+    };
+    const int candidateCount = sizeof(candidates) / sizeof(candidates[0]);
+
+    for( int i = 0; i < candidateCount; ++i )
+    {
+        const D3DFORMAT candidate = candidates[i];
+
+        // Usable as a depth/stencil surface with this adapter format?
+        if( FAILED( d3d.CheckDeviceFormat( g_D3DAdapter, g_D3DDevType, devCombo.adapterFormat, D3DUSAGE_DEPTHSTENCIL, D3DRTYPE_SURFACE, candidate ) ) )
+            continue;
+
+        // Compatible with the combo's back buffer format?
+        if( FAILED( d3d.CheckDepthStencilMatch( g_D3DAdapter, g_D3DDevType, devCombo.adapterFormat, devCombo.backBufferFormat, candidate ) ) )
+            continue;
+
+        devCombo.depthStencilFormats.push_back( candidate );
+    }
+}
+
+
+// Fills devCombo.multiSampleTypes with the multisample levels supported for the
+// combo's back buffer format and windowed mode.
+// Entries are stored as plain sample counts (0 for "none", then 2..16), in ascending
+// order; "none" is always present, so the list is never empty afterwards.
+static void buildMultiSampleTypes( IDirect3D9& d3d, D3DDeviceCombo& devCombo )
+{
+    // Same signed type as the loop counter, so the loop condition compares like with like.
+    const int kMaxSamples = 16;
+    devCombo.multiSampleTypes.reserve( kMaxSamples );
+    devCombo.multiSampleTypes.push_back( D3DMULTISAMPLE_NONE );
+
+    for( int samples = 2; samples <= kMaxSamples; ++samples )
+    {
+        const D3DMULTISAMPLE_TYPE msType = GetD3DMultiSampleType( samples );
+        // Quality levels are not queried (NULL), only whether the type is supported.
+        if( SUCCEEDED( d3d.CheckDeviceMultiSampleType( g_D3DAdapter, g_D3DDevType, devCombo.backBufferFormat, devCombo.isWindowed, msType, NULL ) ) )
+            devCombo.multiSampleTypes.push_back( samples );
+    }
+}
+
+
+// Records in devCombo.conflicts every (depth/stencil format, multisample type) pair
+// from the combo's lists for which the multisample check on that depth/stencil format fails.
+static void buildConflicts( IDirect3D9& d3d, D3DDeviceCombo& devCombo )
+{
+    const size_t depthFormatCount = devCombo.depthStencilFormats.size();
+    const size_t msTypeCount = devCombo.multiSampleTypes.size();
+
+    for( size_t d = 0; d < depthFormatCount; ++d )
+    {
+        const D3DFORMAT depthFormat = (D3DFORMAT)devCombo.depthStencilFormats[d];
+        for( size_t m = 0; m < msTypeCount; ++m )
+        {
+            const D3DMULTISAMPLE_TYPE msType = (D3DMULTISAMPLE_TYPE)devCombo.multiSampleTypes[m];
+
+            const HRESULT supported = d3d.CheckDeviceMultiSampleType(
+                g_D3DAdapter, g_D3DDevType,
+                depthFormat, devCombo.isWindowed, msType, NULL );
+            if( SUCCEEDED( supported ) )
+                continue;
+
+            D3DDeviceCombo::MultiSampleConflict conflict;
+            conflict.format = depthFormat;
+            conflict.type = msType;
+            devCombo.conflicts.push_back( conflict );
+        }
+    }
+}
+
+
+// Appends to outCombos every supported (adapter format, back buffer format, windowed)
+// combination, each with its depth/stencil formats, multisample types and conflicts.
+// Combos without any usable depth/stencil format are dropped.
+// Returns true if outCombos is non-empty afterwards.
+static bool enumerateDeviceCombos( IDirect3D9& d3d, const D3DCAPS9& caps, const DwordVector& adapterFormats, D3DDeviceComboVector& outCombos )
+{
+ // Back buffer formats that are tried for every adapter format.
+ const D3DFORMAT bbufferFormats[] = {
+ D3DFMT_A8R8G8B8, D3DFMT_X8R8G8B8, D3DFMT_A2R10G10B10,
+ D3DFMT_R5G6B5, D3DFMT_A1R5G5B5, D3DFMT_X1R5G5B5
+ };
+ const int bbufferFormatCount = sizeof(bbufferFormats) / sizeof(bbufferFormats[0]);
+
+ // Fullscreen is tried before windowed for each format pair.
+ bool isWindowedArray[] = { false, true };
+
+ // see which adapter formats are supported by this device
+ for( size_t iaf = 0; iaf < adapterFormats.size(); ++iaf )
+ {
+ D3DFORMAT format = (D3DFORMAT)adapterFormats[iaf];
+ for( int ibbf = 0; ibbf < bbufferFormatCount; ibbf++ )
+ {
+ D3DFORMAT bbufferFormat = bbufferFormats[ibbf];
+ // With kMinAlphaBits == 0 this never skips anything.
+ if( GetFormatAlphaBits(bbufferFormat) < kMinAlphaBits )
+ continue;
+ for( int iiw = 0; iiw < 2; ++iiw ) {
+ bool isWindowed = isWindowedArray[iiw];
+ if( FAILED( d3d.CheckDeviceType( g_D3DAdapter, g_D3DDevType, format, bbufferFormat, isWindowed ) ) )
+ continue;
+
+ // Here, we have an adapter format / backbuffer format/ windowed
+ // combo that is supported by the system. We still need to find one or
+ // more suitable depth/stencil buffer format, multisample type,
+ // vertex processing type, and vsync.
+ D3DDeviceCombo devCombo;
+
+ devCombo.adapterFormat = format;
+ devCombo.backBufferFormat = bbufferFormat;
+ devCombo.isWindowed = isWindowed;
+ devCombo.presentationIntervals = caps.PresentationIntervals;
+
+ buildDepthStencilFormats( d3d, devCombo );
+ if( devCombo.depthStencilFormats.empty() )
+ continue;
+
+ // buildMultiSampleTypes always adds D3DMULTISAMPLE_NONE, so the check below is purely defensive.
+ buildMultiSampleTypes( d3d, devCombo );
+ if( devCombo.multiSampleTypes.empty() )
+ continue;
+
+ buildConflicts( d3d, devCombo );
+
+ outCombos.push_back( devCombo );
+ }
+ }
+ }
+
+ return !outCombos.empty();
+}
+
+
+// Enumerates what the adapter/device (g_D3DAdapter, g_D3DDevType) can do:
+//  * collects the adapter formats that have at least one acceptable display mode,
+//  * picks the adapter format to use for later capability checks,
+//  * determines the available vertex processing modes,
+//  * builds the list of supported device combos (m_Combos).
+// Must be called on an object whose m_Combos is still empty (asserted).
+// Returns false if the device caps cannot be queried or no device combo is supported.
+bool D3D9FormatCaps::Enumerate( IDirect3D9& d3d )
+{
+    AssertIf( !m_Combos.empty() );
+
+    const D3DFORMAT allowedFormats[] = {
+        D3DFMT_X8R8G8B8, D3DFMT_X1R5G5B5, D3DFMT_R5G6B5, D3DFMT_A2R10G10B10
+    };
+    const int allowedFormatCount = sizeof(allowedFormats) / sizeof(allowedFormats[0]);
+
+    m_AdapterFormatForChecks = D3DFMT_UNKNOWN;
+
+    // build a list of all display adapter formats
+    DwordVector adapterFormatList; // D3DFORMAT
+
+    for( int ifmt = 0; ifmt < allowedFormatCount; ++ifmt )
+    {
+        D3DFORMAT format = allowedFormats[ifmt];
+        int modeCount = d3d.GetAdapterModeCount( g_D3DAdapter, format );
+        for( int mode = 0; mode < modeCount; ++mode )
+        {
+            D3DDISPLAYMODE dm;
+            // Skip modes that cannot be queried; dm would be left uninitialized otherwise.
+            if( FAILED( d3d.EnumAdapterModes( g_D3DAdapter, format, mode, &dm ) ) )
+                continue;
+            if( dm.Width < (UINT)kMinDisplayWidth || dm.Height < (UINT)kMinDisplayHeight || GetFormatColorBits(dm.Format) < kMinColorBits )
+                continue;
+            if( std::find(adapterFormatList.begin(),adapterFormatList.end(),dm.Format) == adapterFormatList.end() )
+            {
+                adapterFormatList.push_back( dm.Format );
+                // The first acceptable format (in allowedFormats order) is used for checks.
+                if( m_AdapterFormatForChecks == D3DFMT_UNKNOWN )
+                    m_AdapterFormatForChecks = format;
+            }
+        }
+    }
+
+    if( m_AdapterFormatForChecks == D3DFMT_UNKNOWN ) // for some reason no format was selected for checks, use default
+        m_AdapterFormatForChecks = allowedFormats[0];
+
+    // get info for device on this adapter
+    D3DCAPS9 caps;
+    if( FAILED( d3d.GetDeviceCaps( g_D3DAdapter, g_D3DDevType, &caps ) ) )
+        return false;
+
+    // find suitable vertex processing modes (if any)
+    m_VertexProcessings = buildVertexProcessings( caps );
+    AssertIf( !m_VertexProcessings );
+
+    // get info for each device combo on this device
+    return enumerateDeviceCombos( d3d, caps, adapterFormatList, m_Combos );
+}
+
+
+// Chooses back buffer format, depth/stencil format, presentation interval and
+// multisample type for the requested mode and writes them into outParams.
+// Only those fields (plus MultiSampleQuality, always 0) are written.
+//
+// width, height - requested size; only used to clamp the FSAA level.
+// desktopMode   - current desktop (adapter) format.
+// windowed      - true for windowed mode; only combos with a matching isWindowed are considered.
+// vBlankCount   - requested vsync interval (0 = immediate, 1, 2).
+// multiSample   - requested FSAA sample count.
+//
+// If no enumerated combo fits, a fallback is written (desktop format, D16, default
+// interval, no multisampling).
+// NOTE(review): the per-combo multisample conflicts list is not consulted here.
+void D3D9FormatCaps::FindBestPresentationParams( int width, int height, D3DFORMAT desktopMode, bool windowed, int vBlankCount, int multiSample, D3DPRESENT_PARAMETERS& outParams ) const
+{
+ const D3DDeviceCombo* bestCombo = NULL;
+ int bestScore = -1;
+
+ for( size_t idc = 0; idc < m_Combos.size(); ++idc )
+ {
+ const D3DDeviceCombo& devCombo = m_Combos[idc];
+ if( windowed && !devCombo.isWindowed )
+ continue;
+ if( !windowed && devCombo.isWindowed )
+ continue;
+ // In windowed mode the adapter format has to be the desktop format.
+ if( windowed )
+ {
+ if( devCombo.adapterFormat != desktopMode )
+ continue;
+ }
+
+ // One point each for: back buffer matching the adapter format (alpha variant),
+ // adapter format matching the desktop, and back buffer having an alpha channel.
+ int score = 0;
+
+ bool matchesBB = (devCombo.backBufferFormat == ConvertToAlphaFormat(devCombo.adapterFormat));
+ bool matchesDesktop = (devCombo.adapterFormat == desktopMode);
+
+ if( matchesBB )
+ score += 1;
+ if( matchesDesktop )
+ score += 1;
+ if( GetFormatAlphaBits(devCombo.backBufferFormat) > 0 )
+ score += 1;
+
+ // Strict comparison: on equal scores the earlier combo wins.
+ if( score > bestScore )
+ {
+ bestScore = score;
+ bestCombo = &devCombo;
+ }
+ }
+
+ if( !bestCombo )
+ {
+ // This can happen if we're debugging force-16BPP modes on a 32BPP desktop, and so on
+ outParams.BackBufferFormat = desktopMode;
+ outParams.AutoDepthStencilFormat = D3DFMT_D16;
+ outParams.PresentationInterval = D3DPRESENT_INTERVAL_DEFAULT;
+ outParams.MultiSampleType = D3DMULTISAMPLE_NONE;
+ outParams.MultiSampleQuality = 0;
+ return;
+ }
+
+ outParams.BackBufferFormat = bestCombo->backBufferFormat;
+ // The first stored depth/stencil format is used.
+ outParams.AutoDepthStencilFormat = (D3DFORMAT)bestCombo->depthStencilFormats[0];
+
+ // No support for intervals above 1 in windowed mode (case 497116)
+ if (windowed && vBlankCount > 1)
+ vBlankCount = 1;
+
+ // best possible vsync parameter (if device doesn't support 2 fall back to 1)
+ DWORD intervals = bestCombo->presentationIntervals;
+ outParams.PresentationInterval = ( vBlankCount >= 2 ) && ( intervals & D3DPRESENT_INTERVAL_TWO ) ? D3DPRESENT_INTERVAL_TWO :
+ ( vBlankCount >= 1 ) && ( intervals & D3DPRESENT_INTERVAL_ONE ) ? D3DPRESENT_INTERVAL_ONE :
+ ( vBlankCount == 0 ) && ( intervals & D3DPRESENT_INTERVAL_IMMEDIATE ) ? D3DPRESENT_INTERVAL_IMMEDIATE :
+ D3DPRESENT_INTERVAL_DEFAULT;
+
+ // Here we already know backbuffer, depth buffer formats and so on, so we can also clamp used FSAA to sane VRAM limits.
+ // (the values below are bytes per pixel: GetBPPFromD3DFormat returns bits)
+ int backbufferBPP = GetBPPFromD3DFormat(outParams.BackBufferFormat)/8;
+ int frontbufferBPP = GetBPPFromD3DFormat(desktopMode)/8;
+ int depthBPP = GetBPPFromD3DFormat(outParams.AutoDepthStencilFormat)/8;
+ multiSample = ChooseSuitableFSAALevel( width, height, backbufferBPP, frontbufferBPP, depthBPP, multiSample );
+
+ // Find out best matched multi sample type.
+ // multiSampleTypes holds sample counts; pick the last entry that does not exceed the
+ // request (this relies on the list being in ascending order).
+ int msIdx = 0;
+ if( multiSample > 1 )
+ {
+ while( msIdx < bestCombo->multiSampleTypes.size() && bestCombo->multiSampleTypes[msIdx] <= multiSample )
+ ++msIdx;
+ --msIdx;
+ AssertIf( msIdx < 0 );
+ }
+ outParams.MultiSampleType = GetD3DMultiSampleType(bestCombo->multiSampleTypes[msIdx]);
+ outParams.MultiSampleQuality = 0;
+}
+
diff --git a/Runtime/GfxDevice/d3d/D3D9Enumeration.h b/Runtime/GfxDevice/d3d/D3D9Enumeration.h
new file mode 100644
index 0000000..240bb89
--- /dev/null
+++ b/Runtime/GfxDevice/d3d/D3D9Enumeration.h
@@ -0,0 +1,64 @@
+#pragma once
+
+#include "D3D9Includes.h"
+
+
+struct D3DDeviceCombo;
+
+typedef std::vector<DWORD> DwordVector;
+typedef std::vector<D3DDeviceCombo> D3DDeviceComboVector;
+
+
+// Vertex processing modes. The values are used as bit indices in
+// D3D9FormatCaps::m_VertexProcessings.
+enum D3DVertexProcessing {
+ kVPPureHardware,
+ kVPHardware,
+ kVPMixed,
+ kVPSoftware,
+};
+
+
+//---------------------------------------------------------------------------
+
+// A combo of adapter format, back buffer format, and windowed/fullscreen that
+// is compatible with a D3D device.
+struct D3DDeviceCombo {
+public:
+ // A depth/stencil buffer format that is incompatible with a multisample type.
+ struct MultiSampleConflict {
+ D3DFORMAT format;
+ D3DMULTISAMPLE_TYPE type;
+ };
+ typedef std::vector<MultiSampleConflict> MultiSampleConflictVector;
+public:
+ D3DFORMAT adapterFormat;
+ D3DFORMAT backBufferFormat;
+ bool isWindowed;
+ // Copied from D3DCAPS9::PresentationIntervals during enumeration.
+ DWORD presentationIntervals;
+
+ // D3DFORMAT values, in the order they were accepted during enumeration.
+ DwordVector depthStencilFormats;
+ // Supported multisample levels stored as sample counts (0 = none), ascending.
+ DwordVector multiSampleTypes;
+ MultiSampleConflictVector conflicts;
+};
+
+
+//---------------------------------------------------------------------------
+
+// Result of enumerating an adapter/device: supported device combos, vertex
+// processing modes, and the adapter format to use for capability checks.
+class D3D9FormatCaps {
+public:
+    // Every scalar member gets a defined value, so the getters are safe to call
+    // even before Enumerate() has run.
+    D3D9FormatCaps() : m_VertexProcessings(0), m_AdapterFormatForChecks(D3DFMT_UNKNOWN) { }
+
+    // Populates all members; returns false if enumeration failed.
+    bool Enumerate( IDirect3D9& d3d );
+
+    // Fills in BackBufferFormat, AutoDepthStencilFormat, PresentationInterval,
+    // MultiSampleType, MultiSampleQuality.
+    void FindBestPresentationParams( int width, int height, D3DFORMAT desktopMode, bool windowed, int vBlankCount, int multiSample, D3DPRESENT_PARAMETERS& outParams ) const;
+
+    // Gets adapter format for doing CheckDeviceFormat checks.
+    // Usually D3DFMT_X8R8G8B8, except for really old cards that can't do 32 bpp.
+    // D3DFMT_UNKNOWN until Enumerate() has been called.
+    D3DFORMAT GetAdapterFormatForChecks() const { return m_AdapterFormatForChecks; }
+
+public:
+    D3DDeviceComboVector m_Combos;
+    UInt32 m_VertexProcessings; // bitmask
+    D3DFORMAT m_AdapterFormatForChecks;
+};
diff --git a/Runtime/GfxDevice/d3d/D3D9Includes.h b/Runtime/GfxDevice/d3d/D3D9Includes.h
new file mode 100644
index 0000000..84596ff
--- /dev/null
+++ b/Runtime/GfxDevice/d3d/D3D9Includes.h
@@ -0,0 +1,7 @@
+#ifndef D3DINCLUDES_H
+#define D3DINCLUDES_H
+
+//#define D3D_DEBUG_INFO
+#include "External/DirectX/builds/dx9include/d3d9.h"
+
+#endif
diff --git a/Runtime/GfxDevice/d3d/D3D9Utils.cpp b/Runtime/GfxDevice/d3d/D3D9Utils.cpp
new file mode 100644
index 0000000..3e25633
--- /dev/null
+++ b/Runtime/GfxDevice/d3d/D3D9Utils.cpp
@@ -0,0 +1,169 @@
+#include "UnityPrefix.h"
+#include "D3D9Utils.h"
+#include "Runtime/Utilities/ArrayUtility.h"
+#include "Runtime/Shaders/GraphicsCaps.h"
+
+
+#ifdef DUMMY_D3D9_CALLS
+// Stand-in that the D3D9_CALL / D3D9_CALL_HR macros expand to when DUMMY_D3D9_CALLS
+// is defined; always reports success.
+HRESULT CallDummyD3D9Function()
+{
+ return S_OK;
+}
+#endif
+
+// Pairs an HRESULT with a human-readable message; used by GetD3D9Error.
+struct D3D9Error {
+ HRESULT hr;
+ const char* message;
+};
+
+// Lookup table of known result codes; searched linearly by GetD3D9Error.
+// Includes success codes (S_OK, D3DOK_NOAUTOGEN) as well as errors.
+static D3D9Error s_D3DErrors[] = {
+ { D3DOK_NOAUTOGEN, "no mipmap autogen" },
+ { D3DERR_WRONGTEXTUREFORMAT, "wrong texture format" },
+ { D3DERR_UNSUPPORTEDCOLOROPERATION, "unsupported color op" },
+ { D3DERR_UNSUPPORTEDCOLORARG, "unsupported color arg" },
+ { D3DERR_UNSUPPORTEDALPHAOPERATION, "unsupported alpha op" },
+ { D3DERR_UNSUPPORTEDALPHAARG, "unsupported alpha arg" },
+ { D3DERR_TOOMANYOPERATIONS, "too many texture operations" },
+ { D3DERR_CONFLICTINGTEXTUREFILTER, "conflicting texture filters" },
+ { D3DERR_UNSUPPORTEDFACTORVALUE, "unsupported factor value" },
+ { D3DERR_CONFLICTINGRENDERSTATE, "conflicting render states" },
+ { D3DERR_UNSUPPORTEDTEXTUREFILTER, "unsupported texture filter" },
+ { D3DERR_CONFLICTINGTEXTUREPALETTE, "conflicting texture palettes" },
+ { D3DERR_DRIVERINTERNALERROR, "internal driver error" },
+ { D3DERR_NOTFOUND, "requested item not found" },
+ { D3DERR_MOREDATA, "more data than fits into buffer" },
+ { D3DERR_DEVICELOST, "device lost" },
+ { D3DERR_DEVICENOTRESET, "device not reset" },
+ { D3DERR_NOTAVAILABLE, "queried technique not available" },
+ { D3DERR_OUTOFVIDEOMEMORY, "out of VRAM" },
+ { D3DERR_INVALIDDEVICE, "invalid device" },
+ { D3DERR_INVALIDCALL, "invalid call" },
+ { D3DERR_DRIVERINVALIDCALL, "driver invalid call" },
+ { D3DERR_WASSTILLDRAWING, "was still drawing" },
+ { S_OK, "S_OK" },
+ { E_FAIL, "E_FAIL" },
+ { E_INVALIDARG, "E_INVALIDARG" },
+ { E_OUTOFMEMORY, "out of memory" },
+};
+
+// Returns a human-readable description of a D3D result code.
+// Known codes return a string literal from s_D3DErrors. Unknown codes return a
+// pointer to a static buffer that is overwritten by the next such call, so the
+// result must be consumed before calling again and is not safe across threads.
+const char* GetD3D9Error( HRESULT hr )
+{
+    for( size_t i = 0; i < ARRAY_SIZE(s_D3DErrors); ++i )
+    {
+        if( hr == s_D3DErrors[i].hr )
+            return s_D3DErrors[i].message;
+    }
+
+    static char buffer[1000];
+    // %X expects an unsigned int; HRESULT is a signed long, so convert explicitly.
+    sprintf( buffer, "unknown error, code 0x%X", (unsigned int)hr );
+    return buffer;
+}
+
+// Returns the size of one pixel of the given format in BITS (callers that need
+// bytes divide by 8). For the block-compressed DXT formats this is the average
+// bits per pixel. Unknown formats log an error and are assumed to be 32 bits.
+int GetBPPFromD3DFormat( D3DFORMAT format )
+{
+    switch( format ) {
+    case D3DFMT_UNKNOWN:
+    case kD3D9FormatNULL:
+        return 0;
+    case D3DFMT_X8R8G8B8:
+    case D3DFMT_A8R8G8B8:
+    case D3DFMT_A2R10G10B10:
+    case D3DFMT_A2B10G10R10:
+    case D3DFMT_R8G8B8:
+    case D3DFMT_A8B8G8R8:
+    case D3DFMT_R32F:
+    case D3DFMT_D24X8:
+    case D3DFMT_D24S8:
+    case D3DFMT_D24X4S4:
+    // D32 is one of the depth formats the enumeration code may select, so it
+    // must not fall through to the "unknown format" error path.
+    case D3DFMT_D32:
+    case kD3D9FormatINTZ:
+    case kD3D9FormatRAWZ:
+        return 32;
+    case D3DFMT_X1R5G5B5:
+    case D3DFMT_A1R5G5B5:
+    case D3DFMT_A4R4G4B4:
+    case D3DFMT_X4R4G4B4:
+    case D3DFMT_R5G6B5:
+    case D3DFMT_R16F:
+    case D3DFMT_D16:
+    case D3DFMT_D15S1:
+    case D3DFMT_D16_LOCKABLE:
+    case D3DFMT_L16:
+    case D3DFMT_A8L8:
+    case kD3D9FormatDF16:
+        return 16;
+    case D3DFMT_A16B16G16R16F:
+        return 64;
+    case D3DFMT_A32B32G32R32F:
+        return 128;
+    case D3DFMT_DXT1:
+        return 4;
+    case D3DFMT_A8:
+    case D3DFMT_L8:
+    case D3DFMT_DXT3:
+    case D3DFMT_DXT5:
+        return 8;
+    default:
+        ErrorString( Format("Unknown D3D format %x", format) );
+        return 32;
+    }
+}
+
+// Returns the number of stencil bits of a depth/stencil format;
+// 0 for formats without stencil and for formats not known here.
+int GetStencilBitsFromD3DFormat (D3DFORMAT fmt)
+{
+    if( fmt == D3DFMT_D24S8 )
+        return 8;
+    if( fmt == D3DFMT_D24X4S4 )
+        return 4;
+    if( fmt == D3DFMT_D15S1 )
+        return 1;
+    return 0;
+}
+
+// Converts a sample count to the corresponding D3DMULTISAMPLE_TYPE.
+// 0 and 1 both mean "no multisampling". Counts outside 0..16 log an error and
+// fall back to D3DMULTISAMPLE_NONE.
+D3DMULTISAMPLE_TYPE GetD3DMultiSampleType (int samples)
+{
+ // Optimizer should take care of this, since value of D3DMULTISAMPLE_N_SAMPLES is N
+ switch( samples ) {
+ case 0:
+ case 1: return D3DMULTISAMPLE_NONE;
+ case 2: return D3DMULTISAMPLE_2_SAMPLES;
+ case 3: return D3DMULTISAMPLE_3_SAMPLES;
+ case 4: return D3DMULTISAMPLE_4_SAMPLES;
+ case 5: return D3DMULTISAMPLE_5_SAMPLES;
+ case 6: return D3DMULTISAMPLE_6_SAMPLES;
+ case 7: return D3DMULTISAMPLE_7_SAMPLES;
+ case 8: return D3DMULTISAMPLE_8_SAMPLES;
+ case 9: return D3DMULTISAMPLE_9_SAMPLES;
+ case 10: return D3DMULTISAMPLE_10_SAMPLES;
+ case 11: return D3DMULTISAMPLE_11_SAMPLES;
+ case 12: return D3DMULTISAMPLE_12_SAMPLES;
+ case 13: return D3DMULTISAMPLE_13_SAMPLES;
+ case 14: return D3DMULTISAMPLE_14_SAMPLES;
+ case 15: return D3DMULTISAMPLE_15_SAMPLES;
+ case 16: return D3DMULTISAMPLE_16_SAMPLES;
+ default:
+ ErrorString("Unknown sample count");
+ return D3DMULTISAMPLE_NONE;
+ }
+}
+
+// Returns true if a D3DQUERYTYPE_VERTEXSTATS query can be created on the device.
+// Callers use this as the indicator that the D3D debug runtime is active.
+bool CheckD3D9DebugRuntime (IDirect3DDevice9* dev)
+{
+    IDirect3DQuery9* vertexStatsQuery = NULL;
+    if( FAILED( dev->CreateQuery (D3DQUERYTYPE_VERTEXSTATS, &vertexStatsQuery) ) )
+        return false;
+
+    // The query object itself is not needed, only the fact that it could be created.
+    vertexStatsQuery->Release ();
+    return true;
+}
+
+
+// Creates a depth/stencil surface of the given size, format and multisample setup.
+// Only m_Surface of the result is filled in; m_Texture stays NULL.
+// On failure the returned object has a NULL m_Surface, which callers must check.
+D3D9DepthStencilTexture CreateDepthStencilTextureD3D9 (IDirect3DDevice9* dev, int width, int height, D3DFORMAT format, D3DMULTISAMPLE_TYPE msType, DWORD msQuality, BOOL discardable)
+{
+    D3D9DepthStencilTexture tex;
+
+    HRESULT hr = dev->CreateDepthStencilSurface (width, height, format, msType, msQuality, discardable, &tex.m_Surface, NULL);
+    if( FAILED(hr) )
+    {
+        // Nothing was created: hand back an empty result and do not register an
+        // allocation for a surface that does not exist.
+        tex.m_Surface = NULL;
+        return tex;
+    }
+
+    // NOTE(review): GetBPPFromD3DFormat returns bits per pixel and other callers divide
+    // by 8 to get bytes - confirm which unit this macro expects.
+    REGISTER_EXTERNAL_GFX_ALLOCATION_REF(tex.m_Surface, width * height * GetBPPFromD3DFormat(format), NULL);
+
+    return tex;
+}
diff --git a/Runtime/GfxDevice/d3d/D3D9Utils.h b/Runtime/GfxDevice/d3d/D3D9Utils.h
new file mode 100644
index 0000000..529e58b
--- /dev/null
+++ b/Runtime/GfxDevice/d3d/D3D9Utils.h
@@ -0,0 +1,69 @@
+#pragma once
+
+#include "D3D9Includes.h"
+#include "Runtime/GfxDevice/GfxDeviceTypes.h"
+
+//#define DUMMY_D3D9_CALLS
+
+#ifndef DUMMY_D3D9_CALLS
+#define D3D9_CALL(x) x
+#define D3D9_CALL_HR(x) x
+#else
+HRESULT CallDummyD3D9Function();
+#define D3D9_CALL(x) CallDummyD3D9Function()
+#define D3D9_CALL_HR(x) CallDummyD3D9Function()
+#endif
+
+
+const char* GetD3D9Error( HRESULT hr );
+int GetBPPFromD3DFormat( D3DFORMAT format );
+int GetStencilBitsFromD3DFormat (D3DFORMAT fmt);
+D3DMULTISAMPLE_TYPE GetD3DMultiSampleType (int samples);
+
+bool CheckD3D9DebugRuntime (IDirect3DDevice9* dev);
+
+// Holds a depth/stencil surface and, optionally, the texture it belongs to.
+// Both pointers start out NULL. There is no destructor: Release() must be called
+// explicitly to drop the references.
+struct D3D9DepthStencilTexture {
+ D3D9DepthStencilTexture() : m_Texture(NULL), m_Surface(NULL) {}
+
+ IDirect3DTexture9* m_Texture;
+ IDirect3DSurface9* m_Surface;
+
+ // Unregisters and releases whichever of the two objects is set, then NULLs the pointer.
+ // Safe to call repeatedly.
+ void Release() {
+ if (m_Texture) {
+ REGISTER_EXTERNAL_GFX_DEALLOCATION(m_Texture);
+ m_Texture->Release();
+ m_Texture = NULL;
+ }
+ if (m_Surface) {
+ REGISTER_EXTERNAL_GFX_DEALLOCATION(m_Surface);
+ m_Surface->Release();
+ m_Surface = NULL;
+ }
+ }
+};
+
+// FOURCC-coded formats that are not part of the D3DFORMAT enum.
+// GetBPPFromD3DFormat treats DF16 as 16 bits, INTZ/RAWZ as 32 bits and NULL as 0 bits.
+// NOTE(review): presumably vendor-specific depth / dummy render target formats - confirm.
+const D3DFORMAT kD3D9FormatDF16 = (D3DFORMAT)MAKEFOURCC('D','F','1','6');
+const D3DFORMAT kD3D9FormatINTZ = (D3DFORMAT)MAKEFOURCC('I','N','T','Z');
+const D3DFORMAT kD3D9FormatRAWZ = (D3DFORMAT)MAKEFOURCC('R','A','W','Z');
+const D3DFORMAT kD3D9FormatNULL = (D3DFORMAT)MAKEFOURCC('N','U','L','L');
+const D3DFORMAT kD3D9FormatRESZ = (D3DFORMAT)MAKEFOURCC('R','E','S','Z');
+
+
+D3D9DepthStencilTexture CreateDepthStencilTextureD3D9 (
+ IDirect3DDevice9* dev, int width, int height, D3DFORMAT format,
+ D3DMULTISAMPLE_TYPE msType, DWORD msQuality, BOOL discardable );
+
+// Maps a (shader type, texture unit) pair to the D3D9 sampler index.
+// Fragment shader units map directly; vertex shader units are offset by
+// D3DVERTEXTEXTURESAMPLER0. Any other shader type asserts and yields sampler 0.
+static inline DWORD GetD3D9SamplerIndex (ShaderType type, int unit)
+{
+    switch (type) {
+    case kShaderVertex:
+        DebugAssert (unit >= 0 && unit < 4); // DX9 has limit of 4 vertex samplers
+        return unit + D3DVERTEXTEXTURESAMPLER0;
+    case kShaderFragment:
+        DebugAssert (unit >= 0 && unit < kMaxSupportedTextureUnits);
+        return unit;
+    default:
+        // A bare string literal is a non-null pointer and would always pass the
+        // assert; negating it makes the assert fire while keeping the message.
+        Assert (!"Unsupported shader type for sampler");
+        return 0;
+    }
+}
diff --git a/Runtime/GfxDevice/d3d/D3D9VBO.cpp b/Runtime/GfxDevice/d3d/D3D9VBO.cpp
new file mode 100644
index 0000000..19cc409
--- /dev/null
+++ b/Runtime/GfxDevice/d3d/D3D9VBO.cpp
@@ -0,0 +1,815 @@
+#include "UnityPrefix.h"
+#include "D3D9VBO.h"
+#include "D3D9Context.h"
+#include "Runtime/GfxDevice/GfxDevice.h"
+#include "D3D9Utils.h"
+#include "GfxDeviceD3D9.h"
+#include "Runtime/Profiler/MemoryProfiler.h"
+
+
+// defined in GfxDeviceD3D9.cpp
+IDirect3DVertexDeclaration9* GetD3DVertexDeclaration( UInt32 shaderChannelsMap );
+void UpdateChannelBindingsD3D( const ChannelAssigns& channels );
+
+
+// Define this to 1 to make VBO operations randomly fail.
+// Use this to test error checking code.
+#define DEBUG_RANDOMLY_FAIL_D3D_VBO 0
+
+
+// Guard: random failures may only be compiled into DEBUGMODE builds.
+#if !DEBUGMODE && DEBUG_RANDOMLY_FAIL_D3D_VBO
+#error Never enable random VBO failures on release code!
+#endif
+
+// RANDOM_FAIL_FOR_DEBUG is written directly after a size or offset argument
+// (e.g. "newSize RANDOM_FAIL_FOR_DEBUG"). When failures are enabled it expands
+// to a subtraction that, one time in eight, takes 100000000 off the argument;
+// otherwise it expands to nothing and the argument is passed unchanged.
+#if DEBUG_RANDOMLY_FAIL_D3D_VBO
+#define RANDOM_FAIL_FOR_DEBUG - ((rand()%8==0) ? 100000000 : 0)
+#else
+#define RANDOM_FAIL_FOR_DEBUG
+#endif
+
+
+// Lookup table from GfxPrimitiveType (used as the index) to the D3D9 primitive type.
+// NOTE(review): the third entry presumably corresponds to kPrimitiveQuads, which
+// DrawVBO draws as a triangle list after expanding the indices - confirm enum order.
+static const D3DPRIMITIVETYPE kTopologyD3D9[kPrimitiveTypeCount] =
+{
+ D3DPT_TRIANGLELIST,
+ D3DPT_TRIANGLESTRIP,
+ D3DPT_TRIANGLELIST, //@TODO: make work
+ D3DPT_LINELIST,
+ D3DPT_LINESTRIP,
+ D3DPT_POINTLIST,
+};
+
+
+// -----------------------------------------------------------------------------
+
+// Dynamic index buffer shared by all D3D9VBO instances (see MapDynamicIndexBuffer),
+// its capacity in bytes, and the number of bytes already handed out from it.
+IDirect3DIndexBuffer9* D3D9VBO::ms_CustomIB = NULL;
+int D3D9VBO::ms_CustomIBSize = 0;
+UInt32 D3D9VBO::ms_CustomIBUsedBytes = 0;
+
+// Creates an empty VBO: zero vertices, no index buffer, and no vertex stream
+// buffers or cached vertex declarations.
+D3D9VBO::D3D9VBO()
+: m_VertexCount(0) // read by UpdateVertexStream before UpdateVertexData first assigns it
+, m_IB(NULL)
+, m_IBSize(0)
+{
+	memset(m_VertexDecls, 0, sizeof(m_VertexDecls));
+	memset(m_VBStreams, 0, sizeof(m_VBStreams));
+}
+
+// Unregisters and releases every vertex stream buffer and the index buffer.
+// Each Release() is expected to drop the last reference.
+D3D9VBO::~D3D9VBO ()
+{
+	for( int streamIdx = 0; streamIdx < kMaxVertexStreams; ++streamIdx )
+	{
+		if( m_VBStreams[streamIdx] == NULL )
+			continue;
+
+		REGISTER_EXTERNAL_GFX_DEALLOCATION(m_VBStreams[streamIdx]);
+		ULONG remainingRefs = m_VBStreams[streamIdx]->Release();
+		AssertIf( remainingRefs != 0 );
+		m_VBStreams[streamIdx] = NULL;
+	}
+
+	if( m_IB != NULL )
+	{
+		REGISTER_EXTERNAL_GFX_DEALLOCATION(m_IB);
+		ULONG remainingRefs = m_IB->Release();
+		AssertIf( remainingRefs != 0 );
+		m_IB = NULL;
+	}
+}
+
+
+// Drops the D3D buffers of all streams in dynamic mode. Called on every VBO;
+// streams in any other mode are left untouched.
+void D3D9VBO::ResetDynamicVB()
+{
+	for( int streamIdx = 0; streamIdx < kMaxVertexStreams; ++streamIdx )
+	{
+		if( m_StreamModes[streamIdx] != kStreamModeDynamic )
+			continue;
+
+		// Vertex buffer can be null when switching fullscreen in web player.
+		// There we lose device a couple of times, and ResetDynamicVB is called several
+		// times in succession.
+		REGISTER_EXTERNAL_GFX_DEALLOCATION(m_VBStreams[streamIdx]);
+		SAFE_RELEASE( m_VBStreams[streamIdx] );
+	}
+}
+
+// Releases the shared dynamic index buffer, if one exists, and resets its
+// size and usage counters.
+void D3D9VBO::CleanupSharedIndexBuffer()
+{
+	if( ms_CustomIB == NULL )
+		return;
+
+	REGISTER_EXTERNAL_GFX_DEALLOCATION(ms_CustomIB);
+	ULONG remainingRefs = ms_CustomIB->Release();
+	AssertIf( remainingRefs != 0 );
+
+	ms_CustomIB = NULL;
+	ms_CustomIBSize = 0;
+	ms_CustomIBUsedBytes = 0;
+}
+
+// Binds this VBO's vertex streams and vertex declaration on the device, then
+// updates channel bindings for the given channel assignment.
+// If the channels request a Color source that this VBO does not provide, the
+// device's all-white vertex stream is bound on an unused stream slot instead.
+void D3D9VBO::BindVertexStreams( IDirect3DDevice9* dev, const ChannelAssigns& channels )
+{
+ // Bind every existing stream; remember the last stream slot that has no buffer.
+ int freeStream = -1;
+ for( int s = 0; s < kMaxVertexStreams; s++ )
+ {
+ if( m_VBStreams[s] )
+ D3D9_CALL( dev->SetStreamSource( s, m_VBStreams[s], 0, m_Streams[s].stride ) );
+ else
+ freeStream = s;
+ }
+ int declIndex = kVertexDeclDefault;
+ // Color is requested but this VBO has no valid color channel.
+ if ((channels.GetSourceMap() & VERTEX_FORMAT1(Color)) && !m_ChannelInfo[kShaderChannelColor].IsValid())
+ {
+ if (freeStream != -1)
+ {
+ declIndex = kVertexDeclAllWhiteStream;
+ // Build the alternate declaration lazily: a copy of the channel layout with
+ // color redirected to the free stream at offset 0. The result is cached.
+ if (!m_VertexDecls[declIndex])
+ {
+ ChannelInfoArray channelInfo;
+ memcpy(&channelInfo, m_ChannelInfo, sizeof(channelInfo));
+ ChannelInfo& colorInfo = channelInfo[kShaderChannelColor];
+ colorInfo.stream = freeStream;
+ colorInfo.offset = 0;
+ colorInfo.format = kChannelFormatColor;
+ colorInfo.dimension = 1;
+ m_VertexDecls[declIndex] = GetD3D9GfxDevice().GetVertexDecls().GetVertexDecl( channelInfo );
+ }
+ IDirect3DVertexBuffer9* whiteVB = GetD3D9GfxDevice().GetAllWhiteVertexStream();
+ D3D9_CALL( dev->SetStreamSource( freeStream, whiteVB, 0, sizeof(D3DCOLOR) ) );
+ }
+ else
+ ErrorString("Need a free stream to add default vertex colors!");
+ }
+ // When no free stream was available, the default declaration is used.
+ D3D9_CALL( dev->SetVertexDeclaration( m_VertexDecls[declIndex] ) );
+ UpdateChannelBindingsD3D( channels );
+}
+
+// Updates the D3D vertex buffer backing one stream from sourceData.
+// - Stores the new stream layout in m_Streams[stream].
+// - A new size of zero releases the stream's buffer.
+// - The buffer is (re)created when missing or when its byte size changes;
+//   dynamic streams use the default pool, all others the managed pool.
+// - Contents are copied only when sourceData.buffer is non-null.
+// Failures are logged to the console; after a failed creation the stream
+// buffer is left NULL.
+void D3D9VBO::UpdateVertexStream( const VertexBufferData& sourceData, unsigned stream )
+{
+	DebugAssert( !m_IsStreamMapped[stream] );
+	const StreamInfo& srcStream = sourceData.streams[stream];
+	// The old size must be computed before m_Streams[stream] is overwritten.
+	int oldSize = CalculateVertexStreamSize(m_Streams[stream], m_VertexCount);
+	int newSize = CalculateVertexStreamSize(srcStream, sourceData.vertexCount);
+	m_Streams[stream] = srcStream;
+	if (newSize == 0)
+	{
+		REGISTER_EXTERNAL_GFX_DEALLOCATION(m_VBStreams[stream]);
+		SAFE_RELEASE( m_VBStreams[stream] );
+		return;
+	}
+
+	const bool isDynamic = (m_StreamModes[stream] == kStreamModeDynamic);
+	DWORD usage = isDynamic ? (D3DUSAGE_DYNAMIC | D3DUSAGE_WRITEONLY) : (D3DUSAGE_WRITEONLY);
+	D3DPOOL pool = isDynamic ? D3DPOOL_DEFAULT : D3DPOOL_MANAGED;
+
+	if( m_VBStreams[stream] == NULL || newSize != oldSize )
+	{
+		REGISTER_EXTERNAL_GFX_DEALLOCATION(m_VBStreams[stream]);
+		SAFE_RELEASE( m_VBStreams[stream] );
+		IDirect3DDevice9* dev = GetD3DDevice();
+		HRESULT hr = dev->CreateVertexBuffer( newSize RANDOM_FAIL_FOR_DEBUG, usage, 0, pool, &m_VBStreams[stream], NULL );
+		if( FAILED(hr) )
+		{
+			// Do not rely on the runtime to clear the out pointer on failure.
+			m_VBStreams[stream] = NULL;
+			printf_console( "d3d: failed to create vertex buffer of size %d [%s]\n", newSize, GetD3D9Error(hr) );
+			return;
+		}
+		// Register the allocation only once the buffer really exists.
+		REGISTER_EXTERNAL_GFX_ALLOCATION_REF(m_VBStreams[stream],newSize,this);
+	}
+
+	// Don't update contents if there is no source data.
+	// This is used to update the vertex declaration only, leaving buffer intact.
+	// Also to create an empty buffer that is written to later.
+	if (!sourceData.buffer)
+		return;
+
+	UInt8* buffer;
+	HRESULT hr = m_VBStreams[stream]->Lock( 0 RANDOM_FAIL_FOR_DEBUG, 0, (void**)&buffer, isDynamic ? D3DLOCK_DISCARD : 0 );
+	if( FAILED(hr) )
+	{
+		printf_console( "d3d: failed to lock vertex buffer %p [%s]\n", m_VBStreams[stream], GetD3D9Error(hr) );
+		return;
+	}
+	CopyVertexStream( sourceData, buffer, stream );
+
+	m_VBStreams[stream]->Unlock();
+}
+
+
+// Copies the source indices into the already-created index buffer.
+// With no source indices the recorded index buffer size is reset to zero.
+// A failed lock is logged and leaves the buffer contents unchanged.
+void D3D9VBO::UpdateIndexBufferData (const IndexBufferData& sourceData)
+{
+	if( sourceData.indices )
+	{
+		AssertIf( !m_IB );
+
+		UInt8* mapped;
+		const HRESULT lockResult = m_IB->Lock( 0 RANDOM_FAIL_FOR_DEBUG, 0, (void**)&mapped, 0 );
+		if( FAILED(lockResult) )
+		{
+			printf_console( "d3d: failed to lock index buffer %p [%s]\n", m_IB, GetD3D9Error(lockResult) );
+			return;
+		}
+
+		memcpy (mapped, sourceData.indices, sourceData.count * kVBOIndexSize);
+		m_IB->Unlock();
+		return;
+	}
+
+	m_IBSize = 0;
+}
+
+// Locks the whole vertex buffer of the given stream for CPU access.
+// On success fills outData with the mapped pointer, channel mask, stride and
+// vertex count, marks the stream as mapped and returns true.
+// Returns false (after logging) when the stream has no buffer or the lock fails.
+bool D3D9VBO::MapVertexStream( VertexStreamData& outData, unsigned stream )
+{
+ if( m_VBStreams[stream] == NULL )
+ {
+ printf_console( "d3d: attempt to map null vertex buffer\n" );
+ return false;
+ }
+ DebugAssertIf( IsVertexBufferLost() );
+ AssertIf( m_IsStreamMapped[stream] );
+
+ const bool isDynamic = (m_StreamModes[stream] == kStreamModeDynamic);
+
+ UInt8* buffer;
+ int vbSize = CalculateVertexStreamSize(m_Streams[stream], m_VertexCount);
+ // Dynamic streams are locked with DISCARD; other streams with no lock flags.
+ HRESULT hr = m_VBStreams[stream]->Lock( 0 RANDOM_FAIL_FOR_DEBUG, 0, (void**)&buffer, isDynamic ? D3DLOCK_DISCARD : 0 );
+ if( FAILED(hr) )
+ {
+ printf_console( "d3d: failed to map vertex buffer %p of size %i [%s]\n", m_VBStreams[stream], vbSize, GetD3D9Error(hr) );
+ return false;
+ }
+ m_IsStreamMapped[stream] = true;
+
+ outData.buffer = buffer;
+ outData.channelMask = m_Streams[stream].channelMask;
+ outData.stride = m_Streams[stream].stride;
+ outData.vertexCount = m_VertexCount;
+
+ // The full stream size is reported to the frame stats as uploaded.
+ GetRealGfxDevice().GetFrameStats().AddUploadVBO( vbSize );
+
+ return true;
+}
+
+// Unlocks a stream previously locked by MapVertexStream and clears its mapped flag.
+// The stream is expected to have a buffer and to be currently mapped.
+void D3D9VBO::UnmapVertexStream( unsigned stream )
+{
+ DebugAssert( m_VBStreams[stream] );
+ AssertIf( !m_IsStreamMapped[stream] );
+ m_IsStreamMapped[stream] = false;
+ m_VBStreams[stream]->Unlock();
+}
+
+// Returns true when any stream that declares channels has no D3D buffer.
+bool D3D9VBO::IsVertexBufferLost() const
+{
+	for( int streamIdx = 0; streamIdx < kMaxVertexStreams; ++streamIdx )
+	{
+		if( !m_Streams[streamIdx].channelMask )
+			continue; // stream carries no channels, so it needs no buffer
+		if( !m_VBStreams[streamIdx] )
+			return true;
+	}
+	return false;
+}
+
+// Returns the memory attributed to this VBO by the memory profiler, or 0 when
+// the profiler is compiled out. The sum covers allocations related to this
+// object by pointer and by ID.
+// NOTE(review): (UInt32)this would truncate the pointer on a 64-bit build - confirm target.
+int D3D9VBO::GetRuntimeMemorySize() const
+{
+#if ENABLE_MEM_PROFILER
+ return GetMemoryProfiler()->GetRelatedMemorySize(this)
+ + GetMemoryProfiler()->GetRelatedIDMemorySize((UInt32)this);
+#else
+ return 0;
+#endif
+/* int vertexSize = 0;
+ for( int s = 0; s < kMaxVertexStreams; s++ )
+ vertexSize += m_Streams[s].stride;
+
+ return vertexSize * m_VertexCount + m_IBSize;*/
+}
+
+
+// Issues an indexed draw call from this VBO.
+// Does nothing when there are no indices; logs and returns when stream 0 or
+// the index buffer is missing. Quad topology is drawn by expanding the source
+// indices into the shared dynamic index buffer and drawing a triangle list.
+void D3D9VBO::DrawVBO (const ChannelAssigns& channels, UInt32 firstIndexByte, UInt32 indexCount, GfxPrimitiveType topology, UInt32 firstVertex, UInt32 vertexCount)
+{
+ // just return if no indices
+ if( m_IBSize == 0 )
+ return;
+
+ HRESULT hr;
+
+ if( m_VBStreams[0] == NULL || m_IB == NULL )
+ {
+ printf_console( "d3d: VB or IB is null\n" );
+ return;
+ }
+
+ GfxDevice& device = GetRealGfxDevice();
+ IDirect3DDevice9* dev = GetD3DDevice();
+
+ BindVertexStreams( dev, channels );
+ device.BeforeDrawCall( false );
+
+ if (topology == kPrimitiveQuads)
+ {
+ // Reserve 6 output indices for every 4 source indices.
+ UInt32 ibBytesLocked;
+ UInt16* ibPtr = MapDynamicIndexBuffer (indexCount/4*6, ibBytesLocked);
+ if (!ibPtr)
+ return;
+ // Read the source indices back from this VBO's own index buffer.
+ // UpdateIndexData omits D3DUSAGE_WRITEONLY when the data has quad topologies.
+ const UInt16* ibSrc = NULL;
+ hr = m_IB->Lock (firstIndexByte, indexCount*kVBOIndexSize, (void**)&ibSrc, D3DLOCK_READONLY);
+ if (FAILED(hr))
+ {
+ UnmapDynamicIndexBuffer();
+ return;
+ }
+ FillIndexBufferForQuads (ibPtr, ibBytesLocked, ibSrc, indexCount/4);
+ m_IB->Unlock ();
+ UnmapDynamicIndexBuffer ();
+ // Draw from where the expanded indices were written, then advance the shared buffer's used-bytes counter.
+ firstIndexByte = ms_CustomIBUsedBytes;
+ ms_CustomIBUsedBytes += ibBytesLocked;
+ D3D9_CALL(dev->SetIndices(ms_CustomIB));
+ }
+ else
+ {
+ D3D9_CALL(dev->SetIndices( m_IB ));
+ }
+
+ // draw
+ D3DPRIMITIVETYPE primType = kTopologyD3D9[topology];
+ int primCount = GetPrimitiveCount (indexCount, topology, false);
+ // firstIndexByte/2 converts the byte offset to a start index (index buffers here are D3DFMT_INDEX16).
+ hr = D3D9_CALL_HR(dev->DrawIndexedPrimitive (primType, 0, firstVertex, vertexCount, firstIndexByte/2, primCount));
+ Assert(SUCCEEDED(hr));
+
+ device.GetFrameStats().AddDrawCall (primCount, vertexCount);
+}
+
+// Reserves space for indexCount 16-bit indices in the shared dynamic index
+// buffer and locks that range for writing.
+// - indexCount is clamped to kMaxIndices (an assert flags larger requests).
+// - The shared buffer is (re)created when the request exceeds its capacity;
+//   it is always at least 32 KB.
+// - When the request does not fit after the bytes already used, the buffer is
+//   locked from offset 0 with DISCARD and the used-bytes counter restarts;
+//   otherwise the next free range is locked with NOOVERWRITE.
+// On success returns the write pointer and stores the locked byte count in
+// outBytesUsed. The caller advances ms_CustomIBUsedBytes after drawing.
+// On failure logs, returns NULL and leaves outBytesUsed untouched.
+UInt16* D3D9VBO::MapDynamicIndexBuffer (int indexCount, UInt32& outBytesUsed)
+{
+	HRESULT hr;
+	const UInt32 kMaxIndices = 64000; // Smaller threshold than absolutely necessary
+	Assert (indexCount <= kMaxIndices);
+	indexCount = std::min<UInt32>(indexCount, kMaxIndices);
+	int ibCapacity = indexCount * kVBOIndexSize;
+	int newIBSize = std::max (ibCapacity, 32*1024); // 32k IB at least
+
+	if (newIBSize > ms_CustomIBSize)
+	{
+		if (ms_CustomIB)
+		{
+			REGISTER_EXTERNAL_GFX_DEALLOCATION(ms_CustomIB);
+			ms_CustomIB->Release();
+			ms_CustomIB = NULL;
+		}
+		// Until the new buffer exists there is no capacity at all; this makes a
+		// later call retry the creation instead of locking a released buffer.
+		ms_CustomIBSize = 0;
+		ms_CustomIBUsedBytes = 0;
+
+		IDirect3DDevice9* dev = GetD3DDevice();
+		hr = dev->CreateIndexBuffer (newIBSize RANDOM_FAIL_FOR_DEBUG, D3DUSAGE_WRITEONLY | D3DUSAGE_DYNAMIC, D3DFMT_INDEX16, D3DPOOL_DEFAULT , &ms_CustomIB, NULL);
+		if( FAILED(hr) )
+		{
+			ms_CustomIB = NULL;
+			printf_console ("d3d: failed to create custom index buffer of size %d [%s]\n", newIBSize, GetD3D9Error(hr));
+			return NULL;
+		}
+		// Record the size and register the allocation only once it succeeded.
+		ms_CustomIBSize = newIBSize;
+		REGISTER_EXTERNAL_GFX_ALLOCATION_REF(ms_CustomIB,ms_CustomIBSize,0);
+	}
+
+	UInt16* buffer;
+	if (ms_CustomIBUsedBytes + ibCapacity > ms_CustomIBSize)
+	{
+		// Not enough room left: start over at the beginning of the buffer.
+		hr = ms_CustomIB->Lock (0 RANDOM_FAIL_FOR_DEBUG, ibCapacity, (void**)&buffer, D3DLOCK_DISCARD);
+		if (FAILED(hr))
+		{
+			printf_console ("d3d: failed to lock shared index buffer with discard [%s]\n", GetD3D9Error(hr));
+			return NULL;
+		}
+		ms_CustomIBUsedBytes = 0;
+	}
+	else
+	{
+		// Append after the range already in use.
+		hr = ms_CustomIB->Lock (ms_CustomIBUsedBytes RANDOM_FAIL_FOR_DEBUG, ibCapacity, (void**)&buffer, D3DLOCK_NOOVERWRITE);
+		if (FAILED(hr))
+		{
+			printf_console ("d3d: failed to lock shared index buffer, offset %i size %i [%s]\n", ms_CustomIBUsedBytes, ibCapacity, GetD3D9Error(hr));
+			return NULL;
+		}
+	}
+	outBytesUsed = ibCapacity;
+
+	return buffer;
+}
+
+// Unlocks the shared dynamic index buffer; does not check that the buffer exists.
+void D3D9VBO::UnmapDynamicIndexBuffer ()
+{
+ ms_CustomIB->Unlock();
+}
+
+
+#if GFX_ENABLE_DRAW_CALL_BATCHING
+ // Draws this VBO's vertices using caller-supplied indices.
+ // The indices are copied into the shared dynamic index buffer and drawn from
+ // there. Logs and returns when stream 0 has no buffer; returns silently when
+ // the shared index buffer cannot be mapped.
+ void D3D9VBO::DrawCustomIndexed( const ChannelAssigns& channels, void* indices, UInt32 indexCount,
+ GfxPrimitiveType topology, UInt32 vertexRangeBegin, UInt32 vertexRangeEnd, UInt32 drawVertexCount )
+ {
+ Assert(!m_IsStreamMapped[0]);
+
+ if (m_VBStreams[0] == NULL)
+ {
+ printf_console( "d3d: VB is null\n" );
+ return;
+ }
+ UInt32 ibBytesUsed;
+ UInt16* ibPtr = MapDynamicIndexBuffer (indexCount, ibBytesUsed);
+ if (!ibPtr)
+ return;
+ // NOTE(review): MapDynamicIndexBuffer clamps the index count, but primCount below
+ // is computed from the unclamped indexCount - confirm callers stay under the limit.
+ memcpy (ibPtr, indices, ibBytesUsed);
+ UnmapDynamicIndexBuffer ();
+
+ GfxDevice& device = GetRealGfxDevice();
+ IDirect3DDevice9* dev = GetD3DDevice();
+ HRESULT hr;
+
+ BindVertexStreams( dev, channels );
+ device.BeforeDrawCall( false );
+
+ D3D9_CALL(dev->SetIndices( ms_CustomIB ));
+
+ D3DPRIMITIVETYPE primType = kTopologyD3D9[topology];
+ int primCount = GetPrimitiveCount (indexCount, topology, false);
+ // The start index is the offset at which the indices were just written.
+ hr = D3D9_CALL_HR(dev->DrawIndexedPrimitive(primType, 0, vertexRangeBegin, vertexRangeEnd-vertexRangeBegin, ms_CustomIBUsedBytes / kVBOIndexSize, primCount));
+ Assert(SUCCEEDED(hr));
+ // Advance the shared buffer's used-bytes counter only after the draw has used the old offset.
+ ms_CustomIBUsedBytes += ibBytesUsed;
+
+ device.GetFrameStats().AddDrawCall (primCount, drawVertexCount);
+ }
+#endif
+
+
+// Updates all vertex streams from the given buffer description, then stores
+// the new channel layout, rebuilds the default vertex declaration (dropping
+// any cached alternates) and records the new vertex count.
+void D3D9VBO::UpdateVertexData( const VertexBufferData& buffer )
+{
+ // Old vertex count and streams are still used here
+ for (unsigned stream = 0; stream < kMaxVertexStreams; stream++)
+ UpdateVertexStream( buffer, stream );
+
+ memcpy( m_ChannelInfo, buffer.channels, sizeof(m_ChannelInfo) );
+ memset( m_VertexDecls, 0, sizeof(m_VertexDecls) );
+ m_VertexDecls[kVertexDeclDefault] = GetD3D9GfxDevice().GetVertexDecls().GetVertexDecl( m_ChannelInfo );
+ m_VertexCount = buffer.vertexCount;
+}
+
+// Ensures a managed-pool 16-bit index buffer of the right size exists, then
+// uploads the indices via UpdateIndexBufferData.
+// The buffer is created on first use and re-created whenever the required
+// size changes. It is write-only unless the data contains quad topologies;
+// DrawVBO reads the indices back in that case.
+// If creation fails the error is logged and the VBO is left with no index
+// buffer and an index size of zero, so DrawVBO skips drawing.
+void D3D9VBO::UpdateIndexData (const IndexBufferData& buffer)
+{
+	IDirect3DDevice9* dev = GetD3DDevice();
+	int newSize = CalculateIndexBufferSize(buffer);
+
+	// Drop the existing buffer when the size changes. Keep the old pointer value
+	// only for the diagnostic message; it must not be dereferenced afterwards.
+	IDirect3DIndexBuffer9* oldIB = NULL;
+	if( m_IB && newSize != m_IBSize )
+	{
+		oldIB = m_IB;
+		REGISTER_EXTERNAL_GFX_DEALLOCATION(m_IB);
+		m_IB->Release();
+		m_IB = NULL;
+		m_IBSize = 0;
+	}
+
+	if( !m_IB )
+	{
+		// initially, create a static buffer
+		const DWORD usage = (buffer.hasTopologies & (1<<kPrimitiveQuads)) ? 0 : D3DUSAGE_WRITEONLY;
+		HRESULT hr = dev->CreateIndexBuffer( newSize RANDOM_FAIL_FOR_DEBUG, usage, D3DFMT_INDEX16, D3DPOOL_MANAGED, &m_IB, NULL );
+		if( FAILED(hr) )
+		{
+			// Never leave a dangling pointer or a stale size behind.
+			m_IB = NULL;
+			m_IBSize = 0;
+			if( oldIB )
+				printf_console( "d3d: failed to resize index buffer %p to size %d [%s]\n", oldIB, newSize, GetD3D9Error(hr) );
+			else
+				printf_console( "d3d: failed to create index buffer of size %d [%s]\n", newSize, GetD3D9Error(hr) );
+			return;
+		}
+		// Register the allocation only once the buffer really exists.
+		REGISTER_EXTERNAL_GFX_ALLOCATION_REF(m_IB,newSize,this);
+	}
+
+	m_IBSize = newSize;
+	UpdateIndexBufferData(buffer);
+}
+
+// -----------------------------------------------------------------------------
+
+
+// Records the initial vertex/index buffer sizes in bytes. No D3D resources
+// are created here; all buffer pointers start out NULL.
+DynamicD3D9VBO::DynamicD3D9VBO( UInt32 vbSize, UInt32 ibSize )
+: DynamicVBO()
+, m_VBSize(vbSize)
+, m_VBUsedBytes(0)
+, m_IBSize(ibSize)
+, m_IBUsedBytes(0)
+, m_VB(NULL)
+, m_IB(NULL)
+, m_VertexDecl(NULL)
+, m_LastChunkStartVertex(0)
+, m_LastChunkStartIndex(0)
+, m_QuadsIB(NULL)
+, m_QuadsIBFailed(false)
+{
+}
+
+// Unregisters and releases the dynamic vertex buffer, the dynamic index buffer
+// and the static quads index buffer, whichever exist. Each Release() is
+// expected to drop the last reference.
+DynamicD3D9VBO::~DynamicD3D9VBO ()
+{
+	if( m_VB != NULL )
+	{
+		REGISTER_EXTERNAL_GFX_DEALLOCATION(m_VB);
+		ULONG vbRefs = m_VB->Release();
+		AssertIf( vbRefs != 0 );
+	}
+
+	if( m_IB != NULL )
+	{
+		REGISTER_EXTERNAL_GFX_DEALLOCATION(m_IB);
+		ULONG ibRefs = m_IB->Release();
+		AssertIf( ibRefs != 0 );
+	}
+
+	if( m_QuadsIB != NULL )
+	{
+		REGISTER_EXTERNAL_GFX_DEALLOCATION(m_QuadsIB);
+		ULONG quadsRefs = m_QuadsIB->Release();
+		AssertIf( quadsRefs != 0 );
+	}
+}
+
+// Creates and fills the static index buffer used to draw quads as triangle
+// lists: every quad (4 consecutive vertices) becomes two triangles, i.e.
+// 6 indices, for VBO::kMaxQuads quads in total.
+// On failure logs, leaves m_QuadsIB NULL and sets m_QuadsIBFailed. DrawChunk
+// calls this again while m_QuadsIB is NULL, so a later successful attempt
+// clears the failure flag.
+void DynamicD3D9VBO::InitializeQuadsIB()
+{
+	AssertIf( m_QuadsIB );
+
+	const UInt32 quadsIBSize = VBO::kMaxQuads * 6 * kVBOIndexSize;
+
+	IDirect3DDevice9* dev = GetD3DDevice();
+	HRESULT hr = dev->CreateIndexBuffer( quadsIBSize RANDOM_FAIL_FOR_DEBUG, D3DUSAGE_WRITEONLY, D3DFMT_INDEX16, D3DPOOL_MANAGED, &m_QuadsIB, NULL );
+	if( FAILED(hr) )
+	{
+		printf_console( "d3d: failed to create quads index buffer [%s]\n", GetD3D9Error(hr) );
+		m_QuadsIB = NULL;
+		m_QuadsIBFailed = true;
+		return;
+	}
+	// Register the allocation only once the buffer really exists.
+	REGISTER_EXTERNAL_GFX_ALLOCATION_REF(m_QuadsIB,quadsIBSize,this);
+
+	UInt16* ib = NULL;
+	hr = m_QuadsIB->Lock( 0 RANDOM_FAIL_FOR_DEBUG, 0, (void**)&ib, 0 );
+	if( FAILED(hr) )
+	{
+		printf_console( "d3d: failed to lock quads index buffer [%s]\n", GetD3D9Error(hr) );
+		REGISTER_EXTERNAL_GFX_DEALLOCATION(m_QuadsIB);
+		m_QuadsIB->Release();
+		m_QuadsIB = NULL;
+		m_QuadsIBFailed = true;
+		return;
+	}
+
+	// Two triangles per quad: (1,2,0) and (2,3,0), relative to the quad's first vertex.
+	UInt32 baseIndex = 0;
+	for( int i = 0; i < VBO::kMaxQuads; ++i )
+	{
+		ib[0] = baseIndex + 1;
+		ib[1] = baseIndex + 2;
+		ib[2] = baseIndex;
+		ib[3] = baseIndex + 2;
+		ib[4] = baseIndex + 3;
+		ib[5] = baseIndex;
+		baseIndex += 4;
+		ib += 6;
+	}
+
+	m_QuadsIB->Unlock();
+
+	// A retry after an earlier failure succeeded; allow quad drawing again.
+	m_QuadsIBFailed = false;
+}
+
+// Draws the chunk most recently filled via GetChunk/ReleaseChunk.
+// Does nothing when the last chunk is empty (channel mask is zero).
+// The render mode picks the draw path: non-indexed triangle strip, indexed
+// triangle strip, quads (through the shared static quads index buffer),
+// indexed lines, indexed points; every other mode draws an indexed triangle list.
+// When the quads index buffer could not be created, no draw is issued for
+// quads but the (zero-primitive) draw call is still added to the frame stats.
+void DynamicD3D9VBO::DrawChunk (const ChannelAssigns& channels)
+{
+	// just return if nothing to render
+	if( !m_LastChunkShaderChannelMask )
+		return;
+
+	// Starts out as success: the quads path may legitimately issue no draw
+	// call at all, and the final assert must not read an indeterminate value.
+	HRESULT hr = S_OK;
+
+	AssertIf( !m_LastChunkShaderChannelMask || !m_LastChunkStride );
+	AssertIf( m_LendedChunk );
+
+	GfxDevice& device = GetRealGfxDevice();
+	IDirect3DDevice9* dev = GetD3DDevice();
+
+	// setup VBO
+	DebugAssertIf( !m_VB );
+	D3D9_CALL(dev->SetStreamSource( 0, m_VB, 0, m_LastChunkStride ));
+	D3D9_CALL(dev->SetVertexDeclaration( m_VertexDecl ));
+	UpdateChannelBindingsD3D( channels );
+	device.BeforeDrawCall( false );
+
+	// draw
+	GfxDeviceStats& stats = device.GetFrameStats();
+	int primCount = 0;
+	if( m_LastRenderMode == kDrawTriangleStrip )
+	{
+		hr = D3D9_CALL_HR(dev->DrawPrimitive( D3DPT_TRIANGLESTRIP, m_LastChunkStartVertex, m_LastChunkVertices-2 ));
+		primCount = m_LastChunkVertices-2;
+	}
+	else if (m_LastRenderMode == kDrawIndexedTriangleStrip)
+	{
+		DebugAssertIf( !m_IB );
+		D3D9_CALL(dev->SetIndices( m_IB ));
+		hr = D3D9_CALL_HR(dev->DrawIndexedPrimitive( D3DPT_TRIANGLESTRIP, m_LastChunkStartVertex, 0, m_LastChunkVertices, m_LastChunkStartIndex, m_LastChunkIndices-2 ));
+		primCount = m_LastChunkIndices-2;
+	}
+	else if( m_LastRenderMode == kDrawQuads )
+	{
+		// initialize quads index buffer if needed
+		if( !m_QuadsIB )
+			InitializeQuadsIB();
+		// if quads index buffer has valid data, draw with it
+		if( !m_QuadsIBFailed )
+		{
+			D3D9_CALL(dev->SetIndices( m_QuadsIB ));
+			// 4 vertices per quad -> 2 triangles per quad -> vertices/2 primitives.
+			hr = D3D9_CALL_HR(dev->DrawIndexedPrimitive( D3DPT_TRIANGLELIST, m_LastChunkStartVertex, 0, m_LastChunkVertices, 0, m_LastChunkVertices/2 ));
+			primCount = m_LastChunkVertices/2;
+		}
+	}
+	else if (m_LastRenderMode == kDrawIndexedLines)
+	{
+		DebugAssertIf( !m_IB );
+		D3D9_CALL(dev->SetIndices( m_IB ));
+		hr = D3D9_CALL_HR(dev->DrawIndexedPrimitive( D3DPT_LINELIST, m_LastChunkStartVertex, 0, m_LastChunkVertices, m_LastChunkStartIndex, m_LastChunkIndices/2 ));
+		primCount = m_LastChunkIndices/2;
+	}
+	else if (m_LastRenderMode == kDrawIndexedPoints)
+	{
+		DebugAssertIf( !m_IB );
+		D3D9_CALL(dev->SetIndices( m_IB ));
+		hr = D3D9_CALL_HR(dev->DrawIndexedPrimitive( D3DPT_POINTLIST, m_LastChunkStartVertex, 0, m_LastChunkVertices, m_LastChunkStartIndex, m_LastChunkIndices ));
+		primCount = m_LastChunkIndices;
+	}
+	else
+	{
+		DebugAssertIf( !m_IB );
+		D3D9_CALL(dev->SetIndices( m_IB ));
+		hr = D3D9_CALL_HR(dev->DrawIndexedPrimitive( D3DPT_TRIANGLELIST, m_LastChunkStartVertex, 0, m_LastChunkVertices, m_LastChunkStartIndex, m_LastChunkIndices/3 ));
+		primCount = m_LastChunkIndices/3;
+	}
+	stats.AddDrawCall (primCount, m_LastChunkVertices);
+	AssertIf(FAILED(hr));
+}
+
+// Lends the caller a writable region of the dynamic vertex buffer and, for
+// indexed render modes, of the dynamic index buffer.
+// shaderChannelMask - channels present in each vertex; determines the stride.
+// maxVertices/maxIndices - capacity to reserve (0 vertices is treated as 8).
+// outVB/outIB - receive the locked pointers; set to NULL on the failure paths below.
+// Returns true when all required locks succeeded; the chunk must then be
+// returned with ReleaseChunk. On failure nothing remains locked and the chunk
+// is not marked as lent.
+bool DynamicD3D9VBO::GetChunk( UInt32 shaderChannelMask, UInt32 maxVertices, UInt32 maxIndices, RenderMode renderMode, void** outVB, void** outIB )
+{
+ Assert( !m_LendedChunk );
+ Assert( maxVertices < 65536 && maxIndices < 65536*3 );
+ Assert(!((renderMode == kDrawQuads) && (VBO::kMaxQuads*4 < maxVertices)));
+ DebugAssertMsg(outVB != NULL && maxVertices > 0, "DynamicD3D9VBO::GetChunk - outVB: 0x%08x maxVertices: %d", outVB, maxVertices);
+ // Indexed modes need an index output and a non-zero index count; non-indexed modes need neither.
+ DebugAssertMsg(
+ (renderMode == kDrawIndexedQuads && (outIB != NULL && maxIndices > 0)) ||
+ (renderMode == kDrawIndexedPoints && (outIB != NULL && maxIndices > 0)) ||
+ (renderMode == kDrawIndexedLines && (outIB != NULL && maxIndices > 0)) ||
+ (renderMode == kDrawIndexedTriangles && (outIB != NULL && maxIndices > 0)) ||
+ (renderMode == kDrawIndexedTriangleStrip && (outIB != NULL && maxIndices > 0)) ||
+ (renderMode == kDrawTriangleStrip && (outIB == NULL && maxIndices == 0)) ||
+ (renderMode == kDrawQuads && (outIB == NULL && maxIndices == 0)),
+ "DynamicD3D9VBO::GetChunk - renderMode: %d outIB: 0x%08x maxIndices: %d", renderMode, outIB, maxIndices);
+ HRESULT hr;
+ bool success = true;
+
+ m_LendedChunk = true;
+ m_LastRenderMode = renderMode;
+
+ if( maxVertices == 0 )
+ maxVertices = 8;
+
+ // Vertex stride is the sum of the default byte sizes of all requested channels.
+ // NOTE(review): a mask selecting no channels yields stride 0 and a division by zero below - confirm callers never pass 0.
+ m_LastChunkStride = 0;
+ for( int i = 0; i < kShaderChannelCount; ++i ) {
+ if( shaderChannelMask & (1<<i) )
+ m_LastChunkStride += VBO::GetDefaultChannelByteSize(i);
+ }
+ // Fetch a new vertex declaration only when the channel mask changed.
+ if (shaderChannelMask != m_LastChunkShaderChannelMask)
+ {
+ m_VertexDecl = GetD3DVertexDeclaration( shaderChannelMask );
+ m_LastChunkShaderChannelMask = shaderChannelMask;
+ }
+ IDirect3DDevice9* dev = GetD3DDevice();
+
+ // -------- vertex buffer
+
+ DebugAssertIf( !outVB );
+ UInt32 vbCapacity = maxVertices * m_LastChunkStride;
+ // check if requested chunk is larger than current buffer
+ if( vbCapacity > m_VBSize ) {
+ m_VBSize = vbCapacity * 2; // allocate more up front
+ if( m_VB ){
+ REGISTER_EXTERNAL_GFX_DEALLOCATION(m_VB);
+ m_VB->Release();
+ }
+ m_VB = NULL;
+ }
+ // allocate buffer if don't have it yet
+ if( !m_VB ) {
+ hr = dev->CreateVertexBuffer( m_VBSize RANDOM_FAIL_FOR_DEBUG, D3DUSAGE_DYNAMIC | D3DUSAGE_WRITEONLY, 0, D3DPOOL_DEFAULT, &m_VB, NULL );
+ REGISTER_EXTERNAL_GFX_ALLOCATION_REF(m_VB,m_VBSize,this);
+ if( FAILED(hr) )
+ {
+ printf_console( "d3d: failed to create dynamic vertex buffer of size %d [%s]\n", m_VBSize, GetD3D9Error(hr) );
+ success = false;
+ *outVB = NULL;
+ }
+ }
+
+ // lock, making sure the offset we lock is multiple of vertex stride
+ if( m_VB )
+ {
+ // Round the used byte count up to the next multiple of the stride.
+ m_VBUsedBytes = ((m_VBUsedBytes + (m_LastChunkStride-1)) / m_LastChunkStride) * m_LastChunkStride;
+ if( m_VBUsedBytes + vbCapacity > m_VBSize ) {
+ // Does not fit after the used part: discard and restart at offset 0.
+ hr = m_VB->Lock( 0 RANDOM_FAIL_FOR_DEBUG, 0, outVB, D3DLOCK_DISCARD );
+ if( FAILED(hr) )
+ {
+ printf_console( "d3d: failed to lock dynamic vertex buffer with discard [%s]\n", GetD3D9Error(hr) );
+ *outVB = NULL;
+ success = false;
+ }
+ m_VBUsedBytes = 0;
+ } else {
+ // Fits: append after the used part without overwriting it.
+ hr = m_VB->Lock( m_VBUsedBytes RANDOM_FAIL_FOR_DEBUG, vbCapacity, outVB, D3DLOCK_NOOVERWRITE );
+ if( FAILED(hr) )
+ {
+ // NOTE(review): the message says "vertex index buffer" but this is the vertex buffer lock.
+ printf_console( "d3d: failed to lock vertex index buffer, offset %i size %i [%s]\n", m_VBUsedBytes, vbCapacity, GetD3D9Error(hr) );
+ *outVB = NULL;
+ success = false;
+ }
+ }
+ m_LastChunkStartVertex = m_VBUsedBytes / m_LastChunkStride;
+ DebugAssertIf( m_LastChunkStartVertex * m_LastChunkStride != m_VBUsedBytes );
+ }
+
+ // -------- index buffer
+
+ // Quads and plain triangle strips take no indices from the caller.
+ const bool indexed = (renderMode != kDrawQuads) && (renderMode != kDrawTriangleStrip);
+ if( success && maxIndices && indexed )
+ {
+ UInt32 ibCapacity = maxIndices * kVBOIndexSize;
+ // check if requested chunk is larger than current buffer
+ if( ibCapacity > m_IBSize ) {
+ m_IBSize = ibCapacity * 2; // allocate more up front
+ if( m_IB ){
+ REGISTER_EXTERNAL_GFX_DEALLOCATION(m_IB);
+ m_IB->Release();
+ }
+ m_IB = NULL;
+ }
+ // allocate buffer if don't have it yet
+ if( !m_IB ) {
+ hr = dev->CreateIndexBuffer( m_IBSize RANDOM_FAIL_FOR_DEBUG, D3DUSAGE_DYNAMIC | D3DUSAGE_WRITEONLY, D3DFMT_INDEX16, D3DPOOL_DEFAULT, &m_IB, NULL );
+ REGISTER_EXTERNAL_GFX_ALLOCATION_REF(m_IB,m_IBSize,this);
+ if( FAILED(hr) )
+ {
+ printf_console( "d3d: failed to create dynamic index buffer of size %d [%s]\n", m_IBSize, GetD3D9Error(hr) );
+ // The vertex buffer was locked above; undo that before failing.
+ if( m_VB )
+ m_VB->Unlock();
+ }
+ }
+ // lock it if we have IB created successfully
+ if( m_IB )
+ {
+ if( m_IBUsedBytes + ibCapacity > m_IBSize ) {
+ hr = m_IB->Lock( 0 RANDOM_FAIL_FOR_DEBUG, 0, outIB, D3DLOCK_DISCARD );
+ if( FAILED(hr) )
+ {
+ printf_console( "d3d: failed to lock dynamic index buffer with discard [%s]\n", GetD3D9Error(hr) );
+ *outIB = NULL;
+ success = false;
+ if( m_VB )
+ m_VB->Unlock();
+ }
+ m_IBUsedBytes = 0;
+ } else {
+ hr = m_IB->Lock( m_IBUsedBytes RANDOM_FAIL_FOR_DEBUG, ibCapacity, outIB, D3DLOCK_NOOVERWRITE );
+ if( FAILED(hr) )
+ {
+ printf_console( "d3d: failed to lock dynamic index buffer, offset %i size %i [%s]\n", m_IBUsedBytes, ibCapacity, GetD3D9Error(hr) );
+ *outIB = NULL;
+ success = false;
+ if( m_VB )
+ m_VB->Unlock();
+ }
+ }
+ // Byte offset to index offset (2 bytes per 16-bit index).
+ m_LastChunkStartIndex = m_IBUsedBytes / 2;
+ }
+ else
+ {
+ // Index buffer creation failed (the vertex buffer was already unlocked above).
+ *outIB = NULL;
+ success = false;
+ }
+ }
+
+ if( !success )
+ m_LendedChunk = false;
+
+ return success;
+}
+
+// Returns a chunk obtained with GetChunk: records how many vertices/indices
+// were actually written, unlocks the buffers and advances the used-bytes
+// counters. An empty chunk clears the last channel mask so that DrawChunk
+// skips it, and leaves the counters untouched.
+void DynamicD3D9VBO::ReleaseChunk( UInt32 actualVertices, UInt32 actualIndices )
+{
+	Assert( m_LendedChunk );
+	Assert( m_LastRenderMode == kDrawIndexedTriangleStrip || m_LastRenderMode == kDrawIndexedQuads || m_LastRenderMode == kDrawIndexedPoints || m_LastRenderMode == kDrawIndexedLines || actualIndices % 3 == 0 );
+	m_LendedChunk = false;
+
+	m_LastChunkVertices = actualVertices;
+	m_LastChunkIndices = actualIndices;
+
+	// Quads and plain triangle strips never locked the index buffer.
+	const bool usesIndexBuffer = (m_LastRenderMode != kDrawQuads) && (m_LastRenderMode != kDrawTriangleStrip);
+
+	// unlock buffers
+	m_VB->Unlock();
+	if( usesIndexBuffer )
+		m_IB->Unlock();
+
+	const bool chunkIsEmpty = !actualVertices || (usesIndexBuffer && !actualIndices);
+	if( chunkIsEmpty )
+	{
+		m_LastChunkShaderChannelMask = 0;
+		return;
+	}
+
+	m_VBUsedBytes += actualVertices * m_LastChunkStride;
+	m_IBUsedBytes += actualIndices * kVBOIndexSize;
+}
+
+
diff --git a/Runtime/GfxDevice/d3d/D3D9VBO.h b/Runtime/GfxDevice/d3d/D3D9VBO.h
new file mode 100644
index 0000000..71c0da9
--- /dev/null
+++ b/Runtime/GfxDevice/d3d/D3D9VBO.h
@@ -0,0 +1,86 @@
+#pragma once
+
+#include "D3D9Includes.h"
+#include "Runtime/Shaders/VBO.h"
+
+
+// Implements Direct3D9 VBO
+// Direct3D 9 implementation of VBO: one D3D vertex buffer per vertex stream
+// plus one 16-bit index buffer. Quad topologies and custom-indexed draws go
+// through an index buffer shared by all instances.
+class D3D9VBO : public VBO {
+public:
+ D3D9VBO();
+ virtual ~D3D9VBO();
+
+ virtual void UpdateVertexData( const VertexBufferData& buffer );
+ virtual void UpdateIndexData (const IndexBufferData& buffer);
+ virtual void DrawVBO (const ChannelAssigns& channels, UInt32 firstIndexByte, UInt32 indexCount, GfxPrimitiveType topology, UInt32 firstVertex, UInt32 vertexCount);
+ #if GFX_ENABLE_DRAW_CALL_BATCHING
+ virtual void DrawCustomIndexed( const ChannelAssigns& channels, void* indices, UInt32 indexCount,
+ GfxPrimitiveType topology, UInt32 vertexRangeBegin, UInt32 vertexRangeEnd, UInt32 drawVertexCount );
+ #endif
+ virtual bool MapVertexStream( VertexStreamData& outData, unsigned stream );
+ virtual void UnmapVertexStream( unsigned stream );
+ virtual bool IsVertexBufferLost() const;
+
+ // Releases the buffers of streams in dynamic mode.
+ virtual void ResetDynamicVB();
+
+ virtual int GetRuntimeMemorySize() const;
+
+ // Releases the index buffer shared by all D3D9VBO instances.
+ static void CleanupSharedIndexBuffer();
+
+private:
+ void BindVertexStreams( IDirect3DDevice9* dev, const ChannelAssigns& channels );
+ void UpdateVertexStream( const VertexBufferData& sourceData, unsigned stream );
+ void UpdateIndexBufferData (const IndexBufferData& sourceData);
+ // Lock/unlock a range of the shared dynamic index buffer.
+ static UInt16* MapDynamicIndexBuffer (int indexCount, UInt32& outBytesUsed);
+ static void UnmapDynamicIndexBuffer ();
+
+private:
+ int m_VertexCount;
+
+ // Indices into m_VertexDecls.
+ enum
+ {
+ kVertexDeclDefault, // declaration matching m_ChannelInfo
+ kVertexDeclAllWhiteStream, // color channel redirected to the all-white stream
+ kVertexDeclCount
+ };
+
+ IDirect3DVertexBuffer9* m_VBStreams[kMaxVertexStreams]; // NULL where a stream has no buffer
+ IDirect3DIndexBuffer9* m_IB;
+ IDirect3DVertexDeclaration9* m_VertexDecls[kVertexDeclCount];
+ ChannelInfoArray m_ChannelInfo;
+ int m_IBSize; // index buffer size in bytes; 0 means nothing to draw
+
+ // Shared dynamic index buffer, its capacity in bytes and the bytes used so far.
+ static IDirect3DIndexBuffer9* ms_CustomIB;
+ static int ms_CustomIBSize;
+ static UInt32 ms_CustomIBUsedBytes;
+};
+
+// Direct3D 9 implementation of DynamicVBO: hands out chunks of a dynamic
+// vertex buffer and a dynamic index buffer (GetChunk / ReleaseChunk) and
+// draws the most recent chunk (DrawChunk).
+class DynamicD3D9VBO : public DynamicVBO {
+public:
+ // vbSize / ibSize: initial vertex and index buffer sizes in bytes.
+ DynamicD3D9VBO( UInt32 vbSize, UInt32 ibSize );
+ virtual ~DynamicD3D9VBO();
+
+ virtual bool GetChunk( UInt32 shaderChannelMask, UInt32 maxVertices, UInt32 maxIndices, RenderMode mode, void** outVB, void** outIB );
+ virtual void ReleaseChunk( UInt32 actualVertices, UInt32 actualIndices );
+ virtual void DrawChunk (const ChannelAssigns& channels);
+
+private:
+ // Creates and fills m_QuadsIB; sets m_QuadsIBFailed on failure.
+ void InitializeQuadsIB();
+
+private:
+ // Buffer capacities and the number of bytes already consumed, in bytes.
+ UInt32 m_VBSize;
+ UInt32 m_VBUsedBytes;
+ UInt32 m_IBSize;
+ UInt32 m_IBUsedBytes;
+
+ IDirect3DVertexBuffer9* m_VB;
+ IDirect3DIndexBuffer9* m_IB;
+ IDirect3DVertexDeclaration9* m_VertexDecl; // vertex declaration for the last chunk
+
+ // Where the last chunk starts, in vertices / indices.
+ UInt32 m_LastChunkStartVertex;
+ UInt32 m_LastChunkStartIndex;
+
+ IDirect3DIndexBuffer9* m_QuadsIB; // static IB for drawing quads
+ bool m_QuadsIBFailed;
+};
+
diff --git a/Runtime/GfxDevice/d3d/D3D9Window.cpp b/Runtime/GfxDevice/d3d/D3D9Window.cpp
new file mode 100644
index 0000000..b568b34
--- /dev/null
+++ b/Runtime/GfxDevice/d3d/D3D9Window.cpp
@@ -0,0 +1,272 @@
+#include "UnityPrefix.h"
+#include "D3D9Window.h"
+#include "GfxDeviceD3D9.h"
+#include "RenderTextureD3D.h"
+#include "Runtime/Misc/QualitySettings.h"
+#include "Runtime/Threads/ThreadSharedObject.h"
+#include "Runtime/GfxDevice/GfxDevice.h"
+
+
+#if UNITY_EDITOR
+
+bool IsD3D9DeviceLost();
+void SetD3D9DeviceLost( bool lost );
+
+static bool s_OldHasDepthFlag = false;
+static D3D9Window* s_CurrentD3DWindow = NULL;
+static int s_CurrentD3DFSAALevel = 0;
+
+int GetCurrentD3DFSAALevel() { return s_CurrentD3DFSAALevel; }
+
+void SetNoRenderTextureActiveEditor(); // RenderTexture.cpp
+
+
+// Creates an editor window render target on top of an existing D3D9 device and
+// immediately builds its swap chain / surfaces via Reshape().
+//
+// device      - D3D9 device used for all resource creation; not released by this class.
+// window      - native window handle the swap chain presents into.
+// width,height, depthFormat, antiAlias - forwarded to GfxDeviceWindow and Reshape().
+//
+// All members are initialized before Reshape() runs: Reshape() can return early
+// (base class refusal, swap chain failure) before it assigns m_DepthStencilFormat,
+// which previously left GetDepthStencilFormat() returning an indeterminate value.
+D3D9Window::D3D9Window(IDirect3DDevice9* device, HWND window, int width, int height, DepthBufferFormat depthFormat, int antiAlias )
+:	GfxDeviceWindow(window, width, height, depthFormat, antiAlias)
+,	m_Device(device)
+,	m_SwapChain(NULL)
+,	m_DepthStencilFormat(D3DFMT_UNKNOWN)
+,	m_FSAALevel(0)
+{
+	Reshape( width, height, depthFormat, antiAlias );
+}
+
+// Tears the window down: unregisters it as the current window (if it is),
+// then destroys the depth/stencil and back buffer surfaces and releases the swap chain.
+// m_Device is not released here.
+D3D9Window::~D3D9Window()
+{
+ // Do not leave the file-level "current window" state pointing at a destroyed object.
+ if( s_CurrentD3DWindow == this )
+ {
+ s_CurrentD3DWindow = NULL;
+ s_CurrentD3DFSAALevel = 0;
+ }
+
+ DestroyRenderSurfaceD3D9(&m_DepthStencil);
+ DestroyRenderSurfaceD3D9(&m_BackBuffer);
+ SAFE_RELEASE(m_SwapChain);
+}
+
+// (Re)creates the D3D9 resources backing this window: an additional swap chain,
+// its back buffer surface and, when depthFormat asks for it, a depth/stencil surface.
+//
+// width, height - requested size; the swap chain itself is sized from m_Width / m_Height
+//                 after the base class Reshape() has run.
+// depthFormat   - kDepthFormatNone / kDepthFormat16 / kDepthFormat24.
+// antiAlias     - requested AA level; -1 means "use the current quality setting".
+//
+// Returns !m_InvalidState, i.e. false when the base class rejects the reshape or when
+// any D3D resource could not be created (m_InvalidState is set to true on such failures).
+bool D3D9Window::Reshape( int width, int height, DepthBufferFormat depthFormat, int antiAlias )
+{
+	if( !GfxDeviceWindow::Reshape(width, height, depthFormat, antiAlias) )
+		return false;
+
+	#if ENABLE_D3D_WINDOW_LOGGING
+	// %p rather than %x: 'this' is a pointer and does not fit an unsigned int on 64 bit builds
+	printf_console("D3Dwindow %p Reshape %ix%i d=%i aa=%i\n", (void*)this, width, height, depthFormat, antiAlias);
+	#endif
+
+	// release old
+	m_DepthStencil.Release();
+	m_BackBuffer.Release();
+	SAFE_RELEASE(m_SwapChain);
+
+	HRESULT hr;
+
+	// Choose presentation params
+	if( antiAlias == -1 )
+		antiAlias = GetQualitySettings().GetCurrent().antiAliasing;
+
+	D3DDISPLAYMODE mode;
+	hr = GetD3DObject()->GetAdapterDisplayMode( D3DADAPTER_DEFAULT, &mode );
+	if( FAILED(hr) ) {
+		// 'mode' is left unfilled on failure; do not feed an indeterminate
+		// format into FindBestPresentationParams below.
+		printf_console( "d3d: failed to get adapter display mode [%s]\n", GetD3D9Error(hr) );
+		m_InvalidState = true;
+		return !m_InvalidState;
+	}
+
+	D3DPRESENT_PARAMETERS params;
+	ZeroMemory( &params, sizeof(params) );
+	params.BackBufferWidth = m_Width;
+	params.BackBufferHeight = m_Height;
+	params.BackBufferCount = 1;
+	params.hDeviceWindow = m_Window;
+	params.FullScreen_RefreshRateInHz = 0;
+	params.Windowed = TRUE;
+	params.SwapEffect = D3DSWAPEFFECT_COPY;
+	params.BackBufferFormat = D3DFMT_A8R8G8B8;
+	params.PresentationInterval = D3DPRESENT_INTERVAL_IMMEDIATE;
+	params.EnableAutoDepthStencil = FALSE;
+	GetD3DFormatCaps()->FindBestPresentationParams( width, height, mode.Format, true, 0, antiAlias, params );
+
+	// Multisampled swap chains are switched to the DISCARD swap effect,
+	// and the blit optimization is turned off for them.
+	if( params.MultiSampleType != D3DMULTISAMPLE_NONE ) {
+		params.SwapEffect = D3DSWAPEFFECT_DISCARD;
+		m_CanUseBlitOptimization = false;
+	} else {
+		m_CanUseBlitOptimization = true;
+	}
+	m_FSAALevel = (params.MultiSampleType == D3DMULTISAMPLE_NONMASKABLE) ? params.MultiSampleQuality : params.MultiSampleType;
+
+	hr = m_Device->CreateAdditionalSwapChain( &params, &m_SwapChain );
+	if( FAILED(hr) ) {
+		printf_console( "d3d: swap chain: swap=%i vsync=%x w=%i h=%i fmt=%i bbcount=%i dsformat=%i pflags=%x\n",
+			params.SwapEffect, params.PresentationInterval,
+			params.BackBufferWidth, params.BackBufferHeight, params.BackBufferFormat, params.BackBufferCount,
+			params.AutoDepthStencilFormat, params.Flags );
+		printf_console( "d3d: failed to create swap chain [%s]\n", GetD3D9Error(hr) );
+		m_InvalidState = true;
+		return !m_InvalidState;
+	}
+
+	IDirect3DSurface9* backBuffer = NULL;
+	hr = m_SwapChain->GetBackBuffer( 0, D3DBACKBUFFER_TYPE_MONO, &backBuffer );
+	if( FAILED(hr) ) {
+		AssertString( "Failed to get back buffer for D3DWindow" );
+		m_SwapChain->Release();
+		m_SwapChain = NULL;
+		m_InvalidState = true;
+		return !m_InvalidState;
+	}
+
+	m_BackBuffer.backBuffer = true;
+	m_DepthStencil.backBuffer = true;
+
+	m_BackBuffer.m_Surface = backBuffer;
+	m_BackBuffer.width = params.BackBufferWidth;
+	m_BackBuffer.height = params.BackBufferHeight;
+	m_BackBuffer.format = kRTFormatARGB32;
+
+	// Depth format
+	bool needsDepth = false;
+	m_DepthStencilFormat = D3DFMT_UNKNOWN;
+	switch( depthFormat ) {
+	case kDepthFormatNone:
+		needsDepth = false;
+		m_DepthStencilFormat = D3DFMT_UNKNOWN;
+		break;
+	case kDepthFormat16:
+		needsDepth = true;
+		m_DepthStencilFormat = D3DFMT_D16;
+		break;
+	case kDepthFormat24:
+		needsDepth = true;
+		m_DepthStencilFormat = D3DFMT_D24S8;
+		break;
+	default:
+		ErrorString("Unknown depth format");
+	}
+
+	if( needsDepth )
+	{
+		D3D9DepthStencilTexture depthStencil = CreateDepthStencilTextureD3D9 (m_Device, m_Width, m_Height, m_DepthStencilFormat, params.MultiSampleType, params.MultiSampleQuality, FALSE);
+		m_Device->SetRenderState (D3DRS_ZENABLE, TRUE);
+		if (!depthStencil.m_Surface)
+		{
+			// Roll back everything created above so the window holds no half-built state.
+			AssertString( "Failed to create depth/stencil for D3DWindow" );
+			m_SwapChain->Release();
+			m_SwapChain = NULL;
+			REGISTER_EXTERNAL_GFX_DEALLOCATION(m_BackBuffer.m_Surface);
+			m_BackBuffer.m_Surface->Release();
+			m_BackBuffer.m_Surface = NULL;
+			m_InvalidState = true;
+			return !m_InvalidState;
+		}
+		m_DepthStencil.m_Surface = depthStencil.m_Surface;
+		m_DepthStencil.m_Texture = depthStencil.m_Texture;
+		m_DepthStencil.width = m_Width;
+		m_DepthStencil.height = m_Height;
+		m_DepthStencil.depthFormat = depthFormat;
+	}
+
+	return !m_InvalidState;
+}
+
+// Makes this window the active render target of the real gfx device and updates the
+// file-level / global bookkeeping (current window, FSAA level, depth/stencil flags).
+// The previous g_D3DHasDepthStencil value is remembered in s_OldHasDepthFlag so that
+// EndRendering() can restore it.
+void D3D9Window::SetAsActiveWindow ()
+{
+	GfxDevice& device = GetRealGfxDevice();
+
+	// Keep the handle in a named local: taking the address of the temporary returned
+	// by GetBackBuffer() is not valid standard C++ (only accepted as a compiler extension).
+	RenderSurfaceHandle colorSurface = GetBackBuffer();
+	device.SetRenderTargets(1, &colorSurface, GetDepthStencil());
+	device.SetActiveRenderTexture(NULL);
+	device.SetCurrentWindowSize(m_Width, m_Height);
+	device.SetInvertProjectionMatrix(false);
+
+	const bool hasDepthStencil = (m_DepthStencil.m_Surface != NULL);
+
+	s_OldHasDepthFlag = g_D3DHasDepthStencil;
+	g_D3DHasDepthStencil = hasDepthStencil;
+
+	s_CurrentD3DWindow = this;
+	s_CurrentD3DFSAALevel = m_FSAALevel;
+
+	// not entirely correct but better not touch anything if we don't have depth
+	if( hasDepthStencil )
+		g_D3DDepthStencilFormat = m_DepthStencilFormat;
+}
+
+// Starts rendering into this window: begins a D3D scene, makes the window the active
+// render target and marks the device as being inside a frame.
+//
+// Returns false (without beginning a scene) when the base class refuses to begin
+// rendering or when the gfx device is not in a valid state; true otherwise.
+// A lost device or an already open frame is reported via ErrorString, but rendering
+// still proceeds in those cases.
+bool D3D9Window::BeginRendering()
+{
+	if (!GfxDeviceWindow::BeginRendering())
+	{
+		#if ENABLE_D3D_WINDOW_LOGGING
+		printf_console("D3Dwindow %ix%i BeginRendering: invalid state\n", m_Width, m_Height);
+		#endif
+		return false;
+	}
+
+	// Handle lost devices
+	if (!GetRealGfxDevice().IsValidState())
+		return false;
+
+	// begin scene
+	if (IsD3D9DeviceLost())
+	{
+		ErrorString ("GUI Window tries to begin rendering while D3D9 device is lost!");
+	}
+	GfxDeviceD3D9& device = static_cast<GfxDeviceD3D9&>( GetRealGfxDevice() );
+	if (device.IsInsideFrame())
+	{
+		ErrorString ("GUI Window tries to begin rendering while something else has not finished rendering! Either you have a recursive OnGUI rendering, or previous OnGUI did not clean up properly.");
+	}
+
+	m_Device->BeginScene();
+	SetAsActiveWindow ();
+
+	device.SetInsideFrame(true);
+	return true;
+}
+
+// Finishes rendering into this window: restores the global depth/stencil flag saved by
+// SetAsActiveWindow(), clears the "current window" bookkeeping, ends the D3D scene and
+// optionally presents the swap chain.
+//
+// presentContent - when true (and a swap chain exists) the back buffer is presented.
+//
+// Returns false when the base class refuses, when the device is already lost, or when
+// Present() reports a lost device / internal driver error (the device is then flagged
+// as lost); true otherwise.
+bool D3D9Window::EndRendering( bool presentContent )
+{
+	if( !GfxDeviceWindow::EndRendering(presentContent) )
+		return false;
+
+	g_D3DHasDepthStencil = s_OldHasDepthFlag;
+	// Reset both pieces of "current window" state, mirroring the destructor.
+	// (Previously s_CurrentD3DWindow was cleared twice and the FSAA level was left stale.)
+	s_CurrentD3DWindow = NULL;
+	s_CurrentD3DFSAALevel = 0;
+
+	if( IsD3D9DeviceLost() )
+		return false;
+
+	GfxDeviceD3D9& device = static_cast<GfxDeviceD3D9&>( GetRealGfxDevice() );
+	Assert( device.IsInsideFrame() );
+	m_Device->EndScene();
+	device.SetInsideFrame(false);
+
+	if( m_SwapChain && presentContent )
+	{
+		HRESULT hr = m_SwapChain->Present( NULL, NULL, NULL, NULL, 0 );
+		device.PushEventQuery();
+		// When Present() returns D3DERR_DRIVERINTERNALERROR, try recovering
+		// the same way as from a lost device.
+		if( hr == D3DERR_DEVICELOST || hr == D3DERR_DRIVERINTERNALERROR )
+		{
+			SetD3D9DeviceLost( true );
+			return false;
+		}
+	}
+	return true;
+}
+
+// Returns a handle whose object points at this window's back buffer surface record.
+// The handle refers to a member of this window, so it is only meaningful while the window exists.
+RenderSurfaceHandle D3D9Window::GetBackBuffer()
+{
+ RenderSurfaceHandle handle;
+ handle.object = &m_BackBuffer;
+ return handle;
+}
+
+// Returns a handle whose object points at this window's depth/stencil surface record.
+// The record exists even when no depth surface was created (its m_Surface is then NULL).
+RenderSurfaceHandle D3D9Window::GetDepthStencil()
+{
+ RenderSurfaceHandle handle;
+ handle.object = &m_DepthStencil;
+ return handle;
+}
+
+#endif
diff --git a/Runtime/GfxDevice/d3d/D3D9Window.h b/Runtime/GfxDevice/d3d/D3D9Window.h
new file mode 100644
index 0000000..038b59f
--- /dev/null
+++ b/Runtime/GfxDevice/d3d/D3D9Window.h
@@ -0,0 +1,39 @@
+#ifndef D3D9WINDOW_H
+#define D3D9WINDOW_H
+
+#include "D3D9Includes.h"
+#include "Runtime/GfxDevice/GfxDeviceWindow.h"
+#include "Runtime/GfxDevice/GfxDeviceObjects.h"
+#include "D3D9Utils.h"
+#include "TexturesD3D9.h"
+
+// Window render target for the D3D9 device, backed by an additional swap chain.
+// NOTE: the member functions are defined in D3D9Window.cpp inside `#if UNITY_EDITOR`.
+class D3D9Window : public GfxDeviceWindow
+{
+private:
+ IDirect3DDevice9* m_Device; // device used to create resources; not released by this class
+ IDirect3DSwapChain9* m_SwapChain; // created in Reshape(), released in the destructor
+ RenderColorSurfaceD3D9 m_BackBuffer;
+ RenderDepthSurfaceD3D9 m_DepthStencil;
+ D3DFORMAT m_DepthStencilFormat; // D3DFMT_UNKNOWN when no depth buffer was requested
+ int m_FSAALevel; // derived from the swap chain's multisample type/quality in Reshape()
+public:
+ D3D9Window( IDirect3DDevice9* device, HWND window, int width, int height, DepthBufferFormat depthFormat, int antiAlias );
+ ~D3D9Window();
+
+ // Recreates swap chain and surfaces; returns false on failure.
+ bool Reshape( int width, int height, DepthBufferFormat depthFormat, int antiAlias );
+
+ // Bracket rendering into this window; EndRendering optionally presents the swap chain.
+ bool BeginRendering();
+ bool EndRendering( bool presentContent );
+ void SetAsActiveWindow ();
+
+ D3DFORMAT GetDepthStencilFormat() const { return m_DepthStencilFormat; }
+
+ // Handles pointing at m_BackBuffer / m_DepthStencil.
+ RenderSurfaceHandle GetBackBuffer();
+ RenderSurfaceHandle GetDepthStencil();
+};
+
+#if UNITY_EDITOR
+int GetCurrentD3DFSAALevel();
+#endif
+
+#endif
diff --git a/Runtime/GfxDevice/d3d/GfxDeviceD3D9.cpp b/Runtime/GfxDevice/d3d/GfxDeviceD3D9.cpp
new file mode 100644
index 0000000..77fe956
--- /dev/null
+++ b/Runtime/GfxDevice/d3d/GfxDeviceD3D9.cpp
@@ -0,0 +1,3009 @@
+#include "UnityPrefix.h"
+#include "Runtime/GfxDevice/GfxDevice.h"
+#include "Runtime/Shaders/GraphicsCaps.h"
+#include "GfxDeviceD3D9.h"
+#include "D3D9Context.h"
+#include "Runtime/Math/FloatConversion.h"
+#include "D3D9VBO.h"
+#include "CombinerD3D.h"
+#include "External/shaderlab/Library/program.h"
+#include "External/shaderlab/Library/TextureBinding.h"
+#include "External/shaderlab/Library/texenv.h"
+#include "Runtime/Math/Matrix4x4.h"
+#include "Runtime/GfxDevice/ChannelAssigns.h"
+#include "External/shaderlab/Library/pass.h"
+#include "Runtime/GfxDevice/BuiltinShaderParams.h"
+#include "Runtime/GfxDevice/GpuProgramParamsApply.h"
+#include "Runtime/Graphics/Image.h"
+#include "Runtime/Graphics/ScreenManager.h"
+#include "PlatformDependent/Win/SmartComPointer.h"
+#include "PlatformDependent/Win/WinUnicode.h"
+#include "Runtime/Allocator/LinearAllocator.h"
+#include "Runtime/Utilities/Utility.h"
+#include "Runtime/Utilities/ArrayUtility.h"
+#include "Runtime/Threads/Thread.h"
+#include "Runtime/Misc/Plugins.h"
+#include "D3D9Utils.h"
+#include "D3D9Window.h"
+#include "RenderTextureD3D.h"
+#include "GpuProgramsD3D.h"
+#include "TimerQueryD3D9.h"
+#include "GfxDeviceD3D9.h"
+
+
+// --------------------------------------------------------------------------
+
+bool IsActiveRenderTargetWithColorD3D9();
+
+typedef std::list<IDirect3DQuery9*> D3D9QueryList;
+static D3D9QueryList s_EventQueries;
+
+static void PushEventQuery (int maxBuffer);
+static void CleanupEventQueries ();
+
+
+
+// Maps engine blend factors to D3DBLEND values.
+// Indexed by the srcBlend / dstBlend / srcBlendAlpha / dstBlendAlpha fields of GfxBlendState
+// (see SetBlendState); entry order must match that enum.
+static const D3DBLEND kBlendModeD3D9[] = {
+ D3DBLEND_ZERO, D3DBLEND_ONE, D3DBLEND_DESTCOLOR, D3DBLEND_SRCCOLOR, D3DBLEND_INVDESTCOLOR, D3DBLEND_SRCALPHA, D3DBLEND_INVSRCCOLOR,
+ D3DBLEND_DESTALPHA, D3DBLEND_INVDESTALPHA, D3DBLEND_SRCALPHASAT, D3DBLEND_INVSRCALPHA,
+};
+
+// Maps engine blend operations to D3DBLENDOP values.
+// Indexed by the blendOp / blendOpAlpha fields of GfxBlendState (see SetBlendState).
+static const D3DBLENDOP kBlendOpD3D9[] = {
+ D3DBLENDOP_ADD, D3DBLENDOP_SUBTRACT, D3DBLENDOP_REVSUBTRACT, D3DBLENDOP_MIN, D3DBLENDOP_MAX,
+};
+
+// Maps engine compare functions to D3DCMPFUNC values.
+// Indexed by the alphaTest, depthFunc and stencilFunc* fields in the Create*State functions.
+// NOTE(review): the first and last entries are both D3DCMP_ALWAYS; presumably the first
+// slot stands for the "disabled" compare function -- confirm against the CompareFunction enum.
+static const D3DCMPFUNC kCmpFuncD3D9[] = {
+ D3DCMP_ALWAYS, D3DCMP_NEVER, D3DCMP_LESS, D3DCMP_EQUAL, D3DCMP_LESSEQUAL, D3DCMP_GREATER, D3DCMP_NOTEQUAL, D3DCMP_GREATEREQUAL, D3DCMP_ALWAYS
+};
+
+// Maps engine stencil operations to D3DSTENCILOP values.
+// Indexed by the stencilFailOp* / stencilZFailOp* / stencilPassOp* fields of GfxStencilState
+// (see CreateStencilState).
+static const D3DSTENCILOP kStencilOpD3D9[] = {
+ D3DSTENCILOP_KEEP, D3DSTENCILOP_ZERO, D3DSTENCILOP_REPLACE, D3DSTENCILOP_INCRSAT,
+ D3DSTENCILOP_DECRSAT, D3DSTENCILOP_INVERT, D3DSTENCILOP_INCR, D3DSTENCILOP_DECR
+};
+
+// Maps engine cull modes to D3DCULL values; indexed by GfxRasterState::cullMode
+// (see SetRasterState). Read-only lookup table, so it is const like its siblings.
+static const D3DCULL kCullModeD3D9[] = {
+	D3DCULL_NONE, D3DCULL_CW, D3DCULL_CCW
+};
+
+// --------------------------------------------------------------------------
+
+
+// Packs a float RGBA color (color[0..3] = R, G, B, A) into a D3DCOLOR,
+// converting each channel with NormalizedToByte.
+static inline D3DCOLOR ColorToD3D( const float color[4] )
+{
+	const int red   = NormalizedToByte(color[0]);
+	const int green = NormalizedToByte(color[1]);
+	const int blue  = NormalizedToByte(color[2]);
+	const int alpha = NormalizedToByte(color[3]);
+	return D3DCOLOR_RGBA( red, green, blue, alpha );
+}
+
+
+
+// --------------------------------------------------------------------------
+
+enum {
+ kNeedsSoftwareVPVertexShader = (1<<0),
+ kNeedsSoftwareVPTexGen = (1<<1),
+};
+
+class GfxDeviceD3D9;
+
+static void ApplyBackfaceMode( DeviceStateD3D& state );
+static void ApplyStencilFuncAndOp( DeviceStateD3D& state );
+
+
+
+
+
+// Resets the cached device state to "unknown" sentinel values so that the next state
+// change is always pushed to D3D, then (if a device exists and is not lost) puts a few
+// pieces of real device state back to known defaults.
+// The 'device' argument is not used by this function.
+void DeviceStateD3D::Invalidate( GfxDeviceD3D9& device )
+{
+	// Depth
+	depthFunc = kFuncUnknown;
+	depthWrite = -1;
+
+	// Blending and alpha test; -1 won't match any D3D mode
+	blending = -1; // unknown
+	srcBlend = -1;
+	destBlend = -1;
+	srcBlendAlpha = -1;
+	destBlendAlpha = -1;
+	blendOp = -1;
+	blendOpAlpha = -1;
+	alphaFunc = kFuncUnknown;
+	alphaValue = -1.0f;
+
+	// Rasterizer
+	culling = kCullUnknown;
+	d3dculling = D3DCULL_FORCE_DWORD;
+	scissor = -1;
+	offsetFactor = -1000.0f;
+	offsetUnits = -1000.0f;
+
+	// Programs
+	for( int shaderType = 0; shaderType < kShaderTypeCount; ++shaderType )
+	{
+		activeGpuProgramParams[shaderType] = NULL;
+		activeGpuProgram[shaderType] = NULL;
+		activeShader[shaderType] = NULL;
+	}
+	fixedFunctionPS = 0;
+
+	colorWriteMask = -1; // TBD ?
+	m_StencilRef = -1;
+
+	// Textures
+	for( int unit = 0; unit < ARRAY_SIZE(texturesPS); ++unit )
+		texturesPS[unit].Invalidate();
+	for( int unit = 0; unit < ARRAY_SIZE(texturesVS); ++unit )
+		texturesVS[unit].Invalidate();
+
+	m_SoftwareVP = false;
+	m_NeedsSofwareVPFlags = 0;
+
+	// Everything below touches the actual device; skip it when there is none or it is lost.
+	IDirect3DDevice9* dev = GetD3DDeviceNoAssert();
+	if( dev == NULL || m_DeviceLost )
+		return;
+
+	D3D9_CALL(dev->SetVertexShader( NULL ));
+	D3D9_CALL(dev->SetPixelShader( NULL ));
+
+	ApplyBackfaceMode( *this );
+
+	if( g_D3DUsesMixedVP )
+		D3D9_CALL(dev->SetSoftwareVertexProcessing( FALSE ));
+
+	// misc. state
+	D3D9_CALL(dev->SetRenderState( D3DRS_LOCALVIEWER, TRUE ));
+
+	#if UNITY_EDITOR
+	D3D9_CALL(dev->SetRenderState( D3DRS_FILLMODE, D3DFILL_SOLID ));
+	#endif
+}
+
+
+// Updates the fixed function vertex pipe configuration from the given channel
+// assignments. Does nothing while a vertex shader is active.
+//
+// config.textureSources packs one 3 bit texture source per texcoord unit. For every unit
+// whose current source is kTexSourceUV1 or lower, the source is rewritten to UV0 or UV1
+// depending on which shader channel feeds that texcoord; other sources are left untouched.
+// config.hasVertexColor is set from whether the color component is a bound target.
+void UpdateChannelBindingsD3D( const ChannelAssigns& channels )
+{
+	// Texture coordinate index bindings
+	GfxDeviceD3D9& device = (GfxDeviceD3D9&)GetRealGfxDevice();
+	if( device.IsShaderActive(kShaderVertex) )
+		return;
+
+	const int maxTexCoords = gGraphicsCaps.maxTexCoords; // fetch here once
+
+	VertexPipeConfig& config = device.GetVertexPipeConfig();
+	UInt32 textureSources = config.textureSources;
+	for( int i = 0; i < maxTexCoords; ++i )
+	{
+		const int shift = i * 3; // 3 bits per texcoord unit
+		const UInt32 unitMask = 7u << shift;
+		const UInt32 source = (textureSources >> shift) & 0x7;
+		if( source > kTexSourceUV1 )
+			continue;
+
+		ShaderChannel texCoordChannel = channels.GetSourceForTarget( (VertexComponent)(kVertexCompTexCoord0 + i) );
+		if( texCoordChannel == kShaderChannelTexCoord0 )
+			textureSources = (textureSources & ~unitMask) | ((UInt32)kTexSourceUV0 << shift);
+		else if( texCoordChannel == kShaderChannelTexCoord1 )
+			textureSources = (textureSources & ~unitMask) | ((UInt32)kTexSourceUV1 << shift);
+		else if( texCoordChannel != kShaderChannelNone ) {
+			AssertString( "Bad texcoord index" );
+		}
+	}
+	config.textureSources = textureSources;
+
+	config.hasVertexColor = (channels.GetTargetMap() & (1<<kVertexCompColor)) ? 1 : 0;
+}
+
+
+// Callback object passed to ApplyMaterialPropertyBlockValues (see BeforeDrawCall).
+// Routes vector / matrix values into the vertex or pixel shader constant cache
+// depending on shaderType, and forwards textures to the device.
+struct SetValuesFunctorD3D9
+{
+ SetValuesFunctorD3D9(GfxDevice& device, VertexShaderConstantCache& vs, PixelShaderConstantCache& ps) : m_Device(device), vscache(vs), pscache(ps) { }
+ GfxDevice& m_Device;
+ VertexShaderConstantCache& vscache;
+ PixelShaderConstantCache& pscache;
+ // Writes a single constant register at 'index'; type, cols, params and cbIndex are not used here.
+ void SetVectorVal (ShaderType shaderType, ShaderParamType type, int index, const float* ptr, int cols, const GpuProgramParameters& params, int cbIndex)
+ {
+ if (shaderType == kShaderVertex)
+ vscache.SetValues(index, ptr, 1);
+ else
+ pscache.SetValues(index, ptr, 1);
+ }
+ // Transposes the matrix and writes it as four consecutive registers starting at 'index'.
+ // Only 4-row matrices are expected (checked with DebugAssert).
+ void SetMatrixVal (ShaderType shaderType, int index, const Matrix4x4f* ptr, int rows, const GpuProgramParameters& params, int cbIndex)
+ {
+ DebugAssert(rows == 4);
+ Matrix4x4f mat;
+ TransposeMatrix4x4 (ptr, &mat);
+ if (shaderType == kShaderVertex)
+ vscache.SetValues(index, mat.GetPtr(), 4);
+ else
+ pscache.SetValues(index, mat.GetPtr(), 4);
+ }
+ // Forwards to GfxDevice::SetTexture.
+ // NOTE(review): the trailing infinity argument is presumably a "leave bias unchanged" marker -- confirm against SetTexture.
+ void SetTextureVal (ShaderType shaderType, int index, int samplerIndex, TextureDimension dim, TextureID texID)
+ {
+ m_Device.SetTexture (shaderType, index, samplerIndex, texID, dim, std::numeric_limits<float>::infinity());
+ }
+};
+
+
+// Compute/Update any deferred state before each draw call
+//
+// In order: refreshes the world-view matrix, runs the deferred vertex pipe setup,
+// uploads the built-in matrices and instance vectors that the active programs ask for
+// (gpuIndex >= 0), uploads texture matrices for the vertex shader, toggles software
+// vertex processing when mixed VP is in use, applies material property block values,
+// and finally commits both constant caches.
+void GfxDeviceD3D9::BeforeDrawCall( bool immediateMode )
+{
+ VertexShaderConstantCache& vscache = GetVertexShaderConstantCache();
+ PixelShaderConstantCache& pscache = GetPixelShaderConstantCache();
+ DeviceStateD3D& state = m_State;
+ IDirect3DDevice9* dev = GetD3DDevice();
+ bool usesVertexShader = (state.activeShader[kShaderVertex] != NULL);
+
+ //@TODO: remove TESTING CODE
+ // Whenever immediateMode flips compared to the previous call, the remembered
+ // "previous" vertex pipe state is reset so that the setup below reapplies everything.
+ static bool oldTnL = false;
+ if( oldTnL != (!immediateMode) )
+ {
+ m_VertexPrevious.config.Reset ();
+ m_VertexPrevious.ambient.set(-1,-1,-1,-1);
+ oldTnL = !immediateMode;
+ }
+
+ m_TransformState.UpdateWorldViewMatrix (m_BuiltinParamValues);
+
+ // Deferred setup of fixed function stuff
+ if (!immediateMode)
+ SetupVertexShaderD3D9( dev, m_TransformState, m_BuiltinParamValues, m_VertexConfig, m_VertexData, m_VertexPrevious, vscache, usesVertexShader, immediateMode );
+ else
+ SetupFixedFunctionD3D9( dev, m_TransformState, m_BuiltinParamValues, m_VertexConfig, m_VertexData, m_VertexPrevious, usesVertexShader, immediateMode );
+
+
+ // update GL equivalents of built-in shader state
+
+ const BuiltinShaderParamIndices& paramsVS = *m_BuiltinParamIndices[kShaderVertex];
+ const BuiltinShaderParamIndices& paramsPS = *m_BuiltinParamIndices[kShaderFragment];
+ int gpuIndexVS, gpuIndexPS;
+
+// SET_BUILTIN_MATRIX_BEGIN(idx) looks up the VS/PS register index of built-in matrix 'idx'
+// and opens an `if` that is taken when at least one of the two programs uses it.
+// SET_BUILTIN_MATRIX_END(name) writes matrix 'name' (4 registers) to whichever caches need it.
+#define SET_BUILTIN_MATRIX_BEGIN(idx) \
+ gpuIndexVS = paramsVS.mat[idx].gpuIndex; gpuIndexPS = paramsPS.mat[idx].gpuIndex; if (gpuIndexVS >= 0 || gpuIndexPS >= 0)
+
+#define SET_BUILTIN_MATRIX_END(name) \
+ if (gpuIndexVS >= 0) vscache.SetValues(gpuIndexVS, name.GetPtr(), 4); \
+ if (gpuIndexPS >= 0) pscache.SetValues(gpuIndexPS, name.GetPtr(), 4)
+
+ // MVP matrix
+ SET_BUILTIN_MATRIX_BEGIN(kShaderInstanceMatMVP)
+ {
+ Matrix4x4f matMul;
+ MultiplyMatrices4x4 (&m_BuiltinParamValues.GetMatrixParam(kShaderMatProj), &m_TransformState.worldViewMatrix, &matMul);
+ Matrix4x4f mat;
+ TransposeMatrix4x4 (&matMul, &mat);
+ SET_BUILTIN_MATRIX_END(mat);
+ }
+ // MV matrix
+ SET_BUILTIN_MATRIX_BEGIN(kShaderInstanceMatMV)
+ {
+ Matrix4x4f mat;
+ TransposeMatrix4x4 (&m_TransformState.worldViewMatrix, &mat);
+ SET_BUILTIN_MATRIX_END(mat);
+ }
+ // Transpose MV matrix
+ // (the other matrices are transposed before upload, so uploading MV as-is yields its transpose)
+ SET_BUILTIN_MATRIX_BEGIN(kShaderInstanceMatTransMV)
+ {
+ const Matrix4x4f& mat = m_TransformState.worldViewMatrix;
+ SET_BUILTIN_MATRIX_END(mat);
+ }
+ // Inverse transpose of MV matrix
+ SET_BUILTIN_MATRIX_BEGIN(kShaderInstanceMatInvTransMV)
+ {
+ Matrix4x4f mat;
+ Matrix4x4f::Invert_Full (m_TransformState.worldViewMatrix, mat);
+ if (m_VertexData.normalization == kNormalizationScale)
+ {
+ // Inverse transpose of modelview should be scaled by uniform
+ // normal scale (this will match state.matrix.invtrans.modelview
+ // and gl_NormalMatrix in OpenGL)
+ float scale = Magnitude (m_TransformState.worldMatrix.GetAxisX());
+ mat.Get (0, 0) *= scale;
+ mat.Get (1, 0) *= scale;
+ mat.Get (2, 0) *= scale;
+ mat.Get (0, 1) *= scale;
+ mat.Get (1, 1) *= scale;
+ mat.Get (2, 1) *= scale;
+ mat.Get (0, 2) *= scale;
+ mat.Get (1, 2) *= scale;
+ mat.Get (2, 2) *= scale;
+ }
+ SET_BUILTIN_MATRIX_END(mat);
+ }
+ // M matrix
+ SET_BUILTIN_MATRIX_BEGIN(kShaderInstanceMatM)
+ {
+ Matrix4x4f mat;
+ TransposeMatrix4x4 (&m_TransformState.worldMatrix, &mat);
+ SET_BUILTIN_MATRIX_END(mat);
+ }
+ // Inverse M matrix
+ SET_BUILTIN_MATRIX_BEGIN(kShaderInstanceMatInvM)
+ {
+ Matrix4x4f mat = m_TransformState.worldMatrix;
+ if (m_VertexData.normalization == kNormalizationScale)
+ {
+ // Kill scale in the world matrix before inverse
+ float invScale = m_BuiltinParamValues.GetInstanceVectorParam(kShaderInstanceVecScale).w;
+ mat.Get (0, 0) *= invScale;
+ mat.Get (1, 0) *= invScale;
+ mat.Get (2, 0) *= invScale;
+ mat.Get (0, 1) *= invScale;
+ mat.Get (1, 1) *= invScale;
+ mat.Get (2, 1) *= invScale;
+ mat.Get (0, 2) *= invScale;
+ mat.Get (1, 2) *= invScale;
+ mat.Get (2, 2) *= invScale;
+ }
+ Matrix4x4f inverseMat;
+ Matrix4x4f::Invert_General3D (mat, inverseMat);
+ TransposeMatrix4x4 (&inverseMat, &mat);
+ SET_BUILTIN_MATRIX_END(mat);
+ }
+
+ // Set instance vector parameters
+ for (int i = 0; i < kShaderInstanceVecCount; ++i)
+ {
+ gpuIndexVS = paramsVS.vec[i].gpuIndex;
+ if (gpuIndexVS >= 0)
+ vscache.SetValues(gpuIndexVS, m_BuiltinParamValues.GetInstanceVectorParam((ShaderBuiltinInstanceVectorParam)i).GetPtr(), 1);
+ gpuIndexPS = paramsPS.vec[i].gpuIndex;
+ if (gpuIndexPS >= 0)
+ pscache.SetValues(gpuIndexPS, m_BuiltinParamValues.GetInstanceVectorParam((ShaderBuiltinInstanceVectorParam)i).GetPtr(), 1);
+ }
+
+ // Texture matrices for vertex shader
+ // (only the vertex shader indices are consulted here; eight texture matrices are considered)
+ for( int i = 0; i < 8; ++i )
+ {
+ if( paramsVS.mat[kShaderInstanceMatTexture0 + i].gpuIndex >= 0 )
+ {
+ Matrix4x4f mat;
+ TransposeMatrix4x4 (&m_TransformState.texMatrices[i], &mat);
+ const int index = paramsVS.mat[kShaderInstanceMatTexture0 + i].gpuIndex;
+ vscache.SetValues( index, mat.GetPtr(), 4 );
+ }
+ }
+
+ // Software VP flags
+ // Only relevant with mixed vertex processing; the device call is issued only when the
+ // wanted mode differs from the cached state.m_SoftwareVP.
+ if( g_D3DUsesMixedVP )
+ {
+ if( state.m_NeedsSofwareVPFlags )
+ {
+ if( state.m_SoftwareVP == false )
+ {
+ D3D9_CALL(dev->SetSoftwareVertexProcessing( TRUE ));
+ state.m_SoftwareVP = true;
+ }
+ }
+ else
+ {
+ if( state.m_SoftwareVP == true )
+ {
+ D3D9_CALL(dev->SetSoftwareVertexProcessing( FALSE ));
+ state.m_SoftwareVP = false;
+ }
+ }
+ }
+
+ SetValuesFunctorD3D9 setValuesFunc(*this, vscache, pscache);
+ ApplyMaterialPropertyBlockValues(m_MaterialProperties, m_State.activeGpuProgram ,m_State.activeGpuProgramParams, setValuesFunc);
+
+ // Push everything accumulated in the caches to the device.
+ vscache.CommitVertexConstants();
+ pscache.CommitPixelConstants();
+}
+
+
+// Returns the cached device blend state for 'state', creating and filling it on first use.
+// The returned pointer refers to an entry of m_CachedBlendStates.
+DeviceBlendState* GfxDeviceD3D9::CreateBlendState(const GfxBlendState& state)
+{
+	// insert() leaves an already existing entry untouched and reports that via .second
+	std::pair<CachedBlendStates::iterator, bool> inserted = m_CachedBlendStates.insert(std::make_pair(state, DeviceBlendStateD3D9()));
+	DeviceBlendStateD3D9& cached = inserted.first->second;
+	if (!inserted.second)
+		return &cached;
+
+	// Fresh entry: remember the source description and precompute the D3D values.
+	memcpy(&cached.sourceState, &state, sizeof(GfxBlendState));
+
+	const UInt8 writeMask = state.renderTargetWriteMask;
+	DWORD d3dWriteMask = 0;
+	if( writeMask & kColorWriteR )
+		d3dWriteMask |= D3DCOLORWRITEENABLE_RED;
+	if( writeMask & kColorWriteG )
+		d3dWriteMask |= D3DCOLORWRITEENABLE_GREEN;
+	if( writeMask & kColorWriteB )
+		d3dWriteMask |= D3DCOLORWRITEENABLE_BLUE;
+	if( writeMask & kColorWriteA )
+		d3dWriteMask |= D3DCOLORWRITEENABLE_ALPHA;
+	cached.renderTargetWriteMask = d3dWriteMask;
+
+	DebugAssertIf(kFuncUnknown==state.alphaTest);
+	cached.alphaFunc = kCmpFuncD3D9[state.alphaTest];
+
+	return &cached;
+}
+
+
+// Returns the cached device depth state for 'state', creating and filling it on first use.
+// The returned pointer refers to an entry of m_CachedDepthStates.
+DeviceDepthState* GfxDeviceD3D9::CreateDepthState(const GfxDepthState& state)
+{
+	std::pair<CachedDepthStates::iterator, bool> inserted = m_CachedDepthStates.insert(std::make_pair(state, DeviceDepthStateD3D9()));
+	DeviceDepthStateD3D9& cached = inserted.first->second;
+	if (inserted.second)
+	{
+		// Fresh entry: keep the source description and the translated compare function.
+		memcpy(&cached.sourceState, &state, sizeof(GfxDepthState));
+		cached.depthFunc = kCmpFuncD3D9[state.depthFunc];
+	}
+	return &cached;
+}
+
+// Returns the cached device stencil state for 'state', creating and filling it on first use.
+// Front and back face compare functions / operations are translated to D3D values once here.
+DeviceStencilState* GfxDeviceD3D9::CreateStencilState(const GfxStencilState& state)
+{
+	std::pair<CachedStencilStates::iterator, bool> inserted = m_CachedStencilStates.insert(std::make_pair(state, DeviceStencilStateD3D9()));
+	DeviceStencilStateD3D9& cached = inserted.first->second;
+	if (!inserted.second)
+		return &cached;
+
+	memcpy(&cached.sourceState, &state, sizeof(state));
+
+	// Compare functions
+	cached.stencilFuncFront = kCmpFuncD3D9[state.stencilFuncFront];
+	cached.stencilFuncBack = kCmpFuncD3D9[state.stencilFuncBack];
+
+	// Front face operations
+	cached.stencilFailOpFront = kStencilOpD3D9[state.stencilFailOpFront];
+	cached.depthFailOpFront = kStencilOpD3D9[state.stencilZFailOpFront];
+	cached.depthPassOpFront = kStencilOpD3D9[state.stencilPassOpFront];
+
+	// Back face operations
+	cached.stencilFailOpBack = kStencilOpD3D9[state.stencilFailOpBack];
+	cached.depthFailOpBack = kStencilOpD3D9[state.stencilZFailOpBack];
+	cached.depthPassOpBack = kStencilOpD3D9[state.stencilPassOpBack];
+
+	return &cached;
+}
+
+
+
+// Returns the cached device raster state for 'state', creating it on first use.
+// The returned pointer refers to an entry of m_CachedRasterStates.
+DeviceRasterState* GfxDeviceD3D9::CreateRasterState(const GfxRasterState& state)
+{
+	std::pair<CachedRasterStates::iterator, bool> result = m_CachedRasterStates.insert(std::make_pair(state, DeviceRasterState()));
+	if (!result.second)
+		return &result.first->second;
+
+	DeviceRasterState& d3dstate = result.first->second;
+	// Copy exactly the source description. The size must be that of the GfxRasterState
+	// being copied, not of the enclosing DeviceRasterState, otherwise the copy could read
+	// past 'state' and write past 'sourceState' if the wrapper ever has extra members.
+	memcpy(&d3dstate.sourceState, &state, sizeof(GfxRasterState));
+
+	return &result.first->second;
+}
+
+
+// Applies a blend state (color write mask, blend factors / ops, alpha test) to the device,
+// only issuing D3D calls for values that differ from the cached m_State.
+//
+// state    - state object created by CreateBlendState; NULL just clears m_CurrBlendState.
+// alphaRef - alpha test reference value; it is multiplied by 255 before being sent to D3D.
+void GfxDeviceD3D9::SetBlendState(const DeviceBlendState* state, float alphaRef)
+{
+ DeviceBlendStateD3D9* devstate = (DeviceBlendStateD3D9*)state;
+
+ // Nothing to do when both the state object and the alpha reference are unchanged.
+ if (m_CurrBlendState == devstate && alphaRef == m_State.alphaValue)
+ return;
+
+ m_CurrBlendState = devstate;
+ if (!m_CurrBlendState)
+ return;
+
+ // Color writes are masked out entirely when the active render target has no color.
+ UInt32 colMask = devstate->renderTargetWriteMask;
+ if (!IsActiveRenderTargetWithColorD3D9())
+ colMask = 0;
+
+ if(colMask != m_State.colorWriteMask)
+ {
+ // NOTE(review): this fetch uses the NoAssert variant and the result is not NULL-checked,
+ // while the rest of the function uses GetD3DDevice() -- confirm this is intentional.
+ IDirect3DDevice9* dev = GetD3DDeviceNoAssert();
+ D3D9_CALL(dev->SetRenderState(D3DRS_COLORWRITEENABLE, colMask));
+ m_State.colorWriteMask = colMask;
+ }
+
+ const GfxBlendState& desc = state->sourceState;
+ const CompareFunction mode = state->sourceState.alphaTest;
+ const D3DBLEND d3dsrc = kBlendModeD3D9[desc.srcBlend];
+ const D3DBLEND d3ddst = kBlendModeD3D9[desc.dstBlend];
+ const D3DBLEND d3dsrca = kBlendModeD3D9[desc.srcBlendAlpha];
+ const D3DBLEND d3ddsta = kBlendModeD3D9[desc.dstBlendAlpha];
+ const D3DBLENDOP d3dop = kBlendOpD3D9[desc.blendOp];
+ const D3DBLENDOP d3dopa = kBlendOpD3D9[desc.blendOpAlpha];
+
+ // src=ONE, dst=ZERO for both color and alpha means the source simply replaces the destination,
+ // so blending is switched off instead (the blend ops are not part of this test).
+ const bool blendDisabled = (d3dsrc == D3DBLEND_ONE && d3ddst == D3DBLEND_ZERO && d3dsrca == D3DBLEND_ONE && d3ddsta == D3DBLEND_ZERO);
+
+ IDirect3DDevice9* dev = GetD3DDevice();
+ if(blendDisabled)
+ {
+ if( m_State.blending != 0 )
+ {
+ D3D9_CALL(dev->SetRenderState( D3DRS_ALPHABLENDENABLE, FALSE ));
+ m_State.blending = 0;
+ }
+ }
+ else
+ {
+ if( d3dsrc != m_State.srcBlend || d3ddst != m_State.destBlend )
+ {
+ D3D9_CALL(dev->SetRenderState( D3DRS_SRCBLEND, d3dsrc ));
+ D3D9_CALL(dev->SetRenderState( D3DRS_DESTBLEND, d3ddst ));
+ m_State.srcBlend = d3dsrc;
+ m_State.destBlend = d3ddst;
+ }
+
+ // Blend ops are only sent (and cached) when the hardware caps report support for them.
+ if (d3dop != m_State.blendOp)
+ {
+ bool supports = true;
+ if( (d3dop == D3DBLENDOP_SUBTRACT || d3dop == D3DBLENDOP_REVSUBTRACT) && !gGraphicsCaps.hasBlendSub )
+ supports = false;
+ if( (d3dop == D3DBLENDOP_MIN || d3dop == D3DBLENDOP_MAX) && !gGraphicsCaps.hasBlendMinMax )
+ supports = false;
+
+ if(supports)
+ {
+ D3D9_CALL(dev->SetRenderState(D3DRS_BLENDOP, d3dop));
+ m_State.blendOp = d3dop;
+ }
+ }
+ if (gGraphicsCaps.hasSeparateAlphaBlend)
+ {
+ if( d3dsrca != m_State.srcBlendAlpha || d3ddsta != m_State.destBlendAlpha || d3dopa != m_State.blendOpAlpha )
+ {
+ // Separate alpha blending is enabled only when the alpha setup actually differs from the color setup.
+ D3D9_CALL(dev->SetRenderState( D3DRS_SEPARATEALPHABLENDENABLE, d3dsrc != d3dsrca || d3ddst != d3ddsta || d3dopa != d3dop));
+ D3D9_CALL(dev->SetRenderState( D3DRS_SRCBLENDALPHA, d3dsrca ));
+ D3D9_CALL(dev->SetRenderState( D3DRS_DESTBLENDALPHA, d3ddsta ));
+ m_State.srcBlendAlpha = d3dsrca;
+ m_State.destBlendAlpha = d3ddsta;
+
+ bool supports = true;
+ if( (d3dopa == D3DBLENDOP_SUBTRACT || d3dopa == D3DBLENDOP_REVSUBTRACT) && !gGraphicsCaps.hasBlendSub )
+ supports = false;
+ if( (d3dopa == D3DBLENDOP_MIN || d3dopa == D3DBLENDOP_MAX) && !gGraphicsCaps.hasBlendMinMax )
+ supports = false;
+
+ if (supports)
+ {
+ D3D9_CALL(dev->SetRenderState(D3DRS_BLENDOPALPHA, d3dopa));
+ m_State.blendOpAlpha = d3dopa;
+ }
+ }
+ }
+ if( m_State.blending != 1 )
+ {
+ D3D9_CALL(dev->SetRenderState( D3DRS_ALPHABLENDENABLE, TRUE ));
+ m_State.blending = 1;
+ }
+ }
+
+ DebugAssertIf(mode==kFuncUnknown);
+#if UNITY_EDITOR // gles2.0 doesn't have FF alpha testing(only discard/clip on shader side), so disable on editor while emulating
+ bool skipAlphaTestFF = (gGraphicsCaps.IsEmulatingGLES20() && IsShaderActive(kShaderFragment));
+ // possible that vertex shader will be used with FF "frag shader" (like Transparent/vertexlit.shader),
+ // which will change alphatesting. So later on when real frag shaders come, we need to force disable alpha
+ // testing or enjoy nasty artefacts (like active alpha testing messing up the whole scene).
+ if ( skipAlphaTestFF && m_State.alphaFunc!=kFuncDisabled )
+ {
+ D3D9_CALL(dev->SetRenderState( D3DRS_ALPHATESTENABLE, FALSE ));
+ m_State.alphaFunc = kFuncDisabled;
+ }
+
+ if ( !skipAlphaTestFF )
+ {
+#endif
+ // Fixed function alpha test: update when either the function or the reference changed.
+ if( mode != m_State.alphaFunc || alphaRef != m_State.alphaValue )
+ {
+ if( mode != kFuncDisabled )
+ {
+ D3D9_CALL(dev->SetRenderState( D3DRS_ALPHATESTENABLE, TRUE ));
+ D3D9_CALL(dev->SetRenderState( D3DRS_ALPHAFUNC, kCmpFuncD3D9[mode] ));
+ D3D9_CALL(dev->SetRenderState( D3DRS_ALPHAREF, alphaRef * 255.0f ));
+ }
+ else
+ {
+ D3D9_CALL(dev->SetRenderState( D3DRS_ALPHATESTENABLE, FALSE ));
+ }
+
+ m_State.alphaFunc = mode;
+ m_State.alphaValue = alphaRef;
+ }
+#if UNITY_EDITOR
+ }
+#endif
+ // TODO: ATI/NVIDIA hacks
+}
+
+
+// Applies a raster state (cull mode and depth bias) to the device, only issuing D3D calls
+// for values that differ from the cached m_State.
+//
+// state - state object created by CreateRasterState; NULL just clears m_CurrRasterState.
+void GfxDeviceD3D9::SetRasterState(const DeviceRasterState* state)
+{
+	DeviceRasterState* devstate = (DeviceRasterState*)state;
+	if(!devstate)
+	{
+		m_CurrRasterState = NULL;
+		return;
+	}
+
+	m_CurrRasterState = devstate;
+
+	IDirect3DDevice9* dev = GetD3DDeviceNoAssert();
+	CullMode cull = devstate->sourceState.cullMode;
+	D3DCULL d3dcull = kCullModeD3D9[cull];
+	if( d3dcull != m_State.d3dculling )
+	{
+		D3D9_CALL(dev->SetRenderState( D3DRS_CULLMODE, d3dcull ));
+		m_State.culling = cull;
+		m_State.d3dculling = d3dcull;
+	}
+
+	float zFactor = devstate->sourceState.slopeScaledDepthBias;
+	float zUnits = devstate->sourceState.depthBias;
+	if( zFactor != m_State.offsetFactor || zUnits != m_State.offsetUnits )
+	{
+		m_State.offsetFactor = zFactor;
+		m_State.offsetUnits = zUnits;
+
+		// In D3D9 the values are in floating point, with 1 meaning "full depth range".
+		// In theory the offset should depend on depth buffer bit count, and on 24 bit depth buffer a value close to 4.8e-7 should be used
+		// (see Lengyel's GDC2007 "projection matrix tricks").
+		// However, it looks like even on 16 bit depth buffer, a value as-if-24-bit should be used (tested on Radeon HD 3850, GeForce 8600, Intel 945).
+		const double kOneBit = 4.8e-7;
+
+		// It looks like generally we need twice the one bit (PolygonOff2 unit test, on Radeon 3850 and GeForce 8600).
+		// To be somewhat safer, we make it three times the one bit. Still looks quite okay.
+		const float kBiasMultiplier = 3.0 * kOneBit;
+
+		// D3D expects the float bias values passed as their raw bit pattern in a DWORD.
+		// memcpy is used for the reinterpretation instead of a *(DWORD*)&float cast,
+		// which violates strict aliasing.
+		if( gGraphicsCaps.d3d.d3dcaps.RasterCaps & D3DPRASTERCAPS_DEPTHBIAS )
+		{
+			zUnits *= kBiasMultiplier;
+			DWORD zUnitsBits = 0;
+			memcpy( &zUnitsBits, &zUnits, sizeof(zUnits) );
+			D3D9_CALL(dev->SetRenderState( D3DRS_DEPTHBIAS, zUnitsBits ));
+		}
+		if( gGraphicsCaps.d3d.d3dcaps.RasterCaps & D3DPRASTERCAPS_SLOPESCALEDEPTHBIAS )
+		{
+			DWORD zFactorBits = 0;
+			memcpy( &zFactorBits, &zFactor, sizeof(zFactor) );
+			D3D9_CALL(dev->SetRenderState( D3DRS_SLOPESCALEDEPTHBIAS, zFactorBits ));
+		}
+	}
+}
+
+
+// Applies a depth state (compare function and depth write) to the device.
+// Does nothing when 'state' is already current; a NULL state only clears m_CurrDepthState.
+// Individual render states are sent only when they differ from the cached m_State.
+void GfxDeviceD3D9::SetDepthState(const DeviceDepthState* state)
+{
+	IDirect3DDevice9* d3d = GetD3DDeviceNoAssert();
+	DeviceDepthStateD3D9* newState = (DeviceDepthStateD3D9*)state;
+	if (newState == m_CurrDepthState)
+		return;
+
+	m_CurrDepthState = newState;
+	if (newState == NULL)
+		return;
+
+	// Depth compare function
+	if( m_State.depthFunc != newState->sourceState.depthFunc )
+	{
+		D3D9_CALL(d3d->SetRenderState( D3DRS_ZFUNC, newState->depthFunc ));
+		m_State.depthFunc = newState->sourceState.depthFunc;
+	}
+
+	// Depth write enable
+	int wantedDepthWrite = FALSE;
+	if( newState->sourceState.depthWrite )
+		wantedDepthWrite = TRUE;
+	if( m_State.depthWrite != wantedDepthWrite )
+	{
+		D3D9_CALL(d3d->SetRenderState( D3DRS_ZWRITEENABLE, wantedDepthWrite ));
+		m_State.depthWrite = wantedDepthWrite;
+	}
+}
+
+// Applies a stencil state and reference value to the device.
+// Does nothing when both the state object and the reference are unchanged; a NULL state
+// only clears m_CurrStencilState. Two-sided stencil mode is always switched on, and the
+// per-face functions / operations are stored in m_State and sent via ApplyStencilFuncAndOp.
+void GfxDeviceD3D9::SetStencilState(const DeviceStencilState* state, int stencilRef)
+{
+	const bool sameState = (m_CurrStencilState == state);
+	const bool sameRef = (m_State.m_StencilRef == stencilRef);
+	if (sameState && sameRef)
+		return;
+
+	const DeviceStencilStateD3D9* stencil = static_cast<const DeviceStencilStateD3D9*>(state);
+	m_CurrStencilState = stencil;
+	if (stencil == NULL)
+		return;
+
+	IDirect3DDevice9* d3d = GetD3DDevice();
+	D3D9_CALL (d3d->SetRenderState (D3DRS_STENCILENABLE, stencil->sourceState.stencilEnable));
+	D3D9_CALL (d3d->SetRenderState (D3DRS_TWOSIDEDSTENCILMODE, TRUE));
+	D3D9_CALL (d3d->SetRenderState (D3DRS_STENCILMASK, stencil->sourceState.readMask));
+	D3D9_CALL (d3d->SetRenderState (D3DRS_STENCILWRITEMASK, stencil->sourceState.writeMask));
+	D3D9_CALL (d3d->SetRenderState (D3DRS_STENCILREF, stencilRef));
+
+	// Slot [0] holds the front face values, slot [1] the back face values.
+	m_State.stencilFunc[0] = stencil->stencilFuncFront;
+	m_State.stencilFunc[1] = stencil->stencilFuncBack;
+	m_State.stencilFailOp[0] = stencil->stencilFailOpFront;
+	m_State.stencilFailOp[1] = stencil->stencilFailOpBack;
+	m_State.depthFailOp[0] = stencil->depthFailOpFront;
+	m_State.depthFailOp[1] = stencil->depthFailOpBack;
+	m_State.depthPassOp[0] = stencil->depthPassOpFront;
+	m_State.depthPassOp[1] = stencil->depthPassOpBack;
+	ApplyStencilFuncAndOp(m_State);
+
+	m_State.m_StencilRef = stencilRef;
+}
+
+// Sends the cached per-face stencil functions and operations to the device.
+// Slot [0] normally holds the front-face state and slot [1] the back-face
+// state; when rendering upside-down the winding order flips, so the slots
+// are swapped between the CW and CCW render states.
+static void ApplyStencilFuncAndOp (DeviceStateD3D& state)
+{
+    IDirect3DDevice9* dev = GetD3DDevice();
+
+    int cw = 0;
+    if (state.invertProjMatrix)
+        cw = 1;
+    const int ccw = 1 - cw; // the other slot
+
+    // Clockwise faces.
+    D3D9_CALL (dev->SetRenderState (D3DRS_STENCILFUNC, state.stencilFunc[cw]));
+    D3D9_CALL (dev->SetRenderState (D3DRS_STENCILFAIL, state.stencilFailOp[cw]));
+    D3D9_CALL (dev->SetRenderState (D3DRS_STENCILZFAIL, state.depthFailOp[cw]));
+    D3D9_CALL (dev->SetRenderState (D3DRS_STENCILPASS, state.depthPassOp[cw]));
+
+    // Counter-clockwise faces.
+    D3D9_CALL (dev->SetRenderState (D3DRS_CCW_STENCILFUNC, state.stencilFunc[ccw]));
+    D3D9_CALL (dev->SetRenderState (D3DRS_CCW_STENCILFAIL, state.stencilFailOp[ccw]));
+    D3D9_CALL (dev->SetRenderState (D3DRS_CCW_STENCILZFAIL, state.depthFailOp[ccw]));
+    D3D9_CALL (dev->SetRenderState (D3DRS_CCW_STENCILPASS, state.depthPassOp[ccw]));
+}
+
+// Sets the D3DRS_SRGBWRITEENABLE render state to `enable`.
+// The value is not cached; the render state is set on every call.
+void GfxDeviceD3D9::SetSRGBWrite (bool enable)
+{
+    IDirect3DDevice9* dev = GetD3DDevice();
+
+    D3D9_CALL (dev->SetRenderState (D3DRS_SRGBWRITEENABLE, enable));
+}
+
+// Queries the device for the current D3DRS_SRGBWRITEENABLE render state.
+// Returns true only when the device reports TRUE; returns false when the
+// query does not write a value.
+bool GfxDeviceD3D9::GetSRGBWrite ()
+{
+    IDirect3DDevice9* dev = GetD3DDevice();
+    // Start from "disabled" so a failed query (GetRenderState is documented as
+    // unsupported on pure devices) cannot make us read an uninitialized value.
+    DWORD v = 0;
+    D3D9_CALL (dev->GetRenderState (D3DRS_SRGBWRITEENABLE, &v));
+    return (v==TRUE);
+}
+
+GfxThreadableDevice* CreateD3D9GfxDevice(bool forceREF) // Initializes D3D, creates the GfxDeviceD3D9 object and its D3D device; returns NULL on failure.
+{
+ if( !InitializeD3D(forceREF ? D3DDEVTYPE_REF : D3DDEVTYPE_HAL) ) // forceREF selects the REF device type instead of HAL
+ return NULL;
+
+ #if UNITY_EDITOR
+ if (!CreateHiddenWindowD3D()) // editor builds only: a hidden window is created first
+ return NULL; // NOTE(review): returns without undoing InitializeD3D() — confirm whether cleanup is needed here
+ #endif
+
+ gGraphicsCaps.InitD3D9(); // caps are initialized before the device object is constructed
+
+ GfxDeviceD3D9* device = UNITY_NEW_AS_ROOT(GfxDeviceD3D9(), kMemGfxDevice, "D3D9GfxDevice", "");
+
+#if UNITY_EDITOR
+ EditorInitializeD3D(device);
+#else
+ ScreenManagerWin& screenMgr = GetScreenManager(); // player builds: use the screen manager's window and size
+ HWND window = screenMgr.GetWindow();
+ int width = screenMgr.GetWidth();
+ int height = screenMgr.GetHeight();
+ int dummy; // receives the four output values, which are not used here
+ if (!InitializeOrResetD3DDevice(device, window, width, height, 0, false, 0, 0, dummy, dummy, dummy, dummy))
+ {
+ UNITY_DELETE(device, kMemGfxDevice); // device creation failed: destroy the object and report NULL
+ device = NULL;
+ }
+#endif
+
+ return device;
+}
+
+// Returns the real graphics device as a GfxDeviceD3D9.
+// Asserts that the active renderer is actually the D3D9 one.
+GfxDeviceD3D9& GetD3D9GfxDevice()
+{
+    GfxDevice& device = GetRealGfxDevice();
+    Assert( device.GetRenderer() == kGfxRendererD3D9 );
+
+    GfxDeviceD3D9& d3d9Device = static_cast<GfxDeviceD3D9&>(device);
+    return d3d9Device;
+}
+
+// Returns the "device lost" flag stored in the D3D9 device state.
+bool IsD3D9DeviceLost()
+{
+    GfxDevice& realDevice = GetRealGfxDevice();
+    GfxDeviceD3D9& device = static_cast<GfxDeviceD3D9&>( realDevice );
+    AssertIf( device.GetRenderer() != kGfxRendererD3D9 );
+
+    const bool isLost = device.GetState().m_DeviceLost;
+    return isLost;
+}
+
+// Stores the "device lost" flag in the D3D9 device state.
+void SetD3D9DeviceLost( bool lost )
+{
+    GfxDevice& realDevice = GetRealGfxDevice();
+    GfxDeviceD3D9& device = static_cast<GfxDeviceD3D9&>( realDevice );
+    AssertIf( device.GetRenderer() != kGfxRendererD3D9 );
+
+    device.GetState().m_DeviceLost = lost;
+}
+
+
+GfxDeviceD3D9::GfxDeviceD3D9() // Sets all cached state to defaults and installs dummy back-buffer surfaces.
+{
+ m_State.m_DeviceLost = false; // set before InvalidateState() below, which reads this flag
+ m_DynamicVBO = NULL; // created lazily by GetDynamicVBO()
+
+ m_State.appBackfaceMode = false;
+ m_State.userBackfaceMode = false;
+ m_State.invertProjMatrix = false;
+ m_State.wireframe = false;
+
+ InvalidateState(); // resets the cached vertex pipe, fog, render state and shader constant caches
+ ResetFrameStats();
+
+ m_Renderer = kGfxRendererD3D9;
+ m_UsesOpenGLTextureCoords = false;
+ m_UsesHalfTexelOffset = true;
+ m_IsThreadable = true;
+
+ m_MaxBufferedFrames = 1; // -1 means no limiting, default is 1
+
+ m_State.viewport[0] = m_State.viewport[1] = m_State.viewport[2] = m_State.viewport[3] = 0;
+ m_State.scissorRect[0] = m_State.scissorRect[1] = m_State.scissorRect[2] = m_State.scissorRect[3] = 0;
+
+ m_CurrBlendState = 0; // no state objects are current yet
+ m_CurrDepthState = 0;
+ m_CurrStencilState = 0;
+ m_CurrRasterState = 0;
+ m_CurrTargetWidth = 0;
+ m_CurrTargetHeight = 0;
+ m_CurrWindowWidth = 0;
+ m_CurrWindowHeight = 0;
+
+ m_AllWhiteVertexStream = NULL; // created lazily by GetAllWhiteVertexStream()
+
+ extern RenderSurfaceBase* DummyColorBackBuferD3D9(); // defined elsewhere; declared locally
+ SetBackBufferColorSurface(DummyColorBackBuferD3D9());
+
+ extern RenderSurfaceBase* DummyDepthBackBuferD3D9(); // defined elsewhere; declared locally
+ SetBackBufferDepthSurface(DummyDepthBackBuferD3D9());
+}
+
+// Tears the device down: notifies plugins, releases the resources owned by
+// this object, destroys the D3D device, then cleans up D3D itself.
+GfxDeviceD3D9::~GfxDeviceD3D9()
+{
+#if !ENABLE_GFXDEVICE_REMOTE_PROCESS_WORKER
+    PluginsSetGraphicsDevice (GetD3DDevice(), kGfxRendererD3D9, kGfxDeviceEventShutdown);
+#endif
+
+    // Shared and query resources.
+    D3D9VBO::CleanupSharedIndexBuffer();
+    CleanupEventQueries ();
+#if ENABLE_PROFILER
+    m_TimerQueriesD3D9.ReleaseAllQueries();
+#endif
+
+    // Deleting a null pointer is a no-op, so no guard is needed.
+    delete m_DynamicVBO;
+
+    // Resources released before the device itself is destroyed.
+    SAFE_RELEASE(m_AllWhiteVertexStream);
+    SAFE_RELEASE(m_Imm.m_ImmVertexDecl);
+    m_VertexDecls.Clear();
+    TextureCombinersD3D::CleanupCombinerCache();
+    CleanupVertexShadersD3D9 ();
+
+    DestroyD3DDevice();
+
+    #if UNITY_EDITOR
+    DestroyHiddenWindowD3D();
+    #endif
+
+    CleanupD3D();
+}
+
+// Throws away all cached device state so that the next state changes are
+// re-sent to D3D. When the device is lost, the vertex pipe reset is given a
+// null device pointer.
+void GfxDeviceD3D9::InvalidateState()
+{
+    IDirect3DDevice9* const currentDevice = GetD3DDeviceNoAssert();
+    IDirect3DDevice9* dev = m_State.m_DeviceLost ? NULL : currentDevice;
+
+    ResetVertexPipeStateD3D9 (dev, m_TransformState, m_BuiltinParamValues, m_VertexConfig, m_VertexData, m_VertexPrevious);
+
+    // Cached values.
+    m_FogParams.Invalidate();
+    m_State.Invalidate(*this);
+    m_Imm.Invalidate();
+    m_VSConstantCache.Invalidate();
+    m_PSConstantCache.Invalidate();
+
+    // No state object is current any more.
+    m_CurrBlendState = NULL;
+    m_CurrDepthState = NULL;
+    m_CurrStencilState = NULL;
+    m_CurrRasterState = NULL;
+}
+
+
+// Clears the current render target.
+//   clearFlags - combination of kGfxClearColor / kGfxClearDepth / kGfxClearStencil
+//   color      - RGBA clear color
+//   depth      - depth clear value
+//   stencil    - stencil clear value
+// Flags that cannot be honored (no depth/stencil buffer, no color target,
+// depth format without stencil bits) are dropped before calling D3D.
+void GfxDeviceD3D9::Clear(UInt32 clearFlags, const float color[4], float depth, int stencil)
+{
+    if (!g_D3DHasDepthStencil)
+        clearFlags &= ~kGfxClearDepthStencil;
+    if (!IsActiveRenderTargetWithColorD3D9())
+        clearFlags &= ~kGfxClearColor;
+
+    // Translate engine flags to D3DCLEAR_* flags.
+    DWORD flags = 0;
+    if (clearFlags & kGfxClearColor)
+        flags |= D3DCLEAR_TARGET;
+    if (clearFlags & kGfxClearDepth)
+        flags |= D3DCLEAR_ZBUFFER;
+
+    const bool wantsStencil = (clearFlags & kGfxClearStencil) != 0;
+    if (wantsStencil && GetStencilBitsFromD3DFormat (g_D3DDepthStencilFormat) > 0)
+        flags |= D3DCLEAR_STENCIL;
+
+    GetD3DDevice()->Clear (0, NULL, flags, ColorToD3D(color), depth, stencil);
+}
+
+
+// Rebuilds the front/back entries of the kCullModeD3D9 lookup table from the
+// app backface, user backface and inverted-projection flags, then re-applies
+// the current cull mode if the resulting D3D value changed.
+static void ApplyBackfaceMode( DeviceStateD3D& state )
+{
+    const bool backfaceModesAgree = (state.appBackfaceMode == state.userBackfaceMode);
+    const bool frontIsCCW = (backfaceModesAgree == state.invertProjMatrix);
+
+    kCullModeD3D9[kCullFront] = frontIsCCW ? D3DCULL_CCW : D3DCULL_CW;
+    kCullModeD3D9[kCullBack] = frontIsCCW ? D3DCULL_CW : D3DCULL_CCW;
+
+    // No cull mode has been set yet: nothing to re-apply.
+    if( state.culling == kCullUnknown )
+        return;
+
+    IDirect3DDevice9* dev = GetD3DDevice();
+    D3DCULL d3dcull = kCullModeD3D9[state.culling];
+    if( d3dcull == state.d3dculling )
+        return;
+
+    D3D9_CALL(dev->SetRenderState( D3DRS_CULLMODE, d3dcull ));
+    state.d3dculling = d3dcull;
+}
+
+// Stores the user backface flag and re-applies culling when it changes.
+void GfxDeviceD3D9::SetUserBackfaceMode( bool enable )
+{
+    if( m_State.userBackfaceMode != enable )
+    {
+        m_State.userBackfaceMode = enable;
+        ApplyBackfaceMode( m_State );
+    }
+}
+
+
+// Switches the device fill mode between wireframe and solid and records the
+// flag so GetWireframe() can report it. The render state is set on every call.
+void GfxDeviceD3D9::SetWireframe( bool wire )
+{
+    IDirect3DDevice9* dev = GetD3DDevice();
+
+    D3D9_CALL(dev->SetRenderState( D3DRS_FILLMODE, wire ? D3DFILL_WIREFRAME : D3DFILL_SOLID ));
+
+    m_State.wireframe = wire;
+}
+
+// Returns the wireframe flag last recorded by SetWireframe().
+bool GfxDeviceD3D9::GetWireframe() const
+{
+    const bool isWireframe = m_State.wireframe;
+    return isWireframe;
+}
+
+
+
+// Uploads a projection matrix to the fixed function pipeline (D3DTS_PROJECTION).
+// This is needed even when programmable shaders are used, because some features
+// still read it; most notably fixed function fog (shader model 2.0).
+// The matrix is copied with elements 8..11 negated before being handed to D3D.
+static void SetFFProjectionMatrixD3D9 (const Matrix4x4f& m)
+{
+    IDirect3DDevice9* dev = GetD3DDevice();
+
+    Matrix4x4f projFlip;
+    for (int e = 0; e < 16; ++e)
+    {
+        const bool negate = (e >= 8 && e <= 11);
+        projFlip.m_Data[e] = negate ? -m.m_Data[e] : m.m_Data[e];
+    }
+
+    D3D9_CALL(dev->SetTransform (D3DTS_PROJECTION, (const D3DMATRIX*)projFlip.GetPtr()));
+}
+
+
+// Turns the "inverted projection" flag on or off. On a change, culling and
+// stencil face state are re-applied (both depend on the flag) and the current
+// device projection matrix is flipped in place by negating elements (1,1) and (1,3).
+void GfxDeviceD3D9::SetInvertProjectionMatrix( bool enable )
+{
+    if( enable == m_State.invertProjMatrix )
+        return;
+    m_State.invertProjMatrix = enable;
+
+    ApplyBackfaceMode( m_State );
+    ApplyStencilFuncAndOp( m_State );
+
+    // The flag toggled, so flip the already-stored matrix to match.
+    Matrix4x4f& proj = m_BuiltinParamValues.GetWritableMatrixParam(kShaderMatProj);
+    proj.Get(1,1) = -proj.Get(1,1);
+    proj.Get(1,3) = -proj.Get(1,3);
+
+    m_TransformState.dirtyFlags |= TransformState::kProjDirty;
+    SetFFProjectionMatrixD3D9 (proj);
+}
+
+// Returns the flag last stored by SetInvertProjectionMatrix().
+bool GfxDeviceD3D9::GetInvertProjectionMatrix() const
+{
+    const bool inverted = m_State.invertProjMatrix;
+    return inverted;
+}
+
+// Copies the 16-float world matrix into the transform state and marks the
+// world transform dirty.
+void GfxDeviceD3D9::SetWorldMatrix( const float matrix[16] )
+{
+    float* worldDest = m_TransformState.worldMatrix.GetPtr();
+    CopyMatrix (matrix, worldDest);
+
+    m_TransformState.dirtyFlags |= TransformState::kWorldDirty;
+}
+
+// Forwards the 16-float view matrix to the transform state, which also
+// receives the builtin shader parameter values.
+void GfxDeviceD3D9::SetViewMatrix( const float matrix[16] )
+{
+    m_TransformState.SetViewMatrix (matrix, m_BuiltinParamValues);
+}
+
+// Sets the projection matrix. The unmodified matrix is kept in the transform
+// state (returned by GetProjectionMatrix), while the builtin kShaderMatProj
+// parameter receives the device-adjusted version, which is also uploaded to
+// the fixed function pipeline.
+void GfxDeviceD3D9::SetProjectionMatrix(const Matrix4x4f& matrix)
+{
+    const float* src = matrix.GetPtr();
+
+    // Keep the original around as-is.
+    CopyMatrix (src, m_TransformState.projectionMatrixOriginal.GetPtr());
+
+    // Build the device version in place inside the builtin params.
+    Matrix4x4f& deviceProj = m_BuiltinParamValues.GetWritableMatrixParam(kShaderMatProj);
+    CopyMatrix (src, deviceProj.GetPtr());
+    CalculateDeviceProjectionMatrix (deviceProj, m_UsesOpenGLTextureCoords, m_State.invertProjMatrix);
+
+    SetFFProjectionMatrixD3D9 (deviceProj);
+    m_TransformState.dirtyFlags |= TransformState::kProjDirty;
+}
+
+
+// Writes the current world-view matrix into outMatrix (16 floats), after
+// asking the transform state to bring it up to date.
+void GfxDeviceD3D9::GetMatrix(float outMatrix[16]) const
+{
+    m_TransformState.UpdateWorldViewMatrix (m_BuiltinParamValues);
+
+    const float* worldView = m_TransformState.worldViewMatrix.GetPtr();
+    CopyMatrix (worldView, outMatrix);
+}
+
+// Returns a pointer to the world matrix stored in the transform state.
+const float* GfxDeviceD3D9::GetWorldMatrix() const
+{
+    const float* world = m_TransformState.worldMatrix.GetPtr();
+    return world;
+}
+
+// Returns a pointer to the view matrix held in the builtin shader parameters.
+const float* GfxDeviceD3D9::GetViewMatrix() const
+{
+    const float* view = m_BuiltinParamValues.GetMatrixParam(kShaderMatView).GetPtr();
+    return view;
+}
+
+// Returns a pointer to the projection matrix exactly as it was passed to
+// SetProjectionMatrix(), without device adjustments.
+const float* GfxDeviceD3D9::GetProjectionMatrix() const
+{
+    const float* original = m_TransformState.projectionMatrixOriginal.GetPtr();
+    return original;
+}
+
+// Returns a pointer to the device-adjusted projection matrix held in the
+// builtin shader parameters (kShaderMatProj).
+const float* GfxDeviceD3D9::GetDeviceProjectionMatrix() const
+{
+    const float* deviceProj = m_BuiltinParamValues.GetMatrixParam(kShaderMatProj).GetPtr();
+    return deviceProj;
+}
+
+// Updates the normal normalization mode and the application backface flag.
+//   mode     - normalization mode; hasNormalization is set only for kNormalizationFull
+//   backface - application backface flag; culling is re-applied when it changes
+// Each value is only processed when it differs from the stored one.
+void GfxDeviceD3D9::SetNormalizationBackface( NormalizationMode mode, bool backface )
+{
+    // The unused device pointer local was removed; nothing here touches the
+    // device directly (ApplyBackfaceMode fetches it itself when needed).
+    if( mode != m_VertexData.normalization )
+    {
+        m_VertexData.normalization = mode;
+        m_VertexConfig.hasNormalization = (mode == kNormalizationFull);
+    }
+    if( m_State.appBackfaceMode != backface )
+    {
+        m_State.appBackfaceMode = backface;
+        ApplyBackfaceMode( m_State );
+    }
+}
+
+// Records the fixed function lighting configuration in the vertex config:
+// lighting on/off, separate specular on/off, and the color material mode
+// (which must not be kColorMatUnknown).
+void GfxDeviceD3D9::SetFFLighting( bool on, bool separateSpecular, ColorMaterialMode colorMaterial )
+{
+    if (on)
+        m_VertexConfig.hasLighting = 1;
+    else
+        m_VertexConfig.hasLighting = 0;
+
+    if (separateSpecular)
+        m_VertexConfig.hasSpecular = 1;
+    else
+        m_VertexConfig.hasSpecular = 0;
+
+    DebugAssertIf(colorMaterial==kColorMatUnknown);
+    m_VertexConfig.colorMaterial = colorMaterial;
+}
+
+// Stores the fixed function material in the vertex data. The four colors are
+// copied as RGBA float quadruples; shininess is clamped to [0,1] and scaled
+// by 128 to form the specular power.
+void GfxDeviceD3D9::SetMaterial( const float ambient[4], const float diffuse[4], const float specular[4], const float emissive[4], const float shininess )
+{
+    const float clampedShininess = std::max<float>( std::min<float>(shininess,1.0f), 0.0f);
+
+    D3DMATERIAL9& material = m_VertexData.material;
+    material.Ambient = *(D3DCOLORVALUE*)ambient;
+    material.Diffuse = *(D3DCOLORVALUE*)diffuse;
+    material.Specular = *(D3DCOLORVALUE*)specular;
+    material.Emissive = *(D3DCOLORVALUE*)emissive;
+    material.Power = clampedShininess * 128.0f;
+}
+
+
+// Sets the fixed function constant color. Ignored while a pixel shader is
+// active, since the constant color cannot be used there.
+void GfxDeviceD3D9::SetColor( const float color[4] )
+{
+    // Inlined IsShaderActive(kShaderFragment).
+    const bool pixelShaderActive = (m_State.activeShader[kShaderFragment] != 0);
+    if (pixelShaderActive)
+        return;
+
+    // There is no really good equivalent of glColor, so the color goes into
+    // TFACTOR, and additionally into a pixel shader constant for the
+    // ps_1_1 combiner emulation.
+    IDirect3DDevice9* dev = GetD3DDevice();
+    D3D9_CALL(dev->SetRenderState( D3DRS_TEXTUREFACTOR, ColorToD3D(color) ));
+    m_PSConstantCache.SetValues( kMaxD3DTextureStagesForPS, color, 1 );
+}
+
+
+// Records the viewport rectangle and, if a device exists, applies it with a
+// fixed depth range of 0..1.
+void GfxDeviceD3D9::SetViewport( int x, int y, int width, int height )
+{
+    // Always remember the values so GetViewport() can report them.
+    int* cached = m_State.viewport;
+    cached[0] = x;
+    cached[1] = y;
+    cached[2] = width;
+    cached[3] = height;
+
+    // There may be no device: happens on startup, when deleting all render textures.
+    IDirect3DDevice9* dev = GetD3DDeviceNoAssert();
+    if( dev )
+    {
+        D3DVIEWPORT9 view;
+        view.X = x;
+        view.Y = y;
+        view.Width = width;
+        view.Height = height;
+        view.MinZ = 0.0f;
+        view.MaxZ = 1.0f;
+        dev->SetViewport( &view );
+    }
+}
+
+// Copies the recorded viewport (x, y, width, height) into port[0..3].
+void GfxDeviceD3D9::GetViewport( int* port ) const
+{
+    for (int c = 0; c < 4; ++c)
+        port[c] = m_State.viewport[c];
+}
+
+
+// Enables scissoring (if not already enabled) and sets the scissor rectangle.
+// The rectangle is given as origin plus size and converted to a RECT for D3D.
+void GfxDeviceD3D9::SetScissorRect( int x, int y, int width, int height )
+{
+    // Enable the scissor test once; the render state is only touched when
+    // the hardware reports scissor test support.
+    if (m_State.scissor != 1)
+    {
+        const bool hasScissorTest = (gGraphicsCaps.d3d.d3dcaps.RasterCaps & D3DPRASTERCAPS_SCISSORTEST) != 0;
+        if (hasScissorTest)
+            GetD3DDevice()->SetRenderState( D3DRS_SCISSORTESTENABLE, TRUE );
+        m_State.scissor = 1;
+    }
+
+    // Remember the values for GetScissorRect().
+    int* cached = m_State.scissorRect;
+    cached[0] = x;
+    cached[1] = y;
+    cached[2] = width;
+    cached[3] = height;
+
+    RECT rc;
+    rc.left = x;
+    rc.right = x + width;
+    rc.top = y;
+    rc.bottom = y + height;
+    GetD3DDevice()->SetScissorRect( &rc );
+}
+// Turns scissoring off unless the cached state says it is already off.
+void GfxDeviceD3D9::DisableScissor()
+{
+    if (m_State.scissor == 0)
+        return;
+
+    // The render state is only touched when the hardware reports scissor test support.
+    const bool hasScissorTest = (gGraphicsCaps.d3d.d3dcaps.RasterCaps & D3DPRASTERCAPS_SCISSORTEST) != 0;
+    if (hasScissorTest)
+        GetD3DDevice()->SetRenderState( D3DRS_SCISSORTESTENABLE, FALSE);
+
+    m_State.scissor = 0;
+}
+// Returns true when the cached scissor state is "enabled" (exactly 1).
+bool GfxDeviceD3D9::IsScissorEnabled() const
+{
+    const bool enabled = (m_State.scissor == 1);
+    return enabled;
+}
+
+// Copies the recorded scissor rectangle (x, y, width, height) into scissor[0..3].
+void GfxDeviceD3D9::GetScissorRect( int scissor[4] ) const
+{
+    for (int c = 0; c < 4; ++c)
+        scissor[c] = m_State.scissorRect[c];
+}
+
+// Reports whether a texture combine mode is supported. This implementation
+// ignores `combiner` and always answers yes.
+bool GfxDeviceD3D9::IsCombineModeSupported( unsigned int combiner )
+{
+    const bool supported = true;
+    return supported;
+}
+
+// Creates a D3D texture combiner object from the given bindings and wraps it
+// in a handle. All arguments are passed through to TextureCombinersD3D::Create.
+TextureCombinersHandle GfxDeviceD3D9::CreateTextureCombiners( int count, const ShaderLab::TextureBinding* texEnvs, const ShaderLab::PropertySheet* props, bool hasVertexColorOrLighting, bool usesAddSpecular )
+{
+    TextureCombinersD3D* created = TextureCombinersD3D::Create( count, texEnvs, props, hasVertexColorOrLighting, usesAddSpecular );
+    TextureCombinersHandle handle( created );
+    return handle;
+}
+
+// Deletes the combiner object behind the handle and resets the handle.
+void GfxDeviceD3D9::DeleteTextureCombiners( TextureCombinersHandle& textureCombiners )
+{
+    TextureCombinersD3D* combiners = OBJECT_FROM_HANDLE(textureCombiners, TextureCombinersD3D);
+    delete combiners;
+
+    textureCombiners.Reset();
+}
+
+// Applies a fixed function texture combiner setup.
+//   textureCombiners - handle to a TextureCombinersD3D object (must be valid)
+//   texEnvData       - per-unit texture environment data, indexed by unit
+//   texColors        - per-stage constant colors
+// Must not be called while a fragment shader is active. Binds the textures,
+// clears the unused units, then either sets the emulation pixel shader with
+// its constants, or programs the texture stage states directly.
+void GfxDeviceD3D9::SetTextureCombinersThreadable( TextureCombinersHandle textureCombiners, const TexEnvData* texEnvData, const Vector4f* texColors )
+{
+    TextureCombinersD3D* implD3D = OBJECT_FROM_HANDLE(textureCombiners,TextureCombinersD3D);
+    AssertIf( !implD3D );
+    IDirect3DDevice9* dev = GetD3DDevice();
+
+    AssertIf (IsShaderActive( kShaderFragment ));
+
+    const int maxTexUnits = gGraphicsCaps.maxTexUnits; // fetch here once
+
+    // set textures
+    int i = 0;
+    for( ; i < maxTexUnits && i < implD3D->envCount; ++i )
+    {
+        ApplyTexEnvData (i, i, texEnvData[i]);
+    }
+
+    // clear unused textures (continues from the first unit not set above)
+    for (; i < maxTexUnits; ++i)
+    {
+        if (i < kMaxSupportedTextureCoords)
+            m_VertexConfig.ClearTextureUnit(i);
+
+        TextureUnitStateD3D& currTex = m_State.texturesPS[i];
+        if (currTex.texID.m_ID != 0)
+        {
+            D3D9_CALL(dev->SetTexture( GetD3D9SamplerIndex(kShaderFragment,i), NULL ));
+            currTex.texID.m_ID = 0;
+        }
+    }
+
+    // setup texture stages
+    if( implD3D->pixelShader )
+    {
+        // Combiner emulated by a pixel shader: upload one constant color per
+        // stage. (An unused per-stage binding reference was removed here.)
+        for( i = 0; i < implD3D->stageCount; ++i )
+        {
+            const Vector4f& texcolorVal = texColors[i];
+            m_PSConstantCache.SetValues( i, texcolorVal.GetPtr(), 1 );
+        }
+        // Only switch the shader when a different combiner was active.
+        if( m_State.fixedFunctionPS != implD3D->uniqueID )
+        {
+            D3D9_CALL(dev->SetPixelShader( implD3D->pixelShader ));
+            m_State.fixedFunctionPS = implD3D->uniqueID;
+        }
+    }
+    else
+    {
+        // Plain texture stage states; at most one constant color goes into TFACTOR.
+        if( implD3D->textureFactorIndex != -1 )
+        {
+            const Vector4f& color = texColors[implD3D->textureFactorIndex];
+            D3D9_CALL(dev->SetRenderState( D3DRS_TEXTUREFACTOR, ColorToD3D( color.GetPtr() ) ));
+        }
+        for( i = 0; i < implD3D->stageCount; ++i )
+        {
+            // TODO: cache!
+            const D3DTextureStage& stage = implD3D->stages[i];
+            AssertIf( stage.colorOp == D3DTOP_DISABLE || stage.alphaOp == D3DTOP_DISABLE );
+            D3D9_CALL(dev->SetTextureStageState( i, D3DTSS_COLOROP, stage.colorOp ));
+            D3D9_CALL(dev->SetTextureStageState( i, D3DTSS_COLORARG1, stage.colorArgs[0] ));
+            D3D9_CALL(dev->SetTextureStageState( i, D3DTSS_COLORARG2, stage.colorArgs[1] ));
+            D3D9_CALL(dev->SetTextureStageState( i, D3DTSS_COLORARG0, stage.colorArgs[2] ));
+            D3D9_CALL(dev->SetTextureStageState( i, D3DTSS_ALPHAOP, stage.alphaOp ));
+            D3D9_CALL(dev->SetTextureStageState( i, D3DTSS_ALPHAARG1, stage.alphaArgs[0] ));
+            D3D9_CALL(dev->SetTextureStageState( i, D3DTSS_ALPHAARG2, stage.alphaArgs[1] ));
+            D3D9_CALL(dev->SetTextureStageState( i, D3DTSS_ALPHAARG0, stage.alphaArgs[2] ));
+        }
+        // Disable the first stage after the used ones (i == stageCount here).
+        D3D9_CALL(dev->SetTextureStageState( i, D3DTSS_COLOROP, D3DTOP_DISABLE ));
+        D3D9_CALL(dev->SetTextureStageState( i, D3DTSS_ALPHAOP, D3DTOP_DISABLE ));
+        D3D9_CALL(dev->SetPixelShader( NULL ));
+        m_State.fixedFunctionPS = 0;
+    }
+}
+
+
+// Non-threaded entry point for applying texture combiners: resolves the
+// texture environments and constant colors from the property sheet into
+// temporary arrays, then hands them to SetTextureCombinersThreadable().
+void GfxDeviceD3D9::SetTextureCombiners( TextureCombinersHandle textureCombiners, const ShaderLab::PropertySheet* props )
+{
+    TextureCombinersD3D* implD3D = OBJECT_FROM_HANDLE(textureCombiners,TextureCombinersD3D);
+    AssertIf( !implD3D );
+
+    // Texture environment data: only for as many units as the hardware has.
+    int count = std::min(implD3D->envCount, gGraphicsCaps.maxTexUnits);
+    TexEnvData* texEnvData;
+    ALLOC_TEMP (texEnvData, TexEnvData, count);
+    for( int unit = 0; unit < count; ++unit )
+    {
+        ShaderLab::TexEnv *te = ShaderLab::GetTexEnvForBinding( implD3D->texEnvs[unit], props );
+        Assert( te != NULL );
+        te->PrepareData (implD3D->texEnvs[unit].m_TextureName.index, implD3D->texEnvs[unit].m_MatrixName, props, &texEnvData[unit]);
+    }
+
+    // Constant colors: one per texture environment, not limited by unit count.
+    Vector4f* texColors;
+    ALLOC_TEMP (texColors, Vector4f, implD3D->envCount);
+    for( int env = 0; env < implD3D->envCount; ++env )
+        texColors[env] = implD3D->texEnvs[env].GetTexColor().Get (props);
+
+    GfxDeviceD3D9::SetTextureCombinersThreadable(textureCombiners, texEnvData, texColors);
+}
+
+
+// Binds a texture to a texture unit for the given shader stage.
+//   shaderType  - kShaderFragment or kShaderVertex; anything else is rejected
+//   unit        - texture unit index
+//   samplerUnit - not used by this implementation
+//   texture     - texture to bind
+//   dim         - texture dimension (validated in debug builds only)
+//   bias        - mip level bias; applied for fragment shaders when supported
+void GfxDeviceD3D9::SetTexture (ShaderType shaderType, int unit, int samplerUnit, TextureID texture, TextureDimension dim, float bias)
+{
+    DebugAssertIf( dim < kTexDim2D || dim > kTexDimCUBE );
+    DebugAssertIf (unit < 0 || unit >= kMaxSupportedTextureUnits);
+
+    if (unit < kMaxSupportedTextureCoords)
+        m_VertexConfig.SetTextureUnit(unit);
+
+    // Pick the cached unit state that belongs to the shader stage.
+    TextureUnitStateD3D* currTex = NULL;
+    switch (shaderType)
+    {
+    case kShaderFragment:
+        currTex = &m_State.texturesPS[unit];
+        break;
+    case kShaderVertex:
+        currTex = &m_State.texturesVS[unit];
+        break;
+    default:
+        AssertString ("Unsupported shader type for SetTexture");
+        return;
+    }
+
+    // Rebind only on change; the cache is updated only if the bind succeeded.
+    if (texture != currTex->texID && m_Textures.SetTexture (shaderType, unit, texture))
+        currTex->texID = texture;
+
+    m_Stats.AddUsedTexture(texture);
+
+    // Mip bias is a sampler state and is only applied for fragment shaders.
+    if (gGraphicsCaps.hasMipLevelBias && bias != currTex->bias && shaderType == kShaderFragment)
+    {
+        D3D9_CALL(GetD3DDevice()->SetSamplerState( unit, D3DSAMP_MIPMAPLODBIAS, *(DWORD*)&bias ));
+        currTex->bias = bias;
+    }
+}
+
+
+
+void GfxDeviceD3D9::SetTextureTransform( int unit, TextureDimension dim, TexGenMode texGen, bool identity, const float matrix[16] ) // Stores the texture matrix for a unit and derives its matrix mode, projection flag and texcoord source.
+{
+ Assert (unit >= 0 && unit < kMaxSupportedTextureCoords);
+
+ m_State.m_NeedsSofwareVPFlags &= ~kNeedsSoftwareVPTexGen; // cleared on every call; set again at the end only if this unit needs it
+
+ // -------- texture matrix
+
+ float* mat = m_TransformState.texMatrices[unit].GetPtr();
+ CopyMatrix( matrix, mat ); // mat is the stored copy; it may be modified below
+
+ // In OpenGL all texture reads are projective, and matrices are always 4x4, and z/w defaults to 0/1.
+ // In D3D everything is different. So here we try to figure out how many components need to be transformed,
+ // munge the matrix and enable projective texturing if needed.
+
+ TextureMatrixMode matrixMode;
+ int projectedTexture = 0;
+ if( identity )
+ {
+ // matrix guaranteed to be identity: disable transformation
+ matrixMode = kTexMatrixNone;
+ }
+ else if( dim == kTexDimCUBE || dim == kTexDim3D )
+ {
+ // for cube/volume texture: count3
+ matrixMode = kTexMatrix3;
+ }
+ else
+ {
+ // detect projected matrix
+ projectedTexture = (mat[3] != 0.0f || mat[7] != 0.0f || mat[11] != 0.0f || mat[15] != 1.0f) ? 1 : 0; // any of elements 3, 7, 11, 15 differing from (0,0,0,1) counts as projective
+ // Cards that do support projected textures or cubemaps seem to want
+ // Count3 flags for object/eyelinear transforms. Cards that don't support
+ // projection nor cubemaps will have to use Count2 - fixes GUI text rendering!
+ bool is3DTexGen = (texGen != kTexGenDisabled && texGen != kTexGenSphereMap);
+
+ if( projectedTexture )
+ {
+ matrixMode = kTexMatrix4;
+ }
+ else if( is3DTexGen )
+ {
+ matrixMode = kTexMatrix3;
+ }
+ else
+ {
+ // regular texture: count2, and move matrix' 4th row into 3rd one
+ matrixMode = kTexMatrix2;
+ mat[ 8] = mat[12];
+ mat[ 9] = mat[13];
+ mat[10] = mat[14];
+ mat[11] = mat[15];
+ }
+ }
+
+ m_VertexConfig.textureMatrixModes = m_VertexConfig.textureMatrixModes & ~(3<<(unit*2)) | (matrixMode<<(unit*2)); // 2 bits per unit; & binds tighter than |, so this is (old & ~mask) | new
+ m_VertexData.projectedTextures = m_VertexData.projectedTextures & ~(1<<unit) | (projectedTexture<<unit); // 1 bit per unit, same (old & ~mask) | new pattern
+
+ // -------- texture coordinate generation
+
+ TextureSourceMode texSource = texGen == kTexGenDisabled ? kTexSourceUV0 : static_cast<TextureSourceMode>(texGen + 1); // disabled texgen reads UV0; other modes map to source mode texGen + 1
+ m_VertexConfig.textureSources = m_VertexConfig.textureSources & ~(7<<(unit*3)) | (texSource<<(unit*3)); // 3 bits per unit
+
+ if( texGen == kTexGenSphereMap && !IsShaderActive(kShaderVertex) )
+ {
+ if( g_D3DUsesMixedVP && !(gGraphicsCaps.d3d.d3dcaps.VertexProcessingCaps & D3DVTXPCAPS_TEXGEN_SPHEREMAP) ) // mixed vertex processing without the sphere map texgen cap
+ m_State.m_NeedsSofwareVPFlags |= kNeedsSoftwareVPTexGen;
+ }
+}
+
+// Updates the sampling parameters of a texture and invalidates every cached
+// texture unit (pixel and vertex stage) that currently refers to it, so the
+// sampler states get set again on the next bind.
+void GfxDeviceD3D9::SetTextureParams( TextureID texture, TextureDimension texDim, TextureFilterMode filter, TextureWrapMode wrap, int anisoLevel, bool hasMipMap, TextureColorSpace colorSpace )
+{
+    m_Textures.SetTextureParams( texture, texDim, filter, wrap, anisoLevel, hasMipMap, colorSpace );
+
+    // Pixel shader texture units.
+    for (int unit = 0; unit < ARRAY_SIZE(m_State.texturesPS); ++unit)
+    {
+        TextureUnitStateD3D& unitState = m_State.texturesPS[unit];
+        if (!(unitState.texID == texture))
+            continue;
+        unitState.Invalidate();
+    }
+
+    // Vertex shader texture units.
+    for (int unit = 0; unit < ARRAY_SIZE(m_State.texturesVS); ++unit)
+    {
+        TextureUnitStateD3D& unitState = m_State.texturesVS[unit];
+        if (!(unitState.texID == texture))
+            continue;
+        unitState.Invalidate();
+    }
+}
+
+
+void GfxDeviceD3D9::SetShadersThreadable( GpuProgram* programs[kShaderTypeCount], const GpuProgramParameters* params[kShaderTypeCount], UInt8 const * const paramsBuffer[kShaderTypeCount]) // Sets or clears the vertex and pixel shaders, then applies the parameters of every non-null program.
+{
+ GpuProgram* vertexProgram = programs[kShaderVertex];
+ GpuProgram* fragmentProgram = programs[kShaderFragment];
+
+ IDirect3DDevice9* dev = GetD3DDevice();
+
+ // vertex shader
+ if( vertexProgram && vertexProgram->GetImplType() == kShaderImplVertex )
+ {
+ // set the shader
+ bool resetToNoFog = false;
+ IDirect3DVertexShader9* shader = static_cast<D3D9VertexShader&>(*vertexProgram).GetShader(m_FogParams.mode, resetToNoFog); // shader variant is selected by the current fog mode
+ // Note: get pixel shader to match actually used fog mode from VS. If VS was too complex
+ // to patch for fog, for example, then we want PS to not have fog as well.
+ if (resetToNoFog)
+ m_FogParams.mode = kFogDisabled;
+ DebugAssert (shader);
+
+ if( m_State.activeShader[kShaderVertex] != shader )
+ {
+ D3D9_CALL(dev->SetVertexShader( shader ));
+ if (m_State.activeShader[kShaderVertex] == NULL) // no vertex shader was active before: reset the per-stage texcoord setup
+ {
+ for( int i = 0; i < kMaxSupportedTextureCoords; ++i )
+ {
+ D3D9_CALL(dev->SetTextureStageState( i, D3DTSS_TEXCOORDINDEX, i ));
+ D3D9_CALL(dev->SetTextureStageState( i, D3DTSS_TEXTURETRANSFORMFLAGS, D3DTSS_TCI_PASSTHRU )); // NOTE(review): D3DTSS_TCI_PASSTHRU is a TEXCOORDINDEX constant; D3DTTFF_DISABLE looks like the intended value (both appear to be 0) — confirm
+ }
+ }
+
+ m_VertexPrevious.vertexShader = NULL; // presumably makes the fixed function vertex pipe re-apply its state later — verify
+ m_VertexPrevious.ambient.set(-1,-1,-1,-1);
+
+ m_State.activeShader[kShaderVertex] = shader;
+ }
+
+ if( g_D3DUsesMixedVP )
+ m_State.m_NeedsSofwareVPFlags |= kNeedsSoftwareVPVertexShader;
+
+ m_BuiltinParamIndices[kShaderVertex] = &params[kShaderVertex]->GetBuiltinParams();
+ }
+ else
+ {
+ // clear the shader
+ DebugAssertIf( vertexProgram != 0 ); // a non-null program of the wrong impl type is unexpected here
+ if( m_State.activeShader[kShaderVertex] != 0 )
+ {
+ D3D9_CALL(dev->SetVertexShader( NULL ));
+ m_State.activeShader[kShaderVertex] = 0;
+ }
+
+ if( g_D3DUsesMixedVP )
+ m_State.m_NeedsSofwareVPFlags &= ~kNeedsSoftwareVPVertexShader;
+
+ m_BuiltinParamIndices[kShaderVertex] = &m_NullParamIndices;
+ }
+
+ // pixel shader
+ if( fragmentProgram && fragmentProgram->GetImplType() == kShaderImplFragment )
+ {
+ // set the shader
+ IDirect3DPixelShader9* shader = static_cast<D3D9PixelShader&>(*fragmentProgram).GetShader(m_FogParams.mode, *params[kShaderFragment]); // uses the fog mode possibly reset by the vertex shader branch above
+ DebugAssert (shader);
+
+ if( m_State.activeShader[kShaderFragment] != shader )
+ {
+ D3D9_CALL(dev->SetPixelShader( shader ));
+ m_State.activeShader[kShaderFragment] = shader;
+ m_State.fixedFunctionPS = 0; // no combiner emulation shader is current any more
+ }
+
+ m_BuiltinParamIndices[kShaderFragment] = &params[kShaderFragment]->GetBuiltinParams();
+ }
+ else
+ {
+ // clear the shader
+ DebugAssertIf( fragmentProgram != 0 ); // a non-null program of the wrong impl type is unexpected here
+ if( m_State.activeShader[kShaderFragment] != 0 )
+ {
+ D3D9_CALL(dev->SetPixelShader( NULL ));
+ m_State.activeShader[kShaderFragment] = 0;
+ m_State.fixedFunctionPS = 0;
+ }
+
+ m_BuiltinParamIndices[kShaderFragment] = &m_NullParamIndices;
+ }
+
+ for (int pt = 0; pt < kShaderTypeCount; ++pt) // record the active programs and apply their parameters
+ {
+ if (programs[pt])
+ {
+ m_State.activeGpuProgramParams[pt] = params[pt];
+ m_State.activeGpuProgram[pt] = programs[pt];
+ programs[pt]->ApplyGpuProgram (*params[pt], paramsBuffer[pt]);
+ }
+ else
+ {
+ m_State.activeGpuProgramParams[pt] = NULL;
+ m_State.activeGpuProgram[pt] = NULL;
+ }
+ }
+}
+
+
+// Returns true when a shader of the given type is recorded as active.
+bool GfxDeviceD3D9::IsShaderActive( ShaderType type ) const
+{
+    const bool active = (m_State.activeShader[type] != 0);
+    return active;
+}
+
+// Deletes a shader sub-program. Before deleting, every fog variant of its
+// vertex or pixel shader is compared with the cached active shader, and the
+// cache entry is cleared on a match so it does not keep pointing at it.
+void GfxDeviceD3D9::DestroySubProgram( ShaderLab::SubProgram* subprogram )
+{
+    GpuProgram* program = &subprogram->GetGpuProgram();
+
+    if (program->GetImplType() == kShaderImplVertex)
+    {
+        D3D9VertexShader* vs = static_cast<D3D9VertexShader*>(program);
+        for (int fog = 0; fog < kFogModeCount; ++fog)
+        {
+            IUnknown* shader = vs->GetShaderAtFogIndex(static_cast<FogMode>(fog));
+            if (m_State.activeShader[kShaderVertex] == shader)
+                m_State.activeShader[kShaderVertex] = NULL;
+        }
+    }
+    else if (program->GetImplType() == kShaderImplFragment)
+    {
+        D3D9PixelShader* ps = static_cast<D3D9PixelShader*>(program);
+        for (int fog = 0; fog < kFogModeCount; ++fog)
+        {
+            IUnknown* shader = ps->GetShaderAtFogIndex(static_cast<FogMode>(fog));
+            if (m_State.activeShader[kShaderFragment] == shader)
+                m_State.activeShader[kShaderFragment] = NULL;
+        }
+    }
+
+    delete subprogram;
+}
+
+// Sets the vertex light count to startLight and writes a black diffuse color
+// for every light from startLight up to the supported maximum.
+void GfxDeviceD3D9::DisableLights( int startLight )
+{
+    m_VertexData.vertexLightCount = startLight;
+
+    const Vector4f black(0.0F, 0.0F, 0.0F, 0.0F);
+    int light = startLight;
+    while (light < gGraphicsCaps.maxLights)
+    {
+        m_BuiltinParamValues.SetVectorParam(BuiltinShaderVectorParam(kShaderVecLight0Diffuse + light), black);
+        ++light;
+    }
+}
+
+// Stores a vertex light in slot `light`, with its position and direction
+// transformed by the current view matrix, and sets up its shader parameters.
+//   light - slot index, expected in [0, kMaxSupportedVertexLights)
+//   data  - light description; directional lights must have position.w == 0
+//           and non-spot lights must have spotAngle == -1
+void GfxDeviceD3D9::SetLight( int light, const GfxVertexLight& data)
+{
+    // The unused device pointer local was removed; this function only
+    // updates cached vertex data and builtin parameters.
+    DebugAssert(light >= 0 && light < kMaxSupportedVertexLights);
+
+    DebugAssertIf( (data.position.w == 0.0f) != (data.type == kLightDirectional) ); // directional lights should have 0 in position.w
+    DebugAssertIf( (data.spotAngle != -1.0f) != (data.type == kLightSpot) ); // non-spot lights should have -1 in spot angle
+
+    GfxVertexLight& dest = m_VertexData.lights[light];
+    dest = data;
+
+    const Matrix4x4f& viewMat = m_BuiltinParamValues.GetMatrixParam(kShaderMatView);
+
+    if (data.type == kLightDirectional)
+    {
+        // For directional lights data.position is treated as a vector; the
+        // transformed result is stored in spotDirection and position is zeroed.
+        dest.position.Set(0.0f,0.0f,0.0f,0.0f);
+        Vector3f v = viewMat.MultiplyVector3((const Vector3f&)data.position);
+        dest.spotDirection.Set( v.x, v.y, v.z, 0.0f );
+    }
+    else
+    {
+        // Other lights: position is transformed as a point, direction as a vector.
+        Vector3f v = viewMat.MultiplyPoint3((const Vector3f&)data.position);
+        dest.position.Set( v.x, v.y, v.z, 1.0f );
+        Vector3f d = viewMat.MultiplyVector3((const Vector3f&)data.spotDirection);
+        dest.spotDirection.Set( d.x, d.y, d.z, 0.0f );
+    }
+
+    // Note: receives the original light data, not the transformed copy.
+    SetupVertexLightParams (light, data);
+}
+
+// Sets the ambient light color. When it differs from the stored one, the raw
+// value, a copy clamped to [0,1] per channel, and the builtin
+// kShaderVecLightModelAmbient shader parameter are all updated.
+void GfxDeviceD3D9::SetAmbient( const float ambient[4] )
+{
+    const bool changed = ( m_VertexData.ambient != ambient );
+    if( !changed )
+        return;
+
+    m_VertexData.ambient.set( ambient );
+    m_VertexData.ambientClamped.set( clamp01(ambient[0]), clamp01(ambient[1]), clamp01(ambient[2]), clamp01(ambient[3]) );
+    m_BuiltinParamValues.SetVectorParam(kShaderVecLightModelAmbient, Vector4f(ambient));
+}
+
+
+static D3DFOGMODE s_D3DFogModes[kFogModeCount] = { D3DFOG_NONE, D3DFOG_LINEAR, D3DFOG_EXP, D3DFOG_EXP2 }; // D3D fog mode for each engine fog mode; EnableFog() indexes this with fog.mode
+
+// Enables fog with the given parameters. Each parameter (mode, start, end,
+// density, color) is compared with the cached value and only sent to the
+// device when it changed. fog.mode must be an enabled mode.
+void GfxDeviceD3D9::EnableFog(const GfxFogParams& fog)
+{
+    IDirect3DDevice9* dev = GetD3DDevice();
+    DebugAssertIf( fog.mode <= kFogDisabled );
+
+    // Mode: a change also (re)enables fog.
+    const bool modeChanged = ( m_FogParams.mode != fog.mode );
+    if( modeChanged )
+    {
+        D3D9_CALL(dev->SetRenderState( D3DRS_FOGTABLEMODE, s_D3DFogModes[fog.mode] )); // TODO: or maybe vertex fog?
+        D3D9_CALL(dev->SetRenderState( D3DRS_FOGENABLE, TRUE ));
+        m_FogParams.mode = fog.mode;
+    }
+
+    // Float parameters are passed to D3D as their raw 32-bit patterns.
+    const bool startChanged = ( m_FogParams.start != fog.start );
+    if( startChanged )
+    {
+        D3D9_CALL(dev->SetRenderState( D3DRS_FOGSTART, *(DWORD*)&fog.start ));
+        m_FogParams.start = fog.start;
+    }
+
+    const bool endChanged = ( m_FogParams.end != fog.end );
+    if( endChanged )
+    {
+        D3D9_CALL(dev->SetRenderState( D3DRS_FOGEND, *(DWORD*)&fog.end ));
+        m_FogParams.end = fog.end;
+    }
+
+    const bool densityChanged = ( m_FogParams.density != fog.density );
+    if( densityChanged )
+    {
+        D3D9_CALL(dev->SetRenderState( D3DRS_FOGDENSITY, *(DWORD*)&fog.density ));
+        m_FogParams.density = fog.density;
+    }
+
+    if( m_FogParams.color != fog.color )
+    {
+        D3D9_CALL(dev->SetRenderState( D3DRS_FOGCOLOR, ColorToD3D(fog.color.GetPtr()) ));
+        m_FogParams.color = fog.color;
+    }
+}
+
+// Turns fog off unless the cached fog mode says it is already disabled.
+void GfxDeviceD3D9::DisableFog()
+{
+    IDirect3DDevice9* dev = GetD3DDevice();
+
+    if( m_FogParams.mode == kFogDisabled )
+        return;
+
+    D3D9_CALL(dev->SetRenderState( D3DRS_FOGENABLE, FALSE ));
+    m_FogParams.mode = kFogDisabled;
+}
+
+// Allocates a new D3D9 vertex buffer object and reports it via OnCreateVBO().
+// The caller releases it with DeleteVBO().
+VBO* GfxDeviceD3D9::CreateVBO()
+{
+    VBO* created = new D3D9VBO();
+
+    OnCreateVBO(created);
+
+    return created;
+}
+
+// Reports the VBO via OnDeleteVBO() and then deletes it.
+void GfxDeviceD3D9::DeleteVBO( VBO* vbo )
+{
+    OnDeleteVBO(vbo);
+
+    delete vbo;
+}
+
+// Returns the dynamic VBO, creating it on first use.
+DynamicVBO& GfxDeviceD3D9::GetDynamicVBO()
+{
+    if( m_DynamicVBO == NULL )
+    {
+        // initial 1 MiB VB, 64 KiB IB
+        m_DynamicVBO = new DynamicD3D9VBO( 1024 * 1024, 65536 );
+    }
+
+    return *m_DynamicVBO;
+}
+
+// Returns a vertex buffer holding 0x10000 opaque white D3DCOLOR values,
+// creating and filling it on first use. Returns NULL when the buffer cannot
+// be created or locked.
+IDirect3DVertexBuffer9* GfxDeviceD3D9::GetAllWhiteVertexStream()
+{
+    // Already created: hand out the existing buffer.
+    if( m_AllWhiteVertexStream )
+        return m_AllWhiteVertexStream;
+
+    int maxVerts = 0x10000;
+    int size = maxVerts * sizeof(D3DCOLOR);
+    HRESULT hr = GetD3DDevice()->CreateVertexBuffer( size, D3DUSAGE_WRITEONLY, 0, D3DPOOL_MANAGED, &m_AllWhiteVertexStream, NULL );
+    if( !SUCCEEDED(hr) )
+        return NULL;
+
+    void* buffer;
+    hr = m_AllWhiteVertexStream->Lock( 0 , 0, &buffer, 0 );
+    if( !SUCCEEDED(hr) )
+    {
+        // Do not keep a buffer that could not be filled.
+        SAFE_RELEASE( m_AllWhiteVertexStream );
+        return NULL;
+    }
+
+    // Fill every entry with opaque white.
+    D3DCOLOR* dest = (D3DCOLOR*)buffer;
+    D3DCOLOR* const destEnd = dest + maxVerts;
+    for( ; dest != destEnd; ++dest )
+        *dest = D3DCOLOR_ARGB(255, 255, 255, 255);
+    m_AllWhiteVertexStream->Unlock();
+
+    return m_AllWhiteVertexStream;
+}
+
+// Releases the dynamic resources owned by the device: the dynamic VBO
+// (recreated lazily by GetDynamicVBO), event queries, dynamic vertex buffers,
+// profiler timer queries and the shared index buffer.
+void GfxDeviceD3D9::ResetDynamicResources()
+{
+    // Drop the dynamic VBO and forget the pointer.
+    DynamicVBO* const oldDynamicVBO = m_DynamicVBO;
+    delete oldDynamicVBO;
+    m_DynamicVBO = NULL;
+
+    CleanupEventQueries ();
+    ResetDynamicVBs ();
+
+    #if ENABLE_PROFILER
+    m_TimerQueriesD3D9.ReleaseAllQueries();
+    #endif
+
+    D3D9VBO::CleanupSharedIndexBuffer();
+}
+
+
+// Free-function wrapper: resets the dynamic resources of the D3D9 device
+// while an AutoGfxDeviceAcquireThreadOwnership object is alive in this scope.
+void ResetDynamicResourcesD3D9()
+{
+    AutoGfxDeviceAcquireThreadOwnership autoOwner;
+
+    GfxDeviceD3D9& device = GetD3D9GfxDevice();
+    device.ResetDynamicResources();
+}
+
+// Builds (or looks up) a vertex declaration for a set of shader channels.
+// Bit i of shaderChannelsMap selects channel i. Selected channels are packed
+// tightly into stream 0 in channel order using the default format, dimension
+// and byte size of each channel; unselected channels are reset.
+IDirect3DVertexDeclaration9* GetD3DVertexDeclaration( UInt32 shaderChannelsMap )
+{
+    ChannelInfoArray channels;
+    int offset = 0;
+
+    for (int ch = 0; ch < kShaderChannelCount; ch++)
+    {
+        ChannelInfo& info = channels[ch];
+
+        const bool present = (shaderChannelsMap & (1 << ch)) != 0;
+        if (!present)
+        {
+            info.Reset();
+            continue;
+        }
+
+        info.stream = 0;
+        info.offset = offset;
+        info.format = VBO::GetDefaultChannelFormat( ch );
+        info.dimension = VBO::GetDefaultChannelDimension( ch );
+        offset += VBO::GetDefaultChannelByteSize( ch );
+    }
+
+    return GetD3D9GfxDevice().GetVertexDecls().GetVertexDecl( channels );
+}
+
+// Returns the vertex shader constant cache of the D3D9 device.
+VertexShaderConstantCache& GetD3D9VertexShaderConstantCache()
+{
+ return GetD3D9GfxDevice().GetVertexShaderConstantCache();
+}
+
+// Returns the pixel shader constant cache of the D3D9 device.
+PixelShaderConstantCache& GetD3D9PixelShaderConstantCache()
+{
+ return GetD3D9GfxDevice().GetPixelShaderConstantCache();
+}
+
+
+// ---------- render textures
+
+// Creates a color render surface by forwarding to CreateRenderColorSurfaceD3D9.
+// Note: the 'depth' parameter is not forwarded.
+RenderSurfaceHandle GfxDeviceD3D9::CreateRenderColorSurface (TextureID textureID, int width, int height, int samples, int depth, TextureDimension dim, RenderTextureFormat format, UInt32 createFlags)
+{
+ return CreateRenderColorSurfaceD3D9 (textureID, width, height, samples, dim, createFlags, format, m_Textures);
+}
+// Creates a depth render surface by forwarding to CreateRenderDepthSurfaceD3D9.
+// Note: the 'dim' parameter is not forwarded.
+RenderSurfaceHandle GfxDeviceD3D9::CreateRenderDepthSurface(TextureID textureID, int width, int height, int samples, TextureDimension dim, DepthBufferFormat depthFormat, UInt32 createFlags)
+{
+ return CreateRenderDepthSurfaceD3D9 (textureID, width, height, samples, depthFormat, createFlags, m_Textures);
+}
+// Destroys a render surface by forwarding to DestroyRenderSurfaceD3D9.
+void GfxDeviceD3D9::DestroyRenderSurface(RenderSurfaceHandle& rs)
+{
+ DestroyRenderSurfaceD3D9( rs, m_Textures );
+}
+// Makes the given color/depth surfaces the active render targets.
+// The cached target size is first reset to the window size and then handed to
+// SetRenderTargetD3D9 (presumably updated there for off-screen targets -- confirm
+// against its declaration).
+void GfxDeviceD3D9::SetRenderTargets (int count, RenderSurfaceHandle* colorHandles, RenderSurfaceHandle depthHandle, int mipLevel, CubemapFace face)
+{
+ // Passed to SetRenderTargetD3D9; the value is not used afterwards in this function.
+ bool isBackBuffer;
+ m_CurrTargetWidth = m_CurrWindowWidth;
+ m_CurrTargetHeight = m_CurrWindowHeight;
+ if (SetRenderTargetD3D9 (count, colorHandles, depthHandle, mipLevel, face, m_CurrTargetWidth, m_CurrTargetHeight, isBackBuffer))
+ {
+ // changing render target might mean different color clear flags; so reset current state
+ m_CurrBlendState = NULL;
+ }
+}
+// Copies the current depth buffer into the texture of depthHandle using the
+// RESZ mechanism (asserted to be available via gGraphicsCaps.d3d.hasDepthResolveRESZ).
+// The sequence is: reset point size, bind the destination texture on fragment
+// unit 0, issue a dummy point draw, then write the RESZ code into D3DRS_POINTSIZE.
+// Note: colorHandle is not used, and depthHandle.object is dereferenced without
+// a NULL check.
+void GfxDeviceD3D9::ResolveDepthIntoTexture (RenderSurfaceHandle colorHandle, RenderSurfaceHandle depthHandle)
+{
+ Assert (gGraphicsCaps.d3d.hasDepthResolveRESZ);
+
+ RenderSurfaceD3D9* depthSurf = reinterpret_cast<RenderSurfaceD3D9*>(depthHandle.object);
+
+ IDirect3DDevice9* dev = GetD3DDevice();
+ // Important: change point size render state to something else than RESZ
+ // before the dummy draw call; otherwise RESZ state set will be filtered out
+ // by non-PURE D3D device.
+ dev->SetRenderState (D3DRS_POINTSIZE, 0);
+
+ // Bind destination as texture
+ SetTexture (kShaderFragment, 0, 0, depthSurf->textureID, kTexDim2D, 0.0f);
+
+ // Dummy draw call
+ // One point at the origin; the stride of 12 is the size of the three floats.
+ float dummy[3] = {0,0,0};
+ dev->DrawPrimitiveUP (D3DPT_POINTLIST, 1, dummy, 12);
+
+ // RESZ to trigger depth buffer copy
+ dev->SetRenderState (D3DRS_POINTSIZE, 0x7fa05000);
+}
+
+
+// Copies srcHandle's surface into dstHandle's surface with StretchRect (no
+// filtering). Both handles must be valid color surfaces with non-NULL D3D
+// surfaces and identical dimension type, format, width and height; otherwise a
+// warning is logged and nothing is copied.
+void GfxDeviceD3D9::ResolveColorSurface (RenderSurfaceHandle srcHandle, RenderSurfaceHandle dstHandle)
+{
+    Assert (srcHandle.IsValid());
+    Assert (dstHandle.IsValid());
+    RenderColorSurfaceD3D9* source = reinterpret_cast<RenderColorSurfaceD3D9*>(srcHandle.object);
+    RenderColorSurfaceD3D9* target = reinterpret_cast<RenderColorSurfaceD3D9*>(dstHandle.object);
+
+    if (!(source->colorSurface && target->colorSurface))
+    {
+        WarningString("RenderTexture: Resolving non-color surfaces.");
+        return;
+    }
+    if (!(source->m_Surface && target->m_Surface))
+    {
+        WarningString("RenderTexture: Resolving NULL surfaces.");
+        return;
+    }
+    if (!(source->dim == target->dim))
+    {
+        WarningString("RenderTexture: Resolving surfaces of different types.");
+        return;
+    }
+    if (!(source->format == target->format))
+    {
+        WarningString("RenderTexture: Resolving surfaces of different formats.");
+        return;
+    }
+    if (!(source->width == target->width && source->height == target->height))
+    {
+        WarningString("RenderTexture: Resolving surfaces of different sizes.");
+        return;
+    }
+
+    // Whole-surface copy: no source or destination rectangle.
+    GetD3DDevice()->StretchRect (source->m_Surface, NULL, target->m_Surface, NULL, D3DTEXF_NONE);
+}
+
+// Returns the active color render surface at 'index' (forwards to
+// GetActiveRenderColorSurfaceD3D9).
+RenderSurfaceHandle GfxDeviceD3D9::GetActiveRenderColorSurface (int index)
+{
+ return GetActiveRenderColorSurfaceD3D9(index);
+}
+// Returns the active depth render surface (forwards to
+// GetActiveRenderDepthSurfaceD3D9).
+RenderSurfaceHandle GfxDeviceD3D9::GetActiveRenderDepthSurface()
+{
+ return GetActiveRenderDepthSurfaceD3D9();
+}
+// Intentionally empty: surface flags are ignored by this device.
+void GfxDeviceD3D9::SetSurfaceFlags (RenderSurfaceHandle surf, UInt32 flags, UInt32 keepFlags)
+{
+}
+
+
+// ---------- uploading textures
+
+// Uploads a 2D texture through m_Textures. Note: 'srcSize' is not forwarded.
+void GfxDeviceD3D9::UploadTexture2D( TextureID texture, TextureDimension dimension, UInt8* srcData, int srcSize, int width, int height, TextureFormat format, int mipCount, UInt32 uploadFlags, int skipMipLevels, TextureUsageMode usageMode, TextureColorSpace colorSpace )
+{
+ m_Textures.UploadTexture2D( texture, dimension, srcData, width, height, format, mipCount, uploadFlags, skipMipLevels, usageMode, colorSpace );
+}
+// Uploads a sub-rectangle of one mip level through m_Textures.
+// Note: 'srcSize' is not forwarded.
+void GfxDeviceD3D9::UploadTextureSubData2D( TextureID texture, UInt8* srcData, int srcSize, int mipLevel, int x, int y, int width, int height, TextureFormat format, TextureColorSpace colorSpace )
+{
+ m_Textures.UploadTextureSubData2D( texture, srcData, mipLevel, x, y, width, height, format, colorSpace );
+}
+// Uploads a cubemap through m_Textures. Note: 'srcSize' is not forwarded.
+void GfxDeviceD3D9::UploadTextureCube( TextureID texture, UInt8* srcData, int srcSize, int faceDataSize, int size, TextureFormat format, int mipCount, UInt32 uploadFlags, TextureColorSpace colorSpace )
+{
+ m_Textures.UploadTextureCube( texture, srcData, faceDataSize, size, format, mipCount, uploadFlags, colorSpace );
+}
+// Uploads a 3D texture through m_Textures. Note: 'srcSize' is not forwarded.
+void GfxDeviceD3D9::UploadTexture3D( TextureID texture, UInt8* srcData, int srcSize, int width, int height, int depth, TextureFormat format, int mipCount, UInt32 uploadFlags )
+{
+ m_Textures.UploadTexture3D( texture, srcData, width, height, depth, format, mipCount, uploadFlags );
+}
+
+// Deletes the texture from m_Textures and invalidates every cached pixel and
+// vertex shader texture unit state that still refers to it.
+void GfxDeviceD3D9::DeleteTexture( TextureID texture )
+{
+    m_Textures.DeleteTexture( texture );
+
+    // Pixel shader texture units.
+    for (int unit = 0; unit < ARRAY_SIZE(m_State.texturesPS); ++unit)
+    {
+        if (m_State.texturesPS[unit].texID == texture)
+            m_State.texturesPS[unit].Invalidate();
+    }
+
+    // Vertex shader texture units.
+    for (int unit = 0; unit < ARRAY_SIZE(m_State.texturesVS); ++unit)
+    {
+        if (m_State.texturesVS[unit].texID == texture)
+            m_State.texturesVS[unit].Invalidate();
+    }
+}
+
+// Unbinds 'texture' from the D3D device: every pixel or vertex shader sampler
+// whose cached state refers to it gets a NULL texture set and its cached state
+// invalidated.
+void UnbindTextureD3D9( TextureID texture )
+{
+    GfxDeviceD3D9& device = static_cast<GfxDeviceD3D9&>( GetRealGfxDevice() );
+    IDirect3DDevice9* d3d = GetD3DDevice();
+
+    // Pixel shader samplers.
+    for (int unit = 0; unit < ARRAY_SIZE(device.GetState().texturesPS); ++unit)
+    {
+        TextureUnitStateD3D& cached = device.GetState().texturesPS[unit];
+        if (!(cached.texID == texture))
+            continue;
+        D3D9_CALL(d3d->SetTexture(GetD3D9SamplerIndex(kShaderFragment,unit), NULL));
+        cached.Invalidate();
+    }
+
+    // Vertex shader samplers.
+    for (int unit = 0; unit < ARRAY_SIZE(device.GetState().texturesVS); ++unit)
+    {
+        TextureUnitStateD3D& cached = device.GetState().texturesVS[unit];
+        if (!(cached.texID == texture))
+            continue;
+        D3D9_CALL(d3d->SetTexture(GetD3D9SamplerIndex(kShaderVertex,unit), NULL));
+        cached.Invalidate();
+    }
+}
+
+
+// ---------- context
+
+// This device always reports kPresentBeforeUpdate.
+GfxDevice::PresentMode GfxDeviceD3D9::GetPresentMode()
+{
+ return kPresentBeforeUpdate;
+}
+
+// Starts a frame: calls BeginScene and marks the device as inside a frame.
+// Does nothing while the device is lost. The BeginScene result is not checked.
+void GfxDeviceD3D9::BeginFrame()
+{
+ if( m_State.m_DeviceLost )
+ return;
+
+ // begin scene
+ Assert( !m_InsideFrame );
+ GetD3DDevice()->BeginScene();
+ m_InsideFrame = true;
+
+}
+
+// Ends the frame started by BeginFrame(): calls EndScene and clears the
+// inside-frame flag. Does nothing when no frame is open.
+void GfxDeviceD3D9::EndFrame()
+{
+ // Check if we're inside scene in case BeginFrame() failed
+ if( !m_InsideFrame )
+ return;
+
+ GetD3DDevice()->EndScene();
+ m_InsideFrame = false;
+}
+
+// The device state is valid as long as the device is not flagged as lost.
+bool GfxDeviceD3D9::IsValidState()
+{
+ return !m_State.m_DeviceLost;
+}
+
+// Attempts to recover from a lost device. Dynamic resources are released first,
+// then HandleD3DDeviceLost() is called, and finally the cached device state is
+// invalidated regardless of the outcome.
+// Returns the result of HandleD3DDeviceLost().
+bool GfxDeviceD3D9::HandleInvalidState()
+{
+#if ENABLE_MULTITHREADED_CODE
+ // Reset render textures owned by the main thread
+ if (Thread::CurrentThreadIsMainThread())
+ CommonReloadResources(kReleaseRenderTextures);
+#endif
+
+ ResetDynamicResourcesD3D9();
+
+ bool success = HandleD3DDeviceLost();
+
+#if ENABLE_PROFILER
+ // Timer queries were released by ResetDynamicResources(); recreate them only
+ // once the device is usable again.
+ if (success)
+ m_TimerQueriesD3D9.RecreateAllQueries();
+#endif
+
+ InvalidateState();
+ return success;
+}
+
+// Releases every non-NULL query in s_EventQueries and empties the list.
+static void CleanupEventQueries ()
+{
+    for (D3D9QueryList::iterator cur = s_EventQueries.begin(); cur != s_EventQueries.end(); ++cur)
+    {
+        if (*cur == NULL)
+            continue;
+        (*cur)->Release();
+    }
+    s_EventQueries.clear();
+}
+
+// Blocks until the oldest event query in s_EventQueries no longer reports
+// S_FALSE (sleeping 1 ms between polls), then releases it and removes it from
+// the list. The list must not be empty.
+static void PopEventQuery ()
+{
+    AssertIf (s_EventQueries.empty());
+
+    IDirect3DQuery9* oldest = s_EventQueries.front();
+    AssertIf (oldest == NULL);
+
+    for (;;)
+    {
+        // Any result other than S_FALSE (data ready, or an error) ends the wait.
+        if (oldest->GetData (NULL, 0, D3DGETDATA_FLUSH) != S_FALSE)
+            break;
+        Sleep (1);
+    }
+    oldest->Release();
+
+    s_EventQueries.pop_front();
+}
+
+// Issues a new D3DQUERYTYPE_EVENT query and appends it to s_EventQueries, then
+// waits on the oldest queries until no more than m_MaxBufferedFrames remain
+// pending. Does nothing when m_MaxBufferedFrames is negative.
+void GfxDeviceD3D9::PushEventQuery ()
+{
+    if (m_MaxBufferedFrames < 0)
+        return;
+
+    IDirect3DQuery9* marker = NULL;
+    GetD3DDevice()->CreateQuery (D3DQUERYTYPE_EVENT, &marker);
+    if (marker != NULL)
+    {
+        const bool issued = SUCCEEDED(marker->Issue(D3DISSUE_END));
+        if (issued)
+            s_EventQueries.push_back (marker);
+        else
+            marker->Release();
+    }
+
+    // don't exceed maximum lag... instead we'll deterministically block here until the GPU has done enough work
+    for (;;)
+    {
+        if (s_EventQueries.empty())
+            break;
+        if (!(s_EventQueries.size() > m_MaxBufferedFrames))
+            break;
+        PopEventQuery();
+    }
+}
+
+// Presents the back buffer and pushes an event query for frame-lag limiting.
+// Skipped entirely while the device is lost. A Present() result of
+// D3DERR_DEVICELOST or D3DERR_DRIVERINTERNALERROR flags the device as lost.
+void GfxDeviceD3D9::PresentFrame()
+{
+    if( m_State.m_DeviceLost )
+        return;
+
+    const HRESULT presentResult = GetD3DDevice()->Present( NULL, NULL, NULL, NULL );
+    PushEventQuery();
+
+    // D3DERR_DRIVERINTERNALERROR from Present() is handled the same way as a
+    // lost device: flag it and let the recovery path deal with it.
+    switch( presentResult )
+    {
+    case D3DERR_DEVICELOST:
+    case D3DERR_DRIVERINTERNALERROR:
+        m_State.m_DeviceLost = true;
+        break;
+    default:
+        break;
+    }
+}
+
+// Intentionally empty on this device.
+void GfxDeviceD3D9::FinishRendering()
+{
+ // not needed on D3D
+}
+
+
+
+// ---------- immediate mode rendering
+
+// we break very large immediate mode submissions into multiple batches internally
+// This value also sizes the quad index buffer built by ImmediateModeD3D and is
+// the per-draw vertex cap for quads in DrawUserPrimitives.
+const int kMaxImmediateVerticesPerDraw = 8192;
+
+
+// Builds the 16-bit index buffer used to draw quads as triangle lists.
+// Each quad (vertices v..v+3) becomes the two triangles (v+1, v+2, v) and
+// (v+2, v+3, v). kMaxImmediateVerticesPerDraw quads are generated.
+ImmediateModeD3D::ImmediateModeD3D()
+: m_ImmVertexDecl(NULL)
+{
+    m_QuadsIB = new UInt16[kMaxImmediateVerticesPerDraw*6];
+    for( int quad = 0; quad < kMaxImmediateVerticesPerDraw; ++quad )
+    {
+        const UInt32 firstVertex = quad * 4;
+        UInt16* indices = m_QuadsIB + quad * 6;
+        // first triangle
+        indices[0] = firstVertex + 1;
+        indices[1] = firstVertex + 2;
+        indices[2] = firstVertex;
+        // second triangle
+        indices[3] = firstVertex + 2;
+        indices[4] = firstVertex + 3;
+        indices[5] = firstVertex;
+    }
+}
+
+// Frees the quad index buffer allocated in the constructor.
+ImmediateModeD3D::~ImmediateModeD3D()
+{
+ delete[] m_QuadsIB;
+}
+
+
+// Drops all queued vertices and zeroes the current vertex attributes.
+void ImmediateModeD3D::Invalidate()
+{
+ m_Vertices.clear();
+ memset( &m_Current, 0, sizeof(m_Current) );
+}
+
+// Appends a vertex at (x, y, z) carrying the current normal, color and texture
+// coordinates. When the batch is within 4 vertices of
+// kMaxImmediateVerticesPerDraw it is flushed and restarted at the next primitive
+// boundary (a multiple of 3 vertices for triangles, of 4 for everything else).
+void GfxDeviceD3D9::ImmediateVertex( float x, float y, float z )
+{
+    const size_t queued = m_Imm.m_Vertices.size();
+    if( queued >= kMaxImmediateVerticesPerDraw - 4 )
+    {
+        const GfxPrimitiveType mode = m_Imm.m_Mode;
+        // NOTE: Splitting on multiples of 4 won't quite work for triangle strips,
+        // but we'll just pretend that will never happen.
+        const size_t verticesPerPrimitive = (mode == kPrimitiveTriangles) ? 3 : 4;
+        if( queued % verticesPerPrimitive == 0 )
+        {
+            ImmediateEnd();
+            ImmediateBegin( mode );
+        }
+    }
+
+    D3DVECTOR& position = m_Imm.m_Current.vertex;
+    position.x = x;
+    position.y = y;
+    position.z = z;
+    m_Imm.m_Vertices.push_back( m_Imm.m_Current );
+}
+
+// Sets the normal that subsequent ImmediateVertex() calls will carry.
+void GfxDeviceD3D9::ImmediateNormal( float x, float y, float z )
+{
+ m_Imm.m_Current.normal.x = x;
+ m_Imm.m_Current.normal.y = y;
+ m_Imm.m_Current.normal.z = z;
+}
+
+// Sets the color that subsequent ImmediateVertex() calls will carry; the float
+// components are converted with ColorToD3D.
+void GfxDeviceD3D9::ImmediateColor( float r, float g, float b, float a )
+{
+ float color[4] = { r, g, b, a };
+ m_Imm.m_Current.color = ColorToD3D( color );
+}
+
+// Sets the same texture coordinate on all 8 texture coordinate sets of the
+// current immediate-mode vertex.
+void GfxDeviceD3D9::ImmediateTexCoordAll( float x, float y, float z )
+{
+ for( int i = 0; i < 8; ++i )
+ {
+ D3DVECTOR& uv = m_Imm.m_Current.texCoords[i];
+ uv.x = x;
+ uv.y = y;
+ uv.z = z;
+ }
+}
+
+// Sets the texture coordinate of one set (unit 0..7) on the current
+// immediate-mode vertex. An out-of-range unit logs an error and is ignored.
+void GfxDeviceD3D9::ImmediateTexCoord( int unit, float x, float y, float z )
+{
+ if( unit < 0 || unit >= 8 )
+ {
+ ErrorString( "Invalid unit for texcoord" );
+ return;
+ }
+ D3DVECTOR& uv = m_Imm.m_Current.texCoords[unit];
+ uv.x = x;
+ uv.y = y;
+ uv.z = z;
+}
+
+// Starts a new immediate-mode batch of the given primitive type, discarding any
+// vertices still queued.
+void GfxDeviceD3D9::ImmediateBegin( GfxPrimitiveType type )
+{
+ m_Imm.m_Mode = type;
+ m_Imm.m_Vertices.clear();
+}
+
+// Draws the vertices queued since ImmediateBegin() with DrawPrimitiveUP /
+// DrawIndexedPrimitiveUP and clears the queue. Does nothing for an empty batch.
+// The immediate-mode vertex declaration is created lazily on first use; if that
+// fails the batch is dropped with an error instead of being drawn with a NULL
+// declaration, and creation is retried on the next call.
+void GfxDeviceD3D9::ImmediateEnd()
+{
+    if( m_Imm.m_Vertices.empty() )
+        return;
+
+    // lazily create vertex declaration
+    IDirect3DDevice9* dev = GetD3DDevice();
+    HRESULT hr = S_OK;
+    if( !m_Imm.m_ImmVertexDecl )
+    {
+        static const D3DVERTEXELEMENT9 elements[] = {
+            // stream, offset, data type, processing, semantics, index
+            { 0, 0, D3DDECLTYPE_FLOAT3, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_POSITION, 0 }, // position
+            { 0, 12, D3DDECLTYPE_FLOAT3, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_NORMAL, 0 }, // normal
+            { 0, 24, D3DDECLTYPE_D3DCOLOR, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_COLOR, 0 }, // color
+            { 0, 28, D3DDECLTYPE_FLOAT3, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD, 0 }, // UVs
+            { 0, 40, D3DDECLTYPE_FLOAT3, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD, 1 },
+            { 0, 52, D3DDECLTYPE_FLOAT3, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD, 2 },
+            { 0, 64, D3DDECLTYPE_FLOAT3, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD, 3 },
+            { 0, 76, D3DDECLTYPE_FLOAT3, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD, 4 },
+            { 0, 88, D3DDECLTYPE_FLOAT3, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD, 5 },
+            { 0, 100, D3DDECLTYPE_FLOAT3, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD, 6 },
+            { 0, 112, D3DDECLTYPE_FLOAT3, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD, 7 },
+            D3DDECL_END()
+        };
+        hr = dev->CreateVertexDeclaration( elements, &m_Imm.m_ImmVertexDecl );
+        if( FAILED(hr) || !m_Imm.m_ImmVertexDecl )
+        {
+            // Without a declaration the draw below cannot work; drop this batch.
+            ErrorString( "ImmediateEnd: failed to create vertex declaration" );
+            m_Imm.m_Vertices.clear();
+            return;
+        }
+    }
+
+    // draw
+    D3D9_CALL(dev->SetVertexDeclaration( m_Imm.m_ImmVertexDecl ));
+
+    BeforeDrawCall( true );
+
+    int vertexCount = m_Imm.m_Vertices.size();
+    const ImmediateVertexD3D* vb = &m_Imm.m_Vertices[0];
+    switch( m_Imm.m_Mode )
+    {
+    case kPrimitiveTriangles:
+        hr = D3D9_CALL_HR(dev->DrawPrimitiveUP( D3DPT_TRIANGLELIST, vertexCount / 3, vb, sizeof(ImmediateVertexD3D) ));
+        m_Stats.AddDrawCall( vertexCount / 3, vertexCount );
+        break;
+    case kPrimitiveTriangleStripDeprecated:
+        hr = D3D9_CALL_HR(dev->DrawPrimitiveUP( D3DPT_TRIANGLESTRIP, vertexCount - 2, vb, sizeof(ImmediateVertexD3D) ));
+        m_Stats.AddDrawCall( vertexCount - 2, vertexCount );
+        break;
+    case kPrimitiveQuads:
+        // Quads are drawn as two triangles each through the prebuilt quad index buffer.
+        hr = D3D9_CALL_HR(dev->DrawIndexedPrimitiveUP( D3DPT_TRIANGLELIST, 0, vertexCount, vertexCount / 4 * 2, m_Imm.m_QuadsIB, D3DFMT_INDEX16, vb, sizeof(ImmediateVertexD3D) ));
+        m_Stats.AddDrawCall( vertexCount / 4 * 2, vertexCount );
+        break;
+    case kPrimitiveLines:
+        hr = D3D9_CALL_HR(dev->DrawPrimitiveUP( D3DPT_LINELIST, vertexCount / 2, vb, sizeof(ImmediateVertexD3D) ));
+        m_Stats.AddDrawCall( vertexCount / 2, vertexCount );
+        break;
+    default:
+        AssertString("ImmediateEnd: unknown draw mode");
+    }
+    AssertIf( FAILED(hr) );
+    // TODO: stats
+
+    // clear vertices
+    m_Imm.m_Vertices.clear();
+}
+
+
+
+// Size in bytes of one pixel for the render target formats CaptureScreenshot
+// can convert; 0 for every other format.
+static int GetScreenshotSourcePixelSizeD3D9( D3DFORMAT format )
+{
+    switch( format )
+    {
+    case D3DFMT_A8R8G8B8:
+    case D3DFMT_X8R8G8B8:
+        return 4;
+    case D3DFMT_R5G6B5:
+    case D3DFMT_X1R5G5B5:
+    case D3DFMT_A1R5G5B5:
+        return 2;
+    default:
+        return 0;
+    }
+}
+
+// Converts one source pixel of a supported format into four bytes R, G, B, A.
+// 5/6-bit channels are placed in the top bits of each byte; 565 gets alpha 0xFF
+// and 1555 maps its alpha bit to 0x80.
+static void ConvertScreenshotPixelD3D9( D3DFORMAT format, const UInt8* src, UInt8* dstRGBA )
+{
+    if( format == D3DFMT_A8R8G8B8 || format == D3DFMT_X8R8G8B8 )
+    {
+        const UInt32 argb = *(const UInt32*)src;
+        dstRGBA[0] = (UInt8)(argb >> 16);
+        dstRGBA[1] = (UInt8)(argb >> 8);
+        dstRGBA[2] = (UInt8)(argb);
+        dstRGBA[3] = (UInt8)(argb >> 24);
+        return;
+    }
+
+    const UInt32 packed = *(const UInt16*)src;
+    if( format == D3DFMT_R5G6B5 )
+    {
+        dstRGBA[0] = (UInt8)((packed & 0xF800) >> 8);
+        dstRGBA[1] = (UInt8)((packed & 0x07E0) >> 3);
+        dstRGBA[2] = (UInt8)((packed & 0x001F) << 3);
+        dstRGBA[3] = 0xFF;
+    }
+    else
+    {
+        // X1R5G5B5 / A1R5G5B5
+        dstRGBA[0] = (UInt8)((packed & 0x7C00) >> 7);
+        dstRGBA[1] = (UInt8)((packed & 0x03E0) >> 2);
+        dstRGBA[2] = (UInt8)((packed & 0x001F) << 3);
+        dstRGBA[3] = (UInt8)((packed & 0x8000) >> 8);
+    }
+}
+
+// Reads a rectangle of render target 0 into 'rgba32' as tightly packed 32-bit
+// pixels (bytes R, G, B, A), width*height*4 bytes in total.
+// left/bottom/width/height describe the rectangle with the origin at the bottom
+// left of the target; output rows are written bottom-up, so the first row in
+// 'rgba32' is the bottom row of the rectangle.
+// Multisampled targets are resolved into a temporary render target first.
+// Returns false when the rectangle is outside the target, the target format is
+// not one of A8R8G8B8, X8R8G8B8, R5G6B5, X1R5G5B5, A1R5G5B5, or any D3D call fails.
+bool GfxDeviceD3D9::CaptureScreenshot( int left, int bottom, int width, int height, UInt8* rgba32 )
+{
+    IDirect3DDevice9* dev = GetD3DDevice();
+
+    SurfacePointer renderTarget;
+    HRESULT hr = dev->GetRenderTarget( 0, &renderTarget );
+    if( !renderTarget || FAILED(hr) )
+        return false;
+
+    D3DSURFACE_DESC rtDesc;
+    renderTarget->GetDesc( &rtDesc );
+
+    // Reject rectangles outside the target before locking anything (same checks
+    // and message as ReadbackImage; written so that left+width cannot overflow).
+    if( width <= 0 || left < 0 || width > (int)rtDesc.Width - left ||
+        height <= 0 || bottom < 0 || height > (int)rtDesc.Height - bottom )
+    {
+        ErrorString("Trying to read pixel out of bounds");
+        return false;
+    }
+
+    // TODO: handle more conversions!
+    const int srcPixelSize = GetScreenshotSourcePixelSizeD3D9( rtDesc.Format );
+    if( srcPixelSize == 0 )
+        return false;
+
+    // GetRenderTargetData cannot read a multisampled surface; resolve it first.
+    SurfacePointer resolvedSurface;
+    if( rtDesc.MultiSampleType != D3DMULTISAMPLE_NONE )
+    {
+        hr = dev->CreateRenderTarget( rtDesc.Width, rtDesc.Height, rtDesc.Format, D3DMULTISAMPLE_NONE, 0, FALSE, &resolvedSurface, NULL );
+        if( FAILED(hr) )
+            return false;
+        hr = dev->StretchRect( renderTarget, NULL, resolvedSurface, NULL, D3DTEXF_NONE );
+        if( FAILED(hr) )
+            return false;
+        renderTarget = resolvedSurface;
+    }
+
+    SurfacePointer offscreenSurface;
+    hr = dev->CreateOffscreenPlainSurface( rtDesc.Width, rtDesc.Height, rtDesc.Format, D3DPOOL_SYSTEMMEM, &offscreenSurface, NULL );
+    if( FAILED(hr) )
+        return false;
+
+    hr = dev->GetRenderTargetData( renderTarget, offscreenSurface );
+    if( FAILED(hr) )
+        return false;
+
+    // D3D surfaces have their origin at the top left; flip the vertical range.
+    RECT rect;
+    rect.left = left;
+    rect.right = left + width;
+    rect.top = rtDesc.Height - bottom - height;
+    rect.bottom = rtDesc.Height - bottom;
+
+    D3DLOCKED_RECT lr;
+    hr = offscreenSurface->LockRect( &lr, &rect, D3DLOCK_READONLY );
+    if( FAILED(hr) )
+        return false; // nothing was locked, so there is nothing to unlock
+
+    const UInt8* srcRow = (const UInt8*)lr.pBits;
+    const size_t dstRowBytes = (size_t)width * sizeof(UInt32);
+    for( int y = 0; y < height; ++y )
+    {
+        // Source rows run top-down, destination rows bottom-up.
+        const UInt8* srcPixel = srcRow;
+        UInt8* dstPixel = rgba32 + (size_t)(height - 1 - y) * dstRowBytes;
+        for( int x = 0; x < width; ++x )
+        {
+            ConvertScreenshotPixelD3D9( rtDesc.Format, srcPixel, dstPixel );
+            srcPixel += srcPixelSize;
+            dstPixel += sizeof(UInt32);
+        }
+        srcRow += lr.Pitch;
+    }
+    offscreenSurface->UnlockRect();
+
+    return true;
+}
+
+
+
+// Size in bytes of one pixel for the render target formats ReadbackImage can
+// convert; 0 for every other format.
+static int GetReadbackSourcePixelSizeD3D9( D3DFORMAT format )
+{
+    switch( format )
+    {
+    case D3DFMT_A8R8G8B8:
+    case D3DFMT_X8R8G8B8:
+        return 4;
+    case D3DFMT_R5G6B5:
+    case D3DFMT_A1R5G5B5:
+    case D3DFMT_X1R5G5B5:
+        return 2;
+    default:
+        return 0;
+    }
+}
+
+// Expands one source pixel of a supported format into 8-bit A, R, G, B values.
+// 5/6-bit channels are placed in the top bits of each byte; 565 gets alpha 0xFF
+// and 1555 maps its alpha bit to 0x80.
+static void DecodeReadbackPixelD3D9( D3DFORMAT format, const UInt8* src, UInt8& a, UInt8& r, UInt8& g, UInt8& b )
+{
+    if( format == D3DFMT_A8R8G8B8 || format == D3DFMT_X8R8G8B8 )
+    {
+        const UInt32 argb = *(const UInt32*)src;
+        a = (UInt8)(argb >> 24);
+        r = (UInt8)(argb >> 16);
+        g = (UInt8)(argb >> 8);
+        b = (UInt8)(argb);
+        return;
+    }
+
+    const UInt32 packed = *(const UInt16*)src;
+    if( format == D3DFMT_R5G6B5 )
+    {
+        a = 0xFF;
+        r = (UInt8)((packed & 0xF800) >> 8);
+        g = (UInt8)((packed & 0x07E0) >> 3);
+        b = (UInt8)((packed & 0x001F) << 3);
+    }
+    else
+    {
+        // A1R5G5B5 / X1R5G5B5
+        a = (UInt8)((packed & 0x8000) >> 8);
+        r = (UInt8)((packed & 0x7C00) >> 7);
+        g = (UInt8)((packed & 0x03E0) >> 2);
+        b = (UInt8)((packed & 0x001F) << 3);
+    }
+}
+
+// Reads a rectangle of render target 0 into 'image' at (destX, destY).
+// left/bottom/width/height describe the source rectangle with the origin at the
+// bottom left of the target. The image must be kTexFormatARGB32 (bytes A, R, G, B)
+// or kTexFormatRGB24 (bytes R, G, B). The top source row is written to image row
+// destY+height-1 and the bottom source row to image row destY.
+// Multisampled targets are resolved into a temporary render target first.
+// Returns false when the image format or target format is unsupported, the
+// rectangle is outside the target, or any D3D call fails.
+// NOTE(review): destX/destY are not validated against the image size here --
+// callers presumably guarantee the destination rectangle fits; confirm.
+bool GfxDeviceD3D9::ReadbackImage( ImageReference& image, int left, int bottom, int width, int height, int destX, int destY )
+{
+    // TODO: make it work in all different situations
+
+    const bool toARGB32 = image.GetFormat() == kTexFormatARGB32;
+    const bool toRGB24 = image.GetFormat() == kTexFormatRGB24;
+    AssertIf( !toARGB32 && !toRGB24 );
+    if( !toARGB32 && !toRGB24 )
+        return false; // nothing would be written, so do not report success
+    const int dstPixelSize = toARGB32 ? 4 : 3;
+
+    IDirect3DDevice9* dev = GetD3DDevice();
+    SurfacePointer renderTarget;
+    HRESULT hr = dev->GetRenderTarget( 0, &renderTarget );
+    if( !renderTarget || FAILED(hr) )
+        return false;
+
+    D3DSURFACE_DESC rtDesc;
+    renderTarget->GetDesc( &rtDesc );
+
+    // Validate the rectangle before allocating any temporary surfaces
+    // (written so that left+width / bottom+height cannot overflow).
+    if (width <= 0 || left < 0 || width > (int)rtDesc.Width - left)
+    {
+        ErrorString("Trying to read pixel out of bounds");
+        return false;
+    }
+    if (height <= 0 || bottom < 0 || height > (int)rtDesc.Height - bottom)
+    {
+        ErrorString("Trying to read pixel out of bounds");
+        return false;
+    }
+
+    // TODO: handle more conversions!
+    const int srcPixelSize = GetReadbackSourcePixelSizeD3D9( rtDesc.Format );
+    if( srcPixelSize == 0 )
+        return false;
+
+    // GetRenderTargetData cannot read a multisampled surface; resolve it first.
+    SurfacePointer resolvedSurface;
+    if( rtDesc.MultiSampleType != D3DMULTISAMPLE_NONE )
+    {
+        hr = dev->CreateRenderTarget( rtDesc.Width, rtDesc.Height, rtDesc.Format, D3DMULTISAMPLE_NONE, 0, FALSE, &resolvedSurface, NULL );
+        if( FAILED(hr) )
+            return false;
+        hr = dev->StretchRect( renderTarget, NULL, resolvedSurface, NULL, D3DTEXF_NONE );
+        if( FAILED(hr) )
+            return false;
+        renderTarget = resolvedSurface;
+    }
+
+    SurfacePointer offscreenSurface;
+    hr = dev->CreateOffscreenPlainSurface( rtDesc.Width, rtDesc.Height, rtDesc.Format, D3DPOOL_SYSTEMMEM, &offscreenSurface, NULL );
+    if( FAILED(hr) )
+        return false;
+
+    hr = dev->GetRenderTargetData( renderTarget, offscreenSurface );
+    if( FAILED(hr) )
+        return false;
+
+    // D3D surfaces have their origin at the top left; flip the vertical range.
+    RECT rect;
+    rect.left = left;
+    rect.right = left + width;
+    rect.top = rtDesc.Height - bottom - height;
+    rect.bottom = rtDesc.Height - bottom;
+
+    D3DLOCKED_RECT lr;
+    hr = offscreenSurface->LockRect( &lr, &rect, D3DLOCK_READONLY );
+    if( FAILED(hr) )
+        return false; // nothing was locked, so there is nothing to unlock
+
+    const UInt8* srcRow = (const UInt8*)lr.pBits;
+    for( int y = height-1; y >= 0; --y )
+    {
+        // Source rows run top-down while image rows are filled from the last one up.
+        const UInt8* srcPixel = srcRow;
+        UInt8* dstPixel = image.GetRowPtr(destY+y) + destX * dstPixelSize;
+        for( int x = 0; x < width; ++x )
+        {
+            UInt8 a, r, g, b;
+            DecodeReadbackPixelD3D9( rtDesc.Format, srcPixel, a, r, g, b );
+            if( toARGB32 )
+            {
+                dstPixel[0] = a;
+                dstPixel[1] = r;
+                dstPixel[2] = g;
+                dstPixel[3] = b;
+            }
+            else
+            {
+                dstPixel[0] = r;
+                dstPixel[1] = g;
+                dstPixel[2] = b;
+            }
+            srcPixel += srcPixelSize;
+            dstPixel += dstPixelSize;
+        }
+        srcRow += lr.Pitch;
+    }
+    offscreenSurface->UnlockRect();
+
+    return true;
+}
+
+// Copies a rectangle of the current render target 0 into mip level 0 of the
+// texture behind rtHandle, using StretchRect without filtering.
+// x/y/width/height describe the source rectangle with the origin at the bottom
+// left; the whole destination surface is the target of the copy.
+// Returns silently when the handle is invalid or any lookup fails. The 'rd'
+// parameter is not used, and the StretchRect result is not checked.
+void GfxDeviceD3D9::GrabIntoRenderTexture(RenderSurfaceHandle rtHandle, RenderSurfaceHandle rd, int x, int y, int width, int height )
+{
+ if( !rtHandle.IsValid() )
+ return;
+
+ RenderColorSurfaceD3D9* renderTexture = reinterpret_cast<RenderColorSurfaceD3D9*>( rtHandle.object );
+
+ HRESULT hr;
+ IDirect3DDevice9* dev = GetD3DDevice();
+ SurfacePointer currentRenderTarget;
+ hr = dev->GetRenderTarget( 0, &currentRenderTarget );
+ if( !currentRenderTarget || FAILED(hr) )
+ return;
+
+ D3DSURFACE_DESC rtDesc;
+ currentRenderTarget->GetDesc( &rtDesc );
+
+ // NOTE(review): the texture is assumed to be a 2D texture; its resource type
+ // is not checked before the cast.
+ IDirect3DTexture9* texturePointer = static_cast<IDirect3DTexture9*>(m_Textures.GetTexture (renderTexture->textureID));
+ if( !texturePointer )
+ return;
+
+ SurfacePointer textureSurface;
+ hr = texturePointer->GetSurfaceLevel( 0, &textureSurface );
+ if( !textureSurface || FAILED(hr) )
+ return;
+
+ // Convert the bottom-left based rectangle to D3D's top-left based coordinates.
+ RECT rc;
+ rc.left = x;
+ rc.top = rtDesc.Height - (y + height);
+ rc.right = x + width;
+ rc.bottom = rtDesc.Height - (y);
+ hr = dev->StretchRect( currentRenderTarget, &rc, textureSurface, NULL, D3DTEXF_NONE );
+}
+
+
+// Returns the IDirect3DDevice9 pointer as an opaque pointer.
+void* GfxDeviceD3D9::GetNativeGfxDevice()
+{
+ return GetD3DDevice();
+}
+
+// Returns the D3D texture object registered for 'id' as an opaque pointer
+// (whatever m_Textures.GetTexture yields for that id).
+void* GfxDeviceD3D9::GetNativeTexturePointer(TextureID id)
+{
+ return m_Textures.GetTexture (id);
+}
+
+// Registers an externally created texture with m_Textures. 'nativeTex' is
+// interpreted as an IDirect3DBaseTexture9 pointer; the value returned by
+// RegisterNativeTexture is passed through.
+intptr_t GfxDeviceD3D9::CreateExternalTextureFromNative(intptr_t nativeTex)
+{
+ return m_Textures.RegisterNativeTexture((IDirect3DBaseTexture9*)nativeTex);
+}
+
+// Points texture 'tex' at a different externally created texture. 'nativeTex'
+// is interpreted as an IDirect3DBaseTexture9 pointer.
+void GfxDeviceD3D9::UpdateExternalTextureFromNative(TextureID tex, intptr_t nativeTex)
+{
+ m_Textures.UpdateNativeTexture(tex, (IDirect3DBaseTexture9*)nativeTex);
+}
+
+
+#if ENABLE_PROFILER
+
+// Opens a named profiling event through g_D3D9BeginEventFunc, if that function
+// pointer is set. The UTF-8 name is converted into a 100-element wide-char buffer.
+void GfxDeviceD3D9::BeginProfileEvent (const char* name)
+{
+    if (!g_D3D9BeginEventFunc)
+        return;
+
+    wchar_t wideName[100];
+    UTF8ToWide (name, wideName, 100);
+    g_D3D9BeginEventFunc (0, wideName);
+}
+
+// Closes the most recent profiling event through g_D3D9EndEventFunc, if that
+// function pointer is set.
+void GfxDeviceD3D9::EndProfileEvent ()
+{
+    if (!g_D3D9EndEventFunc)
+        return;
+
+    g_D3D9EndEventFunc ();
+}
+
+// Creates a timer query through m_TimerQueriesD3D9. Asserts that the hardware
+// reports timer query support.
+GfxTimerQuery* GfxDeviceD3D9::CreateTimerQuery()
+{
+ Assert(gGraphicsCaps.hasTimerQuery);
+ return m_TimerQueriesD3D9.CreateTimerQuery();
+}
+
+// Destroys a timer query object.
+void GfxDeviceD3D9::DeleteTimerQuery(GfxTimerQuery* query)
+{
+ delete query;
+}
+
+// Forwards to m_TimerQueriesD3D9.BeginTimerQueries() when timer queries are
+// supported; otherwise does nothing.
+void GfxDeviceD3D9::BeginTimerQueries()
+{
+    if( gGraphicsCaps.hasTimerQuery )
+        m_TimerQueriesD3D9.BeginTimerQueries();
+}
+
+// Forwards to m_TimerQueriesD3D9.EndTimerQueries() when timer queries are
+// supported; otherwise does nothing.
+void GfxDeviceD3D9::EndTimerQueries()
+{
+    if( gGraphicsCaps.hasTimerQuery )
+        m_TimerQueriesD3D9.EndTimerQueries();
+}
+
+/*
+SInt32 GfxDeviceD3D9::GetTimerQueryIdentifier()
+{
+ if(!gGraphicsCaps.hasTimerQuery)
+ return -1;
+ // Allocate more queries
+ if(m_QueryCount[m_CurrentQueryBuffer] >= m_GPUQueries[m_CurrentQueryBuffer].size())
+ {
+ int count = std::max (m_QueryCount[m_CurrentQueryBuffer], 100);
+ IDirect3DQuery9* d3dQuery;
+ for( int i = 0; i < count; i++)
+ {
+ GetD3DDevice()->CreateQuery(D3DQUERYTYPE_TIMESTAMP, &d3dQuery);
+ // initialize more Query objects
+ m_GPUQueries[m_CurrentQueryBuffer].push_back(d3dQuery);
+ }
+ }
+ int index = m_QueryCount[m_CurrentQueryBuffer]++;
+ IDirect3DQuery9* currentQuery = m_GPUQueries[m_CurrentQueryBuffer][index];
+ currentQuery ->Issue(D3DISSUE_END);
+ return index;
+}
+
+ProfileTimeFormat GfxDeviceD3D9::GetTimerQueryData(SInt32 identifier, bool wait)
+{
+ if(!gGraphicsCaps.hasTimerQuery)
+ return 0;
+
+ if(m_GPUQueries[m_CurrentQueryBuffer].size()<=identifier)
+ return 0;
+
+ UINT64 time;
+ while (S_OK != m_GPUQueries[m_CurrentQueryBuffer][identifier]->GetData(&time, sizeof(time), D3DGETDATA_FLUSH)) {}
+ return (double)time * m_TimeMultiplier;
+}
+
+void GfxDeviceD3D9::CleanupTimerQueries ()
+{
+ if(!gGraphicsCaps.hasTimerQuery)
+ return;
+
+ for(int buffer = 0; buffer < 2; buffer++)
+ {
+ for(int i = 0; i < m_GPUQueries[buffer].size(); i++)
+ m_GPUQueries[buffer][i]->Release();
+ m_GPUQueries[buffer].clear();
+ if(m_FrequencyQuery[buffer])
+ m_FrequencyQuery[buffer]->Release();
+ m_FrequencyQuery[buffer] = NULL;
+ m_QueryCount[buffer] = 0;
+ }
+}
+*/
+
+#endif // ENABLE_PROFILER
+
+
+// -------- editor only functions
+
+#if UNITY_EDITOR
+// Not implemented on this device: the body only emits a compile-time reminder.
+void GfxDeviceD3D9::SetAntiAliasFlag( bool aa )
+{
+ #pragma message("! implement SetAntiAliasFlag")
+}
+
+
+// Draws primitives straight from user memory (editor only).
+//   type           - triangles, quads, lines or line strip; anything else logs an
+//                    error and draws nothing.
+//   vertexCount    - number of vertices at 'data'; zero is a no-op.
+//   vertexChannels - bit mask of shader channels present in each vertex, laid out
+//                    as GetD3DVertexDeclaration() packs them.
+//   data, stride   - vertex memory and the size of one vertex in bytes.
+// Quads are drawn through the shared quad index buffer in batches of at most
+// kMaxImmediateVerticesPerDraw vertices.
+void GfxDeviceD3D9::DrawUserPrimitives( GfxPrimitiveType type, int vertexCount, UInt32 vertexChannels, const void* data, int stride )
+{
+    if( vertexCount == 0 )
+        return;
+
+    AssertIf(vertexCount > 60000); // TODO: handle this by multi-batching
+
+    AssertIf( !data || vertexCount < 0 || vertexChannels == 0 );
+
+    IDirect3DDevice9* dev = GetD3DDevice();
+
+    IDirect3DVertexDeclaration9* vertexDecl = GetD3DVertexDeclaration( vertexChannels );
+
+    // Bind every present channel to its default vertex component.
+    ChannelAssigns channels;
+    for( int i = 0; i < kShaderChannelCount; ++i )
+    {
+        if( !( vertexChannels & (1<<i) ) )
+            continue;
+        VertexComponent destComponent = kSuitableVertexComponentForChannel[i];
+        channels.Bind( (ShaderChannel)i, destComponent );
+    }
+    D3D9_CALL(dev->SetVertexDeclaration( vertexDecl ));
+    UpdateChannelBindingsD3D( channels );
+    BeforeDrawCall(false);
+
+    // Initialized so the final assert never reads an indeterminate value when
+    // the quad loop does not run.
+    HRESULT hr = S_OK;
+    switch( type ) {
+    case kPrimitiveTriangles:
+        hr = D3D9_CALL_HR(dev->DrawPrimitiveUP( D3DPT_TRIANGLELIST, vertexCount/3, data, stride ));
+        m_Stats.AddDrawCall( vertexCount / 3, vertexCount );
+        break;
+    case kPrimitiveQuads:
+        while (vertexCount > 0)
+        {
+            int vcount = std::min(vertexCount,kMaxImmediateVerticesPerDraw);
+            HRESULT batchHr = D3D9_CALL_HR(dev->DrawIndexedPrimitiveUP(D3DPT_TRIANGLELIST, 0, vcount, vcount / 4 * 2, m_Imm.m_QuadsIB, D3DFMT_INDEX16, data, stride));
+            // Remember a failing batch even if a later batch succeeds.
+            if (FAILED(batchHr))
+                hr = batchHr;
+            m_Stats.AddDrawCall(vcount / 4 * 2, vcount);
+            data = (const UInt8*)data + vcount * stride;
+            vertexCount -= vcount;
+        }
+        break;
+    case kPrimitiveLines:
+        hr = D3D9_CALL_HR(dev->DrawPrimitiveUP( D3DPT_LINELIST, vertexCount/2, data, stride ));
+        m_Stats.AddDrawCall( vertexCount / 2, vertexCount );
+        break;
+    case kPrimitiveLineStrip:
+        hr = D3D9_CALL_HR(dev->DrawPrimitiveUP( D3DPT_LINESTRIP, vertexCount-1, data, stride ));
+        m_Stats.AddDrawCall( vertexCount-1, vertexCount );
+        break;
+    default:
+        ErrorString("Primitive type not supported");
+        return;
+    }
+    Assert(SUCCEEDED(hr));
+}
+
+// Returns the value of GetCurrentD3DFSAALevel().
+int GfxDeviceD3D9::GetCurrentTargetAA() const
+{
+ return GetCurrentD3DFSAALevel();
+}
+
+// Creates a D3D9Window for the given HWND. The returned object is allocated
+// with new.
+GfxDeviceWindow* GfxDeviceD3D9::CreateGfxWindow( HWND window, int width, int height, DepthBufferFormat depthFormat, int antiAlias )
+{
+ return new D3D9Window( GetD3DDevice(), window, width, height, depthFormat, antiAlias);
+}
+
+#endif
+
+// Returns the cached width of the current render target.
+int GfxDeviceD3D9::GetCurrentTargetWidth() const
+{
+ return m_CurrTargetWidth;
+}
+
+// Returns the cached height of the current render target.
+int GfxDeviceD3D9::GetCurrentTargetHeight() const
+{
+ return m_CurrTargetHeight;
+}
+
+// Overrides the cached render target size; the cached window size is untouched.
+void GfxDeviceD3D9::SetCurrentTargetSize(int width, int height)
+{
+ m_CurrTargetWidth = width;
+ m_CurrTargetHeight = height;
+}
+
+// Records a new window size and makes it the current render target size too.
+void GfxDeviceD3D9::SetCurrentWindowSize(int width, int height)
+{
+    m_CurrTargetWidth = width;
+    m_CurrWindowWidth = m_CurrTargetWidth;
+
+    m_CurrTargetHeight = height;
+    m_CurrWindowHeight = m_CurrTargetHeight;
+}
+
+
+#if UNITY_EDITOR
+
+// Looks up the 2D D3D9 texture registered for 'tid'. Returns NULL when the
+// active renderer is not D3D9, no texture is registered for the id, or the
+// texture's resource type is not D3DRTYPE_TEXTURE.
+static IDirect3DTexture9* FindD3D9TextureByID (TextureID tid)
+{
+    GfxDevice& realDevice = GetRealGfxDevice();
+    if (!(realDevice.GetRenderer() == kGfxRendererD3D9))
+        return NULL;
+
+    GfxDeviceD3D9& d3dDevice = static_cast<GfxDeviceD3D9&>(realDevice);
+    IDirect3DBaseTexture9* found = d3dDevice.GetTextures().GetTexture (tid);
+    if (!found || found->GetType() != D3DRTYPE_TEXTURE)
+        return NULL;
+
+    return static_cast<IDirect3DTexture9*>(found);
+}
+
+// In the editor, for drawing directly into HDC of D3D texture.
+// Functions not defined in any header; declare prototypes manually:
+// HDC AcquireHDCForTextureD3D9 (TextureID tid, int& outWidth, int& outHeight);
+// void ReleaseHDCForTextureD3D9 (TextureID tid, HDC dc);
+// AcquireHDCForTextureD3D9 _can_ return NULL if it can't get to DC (not D3D9, no
+// texture, wrong texture format, ...).
+
+// Obtains a GDI device context for mip level 0 of the texture `tid`.
+// On success returns the DC and stores the level-0 surface size in
+// outWidth/outHeight. Returns NULL if the texture cannot be found, or if
+// fetching the surface, its description or its DC fails; the out parameters
+// are only written once the surface description has been read.
+HDC AcquireHDCForTextureD3D9 (TextureID tid, int& outWidth, int& outHeight)
+{
+	IDirect3DTexture9* texture = FindD3D9TextureByID (tid);
+	if (texture == NULL)
+		return NULL;
+
+	SurfacePointer topLevel;
+	HRESULT hr = texture->GetSurfaceLevel (0, &topLevel);
+	if (FAILED(hr))
+		return NULL;
+
+	D3DSURFACE_DESC surfaceDesc;
+	hr = topLevel->GetDesc (&surfaceDesc);
+	if (FAILED(hr))
+		return NULL;
+	outWidth = surfaceDesc.Width;
+	outHeight = surfaceDesc.Height;
+
+	HDC deviceContext = NULL;
+	hr = topLevel->GetDC (&deviceContext);
+	if (FAILED(hr))
+		return NULL;
+	return deviceContext;
+}
+
+// Hands a DC previously obtained for texture `tid` back to its level-0 surface.
+// Silently does nothing when the texture or its surface cannot be retrieved.
+void ReleaseHDCForTextureD3D9 (TextureID tid, HDC dc)
+{
+	IDirect3DTexture9* texture = FindD3D9TextureByID (tid);
+	if (texture == NULL)
+		return;
+
+	SurfacePointer topLevel;
+	const HRESULT hr = texture->GetSurfaceLevel (0, &topLevel);
+	if (SUCCEEDED(hr))
+		topLevel->ReleaseDC (dc);
+}
+
+#endif
+
+
+// ----------------------------------------------------------------------
+// verification of state
+
+#if GFX_DEVICE_VERIFY_ENABLE
+
+#include "Runtime/Utilities/Utility.h"
+
+// Helpers that compare a cached render state value against what the D3D device
+// reports. Each macro stringifies both of its arguments, so the logged message
+// names the render state and the expression it was expected to equal.
+void VerifyStateF(D3DRENDERSTATETYPE rs, float val, const char *str);
+// Float comparison; must forward to VerifyStateF (no three-argument VerifyState exists here).
+#define VERIFYF(s,t) VerifyStateF (s, t, #s " (" #t ")")
+void VerifyStateI(D3DRENDERSTATETYPE rs, int val, const char *str);
+#define VERIFYI(s,t) VerifyStateI (s, t, #s " (" #t ")")
+void VerifyEnabled(D3DRENDERSTATETYPE rs, bool val, const char *str);
+#define VERIFYENAB(s,t) VerifyEnabled ( s, t, #s " (" #t ")")
+
+static void VERIFY_PRINT( const char* format, ... )
+{
+ ErrorString( VFormat( format, va_list(&format + 1) ) );
+}
+
+const float kVerifyDelta = 0.0001f;
+
+void VerifyStateF(D3DRENDERSTATETYPE rs, float val, const char *str)
+{
+ float temp = 0;
+ GetD3DDevice()->GetRenderState(rs,(DWORD*)&temp);
+ if( !CompareApproximately(temp,val,kVerifyDelta) ) {
+ VERIFY_PRINT ("%s differs from cache (%f != %f)\n", str, val, temp);
+ }
+}
+
+void VerifyStateI(D3DRENDERSTATETYPE rs, int val, const char *str)
+{
+ int temp;
+ GetD3DDevice()->GetRenderState(rs,(DWORD*)&temp);
+ if (temp != val) {
+ VERIFY_PRINT ("%s differs from cache (%i != %i)\n", str, val, temp);
+ }
+}
+
+void VerifyEnabled(D3DRENDERSTATETYPE rs, bool val, const char *str)
+{
+ DWORD v;
+ GetD3DDevice()->GetRenderState(rs,&v);
+ bool temp = v==TRUE ? true : false;
+ if (temp != val) {
+ VERIFY_PRINT ("%s differs from cache (%d != %d)\n", str, val, temp);
+ }
+}
+
+// Debug check: asserts that the currently bound blend state object agrees with
+// the cached device state, then has the cached state verify itself against the
+// D3D device.
+void GfxDeviceD3D9::VerifyState()
+{
+	if (m_CurrBlendState != NULL)
+	{
+		if (m_State.blending != 0)
+		{
+			// Blending on: cached factors must match the bound blend state.
+			Assert (m_State.srcBlend == kBlendModeD3D9[m_CurrBlendState->sourceState.srcBlend]);
+			Assert (m_State.destBlend == kBlendModeD3D9[m_CurrBlendState->sourceState.dstBlend]);
+		}
+		else
+		{
+			// Blending off: the bound blend state must be the One/Zero combination.
+			Assert (D3DBLEND_ONE == kBlendModeD3D9[m_CurrBlendState->sourceState.srcBlend]);
+			Assert (D3DBLEND_ZERO == kBlendModeD3D9[m_CurrBlendState->sourceState.dstBlend]);
+		}
+		#if !UNITY_EDITOR // Editor does some funkiness when emulating alpha test, see SetBlendState
+		Assert (kCmpFuncD3D9[m_State.alphaFunc] == m_CurrBlendState->alphaFunc);
+		#endif
+	}
+
+	m_State.Verify();
+}
+
+
+
+// Compares the cached depth, blend and alpha-test values in this struct against
+// the render states reported by the D3D device; the VERIFY* helpers log an error
+// for each mismatch. Fields still holding their "unknown" sentinel
+// (kFuncUnknown or -1) are skipped.
+void DeviceStateD3D::Verify()
+{
+ // Nothing to compare against when D3D calls are stubbed out.
+ #ifdef DUMMY_D3D9_CALLS
+ return;
+ #endif
+ if( !GetD3DDevice() ) {
+ ErrorString("Verify: no D3D device");
+ return;
+ }
+
+ if( depthFunc != kFuncUnknown ) {
+ VERIFYI( D3DRS_ZFUNC, kCmpFuncD3D9[depthFunc] );
+ }
+ if( depthWrite != -1 ) {
+ VERIFYI( D3DRS_ZWRITEENABLE, (depthWrite ? TRUE : FALSE) );
+ }
+ if( blending != -1 ) {
+ VERIFYENAB( D3DRS_ALPHABLENDENABLE, blending != 0 );
+ // Blend factors are only checked while blending is enabled.
+ if( blending ) {
+ VERIFYI( D3DRS_SRCBLEND, srcBlend );
+ VERIFYI( D3DRS_DESTBLEND, destBlend );
+ }
+ }
+
+ if( alphaFunc != kFuncUnknown ) {
+ VERIFYENAB( D3DRS_ALPHATESTENABLE, alphaFunc != kFuncDisabled );
+ // Function and reference value are only checked while alpha test is enabled.
+ if( alphaFunc != kFuncDisabled ) {
+ VERIFYI( D3DRS_ALPHAFUNC, kCmpFuncD3D9[alphaFunc] );
+ // alphaValue is scaled by 255; the float result is converted to int
+ // when passed to VerifyStateI.
+ if( alphaValue != -1 )
+ VERIFYI( D3DRS_ALPHAREF, alphaValue*255.0f );
+ }
+ }
+}
+
+#endif // GFX_DEVICE_VERIFY_ENABLE
+
diff --git a/Runtime/GfxDevice/d3d/GfxDeviceD3D9.h b/Runtime/GfxDevice/d3d/GfxDeviceD3D9.h
new file mode 100644
index 0000000..f648a35
--- /dev/null
+++ b/Runtime/GfxDevice/d3d/GfxDeviceD3D9.h
@@ -0,0 +1,361 @@
+#pragma once
+
+#include "D3D9Includes.h"
+#include "VertexDeclarations.h"
+#include "TexturesD3D9.h"
+#include "Runtime/GfxDevice/ShaderConstantCache.h"
+#include "Runtime/Shaders/MaterialProperties.h"
+#include "VertexPipeD3D9.h"
+#include "Runtime/GfxDevice/GfxDevice.h"
+#include "Runtime/Shaders/GraphicsCaps.h"
+#include "D3D9Context.h"
+#include "Runtime/Math/FloatConversion.h"
+#include "D3D9VBO.h"
+#include "CombinerD3D.h"
+#include "External/shaderlab/Library/program.h"
+#include "External/shaderlab/Library/TextureBinding.h"
+#include "External/shaderlab/Library/texenv.h"
+#include "Runtime/Math/Matrix4x4.h"
+#include "Runtime/GfxDevice/ChannelAssigns.h"
+#include "Runtime/GfxDevice/BuiltinShaderParams.h"
+#include "Runtime/Graphics/Image.h"
+#include "PlatformDependent/Win/SmartComPointer.h"
+#include "Runtime/Utilities/Utility.h"
+#include "D3D9Utils.h"
+#include "D3D9Window.h"
+#include "GpuProgramsD3D.h"
+#include "TimerQueryD3D9.h"
+
+typedef SmartComPointer<IDirect3DSurface9> SurfacePointer;
+
+// Per-texture-unit state record: the texture ID and the bias value.
+struct TextureUnitStateD3D
+{
+ TextureID texID;
+ float bias;
+
+ // Resets both fields to sentinel values (ID -1, bias 1.0e6).
+ void Invalidate()
+ {
+ texID.m_ID = -1;
+ bias = 1.0e6f;
+ }
+};
+
+class GfxDeviceD3D9;
+
+// Cached copy of D3D9 device state kept by GfxDeviceD3D9. Verify() compares the
+// depth, blend and alpha-test fields against what the device reports;
+// Invalidate() is declared here and defined elsewhere.
+struct DeviceStateD3D
+{
+ int viewport[4];
+ int scissorRect[4];
+
+ CompareFunction depthFunc;
+ int depthWrite; // 0/1 or -1
+
+ // -1 is treated as "unknown" by Verify(); 0 means blending is off.
+ int blending;
+ int srcBlend, destBlend, srcBlendAlpha, destBlendAlpha; // D3D modes
+ int blendOp, blendOpAlpha; // D3D modes
+ CompareFunction alphaFunc;
+ // Verify() skips the alpha reference check while this is -1 and otherwise
+ // compares alphaValue*255 against D3DRS_ALPHAREF.
+ float alphaValue;
+
+ CullMode culling;
+ D3DCULL d3dculling;
+ bool appBackfaceMode, userBackfaceMode, invertProjMatrix;
+ bool wireframe;
+ int scissor;
+
+ // [0] is front, [1] is back, unless invertProjMatrix is true
+ D3DCMPFUNC stencilFunc[2];
+ D3DSTENCILOP stencilFailOp[2], depthFailOp[2], depthPassOp[2];
+
+ float offsetFactor, offsetUnits;
+
+ // One entry per shader type.
+ GpuProgram* activeGpuProgram[kShaderTypeCount];
+ const GpuProgramParameters* activeGpuProgramParams[kShaderTypeCount];
+ IUnknown* activeShader[kShaderTypeCount];
+
+ int colorWriteMask; // ColorWriteMask combinations
+
+ int m_StencilRef;
+
+ // Texture unit state for pixel shader and vertex shader stages.
+ TextureUnitStateD3D texturesPS[kMaxSupportedTextureUnits];
+ TextureUnitStateD3D texturesVS[4];
+
+ int fixedFunctionPS;
+
+ bool m_DeviceLost;
+
+ bool m_SoftwareVP;
+ UInt32 m_NeedsSofwareVPFlags;
+
+ void Invalidate( GfxDeviceD3D9& device );
+ void Verify();
+};
+
+// Layout of a single immediate mode vertex: position, normal, packed color and
+// eight float3 texture coordinate sets.
+// TODO: optimize this. Right now we just send off whole 8 float3 UVs with each
+// immediate mode vertex. We could at least detect the number of them used from
+// ImmediateTexCoord calls.
+struct ImmediateVertexD3D {
+ D3DVECTOR vertex;
+ D3DVECTOR normal;
+ D3DCOLOR color;
+ D3DVECTOR texCoords[8];
+};
+
+// State for immediate mode rendering: the accumulated vertices, the current
+// vertex attribute values and the primitive type being built.
+struct ImmediateModeD3D {
+ std::vector<ImmediateVertexD3D> m_Vertices;
+ ImmediateVertexD3D m_Current;
+ GfxPrimitiveType m_Mode;
+ IDirect3DVertexDeclaration9* m_ImmVertexDecl;
+ // 16-bit index data; the device passes it to DrawIndexedPrimitiveUP as
+ // D3DFMT_INDEX16 when drawing quads as a triangle list.
+ UInt16* m_QuadsIB;
+
+ ImmediateModeD3D();
+ ~ImmediateModeD3D();
+ void Invalidate();
+};
+
+// Direct3D 9 implementation of GfxThreadableDevice. Besides the GFX_API
+// interface it exposes accessors to its cached device state, shader constant
+// caches, vertex pipe data and texture table for the other D3D9 source files.
+class GfxDeviceD3D9 : public GfxThreadableDevice
+{
+public:
+ // Device state objects extended with the values already translated to D3D enums.
+ struct DeviceBlendStateD3D9 : public DeviceBlendState
+ {
+ UInt8 renderTargetWriteMask;
+ D3DCMPFUNC alphaFunc;
+ };
+
+ struct DeviceDepthStateD3D9 : public DeviceDepthState
+ {
+ D3DCMPFUNC depthFunc;
+ };
+
+ struct DeviceStencilStateD3D9 : public DeviceStencilState
+ {
+ D3DCMPFUNC stencilFuncFront;
+ D3DSTENCILOP stencilFailOpFront;
+ D3DSTENCILOP depthFailOpFront;
+ D3DSTENCILOP depthPassOpFront;
+ D3DCMPFUNC stencilFuncBack;
+ D3DSTENCILOP stencilFailOpBack;
+ D3DSTENCILOP depthFailOpBack;
+ D3DSTENCILOP depthPassOpBack;
+ };
+
+
+ // State caches keyed by the source state description, ordered via memcmp_less.
+ typedef std::map< GfxBlendState, DeviceBlendStateD3D9, memcmp_less<GfxBlendState> > CachedBlendStates;
+ typedef std::map< GfxDepthState, DeviceDepthStateD3D9, memcmp_less<GfxDepthState> > CachedDepthStates;
+ typedef std::map< GfxStencilState, DeviceStencilStateD3D9, memcmp_less<GfxStencilState> > CachedStencilStates;
+ typedef std::map< GfxRasterState, DeviceRasterState, memcmp_less<GfxRasterState> > CachedRasterStates;
+
+
+public:
+ GfxDeviceD3D9();
+ GFX_API ~GfxDeviceD3D9();
+
+ GFX_API void InvalidateState();
+ #if GFX_DEVICE_VERIFY_ENABLE
+ GFX_API void VerifyState();
+ #endif
+
+ GFX_API void Clear(UInt32 clearFlags, const float color[4], float depth, int stencil);
+ GFX_API void SetUserBackfaceMode( bool enable );
+ GFX_API void SetWireframe(bool wire);
+ GFX_API bool GetWireframe() const;
+ GFX_API void SetInvertProjectionMatrix( bool enable );
+ GFX_API bool GetInvertProjectionMatrix() const;
+
+ // GPU skinning is not implemented by this device: creation returns NULL and
+ // the remaining entry points assert when called.
+ GFX_API GPUSkinningInfo *CreateGPUSkinningInfo() { return NULL; }
+ GFX_API void DeleteGPUSkinningInfo(GPUSkinningInfo *info) { AssertBreak(false); }
+ GFX_API void SkinOnGPU( GPUSkinningInfo * info, bool lastThisFrame ) { AssertBreak(false); }
+ GFX_API void UpdateSkinSourceData(GPUSkinningInfo *info, const void *vertData, const BoneInfluence *skinData, bool dirty) { AssertBreak(false); }
+ GFX_API void UpdateSkinBonePoses(GPUSkinningInfo *info, const int boneCount, const Matrix4x4f* poses) { AssertBreak(false); }
+
+ GFX_API DeviceBlendState* CreateBlendState(const GfxBlendState& state);
+ GFX_API DeviceDepthState* CreateDepthState(const GfxDepthState& state);
+ GFX_API DeviceStencilState* CreateStencilState(const GfxStencilState& state);
+ GFX_API DeviceRasterState* CreateRasterState(const GfxRasterState& state);
+
+ GFX_API void SetBlendState(const DeviceBlendState* state, float alphaRef);
+ GFX_API void SetRasterState(const DeviceRasterState* state);
+ GFX_API void SetDepthState(const DeviceDepthState* state);
+ GFX_API void SetStencilState(const DeviceStencilState* state, int stencilRef);
+ GFX_API void SetSRGBWrite (const bool);
+ GFX_API bool GetSRGBWrite ();
+
+ GFX_API void SetWorldMatrix( const float matrix[16] );
+ GFX_API void SetViewMatrix( const float matrix[16] );
+ GFX_API void SetProjectionMatrix(const Matrix4x4f& matrix);
+ GFX_API void GetMatrix( float outMatrix[16] ) const;
+
+ GFX_API const float* GetWorldMatrix() const ;
+ GFX_API const float* GetViewMatrix() const ;
+ GFX_API const float* GetProjectionMatrix() const ;
+ GFX_API const float* GetDeviceProjectionMatrix() const;
+
+ GFX_API void SetNormalizationBackface( NormalizationMode mode, bool backface );
+ GFX_API void SetFFLighting( bool on, bool separateSpecular, ColorMaterialMode colorMaterial );
+ GFX_API void SetMaterial( const float ambient[4], const float diffuse[4], const float specular[4], const float emissive[4], const float shininess );
+ GFX_API void SetColor( const float color[4] );
+ GFX_API void SetViewport( int x, int y, int width, int height );
+ GFX_API void GetViewport( int* port ) const;
+
+ GFX_API void SetScissorRect( int x, int y, int width, int height );
+ GFX_API void DisableScissor();
+ GFX_API bool IsScissorEnabled() const;
+ GFX_API void GetScissorRect( int values[4] ) const;
+
+ GFX_API bool IsCombineModeSupported( unsigned int combiner );
+ GFX_API TextureCombinersHandle CreateTextureCombiners( int count, const ShaderLab::TextureBinding* texEnvs, const ShaderLab::PropertySheet* props, bool hasVertexColorOrLighting, bool usesAddSpecular );
+ GFX_API void DeleteTextureCombiners( TextureCombinersHandle& textureCombiners );
+ GFX_API void SetTextureCombinersThreadable( TextureCombinersHandle textureCombiners, const TexEnvData* texEnvData, const Vector4f* texColors );
+ GFX_API void SetTextureCombiners( TextureCombinersHandle textureCombiners, const ShaderLab::PropertySheet* props );
+
+ GFX_API void SetTexture (ShaderType shaderType, int unit, int samplerUnit, TextureID texture, TextureDimension dim, float bias);
+ GFX_API void SetTextureParams( TextureID texture, TextureDimension texDim, TextureFilterMode filter, TextureWrapMode wrap, int anisoLevel, bool hasMipMap, TextureColorSpace colorSpace );
+ GFX_API void SetTextureTransform( int unit, TextureDimension dim, TexGenMode texGen, bool identity, const float matrix[16]);
+ // Intentionally empty on this device.
+ GFX_API void SetTextureName ( TextureID texture, const char* name ) { }
+
+ GFX_API void SetShadersThreadable (GpuProgram* programs[kShaderTypeCount], const GpuProgramParameters* params[kShaderTypeCount], UInt8 const * const paramsBuffer[kShaderTypeCount]);
+ GFX_API bool IsShaderActive( ShaderType type ) const;
+ GFX_API void DestroySubProgram( ShaderLab::SubProgram* subprogram );
+
+ GFX_API void DisableLights( int startLight );
+ GFX_API void SetLight( int light, const GfxVertexLight& data);
+ GFX_API void SetAmbient( const float ambient[4] );
+
+ GFX_API void EnableFog(const GfxFogParams& fog);
+ GFX_API void DisableFog();
+
+ GFX_API VBO* CreateVBO();
+ GFX_API void DeleteVBO( VBO* vbo );
+ GFX_API DynamicVBO& GetDynamicVBO();
+
+ GFX_API RenderSurfaceHandle CreateRenderColorSurface (TextureID textureID, int width, int height, int samples, int depth, TextureDimension dim, RenderTextureFormat format, UInt32 createFlags);
+ GFX_API RenderSurfaceHandle CreateRenderDepthSurface(TextureID textureID, int width, int height, int samples, TextureDimension dim, DepthBufferFormat depthFormat, UInt32 createFlags);
+ GFX_API void DestroyRenderSurface(RenderSurfaceHandle& rs);
+ GFX_API void SetRenderTargets (int count, RenderSurfaceHandle* colorHandles, RenderSurfaceHandle depthHandle, int mipLevel, CubemapFace face = kCubeFaceUnknown);
+ GFX_API void ResolveColorSurface (RenderSurfaceHandle srcHandle, RenderSurfaceHandle dstHandle);
+ GFX_API void ResolveDepthIntoTexture (RenderSurfaceHandle colorHandle, RenderSurfaceHandle depthHandle);
+ GFX_API RenderSurfaceHandle GetActiveRenderColorSurface(int index);
+ GFX_API RenderSurfaceHandle GetActiveRenderDepthSurface();
+ GFX_API void SetSurfaceFlags(RenderSurfaceHandle surf, UInt32 flags, UInt32 keepFlags);
+
+ GFX_API void UploadTexture2D( TextureID texture, TextureDimension dimension, UInt8* srcData, int srcSize, int width, int height, TextureFormat format, int mipCount, UInt32 uploadFlags, int skipMipLevels, TextureUsageMode usageMode, TextureColorSpace colorSpace );
+ GFX_API void UploadTextureSubData2D( TextureID texture, UInt8* srcData, int srcSize, int mipLevel, int x, int y, int width, int height, TextureFormat format, TextureColorSpace colorSpace );
+ GFX_API void UploadTextureCube( TextureID texture, UInt8* srcData, int srcSize, int faceDataSize, int size, TextureFormat format, int mipCount, UInt32 uploadFlags, TextureColorSpace colorSpace );
+ GFX_API void UploadTexture3D( TextureID texture, UInt8* srcData, int srcSize, int width, int height, int depth, TextureFormat format, int mipCount, UInt32 uploadFlags );
+ GFX_API void DeleteTexture( TextureID texture );
+
+ GFX_API PresentMode GetPresentMode();
+
+ GFX_API void BeginFrame();
+ GFX_API void EndFrame();
+ GFX_API void PresentFrame();
+ GFX_API bool IsValidState();
+ GFX_API bool HandleInvalidState();
+ GFX_API void FinishRendering();
+
+ // Immediate mode rendering
+ GFX_API void ImmediateVertex( float x, float y, float z );
+ GFX_API void ImmediateNormal( float x, float y, float z );
+ GFX_API void ImmediateColor( float r, float g, float b, float a );
+ GFX_API void ImmediateTexCoordAll( float x, float y, float z );
+ GFX_API void ImmediateTexCoord( int unit, float x, float y, float z );
+ GFX_API void ImmediateBegin( GfxPrimitiveType type );
+ GFX_API void ImmediateEnd();
+
+ GFX_API bool CaptureScreenshot( int left, int bottom, int width, int height, UInt8* rgba32 );
+ GFX_API bool ReadbackImage( ImageReference& image, int left, int bottom, int width, int height, int destX, int destY );
+ GFX_API void GrabIntoRenderTexture(RenderSurfaceHandle rs, RenderSurfaceHandle rd, int x, int y, int width, int height);
+
+ GFX_API void BeforeDrawCall( bool immediateMode );
+
+ // This device always answers "not required" to the texgen queries.
+ GFX_API bool IsPositionRequiredForTexGen(int texStageIndex) const { return false; }
+ GFX_API bool IsNormalRequiredForTexGen(int texStageIndex) const { return false; }
+ GFX_API bool IsPositionRequiredForTexGen() const { return false; }
+ GFX_API bool IsNormalRequiredForTexGen() const { return false; }
+
+ // Intentionally empty on this device.
+ GFX_API void DiscardContents (RenderSurfaceHandle& rs) {}
+
+#if ENABLE_PROFILER
+ GFX_API void BeginProfileEvent (const char* name);
+ GFX_API void EndProfileEvent ();
+
+ TimerQueriesD3D9& GetTimerQueries() {return m_TimerQueriesD3D9;}
+ GFX_API GfxTimerQuery* CreateTimerQuery();
+ GFX_API void DeleteTimerQuery(GfxTimerQuery* query);
+ GFX_API void BeginTimerQueries();
+ GFX_API void EndTimerQueries();
+ #endif
+
+ #if UNITY_EDITOR
+ GFX_API void SetAntiAliasFlag( bool aa );
+ GFX_API void DrawUserPrimitives( GfxPrimitiveType type, int vertexCount, UInt32 vertexChannels, const void* data, int stride );
+ GFX_API int GetCurrentTargetAA() const;
+ GFX_API GfxDeviceWindow* CreateGfxWindow( HWND window, int width, int height, DepthBufferFormat depthFormat, int antiAlias );
+ #endif
+
+ GFX_API int GetCurrentTargetWidth() const;
+ GFX_API int GetCurrentTargetHeight() const;
+ GFX_API void SetCurrentTargetSize(int width, int height);
+ // Sets the window size and also sets the target size to the same values.
+ GFX_API void SetCurrentWindowSize(int width, int height);
+
+ GFX_API void* GetNativeGfxDevice();
+ GFX_API void* GetNativeTexturePointer(TextureID id);
+ GFX_API intptr_t CreateExternalTextureFromNative(intptr_t nativeTex);
+ GFX_API void UpdateExternalTextureFromNative(TextureID tex, intptr_t nativeTex);
+
+ GFX_API void ResetDynamicResources();
+
+ IDirect3DVertexBuffer9* GetAllWhiteVertexStream();
+
+ // Non-virtual accessors to internal state, for use by the other D3D9 files.
+ VertexDeclarations& GetVertexDecls() { return m_VertexDecls; }
+
+ const DeviceStateD3D& GetState() const { return m_State; }
+ DeviceStateD3D& GetState() { return m_State; }
+ VertexShaderConstantCache& GetVertexShaderConstantCache() { return m_VSConstantCache; }
+ PixelShaderConstantCache& GetPixelShaderConstantCache() { return m_PSConstantCache; }
+
+ const VertexPipeConfig& GetVertexPipeConfig() const { return m_VertexConfig; }
+ VertexPipeConfig& GetVertexPipeConfig() { return m_VertexConfig; }
+ const VertexPipeDataD3D9& GetVertexPipeData() const { return m_VertexData; }
+ VertexPipeDataD3D9& GetVertexPipeData() { return m_VertexData; }
+ TexturesD3D9& GetTextures() { return m_Textures; }
+
+ void PushEventQuery();
+
+private:
+
+ DeviceStateD3D m_State;
+ ImmediateModeD3D m_Imm;
+ VertexPipeConfig m_VertexConfig;
+ TransformState m_TransformState;
+ VertexPipeDataD3D9 m_VertexData;
+ VertexPipePrevious m_VertexPrevious;
+
+ // Currently bound state objects.
+ DeviceBlendStateD3D9* m_CurrBlendState;
+ DeviceDepthStateD3D9* m_CurrDepthState;
+ const DeviceStencilStateD3D9* m_CurrStencilState;
+ DeviceRasterState* m_CurrRasterState;
+ // Cached sizes reported by GetCurrentTarget*/set by SetCurrent*Size.
+ int m_CurrTargetWidth;
+ int m_CurrTargetHeight;
+ int m_CurrWindowWidth;
+ int m_CurrWindowHeight;
+
+ IDirect3DVertexBuffer9* m_AllWhiteVertexStream;
+
+ VertexDeclarations m_VertexDecls;
+ TexturesD3D9 m_Textures;
+ DynamicVBO* m_DynamicVBO;
+
+ CachedBlendStates m_CachedBlendStates;
+ CachedDepthStates m_CachedDepthStates;
+ CachedStencilStates m_CachedStencilStates;
+ CachedRasterStates m_CachedRasterStates;
+
+ VertexShaderConstantCache m_VSConstantCache;
+ PixelShaderConstantCache m_PSConstantCache;
+
+#if ENABLE_PROFILER
+ TimerQueriesD3D9 m_TimerQueriesD3D9;
+#endif
+};
+
+GfxDeviceD3D9& GetD3D9GfxDevice();
diff --git a/Runtime/GfxDevice/d3d/GpuProgramsD3D.cpp b/Runtime/GfxDevice/d3d/GpuProgramsD3D.cpp
new file mode 100644
index 0000000..9a67a54
--- /dev/null
+++ b/Runtime/GfxDevice/d3d/GpuProgramsD3D.cpp
@@ -0,0 +1,474 @@
+#include "UnityPrefix.h"
+#include "GpuProgramsD3D.h"
+#include "External/shaderlab/Library/ShaderLabErrors.h"
+#include "Runtime/Shaders/GraphicsCaps.h"
+#include "Runtime/GfxDevice/GfxDevice.h"
+#include "Runtime/Math/Vector4.h"
+#include "External/shaderlab/Library/shaderlab.h"
+#include "External/shaderlab/Library/texenv.h"
+#include "External/DirectX/builds/dx9include/d3dx9.h"
+#include "D3D9Context.h"
+#include "Runtime/GfxDevice/ShaderConstantCache.h"
+#include "D3D9Utils.h"
+#include "ShaderPatchingD3D9.h"
+
+#define ENABLE_GPU_PROGRAM_STATS 0
+
+
+#if ENABLE_GPU_PROGRAM_STATS
+// Per shader type: how many times each named property was requested.
+typedef std::map<ShaderLab::FastPropertyName, int> PropertyCount;
+PropertyCount s_StatCounts[kShaderTypeCount];
+
+// Prints, for every shader type from kShaderVertex up, the recorded property
+// request counts sorted from most to least requested together with each
+// property's share of the total, then clears the counters for that type.
+void PrintDebugGpuProgramStats ()
+{
+	typedef std::pair<std::string, int> NameIntPair;
+	// Orders entries by descending request count.
+	struct Sorter {
+		bool operator() (const NameIntPair& a, const NameIntPair& b) const {
+			return a.second > b.second;
+		}
+	};
+	for (int i = kShaderVertex; i < kShaderTypeCount; ++i)
+	{
+		std::vector<NameIntPair> sorted;
+		sorted.reserve (s_StatCounts[i].size());
+		int totalCount = 0;
+		for (PropertyCount::const_iterator it = s_StatCounts[i].begin(); it != s_StatCounts[i].end(); ++it)
+		{
+			sorted.push_back (std::make_pair(it->first.GetName(), it->second));
+			totalCount += it->second;
+		}
+		std::sort (sorted.begin(), sorted.end(), Sorter());
+		// size() is a size_t; cast so the argument really matches the %i specifier.
+		printf_console ("%i Shader Stats: %i props, %i requests\n", i, (int)sorted.size(), totalCount);
+		for (size_t j = 0; j < sorted.size(); ++j)
+		{
+			printf_console (" %-25s %6i %5.1f%%\n", sorted[j].first.c_str(), sorted[j].second, sorted[j].second*100.0/totalCount);
+		}
+		s_StatCounts[i].clear();
+	}
+}
+#define ADD_TO_VS_STATS(name) ++s_StatCounts[kShaderVertex][name]
+#define ADD_TO_PS_STATS(name) ++s_StatCounts[kShaderFragment][name]
+#else
+#define ADD_TO_VS_STATS(name)
+#define ADD_TO_PS_STATS(name)
+#endif
+
+
+VertexShaderConstantCache& GetD3D9VertexShaderConstantCache(); // GfxDeviceD3D9.cpp
+PixelShaderConstantCache& GetD3D9PixelShaderConstantCache(); // GfxDeviceD3D9.cpp
+
+
+// non static; used by CombinerD3D.cpp and VertexPipeD3D9.cpp
+//
+// Assembles D3D shader assembly text with D3DXAssembleShader.
+// Returns the buffer holding the assembled shader (the caller must Release()
+// it), or NULL on failure. When the assembler produced a message on failure it
+// is logged together with the name of the last parsed shader and the source.
+ID3DXBuffer* AssembleD3DShader (const std::string& source)
+{
+	// Start from NULL so the cleanup below is safe even if the call fails
+	// before writing to either output.
+	ID3DXBuffer* compiledShader = NULL;
+	ID3DXBuffer* compileErrors = NULL;
+
+	// Skip validation of shaders at assembly time when in release mode. Saves
+	// some time when loading them.
+	DWORD flags = D3DXSHADER_SKIPVALIDATION;
+	#if DEBUGMODE
+	flags = 0;
+	#endif
+
+	HRESULT hr = D3DXAssembleShader( source.c_str(), source.size(), NULL, NULL, flags, &compiledShader, &compileErrors );
+	if( FAILED(hr) )
+	{
+		if (compileErrors && compileErrors->GetBufferSize() > 0)
+		{
+			std::string error = Format ("Shader error in '%s': D3D shader assembly failed with: %s\nShader Assembly: %s", g_LastParsedShaderName.c_str(), (const char*)compileErrors->GetBufferPointer(), source.c_str());
+			ErrorString (error);
+		}
+		// Release the message buffer even when it was empty.
+		if( compileErrors )
+			compileErrors->Release();
+		if( compiledShader )
+			compiledShader->Release();
+		return NULL;
+	}
+
+	// The message buffer can also be returned on success (it carries warnings
+	// as well as errors); it is not needed here, so release it to avoid a leak.
+	if( compileErrors )
+		compileErrors->Release();
+
+	return compiledShader;
+}
+
+// --------------------------------------------------------------------------
+
+// Walks `valueParams` in order and uploads each value from `buffer` into
+// `constantCache`, consuming the buffer as it goes:
+//  - a parameter with one row and array size one is read as a Vector4f and
+//    written to one register;
+//  - anything else is read as an int element count followed by the floats of a
+//    4x4 matrix, which is transposed and written to four registers.
+// Returns the buffer position just past the last value consumed.
+template <typename CACHE>
+static const UInt8* ApplyValueParametersD3D9 (CACHE& constantCache, const UInt8* buffer, const GpuProgramParameters::ValueParameterArray& valueParams)
+{
+	typedef GpuProgramParameters::ValueParameterArray::const_iterator ParamIterator;
+
+	const ParamIterator last = valueParams.end();
+	for (ParamIterator i = valueParams.begin(); i != last; ++i)
+	{
+		const bool isSingleVector = (i->m_RowCount == 1 && i->m_ArraySize == 1);
+		if (!isSingleVector)
+		{
+			// matrix/array: an element count precedes the float data
+			const int size = *reinterpret_cast<const int*>(buffer);
+			buffer += sizeof(int);
+			Assert (i->m_RowCount == 4 && size == 16);
+
+			Matrix4x4f transposed;
+			TransposeMatrix4x4 (reinterpret_cast<const Matrix4x4f*>(buffer), &transposed);
+			constantCache.SetValues (i->m_Index, transposed.GetPtr(), 4);
+			buffer += size * sizeof(float);
+			continue;
+		}
+
+		// plain vector parameter
+		const Vector4f* vec = reinterpret_cast<const Vector4f*>(buffer);
+		constantCache.SetValues(i->m_Index, vec->GetPtr(), 1);
+		buffer += sizeof(Vector4f);
+	}
+	return buffer;
+}
+
+
+
+// --------------------------------------------------------------------------
+
+// Builds a vertex shader program from D3D shader assembly text. All per-fog-mode
+// shader slots start out empty; if creation fails the program is flagged as not
+// supported.
+D3D9VertexShader::D3D9VertexShader( const std::string& source )
+: m_FogFailed(0)
+{
+	int fogMode = kFogModeCount;
+	while (fogMode-- > 0)
+		m_Shaders[fogMode] = NULL;
+
+	m_ImplType = kShaderImplVertex;
+
+	const bool created = Create(source);
+	if( !created )
+		m_NotSupported = true;
+}
+
+// Releases every shader variant that was created and asserts that each release
+// dropped the last reference.
+D3D9VertexShader::~D3D9VertexShader ()
+{
+	for (int fogMode = 0; fogMode != kFogModeCount; ++fogMode)
+	{
+		IDirect3DVertexShader9* shader = m_Shaders[fogMode];
+		if( !shader )
+			continue;
+
+		ULONG refCount = shader->Release();
+		AssertIf( refCount != 0 );
+	}
+}
+
+
+// Assembles `source` and creates the base (no fog) vertex shader in slot 0.
+// The program level is derived from the "vs_3_0" / "vs_1_1" prefix of the
+// source text. For shader model 3 the source is kept around so fog variants
+// can be patched in later. Returns false when the hardware lacks shader model 3
+// support for a 3.0 shader, or when assembly or shader creation fails.
+bool D3D9VertexShader::Create( const std::string& source )
+{
+	const char* text = source.c_str();
+
+	// fast skip 3.0 shaders on unsupporting hardware
+	const bool isShaderModel3 = (strncmp(text, "vs_3_0", 6) == 0);
+	if( isShaderModel3 && gGraphicsCaps.shaderCaps < kShaderLevel3 )
+		return false;
+
+	if (isShaderModel3)
+		m_GpuProgramLevel = kGpuProgramSM3;
+	else if (strncmp(text, "vs_1_1", 6) == 0)
+		m_GpuProgramLevel = kGpuProgramSM1;
+	else
+		m_GpuProgramLevel = kGpuProgramSM2;
+
+	IDirect3DDevice9* d3dDevice = GetD3DDevice();
+
+	// assemble shader
+	ID3DXBuffer* byteCode = AssembleD3DShader( source );
+	if( byteCode == NULL )
+		return false;
+
+	// create shader; the assembled buffer is no longer needed afterwards
+	const HRESULT hr = d3dDevice->CreateVertexShader( (const DWORD*)byteCode->GetBufferPointer(), &m_Shaders[0] );
+	byteCode->Release();
+	if( FAILED(hr) )
+	{
+		printf_console( "D3D shader create error for shader %s\n", source.c_str() );
+		return false;
+	}
+
+	if (isShaderModel3)
+		m_SourceForFog = source;
+
+	return true;
+}
+
+// Uploads this program's parameters from the packed `buffer`: first all value
+// parameters into the vertex shader constant cache, then, when the hardware
+// supports vertex textures, one TexEnvData record per texture parameter, which
+// is bound on the vertex shader stage.
+void D3D9VertexShader::ApplyGpuProgram (const GpuProgramParameters& params, const UInt8* buffer)
+{
+	GfxDevice& device = GetRealGfxDevice();
+	VertexShaderConstantCache& constantCache = GetD3D9VertexShaderConstantCache();
+
+	const GpuProgramParameters::ValueParameterArray& valueParams = params.GetValueParams();
+	buffer = ApplyValueParametersD3D9<VertexShaderConstantCache>(constantCache, buffer, valueParams);
+
+	// Apply textures
+	if (gGraphicsCaps.hasVertexTextures)
+	{
+		const GpuProgramParameters::TextureParameterList& textureParams = params.GetTextureParams();
+		const GpuProgramParameters::TextureParameterList::const_iterator textureParamsEnd = textureParams.end();
+		for( GpuProgramParameters::TextureParameterList::const_iterator i = textureParams.begin(); i != textureParamsEnd; ++i )
+		{
+			const GpuProgramParameters::TextureParameter& t = *i;
+			const TexEnvData* texdata = reinterpret_cast<const TexEnvData*>(buffer);
+			device.SetTexture (kShaderVertex, t.m_Index, 0, texdata->textureID, static_cast<TextureDimension>(texdata->texDim), 0);
+			buffer += sizeof(*texdata);
+		}
+	}
+}
+
+// Returns the shader variant to use for fog mode `fog`.
+// With fog disabled, or when no patchable source was kept, this is the base
+// shader. Otherwise the fog variant is returned, being patched, assembled and
+// created on first use. If no variant can be produced the mode is remembered as
+// failed, the base shader is returned and outResetToNoFog is set to true.
+IDirect3DVertexShader9* D3D9VertexShader::GetShader (FogMode fog, bool& outResetToNoFog)
+{
+	outResetToNoFog = false;
+
+	// No fog requested, or nothing to patch: only the base shader applies.
+	if (!(fog > kFogDisabled) || m_SourceForFog.empty())
+		return m_Shaders[0];
+
+	Assert (fog >= 0 && fog < kFogModeCount);
+
+	int index = 0;
+	if (m_Shaders[fog])
+	{
+		// already have patched fog shader
+		index = fog;
+	}
+	else if ((m_FogFailed & (1<<fog)) == 0)
+	{
+		// patch fog shader on demand, working on a copy of the kept source
+		std::string patchedSource = m_SourceForFog;
+
+		ID3DXBuffer* byteCode = NULL;
+		if (PatchVertexShaderFogD3D9 (patchedSource))
+			byteCode = AssembleD3DShader (patchedSource);
+
+		if (byteCode)
+		{
+			const HRESULT hr = GetD3DDevice()->CreateVertexShader ((const DWORD*)byteCode->GetBufferPointer(), &m_Shaders[fog]);
+			byteCode->Release();
+			if (FAILED(hr))
+				printf_console ("D3D vertex shader create error for patched fog mode %d shader %s\n", (int)fog, patchedSource.c_str());
+			else
+				index = fog;
+		}
+	}
+
+	if (index == 0)
+	{
+		// No usable fog variant: fall back and don't try this mode again.
+		outResetToNoFog = true;
+		m_FogFailed |= (1<<fog);
+	}
+	return m_Shaders[index];
+}
+
+// --------------------------------------------------------------------------
+
+// Builds a pixel shader program from D3D shader assembly text. All per-fog-mode
+// shader slots start out empty and all fog register indices start at zero; if
+// creation fails the program is flagged as not supported.
+D3D9PixelShader::D3D9PixelShader( const std::string& source )
+: m_FogFailed(0)
+{
+	for (int i = 0; i < kFogModeCount; ++i)
+	{
+		m_Shaders[i] = NULL;
+		// m_FogRegisters holds ints (register indices), so use 0 rather than
+		// the pointer constant NULL. GetShader assigns the real register when
+		// it patches the variant for a fog mode.
+		m_FogRegisters[i] = 0;
+	}
+	m_ImplType = kShaderImplFragment;
+	if( !Create(source) )
+		m_NotSupported = true;
+}
+
+// Releases every shader variant that was created and asserts that each release
+// dropped the last reference.
+D3D9PixelShader::~D3D9PixelShader ()
+{
+	for (int fogMode = 0; fogMode != kFogModeCount; ++fogMode)
+	{
+		IDirect3DPixelShader9* shader = m_Shaders[fogMode];
+		if( !shader )
+			continue;
+
+		ULONG refCount = shader->Release();
+		AssertIf( refCount != 0 );
+	}
+}
+
+// Assembles `source` and creates the base (no fog) pixel shader in slot 0.
+// The program level is SM3 when the source starts with "ps_3_0" and SM2
+// otherwise. For shader model 3 the source is kept around so fog variants can
+// be patched in later. Returns false when the hardware lacks shader model 3
+// support for a 3.0 shader, or when assembly or shader creation fails.
+bool D3D9PixelShader::Create( const std::string& source )
+{
+	// fast skip 3.0 shaders on unsupporting hardware
+	const bool isShaderModel3 = (strncmp(source.c_str(), "ps_3_0", 6) == 0);
+	if( isShaderModel3 && gGraphicsCaps.shaderCaps < kShaderLevel3 )
+		return false;
+
+	if (isShaderModel3)
+		m_GpuProgramLevel = kGpuProgramSM3;
+	else
+		m_GpuProgramLevel = kGpuProgramSM2;
+
+	IDirect3DDevice9* d3dDevice = GetD3DDevice();
+
+	// assemble shader
+	ID3DXBuffer* byteCode = AssembleD3DShader( source );
+	if( byteCode == NULL )
+		return false;
+
+	// create shader; the assembled buffer is no longer needed afterwards
+	const HRESULT hr = d3dDevice->CreatePixelShader( (const DWORD*)byteCode->GetBufferPointer(), &m_Shaders[0] );
+	byteCode->Release();
+	if( FAILED(hr) )
+	{
+		printf_console( "D3D shader create error for shader %s\n", source.c_str() );
+		return false;
+	}
+
+	if (isShaderModel3)
+		m_SourceForFog = source;
+
+	return true;
+}
+
+// Uploads this program's parameters from the packed `buffer`: all value
+// parameters into the pixel shader constant cache, then one TexEnvData record
+// per texture parameter. When the shader kept patchable source and fog is
+// active in a mode that has not failed, two more constants are written at the
+// register stored for that mode: the fog color, followed by the fog parameters.
+void D3D9PixelShader::ApplyGpuProgram (const GpuProgramParameters& params, const UInt8* buffer)
+{
+	GfxDevice& device = GetRealGfxDevice();
+	PixelShaderConstantCache& constantCache = GetD3D9PixelShaderConstantCache();
+
+	const GpuProgramParameters::ValueParameterArray& valueParams = params.GetValueParams();
+	buffer = ApplyValueParametersD3D9<PixelShaderConstantCache>(constantCache, buffer, valueParams);
+
+	// Apply textures
+	const GpuProgramParameters::TextureParameterList& textureParams = params.GetTextureParams();
+	GpuProgramParameters::TextureParameterList::const_iterator textureParamsEnd = textureParams.end();
+	for( GpuProgramParameters::TextureParameterList::const_iterator i = textureParams.begin(); i != textureParamsEnd; ++i )
+	{
+		const GpuProgramParameters::TextureParameter& t = *i;
+		const TexEnvData* texdata = reinterpret_cast<const TexEnvData*>(buffer);
+		ApplyTexEnvData (t.m_Index, t.m_SamplerIndex, *texdata);
+		buffer += sizeof(*texdata);
+	}
+
+	// Apply fog parameters if needed
+	if (!m_SourceForFog.empty())
+	{
+		const GfxFogParams& fog = device.GetFogParams();
+		if (fog.mode > kFogDisabled && !(m_FogFailed & (1<<fog.mode)))
+		{
+			int reg = m_FogRegisters[fog.mode];
+			constantCache.SetValues (reg, fog.color.GetPtr(), 1);
+			// Named fogParams so it does not shadow the `params` argument.
+			float fogParams[4];
+			fogParams[0] = fog.density * 1.2011224087f ; // density / sqrt(ln(2))
+			fogParams[1] = fog.density * 1.4426950408f; // density / ln(2)
+			if (fog.mode == kFogLinear)
+			{
+				// Guard against a (near) zero range to avoid dividing by zero.
+				float diff = fog.end - fog.start;
+				float invDiff = Abs(diff) > 0.0001f ? 1.0f/diff : 0.0f;
+				fogParams[2] = -invDiff;
+				fogParams[3] = fog.end * invDiff;
+			}
+			else
+			{
+				fogParams[2] = 0.0f;
+				fogParams[3] = 0.0f;
+			}
+			constantCache.SetValues (reg+1, fogParams, 1);
+		}
+	}
+}
+
+// Returns the index of the first constant register above everything the shader
+// is known to use. Three sources are considered: the value parameters (index
+// plus row count), the built-in matrix parameters (four registers each) and
+// explicit "def c<N>" constants found in the shader text. Returns 0 when none
+// of them uses any register.
+static int FindUnusedConstantRegister (const std::string& src, const GpuProgramParameters& params)
+{
+	int maxRegisterUsed = -1;
+
+	// Value parameters are not assumed to be sorted by register index, so scan all of them.
+	const GpuProgramParameters::ValueParameterArray& valueParams = params.GetValueParams();
+	for (GpuProgramParameters::ValueParameterArray::const_iterator it = valueParams.begin(), itEnd = valueParams.end(); it != itEnd; ++it)
+	{
+		int idx = it->m_Index + it->m_RowCount - 1;
+		if (idx > maxRegisterUsed)
+			maxRegisterUsed = idx;
+	}
+
+	// Built-ins
+	const BuiltinShaderParamIndices& builtins = params.GetBuiltinParams();
+	for (int i = 0; i < kShaderInstanceMatCount; ++i)
+	{
+		int index = builtins.mat[i].gpuIndex;
+		if (index >= 0 && index + 3 > maxRegisterUsed)
+			maxRegisterUsed = index + 3;
+	}
+
+	// Explicit constants in the shader ("def c*")
+	size_t pos = 0;
+	while ((pos = src.find("def c", pos)) != std::string::npos)
+	{
+		pos += 5; // skip "def c"
+		int reg = -1;
+		// Only trust `reg` when a number was actually parsed after "def c".
+		if (sscanf(src.c_str() + pos, "%d", &reg) == 1 && reg > maxRegisterUsed)
+			maxRegisterUsed = reg;
+	}
+
+	return maxRegisterUsed + 1;
+}
+
+// Returns the shader variant to use for fog mode `fog`.
+// With fog disabled, or when no patchable source was kept, this is the base
+// shader. Otherwise the fog variant is returned, being created on first use:
+// an unused constant register is picked (and remembered for this fog mode), the
+// source is patched to read fog data from it and the following register, and
+// the result is assembled. If that fails the mode is remembered as failed and
+// the base shader is returned.
+IDirect3DPixelShader9* D3D9PixelShader::GetShader(FogMode fog, const GpuProgramParameters& params)
+{
+	// No fog requested, or nothing to patch: only the base shader applies.
+	if (!(fog > kFogDisabled) || m_SourceForFog.empty())
+		return m_Shaders[0];
+
+	Assert (fog >= 0 && fog < kFogModeCount);
+
+	// already have patched fog shader
+	if (m_Shaders[fog])
+		return m_Shaders[fog];
+
+	int index = 0;
+	if ((m_FogFailed & (1<<fog)) == 0)
+	{
+		// patch fog shader on demand, working on a copy of the kept source
+		std::string patchedSource = m_SourceForFog;
+
+		// find constant register that we'll use to store fog params
+		const int fogRegister = FindUnusedConstantRegister (patchedSource, params);
+		m_FogRegisters[fog] = fogRegister;
+
+		ID3DXBuffer* byteCode = NULL;
+		if (PatchPixelShaderFogD3D9 (patchedSource, fog, fogRegister, fogRegister+1))
+			byteCode = AssembleD3DShader (patchedSource);
+
+		if (byteCode)
+		{
+			const HRESULT hr = GetD3DDevice()->CreatePixelShader ((const DWORD*)byteCode->GetBufferPointer(), &m_Shaders[fog]);
+			byteCode->Release();
+			if (FAILED(hr))
+				printf_console ("D3D pixel shader create error for patched fog mode %d shader %s\n", (int)fog, patchedSource.c_str());
+			else
+				index = fog;
+		}
+
+		// Remember the failure so this mode is not attempted again.
+		if (index == 0)
+			m_FogFailed |= (1<<fog);
+	}
+	return m_Shaders[index];
+}
+
+
+
+// --------------------------------------------------------------------------
+
+#if ENABLE_UNIT_TESTS
+#include "External/UnitTest++/src/UnitTest++.h"
+SUITE (GpuProgramsD3DTests)
+{
+
+// Adds two vector parameters whose register indices (1, then 0) are not in
+// ascending order and checks that the first free register is still reported
+// as 2, i.e. the search does not rely on the parameter order.
+TEST(FindUnusedConstantRegisterCanHandleUnsortedParams)
+{
+ GpuProgramParameters pp;
+ pp.AddVectorParam(1,kShaderParamFloat,4,"A",-1,NULL);
+ pp.AddVectorParam(0,kShaderParamFloat,4,"B",-1,NULL);
+ pp.MakeReady(); // this does sort, but sorts by name; NOT the GPU index!
+ // Empty shader text: no "def c" constants contribute to the result.
+ CHECK_EQUAL(2,FindUnusedConstantRegister("", pp));
+}
+
+} // SUITE
+#endif // ENABLE_UNIT_TESTS
diff --git a/Runtime/GfxDevice/d3d/GpuProgramsD3D.h b/Runtime/GfxDevice/d3d/GpuProgramsD3D.h
new file mode 100644
index 0000000..6b21fa9
--- /dev/null
+++ b/Runtime/GfxDevice/d3d/GpuProgramsD3D.h
@@ -0,0 +1,40 @@
+#pragma once
+
+#include "D3D9Includes.h"
+#include "Runtime/GfxDevice/GpuProgram.h"
+
+
+// GpuProgram that wraps Direct3D 9 vertex shader objects.
+// Keeps one IDirect3DVertexShader9 slot per fog mode (kFogModeCount entries).
+class D3D9VertexShader : public GpuProgram {
+public:
+ D3D9VertexShader( const std::string& source );
+ virtual ~D3D9VertexShader();
+
+ virtual void ApplyGpuProgram (const GpuProgramParameters& params, const UInt8* buffer);
+ // Returns the shader to use for the given fog mode.
+ // NOTE(review): the meaning of outResetToNoFog is defined by the implementation in the .cpp - confirm there.
+ IDirect3DVertexShader9* GetShader(FogMode fog, bool& outResetToNoFog);
+ // Raw slot access: returns whatever is stored for 'fog'; performs no patching and no bounds check.
+ IDirect3DVertexShader9* GetShaderAtFogIndex(FogMode fog) { return m_Shaders[fog]; }
+
+private:
+ bool Create( const std::string& source );
+
+ std::string m_SourceForFog; // original source, used for fog patching if needed
+ IDirect3DVertexShader9* m_Shaders[kFogModeCount]; // one shader slot per fog mode
+ unsigned m_FogFailed; // bit per fog mode
+};
+
+// GpuProgram that wraps Direct3D 9 pixel shader objects.
+// Keeps one IDirect3DPixelShader9 slot per fog mode (kFogModeCount entries);
+// fog variants are created on demand by GetShader.
+class D3D9PixelShader : public GpuProgram {
+public:
+ D3D9PixelShader( const std::string& source );
+ virtual ~D3D9PixelShader();
+
+ virtual void ApplyGpuProgram (const GpuProgramParameters& params, const UInt8* buffer);
+ // Returns the shader for the given fog mode, patching m_SourceForFog and creating the
+ // variant on first use. Falls back to slot 0 when fog is disabled, no source is stored,
+ // or patching/creation failed for that mode.
+ IDirect3DPixelShader9* GetShader(FogMode fog, const GpuProgramParameters& params);
+ // Raw slot access: returns whatever is stored for 'fog'; performs no patching and no bounds check.
+ IDirect3DPixelShader9* GetShaderAtFogIndex(FogMode fog) { return m_Shaders[fog]; }
+
+private:
+ bool Create( const std::string& source );
+
+ std::string m_SourceForFog; // original source, used for fog patching if needed
+ IDirect3DPixelShader9* m_Shaders[kFogModeCount]; // one shader slot per fog mode
+ int m_FogRegisters[kFogModeCount]; // constant register picked for fog params, per fog mode (written by GetShader)
+ unsigned m_FogFailed; // bit per fog mode
+};
diff --git a/Runtime/GfxDevice/d3d/GraphicsCapsD3D9.cpp b/Runtime/GfxDevice/d3d/GraphicsCapsD3D9.cpp
new file mode 100644
index 0000000..f1d95c8
--- /dev/null
+++ b/Runtime/GfxDevice/d3d/GraphicsCapsD3D9.cpp
@@ -0,0 +1,384 @@
+#include "UnityPrefix.h"
+#include "Runtime/Shaders/GraphicsCaps.h"
+#include "D3D9Context.h"
+#include "Runtime/Utilities/Utility.h"
+#include "PlatformDependent/Win/WinDriverUtils.h"
+#include "D3D9Utils.h"
+#include <Shlwapi.h>
+
+#define CAPS_DEBUG_DISABLE_RT 0
+
+
+extern D3DFORMAT kD3D9RenderTextureFormats[kRTFormatCount];
+
+
+extern D3DDEVTYPE g_D3DDevType;
+extern DWORD g_D3DAdapter;
+
+// Shared implementation of the format queries below.
+// Asks the D3D object whether 'format' can be used as a D3DRTYPE_TEXTURE resource
+// with the given usage flags on the current adapter and device type.
+// D3DFMT_UNKNOWN is always reported as unsupported without querying D3D.
+static bool CheckTextureFormatUsageD3D9( D3DFORMAT format, DWORD usage )
+{
+    if( format == D3DFMT_UNKNOWN )
+        return false;
+    return SUCCEEDED( GetD3DObject()->CheckDeviceFormat( g_D3DAdapter, g_D3DDevType, GetD3DFormatForChecks(), usage, D3DRTYPE_TEXTURE, format ) );
+}
+
+// True if 'format' can be used as a regular texture (no usage flags).
+static bool IsTextureFormatSupported( D3DFORMAT format )
+{
+    return CheckTextureFormatUsageD3D9( format, 0 );
+}
+
+// True if textures of 'format' pass the D3DUSAGE_QUERY_SRGBREAD check.
+static bool IsSRGBTextureReadSupported( D3DFORMAT format )
+{
+    return CheckTextureFormatUsageD3D9( format, D3DUSAGE_QUERY_SRGBREAD );
+}
+
+// True if textures of 'format' pass the D3DUSAGE_QUERY_SRGBWRITE check.
+static bool IsSRGBTextureWriteSupported( D3DFORMAT format )
+{
+    return CheckTextureFormatUsageD3D9( format, D3DUSAGE_QUERY_SRGBWRITE );
+}
+
+// True if textures of 'format' can be created with D3DUSAGE_RENDERTARGET.
+static bool IsRenderTextureFormatSupported( D3DFORMAT format )
+{
+    return CheckTextureFormatUsageD3D9( format, D3DUSAGE_RENDERTARGET );
+}
+
+D3DFORMAT GetD3D9TextureFormat( TextureFormat inFormat ); // TexturesD3D9.cpp
+
+
+// Adapter vendor IDs, compared against D3DADAPTER_IDENTIFIER9::VendorId in InitD3D9.
+// kVendorDummyRef is not read from hardware: InitD3D9 substitutes it when the
+// device type is D3DDEVTYPE_REF.
+enum {
+ kVendorDummyRef = 0x0000,
+ kVendor3DLabs = 0x3d3d,
+ kVendorMatrox = 0x102b,
+ kVendorS3 = 0x5333,
+ kVendorSIS = 0x1039,
+ kVendorXGI = 0x18ca,
+ kVendorIntel = 0x8086,
+ kVendorATI = 0x1002,
+ kVendorNVIDIA = 0x10de,
+ kVendorTrident = 0x1023,
+ kVendorImgTech = 0x104a,
+ kVendorVIAS3G = 0x1106,
+ kVendor3dfx = 0x121a,
+ kVendorParallels= 0x1ab8,
+ kVendorMicrosoft= 0x1414,
+ kVendorVMWare = 0x15ad,
+};
+// One row of the vendor lookup table: vendor ID and the display name used for vendorString.
+struct KnownVendors {
+ DWORD vendorId;
+ const char* name;
+};
+// Lookup table searched linearly by InitD3D9; IDs not listed here are reported as "Unknown (ID=...)".
+static KnownVendors s_KnownVendors[] = {
+ { kVendorDummyRef, "REFERENCE" },
+ { kVendor3DLabs, "3dLabs" },
+ { kVendorMatrox, "Matrox" },
+ { kVendorS3, "S3" },
+ { kVendorSIS, "SIS" },
+ { kVendorXGI, "XGI" },
+ { kVendorIntel, "Intel" },
+ { kVendorATI, "ATI" },
+ { kVendorNVIDIA, "NVIDIA" },
+ { kVendorTrident, "Trident" },
+ { kVendorImgTech, "Imagination Technologies" },
+ { kVendorVIAS3G, "VIA/S3" },
+ { kVendor3dfx, "3dfx" },
+ { kVendorParallels, "Parallels" },
+ { kVendorMicrosoft, "Microsoft" },
+ { kVendorVMWare, "VMWare" },
+};
+// Number of rows in s_KnownVendors.
+static int kKnownVendorsSize = sizeof(s_KnownVendors)/sizeof(s_KnownVendors[0]);
+
+
+// Fills in the graphics capabilities for the Direct3D 9 backend.
+// Queries the IDirect3D9 object (the device is not created yet, see the VRAM comment below)
+// for device caps and adapter identity, derives renderer/vendor/driver strings, texture and
+// render target format support, then applies driver workarounds via DetectDriverBugsD3D9 and
+// finally prints a summary to the console.
+// NOTE(review): the HRESULTs of GetDeviceCaps and GetAdapterIdentifier are not checked, and
+// adapterInfo is not initialized before the call - confirm failure is impossible here.
+void GraphicsCaps::InitD3D9()
+{
+ IDirect3D9* d3dobject = GetD3DObject();
+ d3dobject->GetDeviceCaps( g_D3DAdapter, g_D3DDevType, &d3d.d3dcaps );
+
+ // get renderer, vendor & driver information
+ D3DADAPTER_IDENTIFIER9 adapterInfo;
+ d3dobject->GetAdapterIdentifier( g_D3DAdapter, 0, &adapterInfo );
+ // force zero termination of the strings before using them
+ adapterInfo.Driver[MAX_DEVICE_IDENTIFIER_STRING-1] = 0;
+ adapterInfo.Description[MAX_DEVICE_IDENTIFIER_STRING-1] = 0;
+ adapterInfo.DeviceName[31] = 0;
+ rendererString = adapterInfo.Description;
+
+ // the reference device gets its own pseudo vendor so it shows up as "REFERENCE"
+ if (g_D3DDevType == D3DDEVTYPE_REF)
+ {
+ adapterInfo.VendorId = kVendorDummyRef;
+ rendererString = "REF on " + rendererString;
+ }
+
+ // 'i' is declared outside the loop because it is checked afterwards to detect "not found"
+ int i;
+ for( i = 0; i < kKnownVendorsSize; ++i )
+ {
+ if( s_KnownVendors[i].vendorId == adapterInfo.VendorId )
+ {
+ vendorString = s_KnownVendors[i].name;
+ break;
+ }
+ }
+ if( i == kKnownVendorsSize )
+ {
+ vendorString = Format( "Unknown (ID=%x)", adapterInfo.VendorId );
+ }
+ // driver version is packed as four 16 bit words in the 64 bit DriverVersion value
+ windriverutils::VersionInfo driverVersion( HIWORD(adapterInfo.DriverVersion.HighPart), LOWORD(adapterInfo.DriverVersion.HighPart),
+ HIWORD(adapterInfo.DriverVersion.LowPart), LOWORD(adapterInfo.DriverVersion.LowPart) );
+ driverVersionString = Format( "%s %i.%i.%i.%i", adapterInfo.Driver,
+ HIWORD(adapterInfo.DriverVersion.HighPart), LOWORD(adapterInfo.DriverVersion.HighPart),
+ HIWORD(adapterInfo.DriverVersion.LowPart), LOWORD(adapterInfo.DriverVersion.LowPart) );
+ driverLibraryString = driverVersionString;
+ fixedVersionString = "Direct3D 9.0c [" + driverVersionString + ']';
+
+ rendererID = adapterInfo.DeviceId;
+ vendorID = adapterInfo.VendorId;
+
+ // We can't use GetAvailableTextureMem here because the device is not created yet!
+ // And besides that, it would return much more than VRAM on Vista (virtualization and so on).
+ // Use WMI instead.
+ int vramMB;
+ const char* vramMethod = "";
+ if (g_D3DDevType != D3DDEVTYPE_REF)
+ vramMB = windriverutils::GetVideoMemorySizeMB (d3dobject->GetAdapterMonitor(g_D3DAdapter), &vramMethod);
+ else
+ vramMB = 128; // fixed value reported for the reference device
+ videoMemoryMB = vramMB;
+
+ // On windows, we always output D3D info. There is so much variety that it always helps!
+ printf_console( "Direct3D:\n" );
+ printf_console( " Version: %s\n", fixedVersionString.c_str() );
+ printf_console( " Renderer: %s\n", rendererString.c_str() );
+ printf_console( " Vendor: %s\n", vendorString.c_str() );
+ printf_console( " VRAM: %i MB (via %s)\n", (int)videoMemoryMB, vramMethod );
+
+ // interval TWO is only considered when interval ONE is available
+ maxVSyncInterval = 0;
+ if( d3d.d3dcaps.PresentationIntervals & D3DPRESENT_INTERVAL_ONE )
+ {
+ maxVSyncInterval = 1;
+ if( d3d.d3dcaps.PresentationIntervals & D3DPRESENT_INTERVAL_TWO )
+ maxVSyncInterval = 2;
+ }
+
+ // both the 2 and 4 component half float vertex formats are required
+ DWORD declTypesFloat16 = D3DDTCAPS_FLOAT16_2 | D3DDTCAPS_FLOAT16_4;
+ has16BitFloatVertex = (d3d.d3dcaps.DeclTypes & declTypesFloat16) == declTypesFloat16;
+ needsToSwizzleVertexColors = true;
+
+ bool usesSoftwareVP = !(d3d.d3dcaps.DevCaps & D3DDEVCAPS_HWTRANSFORMANDLIGHT);
+ if( usesSoftwareVP )
+ maxLights = 8; // software T&L always has 8 lights
+ else
+ maxLights = clamp<unsigned int>( d3d.d3dcaps.MaxActiveLights, 0, 8 );
+
+ // Texture sizes
+ maxTextureSize = std::min( d3d.d3dcaps.MaxTextureWidth, d3d.d3dcaps.MaxTextureHeight );
+ maxRenderTextureSize = maxTextureSize;
+ maxCubeMapSize = maxTextureSize;
+
+ has3DTexture = d3d.d3dcaps.TextureCaps & D3DPTEXTURECAPS_VOLUMEMAP;
+ maxTexUnits = d3d.d3dcaps.MaxSimultaneousTextures;
+ maxTexImageUnits = 16;
+ maxTexCoords = d3d.d3dcaps.MaxSimultaneousTextures;
+ if (maxTexCoords > 8)
+ maxTexCoords = 8;
+
+ // In theory, vertex texturing is texture format dependent. However, in practice the caps lie,
+ // especially on NVIDIA hardware.
+ //
+ // ATI cards: all DX10+ GPUs report all texture formats as vertex texture capable (good!)
+ // Intel cards: all SM3.0+ GPUs report all texture formats as vertex texture capable (good!)
+ // NV cards: all DX10+ GPUs report only floating point formats as capable, but all others actually work as well.
+ // GeForce 6&7 only report R32F and A32R32G32B32F, and only those work.
+ //
+ // So we check for R16F support; this will return true on all GPUs that can handle ALL
+ // texture formats.
+ hasVertexTextures = ((LOWORD(d3d.d3dcaps.VertexShaderVersion) >= (3<<8)+0)) &&
+ SUCCEEDED(d3dobject->CheckDeviceFormat( g_D3DAdapter, g_D3DDevType, GetD3DFormatForChecks(), D3DUSAGE_QUERY_VERTEXTEXTURE, D3DRTYPE_TEXTURE, D3DFMT_R16F));
+
+ hasAnisoFilter = d3d.d3dcaps.RasterCaps & D3DPRASTERCAPS_ANISOTROPY;
+ maxAnisoLevel = hasAnisoFilter ? d3d.d3dcaps.MaxAnisotropy : 1;
+ hasMipLevelBias = d3d.d3dcaps.RasterCaps & D3DPRASTERCAPS_MIPMAPLODBIAS;
+
+ // per texture format support, as reported for the D3D format each TextureFormat maps to
+ for( i = 0; i < kTexFormatPCCount; ++i )
+ {
+ d3d.hasBaseTextureFormat[i] = IsTextureFormatSupported( GetD3D9TextureFormat( static_cast<TextureFormat>(i) ) );
+ supportsTextureFormat[i] = d3d.hasBaseTextureFormat[i];
+ }
+
+ hasS3TCCompression = IsTextureFormatSupported(D3DFMT_DXT1) && IsTextureFormatSupported(D3DFMT_DXT3) && IsTextureFormatSupported(D3DFMT_DXT5);
+ d3d.hasTextureFormatA8 = IsTextureFormatSupported(D3DFMT_A8);
+ d3d.hasTextureFormatL8 = IsTextureFormatSupported(D3DFMT_L8);
+ d3d.hasTextureFormatA8L8 = IsTextureFormatSupported(D3DFMT_A8L8);
+ d3d.hasTextureFormatL16 = IsTextureFormatSupported(D3DFMT_L16);
+
+ // POW2 cap not set: full NPOT; POW2 together with NONPOW2CONDITIONAL: restricted NPOT
+ if (!(d3d.d3dcaps.TextureCaps & D3DPTEXTURECAPS_POW2))
+ npot = kNPOTFull;
+ else if (d3d.d3dcaps.TextureCaps & D3DPTEXTURECAPS_NONPOW2CONDITIONAL)
+ npot = kNPOTRestricted;
+ else
+ npot = kNPOTNone;
+
+ npotRT = npot;
+
+ // sRGB reads must be supported for every one of the formats listed below
+ hasSRGBReadWrite =
+ IsSRGBTextureReadSupported(GetD3D9TextureFormat(static_cast<TextureFormat>(kTexFormatRGB24)))
+ && IsSRGBTextureReadSupported(GetD3D9TextureFormat(static_cast<TextureFormat>(kTexFormatRGBA32)))
+ && IsSRGBTextureReadSupported(GetD3D9TextureFormat(static_cast<TextureFormat>(kTexFormatARGB32)))
+ && IsSRGBTextureReadSupported(GetD3D9TextureFormat(static_cast<TextureFormat>(kTexFormatBGR24)))
+ && IsSRGBTextureReadSupported(GetD3D9TextureFormat(static_cast<TextureFormat>(kTexFormatDXT1)))
+ && IsSRGBTextureReadSupported(GetD3D9TextureFormat(static_cast<TextureFormat>(kTexFormatDXT3)))
+ && IsSRGBTextureReadSupported(GetD3D9TextureFormat(static_cast<TextureFormat>(kTexFormatDXT5)));
+
+ // we only do sRGB writes to an 8 bit buffer ...
+ hasSRGBReadWrite = hasSRGBReadWrite && IsSRGBTextureWriteSupported(D3DFMT_A8R8G8B8);
+
+ hasInstancing = false; //@TODO: instancing!
+
+ hasBlendSquare = (d3d.d3dcaps.SrcBlendCaps & D3DPBLENDCAPS_SRCCOLOR) && (d3d.d3dcaps.DestBlendCaps & D3DPBLENDCAPS_DESTCOLOR);
+ hasSeparateAlphaBlend = d3d.d3dcaps.PrimitiveMiscCaps & D3DPMISCCAPS_SEPARATEALPHABLEND;
+ hasBlendSub = hasBlendMinMax = d3d.d3dcaps.PrimitiveMiscCaps & D3DPMISCCAPS_BLENDOP;
+
+ hasAutoMipMapGeneration = d3d.d3dcaps.Caps2 & D3DCAPS2_CANAUTOGENMIPMAP;
+
+ // Default, DefaultHDR and ShadowMap are skipped here; Default and ShadowMap are filled in below.
+ // NOTE(review): this 'i' shadows the function level 'i' declared above, and
+ // supportsRenderTextureFormat[kRTFormatDefaultHDR] is not assigned anywhere in this function -
+ // confirm it is set elsewhere.
+ for (int i = 0; i < kRTFormatCount; ++i)
+ {
+ if (i == kRTFormatDefault || i == kRTFormatDefaultHDR || i == kRTFormatShadowMap)
+ continue;
+ supportsRenderTextureFormat[i] = IsRenderTextureFormatSupported(kD3D9RenderTextureFormats[i]);
+ }
+ // render to texture is considered available when ARGB32 render targets are supported
+ hasRenderToTexture = supportsRenderTextureFormat[kRTFormatARGB32];
+ supportsRenderTextureFormat[kRTFormatDefault] = hasRenderToTexture;
+
+ hasRenderToCubemap = hasRenderToTexture;
+ hasStencil = true;
+ hasRenderTargetStencil = true;
+ hasTwoSidedStencil = d3d.d3dcaps.StencilCaps & D3DSTENCILCAPS_TWOSIDED;
+ maxMRTs = clamp<int> (d3d.d3dcaps.NumSimultaneousRTs, 1, kMaxSupportedRenderTargets);
+ // MRTs are only used when post pixel shader blending works with them
+ if (!(d3d.d3dcaps.PrimitiveMiscCaps & D3DPMISCCAPS_MRTPOSTPIXELSHADERBLENDING))
+ maxMRTs = 1;
+
+ // vendor specific depth texture / helper formats; DF16 or INTZ support also enables kRTFormatDepth
+ d3d.hasATIDepthFormat16 = SUCCEEDED( d3dobject->CheckDeviceFormat( g_D3DAdapter, g_D3DDevType, GetD3DFormatForChecks(), D3DUSAGE_DEPTHSTENCIL, D3DRTYPE_TEXTURE, kD3D9FormatDF16 ) );
+ supportsRenderTextureFormat[kRTFormatDepth] |= d3d.hasATIDepthFormat16;
+ d3d.hasNVDepthFormatINTZ = SUCCEEDED( d3dobject->CheckDeviceFormat( g_D3DAdapter, g_D3DDevType, GetD3DFormatForChecks(), D3DUSAGE_DEPTHSTENCIL, D3DRTYPE_TEXTURE, kD3D9FormatINTZ ) );
+ supportsRenderTextureFormat[kRTFormatDepth] |= d3d.hasNVDepthFormatINTZ;
+ d3d.hasNVDepthFormatRAWZ = SUCCEEDED( d3dobject->CheckDeviceFormat( g_D3DAdapter, g_D3DDevType, GetD3DFormatForChecks(), D3DUSAGE_DEPTHSTENCIL, D3DRTYPE_TEXTURE, kD3D9FormatRAWZ ) );
+ d3d.hasNULLFormat = SUCCEEDED( d3dobject->CheckDeviceFormat( g_D3DAdapter, g_D3DDevType, GetD3DFormatForChecks(), D3DUSAGE_RENDERTARGET, D3DRTYPE_SURFACE, kD3D9FormatNULL ) );
+ d3d.hasDepthResolveRESZ = SUCCEEDED( d3dobject->CheckDeviceFormat( g_D3DAdapter, g_D3DDevType, GetD3DFormatForChecks(), D3DUSAGE_RENDERTARGET, D3DRTYPE_SURFACE, kD3D9FormatRESZ ) );
+
+ hasNativeDepthTexture = d3d.hasATIDepthFormat16 || d3d.hasNVDepthFormatINTZ;
+ hasStencilInDepthTexture = d3d.hasNVDepthFormatINTZ;
+ // native shadow maps: D16 usable as a depth-stencil texture
+ hasNativeShadowMap = SUCCEEDED( d3dobject->CheckDeviceFormat( g_D3DAdapter, g_D3DDevType, GetD3DFormatForChecks(), D3DUSAGE_DEPTHSTENCIL, D3DRTYPE_TEXTURE, D3DFMT_D16 ) );
+ supportsRenderTextureFormat[kRTFormatShadowMap] = hasRenderToTexture && hasNativeShadowMap;
+
+ // debugging aid: set CAPS_DEBUG_DISABLE_RT to 1 to pretend no render texture support exists
+ #if CAPS_DEBUG_DISABLE_RT
+ hasRenderToTexture = hasRenderToCubemap = false;
+ for (int i = 0; i < kRTFormatCount; ++i)
+ supportsRenderTextureFormat[i] = false;
+ maxMRTs = 1;
+ #endif
+
+ // This is somewhat dummy; actual resolving of FSAA levels and types supported happens later when choosing presentation parameters.
+ hasMultiSample = true;
+
+ // Driver bugs/workarounds following
+ DetectDriverBugsD3D9( adapterInfo.VendorId, driverVersion );
+
+ // safeguards
+ maxRenderTextureSize = std::min( maxRenderTextureSize, maxTextureSize );
+ maxCubeMapSize = std::min( maxCubeMapSize, maxTextureSize );
+
+ // in the very end, figure out shader capabilities level (after all workarounds are applied)
+ if( LOWORD(d3d.d3dcaps.PixelShaderVersion) < (3<<8)+0 )
+ {
+ // no ps3.0: 2.x shaders
+ shaderCaps = kShaderLevel2;
+ }
+ else
+ {
+ // has everything we care about!
+ shaderCaps = kShaderLevel3;
+ }
+
+ // Print overall caps & D3D9 hacks used
+ printf_console( " Caps: Shader=%i DepthRT=%i NativeDepth=%i NativeShadow=%i DF16=%i INTZ=%i RAWZ=%i NULL=%i RESZ=%i SlowINTZ=%i\n",
+ shaderCaps,
+ supportsRenderTextureFormat[kRTFormatDepth], hasNativeDepthTexture, hasNativeShadowMap,
+ d3d.hasATIDepthFormat16,
+ d3d.hasNVDepthFormatINTZ, d3d.hasNVDepthFormatRAWZ,
+ d3d.hasNULLFormat, d3d.hasDepthResolveRESZ,
+ d3d.slowINTZSampling
+ );
+}
+
+
+// Windows versions in the encoding produced by GetWindowsVersion():
+// major version * 10 + minor version (e.g. 5.1 becomes 51).
+enum WindowsVersion {
+ kWindows2000 = 50, // 5.0
+ kWindowsXP = 51, // 5.1
+ kWindows2003 = 52, // 5.2
+ kWindowsVista = 60, // 6.0
+};
+
+// Returns the running Windows version encoded as major*10 + minor%10
+// (matching the WindowsVersion enum), or 0 when the version query fails
+// or the platform is not NT based.
+static int GetWindowsVersion()
+{
+    OSVERSIONINFO versionInfo;
+    versionInfo.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
+
+    // Bail out with 0 on query failure or on a non-NT platform.
+    if( !GetVersionEx(&versionInfo) || versionInfo.dwPlatformId != VER_PLATFORM_WIN32_NT )
+        return 0;
+
+    return versionInfo.dwMajorVersion * 10 + versionInfo.dwMinorVersion % 10;
+}
+
+
+// Applies vendor and driver specific workarounds on top of the caps already
+// gathered by InitD3D9, and sanitizes the reported VRAM amount.
+// vendorCode: adapter vendor ID (one of the kVendor* values).
+// driverVersion: display driver version, used for version specific workarounds.
+void GraphicsCaps::DetectDriverBugsD3D9( UInt32 vendorCode, const windriverutils::VersionInfo& driverVersion )
+{
+    d3d.slowINTZSampling = false;
+
+    if( vendorCode == kVendorNVIDIA )
+    {
+        // Cards without pixel shader 3.0 are treated as GeForce FX or older.
+        const int kShaderVersion30 = (3 << 8) + 0;
+        const bool preGeForce6 = LOWORD(gGraphicsCaps.d3d.d3dcaps.PixelShaderVersion) < kShaderVersion30;
+        if( preGeForce6 )
+        {
+            // Render to cubemap is sort-of-buggy there: e.g. the skybox draws correctly
+            // but objects do not appear.
+            printf_console( "D3D: disabling render to cubemap on pre-GeForce6\n" );
+            buggyCameraRenderToCubemap = true;
+
+            // Native shadow maps also misbehave on GeForce FX (perhaps tex2Dproj would be
+            // needed instead of tex2D). Only FX 5200/5500 are still around, and shadows
+            // are not desirable on those cards anyway, so turn them off.
+            printf_console ("D3D: disabling shadows on pre-GeForce6\n");
+            hasNativeShadowMap = false;
+            hasNativeDepthTexture = false;
+            supportsRenderTextureFormat[kRTFormatDepth] = false;
+
+            // Drivers up to 6.14.10.9147 have buggy fullscreen FSAA: everything is shown
+            // stretched, as if AA samples mapped to pixels directly.
+            if( driverVersion <= windriverutils::VersionInfo(6,14,10,9147) )
+            {
+                printf_console( "D3D: disabling fullscreen AA (buggy pre-GeForce6 driver)\n" );
+                buggyFullscreenFSAA = true;
+            }
+        }
+    }
+    else if( vendorCode == kVendorATI )
+    {
+        // Radeon HD cards (HD 3xxx-5xxx, Catalyst 9.10 to 10.5) take a big performance hit
+        // when an INTZ texture is used for both sampling and depth testing. Per AMD, copying
+        // it into a separate texture via RESZ is a decent workaround, so flag INTZ sampling
+        // as slow whenever RESZ is available.
+        if (d3d.hasDepthResolveRESZ)
+            d3d.slowINTZSampling = true;
+    }
+
+    // Never report less than 32MB of VRAM.
+    if( videoMemoryMB < 32 )
+    {
+        printf_console("D3D: VRAM amount suspiciously low (less than 32MB)\n");
+        videoMemoryMB = 32;
+    }
+}
diff --git a/Runtime/GfxDevice/d3d/RenderTextureD3D.cpp b/Runtime/GfxDevice/d3d/RenderTextureD3D.cpp
new file mode 100644
index 0000000..0d444b3
--- /dev/null
+++ b/Runtime/GfxDevice/d3d/RenderTextureD3D.cpp
@@ -0,0 +1,583 @@
+#include "UnityPrefix.h"
+#include "Runtime/GfxDevice/GfxDevice.h"
+#include "Runtime/Shaders/GraphicsCaps.h"
+#include "Runtime/Graphics/ScreenManager.h"
+#include "Runtime/Graphics/Image.h"
+#include "D3D9Context.h"
+#include "TexturesD3D9.h"
+#include "RenderTextureD3D.h"
+#include "D3D9Utils.h"
+
+
+// defined in GfxDeviceD3D9.cpp
+void UnbindTextureD3D9( TextureID texture );
+
+
+// define to 1 to print lots of activity info
+#define DEBUG_RENDER_TEXTURES 0
+
+
+// D3D format used for each render texture format; indexed by RenderTextureFormat
+// (kRTFormatCount entries, order must match the enum).
+// Entries set to (D3DFORMAT)-1 have no fixed D3D format in this table. They are deliberately
+// non-zero because the unit test at the end of this file treats 0 as "entry forgotten".
+D3DFORMAT kD3D9RenderTextureFormats[kRTFormatCount] = {
+ D3DFMT_A8R8G8B8,
+ D3DFMT_R32F, // Depth
+ D3DFMT_A16B16G16R16F,
+ D3DFMT_D16, // Shadowmap
+ D3DFMT_R5G6B5,
+ D3DFMT_A4R4G4B4,
+ D3DFMT_A1R5G5B5,
+ (D3DFORMAT)-1, // Default
+ D3DFMT_A2R10G10B10,
+ (D3DFORMAT)-1, // DefaultHDR
+ D3DFMT_A16B16G16R16,
+ D3DFMT_A32B32G32R32F,
+ D3DFMT_G32R32F,
+ D3DFMT_G16R16F,
+ D3DFMT_R32F,
+ D3DFMT_R16F,
+ D3DFMT_L8, // R8
+ (D3DFORMAT)-1, // ARGBInt
+ (D3DFORMAT)-1, // RGInt
+ (D3DFORMAT)-1, // RInt
+ (D3DFORMAT)-1, // BGRA32
+};
+
+
+// Finds the highest multisample type, starting at maxSamples and counting down to 1,
+// that the adapter supports for 'd3dformat' in the current windowed/fullscreen mode.
+// Returns D3DMULTISAMPLE_NONE when no candidate is accepted.
+static D3DMULTISAMPLE_TYPE FindSupportedD3DMultiSampleType (D3DFORMAT d3dformat, int maxSamples)
+{
+    const BOOL isWindowed = !GetScreenManager().IsFullScreen();
+
+    int sampleCount = maxSamples;
+    while (sampleCount >= 1)
+    {
+        const D3DMULTISAMPLE_TYPE candidate = GetD3DMultiSampleType( sampleCount );
+        if (SUCCEEDED( GetD3DObject()->CheckDeviceMultiSampleType( g_D3DAdapter, g_D3DDevType, d3dformat, isWindowed, candidate, NULL ) ))
+            return candidate;
+        --sampleCount;
+    }
+    return D3DMULTISAMPLE_NONE;
+}
+
+// Creates the D3D resources behind a color render surface.
+// - No texture ID: a plain render target surface is created (multisampled if supported,
+//   or a dummy NULL/A8R8G8B8 target when the surface is flagged as never used).
+// - With texture ID: a 2D or cube render target texture is created and registered in 'textures'.
+// Returns false (after logging an error) when creation fails or the dimension is unsupported.
+// NOTE(review): cube textures are always created as D3DFMT_A8R8G8B8 regardless of rs.format,
+// and the result of GetSurfaceLevel is not checked - confirm both are intended.
+static bool InitD3DRenderColorSurface (RenderColorSurfaceD3D9& rs, TexturesD3D9& textures)
+{
+    IDirect3DDevice9* device = GetD3DDevice();
+    HRESULT hr;
+
+    // ---- Plain surface, no texture attached ----
+    if (!rs.textureID.m_ID)
+    {
+        D3DFORMAT surfaceFormat = D3DFMT_UNKNOWN;
+        D3DMULTISAMPLE_TYPE msaaType = D3DMULTISAMPLE_NONE;
+        if (rs.flags & kSurfaceCreateNeverUsed)
+        {
+            // Dummy render target surface (only needed to make D3D runtime happy)
+            surfaceFormat = gGraphicsCaps.d3d.hasNULLFormat ? kD3D9FormatNULL : D3DFMT_A8R8G8B8;
+        }
+        else
+        {
+            // Surface to resolve from: pick the best MSAA type the device supports for this format
+            surfaceFormat = kD3D9RenderTextureFormats[rs.format];
+            msaaType = FindSupportedD3DMultiSampleType( surfaceFormat, rs.samples );
+        }
+
+        IDirect3DSurface9* plainSurface = NULL;
+        hr = device->CreateRenderTarget( rs.width, rs.height, surfaceFormat, msaaType, 0, FALSE, &plainSurface, NULL );
+        if (FAILED(hr))
+        {
+            ErrorString( Format( "RenderTexture creation error: CreateRenderTarget failed [%s]", GetD3D9Error(hr) ) );
+            return false;
+        }
+        rs.m_Surface = plainSurface;
+        return true;
+    }
+
+    // ---- Regular render texture ----
+    DWORD usage = D3DUSAGE_RENDERTARGET;
+    int mipCount = 1;
+    const bool wantsMips = (rs.flags & kSurfaceCreateMipmap) && !IsDepthRTFormat(rs.format);
+    if (wantsMips)
+    {
+        Assert(gGraphicsCaps.hasAutoMipMapGeneration);
+        if (rs.flags & kSurfaceCreateAutoGenMips)
+            usage |= D3DUSAGE_AUTOGENMIPMAP;
+        else
+            mipCount = CalculateMipMapCount3D (rs.width, rs.height, 1);
+    }
+
+    if (rs.dim != kTexDim2D && rs.dim != kTexDimCUBE)
+    {
+        ErrorString("RenderTexture creation error: D3D9 only supports 2D or CUBE textures");
+        return false;
+    }
+
+    if (rs.dim == kTexDimCUBE)
+    {
+        Assert(rs.width == rs.height);
+        IDirect3DCubeTexture9* cubeTexture = NULL;
+        hr = device->CreateCubeTexture (rs.width, mipCount, usage, D3DFMT_A8R8G8B8, D3DPOOL_DEFAULT, &cubeTexture, NULL);
+        if( FAILED(hr) )
+        {
+            ErrorString( Format( "RenderTexture creation error: CreateCubeTexture failed [%s]", GetD3D9Error(hr) ) );
+            return false;
+        }
+        rs.m_Texture = cubeTexture;
+    }
+    else
+    {
+        IDirect3DTexture9* texture2D = NULL;
+        hr = device->CreateTexture (rs.width, rs.height, mipCount, usage, kD3D9RenderTextureFormats[rs.format], D3DPOOL_DEFAULT, &texture2D, NULL);
+        if( FAILED(hr) )
+        {
+            ErrorString( Format( "RenderTexture creation error: CreateTexture failed [%s]", GetD3D9Error(hr) ) );
+            return false;
+        }
+        rs.m_Texture = texture2D;
+        // Keep the top mip level surface around as the render target surface.
+        texture2D->GetSurfaceLevel( 0, &rs.m_Surface );
+    }
+
+    // add to textures map
+    textures.AddTexture( rs.textureID, rs.m_Texture );
+    return true;
+}
+
+// Creates the D3D resources behind a depth render surface.
+// - No texture ID: a depth-stencil surface (D16 or D24S8, multisampled if supported) is
+//   created, or no surface at all when depthFormat is kDepthFormatNone.
+// - With texture ID: a depth texture is created (D16 for shadow maps, otherwise INTZ or DF16
+//   depending on caps) and registered in 'textures'.
+// Returns false (after logging an error) when D3D resource creation fails.
+static bool InitD3DRenderDepthSurface (RenderDepthSurfaceD3D9& rs, TexturesD3D9& textures)
+{
+    IDirect3DDevice9* dev = GetD3DDevice();
+
+    HRESULT hr;
+
+    if (!rs.textureID.m_ID)
+    {
+        // Create depth buffer surface
+        if( rs.depthFormat == kDepthFormatNone )
+        {
+            rs.m_Surface = NULL;
+        }
+        else
+        {
+            // Create surface without texture to resolve from
+            // Find supported MSAA type based on device and format
+            D3DFORMAT d3dformat = (rs.depthFormat == kDepthFormat16 ? D3DFMT_D16 : D3DFMT_D24S8);
+            D3DMULTISAMPLE_TYPE msaa = FindSupportedD3DMultiSampleType( d3dformat, rs.samples );
+            hr = dev->CreateDepthStencilSurface( rs.width, rs.height, d3dformat, msaa, 0, TRUE, &rs.m_Surface, NULL );
+            if( FAILED(hr) )
+            {
+                ErrorString( Format( "RenderTexture creation error: CreateDepthStencilSurface failed [%s]", GetD3D9Error(hr) ) );
+                return false;
+            }
+            // Only account for the allocation once the surface really exists; registering before
+            // the failure check would record memory that was never allocated and never gets
+            // deregistered (the caller deletes rs without destroying it on failure).
+            REGISTER_EXTERNAL_GFX_ALLOCATION_REF(rs.m_Surface, rs.width * rs.height * GetBPPFromD3DFormat(d3dformat), &rs);
+        }
+    }
+    else
+    {
+        // Create depth buffer as texture
+        D3DFORMAT d3dformat = D3DFMT_UNKNOWN;
+        if (rs.flags & kSurfaceCreateShadowmap)
+        {
+            Assert (rs.depthFormat == kDepthFormat16);
+            Assert (gGraphicsCaps.hasNativeShadowMap);
+            d3dformat = D3DFMT_D16;
+        }
+        else
+        {
+            Assert (gGraphicsCaps.hasNativeDepthTexture);
+            // INTZ is preferred over DF16 when both are available
+            if (gGraphicsCaps.d3d.hasNVDepthFormatINTZ)
+                d3dformat = kD3D9FormatINTZ;
+            else if (gGraphicsCaps.d3d.hasATIDepthFormat16)
+                d3dformat = kD3D9FormatDF16;
+            else
+            {
+                // d3dformat stays D3DFMT_UNKNOWN; CreateTexture below is still attempted
+                AssertString ("No available native depth format");
+            }
+        }
+        IDirect3DTexture9* texture = NULL;
+        hr = dev->CreateTexture (rs.width, rs.height, 1, D3DUSAGE_DEPTHSTENCIL, d3dformat, D3DPOOL_DEFAULT, &texture, NULL);
+        if( FAILED(hr) )
+        {
+            ErrorString( Format( "RenderTexture creation error: CreateTexture failed [%s]", GetD3D9Error(hr) ) );
+            return false;
+        }
+        rs.m_Texture = texture;
+        texture->GetSurfaceLevel (0, &rs.m_Surface);
+    }
+
+    // add to textures map
+    if (rs.textureID.m_ID)
+        textures.AddTexture( rs.textureID, rs.m_Texture );
+
+    return true;
+}
+
+
+// Currently bound render targets, as last set by SetRenderTargetD3D9.
+// Used there to skip redundant changes and to know which extra slots to clear.
+static RenderColorSurfaceD3D9* s_ActiveColorTargets[kMaxSupportedRenderTargets];
+static int s_ActiveColorTargetCount;
+static RenderDepthSurfaceD3D9* s_ActiveDepthTarget = NULL;
+static int s_ActiveMip = 0;
+static CubemapFace s_ActiveFace = kCubeFaceUnknown;
+
+// The real back buffer surfaces most recently bound through SetRenderTargetD3D9.
+static RenderColorSurfaceD3D9* s_ActiveColorBackBuffer = NULL;
+static RenderDepthSurfaceD3D9* s_ActiveDepthBackBuffer = NULL;
+
+// In the DX editor the swap chain can be switched underneath us, so we keep
+// something like GL's default FBO: dummy back buffer surfaces that are only handed
+// out to "user" code; SetRenderTargetD3D9 maps them to the proper swap chain surfaces.
+static RenderColorSurfaceD3D9* s_DummyColorBackBuffer = NULL;
+static RenderDepthSurfaceD3D9* s_DummyDepthBackBuffer = NULL;
+
+// Returns the dummy color back buffer surface, creating it on first use.
+// The surface is a function-local static flagged as a back buffer.
+RenderSurfaceBase* DummyColorBackBuferD3D9()
+{
+    if (s_DummyColorBackBuffer != 0)
+        return s_DummyColorBackBuffer;
+
+    static RenderColorSurfaceD3D9 dummySurface;
+    RenderSurfaceBase_InitColor(dummySurface);
+    dummySurface.backBuffer = true;
+
+    s_DummyColorBackBuffer = &dummySurface;
+    return s_DummyColorBackBuffer;
+}
+
+// Returns the dummy depth back buffer surface, creating it on first use.
+// The surface is a function-local static flagged as a back buffer.
+RenderSurfaceBase* DummyDepthBackBuferD3D9()
+{
+    if (s_DummyDepthBackBuffer != 0)
+        return s_DummyDepthBackBuffer;
+
+    static RenderDepthSurfaceD3D9 dummySurface;
+    RenderSurfaceBase_InitDepth(dummySurface);
+    dummySurface.backBuffer = true;
+
+    s_DummyDepthBackBuffer = &dummySurface;
+    return s_DummyDepthBackBuffer;
+}
+
+// Binds 'count' color surfaces plus one depth surface as the current D3D9 render targets.
+//  colorHandles   - array of at least 'count' handles; entry 0 is rewritten to the active real back
+//                   buffer when the dummy back buffer is passed in.
+//  depthHandle    - depth surface handle (same dummy back buffer substitution applies).
+//  mipLevel, face - select the surface of texture backed color targets (face is clamped to 0..5 for cubemaps).
+//  outRenderTargetWidth/Height - set from the last non-NULL color surface processed; untouched on early-out.
+//  outIsBackBuffer - false on the early-out paths, otherwise the backBuffer flag of color surface 0.
+// Returns false when nothing was changed (identical targets already active, or no device yet),
+// true once the targets have been (re)bound and the s_Active* state updated.
+// NOTE(review): color surface 0 and the depth surface are dereferenced right after an Assert that
+// both handles are valid, so invalid handles are only caught when asserts are enabled.
+bool SetRenderTargetD3D9 (int count, RenderSurfaceHandle* colorHandles, RenderSurfaceHandle depthHandle, int mipLevel, CubemapFace face, int& outRenderTargetWidth, int& outRenderTargetHeight, bool& outIsBackBuffer)
+{
+ RenderColorSurfaceD3D9* rcolorZero = reinterpret_cast<RenderColorSurfaceD3D9*>(colorHandles[0].object);
+ RenderDepthSurfaceD3D9* rdepth = reinterpret_cast<RenderDepthSurfaceD3D9*>( depthHandle.object );
+
+ #if DEBUG_RENDER_TEXTURES
+ printf_console( "RT: SetRenderTargetD3D9 color=%i depth=%i (%x) mip=%i face=%i\n",
+ rcolorZero ? rcolorZero->textureID.m_ID : 0,
+ rdepth ? rdepth->textureID.m_ID : 0, rdepth ? rdepth->m_Surface : 0,
+ mipLevel, face );
+ #endif
+
+ outIsBackBuffer = false;
+
+ // early out when exactly the same targets, mip and face are already active
+ if (count == s_ActiveColorTargetCount && s_ActiveDepthTarget == rdepth && s_ActiveMip == mipLevel && s_ActiveFace == face)
+ {
+ bool colorsSame = true;
+ for (int i = 0; i < count; ++i)
+ {
+ if (s_ActiveColorTargets[i] != reinterpret_cast<RenderColorSurfaceD3D9*>(colorHandles[i].object))
+ colorsSame = false;
+ }
+ if (colorsSame)
+ return false;
+ }
+
+ IDirect3DDevice9* dev = GetD3DDeviceNoAssert();
+ // Happens at startup, when deleting all RenderTextures
+ if( !dev )
+ {
+ Assert (!rcolorZero && !rdepth);
+ return false;
+ }
+
+ HRESULT hr = S_FALSE;
+
+ Assert(colorHandles[0].IsValid() && depthHandle.IsValid());
+ Assert(rcolorZero->backBuffer == rdepth->backBuffer);
+
+ outIsBackBuffer = rcolorZero->backBuffer;
+ if (!outIsBackBuffer)
+ GetRealGfxDevice().GetFrameStats().AddRenderTextureChange(); // stats
+
+ // the dummy back buffer stands for "whatever real back buffer is active right now"
+ if(rcolorZero->backBuffer && rcolorZero == s_DummyColorBackBuffer)
+ colorHandles[0].object = rcolorZero = s_ActiveColorBackBuffer;
+ if(rdepth->backBuffer && rdepth == s_DummyDepthBackBuffer)
+ depthHandle.object = rdepth = s_ActiveDepthBackBuffer;
+
+
+ // color surfaces
+ for (int i = 0; i < count; ++i)
+ {
+ RenderColorSurfaceD3D9* rcolor = reinterpret_cast<RenderColorSurfaceD3D9*>(colorHandles[i].object);
+ if(rcolor)
+ {
+ // color surface
+ Assert (rcolor->colorSurface);
+ // Make sure this texture is not used when setting it as render target
+ if (rcolor->textureID.m_ID)
+ UnbindTextureD3D9( rcolor->textureID );
+
+ // Set color surface
+ IDirect3DSurface9* surface = NULL;
+ // surfaces fetched from a texture below carry an extra reference that must be released
+ bool needsRelease = false;
+ if( !rcolor->m_Texture )
+ {
+ Assert (rcolor->m_Surface);
+ surface = rcolor->m_Surface;
+ #if DEBUG_RENDER_TEXTURES
+ printf_console( " RT: color buffer plain\n" );
+ #endif
+ }
+ else if (rcolor->dim == kTexDimCUBE)
+ {
+ Assert (rcolor->m_Texture);
+ IDirect3DCubeTexture9* rt = static_cast<IDirect3DCubeTexture9*>( rcolor->m_Texture );
+ hr = rt->GetCubeMapSurface((D3DCUBEMAP_FACES)(D3DCUBEMAP_FACE_POSITIVE_X + clamp<int>(face,0,5)), mipLevel, &surface);
+ needsRelease = true;
+ }
+ else
+ {
+ #if DEBUG_RENDER_TEXTURES
+ printf_console( " RT: color buffer texture %i\n", rcolor->textureID.m_ID );
+ #endif
+ Assert (rcolor->m_Texture);
+ IDirect3DTexture9* rt = static_cast<IDirect3DTexture9*>( rcolor->m_Texture );
+ hr = rt->GetSurfaceLevel (mipLevel, &surface);
+ needsRelease = true;
+ }
+
+ if( surface )
+ {
+ hr = dev->SetRenderTarget (i, surface);
+ if( FAILED(hr) ) {
+ ErrorString( Format("RenderTexture error: failed to set render target [%s]", GetD3D9Error(hr)) );
+ }
+ if (needsRelease)
+ surface->Release();
+ }
+ else
+ {
+ ErrorString( Format("RenderTexture error: failed to retrieve color surface [%s]", GetD3D9Error(hr)) );
+ }
+ outRenderTargetWidth = rcolor->width;
+ outRenderTargetHeight = rcolor->height;
+ }
+ else
+ {
+ hr = dev->SetRenderTarget (i, NULL);
+ }
+ }
+ // clear render target slots that were in use before but are not part of this call
+ for (int i = count; i < s_ActiveColorTargetCount; ++i)
+ {
+ hr = dev->SetRenderTarget (i, NULL);
+ }
+
+
+ // depth surface
+ Assert (!rdepth || !rdepth->colorSurface);
+
+ if (rdepth && rdepth->m_Surface)
+ {
+ // Make sure this texture is not used when setting it as render target
+ if (rdepth->textureID.m_ID)
+ UnbindTextureD3D9( rdepth->textureID );
+
+ // Set depth surface
+ // (m_Surface was already checked by the enclosing condition)
+ if( rdepth->m_Surface )
+ {
+ #if DEBUG_RENDER_TEXTURES
+ if (rdepth->textureID.m_ID)
+ printf_console( " RT: depth buffer texture %i\n", rdepth->textureID.m_ID );
+ else
+ printf_console( " RT: depth buffer plain %x\n", rdepth->m_Surface );
+ #endif
+ hr = dev->SetDepthStencilSurface( rdepth->m_Surface );
+ if( FAILED(hr) ) {
+ ErrorString( Format("RenderTexture error: failed to set depth stencil [%s]", GetD3D9Error(hr)) );
+ }
+ g_D3DHasDepthStencil = true;
+ // remember the actual depth format; D3DFMT_D16 is the value kept if GetDesc does not fill it in
+ D3DSURFACE_DESC desc;
+ desc.Format = D3DFMT_D16;
+ rdepth->m_Surface->GetDesc( &desc );
+ g_D3DDepthStencilFormat = desc.Format;
+ }
+ }
+ else
+ {
+ #if DEBUG_RENDER_TEXTURES
+ printf_console( " RT: depth buffer none\n" );
+ #endif
+ dev->SetDepthStencilSurface( NULL );
+ g_D3DHasDepthStencil = false;
+ g_D3DDepthStencilFormat = D3DFMT_UNKNOWN;
+ }
+
+ // record the new active state for the redundancy check at the top
+ for (int i = 0; i < count; ++i)
+ s_ActiveColorTargets[i] = reinterpret_cast<RenderColorSurfaceD3D9*>(colorHandles[i].object);
+ s_ActiveColorTargetCount = count;
+ s_ActiveDepthTarget = rdepth;
+ s_ActiveFace = face;
+ s_ActiveMip = mipLevel;
+
+ if (outIsBackBuffer)
+ {
+ s_ActiveColorBackBuffer = (RenderColorSurfaceD3D9*)colorHandles[0].object;
+ s_ActiveDepthBackBuffer = (RenderDepthSurfaceD3D9*)depthHandle.object;
+
+ // we are rendering to "default FBO", so current target is dummy
+ // as a side effect, if we change swap chain, it will be set correctly, and active remain valid
+ s_ActiveColorTargets[0] = s_DummyColorBackBuffer;
+ s_ActiveDepthTarget = s_DummyDepthBackBuffer;
+ }
+ return true;
+}
+
+// Returns a handle to the color surface currently bound at slot 'index'.
+// 'index' is not range checked.
+RenderSurfaceHandle GetActiveRenderColorSurfaceD3D9(int index)
+{
+    RenderColorSurfaceD3D9* activeColor = s_ActiveColorTargets[index];
+    return RenderSurfaceHandle(activeColor);
+}
+// Returns a handle to the depth surface currently recorded as active.
+RenderSurfaceHandle GetActiveRenderDepthSurfaceD3D9()
+{
+    RenderDepthSurfaceD3D9* activeDepth = s_ActiveDepthTarget;
+    return RenderSurfaceHandle(activeDepth);
+}
+
+// Tells whether the active render target has a usable color buffer.
+// True when no color target is recorded, when color target 0 is a back buffer,
+// or when it was not created with kSurfaceCreateNeverUsed.
+bool IsActiveRenderTargetWithColorD3D9()
+{
+    const RenderColorSurfaceD3D9* activeColor = s_ActiveColorTargets[0];
+    if (!activeColor)
+        return true;
+    if (activeColor->backBuffer)
+        return true;
+    return !(activeColor->flags & kSurfaceCreateNeverUsed);
+}
+
+
+// Allocates a color render surface and creates its D3D resources.
+// Returns an invalid (default constructed) handle when render to texture or the requested
+// format is unsupported, or when D3D resource creation fails.
+RenderSurfaceHandle CreateRenderColorSurfaceD3D9( TextureID textureID, int width, int height, int samples, TextureDimension dim, UInt32 createFlags, RenderTextureFormat format, TexturesD3D9& textures )
+{
+    RenderSurfaceHandle result;
+
+    const bool canCreate = gGraphicsCaps.hasRenderToTexture && gGraphicsCaps.supportsRenderTextureFormat[format];
+    if( !canCreate )
+        return result;
+
+    // Describe the surface, then let InitD3DRenderColorSurface create the D3D objects.
+    RenderColorSurfaceD3D9* surface = new RenderColorSurfaceD3D9;
+    surface->textureID = textureID;
+    surface->width = width;
+    surface->height = height;
+    surface->samples = samples;
+    surface->dim = dim;
+    surface->format = format;
+    surface->flags = createFlags;
+
+    if (InitD3DRenderColorSurface(*surface, textures))
+        result.object = surface;
+    else
+        delete surface;
+
+    return result;
+}
+
+// Allocates a depth render surface and creates its D3D resources.
+// Returns an invalid (default constructed) handle when render to texture is unsupported
+// or when D3D resource creation fails.
+RenderSurfaceHandle CreateRenderDepthSurfaceD3D9( TextureID textureID, int width, int height, int samples, DepthBufferFormat depthFormat, UInt32 createFlags, TexturesD3D9& textures )
+{
+    RenderSurfaceHandle result;
+
+    if( !gGraphicsCaps.hasRenderToTexture )
+        return result;
+
+    // Describe the surface, then let InitD3DRenderDepthSurface create the D3D objects.
+    RenderDepthSurfaceD3D9* surface = new RenderDepthSurfaceD3D9;
+    surface->textureID = textureID;
+    surface->width = width;
+    surface->height = height;
+    surface->samples = samples;
+    surface->depthFormat = depthFormat;
+    surface->flags = createFlags;
+
+    if (InitD3DRenderDepthSurface( *surface, textures))
+        result.object = surface;
+    else
+        delete surface;
+
+    return result;
+}
+
+
+// Releases the D3D surface and texture owned by 'rs'. The RenderSurfaceD3D9 object itself
+// is not deleted and the texture map is not touched (see the handle based overload).
+// If 'rs' is one of the recorded real back buffers, both back buffer pointers are cleared.
+// If 'rs' is currently bound as a color or depth target, rendering is first switched to the
+// dummy back buffer and an error is logged.
+void DestroyRenderSurfaceD3D9 (RenderSurfaceD3D9* rs)
+{
+    Assert(rs);
+
+    if(rs == s_ActiveColorBackBuffer || rs == s_ActiveDepthBackBuffer)
+    {
+        #if DEBUG_RENDER_TEXTURES
+        // was 's == ...', an undeclared name that broke the build with DEBUG_RENDER_TEXTURES enabled
+        printf_console( " RT: Destroying main %s buffer.\n", rs == s_ActiveColorBackBuffer ? "color" : "depth" );
+        #endif
+        s_ActiveColorBackBuffer = NULL;
+        s_ActiveDepthBackBuffer = NULL;
+    }
+
+    RenderSurfaceHandle defaultColor(s_DummyColorBackBuffer);
+    RenderSurfaceHandle defaultDepth(s_DummyDepthBackBuffer);
+
+    // Never leave a surface that is about to be released bound as a render target.
+    if (s_ActiveDepthTarget == rs)
+    {
+        ErrorString( "RenderTexture warning: Destroying active render texture. Switching to main context." );
+        int targetWidth, targetHeight;
+        bool isBackBuffer;
+        SetRenderTargetD3D9 (1, &defaultColor, defaultDepth, 0, kCubeFaceUnknown, targetWidth, targetHeight, isBackBuffer);
+    }
+    for (int i = 0; i < s_ActiveColorTargetCount; ++i)
+    {
+        if (s_ActiveColorTargets[i] == rs)
+        {
+            ErrorString( "RenderTexture warning: Destroying active render texture. Switching to main context." );
+            int targetWidth, targetHeight;
+            bool isBackBuffer;
+            SetRenderTargetD3D9 (1, &defaultColor, defaultDepth, 0, kCubeFaceUnknown, targetWidth, targetHeight, isBackBuffer);
+        }
+    }
+
+    if (rs->m_Surface)
+    {
+        REGISTER_EXTERNAL_GFX_DEALLOCATION(rs->m_Surface);
+        ULONG refCount = rs->m_Surface->Release();
+        // a surface obtained from a texture is expected to keep one reference until the texture goes away
+        Assert(refCount == (rs->m_Texture ? 1 : 0));
+        rs->m_Surface = NULL;
+    }
+    if( rs->m_Texture )
+    {
+        REGISTER_EXTERNAL_GFX_DEALLOCATION(rs->m_Texture);
+        ULONG refCount = rs->m_Texture->Release();
+        Assert(refCount == 0);
+        rs->m_Texture = NULL;
+    }
+}
+
+// Destroys the render surface behind 'rsHandle': releases its D3D resources, removes its
+// texture ID from 'textures', deletes the surface object and resets the handle to NULL.
+// Does nothing for an invalid handle.
+void DestroyRenderSurfaceD3D9 (RenderSurfaceHandle& rsHandle, TexturesD3D9& textures)
+{
+    if( rsHandle.IsValid() )
+    {
+        RenderSurfaceD3D9* surface = reinterpret_cast<RenderSurfaceD3D9*>( rsHandle.object );
+
+        // Release the D3D objects first (this also clears surface->m_Texture).
+        DestroyRenderSurfaceD3D9( surface );
+
+        const bool hasTextureEntry = surface->m_Texture || surface->textureID.m_ID;
+        if (hasTextureEntry)
+            textures.RemoveTexture (surface->textureID);
+
+        delete surface;
+        rsHandle.object = NULL;
+    }
+}
+
+
+
+// --------------------------------------------------------------------------
+
+
+#if ENABLE_UNIT_TESTS
+#include "External/UnitTest++/src/UnitTest++.h"
+
+// Unit tests for the render texture format table in this file.
+SUITE ( RenderTextureD3DTests )
+{
+// Every entry of kD3D9RenderTextureFormats must be non-zero. An entry left out of the
+// initializer list is zero-initialized, so a zero means the table was not extended when a
+// new render texture format was added.
+TEST(RenderTextureD3DTests_FormatTableCorrect)
+{
+ // checks that you did not forget to update format table when adding a new format :)
+ for (int i = 0; i < kRTFormatCount; ++i)
+ {
+ CHECK(kD3D9RenderTextureFormats[i] != 0);
+ }
+}
+}
+#endif
diff --git a/Runtime/GfxDevice/d3d/RenderTextureD3D.h b/Runtime/GfxDevice/d3d/RenderTextureD3D.h
new file mode 100644
index 0000000..255e89d
--- /dev/null
+++ b/Runtime/GfxDevice/d3d/RenderTextureD3D.h
@@ -0,0 +1,17 @@
+#pragma once
+
+#include "D3D9Includes.h"
+
+
+// Render surface creation, color and depth variants.
+// NOTE(review): implementations are not visible here; parameter meaning is taken from the names only.
+RenderSurfaceHandle CreateRenderColorSurfaceD3D9 (TextureID textureID, int width, int height, int samples, TextureDimension dim, UInt32 createFlags, RenderTextureFormat format, TexturesD3D9& textures);
+RenderSurfaceHandle CreateRenderDepthSurfaceD3D9 (TextureID textureID, int width, int height, int samples, DepthBufferFormat depthFormat, UInt32 createFlags, TexturesD3D9& textures);
+// Releases the D3D surface and texture held by rs and nulls those members; rs itself is not deleted.
+void DestroyRenderSurfaceD3D9 (RenderSurfaceD3D9* rs);
+// Destroys the surface behind rsHandle, removes its texture ID from textures, deletes it and nulls the handle.
+void DestroyRenderSurfaceD3D9 (RenderSurfaceHandle& rsHandle, TexturesD3D9& textures);
+// Sets `count` color surfaces plus a depth surface as render target; the last three parameters are outputs.
+// NOTE(review): presumably returns false on failure - confirm against the implementation.
+bool SetRenderTargetD3D9 (int count, RenderSurfaceHandle* colorHandles, RenderSurfaceHandle depthHandle, int mipLevel, CubemapFace face, int& outRenderTargetWidth, int& outRenderTargetHeight, bool& outIsBackBuffer);
+// Accessors for the currently active color (by index) and depth surfaces.
+RenderSurfaceHandle GetActiveRenderColorSurfaceD3D9(int index);
+RenderSurfaceHandle GetActiveRenderDepthSurfaceD3D9();
+
+// Accessors and setters for the back buffer color and depth surfaces.
+RenderSurfaceHandle GetBackBufferColorSurfaceD3D9();
+RenderSurfaceHandle GetBackBufferDepthSurfaceD3D9();
+void SetBackBufferColorSurfaceD3D9(RenderSurfaceBase* color);
+void SetBackBufferDepthSurfaceD3D9(RenderSurfaceBase* depth);
diff --git a/Runtime/GfxDevice/d3d/ShaderGenerator.cpp b/Runtime/GfxDevice/d3d/ShaderGenerator.cpp
new file mode 100644
index 0000000..b62e5c7
--- /dev/null
+++ b/Runtime/GfxDevice/d3d/ShaderGenerator.cpp
@@ -0,0 +1,948 @@
+#include "UnityPrefix.h"
+#include <stdlib.h>
+#include <string>
+#include <vector>
+#include <algorithm>
+#include <assert.h>
+#include "ShaderGenerator.h"
+#include "Runtime/Utilities/Word.h"
+
+// Vertex inputs a shader fragment can read. The values index kShaderInputNames and
+// kShaderInputDecls, and are used as bit positions in ShaderFragment::inputs.
+enum ShaderInputRegister {
+ kInputPosition,
+ kInputNormal,
+ kInputUV0,
+ kInputUV1,
+ kInputColor,
+ kInputCount
+};
+
+// Placeholder used in fragment text for each vertex input, indexed by ShaderInputRegister.
+// GenerateShader replaces each placeholder with the v# register assigned to that input.
+const char* kShaderInputNames[kInputCount] = {
+ "$IPOS",
+ "$INOR",
+ "$IUV0",
+ "$IUV1",
+ "$ICOL",
+};
+
+// dcl_ usage emitted in the shader prolog for each vertex input, indexed by ShaderInputRegister.
+const char* kShaderInputDecls[kInputCount] = {
+ "dcl_position",
+ "dcl_normal",
+ "dcl_texcoord0",
+ "dcl_texcoord1",
+ "dcl_color",
+};
+
+
+
+// Flags stored in ShaderFragment::options.
+enum ShaderFragmentOptions {
+ kOptionHasTexMatrix = (1<<0), // fragment text uses $TMPARAM0..3, replaced with texture matrix constant registers
+};
+
+// First constant register (c#) of each ShaderConstant, indexed by ShaderConstant.
+// Most fragment texts hard-code these numbers (e.g. c45 for kConstLightMisc), so the
+// table and the fragment texts have to be changed together. Only the texture matrix
+// location is read from this table by GenerateShader (base + param*4 + row).
+const int kConstantLocations[kConstCount] = {
+ 0, // kConstMatrixMVP
+ 4, // kConstMatrixMV
+ 8, // kConstMatrixMV_IT
+ 12, // kConstMatrixTexture
+ 44, // kConstAmbient
+ 57, // kConstColorMatAmbient
+ 45, // kConstLightMisc
+ 46, // kConstMatDiffuse
+ 47, // kConstMatSpecular
+ 48, // kConstLightIndexes
+};
+
+// Bit positions for ShaderFragment::dependencies. Each value also indexes
+// kCommonDependencies, which names the fragment AddFragment adds first for that bit.
+enum CommonDependencies {
+ kDep_CamSpacePos,
+ kDep_CamSpaceN,
+ kDep_ViewVector,
+ kDep_ReflVector,
+ kDep_Normal,
+ kDepCount
+};
+
+
+// --------------------------------------------------------------------------
+
+// transform position
+const ShaderFragment kVS_Pos = {
+ (1<<kInputPosition), // input
+ (1<<kConstMatrixMVP), // constants
+ 0, // deps
+ 0, // options
+ 0, // temps
+ NULL, // ins
+ NULL, // outs
+ "dp4 oPos.x, $IPOS, c0\n"
+ "dp4 oPos.y, $IPOS, c1\n"
+ "dp4 oPos.z, $IPOS, c2\n"
+ "dp4 oPos.w, $IPOS, c3\n",
+};
+
+// --------------------------------------------------------------------------
+// temps
+
+// NORM = vertex normal
+const ShaderFragment kVS_Load_Normal = {
+ (1<<kInputNormal), // input
+ 0, // constants
+ 0, // deps
+ 0, // options
+ 0, // temps
+ NULL, // ins
+ "NORM", // outs
+ "mov $O_NORM, $INOR\n"
+};
+
+// NORM = normalized vertex normal
+const ShaderFragment kVS_Normalize_Normal = {
+ 0, // input
+ 0, // constants
+ (1<<kDep_Normal), // deps
+ 0, // options
+ 1, // temps
+ "NORM", // ins
+ "NORM", // outs
+ "nrm $TMP0.xyz, $O_NORM\n"
+ "mov $O_NORM.xyz, $TMP0\n"
+};
+
+
+// OPOS = input position of the vertex
+const ShaderFragment kVS_Temp_ObjSpacePos = {
+ (1<<kInputPosition), // input
+ 0, // constants
+ 0, // deps
+ 0, // options
+ 0, // temps
+ NULL, // ins
+ "OPOS", // outs
+ "mov $O_OPOS, $IPOS\n"
+};
+
+// CPOS = camera space position of the vertex
+const ShaderFragment kVS_Temp_CamSpacePos = {
+ (1<<kInputPosition), // input
+ (1<<kConstMatrixMV), // constants
+ 0, // deps
+ 0, // options
+ 0, // temps
+ NULL, // ins
+ "CPOS", // outs
+ "mul $O_CPOS, $IPOS.y, c5\n"
+ "mad $O_CPOS, c4, $IPOS.x, $O_CPOS\n"
+ "mad $O_CPOS, c6, $IPOS.z, $O_CPOS\n"
+ "mad $O_CPOS, c7, $IPOS.w, $O_CPOS\n",
+};
+
+// CNOR = camera space normal of the vertex
+const ShaderFragment kVS_Temp_CamSpaceN = {
+ 0, // input
+ (1<<kConstMatrixMV_IT), // constants
+ (1<<kDep_Normal), // deps
+ 0, // options
+ 0, // temps
+ "NORM", // ins
+ "CNOR", // outs
+ "mul $O_CNOR, $O_NORM.y, c9\n"
+ "mad $O_CNOR, c8, $O_NORM.x, $O_CNOR\n"
+ "mad $O_CNOR, c10, $O_NORM.z, $O_CNOR\n",
+};
+
+// VIEW = normalized vertex-to-eye vector
+const ShaderFragment kVS_Temp_ViewVector = {
+ 0, // input
+ 0, // constants
+ (1<<kDep_CamSpacePos), // deps
+ 0, // options
+ 0, // temps
+ "CPOS", // ins
+ "VIEW", // outs
+ "dp3 $O_VIEW.w, $O_CPOS, $O_CPOS\n"
+ "rsq $O_VIEW.w, $O_VIEW.w\n"
+ "mul $O_VIEW, -$O_CPOS, $O_VIEW.w\n",
+};
+
+// REFL = camera space reflection vector: 2*dot(V,N)*N-V
+const ShaderFragment kVS_Temp_CamSpaceRefl = {
+ 0, // input
+ 0, // constants
+ (1<<kDep_CamSpaceN) | (1<<kDep_ViewVector), // deps
+ 0, // options
+ 0, // temps
+ "CNOR VIEW", // ins
+ "REFL", // outs
+ "mov $O_REFL.xyz, $O_VIEW\n"
+ "dp3 $O_REFL.w, $O_REFL, $O_CNOR\n"
+ "add $O_REFL.w, $O_REFL.w, $O_REFL.w\n"
+ "mad $O_REFL.xyz, $O_REFL.w, $O_CNOR, -$O_REFL\n"
+};
+
+// cheap version
+// SPHR = sphere map: N*0.5+0.5
+//const ShaderFragment kVS_Temp_SphereMap = {
+// 0, // input
+// (1<<kConstLightMisc), // constants
+// (1<<kDep_CamSpaceN), // deps
+// 0, // options
+// 0, // temps
+// "CNOR", // ins
+// "SPHR", // outs
+// "mad $O_SPHR.xyz, $O_CNOR, c45.w, c45.w"
+//};
+
+// SPHR = sphere map. R = reflection vector
+// m = 2*sqrt(Rx*Rx + Ry*Ry + (Rz+1)*(Rz+1))
+// SPHR = Rx/m + 0.5, Ry/m + 0.5
+// c45 is kConstLightMisc = (0, 4, 1, 0.5), so c45.z = 1, c45.y = 4 and c45.w = 0.5.
+const ShaderFragment kVS_Temp_SphereMap = {
+ 0, // input
+ (1<<kConstLightMisc), // constants
+ (1<<kDep_ReflVector), // deps
+ 0, // options
+ 1, // temps
+ "REFL", // ins
+ "SPHR", // outs
+ "mul $TMP0.xy, $O_REFL, $O_REFL\n" // Rx*Rx, Ry*Ry
+ "add $O_SPHR.w, $TMP0.y, $TMP0.x\n" // Rx*Rx + Ry*Ry
+ "add $O_SPHR.z, $O_REFL.z, c45.z\n" // Rz+1
+ "mad $O_SPHR.z, $O_SPHR.z, $O_SPHR.z, $O_SPHR.w\n" // (Rz+1)*(Rz+1) + Rx*Rx + Ry*Ry
+ "mul $O_SPHR.z, $O_SPHR.z, c45.y\n" // * 4, giving m*m
+ "rsq $O_SPHR.z, $O_SPHR.z\n" // 1/m (reciprocal square root of m*m)
+ "mad $O_SPHR.xy, $O_REFL, $O_SPHR.z, c45.w\n" // R/m+0.5
+};
+
+// --------------------------------------------------------------------------
+// Texture coordinates
+
+const ShaderFragment kVS_Load_UV0 = {
+ (1<<kInputUV0), // input
+ 0, // constants
+ 0, // deps
+ 0, // options
+ 0, // temps
+ NULL, // ins
+ "UV0", // outs
+ "mov $O_UV0, $IUV0\n"
+};
+
+const ShaderFragment kVS_Load_UV1 = {
+ (1<<kInputUV1), // input
+ 0, // constants
+ 0, // deps
+ 0, // options
+ 0, // temps
+ NULL, // ins
+ "UV1", // outs
+ "mov $O_UV1, $IUV1\n"
+};
+
+const ShaderFragment kVS_Out_TexCoord = {
+ 0, // input
+ 0, // constants
+ 0, // deps
+ 0, // options
+ 0, // temps
+ "$0", // ins
+ NULL, // outs
+ "mov oT$PARAM, $I_0\n"
+};
+
+
+const ShaderFragment kVS_Out_Matrix2 = {
+ 0, // input
+ (1<<kConstMatrixTexture), // constants
+ 0, // deps
+ kOptionHasTexMatrix, // options
+ 1, // temps
+ "$0", // ins
+ NULL, // outs
+ "mul $TMP0, $I_0.y, $TMPARAM1\n"
+ "mad $TMP0, $TMPARAM0, $I_0.x, $TMP0\n"
+ "add oT$PARAM, $TMPARAM3, $TMP0\n"
+};
+
+const ShaderFragment kVS_Out_Matrix3 = {
+ 0, // input
+ (1<<kConstMatrixTexture), // constants
+ 0, // deps
+ kOptionHasTexMatrix, // options
+ 1, // temps
+ "$0", // ins
+ NULL, // outs
+ "mul $TMP0, $I_0.y, $TMPARAM1\n"
+ "mad $TMP0, $TMPARAM0, $I_0.x, $TMP0\n"
+ "mad $TMP0, $TMPARAM2, $I_0.z, $TMP0\n"
+ "add oT$PARAM, $TMPARAM3, $TMP0\n"
+};
+
+// --------------------------------------------------------------------------
+// Lighting
+
+const ShaderFragment kVS_Out_Diffuse_VertexColor= {
+ (1<<kInputColor), // input
+ 0, // constants
+ 0, // deps
+ 0, // options
+ 0, // temps
+ NULL, // ins
+ NULL, // outs
+ "mov oD0, $ICOL\n"
+};
+
+const ShaderFragment kVS_Light_Diffuse_Pre = {
+ 0, // input
+ (1<<kConstLightMisc), // constants
+ 0, // deps
+ 0, // options
+ 0, // temps
+ NULL, // ins
+ "DIFF", // outs
+
+ "mov $O_DIFF, c45.xxxz\n" // diffuse = 0
+};
+
+// Adds the diffuse term of directional lights to DIFF.
+// The loop runs `rep i1` times; the light data index starts at c48.y and is advanced
+// by c45.y per light, and is loaded into a0.x to address c61[a0.x] / c62[a0.x].
+const ShaderFragment kVS_Light_Diffuse_Dir = {
+ 0, // input
+ (1<<kConstLightMisc) | (1<<kConstLightIndexes), // constants
+ (1<<kDep_CamSpaceN), // deps
+ 0, // options
+ 1, // temps
+ "CNOR", // ins
+ "DIFF", // outs
+
+ "mov $O_CNOR.w, c48.y\n" // CNOR.w is reused as light data index
+ "rep i1\n"
+ " mova a0.x, $O_CNOR.w\n"
+ " dp3 $TMP0.x, $O_CNOR, c61[a0.x]\n" // NdotL
+ " slt $TMP0.w, c45.x, $TMP0.x\n" // clamp = NdotL > 0
+ " mul $TMP0.xyz, $TMP0.x, c62[a0.x]\n" // diff = NdotL * lightColor
+ " mad $O_DIFF.xyz, $TMP0.w, $TMP0, $O_DIFF\n" // diffuse += diff * clamp
+ " add $O_CNOR.w, $O_CNOR.w, c45.y\n" // index += 4
+ "endrep\n"
+};
+
+const ShaderFragment kVS_Light_Diffuse_Point = {
+ 0, // input
+ (1<<kConstLightMisc) | (1<<kConstLightIndexes), // constants
+ (1<<kDep_CamSpaceN) | (1<<kDep_CamSpacePos), // deps
+ 0, // options
+ 3, // temps
+ "CNOR CPOS", // ins
+ "DIFF", // outs
+
+ "mov $O_CNOR.w, c48.z\n" // CNOR.w is reused as light data index
+ "rep i2\n"
+ " mova a0.x, $O_CNOR.w\n"
+ " add $TMP1.xyz, -$O_CPOS, c60[a0.x]\n" // toLight in view space
+ " dp3 $TMP0.w, $TMP1, $TMP1\n" // lightDirection = normalize(toLight)
+ " rsq $TMP1.w, $TMP0.w\n"
+ " mul $TMP1.xyz, $TMP1, $TMP1.w\n"
+ " dp3 $TMP1.x, $O_CNOR, $TMP1\n" // NdotL
+ " slt $TMP1.y, c63[a0.x].z, $TMP0.w\n" // range = range2 < toLight2
+ " mov $TMP1.z, c45.z\n" // 1
+ " mad $TMP0.w, c63[a0.x].w, $TMP0.w, $TMP1.z\n" // 1 + toLight2 * quadAttenuation
+ " rcp $TMP0.w, $TMP0.w\n" // attenuation
+ " mad $TMP0.w, $TMP1.y, -$TMP0.w, $TMP0.w\n" // attenuation = 0 if out of range
+ " sge $TMP1.y, $TMP1.x, c45.x\n" // clamp = NdotL > 0
+ " mul $TMP2, $TMP1.x, c62[a0.x]\n" // diff = NdotL * lightColor
+ " mul $TMP2, $TMP0.w, $TMP2\n" // diff *= attenuation
+ " mad $O_DIFF.xyz, $TMP1.y, $TMP2, $O_DIFF\n" // diffuse += diff * clamp
+ " add $O_CNOR.w, $O_CNOR.w, c45.y\n" // index += 4
+ "endrep\n"
+};
+
+
+
+const ShaderFragment kVS_Light_Diffuse_Spot = {
+ 0, // input
+ (1<<kConstLightMisc) | (1<<kConstLightIndexes), // constants
+ (1<<kDep_CamSpaceN) | (1<<kDep_CamSpacePos), // deps
+ 0, // options
+ 3, // temps
+ "CNOR CPOS", // ins
+ "DIFF", // outs
+
+ "mov $O_CNOR.w, c48.x\n" // CNOR.w is reused as light data index
+ "rep i0\n"
+ " mova a0.x, $O_CNOR.w\n"
+ " add $TMP1.xyz, -$O_CPOS, c60[a0.x]\n" // toLight in view space
+ " dp3 $TMP0.w, $TMP1, $TMP1\n" // lightDirection = normalize(toLight)
+ " rsq $TMP1.w, $TMP0.w\n"
+ " mul $TMP1.xyz, $TMP1, $TMP1.w\n"
+ " dp3 $TMP1.w, $O_CNOR, $TMP1\n" // NdotL
+ " dp3 $TMP1.x, $TMP1, c61[a0.x]\n" // rho = dot(L,lightAxisDirection)
+ " add $TMP1.x, $TMP1.x, -c63[a0.x].y\n" // rho-cos(phi/2)
+ " mul $TMP1.x, $TMP1.x, c63[a0.x].x\n" // spotAtten = (rho-cos(phi/2)) / (cos(theta/2)-cos(phi/2))
+ " mov $TMP1.z, c45.z\n" // 1
+ " mad $TMP1.y, c63[a0.x].w, $TMP0.w, $TMP1.z\n" // 1 + toLight2 * quadAttenuation
+ " rcp $TMP1.y, $TMP1.y\n" // attenuation
+ " slt $TMP0.w, c63[a0.x].z, $TMP0.w\n" // range = range2 < toLight2
+ " mad $TMP0.w, $TMP0.w, -$TMP1.y, $TMP1.y\n" // attenuation = 0 if out of range
+ " max $TMP1.x, $TMP1.x, c45.x\n" // spotAtten = saturate(spotAtten)
+ " min $TMP1.x, $TMP1.x, c45.z\n"
+ " mul $TMP0.w, $TMP0.w, $TMP1.x\n" // attenuation *= spotAtten
+ " sge $TMP1.x, $TMP1.w, c45.x\n" // clamp = NdotL > 0
+ " mul $TMP2, $TMP1.w, c62[a0.x]\n" // diff = NdotL * lightColor
+ " mul $TMP2, $TMP0.w, $TMP2\n" // diff *= attenuation
+ " mad $O_DIFF.xyz, $TMP1.x, $TMP2, $O_DIFF\n" // diffuse += diff * clamp
+ " add $O_CNOR.w, $O_CNOR.w, c45.y\n" // index += 4
+ "endrep\n"
+};
+
+
+const ShaderFragment kVS_Light_Specular_Pre = {
+ 0, // input
+ (1<<kConstLightMisc), // constants
+ 0, // deps
+ 0, // options
+ 0, // temps
+ NULL, // ins
+ "DIFF SPEC", // outs
+ "mov $O_DIFF, c45.xxxz\n" // diffuse = 0
+ "mov $O_SPEC, c45.x\n" // specular = 0
+};
+
+
+const ShaderFragment kVS_Light_Specular_Dir = {
+ 0, // input
+ (1<<kConstLightMisc) | (1<<kConstLightIndexes) | (1<<kConstMatSpecular), // constants
+ (1<<kDep_CamSpaceN) | (1<<kDep_ViewVector), // deps
+ 0, // options
+ 2, // temps
+ "CNOR VIEW", // ins
+ "DIFF SPEC", // outs
+
+ "mov $O_CNOR.w, c48.y\n" // CNOR.w is reused as light data index
+ "rep i1\n"
+ " mova a0.x, $O_CNOR.w\n"
+ " mov $TMP0.xyz, c61[a0.x]\n" // L = lightDirection
+ // diffuse
+ " dp3 $TMP1.x, $O_CNOR, $TMP0\n" // NdotL
+ " slt $TMP0.w, c45.x, $TMP1.x\n" // clamp = NdotL > 0
+ " mul $TMP1, $TMP1.x, c62[a0.x]\n" // diff = NdotL * lightColor
+ " mad $O_DIFF.xyz, $TMP0.w, $TMP1, $O_DIFF\n" // diffuse += diff * clamp
+ // spec
+ " add $TMP0.xyz, $TMP0, $O_VIEW\n" // L + V
+ " nrm $TMP1.xyz, $TMP0\n" // H = normalize(L + V)
+ " dp3 $TMP1.w, $TMP1, $O_CNOR\n" // H dot N
+ " max $TMP1.w, $TMP1.w, c45.x\n" // sp = max(H dot N, 0)
+ " pow $TMP1.w, $TMP1.w, c47.w\n" // sp = pow(sp, exponent)
+ " mul $TMP1.w, $TMP1.w, $TMP0.w\n" // sp *= clamp
+ " mad $O_SPEC.xyz, $TMP1.w, c62[a0.x], $O_SPEC\n" // spec += sp * lightColor
+
+ " add $O_CNOR.w, $O_CNOR.w, c45.y\n" // index += 4
+ "endrep\n"
+};
+
+
+const ShaderFragment kVS_Light_Specular_Point = {
+ 0, // input
+ (1<<kConstLightMisc) | (1<<kConstLightIndexes) | (1<<kConstMatSpecular), // constants
+ (1<<kDep_CamSpaceN) | (1<<kDep_CamSpacePos) | (1<<kDep_ViewVector), // deps
+ 0, // options
+ 3, // temps
+ "CNOR CPOS VIEW", // ins
+ "DIFF SPEC", // outs
+
+ "mov $O_CNOR.w, c48.z\n" // CNOR.w is reused as light data index
+ "rep i2\n"
+ " mova a0.x, $O_CNOR.w\n"
+ " add $TMP1.xyz, -$O_CPOS, c60[a0.x]\n" // toLight in view space
+ " dp3 $TMP0.w, $TMP1, $TMP1\n" // L = normalize(toLight)
+ " rsq $TMP1.w, $TMP0.w\n"
+ " mul $TMP1.xyz, $TMP1, $TMP1.w\n"
+ // diffuse
+ " dp3 $TMP0.x, $O_CNOR, $TMP1\n" // NdotL
+ " slt $TMP0.y, c63[a0.x].z, $TMP0.w\n" // range = range2 < toLight2
+ " mov $TMP0.z, c45.z\n" // 1
+ " mad $TMP0.w, c63[a0.x].w, $TMP0.w, $TMP0.z\n" // 1 + toLight2 * quadAttenuation
+ " rcp $TMP0.w, $TMP0.w\n" // attenuation
+ " mad $TMP0.w, $TMP0.y, -$TMP0.w, $TMP0.w\n" // attenuation = 0 if out of range
+ " sge $TMP0.y, $TMP0.x, c45.x\n" // clamp = NdotL > 0
+ " mul $TMP2, $TMP0.x, c62[a0.x]\n" // diff = NdotL * lightColor
+ " mul $TMP2, $TMP0.w, $TMP2\n" // diff *= attenuation
+ " mad $O_DIFF.xyz, $TMP0.y, $TMP2, $O_DIFF\n" // diffuse += diff * clamp
+ // spec
+ " add $TMP2.xyz, $TMP1, $O_VIEW\n" // L + V
+ " nrm $TMP1.xyz, $TMP2\n" // H = normalize(L + V)
+ " dp3 $TMP1.w, $TMP1, $O_CNOR\n" // H dot N
+ " max $TMP1.w, $TMP1.w, c45.x\n" // sp = max(H dot N, 0)
+ " pow $TMP1.w, $TMP1.w, c47.w\n" // sp = pow(sp, exponent)
+ " mul $TMP1.w, $TMP1.w, $TMP0.w\n" // sp *= attenuation
+ " mul $TMP1.w, $TMP1.w, $TMP0.y\n" // sp *= clamp
+ " mad $O_SPEC.xyz, $TMP1.w, c62[a0.x], $O_SPEC\n" // spec += sp * lightColor
+
+ " add $O_CNOR.w, $O_CNOR.w, c45.y\n" // index += 4
+ "endrep\n"
+};
+
+const ShaderFragment kVS_Light_Specular_Spot = {
+ 0, // input
+ (1<<kConstLightMisc) | (1<<kConstLightIndexes) | (1<<kConstMatSpecular), // constants
+ (1<<kDep_CamSpaceN) | (1<<kDep_CamSpacePos) | (1<<kDep_ViewVector), // deps
+ 0, // options
+ 3, // temps
+ "CNOR CPOS VIEW", // ins
+ "DIFF SPEC", // outs
+
+ "mov $O_CNOR.w, c48.x\n" // CNOR.w is reused as light data index
+ "rep i0\n"
+ " mova a0.x, $O_CNOR.w\n"
+ " add $TMP1.xyz, -$O_CPOS, c60[a0.x]\n" // toLight in view space
+ " dp3 $TMP0.w, $TMP1, $TMP1\n" // lightDirection = normalize(toLight)
+ " rsq $TMP1.w, $TMP0.w\n"
+ " mul $TMP1.xyz, $TMP1, $TMP1.w\n"
+ // diffuse
+ " dp3 $TMP1.w, $O_CNOR, $TMP1\n" // NdotL
+ " dp3 $TMP0.x, $TMP1, c61[a0.x]\n" // rho = dot(L,lightAxisDirection)
+ " add $TMP0.x, $TMP0.x, -c63[a0.x].y\n" // rho-cos(phi/2)
+ " mul $TMP0.x, $TMP0.x, c63[a0.x].x\n" // spotAtten = (rho-cos(phi/2)) / (cos(theta/2)-cos(phi/2))
+ " mov $TMP0.z, c45.z\n" // 1
+ " mad $TMP0.y, c63[a0.x].w, $TMP0.w, $TMP0.z\n" // 1 + toLight2 * quadAttenuation
+ " rcp $TMP0.y, $TMP0.y\n" // attenuation
+ " slt $TMP0.w, c63[a0.x].z, $TMP0.w\n" // range = range2 < toLight2
+ " mad $TMP0.w, $TMP0.w, -$TMP0.y, $TMP0.y\n" // attenuation = 0 if out of range
+ " max $TMP0.x, $TMP0.x, c45.x\n" // spotAtten = saturate(spotAtten)
+ " min $TMP0.x, $TMP0.x, c45.z\n"
+ " mul $TMP0.w, $TMP0.w, $TMP0.x\n" // attenuation *= spotAtten
+ " sge $TMP0.x, $TMP1.w, c45.x\n" // clamp = NdotL > 0
+ " mul $TMP2, $TMP1.w, c62[a0.x]\n" // diff = NdotL * lightColor
+ " mul $TMP2, $TMP0.w, $TMP2\n" // diff *= attenuation
+ " mad $O_DIFF.xyz, $TMP0.x, $TMP2, $O_DIFF\n" // diffuse += diff * clamp
+ // spec
+ " add $TMP2.xyz, $TMP1, $O_VIEW\n" // L + V
+ " nrm $TMP1.xyz, $TMP2\n" // H = normalize(L + V)
+ " dp3 $TMP1.w, $TMP1, $O_CNOR\n" // H dot N
+ " max $TMP1.w, $TMP1.w, c45.x\n" // sp = max(H dot N, 0)
+ " pow $TMP2.x, $TMP1.w, c47.w\n" // sp = pow(sp, exponent)
+ " mul $TMP2.x, $TMP2.x, $TMP0.w\n" // sp *= attenuation
+ " mul $TMP2.x, $TMP2.x, $TMP0.x\n" // sp *= clamp
+ " mad $O_SPEC.xyz, $TMP2.x, c62[a0.x], $O_SPEC\n" // spec += sp * lightColor
+
+ " add $O_CNOR.w, $O_CNOR.w, c45.y\n" // index += 4
+ "endrep\n"
+};
+
+
+// Writes the final diffuse color to oD0: DIFF * materialDiffuse + ambient, clamped to at most 1.
+const ShaderFragment kVS_Out_Diffuse_Lighting = {
+ 0, // input
+ (1<<kConstAmbient) | (1<<kConstMatDiffuse) | (1<<kConstLightMisc), // constants
+ 0, // deps
+ 0, // options
+ 0, // temps
+ "DIFF", // ins
+ NULL, // outs
+ "mul $O_DIFF, $O_DIFF, c46\n" // diffuse *= materialDiffuse
+ "add $O_DIFF.xyz, $O_DIFF, c44\n" // diffuse += ambient
+ "min oD0, $O_DIFF, c45.z\n" // diffuse = min(diffuse,1)
+};
+
+// Writes the final specular color to oD1: SPEC * materialSpecular, clamped to at most 1.
+const ShaderFragment kVS_Out_Specular_Lighting = {
+ 0, // input
+ (1<<kConstMatSpecular) | (1<<kConstLightMisc), // constants
+ 0, // deps
+ 0, // options
+ 0, // temps
+ "SPEC", // ins
+ NULL, // outs
+ "mul $O_SPEC, $O_SPEC, c47\n" // specular *= materialSpecular
+ "min oD1, $O_SPEC, c45.z\n" // specular = min(specular,1)
+};
+
+// Writes the final diffuse color to oD0 with the vertex color used in place of the
+// material diffuse and ambient colors; the result is clamped to at most 1.
+const ShaderFragment kVS_Out_Diffuse_Lighting_ColorDiffuseAmbient = {
+ (1<<kInputColor), // input
+ (1<<kConstColorMatAmbient) | (1<<kConstAmbient) | (1<<kConstLightMisc), // constants
+ 0, // deps
+ 0, // options
+ 0, // temps
+ "DIFF", // ins
+ NULL, // outs
+ "mul $O_DIFF, $O_DIFF, $ICOL\n" // diffuse *= vertexColor
+ "mad $O_DIFF.xyz, $ICOL, c57, $O_DIFF\n" // diffuse += ambient * vertexColor
+ "add $O_DIFF.xyz, $O_DIFF, c44\n" // diffuse += emissive
+ "min oD0, $O_DIFF, c45.z\n" // diffuse = min(diffuse,1)
+};
+
+// Writes the final diffuse color to oD0 with the vertex color added on top of the
+// lit result; the result is clamped to at most 1.
+const ShaderFragment kVS_Out_Diffuse_Lighting_ColorEmission = {
+ (1<<kInputColor), // input
+ (1<<kConstAmbient) | (1<<kConstMatDiffuse) | (1<<kConstLightMisc), // constants
+ 0, // deps
+ 0, // options
+ 0, // temps
+ "DIFF", // ins
+ NULL, // outs
+ "mul $O_DIFF, $O_DIFF, c46\n" // diffuse *= materialDiffuse
+ "add $O_DIFF.xyz, c44, $O_DIFF\n" // diffuse += ambient
+ "add $O_DIFF, $O_DIFF, $ICOL\n" // diffuse += vertex color
+ "min oD0, $O_DIFF, c45.z\n" // diffuse = min(diffuse,1)
+};
+
+
+const ShaderFragment kVS_Out_Diffuse_White = {
+ 0, // input
+ (1<<kConstLightMisc), // constants
+ 0, // deps
+ 0, // options
+ 0, // temps
+ NULL, // ins
+ NULL, // outs
+ "mov oD0, c45.z\n"
+};
+
+
+// --------------------------------------------------------------------------
+
+
+// Fragment that provides each CommonDependencies bit, in enum order
+// (kDep_CamSpacePos, kDep_CamSpaceN, kDep_ViewVector, kDep_ReflVector, kDep_Normal).
+static const ShaderFragment* kCommonDependencies[kDepCount] = {
+ &kVS_Temp_CamSpacePos,
+ &kVS_Temp_CamSpaceN,
+ &kVS_Temp_ViewVector,
+ &kVS_Temp_CamSpaceRefl,
+ &kVS_Load_Normal,
+};
+
+// Returns true for the characters that may appear in a fragment register token:
+// '$', upper-case ASCII letters and decimal digits.
+static bool IsAlNum( char c ) {
+    if( c == '$' )
+        return true;
+    const bool isUpper = (c >= 'A') && (c <= 'Z');
+    const bool isDigit = (c >= '0') && (c <= '9');
+    return isUpper || isDigit;
+}
+
+// Skips the first `count` tokens of the token list `p` (tokens are runs of IsAlNum
+// characters separated by a single other character) and returns a pointer to the
+// start of the following token. If the list ends first, the returned pointer is
+// the terminating NUL. A count of zero or less returns `p` unchanged.
+static const char* SkipTokens( const char* p, int count ) {
+    while( count-- > 0 ) {
+        // Advance to the first character after the token. The increment is kept out
+        // of the loop condition so the separator itself is not consumed here.
+        while( IsAlNum(*p) )
+            ++p;
+        // End of list: stay on the terminator instead of stepping past it.
+        if( *p == 0 )
+            return p;
+        // Step over the single separator character.
+        ++p;
+    }
+    return p;
+}
+
+// Extracts the token (run of IsAlNum characters) that starts at *text.
+// Returns the token and moves *text to the start of the next token. Returns an
+// empty string and leaves *text untouched when *text does not start with a token,
+// which includes being at the end of the list.
+static std::string ExtractToken( const char** text ) {
+    const char* begin = *text;
+    const char* end = begin;
+    while( IsAlNum(*end) )
+        ++end;
+
+    if( end == begin )
+        return std::string();
+
+    // result
+    std::string res(begin, end);
+
+    // Skip the separator after the result, but never step past the terminating NUL:
+    // callers keep calling until an empty token comes back, so the next call must
+    // still see the terminator rather than whatever follows the string in memory.
+    *text = (*end != 0) ? end + 1 : end;
+
+    return res;
+}
+
+// Appends a fragment to the shader being built.
+//   fragment   - fragment description; must not be NULL.
+//   inputNames - token list naming the saved registers bound to the fragment's
+//                "$n" inputs, or NULL when it has none.
+//   param      - value substituted for $PARAM (and used to pick the texture matrix).
+// Fragments named by fragment->dependencies are added first. A request identical to
+// one already added (same fragment, param and inputNames) is ignored.
+void ShaderGenerator::AddFragment( const ShaderFragment* fragment, const char* inputNames, int param )
+{
+    // is already added?
+    FragmentData data(fragment, inputNames, param);
+    for( int i = 0; i < m_FragmentCount; ++i ) {
+        if( m_Fragments[i] == data )
+            return;
+    }
+
+    // add its dependencies first
+    if( fragment->dependencies ) {
+        for( int i = 0; i < kDepCount; ++i ) {
+            // has this dependency?
+            if( !(fragment->dependencies & (1<<i)) )
+                continue;
+            AddFragment( kCommonDependencies[i] );
+        }
+    }
+
+    // Check capacity before writing, so a full array is never overrun even in
+    // builds where assert() compiles to nothing.
+    assert( m_FragmentCount < kMaxShaderFragments );
+    if( m_FragmentCount >= kMaxShaderFragments )
+        return;
+
+    // add itself
+    m_Fragments[m_FragmentCount] = data;
+    m_FragmentCount++;
+}
+
+// Register plus its living range - first and last shader fragment indices
+// on where it can be used.
+struct SavedRegister {
+ std::string name; // placeholder name including the "$O_" prefix
+ int firstUse; // index of the first fragment that lists it in its outs
+ int lastUse; // index of the last fragment that lists it in its outs or ins
+ int regIndex; // assigned temporary register index, -1 until assigned
+};
+typedef std::vector<SavedRegister> SavedRegisters;
+
+// Linear search of `regs` by name. Returns the index of the first entry whose
+// name equals `name`, or -1 when there is none.
+static inline int FindSavedRegister( const SavedRegisters& regs, const std::string& name )
+{
+    const int count = regs.size();
+    int index = 0;
+    while( index < count && regs[index].name != name )
+        ++index;
+    return (index < count) ? index : -1;
+}
+
+// Formats a register name such as "v0", "r7" or "r11" from a one-character
+// register file prefix and a register index in [0, 99].
+static std::string MakeRegisterName( char prefix, int index )
+{
+    assert( index >= 0 && index <= 99 );
+    std::string name( 1, prefix );
+    if( index >= 10 )
+        name += char('0' + index / 10);
+    name += char('0' + index % 10);
+    return name;
+}
+
+// Assembles all added fragments into one vs_2_0 assembly listing.
+//   output        - receives the shader text; previous contents are discarded.
+//   usedConstants - receives the OR of the `constants` masks of all fragments.
+//
+// Pass 1 walks the fragments to collect the vertex inputs, the used constants and
+// the lifetime (first/last fragment index) of every register saved across
+// fragments, then gives each saved register a temporary register that is free over
+// its whole lifetime. Pass 2 emits the dcl_ prolog followed by each fragment's text
+// with all $-placeholders replaced by real register names.
+void ShaderGenerator::GenerateShader( std::string& output, unsigned int& usedConstants )
+{
+    unsigned int usedConstantsMask = 0;
+
+    output.clear();
+    output.reserve(1024);
+
+    // shader input mappings: inputMapping[input] is the v# index or -1 when unused,
+    // usedInputStack[v#] is the input assigned to that register
+    int inputMapping[kInputCount];
+    for( int i = 0; i < kInputCount; ++i )
+        inputMapping[i] = -1;
+    int usedInputStack[kInputCount];
+    int usedInputs = 0;
+
+    // saved registers across fragments
+    SavedRegisters savedRegisters;
+
+    // go over fragments and figure out inputs, saved registers and used constants
+    for( int fi = 0; fi < m_FragmentCount; ++fi ) {
+        const ShaderFragment& frag = *m_Fragments[fi].fragment;
+
+        // fragment vertex inputs
+        for( int i = 0; i < kInputCount; ++i ) {
+            // does fragment use this input?
+            if( frag.inputs & (1<<i) ) {
+                // add to inputs list if not there yet
+                if( inputMapping[i] == -1 ) {
+                    usedInputStack[usedInputs] = i;
+                    inputMapping[i] = usedInputs;
+                    ++usedInputs;
+                }
+            }
+        }
+
+        // remember output registers
+        if( frag.outs ) {
+            const char* outputs = frag.outs;
+            std::string token;
+            while( !(token = ExtractToken(&outputs)).empty() ) {
+                token = "$O_" + token;
+                //TODO: check that text has that token.
+                //TODO: check that text has no $O_ tokens that are not in the output
+                // add to list if not there yet
+                int savedIndex = FindSavedRegister( savedRegisters, token );
+                if( savedIndex == -1 )
+                {
+                    SavedRegister r;
+                    r.name = token;
+                    r.firstUse = fi;
+                    r.lastUse = fi;
+                    r.regIndex = -1;
+                    savedRegisters.push_back( r );
+                }
+                else
+                {
+                    savedRegisters[savedIndex].lastUse = fi;
+                    assert(savedRegisters[savedIndex].firstUse <= savedRegisters[savedIndex].lastUse);
+                }
+            }
+        }
+
+        // from fragment input registers, determine last use of saved registers
+        if( frag.ins ) {
+            const char* inputs = frag.ins;
+            std::string token;
+            while( !(token = ExtractToken(&inputs)).empty() ) {
+                // a parametrized token ("$n") names the n-th entry of the
+                // inputNames that were passed to AddFragment
+                if( token[0] == '$' ) {
+                    assert(token.size()==2);
+                    assert(token[1]>='0' && token[1]<='9');
+                    int index = token[1]-'0';
+                    const char* inputNames = m_Fragments[fi].inputNames;
+                    assert(inputNames != NULL);
+                    inputNames = SkipTokens( inputNames, index );
+                    token = ExtractToken(&inputNames);
+                }
+                token = "$O_" + token;
+
+                //TODO: check that text has that token.
+                //TODO: check that text has no $O_ tokens that are not in the input
+                int savedIndex = FindSavedRegister( savedRegisters, token );
+                assert(savedIndex != -1);
+                // An input nobody produced is a fragment setup error; do not index
+                // the vector with -1 in builds where the assert is compiled out.
+                if( savedIndex == -1 )
+                    continue;
+                assert(savedRegisters[savedIndex].lastUse <= fi);
+                savedRegisters[savedIndex].lastUse = fi;
+            }
+        }
+
+        // used constants
+        usedConstantsMask |= frag.constants;
+    }
+
+    assert( savedRegisters.size() <= (size_t)kMaxSavedRegisters );
+
+    // assign register indices to saved registers
+    int mapFragmentRegister[kMaxShaderFragments][kMaxTempRegisters]; // [fragment][index] = used or not?
+    memset(mapFragmentRegister, 0, sizeof(mapFragmentRegister));
+    for( size_t i = 0; i < savedRegisters.size(); ++i ) {
+        // find unused register over whole lifetime, and assign it
+        SavedRegister& sr = savedRegisters[i];
+        assert(sr.regIndex == -1);
+        for( int regIndex = 0; regIndex < kMaxTempRegisters; ++regIndex ) {
+            bool unused = true;
+            for( int fi = sr.firstUse; fi <= sr.lastUse; ++fi ) {
+                if( mapFragmentRegister[fi][regIndex] != 0 ) {
+                    unused = false;
+                    break;
+                }
+            }
+            if( unused ) {
+                for( int fi = sr.firstUse; fi <= sr.lastUse; ++fi )
+                    mapFragmentRegister[fi][regIndex] = 1;
+                sr.regIndex = regIndex;
+                break;
+            }
+        }
+        assert(sr.regIndex != -1);
+    }
+
+    // generate prolog with declarations
+    output += "vs_2_0\n";
+    for( int i = 0; i < usedInputs; ++i ) {
+        output += kShaderInputDecls[usedInputStack[i]];
+        output += ' ';
+        output += MakeRegisterName( 'v', i );
+        output += '\n';
+    }
+
+    // go over fragments, transform register names and output
+    for( int fi = 0; fi < m_FragmentCount; ++fi ) {
+        const ShaderFragment& frag = *m_Fragments[fi].fragment;
+        const int param = m_Fragments[fi].param;
+
+        output += '\n';
+        std::string text = frag.text;
+        std::string regname;
+
+        // input registers: $IPOS etc. -> v#
+        for( int i = 0; i < usedInputs; ++i ) {
+            regname = MakeRegisterName( 'v', i );
+            replace_string(text, kShaderInputNames[usedInputStack[i]], regname);
+        }
+
+        // fragment inputs
+        if( frag.ins ) {
+            const char* inputs = frag.ins;
+            std::string token;
+            while( !(token = ExtractToken(&inputs)).empty() ) {
+                std::string searchName;
+                // a parametrized token? It appears as $I_n in the text and refers
+                // to the saved register named by the n-th inputNames entry.
+                if( token[0] == '$' ) {
+                    assert(token.size()==2);
+                    assert(token[1]>='0' && token[1]<='9');
+                    int index = token[1]-'0';
+                    const char* inputNames = m_Fragments[fi].inputNames;
+                    assert(inputNames != NULL);
+                    inputNames = SkipTokens( inputNames, index );
+                    token = ExtractToken(&inputNames);
+                    searchName = std::string("$I_") + char('0'+index);
+                } else {
+                    searchName = "$O_" + token;
+                }
+                std::string savedName = "$O_" + token;
+
+                // replace with the register assigned to this saved reg
+                const int savedIndex = FindSavedRegister( savedRegisters, savedName );
+                assert( savedIndex != -1 );
+                if( savedIndex == -1 )
+                    continue;
+                regname = MakeRegisterName( 'r', savedRegisters[savedIndex].regIndex );
+                replace_string(text, searchName, regname);
+            }
+        }
+
+        // saved registers. Indices 10 and 11 are valid here as well, since
+        // kMaxTempRegisters is 12, so the name may need two digits.
+        if( frag.outs ) {
+            for( size_t i = 0; i < savedRegisters.size(); ++i ) {
+                const SavedRegister& sr = savedRegisters[i];
+                assert(sr.regIndex >= 0 && sr.regIndex < kMaxTempRegisters);
+                regname = MakeRegisterName( 'r', sr.regIndex );
+                replace_string(text, sr.name, regname);
+            }
+        }
+
+        // fragment-private temporary registers
+        std::string tmpname("$TMP0");
+        int regIndex = 0;
+        for( int i = 0; i < frag.temps; ++i ) {
+            assert(i<=9);
+            tmpname[4] = '0' + i;
+            // find unused register at this fragment
+            while( regIndex < kMaxTempRegisters && mapFragmentRegister[fi][regIndex] != 0 )
+                ++regIndex;
+            assert(regIndex < kMaxTempRegisters);
+            regname = MakeRegisterName( 'r', regIndex );
+            replace_string(text, tmpname, regname);
+            ++regIndex;
+        }
+
+        // parameter
+        if( param >= 0 ) {
+            std::string paramString("0");
+            assert(param<=9);
+            paramString[0] = '0'+param;
+            replace_string(text, "$PARAM", paramString);
+        }
+
+        // texture matrix parameters: $TMPARAMn -> constant register holding row n
+        // of texture matrix number `param`
+        if( frag.options & kOptionHasTexMatrix ) {
+            assert(param >= 0);
+            std::string tmpstring("$TMPARAM0");
+            std::string paramString("c00");
+            for( int i = 0; i < 4; ++i ) {
+                tmpstring[8] = '0' + i;
+                int constant = kConstantLocations[kConstMatrixTexture] + param*4 + i;
+                assert(constant >= 0 && constant <= 99);
+                paramString[1] = '0' + constant/10;
+                paramString[2] = '0' + constant%10;
+                replace_string(text, tmpstring, paramString);
+            }
+        }
+
+        output += text;
+    }
+
+    usedConstants = usedConstantsMask;
+
+    // checks
+
+    // should be no '$' left
+    assert( output.find('$') == std::string::npos );
+}
diff --git a/Runtime/GfxDevice/d3d/ShaderGenerator.h b/Runtime/GfxDevice/d3d/ShaderGenerator.h
new file mode 100644
index 0000000..ccc52b7
--- /dev/null
+++ b/Runtime/GfxDevice/d3d/ShaderGenerator.h
@@ -0,0 +1,100 @@
+#pragma once
+#include <string>
+
+// Constants a shader fragment can use. The values are bit positions in
+// ShaderFragment::constants and indices into kConstantLocations.
+enum ShaderConstant {
+ kConstMatrixMVP, // model*view*proj
+ kConstMatrixMV, // model*view
+ kConstMatrixMV_IT, // model*view inverse transpose
+ kConstMatrixTexture,// texture matrix
+ kConstAmbient, // materialEmissive + sceneAmbient * materialAmbient
+ kConstColorMatAmbient, // various combos of kConstAmbient, based on color material mode
+ kConstLightMisc, // 0, 4, 1, 0.5
+ kConstMatDiffuse, // material diffuse
+ kConstMatSpecular, // material specular
+ kConstLightIndexes, // light start indexes * 4
+ kConstCount
+};
+
+extern const int kConstantLocations[kConstCount];
+
+
+// One building block of a generated vertex shader: a piece of assembly text with
+// $-placeholders plus the resources that text needs.
+struct ShaderFragment
+{
+ unsigned int inputs; // bitmask of (1<<kInput*) vertex inputs read by the text
+ unsigned int constants; // bitmask of (1<<kConst*) shader constants used by the text
+ unsigned int dependencies; // bitmask of (1<<kDep_*) fragments that must be added before this one
+ unsigned int options; // option flags such as kOptionHasTexMatrix
+ int temps; // number of fragment-private temporaries ($TMP0..) the text uses
+ const char* ins; // space separated saved registers read; "$n" refers to the n-th AddFragment input name; may be NULL
+ const char* outs; // space separated saved registers written; may be NULL
+ const char* text; // assembly text containing the placeholders
+};
+
+
+// Collects shader fragments through AddFragment and turns them into vertex shader
+// assembly text with GenerateShader.
+class ShaderGenerator
+{
+public:
+    // Capacity limits of the generator.
+    enum {
+        kMaxShaderFragments = 32,
+        kMaxTempRegisters = 12,
+        kMaxSavedRegisters = 16,
+    };
+
+private:
+    // One AddFragment request: the fragment plus the arguments it was added with.
+    struct FragmentData {
+        FragmentData()
+            : fragment(NULL), inputNames(NULL), param(0)
+        {
+        }
+        FragmentData( const ShaderFragment* f, const char* inames, int p )
+            : fragment(f), inputNames(inames), param(p)
+        {
+        }
+
+        // Equal when fragment and param match and the input name lists are either
+        // both NULL or both non-NULL with identical contents.
+        bool operator==( const FragmentData& rhs ) const {
+            if( fragment != rhs.fragment || param != rhs.param )
+                return false;
+            if( inputNames == NULL || rhs.inputNames == NULL )
+                return inputNames == rhs.inputNames;
+            return strcmp(inputNames, rhs.inputNames) == 0;
+        }
+
+        const ShaderFragment* fragment;
+        const char* inputNames;
+        int param;
+    };
+
+public:
+
+    ShaderGenerator()
+        : m_FragmentCount(0)
+    {
+    }
+
+    void AddFragment( const ShaderFragment* fragment, const char* inputNames = NULL, int param = -1 );
+    void GenerateShader( std::string& output, unsigned int& usedConstants );
+
+private:
+    int m_FragmentCount;                            // number of valid entries in m_Fragments
+    FragmentData m_Fragments[kMaxShaderFragments];  // fragments in the order they were added
+};
+
+
+extern const ShaderFragment kVS_Pos;
+extern const ShaderFragment kVS_Light_Diffuse_Pre;
+extern const ShaderFragment kVS_Light_Diffuse_Dir;
+extern const ShaderFragment kVS_Light_Diffuse_Point;
+extern const ShaderFragment kVS_Light_Diffuse_Spot;
+extern const ShaderFragment kVS_Light_Specular_Pre;
+extern const ShaderFragment kVS_Light_Specular_Dir;
+extern const ShaderFragment kVS_Light_Specular_Point;
+extern const ShaderFragment kVS_Light_Specular_Spot;
+extern const ShaderFragment kVS_Out_Diffuse_Lighting;
+extern const ShaderFragment kVS_Out_Specular_Lighting;
+extern const ShaderFragment kVS_Out_Diffuse_Lighting_ColorDiffuseAmbient;
+extern const ShaderFragment kVS_Out_Diffuse_Lighting_ColorEmission;
+extern const ShaderFragment kVS_Out_Diffuse_VertexColor;
+extern const ShaderFragment kVS_Out_Diffuse_White;
+extern const ShaderFragment kVS_Load_UV0;
+extern const ShaderFragment kVS_Load_UV1;
+extern const ShaderFragment kVS_Load_Normal;
+extern const ShaderFragment kVS_Normalize_Normal;
+extern const ShaderFragment kVS_Out_TexCoord;
+extern const ShaderFragment kVS_Out_Matrix2;
+extern const ShaderFragment kVS_Out_Matrix3;
+extern const ShaderFragment kVS_Temp_CamSpacePos;
+extern const ShaderFragment kVS_Temp_CamSpaceN;
+extern const ShaderFragment kVS_Temp_CamSpaceRefl;
+extern const ShaderFragment kVS_Temp_ObjSpacePos;
+extern const ShaderFragment kVS_Temp_SphereMap;
diff --git a/Runtime/GfxDevice/d3d/ShaderPatchingD3D9.cpp b/Runtime/GfxDevice/d3d/ShaderPatchingD3D9.cpp
new file mode 100644
index 0000000..87f8e17
--- /dev/null
+++ b/Runtime/GfxDevice/d3d/ShaderPatchingD3D9.cpp
@@ -0,0 +1,376 @@
+#include "UnityPrefix.h"
+#include "ShaderPatchingD3D9.h"
+#include "Runtime/Utilities/BitUtility.h"
+#include "Runtime/Utilities/Word.h"
+
+#define DEBUG_FOG_PATCHING 0
+
+
+// True for either end-of-line character: line feed or carriage return.
+static inline bool IsNewline( char c )
+{
+    switch (c)
+    {
+    case '\n':
+    case '\r':
+        return true;
+    default:
+        return false;
+    }
+}
+
+// Scans shader assembly text for "dcl_*" declarations and returns the highest
+// register index declared for registers whose name starts with registerName
+// (e.g. 'v' matches "v0", "v3.xyz"). Returns -1 when no such declaration exists.
+static int FindMaxUsedDclIndex (const std::string& src, char registerName)
+{
+    const size_t n = src.size();
+    size_t pos = 0;
+    int maxDcl = -1;
+    while ((pos = src.find("dcl_", pos)) != std::string::npos)
+    {
+        // skip "dcl_"
+        pos += 4;
+
+        // skip until end of dcl_*
+        // (the <ctype> functions are only defined for unsigned char values, hence the casts)
+        while (pos < n && !isspace((unsigned char)src[pos]))
+            ++pos;
+        // skip space
+        while (pos < n && isspace((unsigned char)src[pos]))
+            ++pos;
+        // is this the register type we are looking for?
+        if (pos < n && src[pos] == registerName)
+        {
+            int number = -1;
+            // a register name that is not followed by a number is ignored
+            if (sscanf (src.c_str() + pos + 1, "%d", &number) == 1 && number > maxDcl)
+                maxDcl = number;
+        }
+    }
+    return maxDcl;
+}
+
+
+// Inserts a "dcl_fog <registerName>" line at the start of the first line that
+// follows the shader version line. Scanning begins at offset 6, i.e. just past
+// a "vs_3_0"/"ps_3_0" token. Returns false, leaving src untouched, when there
+// is no text after the version line.
+static bool InsertFogDcl (std::string& src, const std::string& registerName)
+{
+    const size_t length = src.size();
+    size_t insertAt = 6;
+
+    // run to the end of the version line...
+    for (; insertAt < length; ++insertAt)
+    {
+        if (IsNewline(src[insertAt]))
+            break;
+    }
+    // ...then past the newline character(s) that terminate it
+    for (; insertAt < length; ++insertAt)
+    {
+        if (!IsNewline(src[insertAt]))
+            break;
+    }
+    if (insertAt >= length)
+        return false;
+
+    const std::string dclLine = Format("dcl_fog %s\n", registerName.c_str());
+    src.insert (insertAt, dclLine);
+    return true;
+}
+
+
+// Patches a ps_3_0 shader so that fog is applied to its final colour:
+//  - declares v9.x as the fog input,
+//  - redirects every oC0 reference to temporary r30,
+//  - appends the fog factor computation for the requested mode (scratch register r31)
+//    followed by the blend with the fog colour constant,
+//  - appends the final "mov oC0, r30".
+// fogColorReg / fogParamsReg are the constant register indices used for the fog
+// colour and the fog parameters. Shaders that do not start with "ps_3_0" are left
+// untouched and reported as success. Returns false when an input register with
+// index 9 or above is already declared, or when the dcl could not be inserted.
+bool PatchPixelShaderFogD3D9 (std::string& src, FogMode fog, int fogColorReg, int fogParamsReg)
+{
+    if (strncmp(src.c_str(), "ps_3_0", 6) != 0)
+        return true; // nothing to do
+
+    #if DEBUG_FOG_PATCHING
+    printf_console ("D3D9 fog patching: original pixel shader:\n%s\n", src.c_str());
+    #endif
+
+    // SM3.0 has 10 input registers (v0..v9); the last one has to be free for fog.
+    const int fogInputReg = 9;
+    if (FindMaxUsedDclIndex (src, 'v') >= fogInputReg)
+        return false; // out of registers
+
+    if (!InsertFogDcl (src, Format("v%d.x", fogInputReg)))
+    {
+        DebugAssert (!"failed to insert fog dcl");
+        return false;
+    }
+
+    // The shader now writes its colour to r30 instead of oC0; r31 is scratch.
+    const int colorTemp = 30;
+    const int scratchTemp = 31;
+    replace_string (src, "oC0", "r30");
+
+    // appended instructions must start on a fresh line
+    const char lastChar = src[src.size()-1];
+    if (!IsNewline(lastChar))
+        src += '\n';
+
+    // Instructions that leave the fog factor in the scratch register's x component.
+    // Stays empty for fog modes other than the three handled below.
+    std::string fogFactorCode;
+    if (fog == kFogExp2)
+    {
+        // fog = exp(-(density*z)^2)
+        fogFactorCode += Format("mul r%d.x, c%d.x, v%d.x\n", scratchTemp, fogParamsReg, fogInputReg); // tmp = (density/sqrt(ln(2))) * fog
+        fogFactorCode += Format("mul r%d.x, r%d.x, r%d.x\n", scratchTemp, scratchTemp, scratchTemp); // tmp = tmp * tmp
+        fogFactorCode += Format("exp_sat r%d.x, -r%d.x\n", scratchTemp, scratchTemp); // tmp = saturate (exp2 (-tmp))
+    }
+    else if (fog == kFogExp)
+    {
+        // fog = exp(-density*z)
+        fogFactorCode += Format("mul r%d.x, c%d.y, v%d.x\n", scratchTemp, fogParamsReg, fogInputReg); // tmp = (density/ln(2)) * fog
+        fogFactorCode += Format("exp_sat r%d.x, -r%d.x\n", scratchTemp, scratchTemp); // tmp = saturate (exp2 (-tmp))
+    }
+    else if (fog == kFogLinear)
+    {
+        // fog = (end-z)/(end-start)
+        fogFactorCode += Format("mad_sat r%d.x, c%d.z, v%d.x, c%d.w\n", scratchTemp, fogParamsReg, fogInputReg, fogParamsReg); // tmp = (-1/(end-start)) * fog + (end/(end-start))
+    }
+
+    if (!fogFactorCode.empty())
+    {
+        src += fogFactorCode;
+        // every handled mode ends with the same blend of shader colour and fog colour by the fog factor
+        src += Format("lrp r%d.rgb, r%d.x, r%d, c%d\n", colorTemp, scratchTemp, colorTemp, fogColorReg);
+    }
+
+    // append final move into oC0
+    src += Format("mov oC0, r%d\n", colorTemp);
+
+    #if DEBUG_FOG_PATCHING
+    printf_console ("D3D9 fog patching: after patching, fog mode %d:\n%s\n", fog, src.c_str());
+    #endif
+
+    return true;
+}
+
+
+// Patches a vs_3_0 shader so that it also outputs a fog value in o9:
+// a "dcl_fog o9" line is inserted after the version line, and the instruction
+// that writes the position (o0.z, or o0 as a whole) is duplicated right after
+// itself with o9 as destination (for a full o0 write, ".z" is appended to the
+// duplicated instruction).
+// Shaders that do not start with "vs_3_0" are left untouched and reported as success.
+// Returns false when an output register with index 9 or above is already declared,
+// when the dcl cannot be inserted, or when no write to o0 is found.
+bool PatchVertexShaderFogD3D9 (std::string& src)
+{
+    const bool isVS3 = !strncmp(src.c_str(), "vs_3_0", 6);
+    if (!isVS3)
+        return true; // nothing to do
+
+    #if DEBUG_FOG_PATCHING
+    printf_console ("D3D9 fog patching: original vertex shader:\n%s\n", src.c_str());
+    #endif
+
+    // SM3.0 has 12 output registers (o0..o11), but the pixel shader only has 10 input ones.
+    // Play it safe and let's assume we only have 10 here.
+
+    const int maxDclReg = FindMaxUsedDclIndex (src, 'o');
+    if (maxDclReg >= 9)
+    {
+        // out of registers
+        return false;
+    }
+    const int fogReg = 9;
+    std::string fogRegName = Format("o%d", fogReg);
+    if (!InsertFogDcl (src, fogRegName))
+    {
+        DebugAssert (!"failed to insert fog dcl");
+        return false;
+    }
+
+    // find write to o0, and do the same for oFog
+    size_t posWrite = src.find ("o0.z,");
+    bool writesFullPos = false;
+    if (posWrite == std::string::npos)
+    {
+        posWrite = src.find ("o0,");
+        if (posWrite == std::string::npos)
+        {
+            DebugAssert (!"couldn't find write to o0");
+            return false;
+        }
+        writesFullPos = true;
+    }
+
+    // get whole line: [posWriteStart, posWriteEnd) excludes the line terminator
+    const size_t n = src.size();
+    size_t posWriteStart = posWrite, posWriteEnd = posWrite;
+    // look at the previous character so the scan also stops correctly at offset 0
+    while (posWriteStart > 0 && !IsNewline(src[posWriteStart-1])) --posWriteStart;
+    while (posWriteEnd < n && !IsNewline(src[posWriteEnd])) ++posWriteEnd;
+
+    std::string instr = src.substr (posWriteStart, posWriteEnd-posWriteStart);
+    if (writesFullPos)
+    {
+        replace_string (instr, "o0", fogRegName, 0);
+        instr += ".z";
+    }
+    else
+    {
+        replace_string (instr, "o0.z", fogRegName, 0);
+    }
+    instr += '\n';
+
+    // insert fog code just after write to position
+    if (posWriteEnd >= n)
+    {
+        // The write is the last line and has no terminator. Inserting at n+1 would
+        // throw std::out_of_range, so terminate the line and append instead.
+        src += '\n';
+        src += instr;
+    }
+    else
+    {
+        // step over the terminator; a "\r\n" pair counts as one terminator
+        size_t insertPos = posWriteEnd + 1;
+        if (src[posWriteEnd] == '\r' && insertPos < n && src[insertPos] == '\n')
+            ++insertPos;
+        src.insert (insertPos, instr);
+    }
+
+    #if DEBUG_FOG_PATCHING
+    printf_console ("D3D9 fog patching: after patching:\n%s\n", src.c_str());
+    #endif
+
+    return true;
+}
+
+
+// --------------------------------------------------------------------------
+
+#if ENABLE_UNIT_TESTS
+
+#include "External/UnitTest++/src/UnitTest++.h"
+
+SUITE (ShaderPatchingD3D9Tests)
+{
+
+// Inputs without a complete "dcl_* v<N>" declaration must yield -1.
+TEST(FindMaxDclIndexNotPresent)
+{
+ CHECK_EQUAL (-1, FindMaxUsedDclIndex("", 'v'));
+ CHECK_EQUAL (-1, FindMaxUsedDclIndex("foobar", 'v'));
+ CHECK_EQUAL (-1, FindMaxUsedDclIndex("dcl_", 'v'));
+ CHECK_EQUAL (-1, FindMaxUsedDclIndex("dcl_foo", 'v'));
+ CHECK_EQUAL (-1, FindMaxUsedDclIndex("dcl_foo ", 'v'));
+ CHECK_EQUAL (-1, FindMaxUsedDclIndex("dcl_foo v", 'v')); // register letter without an index
+}
+// A single declaration: the index is parsed with or without a swizzle/mask suffix,
+// with more than one digit, and when the dcl is surrounded by other shader lines.
+TEST(FindMaxDclIndexOne)
+{
+ CHECK_EQUAL (0, FindMaxUsedDclIndex("dcl_foobar v0", 'v'));
+ CHECK_EQUAL (1, FindMaxUsedDclIndex("dcl_foobar v1", 'v'));
+ CHECK_EQUAL (2, FindMaxUsedDclIndex("dcl_foobar v2.x", 'v'));
+ CHECK_EQUAL (3, FindMaxUsedDclIndex("dcl_foobar v3.rgb", 'v'));
+ CHECK_EQUAL (6, FindMaxUsedDclIndex("dcl_foobar v6", 'v'));
+ CHECK_EQUAL (10, FindMaxUsedDclIndex("dcl_foobar v10", 'v'));
+ CHECK_EQUAL (0, FindMaxUsedDclIndex("ps_3_0\ndcl_foobar v0\nmov oC0, v0", 'v'));
+}
+// Several declarations: the maximum index wins regardless of declaration order.
+TEST(FindMaxDclIndexMultiple)
+{
+ CHECK_EQUAL (2, FindMaxUsedDclIndex("dcl_foobar v0\ndcl_foobar v2", 'v'));
+ CHECK_EQUAL (3, FindMaxUsedDclIndex("dcl_foobar v3\ndcl_foobar v1", 'v'));
+}
+
+// A shader that writes only o0.z: the instruction is duplicated with o9 as
+// destination, and dcl_fog is inserted right after the version line.
+TEST(PatchVSZWrite)
+{
+ std::string s;
+ s = "vs_3_0\n"
+ "dcl_position o0\n"
+ "dp4 o0.z, c0, c1\n"
+ ;
+ CHECK (PatchVertexShaderFogD3D9(s));
+ CHECK_EQUAL(
+ "vs_3_0\n"
+ "dcl_fog o9\n"
+ "dcl_position o0\n"
+ "dp4 o0.z, c0, c1\n"
+ "dp4 o9, c0, c1\n"
+ , s);
+}
+// A shader that writes the whole o0 register: the duplicated instruction
+// targets o9 and gets ".z" appended to select the z component of the source.
+TEST(PatchVSFullWrite)
+{
+ std::string s;
+ s = "vs_3_0\n"
+ "dcl_position o0\n"
+ "mov o0, c0\n"
+ ;
+ CHECK (PatchVertexShaderFogD3D9(s));
+ CHECK_EQUAL(
+ "vs_3_0\n"
+ "dcl_fog o9\n"
+ "dcl_position o0\n"
+ "mov o0, c0\n"
+ "mov o9, c0.z\n"
+ , s);
+}
+// The position write is followed by further instructions: the fog write must be
+// inserted directly after the position write, not at the end of the shader.
+TEST(PatchVSWriteNotAtEnd)
+{
+ std::string s;
+ s = "vs_3_0\n"
+ "dcl_position o0\n"
+ "mov o0, r0\n"
+ "mov r0, r1\n"
+ ;
+ CHECK (PatchVertexShaderFogD3D9(s));
+ CHECK_EQUAL(
+ "vs_3_0\n"
+ "dcl_fog o9\n"
+ "dcl_position o0\n"
+ "mov o0, r0\n"
+ "mov o9, r0.z\n"
+ "mov r0, r1\n"
+ , s);
+}
+// Pixel shader that writes oC0.xyz and oC0.w in two separate instructions.
+// Both writes must be redirected to r30, dcl_fog must follow the version line,
+// and the Exp2 fog code (fog colour in c6, fog params in c7) plus the final
+// "mov oC0, r30" must be appended.
+TEST(PatchPSDisjointColorAlphaWrite)
+{
+ // input shader
+ std::string s =
+ "ps_3_0\n"
+ "; 31 ALU, 2 TEX\n"
+ "dcl_2d s0\n"
+ "dcl_2d s1\n"
+ "def c5, 0.0, 128.0, 2.0, 0\n"
+ "dcl_texcoord0 v0.xy\n"
+ "dcl_texcoord1 v1.xyz\n"
+ "dcl_texcoord2 v2.xyz\n"
+ "dcl_texcoord3 v3.xyz\n"
+ "dcl_texcoord4 v4\n"
+ "texldp r3.x, v4, s1\n"
+ "dp3_pp r0.x, v3, v3\n"
+ "rsq_pp r0.x, r0.x\n"
+ "mad_pp r0.xyz, r0.x, v3, c0\n"
+ "dp3_pp r0.w, r0, r0\n"
+ "rsq_pp r0.w, r0.w\n"
+ "mul_pp r0.xyz, r0.w, r0\n"
+ "mov_pp r0.w, c4.x\n"
+ "dp3_pp r0.x, v1, r0\n"
+ "dp3_pp r2.x, v1, c0\n"
+ "mul_pp r1.y, c5, r0.w\n"
+ "max_pp r1.x, r0, c5\n"
+ "pow r0, r1.x, r1.y\n"
+ "mov r1.x, r0\n"
+ "texld r0, v0, s0\n"
+ "mul r1.w, r0, r1.x\n"
+ "mul_pp r1.xyz, r0, c3\n"
+ "mul_pp r0.xyz, r1, c1\n"
+ "max_pp r2.x, r2, c5\n"
+ "mul_pp r2.xyz, r0, r2.x\n"
+ "mov_pp r0.xyz, c1\n"
+ "mul_pp r0.xyz, c2, r0\n"
+ "mad r0.xyz, r0, r1.w, r2\n"
+ "mul_pp r2.w, r3.x, c5.z\n"
+ "mul r0.xyz, r0, r2.w\n"
+ "mad_pp oC0.xyz, r1, v2, r0\n" // color RGB
+ "mov_pp r2.x, c1.w\n"
+ "mul_pp r0.x, c2.w, r2\n"
+ "mul_pp r0.y, r0.w, c3.w\n"
+ "mul r0.x, r1.w, r0\n"
+ "mad oC0.w, r3.x, r0.x, r0.y\n"; // color A
+ // expected result after patching
+ std::string exps =
+ "ps_3_0\n"
+ "dcl_fog v9.x\n"
+ "; 31 ALU, 2 TEX\n"
+ "dcl_2d s0\n"
+ "dcl_2d s1\n"
+ "def c5, 0.0, 128.0, 2.0, 0\n"
+ "dcl_texcoord0 v0.xy\n"
+ "dcl_texcoord1 v1.xyz\n"
+ "dcl_texcoord2 v2.xyz\n"
+ "dcl_texcoord3 v3.xyz\n"
+ "dcl_texcoord4 v4\n"
+ "texldp r3.x, v4, s1\n"
+ "dp3_pp r0.x, v3, v3\n"
+ "rsq_pp r0.x, r0.x\n"
+ "mad_pp r0.xyz, r0.x, v3, c0\n"
+ "dp3_pp r0.w, r0, r0\n"
+ "rsq_pp r0.w, r0.w\n"
+ "mul_pp r0.xyz, r0.w, r0\n"
+ "mov_pp r0.w, c4.x\n"
+ "dp3_pp r0.x, v1, r0\n"
+ "dp3_pp r2.x, v1, c0\n"
+ "mul_pp r1.y, c5, r0.w\n"
+ "max_pp r1.x, r0, c5\n"
+ "pow r0, r1.x, r1.y\n"
+ "mov r1.x, r0\n"
+ "texld r0, v0, s0\n"
+ "mul r1.w, r0, r1.x\n"
+ "mul_pp r1.xyz, r0, c3\n"
+ "mul_pp r0.xyz, r1, c1\n"
+ "max_pp r2.x, r2, c5\n"
+ "mul_pp r2.xyz, r0, r2.x\n"
+ "mov_pp r0.xyz, c1\n"
+ "mul_pp r0.xyz, c2, r0\n"
+ "mad r0.xyz, r0, r1.w, r2\n"
+ "mul_pp r2.w, r3.x, c5.z\n"
+ "mul r0.xyz, r0, r2.w\n"
+ "mad_pp r30.xyz, r1, v2, r0\n"
+ "mov_pp r2.x, c1.w\n"
+ "mul_pp r0.x, c2.w, r2\n"
+ "mul_pp r0.y, r0.w, c3.w\n"
+ "mul r0.x, r1.w, r0\n"
+ "mad r30.w, r3.x, r0.x, r0.y\n"
+ "mul r31.x, c7.x, v9.x\n"
+ "mul r31.x, r31.x, r31.x\n"
+ "exp_sat r31.x, -r31.x\n"
+ "lrp r30.rgb, r31.x, r30, c6\n"
+ "mov oC0, r30\n";
+ CHECK (PatchPixelShaderFogD3D9(s, kFogExp2, 6, 7));
+ CHECK_EQUAL(exps, s);
+}
+
+} // SUITE
+
+#endif // ENABLE_UNIT_TESTS
diff --git a/Runtime/GfxDevice/d3d/ShaderPatchingD3D9.h b/Runtime/GfxDevice/d3d/ShaderPatchingD3D9.h
new file mode 100644
index 0000000..e36a619
--- /dev/null
+++ b/Runtime/GfxDevice/d3d/ShaderPatchingD3D9.h
@@ -0,0 +1,7 @@
+#pragma once
+
+#include <string>
+#include "Runtime/GfxDevice/GfxDeviceTypes.h"
+
+// Adds a fog output (o9) to a vs_3_0 assembly shader, in place.
+// Returns true on success or when src is not a vs_3_0 shader (src is then left untouched);
+// returns false when the shader could not be patched.
+bool PatchVertexShaderFogD3D9 (std::string& src);
+// Adds fog blending for the given fog mode to a ps_3_0 assembly shader, in place.
+// fogColorReg / fogParamsReg are the indices of the constant registers used for the
+// fog colour and the fog parameters.
+// Returns true on success or when src is not a ps_3_0 shader (src is then left untouched);
+// returns false when the shader could not be patched.
+bool PatchPixelShaderFogD3D9 (std::string& src, FogMode fog, int fogColorReg, int fogParamsReg);
diff --git a/Runtime/GfxDevice/d3d/TexturesD3D9.cpp b/Runtime/GfxDevice/d3d/TexturesD3D9.cpp
new file mode 100644
index 0000000..d2baef9
--- /dev/null
+++ b/Runtime/GfxDevice/d3d/TexturesD3D9.cpp
@@ -0,0 +1,696 @@
+#include "UnityPrefix.h"
+#include "TexturesD3D9.h"
+#include "Runtime/Graphics/TextureFormat.h"
+#include "Runtime/Graphics/Image.h"
+#include "D3D9Context.h"
+#include "Runtime/Allocator/FixedSizeAllocator.h"
+#include "Runtime/Utilities/BitUtility.h"
+#include "Runtime/Graphics/S3Decompression.h"
+#include "Runtime/Shaders/GraphicsCaps.h"
+#include "D3D9Utils.h"
+#include "Runtime/GfxDevice/GfxDevice.h"
+#include "Runtime/GfxDevice/VramLimits.h"
+#include "Runtime/GfxDevice/TextureUploadUtils.h"
+#include "Runtime/GfxDevice/TextureIdMap.h"
+#include "External/ProphecySDK/include/prcore/Surface.hpp"
+#include "Runtime/Profiler/MemoryProfiler.h"
+#include "Runtime/Utilities/InitializeAndCleanup.h"
+
+// Per-texture record: the D3D9 texture object together with the sampler state
+// that is applied whenever the texture gets bound.
+struct D3DTexture
+{
+    IDirect3DBaseTexture9* texture;
+    D3DTEXTUREADDRESS wrapMode;
+    D3DTEXTUREFILTERTYPE minFilter;
+    D3DTEXTUREFILTERTYPE magFilter;
+    D3DTEXTUREFILTERTYPE mipFilter;
+    int aniso;
+    bool sRGB;
+
+    // Defaults: clamp addressing, point min/mag filter, no mip filter, anisotropy 1, sRGB off.
+    explicit D3DTexture( IDirect3DBaseTexture9* tex )
+        : texture(tex)
+        , wrapMode(D3DTADDRESS_CLAMP)
+        , minFilter(D3DTEXF_POINT)
+        , magFilter(D3DTEXF_POINT)
+        , mipFilter(D3DTEXF_NONE)
+        , aniso(1)
+        , sRGB(false)
+    {
+    }
+};
+
+// Allocator handing out D3DTexture-sized chunks; created and destroyed by the
+// runtime initialize/cleanup callbacks registered below.
+typedef FixedSizeAllocator<sizeof(D3DTexture)> TextureAllocator;
+static TextureAllocator* _TextureAlloc = NULL;
+
+namespace TextureD3D9Alloc
+{
+ // Creates the allocator used for all D3DTexture records.
+ void StaticInitialize()
+ {
+ _TextureAlloc = UNITY_NEW_AS_ROOT(TextureAllocator(kMemGfxDevice),kMemGfxDevice, "TextureStructs", "");
+ }
+
+ // Destroys the allocator created by StaticInitialize.
+ void StaticDestroy()
+ {
+ UNITY_DELETE(_TextureAlloc, kMemGfxDevice);
+ }
+}
+
+// Hooks StaticInitialize/StaticDestroy into runtime startup and shutdown.
+static RegisterRuntimeInitializeAndCleanup s_TextureAllocManagerCallbacks(TextureD3D9Alloc::StaticInitialize, TextureD3D9Alloc::StaticDestroy);
+
+// Constructs a D3DTexture record for tex in storage taken from _TextureAlloc
+// and returns the record's address as an integer handle.
+static inline intptr_t AllocD3DTexture(IDirect3DBaseTexture9* tex)
+{
+    void* storage = _TextureAlloc->alloc();
+    D3DTexture* record = new (storage) D3DTexture(tex);
+    return (intptr_t)record;
+}
+
+// Looks up the native handle stored in TextureIdMap for textureID and returns it
+// as a D3DTexture record. Callers in this file treat a NULL result as "not registered".
+static inline D3DTexture* QueryD3DTexture(TextureID textureID)
+{
+ return (D3DTexture*)TextureIdMap::QueryNativeTexture(textureID);
+}
+
+
+// Packs a four-component float colour (red, green, blue, alpha order) into a D3DCOLOR;
+// each component is converted with NormalizedToByte.
+static D3DCOLOR ColorToD3D( const float color[4] )
+{
+    const int red   = NormalizedToByte(color[0]);
+    const int green = NormalizedToByte(color[1]);
+    const int blue  = NormalizedToByte(color[2]);
+    const int alpha = NormalizedToByte(color[3]);
+    return D3DCOLOR_RGBA( red, green, blue, alpha );
+}
+
+
+// Describes how one texture format is uploaded to D3D9.
+struct FormatDesc {
+ TextureFormat unityformat; // texture format this entry stands for
+ D3DFORMAT d3dformat; // D3D9 format used for the texture
+ int bpp; // NOTE(review): values below look like bytes per pixel (0 for entries without a blit format) - confirm
+ prcore::PixelFormat prformat; // ProphecySDK pixel format used as the blit destination format
+};
+
+// Indexed by TextureFormat (see GetD3D9TextureFormat / GetUploadFormat).
+// The A8L8 fallback entry lives at index kTexFormatPCCount.
+// NOTE(review): the array is sized kTexFormatPCCount+2 although only one extra entry is listed - confirm the size.
+const static FormatDesc kTextureFormatTable[kTexFormatPCCount+2] = // extra slot(s) for the A8L8 case
+{
+ { kTexFormatPCCount, D3DFMT_UNKNOWN, 0, prcore::PixelFormat() },
+ { kTexFormatAlpha8, D3DFMT_A8, 1, prcore::PixelFormat(8,0,0xff) }, // Alpha8
+ { kTexFormatARGB4444, D3DFMT_A4R4G4B4, 2, prcore::PixelFormat(16,0x00000f00,0x000000f0,0x0000000f,0x0000f000) }, // ARGB4444
+ { kTexFormatRGB24, D3DFMT_X8R8G8B8, 4, prcore::PixelFormat(32,0x00ff0000,0x0000ff00,0x000000ff,0xff000000) }, // RGB24
+ { kTexFormatRGBA32, D3DFMT_A8R8G8B8, 4, prcore::PixelFormat(32,0x00ff0000,0x0000ff00,0x000000ff,0xff000000) }, // RGBA32
+ { kTexFormatARGB32, D3DFMT_A8R8G8B8, 4, prcore::PixelFormat(32,0x00ff0000,0x0000ff00,0x000000ff,0xff000000) }, // ARGB32
+ { kTexFormatARGBFloat, D3DFMT_UNKNOWN, 0, prcore::PixelFormat() }, // ARGBFloat
+ { kTexFormatRGB565, D3DFMT_R5G6B5, 2, prcore::PixelFormat(16,0x0000f800,0x000007e0,0x0000001f,0x00000000) }, // RGB565
+ { kTexFormatBGR24, D3DFMT_X8R8G8B8, 4, prcore::PixelFormat(32,0x00ff0000,0x0000ff00,0x000000ff,0xff000000) }, // BGR24
+ { kTexFormatAlphaLum16, D3DFMT_L16, 0, prcore::PixelFormat() }, // AlphaLum16
+ { kTexFormatDXT1, D3DFMT_DXT1, 0, prcore::PixelFormat() }, // DXT1
+ { kTexFormatDXT3, D3DFMT_DXT3, 0, prcore::PixelFormat() }, // DXT3
+ { kTexFormatDXT5, D3DFMT_DXT5, 0, prcore::PixelFormat() }, // DXT5
+ { kTexFormatRGBA4444, D3DFMT_A4R4G4B4, 2, prcore::PixelFormat(16,0x00000f00,0x000000f0,0x0000000f,0x0000f000) }, // RGBA4444
+
+ // following are not Unity formats, but might be used as fallbacks for some unsupported formats
+ { kTexFormatAlphaLum16, D3DFMT_A8L8, 2, prcore::PixelFormat(16,0x00ff,0xff00) }, // A8L8, used on cards that don't support A8; alpha -> alpha
+};
+
+// Upload description returned by GetUploadFormat for ETC textures: a 32-bit X8R8G8B8 texture.
+const static FormatDesc kTextureFormatETC =
+{
+ kTexFormatETC_RGB4, D3DFMT_X8R8G8B8, 4, prcore::PixelFormat(32,0x00ff0000,0x0000ff00,0x000000ff,0xff000000)
+};
+
+// Upload descriptions returned by GetUploadFormat for ATC textures:
+// index 0 is used without alpha, index 1 with alpha.
+const static FormatDesc kTextureFormatATC[2] =
+{
+ { kTexFormatATC_RGB4, D3DFMT_X8R8G8B8, 4, prcore::PixelFormat(32,0x00ff0000,0x0000ff00,0x000000ff,0xff000000) }, // RGB24
+ { kTexFormatATC_RGBA8, D3DFMT_A8R8G8B8, 4, prcore::PixelFormat(32,0x00ff0000,0x0000ff00,0x000000ff,0xff000000) }, // RGBA32
+};
+
+
+// Returns the D3D9 format listed in kTextureFormatTable for inFormat.
+// inFormat is used directly as the table index, without range checking.
+D3DFORMAT GetD3D9TextureFormat( TextureFormat inFormat )
+{
+    const FormatDesc& desc = kTextureFormatTable[inFormat];
+    return desc.d3dformat;
+}
+
+// Chooses the format description a texture of format inFormat is uploaded with,
+// taking the capabilities in gGraphicsCaps into account. ARGB32 is the general
+// fallback; forceFallbackFormat selects it unconditionally.
+static const FormatDesc& GetUploadFormat( TextureFormat inFormat, bool forceFallbackFormat = false )
+{
+    const FormatDesc& fallback = kTextureFormatTable[kTexFormatARGB32];
+
+    if (forceFallbackFormat)
+        return fallback;
+
+    // A8 not supported: A8L8 or fallback one depending on support
+    if( inFormat == kTexFormatAlpha8 && !gGraphicsCaps.d3d.hasTextureFormatA8 )
+    {
+        if( gGraphicsCaps.d3d.hasTextureFormatA8L8 )
+            return kTextureFormatTable[ kTexFormatPCCount ]; // the A8L8 entry at the end of the table
+        return fallback;
+    }
+
+    // Compressed format not supported: decompress into fallback format
+    if( IsCompressedDXTTextureFormat(inFormat) && !gGraphicsCaps.hasS3TCCompression )
+        return fallback;
+
+    if ( IsCompressedETCTextureFormat(inFormat) )
+        return kTextureFormatETC;
+
+    if ( IsCompressedATCTextureFormat(inFormat) )
+    {
+        const int atcIndex = HasAlphaTextureFormat(inFormat) ? 1 : 0;
+        return kTextureFormatATC[ atcIndex ];
+    }
+
+    // This format not supported in general: convert to fallback format
+    if (!gGraphicsCaps.d3d.hasBaseTextureFormat[inFormat])
+        return fallback;
+
+    // All ok, return incoming format
+    return kTextureFormatTable[inFormat];
+}
+
+// Wraps texture in a newly allocated D3DTexture record and returns its handle.
+// The handle is not entered into TextureIdMap here.
+intptr_t TexturesD3D9::RegisterNativeTexture(IDirect3DBaseTexture9* texture) const
+{
+    const intptr_t handle = AllocD3DTexture(texture);
+    return handle;
+}
+
+// Points the record registered for textureID at texture; when no record exists
+// yet, a new one is registered through AddTexture.
+void TexturesD3D9::UpdateNativeTexture(TextureID textureID, IDirect3DBaseTexture9* texture)
+{
+    D3DTexture* existing = QueryD3DTexture(textureID);
+    if( !existing )
+    {
+        AddTexture(textureID, texture);
+        return;
+    }
+    existing->texture = texture;
+}
+
+// Allocates a D3DTexture record for texture and stores it in TextureIdMap under textureID.
+void TexturesD3D9::AddTexture( TextureID textureID, IDirect3DBaseTexture9* texture )
+{
+    const intptr_t handle = AllocD3DTexture(texture);
+    TextureIdMap::UpdateTexture(textureID, handle);
+}
+
+// Destroys the D3DTexture record registered for textureID (if any) and removes
+// the id from TextureIdMap. The D3D texture object itself is not released here.
+void TexturesD3D9::RemoveTexture( TextureID textureID )
+{
+    if( D3DTexture* record = QueryD3DTexture(textureID) )
+    {
+        record->~D3DTexture();
+        _TextureAlloc->free(record);
+    }
+    TextureIdMap::RemoveTexture(textureID);
+}
+
+// Returns the D3D texture object registered for textureID, or 0 when the id is unknown.
+IDirect3DBaseTexture9* TexturesD3D9::GetTexture( TextureID textureID ) const
+{
+    D3DTexture* record = QueryD3DTexture(textureID);
+    if( !record )
+        return 0;
+    return record->texture;
+}
+
+
+
+// Copies a width x height AlphaLum16 image into a locked D3D surface whose rows
+// are pitch bytes apart. ProphecySDK does not support 16 bit/channel formats,
+// so the blit is done by hand, depending on the destination format:
+//   D3DFMT_L16      - rows are copied as they are,
+//   D3DFMT_L8       - the second byte of every 16-bit texel is kept,
+//   D3DFMT_A8R8G8B8 - that byte is replicated into R, G and B, alpha is set to 0xFF.
+static void BlitAlphaLum16 (int width, int height, D3DFORMAT d3dFormat, const UInt8* srcData, UInt8* destData, int pitch)
+{
+    const UInt32 srcStride = GetRowBytesFromWidthAndFormat(width,kTexFormatAlphaLum16);
+
+    if( d3dFormat == D3DFMT_L16 )
+    {
+        for( int row = 0; row < height; ++row, srcData += srcStride, destData += pitch )
+            memcpy( destData, srcData, srcStride );
+        return;
+    }
+
+    if( d3dFormat == D3DFMT_L8 )
+    {
+        for( int row = 0; row < height; ++row, srcData += srcStride, destData += pitch )
+        {
+            for( int col = 0; col < width; ++col )
+                destData[col] = srcData[col*2+1];
+        }
+        return;
+    }
+
+    AssertIf( d3dFormat != D3DFMT_A8R8G8B8 );
+    for( int row = 0; row < height; ++row, srcData += srcStride, destData += pitch )
+    {
+        D3DCOLOR* destTexels = (D3DCOLOR*)destData;
+        for( int col = 0; col < width; ++col )
+        {
+            DWORD lum = srcData[col*2+1];
+            destTexels[col] = 0xFF000000 | (lum<<16) | (lum<<8) | (lum);
+        }
+    }
+}
+
+// Prepares a scratch buffer for a width x height RGBA32 image.
+// buffer is allocated with new[] only when it is NULL on entry (the caller owns it);
+// srcPitch receives the RGBA32 row size in bytes and pf the matching ProphecySDK pixel format.
+void InitRGBA32Buffer(int width, int height, UInt8*& buffer, int& srcPitch, prcore::PixelFormat& pf)
+{
+    const int byteCount = CalculateImageSize( width, height, kTexFormatRGBA32 );
+    if( !buffer )
+    {
+        buffer = new UInt8[byteCount];
+    }
+
+    srcPitch = GetRowBytesFromWidthAndFormat(width, kTexFormatRGBA32);
+    pf = GetProphecyPixelFormat(kTexFormatRGBA32);
+}
+
+// Uploads all mip levels of a 2D texture, creating the D3D texture on first use.
+// Depending on format support the data is copied as is, decompressed on the fly,
+// or converted into a fallback format. masterTextureLimit may cause the largest
+// levels to be skipped (see SkipLevelsForMasterTextureLimit).
+// Failures are reported through printf_console / AssertString; the function returns nothing.
+void TexturesD3D9::UploadTexture2D(
+    TextureID tid, TextureDimension dimension, UInt8* srcData, int width, int height,
+    TextureFormat format, int mipCount, UInt32 uploadFlags, int masterTextureLimit, TextureUsageMode usageMode, TextureColorSpace colorSpace )
+{
+    IDirect3DDevice9* dev = GetD3DDevice();
+
+    AssertIf( srcData == NULL );
+    AssertIf( (!IsPowerOfTwo(width) || !IsPowerOfTwo(height)) && !IsNPOTTextureAllowed(mipCount > 1) );
+
+    if( dimension != kTexDim2D )
+    {
+        ErrorString( "Incorrect texture dimension!" );
+        return;
+    }
+
+    // Nothing to do here. Early out instead of failing, empty textures are serialized by dynamic fonts.
+    if( width == 0 || height == 0 )
+        return;
+
+    bool uploadIsCompressed, decompressOnTheFly;
+    HandleFormatDecompression (format, &usageMode, colorSpace, &uploadIsCompressed, &decompressOnTheFly);
+
+    if( decompressOnTheFly )
+        uploadIsCompressed = false;
+
+    const FormatDesc& uploadFormat = GetUploadFormat (decompressOnTheFly ? kTexFormatRGBA32 : format, usageMode != kTexUsageNone);
+    D3DFORMAT d3dFormat = uploadFormat.d3dformat;
+
+    if( format == kTexFormatAlphaLum16 && !gGraphicsCaps.d3d.hasTextureFormatL16 )
+    {
+        // AlphaLum16 requires some trickery if hardware does not support L16:
+        // first we try to do L8 instead, then fallback to A8R8G8B8.
+        if( gGraphicsCaps.d3d.hasTextureFormatL8 )
+            d3dFormat = D3DFMT_L8;
+        else
+            d3dFormat = D3DFMT_A8R8G8B8;
+    }
+
+    int baseLevel, maxLevel, texWidth, texHeight;
+    size_t textureSize;
+    prcore::Surface::BlitMode blitMode = prcore::Surface::BLIT_COPY;
+    if (SkipLevelsForMasterTextureLimit (masterTextureLimit, format, uploadFormat.unityformat, mipCount, uploadIsCompressed, &srcData, &width, &height, &baseLevel, &maxLevel, &texWidth, &texHeight, &textureSize))
+        blitMode = prcore::Surface::BLIT_SCALE;
+
+    // if we don't support mip maps - don't use them
+    // NOTE(review): maxLevel is not reset here, so the upload loop below may still try to
+    // lock levels above 0 and bail out with a lock failure - confirm whether that is intended.
+    if( !(gGraphicsCaps.d3d.d3dcaps.TextureCaps & D3DPTEXTURECAPS_MIPMAP) )
+    {
+        mipCount = 1;
+        baseLevel = 0;
+    }
+
+    // create texture if it does not exist already
+    IDirect3DTexture9* texture = NULL;
+
+    D3DTexture* target = QueryD3DTexture(tid);
+    if(!target)
+    {
+        HRESULT hr = dev->CreateTexture( texWidth, texHeight, mipCount - baseLevel, 0, d3dFormat, D3DPOOL_MANAGED, &texture, NULL );
+        REGISTER_EXTERNAL_GFX_ALLOCATION_REF(texture, CalculateImageSize(texWidth, texHeight,format)*(mipCount>1?1.33:1),tid.m_ID);
+        if( FAILED(hr) )
+            printf_console( "d3d: failed to create 2D texture id=%i w=%i h=%i mips=%i d3dfmt=%i [%s]\n", tid.m_ID, texWidth, texHeight, mipCount-baseLevel, d3dFormat, GetD3D9Error(hr) );
+        // the record is registered even when creation failed (texture stays NULL)
+        TextureIdMap::UpdateTexture(tid, AllocD3DTexture(texture));
+    }
+    else
+    {
+        texture = (IDirect3DTexture9*)target->texture;
+    }
+
+    if( !texture )
+    {
+        AssertString( "failed to create 2D texture" );
+        return;
+    }
+
+    // Scratch buffers handed to the conversion helpers; both are released before
+    // returning (delete[] on a NULL pointer is a no-op).
+    UInt8* decompressBuffer = NULL;
+    UInt8* tempBuffer = NULL;
+    int bufferPitch;
+
+    // Upload the mip levels
+    for( int level = baseLevel; level <= maxLevel; ++level )
+    {
+        D3DLOCKED_RECT lr;
+        HRESULT hr = texture->LockRect( level-baseLevel, &lr, NULL, 0 );
+        if( FAILED(hr) )
+        {
+            printf_console( "d3d: failed to lock level %i of texture %i [%s]\n", level-baseLevel, tid.m_ID, GetD3D9Error(hr) );
+            delete[] decompressBuffer;
+            delete[] tempBuffer;
+            return;
+        }
+
+        if( decompressOnTheFly )
+        {
+            ConvertCompressedTextureUpload (width, height, format, srcData, decompressBuffer, bufferPitch, usageMode, colorSpace, level);
+
+            prcore::Surface srcSurface( width, height, bufferPitch, GetProphecyPixelFormat(kTexFormatRGBA32), decompressBuffer );
+            prcore::Surface dstSurface( texWidth, texHeight, lr.Pitch, uploadFormat.prformat, lr.pBits );
+            dstSurface.BlitImage( srcSurface, blitMode );
+        }
+        else if( format == kTexFormatAlphaLum16 )
+        {
+            BlitAlphaLum16( width, height, d3dFormat, srcData, (UInt8*)lr.pBits, lr.Pitch );
+        }
+        else if( !uploadIsCompressed )
+        {
+            prcore::Surface srcSurface( width, height, GetRowBytesFromWidthAndFormat( width,format ), GetProphecyPixelFormat(format), srcData );
+            prcore::Surface dstSurface( texWidth, texHeight, lr.Pitch, uploadFormat.prformat, lr.pBits );
+
+            if (!ConvertUncompressedTextureUpload(srcSurface, dstSurface, blitMode, uploadFormat.unityformat, usageMode, colorSpace, width, height, (UInt8*)lr.pBits, lr.Pitch, uploadFormat.prformat, tempBuffer, bufferPitch))
+            {
+                dstSurface.BlitImage( srcSurface, blitMode );
+            }
+        }
+        else
+        {
+            if( width == texWidth && height == texHeight )
+            {
+                BlitCopyCompressedImage( format, srcData, width, height, (UInt8*)lr.pBits, width, height, false );
+            }
+            else
+            {
+                // TODO: fill with garbage?
+            }
+        }
+
+        texture->UnlockRect( level-baseLevel );
+
+        // Go to next level
+        AssertIf( width == 1 && height == 1 && level != maxLevel );
+        AdvanceToNextMipLevel (format, srcData, width, height, texWidth, texHeight);
+    }
+
+    delete[] decompressBuffer;
+    delete[] tempBuffer;
+}
+
+// Overwrites the rectangle (x, y, width, height) of one mip level of an existing
+// 2D texture with srcData. DXT-compressed formats are not handled (asserted).
+// Does nothing when there is no device, when mipLevel is non-zero on hardware
+// without mipmap support, or when the texture id is unknown.
+void TexturesD3D9::UploadTextureSubData2D(
+    TextureID tid, UInt8* srcData, int mipLevel,
+    int x, int y, int width, int height, TextureFormat format, TextureColorSpace colorSpace )
+{
+    IDirect3DDevice9* dev = GetD3DDevice();
+    if( !dev )
+        return;
+
+    // if we don't support mip maps and want to change higher level - don't
+    if( !(gGraphicsCaps.d3d.d3dcaps.TextureCaps & D3DPTEXTURECAPS_MIPMAP) && mipLevel != 0 )
+        return;
+
+    AssertIf( srcData == NULL );
+    AssertIf( IsCompressedDXTTextureFormat( format ) );
+
+    // find the texture
+    D3DTexture* target = QueryD3DTexture(tid);
+    if(target == 0)
+    {
+        AssertString( "Texture not found" );
+        return;
+    }
+
+    const FormatDesc& uploadFormat = GetUploadFormat( format );
+    IDirect3DTexture9* texture = (IDirect3DTexture9*)target->texture;
+    AssertIf( !texture );
+
+    RECT rect;
+    rect.left = x;
+    rect.top = y;
+    rect.right = x + width;
+    rect.bottom = y + height;
+    D3DLOCKED_RECT lr;
+    HRESULT hr = texture->LockRect( mipLevel, &lr, &rect, 0 );
+    if( FAILED(hr) )
+    {
+        // print the numeric id; the TextureID object itself must not go through varargs
+        printf_console( "d3d: failed to lock sub level %i of texture %i [%s]\n", mipLevel, tid.m_ID, GetD3D9Error(hr) );
+        return;
+    }
+
+    // TODO: handle other format conversions
+
+    // lr.pBits points at the locked rectangle, so both surfaces are width x height
+    prcore::Surface srcSurface( width, height, GetRowBytesFromWidthAndFormat(width,format), GetProphecyPixelFormat(format), srcData );
+    prcore::Surface dstSurface( width, height, lr.Pitch, uploadFormat.prformat, lr.pBits );
+    dstSurface.BlitImage( srcSurface, prcore::Surface::BLIT_COPY );
+
+    texture->UnlockRect( mipLevel );
+}
+
+
+// Uploads all six faces (and their mip levels) of a cubemap, creating the D3D
+// cube texture on first use. Face data starts at srcData + face * faceDataSize,
+// with the mip levels of a face stored one after another.
+// Failures are reported through printf_console / AssertString.
+void TexturesD3D9::UploadTextureCube(
+    TextureID tid, UInt8* srcData, int faceDataSize, int size,
+    TextureFormat format, int mipCount, UInt32 uploadFlags, TextureColorSpace colorSpace )
+{
+    IDirect3DDevice9* dev = GetD3DDevice();
+    if (!dev)
+        return;
+
+    // if we don't support cube mip maps - don't use them
+    if( !(gGraphicsCaps.d3d.d3dcaps.TextureCaps & D3DPTEXTURECAPS_MIPCUBEMAP) )
+        mipCount = 1;
+
+    const FormatDesc& uploadFormat = GetUploadFormat(format);
+    IDirect3DCubeTexture9* texture = NULL;
+
+    D3DTexture* target = QueryD3DTexture(tid);
+    if(!target)
+    {
+        HRESULT hr = dev->CreateCubeTexture( size, mipCount, 0, uploadFormat.d3dformat, D3DPOOL_MANAGED, &texture, NULL );
+        REGISTER_EXTERNAL_GFX_ALLOCATION_REF(texture, 6*CalculateImageSize(size, size, format)*(mipCount>1?1.33:1),tid.m_ID);
+        if( FAILED(hr) )
+            printf_console( "d3d: failed to create cubemap id=%i size=%i mips=%i d3dfmt=%i [%s]\n", tid.m_ID, size, mipCount, uploadFormat.d3dformat, GetD3D9Error(hr) );
+        // the record is registered even when creation failed (texture stays NULL)
+        TextureIdMap::UpdateTexture(tid, AllocD3DTexture(texture));
+    }
+    else
+    {
+        texture = (IDirect3DCubeTexture9*)target->texture;
+    }
+    if( !texture )
+    {
+        AssertString( "failed to create cubemap" );
+        return;
+    }
+
+    // Upload data
+    bool uploadIsCompressed = IsCompressedDXTTextureFormat(format); // TODO: handle when we don't have DXT
+
+    static const D3DCUBEMAP_FACES faces[6] =
+    {
+        D3DCUBEMAP_FACE_POSITIVE_X,
+        D3DCUBEMAP_FACE_NEGATIVE_X,
+        D3DCUBEMAP_FACE_POSITIVE_Y,
+        D3DCUBEMAP_FACE_NEGATIVE_Y,
+        D3DCUBEMAP_FACE_POSITIVE_Z,
+        D3DCUBEMAP_FACE_NEGATIVE_Z,
+    };
+
+    int maxLevel = mipCount - 1;
+    for (int face=0;face<6;face++)
+    {
+        int mipSize = size;
+        UInt8* data = srcData + face * faceDataSize;
+
+        // Upload the mip levels
+        for( int level = 0; level <= maxLevel; ++level )
+        {
+            D3DLOCKED_RECT lr;
+            HRESULT hr = texture->LockRect( faces[face], level, &lr, NULL, 0 );
+            if( FAILED(hr) )
+            {
+                // print the numeric id; the TextureID object itself must not go through varargs
+                printf_console( "d3d: failed to lock level %i of face %i of cubemap %i [%s]\n", level, face, tid.m_ID, GetD3D9Error(hr) );
+                return;
+            }
+
+            // TODO: handle DXT decompression on the fly
+            // TODO: handle other format conversions
+
+            if( !uploadIsCompressed )
+            {
+                prcore::Surface srcSurface( mipSize, mipSize, GetRowBytesFromWidthAndFormat(mipSize,format), GetProphecyPixelFormat(format), data );
+                prcore::Surface dstSurface( mipSize, mipSize, lr.Pitch, uploadFormat.prformat, lr.pBits );
+                dstSurface.BlitImage( srcSurface, prcore::Surface::BLIT_COPY );
+            }
+            else
+            {
+                BlitCopyCompressedImage( format, data, mipSize, mipSize, (UInt8*)lr.pBits, mipSize /* TODO */, mipSize, false );
+            }
+
+            texture->UnlockRect( faces[face], level );
+
+            // Go to next level
+            data += CalculateImageSize( mipSize, mipSize, format );
+            AssertIf( mipSize == 1 && level != maxLevel );
+
+            mipSize = std::max( mipSize / 2, 1 );
+        }
+    }
+}
+
+// Uploads all mip levels of a volume texture, creating the D3D volume texture on
+// first use. Slices of one level are stored back to back in srcData, followed
+// by the slices of the next (half-sized) level.
+// Does nothing when there is no device or 3D textures are not supported.
+void TexturesD3D9::UploadTexture3D(
+    TextureID tid, UInt8* srcData, int width, int height, int depth,
+    TextureFormat format, int mipCount, UInt32 uploadFlags )
+{
+    IDirect3DDevice9* dev = GetD3DDevice();
+    if (!dev || !gGraphicsCaps.has3DTexture)
+        return;
+
+    // if we don't support volume mip maps - don't use them
+    // (D3DPTEXTURECAPS_MIPVOLUMEMAP is the mipmapped-volume capability;
+    // D3DPTEXTURECAPS_VOLUMEMAP only says that volume textures exist at all)
+    if( !(gGraphicsCaps.d3d.d3dcaps.TextureCaps & D3DPTEXTURECAPS_MIPVOLUMEMAP) )
+        mipCount = 1;
+
+
+    const FormatDesc& uploadFormat = GetUploadFormat( format );
+    D3DFORMAT d3dFormat = uploadFormat.d3dformat;
+    if( format == kTexFormatAlphaLum16 )
+    {
+        // AlphaLum16 requires some trickery if hardware does not support L16:
+        // first we try to do L8 instead, then fallback to A8R8G8B8.
+        // NOTE(review): unlike the 2D path, A8R8G8B8 is also chosen when L16 IS supported -
+        // confirm whether L16 is deliberately avoided for volume textures.
+        if( !gGraphicsCaps.d3d.hasTextureFormatL16 && gGraphicsCaps.d3d.hasTextureFormatL8 )
+            d3dFormat = D3DFMT_L8;
+        else
+            d3dFormat = D3DFMT_A8R8G8B8;
+    }
+
+    IDirect3DVolumeTexture9* texture = NULL;
+
+    D3DTexture* target = QueryD3DTexture(tid);
+    if(!target)
+    {
+        HRESULT hr = dev->CreateVolumeTexture( width, height, depth, mipCount, 0, d3dFormat, D3DPOOL_MANAGED, &texture, NULL );
+        REGISTER_EXTERNAL_GFX_ALLOCATION_REF(texture, depth*CalculateImageSize(width, height, format)*(mipCount>1?1.33:1),tid.m_ID);
+        if( FAILED(hr) )
+            printf_console( "d3d: failed to create 3D texture id=%i w=%i h=%i d=%i mips=%i d3dfmt=%i [%s]\n", tid.m_ID, width, height, depth, mipCount, d3dFormat, GetD3D9Error(hr) );
+        // the record is registered even when creation failed (texture stays NULL)
+        TextureIdMap::UpdateTexture(tid, AllocD3DTexture(texture));
+    }
+    else
+    {
+        texture = (IDirect3DVolumeTexture9*)target->texture;
+    }
+    if( !texture )
+    {
+        AssertString( "failed to create 3D texture" );
+        return;
+    }
+
+    int maxLevel = mipCount - 1;
+    for( int level=0; level <= maxLevel; ++level )
+    {
+        D3DLOCKED_BOX lr;
+        HRESULT hr = texture->LockBox( level, &lr, NULL, 0 );
+        if( FAILED(hr) )
+        {
+            // print the numeric id; the TextureID object itself must not go through varargs
+            printf_console( "d3d: failed to lock level %i of 3D texture %i [%s]\n", level, tid.m_ID, GetD3D9Error(hr) );
+            return;
+        }
+
+        UInt8* destData = (UInt8*)lr.pBits;
+        const int sliceSize = CalculateImageSize(width, height, format);
+        for( int slice = 0; slice < depth; ++slice )
+        {
+            if( format == kTexFormatAlphaLum16 )
+            {
+                BlitAlphaLum16 (width, height, d3dFormat, srcData, destData, lr.RowPitch);
+            }
+            else
+            {
+                // Regular ProphecySDK blit
+                prcore::Surface srcSurface( width, height, GetRowBytesFromWidthAndFormat(width,format), GetProphecyPixelFormat(format), srcData );
+                prcore::Surface dstSurface( width, height, lr.RowPitch, uploadFormat.prformat, destData );
+                dstSurface.BlitImage( srcSurface, prcore::Surface::BLIT_COPY );
+            }
+            srcData += sliceSize;
+            destData += lr.SlicePitch;
+        }
+
+        texture->UnlockBox( level );
+
+        AssertIf( width == 1 && height == 1 && level != maxLevel );
+
+        // every dimension halves per level, but never drops below 1
+        width = std::max( width / 2, 1 );
+        height = std::max( height / 2, 1 );
+        depth = std::max( depth / 2, 1 );
+    }
+}
+
+
+
+// Binds the texture registered for textureID to the sampler that corresponds to
+// (shaderType, unit) and applies the sampler state stored in its record.
+// Returns false, without touching the device, when the id is not registered.
+bool TexturesD3D9::SetTexture (ShaderType shaderType, int unit, TextureID textureID)
+{
+    IDirect3DDevice9* dev = GetD3DDevice();
+
+    D3DTexture* record = QueryD3DTexture(textureID);
+    if( !record )
+    {
+        // Ok, just don't complain here. Mostly with render textures, once in a while it
+        // happens that RT is not created yet, and someone tries to render with it.
+        // Just silently ignore that case.
+        //ErrorString( Format("SetTexture with unknown texture %i", textureID) );
+        return false;
+    }
+
+    const DWORD sampler = GetD3D9SamplerIndex (shaderType, unit);
+    D3D9_CALL(dev->SetTexture( sampler, record->texture ));
+
+    // TODO: caching of those!
+    // addressing: the same wrap mode on all three axes
+    D3D9_CALL(dev->SetSamplerState( sampler, D3DSAMP_ADDRESSU, record->wrapMode ));
+    D3D9_CALL(dev->SetSamplerState( sampler, D3DSAMP_ADDRESSV, record->wrapMode ));
+    D3D9_CALL(dev->SetSamplerState( sampler, D3DSAMP_ADDRESSW, record->wrapMode ));
+    // filtering
+    D3D9_CALL(dev->SetSamplerState( sampler, D3DSAMP_MINFILTER, record->minFilter ));
+    D3D9_CALL(dev->SetSamplerState( sampler, D3DSAMP_MAGFILTER, record->magFilter ));
+    D3D9_CALL(dev->SetSamplerState( sampler, D3DSAMP_MIPFILTER, record->mipFilter ));
+    D3D9_CALL(dev->SetSamplerState( sampler, D3DSAMP_MAXANISOTROPY, record->aniso ));
+    // colour space
+    D3D9_CALL(dev->SetSamplerState( sampler, D3DSAMP_SRGBTEXTURE, record->sRGB ));
+    return true;
+}
+
+// Lookup tables used by SetTextureParams.
+// Wrap mode -> D3D addressing mode, indexed by TextureWrapMode.
+static D3DTEXTUREADDRESS s_D3DWrapModes[kTexWrapCount] = {
+ D3DTADDRESS_WRAP,
+ D3DTADDRESS_CLAMP,
+};
+// Filter mode -> D3D min/mag filter, indexed by TextureFilterMode.
+static D3DTEXTUREFILTERTYPE s_D3DMinMagFilters[kTexFilterCount] = {
+ D3DTEXF_POINT,
+ D3DTEXF_LINEAR,
+ D3DTEXF_LINEAR,
+};
+// Filter mode -> D3D mip filter, indexed by TextureFilterMode.
+static D3DTEXTUREFILTERTYPE s_D3DMipFilters[kTexFilterCount] = {
+ D3DTEXF_POINT,
+ D3DTEXF_POINT,
+ D3DTEXF_LINEAR,
+};
+
+
+// Stores the sampler parameters (anisotropy, wrap mode, filters, sRGB flag) in the
+// record of textureID. Nothing is sent to the device here; the stored values are
+// applied when the texture is bound in SetTexture. Unknown ids are ignored.
+void TexturesD3D9::SetTextureParams( TextureID textureID, TextureDimension texDim, TextureFilterMode filter, TextureWrapMode wrap, int anisoLevel, bool hasMipMap, TextureColorSpace colorSpace )
+{
+    D3DTexture* record = QueryD3DTexture(textureID);
+    if( !record )
+        return;
+    AssertIf( !record->texture );
+
+    // anisotropy: clamped to the hardware maximum, never used for 3D textures
+    const bool anisoAllowed = gGraphicsCaps.hasAnisoFilter && texDim != kTexDim3D;
+    record->aniso = anisoAllowed ? std::min( anisoLevel, gGraphicsCaps.maxAnisoLevel ) : 1;
+
+    record->wrapMode = s_D3DWrapModes[wrap];
+
+    // trilinear filtering makes no sense without mip maps: use bilinear instead
+    const TextureFilterMode effectiveFilter = ( !hasMipMap && filter == kTexFilterTrilinear ) ? kTexFilterBilinear : filter;
+
+    const D3DTEXTUREFILTERTYPE baseFilter = s_D3DMinMagFilters[effectiveFilter];
+    record->minFilter = baseFilter;
+    record->magFilter = baseFilter;
+    if( record->aniso > 1 )
+    {
+        record->minFilter = D3DTEXF_ANISOTROPIC;
+        // some cards (notably GeForces) can do min anisotropic filter, but not mag anisotropic filter
+        const bool magAnisoSupported = (gGraphicsCaps.d3d.d3dcaps.TextureFilterCaps & D3DPTFILTERCAPS_MAGFANISOTROPIC) != 0;
+        if( magAnisoSupported )
+            record->magFilter = D3DTEXF_ANISOTROPIC;
+    }
+    record->mipFilter = s_D3DMipFilters[effectiveFilter];
+
+    // sRGB sampling for both sRGB colour space variants
+    record->sRGB = (colorSpace == kTexColorSpaceSRGB) || (colorSpace == kTexColorSpaceSRGBXenon);
+}
+
+
+// Releases the D3D texture registered for textureID, frees its D3DTexture
+// record and removes the id from TextureIdMap. Unknown ids are ignored.
+void TexturesD3D9::DeleteTexture( TextureID textureID )
+{
+    D3DTexture* target = QueryD3DTexture(textureID);
+    if(!target)
+        return;
+
+    // texture can be null if texture creation failed. At least don't make it crash here
+    if( target->texture )
+    {
+        REGISTER_EXTERNAL_GFX_DEALLOCATION(target->texture);
+        ULONG refCount = target->texture->Release();
+        AssertIf( refCount != 0 );
+    }
+
+    // The record itself came from _TextureAlloc (see AllocD3DTexture); give it back,
+    // otherwise every deleted texture leaks one D3DTexture. Same sequence as RemoveTexture.
+    target->~D3DTexture();
+    _TextureAlloc->free(target);
+
+    TextureIdMap::RemoveTexture(textureID);
+}
diff --git a/Runtime/GfxDevice/d3d/TexturesD3D9.h b/Runtime/GfxDevice/d3d/TexturesD3D9.h
new file mode 100644
index 0000000..113434c
--- /dev/null
+++ b/Runtime/GfxDevice/d3d/TexturesD3D9.h
@@ -0,0 +1,90 @@
+#pragma once
+
+#include "D3D9Includes.h"
+#include "Runtime/Graphics/TextureFormat.h"
+#include "Runtime/Graphics/RenderSurface.h"
+#include "Runtime/GfxDevice/GfxDeviceTypes.h"
+#include "Runtime/Threads/AtomicOps.h"
+#include <map>
+
+class ImageReference;
+
+// Texture management interface of the D3D9 graphics device: binding, sampling
+// parameters, uploads and the TextureID <-> IDirect3DBaseTexture9 association.
+// Only the declarations are visible here; see TexturesD3D9.cpp for behavior.
+class TexturesD3D9
+{
+public:
+ TexturesD3D9() {}
+ ~TexturesD3D9() {}
+ // Binds the texture to the given unit of the given shader stage.
+ // NOTE(review): meaning of the bool result is not visible from this header - confirm in the .cpp.
+ bool SetTexture (ShaderType shaderType, int unit, TextureID textureID);
+ // Stores filter / wrap / anisotropy / sRGB settings on the texture entry; the sampler
+ // states themselves are applied when the texture is bound via SetTexture.
+ void SetTextureParams( TextureID texture, TextureDimension texDim, TextureFilterMode filter, TextureWrapMode wrap, int anisoLevel, bool hasMipMap, TextureColorSpace colorSpace );
+
+ // Releases the D3D texture (if any) and forgets the ID.
+ void DeleteTexture( TextureID textureID );
+
+ // Upload entry points for 2D, 2D sub-region, cube map and 3D texture data.
+ void UploadTexture2D(
+ TextureID tid, TextureDimension dimension, UInt8* srcData, int width, int height,
+ TextureFormat format, int mipCount, UInt32 uploadFlags, int masterTextureLimit, TextureUsageMode usageMode, TextureColorSpace colorSpace );
+
+ void UploadTextureSubData2D(
+ TextureID tid, UInt8* srcData, int mipLevel,
+ int x, int y, int width, int height, TextureFormat format, TextureColorSpace colorSpace );
+
+ void UploadTextureCube(
+ TextureID tid, UInt8* srcData, int faceDataSize, int size,
+ TextureFormat format, int mipCount, UInt32 uploadFlags, TextureColorSpace colorSpace );
+
+ void UploadTexture3D(
+ TextureID tid, UInt8* srcData, int width, int height, int depth,
+ TextureFormat format, int mipCount, UInt32 uploadFlags );
+
+ // Direct manipulation of the TextureID -> D3D texture association.
+ void AddTexture( TextureID textureID, IDirect3DBaseTexture9* texture );
+ void RemoveTexture( TextureID textureID );
+ IDirect3DBaseTexture9* GetTexture( TextureID textureID ) const;
+
+ // Support for textures created outside the engine.
+ // NOTE(review): ownership / ref-count handling of 'texture' is not visible here - confirm in the .cpp.
+ intptr_t RegisterNativeTexture(IDirect3DBaseTexture9* texture) const;
+ void UpdateNativeTexture(TextureID textureID, IDirect3DBaseTexture9* texture);
+};
+
+// Common part of D3D9 render surfaces: an optional texture and an optional surface,
+// both released (and nulled) by Release().
+struct RenderSurfaceD3D9 : RenderSurfaceBase
+{
+    RenderSurfaceD3D9()
+        : m_Texture(NULL)
+        , m_Surface(NULL)
+    {
+        RenderSurfaceBase_Init(*this);
+    }
+
+    // Releases whichever D3D objects are currently held; safe to call repeatedly
+    // because each pointer is reset to NULL after its release.
+    void Release()
+    {
+        if (m_Texture != NULL)
+        {
+            REGISTER_EXTERNAL_GFX_DEALLOCATION(m_Texture);
+            m_Texture->Release();
+            m_Texture = NULL;
+        }
+
+        if (m_Surface != NULL)
+        {
+            REGISTER_EXTERNAL_GFX_DEALLOCATION(m_Surface);
+            m_Surface->Release();
+            m_Surface = NULL;
+        }
+    }
+
+    IDirect3DBaseTexture9* m_Texture;
+    IDirect3DSurface9* m_Surface;
+};
+
+// Color render surface: adds the render texture format and texture dimension to the
+// common D3D9 surface data. Defaults to ARGB32 / 2D.
+struct RenderColorSurfaceD3D9 : public RenderSurfaceD3D9
+{
+ RenderColorSurfaceD3D9()
+ : format(kRTFormatARGB32)
+ , dim(kTexDim2D)
+ {
+ // Runs after the base constructor's RenderSurfaceBase_Init call.
+ RenderSurfaceBase_InitColor(*this);
+ }
+ RenderTextureFormat format;
+ TextureDimension dim;
+};
+
+// Depth render surface: adds the depth buffer format to the common D3D9 surface data.
+// Defaults to "no depth format".
+struct RenderDepthSurfaceD3D9 : public RenderSurfaceD3D9
+{
+ RenderDepthSurfaceD3D9()
+ : depthFormat(kDepthFormatNone)
+ {
+ // Runs after the base constructor's RenderSurfaceBase_Init call.
+ RenderSurfaceBase_InitDepth(*this);
+ }
+ DepthBufferFormat depthFormat;
+};
diff --git a/Runtime/GfxDevice/d3d/TimerQueryD3D9.cpp b/Runtime/GfxDevice/d3d/TimerQueryD3D9.cpp
new file mode 100644
index 0000000..70f9ed7
--- /dev/null
+++ b/Runtime/GfxDevice/d3d/TimerQueryD3D9.cpp
@@ -0,0 +1,196 @@
+#include "UnityPrefix.h"
+#if ENABLE_PROFILER
+#include "GfxDeviceD3D9.h"
+#include "TimerQueryD3D9.h"
+
+
+// Creates an inactive timer query and tries to allocate its D3D timestamp query.
+// If the device cannot create the query, m_Query stays NULL and Measure() will
+// simply report a time of 0.
+TimerQueryD3D9::TimerQueryD3D9()
+    : m_Query(NULL)
+    , m_Time(0)
+    , m_TimeMultiplier(0.0f)
+    , m_Active(false)
+{
+    GetD3DDevice()->CreateQuery(D3DQUERYTYPE_TIMESTAMP, &m_Query);
+}
+
+// Hands the D3D timestamp query to SAFE_RELEASE.
+// NOTE(review): SAFE_RELEASE is presumably NULL-safe (m_Query can be NULL when query
+// creation failed or after TimerQueriesD3D9::ReleaseAllQueries) - confirm the macro.
+TimerQueryD3D9::~TimerQueryD3D9()
+{
+ SAFE_RELEASE(m_Query);
+}
+
+// Issues a new GPU timestamp for this query. Any result still pending from a previous
+// Measure() is waited for first. When the query object or the frequency query is
+// missing, the measurement is skipped and the time is reported as 0.
+void TimerQueryD3D9::Measure()
+{
+    // Flush previous result
+    GetElapsed(kWaitRenderThread);
+
+    TimerQueriesD3D9& manager = GetD3D9GfxDevice().GetTimerQueries();
+    const bool canIssue = m_Query && manager.HasFrequencyQuery();
+    if (!canIssue)
+    {
+        m_Time = 0;
+    }
+    else
+    {
+        manager.AddActiveTimerQuery(this);
+        m_Query->Issue(D3DISSUE_END);
+        m_Active = true;
+        // Result is not known until the query has been polled.
+        m_Time = kInvalidProfileTime;
+    }
+    // The multiplier is supplied again by TimerQueriesD3D9::EndTimerQueries.
+    m_TimeMultiplier = 0.0f;
+}
+
+// Returns the measured time. While this query is still active, pending queries are
+// polled in order (blocking when kWaitRenderThread is set in 'flags') until this one
+// completes or no further progress can be made.
+ProfileTimeFormat TimerQueryD3D9::GetElapsed(UInt32 flags)
+{
+    const bool blockUntilReady = (flags & kWaitRenderThread) != 0;
+    TimerQueriesD3D9& manager = GetD3D9GfxDevice().GetTimerQueries();
+    while (m_Active && manager.PollNextTimerQuery(blockUntilReady))
+    {
+        // keep draining the polled queue; our own result clears m_Active
+    }
+    return m_Time;
+}
+
+// Tries to fetch the timestamp of this query.
+//   prevTime - timestamp of the previously resolved query (0 = none); updated on return.
+//   wait     - when true, keep polling (with D3DGETDATA_FLUSH) until a result arrives.
+// Returns true when the query is finished (successfully or with an error), false when
+// the result is not available yet and 'wait' is false.
+bool TimerQueryD3D9::PollResult(UInt64& prevTime, bool wait)
+{
+    const DWORD getDataFlags = wait ? D3DGETDATA_FLUSH : 0;
+    do
+    {
+        UINT64 timestamp;
+        const HRESULT hr = m_Query->GetData(&timestamp, sizeof(timestamp), getDataFlags);
+        if (hr == S_FALSE)
+            continue;   // not ready yet: retry only if the caller asked us to wait
+
+        if (hr == S_OK)
+        {
+            // Time is measured relative to the previous query in the chain.
+            UInt64 elapsed = prevTime ? (timestamp - prevTime) : 0;
+            m_Time = ProfileTimeFormat(elapsed * m_TimeMultiplier);
+            prevTime = timestamp;
+        }
+        else
+        {
+            // Stop polling on unknown result (e.g D3DERR_DEVICELOST)
+            m_Time = 0;
+            prevTime = 0;
+        }
+        return true;
+    } while (wait);
+
+    return false;
+}
+
+// Starts with no frequency query, no start-of-frame queries and empty query lists.
+TimerQueriesD3D9::TimerQueriesD3D9()
+    : m_LastQueryTime(0)
+    , m_FrequencyQuery(NULL)
+    , m_StartTimeQueryIndex(0)
+{
+    for (int slot = 0; slot < kStartTimeQueryCount; ++slot)
+        m_StartTimeQueries[slot] = NULL;
+}
+
+// Drops every D3D query object owned by the timer system: the frequency query, the
+// start-of-frame queries (which are deleted outright) and the D3D query of every
+// remaining TimerQueryD3D9. All remaining queries end up inactive in the inactive list.
+void TimerQueriesD3D9::ReleaseAllQueries()
+{
+    SAFE_RELEASE(m_FrequencyQuery);
+
+    for (int slot = 0; slot != kStartTimeQueryCount; ++slot)
+    {
+        delete m_StartTimeQueries[slot];
+        m_StartTimeQueries[slot] = NULL;
+    }
+
+    // Gather everything into the inactive list before touching the queries.
+    m_InactiveTimerQueries.append(m_ActiveTimerQueries);
+    m_InactiveTimerQueries.append(m_PolledTimerQueries);
+
+    TimerQueryList::iterator cur = m_InactiveTimerQueries.begin();
+    for (; cur != m_InactiveTimerQueries.end(); ++cur)
+    {
+        TimerQueryD3D9& timer = *cur;
+        timer.m_Active = false;
+        timer.m_Time = 0;
+        SAFE_RELEASE(timer.m_Query);
+    }
+}
+
+// Re-creates the D3D timestamp query of every inactive timer query. Expects the
+// active and polled lists to be empty, i.e. ReleaseAllQueries has run before.
+void TimerQueriesD3D9::RecreateAllQueries()
+{
+    Assert(m_ActiveTimerQueries.empty());
+    Assert(m_PolledTimerQueries.empty());
+
+    TimerQueryList::iterator cur = m_InactiveTimerQueries.begin();
+    for (; cur != m_InactiveTimerQueries.end(); ++cur)
+    {
+        TimerQueryD3D9& timer = *cur;
+        GetD3DDevice()->CreateQuery(D3DQUERYTYPE_TIMESTAMP, &timer.m_Query);
+    }
+}
+
+// Opens a block of timer queries: resolves whatever is ready from earlier frames,
+// issues the timestamp-frequency query (creating it on first use) and records a
+// start timestamp using the next slot of the start-time query ring.
+void TimerQueriesD3D9::BeginTimerQueries()
+{
+    // Poll queries from previous frames
+    PollTimerQueries();
+
+    if (!m_FrequencyQuery)
+        GetD3DDevice()->CreateQuery(D3DQUERYTYPE_TIMESTAMPFREQ, &m_FrequencyQuery);
+    // Creation may have failed, so check again before issuing.
+    if (m_FrequencyQuery)
+        m_FrequencyQuery->Issue(D3DISSUE_END);
+
+    TimerQueryD3D9*& startQuery = m_StartTimeQueries[m_StartTimeQueryIndex];
+    if (!startQuery)
+        startQuery = new TimerQueryD3D9;
+    startQuery->Measure();
+
+    // Advance the ring for the next call.
+    m_StartTimeQueryIndex = (m_StartTimeQueryIndex + 1) % kStartTimeQueryCount;
+}
+
+// Closes a block of timer queries: waits for the timestamp frequency, hands the
+// resulting ticks-to-nanoseconds multiplier to every active query and moves the
+// active queries to the polled list. Does nothing without a frequency query.
+void TimerQueriesD3D9::EndTimerQueries()
+{
+    if (!m_FrequencyQuery)
+        return;
+
+    // Spin (flushing the command buffer) until the frequency is available or an error occurs.
+    UINT64 ticksPerSecond;
+    HRESULT hr = m_FrequencyQuery->GetData(&ticksPerSecond, sizeof(ticksPerSecond), D3DGETDATA_FLUSH);
+    while (hr == S_FALSE)
+        hr = m_FrequencyQuery->GetData(&ticksPerSecond, sizeof(ticksPerSecond), D3DGETDATA_FLUSH);
+
+    if (hr == S_OK)
+    {
+        const float nanosecondsPerTick = float(1000000000.0 / (double)ticksPerSecond);
+        for (TimerQueryList::iterator cur = m_ActiveTimerQueries.begin(), last = m_ActiveTimerQueries.end(); cur != last; ++cur)
+            cur->SetTimeMultiplier(nanosecondsPerTick);
+    }
+
+    // Move queries from active to polled list
+    m_PolledTimerQueries.append(m_ActiveTimerQueries);
+}
+
+// Allocates a new timer query and registers it in the inactive list.
+// The caller receives the raw pointer to the heap-allocated query.
+TimerQueryD3D9* TimerQueriesD3D9::CreateTimerQuery()
+{
+    TimerQueryD3D9* const created = new TimerQueryD3D9;
+    m_InactiveTimerQueries.push_back(*created);
+    return created;
+}
+
+// Moves 'query' to the end of the active list.
+void TimerQueriesD3D9::AddActiveTimerQuery(TimerQueryD3D9* query)
+{
+ // Unlink from whichever list currently holds the query before re-inserting it.
+ query->RemoveFromList();
+ m_ActiveTimerQueries.push_back(*query);
+}
+
+// Resolves, without blocking, as many polled queries as currently have results.
+void TimerQueriesD3D9::PollTimerQueries()
+{
+    while (PollNextTimerQuery(false))
+    {
+        // each successful poll retires one query; stop at the first that is not ready
+    }
+}
+
+// Polls the oldest entry of the polled list. When it has finished, the query is
+// marked inactive and moved to the inactive list.
+// Returns true if a query was retired, false if the list is empty or the oldest
+// query has no result yet.
+bool TimerQueriesD3D9::PollNextTimerQuery(bool wait)
+{
+    if (m_PolledTimerQueries.empty())
+        return false;
+
+    TimerQueryD3D9& oldest = m_PolledTimerQueries.front();
+    const bool finished = oldest.PollResult(m_LastQueryTime, wait);
+    if (!finished)
+        return false;
+
+    oldest.m_Active = false;
+    oldest.RemoveFromList();
+    m_InactiveTimerQueries.push_back(oldest);
+    return true;
+}
+
+#endif
diff --git a/Runtime/GfxDevice/d3d/TimerQueryD3D9.h b/Runtime/GfxDevice/d3d/TimerQueryD3D9.h
new file mode 100644
index 0000000..ecc4a94
--- /dev/null
+++ b/Runtime/GfxDevice/d3d/TimerQueryD3D9.h
@@ -0,0 +1,67 @@
+#ifndef TIMERQUERYD3D9_H
+#define TIMERQUERYD3D9_H
+
+#if ENABLE_PROFILER
+
+#include "Runtime/GfxDevice/GfxTimerQuery.h"
+
+class TimerQueriesD3D9;
+
+// A single GPU timestamp query for the D3D9 device. Instances are created only by
+// TimerQueriesD3D9 (the constructor is private), which also owns the intrusive lists
+// the query moves between (inactive -> active -> polled -> inactive).
+class TimerQueryD3D9 : public GfxTimerQuery
+{
+public:
+    ~TimerQueryD3D9();
+
+    // Issues a new timestamp; any pending result is resolved first.
+    virtual void Measure();
+    // Returns the measured time, polling (and optionally waiting) while the query is active.
+    virtual ProfileTimeFormat GetElapsed(UInt32 flags);
+
+    // Fetches the timestamp; 'prevTime' is the previous query's timestamp and is updated.
+    // Returns true when the query is finished (with a result or an error).
+    bool PollResult(UInt64& prevTime, bool wait);
+    // Sets the factor that converts timestamp ticks to ProfileTimeFormat units.
+    void SetTimeMultiplier(float tm) { m_TimeMultiplier = tm; }
+
+private:
+    // 'friend class' (with the class-key) is accepted by all C++ versions; the bare
+    // 'friend TimerQueriesD3D9;' form is only valid from C++11 on.
+    friend class TimerQueriesD3D9;
+    TimerQueryD3D9();
+
+    IDirect3DQuery9* m_Query;           // D3D timestamp query; NULL if creation failed or after release
+    ProfileTimeFormat m_Time;           // last resolved time (kInvalidProfileTime while pending)
+    float m_TimeMultiplier;             // ticks -> time conversion factor
+    bool m_Active;                      // true between Measure() and the result being polled
+};
+
+// Manager for all D3D9 GPU timer queries: owns the timestamp-frequency query, a small
+// ring of start-of-frame queries and the three lists a TimerQueryD3D9 moves between.
+class TimerQueriesD3D9
+{
+public:
+ TimerQueriesD3D9();
+
+ // Release / re-create the underlying D3D query objects.
+ // NOTE(review): presumably used around device loss/reset - confirm against callers.
+ void ReleaseAllQueries();
+ void RecreateAllQueries();
+
+ // Bracket a block of timer measurements.
+ void BeginTimerQueries();
+ void EndTimerQueries();
+
+ // Allocates a new query and registers it in the inactive list.
+ TimerQueryD3D9* CreateTimerQuery();
+
+ // Moves a query to the active list.
+ void AddActiveTimerQuery(TimerQueryD3D9* query);
+ // Non-blocking: resolves every polled query that already has a result.
+ void PollTimerQueries();
+ // Resolves the oldest polled query; returns true if one was retired.
+ bool PollNextTimerQuery(bool wait);
+
+ bool HasFrequencyQuery() const { return m_FrequencyQuery != NULL; }
+
+private:
+ enum
+ {
+ // Size of the ring of start-of-frame queries.
+ kStartTimeQueryCount = 3
+ };
+
+ UInt64 m_LastQueryTime; // timestamp of the most recently resolved query (0 = none)
+ IDirect3DQuery9* m_FrequencyQuery; // D3DQUERYTYPE_TIMESTAMPFREQ query, created lazily
+ TimerQueryD3D9* m_StartTimeQueries[kStartTimeQueryCount];
+ int m_StartTimeQueryIndex; // next ring slot to use
+ typedef List<TimerQueryD3D9> TimerQueryList;
+ TimerQueryList m_InactiveTimerQueries; // created or finished queries
+ TimerQueryList m_ActiveTimerQueries; // issued since BeginTimerQueries
+ TimerQueryList m_PolledTimerQueries; // waiting for their result
+};
+
+#endif
+#endif
diff --git a/Runtime/GfxDevice/d3d/VertexDeclarations.cpp b/Runtime/GfxDevice/d3d/VertexDeclarations.cpp
new file mode 100644
index 0000000..180a105
--- /dev/null
+++ b/Runtime/GfxDevice/d3d/VertexDeclarations.cpp
@@ -0,0 +1,124 @@
+#include "UnityPrefix.h"
+#include "VertexDeclarations.h"
+#include "D3D9Context.h"
+#include "Runtime/GfxDevice/GfxDeviceTypes.h"
+
+// Strict weak ordering for map keys: a raw byte-wise comparison of the channel arrays.
+bool VertexDeclarations::KeyType::operator < (const KeyType& rhs) const
+{
+    const int byteOrder = memcmp(channels, rhs.channels, sizeof(channels));
+    return byteOrder < 0;
+}
+
+// Nothing to set up: the declaration cache starts empty.
+VertexDeclarations::VertexDeclarations()
+{
+}
+
+// Releases every cached vertex declaration.
+VertexDeclarations::~VertexDeclarations()
+{
+ Clear();
+}
+
+// D3D vertex element semantic: usage (a D3DDECLUSAGE value stored in a byte) plus usage index.
+struct D3DVertexSemantics
+{
+ UInt8 usage;
+ UInt8 index;
+};
+
+// D3D semantic for each shader channel; indexed by channel number in GetVertexDecl.
+// NOTE(review): the entry order must match the shader channel enum - confirm if channels change.
+static D3DVertexSemantics kChannelVertexSemantics[kShaderChannelCount] =
+{
+ { D3DDECLUSAGE_POSITION, 0 }, // position
+ { D3DDECLUSAGE_NORMAL, 0 }, // normal
+ { D3DDECLUSAGE_COLOR, 0 }, // color
+ { D3DDECLUSAGE_TEXCOORD, 0 }, // uv
+ { D3DDECLUSAGE_TEXCOORD, 1 }, // uv2
+ { D3DDECLUSAGE_TANGENT, 0 }, // tangent
+};
+
+// Maps a channel's (format, dimension) pair to the matching D3DDECLTYPE.
+// Supported: float x1..4, float16 x2/x4 and packed color. Any other combination
+// asserts and yields D3DDECLTYPE_UNUSED.
+static FORCE_INLINE D3DDECLTYPE GetD3DVertexDeclType(const ChannelInfo& info)
+{
+    switch (info.format)
+    {
+        case kChannelFormatFloat:
+        {
+            switch (info.dimension)
+            {
+                case 1: return D3DDECLTYPE_FLOAT1;
+                case 2: return D3DDECLTYPE_FLOAT2;
+                case 3: return D3DDECLTYPE_FLOAT3;
+                case 4: return D3DDECLTYPE_FLOAT4;
+            }
+            break;
+        }
+        case kChannelFormatFloat16:
+        {
+            // Half floats only exist as 2- and 4-component declaration types.
+            switch (info.dimension)
+            {
+                case 2: return D3DDECLTYPE_FLOAT16_2;
+                case 4: return D3DDECLTYPE_FLOAT16_4;
+            }
+            break;
+        }
+        case kChannelFormatColor:
+        {
+            return D3DDECLTYPE_D3DCOLOR;
+        }
+    }
+    // A bare string literal is a non-null pointer and would always pass the assert;
+    // negating it makes the assert actually fire on this unsupported path.
+    Assert(!"No matching D3D vertex decl type!");
+    return D3DDECLTYPE_UNUSED;
+}
+
+// Returns the D3D vertex declaration describing 'channels', creating and caching it on
+// first use. The cache key is the raw channel array.
+// Returns NULL when the device fails to create the declaration; the NULL result is
+// cached too, so the failing creation is not retried on every call.
+IDirect3DVertexDeclaration9* VertexDeclarations::GetVertexDecl( const ChannelInfoArray channels )
+{
+    KeyType key;
+    memcpy(key.channels, channels, sizeof(key.channels));
+
+    // already have vertex declaration for these formats?
+    VertexDeclMap::iterator it = m_VertexDeclMap.find( key );
+    if( it != m_VertexDeclMap.end() )
+        return it->second;
+
+    // don't have this declaration yet - create one
+    // KD: not sure if elements need to be ordered by stream, playing it safe
+    D3DVERTEXELEMENT9 elements[kShaderChannelCount+1];  // +1 for the end marker
+    int elIndex = 0;
+    for( int stream = 0; stream < kMaxVertexStreams; stream++ )
+    {
+        for( int chan = 0; chan < kShaderChannelCount; chan++ )
+        {
+            if( channels[chan].stream == stream && channels[chan].IsValid() )
+            {
+                DebugAssert(elIndex < kShaderChannelCount);
+                D3DVERTEXELEMENT9& elem = elements[elIndex];
+                elem.Stream = stream;
+                elem.Offset = channels[chan].offset;
+                elem.Type = GetD3DVertexDeclType(channels[chan]);
+                elem.Method = D3DDECLMETHOD_DEFAULT;
+                elem.Usage = kChannelVertexSemantics[chan].usage;
+                elem.UsageIndex = kChannelVertexSemantics[chan].index;
+                ++elIndex;
+            }
+        }
+    }
+    D3DVERTEXELEMENT9 declEnd = D3DDECL_END();
+    elements[elIndex] = declEnd;
+
+    IDirect3DVertexDeclaration9* decl = NULL;
+    HRESULT hr = GetD3DDevice()->CreateVertexDeclaration( elements, &decl );
+    // Flag the failure instead of swallowing it silently.
+    AssertIf( FAILED(hr) );
+    if( FAILED(hr) )
+    {
+        // Never hand out (or cache) whatever the failed call may have left in 'decl'.
+        decl = NULL;
+    }
+    m_VertexDeclMap[key] = decl;
+    return decl;
+}
+
+// Releases every cached vertex declaration and empties the cache.
+void VertexDeclarations::Clear()
+{
+    for( VertexDeclMap::iterator entry = m_VertexDeclMap.begin(); entry != m_VertexDeclMap.end(); ++entry )
+    {
+        IDirect3DVertexDeclaration9* const decl = entry->second;
+        // NULL entries exist for declarations whose creation failed.
+        if( !decl )
+            continue;
+
+        const ULONG remainingRefs = decl->Release();
+        // The cache is expected to hold the last reference.
+        AssertIf( remainingRefs != 0 );
+    }
+    m_VertexDeclMap.clear();
+}
diff --git a/Runtime/GfxDevice/d3d/VertexDeclarations.h b/Runtime/GfxDevice/d3d/VertexDeclarations.h
new file mode 100644
index 0000000..f737f5a
--- /dev/null
+++ b/Runtime/GfxDevice/d3d/VertexDeclarations.h
@@ -0,0 +1,26 @@
+#pragma once
+
+#include "D3D9Includes.h"
+#include "Runtime\Filters\Mesh\VertexData.h"
+#include <map>
+
+
+// Cache of D3D9 vertex declarations keyed by the vertex channel layout.
+class VertexDeclarations
+{
+public:
+ VertexDeclarations();
+ // Releases all cached declarations (calls Clear).
+ ~VertexDeclarations();
+
+ // Returns the (cached or newly created) declaration for the given channel layout.
+ IDirect3DVertexDeclaration9* GetVertexDecl( const ChannelInfoArray channels );
+ // Releases every cached declaration and empties the cache.
+ void Clear();
+
+private:
+ // Map key: the channel array, ordered by a byte-wise memcmp.
+ struct KeyType
+ {
+ bool operator < (const KeyType& rhs) const;
+ ChannelInfoArray channels;
+ };
+
+ typedef UNITY_MAP(kMemVertexData, KeyType, IDirect3DVertexDeclaration9*) VertexDeclMap;
+ VertexDeclMap m_VertexDeclMap;
+};
diff --git a/Runtime/GfxDevice/d3d/VertexPipeD3D9.cpp b/Runtime/GfxDevice/d3d/VertexPipeD3D9.cpp
new file mode 100644
index 0000000..8a91f74
--- /dev/null
+++ b/Runtime/GfxDevice/d3d/VertexPipeD3D9.cpp
@@ -0,0 +1,705 @@
+#include "UnityPrefix.h"
+#include "VertexPipeD3D9.h"
+#include "ShaderGenerator.h"
+#include "D3D9Utils.h"
+#include "Runtime/GfxDevice/BuiltinShaderParams.h"
+#include "External/DirectX/builds/dx9include/d3dx9.h"
+#include <map>
+
+
+
+#define PRINT_VERTEX_PIPE_STATS 0
+
+#define PRINT_AMD_SHADER_ANALYZER_OUTPUT 0
+
+
+
+
+// GpuProgramsD3D.cpp
+ID3DXBuffer* AssembleD3DShader( const std::string& source );
+
+
+#if PRINT_AMD_SHADER_ANALYZER_OUTPUT
+// Debug helper (only built when PRINT_AMD_SHADER_ANALYZER_OUTPUT is non-zero):
+// writes 'source' to ShaderInput.txt, runs AMD GPU ShaderAnalyzer on it, waits for the
+// tool to finish and prints the contents of ShaderOutput.txt to the console.
+// The input/output files are left on disk for inspection.
+void PrintAMDShaderAnalyzer( const std::string& source )
+{
+    const char* kPath = "C:\\Program Files\\AMD\\GPU ShaderAnalyzer 1.45\\GPUShaderAnalyzer.exe";
+    const char* kInputPath = "ShaderInput.txt";
+    const char* kOutputPath = "ShaderOutput.txt";
+    DeleteFileA(kInputPath);
+    DeleteFileA(kOutputPath);
+    FILE* fout = fopen(kInputPath, "wt");
+    // Without the input file there is nothing to analyze (and fwrite on NULL would crash).
+    if( !fout )
+        return;
+    fwrite(source.c_str(), source.size(), 1, fout);
+    fclose(fout);
+
+    // The executable path contains spaces; it has to be quoted, otherwise CreateProcess
+    // (called with a NULL application name) resolves the module name ambiguously.
+    std::string commandLine = std::string("\"") + kPath + "\" " + kInputPath + " -Analyze " + kOutputPath + " -Module Latest -ASIC HD3870";
+
+    STARTUPINFOA si;
+    ZeroMemory( &si, sizeof(si) );
+    si.cb = sizeof(si);
+
+    PROCESS_INFORMATION pi;
+    ZeroMemory( &pi, sizeof(pi) );
+
+    if( CreateProcessA(
+        NULL,                           // name of executable module
+        (char*)commandLine.c_str(),     // command line string
+        NULL,                           // process attributes
+        NULL,                           // thread attributes
+        FALSE,                          // handle inheritance option
+        0,                              // creation flags
+        NULL,                           // new environment block
+        NULL,                           // current directory name
+        &si,                            // startup information
+        &pi ) )                         // process information
+    {
+        WaitForSingleObject( pi.hProcess, INFINITE );
+        CloseHandle( pi.hProcess );
+        CloseHandle( pi.hThread );
+
+        FILE* fin = fopen(kOutputPath, "rt");
+        if( fin ) {
+            fseek(fin, 0, SEEK_END);
+            long length = ftell(fin);
+            fseek(fin, 0, SEEK_SET);
+            // ftell returns -1 on failure; an empty report has nothing to print either.
+            if( length > 0 ) {
+                char* buffer = new char[length+1];
+                // Zero-fill so the text stays terminated even if fewer bytes are read
+                // (text mode may shrink the data through newline translation).
+                memset(buffer, 0, length+1);
+                fread(buffer, 1, length, fin);
+                printf_console("%s\n", buffer);
+                delete[] buffer;
+            }
+            fclose(fin);
+        }
+    }
+    //DeleteFileA(kInputPath);
+    //DeleteFileA(kOutputPath);
+}
+#endif
+
+
+
+// Packs a 4-component float color (read in r, g, b, a order) into a D3DCOLOR,
+// converting each component with NormalizedToByte.
+// NOTE(review): components are presumably expected in [0,1] - confirm NormalizedToByte's clamping.
+static inline D3DCOLOR ColorToD3D( const float color[4] )
+{
+ return D3DCOLOR_RGBA( NormalizedToByte(color[0]), NormalizedToByte(color[1]), NormalizedToByte(color[2]), NormalizedToByte(color[3]) );
+}
+
+
+// Pushes the default fixed-function vertex pipe state to the device and clears the
+// "have to reset device state" flag:
+//   - world = current world-view matrix, view = identity with Z negated, projection = builtin proj
+//   - per-vertex color, all vertex lights, ambient, lighting and specular turned off
+//   - every texture stage reads its own coordinate set with texture transforms disabled
+// 'config' is currently unused.
+static void ResetDeviceVertexPipeStateD3D9 (IDirect3DDevice9* dev, const TransformState& state, const BuiltinShaderParamValues& builtins, const VertexPipeConfig& config, const VertexPipeDataD3D9& data)
+{
+    DebugAssertIf (!dev);
+
+    data.haveToResetDeviceState = false;
+
+    dev->SetTransform( D3DTS_WORLD, (const D3DMATRIX*)state.worldViewMatrix.GetPtr() );
+    // The world transform already holds world*view, so the D3D view matrix only flips Z.
+    Matrix4x4f dummyViewMatrix;
+    dummyViewMatrix.SetIdentity(); dummyViewMatrix.Get(2,2) = -1.0f;
+    dev->SetTransform( D3DTS_VIEW, (const D3DMATRIX*)dummyViewMatrix.GetPtr() );
+    dev->SetTransform( D3DTS_PROJECTION, (const D3DMATRIX*)builtins.GetMatrixParam(kShaderMatProj).GetPtr() );
+
+    dev->SetRenderState( D3DRS_COLORVERTEX, FALSE );
+
+    for( int i = 0; i < kMaxSupportedVertexLights; ++i )
+        dev->LightEnable( i, FALSE );
+
+    dev->SetRenderState( D3DRS_AMBIENT, 0 );
+    dev->SetRenderState( D3DRS_LIGHTING, FALSE );
+    dev->SetRenderState( D3DRS_SPECULARENABLE, FALSE );
+
+    for( int i = 0; i < kMaxSupportedTextureCoords; ++i ) {
+        dev->SetTextureStageState( i, D3DTSS_TEXCOORDINDEX, i );
+        // D3DTTFF_DISABLE is the proper constant for this state (same value, 0, as the
+        // D3DTSS_TCI_PASSTHRU texcoord-index flag that was used here before).
+        dev->SetTextureStageState( i, D3DTSS_TEXTURETRANSFORMFLAGS, D3DTTFF_DISABLE );
+    }
+}
+
+// Resets all cached vertex pipe state (config, data, transforms, "previous" tracking).
+// With a device the default state is applied immediately; without one the data stays
+// flagged so the device state is reset on the next fixed-function setup.
+void ResetVertexPipeStateD3D9 (IDirect3DDevice9* dev, TransformState& state, BuiltinShaderParamValues& builtins, VertexPipeConfig& config, VertexPipeDataD3D9& data, VertexPipePrevious& previous)
+{
+    config.Reset();
+    data.Reset();
+    state.Invalidate(builtins);
+    previous.Reset();
+
+    // Mark the device state dirty first; the device reset below clears the flag again.
+    data.haveToResetDeviceState = true;
+    if (!dev)
+        return;
+
+    ResetDeviceVertexPipeStateD3D9 (dev, state, builtins, config, data);
+}
+
+
+// Applies the fixed-function vertex pipe state described by 'config' / 'data' to the device.
+// Many states are only sent when they differ from what 'previous' recorded; at the end
+// 'previous' is updated to the new config.
+// vsActive: when true, the world matrix, material, texgen, texture matrices and the
+// SetVertexShader(NULL) call are skipped and texture transforms are disabled.
+// immediateMode: for plain UV sources, texture stage i reads coordinate set i.
+// NOTE(review): vsActive presumably means a user vertex shader is bound - confirm with callers.
+void SetupFixedFunctionD3D9 (
+ IDirect3DDevice9* dev,
+ TransformState& state,
+ BuiltinShaderParamValues& builtins,
+ const VertexPipeConfig& config,
+ const VertexPipeDataD3D9& data,
+ VertexPipePrevious& previous,
+ bool vsActive, bool immediateMode)
+{
+ // Deferred reset requested by ResetVertexPipeStateD3D9 when no device was available.
+ if (dev && data.haveToResetDeviceState)
+ ResetDeviceVertexPipeStateD3D9 (dev, state, builtins, config, data);
+
+ // matrices
+ if (!vsActive)
+ {
+ D3D9_CALL(dev->SetTransform( D3DTS_WORLD, (const D3DMATRIX*)state.worldViewMatrix.GetPtr() ));
+ }
+
+ // set color material first, then material, then color
+ if( config.colorMaterial != previous.config.colorMaterial )
+ {
+ if( config.colorMaterial != kColorMatDisabled )
+ {
+ D3DMATERIALCOLORSOURCE srcAmbient, srcDiffuse, srcEmission;
+ switch( config.colorMaterial )
+ {
+ case kColorMatEmission:
+ srcAmbient = D3DMCS_MATERIAL;
+ srcDiffuse = D3DMCS_MATERIAL;
+ srcEmission = D3DMCS_COLOR1;
+ break;
+ case kColorMatAmbientAndDiffuse:
+ srcAmbient = D3DMCS_COLOR1;
+ srcDiffuse = D3DMCS_COLOR1;
+ srcEmission = D3DMCS_MATERIAL;
+ break;
+ default:
+ // NOTE(review): an unrecognized mode leaves the whole function here, skipping all
+ // remaining state setup and the update of 'previous' - confirm this is intended.
+ return;
+ }
+ D3D9_CALL(dev->SetRenderState( D3DRS_AMBIENTMATERIALSOURCE, srcAmbient ));
+ D3D9_CALL(dev->SetRenderState( D3DRS_DIFFUSEMATERIALSOURCE, srcDiffuse ));
+ D3D9_CALL(dev->SetRenderState( D3DRS_SPECULARMATERIALSOURCE, D3DMCS_MATERIAL ));
+ D3D9_CALL(dev->SetRenderState( D3DRS_EMISSIVEMATERIALSOURCE, srcEmission ));
+ D3D9_CALL(dev->SetRenderState( D3DRS_COLORVERTEX, TRUE ));
+ }
+ else
+ {
+ D3D9_CALL(dev->SetRenderState( D3DRS_COLORVERTEX, FALSE ));
+ }
+ }
+
+ // material
+ if( !vsActive && config.hasLighting )
+ D3D9_CALL(dev->SetMaterial( &data.material ));
+
+ // lights
+ // Fields shared by every light are filled once; per-light fields are set in the loop.
+ D3DLIGHT9 d3dlight;
+ d3dlight.Ambient.r = d3dlight.Ambient.g = d3dlight.Ambient.b = d3dlight.Ambient.a = 0.0f;
+ d3dlight.Falloff = 1.0f;
+ d3dlight.Attenuation0 = 1.0f;
+ d3dlight.Attenuation1 = 0.0f;
+
+ // Bit i set = light i enabled; lights [0, vertexLightCount) are the enabled ones.
+ const UInt32 lightsEnabled = (1<<data.vertexLightCount)-1;
+ const UInt32 lightsPrevious = (1<<previous.vertexLightCount)-1;
+ const UInt32 lightsDifferent = lightsPrevious ^ lightsEnabled;
+ UInt32 lightMask = 1;
+ for (int i = 0; i < kMaxSupportedVertexLights; ++i, lightMask <<= 1)
+ {
+ const UInt32 lightDiff = lightsDifferent & lightMask;
+ if( lightsEnabled & lightMask )
+ {
+ const GfxVertexLight& l = data.lights[i];
+ static D3DLIGHTTYPE kD3DTypes[kLightTypeCount] = { D3DLIGHT_SPOT, D3DLIGHT_DIRECTIONAL, D3DLIGHT_POINT };
+ d3dlight.Type = kD3DTypes[l.type];
+ d3dlight.Diffuse = *(const D3DCOLORVALUE*)&l.color;
+ d3dlight.Specular = *(const D3DCOLORVALUE*)&l.color;
+ d3dlight.Position = *(const D3DVECTOR*)&l.position;
+ d3dlight.Direction = *(const D3DVECTOR*)&l.spotDirection;
+ d3dlight.Range = l.range;
+ d3dlight.Attenuation2 = l.quadAtten;
+ // Inner cone is half of the spot angle, outer cone the full spot angle.
+ d3dlight.Theta = Deg2Rad(l.spotAngle) * 0.5f;
+ d3dlight.Phi = Deg2Rad(l.spotAngle);
+ D3D9_CALL(dev->SetLight (i,&d3dlight));
+ // Light parameters are always re-sent; the enable state only when it changed.
+ if (lightDiff)
+ D3D9_CALL(dev->LightEnable (i,TRUE));
+ }
+ else
+ {
+ if (lightDiff)
+ D3D9_CALL(dev->LightEnable (i, FALSE));
+ }
+ }
+ previous.vertexLightCount = data.vertexLightCount;
+
+
+ // ambient, lighting & specular
+ if( data.ambient != previous.ambient )
+ {
+ D3D9_CALL(dev->SetRenderState( D3DRS_AMBIENT, ColorToD3D(data.ambient.GetPtr()) ));
+ previous.ambient = data.ambient;
+ }
+ if( config.hasLighting != previous.config.hasLighting )
+ {
+ D3D9_CALL(dev->SetRenderState( D3DRS_LIGHTING, config.hasLighting ? TRUE : FALSE ));
+ }
+ if( config.hasSpecular != previous.config.hasSpecular )
+ {
+ D3D9_CALL(dev->SetRenderState( D3DRS_SPECULARENABLE, config.hasSpecular ? TRUE : FALSE ));
+ }
+ if (config.hasNormalization != previous.config.hasNormalization)
+ {
+ D3D9_CALL(dev->SetRenderState (D3DRS_NORMALIZENORMALS, config.hasNormalization ? TRUE : FALSE));
+ }
+
+
+ UInt32 textureMatrixModes = config.textureMatrixModes;
+ UInt32 projectedTextures = data.projectedTextures;
+ UInt32 textureSources = config.textureSources;
+ for( int i = 0; i < config.texCoordCount; ++i )
+ {
+ // texgen
+ // 3 bits of TextureSourceMode per unit.
+ UInt32 texSource = (textureSources >> (i*3)) & 0x7;
+ if( !vsActive )
+ {
+ // Indexed by TextureSourceMode; object-space and eye-linear both use camera space
+ // position (object space is corrected through the texture matrix below).
+ static DWORD kTexSourceFlags[kTexSourceTypeCount] = { 0, 1, D3DTSS_TCI_SPHEREMAP, D3DTSS_TCI_CAMERASPACEPOSITION, D3DTSS_TCI_CAMERASPACEPOSITION, D3DTSS_TCI_CAMERASPACEREFLECTIONVECTOR, D3DTSS_TCI_CAMERASPACENORMAL };
+ DWORD d3dsource = kTexSourceFlags[texSource];
+ if( immediateMode && texSource <= kTexSourceUV1 )
+ d3dsource = i;
+ D3D9_CALL(dev->SetTextureStageState( i, D3DTSS_TEXCOORDINDEX, d3dsource ));
+ }
+ else
+ {
+ D3D9_CALL(dev->SetTextureStageState( i, D3DTSS_TEXCOORDINDEX, i ));
+ }
+
+ // matrix
+ // 2 bits of TextureMatrixMode per unit.
+ unsigned matmode = (textureMatrixModes >> (i*2)) & 3;
+ static DWORD kTexFlags[kTexMatrixTypeCount] = { D3DTTFF_DISABLE, D3DTTFF_COUNT2, D3DTTFF_COUNT3, D3DTTFF_COUNT4 };
+ DWORD textureTransformFlags = kTexFlags[matmode];
+ if (projectedTextures & (1<<i))
+ textureTransformFlags |= D3DTTFF_PROJECTED;
+ if (vsActive)
+ textureTransformFlags = D3DTTFF_DISABLE;
+ D3D9_CALL(dev->SetTextureStageState( i, D3DTSS_TEXTURETRANSFORMFLAGS, textureTransformFlags ));
+
+ if( !vsActive )
+ {
+ if( texSource == kTexSourceObject )
+ {
+ // D3D has no "object space" texture generation.
+ // So instead we use camera space, and multiply the matrix so it matches:
+ // newMatrix = matrix * inverse(modelview) * mirrorZ
+ // Mirror along Z is required to match OpenGL's generation (eye space Z is negative).
+ Matrix4x4f mv = state.worldViewMatrix;
+ mv.Invert_Full();
+ // Negate Z axis (mv = mv * Scale(1,1,-1))
+ mv.Get(0,2) = -mv.Get(0,2);
+ mv.Get(1,2) = -mv.Get(1,2);
+ mv.Get(2,2) = -mv.Get(2,2);
+ mv.Get(3,2) = -mv.Get(3,2);
+ Matrix4x4f texmat;
+ MultiplyMatrices4x4 (&state.texMatrices[i], &mv, &texmat);
+ D3D9_CALL(dev->SetTransform( (D3DTRANSFORMSTATETYPE)(D3DTS_TEXTURE0 + i), (const D3DMATRIX*)texmat.GetPtr() ));
+ }
+ else
+ {
+ D3D9_CALL(dev->SetTransform( (D3DTRANSFORMSTATETYPE)(D3DTS_TEXTURE0 + i), (const D3DMATRIX*)state.texMatrices[i].GetPtr() ));
+ }
+ }
+ }
+ // Units beyond the active count go back to pass-through when the count changed.
+ if( config.texCoordCount != previous.config.texCoordCount )
+ {
+ for( int i = config.texCoordCount; i < kMaxSupportedTextureCoords; ++i )
+ {
+ D3D9_CALL(dev->SetTextureStageState( i, D3DTSS_TEXCOORDINDEX, i ));
+ D3D9_CALL(dev->SetTextureStageState( i, D3DTSS_TEXTURETRANSFORMFLAGS, D3DTTFF_DISABLE ));
+ }
+ }
+
+ if( !vsActive )
+ D3D9_CALL(dev->SetVertexShader(NULL));
+ // The cached shader is cleared regardless of vsActive.
+ previous.vertexShader = NULL;
+ previous.config = config;
+}
+
+
+
+
+
+// ----------------------------------------------------------------------
+
+
+
+
+// One packed light as uploaded to the generated vertex shader: four float4 constants.
+struct VSLightData {
+ Vector4f pos; // light position, w = 1
+ Vector4f dir; // negated spot direction, w = 0
+ Vector4f color; // light color, w = 1
+ Vector4f params; // 1/(cos(theta/2)-cos(phi/2)), cos(phi/2), range^2, quadratic attenuation
+};
+
+// Cache entry for one generated vertex shader.
+struct ShaderData {
+ IDirect3DVertexShader9* shader; // created shader; NULL if assembly or creation failed
+ unsigned int usedConstants; // bit mask of constants used, as reported by the shader generator
+ std::string text; // generated shader source
+ //std::string debug;
+};
+
+
+// Map key wrapper that orders VertexPipeConfig values by viewing the bit-field struct
+// as a single 64-bit integer.
+// NOTE(review): relies on VertexPipeConfig occupying exactly 64 bits - confirm if fields are added.
+struct VertexPipeKeyCompare {
+ union {
+ VertexPipeConfig key;
+ UInt64 asint;
+ } u;
+ // Zero all bits first so unused bits of the config compare equal.
+ VertexPipeKeyCompare() { u.asint = 0; }
+ bool operator <( const VertexPipeKeyCompare& r ) const { return u.asint < r.u.asint; }
+};
+
+// Cache of generated vertex shaders, keyed by vertex pipe configuration.
+// Filled by GetShaderForConfig, emptied by CleanupVertexShadersD3D9.
+typedef std::map<VertexPipeKeyCompare, ShaderData> ShaderCache;
+static ShaderCache g_Shaders;
+
+
+// Returns the generated vertex shader emulating the fixed-function setup in 'config',
+// building, assembling and caching it on first use.
+//   usedConstants (out) - constant usage mask reported by the shader generator.
+// The result can be NULL when assembling or creating the shader failed; that result
+// is cached as well.
+static IDirect3DVertexShader9* GetShaderForConfig( const VertexPipeConfig& config, IDirect3DDevice9* dev, unsigned int& usedConstants )
+{
+    VertexPipeKeyCompare cacheKey;
+    cacheKey.u.key = config;
+
+    // Cache hit: nothing to generate.
+    ShaderCache::iterator cached = g_Shaders.find(cacheKey);
+    if( cached != g_Shaders.end() )
+    {
+        usedConstants = cached->second.usedConstants;
+        return cached->second.shader;
+    }
+
+    ShaderGenerator gen;
+    gen.AddFragment( &kVS_Pos );
+
+    if( !config.hasLighting )
+    {
+        // Unlit: diffuse output is either the vertex color or plain white.
+        if( config.hasVertexColor )
+            gen.AddFragment( &kVS_Out_Diffuse_VertexColor );
+        else
+            gen.AddFragment( &kVS_Out_Diffuse_White );
+    }
+    else
+    {
+        // normalize normals?
+        if (config.hasNormalization)
+            gen.AddFragment (&kVS_Normalize_Normal);
+
+        // Pick the specular or the diffuse-only family of lighting fragments.
+        const ShaderFragment* lightPre;
+        const ShaderFragment* lightDir;
+        const ShaderFragment* lightPoint;
+        const ShaderFragment* lightSpot;
+        if( config.hasSpecular )
+        {
+            lightPre = &kVS_Light_Specular_Pre;
+            lightDir = &kVS_Light_Specular_Dir;
+            lightPoint = &kVS_Light_Specular_Point;
+            lightSpot = &kVS_Light_Specular_Spot;
+        }
+        else
+        {
+            lightPre = &kVS_Light_Diffuse_Pre;
+            lightDir = &kVS_Light_Diffuse_Dir;
+            lightPoint = &kVS_Light_Diffuse_Point;
+            lightSpot = &kVS_Light_Diffuse_Spot;
+        }
+
+        // One fragment per light type that is actually present (bit per type).
+        const UInt32 lightTypes = config.hasLightType;
+        gen.AddFragment( lightPre );
+        if( lightTypes & (1<<kLightDirectional) )
+            gen.AddFragment( lightDir );
+        if( lightTypes & (1<<kLightPoint) )
+            gen.AddFragment( lightPoint );
+        if( lightTypes & (1<<kLightSpot) )
+            gen.AddFragment( lightSpot );
+
+        // Diffuse output: the color-material variants only apply with per-vertex color.
+        const ShaderFragment* outDiffuse = &kVS_Out_Diffuse_Lighting;
+        if( config.hasVertexColor )
+        {
+            if( config.colorMaterial == kColorMatAmbientAndDiffuse )
+                outDiffuse = &kVS_Out_Diffuse_Lighting_ColorDiffuseAmbient;
+            else if( config.colorMaterial == kColorMatEmission )
+                outDiffuse = &kVS_Out_Diffuse_Lighting_ColorEmission;
+        }
+        gen.AddFragment( outDiffuse );
+
+        if( config.hasSpecular )
+            gen.AddFragment( &kVS_Out_Specular_Lighting );
+    }
+
+    // texgen
+    // Fragment producing the source value, indexed by TextureSourceMode.
+    static const ShaderFragment* kFragSources[kTexSourceTypeCount] = {
+        &kVS_Load_UV0,
+        &kVS_Load_UV1,
+        &kVS_Temp_SphereMap,
+        &kVS_Temp_ObjSpacePos,
+        &kVS_Temp_CamSpacePos,
+        &kVS_Temp_CamSpaceRefl,
+        &kVS_Temp_CamSpaceN,
+    };
+    // Name of the value produced by the matching kFragSources entry.
+    static const char* kFragSourceNames[kTexSourceTypeCount] = {
+        "UV0",
+        "UV1",
+        "SPHR",
+        "OPOS",
+        "CPOS",
+        "REFL",
+        "CNOR",
+    };
+    // Fragment writing the texture coordinate, indexed by TextureMatrixMode.
+    static const ShaderFragment* kFragMatrices[kTexMatrixTypeCount] = {
+        &kVS_Out_TexCoord,
+        &kVS_Out_Matrix2,
+        &kVS_Out_Matrix3,
+        &kVS_Out_Matrix3
+    };
+
+    const int unitCount = config.texCoordCount;
+    const bool normalize = config.hasNormalization;
+
+    // First pass: compute every source value.
+    for( int unit = 0; unit < unitCount; ++unit )
+    {
+        const unsigned source = (config.textureSources >> (unit*3)) & 7;
+        // normalize normals? (only the normal-based texgen modes need them)
+        const bool usesNormal = (source == kTexSourceSphereMap || source == kTexSourceCubeReflect || source == kTexSourceCubeNormal);
+        if (normalize && usesNormal)
+            gen.AddFragment (&kVS_Normalize_Normal);
+        gen.AddFragment( kFragSources[source] );
+    }
+    // Second pass: route each source through the unit's matrix mode to the output.
+    for( int unit = 0; unit < unitCount; ++unit )
+    {
+        const unsigned source = (config.textureSources >> (unit*3)) & 7;
+        const unsigned matrixMode = (config.textureMatrixModes >> (unit*2)) & 3;
+        gen.AddFragment (kFragMatrices[matrixMode], kFragSourceNames[source], unit);
+    }
+
+    ShaderData entry;
+    entry.shader = NULL;
+    gen.GenerateShader( entry.text, entry.usedConstants );
+
+    ID3DXBuffer* assembled = AssembleD3DShader( entry.text );
+    if( assembled )
+    {
+        dev->CreateVertexShader( (const DWORD*)assembled->GetBufferPointer(), &entry.shader );
+        assembled->Release();
+    }
+
+    AssertIf(!entry.shader);
+    g_Shaders.insert( std::make_pair(cacheKey, entry) );
+
+    #if PRINT_AMD_SHADER_ANALYZER_OUTPUT
+    PrintAMDShaderAnalyzer( entry.text );
+    #endif
+
+    usedConstants = entry.usedConstants;
+    return entry.shader;
+}
+
+// Emulates the fixed-function vertex pipe with a generated vertex shader.
+// Picks (or generates) the shader for 'config', binds it if it differs from the one in
+// 'previous', and fills the constant cache with matrices, ambient/material colors,
+// packed lights and texture matrices. Finally updates the texture stage states and
+// records 'config' in 'previous'.
+// Does nothing when vsActive is true. 'config.hasLightType' is recomputed here from
+// the current lights. 'immediateMode' is currently unused.
+void SetupVertexShaderD3D9 (
+    IDirect3DDevice9* dev,
+    TransformState& state,
+    const BuiltinShaderParamValues& builtins,
+    VertexPipeConfig& config,
+    const VertexPipeDataD3D9& data,
+    VertexPipePrevious& previous,
+    VertexShaderConstantCache& cache,
+    bool vsActive, bool immediateMode)
+{
+    if( vsActive )
+        return;
+
+    D3D9_CALL(dev->SetTransform( D3DTS_WORLD, (const D3DMATRIX*)state.worldViewMatrix.GetPtr() ));
+
+    // figure out which light types do we have
+    if( !config.hasLighting ) {
+        config.hasLightType = 0;
+    } else {
+        UInt32 hasLightType = 0;
+        for (int i = 0; i < data.vertexLightCount; ++i)
+        {
+            hasLightType |= (1<<data.lights[i].type);
+        }
+        config.hasLightType = hasLightType;
+    }
+
+    // create vertex shader
+    unsigned int usedConstants;
+    IDirect3DVertexShader9* shader = GetShaderForConfig(config, dev, usedConstants);
+    AssertIf(!shader);
+
+    // set shader
+    if( shader != previous.vertexShader )
+    {
+        D3D9_CALL(dev->SetVertexShader( shader ));
+        previous.vertexShader = shader;
+    }
+
+    // matrices
+    Matrix4x4f mvp;
+    MultiplyMatrices4x4 (&builtins.GetMatrixParam(kShaderMatProj), &state.worldViewMatrix, &mvp );
+    mvp.Transpose();
+    cache.SetValues( kConstantLocations[kConstMatrixMVP], mvp.GetPtr(), 4 );
+
+    const Matrix4x4f& mv = state.worldViewMatrix;
+    cache.SetValues( kConstantLocations[kConstMatrixMV], mv.GetPtr(), 4 );
+
+    // The inverse-transpose is only computed when the generated shader reads it.
+    if( usedConstants & (1<<kConstMatrixMV_IT) )
+    {
+        Matrix4x4f matrixTemp;
+        Matrix4x4f::Invert_General3D( mv, matrixTemp );
+        matrixTemp.Transpose();
+        if (data.normalization == kNormalizationScale)
+        {
+            // Inverse transpose of modelview is only used to transform the normals
+            // in our generated shader. We can just stuff mesh scale in there.
+            float scale = Magnitude (state.worldMatrix.GetAxisX());
+            matrixTemp.Get (0, 0) *= scale;
+            matrixTemp.Get (1, 0) *= scale;
+            matrixTemp.Get (2, 0) *= scale;
+            matrixTemp.Get (0, 1) *= scale;
+            matrixTemp.Get (1, 1) *= scale;
+            matrixTemp.Get (2, 1) *= scale;
+            matrixTemp.Get (0, 2) *= scale;
+            matrixTemp.Get (1, 2) *= scale;
+            matrixTemp.Get (2, 2) *= scale;
+        }
+        cache.SetValues( kConstantLocations[kConstMatrixMV_IT], matrixTemp.GetPtr(), 4 );
+    }
+
+    // misc
+    float misc[4] = { 0, 4, 1, 0.5f };
+    cache.SetValues( kConstantLocations[kConstLightMisc], misc, 1 );
+
+    // if lighting is used:
+    if( config.hasLighting )
+    {
+        // ambient
+        if( config.colorMaterial != kColorMatAmbientAndDiffuse )
+        {
+            // Constant ambient term: scene ambient * material ambient, plus emission
+            // unless emission comes from the vertex color.
+            SimpleVec4 amb;
+            amb.val[0] = data.ambientClamped.val[0] * data.material.Ambient.r;
+            amb.val[1] = data.ambientClamped.val[1] * data.material.Ambient.g;
+            amb.val[2] = data.ambientClamped.val[2] * data.material.Ambient.b;
+            amb.val[3] = data.ambientClamped.val[3] * data.material.Ambient.a;
+            if( config.colorMaterial != kColorMatEmission ) {
+                amb.val[0] += data.material.Emissive.r;
+                amb.val[1] += data.material.Emissive.g;
+                amb.val[2] += data.material.Emissive.b;
+                amb.val[3] += data.material.Emissive.a;
+            }
+            cache.SetValues( kConstantLocations[kConstAmbient], amb.GetPtr(), 1 );
+        }
+        else
+        {
+            // Ambient is modulated by the vertex color in the shader, so the scene
+            // ambient and the emission are passed separately.
+            cache.SetValues( kConstantLocations[kConstColorMatAmbient], data.ambientClamped.GetPtr(), 1 );
+            cache.SetValues( kConstantLocations[kConstAmbient], &data.material.Emissive.r, 1 );
+        }
+        previous.ambient = data.ambient;
+
+        // material
+        cache.SetValues( kConstantLocations[kConstMatDiffuse], &data.material.Diffuse.r, 1 );
+        // NOTE(review): the same value is also sent directly to the device, bypassing the
+        // cache - looks redundant, kept as-is; confirm whether it is still needed.
+        D3D9_CALL(dev->SetVertexShaderConstantF( kConstantLocations[kConstMatDiffuse], &data.material.Diffuse.r, 1 ));
+        if( usedConstants & (1<<kConstMatSpecular) )
+        {
+            // Specular power travels in the alpha channel of the specular color.
+            D3DCOLORVALUE specAndPower = data.material.Specular;
+            specAndPower.a = data.material.Power;
+            cache.SetValues( kConstantLocations[kConstMatSpecular], &specAndPower.r, 1 );
+        }
+
+        // pack the lights
+        // Lights are grouped by type so the shader can loop over each type's range.
+        int lightCounts[kLightTypeCount];
+        float lightStart[kLightTypeCount];
+        int lightsTotal = 0;
+        float lightsTotalF = 0;
+        memset(lightCounts, 0, sizeof(lightCounts));
+        memset(lightStart, 0, sizeof(lightStart));
+        VSLightData lights[kMaxSupportedVertexLights];
+        for( int t = 0; t < kLightTypeCount; ++t )
+        {
+            lightStart[t] = lightsTotalF;
+            for( int i = 0; i < data.vertexLightCount; ++i )
+            {
+                const GfxVertexLight& src = data.lights[i];
+                if( src.type != t )
+                    continue;
+
+                VSLightData& dst = lights[lightsTotal];
+                // position
+                dst.pos.Set( src.position.x, src.position.y, src.position.z, 1.0f );
+                // direction
+                dst.dir.Set( -src.spotDirection.x, -src.spotDirection.y, -src.spotDirection.z, 0.0f );
+                // color
+                dst.color.Set( src.color.x, src.color.y, src.color.z, 1.0f );
+                // params: 1/(cos(theta/2)-cos(phi/2)), cos(phi/2), range^2, d^2 attenuation
+                const float sqrRange = src.range * src.range;
+                if( src.type == kLightSpot )
+                {
+                    // theta = spotAngle/2 (inner cone), phi = spotAngle (outer cone)
+                    float cosTheta = cosf(Deg2Rad(src.spotAngle)*0.25f);
+                    float cosPhi = cosf(Deg2Rad(src.spotAngle)*0.5f);
+                    float cosDiff = cosTheta - cosPhi;
+                    dst.params.Set(
+                        cosDiff != 0.0f ? 1.0f / cosDiff : 0.0f,
+                        cosPhi,
+                        sqrRange,
+                        src.quadAtten
+                    );
+                }
+                else
+                {
+                    dst.params.Set(
+                        0.0f,
+                        0.0f,
+                        sqrRange,
+                        src.quadAtten
+                    );
+                }
+
+                ++lightCounts[t];
+                ++lightsTotal;
+                ++lightsTotalF;
+            }
+        }
+
+        // light indices
+        // Integer constant t carries the number of lights of type t in its x component.
+        int miscI[kLightTypeCount][4];
+        for( int t = 0; t < kLightTypeCount; ++t ) {
+            miscI[t][0] = lightCounts[t];
+            miscI[t][1] = 0;
+            miscI[t][2] = 0;
+            miscI[t][3] = 0;
+        }
+        D3D9_CALL(dev->SetVertexShaderConstantI( 0, miscI[0], kLightTypeCount ));
+
+        // Each packed light occupies 4 float4 registers, starting at register 60.
+        if (lightsTotal)
+            cache.SetValues( 60, (const float*)lights, 4*lightsTotal );
+        // Register offset (4 per light) of the first light of each type.
+        misc[0] = lightStart[0] * 4.0f;
+        misc[1] = lightStart[1] * 4.0f;
+        misc[2] = lightStart[2] * 4.0f;
+        misc[3] = 0.0f;
+        cache.SetValues(kConstantLocations[kConstLightIndexes], misc, 1);
+    }
+
+    // texture matrices & transform flags
+    UInt32 matrixModes = config.textureMatrixModes;
+    UInt32 projectedTextures = data.projectedTextures;
+    for( int i = 0; i < config.texCoordCount; ++i )
+    {
+        unsigned matmode = (matrixModes >> (i*2)) & 0x3;
+        if( matmode != kTexMatrixNone )
+        {
+            cache.SetValues(kConstantLocations[kConstMatrixTexture]+i*4, state.texMatrices[i].GetPtr(), 4);
+        }
+        D3D9_CALL(dev->SetTextureStageState( i, D3DTSS_TEXCOORDINDEX, i ));
+        // projected texture flag
+        DWORD textureTransformFlags = (projectedTextures & (1<<i)) ? D3DTTFF_PROJECTED : D3DTTFF_DISABLE;
+        D3D9_CALL(dev->SetTextureStageState( i, D3DTSS_TEXTURETRANSFORMFLAGS, textureTransformFlags ));
+    }
+
+    // Units beyond the active count go back to pass-through when the count changed.
+    if( config.texCoordCount != previous.config.texCoordCount )
+    {
+        for( int i = config.texCoordCount; i < kMaxSupportedTextureCoords; ++i )
+        {
+            D3D9_CALL(dev->SetTextureStageState( i, D3DTSS_TEXCOORDINDEX, i ));
+            D3D9_CALL(dev->SetTextureStageState( i, D3DTSS_TEXTURETRANSFORMFLAGS, D3DTTFF_DISABLE ));
+        }
+    }
+
+    previous.config = config;
+}
+
+
+// Releases every generated vertex shader and empties the shader cache.
+// With PRINT_VERTEX_PIPE_STATS enabled, the number of generated shaders is logged first.
+void CleanupVertexShadersD3D9 ()
+{
+    #if PRINT_VERTEX_PIPE_STATS
+    // size() returns size_t; cast so the argument matches the %i conversion on 64-bit builds too.
+    printf_console("Vertex pipe shader cache: %i shaders generated\n", (int)g_Shaders.size());
+    #endif
+    ShaderCache::iterator it, itEnd = g_Shaders.end();
+    for( it = g_Shaders.begin(); it != itEnd; ++it )
+    {
+        // Entries can hold NULL when shader assembly or creation failed.
+        IDirect3DVertexShader9* vs = it->second.shader;
+        if( vs ) {
+            ULONG refCount = vs->Release();
+            // The cache is expected to hold the last reference.
+            AssertIf( refCount != 0 );
+        }
+    }
+    g_Shaders.clear ();
+}
+
diff --git a/Runtime/GfxDevice/d3d/VertexPipeD3D9.h b/Runtime/GfxDevice/d3d/VertexPipeD3D9.h
new file mode 100644
index 0000000..af9d8d3
--- /dev/null
+++ b/Runtime/GfxDevice/d3d/VertexPipeD3D9.h
@@ -0,0 +1,139 @@
+#pragma once
+
+#include "Runtime/GfxDevice/GfxDeviceTypes.h"
+#include "Runtime/GfxDevice/GfxDeviceObjects.h"
+#include "Runtime/Math/Vector4.h"
+#include "Runtime/Math/Matrix4x4.h"
+#include "Runtime/GfxDevice/ShaderConstantCache.h"
+#include "Runtime/GfxDevice/TransformState.h"
+#include "D3D9Includes.h"
+
+// Forward declaration: this header only uses the type by reference in the
+// function prototypes below, so the full definition is not needed here.
+class BuiltinShaderParamValues;
+
+// Source of a texture unit's coordinates. VertexPipeConfig::textureSources
+// stores one of these per unit in 3 bits, so the enumerator values must stay
+// below 8.
+// NOTE(review): UV0/UV1 presumably select a mesh UV set and the remaining
+// entries a texgen mode -- confirm against the shader generator.
+enum TextureSourceMode {
+ kTexSourceUV0,
+ kTexSourceUV1,
+ // match the order of TexGenMode!
+ kTexSourceSphereMap,
+ kTexSourceObject,
+ kTexSourceEyeLinear,
+ kTexSourceCubeReflect,
+ kTexSourceCubeNormal,
+ // number of source modes; keep last
+ kTexSourceTypeCount
+};
+
+// Texture matrix usage for a texture unit. VertexPipeConfig::textureMatrixModes
+// stores one of these per unit in 2 bits, so only values 0..3 are representable.
+// NOTE(review): the 2/3/4 suffix presumably is the number of coordinate
+// components the matrix transforms -- confirm against the shader generator.
+enum TextureMatrixMode {
+ // no texture matrix for this unit
+ kTexMatrixNone,
+ kTexMatrix2,
+ kTexMatrix3,
+ kTexMatrix4,
+ // number of matrix modes; keep last
+ kTexMatrixTypeCount
+};
+
+// Bit-packed description of the vertex pipeline configuration.
+// Every member is a UInt64 bitfield; 54 bits are in use and 10 remain spare.
+struct VertexPipeConfig {
+	// --- 16 bits: per-unit texture matrix mode ---
+	UInt64 textureMatrixModes : 16; // TextureMatrixMode, 2 bits per unit
+	// --- 24 bits: per-unit texture coordinate source ---
+	UInt64 textureSources : 24; // TextureSourceMode, 3 bits per unit
+	// --- 8 bits: color / texcoord setup ---
+	UInt64 colorMaterial : 3; // ColorMaterialMode
+	UInt64 texCoordCount : 4; // how many texture coordinates are in use
+	UInt64 hasVertexColor : 1; // vertex color supplied by per-vertex data?
+	// --- 6 bits: lighting setup ---
+	UInt64 hasLighting : 1; // lighting enabled?
+	UInt64 hasSpecular : 1; // specular enabled?
+	UInt64 hasLightType : 3; // one bit per light type that is present
+	UInt64 hasNormalization : 1; // normals need normalizing?
+	// (10 bits unused)
+
+	// Zeroes the whole structure, padding bits included.
+	void Reset() {
+		memset(this, 0, sizeof(VertexPipeConfig));
+	}
+
+	// Marks texture unit 'unit' as used: grows texCoordCount to unit+1 when it
+	// does not already cover that unit; otherwise leaves it alone.
+	void SetTextureUnit( UInt32 unit ) {
+		Assert (unit < 8);
+		const UInt32 inUse = texCoordCount;
+		if( inUse > unit )
+			return; // unit already covered
+		texCoordCount = unit + 1;
+	}
+
+	// Marks texture unit 'unit' as unused: shrinks texCoordCount down to 'unit'
+	// when it currently covers that unit; otherwise leaves it alone.
+	void ClearTextureUnit( UInt32 unit ) {
+		Assert (unit < 8);
+		const UInt32 inUse = texCoordCount;
+		if( inUse <= unit )
+			return; // unit was not covered anyway
+		texCoordCount = unit;
+	}
+};
+
+
+// Per-device vertex pipeline data (lights, material, ambient, flags) that sits
+// next to VertexPipeConfig and is handed to the Setup* functions.
+struct VertexPipeDataD3D9
+{
+	GfxVertexLight lights[kMaxSupportedVertexLights];
+	D3DMATERIAL9 material;
+	SimpleVec4 ambient;
+	SimpleVec4 ambientClamped;
+	int vertexLightCount;
+	UInt32 projectedTextures; // bitmask, one bit per texture unit
+
+
+	NormalizationMode normalization;
+
+	// mutable: may be changed through a const reference to this struct
+	mutable bool haveToResetDeviceState;
+
+	// Puts everything except the 'lights' array back to its initial state.
+	// The array itself is not touched; vertexLightCount is set to zero.
+	void Reset() {
+		// plain scalars and flags
+		vertexLightCount = 0;
+		projectedTextures = 0;
+		normalization = kNormalizationUnknown;
+		haveToResetDeviceState = false;
+		// colors and material
+		ambient.set (0,0,0,0);
+		ambientClamped.set (0,0,0,0);
+		memset (&material, 0, sizeof(D3DMATERIAL9));
+	}
+};
+
+
+// Snapshot of the most recently applied vertex pipeline state; the Setup*
+// functions receive it by mutable reference.
+struct VertexPipePrevious {
+	VertexPipeConfig config;
+	SimpleVec4 ambient;
+	int vertexLightCount;
+	IDirect3DVertexShader9* vertexShader;
+
+	// Forgets the remembered state: no shader, no lights, zeroed config.
+	void Reset() {
+		vertexShader = NULL;
+		vertexLightCount = 0;
+		// NOTE(review): -1 is presumably a sentinel that no real ambient color
+		// equals, so the next comparison registers a change -- confirm.
+		ambient.set(-1,-1,-1,-1);
+		config.Reset ();
+	}
+};
+
+// NOTE(review): judging by the name, resets the vertex pipe state tracked for
+// 'dev'; the implementation is not part of this header -- confirm there.
+// All state arguments (transform state, builtins, config, data, previous) are
+// taken by non-const reference, so the callee is allowed to modify each of them.
+void ResetVertexPipeStateD3D9 (
+ IDirect3DDevice9* dev,
+ TransformState& state,
+ BuiltinShaderParamValues& builtins,
+ VertexPipeConfig& config,
+ VertexPipeDataD3D9& data,
+ VertexPipePrevious& previous);
+
+// NOTE(review): judging by the name, applies the vertex pipe state through the
+// fixed-function path of 'dev'; the implementation is not part of this header.
+// 'config' and 'data' are read-only inputs; 'state', 'builtins' and 'previous'
+// are non-const and may be updated by the callee.
+// NOTE(review): the meaning of 'vsActive' and 'immediateMode' is not visible
+// here -- confirm against the definition.
+void SetupFixedFunctionD3D9 (
+ IDirect3DDevice9* dev,
+ TransformState& state,
+ BuiltinShaderParamValues& builtins,
+ const VertexPipeConfig& config,
+ const VertexPipeDataD3D9& data,
+ VertexPipePrevious& previous,
+ bool vsActive, bool immediateMode);
+
+// NOTE(review): judging by the name, applies the vertex pipe state through a
+// vertex shader, writing shader constants via 'cache'; the implementation is
+// not part of this header -- confirm there.
+// Unlike SetupFixedFunctionD3D9, 'builtins' is const here and 'config' is
+// non-const, so the callee may modify the config but not the builtin values.
+// NOTE(review): the meaning of 'vsActive' and 'immediateMode' is not visible
+// here -- confirm against the definition.
+void SetupVertexShaderD3D9 (
+ IDirect3DDevice9* dev,
+ TransformState& state,
+ const BuiltinShaderParamValues& builtins,
+ VertexPipeConfig& config,
+ const VertexPipeDataD3D9& data,
+ VertexPipePrevious& previous,
+ VertexShaderConstantCache& cache,
+ bool vsActive, bool immediateMode);
+
+// Releases all vertex shaders held in the vertex pipe shader cache and clears
+// the cache.
+void CleanupVertexShadersD3D9 ();