diff options
Diffstat (limited to 'Runtime/GfxDevice/d3d')
32 files changed, 11517 insertions, 0 deletions
diff --git a/Runtime/GfxDevice/d3d/CombinerD3D.cpp b/Runtime/GfxDevice/d3d/CombinerD3D.cpp new file mode 100644 index 0000000..2be2b47 --- /dev/null +++ b/Runtime/GfxDevice/d3d/CombinerD3D.cpp @@ -0,0 +1,600 @@ +#include "UnityPrefix.h" +#include "CombinerD3D.h" +#include "External/shaderlab/Library/texenv.h" +#include "External/shaderlab/Library/pass.h" +#include "External/shaderlab/Library/TextureBinding.h" +#include "D3D9Context.h" +#include "Runtime/Utilities/BitUtility.h" +#include "Runtime/GfxDevice/GfxDevice.h" +#include "Runtime/Shaders/GraphicsCaps.h" +#include "External/DirectX/builds/dx9include/d3dx9.h" + + +// -------------------------------------------------------------------------- +// Combiners to fixed function texture stages + +// NOTE: not all GL combiner modes are representable in TSS: +// * per-stage constants +// * DOUBLE/QUAD on arbitrary operations +// * a*b+-c, a*b-c +// So what we do is: if hardware supports ps_1_1, we generate pixel shaders on the fly, see below. 
+ +static D3DTEXTUREOP kCombinerFuncTable[3][8] = { + { D3DTOP_SELECTARG1, D3DTOP_MODULATE, D3DTOP_ADD, D3DTOP_ADDSIGNED, D3DTOP_SUBTRACT, D3DTOP_LERP, D3DTOP_DOTPRODUCT3, D3DTOP_DOTPRODUCT3 }, + { D3DTOP_ADD, D3DTOP_MODULATE2X, D3DTOP_DISABLE, D3DTOP_ADDSIGNED2X, D3DTOP_DISABLE, D3DTOP_DISABLE, D3DTOP_DISABLE, D3DTOP_DISABLE }, + { D3DTOP_DISABLE, D3DTOP_MODULATE4X, D3DTOP_DISABLE, D3DTOP_DISABLE, D3DTOP_DISABLE, D3DTOP_DISABLE, D3DTOP_DISABLE, D3DTOP_DISABLE }, +}; +static DWORD kCombinerFuncCapsTable[3][8] = { + { D3DTEXOPCAPS_SELECTARG1, D3DTEXOPCAPS_MODULATE, D3DTEXOPCAPS_ADD, D3DTEXOPCAPS_ADDSIGNED, D3DTEXOPCAPS_SUBTRACT, D3DTEXOPCAPS_LERP, D3DTEXOPCAPS_DOTPRODUCT3, D3DTEXOPCAPS_DOTPRODUCT3 }, + { D3DTEXOPCAPS_ADD, D3DTEXOPCAPS_MODULATE2X, 0, D3DTEXOPCAPS_ADDSIGNED2X, 0, 0, 0, 0 }, + { 0, D3DTEXOPCAPS_MODULATE4X, 0, 0, 0, 0, 0, 0 }, +}; + +void InitializeCombinerCapsD3D9() +{ + DWORD texOpCaps = gGraphicsCaps.d3d.d3dcaps.TextureOpCaps; + for( int r = 0; r < 3; ++r ) { + for( int c = 0; c < 8; ++c ) { + if( kCombinerFuncCapsTable[r][c] ) { + if( !(texOpCaps & kCombinerFuncCapsTable[r][c]) ) + kCombinerFuncTable[r][c] = D3DTOP_DISABLE; + } + } + } +} + +static int kCombinerSourceTable[4] = { + D3DTA_CURRENT, D3DTA_TEXTURE, D3DTA_TFACTOR, D3DTA_DIFFUSE // TODO: TFACTOR is global, not per-stage! +}; +static const int kCombinerOperandModTableRGB[4] = { + 0, D3DTA_ALPHAREPLICATE, D3DTA_COMPLEMENT, D3DTA_ALPHAREPLICATE | D3DTA_COMPLEMENT +}; +static const int kCombinerOperandModTableAlpha[4] = { + 0, 0, D3DTA_COMPLEMENT, D3DTA_COMPLEMENT +}; + + + +static bool CombinerToTextureStage( UInt32 comb, D3DTEXTUREOP& outOp, int outArgs[3], bool alpha ) +{ + int s0 = (comb >> combiner::kSrcZeroShift) & 0xFF; + + int cf = COMBINER_GET_FUNC(comb); + int s1 = (comb) & 0xFF; + int scale = HighestBit( (comb >> combiner::kScaleShift) ); + AssertIf( scale < 0 || scale > 2 ); + + const int* kCombinerOperandModTable = alpha ? 
kCombinerOperandModTableAlpha : kCombinerOperandModTableRGB; + int source0 = kCombinerSourceTable[s0 & combiner::kSourceMask] | kCombinerOperandModTable[s0 >> combiner::kOperandShift]; + int source1 = kCombinerSourceTable[s1 & combiner::kSourceMask] | kCombinerOperandModTable[s1 >> combiner::kOperandShift]; + if( !(cf & combiner::kBlendFuncMask) ) + { + outOp = kCombinerFuncTable[scale][cf]; + if( outOp == D3DTOP_DISABLE ) + return false; + // we emulate "source double" with "source + source" + if( cf == 0 ) + source1 = source0; + outArgs[0] = source0; + outArgs[1] = source1; + outArgs[2] = D3DTA_CURRENT; + } + else + { + int blendF = COMBINER_GET_BLEND_FUNC_INDEX(cf); + int src2 = cf & combiner::kSourceMask; + int oper2 = ((cf & combiner::kOperandTwo) >> combiner::kOperandShift) | 1; + int source2 = kCombinerSourceTable[src2] | kCombinerOperandModTable[oper2]; + + DWORD texOpCaps = gGraphicsCaps.d3d.d3dcaps.TextureOpCaps; + + switch( blendF ) + { + case 0: + // src0 lerp(src2 alpha) src1 + if( (scale != 0) || !(texOpCaps & D3DTEXOPCAPS_LERP) ) + return false; + outOp = D3DTOP_LERP; + outArgs[0] = source0; + outArgs[1] = source1; + outArgs[2] = source2; + break; + case 1: + // src0 * src2 alpha + src1 + if( texOpCaps & D3DTEXOPCAPS_MULTIPLYADD ) { + if( scale != 0 ) + return false; + outOp = D3DTOP_MULTIPLYADD; + outArgs[0] = source0; + outArgs[1] = source2; + outArgs[2] = source1; + } else { + // TODO + return false; + } + break; + case 2: + // src0 * src2 alpha +- src1 + // not supported! + return false; + case 3: + // src0 * src2 alpha - src1 + // not supported! 
+ return false; + default: + AssertString( "Unknown combiner blend function" ); + return false; + } + AssertIf( outOp <= D3DTOP_DISABLE || outOp > D3DTOP_LERP ); + } + + return true; +} + +static bool CombinerToTextureStage( const ShaderLab::TextureBinding& te, D3DTextureStage& stage, bool& outTFactorUsed ) +{ + int combColor = te.m_CombColor; + if( !CombinerToTextureStage( combColor, stage.colorOp, stage.colorArgs, false ) ) + return false; + + // For DOT3 operation, we have to force using no function on alpha + // However, on some old cards this has no effect; they always replicate DOT3 to all channels + // (e.g. GeForce 2). Oh well. + int combColorFunc = COMBINER_GET_FUNC(combColor); + int combAlpha = te.m_CombAlpha; + if( combColorFunc == 6 ) { // DOT3 + combAlpha &= ~(0xFF << combiner::kFuncShift); + } + if( !CombinerToTextureStage( combAlpha, stage.alphaOp, stage.alphaArgs, true ) ) + return false; + + if( te.IsTexColorUsed() ) + { + outTFactorUsed = true; + } + + return true; +} + + +// -------------------------------------------------------------------------- +// Combiners to pixel shader 1.1 + + +// Supports up to 4 texture stages. +// Each stage outputs into r0 register. +// Per-stage constants are stored in corresponding constant registers [c0..c3]. +// "Color" command (TFACTOR equivalent) should store in c4. +// r1 is used in some cases to load & process some constants (e.g. where we'd want to do a c4_bias, we first load it into r1 and then do r1_bias) + + +// Cache for generated pixel shaders. 
+struct CombinersCacheEntry +{ + UInt32 combColor[kMaxD3DTextureStagesForPS]; + UInt32 combAlpha[kMaxD3DTextureStagesForPS]; + IDirect3DPixelShader9* pixelShader; + bool specular; + + bool Equals( int count, const ShaderLab::TextureBinding* texEnvs, bool specular ) const + { + AssertIf( count > kMaxD3DTextureStagesForPS ); + if( specular != this->specular ) + return false; + for( int i = 0; i < count; ++i ) + { + if( texEnvs[i].m_CombColor != combColor[i] ) + return false; + if( texEnvs[i].m_CombAlpha != combAlpha[i] ) + return false; + } + return true; + } +}; +// Not a map on purpose - comparison is cheap and we want to store everything in a single block. +static std::vector<CombinersCacheEntry> s_CombinersCache[kMaxD3DTextureStagesForPS][2]; // [2] = lighting off, lighting on + +void TextureCombinersD3D::CleanupCombinerCache() +{ + for( int i = 0; i < kMaxD3DTextureStagesForPS; ++i ) + { + for( int j = 0; j < 2; ++j ) + { + std::vector<CombinersCacheEntry>& cache = s_CombinersCache[i][j]; + for( int k = 0; k < cache.size(); ++k ) + { + IDirect3DPixelShader9* ps = cache[k].pixelShader; + if( ps ) { + ULONG refCount = ps->Release(); + AssertIf( refCount != 0 ); + } + } + cache.clear(); + cache.swap(std::vector<CombinersCacheEntry>()); + } + } +} + +static const char* kPSDestRegRGBA[3] = { " r0", "_x2 r0", "_x4 r0" }; +static const char* kPSDestRegRGB [3] = { " r0.rgb", "_x2 r0.rgb", "_x4 r0.rgb" }; +static const char* kPSDestRegA [3] = { " r0.a", "_x2 r0.a", "_x4 r0.a" }; + +static const char* kPSOperandPrefixTable[4] = { + "", "", "1-", "1-", +}; +static const char* kPSOperandSuffixTableRGB[4] = { + "", ".a", "", ".a", +}; + +static std::string CombinerSrcPS( combiner::Source source, int stage, bool lighting ) +{ + switch( source ) + { + case combiner::kSrcPrevious: + if( stage == 0 ) + return lighting ? 
"v0" : "c4"; + else + return "r0"; + case combiner::kSrcTexture: + return 't' + IntToString(stage); + case combiner::kSrcConstant: + return 'c' + IntToString(stage); + case combiner::kSrcPrimaryColor: + return lighting ? "v0" : "c4"; + default: + AssertString( "Unknown source" ); + return ""; + } +} + +static void FixupForConstantModifiers( std::string& source, std::string& outFixup, UInt32 operand, bool alpha, bool dot3 ) +{ + if( source.size() >= 4 && source[0]=='1' && source[1]=='-' && source[2] == 'c' ) + { + std::string sub = source.substr( 3, source.size()-3 ); + outFixup += std::string( alpha ? "+mov_sat r1.a, c" : "mov_sat r1.rgb, c" ) + sub + '\n'; + source = "1-r1"; + if( !alpha ) + source += kPSOperandSuffixTableRGB[operand]; + } + if( dot3 && source.size() >= 2 && source[0]=='c' ) + { + std::string sub = source.substr( 1, source.size()-1 ); + outFixup += std::string( alpha ? "+mov_sat r1.a, c" : "mov_sat r1.rgb, c" ) + sub + '\n'; + source = "r1"; + if( !alpha ) + source += kPSOperandSuffixTableRGB[operand]; + } +} + +static void CombinerToPixelShaderText( int stage, bool lighting, UInt32 comb, bool alpha, std::string& outInstruction, std::string& outPrevFixup, bool& outPrevSat, bool& outSkipAlpha ) +{ + outSkipAlpha = false; + + int s0 = (comb >> combiner::kSrcZeroShift) & 0xFF; + + int cf = COMBINER_GET_FUNC(comb); + int s1 = (comb) & 0xFF; + int scale = HighestBit( (comb >> combiner::kScaleShift) ); + AssertIf( scale < 0 || scale > 2 ); + + combiner::Source src0 = static_cast<combiner::Source>(s0 & combiner::kSourceMask); + combiner::Source src1 = static_cast<combiner::Source>(s1 & combiner::kSourceMask); + UInt32 oper0 = s0 >> combiner::kOperandShift; + UInt32 oper1 = s1 >> combiner::kOperandShift; + std::string source0 = kPSOperandPrefixTable[oper0] + CombinerSrcPS( src0, stage, lighting ); + if( !alpha ) + source0 += kPSOperandSuffixTableRGB[oper0]; + std::string source1 = kPSOperandPrefixTable[oper1] + CombinerSrcPS( src1, stage, lighting ); + 
std::string suffix1 = alpha ? "" : kPSOperandSuffixTableRGB[oper1]; + const char** kPSDestReg = (cf == 7) ? kPSDestRegRGBA : (alpha ? kPSDestRegA : kPSDestRegRGB); + std::string destReg = kPSDestReg[scale]; + + std::string text; + + // Some special rules: + // * For bias modifier or a lerp, a previous instruction should saturate the result + // * For negate modifier, a previous instruction can't saturate the result (hence can't just saturate everything) + // * 1-x or x_bias not allowed on constants. So if we detect such case, we try to load it into r1 + // with additional instruction. + // * Emulating DOT3 requires _bx2 modifier, which is not allowed on constants. So we also detect + // that and load it into r1 with additional instruction. + + bool dot3 = (cf == 6 || cf == 7); + FixupForConstantModifiers( source0, outPrevFixup, oper0, alpha, dot3 ); + FixupForConstantModifiers( source1, outPrevFixup, oper1, alpha, dot3 ); + + bool addSatOnPrevious = false; // should we add "saturate" on previous instruction? + if( !(cf & combiner::kBlendFuncMask) ) + { + switch( cf ) + { + case 0: text = "mov" + destReg + ", " + source0; break; + case 1: text = "mul" + destReg + ", " + source0 + ", " + source1 + suffix1; break; + case 2: text = "add" + destReg + ", " + source0 + ", " + source1 + suffix1; break; + case 3: + if( source1[0] == 'c' ) + { + std::string sub = source1.substr( 1, source1.size()-1 ); + outPrevFixup += std::string( alpha ? 
"+mov_sat r1.a, c" : "mov_sat r1.rgb, c" ) + sub + '\n'; + source1 = "r1"; + } + text = "add" + destReg + ", " + source0 + ", " + source1 + "_bias" + suffix1; + if( (s1 & combiner::kSourceMask) == 0 ) // if source1 is "previous", need to saturate previous instruction + addSatOnPrevious = true; + break; + case 4: text = "sub" + destReg + ", " + source0 + ", " + source1 + suffix1; break; + case 5: AssertIf(false); break; + case 6: + // DOT3: for rgb do the dot, for alpha just use source0 + if( alpha ) + text += "mov" + destReg + ", " + source0; + else + text = "dp3" + destReg + ", " + source0 + "_bx2, " + source1 + "_bx2" + suffix1; break; + case 7: + // DOT3RGBA: do the dot into all four channels, and do not issue co-issued alpha + // instructions for this stage. + text = "dp3" + destReg + ", " + source0 + "_bx2, " + source1 + "_bx2" + suffix1; + outSkipAlpha = true; + break; + } + } + else + { + int blendF = COMBINER_GET_BLEND_FUNC_INDEX(cf); + combiner::Source src2 = static_cast<combiner::Source>(cf & combiner::kSourceMask); + int oper2 = ((cf & combiner::kOperandTwo) >> combiner::kOperandShift) | 1; + std::string source2 = kPSOperandPrefixTable[oper2] + CombinerSrcPS(src2, stage, lighting); + if( !alpha ) + source2 += kPSOperandSuffixTableRGB[oper2]; + + FixupForConstantModifiers( source2, outPrevFixup, oper2, alpha, false ); + + switch( blendF ) + { + case 0: + // src0 lerp(src2 alpha) src1 + text = "lrp" + destReg + ", " + source2 + ", " + source0 + ", " + source1 + suffix1; + if( src2 == combiner::kSrcPrevious ) // if src2 is "previous", need to saturate previous instruction + addSatOnPrevious = true; + break; + case 1: + // src0 * src2 alpha + src1 + text = "mad" + destReg + ", " + source0 + ", " + source2 + ", " + source1 + suffix1; + break; + case 2: + // src0 * src2 alpha +- src1 + if( source1[0] == 'c' ) + { + std::string sub = source1.substr( 1, source1.size()-1 ); + outPrevFixup += std::string( alpha ? 
"+mov_sat r1.a, c" : "mov_sat r1.rgb, c" ) + sub + '\n'; + source1 = "r1"; + } + text = "mad" + destReg + ", " + source0 + ", " + source2 + ", " + source1 + "_bias" + suffix1; + if( (s1 & combiner::kSourceMask) == 0 ) // if source1 is "previous", need to saturate previous instruction + addSatOnPrevious = true; + break; + case 3: + // src0 * src2 alpha - src1 + text = "mad" + destReg + ", " + source0 + ", " + source2 + ", -" + source1 + suffix1; + break; + default: + AssertString( "Unknown combiner blend function" ); + break; + } + } + + // if we're not the first instruction and we need to modify previous one - do it. + int typeIndex = alpha ? 1 : 0; + outPrevSat = false; + if( stage != 0 && addSatOnPrevious ) + outPrevSat = true; + + if( alpha ) + outInstruction += '+'; + outInstruction += text; + outInstruction += '\n'; +} + + +// GpuProgramsD3D.cpp +ID3DXBuffer* AssembleD3DShader( const std::string& source ); + + +static IDirect3DPixelShader9* CombinersToPixelShader( int count, const ShaderLab::TextureBinding* texEnvs, bool lighting, bool addSpecular ) +{ + AssertIf( count < 1 ); + + // ps_1_1 supports only 4 textures + if (count > kMaxD3DTextureStagesForPS) + return NULL; + + // look for such combiner setup in cache + int lightingIdx = lighting ? 
1 : 0; + int cacheCount = s_CombinersCache[count-1][lightingIdx].size(); + for( int i = 0; i < cacheCount; ++i ) + { + CombinersCacheEntry& ce = s_CombinersCache[count-1][lightingIdx][i]; + if( ce.Equals( count, texEnvs, addSpecular ) ) + { + AssertIf( !ce.pixelShader ); + return ce.pixelShader; + } + } + + std::string text = "ps_1_1\n"; + // sample textures + for( int i = 0; i < count; ++i ) + { + text += "tex t" + IntToString(i) + '\n'; + } + + // do combiner operations + CombinersCacheEntry cacheEntry; + int previousInstructions[2] = { 0, 0 }; + for( int i = 0; i < count; ++i ) + { + UInt32 combColor = texEnvs[i].m_CombColor; + UInt32 combAlpha = texEnvs[i].m_CombAlpha; + cacheEntry.combColor[i] = combColor; + cacheEntry.combAlpha[i] = combAlpha; + cacheEntry.specular = addSpecular; + std::string instruction, fixup; + bool satPrevious, skipAlpha; + // color + CombinerToPixelShaderText( i, lighting, combColor, false, instruction, fixup, satPrevious, skipAlpha ); + if( satPrevious ) { + while( text[previousInstructions[0]] != ' ' ) + ++previousInstructions[0]; + text.insert( previousInstructions[0], "_sat" ); + previousInstructions[1] += 4; // move the other pointer forward by _sat length as well + } + int colorInstructionLength = instruction.size(); + // alpha + if( !skipAlpha ) + { + CombinerToPixelShaderText( i, lighting, combAlpha, true, instruction, fixup, satPrevious, skipAlpha ); + if( satPrevious ) { + while( text[previousInstructions[1]] != ' ' ) + ++previousInstructions[1]; + text.insert( previousInstructions[1], "_sat" ); + previousInstructions[0] += 4; // move the other pointer forward by _sat length as well + } + } + + text += fixup; + previousInstructions[0] = text.size(); + previousInstructions[1] = text.size() + (skipAlpha ? 
0 : colorInstructionLength); + text += instruction; + } + + // add specular at the end if needed + if( addSpecular ) + { + text += "add r0.rgb, r0, v1"; + } + + // compile pixel shader + HRESULT hr; + IDirect3DDevice9* dev = GetD3DDevice(); + + // assemble shader + ID3DXBuffer *compiledShader = AssembleD3DShader( text ); + IDirect3DPixelShader9* ps = NULL; + if( compiledShader ) + { + // create shader + hr = dev->CreatePixelShader( (const DWORD*)compiledShader->GetBufferPointer(), &ps ); + compiledShader->Release(); + if( FAILED(hr) ) + { + ErrorStringMsg ("D3D9 Combiners: failed to create pixel shader representation: %s", text.c_str()); + } + } + AssertIf( !ps ); + + // insert into cache + cacheEntry.pixelShader = ps; + s_CombinersCache[count-1][lightingIdx].push_back( cacheEntry ); + + return ps; +} + + +// -------------------------------------------------------------------------- + + + +TextureCombinersD3D* TextureCombinersD3D::Create( int count, const ShaderLab::TextureBinding* texEnvs, const ShaderLab::PropertySheet* props, bool hasVertexColorOrLighting, bool usesAddSpecular ) +{ + static int uniqueIDCounter = 1; + + static bool combinerCapsInitialized = false; + if( !combinerCapsInitialized ) + { + InitializeCombinerCapsD3D9(); + combinerCapsInitialized = true; + } + + // For threaded rendering this check is done on the client side (and we get NULL here) + if (props) + { + // check texgen modes & texture dimension are supported + for( int i = 0; i < count; ++i ) { + TextureDimension texDim; + TexGenMode texGen; + GetTexEnvInfoFromName( texEnvs[i].m_TextureName, texDim, texGen, props ); + if( !ShaderLab::IsTexEnvSupported( texEnvs[i].m_TextureName, texDim, texGen ) ) + return NULL; + } + } + + bool canConvertToStages = true; + + // "primary" in the combiner might refer to diffuse or texture factor, depending on + // whether lighting is on or vertex colors are bound + kCombinerSourceTable[3] = hasVertexColorOrLighting ? 
D3DTA_DIFFUSE : D3DTA_TFACTOR; + + TextureCombinersD3D* d3dte = new TextureCombinersD3D(); + d3dte->uniqueID = ++uniqueIDCounter; + d3dte->envCount = count; + d3dte->texEnvs = texEnvs; + d3dte->pixelShader = NULL; + d3dte->textureFactorIndex = -1; + + // special case: when no SetTextures are present, setup to do { combine primary } equivalent + if( count == 0 ) + { + d3dte->stages[0].colorOp = D3DTOP_SELECTARG1; + d3dte->stages[0].colorArgs[0] = d3dte->stages[0].colorArgs[1] = d3dte->stages[0].colorArgs[2] = kCombinerSourceTable[3]; + d3dte->stages[0].alphaOp = D3DTOP_SELECTARG1; + d3dte->stages[0].alphaArgs[0] = d3dte->stages[0].alphaArgs[1] = d3dte->stages[0].alphaArgs[2] = kCombinerSourceTable[3]; + d3dte->stages[1].colorOp = D3DTOP_DISABLE; + d3dte->stageCount = 1; + return d3dte; + } + + // try to convert to pixel shader and use that if everything is ok + d3dte->pixelShader = CombinersToPixelShader( count, texEnvs, hasVertexColorOrLighting, usesAddSpecular ); + + // if don't have pixel shader, convert to TSS setup + if( !d3dte->pixelShader ) + { + if( count > gGraphicsCaps.d3d.d3dcaps.MaxSimultaneousTextures ) + { + // In theory we could convert more; if most of combiner stages do not actually + // use the texture. In practice we just cap it at MaxSimultaneousTextures; + // it will match GL behaviour as well. 
+ canConvertToStages = false; + } + else + { + for( int i = 0; i < count; ++i ) + { + const ShaderLab::TextureBinding& te = texEnvs[i]; + D3DTextureStage& stage = d3dte->stages[i]; + bool textureFactorUsed = false; + if( !CombinerToTextureStage(te, stage, textureFactorUsed) ) + { + canConvertToStages = false; + break; + } + if (textureFactorUsed) + { + d3dte->textureFactorIndex = i; + } + } + d3dte->stages[count].colorOp = D3DTOP_DISABLE; + } + + // no can't do + if( !canConvertToStages ) + { + delete d3dte; + return NULL; + } + } + + d3dte->stageCount = count; + return d3dte; +} diff --git a/Runtime/GfxDevice/d3d/CombinerD3D.h b/Runtime/GfxDevice/d3d/CombinerD3D.h new file mode 100644 index 0000000..6f7fb05 --- /dev/null +++ b/Runtime/GfxDevice/d3d/CombinerD3D.h @@ -0,0 +1,37 @@ +#pragma once + +#include "D3D9Includes.h" +#include "External/shaderlab/Library/shadertypes.h" + +namespace ShaderLab { + struct TextureBinding; + class TexEnv; +} + + +const int kMaxD3DTextureStages = 8; +const int kMaxD3DTextureStagesForPS = 4; + +struct D3DTextureStage +{ + D3DTEXTUREOP colorOp; + int colorArgs[3]; + D3DTEXTUREOP alphaOp; + int alphaArgs[3]; +}; + +struct TextureCombinersD3D +{ + static TextureCombinersD3D* Create( int count, const ShaderLab::TextureBinding* texEnvs, const ShaderLab::PropertySheet* props, bool hasVertexColorOrLighting, bool usesAddSpecular ); + static void CleanupCombinerCache(); + + D3DTextureStage stages[kMaxD3DTextureStages+1]; + int envCount, stageCount; // these might be different! 
+ IDirect3DPixelShader9* pixelShader; + const ShaderLab::TextureBinding* texEnvs; + + int textureFactorIndex; + bool textureFactorUsed; + + int uniqueID; +}; diff --git a/Runtime/GfxDevice/d3d/D3D9Context.cpp b/Runtime/GfxDevice/d3d/D3D9Context.cpp new file mode 100644 index 0000000..e192ad8 --- /dev/null +++ b/Runtime/GfxDevice/d3d/D3D9Context.cpp @@ -0,0 +1,629 @@ +#include "UnityPrefix.h" +#include "D3D9Context.h" +#include "Runtime/Shaders/GraphicsCaps.h" +#include "D3D9Enumeration.h" +#include "D3D9Utils.h" +#include "GfxDeviceD3D9.h" +#include "TimerQueryD3D9.h" +#include "PlatformDependent/Win/WinUtils.h" +#include "Configuration/UnityConfigure.h" +#include "Runtime/GfxDevice/GfxDevice.h" +#include "Runtime/Threads/ThreadSharedObject.h" +#include "Runtime/Misc/Plugins.h" +#if UNITY_EDITOR +#include "Runtime/GfxDevice/GfxDeviceSetup.h" +#include "Runtime/Misc/QualitySettings.h" +#include "Runtime/Camera/RenderManager.h" +#include "D3D9Window.h" +#endif + +#if WEBPLUG +#define ENABLE_NV_PERFHUD 0 +#else +#define ENABLE_NV_PERFHUD 1 +#endif + +#define ENABLE_D3D_WINDOW_LOGGING 1 + +static IDirect3D9* s_D3D = NULL; +static IDirect3DDevice9* s_Device = NULL; + +static RenderColorSurfaceD3D9 s_BackBuffer; +static RenderDepthSurfaceD3D9 s_DepthStencil; +static HWND s_Window = NULL; +static HINSTANCE s_D3DDll = NULL; +static D3DPRESENT_PARAMETERS s_PresentParams; +static D3D9FormatCaps* s_FormatCaps = NULL; +static bool s_CurrentlyWindowed = true; +static D3DDISPLAYMODE s_LastWindowedMode; +bool g_D3DUsesMixedVP = false; +bool g_D3DHasDepthStencil = true; +D3DFORMAT g_D3DDepthStencilFormat = D3DFMT_D16; +D3DDEVTYPE g_D3DDevType; +DWORD g_D3DAdapter = D3DADAPTER_DEFAULT; + +#if WEBPLUG +extern bool gInsideFullscreenToggle; +#endif + +typedef IDirect3D9* (WINAPI* Direct3DCreate9Func)(UINT); + +GfxDeviceD3D9& GetD3D9GfxDevice(); +void SetD3D9DeviceLost( bool lost ); // GfxDeviceD3D9.cpp +bool IsD3D9DeviceLost(); +void ResetDynamicResourcesD3D9(); + +#if ENABLE_PROFILER 
+D3DPERF_BeginEventFunc g_D3D9BeginEventFunc; +D3DPERF_EndEventFunc g_D3D9EndEventFunc; +#endif + + +bool InitializeD3D(D3DDEVTYPE devtype) +{ + AssertIf( s_D3D || s_Device || s_Window || s_D3DDll || s_FormatCaps ); + g_D3DDevType = devtype; + + s_D3DDll = LoadLibrary( "d3d9.dll" ); + if( !s_D3DDll ) + { + printf_console( "d3d: no D3D9 installed\n" ); + return false; // no d3d9 installed + } + + Direct3DCreate9Func createFunc = (Direct3DCreate9Func)GetProcAddress( s_D3DDll, "Direct3DCreate9" ); + if( !createFunc ) + { + printf_console( "d3d: Direct3DCreate9 not found\n" ); + FreeLibrary( s_D3DDll ); + s_D3DDll = NULL; + return false; // for some reason Direct3DCreate9 not found + } + + #if ENABLE_PROFILER + g_D3D9BeginEventFunc = (D3DPERF_BeginEventFunc)GetProcAddress(s_D3DDll, "D3DPERF_BeginEvent"); + g_D3D9EndEventFunc = (D3DPERF_EndEventFunc)GetProcAddress(s_D3DDll, "D3DPERF_EndEvent"); + #endif + + // create D3D object + s_D3D = createFunc( D3D_SDK_VERSION ); + if( !s_D3D ) + { + printf_console( "d3d: no 9.0c available\n" ); + FreeLibrary( s_D3DDll ); + s_D3DDll = NULL; + return false; // D3D initialization failed + } + + // validate the adapter ordinal + UINT adapterCount = s_D3D->GetAdapterCount(); + if ( g_D3DAdapter >= adapterCount ) + g_D3DAdapter = D3DADAPTER_DEFAULT; + + // check whether we have a HAL device + D3DDISPLAYMODE mode; + HRESULT hr; + if (FAILED(hr = s_D3D->GetAdapterDisplayMode(g_D3DAdapter, &mode))) + { + printf_console ("d3d: failed to get adapter mode (adapter %d error 0x%08x)\n", g_D3DAdapter, hr); + s_D3D->Release(); + s_D3D = NULL; + FreeLibrary( s_D3DDll ); + s_D3DDll = NULL; + return false; // failed to get adapter mode + } + if( FAILED( s_D3D->CheckDeviceType( g_D3DAdapter, g_D3DDevType, mode.Format, mode.Format, TRUE ) ) ) + { + printf_console( "d3d: no support for this device type (accelerated/ref)\n" ); + s_D3D->Release(); + s_D3D = NULL; + FreeLibrary( s_D3DDll ); + s_D3DDll = NULL; + return false; // no HAL driver available + } 
+ + // enumerate all formats, multi sample types and whatnot + s_FormatCaps = new D3D9FormatCaps(); + if( !s_FormatCaps->Enumerate( *s_D3D ) ) + { + printf_console( "d3d: no video modes available\n" ); + return false; + } + + return true; +} + +IDirect3D9* GetD3DObject() +{ + AssertIf( !s_D3D ); + return s_D3D; +} +D3D9FormatCaps* GetD3DFormatCaps() +{ + AssertIf( !s_FormatCaps ); + return s_FormatCaps; +} + +void CleanupD3D() +{ + AssertIf( s_Device || s_Window ); + + delete s_FormatCaps; + s_FormatCaps = NULL; + + if( s_D3D ) + { + s_D3D->Release(); + s_D3D = NULL; + } + if( s_D3DDll ) + { + FreeLibrary( s_D3DDll ); + s_D3DDll = NULL; + } +} + +D3DFORMAT GetD3DFormatForChecks() +{ + AssertIf( !s_FormatCaps ); + return s_FormatCaps->GetAdapterFormatForChecks(); +} + +static void SetFramebufferDepthFormat(GfxDevice* realDevice, D3DFORMAT format) +{ + // Not the most robust way to figure out the format, but should do. + int depthBPP = GetBPPFromD3DFormat(format); + DepthBufferFormat depthFormat = kDepthFormatNone; + if (depthBPP == 16) + depthFormat = kDepthFormat16; + else if (depthBPP == 32) + depthFormat = kDepthFormat24; + realDevice->SetFramebufferDepthFormat(depthFormat); + + // Set it on the client device as well, if we're changing resolutions + // and the property hasn't been propagated by copying from the real to client device. 
+ if (IsGfxDevice()) + GetGfxDevice().SetFramebufferDepthFormat(depthFormat); +} + +bool InitializeOrResetD3DDevice( + class GfxDevice* device, + HWND window, int width, int height, + int refreshRate, bool fullscreen, int vBlankCount, int fsaa, + int& outBackbufferBPP, int& outFrontbufferBPP, int& outDepthBPP, int& outFSAA ) +{ + AssertIf( !s_D3D ); + + outBackbufferBPP = 4; + outFrontbufferBPP = 4; + outDepthBPP = 4; + outFSAA = 0; + + width = std::max(width, 1); + height = std::max(height, 1); + + D3DDISPLAYMODE mode; + if( s_CurrentlyWindowed ) + { + HRESULT hr = s_D3D->GetAdapterDisplayMode( g_D3DAdapter, &mode ); + if( FAILED( hr ) ) + { + printf_console( "d3d initialize: failed to get adapter display mode [%s]\n", GetD3D9Error(hr) ); + return false; + } + s_LastWindowedMode = mode; + } + else + { + // If we are fullscreen right now, use last checked Windowed mode format + // to choose compatible formats. Otherwise we won't be able to switch to 16 bit + // desktop mode after a 32 bit fullscreen one. + mode = s_LastWindowedMode; + } + + D3DPRESENT_PARAMETERS& pparams = s_PresentParams; + ZeroMemory (&pparams, sizeof(D3DPRESENT_PARAMETERS)); + pparams.BackBufferWidth = width; + pparams.BackBufferHeight = height; + pparams.BackBufferCount = 1; + pparams.hDeviceWindow = window; + pparams.FullScreen_RefreshRateInHz = fullscreen ? refreshRate : 0; + + pparams.EnableAutoDepthStencil = FALSE; + g_D3DHasDepthStencil = true; + + pparams.Windowed = fullscreen ? 
FALSE : TRUE; + pparams.SwapEffect = D3DSWAPEFFECT_DISCARD; + + // fullscreen FSAA might be buggy + if( fullscreen && gGraphicsCaps.buggyFullscreenFSAA ) + fsaa = 1; + + s_FormatCaps->FindBestPresentationParams( width, height, mode.Format, !fullscreen, vBlankCount, fsaa, pparams ); + + outBackbufferBPP = GetBPPFromD3DFormat(pparams.BackBufferFormat)/8; + outFrontbufferBPP = GetBPPFromD3DFormat(mode.Format)/8; + outDepthBPP = GetBPPFromD3DFormat(pparams.AutoDepthStencilFormat)/8; + outFSAA = (pparams.MultiSampleType == D3DMULTISAMPLE_NONMASKABLE) ? pparams.MultiSampleQuality : pparams.MultiSampleType; + g_D3DDepthStencilFormat = pparams.AutoDepthStencilFormat; + device->SetCurrentTargetSize(pparams.BackBufferWidth, pparams.BackBufferHeight); + SetFramebufferDepthFormat(device, pparams.AutoDepthStencilFormat); + + bool deviceInLostState = false; + if( !s_Device ) + { + AssertIf( s_Window ); + + UINT adapterIndex = g_D3DAdapter; + D3DDEVTYPE devType = g_D3DDevType; + + #if ENABLE_NV_PERFHUD + UINT adapterCount = s_D3D->GetAdapterCount(); + D3DADAPTER_IDENTIFIER9 perfHudID; + memset( &perfHudID, 0, sizeof(perfHudID) ); + s_D3D->GetAdapterIdentifier( adapterCount-1, 0, &perfHudID ); + perfHudID.Description[MAX_DEVICE_IDENTIFIER_STRING-1] = 0; + if( strstr( perfHudID.Description, "PerfHUD" ) != NULL ) + { + adapterIndex = adapterCount-1; + devType = D3DDEVTYPE_REF; + } + #endif + + const int kShaderVersion11 = (1 << 8) + 1; + bool hasHardwareTL = gGraphicsCaps.d3d.d3dcaps.DevCaps & D3DDEVCAPS_HWTRANSFORMANDLIGHT; + bool hasVS11 = LOWORD(gGraphicsCaps.d3d.d3dcaps.VertexShaderVersion) >= kShaderVersion11; + DWORD behaviourFlags = D3DCREATE_HARDWARE_VERTEXPROCESSING; + if( !hasVS11 ) + behaviourFlags = D3DCREATE_MIXED_VERTEXPROCESSING; + if( !hasHardwareTL ) + behaviourFlags = D3DCREATE_SOFTWARE_VERTEXPROCESSING; + g_D3DUsesMixedVP = (behaviourFlags == D3DCREATE_MIXED_VERTEXPROCESSING); + + if( GetGfxThreadingMode() == kGfxThreadingModeThreaded ) + behaviourFlags |= 
D3DCREATE_MULTITHREADED; + + // Preserve FPU mode. Benchmarking both in hardware and software vertex processing does not + // reveal any real differences. If FPU mode is not preserved, bad things will happen, like: + // * doubles will act like floats + // * on Firefox/Safari, some JavaScript libraries will stop working (spect.aculo.us, dojo) - case 17513 + // * some random funky FPU exceptions will happen + HRESULT hr = s_D3D->CreateDevice( adapterIndex, devType, window, behaviourFlags | D3DCREATE_FPU_PRESERVE, &pparams, &s_Device ); + if( FAILED( hr ) ) + { + printf_console( "d3d: creation params: flags=%x swap=%i vsync=%x w=%i h=%i fmt=%i bbcount=%i dsformat=%i pflags=%x\n", + behaviourFlags, pparams.SwapEffect, pparams.PresentationInterval, + pparams.BackBufferWidth, pparams.BackBufferHeight, pparams.BackBufferFormat, pparams.BackBufferCount, + pparams.AutoDepthStencilFormat, pparams.Flags ); + printf_console( "d3d: failed to create device [%s]\n", GetD3D9Error(hr) ); + if (devType == D3DDEVTYPE_REF) + { + winutils::AddErrorMessage("Reference Rasterizer was requested but is not available.\nPlease make sure you have DirectX SDK installed."); + winutils::DisplayErrorMessagesAndQuit ("REFRAST not available"); + } + return false; + } + s_CurrentlyWindowed = pparams.Windowed ? true : false; + + gGraphicsCaps.hasTimerQuery = + (GetD3DDevice()->CreateQuery(D3DQUERYTYPE_TIMESTAMPFREQ, NULL) != D3DERR_NOTAVAILABLE) && + (GetD3DDevice()->CreateQuery(D3DQUERYTYPE_TIMESTAMP,NULL) != D3DERR_NOTAVAILABLE); + } + else + { + AssertIf( !s_Window ); + + // If we're resetting device mid-frame (e.g. script calls Screen.SetResolution), + // we need to end scene, reset and begin scene again. 
+ bool wasInsideFrame = GetD3D9GfxDevice().IsInsideFrame(); + if( wasInsideFrame ) + { + s_Device->EndScene(); + GetD3D9GfxDevice().SetInsideFrame(false); + } + + // cleanup + s_BackBuffer.Release(); + s_DepthStencil.Release(); + + PluginsSetGraphicsDevice (s_Device, kGfxRendererD3D9, kGfxDeviceEventBeforeReset); + + D3DPRESENT_PARAMETERS ppcopy = pparams; // copy them, as Reset changes some values + HRESULT hr = s_Device->Reset( &ppcopy ); + if( FAILED(hr) ) + { + if( hr == D3DERR_DEVICELOST ) + { + deviceInLostState = true; + SetD3D9DeviceLost( true ); + } + else + { + ErrorString( Format("D3D device reset failed [%s]", GetD3D9Error(hr)) ); + return false; + } + } + + PluginsSetGraphicsDevice (s_Device, kGfxRendererD3D9, kGfxDeviceEventAfterReset); + + s_CurrentlyWindowed = ppcopy.Windowed ? true : false; + if( wasInsideFrame && !deviceInLostState ) + { + s_Device->BeginScene(); + GetD3D9GfxDevice().SetInsideFrame(true); + } + +#if ENABLE_PROFILER + if (gGraphicsCaps.hasTimerQuery) + GetD3D9GfxDevice().GetTimerQueries().RecreateAllQueries(); +#endif + } + + s_Window = window; + if( !deviceInLostState ) + { + s_Device->GetRenderTarget (0, &s_BackBuffer.m_Surface); + s_BackBuffer.width = pparams.BackBufferWidth; + s_BackBuffer.height = pparams.BackBufferHeight; + // create depth stencil + D3D9DepthStencilTexture depthStencil = CreateDepthStencilTextureD3D9 (s_Device, pparams.BackBufferWidth, pparams.BackBufferHeight, pparams.AutoDepthStencilFormat, pparams.MultiSampleType, pparams.MultiSampleQuality, TRUE); + if (depthStencil.m_Surface) + { + s_DepthStencil.m_Surface = depthStencil.m_Surface; + s_DepthStencil.m_Texture = depthStencil.m_Texture; + s_DepthStencil.width = pparams.BackBufferWidth; + s_DepthStencil.height = pparams.BackBufferHeight; + s_DepthStencil.depthFormat = kDepthFormat16; //@TODO? 
+ } + + s_BackBuffer.backBuffer = true; + s_DepthStencil.backBuffer = true; + + #if !UNITY_EDITOR + RenderSurfaceHandle bbHandle(&s_BackBuffer), dsHandle(&s_DepthStencil); + device->SetRenderTargets(1, &bbHandle, dsHandle); + #endif + s_Device->SetRenderState (D3DRS_ZENABLE, TRUE); + } + + return true; +} + +void GetBackBuffersAfterDeviceReset() +{ + AssertIf (!s_Device); + AssertIf (!s_DepthStencil.m_Surface); + s_BackBuffer.Release(); + s_Device->GetRenderTarget (0, &s_BackBuffer.m_Surface); + s_BackBuffer.backBuffer = true; +} + +#if UNITY_EDITOR +void EditorInitializeD3D(GfxDevice* device) +{ + int dummy; + if( !InitializeOrResetD3DDevice( device, s_HiddenWindowD3D, 32, 32, 0, false, 0, 0, dummy, dummy, dummy, dummy ) ) + { + winutils::AddErrorMessage( "Failed to create master Direct3D window" ); + DestroyGfxDevice(); + winutils::DisplayErrorMessagesAndQuit( "Failed to initialize 3D graphics" ); + } + + // Disable D3D Debug runtime in editor release mode: + // VERTEXSTATS query is only available in Debug runtime. + #if UNITY_RELEASE + if (CheckD3D9DebugRuntime(GetD3DDevice())) + { + winutils::AddErrorMessage ( + "You are using Direct3D Debug Runtime, this is not supported by\r\n" + "Unity. Switch to Retail runtime in DirectX Control Panel."); + DestroyGfxDevice(); + winutils::DisplayErrorMessagesAndQuit ("D3D9 Debug Runtime is not supported"); + } + #endif +} +#endif + +bool FullResetD3DDevice() +{ + #if ENABLE_D3D_WINDOW_LOGGING + printf_console("FullResetD3DDevice\n"); + #endif + // destroy dynamic VBO / render textures and reset the device + ResetDynamicResourcesD3D9(); + bool ok = ResetD3DDevice(); + if( ok ) + SetD3D9DeviceLost( false ); + return ok; +} + +bool HandleD3DDeviceLost() +{ + #if ENABLE_D3D_WINDOW_LOGGING + printf_console("HandleD3DDeviceLost\n"); + #endif + HRESULT hr = s_Device->TestCooperativeLevel(); + bool ok = false; + switch( hr ) + { + // Is device actually lost? 
+ case D3D_OK: + { + ok = true; + break; + } + // If device was lost, do not render until we get it back + case D3DERR_DEVICELOST: + { + #if ENABLE_D3D_WINDOW_LOGGING + printf_console(" HandleD3DDeviceLost: still lost\n"); + #endif + break; + } + // If device needs to be reset, do that + case D3DERR_DEVICENOTRESET: + { + #if ENABLE_D3D_WINDOW_LOGGING + printf_console(" HandleD3DDeviceLost: needs reset, doing it\n"); + #endif + ok = FullResetD3DDevice(); + break; + } + } + + if( !ok ) + return false; + + // device is not lost anymore, proceed + #if ENABLE_D3D_WINDOW_LOGGING + printf_console("D3Dwindow device not lost anymore\n"); + #endif + GetBackBuffersAfterDeviceReset(); + SetD3D9DeviceLost( false ); + + return true; +} + +bool ResetD3DDevice() +{ + AssertIf( !s_D3D || !s_Device || !s_Window ); + + #if ENABLE_D3D_WINDOW_LOGGING + printf_console("ResetD3DDevice\n"); + #endif + + // cleanup + s_BackBuffer.Release(); + s_DepthStencil.Release(); + + #if ENABLE_D3D_WINDOW_LOGGING + printf_console("dev->Reset\n"); + #endif + + D3DPRESENT_PARAMETERS ppcopy = s_PresentParams; // copy them, as Reset changes some values + + #if WEBPLUG + // Reset sends WM_ACTIVATE message which makes Web Player exit fullscreen (unless gInsideFullscreenToggle is set). 
+ bool insideFullscreenToggle = gInsideFullscreenToggle; + gInsideFullscreenToggle = true; + #endif + + PluginsSetGraphicsDevice (s_Device, kGfxRendererD3D9, kGfxDeviceEventBeforeReset); + + HRESULT hr = s_Device->Reset( &ppcopy ); + + #if WEBPLUG + gInsideFullscreenToggle = insideFullscreenToggle; + #endif + + bool setToLost = false; + if( FAILED(hr) ) + { + if( hr == D3DERR_DEVICELOST ) + { + #if ENABLE_D3D_WINDOW_LOGGING + printf_console("set device to lost\n"); + #endif + SetD3D9DeviceLost( true ); + setToLost = true; + } + else + { + ErrorString( Format("D3D device reset failed [%s]", GetD3D9Error(hr)) ); + return false; + } + } + else + { + PluginsSetGraphicsDevice (s_Device, kGfxRendererD3D9, kGfxDeviceEventAfterReset); + + s_Device->GetRenderTarget (0, &s_BackBuffer.m_Surface); + s_BackBuffer.width = ppcopy.BackBufferWidth; + s_BackBuffer.height = ppcopy.BackBufferHeight; + // create depth stencil + D3D9DepthStencilTexture depthStencil = CreateDepthStencilTextureD3D9 (s_Device, ppcopy.BackBufferWidth, ppcopy.BackBufferHeight, ppcopy.AutoDepthStencilFormat, ppcopy.MultiSampleType, ppcopy.MultiSampleQuality, TRUE); + if (depthStencil.m_Surface) + { + s_DepthStencil.m_Surface = depthStencil.m_Surface; + s_DepthStencil.m_Texture = depthStencil.m_Texture; + s_DepthStencil.width = ppcopy.BackBufferWidth; + s_DepthStencil.height = ppcopy.BackBufferHeight; + s_DepthStencil.depthFormat = kDepthFormat16; //@TODO? + } + + s_BackBuffer.backBuffer = true; + s_DepthStencil.backBuffer = true; + + #if !UNITY_EDITOR + RenderSurfaceHandle bbHandle(&s_BackBuffer), dsHandle(&s_DepthStencil); + GetRealGfxDevice().SetRenderTargets(1, &bbHandle, dsHandle); + #endif + s_Device->SetRenderState (D3DRS_ZENABLE, TRUE); + } + s_CurrentlyWindowed = ppcopy.Windowed ? 
true : false; + + return !setToLost; +} + +void DestroyD3DDevice() +{ + // This can happen when quiting from screen selector - window is not set up yet + if( !s_Window || !s_Device ) + return; + + // cleanup + s_BackBuffer.Release(); + s_DepthStencil.Release(); + s_Device->Release(); + s_Device = NULL; + s_Window = NULL; +} + +IDirect3DDevice9* GetD3DDevice() +{ + AssertIf( !s_Device ); + return s_Device; +} + +IDirect3DDevice9* GetD3DDeviceNoAssert() +{ + return s_Device; +} + + + +#if UNITY_EDITOR + +#include "PlatformDependent/Win/WinUtils.h" + +HWND s_HiddenWindowD3D = NULL; + +bool CreateHiddenWindowD3D() +{ + AssertIf( s_HiddenWindowD3D ); + + // Dummy master window is 64x64 in size. Seems that 32x32 is too small for Rage cards (produces internal driver errors in CreateDevice). + s_HiddenWindowD3D = CreateWindowW( + L"STATIC", + L"UnityHiddenWindow", + WS_OVERLAPPEDWINDOW | WS_CLIPSIBLINGS | WS_CLIPCHILDREN, + 0, 0, 64, 64, + NULL, NULL, + winutils::GetInstanceHandle(), NULL ); + if( !s_HiddenWindowD3D ) + { + winutils::AddErrorMessage( "Failed to create hidden window: %s", WIN_LAST_ERROR_TEXT ); + return false; + } + + return true; +} + +void DestroyHiddenWindowD3D() +{ + AssertIf( !s_HiddenWindowD3D ); + DestroyWindow( s_HiddenWindowD3D ); + s_HiddenWindowD3D = NULL; +} + +#endif diff --git a/Runtime/GfxDevice/d3d/D3D9Context.h b/Runtime/GfxDevice/d3d/D3D9Context.h new file mode 100644 index 0000000..370a1c7 --- /dev/null +++ b/Runtime/GfxDevice/d3d/D3D9Context.h @@ -0,0 +1,44 @@ +#pragma once + +#include "D3D9Includes.h" +#include "D3D9Enumeration.h" + +bool InitializeD3D(D3DDEVTYPE devtype); +void CleanupD3D(); +bool InitializeOrResetD3DDevice( + class GfxDevice* device, + HWND window, int width, int height, + int refreshRate, bool fullscreen, int vBlankCount, int fsaa, + int& outBackbufferBPP, int& outFrontbufferBPP, int& outDepthBPP, int& outFSAA ); +void GetBackBuffersAfterDeviceReset(); +bool ResetD3DDevice(); +#if UNITY_EDITOR +void 
EditorInitializeD3D(GfxDevice* device); +#endif +bool FullResetD3DDevice(); +bool HandleD3DDeviceLost(); +void DestroyD3DDevice(); +extern D3DDEVTYPE g_D3DDevType; +extern DWORD g_D3DAdapter; +extern bool g_D3DUsesMixedVP; +extern bool g_D3DHasDepthStencil; +extern D3DFORMAT g_D3DDepthStencilFormat; + +IDirect3DDevice9* GetD3DDevice(); +IDirect3DDevice9* GetD3DDeviceNoAssert(); +IDirect3D9* GetD3DObject(); +D3D9FormatCaps* GetD3DFormatCaps(); +D3DFORMAT GetD3DFormatForChecks(); + +typedef int (WINAPI* D3DPERF_BeginEventFunc)(D3DCOLOR, LPCWSTR); +typedef int (WINAPI* D3DPERF_EndEventFunc)(); +extern D3DPERF_BeginEventFunc g_D3D9BeginEventFunc; +extern D3DPERF_EndEventFunc g_D3D9EndEventFunc; + + +#if UNITY_EDITOR +bool CreateHiddenWindowD3D(); +void DestroyHiddenWindowD3D(); +extern HWND s_HiddenWindowD3D; +#endif + diff --git a/Runtime/GfxDevice/d3d/D3D9Enumeration.cpp b/Runtime/GfxDevice/d3d/D3D9Enumeration.cpp new file mode 100644 index 0000000..b78433e --- /dev/null +++ b/Runtime/GfxDevice/d3d/D3D9Enumeration.cpp @@ -0,0 +1,344 @@ +#include "UnityPrefix.h" +#include "D3D9Enumeration.h" +#include "D3D9Utils.h" +#include "Runtime/GfxDevice/VramLimits.h" + +// --------------------------------------------------------------------------- + + +const int kMinDisplayWidth = 512; +const int kMinDisplayHeight = 384; +const int kMinColorBits = 4; +const int kMinAlphaBits = 0; + +extern D3DDEVTYPE g_D3DDevType; +extern DWORD g_D3DAdapter; + +// --------------------------------------------------------------------------- + +static int GetFormatColorBits( D3DFORMAT fmt ) { + switch( fmt ) { + case D3DFMT_A2B10G10R10: + case D3DFMT_A2R10G10B10: return 10; + case D3DFMT_R8G8B8: + case D3DFMT_A8R8G8B8: + case D3DFMT_X8R8G8B8: return 8; + case D3DFMT_R5G6B5: + case D3DFMT_X1R5G5B5: + case D3DFMT_A1R5G5B5: return 5; + case D3DFMT_A4R4G4B4: + case D3DFMT_X4R4G4B4: return 4; + case D3DFMT_R3G3B2: + case D3DFMT_A8R3G3B2: return 2; + default: return 0; + } +} + +static int 
GetFormatAlphaBits( D3DFORMAT fmt ) { + switch( fmt ) { + case D3DFMT_R8G8B8: + case D3DFMT_X8R8G8B8: + case D3DFMT_R5G6B5: + case D3DFMT_X1R5G5B5: + case D3DFMT_R3G3B2: + case D3DFMT_X4R4G4B4: return 0; + case D3DFMT_A8R8G8B8: + case D3DFMT_A8R3G3B2: return 8; + case D3DFMT_A1R5G5B5: return 1; + case D3DFMT_A4R4G4B4: return 4; + case D3DFMT_A2B10G10R10: + case D3DFMT_A2R10G10B10: return 2; + default: return 0; + } +} + +int GetFormatDepthBits( D3DFORMAT fmt ) { + switch( fmt ) { + case D3DFMT_D16: return 16; + case D3DFMT_D15S1: return 15; + case D3DFMT_D24X8: + case D3DFMT_D24S8: + case D3DFMT_D24X4S4: return 24; + case D3DFMT_D32: return 32; + default: return 0; + } +} + +static D3DFORMAT ConvertToAlphaFormat( D3DFORMAT fmt ) +{ + if( fmt == D3DFMT_X8R8G8B8 ) + fmt = D3DFMT_A8R8G8B8; + else if( fmt == D3DFMT_X4R4G4B4 ) + fmt = D3DFMT_A4R4G4B4; + else if( fmt == D3DFMT_X1R5G5B5 ) + fmt = D3DFMT_A1R5G5B5; + return fmt; +} + +// ----------------------------------------------------------------------------- + + +static UInt32 buildVertexProcessings( const D3DCAPS9& caps ) +{ + UInt32 result = 0; + + // TODO: check vertex shader version + + DWORD devCaps = caps.DevCaps; + if( devCaps & D3DDEVCAPS_HWTRANSFORMANDLIGHT ) { + if( devCaps & D3DDEVCAPS_PUREDEVICE ) { + result |= (1<<kVPPureHardware); + } + result |= (1<<kVPHardware); + result |= (1<<kVPMixed); + } + + result |= (1<<kVPSoftware); + + return result; +} + + +static void buildDepthStencilFormats( IDirect3D9& d3d, D3DDeviceCombo& devCombo ) +{ + const D3DFORMAT dsFormats[] = { + D3DFMT_D24S8, D3DFMT_D24X8, D3DFMT_D24X4S4, D3DFMT_D16, D3DFMT_D15S1, D3DFMT_D32, + }; + const int dsFormatCount = sizeof(dsFormats) / sizeof(dsFormats[0]); + + for( int idsf = 0; idsf < dsFormatCount; ++idsf ) { + D3DFORMAT format = dsFormats[idsf]; + if( SUCCEEDED( d3d.CheckDeviceFormat( g_D3DAdapter, g_D3DDevType, devCombo.adapterFormat, D3DUSAGE_DEPTHSTENCIL, D3DRTYPE_SURFACE, format ) ) ) + { + if( SUCCEEDED( 
d3d.CheckDepthStencilMatch( g_D3DAdapter, g_D3DDevType, devCombo.adapterFormat, devCombo.backBufferFormat, format ) ) ) + { + devCombo.depthStencilFormats.push_back( format ); + } + } + } +} + + +static void buildMultiSampleTypes( IDirect3D9& d3d, D3DDeviceCombo& devCombo ) +{ + const size_t kMaxSamples = 16; + devCombo.multiSampleTypes.reserve( kMaxSamples ); + devCombo.multiSampleTypes.push_back( D3DMULTISAMPLE_NONE ); + + for( int samples = 2; samples <= kMaxSamples; ++samples ) { + D3DMULTISAMPLE_TYPE msType = GetD3DMultiSampleType( samples ); + DWORD msQuality; + if( SUCCEEDED( d3d.CheckDeviceMultiSampleType( g_D3DAdapter, g_D3DDevType, devCombo.backBufferFormat, devCombo.isWindowed, msType, NULL ) ) ) + devCombo.multiSampleTypes.push_back( samples ); + } +} + + +static void buildConflicts( IDirect3D9& d3d, D3DDeviceCombo& devCombo ) +{ + for( size_t ids = 0; ids < devCombo.depthStencilFormats.size(); ++ids ) { + D3DFORMAT format = (D3DFORMAT)devCombo.depthStencilFormats[ids]; + for( size_t ims = 0; ims < devCombo.multiSampleTypes.size(); ++ims ) { + D3DMULTISAMPLE_TYPE msType = (D3DMULTISAMPLE_TYPE)devCombo.multiSampleTypes[ims]; + if( FAILED( d3d.CheckDeviceMultiSampleType( + g_D3DAdapter, g_D3DDevType, + format, devCombo.isWindowed, msType, NULL ) ) ) + { + D3DDeviceCombo::MultiSampleConflict conflict; + conflict.format = format; + conflict.type = msType; + devCombo.conflicts.push_back( conflict ); + } + } + } +} + + +static bool enumerateDeviceCombos( IDirect3D9& d3d, const D3DCAPS9& caps, const DwordVector& adapterFormats, D3DDeviceComboVector& outCombos ) +{ + const D3DFORMAT bbufferFormats[] = { + D3DFMT_A8R8G8B8, D3DFMT_X8R8G8B8, D3DFMT_A2R10G10B10, + D3DFMT_R5G6B5, D3DFMT_A1R5G5B5, D3DFMT_X1R5G5B5 + }; + const int bbufferFormatCount = sizeof(bbufferFormats) / sizeof(bbufferFormats[0]); + + bool isWindowedArray[] = { false, true }; + + // see which adapter formats are supported by this device + for( size_t iaf = 0; iaf < adapterFormats.size(); ++iaf ) 
+ { + D3DFORMAT format = (D3DFORMAT)adapterFormats[iaf]; + for( int ibbf = 0; ibbf < bbufferFormatCount; ibbf++ ) + { + D3DFORMAT bbufferFormat = bbufferFormats[ibbf]; + if( GetFormatAlphaBits(bbufferFormat) < kMinAlphaBits ) + continue; + for( int iiw = 0; iiw < 2; ++iiw ) { + bool isWindowed = isWindowedArray[iiw]; + if( FAILED( d3d.CheckDeviceType( g_D3DAdapter, g_D3DDevType, format, bbufferFormat, isWindowed ) ) ) + continue; + + // Here, we have an adapter format / backbuffer format/ windowed + // combo that is supported by the system. We still need to find one or + // more suitable depth/stencil buffer format, multisample type, + // vertex processing type, and vsync. + D3DDeviceCombo devCombo; + + devCombo.adapterFormat = format; + devCombo.backBufferFormat = bbufferFormat; + devCombo.isWindowed = isWindowed; + devCombo.presentationIntervals = caps.PresentationIntervals; + + buildDepthStencilFormats( d3d, devCombo ); + if( devCombo.depthStencilFormats.empty() ) + continue; + + buildMultiSampleTypes( d3d, devCombo ); + if( devCombo.multiSampleTypes.empty() ) + continue; + + buildConflicts( d3d, devCombo ); + + outCombos.push_back( devCombo ); + } + } + } + + return !outCombos.empty(); +} + + +bool D3D9FormatCaps::Enumerate( IDirect3D9& d3d ) +{ + AssertIf( !m_Combos.empty() ); + HRESULT hr; + + const D3DFORMAT allowedFormats[] = { + D3DFMT_X8R8G8B8, D3DFMT_X1R5G5B5, D3DFMT_R5G6B5, D3DFMT_A2R10G10B10 + }; + const int allowedFormatCount = sizeof(allowedFormats) / sizeof(allowedFormats[0]); + + m_AdapterFormatForChecks = D3DFMT_UNKNOWN; + + // build a list of all display adapter formats + DwordVector adapterFormatList; // D3DFORMAT + + for( size_t ifmt = 0; ifmt < allowedFormatCount; ++ifmt ) + { + D3DFORMAT format = allowedFormats[ifmt]; + int modeCount = d3d.GetAdapterModeCount( g_D3DAdapter, format ); + for( int mode = 0; mode < modeCount; ++mode ) { + D3DDISPLAYMODE dm; + d3d.EnumAdapterModes( g_D3DAdapter, format, mode, &dm ); + if( dm.Width < 
(UINT)kMinDisplayWidth || dm.Height < (UINT)kMinDisplayHeight || GetFormatColorBits(dm.Format) < kMinColorBits ) + continue; + // adapterInfo->displayModes.push_back( dm ); + if( std::find(adapterFormatList.begin(),adapterFormatList.end(),dm.Format) == adapterFormatList.end() ) { + adapterFormatList.push_back( dm.Format ); + if( m_AdapterFormatForChecks == D3DFMT_UNKNOWN ) + m_AdapterFormatForChecks = format; + } + } + } + + if( m_AdapterFormatForChecks == D3DFMT_UNKNOWN ) // for some reason no format was selected for checks, use default + m_AdapterFormatForChecks = allowedFormats[0]; + + // get info for device on this adapter + D3DCAPS9 caps; + if( FAILED( d3d.GetDeviceCaps( g_D3DAdapter, g_D3DDevType, &caps ) ) ) + return false; + + // find suitable vertex processing modes (if any) + m_VertexProcessings = buildVertexProcessings( caps ); + AssertIf( !m_VertexProcessings ); + + // get info for each device combo on this device + if( !enumerateDeviceCombos( d3d, caps, adapterFormatList, m_Combos ) ) + return false; + + return true; +} + + +void D3D9FormatCaps::FindBestPresentationParams( int width, int height, D3DFORMAT desktopMode, bool windowed, int vBlankCount, int multiSample, D3DPRESENT_PARAMETERS& outParams ) const +{ + const D3DDeviceCombo* bestCombo = NULL; + int bestScore = -1; + + for( size_t idc = 0; idc < m_Combos.size(); ++idc ) + { + const D3DDeviceCombo& devCombo = m_Combos[idc]; + if( windowed && !devCombo.isWindowed ) + continue; + if( !windowed && devCombo.isWindowed ) + continue; + if( windowed ) + { + if( devCombo.adapterFormat != desktopMode ) + continue; + } + + int score = 0; + + bool matchesBB = (devCombo.backBufferFormat == ConvertToAlphaFormat(devCombo.adapterFormat)); + bool matchesDesktop = (devCombo.adapterFormat == desktopMode); + + if( matchesBB ) + score += 1; + if( matchesDesktop ) + score += 1; + if( GetFormatAlphaBits(devCombo.backBufferFormat) > 0 ) + score += 1; + + if( score > bestScore ) + { + bestScore = score; + bestCombo = 
&devCombo; + } + } + + if( !bestCombo ) + { + // This can happen if we're debugging force-16BPP modes on a 32BPP desktop, and so on + outParams.BackBufferFormat = desktopMode; + outParams.AutoDepthStencilFormat = D3DFMT_D16; + outParams.PresentationInterval = D3DPRESENT_INTERVAL_DEFAULT; + outParams.MultiSampleType = D3DMULTISAMPLE_NONE; + outParams.MultiSampleQuality = 0; + return; + } + + outParams.BackBufferFormat = bestCombo->backBufferFormat; + outParams.AutoDepthStencilFormat = (D3DFORMAT)bestCombo->depthStencilFormats[0]; + + // No support for intervals above 1 in windowed mode (case 497116) + if (windowed && vBlankCount > 1) + vBlankCount = 1; + + // best possible vsync parameter (if device doesn't support 2 fall back to 1) + DWORD intervals = bestCombo->presentationIntervals; + outParams.PresentationInterval = ( vBlankCount >= 2 ) && ( intervals & D3DPRESENT_INTERVAL_TWO ) ? D3DPRESENT_INTERVAL_TWO : + ( vBlankCount >= 1 ) && ( intervals & D3DPRESENT_INTERVAL_ONE ) ? D3DPRESENT_INTERVAL_ONE : + ( vBlankCount == 0 ) && ( intervals & D3DPRESENT_INTERVAL_IMMEDIATE ) ? D3DPRESENT_INTERVAL_IMMEDIATE : + D3DPRESENT_INTERVAL_DEFAULT; + + // Here we already know backbuffer, depth buffer formats and so on, so we can also clamp used FSAA to sane VRAM limits. + int backbufferBPP = GetBPPFromD3DFormat(outParams.BackBufferFormat)/8; + int frontbufferBPP = GetBPPFromD3DFormat(desktopMode)/8; + int depthBPP = GetBPPFromD3DFormat(outParams.AutoDepthStencilFormat)/8; + multiSample = ChooseSuitableFSAALevel( width, height, backbufferBPP, frontbufferBPP, depthBPP, multiSample ); + + // Find out best matched multi sample type. 
+ int msIdx = 0; + if( multiSample > 1 ) + { + while( msIdx < bestCombo->multiSampleTypes.size() && bestCombo->multiSampleTypes[msIdx] <= multiSample ) + ++msIdx; + --msIdx; + AssertIf( msIdx < 0 ); + } + outParams.MultiSampleType = GetD3DMultiSampleType(bestCombo->multiSampleTypes[msIdx]); + outParams.MultiSampleQuality = 0; +} + diff --git a/Runtime/GfxDevice/d3d/D3D9Enumeration.h b/Runtime/GfxDevice/d3d/D3D9Enumeration.h new file mode 100644 index 0000000..240bb89 --- /dev/null +++ b/Runtime/GfxDevice/d3d/D3D9Enumeration.h @@ -0,0 +1,64 @@ +#pragma once + +#include "D3D9Includes.h" + + +struct D3DDeviceCombo; + +typedef std::vector<DWORD> DwordVector; +typedef std::vector<D3DDeviceCombo> D3DDeviceComboVector; + + +enum D3DVertexProcessing { + kVPPureHardware, + kVPHardware, + kVPMixed, + kVPSoftware, +}; + + +//--------------------------------------------------------------------------- + +// A combo of adapter format, back buffer format, and windowed/fulscreen that +// is compatible with a D3D device. +struct D3DDeviceCombo { +public: + // A depth/stencil buffer format that is incompatible with a multisample type. + struct MultiSampleConflict { + D3DFORMAT format; + D3DMULTISAMPLE_TYPE type; + }; + typedef std::vector<MultiSampleConflict> MultiSampleConflictVector; +public: + D3DFORMAT adapterFormat; + D3DFORMAT backBufferFormat; + bool isWindowed; + DWORD presentationIntervals; + + DwordVector depthStencilFormats; + DwordVector multiSampleTypes; + MultiSampleConflictVector conflicts; +}; + + +//--------------------------------------------------------------------------- + +class D3D9FormatCaps { +public: + D3D9FormatCaps() : m_VertexProcessings(0) { } + + bool Enumerate( IDirect3D9& d3d ); + + // Fills in BackBufferFormat, AutoDepthStencilFormat, PresentationInterval, + // MultiSampleType, MultiSampleQuality. 
+ void FindBestPresentationParams( int width, int height, D3DFORMAT desktopMode, bool windowed, int vBlankCount, int multiSample, D3DPRESENT_PARAMETERS& outParams ) const; + + // Gets adapter format for doing CheckDeviceFormat checks. + // Usually D3DFMT_X8R8G8B8, except for really old cards that can't do 32 bpp. + D3DFORMAT GetAdapterFormatForChecks() const { return m_AdapterFormatForChecks; } + +public: + D3DDeviceComboVector m_Combos; + UInt32 m_VertexProcessings; // bitmask + D3DFORMAT m_AdapterFormatForChecks; +}; diff --git a/Runtime/GfxDevice/d3d/D3D9Includes.h b/Runtime/GfxDevice/d3d/D3D9Includes.h new file mode 100644 index 0000000..84596ff --- /dev/null +++ b/Runtime/GfxDevice/d3d/D3D9Includes.h @@ -0,0 +1,7 @@ +#ifndef D3DINCLUDES_H +#define D3DINCLUDES_H + +//#define D3D_DEBUG_INFO +#include "External/DirectX/builds/dx9include/d3d9.h" + +#endif diff --git a/Runtime/GfxDevice/d3d/D3D9Utils.cpp b/Runtime/GfxDevice/d3d/D3D9Utils.cpp new file mode 100644 index 0000000..3e25633 --- /dev/null +++ b/Runtime/GfxDevice/d3d/D3D9Utils.cpp @@ -0,0 +1,169 @@ +#include "UnityPrefix.h" +#include "D3D9Utils.h" +#include "Runtime/Utilities/ArrayUtility.h" +#include "Runtime/Shaders/GraphicsCaps.h" + + +#ifdef DUMMY_D3D9_CALLS +HRESULT CallDummyD3D9Function() +{ + return S_OK; +} +#endif + +struct D3D9Error { + HRESULT hr; + const char* message; +}; + +static D3D9Error s_D3DErrors[] = { + { D3DOK_NOAUTOGEN, "no mipmap autogen" }, + { D3DERR_WRONGTEXTUREFORMAT, "wrong texture format" }, + { D3DERR_UNSUPPORTEDCOLOROPERATION, "unsupported color op" }, + { D3DERR_UNSUPPORTEDCOLORARG, "unsupported color arg" }, + { D3DERR_UNSUPPORTEDALPHAOPERATION, "unsupported alpha op" }, + { D3DERR_UNSUPPORTEDALPHAARG, "unsupported alpha arg" }, + { D3DERR_TOOMANYOPERATIONS, "too many texture operations" }, + { D3DERR_CONFLICTINGTEXTUREFILTER, "conflicting texture filters" }, + { D3DERR_UNSUPPORTEDFACTORVALUE, "unsupported factor value" }, + { D3DERR_CONFLICTINGRENDERSTATE, "conflicting 
render states" }, + { D3DERR_UNSUPPORTEDTEXTUREFILTER, "unsupported texture filter" }, + { D3DERR_CONFLICTINGTEXTUREPALETTE, "conflicting texture palettes" }, + { D3DERR_DRIVERINTERNALERROR, "internal driver error" }, + { D3DERR_NOTFOUND, "requested item not found" }, + { D3DERR_MOREDATA, "more data than fits into buffer" }, + { D3DERR_DEVICELOST, "device lost" }, + { D3DERR_DEVICENOTRESET, "device not reset" }, + { D3DERR_NOTAVAILABLE, "queried technique not available" }, + { D3DERR_OUTOFVIDEOMEMORY, "out of VRAM" }, + { D3DERR_INVALIDDEVICE, "invalid device" }, + { D3DERR_INVALIDCALL, "invalid call" }, + { D3DERR_DRIVERINVALIDCALL, "driver invalid call" }, + { D3DERR_WASSTILLDRAWING, "was still drawing" }, + { S_OK, "S_OK" }, + { E_FAIL, "E_FAIL" }, + { E_INVALIDARG, "E_INVALIDARG" }, + { E_OUTOFMEMORY, "out of memory" }, +}; + +const char* GetD3D9Error( HRESULT hr ) +{ + for( int i = 0; i < ARRAY_SIZE(s_D3DErrors); ++i ) + { + if( hr == s_D3DErrors[i].hr ) + return s_D3DErrors[i].message; + } + + static char buffer[1000]; + sprintf( buffer, "unknown error, code 0x%X", hr ); + return buffer; +} + +int GetBPPFromD3DFormat( D3DFORMAT format ) +{ + switch( format ) { + case D3DFMT_UNKNOWN: + case kD3D9FormatNULL: + return 0; + case D3DFMT_X8R8G8B8: + case D3DFMT_A8R8G8B8: + case D3DFMT_A2R10G10B10: + case D3DFMT_A2B10G10R10: + case D3DFMT_R8G8B8: + case D3DFMT_A8B8G8R8: + case D3DFMT_R32F: + case D3DFMT_D24X8: + case D3DFMT_D24S8: + case D3DFMT_D24X4S4: + case kD3D9FormatINTZ: + case kD3D9FormatRAWZ: + return 32; + case D3DFMT_X1R5G5B5: + case D3DFMT_A1R5G5B5: + case D3DFMT_A4R4G4B4: + case D3DFMT_X4R4G4B4: + case D3DFMT_R5G6B5: + case D3DFMT_R16F: + case D3DFMT_D16: + case D3DFMT_D15S1: + case D3DFMT_D16_LOCKABLE: + case D3DFMT_L16: + case D3DFMT_A8L8: + case kD3D9FormatDF16: + return 16; + case D3DFMT_A16B16G16R16F: + return 64; + case D3DFMT_A32B32G32R32F: + return 128; + case D3DFMT_DXT1: + return 4; + case D3DFMT_A8: + case D3DFMT_L8: + case D3DFMT_DXT3: + case 
D3DFMT_DXT5: + return 8; + default: + ErrorString( Format("Unknown D3D format %x", format) ); + return 32; + } +} + +int GetStencilBitsFromD3DFormat (D3DFORMAT fmt) +{ + switch( fmt ) { + case D3DFMT_D15S1: return 1; + case D3DFMT_D24S8: return 8; + case D3DFMT_D24X4S4: return 4; + default: return 0; + } +} + +D3DMULTISAMPLE_TYPE GetD3DMultiSampleType (int samples) +{ + // Optimizer should take care of this, since value of D3DMULTISAMPLE_N_SAMPLES is N + switch( samples ) { + case 0: + case 1: return D3DMULTISAMPLE_NONE; + case 2: return D3DMULTISAMPLE_2_SAMPLES; + case 3: return D3DMULTISAMPLE_3_SAMPLES; + case 4: return D3DMULTISAMPLE_4_SAMPLES; + case 5: return D3DMULTISAMPLE_5_SAMPLES; + case 6: return D3DMULTISAMPLE_6_SAMPLES; + case 7: return D3DMULTISAMPLE_7_SAMPLES; + case 8: return D3DMULTISAMPLE_8_SAMPLES; + case 9: return D3DMULTISAMPLE_9_SAMPLES; + case 10: return D3DMULTISAMPLE_10_SAMPLES; + case 11: return D3DMULTISAMPLE_11_SAMPLES; + case 12: return D3DMULTISAMPLE_12_SAMPLES; + case 13: return D3DMULTISAMPLE_13_SAMPLES; + case 14: return D3DMULTISAMPLE_14_SAMPLES; + case 15: return D3DMULTISAMPLE_15_SAMPLES; + case 16: return D3DMULTISAMPLE_16_SAMPLES; + default: + ErrorString("Unknown sample count"); + return D3DMULTISAMPLE_NONE; + } +} + +bool CheckD3D9DebugRuntime (IDirect3DDevice9* dev) +{ + IDirect3DQuery9* query = NULL; + HRESULT hr = dev->CreateQuery (D3DQUERYTYPE_VERTEXSTATS, &query); + if( SUCCEEDED(hr) ) + { + query->Release (); + return true; + } + return false; +} + + +D3D9DepthStencilTexture CreateDepthStencilTextureD3D9 (IDirect3DDevice9* dev, int width, int height, D3DFORMAT format, D3DMULTISAMPLE_TYPE msType, DWORD msQuality, BOOL discardable) +{ + D3D9DepthStencilTexture tex; + + HRESULT hr = dev->CreateDepthStencilSurface (width, height, format, msType, msQuality, discardable, &tex.m_Surface, NULL); + REGISTER_EXTERNAL_GFX_ALLOCATION_REF(tex.m_Surface, width * height * GetBPPFromD3DFormat(format), NULL); + + return tex; +} diff 
--git a/Runtime/GfxDevice/d3d/D3D9Utils.h b/Runtime/GfxDevice/d3d/D3D9Utils.h new file mode 100644 index 0000000..529e58b --- /dev/null +++ b/Runtime/GfxDevice/d3d/D3D9Utils.h @@ -0,0 +1,69 @@ +#pragma once + +#include "D3D9Includes.h" +#include "Runtime/GfxDevice/GfxDeviceTypes.h" + +//#define DUMMY_D3D9_CALLS + +#ifndef DUMMY_D3D9_CALLS +#define D3D9_CALL(x) x +#define D3D9_CALL_HR(x) x +#else +HRESULT CallDummyD3D9Function(); +#define D3D9_CALL(x) CallDummyD3D9Function() +#define D3D9_CALL_HR(x) CallDummyD3D9Function() +#endif + + +const char* GetD3D9Error( HRESULT hr ); +int GetBPPFromD3DFormat( D3DFORMAT format ); +int GetStencilBitsFromD3DFormat (D3DFORMAT fmt); +D3DMULTISAMPLE_TYPE GetD3DMultiSampleType (int samples); + +bool CheckD3D9DebugRuntime (IDirect3DDevice9* dev); + +struct D3D9DepthStencilTexture { + D3D9DepthStencilTexture() : m_Texture(NULL), m_Surface(NULL) {} + + IDirect3DTexture9* m_Texture; + IDirect3DSurface9* m_Surface; + + void Release() { + if (m_Texture) { + REGISTER_EXTERNAL_GFX_DEALLOCATION(m_Texture); + m_Texture->Release(); + m_Texture = NULL; + } + if (m_Surface) { + REGISTER_EXTERNAL_GFX_DEALLOCATION(m_Surface); + m_Surface->Release(); + m_Surface = NULL; + } + } +}; + +const D3DFORMAT kD3D9FormatDF16 = (D3DFORMAT)MAKEFOURCC('D','F','1','6'); +const D3DFORMAT kD3D9FormatINTZ = (D3DFORMAT)MAKEFOURCC('I','N','T','Z'); +const D3DFORMAT kD3D9FormatRAWZ = (D3DFORMAT)MAKEFOURCC('R','A','W','Z'); +const D3DFORMAT kD3D9FormatNULL = (D3DFORMAT)MAKEFOURCC('N','U','L','L'); +const D3DFORMAT kD3D9FormatRESZ = (D3DFORMAT)MAKEFOURCC('R','E','S','Z'); + + +D3D9DepthStencilTexture CreateDepthStencilTextureD3D9 ( + IDirect3DDevice9* dev, int width, int height, D3DFORMAT format, + D3DMULTISAMPLE_TYPE msType, DWORD msQuality, BOOL discardable ); + +static inline DWORD GetD3D9SamplerIndex (ShaderType type, int unit) +{ + switch (type) { + case kShaderVertex: + DebugAssert (unit >= 0 && unit < 4); // DX9 has limit of 4 vertex samplers + return unit + 
D3DVERTEXTEXTURESAMPLER0; + case kShaderFragment: + DebugAssert (unit >= 0 && unit < kMaxSupportedTextureUnits); + return unit; + default: + Assert ("Unsupported shader type for sampler"); + return 0; + } +} diff --git a/Runtime/GfxDevice/d3d/D3D9VBO.cpp b/Runtime/GfxDevice/d3d/D3D9VBO.cpp new file mode 100644 index 0000000..19cc409 --- /dev/null +++ b/Runtime/GfxDevice/d3d/D3D9VBO.cpp @@ -0,0 +1,815 @@ +#include "UnityPrefix.h" +#include "D3D9VBO.h" +#include "D3D9Context.h" +#include "Runtime/GfxDevice/GfxDevice.h" +#include "D3D9Utils.h" +#include "GfxDeviceD3D9.h" +#include "Runtime/Profiler/MemoryProfiler.h" + + +// defined in GfxDeviceD3D9.cpp +IDirect3DVertexDeclaration9* GetD3DVertexDeclaration( UInt32 shaderChannelsMap ); +void UpdateChannelBindingsD3D( const ChannelAssigns& channels ); + + +// Define this to 1 to make VBO operations randomly fail. +// Use this to test error checking code. +#define DEBUG_RANDOMLY_FAIL_D3D_VBO 0 + + +#if !DEBUGMODE && DEBUG_RANDOMLY_FAIL_D3D_VBO +#error Never enable random VBO failures on release code! +#endif + +#if DEBUG_RANDOMLY_FAIL_D3D_VBO +#define RANDOM_FAIL_FOR_DEBUG - ((rand()%8==0) ? 
100000000 : 0) +#else +#define RANDOM_FAIL_FOR_DEBUG +#endif + + +static const D3DPRIMITIVETYPE kTopologyD3D9[kPrimitiveTypeCount] = +{ + D3DPT_TRIANGLELIST, + D3DPT_TRIANGLESTRIP, + D3DPT_TRIANGLELIST, //@TODO: make work + D3DPT_LINELIST, + D3DPT_LINESTRIP, + D3DPT_POINTLIST, +}; + + +// ----------------------------------------------------------------------------- + +IDirect3DIndexBuffer9* D3D9VBO::ms_CustomIB = NULL; +int D3D9VBO::ms_CustomIBSize = 0; +UInt32 D3D9VBO::ms_CustomIBUsedBytes = 0; + +D3D9VBO::D3D9VBO() +: m_IB(NULL) +, m_IBSize(0) +{ + memset(m_VertexDecls, 0, sizeof(m_VertexDecls)); + memset(m_VBStreams, 0, sizeof(m_VBStreams)); +} + +D3D9VBO::~D3D9VBO () +{ + for( int s = 0; s < kMaxVertexStreams; s++ ) + { + if( m_VBStreams[s] ) { + REGISTER_EXTERNAL_GFX_DEALLOCATION(m_VBStreams[s]); + ULONG refCount = m_VBStreams[s]->Release(); + AssertIf( refCount != 0 ); + m_VBStreams[s] = NULL; + } + } + if( m_IB ) { + REGISTER_EXTERNAL_GFX_DEALLOCATION(m_IB); + ULONG refCount = m_IB->Release(); + AssertIf( refCount != 0 ); + m_IB = NULL; + } + +} + + +void D3D9VBO::ResetDynamicVB() +{ + // Gets called on all VBs and ignores non-dynamic ones + for( int s = 0; s < kMaxVertexStreams; s++ ) + { + if( m_StreamModes[s] == kStreamModeDynamic ) + { + // Vertex buffer can be null when switching fullscreen in web player. + // There we lose device a couple of times, and ResetDynamicVB is called several + // times in succession. 
+ REGISTER_EXTERNAL_GFX_DEALLOCATION(m_VBStreams[s]); + SAFE_RELEASE( m_VBStreams[s] ); + } + } +} + +void D3D9VBO::CleanupSharedIndexBuffer() +{ + if( ms_CustomIB ) + { + REGISTER_EXTERNAL_GFX_DEALLOCATION(ms_CustomIB); + ULONG refCount = ms_CustomIB->Release(); + AssertIf( refCount != 0 ); + ms_CustomIBSize = 0; + ms_CustomIBUsedBytes = 0; + ms_CustomIB = NULL; + } +} + +void D3D9VBO::BindVertexStreams( IDirect3DDevice9* dev, const ChannelAssigns& channels ) +{ + int freeStream = -1; + for( int s = 0; s < kMaxVertexStreams; s++ ) + { + if( m_VBStreams[s] ) + D3D9_CALL( dev->SetStreamSource( s, m_VBStreams[s], 0, m_Streams[s].stride ) ); + else + freeStream = s; + } + int declIndex = kVertexDeclDefault; + if ((channels.GetSourceMap() & VERTEX_FORMAT1(Color)) && !m_ChannelInfo[kShaderChannelColor].IsValid()) + { + if (freeStream != -1) + { + declIndex = kVertexDeclAllWhiteStream; + if (!m_VertexDecls[declIndex]) + { + ChannelInfoArray channelInfo; + memcpy(&channelInfo, m_ChannelInfo, sizeof(channelInfo)); + ChannelInfo& colorInfo = channelInfo[kShaderChannelColor]; + colorInfo.stream = freeStream; + colorInfo.offset = 0; + colorInfo.format = kChannelFormatColor; + colorInfo.dimension = 1; + m_VertexDecls[declIndex] = GetD3D9GfxDevice().GetVertexDecls().GetVertexDecl( channelInfo ); + } + IDirect3DVertexBuffer9* whiteVB = GetD3D9GfxDevice().GetAllWhiteVertexStream(); + D3D9_CALL( dev->SetStreamSource( freeStream, whiteVB, 0, sizeof(D3DCOLOR) ) ); + } + else + ErrorString("Need a free stream to add default vertex colors!"); + } + D3D9_CALL( dev->SetVertexDeclaration( m_VertexDecls[declIndex] ) ); + UpdateChannelBindingsD3D( channels ); +} + +void D3D9VBO::UpdateVertexStream( const VertexBufferData& sourceData, unsigned stream ) +{ + DebugAssert( !m_IsStreamMapped[stream] ); + const StreamInfo& srcStream = sourceData.streams[stream]; + int oldSize = CalculateVertexStreamSize(m_Streams[stream], m_VertexCount); + int newSize = CalculateVertexStreamSize(srcStream, 
sourceData.vertexCount); + m_Streams[stream] = srcStream; + if (newSize == 0) + { + REGISTER_EXTERNAL_GFX_DEALLOCATION(m_VBStreams[stream]); + SAFE_RELEASE( m_VBStreams[stream] ); + return; + } + + const bool isDynamic = (m_StreamModes[stream] == kStreamModeDynamic); + DWORD usage = isDynamic ? (D3DUSAGE_DYNAMIC | D3DUSAGE_WRITEONLY) : (D3DUSAGE_WRITEONLY); + D3DPOOL pool = isDynamic ? D3DPOOL_DEFAULT : D3DPOOL_MANAGED; + + if( m_VBStreams[stream] == NULL || newSize != oldSize ) + { + REGISTER_EXTERNAL_GFX_DEALLOCATION(m_VBStreams[stream]); + SAFE_RELEASE( m_VBStreams[stream] ); + IDirect3DDevice9* dev = GetD3DDevice(); + HRESULT hr = dev->CreateVertexBuffer( newSize RANDOM_FAIL_FOR_DEBUG, usage, 0, pool, &m_VBStreams[stream], NULL ); + REGISTER_EXTERNAL_GFX_ALLOCATION_REF(m_VBStreams[stream],newSize,this); + if( FAILED(hr) ) + { + printf_console( "d3d: failed to create vertex buffer of size %d [%s]\n", newSize, GetD3D9Error(hr) ); + return; + } + } + + // Don't update contents if there is no source data. + // This is used to update the vertex declaration only, leaving buffer intact. + // Also to create an empty buffer that is written to later. + if (!sourceData.buffer) + return; + + UInt8* buffer; + HRESULT hr = m_VBStreams[stream]->Lock( 0 RANDOM_FAIL_FOR_DEBUG, 0, (void**)&buffer, isDynamic ? 
D3DLOCK_DISCARD : 0 ); + if( FAILED(hr) ) + { + printf_console( "d3d: failed to lock vertex buffer %p [%s]\n", m_VBStreams[stream], GetD3D9Error(hr) ); + return; + } + CopyVertexStream( sourceData, buffer, stream ); + + m_VBStreams[stream]->Unlock(); +} + + +void D3D9VBO::UpdateIndexBufferData (const IndexBufferData& sourceData) +{ + if( !sourceData.indices ) + { + m_IBSize = 0; + return; + } + + AssertIf( !m_IB ); + UInt8* buffer; + HRESULT hr = m_IB->Lock( 0 RANDOM_FAIL_FOR_DEBUG, 0, (void**)&buffer, 0 ); + if( FAILED(hr) ) + { + printf_console( "d3d: failed to lock index buffer %p [%s]\n", m_IB, GetD3D9Error(hr) ); + return; + } + + memcpy (buffer, sourceData.indices, sourceData.count * kVBOIndexSize); + + m_IB->Unlock(); +} + +bool D3D9VBO::MapVertexStream( VertexStreamData& outData, unsigned stream ) +{ + if( m_VBStreams[stream] == NULL ) + { + printf_console( "d3d: attempt to map null vertex buffer\n" ); + return false; + } + DebugAssertIf( IsVertexBufferLost() ); + AssertIf( m_IsStreamMapped[stream] ); + + const bool isDynamic = (m_StreamModes[stream] == kStreamModeDynamic); + + UInt8* buffer; + int vbSize = CalculateVertexStreamSize(m_Streams[stream], m_VertexCount); + HRESULT hr = m_VBStreams[stream]->Lock( 0 RANDOM_FAIL_FOR_DEBUG, 0, (void**)&buffer, isDynamic ? 
D3DLOCK_DISCARD : 0 ); + if( FAILED(hr) ) + { + printf_console( "d3d: failed to map vertex buffer %p of size %i [%s]\n", m_VBStreams[stream], vbSize, GetD3D9Error(hr) ); + return false; + } + m_IsStreamMapped[stream] = true; + + outData.buffer = buffer; + outData.channelMask = m_Streams[stream].channelMask; + outData.stride = m_Streams[stream].stride; + outData.vertexCount = m_VertexCount; + + GetRealGfxDevice().GetFrameStats().AddUploadVBO( vbSize ); + + return true; +} + +void D3D9VBO::UnmapVertexStream( unsigned stream ) +{ + DebugAssert( m_VBStreams[stream] ); + AssertIf( !m_IsStreamMapped[stream] ); + m_IsStreamMapped[stream] = false; + m_VBStreams[stream]->Unlock(); +} + +bool D3D9VBO::IsVertexBufferLost() const +{ + for( int s = 0; s < kMaxVertexStreams; s++ ) + if( m_Streams[s].channelMask && !m_VBStreams[s] ) + return true; + + return false; +} + +int D3D9VBO::GetRuntimeMemorySize() const +{ +#if ENABLE_MEM_PROFILER + return GetMemoryProfiler()->GetRelatedMemorySize(this) + + GetMemoryProfiler()->GetRelatedIDMemorySize((UInt32)this); +#else + return 0; +#endif +/* int vertexSize = 0; + for( int s = 0; s < kMaxVertexStreams; s++ ) + vertexSize += m_Streams[s].stride; + + return vertexSize * m_VertexCount + m_IBSize;*/ +} + + +void D3D9VBO::DrawVBO (const ChannelAssigns& channels, UInt32 firstIndexByte, UInt32 indexCount, GfxPrimitiveType topology, UInt32 firstVertex, UInt32 vertexCount) +{ + // just return if no indices + if( m_IBSize == 0 ) + return; + + HRESULT hr; + + if( m_VBStreams[0] == NULL || m_IB == NULL ) + { + printf_console( "d3d: VB or IB is null\n" ); + return; + } + + GfxDevice& device = GetRealGfxDevice(); + IDirect3DDevice9* dev = GetD3DDevice(); + + BindVertexStreams( dev, channels ); + device.BeforeDrawCall( false ); + + if (topology == kPrimitiveQuads) + { + UInt32 ibBytesLocked; + UInt16* ibPtr = MapDynamicIndexBuffer (indexCount/4*6, ibBytesLocked); + if (!ibPtr) + return; + const UInt16* ibSrc = NULL; + hr = m_IB->Lock 
(firstIndexByte, indexCount*kVBOIndexSize, (void**)&ibSrc, D3DLOCK_READONLY);
+		if (FAILED(hr))
+		{
+			// Could not read the source indices: give back the shared IB range we just mapped.
+			UnmapDynamicIndexBuffer();
+			return;
+		}
+		FillIndexBufferForQuads (ibPtr, ibBytesLocked, ibSrc, indexCount/4);
+		m_IB->Unlock ();
+		UnmapDynamicIndexBuffer ();
+		// Draw from the range just written into the shared IB, then advance its cursor.
+		firstIndexByte = ms_CustomIBUsedBytes;
+		ms_CustomIBUsedBytes += ibBytesLocked;
+		D3D9_CALL(dev->SetIndices(ms_CustomIB));
+	}
+	else
+	{
+		D3D9_CALL(dev->SetIndices( m_IB ));
+	}
+
+	// draw
+	D3DPRIMITIVETYPE primType = kTopologyD3D9[topology];
+	int primCount = GetPrimitiveCount (indexCount, topology, false);
+	hr = D3D9_CALL_HR(dev->DrawIndexedPrimitive (primType, 0, firstVertex, vertexCount, firstIndexByte/2, primCount));
+	Assert(SUCCEEDED(hr));
+
+	device.GetFrameStats().AddDrawCall (primCount, vertexCount);
+}
+
+// Maps room for indexCount 16-bit indices in the shared dynamic index buffer (ms_CustomIB).
+// The buffer is (re)created when the request does not fit the current capacity; otherwise
+// the range is appended with NOOVERWRITE, or the buffer is discarded and restarted at offset 0.
+// Returns a pointer to the locked memory, or NULL on failure. On success outBytesUsed receives
+// the number of bytes mapped. The caller advances ms_CustomIBUsedBytes after drawing.
+UInt16* D3D9VBO::MapDynamicIndexBuffer (int indexCount, UInt32& outBytesUsed)
+{
+	HRESULT hr;
+	const UInt32 kMaxIndices = 64000; // Smaller threshold than absolutely necessary
+	Assert (indexCount <= kMaxIndices);
+	indexCount = std::min<UInt32>(indexCount, kMaxIndices);
+	int ibCapacity = indexCount * kVBOIndexSize;
+	int newIBSize = std::max (ibCapacity, 32*1024); // 32k IB at least
+
+	if (newIBSize > ms_CustomIBSize)
+	{
+		if (ms_CustomIB)
+		{
+			REGISTER_EXTERNAL_GFX_DEALLOCATION(ms_CustomIB);
+			ms_CustomIB->Release();
+			// Fix: never keep a pointer to the released buffer around.
+			ms_CustomIB = NULL;
+		}
+		ms_CustomIBSize = newIBSize;
+		ms_CustomIBUsedBytes = 0;
+
+		IDirect3DDevice9* dev = GetD3DDevice();
+		// Uses the function-level hr (the original declared a second, shadowing HRESULT here).
+		hr = dev->CreateIndexBuffer (ms_CustomIBSize RANDOM_FAIL_FOR_DEBUG, D3DUSAGE_WRITEONLY | D3DUSAGE_DYNAMIC, D3DFMT_INDEX16, D3DPOOL_DEFAULT , &ms_CustomIB, NULL);
+		REGISTER_EXTERNAL_GFX_ALLOCATION_REF(ms_CustomIB,ms_CustomIBSize,0);
+
+		if( FAILED(hr) )
+		{
+			printf_console ("d3d: failed to create custom index buffer of size %d [%s]\n", newIBSize, GetD3D9Error(hr));
+			// Fix: forget the advertised capacity so the next call retries creation
+			// instead of skipping this branch and locking a buffer that does not exist.
+			ms_CustomIB = NULL;
+			ms_CustomIBSize = 0;
+			return NULL;
+		}
+	}
+
+	UInt16* buffer;
+	if (ms_CustomIBUsedBytes + ibCapacity > ms_CustomIBSize)
+	{
+		hr = ms_CustomIB->Lock (0 RANDOM_FAIL_FOR_DEBUG, ibCapacity, (void**)&buffer,
D3DLOCK_DISCARD); + if (FAILED(hr)) + { + printf_console ("d3d: failed to lock shared index buffer with discard [%s]\n", GetD3D9Error(hr)); + return NULL; + } + ms_CustomIBUsedBytes = 0; + } + else + { + hr = ms_CustomIB->Lock (ms_CustomIBUsedBytes RANDOM_FAIL_FOR_DEBUG, ibCapacity, (void**)&buffer, D3DLOCK_NOOVERWRITE); + if (FAILED(hr)) + { + printf_console ("d3d: failed to lock shared index buffer, offset %i size %i [%s]\n", ms_CustomIBUsedBytes, ibCapacity, GetD3D9Error(hr)); + return NULL; + } + } + outBytesUsed = ibCapacity; + + return buffer; +} + +void D3D9VBO::UnmapDynamicIndexBuffer () +{ + ms_CustomIB->Unlock(); +} + + +#if GFX_ENABLE_DRAW_CALL_BATCHING + void D3D9VBO::DrawCustomIndexed( const ChannelAssigns& channels, void* indices, UInt32 indexCount, + GfxPrimitiveType topology, UInt32 vertexRangeBegin, UInt32 vertexRangeEnd, UInt32 drawVertexCount ) + { + Assert(!m_IsStreamMapped[0]); + + if (m_VBStreams[0] == NULL) + { + printf_console( "d3d: VB is null\n" ); + return; + } + UInt32 ibBytesUsed; + UInt16* ibPtr = MapDynamicIndexBuffer (indexCount, ibBytesUsed); + if (!ibPtr) + return; + memcpy (ibPtr, indices, ibBytesUsed); + UnmapDynamicIndexBuffer (); + + GfxDevice& device = GetRealGfxDevice(); + IDirect3DDevice9* dev = GetD3DDevice(); + HRESULT hr; + + BindVertexStreams( dev, channels ); + device.BeforeDrawCall( false ); + + D3D9_CALL(dev->SetIndices( ms_CustomIB )); + + D3DPRIMITIVETYPE primType = kTopologyD3D9[topology]; + int primCount = GetPrimitiveCount (indexCount, topology, false); + hr = D3D9_CALL_HR(dev->DrawIndexedPrimitive(primType, 0, vertexRangeBegin, vertexRangeEnd-vertexRangeBegin, ms_CustomIBUsedBytes / kVBOIndexSize, primCount)); + Assert(SUCCEEDED(hr)); + ms_CustomIBUsedBytes += ibBytesUsed; + + device.GetFrameStats().AddDrawCall (primCount, drawVertexCount); + } +#endif + + +void D3D9VBO::UpdateVertexData( const VertexBufferData& buffer ) +{ + // Old vertex count and streams are still used here + for (unsigned stream = 0; stream 
< kMaxVertexStreams; stream++)
+		UpdateVertexStream( buffer, stream );
+
+	// Adopt the new channel layout; cached declarations are dropped and the default one re-fetched.
+	memcpy( m_ChannelInfo, buffer.channels, sizeof(m_ChannelInfo) );
+	memset( m_VertexDecls, 0, sizeof(m_VertexDecls) );
+	m_VertexDecls[kVertexDeclDefault] = GetD3D9GfxDevice().GetVertexDecls().GetVertexDecl( m_ChannelInfo );
+	m_VertexCount = buffer.vertexCount;
+}
+
+// Makes sure m_IB exists with the size required by 'buffer' (creating it, or re-creating it
+// when the size changed), then uploads the indices through UpdateIndexBufferData.
+// On creation failure the error is logged and the function returns without uploading.
+void D3D9VBO::UpdateIndexData (const IndexBufferData& buffer)
+{
+	IDirect3DDevice9* dev = GetD3DDevice();
+	int newSize = CalculateIndexBufferSize(buffer);
+
+	if( !m_IB )
+	{
+		// initially, create a static buffer
+		// (write-only unless quads are present: DrawVBO locks m_IB for reading to expand quads)
+		HRESULT hr = dev->CreateIndexBuffer( newSize RANDOM_FAIL_FOR_DEBUG, (buffer.hasTopologies & (1<<kPrimitiveQuads)) ? 0 : D3DUSAGE_WRITEONLY, D3DFMT_INDEX16, D3DPOOL_MANAGED, &m_IB, NULL );
+		REGISTER_EXTERNAL_GFX_ALLOCATION_REF(m_IB,newSize,this);
+		if( FAILED(hr) )
+		{
+			printf_console( "d3d: failed to create index buffer of size %d [%s]\n", newSize, GetD3D9Error(hr) );
+			return;
+		}
+	}
+	else
+	{
+		if( newSize != m_IBSize )
+		{
+			// Kept only so the failure message can identify the buffer being replaced.
+			IDirect3DIndexBuffer9* oldIB = m_IB;
+			REGISTER_EXTERNAL_GFX_DEALLOCATION(m_IB);
+			m_IB->Release();
+			// Fix: drop the stale pointer and size right away. If the re-creation below fails
+			// we return early, and DrawVBO must see "no index buffer" rather than a freed one.
+			m_IB = NULL;
+			m_IBSize = 0;
+			HRESULT hr = dev->CreateIndexBuffer( newSize RANDOM_FAIL_FOR_DEBUG, (buffer.hasTopologies & (1<<kPrimitiveQuads)) ?
0 : D3DUSAGE_WRITEONLY, D3DFMT_INDEX16, D3DPOOL_MANAGED, &m_IB, NULL ); + REGISTER_EXTERNAL_GFX_ALLOCATION_REF(m_IB,newSize,this); + if( FAILED(hr) ) + { + printf_console( "d3d: failed to resize index buffer %p to size %d [%s]\n", oldIB, newSize, GetD3D9Error(hr) ); + return; + } + } + } + m_IBSize = newSize; + UpdateIndexBufferData(buffer); +} + +// ----------------------------------------------------------------------------- + + +DynamicD3D9VBO::DynamicD3D9VBO( UInt32 vbSize, UInt32 ibSize ) +: DynamicVBO() +, m_VBSize(vbSize) +, m_VBUsedBytes(0) +, m_IBSize(ibSize) +, m_IBUsedBytes(0) +, m_VB(NULL) +, m_IB(NULL) +, m_VertexDecl(NULL) +, m_LastChunkStartVertex(0) +, m_LastChunkStartIndex(0) +, m_QuadsIB(NULL) +, m_QuadsIBFailed(false) +{ +} + +DynamicD3D9VBO::~DynamicD3D9VBO () +{ + if( m_VB ) { + REGISTER_EXTERNAL_GFX_DEALLOCATION(m_VB); + ULONG refCount = m_VB->Release(); + AssertIf( refCount != 0 ); + } + if( m_IB ) { + REGISTER_EXTERNAL_GFX_DEALLOCATION(m_IB); + ULONG refCount = m_IB->Release(); + AssertIf( refCount != 0 ); + } + if( m_QuadsIB ) { + REGISTER_EXTERNAL_GFX_DEALLOCATION(m_QuadsIB); + ULONG refCount = m_QuadsIB->Release(); + AssertIf( refCount != 0 ); + } +} + +void DynamicD3D9VBO::InitializeQuadsIB() +{ + AssertIf( m_QuadsIB ); + + IDirect3DDevice9* dev = GetD3DDevice(); + HRESULT hr = dev->CreateIndexBuffer( VBO::kMaxQuads * 6 * kVBOIndexSize RANDOM_FAIL_FOR_DEBUG, D3DUSAGE_WRITEONLY, D3DFMT_INDEX16, D3DPOOL_MANAGED, &m_QuadsIB, NULL ); + REGISTER_EXTERNAL_GFX_ALLOCATION_REF(m_QuadsIB,VBO::kMaxQuads * 6 * kVBOIndexSize,this); + if( FAILED(hr) ) + { + printf_console( "d3d: failed to create quads index buffer [%s]\n", GetD3D9Error(hr) ); + m_QuadsIBFailed = true; + return; + } + UInt16* ib = NULL; + hr = m_QuadsIB->Lock( 0 RANDOM_FAIL_FOR_DEBUG, 0, (void**)&ib, 0 ); + if( FAILED(hr) ) + { + printf_console( "d3d: failed to lock quads index buffer [%s]\n", GetD3D9Error(hr) ); + REGISTER_EXTERNAL_GFX_DEALLOCATION(m_QuadsIB); + m_QuadsIB->Release(); 
+		m_QuadsIB = NULL;
+		m_QuadsIBFailed = true;
+		return;
+	}
+
+	// Two triangles per quad; each quad consumes four consecutive vertices.
+	UInt32 baseIndex = 0;
+	for( int i = 0; i < VBO::kMaxQuads; ++i )
+	{
+		ib[0] = baseIndex + 1;
+		ib[1] = baseIndex + 2;
+		ib[2] = baseIndex;
+		ib[3] = baseIndex + 2;
+		ib[4] = baseIndex + 3;
+		ib[5] = baseIndex;
+		baseIndex += 4;
+		ib += 6;
+	}
+
+	m_QuadsIB->Unlock();
+}
+
+// Draws the chunk most recently handed out by GetChunk and closed by ReleaseChunk,
+// using the primitive type that matches m_LastRenderMode, and records the draw call
+// in the frame statistics. Does nothing when the last chunk was empty.
+void DynamicD3D9VBO::DrawChunk (const ChannelAssigns& channels)
+{
+	// just return if nothing to render
+	if( !m_LastChunkShaderChannelMask )
+		return;
+
+	// Fix: start from S_OK. The quads path issues no draw when the quads index buffer
+	// could not be created, and the FAILED(hr) assert at the end of this function
+	// would otherwise read an uninitialized value.
+	HRESULT hr = S_OK;
+
+	AssertIf( !m_LastChunkShaderChannelMask || !m_LastChunkStride );
+	AssertIf( m_LendedChunk );
+
+	GfxDevice& device = GetRealGfxDevice();
+	IDirect3DDevice9* dev = GetD3DDevice();
+
+	// setup VBO
+	DebugAssertIf( !m_VB );
+	D3D9_CALL(dev->SetStreamSource( 0, m_VB, 0, m_LastChunkStride ));
+	D3D9_CALL(dev->SetVertexDeclaration( m_VertexDecl ));
+	UpdateChannelBindingsD3D( channels );
+	device.BeforeDrawCall( false );
+
+	// draw
+	GfxDeviceStats& stats = device.GetFrameStats();
+	int primCount = 0;
+	if( m_LastRenderMode == kDrawTriangleStrip )
+	{
+		hr = D3D9_CALL_HR(dev->DrawPrimitive( D3DPT_TRIANGLESTRIP, m_LastChunkStartVertex, m_LastChunkVertices-2 ));
+		primCount = m_LastChunkVertices-2;
+	}
+	else if (m_LastRenderMode == kDrawIndexedTriangleStrip)
+	{
+		DebugAssertIf( !m_IB );
+		D3D9_CALL(dev->SetIndices( m_IB ));
+		hr = D3D9_CALL_HR(dev->DrawIndexedPrimitive( D3DPT_TRIANGLESTRIP, m_LastChunkStartVertex, 0, m_LastChunkVertices, m_LastChunkStartIndex, m_LastChunkIndices-2 ));
+		primCount = m_LastChunkIndices-2;
+	}
+	else if( m_LastRenderMode == kDrawQuads )
+	{
+		// initialize quads index buffer if needed
+		if( !m_QuadsIB )
+			InitializeQuadsIB();
+		// if quads index buffer has valid data, draw with it
+		if( !m_QuadsIBFailed )
+		{
+			D3D9_CALL(dev->SetIndices( m_QuadsIB ));
+			hr = D3D9_CALL_HR(dev->DrawIndexedPrimitive( D3DPT_TRIANGLELIST, m_LastChunkStartVertex, 0, m_LastChunkVertices, 0, m_LastChunkVertices/2 ));
+			primCount = m_LastChunkVertices/2;
+		}
+	
} + else if (m_LastRenderMode == kDrawIndexedLines) + { + DebugAssertIf( !m_IB ); + D3D9_CALL(dev->SetIndices( m_IB )); + hr = D3D9_CALL_HR(dev->DrawIndexedPrimitive( D3DPT_LINELIST, m_LastChunkStartVertex, 0, m_LastChunkVertices, m_LastChunkStartIndex, m_LastChunkIndices/2 )); + primCount = m_LastChunkIndices/2; + } + else if (m_LastRenderMode == kDrawIndexedPoints) + { + DebugAssertIf( !m_IB ); + D3D9_CALL(dev->SetIndices( m_IB )); + hr = D3D9_CALL_HR(dev->DrawIndexedPrimitive( D3DPT_POINTLIST, m_LastChunkStartVertex, 0, m_LastChunkVertices, m_LastChunkStartIndex, m_LastChunkIndices )); + primCount = m_LastChunkIndices; + } + else + { + DebugAssertIf( !m_IB ); + D3D9_CALL(dev->SetIndices( m_IB )); + hr = D3D9_CALL_HR(dev->DrawIndexedPrimitive( D3DPT_TRIANGLELIST, m_LastChunkStartVertex, 0, m_LastChunkVertices, m_LastChunkStartIndex, m_LastChunkIndices/3 )); + primCount = m_LastChunkIndices/3; + } + stats.AddDrawCall (primCount, m_LastChunkVertices); + AssertIf(FAILED(hr)); +} + +bool DynamicD3D9VBO::GetChunk( UInt32 shaderChannelMask, UInt32 maxVertices, UInt32 maxIndices, RenderMode renderMode, void** outVB, void** outIB ) +{ + Assert( !m_LendedChunk ); + Assert( maxVertices < 65536 && maxIndices < 65536*3 ); + Assert(!((renderMode == kDrawQuads) && (VBO::kMaxQuads*4 < maxVertices))); + DebugAssertMsg(outVB != NULL && maxVertices > 0, "DynamicD3D9VBO::GetChunk - outVB: 0x%08x maxVertices: %d", outVB, maxVertices); + DebugAssertMsg( + (renderMode == kDrawIndexedQuads && (outIB != NULL && maxIndices > 0)) || + (renderMode == kDrawIndexedPoints && (outIB != NULL && maxIndices > 0)) || + (renderMode == kDrawIndexedLines && (outIB != NULL && maxIndices > 0)) || + (renderMode == kDrawIndexedTriangles && (outIB != NULL && maxIndices > 0)) || + (renderMode == kDrawIndexedTriangleStrip && (outIB != NULL && maxIndices > 0)) || + (renderMode == kDrawTriangleStrip && (outIB == NULL && maxIndices == 0)) || + (renderMode == kDrawQuads && (outIB == NULL && maxIndices == 0)), + 
"DynamicD3D9VBO::GetChunk - renderMode: %d outIB: 0x%08x maxIndices: %d", renderMode, outIB, maxIndices); + HRESULT hr; + bool success = true; + + m_LendedChunk = true; + m_LastRenderMode = renderMode; + + if( maxVertices == 0 ) + maxVertices = 8; + + m_LastChunkStride = 0; + for( int i = 0; i < kShaderChannelCount; ++i ) { + if( shaderChannelMask & (1<<i) ) + m_LastChunkStride += VBO::GetDefaultChannelByteSize(i); + } + if (shaderChannelMask != m_LastChunkShaderChannelMask) + { + m_VertexDecl = GetD3DVertexDeclaration( shaderChannelMask ); + m_LastChunkShaderChannelMask = shaderChannelMask; + } + IDirect3DDevice9* dev = GetD3DDevice(); + + // -------- vertex buffer + + DebugAssertIf( !outVB ); + UInt32 vbCapacity = maxVertices * m_LastChunkStride; + // check if requested chunk is larger than current buffer + if( vbCapacity > m_VBSize ) { + m_VBSize = vbCapacity * 2; // allocate more up front + if( m_VB ){ + REGISTER_EXTERNAL_GFX_DEALLOCATION(m_VB); + m_VB->Release(); + } + m_VB = NULL; + } + // allocate buffer if don't have it yet + if( !m_VB ) { + hr = dev->CreateVertexBuffer( m_VBSize RANDOM_FAIL_FOR_DEBUG, D3DUSAGE_DYNAMIC | D3DUSAGE_WRITEONLY, 0, D3DPOOL_DEFAULT, &m_VB, NULL ); + REGISTER_EXTERNAL_GFX_ALLOCATION_REF(m_VB,m_VBSize,this); + if( FAILED(hr) ) + { + printf_console( "d3d: failed to create dynamic vertex buffer of size %d [%s]\n", m_VBSize, GetD3D9Error(hr) ); + success = false; + *outVB = NULL; + } + } + + // lock, making sure the offset we lock is multiple of vertex stride + if( m_VB ) + { + m_VBUsedBytes = ((m_VBUsedBytes + (m_LastChunkStride-1)) / m_LastChunkStride) * m_LastChunkStride; + if( m_VBUsedBytes + vbCapacity > m_VBSize ) { + hr = m_VB->Lock( 0 RANDOM_FAIL_FOR_DEBUG, 0, outVB, D3DLOCK_DISCARD ); + if( FAILED(hr) ) + { + printf_console( "d3d: failed to lock dynamic vertex buffer with discard [%s]\n", GetD3D9Error(hr) ); + *outVB = NULL; + success = false; + } + m_VBUsedBytes = 0; + } else { + hr = m_VB->Lock( m_VBUsedBytes 
RANDOM_FAIL_FOR_DEBUG, vbCapacity, outVB, D3DLOCK_NOOVERWRITE ); + if( FAILED(hr) ) + { + printf_console( "d3d: failed to lock vertex index buffer, offset %i size %i [%s]\n", m_VBUsedBytes, vbCapacity, GetD3D9Error(hr) ); + *outVB = NULL; + success = false; + } + } + m_LastChunkStartVertex = m_VBUsedBytes / m_LastChunkStride; + DebugAssertIf( m_LastChunkStartVertex * m_LastChunkStride != m_VBUsedBytes ); + } + + // -------- index buffer + + const bool indexed = (renderMode != kDrawQuads) && (renderMode != kDrawTriangleStrip); + if( success && maxIndices && indexed ) + { + UInt32 ibCapacity = maxIndices * kVBOIndexSize; + // check if requested chunk is larger than current buffer + if( ibCapacity > m_IBSize ) { + m_IBSize = ibCapacity * 2; // allocate more up front + if( m_IB ){ + REGISTER_EXTERNAL_GFX_DEALLOCATION(m_IB); + m_IB->Release(); + } + m_IB = NULL; + } + // allocate buffer if don't have it yet + if( !m_IB ) { + hr = dev->CreateIndexBuffer( m_IBSize RANDOM_FAIL_FOR_DEBUG, D3DUSAGE_DYNAMIC | D3DUSAGE_WRITEONLY, D3DFMT_INDEX16, D3DPOOL_DEFAULT, &m_IB, NULL ); + REGISTER_EXTERNAL_GFX_ALLOCATION_REF(m_IB,m_IBSize,this); + if( FAILED(hr) ) + { + printf_console( "d3d: failed to create dynamic index buffer of size %d [%s]\n", m_IBSize, GetD3D9Error(hr) ); + if( m_VB ) + m_VB->Unlock(); + } + } + // lock it if we have IB created successfully + if( m_IB ) + { + if( m_IBUsedBytes + ibCapacity > m_IBSize ) { + hr = m_IB->Lock( 0 RANDOM_FAIL_FOR_DEBUG, 0, outIB, D3DLOCK_DISCARD ); + if( FAILED(hr) ) + { + printf_console( "d3d: failed to lock dynamic index buffer with discard [%s]\n", GetD3D9Error(hr) ); + *outIB = NULL; + success = false; + if( m_VB ) + m_VB->Unlock(); + } + m_IBUsedBytes = 0; + } else { + hr = m_IB->Lock( m_IBUsedBytes RANDOM_FAIL_FOR_DEBUG, ibCapacity, outIB, D3DLOCK_NOOVERWRITE ); + if( FAILED(hr) ) + { + printf_console( "d3d: failed to lock dynamic index buffer, offset %i size %i [%s]\n", m_IBUsedBytes, ibCapacity, GetD3D9Error(hr) ); + *outIB = 
NULL; + success = false; + if( m_VB ) + m_VB->Unlock(); + } + } + m_LastChunkStartIndex = m_IBUsedBytes / 2; + } + else + { + *outIB = NULL; + success = false; + } + } + + if( !success ) + m_LendedChunk = false; + + return success; +} + +void DynamicD3D9VBO::ReleaseChunk( UInt32 actualVertices, UInt32 actualIndices ) +{ + Assert( m_LendedChunk ); + Assert( m_LastRenderMode == kDrawIndexedTriangleStrip || m_LastRenderMode == kDrawIndexedQuads || m_LastRenderMode == kDrawIndexedPoints || m_LastRenderMode == kDrawIndexedLines || actualIndices % 3 == 0 ); + m_LendedChunk = false; + + const bool indexed = (m_LastRenderMode != kDrawQuads) && (m_LastRenderMode != kDrawTriangleStrip); + + m_LastChunkVertices = actualVertices; + m_LastChunkIndices = actualIndices; + + // unlock buffers + m_VB->Unlock(); + if( indexed ) + m_IB->Unlock(); + + if( !actualVertices || (indexed && !actualIndices) ) { + m_LastChunkShaderChannelMask = 0; + return; + } + + UInt32 actualVBSize = actualVertices * m_LastChunkStride; + m_VBUsedBytes += actualVBSize; + UInt32 actualIBSize = actualIndices * kVBOIndexSize; + m_IBUsedBytes += actualIBSize; +} + + diff --git a/Runtime/GfxDevice/d3d/D3D9VBO.h b/Runtime/GfxDevice/d3d/D3D9VBO.h new file mode 100644 index 0000000..71c0da9 --- /dev/null +++ b/Runtime/GfxDevice/d3d/D3D9VBO.h @@ -0,0 +1,86 @@ +#pragma once + +#include "D3D9Includes.h" +#include "Runtime/Shaders/VBO.h" + + +// Implements Direct3D9 VBO +class D3D9VBO : public VBO { +public: + D3D9VBO(); + virtual ~D3D9VBO(); + + virtual void UpdateVertexData( const VertexBufferData& buffer ); + virtual void UpdateIndexData (const IndexBufferData& buffer); + virtual void DrawVBO (const ChannelAssigns& channels, UInt32 firstIndexByte, UInt32 indexCount, GfxPrimitiveType topology, UInt32 firstVertex, UInt32 vertexCount); + #if GFX_ENABLE_DRAW_CALL_BATCHING + virtual void DrawCustomIndexed( const ChannelAssigns& channels, void* indices, UInt32 indexCount, + GfxPrimitiveType topology, UInt32 
vertexRangeBegin, UInt32 vertexRangeEnd, UInt32 drawVertexCount ); + #endif + virtual bool MapVertexStream( VertexStreamData& outData, unsigned stream ); + virtual void UnmapVertexStream( unsigned stream ); + virtual bool IsVertexBufferLost() const; + + virtual void ResetDynamicVB(); + + virtual int GetRuntimeMemorySize() const; + + static void CleanupSharedIndexBuffer(); + +private: + void BindVertexStreams( IDirect3DDevice9* dev, const ChannelAssigns& channels ); + void UpdateVertexStream( const VertexBufferData& sourceData, unsigned stream ); + void UpdateIndexBufferData (const IndexBufferData& sourceData); + static UInt16* MapDynamicIndexBuffer (int indexCount, UInt32& outBytesUsed); + static void UnmapDynamicIndexBuffer (); + +private: + int m_VertexCount; + + enum + { + kVertexDeclDefault, + kVertexDeclAllWhiteStream, + kVertexDeclCount + }; + + IDirect3DVertexBuffer9* m_VBStreams[kMaxVertexStreams]; + IDirect3DIndexBuffer9* m_IB; + IDirect3DVertexDeclaration9* m_VertexDecls[kVertexDeclCount]; + ChannelInfoArray m_ChannelInfo; + int m_IBSize; + + static IDirect3DIndexBuffer9* ms_CustomIB; + static int ms_CustomIBSize; + static UInt32 ms_CustomIBUsedBytes; +}; + +class DynamicD3D9VBO : public DynamicVBO { +public: + DynamicD3D9VBO( UInt32 vbSize, UInt32 ibSize ); + virtual ~DynamicD3D9VBO(); + + virtual bool GetChunk( UInt32 shaderChannelMask, UInt32 maxVertices, UInt32 maxIndices, RenderMode mode, void** outVB, void** outIB ); + virtual void ReleaseChunk( UInt32 actualVertices, UInt32 actualIndices ); + virtual void DrawChunk (const ChannelAssigns& channels); + +private: + void InitializeQuadsIB(); + +private: + UInt32 m_VBSize; + UInt32 m_VBUsedBytes; + UInt32 m_IBSize; + UInt32 m_IBUsedBytes; + + IDirect3DVertexBuffer9* m_VB; + IDirect3DIndexBuffer9* m_IB; + IDirect3DVertexDeclaration9* m_VertexDecl; // vertex declaration for the last chunk + + UInt32 m_LastChunkStartVertex; + UInt32 m_LastChunkStartIndex; + + IDirect3DIndexBuffer9* m_QuadsIB; // static IB 
for drawing quads + bool m_QuadsIBFailed; +}; + diff --git a/Runtime/GfxDevice/d3d/D3D9Window.cpp b/Runtime/GfxDevice/d3d/D3D9Window.cpp new file mode 100644 index 0000000..b568b34 --- /dev/null +++ b/Runtime/GfxDevice/d3d/D3D9Window.cpp @@ -0,0 +1,272 @@ +#include "UnityPrefix.h" +#include "D3D9Window.h" +#include "GfxDeviceD3D9.h" +#include "RenderTextureD3D.h" +#include "Runtime/Misc/QualitySettings.h" +#include "Runtime/Threads/ThreadSharedObject.h" +#include "Runtime/GfxDevice/GfxDevice.h" + + +#if UNITY_EDITOR + +bool IsD3D9DeviceLost(); +void SetD3D9DeviceLost( bool lost ); + +static bool s_OldHasDepthFlag = false; +static D3D9Window* s_CurrentD3DWindow = NULL; +static int s_CurrentD3DFSAALevel = 0; + +int GetCurrentD3DFSAALevel() { return s_CurrentD3DFSAALevel; } + +void SetNoRenderTextureActiveEditor(); // RenderTexture.cpp + + +D3D9Window::D3D9Window(IDirect3DDevice9* device, HWND window, int width, int height, DepthBufferFormat depthFormat, int antiAlias ) +: GfxDeviceWindow(window, width, height, depthFormat, antiAlias) +, m_SwapChain(NULL) +, m_FSAALevel(0) +{ + m_Device = device; + Reshape( width, height, depthFormat, antiAlias ); +} + +D3D9Window::~D3D9Window() +{ + if( s_CurrentD3DWindow == this ) + { + s_CurrentD3DWindow = NULL; + s_CurrentD3DFSAALevel = 0; + } + + DestroyRenderSurfaceD3D9(&m_DepthStencil); + DestroyRenderSurfaceD3D9(&m_BackBuffer); + SAFE_RELEASE(m_SwapChain); +} + +bool D3D9Window::Reshape( int width, int height, DepthBufferFormat depthFormat, int antiAlias ) +{ + if(GfxDeviceWindow::Reshape(width, height, depthFormat, antiAlias)==false)return false; + + + #if ENABLE_D3D_WINDOW_LOGGING + printf_console("D3Dwindow %x Reshape %ix%i d=%i aa=%i\n", this, width, height, depthFormat, antiAlias); + #endif + // release old + m_DepthStencil.Release(); + m_BackBuffer.Release(); + SAFE_RELEASE(m_SwapChain); + + HRESULT hr; + + + // Choose presentation params + if( antiAlias == -1 ) + antiAlias = 
GetQualitySettings().GetCurrent().antiAliasing;
+
+	// NOTE(review): the result of GetAdapterDisplayMode is not checked; if it can fail here,
+	// mode.Format is passed on uninitialized below. Confirm whether that needs handling.
+	D3DDISPLAYMODE mode;
+	hr = GetD3DObject()->GetAdapterDisplayMode( D3DADAPTER_DEFAULT, &mode );
+	D3DPRESENT_PARAMETERS params;
+
+	// Fix: "&params" had been mangled into "¶ms" (HTML-entity decoding of "&para").
+	ZeroMemory( &params, sizeof(params) );
+	params.BackBufferWidth = m_Width;
+	params.BackBufferHeight = m_Height;
+	params.BackBufferCount = 1;
+	params.hDeviceWindow = m_Window;
+	params.FullScreen_RefreshRateInHz = 0;
+	params.Windowed = TRUE;
+	params.SwapEffect = D3DSWAPEFFECT_COPY;
+	params.BackBufferFormat = D3DFMT_A8R8G8B8;
+	params.PresentationInterval = D3DPRESENT_INTERVAL_IMMEDIATE;
+	params.EnableAutoDepthStencil = FALSE;
+	GetD3DFormatCaps()->FindBestPresentationParams( width, height, mode.Format, true, 0, antiAlias, params );
+	// Multisampled swap chains are switched to the DISCARD swap effect, and the blit
+	// optimization is turned off for them.
+	if( params.MultiSampleType != D3DMULTISAMPLE_NONE ) {
+		params.SwapEffect = D3DSWAPEFFECT_DISCARD;
+		m_CanUseBlitOptimization = false;
+	} else {
+		m_CanUseBlitOptimization = true;
+	}
+	m_FSAALevel = (params.MultiSampleType == D3DMULTISAMPLE_NONMASKABLE) ? params.MultiSampleQuality : params.MultiSampleType;
+
+	// Fix: same "¶ms" -> "&params" repair as above.
+	hr = m_Device->CreateAdditionalSwapChain( &params, &m_SwapChain );
+	if( FAILED(hr) ) {
+		printf_console( "d3d: swap chain: swap=%i vsync=%x w=%i h=%i fmt=%i bbcount=%i dsformat=%i pflags=%x\n",
+			params.SwapEffect, params.PresentationInterval,
+			params.BackBufferWidth, params.BackBufferHeight, params.BackBufferFormat, params.BackBufferCount,
+			params.AutoDepthStencilFormat, params.Flags );
+		printf_console( "d3d: failed to create swap chain [%s]\n", GetD3D9Error(hr) );
+		m_InvalidState = true;
+		return !m_InvalidState;
+	}
+
+	IDirect3DSurface9* backBuffer = NULL;
+	hr = m_SwapChain->GetBackBuffer( 0, D3DBACKBUFFER_TYPE_MONO, &backBuffer );
+	if( FAILED(hr) ) {
+		AssertString( "Failed to get back buffer for D3DWindow" );
+		m_SwapChain->Release();
+		m_SwapChain = NULL;
+		m_InvalidState = true;
+		return !m_InvalidState;
+	}
+
+	m_BackBuffer.backBuffer = true;
+	m_DepthStencil.backBuffer = true;
+
+	m_BackBuffer.m_Surface = backBuffer;
+	
m_BackBuffer.width = params.BackBufferWidth; + m_BackBuffer.height = params.BackBufferHeight; + m_BackBuffer.format = kRTFormatARGB32; + + // Depth format + bool needsDepth = false; + m_DepthStencilFormat = D3DFMT_UNKNOWN; + switch( depthFormat ) { + case kDepthFormatNone: + needsDepth = false; + m_DepthStencilFormat = D3DFMT_UNKNOWN; + break; + case kDepthFormat16: + needsDepth = true; + m_DepthStencilFormat = D3DFMT_D16; + break; + case kDepthFormat24: + needsDepth = true; + m_DepthStencilFormat = D3DFMT_D24S8; + break; + default: + ErrorString("Unknown depth format"); + } + + if( needsDepth ) + { + D3D9DepthStencilTexture depthStencil = CreateDepthStencilTextureD3D9 (m_Device, m_Width, m_Height, m_DepthStencilFormat, params.MultiSampleType, params.MultiSampleQuality, FALSE); + m_Device->SetRenderState (D3DRS_ZENABLE, TRUE); + if (!depthStencil.m_Surface) + { + AssertString( "Failed to create depth/stencil for D3DWindow" ); + m_SwapChain->Release(); + m_SwapChain = NULL; + REGISTER_EXTERNAL_GFX_DEALLOCATION(m_BackBuffer.m_Surface); + m_BackBuffer.m_Surface->Release(); + m_BackBuffer.m_Surface = NULL; + m_InvalidState = true; + return !m_InvalidState; + } + m_DepthStencil.m_Surface = depthStencil.m_Surface; + m_DepthStencil.m_Texture = depthStencil.m_Texture; + m_DepthStencil.width = m_Width; + m_DepthStencil.height = m_Height; + m_DepthStencil.depthFormat = depthFormat; + } + + return !m_InvalidState; +} + +void D3D9Window::SetAsActiveWindow () +{ + GetRealGfxDevice().SetRenderTargets(1, &GetBackBuffer(), GetDepthStencil()); + GetRealGfxDevice().SetActiveRenderTexture(NULL); + GetRealGfxDevice().SetCurrentWindowSize(m_Width, m_Height); + GetRealGfxDevice().SetInvertProjectionMatrix(false); + + s_OldHasDepthFlag = g_D3DHasDepthStencil; + g_D3DHasDepthStencil = (m_DepthStencil.m_Surface != NULL); + + s_CurrentD3DWindow = this; + s_CurrentD3DFSAALevel = m_FSAALevel; + + // not entirely correct but better not touch anything if we don't have depth + 
if(m_DepthStencil.m_Surface != NULL)
+		g_D3DDepthStencilFormat = m_DepthStencilFormat;
+}
+
+// Starts rendering into this window: begins the D3D scene, makes the window's surfaces
+// the active render targets and marks the device as being inside a frame.
+// Returns false when the base class or the device reports an invalid state.
+bool D3D9Window::BeginRendering()
+{
+	if (GfxDeviceWindow::BeginRendering())
+	{
+		HRESULT hr;
+
+		// Handle lost devices
+		if (!GetRealGfxDevice().IsValidState())
+		{
+			return false;
+		}
+
+		// begin scene
+		if (IsD3D9DeviceLost())
+		{
+			ErrorString ("GUI Window tries to begin rendering while D3D9 device is lost!");
+		}
+		GfxDeviceD3D9& device = static_cast<GfxDeviceD3D9&>( GetRealGfxDevice() );
+		if (device.IsInsideFrame())
+		{
+			ErrorString ("GUI Window tries to begin rendering while something else has not finished rendering! Either you have a recursive OnGUI rendering, or previous OnGUI did not clean up properly.");
+		}
+
+		m_Device->BeginScene();
+		SetAsActiveWindow ();
+
+		device.SetInsideFrame(true);
+		return true;
+	}
+	else
+	{
+		#if ENABLE_D3D_WINDOW_LOGGING
+		printf_console("D3Dwindow %ix%i BeginRendering: invalid state\n", m_Width, m_Height);
+		#endif
+		return false;
+	}
+}
+
+// Finishes rendering into this window: restores the global state that SetAsActiveWindow
+// changed, ends the D3D scene and, when presentContent is set, presents the swap chain.
+// Returns false when the base class refuses, or when the device is (or becomes) lost.
+bool D3D9Window::EndRendering( bool presentContent )
+{
+	if(GfxDeviceWindow::EndRendering(presentContent))
+	{
+
+		g_D3DHasDepthStencil = s_OldHasDepthFlag;
+		s_CurrentD3DWindow = NULL;
+		// Fix: the original assigned s_CurrentD3DWindow a second time here, so the FSAA level
+		// set by SetAsActiveWindow was never cleared once the window stopped being current.
+		s_CurrentD3DFSAALevel = 0;
+
+		if( IsD3D9DeviceLost() )
+			return false;
+
+		HRESULT hr;
+		GfxDeviceD3D9& device = static_cast<GfxDeviceD3D9&>( GetRealGfxDevice() );
+		Assert( device.IsInsideFrame() );
+		hr = m_Device->EndScene();
+		device.SetInsideFrame(false);
+		if( m_SwapChain && presentContent )
+		{
+			hr = m_SwapChain->Present( NULL, NULL, NULL, NULL, 0 );
+			device.PushEventQuery();
+			// When D3DERR_DRIVERINTERNALERROR is returned from Present(),
+			// the application can do one of the following, try recovering just as
+			// from the lost device.
+ if( hr == D3DERR_DEVICELOST || hr == D3DERR_DRIVERINTERNALERROR ) + { + SetD3D9DeviceLost( true ); + return false; + } + } + return true; + } + else + { + return false; + } +} + +RenderSurfaceHandle D3D9Window::GetBackBuffer() +{ + RenderSurfaceHandle handle; + handle.object = &m_BackBuffer; + return handle; +} + +RenderSurfaceHandle D3D9Window::GetDepthStencil() +{ + RenderSurfaceHandle handle; + handle.object = &m_DepthStencil; + return handle; +} + +#endif diff --git a/Runtime/GfxDevice/d3d/D3D9Window.h b/Runtime/GfxDevice/d3d/D3D9Window.h new file mode 100644 index 0000000..038b59f --- /dev/null +++ b/Runtime/GfxDevice/d3d/D3D9Window.h @@ -0,0 +1,39 @@ +#ifndef D3D9WINDOW_H +#define D3D9WINDOW_H + +#include "D3D9Includes.h" +#include "Runtime/GfxDevice/GfxDeviceWindow.h" +#include "Runtime/GfxDevice/GfxDeviceObjects.h" +#include "D3D9Utils.h" +#include "TexturesD3D9.h" + +class D3D9Window : public GfxDeviceWindow +{ +private: + IDirect3DDevice9* m_Device; + IDirect3DSwapChain9* m_SwapChain; + RenderColorSurfaceD3D9 m_BackBuffer; + RenderDepthSurfaceD3D9 m_DepthStencil; + D3DFORMAT m_DepthStencilFormat; + int m_FSAALevel; +public: + D3D9Window( IDirect3DDevice9* device, HWND window, int width, int height, DepthBufferFormat depthFormat, int antiAlias ); + ~D3D9Window(); + + bool Reshape( int width, int height, DepthBufferFormat depthFormat, int antiAlias ); + + bool BeginRendering(); + bool EndRendering( bool presentContent ); + void SetAsActiveWindow (); + + D3DFORMAT GetDepthStencilFormat() const { return m_DepthStencilFormat; } + + RenderSurfaceHandle GetBackBuffer(); + RenderSurfaceHandle GetDepthStencil(); +}; + +#if UNITY_EDITOR +int GetCurrentD3DFSAALevel(); +#endif + +#endif diff --git a/Runtime/GfxDevice/d3d/GfxDeviceD3D9.cpp b/Runtime/GfxDevice/d3d/GfxDeviceD3D9.cpp new file mode 100644 index 0000000..77fe956 --- /dev/null +++ b/Runtime/GfxDevice/d3d/GfxDeviceD3D9.cpp @@ -0,0 +1,3009 @@ +#include "UnityPrefix.h" +#include 
"Runtime/GfxDevice/GfxDevice.h" +#include "Runtime/Shaders/GraphicsCaps.h" +#include "GfxDeviceD3D9.h" +#include "D3D9Context.h" +#include "Runtime/Math/FloatConversion.h" +#include "D3D9VBO.h" +#include "CombinerD3D.h" +#include "External/shaderlab/Library/program.h" +#include "External/shaderlab/Library/TextureBinding.h" +#include "External/shaderlab/Library/texenv.h" +#include "Runtime/Math/Matrix4x4.h" +#include "Runtime/GfxDevice/ChannelAssigns.h" +#include "External/shaderlab/Library/pass.h" +#include "Runtime/GfxDevice/BuiltinShaderParams.h" +#include "Runtime/GfxDevice/GpuProgramParamsApply.h" +#include "Runtime/Graphics/Image.h" +#include "Runtime/Graphics/ScreenManager.h" +#include "PlatformDependent/Win/SmartComPointer.h" +#include "PlatformDependent/Win/WinUnicode.h" +#include "Runtime/Allocator/LinearAllocator.h" +#include "Runtime/Utilities/Utility.h" +#include "Runtime/Utilities/ArrayUtility.h" +#include "Runtime/Threads/Thread.h" +#include "Runtime/Misc/Plugins.h" +#include "D3D9Utils.h" +#include "D3D9Window.h" +#include "RenderTextureD3D.h" +#include "GpuProgramsD3D.h" +#include "TimerQueryD3D9.h" +#include "GfxDeviceD3D9.h" + + +// -------------------------------------------------------------------------- + +bool IsActiveRenderTargetWithColorD3D9(); + +typedef std::list<IDirect3DQuery9*> D3D9QueryList; +static D3D9QueryList s_EventQueries; + +static void PushEventQuery (int maxBuffer); +static void CleanupEventQueries (); + + + +static const D3DBLEND kBlendModeD3D9[] = { + D3DBLEND_ZERO, D3DBLEND_ONE, D3DBLEND_DESTCOLOR, D3DBLEND_SRCCOLOR, D3DBLEND_INVDESTCOLOR, D3DBLEND_SRCALPHA, D3DBLEND_INVSRCCOLOR, + D3DBLEND_DESTALPHA, D3DBLEND_INVDESTALPHA, D3DBLEND_SRCALPHASAT, D3DBLEND_INVSRCALPHA, +}; + +static const D3DBLENDOP kBlendOpD3D9[] = { + D3DBLENDOP_ADD, D3DBLENDOP_SUBTRACT, D3DBLENDOP_REVSUBTRACT, D3DBLENDOP_MIN, D3DBLENDOP_MAX, +}; + +static const D3DCMPFUNC kCmpFuncD3D9[] = { + D3DCMP_ALWAYS, D3DCMP_NEVER, D3DCMP_LESS, D3DCMP_EQUAL, 
D3DCMP_LESSEQUAL, D3DCMP_GREATER, D3DCMP_NOTEQUAL, D3DCMP_GREATEREQUAL, D3DCMP_ALWAYS +}; + +static const D3DSTENCILOP kStencilOpD3D9[] = { + D3DSTENCILOP_KEEP, D3DSTENCILOP_ZERO, D3DSTENCILOP_REPLACE, D3DSTENCILOP_INCRSAT, + D3DSTENCILOP_DECRSAT, D3DSTENCILOP_INVERT, D3DSTENCILOP_INCR, D3DSTENCILOP_DECR +}; + +static D3DCULL kCullModeD3D9[] = { + D3DCULL_NONE, D3DCULL_CW, D3DCULL_CCW +}; + +// -------------------------------------------------------------------------- + + +static inline D3DCOLOR ColorToD3D( const float color[4] ) +{ + return D3DCOLOR_RGBA( NormalizedToByte(color[0]), NormalizedToByte(color[1]), NormalizedToByte(color[2]), NormalizedToByte(color[3]) ); +} + + + +// -------------------------------------------------------------------------- + +enum { + kNeedsSoftwareVPVertexShader = (1<<0), + kNeedsSoftwareVPTexGen = (1<<1), +}; + +class GfxDeviceD3D9; + +static void ApplyBackfaceMode( DeviceStateD3D& state ); +static void ApplyStencilFuncAndOp( DeviceStateD3D& state ); + + + + + +void DeviceStateD3D::Invalidate( GfxDeviceD3D9& device ) +{ + int i; + + depthFunc = kFuncUnknown; + depthWrite = -1; + + blending = -1; // unknown + srcBlend = destBlend = srcBlendAlpha = destBlendAlpha = -1; // won't match any D3D mode + blendOp = blendOpAlpha = -1; // won't match any D3D mode + alphaFunc = kFuncUnknown; + alphaValue = -1.0f; + + culling = kCullUnknown; + d3dculling = D3DCULL_FORCE_DWORD; + scissor = -1; + + offsetFactor = offsetUnits = -1000.0f; + for( i = 0; i < kShaderTypeCount; ++i ) + { + activeGpuProgramParams[i] = NULL; + activeGpuProgram[i] = NULL; + activeShader[i] = NULL; + } + fixedFunctionPS = 0; + + colorWriteMask = -1; // TBD ? 
+ m_StencilRef = -1; + + for (i = 0; i < ARRAY_SIZE(texturesPS); ++i) + texturesPS[i].Invalidate(); + for (i = 0; i < ARRAY_SIZE(texturesVS); ++i) + texturesVS[i].Invalidate(); + + m_SoftwareVP = false; + m_NeedsSofwareVPFlags = 0; + + IDirect3DDevice9* dev = GetD3DDeviceNoAssert(); + if( dev && !m_DeviceLost ) + { + D3D9_CALL(dev->SetVertexShader( NULL )); + D3D9_CALL(dev->SetPixelShader( NULL )); + + ApplyBackfaceMode( *this ); + + if( g_D3DUsesMixedVP ) + D3D9_CALL(dev->SetSoftwareVertexProcessing( FALSE )); + + // misc. state + D3D9_CALL(dev->SetRenderState( D3DRS_LOCALVIEWER, TRUE )); + + #if UNITY_EDITOR + D3D9_CALL(dev->SetRenderState( D3DRS_FILLMODE, D3DFILL_SOLID )); + #endif + } +} + + +void UpdateChannelBindingsD3D( const ChannelAssigns& channels ) +{ + // Texture coordinate index bindings + GfxDeviceD3D9& device = (GfxDeviceD3D9&)GetRealGfxDevice(); + if( device.IsShaderActive(kShaderVertex) ) + return; + DeviceStateD3D& state = device.GetState(); + IDirect3DDevice9* dev = GetD3DDevice(); + + const int maxTexCoords = gGraphicsCaps.maxTexCoords; // fetch here once + + VertexPipeConfig& config = device.GetVertexPipeConfig(); + UInt32 textureSources = config.textureSources; + for( int i = 0; i < maxTexCoords; ++i ) + { + UInt32 source = (textureSources >> (i*3)) & 0x7; + if( source > kTexSourceUV1 ) + continue; + ShaderChannel texCoordChannel = channels.GetSourceForTarget( (VertexComponent)(kVertexCompTexCoord0 + i) ); + if( texCoordChannel == kShaderChannelTexCoord0 ) + textureSources = textureSources & ~(7<<i*3) | (kTexSourceUV0<<i*3); + else if( texCoordChannel == kShaderChannelTexCoord1 ) + textureSources = textureSources & ~(7<<i*3) | (kTexSourceUV1<<i*3); + else if( texCoordChannel != kShaderChannelNone ) { + AssertString( "Bad texcoord index" ); + } + } + config.textureSources = textureSources; + + config.hasVertexColor = (channels.GetTargetMap() & (1<<kVertexCompColor)) ? 
1 : 0; +} + + +struct SetValuesFunctorD3D9 +{ + SetValuesFunctorD3D9(GfxDevice& device, VertexShaderConstantCache& vs, PixelShaderConstantCache& ps) : m_Device(device), vscache(vs), pscache(ps) { } + GfxDevice& m_Device; + VertexShaderConstantCache& vscache; + PixelShaderConstantCache& pscache; + void SetVectorVal (ShaderType shaderType, ShaderParamType type, int index, const float* ptr, int cols, const GpuProgramParameters& params, int cbIndex) + { + if (shaderType == kShaderVertex) + vscache.SetValues(index, ptr, 1); + else + pscache.SetValues(index, ptr, 1); + } + void SetMatrixVal (ShaderType shaderType, int index, const Matrix4x4f* ptr, int rows, const GpuProgramParameters& params, int cbIndex) + { + DebugAssert(rows == 4); + Matrix4x4f mat; + TransposeMatrix4x4 (ptr, &mat); + if (shaderType == kShaderVertex) + vscache.SetValues(index, mat.GetPtr(), 4); + else + pscache.SetValues(index, mat.GetPtr(), 4); + } + void SetTextureVal (ShaderType shaderType, int index, int samplerIndex, TextureDimension dim, TextureID texID) + { + m_Device.SetTexture (shaderType, index, samplerIndex, texID, dim, std::numeric_limits<float>::infinity()); + } +}; + + +// Compute/Update any deferred state before each draw call +void GfxDeviceD3D9::BeforeDrawCall( bool immediateMode ) +{ + VertexShaderConstantCache& vscache = GetVertexShaderConstantCache(); + PixelShaderConstantCache& pscache = GetPixelShaderConstantCache(); + DeviceStateD3D& state = m_State; + IDirect3DDevice9* dev = GetD3DDevice(); + bool usesVertexShader = (state.activeShader[kShaderVertex] != NULL); + + //@TODO: remove TESTING CODE + static bool oldTnL = false; + if( oldTnL != (!immediateMode) ) + { + m_VertexPrevious.config.Reset (); + m_VertexPrevious.ambient.set(-1,-1,-1,-1); + oldTnL = !immediateMode; + } + + m_TransformState.UpdateWorldViewMatrix (m_BuiltinParamValues); + + // Deferred setup of fixed function stuff + if (!immediateMode) + SetupVertexShaderD3D9( dev, m_TransformState, m_BuiltinParamValues, 
m_VertexConfig, m_VertexData, m_VertexPrevious, vscache, usesVertexShader, immediateMode ); + else + SetupFixedFunctionD3D9( dev, m_TransformState, m_BuiltinParamValues, m_VertexConfig, m_VertexData, m_VertexPrevious, usesVertexShader, immediateMode ); + + + // update GL equivalents of built-in shader state + + const BuiltinShaderParamIndices& paramsVS = *m_BuiltinParamIndices[kShaderVertex]; + const BuiltinShaderParamIndices& paramsPS = *m_BuiltinParamIndices[kShaderFragment]; + int gpuIndexVS, gpuIndexPS; + +#define SET_BUILTIN_MATRIX_BEGIN(idx) \ + gpuIndexVS = paramsVS.mat[idx].gpuIndex; gpuIndexPS = paramsPS.mat[idx].gpuIndex; if (gpuIndexVS >= 0 || gpuIndexPS >= 0) + +#define SET_BUILTIN_MATRIX_END(name) \ + if (gpuIndexVS >= 0) vscache.SetValues(gpuIndexVS, name.GetPtr(), 4); \ + if (gpuIndexPS >= 0) pscache.SetValues(gpuIndexPS, name.GetPtr(), 4) + + // MVP matrix + SET_BUILTIN_MATRIX_BEGIN(kShaderInstanceMatMVP) + { + Matrix4x4f matMul; + MultiplyMatrices4x4 (&m_BuiltinParamValues.GetMatrixParam(kShaderMatProj), &m_TransformState.worldViewMatrix, &matMul); + Matrix4x4f mat; + TransposeMatrix4x4 (&matMul, &mat); + SET_BUILTIN_MATRIX_END(mat); + } + // MV matrix + SET_BUILTIN_MATRIX_BEGIN(kShaderInstanceMatMV) + { + Matrix4x4f mat; + TransposeMatrix4x4 (&m_TransformState.worldViewMatrix, &mat); + SET_BUILTIN_MATRIX_END(mat); + } + // Transpose MV matrix + SET_BUILTIN_MATRIX_BEGIN(kShaderInstanceMatTransMV) + { + const Matrix4x4f& mat = m_TransformState.worldViewMatrix; + SET_BUILTIN_MATRIX_END(mat); + } + // Inverse transpose of MV matrix + SET_BUILTIN_MATRIX_BEGIN(kShaderInstanceMatInvTransMV) + { + Matrix4x4f mat; + Matrix4x4f::Invert_Full (m_TransformState.worldViewMatrix, mat); + if (m_VertexData.normalization == kNormalizationScale) + { + // Inverse transpose of modelview should be scaled by uniform + // normal scale (this will match state.matrix.invtrans.modelview + // and gl_NormalMatrix in OpenGL) + float scale = Magnitude 
(m_TransformState.worldMatrix.GetAxisX()); + mat.Get (0, 0) *= scale; + mat.Get (1, 0) *= scale; + mat.Get (2, 0) *= scale; + mat.Get (0, 1) *= scale; + mat.Get (1, 1) *= scale; + mat.Get (2, 1) *= scale; + mat.Get (0, 2) *= scale; + mat.Get (1, 2) *= scale; + mat.Get (2, 2) *= scale; + } + SET_BUILTIN_MATRIX_END(mat); + } + // M matrix + SET_BUILTIN_MATRIX_BEGIN(kShaderInstanceMatM) + { + Matrix4x4f mat; + TransposeMatrix4x4 (&m_TransformState.worldMatrix, &mat); + SET_BUILTIN_MATRIX_END(mat); + } + // Inverse M matrix + SET_BUILTIN_MATRIX_BEGIN(kShaderInstanceMatInvM) + { + Matrix4x4f mat = m_TransformState.worldMatrix; + if (m_VertexData.normalization == kNormalizationScale) + { + // Kill scale in the world matrix before inverse + float invScale = m_BuiltinParamValues.GetInstanceVectorParam(kShaderInstanceVecScale).w; + mat.Get (0, 0) *= invScale; + mat.Get (1, 0) *= invScale; + mat.Get (2, 0) *= invScale; + mat.Get (0, 1) *= invScale; + mat.Get (1, 1) *= invScale; + mat.Get (2, 1) *= invScale; + mat.Get (0, 2) *= invScale; + mat.Get (1, 2) *= invScale; + mat.Get (2, 2) *= invScale; + } + Matrix4x4f inverseMat; + Matrix4x4f::Invert_General3D (mat, inverseMat); + TransposeMatrix4x4 (&inverseMat, &mat); + SET_BUILTIN_MATRIX_END(mat); + } + + // Set instance vector parameters + for (int i = 0; i < kShaderInstanceVecCount; ++i) + { + gpuIndexVS = paramsVS.vec[i].gpuIndex; + if (gpuIndexVS >= 0) + vscache.SetValues(gpuIndexVS, m_BuiltinParamValues.GetInstanceVectorParam((ShaderBuiltinInstanceVectorParam)i).GetPtr(), 1); + gpuIndexPS = paramsPS.vec[i].gpuIndex; + if (gpuIndexPS >= 0) + pscache.SetValues(gpuIndexPS, m_BuiltinParamValues.GetInstanceVectorParam((ShaderBuiltinInstanceVectorParam)i).GetPtr(), 1); + } + + // Texture matrices for vertex shader + for( int i = 0; i < 8; ++i ) + { + if( paramsVS.mat[kShaderInstanceMatTexture0 + i].gpuIndex >= 0 ) + { + Matrix4x4f mat; + TransposeMatrix4x4 (&m_TransformState.texMatrices[i], &mat); + const int index = 
paramsVS.mat[kShaderInstanceMatTexture0 + i].gpuIndex; + vscache.SetValues( index, mat.GetPtr(), 4 ); + } + } + + // Software VP flags + if( g_D3DUsesMixedVP ) + { + if( state.m_NeedsSofwareVPFlags ) + { + if( state.m_SoftwareVP == false ) + { + D3D9_CALL(dev->SetSoftwareVertexProcessing( TRUE )); + state.m_SoftwareVP = true; + } + } + else + { + if( state.m_SoftwareVP == true ) + { + D3D9_CALL(dev->SetSoftwareVertexProcessing( FALSE )); + state.m_SoftwareVP = false; + } + } + } + + SetValuesFunctorD3D9 setValuesFunc(*this, vscache, pscache); + ApplyMaterialPropertyBlockValues(m_MaterialProperties, m_State.activeGpuProgram ,m_State.activeGpuProgramParams, setValuesFunc); + + vscache.CommitVertexConstants(); + pscache.CommitPixelConstants(); +} + + +DeviceBlendState* GfxDeviceD3D9::CreateBlendState(const GfxBlendState& state) +{ + std::pair<CachedBlendStates::iterator, bool> result = m_CachedBlendStates.insert(std::make_pair(state, DeviceBlendStateD3D9())); + if (!result.second) + return &result.first->second; + + DeviceBlendStateD3D9& d3dstate = result.first->second; + memcpy(&d3dstate.sourceState, &state, sizeof(GfxBlendState)); + DWORD d3dmask = 0; + const UInt8 mask = state.renderTargetWriteMask; + if( mask & kColorWriteR ) d3dmask |= D3DCOLORWRITEENABLE_RED; + if( mask & kColorWriteG ) d3dmask |= D3DCOLORWRITEENABLE_GREEN; + if( mask & kColorWriteB ) d3dmask |= D3DCOLORWRITEENABLE_BLUE; + if( mask & kColorWriteA ) d3dmask |= D3DCOLORWRITEENABLE_ALPHA; + d3dstate.renderTargetWriteMask = d3dmask; + + DebugAssertIf(kFuncUnknown==state.alphaTest); + d3dstate.alphaFunc = kCmpFuncD3D9[state.alphaTest]; + return &result.first->second; +} + + +DeviceDepthState* GfxDeviceD3D9::CreateDepthState(const GfxDepthState& state) +{ + std::pair<CachedDepthStates::iterator, bool> result = m_CachedDepthStates.insert(std::make_pair(state, DeviceDepthStateD3D9())); + if (!result.second) + return &result.first->second; + + DeviceDepthStateD3D9& d3dstate = result.first->second; + 
memcpy(&d3dstate.sourceState, &state, sizeof(GfxDepthState)); + d3dstate.depthFunc = kCmpFuncD3D9[state.depthFunc]; + return &result.first->second; +} + +DeviceStencilState* GfxDeviceD3D9::CreateStencilState(const GfxStencilState& state) +{ + std::pair<CachedStencilStates::iterator, bool> result = m_CachedStencilStates.insert(std::make_pair(state, DeviceStencilStateD3D9())); + if (!result.second) + return &result.first->second; + + DeviceStencilStateD3D9& st = result.first->second; + memcpy(&st.sourceState, &state, sizeof(state)); + st.stencilFuncFront = kCmpFuncD3D9[state.stencilFuncFront]; + st.stencilFailOpFront = kStencilOpD3D9[state.stencilFailOpFront]; + st.depthFailOpFront = kStencilOpD3D9[state.stencilZFailOpFront]; + st.depthPassOpFront = kStencilOpD3D9[state.stencilPassOpFront]; + st.stencilFuncBack = kCmpFuncD3D9[state.stencilFuncBack]; + st.stencilFailOpBack = kStencilOpD3D9[state.stencilFailOpBack]; + st.depthFailOpBack = kStencilOpD3D9[state.stencilZFailOpBack]; + st.depthPassOpBack = kStencilOpD3D9[state.stencilPassOpBack]; + return &result.first->second; +} + + + +DeviceRasterState* GfxDeviceD3D9::CreateRasterState(const GfxRasterState& state) +{ + std::pair<CachedRasterStates::iterator, bool> result = m_CachedRasterStates.insert(std::make_pair(state, DeviceRasterState())); + if (!result.second) + return &result.first->second; + + DeviceRasterState& d3dstate = result.first->second; + memcpy(&d3dstate.sourceState, &state, sizeof(DeviceRasterState)); + + return &result.first->second; +} + + +void GfxDeviceD3D9::SetBlendState(const DeviceBlendState* state, float alphaRef) +{ + DeviceBlendStateD3D9* devstate = (DeviceBlendStateD3D9*)state; + + if (m_CurrBlendState == devstate && alphaRef == m_State.alphaValue) + return; + + m_CurrBlendState = devstate; + if (!m_CurrBlendState) + return; + + UInt32 colMask = devstate->renderTargetWriteMask; + if (!IsActiveRenderTargetWithColorD3D9()) + colMask = 0; + + if(colMask != m_State.colorWriteMask) + { + 
IDirect3DDevice9* dev = GetD3DDeviceNoAssert(); + D3D9_CALL(dev->SetRenderState(D3DRS_COLORWRITEENABLE, colMask)); + m_State.colorWriteMask = colMask; + } + + const GfxBlendState& desc = state->sourceState; + const CompareFunction mode = state->sourceState.alphaTest; + const D3DBLEND d3dsrc = kBlendModeD3D9[desc.srcBlend]; + const D3DBLEND d3ddst = kBlendModeD3D9[desc.dstBlend]; + const D3DBLEND d3dsrca = kBlendModeD3D9[desc.srcBlendAlpha]; + const D3DBLEND d3ddsta = kBlendModeD3D9[desc.dstBlendAlpha]; + const D3DBLENDOP d3dop = kBlendOpD3D9[desc.blendOp]; + const D3DBLENDOP d3dopa = kBlendOpD3D9[desc.blendOpAlpha]; + + const bool blendDisabled = (d3dsrc == D3DBLEND_ONE && d3ddst == D3DBLEND_ZERO && d3dsrca == D3DBLEND_ONE && d3ddsta == D3DBLEND_ZERO); + + IDirect3DDevice9* dev = GetD3DDevice(); + if(blendDisabled) + { + if( m_State.blending != 0 ) + { + D3D9_CALL(dev->SetRenderState( D3DRS_ALPHABLENDENABLE, FALSE )); + m_State.blending = 0; + } + } + else + { + if( d3dsrc != m_State.srcBlend || d3ddst != m_State.destBlend ) + { + D3D9_CALL(dev->SetRenderState( D3DRS_SRCBLEND, d3dsrc )); + D3D9_CALL(dev->SetRenderState( D3DRS_DESTBLEND, d3ddst )); + m_State.srcBlend = d3dsrc; + m_State.destBlend = d3ddst; + } + + if (d3dop != m_State.blendOp) + { + bool supports = true; + if( (d3dop == D3DBLENDOP_SUBTRACT || d3dop == D3DBLENDOP_REVSUBTRACT) && !gGraphicsCaps.hasBlendSub ) + supports = false; + if( (d3dop == D3DBLENDOP_MIN || d3dop == D3DBLENDOP_MAX) && !gGraphicsCaps.hasBlendMinMax ) + supports = false; + + if(supports) + { + D3D9_CALL(dev->SetRenderState(D3DRS_BLENDOP, d3dop)); + m_State.blendOp = d3dop; + } + } + if (gGraphicsCaps.hasSeparateAlphaBlend) + { + if( d3dsrca != m_State.srcBlendAlpha || d3ddsta != m_State.destBlendAlpha || d3dopa != m_State.blendOpAlpha ) + { + D3D9_CALL(dev->SetRenderState( D3DRS_SEPARATEALPHABLENDENABLE, d3dsrc != d3dsrca || d3ddst != d3ddsta || d3dopa != d3dop)); + D3D9_CALL(dev->SetRenderState( D3DRS_SRCBLENDALPHA, d3dsrca )); + 
D3D9_CALL(dev->SetRenderState( D3DRS_DESTBLENDALPHA, d3ddsta )); + m_State.srcBlendAlpha = d3dsrca; + m_State.destBlendAlpha = d3ddsta; + + bool supports = true; + if( (d3dopa == D3DBLENDOP_SUBTRACT || d3dopa == D3DBLENDOP_REVSUBTRACT) && !gGraphicsCaps.hasBlendSub ) + supports = false; + if( (d3dopa == D3DBLENDOP_MIN || d3dopa == D3DBLENDOP_MAX) && !gGraphicsCaps.hasBlendMinMax ) + supports = false; + + if (supports) + { + D3D9_CALL(dev->SetRenderState(D3DRS_BLENDOPALPHA, d3dopa)); + m_State.blendOpAlpha = d3dopa; + } + } + } + if( m_State.blending != 1 ) + { + D3D9_CALL(dev->SetRenderState( D3DRS_ALPHABLENDENABLE, TRUE )); + m_State.blending = 1; + } + } + + DebugAssertIf(mode==kFuncUnknown); +#if UNITY_EDITOR // gles2.0 doesn't have FF alpha testing(only discard/clip on shader side), so disable on editor while emulating + bool skipAlphaTestFF = (gGraphicsCaps.IsEmulatingGLES20() && IsShaderActive(kShaderFragment)); + // possible that vertex shader will be used with FF "frag shader" (like Transparent/vertexlit.shader), + // which will change alphatesting. So later on when real frag shaders come, we need to force disable alpha + // testing or enjoy nasty artefacts (like active alpha testing messing up the whole scene). 
+ if ( skipAlphaTestFF && m_State.alphaFunc!=kFuncDisabled ) + { + D3D9_CALL(dev->SetRenderState( D3DRS_ALPHATESTENABLE, FALSE )); + m_State.alphaFunc = kFuncDisabled; + } + + if ( !skipAlphaTestFF ) + { +#endif + if( mode != m_State.alphaFunc || alphaRef != m_State.alphaValue ) + { + if( mode != kFuncDisabled ) + { + D3D9_CALL(dev->SetRenderState( D3DRS_ALPHATESTENABLE, TRUE )); + D3D9_CALL(dev->SetRenderState( D3DRS_ALPHAFUNC, kCmpFuncD3D9[mode] )); + D3D9_CALL(dev->SetRenderState( D3DRS_ALPHAREF, alphaRef * 255.0f )); + } + else + { + D3D9_CALL(dev->SetRenderState( D3DRS_ALPHATESTENABLE, FALSE )); + } + + m_State.alphaFunc = mode; + m_State.alphaValue = alphaRef; + } +#if UNITY_EDITOR + } +#endif + // TODO: ATI/NVIDIA hacks +} + + +void GfxDeviceD3D9::SetRasterState(const DeviceRasterState* state) +{ + DeviceRasterState* devstate = (DeviceRasterState*)state; + if(!devstate) + { + m_CurrRasterState = NULL; + return; + } + + m_CurrRasterState = devstate; + + IDirect3DDevice9* dev = GetD3DDeviceNoAssert(); + CullMode cull = devstate->sourceState.cullMode; + D3DCULL d3dcull = kCullModeD3D9[cull]; + if( d3dcull != m_State.d3dculling ) + { + D3D9_CALL(dev->SetRenderState( D3DRS_CULLMODE, d3dcull )); + m_State.culling = cull; + m_State.d3dculling = d3dcull; + } + + float zFactor = devstate->sourceState.slopeScaledDepthBias; + float zUnits = devstate->sourceState.depthBias; + if( zFactor != m_State.offsetFactor || zUnits != m_State.offsetUnits ) + { + m_State.offsetFactor = zFactor; + m_State.offsetUnits = zUnits; + + // In D3D9 the values are in floating point, with 1 meaning "full depth range". + // In theory the offset should depend on depth buffer bit count, and on 24 bit depth buffer a value close to 4.8e-7 should be used + // (see Lengyel's GDC2007 "projection matrix tricks"). + // However, it looks like even on 16 bit depth buffer, a value as-if-24-bit should be used (tested on Radeon HD 3850, GeForce 8600, Intel 945). 
+ const double kOneBit = 4.8e-7; + + // It looks like generally we need twice the one bit (PolygonOff2 unit test, on Radeon 3850 and GeForce 8600). + // To be somewhat more safer, we make it trhee times the one bit. Still looks quite okay. + const float kBiasMultiplier = 3.0 * kOneBit; + + if( gGraphicsCaps.d3d.d3dcaps.RasterCaps & D3DPRASTERCAPS_DEPTHBIAS ) + { + zUnits *= kBiasMultiplier; + D3D9_CALL(dev->SetRenderState( D3DRS_DEPTHBIAS, *(DWORD*)&zUnits )); + } + if( gGraphicsCaps.d3d.d3dcaps.RasterCaps & D3DPRASTERCAPS_SLOPESCALEDEPTHBIAS ) + { + D3D9_CALL(dev->SetRenderState( D3DRS_SLOPESCALEDEPTHBIAS, *(DWORD*)&zFactor )); + } + } +} + + +void GfxDeviceD3D9::SetDepthState(const DeviceDepthState* state) +{ + IDirect3DDevice9* dev = GetD3DDeviceNoAssert(); + DeviceDepthStateD3D9* devstate = (DeviceDepthStateD3D9*)state; + if (m_CurrDepthState == devstate) + return; + + m_CurrDepthState = devstate; + + if (!m_CurrDepthState) + return; + + if( devstate->sourceState.depthFunc != m_State.depthFunc ) + { + D3D9_CALL(dev->SetRenderState( D3DRS_ZFUNC, devstate->depthFunc )); + m_State.depthFunc = devstate->sourceState.depthFunc; + } + + int d3dDepthWriteMode = devstate->sourceState.depthWrite ? 
TRUE : FALSE; + if( d3dDepthWriteMode != m_State.depthWrite ) + { + D3D9_CALL(dev->SetRenderState( D3DRS_ZWRITEENABLE, d3dDepthWriteMode )); + m_State.depthWrite = d3dDepthWriteMode; + } +} + +void GfxDeviceD3D9::SetStencilState(const DeviceStencilState* state, int stencilRef) +{ + if (m_CurrStencilState == state && m_State.m_StencilRef == stencilRef) + return; + const DeviceStencilStateD3D9* st = static_cast<const DeviceStencilStateD3D9*>(state); + m_CurrStencilState = st; + if (!m_CurrStencilState) + return; + + IDirect3DDevice9* dev = GetD3DDevice(); + D3D9_CALL (dev->SetRenderState (D3DRS_STENCILENABLE, st->sourceState.stencilEnable)); + D3D9_CALL (dev->SetRenderState (D3DRS_TWOSIDEDSTENCILMODE, TRUE)); + D3D9_CALL (dev->SetRenderState (D3DRS_STENCILMASK, st->sourceState.readMask)); + D3D9_CALL (dev->SetRenderState (D3DRS_STENCILWRITEMASK, st->sourceState.writeMask)); + D3D9_CALL (dev->SetRenderState (D3DRS_STENCILREF, stencilRef)); + + m_State.stencilFunc[0] = st->stencilFuncFront; + m_State.stencilFailOp[0] = st->stencilFailOpFront; + m_State.depthFailOp[0] = st->depthFailOpFront; + m_State.depthPassOp[0] = st->depthPassOpFront; + m_State.stencilFunc[1] = st->stencilFuncBack; + m_State.stencilFailOp[1] = st->stencilFailOpBack; + m_State.depthFailOp[1] = st->depthFailOpBack; + m_State.depthPassOp[1] = st->depthPassOpBack; + ApplyStencilFuncAndOp(m_State); + + m_State.m_StencilRef = stencilRef; +} + +static void ApplyStencilFuncAndOp (DeviceStateD3D& state) +{ + IDirect3DDevice9* dev = GetD3DDevice(); + // Normally [0] is front and [1] back stencil state, but when rendering + // upside-down, the winding order flips, so flip the state as well. + const int cw = state.invertProjMatrix ? 
1 : 0; + const int ccw = (cw + 1)%2; + D3D9_CALL (dev->SetRenderState (D3DRS_STENCILFUNC, state.stencilFunc[cw])); + D3D9_CALL (dev->SetRenderState (D3DRS_STENCILFAIL, state.stencilFailOp[cw])); + D3D9_CALL (dev->SetRenderState (D3DRS_STENCILZFAIL, state.depthFailOp[cw])); + D3D9_CALL (dev->SetRenderState (D3DRS_STENCILPASS, state.depthPassOp[cw])); + D3D9_CALL (dev->SetRenderState (D3DRS_CCW_STENCILFUNC, state.stencilFunc[ccw])); + D3D9_CALL (dev->SetRenderState (D3DRS_CCW_STENCILFAIL, state.stencilFailOp[ccw])); + D3D9_CALL (dev->SetRenderState (D3DRS_CCW_STENCILZFAIL, state.depthFailOp[ccw])); + D3D9_CALL (dev->SetRenderState (D3DRS_CCW_STENCILPASS, state.depthPassOp[ccw])); +} + +void GfxDeviceD3D9::SetSRGBWrite (bool enable) +{ + IDirect3DDevice9* dev = GetD3DDevice(); + D3D9_CALL (dev->SetRenderState (D3DRS_SRGBWRITEENABLE, enable)); +} + +bool GfxDeviceD3D9::GetSRGBWrite () +{ + IDirect3DDevice9* dev = GetD3DDevice(); + DWORD v; + D3D9_CALL (dev->GetRenderState (D3DRS_SRGBWRITEENABLE, &v)); + return (v==TRUE); +} + +GfxThreadableDevice* CreateD3D9GfxDevice(bool forceREF) +{ + if( !InitializeD3D(forceREF ? 
D3DDEVTYPE_REF : D3DDEVTYPE_HAL) ) + return NULL; + + #if UNITY_EDITOR + if (!CreateHiddenWindowD3D()) + return NULL; + #endif + + gGraphicsCaps.InitD3D9(); + + GfxDeviceD3D9* device = UNITY_NEW_AS_ROOT(GfxDeviceD3D9(), kMemGfxDevice, "D3D9GfxDevice", ""); + +#if UNITY_EDITOR + EditorInitializeD3D(device); +#else + ScreenManagerWin& screenMgr = GetScreenManager(); + HWND window = screenMgr.GetWindow(); + int width = screenMgr.GetWidth(); + int height = screenMgr.GetHeight(); + int dummy; + if (!InitializeOrResetD3DDevice(device, window, width, height, 0, false, 0, 0, dummy, dummy, dummy, dummy)) + { + UNITY_DELETE(device, kMemGfxDevice); + device = NULL; + } +#endif + + return device; +} + +GfxDeviceD3D9& GetD3D9GfxDevice() +{ + GfxDevice& device = GetRealGfxDevice(); + Assert( device.GetRenderer() == kGfxRendererD3D9 ); + return static_cast<GfxDeviceD3D9&>(device); +} + +bool IsD3D9DeviceLost() +{ + GfxDeviceD3D9& device = static_cast<GfxDeviceD3D9&>( GetRealGfxDevice() ); + AssertIf( device.GetRenderer() != kGfxRendererD3D9 ); + return device.GetState().m_DeviceLost; +} + +void SetD3D9DeviceLost( bool lost ) +{ + GfxDeviceD3D9& device = static_cast<GfxDeviceD3D9&>( GetRealGfxDevice() ); + AssertIf( device.GetRenderer() != kGfxRendererD3D9 ); + device.GetState().m_DeviceLost = lost; +} + + +GfxDeviceD3D9::GfxDeviceD3D9() +{ + m_State.m_DeviceLost = false; + m_DynamicVBO = NULL; + + m_State.appBackfaceMode = false; + m_State.userBackfaceMode = false; + m_State.invertProjMatrix = false; + m_State.wireframe = false; + + InvalidateState(); + ResetFrameStats(); + + m_Renderer = kGfxRendererD3D9; + m_UsesOpenGLTextureCoords = false; + m_UsesHalfTexelOffset = true; + m_IsThreadable = true; + + m_MaxBufferedFrames = 1; // -1 means no limiting, default is 1 + + m_State.viewport[0] = m_State.viewport[1] = m_State.viewport[2] = m_State.viewport[3] = 0; + m_State.scissorRect[0] = m_State.scissorRect[1] = m_State.scissorRect[2] = m_State.scissorRect[3] = 0; + + 
m_CurrBlendState = 0; + m_CurrDepthState = 0; + m_CurrStencilState = 0; + m_CurrRasterState = 0; + m_CurrTargetWidth = 0; + m_CurrTargetHeight = 0; + m_CurrWindowWidth = 0; + m_CurrWindowHeight = 0; + + m_AllWhiteVertexStream = NULL; + + extern RenderSurfaceBase* DummyColorBackBuferD3D9(); + SetBackBufferColorSurface(DummyColorBackBuferD3D9()); + + extern RenderSurfaceBase* DummyDepthBackBuferD3D9(); + SetBackBufferDepthSurface(DummyDepthBackBuferD3D9()); +} + +GfxDeviceD3D9::~GfxDeviceD3D9() +{ +#if !ENABLE_GFXDEVICE_REMOTE_PROCESS_WORKER + PluginsSetGraphicsDevice (GetD3DDevice(), kGfxRendererD3D9, kGfxDeviceEventShutdown); +#endif + + D3D9VBO::CleanupSharedIndexBuffer(); + + CleanupEventQueries (); +#if ENABLE_PROFILER + m_TimerQueriesD3D9.ReleaseAllQueries(); +#endif + + if( m_DynamicVBO ) + delete m_DynamicVBO; + + SAFE_RELEASE(m_AllWhiteVertexStream); + SAFE_RELEASE(m_Imm.m_ImmVertexDecl); + m_VertexDecls.Clear(); + TextureCombinersD3D::CleanupCombinerCache(); + CleanupVertexShadersD3D9 (); + DestroyD3DDevice(); + + #if UNITY_EDITOR + DestroyHiddenWindowD3D(); + #endif + + CleanupD3D(); +} + +void GfxDeviceD3D9::InvalidateState() +{ + IDirect3DDevice9* dev = GetD3DDeviceNoAssert(); + if( m_State.m_DeviceLost ) + dev = NULL; + + ResetVertexPipeStateD3D9 (dev, m_TransformState, m_BuiltinParamValues, m_VertexConfig, m_VertexData, m_VertexPrevious); + m_FogParams.Invalidate(); + m_State.Invalidate(*this); + m_Imm.Invalidate(); + m_VSConstantCache.Invalidate(); + m_PSConstantCache.Invalidate(); + + m_CurrBlendState = NULL; + m_CurrDepthState = NULL; + m_CurrStencilState = NULL; + m_CurrRasterState = NULL; +} + + +void GfxDeviceD3D9::Clear(UInt32 clearFlags, const float color[4], float depth, int stencil) +{ + if( !g_D3DHasDepthStencil ) + clearFlags &= ~kGfxClearDepthStencil; + if (!IsActiveRenderTargetWithColorD3D9()) + clearFlags &= ~kGfxClearColor; + + DWORD flags = 0; + if (clearFlags & kGfxClearColor) flags |= D3DCLEAR_TARGET; + if (clearFlags & 
kGfxClearDepth) flags |= D3DCLEAR_ZBUFFER; + if (clearFlags & kGfxClearStencil && GetStencilBitsFromD3DFormat (g_D3DDepthStencilFormat) > 0) { + flags |= D3DCLEAR_STENCIL; + } + GetD3DDevice()->Clear (0, NULL, flags, ColorToD3D(color), depth, stencil); +} + + +static void ApplyBackfaceMode( DeviceStateD3D& state ) +{ + if( (state.appBackfaceMode == state.userBackfaceMode) == state.invertProjMatrix ) + { + kCullModeD3D9[kCullFront] = D3DCULL_CCW; + kCullModeD3D9[kCullBack] = D3DCULL_CW; + } + else + { + kCullModeD3D9[kCullFront] = D3DCULL_CW; + kCullModeD3D9[kCullBack] = D3DCULL_CCW; + } + + if( state.culling != kCullUnknown ) + { + IDirect3DDevice9* dev = GetD3DDevice(); + D3DCULL d3dcull = kCullModeD3D9[state.culling]; + if( d3dcull != state.d3dculling ) + { + D3D9_CALL(dev->SetRenderState( D3DRS_CULLMODE, d3dcull )); + state.d3dculling = d3dcull; + } + } +} + +void GfxDeviceD3D9::SetUserBackfaceMode( bool enable ) +{ + if( m_State.userBackfaceMode == enable ) + return; + m_State.userBackfaceMode = enable; + ApplyBackfaceMode( m_State ); +} + + +void GfxDeviceD3D9::SetWireframe( bool wire ) +{ + IDirect3DDevice9* dev = GetD3DDevice(); + D3D9_CALL(dev->SetRenderState( D3DRS_FILLMODE, wire ? D3DFILL_WIREFRAME : D3DFILL_SOLID )); + m_State.wireframe = wire; +} + +bool GfxDeviceD3D9::GetWireframe() const +{ + return m_State.wireframe; +} + + + +// Even with programmable shaders, some things need fixed function D3DTS_PROJECTION to be set up; +// most notably fixed function fog (shader model 2.0). 
+static void SetFFProjectionMatrixD3D9 (const Matrix4x4f& m) +{ + IDirect3DDevice9* dev = GetD3DDevice(); + Matrix4x4f projFlip; + projFlip.m_Data[ 0] = m.m_Data[ 0]; + projFlip.m_Data[ 1] = m.m_Data[ 1]; + projFlip.m_Data[ 2] = m.m_Data[ 2]; + projFlip.m_Data[ 3] = m.m_Data[ 3]; + projFlip.m_Data[ 4] = m.m_Data[ 4]; + projFlip.m_Data[ 5] = m.m_Data[ 5]; + projFlip.m_Data[ 6] = m.m_Data[ 6]; + projFlip.m_Data[ 7] = m.m_Data[ 7]; + projFlip.m_Data[ 8] = -m.m_Data[ 8]; + projFlip.m_Data[ 9] = -m.m_Data[ 9]; + projFlip.m_Data[10] = -m.m_Data[10]; + projFlip.m_Data[11] = -m.m_Data[11]; + projFlip.m_Data[12] = m.m_Data[12]; + projFlip.m_Data[13] = m.m_Data[13]; + projFlip.m_Data[14] = m.m_Data[14]; + projFlip.m_Data[15] = m.m_Data[15]; + D3D9_CALL(dev->SetTransform (D3DTS_PROJECTION, (const D3DMATRIX*)projFlip.GetPtr())); +} + + +void GfxDeviceD3D9::SetInvertProjectionMatrix( bool enable ) +{ + if( m_State.invertProjMatrix == enable ) + return; + + m_State.invertProjMatrix = enable; + ApplyBackfaceMode( m_State ); + ApplyStencilFuncAndOp( m_State ); + + // When setting up "invert" flag, invert the matrix as well. 
+ Matrix4x4f& m = m_BuiltinParamValues.GetWritableMatrixParam(kShaderMatProj); + m.Get(1,1) = -m.Get(1,1); + m.Get(1,3) = -m.Get(1,3); + m_TransformState.dirtyFlags |= TransformState::kProjDirty; + SetFFProjectionMatrixD3D9 (m); +} + +bool GfxDeviceD3D9::GetInvertProjectionMatrix() const +{ + return m_State.invertProjMatrix; +} + +void GfxDeviceD3D9::SetWorldMatrix( const float matrix[16] ) +{ + CopyMatrix (matrix, m_TransformState.worldMatrix.GetPtr()); + m_TransformState.dirtyFlags |= TransformState::kWorldDirty; +} + +void GfxDeviceD3D9::SetViewMatrix( const float matrix[16] ) +{ + m_TransformState.SetViewMatrix (matrix, m_BuiltinParamValues); +} + +void GfxDeviceD3D9::SetProjectionMatrix(const Matrix4x4f& matrix) +{ + Matrix4x4f& m = m_BuiltinParamValues.GetWritableMatrixParam(kShaderMatProj); + CopyMatrix (matrix.GetPtr(), m.GetPtr()); + CopyMatrix (matrix.GetPtr(), m_TransformState.projectionMatrixOriginal.GetPtr()); + + CalculateDeviceProjectionMatrix (m, m_UsesOpenGLTextureCoords, m_State.invertProjMatrix); + SetFFProjectionMatrixD3D9 (m); + + m_TransformState.dirtyFlags |= TransformState::kProjDirty; +} + + +void GfxDeviceD3D9::GetMatrix(float outMatrix[16]) const +{ + m_TransformState.UpdateWorldViewMatrix (m_BuiltinParamValues); + CopyMatrix (m_TransformState.worldViewMatrix.GetPtr(), outMatrix); +} + +const float* GfxDeviceD3D9::GetWorldMatrix() const +{ + return m_TransformState.worldMatrix.GetPtr(); +} + +const float* GfxDeviceD3D9::GetViewMatrix() const +{ + return m_BuiltinParamValues.GetMatrixParam(kShaderMatView).GetPtr(); +} + +const float* GfxDeviceD3D9::GetProjectionMatrix() const +{ + return m_TransformState.projectionMatrixOriginal.GetPtr(); +} + +const float* GfxDeviceD3D9::GetDeviceProjectionMatrix() const +{ + return m_BuiltinParamValues.GetMatrixParam(kShaderMatProj).GetPtr(); +} + +void GfxDeviceD3D9::SetNormalizationBackface( NormalizationMode mode, bool backface ) +{ + IDirect3DDevice9* dev = GetD3DDevice(); + if( mode != 
m_VertexData.normalization ) + { + m_VertexData.normalization = mode; + m_VertexConfig.hasNormalization = (mode == kNormalizationFull); + } + if( m_State.appBackfaceMode != backface ) + { + m_State.appBackfaceMode = backface; + ApplyBackfaceMode( m_State ); + } +} + +void GfxDeviceD3D9::SetFFLighting( bool on, bool separateSpecular, ColorMaterialMode colorMaterial ) +{ + m_VertexConfig.hasLighting = on ? 1 : 0; + m_VertexConfig.hasSpecular = separateSpecular ? 1 : 0; + DebugAssertIf(colorMaterial==kColorMatUnknown); + m_VertexConfig.colorMaterial = colorMaterial; +} + +void GfxDeviceD3D9::SetMaterial( const float ambient[4], const float diffuse[4], const float specular[4], const float emissive[4], const float shininess ) +{ + D3DMATERIAL9& mat = m_VertexData.material; + mat.Ambient = *(D3DCOLORVALUE*)ambient; + mat.Diffuse = *(D3DCOLORVALUE*)diffuse; + mat.Specular = *(D3DCOLORVALUE*)specular; + mat.Emissive = *(D3DCOLORVALUE*)emissive; + mat.Power = std::max<float>( std::min<float>(shininess,1.0f), 0.0f) * 128.0f; +} + + +void GfxDeviceD3D9::SetColor( const float color[4] ) +{ + // If we have pixel shader set up, do nothing; fixed function + // constant color can't be possibly used there + if (m_State.activeShader[kShaderFragment] != 0) // inlined IsShaderActive(kShaderFragment) + return; + + // There's no really good place to make a glColor equivalent, put it into + // TFACTOR... 
Additionally put that into c4 register for ps_1_1 combiner emulation + IDirect3DDevice9* dev = GetD3DDevice(); + D3D9_CALL(dev->SetRenderState( D3DRS_TEXTUREFACTOR, ColorToD3D(color) )); + m_PSConstantCache.SetValues( kMaxD3DTextureStagesForPS, color, 1 ); +} + + +void GfxDeviceD3D9::SetViewport( int x, int y, int width, int height ) +{ + m_State.viewport[0] = x; + m_State.viewport[1] = y; + m_State.viewport[2] = width; + m_State.viewport[3] = height; + + IDirect3DDevice9* dev = GetD3DDeviceNoAssert(); + if( !dev ) // happens on startup, when deleting all render textures + return; + D3DVIEWPORT9 view; + view.X = x; + view.Y = y; + view.Width = width; + view.Height = height; + view.MinZ = 0.0f; + view.MaxZ = 1.0f; + dev->SetViewport( &view ); +} + +void GfxDeviceD3D9::GetViewport( int* port ) const +{ + port[0] = m_State.viewport[0]; + port[1] = m_State.viewport[1]; + port[2] = m_State.viewport[2]; + port[3] = m_State.viewport[3]; +} + + +void GfxDeviceD3D9::SetScissorRect( int x, int y, int width, int height ) +{ + if (m_State.scissor != 1) + { + if (gGraphicsCaps.d3d.d3dcaps.RasterCaps & D3DPRASTERCAPS_SCISSORTEST ) + { + GetD3DDevice()->SetRenderState( D3DRS_SCISSORTESTENABLE, TRUE ); + } + m_State.scissor = 1; + } + + + m_State.scissorRect[0] = x; + m_State.scissorRect[1] = y; + m_State.scissorRect[2] = width; + m_State.scissorRect[3] = height; + + RECT rc; + rc.left = x; + rc.top = y; + rc.right = x + width; + rc.bottom = y + height; + GetD3DDevice()->SetScissorRect( &rc ); + +} +void GfxDeviceD3D9::DisableScissor() +{ + if (m_State.scissor != 0) + { + if( gGraphicsCaps.d3d.d3dcaps.RasterCaps & D3DPRASTERCAPS_SCISSORTEST ) + { + GetD3DDevice()->SetRenderState( D3DRS_SCISSORTESTENABLE, FALSE); + } + m_State.scissor = 0; + } +} +bool GfxDeviceD3D9::IsScissorEnabled() const +{ + return m_State.scissor == 1; +} + +void GfxDeviceD3D9::GetScissorRect( int scissor[4] ) const +{ + scissor[0] = m_State.scissorRect[0]; + scissor[1] = m_State.scissorRect[1]; + scissor[2] 
= m_State.scissorRect[2]; + scissor[3] = m_State.scissorRect[3]; +} + +bool GfxDeviceD3D9::IsCombineModeSupported( unsigned int combiner ) +{ + return true; +} + +TextureCombinersHandle GfxDeviceD3D9::CreateTextureCombiners( int count, const ShaderLab::TextureBinding* texEnvs, const ShaderLab::PropertySheet* props, bool hasVertexColorOrLighting, bool usesAddSpecular ) +{ + TextureCombinersD3D* implD3D = TextureCombinersD3D::Create( count, texEnvs, props, hasVertexColorOrLighting, usesAddSpecular ); + return TextureCombinersHandle( implD3D ); +} + +void GfxDeviceD3D9::DeleteTextureCombiners( TextureCombinersHandle& textureCombiners ) +{ + TextureCombinersD3D* implD3D = OBJECT_FROM_HANDLE(textureCombiners, TextureCombinersD3D); + delete implD3D; + textureCombiners.Reset(); +} + +void GfxDeviceD3D9::SetTextureCombinersThreadable( TextureCombinersHandle textureCombiners, const TexEnvData* texEnvData, const Vector4f* texColors ) +{ + TextureCombinersD3D* implD3D = OBJECT_FROM_HANDLE(textureCombiners,TextureCombinersD3D); + AssertIf( !implD3D ); + IDirect3DDevice9* dev = GetD3DDevice(); + + AssertIf (IsShaderActive( kShaderFragment )); + + const int maxTexUnits = gGraphicsCaps.maxTexUnits; // fetch here once + + // set textures + int i = 0; + for( ; i < maxTexUnits && i < implD3D->envCount; ++i ) + { + ApplyTexEnvData (i, i, texEnvData[i]); + } + + // clear unused textures + for (; i < maxTexUnits; ++i) + { + if (i < kMaxSupportedTextureCoords) + m_VertexConfig.ClearTextureUnit(i); + + TextureUnitStateD3D& currTex = m_State.texturesPS[i]; + if (currTex.texID.m_ID != 0) + { + D3D9_CALL(dev->SetTexture( GetD3D9SamplerIndex(kShaderFragment,i), NULL )); + currTex.texID.m_ID = 0; + } + } + + // setup texture stages + if( implD3D->pixelShader ) + { + for( i = 0; i < implD3D->stageCount; ++i ) + { + const ShaderLab::TextureBinding& binding = implD3D->texEnvs[i]; + const Vector4f& texcolorVal = texColors[i]; + m_PSConstantCache.SetValues( i, texcolorVal.GetPtr(), 1 ); + } + if( 
m_State.fixedFunctionPS != implD3D->uniqueID ) + { + D3D9_CALL(dev->SetPixelShader( implD3D->pixelShader )); + m_State.fixedFunctionPS = implD3D->uniqueID; + } + } + else + { + if( implD3D->textureFactorIndex != -1 ) + { + const Vector4f& color = texColors[implD3D->textureFactorIndex]; + D3D9_CALL(dev->SetRenderState( D3DRS_TEXTUREFACTOR, ColorToD3D( color.GetPtr() ) )); + } + for( i = 0; i < implD3D->stageCount; ++i ) + { + // TODO: cache! + const D3DTextureStage& stage = implD3D->stages[i]; + AssertIf( stage.colorOp == D3DTOP_DISABLE || stage.alphaOp == D3DTOP_DISABLE ); + D3D9_CALL(dev->SetTextureStageState( i, D3DTSS_COLOROP, stage.colorOp )); + D3D9_CALL(dev->SetTextureStageState( i, D3DTSS_COLORARG1, stage.colorArgs[0] )); + D3D9_CALL(dev->SetTextureStageState( i, D3DTSS_COLORARG2, stage.colorArgs[1] )); + D3D9_CALL(dev->SetTextureStageState( i, D3DTSS_COLORARG0, stage.colorArgs[2] )); + D3D9_CALL(dev->SetTextureStageState( i, D3DTSS_ALPHAOP, stage.alphaOp )); + D3D9_CALL(dev->SetTextureStageState( i, D3DTSS_ALPHAARG1, stage.alphaArgs[0] )); + D3D9_CALL(dev->SetTextureStageState( i, D3DTSS_ALPHAARG2, stage.alphaArgs[1] )); + D3D9_CALL(dev->SetTextureStageState( i, D3DTSS_ALPHAARG0, stage.alphaArgs[2] )); + } + D3D9_CALL(dev->SetTextureStageState( i, D3DTSS_COLOROP, D3DTOP_DISABLE )); + D3D9_CALL(dev->SetTextureStageState( i, D3DTSS_ALPHAOP, D3DTOP_DISABLE )); + D3D9_CALL(dev->SetPixelShader( NULL )); + m_State.fixedFunctionPS = 0; + } +} + + +void GfxDeviceD3D9::SetTextureCombiners( TextureCombinersHandle textureCombiners, const ShaderLab::PropertySheet* props ) +{ + TextureCombinersD3D* implD3D = OBJECT_FROM_HANDLE(textureCombiners,TextureCombinersD3D); + AssertIf( !implD3D ); + + int count = std::min(implD3D->envCount, gGraphicsCaps.maxTexUnits); + + // Fill in arrays + TexEnvData* texEnvData; + ALLOC_TEMP (texEnvData, TexEnvData, count); + for( int i = 0; i < count; ++i ) + { + ShaderLab::TexEnv *te = ShaderLab::GetTexEnvForBinding( implD3D->texEnvs[i], 
props ); + Assert( te != NULL ); + te->PrepareData (implD3D->texEnvs[i].m_TextureName.index, implD3D->texEnvs[i].m_MatrixName, props, &texEnvData[i]); + } + + Vector4f* texColors; + ALLOC_TEMP (texColors, Vector4f, implD3D->envCount); + for( int i = 0; i < implD3D->envCount; ++i ) + { + const ShaderLab::TextureBinding& binding = implD3D->texEnvs[i]; + texColors[i] = binding.GetTexColor().Get (props); + } + GfxDeviceD3D9::SetTextureCombinersThreadable(textureCombiners, texEnvData, texColors); +} + + +void GfxDeviceD3D9::SetTexture (ShaderType shaderType, int unit, int samplerUnit, TextureID texture, TextureDimension dim, float bias) +{ + DebugAssertIf( dim < kTexDim2D || dim > kTexDimCUBE ); + DebugAssertIf (unit < 0 || unit >= kMaxSupportedTextureUnits); + + if (unit < kMaxSupportedTextureCoords) + m_VertexConfig.SetTextureUnit(unit); + + TextureUnitStateD3D* currTex = NULL; + if (shaderType == kShaderFragment) + currTex = &m_State.texturesPS[unit]; + else if (shaderType == kShaderVertex) + currTex = &m_State.texturesVS[unit]; + else + { + AssertString ("Unsupported shader type for SetTexture"); + return; + } + + if (texture != currTex->texID) + { + if (m_Textures.SetTexture (shaderType, unit, texture)) + currTex->texID = texture; + } + m_Stats.AddUsedTexture(texture); + if (gGraphicsCaps.hasMipLevelBias && bias != currTex->bias && shaderType == kShaderFragment) + { + D3D9_CALL(GetD3DDevice()->SetSamplerState( unit, D3DSAMP_MIPMAPLODBIAS, *(DWORD*)&bias )); + currTex->bias = bias; + } +} + + + +void GfxDeviceD3D9::SetTextureTransform( int unit, TextureDimension dim, TexGenMode texGen, bool identity, const float matrix[16] ) +{ + Assert (unit >= 0 && unit < kMaxSupportedTextureCoords); + + m_State.m_NeedsSofwareVPFlags &= ~kNeedsSoftwareVPTexGen; + + // -------- texture matrix + + float* mat = m_TransformState.texMatrices[unit].GetPtr(); + CopyMatrix( matrix, mat ); + + // In OpenGL all texture reads are projective, and matrices are always 4x4, and z/w defaults to 
0/1. + // In D3D everything is different. So here we try to figure out how many components need to be transformed, + // munge the matrix and enable projective texturing if needed. + + TextureMatrixMode matrixMode; + int projectedTexture = 0; + if( identity ) + { + // matrix guaranteed to be identity: disable transformation + matrixMode = kTexMatrixNone; + } + else if( dim == kTexDimCUBE || dim == kTexDim3D ) + { + // for cube/volume texture: count3 + matrixMode = kTexMatrix3; + } + else + { + // detect projected matrix + projectedTexture = (mat[3] != 0.0f || mat[7] != 0.0f || mat[11] != 0.0f || mat[15] != 1.0f) ? 1 : 0; + // Cards that do support projected textures or cubemaps seem to want + // Count3 flags for object/eyelinear transforms. Cards that don't support + // projection nor cubemaps will have to use Count2 - fixes GUI text rendering! + bool is3DTexGen = (texGen != kTexGenDisabled && texGen != kTexGenSphereMap); + + if( projectedTexture ) + { + matrixMode = kTexMatrix4; + } + else if( is3DTexGen ) + { + matrixMode = kTexMatrix3; + } + else + { + // regular texture: count2, and move matrix' 4th row into 3rd one + matrixMode = kTexMatrix2; + mat[ 8] = mat[12]; + mat[ 9] = mat[13]; + mat[10] = mat[14]; + mat[11] = mat[15]; + } + } + + m_VertexConfig.textureMatrixModes = m_VertexConfig.textureMatrixModes & ~(3<<(unit*2)) | (matrixMode<<(unit*2)); + m_VertexData.projectedTextures = m_VertexData.projectedTextures & ~(1<<unit) | (projectedTexture<<unit); + + // -------- texture coordinate generation + + TextureSourceMode texSource = texGen == kTexGenDisabled ? 
kTexSourceUV0 : static_cast<TextureSourceMode>(texGen + 1); + m_VertexConfig.textureSources = m_VertexConfig.textureSources & ~(7<<(unit*3)) | (texSource<<(unit*3)); + + if( texGen == kTexGenSphereMap && !IsShaderActive(kShaderVertex) ) + { + if( g_D3DUsesMixedVP && !(gGraphicsCaps.d3d.d3dcaps.VertexProcessingCaps & D3DVTXPCAPS_TEXGEN_SPHEREMAP) ) + m_State.m_NeedsSofwareVPFlags |= kNeedsSoftwareVPTexGen; + } +} + +void GfxDeviceD3D9::SetTextureParams( TextureID texture, TextureDimension texDim, TextureFilterMode filter, TextureWrapMode wrap, int anisoLevel, bool hasMipMap, TextureColorSpace colorSpace ) +{ + m_Textures.SetTextureParams( texture, texDim, filter, wrap, anisoLevel, hasMipMap, colorSpace ); + + // we'll need to set texture sampler states, so invalidate current texture cache + // invalidate texture unit states that used this texture + for (int i = 0; i < ARRAY_SIZE(m_State.texturesPS); ++i) + { + TextureUnitStateD3D& currTex = m_State.texturesPS[i]; + if( currTex.texID == texture ) + currTex.Invalidate(); + } + for (int i = 0; i < ARRAY_SIZE(m_State.texturesVS); ++i) + { + TextureUnitStateD3D& currTex = m_State.texturesVS[i]; + if (currTex.texID == texture) + currTex.Invalidate(); + } +} + + +void GfxDeviceD3D9::SetShadersThreadable( GpuProgram* programs[kShaderTypeCount], const GpuProgramParameters* params[kShaderTypeCount], UInt8 const * const paramsBuffer[kShaderTypeCount]) +{ + GpuProgram* vertexProgram = programs[kShaderVertex]; + GpuProgram* fragmentProgram = programs[kShaderFragment]; + + IDirect3DDevice9* dev = GetD3DDevice(); + + // vertex shader + if( vertexProgram && vertexProgram->GetImplType() == kShaderImplVertex ) + { + // set the shader + bool resetToNoFog = false; + IDirect3DVertexShader9* shader = static_cast<D3D9VertexShader&>(*vertexProgram).GetShader(m_FogParams.mode, resetToNoFog); + // Note: get pixel shader to match actually used fog mode from VS. 
If VS was too complex + // to patch for fog, for example, then we want PS to not have fog as well. + if (resetToNoFog) + m_FogParams.mode = kFogDisabled; + DebugAssert (shader); + + if( m_State.activeShader[kShaderVertex] != shader ) + { + D3D9_CALL(dev->SetVertexShader( shader )); + if (m_State.activeShader[kShaderVertex] == NULL) + { + for( int i = 0; i < kMaxSupportedTextureCoords; ++i ) + { + D3D9_CALL(dev->SetTextureStageState( i, D3DTSS_TEXCOORDINDEX, i )); + D3D9_CALL(dev->SetTextureStageState( i, D3DTSS_TEXTURETRANSFORMFLAGS, D3DTSS_TCI_PASSTHRU )); + } + } + + m_VertexPrevious.vertexShader = NULL; + m_VertexPrevious.ambient.set(-1,-1,-1,-1); + + m_State.activeShader[kShaderVertex] = shader; + } + + if( g_D3DUsesMixedVP ) + m_State.m_NeedsSofwareVPFlags |= kNeedsSoftwareVPVertexShader; + + m_BuiltinParamIndices[kShaderVertex] = ¶ms[kShaderVertex]->GetBuiltinParams(); + } + else + { + // clear the shader + DebugAssertIf( vertexProgram != 0 ); + if( m_State.activeShader[kShaderVertex] != 0 ) + { + D3D9_CALL(dev->SetVertexShader( NULL )); + m_State.activeShader[kShaderVertex] = 0; + } + + if( g_D3DUsesMixedVP ) + m_State.m_NeedsSofwareVPFlags &= ~kNeedsSoftwareVPVertexShader; + + m_BuiltinParamIndices[kShaderVertex] = &m_NullParamIndices; + } + + // pixel shader + if( fragmentProgram && fragmentProgram->GetImplType() == kShaderImplFragment ) + { + // set the shader + IDirect3DPixelShader9* shader = static_cast<D3D9PixelShader&>(*fragmentProgram).GetShader(m_FogParams.mode, *params[kShaderFragment]); + DebugAssert (shader); + + if( m_State.activeShader[kShaderFragment] != shader ) + { + D3D9_CALL(dev->SetPixelShader( shader )); + m_State.activeShader[kShaderFragment] = shader; + m_State.fixedFunctionPS = 0; + } + + m_BuiltinParamIndices[kShaderFragment] = ¶ms[kShaderFragment]->GetBuiltinParams(); + } + else + { + // clear the shader + DebugAssertIf( fragmentProgram != 0 ); + if( m_State.activeShader[kShaderFragment] != 0 ) + { + D3D9_CALL(dev->SetPixelShader( 
NULL )); + m_State.activeShader[kShaderFragment] = 0; + m_State.fixedFunctionPS = 0; + } + + m_BuiltinParamIndices[kShaderFragment] = &m_NullParamIndices; + } + + for (int pt = 0; pt < kShaderTypeCount; ++pt) + { + if (programs[pt]) + { + m_State.activeGpuProgramParams[pt] = params[pt]; + m_State.activeGpuProgram[pt] = programs[pt]; + programs[pt]->ApplyGpuProgram (*params[pt], paramsBuffer[pt]); + } + else + { + m_State.activeGpuProgramParams[pt] = NULL; + m_State.activeGpuProgram[pt] = NULL; + } + } +} + + +bool GfxDeviceD3D9::IsShaderActive( ShaderType type ) const +{ + return m_State.activeShader[type] != 0; +} + +void GfxDeviceD3D9::DestroySubProgram( ShaderLab::SubProgram* subprogram ) +{ + GpuProgram* program = &subprogram->GetGpuProgram(); + if (program->GetImplType() == kShaderImplVertex) + { + for (int i = 0; i < kFogModeCount; ++i) + { + IUnknown* shader = static_cast<D3D9VertexShader*>(program)->GetShaderAtFogIndex(static_cast<FogMode>(i)); + if (m_State.activeShader[kShaderVertex] == shader) + m_State.activeShader[kShaderVertex] = NULL; + } + } + else if (program->GetImplType() == kShaderImplFragment) + { + for (int i = 0; i < kFogModeCount; ++i) + { + IUnknown* shader = static_cast<D3D9PixelShader*>(program)->GetShaderAtFogIndex(static_cast<FogMode>(i)); + if (m_State.activeShader[kShaderFragment] == shader) + m_State.activeShader[kShaderFragment] = NULL; + } + } + delete subprogram; +} + +void GfxDeviceD3D9::DisableLights( int startLight ) +{ + m_VertexData.vertexLightCount = startLight; + + const Vector4f black(0.0F, 0.0F, 0.0F, 0.0F); + for (int i = startLight; i < gGraphicsCaps.maxLights; ++i) + { + m_BuiltinParamValues.SetVectorParam(BuiltinShaderVectorParam(kShaderVecLight0Diffuse + i), black); + } +} + +void GfxDeviceD3D9::SetLight( int light, const GfxVertexLight& data) +{ + IDirect3DDevice9* dev = GetD3DDevice(); + DebugAssert(light >= 0 && light < kMaxSupportedVertexLights); + + DebugAssertIf( (data.position.w == 0.0f) != (data.type == 
kLightDirectional) ); // directional lights should have 0 in position.w + DebugAssertIf( (data.spotAngle != -1.0f) != (data.type == kLightSpot) ); // non-spot lights should have -1 in spot angle + + GfxVertexLight& dest = m_VertexData.lights[light]; + dest = data; + + const Matrix4x4f& viewMat = m_BuiltinParamValues.GetMatrixParam(kShaderMatView); + + if (data.type == kLightDirectional) + { + dest.position.Set(0.0f,0.0f,0.0f,0.0f); + Vector3f v = viewMat.MultiplyVector3((const Vector3f&)data.position); + dest.spotDirection.Set( v.x, v.y, v.z, 0.0f ); + } + else + { + Vector3f v = viewMat.MultiplyPoint3((const Vector3f&)data.position); + dest.position.Set( v.x, v.y, v.z, 1.0f ); + Vector3f d = viewMat.MultiplyVector3((const Vector3f&)data.spotDirection); + dest.spotDirection.Set( d.x, d.y, d.z, 0.0f ); + } + + SetupVertexLightParams (light, data); +} + +void GfxDeviceD3D9::SetAmbient( const float ambient[4] ) +{ + if( m_VertexData.ambient != ambient ) + { + m_VertexData.ambient.set( ambient ); + m_VertexData.ambientClamped.set( clamp01(ambient[0]), clamp01(ambient[1]), clamp01(ambient[2]), clamp01(ambient[3]) ); + m_BuiltinParamValues.SetVectorParam(kShaderVecLightModelAmbient, Vector4f(ambient)); + } +} + + +static D3DFOGMODE s_D3DFogModes[kFogModeCount] = { D3DFOG_NONE, D3DFOG_LINEAR, D3DFOG_EXP, D3DFOG_EXP2 }; + +void GfxDeviceD3D9::EnableFog(const GfxFogParams& fog) +{ + IDirect3DDevice9* dev = GetD3DDevice(); + DebugAssertIf( fog.mode <= kFogDisabled ); + if( m_FogParams.mode != fog.mode ) + { + D3D9_CALL(dev->SetRenderState( D3DRS_FOGTABLEMODE, s_D3DFogModes[fog.mode] )); // TODO: or maybe vertex fog? 
+ D3D9_CALL(dev->SetRenderState( D3DRS_FOGENABLE, TRUE )); + m_FogParams.mode = fog.mode; + } + if( m_FogParams.start != fog.start ) + { + D3D9_CALL(dev->SetRenderState( D3DRS_FOGSTART, *(DWORD*)&fog.start )); + m_FogParams.start = fog.start; + } + if( m_FogParams.end != fog.end ) + { + D3D9_CALL(dev->SetRenderState( D3DRS_FOGEND, *(DWORD*)&fog.end )); + m_FogParams.end = fog.end; + } + if( m_FogParams.density != fog.density ) + { + D3D9_CALL(dev->SetRenderState( D3DRS_FOGDENSITY, *(DWORD*)&fog.density )); + m_FogParams.density = fog.density; + } + if( m_FogParams.color != fog.color ) + { + D3D9_CALL(dev->SetRenderState( D3DRS_FOGCOLOR, ColorToD3D(fog.color.GetPtr()) )); + m_FogParams.color = fog.color; + } +} + +void GfxDeviceD3D9::DisableFog() +{ + IDirect3DDevice9* dev = GetD3DDevice(); + if( m_FogParams.mode != kFogDisabled ) + { + D3D9_CALL(dev->SetRenderState( D3DRS_FOGENABLE, FALSE )); + m_FogParams.mode = kFogDisabled; + } +} + +VBO* GfxDeviceD3D9::CreateVBO() +{ + VBO* vbo = new D3D9VBO(); + OnCreateVBO(vbo); + return vbo; +} + +void GfxDeviceD3D9::DeleteVBO( VBO* vbo ) +{ + OnDeleteVBO(vbo); + delete vbo; +} + +DynamicVBO& GfxDeviceD3D9::GetDynamicVBO() +{ + if( !m_DynamicVBO ) { + m_DynamicVBO = new DynamicD3D9VBO( 1024 * 1024, 65536 ); // initial 1 MiB VB, 64 KiB IB + } + return *m_DynamicVBO; +} + +IDirect3DVertexBuffer9* GfxDeviceD3D9::GetAllWhiteVertexStream() +{ + if( !m_AllWhiteVertexStream ) + { + int maxVerts = 0x10000; + int size = maxVerts * sizeof(D3DCOLOR); + HRESULT hr = GetD3DDevice()->CreateVertexBuffer( size, D3DUSAGE_WRITEONLY, 0, D3DPOOL_MANAGED, &m_AllWhiteVertexStream, NULL ); + if( !SUCCEEDED(hr) ) + return NULL; + void* buffer; + hr = m_AllWhiteVertexStream->Lock( 0 , 0, &buffer, 0 ); + if( !SUCCEEDED(hr) ) + { + SAFE_RELEASE( m_AllWhiteVertexStream ); + return NULL; + } + D3DCOLOR* dest = (D3DCOLOR*)buffer; + for( int i = 0; i < maxVerts; i++ ) + dest[i] = D3DCOLOR_ARGB(255, 255, 255, 255); + m_AllWhiteVertexStream->Unlock(); + } + 
return m_AllWhiteVertexStream; +} + +void GfxDeviceD3D9::ResetDynamicResources() +{ + delete m_DynamicVBO; + m_DynamicVBO = NULL; + + CleanupEventQueries (); + ResetDynamicVBs (); + + #if ENABLE_PROFILER + m_TimerQueriesD3D9.ReleaseAllQueries(); + #endif + + D3D9VBO::CleanupSharedIndexBuffer(); +} + + +void ResetDynamicResourcesD3D9() +{ + AutoGfxDeviceAcquireThreadOwnership autoOwner; + GetD3D9GfxDevice().ResetDynamicResources(); +} + +IDirect3DVertexDeclaration9* GetD3DVertexDeclaration( UInt32 shaderChannelsMap ) +{ + ChannelInfoArray channels; + int offset = 0; + for (int i = 0; i < kShaderChannelCount; i++) + { + ChannelInfo& info = channels[i]; + if (shaderChannelsMap & (1 << i)) + { + info.stream = 0; + info.offset = offset; + info.format = VBO::GetDefaultChannelFormat( i ); + info.dimension = VBO::GetDefaultChannelDimension( i ); + offset += VBO::GetDefaultChannelByteSize( i ); +} + else + info.Reset(); + } + return GetD3D9GfxDevice().GetVertexDecls().GetVertexDecl( channels ); +} + +VertexShaderConstantCache& GetD3D9VertexShaderConstantCache() +{ + return GetD3D9GfxDevice().GetVertexShaderConstantCache(); +} + +PixelShaderConstantCache& GetD3D9PixelShaderConstantCache() +{ + return GetD3D9GfxDevice().GetPixelShaderConstantCache(); +} + + +// ---------- render textures + +RenderSurfaceHandle GfxDeviceD3D9::CreateRenderColorSurface (TextureID textureID, int width, int height, int samples, int depth, TextureDimension dim, RenderTextureFormat format, UInt32 createFlags) +{ + return CreateRenderColorSurfaceD3D9 (textureID, width, height, samples, dim, createFlags, format, m_Textures); +} +RenderSurfaceHandle GfxDeviceD3D9::CreateRenderDepthSurface(TextureID textureID, int width, int height, int samples, TextureDimension dim, DepthBufferFormat depthFormat, UInt32 createFlags) +{ + return CreateRenderDepthSurfaceD3D9 (textureID, width, height, samples, depthFormat, createFlags, m_Textures); +} +void GfxDeviceD3D9::DestroyRenderSurface(RenderSurfaceHandle& rs) +{ 
+ DestroyRenderSurfaceD3D9( rs, m_Textures ); +} +void GfxDeviceD3D9::SetRenderTargets (int count, RenderSurfaceHandle* colorHandles, RenderSurfaceHandle depthHandle, int mipLevel, CubemapFace face) +{ + bool isBackBuffer; + m_CurrTargetWidth = m_CurrWindowWidth; + m_CurrTargetHeight = m_CurrWindowHeight; + if (SetRenderTargetD3D9 (count, colorHandles, depthHandle, mipLevel, face, m_CurrTargetWidth, m_CurrTargetHeight, isBackBuffer)) + { + // changing render target might mean different color clear flags; so reset current state + m_CurrBlendState = NULL; + } +} +void GfxDeviceD3D9::ResolveDepthIntoTexture (RenderSurfaceHandle colorHandle, RenderSurfaceHandle depthHandle) +{ + Assert (gGraphicsCaps.d3d.hasDepthResolveRESZ); + + RenderSurfaceD3D9* depthSurf = reinterpret_cast<RenderSurfaceD3D9*>(depthHandle.object); + + IDirect3DDevice9* dev = GetD3DDevice(); + // Important: change point size render state to something else than RESZ + // before the dummy draw call; otherwise RESZ state set will be filtered out + // by non-PURE D3D device. 
+ dev->SetRenderState (D3DRS_POINTSIZE, 0); + + // Bind destination as texture + SetTexture (kShaderFragment, 0, 0, depthSurf->textureID, kTexDim2D, 0.0f); + + // Dummy draw call + float dummy[3] = {0,0,0}; + dev->DrawPrimitiveUP (D3DPT_POINTLIST, 1, dummy, 12); + + // RESZ to trigger depth buffer copy + dev->SetRenderState (D3DRS_POINTSIZE, 0x7fa05000); +} + + +void GfxDeviceD3D9::ResolveColorSurface (RenderSurfaceHandle srcHandle, RenderSurfaceHandle dstHandle) +{ + Assert (srcHandle.IsValid()); + Assert (dstHandle.IsValid()); + RenderColorSurfaceD3D9* src = reinterpret_cast<RenderColorSurfaceD3D9*>(srcHandle.object); + RenderColorSurfaceD3D9* dst = reinterpret_cast<RenderColorSurfaceD3D9*>(dstHandle.object); + if (!src->colorSurface || !dst->colorSurface) + { + WarningString("RenderTexture: Resolving non-color surfaces."); + return; + } + if (!src->m_Surface || !dst->m_Surface) + { + WarningString("RenderTexture: Resolving NULL surfaces."); + return; + } + if (src->dim != dst->dim) + { + WarningString("RenderTexture: Resolving surfaces of different types."); + return; + } + if (src->format != dst->format) + { + WarningString("RenderTexture: Resolving surfaces of different formats."); + return; + } + if (src->width != dst->width || src->height != dst->height) + { + WarningString("RenderTexture: Resolving surfaces of different sizes."); + return; + } + + IDirect3DDevice9* dev = GetD3DDevice(); + dev->StretchRect (src->m_Surface, NULL, dst->m_Surface, NULL, D3DTEXF_NONE); +} + +RenderSurfaceHandle GfxDeviceD3D9::GetActiveRenderColorSurface (int index) +{ + return GetActiveRenderColorSurfaceD3D9(index); +} +RenderSurfaceHandle GfxDeviceD3D9::GetActiveRenderDepthSurface() +{ + return GetActiveRenderDepthSurfaceD3D9(); +} +void GfxDeviceD3D9::SetSurfaceFlags (RenderSurfaceHandle surf, UInt32 flags, UInt32 keepFlags) +{ +} + + +// ---------- uploading textures + +void GfxDeviceD3D9::UploadTexture2D( TextureID texture, TextureDimension dimension, UInt8* srcData, int 
srcSize, int width, int height, TextureFormat format, int mipCount, UInt32 uploadFlags, int skipMipLevels, TextureUsageMode usageMode, TextureColorSpace colorSpace ) +{ + m_Textures.UploadTexture2D( texture, dimension, srcData, width, height, format, mipCount, uploadFlags, skipMipLevels, usageMode, colorSpace ); +} +void GfxDeviceD3D9::UploadTextureSubData2D( TextureID texture, UInt8* srcData, int srcSize, int mipLevel, int x, int y, int width, int height, TextureFormat format, TextureColorSpace colorSpace ) +{ + m_Textures.UploadTextureSubData2D( texture, srcData, mipLevel, x, y, width, height, format, colorSpace ); +} +void GfxDeviceD3D9::UploadTextureCube( TextureID texture, UInt8* srcData, int srcSize, int faceDataSize, int size, TextureFormat format, int mipCount, UInt32 uploadFlags, TextureColorSpace colorSpace ) +{ + m_Textures.UploadTextureCube( texture, srcData, faceDataSize, size, format, mipCount, uploadFlags, colorSpace ); +} +void GfxDeviceD3D9::UploadTexture3D( TextureID texture, UInt8* srcData, int srcSize, int width, int height, int depth, TextureFormat format, int mipCount, UInt32 uploadFlags ) +{ + m_Textures.UploadTexture3D( texture, srcData, width, height, depth, format, mipCount, uploadFlags ); +} + +void GfxDeviceD3D9::DeleteTexture( TextureID texture ) +{ + m_Textures.DeleteTexture( texture ); + + // invalidate texture unit states that used this texture + for (int i = 0; i < ARRAY_SIZE(m_State.texturesPS); ++i) + { + TextureUnitStateD3D& currTex = m_State.texturesPS[i]; + if( currTex.texID == texture ) + currTex.Invalidate(); + } + for (int i = 0; i < ARRAY_SIZE(m_State.texturesVS); ++i) + { + TextureUnitStateD3D& currTex = m_State.texturesVS[i]; + if (currTex.texID == texture) + currTex.Invalidate(); + } +} + +void UnbindTextureD3D9( TextureID texture ) +{ + GfxDeviceD3D9& device = static_cast<GfxDeviceD3D9&>( GetRealGfxDevice() ); + IDirect3DDevice9* dev = GetD3DDevice(); + + // invalidate texture unit states that used this texture + for 
(int i = 0; i < ARRAY_SIZE(device.GetState().texturesPS); ++i) + { + TextureUnitStateD3D& currTex = device.GetState().texturesPS[i]; + if( currTex.texID == texture ) + { + D3D9_CALL(dev->SetTexture(GetD3D9SamplerIndex(kShaderFragment,i), NULL)); + currTex.Invalidate(); + } + } + for (int i = 0; i < ARRAY_SIZE(device.GetState().texturesVS); ++i) + { + TextureUnitStateD3D& currTex = device.GetState().texturesVS[i]; + if (currTex.texID == texture) + { + D3D9_CALL(dev->SetTexture(GetD3D9SamplerIndex(kShaderVertex,i), NULL)); + currTex.Invalidate(); + } + } +} + + +// ---------- context + +GfxDevice::PresentMode GfxDeviceD3D9::GetPresentMode() +{ + return kPresentBeforeUpdate; +} + +void GfxDeviceD3D9::BeginFrame() +{ + if( m_State.m_DeviceLost ) + return; + + // begin scene + Assert( !m_InsideFrame ); + GetD3DDevice()->BeginScene(); + m_InsideFrame = true; + +} + +void GfxDeviceD3D9::EndFrame() +{ + // Check if we're inside scene in case BeginFrame() failed + if( !m_InsideFrame ) + return; + + GetD3DDevice()->EndScene(); + m_InsideFrame = false; +} + +bool GfxDeviceD3D9::IsValidState() +{ + return !m_State.m_DeviceLost; +} + +bool GfxDeviceD3D9::HandleInvalidState() +{ +#if ENABLE_MULTITHREADED_CODE + // Reset render textures owned by the main thread + if (Thread::CurrentThreadIsMainThread()) + CommonReloadResources(kReleaseRenderTextures); +#endif + + ResetDynamicResourcesD3D9(); + + bool success = HandleD3DDeviceLost(); + +#if ENABLE_PROFILER + if (success) + m_TimerQueriesD3D9.RecreateAllQueries(); +#endif + + InvalidateState(); + return success; +} + +static void CleanupEventQueries () +{ + D3D9QueryList::iterator itEnd = s_EventQueries.end(); + for (D3D9QueryList::iterator it = s_EventQueries.begin(); it != itEnd; ++it) + { + IDirect3DQuery9* query = *it; + if (query != NULL) + { + query->Release(); + } + } + s_EventQueries.clear(); +} + +static void PopEventQuery () +{ + AssertIf (s_EventQueries.empty()); + + IDirect3DQuery9* query = s_EventQueries.front(); + 
AssertIf (query == NULL); + + while (S_FALSE == query->GetData (NULL, 0, D3DGETDATA_FLUSH)) + { + Sleep (1); + } + query->Release(); + + s_EventQueries.pop_front(); +} + +void GfxDeviceD3D9::PushEventQuery () +{ + if (m_MaxBufferedFrames < 0) + return; + + IDirect3DQuery9* query = NULL; + HRESULT hr = GetD3DDevice()->CreateQuery (D3DQUERYTYPE_EVENT, &query); + if (query != NULL) + { + if (SUCCEEDED(query->Issue(D3DISSUE_END))) + s_EventQueries.push_back (query); + else + query->Release(); + } + + // don't exceed maximum lag... instead we'll deterministically block here until the GPU has done enough work + while (!s_EventQueries.empty() && s_EventQueries.size() > m_MaxBufferedFrames) + { + PopEventQuery(); + } +} + +void GfxDeviceD3D9::PresentFrame() +{ + if( m_State.m_DeviceLost ) + return; + + HRESULT hr = GetD3DDevice()->Present( NULL, NULL, NULL, NULL ); + PushEventQuery(); + // When D3DERR_DRIVERINTERNALERROR is returned from Present(), + // the application can do one of the following, try recovering just as + // from the lost device. 
+ if( hr == D3DERR_DEVICELOST || hr == D3DERR_DRIVERINTERNALERROR ) + { + m_State.m_DeviceLost = true; + } +} + +void GfxDeviceD3D9::FinishRendering() +{ + // not needed on D3D +} + + + +// ---------- immediate mode rendering + +// we break very large immediate mode submissions into multiple batches internally +const int kMaxImmediateVerticesPerDraw = 8192; + + +ImmediateModeD3D::ImmediateModeD3D() +: m_ImmVertexDecl(NULL) +{ + m_QuadsIB = new UInt16[kMaxImmediateVerticesPerDraw*6]; + UInt32 baseIndex = 0; + UInt16* ibPtr = m_QuadsIB; + for( int i = 0; i < kMaxImmediateVerticesPerDraw; ++i ) + { + ibPtr[0] = baseIndex + 1; + ibPtr[1] = baseIndex + 2; + ibPtr[2] = baseIndex; + ibPtr[3] = baseIndex + 2; + ibPtr[4] = baseIndex + 3; + ibPtr[5] = baseIndex; + baseIndex += 4; + ibPtr += 6; + } +} + +ImmediateModeD3D::~ImmediateModeD3D() +{ + delete[] m_QuadsIB; +} + + +void ImmediateModeD3D::Invalidate() +{ + m_Vertices.clear(); + memset( &m_Current, 0, sizeof(m_Current) ); +} + +void GfxDeviceD3D9::ImmediateVertex( float x, float y, float z ) +{ + // If the current batch is becoming too large, internally end it and begin it again. + size_t currentSize = m_Imm.m_Vertices.size(); + if( currentSize >= kMaxImmediateVerticesPerDraw - 4 ) + { + GfxPrimitiveType mode = m_Imm.m_Mode; + // For triangles, break batch when multiple of 3's is reached. + if( mode == kPrimitiveTriangles && currentSize % 3 == 0 ) + { + ImmediateEnd(); + ImmediateBegin( mode ); + } + // For other primitives, break on multiple of 4's. + // NOTE: This won't quite work for triangle strips, but we'll just pretend + // that will never happen. 
+ else if( mode != kPrimitiveTriangles && currentSize % 4 == 0 ) + { + ImmediateEnd(); + ImmediateBegin( mode ); + } + } + D3DVECTOR& vert = m_Imm.m_Current.vertex; + vert.x = x; + vert.y = y; + vert.z = z; + m_Imm.m_Vertices.push_back( m_Imm.m_Current ); +} + +void GfxDeviceD3D9::ImmediateNormal( float x, float y, float z ) +{ + m_Imm.m_Current.normal.x = x; + m_Imm.m_Current.normal.y = y; + m_Imm.m_Current.normal.z = z; +} + +void GfxDeviceD3D9::ImmediateColor( float r, float g, float b, float a ) +{ + float color[4] = { r, g, b, a }; + m_Imm.m_Current.color = ColorToD3D( color ); +} + +void GfxDeviceD3D9::ImmediateTexCoordAll( float x, float y, float z ) +{ + for( int i = 0; i < 8; ++i ) + { + D3DVECTOR& uv = m_Imm.m_Current.texCoords[i]; + uv.x = x; + uv.y = y; + uv.z = z; + } +} + +void GfxDeviceD3D9::ImmediateTexCoord( int unit, float x, float y, float z ) +{ + if( unit < 0 || unit >= 8 ) + { + ErrorString( "Invalid unit for texcoord" ); + return; + } + D3DVECTOR& uv = m_Imm.m_Current.texCoords[unit]; + uv.x = x; + uv.y = y; + uv.z = z; +} + +void GfxDeviceD3D9::ImmediateBegin( GfxPrimitiveType type ) +{ + m_Imm.m_Mode = type; + m_Imm.m_Vertices.clear(); +} + +void GfxDeviceD3D9::ImmediateEnd() +{ + if( m_Imm.m_Vertices.empty() ) + return; + + // lazily create vertex declaration + IDirect3DDevice9* dev = GetD3DDevice(); + HRESULT hr = S_OK; + if( !m_Imm.m_ImmVertexDecl ) + { + static const D3DVERTEXELEMENT9 elements[] = { + // stream, offset, data type, processing, semantics, index + { 0, 0, D3DDECLTYPE_FLOAT3, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_POSITION, 0 }, // position + { 0, 12, D3DDECLTYPE_FLOAT3, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_NORMAL, 0 }, // normal + { 0, 24, D3DDECLTYPE_D3DCOLOR, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_COLOR, 0 }, // color + { 0, 28, D3DDECLTYPE_FLOAT3, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD, 0 }, // UVs + { 0, 40, D3DDECLTYPE_FLOAT3, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD, 1 }, + { 0, 52, D3DDECLTYPE_FLOAT3, 
D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD, 2 }, + { 0, 64, D3DDECLTYPE_FLOAT3, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD, 3 }, + { 0, 76, D3DDECLTYPE_FLOAT3, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD, 4 }, + { 0, 88, D3DDECLTYPE_FLOAT3, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD, 5 }, + { 0, 100, D3DDECLTYPE_FLOAT3, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD, 6 }, + { 0, 112, D3DDECLTYPE_FLOAT3, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD, 7 }, + D3DDECL_END() + }; + hr = dev->CreateVertexDeclaration( elements, &m_Imm.m_ImmVertexDecl ); + if( FAILED(hr) ) { + // TODO: error + } + } + + // draw + D3D9_CALL(dev->SetVertexDeclaration( m_Imm.m_ImmVertexDecl )); + + BeforeDrawCall( true ); + + int vertexCount = m_Imm.m_Vertices.size(); + const ImmediateVertexD3D* vb = &m_Imm.m_Vertices[0]; + switch( m_Imm.m_Mode ) + { + case kPrimitiveTriangles: + hr = D3D9_CALL_HR(dev->DrawPrimitiveUP( D3DPT_TRIANGLELIST, vertexCount / 3, vb, sizeof(ImmediateVertexD3D) )); + m_Stats.AddDrawCall( vertexCount / 3, vertexCount ); + break; + case kPrimitiveTriangleStripDeprecated: + hr = D3D9_CALL_HR(dev->DrawPrimitiveUP( D3DPT_TRIANGLESTRIP, vertexCount - 2, vb, sizeof(ImmediateVertexD3D) )); + m_Stats.AddDrawCall( vertexCount - 2, vertexCount ); + break; + case kPrimitiveQuads: + hr = D3D9_CALL_HR(dev->DrawIndexedPrimitiveUP( D3DPT_TRIANGLELIST, 0, vertexCount, vertexCount / 4 * 2, m_Imm.m_QuadsIB, D3DFMT_INDEX16, vb, sizeof(ImmediateVertexD3D) )); + m_Stats.AddDrawCall( vertexCount / 4 * 2, vertexCount ); + break; + case kPrimitiveLines: + hr = D3D9_CALL_HR(dev->DrawPrimitiveUP( D3DPT_LINELIST, vertexCount / 2, vb, sizeof(ImmediateVertexD3D) )); + m_Stats.AddDrawCall( vertexCount / 2, vertexCount ); + break; + default: + AssertString("ImmediateEnd: unknown draw mode"); + } + AssertIf( FAILED(hr) ); + // TODO: stats + + // clear vertices + m_Imm.m_Vertices.clear(); +} + + + +bool GfxDeviceD3D9::CaptureScreenshot( int left, int bottom, int width, int height, UInt8* rgba32 ) 
+{ + HRESULT hr; + IDirect3DDevice9* dev = GetD3DDevice(); + + SurfacePointer renderTarget; + hr = dev->GetRenderTarget( 0, &renderTarget ); + if( !renderTarget || FAILED(hr) ) + return false; + + D3DSURFACE_DESC rtDesc; + renderTarget->GetDesc( &rtDesc ); + + SurfacePointer resolvedSurface; + if( rtDesc.MultiSampleType != D3DMULTISAMPLE_NONE ) + { + hr = dev->CreateRenderTarget( rtDesc.Width, rtDesc.Height, rtDesc.Format, D3DMULTISAMPLE_NONE, 0, FALSE, &resolvedSurface, NULL ); + if( FAILED(hr) ) + return false; + hr = dev->StretchRect( renderTarget, NULL, resolvedSurface, NULL, D3DTEXF_NONE ); + if( FAILED(hr) ) + return false; + renderTarget = resolvedSurface; + } + + SurfacePointer offscreenSurface; + hr = dev->CreateOffscreenPlainSurface( rtDesc.Width, rtDesc.Height, rtDesc.Format, D3DPOOL_SYSTEMMEM, &offscreenSurface, NULL ); + if( FAILED(hr) ) + return false; + + hr = dev->GetRenderTargetData( renderTarget, offscreenSurface ); + bool ok = SUCCEEDED(hr); + if( ok ) + { + rgba32 += (height-1) * width * sizeof(UInt32); + if( rtDesc.Format == D3DFMT_A8R8G8B8 || rtDesc.Format == D3DFMT_X8R8G8B8 ) + { + // Backbuffer is 32 bit + D3DLOCKED_RECT lr; + RECT rect; + rect.left = left; + rect.right = left + width; + rect.top = rtDesc.Height - bottom - height; + rect.bottom = rtDesc.Height - bottom; + hr = offscreenSurface->LockRect( &lr, &rect, D3DLOCK_READONLY ); + if( SUCCEEDED(hr) ) + { + const UInt8* src = (const UInt8*)lr.pBits; + for( int y = 0; y < height; ++y ) + { + const UInt32* srcPtr = (const UInt32*)src; + UInt32* dstPtr = (UInt32*)rgba32; + for( int x = 0; x < width; ++x ) + { + UInt32 argbCol = *srcPtr; + UInt32 abgrCol = (argbCol&0xFF00FF00) | ((argbCol&0x00FF0000)>>16) | ((argbCol&0x000000FF)<<16); + *dstPtr = abgrCol; + ++srcPtr; + ++dstPtr; + } + rgba32 -= width * sizeof(UInt32); + src += lr.Pitch; + } + } + else + { + ok = false; + } + offscreenSurface->UnlockRect(); + } + else if( rtDesc.Format == D3DFMT_R5G6B5 ) + { + // Backbuffer is 16 bit 565 + 
D3DLOCKED_RECT lr; + RECT rect; + rect.left = left; + rect.right = left + width; + rect.top = rtDesc.Height - bottom - height; + rect.bottom = rtDesc.Height - bottom; + hr = offscreenSurface->LockRect( &lr, &rect, D3DLOCK_READONLY ); + if( SUCCEEDED(hr) ) + { + const UInt8* src = (const UInt8*)lr.pBits; + for( int y = 0; y < height; ++y ) + { + const UInt16* srcPtr = (const UInt16*)src; + UInt32* dstPtr = (UInt32*)rgba32; + for( int x = 0; x < width; ++x ) + { + UInt16 rgbCol = *srcPtr; + UInt32 abgrCol = 0xFF000000 | ((rgbCol&0xF800)>>8) | ((rgbCol&0x07E0)<<5) | ((rgbCol&0x001F)<<19); + *dstPtr = abgrCol; + ++srcPtr; + ++dstPtr; + } + rgba32 -= width * sizeof(UInt32); + src += lr.Pitch; + } + } + else + { + ok = false; + } + offscreenSurface->UnlockRect(); + } + else if( rtDesc.Format == D3DFMT_X1R5G5B5 || rtDesc.Format == D3DFMT_A1R5G5B5 ) + { + // Backbuffer is 15 bit 555 + D3DLOCKED_RECT lr; + RECT rect; + rect.left = left; + rect.right = left + width; + rect.top = rtDesc.Height - bottom - height; + rect.bottom = rtDesc.Height - bottom; + hr = offscreenSurface->LockRect( &lr, &rect, D3DLOCK_READONLY ); + if( SUCCEEDED(hr) ) + { + const UInt8* src = (const UInt8*)lr.pBits; + for( int y = 0; y < height; ++y ) + { + const UInt16* srcPtr = (const UInt16*)src; + UInt32* dstPtr = (UInt32*)rgba32; + for( int x = 0; x < width; ++x ) + { + UInt16 rgbCol = *srcPtr; + UInt32 abgrCol = ((rgbCol&0x8000)<<16) | ((rgbCol&0x7C00)>>7) | ((rgbCol&0x03E0)<<6) | ((rgbCol&0x001F)<<19); + *dstPtr = abgrCol; + ++srcPtr; + ++dstPtr; + } + rgba32 -= width * sizeof(UInt32); + src += lr.Pitch; + } + } + else + { + ok = false; + } + offscreenSurface->UnlockRect(); + } + else + { + // TODO: handle more conversions! 
+ ok = false; + } + } + + return ok; +} + + + +bool GfxDeviceD3D9::ReadbackImage( ImageReference& image, int left, int bottom, int width, int height, int destX, int destY ) +{ + // TODO: make it work in all different situations + + AssertIf( image.GetFormat() != kTexFormatARGB32 && image.GetFormat() != kTexFormatRGB24 ); + + HRESULT hr; + IDirect3DDevice9* dev = GetD3DDevice(); + SurfacePointer renderTarget; + hr = dev->GetRenderTarget( 0, &renderTarget ); + if( !renderTarget || FAILED(hr) ) + return false; + + D3DSURFACE_DESC rtDesc; + renderTarget->GetDesc( &rtDesc ); + + SurfacePointer resolvedSurface; + if( rtDesc.MultiSampleType != D3DMULTISAMPLE_NONE ) + { + hr = dev->CreateRenderTarget( rtDesc.Width, rtDesc.Height, rtDesc.Format, D3DMULTISAMPLE_NONE, 0, FALSE, &resolvedSurface, NULL ); + if( FAILED(hr) ) + return false; + hr = dev->StretchRect( renderTarget, NULL, resolvedSurface, NULL, D3DTEXF_NONE ); + if( FAILED(hr) ) + return false; + renderTarget = resolvedSurface; + } + + SurfacePointer offscreenSurface; + hr = dev->CreateOffscreenPlainSurface( rtDesc.Width, rtDesc.Height, rtDesc.Format, D3DPOOL_SYSTEMMEM, &offscreenSurface, NULL ); + if( FAILED(hr) ) + return false; + if (width <= 0 || left < 0 || left + width > rtDesc.Width) + { + ErrorString("Trying to read pixel out of bounds"); + return false; + } + if (height <= 0 || bottom < 0 || bottom + height > rtDesc.Height) + { + ErrorString("Trying to read pixel out of bounds"); + return false; + } + + hr = dev->GetRenderTargetData( renderTarget, offscreenSurface ); + bool ok = SUCCEEDED(hr); + if( ok ) + { + if( rtDesc.Format == D3DFMT_A8R8G8B8 || rtDesc.Format == D3DFMT_X8R8G8B8 ) + { + // Render target is 32 bit + D3DLOCKED_RECT lr; + RECT rect; + rect.left = left; + rect.right = left + width; + rect.top = rtDesc.Height - bottom - height; + rect.bottom = rtDesc.Height - bottom; + hr = offscreenSurface->LockRect( &lr, &rect, D3DLOCK_READONLY ); + if( SUCCEEDED(hr) ) + { + const UInt8* src = (const 
UInt8*)lr.pBits; + if( image.GetFormat() == kTexFormatARGB32 ) + { + for( int y = height-1; y >= 0; --y ) + { + const UInt32* srcPtr = (const UInt32*)src; + UInt32* dstPtr = (UInt32*)(image.GetRowPtr(destY+y) + destX * 4); + for( int x = 0; x < width; ++x ) + { + UInt32 argbCol = *srcPtr; + UInt32 bgraCol = ((argbCol&0xFF000000)>>24) | ((argbCol&0x00FF0000)>>8) | ((argbCol&0x0000FF00)<<8) | ((argbCol&0x000000FF)<<24); + *dstPtr = bgraCol; + ++srcPtr; + ++dstPtr; + } + src += lr.Pitch; + } + } + else if( image.GetFormat() == kTexFormatRGB24 ) + { + for( int y = height-1; y >= 0; --y ) + { + const UInt32* srcPtr = (const UInt32*)src; + UInt8* dstPtr = image.GetRowPtr(destY+y) + destX * 3; + for( int x = 0; x < width; ++x ) + { + UInt32 argbCol = *srcPtr; + dstPtr[0] = (argbCol & 0x00FF0000) >> 16; + dstPtr[1] = (argbCol & 0x0000FF00) >> 8; + dstPtr[2] = (argbCol & 0x000000FF); + ++srcPtr; + dstPtr += 3; + } + src += lr.Pitch; + } + } + else + { + AssertString( "Invalid image format" ); + } + } + else + { + ok = false; + } + offscreenSurface->UnlockRect(); + } + else if( rtDesc.Format == D3DFMT_R5G6B5 ) + { + // Render target is 16 bit 565 + D3DLOCKED_RECT lr; + RECT rect; + rect.left = left; + rect.right = left + width; + rect.top = rtDesc.Height - bottom - height; + rect.bottom = rtDesc.Height - bottom; + hr = offscreenSurface->LockRect( &lr, &rect, D3DLOCK_READONLY ); + if( SUCCEEDED(hr) ) + { + const UInt8* src = (const UInt8*)lr.pBits; + if( image.GetFormat() == kTexFormatARGB32 ) + { + for( int y = height-1; y >= 0; --y ) + { + const UInt16* srcPtr = (const UInt16*)src; + UInt32* dstPtr = (UInt32*)(image.GetRowPtr(destY+y) + destX * 4); + for( int x = 0; x < width; ++x ) + { + UInt16 argbCol = *srcPtr; + UInt32 bgraCol = 0x000000FF | (argbCol&0xF800) | ((argbCol&0x07E0)<<13) | ((argbCol&0x001F)<<27); + *dstPtr = bgraCol; + ++srcPtr; + ++dstPtr; + } + src += lr.Pitch; + } + } + else if( image.GetFormat() == kTexFormatRGB24 ) + { + for( int y = height-1; y >= 0; 
--y ) + { + const UInt16* srcPtr = (const UInt16*)src; + UInt8* dstPtr = image.GetRowPtr(destY+y) + destX * 3; + for( int x = 0; x < width; ++x ) + { + UInt16 argbCol = *srcPtr; + dstPtr[0] = (argbCol & 0xF800) >> 8; + dstPtr[1] = (argbCol & 0x07E0) >> 3; + dstPtr[2] = (argbCol & 0x001F) << 3; + ++srcPtr; + dstPtr += 3; + } + src += lr.Pitch; + } + } + else + { + AssertString( "Invalid image format" ); + } + } + else + { + ok = false; + } + offscreenSurface->UnlockRect(); + } + else if( rtDesc.Format == D3DFMT_A1R5G5B5 || rtDesc.Format == D3DFMT_X1R5G5B5 ) + { + // Render target is 15 bit 555 + D3DLOCKED_RECT lr; + RECT rect; + rect.left = left; + rect.right = left + width; + rect.top = rtDesc.Height - bottom - height; + rect.bottom = rtDesc.Height - bottom; + hr = offscreenSurface->LockRect( &lr, &rect, D3DLOCK_READONLY ); + if( SUCCEEDED(hr) ) + { + const UInt8* src = (const UInt8*)lr.pBits; + if( image.GetFormat() == kTexFormatARGB32 ) + { + for( int y = height-1; y >= 0; --y ) + { + const UInt16* srcPtr = (const UInt16*)src; + UInt32* dstPtr = (UInt32*)(image.GetRowPtr(destY+y) + destX * 4); + for( int x = 0; x < width; ++x ) + { + UInt16 argbCol = *srcPtr; + UInt32 bgraCol = ((argbCol&0x8000)>>8) | ((argbCol&0x7C00)<<1) | ((argbCol&0x03E0)<<14) | ((argbCol&0x001F)<<27); + *dstPtr = bgraCol; + ++srcPtr; + ++dstPtr; + } + src += lr.Pitch; + } + } + else if( image.GetFormat() == kTexFormatRGB24 ) + { + for( int y = height-1; y >= 0; --y ) + { + const UInt16* srcPtr = (const UInt16*)src; + UInt8* dstPtr = image.GetRowPtr(destY+y) + destX * 3; + for( int x = 0; x < width; ++x ) + { + UInt16 argbCol = *srcPtr; + dstPtr[0] = (argbCol & 0x7C00) >> 7; + dstPtr[1] = (argbCol & 0x03E0) >> 2; + dstPtr[2] = (argbCol & 0x001F) << 3; + ++srcPtr; + dstPtr += 3; + } + src += lr.Pitch; + } + } + else + { + AssertString( "Invalid image format" ); + } + } + else + { + ok = false; + } + offscreenSurface->UnlockRect(); + } + else + { + // TODO: handle more conversions! 
+ ok = false; + } + } + + return ok; +} + +void GfxDeviceD3D9::GrabIntoRenderTexture(RenderSurfaceHandle rtHandle, RenderSurfaceHandle rd, int x, int y, int width, int height ) +{ + if( !rtHandle.IsValid() ) + return; + + RenderColorSurfaceD3D9* renderTexture = reinterpret_cast<RenderColorSurfaceD3D9*>( rtHandle.object ); + + HRESULT hr; + IDirect3DDevice9* dev = GetD3DDevice(); + SurfacePointer currentRenderTarget; + hr = dev->GetRenderTarget( 0, &currentRenderTarget ); + if( !currentRenderTarget || FAILED(hr) ) + return; + + D3DSURFACE_DESC rtDesc; + currentRenderTarget->GetDesc( &rtDesc ); + + IDirect3DTexture9* texturePointer = static_cast<IDirect3DTexture9*>(m_Textures.GetTexture (renderTexture->textureID)); + if( !texturePointer ) + return; + + SurfacePointer textureSurface; + hr = texturePointer->GetSurfaceLevel( 0, &textureSurface ); + if( !textureSurface || FAILED(hr) ) + return; + + RECT rc; + rc.left = x; + rc.top = rtDesc.Height - (y + height); + rc.right = x + width; + rc.bottom = rtDesc.Height - (y); + hr = dev->StretchRect( currentRenderTarget, &rc, textureSurface, NULL, D3DTEXF_NONE ); +} + + +void* GfxDeviceD3D9::GetNativeGfxDevice() +{ + return GetD3DDevice(); +} + +void* GfxDeviceD3D9::GetNativeTexturePointer(TextureID id) +{ + return m_Textures.GetTexture (id); +} + +intptr_t GfxDeviceD3D9::CreateExternalTextureFromNative(intptr_t nativeTex) +{ + return m_Textures.RegisterNativeTexture((IDirect3DBaseTexture9*)nativeTex); +} + +void GfxDeviceD3D9::UpdateExternalTextureFromNative(TextureID tex, intptr_t nativeTex) +{ + m_Textures.UpdateNativeTexture(tex, (IDirect3DBaseTexture9*)nativeTex); +} + + +#if ENABLE_PROFILER + +void GfxDeviceD3D9::BeginProfileEvent (const char* name) +{ + if (g_D3D9BeginEventFunc) + { + wchar_t wideName[100]; + UTF8ToWide (name, wideName, 100); + g_D3D9BeginEventFunc (0, wideName); + } +} + +void GfxDeviceD3D9::EndProfileEvent () +{ + if (g_D3D9EndEventFunc) + { + g_D3D9EndEventFunc (); + } +} + +GfxTimerQuery* 
GfxDeviceD3D9::CreateTimerQuery() +{ + Assert(gGraphicsCaps.hasTimerQuery); + return m_TimerQueriesD3D9.CreateTimerQuery(); +} + +void GfxDeviceD3D9::DeleteTimerQuery(GfxTimerQuery* query) +{ + delete query; +} + +void GfxDeviceD3D9::BeginTimerQueries() +{ + if(!gGraphicsCaps.hasTimerQuery) + return; + + m_TimerQueriesD3D9.BeginTimerQueries(); +} + +void GfxDeviceD3D9::EndTimerQueries() +{ + if(!gGraphicsCaps.hasTimerQuery) + return; + + m_TimerQueriesD3D9.EndTimerQueries(); +} + +/* +SInt32 GfxDeviceD3D9::GetTimerQueryIdentifier() +{ + if(!gGraphicsCaps.hasTimerQuery) + return -1; + // Allocate more queries + if(m_QueryCount[m_CurrentQueryBuffer] >= m_GPUQueries[m_CurrentQueryBuffer].size()) + { + int count = std::max (m_QueryCount[m_CurrentQueryBuffer], 100); + IDirect3DQuery9* d3dQuery; + for( int i = 0; i < count; i++) + { + GetD3DDevice()->CreateQuery(D3DQUERYTYPE_TIMESTAMP, &d3dQuery); + // initialze more Query objects + m_GPUQueries[m_CurrentQueryBuffer].push_back(d3dQuery); + } + } + int index = m_QueryCount[m_CurrentQueryBuffer]++; + IDirect3DQuery9* currentQuery = m_GPUQueries[m_CurrentQueryBuffer][index]; + currentQuery ->Issue(D3DISSUE_END); + return index; +} + +ProfileTimeFormat GfxDeviceD3D9::GetTimerQueryData(SInt32 identifier, bool wait) +{ + if(!gGraphicsCaps.hasTimerQuery) + return 0; + + if(m_GPUQueries[m_CurrentQueryBuffer].size()<=identifier) + return 0; + + UINT64 time; + while (S_OK != m_GPUQueries[m_CurrentQueryBuffer][identifier]->GetData(&time, sizeof(time), D3DGETDATA_FLUSH)) {} + return (double)time * m_TimeMultiplier; +} + +void GfxDeviceD3D9::CleanupTimerQueries () +{ + if(!gGraphicsCaps.hasTimerQuery) + return; + + for(int buffer = 0; buffer < 2; buffer++) + { + for(int i = 0; i < m_GPUQueries[buffer].size(); i++) + m_GPUQueries[buffer][i]->Release(); + m_GPUQueries[buffer].clear(); + if(m_FrequencyQuery[buffer]) + m_FrequencyQuery[buffer]->Release(); + m_FrequencyQuery[buffer] = NULL; + m_QueryCount[buffer] = 0; + } +} +*/ + +#endif 
// ENABLE_PROFILER + + +// -------- editor only functions + +#if UNITY_EDITOR +void GfxDeviceD3D9::SetAntiAliasFlag( bool aa ) +{ + #pragma message("! implement SetAntiAliasFlag") +} + + +void GfxDeviceD3D9::DrawUserPrimitives( GfxPrimitiveType type, int vertexCount, UInt32 vertexChannels, const void* data, int stride ) +{ + if( vertexCount == 0 ) + return; + + AssertIf(vertexCount > 60000); // TODO: handle this by multi-batching + + AssertIf( !data || vertexCount < 0 || vertexChannels == 0 ); + + IDirect3DDevice9* dev = GetD3DDevice(); + + IDirect3DVertexDeclaration9* vertexDecl = GetD3DVertexDeclaration( vertexChannels ); + + ChannelAssigns channels; + for( int i = 0; i < kShaderChannelCount; ++i ) + { + if( !( vertexChannels & (1<<i) ) ) + continue; + VertexComponent destComponent = kSuitableVertexComponentForChannel[i]; + channels.Bind( (ShaderChannel)i, destComponent ); + } + D3D9_CALL(dev->SetVertexDeclaration( vertexDecl )); + UpdateChannelBindingsD3D( channels ); + BeforeDrawCall(false); + + HRESULT hr; + switch( type ) { + case kPrimitiveTriangles: + hr = D3D9_CALL_HR(dev->DrawPrimitiveUP( D3DPT_TRIANGLELIST, vertexCount/3, data, stride )); + m_Stats.AddDrawCall( vertexCount / 3, vertexCount ); + break; + case kPrimitiveQuads: + while (vertexCount > 0) + { + int vcount = std::min(vertexCount,kMaxImmediateVerticesPerDraw); + hr = D3D9_CALL_HR(dev->DrawIndexedPrimitiveUP(D3DPT_TRIANGLELIST, 0, vcount, vcount / 4 * 2, m_Imm.m_QuadsIB, D3DFMT_INDEX16, data, stride)); + m_Stats.AddDrawCall(vcount / 4 * 2, vcount); + data = (const UInt8*)data + vcount * stride; + vertexCount -= vcount; + } + break; + case kPrimitiveLines: + hr = D3D9_CALL_HR(dev->DrawPrimitiveUP( D3DPT_LINELIST, vertexCount/2, data, stride )); + m_Stats.AddDrawCall( vertexCount / 2, vertexCount ); + break; + case kPrimitiveLineStrip: + hr = D3D9_CALL_HR(dev->DrawPrimitiveUP( D3DPT_LINESTRIP, vertexCount-1, data, stride )); + m_Stats.AddDrawCall( vertexCount-1, vertexCount ); + break; + default: 
+ ErrorString("Primitive type not supported"); + return; + } + Assert(SUCCEEDED(hr)); +} + +int GfxDeviceD3D9::GetCurrentTargetAA() const +{ + return GetCurrentD3DFSAALevel(); +} + +GfxDeviceWindow* GfxDeviceD3D9::CreateGfxWindow( HWND window, int width, int height, DepthBufferFormat depthFormat, int antiAlias ) +{ + return new D3D9Window( GetD3DDevice(), window, width, height, depthFormat, antiAlias); +} + +#endif + +int GfxDeviceD3D9::GetCurrentTargetWidth() const +{ + return m_CurrTargetWidth; +} + +int GfxDeviceD3D9::GetCurrentTargetHeight() const +{ + return m_CurrTargetHeight; +} + +void GfxDeviceD3D9::SetCurrentTargetSize(int width, int height) +{ + m_CurrTargetWidth = width; + m_CurrTargetHeight = height; +} + +void GfxDeviceD3D9::SetCurrentWindowSize(int width, int height) +{ + m_CurrWindowWidth = m_CurrTargetWidth = width; + m_CurrWindowHeight = m_CurrTargetHeight = height; +} + + +#if UNITY_EDITOR + +static IDirect3DTexture9* FindD3D9TextureByID (TextureID tid) +{ + GfxDevice& device = GetRealGfxDevice(); + if (device.GetRenderer() != kGfxRendererD3D9) + return NULL; + GfxDeviceD3D9& dev = static_cast<GfxDeviceD3D9&>(device); + IDirect3DBaseTexture9* basetex = dev.GetTextures().GetTexture (tid); + if (!basetex) + return NULL; + if (basetex->GetType() != D3DRTYPE_TEXTURE) + return NULL; + return static_cast<IDirect3DTexture9*>(basetex); +} + +// In the editor, for drawing directly into HDC of D3D texture. +// Functions not defined in any header; declare prototypes manually: +// HDC AcquireHDCForTextureD3D9 (TextureID tid, int& outWidth, int& outHeight); +// void ReleaseHDCForTextureD3D9 (TextureID tid, HDC dc); +// AcquireHDCForTextureD3D9 _can_ return NULL if it can't get to DC (not D3D9, no +// texture, wrong texture format, ...). 
+ +HDC AcquireHDCForTextureD3D9 (TextureID tid, int& outWidth, int& outHeight) +{ + IDirect3DTexture9* tex = FindD3D9TextureByID (tid); + if (!tex) + return NULL; + SurfacePointer surface; + if (FAILED(tex->GetSurfaceLevel(0,&surface))) + return NULL; + D3DSURFACE_DESC desc; + if (FAILED(surface->GetDesc (&desc))) + return NULL; + outWidth = desc.Width; + outHeight = desc.Height; + HDC dc = NULL; + if (FAILED(surface->GetDC(&dc))) + return NULL; + return dc; +} + +void ReleaseHDCForTextureD3D9 (TextureID tid, HDC dc) +{ + IDirect3DTexture9* tex = FindD3D9TextureByID (tid); + if (!tex) + return; + SurfacePointer surface; + if (FAILED(tex->GetSurfaceLevel(0,&surface))) + return; + surface->ReleaseDC (dc); +} + +#endif + + +// ---------------------------------------------------------------------- +// verification of state + +#if GFX_DEVICE_VERIFY_ENABLE + +#include "Runtime/Utilities/Utility.h" + +void VerifyStateF(D3DRENDERSTATETYPE rs, float val, const char *str); +#define VERIFYF(s,t) VerifyState (s, t, #s " (" #t ")") +void VerifyStateI(D3DRENDERSTATETYPE rs, int val, const char *str); +#define VERIFYI(s,t) VerifyStateI (s, t, #s " (" #t ")") +void VerifyEnabled(D3DRENDERSTATETYPE rs, bool val, const char *str); +#define VERIFYENAB(s,t) VerifyEnabled ( s, t, #s " (" #t ")") + +static void VERIFY_PRINT( const char* format, ... 
) +{ + ErrorString( VFormat( format, va_list(&format + 1) ) ); +} + +const float kVerifyDelta = 0.0001f; + +void VerifyStateF(D3DRENDERSTATETYPE rs, float val, const char *str) +{ + float temp = 0; + GetD3DDevice()->GetRenderState(rs,(DWORD*)&temp); + if( !CompareApproximately(temp,val,kVerifyDelta) ) { + VERIFY_PRINT ("%s differs from cache (%f != %f)\n", str, val, temp); + } +} + +void VerifyStateI(D3DRENDERSTATETYPE rs, int val, const char *str) +{ + int temp; + GetD3DDevice()->GetRenderState(rs,(DWORD*)&temp); + if (temp != val) { + VERIFY_PRINT ("%s differs from cache (%i != %i)\n", str, val, temp); + } +} + +void VerifyEnabled(D3DRENDERSTATETYPE rs, bool val, const char *str) +{ + DWORD v; + GetD3DDevice()->GetRenderState(rs,&v); + bool temp = v==TRUE ? true : false; + if (temp != val) { + VERIFY_PRINT ("%s differs from cache (%d != %d)\n", str, val, temp); + } +} + +void GfxDeviceD3D9::VerifyState() +{ + // check if current state blocks match internal state + if (m_CurrBlendState != NULL) { + if (m_State.blending == 0) { + Assert (D3DBLEND_ONE == kBlendModeD3D9[m_CurrBlendState->sourceState.srcBlend]); + Assert (D3DBLEND_ZERO == kBlendModeD3D9[m_CurrBlendState->sourceState.dstBlend]); + } else { + Assert (m_State.srcBlend == kBlendModeD3D9[m_CurrBlendState->sourceState.srcBlend]); + Assert (m_State.destBlend == kBlendModeD3D9[m_CurrBlendState->sourceState.dstBlend]); + } + #if !UNITY_EDITOR // Editor does some funkiness when emulating alpha test, see SetBlendState + Assert (kCmpFuncD3D9[m_State.alphaFunc] == m_CurrBlendState->alphaFunc); + #endif + } + + m_State.Verify(); +} + + + +void DeviceStateD3D::Verify() +{ + #ifdef DUMMY_D3D9_CALLS + return; + #endif + if( !GetD3DDevice() ) { + ErrorString("Verify: no D3D device"); + return; + } + + if( depthFunc != kFuncUnknown ) { + VERIFYI( D3DRS_ZFUNC, kCmpFuncD3D9[depthFunc] ); + } + if( depthWrite != -1 ) { + VERIFYI( D3DRS_ZWRITEENABLE, (depthWrite ? 
TRUE : FALSE) ); + } + if( blending != -1 ) { + VERIFYENAB( D3DRS_ALPHABLENDENABLE, blending != 0 ); + if( blending ) { + VERIFYI( D3DRS_SRCBLEND, srcBlend ); + VERIFYI( D3DRS_DESTBLEND, destBlend ); + } + } + + if( alphaFunc != kFuncUnknown ) { + VERIFYENAB( D3DRS_ALPHATESTENABLE, alphaFunc != kFuncDisabled ); + if( alphaFunc != kFuncDisabled ) { + VERIFYI( D3DRS_ALPHAFUNC, kCmpFuncD3D9[alphaFunc] ); + if( alphaValue != -1 ) + VERIFYI( D3DRS_ALPHAREF, alphaValue*255.0f ); + } + } +} + +#endif // GFX_DEVICE_VERIFY_ENABLE + diff --git a/Runtime/GfxDevice/d3d/GfxDeviceD3D9.h b/Runtime/GfxDevice/d3d/GfxDeviceD3D9.h new file mode 100644 index 0000000..f648a35 --- /dev/null +++ b/Runtime/GfxDevice/d3d/GfxDeviceD3D9.h @@ -0,0 +1,361 @@ +#pragma once + +#include "D3D9Includes.h" +#include "VertexDeclarations.h" +#include "TexturesD3D9.h" +#include "Runtime/GfxDevice/ShaderConstantCache.h" +#include "Runtime/Shaders/MaterialProperties.h" +#include "VertexPipeD3D9.h" +#include "Runtime/GfxDevice/GfxDevice.h" +#include "Runtime/Shaders/GraphicsCaps.h" +#include "D3D9Context.h" +#include "Runtime/Math/FloatConversion.h" +#include "D3D9VBO.h" +#include "CombinerD3D.h" +#include "External/shaderlab/Library/program.h" +#include "External/shaderlab/Library/TextureBinding.h" +#include "External/shaderlab/Library/texenv.h" +#include "Runtime/Math/Matrix4x4.h" +#include "Runtime/GfxDevice/ChannelAssigns.h" +#include "Runtime/GfxDevice/BuiltinShaderParams.h" +#include "Runtime/Graphics/Image.h" +#include "PlatformDependent/Win/SmartComPointer.h" +#include "Runtime/Utilities/Utility.h" +#include "D3D9Utils.h" +#include "D3D9Window.h" +#include "GpuProgramsD3D.h" +#include "TimerQueryD3D9.h" + +typedef SmartComPointer<IDirect3DSurface9> SurfacePointer; + +struct TextureUnitStateD3D +{ + TextureID texID; + float bias; + + void Invalidate() + { + texID.m_ID = -1; + bias = 1.0e6f; + } +}; + +class GfxDeviceD3D9; + +struct DeviceStateD3D +{ + int viewport[4]; + int scissorRect[4]; + + 
CompareFunction depthFunc; + int depthWrite; // 0/1 or -1 + + int blending; + int srcBlend, destBlend, srcBlendAlpha, destBlendAlpha; // D3D modes + int blendOp, blendOpAlpha; // D3D modes + CompareFunction alphaFunc; + float alphaValue; + + CullMode culling; + D3DCULL d3dculling; + bool appBackfaceMode, userBackfaceMode, invertProjMatrix; + bool wireframe; + int scissor; + + // [0] is front, [1] is back, unless invertProjMatrix is true + D3DCMPFUNC stencilFunc[2]; + D3DSTENCILOP stencilFailOp[2], depthFailOp[2], depthPassOp[2]; + + float offsetFactor, offsetUnits; + + GpuProgram* activeGpuProgram[kShaderTypeCount]; + const GpuProgramParameters* activeGpuProgramParams[kShaderTypeCount]; + IUnknown* activeShader[kShaderTypeCount]; + + int colorWriteMask; // ColorWriteMask combinations + + int m_StencilRef; + + TextureUnitStateD3D texturesPS[kMaxSupportedTextureUnits]; + TextureUnitStateD3D texturesVS[4]; + + int fixedFunctionPS; + + bool m_DeviceLost; + + bool m_SoftwareVP; + UInt32 m_NeedsSofwareVPFlags; + + void Invalidate( GfxDeviceD3D9& device ); + void Verify(); +}; + +// TODO: optimize this. Right now we just send off whole 8 float3 UVs with each +// immediate mode vertex. We could at least detect the number of them used from +// ImmediateTexCoord calls. 
+struct ImmediateVertexD3D { + D3DVECTOR vertex; + D3DVECTOR normal; + D3DCOLOR color; + D3DVECTOR texCoords[8]; +}; + +struct ImmediateModeD3D { + std::vector<ImmediateVertexD3D> m_Vertices; + ImmediateVertexD3D m_Current; + GfxPrimitiveType m_Mode; + IDirect3DVertexDeclaration9* m_ImmVertexDecl; + UInt16* m_QuadsIB; + + ImmediateModeD3D(); + ~ImmediateModeD3D(); + void Invalidate(); +}; + +class GfxDeviceD3D9 : public GfxThreadableDevice +{ +public: + struct DeviceBlendStateD3D9 : public DeviceBlendState + { + UInt8 renderTargetWriteMask; + D3DCMPFUNC alphaFunc; + }; + + struct DeviceDepthStateD3D9 : public DeviceDepthState + { + D3DCMPFUNC depthFunc; + }; + + struct DeviceStencilStateD3D9 : public DeviceStencilState + { + D3DCMPFUNC stencilFuncFront; + D3DSTENCILOP stencilFailOpFront; + D3DSTENCILOP depthFailOpFront; + D3DSTENCILOP depthPassOpFront; + D3DCMPFUNC stencilFuncBack; + D3DSTENCILOP stencilFailOpBack; + D3DSTENCILOP depthFailOpBack; + D3DSTENCILOP depthPassOpBack; + }; + + + typedef std::map< GfxBlendState, DeviceBlendStateD3D9, memcmp_less<GfxBlendState> > CachedBlendStates; + typedef std::map< GfxDepthState, DeviceDepthStateD3D9, memcmp_less<GfxDepthState> > CachedDepthStates; + typedef std::map< GfxStencilState, DeviceStencilStateD3D9, memcmp_less<GfxStencilState> > CachedStencilStates; + typedef std::map< GfxRasterState, DeviceRasterState, memcmp_less<GfxRasterState> > CachedRasterStates; + + +public: + GfxDeviceD3D9(); + GFX_API ~GfxDeviceD3D9(); + + GFX_API void InvalidateState(); + #if GFX_DEVICE_VERIFY_ENABLE + GFX_API void VerifyState(); + #endif + + GFX_API void Clear(UInt32 clearFlags, const float color[4], float depth, int stencil); + GFX_API void SetUserBackfaceMode( bool enable ); + GFX_API void SetWireframe(bool wire); + GFX_API bool GetWireframe() const; + GFX_API void SetInvertProjectionMatrix( bool enable ); + GFX_API bool GetInvertProjectionMatrix() const; + + GFX_API GPUSkinningInfo *CreateGPUSkinningInfo() { return NULL; } + 
GFX_API void DeleteGPUSkinningInfo(GPUSkinningInfo *info) { AssertBreak(false); } + GFX_API void SkinOnGPU( GPUSkinningInfo * info, bool lastThisFrame ) { AssertBreak(false); } + GFX_API void UpdateSkinSourceData(GPUSkinningInfo *info, const void *vertData, const BoneInfluence *skinData, bool dirty) { AssertBreak(false); } + GFX_API void UpdateSkinBonePoses(GPUSkinningInfo *info, const int boneCount, const Matrix4x4f* poses) { AssertBreak(false); } + + GFX_API DeviceBlendState* CreateBlendState(const GfxBlendState& state); + GFX_API DeviceDepthState* CreateDepthState(const GfxDepthState& state); + GFX_API DeviceStencilState* CreateStencilState(const GfxStencilState& state); + GFX_API DeviceRasterState* CreateRasterState(const GfxRasterState& state); + + GFX_API void SetBlendState(const DeviceBlendState* state, float alphaRef); + GFX_API void SetRasterState(const DeviceRasterState* state); + GFX_API void SetDepthState(const DeviceDepthState* state); + GFX_API void SetStencilState(const DeviceStencilState* state, int stencilRef); + GFX_API void SetSRGBWrite (const bool); + GFX_API bool GetSRGBWrite (); + + GFX_API void SetWorldMatrix( const float matrix[16] ); + GFX_API void SetViewMatrix( const float matrix[16] ); + GFX_API void SetProjectionMatrix(const Matrix4x4f& matrix); + GFX_API void GetMatrix( float outMatrix[16] ) const; + + GFX_API const float* GetWorldMatrix() const ; + GFX_API const float* GetViewMatrix() const ; + GFX_API const float* GetProjectionMatrix() const ; + GFX_API const float* GetDeviceProjectionMatrix() const; + + GFX_API void SetNormalizationBackface( NormalizationMode mode, bool backface ); + GFX_API void SetFFLighting( bool on, bool separateSpecular, ColorMaterialMode colorMaterial ); + GFX_API void SetMaterial( const float ambient[4], const float diffuse[4], const float specular[4], const float emissive[4], const float shininess ); + GFX_API void SetColor( const float color[4] ); + GFX_API void SetViewport( int x, int y, int width, int 
height ); + GFX_API void GetViewport( int* port ) const; + + GFX_API void SetScissorRect( int x, int y, int width, int height ); + GFX_API void DisableScissor(); + GFX_API bool IsScissorEnabled() const; + GFX_API void GetScissorRect( int values[4] ) const; + + GFX_API bool IsCombineModeSupported( unsigned int combiner ); + GFX_API TextureCombinersHandle CreateTextureCombiners( int count, const ShaderLab::TextureBinding* texEnvs, const ShaderLab::PropertySheet* props, bool hasVertexColorOrLighting, bool usesAddSpecular ); + GFX_API void DeleteTextureCombiners( TextureCombinersHandle& textureCombiners ); + GFX_API void SetTextureCombinersThreadable( TextureCombinersHandle textureCombiners, const TexEnvData* texEnvData, const Vector4f* texColors ); + GFX_API void SetTextureCombiners( TextureCombinersHandle textureCombiners, const ShaderLab::PropertySheet* props ); + + GFX_API void SetTexture (ShaderType shaderType, int unit, int samplerUnit, TextureID texture, TextureDimension dim, float bias); + GFX_API void SetTextureParams( TextureID texture, TextureDimension texDim, TextureFilterMode filter, TextureWrapMode wrap, int anisoLevel, bool hasMipMap, TextureColorSpace colorSpace ); + GFX_API void SetTextureTransform( int unit, TextureDimension dim, TexGenMode texGen, bool identity, const float matrix[16]); + GFX_API void SetTextureName ( TextureID texture, const char* name ) { } + + GFX_API void SetShadersThreadable (GpuProgram* programs[kShaderTypeCount], const GpuProgramParameters* params[kShaderTypeCount], UInt8 const * const paramsBuffer[kShaderTypeCount]); + GFX_API bool IsShaderActive( ShaderType type ) const; + GFX_API void DestroySubProgram( ShaderLab::SubProgram* subprogram ); + + GFX_API void DisableLights( int startLight ); + GFX_API void SetLight( int light, const GfxVertexLight& data); + GFX_API void SetAmbient( const float ambient[4] ); + + GFX_API void EnableFog(const GfxFogParams& fog); + GFX_API void DisableFog(); + + GFX_API VBO* CreateVBO(); + GFX_API 
void DeleteVBO( VBO* vbo ); + GFX_API DynamicVBO& GetDynamicVBO(); + + GFX_API RenderSurfaceHandle CreateRenderColorSurface (TextureID textureID, int width, int height, int samples, int depth, TextureDimension dim, RenderTextureFormat format, UInt32 createFlags); + GFX_API RenderSurfaceHandle CreateRenderDepthSurface(TextureID textureID, int width, int height, int samples, TextureDimension dim, DepthBufferFormat depthFormat, UInt32 createFlags); + GFX_API void DestroyRenderSurface(RenderSurfaceHandle& rs); + GFX_API void SetRenderTargets (int count, RenderSurfaceHandle* colorHandles, RenderSurfaceHandle depthHandle, int mipLevel, CubemapFace face = kCubeFaceUnknown); + GFX_API void ResolveColorSurface (RenderSurfaceHandle srcHandle, RenderSurfaceHandle dstHandle); + GFX_API void ResolveDepthIntoTexture (RenderSurfaceHandle colorHandle, RenderSurfaceHandle depthHandle); + GFX_API RenderSurfaceHandle GetActiveRenderColorSurface(int index); + GFX_API RenderSurfaceHandle GetActiveRenderDepthSurface(); + GFX_API void SetSurfaceFlags(RenderSurfaceHandle surf, UInt32 flags, UInt32 keepFlags); + + GFX_API void UploadTexture2D( TextureID texture, TextureDimension dimension, UInt8* srcData, int srcSize, int width, int height, TextureFormat format, int mipCount, UInt32 uploadFlags, int skipMipLevels, TextureUsageMode usageMode, TextureColorSpace colorSpace ); + GFX_API void UploadTextureSubData2D( TextureID texture, UInt8* srcData, int srcSize, int mipLevel, int x, int y, int width, int height, TextureFormat format, TextureColorSpace colorSpace ); + GFX_API void UploadTextureCube( TextureID texture, UInt8* srcData, int srcSize, int faceDataSize, int size, TextureFormat format, int mipCount, UInt32 uploadFlags, TextureColorSpace colorSpace ); + GFX_API void UploadTexture3D( TextureID texture, UInt8* srcData, int srcSize, int width, int height, int depth, TextureFormat format, int mipCount, UInt32 uploadFlags ); + GFX_API void DeleteTexture( TextureID texture ); + + GFX_API 
PresentMode GetPresentMode(); + + GFX_API void BeginFrame(); + GFX_API void EndFrame(); + GFX_API void PresentFrame(); + GFX_API bool IsValidState(); + GFX_API bool HandleInvalidState(); + GFX_API void FinishRendering(); + + // Immediate mode rendering + GFX_API void ImmediateVertex( float x, float y, float z ); + GFX_API void ImmediateNormal( float x, float y, float z ); + GFX_API void ImmediateColor( float r, float g, float b, float a ); + GFX_API void ImmediateTexCoordAll( float x, float y, float z ); + GFX_API void ImmediateTexCoord( int unit, float x, float y, float z ); + GFX_API void ImmediateBegin( GfxPrimitiveType type ); + GFX_API void ImmediateEnd(); + + GFX_API bool CaptureScreenshot( int left, int bottom, int width, int height, UInt8* rgba32 ); + GFX_API bool ReadbackImage( ImageReference& image, int left, int bottom, int width, int height, int destX, int destY ); + GFX_API void GrabIntoRenderTexture(RenderSurfaceHandle rs, RenderSurfaceHandle rd, int x, int y, int width, int height); + + GFX_API void BeforeDrawCall( bool immediateMode ); + + GFX_API bool IsPositionRequiredForTexGen(int texStageIndex) const { return false; } + GFX_API bool IsNormalRequiredForTexGen(int texStageIndex) const { return false; } + GFX_API bool IsPositionRequiredForTexGen() const { return false; } + GFX_API bool IsNormalRequiredForTexGen() const { return false; } + + GFX_API void DiscardContents (RenderSurfaceHandle& rs) {} + +#if ENABLE_PROFILER + GFX_API void BeginProfileEvent (const char* name); + GFX_API void EndProfileEvent (); + + TimerQueriesD3D9& GetTimerQueries() {return m_TimerQueriesD3D9;} + GFX_API GfxTimerQuery* CreateTimerQuery(); + GFX_API void DeleteTimerQuery(GfxTimerQuery* query); + GFX_API void BeginTimerQueries(); + GFX_API void EndTimerQueries(); + #endif + + #if UNITY_EDITOR + GFX_API void SetAntiAliasFlag( bool aa ); + GFX_API void DrawUserPrimitives( GfxPrimitiveType type, int vertexCount, UInt32 vertexChannels, const void* data, int stride ); + 
GFX_API int GetCurrentTargetAA() const; + GFX_API GfxDeviceWindow* CreateGfxWindow( HWND window, int width, int height, DepthBufferFormat depthFormat, int antiAlias ); + #endif + + GFX_API int GetCurrentTargetWidth() const; + GFX_API int GetCurrentTargetHeight() const; + GFX_API void SetCurrentTargetSize(int width, int height); + GFX_API void SetCurrentWindowSize(int width, int height); + + GFX_API void* GetNativeGfxDevice(); + GFX_API void* GetNativeTexturePointer(TextureID id); + GFX_API intptr_t CreateExternalTextureFromNative(intptr_t nativeTex); + GFX_API void UpdateExternalTextureFromNative(TextureID tex, intptr_t nativeTex); + + GFX_API void ResetDynamicResources(); + + IDirect3DVertexBuffer9* GetAllWhiteVertexStream(); + + VertexDeclarations& GetVertexDecls() { return m_VertexDecls; } + + const DeviceStateD3D& GetState() const { return m_State; } + DeviceStateD3D& GetState() { return m_State; } + VertexShaderConstantCache& GetVertexShaderConstantCache() { return m_VSConstantCache; } + PixelShaderConstantCache& GetPixelShaderConstantCache() { return m_PSConstantCache; } + + const VertexPipeConfig& GetVertexPipeConfig() const { return m_VertexConfig; } + VertexPipeConfig& GetVertexPipeConfig() { return m_VertexConfig; } + const VertexPipeDataD3D9& GetVertexPipeData() const { return m_VertexData; } + VertexPipeDataD3D9& GetVertexPipeData() { return m_VertexData; } + TexturesD3D9& GetTextures() { return m_Textures; } + + void PushEventQuery(); + +private: + + DeviceStateD3D m_State; + ImmediateModeD3D m_Imm; + VertexPipeConfig m_VertexConfig; + TransformState m_TransformState; + VertexPipeDataD3D9 m_VertexData; + VertexPipePrevious m_VertexPrevious; + + DeviceBlendStateD3D9* m_CurrBlendState; + DeviceDepthStateD3D9* m_CurrDepthState; + const DeviceStencilStateD3D9* m_CurrStencilState; + DeviceRasterState* m_CurrRasterState; + int m_CurrTargetWidth; + int m_CurrTargetHeight; + int m_CurrWindowWidth; + int m_CurrWindowHeight; + + IDirect3DVertexBuffer9* 
m_AllWhiteVertexStream; + + VertexDeclarations m_VertexDecls; + TexturesD3D9 m_Textures; + DynamicVBO* m_DynamicVBO; + + CachedBlendStates m_CachedBlendStates; + CachedDepthStates m_CachedDepthStates; + CachedStencilStates m_CachedStencilStates; + CachedRasterStates m_CachedRasterStates; + + VertexShaderConstantCache m_VSConstantCache; + PixelShaderConstantCache m_PSConstantCache; + +#if ENABLE_PROFILER + TimerQueriesD3D9 m_TimerQueriesD3D9; +#endif +}; + +GfxDeviceD3D9& GetD3D9GfxDevice(); diff --git a/Runtime/GfxDevice/d3d/GpuProgramsD3D.cpp b/Runtime/GfxDevice/d3d/GpuProgramsD3D.cpp new file mode 100644 index 0000000..9a67a54 --- /dev/null +++ b/Runtime/GfxDevice/d3d/GpuProgramsD3D.cpp @@ -0,0 +1,474 @@ +#include "UnityPrefix.h" +#include "GpuProgramsD3D.h" +#include "External/shaderlab/Library/ShaderLabErrors.h" +#include "Runtime/Shaders/GraphicsCaps.h" +#include "Runtime/GfxDevice/GfxDevice.h" +#include "Runtime/Math/Vector4.h" +#include "External/shaderlab/Library/shaderlab.h" +#include "External/shaderlab/Library/texenv.h" +#include "External/DirectX/builds/dx9include/d3dx9.h" +#include "D3D9Context.h" +#include "Runtime/GfxDevice/ShaderConstantCache.h" +#include "D3D9Utils.h" +#include "ShaderPatchingD3D9.h" + +#define ENABLE_GPU_PROGRAM_STATS 0 + + +#if ENABLE_GPU_PROGRAM_STATS +typedef std::map<ShaderLab::FastPropertyName, int> PropertyCount; +PropertyCount s_StatCounts[kShaderTypeCount]; +void PrintDebugGpuProgramStats () +{ + typedef std::pair<std::string, int> NameIntPair; + struct Sorter { + bool operator() (const NameIntPair& a, const NameIntPair& b) const { + return a.second > b.second; + } + }; + for (int i = kShaderVertex; i < kShaderTypeCount; ++i) + { + std::vector<NameIntPair> sorted; + sorted.reserve (s_StatCounts[i].size()); + int totalCount = 0; + for (PropertyCount::const_iterator it = s_StatCounts[i].begin(); it != s_StatCounts[i].end(); ++it) + { + sorted.push_back (std::make_pair(it->first.GetName(), it->second)); + totalCount += 
it->second; + } + std::sort (sorted.begin(), sorted.end(), Sorter()); + printf_console ("%i Shader Stats: %i props, %i requests\n", i, sorted.size(), totalCount); + for (size_t j = 0; j < sorted.size(); ++j) + { + printf_console (" %-25s %6i %5.1f%%\n", sorted[j].first.c_str(), sorted[j].second, sorted[j].second*100.0/totalCount); + } + s_StatCounts[i].clear(); + } +} +#define ADD_TO_VS_STATS(name) ++s_StatCounts[kShaderVertex][name] +#define ADD_TO_PS_STATS(name) ++s_StatCounts[kShaderFragment][name] +#else +#define ADD_TO_VS_STATS(name) +#define ADD_TO_PS_STATS(name) +#endif + + +VertexShaderConstantCache& GetD3D9VertexShaderConstantCache(); // GfxDeviceD3D9.cpp +PixelShaderConstantCache& GetD3D9PixelShaderConstantCache(); // GfxDeviceD3D9.cpp + + +// non static; used by CombinerD3D.cpp and VertexPipeD3D9.cpp +ID3DXBuffer* AssembleD3DShader (const std::string& source) +{ + ID3DXBuffer *compiledShader, *compileErrors; + + // Skip validation of shaders at assembly time when in release mode. Saves + // some time when loading them. 
+ DWORD flags = D3DXSHADER_SKIPVALIDATION; + #if DEBUGMODE + flags = 0; + #endif + + HRESULT hr = D3DXAssembleShader( source.c_str(), source.size(), NULL, NULL, flags, &compiledShader, &compileErrors ); + if( FAILED(hr) ) + { + if (compileErrors && compileErrors->GetBufferSize() > 0) + { + std::string error = Format ("Shader error in '%s': D3D shader assembly failed with: %s\nShader Assembly: %s", g_LastParsedShaderName.c_str(), (const char*)compileErrors->GetBufferPointer(), source.c_str()); + compileErrors->Release(); + ErrorString (error); + } + if( compiledShader ) + compiledShader->Release(); + return NULL; + } + + return compiledShader; +} + +// -------------------------------------------------------------------------- + +template <typename CACHE> +static const UInt8* ApplyValueParametersD3D9 (CACHE& constantCache, const UInt8* buffer, const GpuProgramParameters::ValueParameterArray& valueParams) +{ + GpuProgramParameters::ValueParameterArray::const_iterator valueParamsEnd = valueParams.end(); + for (GpuProgramParameters::ValueParameterArray::const_iterator i = valueParams.begin(); i != valueParamsEnd; ++i) + { + if (i->m_RowCount == 1 && i->m_ArraySize == 1) + { + // Apply vector parameters + const Vector4f* val = reinterpret_cast<const Vector4f*>(buffer); + constantCache.SetValues(i->m_Index, val->GetPtr(), 1); + buffer += sizeof(Vector4f); + } + else + { + // matrix/array + int size = *reinterpret_cast<const int*>(buffer); buffer += sizeof(int); + Assert (i->m_RowCount == 4 && size == 16); + const Matrix4x4f* val = reinterpret_cast<const Matrix4x4f*>(buffer); + Matrix4x4f transposed; + TransposeMatrix4x4 (val, &transposed); + const float *ptr = transposed.GetPtr(); + constantCache.SetValues (i->m_Index, ptr, 4); + buffer += size * sizeof(float); + } + } + return buffer; +} + + + +// -------------------------------------------------------------------------- + +D3D9VertexShader::D3D9VertexShader( const std::string& source ) +: m_FogFailed(0) +{ + for (int i 
= 0; i < kFogModeCount; ++i) + { + m_Shaders[i] = NULL; + } + m_ImplType = kShaderImplVertex; + if( !Create(source) ) + m_NotSupported = true; +} + +D3D9VertexShader::~D3D9VertexShader () +{ + for (int i = 0; i < kFogModeCount; ++i) + { + if( m_Shaders[i] ) + { + ULONG refCount = m_Shaders[i]->Release(); + AssertIf( refCount != 0 ); + } + } +} + + +bool D3D9VertexShader::Create( const std::string& source ) +{ + // fast skip 3.0 shaders on unsupporting hardware + bool isShaderModel3 = !strncmp(source.c_str(), "vs_3_0", 6); + if( gGraphicsCaps.shaderCaps < kShaderLevel3 && isShaderModel3 ) + return false; + + if (isShaderModel3) + m_GpuProgramLevel = kGpuProgramSM3; + else + { + bool isShaderModel1 = !strncmp(source.c_str(), "vs_1_1", 6); + m_GpuProgramLevel = isShaderModel1 ? kGpuProgramSM1 : kGpuProgramSM2; + } + + HRESULT hr; + IDirect3DDevice9* dev = GetD3DDevice(); + + // assemble shader + ID3DXBuffer *compiledShader = AssembleD3DShader( source ); + if( !compiledShader ) + { + return false; + } + + // create shader + hr = dev->CreateVertexShader( (const DWORD*)compiledShader->GetBufferPointer(), &m_Shaders[0] ); + compiledShader->Release(); + if( FAILED(hr) ) + { + printf_console( "D3D shader create error for shader %s\n", source.c_str() ); + return false; + } + + if (isShaderModel3) + { + m_SourceForFog = source; + } + + return true; +} + +void D3D9VertexShader::ApplyGpuProgram (const GpuProgramParameters& params, const UInt8* buffer) +{ + GfxDevice& device = GetRealGfxDevice(); + IDirect3DDevice9* dev = GetD3DDevice(); + VertexShaderConstantCache& constantCache = GetD3D9VertexShaderConstantCache(); + + const GpuProgramParameters::ValueParameterArray& valueParams = params.GetValueParams(); + buffer = ApplyValueParametersD3D9<VertexShaderConstantCache>(constantCache, buffer, valueParams); + + // Apply textures + if (gGraphicsCaps.hasVertexTextures) + { + const GpuProgramParameters::TextureParameterList& textureParams = params.GetTextureParams(); + const 
GpuProgramParameters::TextureParameterList::const_iterator textureParamsEnd = textureParams.end(); + for( GpuProgramParameters::TextureParameterList::const_iterator i = textureParams.begin(); i != textureParamsEnd; ++i ) + { + const GpuProgramParameters::TextureParameter& t = *i; + const TexEnvData* texdata = reinterpret_cast<const TexEnvData*>(buffer); + device.SetTexture (kShaderVertex, t.m_Index, 0, texdata->textureID, static_cast<TextureDimension>(texdata->texDim), 0); + buffer += sizeof(*texdata); + } + } +} + +IDirect3DVertexShader9* D3D9VertexShader::GetShader (FogMode fog, bool& outResetToNoFog) +{ + int index = 0; + outResetToNoFog = false; + if (fog > kFogDisabled && !m_SourceForFog.empty()) + { + Assert (fog >= 0 && fog < kFogModeCount); + + if (m_Shaders[fog]) + { + // already have patched fog shader + index = fog; + } + else if (!(m_FogFailed & (1<<fog))) + { + // patch fog shader on demand + std::string src = m_SourceForFog; + + if (PatchVertexShaderFogD3D9 (src)) + { + // assemble & create the shader + ID3DXBuffer *compiledShader = AssembleD3DShader (src); + if (compiledShader) + { + HRESULT hr = GetD3DDevice()->CreateVertexShader ((const DWORD*)compiledShader->GetBufferPointer(), &m_Shaders[fog]); + compiledShader->Release(); + if (SUCCEEDED(hr)) + { + index = fog; + } + else + { + printf_console ("D3D vertex shader create error for patched fog mode %d shader %s\n", (int)fog, src.c_str()); + } + } + } + } + if (index == 0) + { + outResetToNoFog = true; + m_FogFailed |= (1<<fog); + } + } + return m_Shaders[index]; +} + +// -------------------------------------------------------------------------- + +D3D9PixelShader::D3D9PixelShader( const std::string& source ) +: m_FogFailed(0) +{ + for (int i = 0; i < kFogModeCount; ++i) + { + m_Shaders[i] = NULL; + m_FogRegisters[i] = NULL; + } + m_ImplType = kShaderImplFragment; + if( !Create(source) ) + m_NotSupported = true; +} + +D3D9PixelShader::~D3D9PixelShader () +{ + for (int i = 0; i < kFogModeCount; ++i) 
+ { + if( m_Shaders[i] ) + { + ULONG refCount = m_Shaders[i]->Release(); + AssertIf( refCount != 0 ); + } + } +} + +bool D3D9PixelShader::Create( const std::string& source ) +{ + // fast skip 3.0 shaders on unsupporting hardware + bool isShaderModel3 = !strncmp(source.c_str(), "ps_3_0", 6); + if( gGraphicsCaps.shaderCaps < kShaderLevel3 && isShaderModel3 ) + return false; + + m_GpuProgramLevel = isShaderModel3 ? kGpuProgramSM3 : kGpuProgramSM2; + + HRESULT hr; + IDirect3DDevice9* dev = GetD3DDevice(); + + // assemble shader + ID3DXBuffer *compiledShader = AssembleD3DShader( source ); + if( !compiledShader ) + { + return false; + } + + // create shader + hr = dev->CreatePixelShader( (const DWORD*)compiledShader->GetBufferPointer(), &m_Shaders[0] ); + compiledShader->Release(); + if( FAILED(hr) ) + { + printf_console( "D3D shader create error for shader %s\n", source.c_str() ); + return false; + } + + if (isShaderModel3) + { + m_SourceForFog = source; + } + + return true; +} + +void D3D9PixelShader::ApplyGpuProgram (const GpuProgramParameters& params, const UInt8* buffer) +{ + GfxDevice& device = GetRealGfxDevice(); + IDirect3DDevice9* dev = GetD3DDevice(); + PixelShaderConstantCache& constantCache = GetD3D9PixelShaderConstantCache(); + + const GpuProgramParameters::ValueParameterArray& valueParams = params.GetValueParams(); + buffer = ApplyValueParametersD3D9<PixelShaderConstantCache>(constantCache, buffer, valueParams); + + // Apply textures + const GpuProgramParameters::TextureParameterList& textureParams = params.GetTextureParams(); + GpuProgramParameters::TextureParameterList::const_iterator textureParamsEnd = textureParams.end(); + for( GpuProgramParameters::TextureParameterList::const_iterator i = textureParams.begin(); i != textureParamsEnd; ++i ) + { + const GpuProgramParameters::TextureParameter& t = *i; + const TexEnvData* texdata = reinterpret_cast<const TexEnvData*>(buffer); + ApplyTexEnvData (t.m_Index, t.m_SamplerIndex, *texdata); + buffer += 
sizeof(*texdata); + } + + // Apply fog parameters if needed + if (!m_SourceForFog.empty()) + { + const GfxFogParams& fog = device.GetFogParams(); + if (fog.mode > kFogDisabled && !(m_FogFailed & (1<<fog.mode))) + { + int reg = m_FogRegisters[fog.mode]; + constantCache.SetValues (reg, fog.color.GetPtr(), 1); + float params[4]; + params[0] = fog.density * 1.2011224087f ; // density / sqrt(ln(2)) + params[1] = fog.density * 1.4426950408f; // density / ln(2) + if (fog.mode == kFogLinear) + { + float diff = fog.end - fog.start; + float invDiff = Abs(diff) > 0.0001f ? 1.0f/diff : 0.0f; + params[2] = -invDiff; + params[3] = fog.end * invDiff; + } + else + { + params[2] = 0.0f; + params[3] = 0.0f; + } + constantCache.SetValues (reg+1, params, 1); + } + } +} + +static int FindUnusedConstantRegister (const std::string& src, const GpuProgramParameters& params) +{ + int maxRegisterUsed = -1; + + const GpuProgramParameters::ValueParameterArray& valueParams = params.GetValueParams(); + for (GpuProgramParameters::ValueParameterArray::const_iterator it = valueParams.begin(), itEnd = valueParams.end(); it != itEnd; ++it) + { + int idx = it->m_Index + it->m_RowCount - 1; + if (idx > maxRegisterUsed) + maxRegisterUsed = idx; + } + + // Built-ins + const BuiltinShaderParamIndices& builtins = params.GetBuiltinParams(); + for (int i = 0; i < kShaderInstanceMatCount; ++i) + { + int index = builtins.mat[i].gpuIndex; + if (index >= 0 && index + 3 > maxRegisterUsed) + maxRegisterUsed = index + 3; + } + + // Explicit constants in the shader ("def c*") + size_t pos = 0; + const size_t n = src.size(); + while ((pos = src.find("def c", pos)) != std::string::npos) + { + pos += 5; // skip "def c" + int reg = -1; + sscanf(src.c_str() + pos, "%d", &reg); + if (reg > maxRegisterUsed) + maxRegisterUsed = reg; + } + + return maxRegisterUsed + 1; +} + +IDirect3DPixelShader9* D3D9PixelShader::GetShader(FogMode fog, const GpuProgramParameters& params) +{ + int index = 0; + if (fog > kFogDisabled && 
!m_SourceForFog.empty()) + { + Assert (fog >= 0 && fog < kFogModeCount); + + if (m_Shaders[fog]) + { + // already have patched fog shader + index = fog; + } + else if (!(m_FogFailed & (1<<fog))) + { + // patch fog shader on demand + std::string src = m_SourceForFog; + + // find constant register that we'll use to store fog params + int reg = FindUnusedConstantRegister (src, params); + m_FogRegisters[fog] = reg; + + if (PatchPixelShaderFogD3D9 (src, fog, reg, reg+1)) + { + // assemble & create the shader + ID3DXBuffer *compiledShader = AssembleD3DShader (src); + if (compiledShader) + { + HRESULT hr = GetD3DDevice()->CreatePixelShader ((const DWORD*)compiledShader->GetBufferPointer(), &m_Shaders[fog]); + compiledShader->Release(); + if (SUCCEEDED(hr)) + { + index = fog; + } + else + { + printf_console ("D3D pixel shader create error for patched fog mode %d shader %s\n", (int)fog, src.c_str()); + } + } + } + + if (index == 0) + m_FogFailed |= (1<<fog); + } + } + return m_Shaders[index]; +} + + + +// -------------------------------------------------------------------------- + +#if ENABLE_UNIT_TESTS +#include "External/UnitTest++/src/UnitTest++.h" +SUITE (GpuProgramsD3DTests) +{ + +TEST(FindUnusedConstantRegisterCanHandleUnsortedParams) +{ + GpuProgramParameters pp; + pp.AddVectorParam(1,kShaderParamFloat,4,"A",-1,NULL); + pp.AddVectorParam(0,kShaderParamFloat,4,"B",-1,NULL); + pp.MakeReady(); // this does sort, but sorts by name; NOT the GPU index! 
+ CHECK_EQUAL(2,FindUnusedConstantRegister("", pp)); +} + +} // SUITE +#endif // ENABLE_UNIT_TESTS diff --git a/Runtime/GfxDevice/d3d/GpuProgramsD3D.h b/Runtime/GfxDevice/d3d/GpuProgramsD3D.h new file mode 100644 index 0000000..6b21fa9 --- /dev/null +++ b/Runtime/GfxDevice/d3d/GpuProgramsD3D.h @@ -0,0 +1,40 @@ +#pragma once + +#include "D3D9Includes.h" +#include "Runtime/GfxDevice/GpuProgram.h" + + +class D3D9VertexShader : public GpuProgram { +public: + D3D9VertexShader( const std::string& source ); + virtual ~D3D9VertexShader(); + + virtual void ApplyGpuProgram (const GpuProgramParameters& params, const UInt8* buffer); + IDirect3DVertexShader9* GetShader(FogMode fog, bool& outResetToNoFog); + IDirect3DVertexShader9* GetShaderAtFogIndex(FogMode fog) { return m_Shaders[fog]; } + +private: + bool Create( const std::string& source ); + + std::string m_SourceForFog; // original source, used for fog patching if needed + IDirect3DVertexShader9* m_Shaders[kFogModeCount]; + unsigned m_FogFailed; // bit per fog mode +}; + +class D3D9PixelShader : public GpuProgram { +public: + D3D9PixelShader( const std::string& source ); + virtual ~D3D9PixelShader(); + + virtual void ApplyGpuProgram (const GpuProgramParameters& params, const UInt8* buffer); + IDirect3DPixelShader9* GetShader(FogMode fog, const GpuProgramParameters& params); + IDirect3DPixelShader9* GetShaderAtFogIndex(FogMode fog) { return m_Shaders[fog]; } + +private: + bool Create( const std::string& source ); + + std::string m_SourceForFog; // original source, used for fog patching if needed + IDirect3DPixelShader9* m_Shaders[kFogModeCount]; + int m_FogRegisters[kFogModeCount]; + unsigned m_FogFailed; // bit per fog mode +}; diff --git a/Runtime/GfxDevice/d3d/GraphicsCapsD3D9.cpp b/Runtime/GfxDevice/d3d/GraphicsCapsD3D9.cpp new file mode 100644 index 0000000..f1d95c8 --- /dev/null +++ b/Runtime/GfxDevice/d3d/GraphicsCapsD3D9.cpp @@ -0,0 +1,384 @@ +#include "UnityPrefix.h" +#include "Runtime/Shaders/GraphicsCaps.h" 
+#include "D3D9Context.h" +#include "Runtime/Utilities/Utility.h" +#include "PlatformDependent/Win/WinDriverUtils.h" +#include "D3D9Utils.h" +#include <Shlwapi.h> + +#define CAPS_DEBUG_DISABLE_RT 0 + + +extern D3DFORMAT kD3D9RenderTextureFormats[kRTFormatCount]; + + +extern D3DDEVTYPE g_D3DDevType; +extern DWORD g_D3DAdapter; + +static bool IsTextureFormatSupported( D3DFORMAT format ) +{ + if( format == D3DFMT_UNKNOWN ) + return false; + HRESULT hr = GetD3DObject()->CheckDeviceFormat( g_D3DAdapter, g_D3DDevType, GetD3DFormatForChecks(), 0, D3DRTYPE_TEXTURE, format ); + return SUCCEEDED( hr ); +} +static bool IsSRGBTextureReadSupported( D3DFORMAT format ) +{ + if( format == D3DFMT_UNKNOWN ) + return false; + HRESULT hr = GetD3DObject()->CheckDeviceFormat (g_D3DAdapter, g_D3DDevType, GetD3DFormatForChecks(), D3DUSAGE_QUERY_SRGBREAD, D3DRTYPE_TEXTURE, format); + return SUCCEEDED( hr ); +} +static bool IsSRGBTextureWriteSupported( D3DFORMAT format ) +{ + if( format == D3DFMT_UNKNOWN ) + return false; + HRESULT hr = GetD3DObject()->CheckDeviceFormat (g_D3DAdapter, g_D3DDevType, GetD3DFormatForChecks(), D3DUSAGE_QUERY_SRGBWRITE, D3DRTYPE_TEXTURE, format); + return SUCCEEDED( hr ); +} +static bool IsRenderTextureFormatSupported( D3DFORMAT format ) +{ + if( format == D3DFMT_UNKNOWN ) + return false; + HRESULT hr = GetD3DObject()->CheckDeviceFormat( g_D3DAdapter, g_D3DDevType, GetD3DFormatForChecks(), D3DUSAGE_RENDERTARGET, D3DRTYPE_TEXTURE, format ); + return SUCCEEDED( hr ); +} + +D3DFORMAT GetD3D9TextureFormat( TextureFormat inFormat ); // TexturesD3D9.cpp + + +enum { + kVendorDummyRef = 0x0000, + kVendor3DLabs = 0x3d3d, + kVendorMatrox = 0x102b, + kVendorS3 = 0x5333, + kVendorSIS = 0x1039, + kVendorXGI = 0x18ca, + kVendorIntel = 0x8086, + kVendorATI = 0x1002, + kVendorNVIDIA = 0x10de, + kVendorTrident = 0x1023, + kVendorImgTech = 0x104a, + kVendorVIAS3G = 0x1106, + kVendor3dfx = 0x121a, + kVendorParallels= 0x1ab8, + kVendorMicrosoft= 0x1414, + kVendorVMWare = 0x15ad, 
+}; +struct KnownVendors { + DWORD vendorId; + const char* name; +}; +static KnownVendors s_KnownVendors[] = { + { kVendorDummyRef, "REFERENCE" }, + { kVendor3DLabs, "3dLabs" }, + { kVendorMatrox, "Matrox" }, + { kVendorS3, "S3" }, + { kVendorSIS, "SIS" }, + { kVendorXGI, "XGI" }, + { kVendorIntel, "Intel" }, + { kVendorATI, "ATI" }, + { kVendorNVIDIA, "NVIDIA" }, + { kVendorTrident, "Trident" }, + { kVendorImgTech, "Imagination Technologies" }, + { kVendorVIAS3G, "VIA/S3" }, + { kVendor3dfx, "3dfx" }, + { kVendorParallels, "Parallels" }, + { kVendorMicrosoft, "Microsoft" }, + { kVendorVMWare, "VMWare" }, +}; +static int kKnownVendorsSize = sizeof(s_KnownVendors)/sizeof(s_KnownVendors[0]); + + +void GraphicsCaps::InitD3D9() +{ + IDirect3D9* d3dobject = GetD3DObject(); + d3dobject->GetDeviceCaps( g_D3DAdapter, g_D3DDevType, &d3d.d3dcaps ); + + // get renderer, vendor & driver information + D3DADAPTER_IDENTIFIER9 adapterInfo; + d3dobject->GetAdapterIdentifier( g_D3DAdapter, 0, &adapterInfo ); + adapterInfo.Driver[MAX_DEVICE_IDENTIFIER_STRING-1] = 0; + adapterInfo.Description[MAX_DEVICE_IDENTIFIER_STRING-1] = 0; + adapterInfo.DeviceName[31] = 0; + rendererString = adapterInfo.Description; + + if (g_D3DDevType == D3DDEVTYPE_REF) + { + adapterInfo.VendorId = kVendorDummyRef; + rendererString = "REF on " + rendererString; + } + + int i; + for( i = 0; i < kKnownVendorsSize; ++i ) + { + if( s_KnownVendors[i].vendorId == adapterInfo.VendorId ) + { + vendorString = s_KnownVendors[i].name; + break; + } + } + if( i == kKnownVendorsSize ) + { + vendorString = Format( "Unknown (ID=%x)", adapterInfo.VendorId ); + } + windriverutils::VersionInfo driverVersion( HIWORD(adapterInfo.DriverVersion.HighPart), LOWORD(adapterInfo.DriverVersion.HighPart), + HIWORD(adapterInfo.DriverVersion.LowPart), LOWORD(adapterInfo.DriverVersion.LowPart) ); + driverVersionString = Format( "%s %i.%i.%i.%i", adapterInfo.Driver, + HIWORD(adapterInfo.DriverVersion.HighPart), 
LOWORD(adapterInfo.DriverVersion.HighPart), + HIWORD(adapterInfo.DriverVersion.LowPart), LOWORD(adapterInfo.DriverVersion.LowPart) ); + driverLibraryString = driverVersionString; + fixedVersionString = "Direct3D 9.0c [" + driverVersionString + ']'; + + rendererID = adapterInfo.DeviceId; + vendorID = adapterInfo.VendorId; + + // We can't use GetAvailableTextureMem here because the device is not created yet! + // And besides that, it would return much more than VRAM on Vista (virtualization and so on). + // Use WMI instead. + int vramMB; + const char* vramMethod = ""; + if (g_D3DDevType != D3DDEVTYPE_REF) + vramMB = windriverutils::GetVideoMemorySizeMB (d3dobject->GetAdapterMonitor(g_D3DAdapter), &vramMethod); + else + vramMB = 128; + videoMemoryMB = vramMB; + + // On windows, we always output D3D info. There is so much variety that it always helps! + printf_console( "Direct3D:\n" ); + printf_console( " Version: %s\n", fixedVersionString.c_str() ); + printf_console( " Renderer: %s\n", rendererString.c_str() ); + printf_console( " Vendor: %s\n", vendorString.c_str() ); + printf_console( " VRAM: %i MB (via %s)\n", (int)videoMemoryMB, vramMethod ); + + maxVSyncInterval = 0; + if( d3d.d3dcaps.PresentationIntervals & D3DPRESENT_INTERVAL_ONE ) + { + maxVSyncInterval = 1; + if( d3d.d3dcaps.PresentationIntervals & D3DPRESENT_INTERVAL_TWO ) + maxVSyncInterval = 2; + } + + DWORD declTypesFloat16 = D3DDTCAPS_FLOAT16_2 | D3DDTCAPS_FLOAT16_4; + has16BitFloatVertex = (d3d.d3dcaps.DeclTypes & declTypesFloat16) == declTypesFloat16; + needsToSwizzleVertexColors = true; + + bool usesSoftwareVP = !(d3d.d3dcaps.DevCaps & D3DDEVCAPS_HWTRANSFORMANDLIGHT); + if( usesSoftwareVP ) + maxLights = 8; // software T&L always has 8 lights + else + maxLights = clamp<unsigned int>( d3d.d3dcaps.MaxActiveLights, 0, 8 ); + + // Texture sizes + maxTextureSize = std::min( d3d.d3dcaps.MaxTextureWidth, d3d.d3dcaps.MaxTextureHeight ); + maxRenderTextureSize = maxTextureSize; + maxCubeMapSize = 
maxTextureSize; + + has3DTexture = d3d.d3dcaps.TextureCaps & D3DPTEXTURECAPS_VOLUMEMAP; + maxTexUnits = d3d.d3dcaps.MaxSimultaneousTextures; + maxTexImageUnits = 16; + maxTexCoords = d3d.d3dcaps.MaxSimultaneousTextures; + if (maxTexCoords > 8) + maxTexCoords = 8; + + // In theory, vertex texturing is texture format dependent. However, in practice the caps lie, + // especially on NVIDIA hardware. + // + // ATI cards: all DX10+ GPUs report all texture formats as vertex texture capable (good!) + // Intel cards: all SM3.0+ GPUs report all texture formats as vertex texture capable (good!) + // NV cards: all DX10+ GPUs report only floating point formats as capable, but all others actually work as well. + // GeForce 6&7 only report R32F and A32R32G32B32F, and only those work. + // + // So we check for R16F support; this will return true on all GPUs that can handle ALL + // texture formats. + hasVertexTextures = ((LOWORD(d3d.d3dcaps.VertexShaderVersion) >= (3<<8)+0)) && + SUCCEEDED(d3dobject->CheckDeviceFormat( g_D3DAdapter, g_D3DDevType, GetD3DFormatForChecks(), D3DUSAGE_QUERY_VERTEXTEXTURE, D3DRTYPE_TEXTURE, D3DFMT_R16F)); + + hasAnisoFilter = d3d.d3dcaps.RasterCaps & D3DPRASTERCAPS_ANISOTROPY; + maxAnisoLevel = hasAnisoFilter ? 
d3d.d3dcaps.MaxAnisotropy : 1; + hasMipLevelBias = d3d.d3dcaps.RasterCaps & D3DPRASTERCAPS_MIPMAPLODBIAS; + + for( i = 0; i < kTexFormatPCCount; ++i ) + { + d3d.hasBaseTextureFormat[i] = IsTextureFormatSupported( GetD3D9TextureFormat( static_cast<TextureFormat>(i) ) ); + supportsTextureFormat[i] = d3d.hasBaseTextureFormat[i]; + } + + hasS3TCCompression = IsTextureFormatSupported(D3DFMT_DXT1) && IsTextureFormatSupported(D3DFMT_DXT3) && IsTextureFormatSupported(D3DFMT_DXT5); + d3d.hasTextureFormatA8 = IsTextureFormatSupported(D3DFMT_A8); + d3d.hasTextureFormatL8 = IsTextureFormatSupported(D3DFMT_L8); + d3d.hasTextureFormatA8L8 = IsTextureFormatSupported(D3DFMT_A8L8); + d3d.hasTextureFormatL16 = IsTextureFormatSupported(D3DFMT_L16); + + if (!(d3d.d3dcaps.TextureCaps & D3DPTEXTURECAPS_POW2)) + npot = kNPOTFull; + else if (d3d.d3dcaps.TextureCaps & D3DPTEXTURECAPS_NONPOW2CONDITIONAL) + npot = kNPOTRestricted; + else + npot = kNPOTNone; + + npotRT = npot; + + hasSRGBReadWrite = + IsSRGBTextureReadSupported(GetD3D9TextureFormat(static_cast<TextureFormat>(kTexFormatRGB24))) + && IsSRGBTextureReadSupported(GetD3D9TextureFormat(static_cast<TextureFormat>(kTexFormatRGBA32))) + && IsSRGBTextureReadSupported(GetD3D9TextureFormat(static_cast<TextureFormat>(kTexFormatARGB32))) + && IsSRGBTextureReadSupported(GetD3D9TextureFormat(static_cast<TextureFormat>(kTexFormatBGR24))) + && IsSRGBTextureReadSupported(GetD3D9TextureFormat(static_cast<TextureFormat>(kTexFormatDXT1))) + && IsSRGBTextureReadSupported(GetD3D9TextureFormat(static_cast<TextureFormat>(kTexFormatDXT3))) + && IsSRGBTextureReadSupported(GetD3D9TextureFormat(static_cast<TextureFormat>(kTexFormatDXT5))); + + // we only do sRGB writes to an 8 bit buffer ... + hasSRGBReadWrite = hasSRGBReadWrite && IsSRGBTextureWriteSupported(D3DFMT_A8R8G8B8); + + hasInstancing = false; //@TODO: instancing! 
+ + hasBlendSquare = (d3d.d3dcaps.SrcBlendCaps & D3DPBLENDCAPS_SRCCOLOR) && (d3d.d3dcaps.DestBlendCaps & D3DPBLENDCAPS_DESTCOLOR); + hasSeparateAlphaBlend = d3d.d3dcaps.PrimitiveMiscCaps & D3DPMISCCAPS_SEPARATEALPHABLEND; + hasBlendSub = hasBlendMinMax = d3d.d3dcaps.PrimitiveMiscCaps & D3DPMISCCAPS_BLENDOP; + + hasAutoMipMapGeneration = d3d.d3dcaps.Caps2 & D3DCAPS2_CANAUTOGENMIPMAP; + + for (int i = 0; i < kRTFormatCount; ++i) + { + if (i == kRTFormatDefault || i == kRTFormatDefaultHDR || i == kRTFormatShadowMap) + continue; + supportsRenderTextureFormat[i] = IsRenderTextureFormatSupported(kD3D9RenderTextureFormats[i]); + } + hasRenderToTexture = supportsRenderTextureFormat[kRTFormatARGB32]; + supportsRenderTextureFormat[kRTFormatDefault] = hasRenderToTexture; + + hasRenderToCubemap = hasRenderToTexture; + hasStencil = true; + hasRenderTargetStencil = true; + hasTwoSidedStencil = d3d.d3dcaps.StencilCaps & D3DSTENCILCAPS_TWOSIDED; + maxMRTs = clamp<int> (d3d.d3dcaps.NumSimultaneousRTs, 1, kMaxSupportedRenderTargets); + if (!(d3d.d3dcaps.PrimitiveMiscCaps & D3DPMISCCAPS_MRTPOSTPIXELSHADERBLENDING)) + maxMRTs = 1; + + d3d.hasATIDepthFormat16 = SUCCEEDED( d3dobject->CheckDeviceFormat( g_D3DAdapter, g_D3DDevType, GetD3DFormatForChecks(), D3DUSAGE_DEPTHSTENCIL, D3DRTYPE_TEXTURE, kD3D9FormatDF16 ) ); + supportsRenderTextureFormat[kRTFormatDepth] |= d3d.hasATIDepthFormat16; + d3d.hasNVDepthFormatINTZ = SUCCEEDED( d3dobject->CheckDeviceFormat( g_D3DAdapter, g_D3DDevType, GetD3DFormatForChecks(), D3DUSAGE_DEPTHSTENCIL, D3DRTYPE_TEXTURE, kD3D9FormatINTZ ) ); + supportsRenderTextureFormat[kRTFormatDepth] |= d3d.hasNVDepthFormatINTZ; + d3d.hasNVDepthFormatRAWZ = SUCCEEDED( d3dobject->CheckDeviceFormat( g_D3DAdapter, g_D3DDevType, GetD3DFormatForChecks(), D3DUSAGE_DEPTHSTENCIL, D3DRTYPE_TEXTURE, kD3D9FormatRAWZ ) ); + d3d.hasNULLFormat = SUCCEEDED( d3dobject->CheckDeviceFormat( g_D3DAdapter, g_D3DDevType, GetD3DFormatForChecks(), D3DUSAGE_RENDERTARGET, D3DRTYPE_SURFACE, 
kD3D9FormatNULL ) ); + d3d.hasDepthResolveRESZ = SUCCEEDED( d3dobject->CheckDeviceFormat( g_D3DAdapter, g_D3DDevType, GetD3DFormatForChecks(), D3DUSAGE_RENDERTARGET, D3DRTYPE_SURFACE, kD3D9FormatRESZ ) ); + + hasNativeDepthTexture = d3d.hasATIDepthFormat16 || d3d.hasNVDepthFormatINTZ; + hasStencilInDepthTexture = d3d.hasNVDepthFormatINTZ; + hasNativeShadowMap = SUCCEEDED( d3dobject->CheckDeviceFormat( g_D3DAdapter, g_D3DDevType, GetD3DFormatForChecks(), D3DUSAGE_DEPTHSTENCIL, D3DRTYPE_TEXTURE, D3DFMT_D16 ) ); + supportsRenderTextureFormat[kRTFormatShadowMap] = hasRenderToTexture && hasNativeShadowMap; + + #if CAPS_DEBUG_DISABLE_RT + hasRenderToTexture = hasRenderToCubemap = false; + for (int i = 0; i < kRTFormatCount; ++i) + supportsRenderTextureFormat[i] = false; + maxMRTs = 1; + #endif + + // This is somewhat dummy; actual resolving of FSAA levels and types supported happens later when choosing presentation parameters. + hasMultiSample = true; + + // Driver bugs/workarounds following + DetectDriverBugsD3D9( adapterInfo.VendorId, driverVersion ); + + // safeguards + maxRenderTextureSize = std::min( maxRenderTextureSize, maxTextureSize ); + maxCubeMapSize = std::min( maxCubeMapSize, maxTextureSize ); + + // in the very end, figure out shader capabilities level (after all workarounds are applied) + if( LOWORD(d3d.d3dcaps.PixelShaderVersion) < (3<<8)+0 ) + { + // no ps3.0: 2.x shaders + shaderCaps = kShaderLevel2; + } + else + { + // has everything we care about! 
+ shaderCaps = kShaderLevel3; + } + + // Print overall caps & D3D9 hacks used + printf_console( " Caps: Shader=%i DepthRT=%i NativeDepth=%i NativeShadow=%i DF16=%i INTZ=%i RAWZ=%i NULL=%i RESZ=%i SlowINTZ=%i\n", + shaderCaps, + supportsRenderTextureFormat[kRTFormatDepth], hasNativeDepthTexture, hasNativeShadowMap, + d3d.hasATIDepthFormat16, + d3d.hasNVDepthFormatINTZ, d3d.hasNVDepthFormatRAWZ, + d3d.hasNULLFormat, d3d.hasDepthResolveRESZ, + d3d.slowINTZSampling + ); +} + + +enum WindowsVersion { + kWindows2000 = 50, // 5.0 + kWindowsXP = 51, // 5.1 + kWindows2003 = 52, // 5.2 + kWindowsVista = 60, // 6.0 +}; + +static int GetWindowsVersion() +{ + OSVERSIONINFO osinfo; + osinfo.dwOSVersionInfoSize = sizeof(OSVERSIONINFO); + if( !GetVersionEx(&osinfo) ) + return 0; + + if( osinfo.dwPlatformId == VER_PLATFORM_WIN32_NT ) + return osinfo.dwMajorVersion * 10 + osinfo.dwMinorVersion % 10; + else + return 0; +} + + +void GraphicsCaps::DetectDriverBugsD3D9( UInt32 vendorCode, const windriverutils::VersionInfo& driverVersion ) +{ + d3d.slowINTZSampling = false; + + + if( vendorCode == kVendorNVIDIA ) + { + // GeForceFX and earlier have sort-of-buggy render to cubemap. E.g. skybox draws correctly, + // but objects do not appear. Huh. + const int kShaderVersion30 = (3 << 8) + 0; + bool isFXOrEarlier = LOWORD(gGraphicsCaps.d3d.d3dcaps.PixelShaderVersion) < kShaderVersion30; + if( isFXOrEarlier ) + { + printf_console( "D3D: disabling render to cubemap on pre-GeForce6\n" ); + buggyCameraRenderToCubemap = true; + } + + // Also, native shadow maps seem to have problems on GeForce FX; perhaps it needs to use tex2Dproj instead of tex2D, + // or something (FX 5200). Since FX cards are really dying, and the only left ones are FX 5200/5500, + // let's just turn shadows off. You don't want them on those cards anyway! 
+ if (isFXOrEarlier) + { + printf_console ("D3D: disabling shadows on pre-GeForce6\n"); + hasNativeShadowMap = false; + hasNativeDepthTexture = false; + supportsRenderTextureFormat[kRTFormatDepth] = false; + } + + // GeForceFX on 6.14.10.9147 drivers has buggy fullscreen FSAA. + // It displays everything stretched, as if AA samples map to pixels directly. + if( isFXOrEarlier && driverVersion <= windriverutils::VersionInfo(6,14,10,9147) ) + { + printf_console( "D3D: disabling fullscreen AA (buggy pre-GeForce6 driver)\n" ); + buggyFullscreenFSAA = true; + } + } + if( vendorCode == kVendorATI ) + { + // On D3D9 Radeon HD cards have big performance hit when using INTZ texture for both sampling & depth testing + // (Radeon HD 3xxx-5xxx, Catalyst 9.10 to 10.5). Talking with AMD, we found that using RESZ to copy it into a separate + // texture is a decent workaround that results in ok performance. + if (d3d.hasDepthResolveRESZ) + d3d.slowINTZSampling = true; + } + + // Sanitize VRAM amount + if( videoMemoryMB < 32 ) { + printf_console("D3D: VRAM amount suspiciously low (less than 32MB)\n"); + videoMemoryMB = 32; + } +} diff --git a/Runtime/GfxDevice/d3d/RenderTextureD3D.cpp b/Runtime/GfxDevice/d3d/RenderTextureD3D.cpp new file mode 100644 index 0000000..0d444b3 --- /dev/null +++ b/Runtime/GfxDevice/d3d/RenderTextureD3D.cpp @@ -0,0 +1,583 @@ +#include "UnityPrefix.h" +#include "Runtime/GfxDevice/GfxDevice.h" +#include "Runtime/Shaders/GraphicsCaps.h" +#include "Runtime/Graphics/ScreenManager.h" +#include "Runtime/Graphics/Image.h" +#include "D3D9Context.h" +#include "TexturesD3D9.h" +#include "RenderTextureD3D.h" +#include "D3D9Utils.h" + + +// defined in GfxDeviceD3D9.cpp +void UnbindTextureD3D9( TextureID texture ); + + +// define to 1 to print lots of activity info +#define DEBUG_RENDER_TEXTURES 0 + + +D3DFORMAT kD3D9RenderTextureFormats[kRTFormatCount] = { + D3DFMT_A8R8G8B8, + D3DFMT_R32F, // Depth + D3DFMT_A16B16G16R16F, + D3DFMT_D16, // Shadowmap + D3DFMT_R5G6B5, + 
D3DFMT_A4R4G4B4, + D3DFMT_A1R5G5B5, + (D3DFORMAT)-1, // Default + D3DFMT_A2R10G10B10, + (D3DFORMAT)-1, // DefaultHDR + D3DFMT_A16B16G16R16, + D3DFMT_A32B32G32R32F, + D3DFMT_G32R32F, + D3DFMT_G16R16F, + D3DFMT_R32F, + D3DFMT_R16F, + D3DFMT_L8, // R8 + (D3DFORMAT)-1, // ARGBInt + (D3DFORMAT)-1, // RGInt + (D3DFORMAT)-1, // RInt + (D3DFORMAT)-1, // BGRA32 +}; + + +static D3DMULTISAMPLE_TYPE FindSupportedD3DMultiSampleType (D3DFORMAT d3dformat, int maxSamples) +{ + BOOL windowed = !GetScreenManager().IsFullScreen(); + for (int samples = maxSamples; samples >= 1; samples--) + { + D3DMULTISAMPLE_TYPE msaa = GetD3DMultiSampleType( samples ); + HRESULT hr = GetD3DObject()->CheckDeviceMultiSampleType( g_D3DAdapter, g_D3DDevType, d3dformat, windowed, msaa, NULL ); + if (SUCCEEDED(hr)) + return msaa; + } + return D3DMULTISAMPLE_NONE; +} + +static bool InitD3DRenderColorSurface (RenderColorSurfaceD3D9& rs, TexturesD3D9& textures) +{ + IDirect3DDevice9* dev = GetD3DDevice(); + + HRESULT hr; + DWORD usage; + + if (rs.textureID.m_ID) + { + // Regular render texture + usage = D3DUSAGE_RENDERTARGET; + int mipCount = 1; + if (rs.flags & kSurfaceCreateMipmap && !IsDepthRTFormat(rs.format)) + { + Assert(gGraphicsCaps.hasAutoMipMapGeneration); + if (rs.flags & kSurfaceCreateAutoGenMips) + usage |= D3DUSAGE_AUTOGENMIPMAP; + else + mipCount = CalculateMipMapCount3D (rs.width, rs.height, 1); + } + if (rs.dim == kTexDim2D) + { + IDirect3DTexture9* rt; + D3DFORMAT d3dformat = D3DFMT_UNKNOWN; + d3dformat = kD3D9RenderTextureFormats[rs.format]; + hr = dev->CreateTexture (rs.width, rs.height, mipCount, usage, d3dformat, D3DPOOL_DEFAULT, &rt, NULL); + if( FAILED(hr) ) + { + ErrorString( Format( "RenderTexture creation error: CreateTexture failed [%s]", GetD3D9Error(hr) ) ); + return false; + } + rs.m_Texture = rt; + rt->GetSurfaceLevel( 0, &rs.m_Surface ); + } + else if (rs.dim == kTexDimCUBE) + { + Assert(rs.width == rs.height); + IDirect3DCubeTexture9* rt; + hr = dev->CreateCubeTexture 
(rs.width, mipCount, usage, D3DFMT_A8R8G8B8, D3DPOOL_DEFAULT, &rt, NULL); + if( FAILED(hr) ) + { + ErrorString( Format( "RenderTexture creation error: CreateCubeTexture failed [%s]", GetD3D9Error(hr) ) ); + return false; + } + rs.m_Texture = rt; + } + else + { + ErrorString("RenderTexture creation error: D3D9 only supports 2D or CUBE textures"); + return false; + } + } + else + { + D3DFORMAT d3dformat = D3DFMT_UNKNOWN; + D3DMULTISAMPLE_TYPE msaa = D3DMULTISAMPLE_NONE; + if (!(rs.flags & kSurfaceCreateNeverUsed)) + { + // Create surface without texture to resolve from + // Find supported MSAA type based on device and format + d3dformat = kD3D9RenderTextureFormats[rs.format]; + msaa = FindSupportedD3DMultiSampleType( d3dformat, rs.samples ); + } + else + { + // Dummy render target surface (only needed to make D3D runtime happy) + d3dformat = gGraphicsCaps.d3d.hasNULLFormat ? kD3D9FormatNULL : D3DFMT_A8R8G8B8; + } + IDirect3DSurface9* ds = NULL; + hr = dev->CreateRenderTarget( rs.width, rs.height, d3dformat, msaa, 0, FALSE, &ds, NULL ); + if (FAILED(hr)) + { + ErrorString( Format( "RenderTexture creation error: CreateRenderTarget failed [%s]", GetD3D9Error(hr) ) ); + return false; + } + rs.m_Surface = ds; + } + + // add to textures map + if (rs.textureID.m_ID) + textures.AddTexture( rs.textureID, rs.m_Texture ); + + return true; +} + +static bool InitD3DRenderDepthSurface (RenderDepthSurfaceD3D9& rs, TexturesD3D9& textures) +{ + IDirect3DDevice9* dev = GetD3DDevice(); + + HRESULT hr; + + if (!rs.textureID.m_ID) + { + // Create depth buffer surface + if( rs.depthFormat == kDepthFormatNone ) + { + rs.m_Surface = NULL; + } + else + { + // Create surface without texture to resolve from + // Find supported MSAA type based on device and format + D3DFORMAT d3dformat = (rs.depthFormat == kDepthFormat16 ? 
D3DFMT_D16 : D3DFMT_D24S8); + D3DMULTISAMPLE_TYPE msaa = FindSupportedD3DMultiSampleType( d3dformat, rs.samples ); + hr = dev->CreateDepthStencilSurface( rs.width, rs.height, d3dformat, msaa, 0, TRUE, &rs.m_Surface, NULL ); + REGISTER_EXTERNAL_GFX_ALLOCATION_REF(rs.m_Surface, rs.width * rs.height * GetBPPFromD3DFormat(d3dformat), &rs); + if( FAILED(hr) ) + { + ErrorString( Format( "RenderTexture creation error: CreateDepthStencilSurface failed [%s]", GetD3D9Error(hr) ) ); + return false; + } + } + } + else + { + // Create depth buffer as texture + D3DFORMAT d3dformat = D3DFMT_UNKNOWN; + if (rs.flags & kSurfaceCreateShadowmap) + { + Assert (rs.depthFormat == kDepthFormat16); + Assert (gGraphicsCaps.hasNativeShadowMap); + d3dformat = D3DFMT_D16; + } + else + { + Assert (gGraphicsCaps.hasNativeDepthTexture); + if (gGraphicsCaps.d3d.hasNVDepthFormatINTZ) + d3dformat = kD3D9FormatINTZ; + else if (gGraphicsCaps.d3d.hasATIDepthFormat16) + d3dformat = kD3D9FormatDF16; + else + { + AssertString ("No available native depth format"); + } + } + IDirect3DTexture9* texture = NULL; + hr = dev->CreateTexture (rs.width, rs.height, 1, D3DUSAGE_DEPTHSTENCIL, d3dformat, D3DPOOL_DEFAULT, &texture, NULL); + if( FAILED(hr) ) + { + ErrorString( Format( "RenderTexture creation error: CreateTexture failed [%s]", GetD3D9Error(hr) ) ); + return false; + } + rs.m_Texture = texture; + texture->GetSurfaceLevel (0, &rs.m_Surface); + } + + if (rs.textureID.m_ID) + textures.AddTexture( rs.textureID, rs.m_Texture ); + + return true; +} + + +static RenderColorSurfaceD3D9* s_ActiveColorTargets[kMaxSupportedRenderTargets]; +static int s_ActiveColorTargetCount; +static RenderDepthSurfaceD3D9* s_ActiveDepthTarget = NULL; +static int s_ActiveMip = 0; +static CubemapFace s_ActiveFace = kCubeFaceUnknown; + +static RenderColorSurfaceD3D9* s_ActiveColorBackBuffer = NULL; +static RenderDepthSurfaceD3D9* s_ActiveDepthBackBuffer = NULL; + +// on dx editor we can switch swapchain underneath +// so lets do smth 
like gl's default FBO +// it will be used only from "user" code and we will select proper swap chain here +static RenderColorSurfaceD3D9* s_DummyColorBackBuffer = NULL; +static RenderDepthSurfaceD3D9* s_DummyDepthBackBuffer = NULL; + +RenderSurfaceBase* DummyColorBackBuferD3D9() +{ + if(s_DummyColorBackBuffer == 0) + { + static RenderColorSurfaceD3D9 __bb; + RenderSurfaceBase_InitColor(__bb); + __bb.backBuffer = true; + + s_DummyColorBackBuffer = &__bb; + } + return s_DummyColorBackBuffer; +} + +RenderSurfaceBase* DummyDepthBackBuferD3D9() +{ + if(s_DummyDepthBackBuffer == 0) + { + static RenderDepthSurfaceD3D9 __bb; + RenderSurfaceBase_InitDepth(__bb); + __bb.backBuffer = true; + + s_DummyDepthBackBuffer = &__bb; + } + return s_DummyDepthBackBuffer; +} + +bool SetRenderTargetD3D9 (int count, RenderSurfaceHandle* colorHandles, RenderSurfaceHandle depthHandle, int mipLevel, CubemapFace face, int& outRenderTargetWidth, int& outRenderTargetHeight, bool& outIsBackBuffer) +{ + RenderColorSurfaceD3D9* rcolorZero = reinterpret_cast<RenderColorSurfaceD3D9*>(colorHandles[0].object); + RenderDepthSurfaceD3D9* rdepth = reinterpret_cast<RenderDepthSurfaceD3D9*>( depthHandle.object ); + + #if DEBUG_RENDER_TEXTURES + printf_console( "RT: SetRenderTargetD3D9 color=%i depth=%i (%x) mip=%i face=%i\n", + rcolorZero ? rcolorZero->textureID.m_ID : 0, + rdepth ? rdepth->textureID.m_ID : 0, rdepth ? 
rdepth->m_Surface : 0, + mipLevel, face ); + #endif + + outIsBackBuffer = false; + + if (count == s_ActiveColorTargetCount && s_ActiveDepthTarget == rdepth && s_ActiveMip == mipLevel && s_ActiveFace == face) + { + bool colorsSame = true; + for (int i = 0; i < count; ++i) + { + if (s_ActiveColorTargets[i] != reinterpret_cast<RenderColorSurfaceD3D9*>(colorHandles[i].object)) + colorsSame = false; + } + if (colorsSame) + return false; + } + + IDirect3DDevice9* dev = GetD3DDeviceNoAssert(); + // Happens at startup, when deleting all RenderTextures + if( !dev ) + { + Assert (!rcolorZero && !rdepth); + return false; + } + + HRESULT hr = S_FALSE; + + Assert(colorHandles[0].IsValid() && depthHandle.IsValid()); + Assert(rcolorZero->backBuffer == rdepth->backBuffer); + + outIsBackBuffer = rcolorZero->backBuffer; + if (!outIsBackBuffer) + GetRealGfxDevice().GetFrameStats().AddRenderTextureChange(); // stats + + if(rcolorZero->backBuffer && rcolorZero == s_DummyColorBackBuffer) + colorHandles[0].object = rcolorZero = s_ActiveColorBackBuffer; + if(rdepth->backBuffer && rdepth == s_DummyDepthBackBuffer) + depthHandle.object = rdepth = s_ActiveDepthBackBuffer; + + + // color surfaces + for (int i = 0; i < count; ++i) + { + RenderColorSurfaceD3D9* rcolor = reinterpret_cast<RenderColorSurfaceD3D9*>(colorHandles[i].object); + if(rcolor) + { + // color surface + Assert (rcolor->colorSurface); + // Make sure this texture is not used when setting it as render target + if (rcolor->textureID.m_ID) + UnbindTextureD3D9( rcolor->textureID ); + + // Set color surface + IDirect3DSurface9* surface = NULL; + bool needsRelease = false; + if( !rcolor->m_Texture ) + { + Assert (rcolor->m_Surface); + surface = rcolor->m_Surface; + #if DEBUG_RENDER_TEXTURES + printf_console( " RT: color buffer plain\n" ); + #endif + } + else if (rcolor->dim == kTexDimCUBE) + { + Assert (rcolor->m_Texture); + IDirect3DCubeTexture9* rt = static_cast<IDirect3DCubeTexture9*>( rcolor->m_Texture ); + hr = 
rt->GetCubeMapSurface((D3DCUBEMAP_FACES)(D3DCUBEMAP_FACE_POSITIVE_X + clamp<int>(face,0,5)), mipLevel, &surface); + needsRelease = true; + } + else + { + #if DEBUG_RENDER_TEXTURES + printf_console( " RT: color buffer texture %i\n", rcolor->textureID.m_ID ); + #endif + Assert (rcolor->m_Texture); + IDirect3DTexture9* rt = static_cast<IDirect3DTexture9*>( rcolor->m_Texture ); + hr = rt->GetSurfaceLevel (mipLevel, &surface); + needsRelease = true; + } + + if( surface ) + { + hr = dev->SetRenderTarget (i, surface); + if( FAILED(hr) ) { + ErrorString( Format("RenderTexture error: failed to set render target [%s]", GetD3D9Error(hr)) ); + } + if (needsRelease) + surface->Release(); + } + else + { + ErrorString( Format("RenderTexture error: failed to retrieve color surface [%s]", GetD3D9Error(hr)) ); + } + outRenderTargetWidth = rcolor->width; + outRenderTargetHeight = rcolor->height; + } + else + { + hr = dev->SetRenderTarget (i, NULL); + } + } + for (int i = count; i < s_ActiveColorTargetCount; ++i) + { + hr = dev->SetRenderTarget (i, NULL); + } + + + // depth surface + Assert (!rdepth || !rdepth->colorSurface); + + if (rdepth && rdepth->m_Surface) + { + // Make sure this texture is not used when setting it as render target + if (rdepth->textureID.m_ID) + UnbindTextureD3D9( rdepth->textureID ); + + // Set depth surface + if( rdepth->m_Surface ) + { + #if DEBUG_RENDER_TEXTURES + if (rdepth->textureID.m_ID) + printf_console( " RT: depth buffer texture %i\n", rdepth->textureID.m_ID ); + else + printf_console( " RT: depth buffer plain %x\n", rdepth->m_Surface ); + #endif + hr = dev->SetDepthStencilSurface( rdepth->m_Surface ); + if( FAILED(hr) ) { + ErrorString( Format("RenderTexture error: failed to set depth stencil [%s]", GetD3D9Error(hr)) ); + } + g_D3DHasDepthStencil = true; + D3DSURFACE_DESC desc; + desc.Format = D3DFMT_D16; + rdepth->m_Surface->GetDesc( &desc ); + g_D3DDepthStencilFormat = desc.Format; + } + } + else + { + #if DEBUG_RENDER_TEXTURES + printf_console( " 
RT: depth buffer none\n" ); + #endif + dev->SetDepthStencilSurface( NULL ); + g_D3DHasDepthStencil = false; + g_D3DDepthStencilFormat = D3DFMT_UNKNOWN; + } + + for (int i = 0; i < count; ++i) + s_ActiveColorTargets[i] = reinterpret_cast<RenderColorSurfaceD3D9*>(colorHandles[i].object); + s_ActiveColorTargetCount = count; + s_ActiveDepthTarget = rdepth; + s_ActiveFace = face; + s_ActiveMip = mipLevel; + + if (outIsBackBuffer) + { + s_ActiveColorBackBuffer = (RenderColorSurfaceD3D9*)colorHandles[0].object; + s_ActiveDepthBackBuffer = (RenderDepthSurfaceD3D9*)depthHandle.object; + + // we are rendering to "default FBO", so current target is dummy + // as a side effect, if we change swap chain, it will be set correctly, and active remain valid + s_ActiveColorTargets[0] = s_DummyColorBackBuffer; + s_ActiveDepthTarget = s_DummyDepthBackBuffer; + } + return true; +} + +RenderSurfaceHandle GetActiveRenderColorSurfaceD3D9(int index) +{ + return RenderSurfaceHandle(s_ActiveColorTargets[index]); +} +RenderSurfaceHandle GetActiveRenderDepthSurfaceD3D9() +{ + return RenderSurfaceHandle(s_ActiveDepthTarget); +} + +bool IsActiveRenderTargetWithColorD3D9() +{ + return !s_ActiveColorTargets[0] || s_ActiveColorTargets[0]->backBuffer || !(s_ActiveColorTargets[0]->flags & kSurfaceCreateNeverUsed); +} + + +RenderSurfaceHandle CreateRenderColorSurfaceD3D9( TextureID textureID, int width, int height, int samples, TextureDimension dim, UInt32 createFlags, RenderTextureFormat format, TexturesD3D9& textures ) +{ + RenderSurfaceHandle rsHandle; + + if( !gGraphicsCaps.hasRenderToTexture ) + return rsHandle; + if( !gGraphicsCaps.supportsRenderTextureFormat[format] ) + return rsHandle; + + RenderColorSurfaceD3D9* rs = new RenderColorSurfaceD3D9; + rs->width = width; + rs->height = height; + rs->samples = samples; + rs->format = format; + rs->textureID = textureID; + rs->dim = dim; + rs->flags = createFlags; + + // Create it + if (!InitD3DRenderColorSurface(*rs, textures)) + { + delete rs; + 
return rsHandle; + } + + rsHandle.object = rs; + return rsHandle; +} + +RenderSurfaceHandle CreateRenderDepthSurfaceD3D9( TextureID textureID, int width, int height, int samples, DepthBufferFormat depthFormat, UInt32 createFlags, TexturesD3D9& textures ) +{ + RenderSurfaceHandle rsHandle; + + if( !gGraphicsCaps.hasRenderToTexture ) + return rsHandle; + + RenderDepthSurfaceD3D9* rs = new RenderDepthSurfaceD3D9; + rs->width = width; + rs->height = height; + rs->samples = samples; + rs->depthFormat = depthFormat; + rs->textureID = textureID; + rs->flags = createFlags; + + // Create it + if (!InitD3DRenderDepthSurface( *rs, textures)) + { + delete rs; + return rsHandle; + } + + rsHandle.object = rs; + return rsHandle; +} + + +void DestroyRenderSurfaceD3D9 (RenderSurfaceD3D9* rs) +{ + Assert(rs); + + if(rs == s_ActiveColorBackBuffer || rs == s_ActiveDepthBackBuffer) + { + #if DEBUG_RENDER_TEXTURES + printf_console( " RT: Destroying main %s buffer.\n", s == s_ActiveColorBackBuffer ? "color" : "depth" ); + #endif + s_ActiveColorBackBuffer = NULL; + s_ActiveDepthBackBuffer = NULL; + } + + RenderSurfaceHandle defaultColor(s_DummyColorBackBuffer); + RenderSurfaceHandle defaultDepth(s_DummyDepthBackBuffer); + + if (s_ActiveDepthTarget == rs) + { + ErrorString( "RenderTexture warning: Destroying active render texture. Switching to main context." ); + int targetWidth, targetHeight; + bool isBackBuffer; + SetRenderTargetD3D9 (1, &defaultColor, defaultDepth, 0, kCubeFaceUnknown, targetWidth, targetHeight, isBackBuffer); + } + for (int i = 0; i < s_ActiveColorTargetCount; ++i) + { + if (s_ActiveColorTargets[i] == rs) + { + ErrorString( "RenderTexture warning: Destroying active render texture. Switching to main context." 
); + int targetWidth, targetHeight; + bool isBackBuffer; + SetRenderTargetD3D9 (1, &defaultColor, defaultDepth, 0, kCubeFaceUnknown, targetWidth, targetHeight, isBackBuffer); + } + } + + if (rs->m_Surface) + { + REGISTER_EXTERNAL_GFX_DEALLOCATION(rs->m_Surface); + ULONG refCount = rs->m_Surface->Release(); + Assert(refCount == (rs->m_Texture ? 1 : 0)); + rs->m_Surface = NULL; + } + if( rs->m_Texture ) + { + REGISTER_EXTERNAL_GFX_DEALLOCATION(rs->m_Texture); + ULONG refCount = rs->m_Texture->Release(); + Assert(refCount == 0); + rs->m_Texture = NULL; + } +} + +void DestroyRenderSurfaceD3D9 (RenderSurfaceHandle& rsHandle, TexturesD3D9& textures) +{ + if( !rsHandle.IsValid() ) + return; + + RenderSurfaceD3D9* rs = reinterpret_cast<RenderSurfaceD3D9*>( rsHandle.object ); + DestroyRenderSurfaceD3D9( rs ); + + if (rs->m_Texture || rs->textureID.m_ID) + textures.RemoveTexture (rs->textureID); + + delete rs; + rsHandle.object = NULL; +} + + + +// -------------------------------------------------------------------------- + + +#if ENABLE_UNIT_TESTS +#include "External/UnitTest++/src/UnitTest++.h" + +SUITE ( RenderTextureD3DTests ) +{ +TEST(RenderTextureD3DTests_FormatTableCorrect) +{ + // checks that you did not forget to update format table when adding a new format :) + for (int i = 0; i < kRTFormatCount; ++i) + { + CHECK(kD3D9RenderTextureFormats[i] != 0); + } +} +} +#endif diff --git a/Runtime/GfxDevice/d3d/RenderTextureD3D.h b/Runtime/GfxDevice/d3d/RenderTextureD3D.h new file mode 100644 index 0000000..255e89d --- /dev/null +++ b/Runtime/GfxDevice/d3d/RenderTextureD3D.h @@ -0,0 +1,17 @@ +#pragma once + +#include "D3D9Includes.h" + + +RenderSurfaceHandle CreateRenderColorSurfaceD3D9 (TextureID textureID, int width, int height, int samples, TextureDimension dim, UInt32 createFlags, RenderTextureFormat format, TexturesD3D9& textures); +RenderSurfaceHandle CreateRenderDepthSurfaceD3D9 (TextureID textureID, int width, int height, int samples, DepthBufferFormat depthFormat, 
UInt32 createFlags, TexturesD3D9& textures); +void DestroyRenderSurfaceD3D9 (RenderSurfaceD3D9* rs); +void DestroyRenderSurfaceD3D9 (RenderSurfaceHandle& rsHandle, TexturesD3D9& textures); +bool SetRenderTargetD3D9 (int count, RenderSurfaceHandle* colorHandles, RenderSurfaceHandle depthHandle, int mipLevel, CubemapFace face, int& outRenderTargetWidth, int& outRenderTargetHeight, bool& outIsBackBuffer); +RenderSurfaceHandle GetActiveRenderColorSurfaceD3D9(int index); +RenderSurfaceHandle GetActiveRenderDepthSurfaceD3D9(); + +RenderSurfaceHandle GetBackBufferColorSurfaceD3D9(); +RenderSurfaceHandle GetBackBufferDepthSurfaceD3D9(); +void SetBackBufferColorSurfaceD3D9(RenderSurfaceBase* color); +void SetBackBufferDepthSurfaceD3D9(RenderSurfaceBase* depth); diff --git a/Runtime/GfxDevice/d3d/ShaderGenerator.cpp b/Runtime/GfxDevice/d3d/ShaderGenerator.cpp new file mode 100644 index 0000000..b62e5c7 --- /dev/null +++ b/Runtime/GfxDevice/d3d/ShaderGenerator.cpp @@ -0,0 +1,948 @@ +#include "UnityPrefix.h" +#include <stdlib.h> +#include <string> +#include <vector> +#include <algorithm> +#include <assert.h> +#include "ShaderGenerator.h" +#include "Runtime/Utilities/Word.h" + +enum ShaderInputRegister { + kInputPosition, + kInputNormal, + kInputUV0, + kInputUV1, + kInputColor, + kInputCount +}; + +const char* kShaderInputNames[kInputCount] = { + "$IPOS", + "$INOR", + "$IUV0", + "$IUV1", + "$ICOL", +}; + +const char* kShaderInputDecls[kInputCount] = { + "dcl_position", + "dcl_normal", + "dcl_texcoord0", + "dcl_texcoord1", + "dcl_color", +}; + + + +enum ShaderFragmentOptions { + kOptionHasTexMatrix = (1<<0), +}; + +const int kConstantLocations[kConstCount] = { + 0, // kConstMatrixMVP + 4, // kConstMatrixMV + 8, // kConstMatrixMV_IT + 12, // kConstMatrixTexture + 44, // kConstAmbient + 57, // kConstColorMatAmbient + 45, // kConstLightMisc + 46, // kConstMatDiffuse + 47, // kConstMatSpecular + 48, // kConstLightIndexes +}; + +enum CommonDependencies { + kDep_CamSpacePos, + 
kDep_CamSpaceN, + kDep_ViewVector, + kDep_ReflVector, + kDep_Normal, + kDepCount +}; + + +// -------------------------------------------------------------------------- + +// transform position +const ShaderFragment kVS_Pos = { + (1<<kInputPosition), // input + (1<<kConstMatrixMVP), // constants + 0, // deps + 0, // options + 0, // temps + NULL, // ins + NULL, // outs + "dp4 oPos.x, $IPOS, c0\n" + "dp4 oPos.y, $IPOS, c1\n" + "dp4 oPos.z, $IPOS, c2\n" + "dp4 oPos.w, $IPOS, c3\n", +}; + +// -------------------------------------------------------------------------- +// temps + +// NORM = vertex normal +const ShaderFragment kVS_Load_Normal = { + (1<<kInputNormal), // input + 0, // constants + 0, // deps + 0, // options + 0, // temps + NULL, // ins + "NORM", // outs + "mov $O_NORM, $INOR\n" +}; + +// NORM = normalized vertex normal +const ShaderFragment kVS_Normalize_Normal = { + 0, // input + 0, // constants + (1<<kDep_Normal), // deps + 0, // options + 1, // temps + "NORM", // ins + "NORM", // outs + "nrm $TMP0.xyz, $O_NORM\n" + "mov $O_NORM.xyz, $TMP0\n" +}; + + +// OPOS = input position of the vertex +const ShaderFragment kVS_Temp_ObjSpacePos = { + (1<<kInputPosition), // input + 0, // constants + 0, // deps + 0, // options + 0, // temps + NULL, // ins + "OPOS", // outs + "mov $O_OPOS, $IPOS\n" +}; + +// CPOS = camera space position of the vertex +const ShaderFragment kVS_Temp_CamSpacePos = { + (1<<kInputPosition), // input + (1<<kConstMatrixMV), // constants + 0, // deps + 0, // options + 0, // temps + NULL, // ins + "CPOS", // outs + "mul $O_CPOS, $IPOS.y, c5\n" + "mad $O_CPOS, c4, $IPOS.x, $O_CPOS\n" + "mad $O_CPOS, c6, $IPOS.z, $O_CPOS\n" + "mad $O_CPOS, c7, $IPOS.w, $O_CPOS\n", +}; + +// CNOR = camera space normal of the vertex +const ShaderFragment kVS_Temp_CamSpaceN = { + 0, // input + (1<<kConstMatrixMV_IT), // constants + (1<<kDep_Normal), // deps + 0, // options + 0, // temps + "NORM", // ins + "CNOR", // outs + "mul $O_CNOR, $O_NORM.y, c9\n" + "mad 
$O_CNOR, c8, $O_NORM.x, $O_CNOR\n" + "mad $O_CNOR, c10, $O_NORM.z, $O_CNOR\n", +}; + +// VIEW = normalized vertex-to-eye vector +const ShaderFragment kVS_Temp_ViewVector = { + 0, // input + 0, // constants + (1<<kDep_CamSpacePos), // deps + 0, // options + 0, // temps + "CPOS", // ins + "VIEW", // outs + "dp3 $O_VIEW.w, $O_CPOS, $O_CPOS\n" + "rsq $O_VIEW.w, $O_VIEW.w\n" + "mul $O_VIEW, -$O_CPOS, $O_VIEW.w\n", +}; + +// REFL = camera space reflection vector: 2*dot(V,N)*N-V +const ShaderFragment kVS_Temp_CamSpaceRefl = { + 0, // input + 0, // constants + (1<<kDep_CamSpaceN) | (1<<kDep_ViewVector), // deps + 0, // options + 0, // temps + "CNOR VIEW", // ins + "REFL", // outs + "mov $O_REFL.xyz, $O_VIEW\n" + "dp3 $O_REFL.w, $O_REFL, $O_CNOR\n" + "add $O_REFL.w, $O_REFL.w, $O_REFL.w\n" + "mad $O_REFL.xyz, $O_REFL.w, $O_CNOR, -$O_REFL\n" +}; + +// cheap version +// SPHR = sphere map: N*0.5+0.5 +//const ShaderFragment kVS_Temp_SphereMap = { +// 0, // input +// (1<<kConstLightMisc), // constants +// (1<<kDep_CamSpaceN), // deps +// 0, // options +// 0, // temps +// "CNOR", // ins +// "SPHR", // outs +// "mad $O_SPHR.xyz, $O_CNOR, c45.w, c45.w" +//}; + +// SPHR = sphere map. 
R = reflection vector +// m = 2*sqrt(Rx*Rx + Ry*Ry + (Rz+1)*(Rz+1)) +// SPHR = Rx/m + 0.5, Ry/m + 0.5 +const ShaderFragment kVS_Temp_SphereMap = { + 0, // input + (1<<kConstLightMisc), // constants + (1<<kDep_ReflVector), // deps + 0, // options + 1, // temps + "REFL", // ins + "SPHR", // outs + "mul $TMP0.xy, $O_REFL, $O_REFL\n" // Rx*Rx, Ry*Ry + "add $O_SPHR.w, $TMP0.y, $TMP0.x\n" // Rx*Rx + Ry*Ry + "add $O_SPHR.z, $O_REFL.z, c45.z\n" // Rz+1 + "mad $O_SPHR.z, $O_SPHR.z, $O_SPHR.z, $O_SPHR.w\n" // (Rz+1)*(Rz+1) + Rx*Rx + Ry*Ry + "mul $O_SPHR.z, $O_SPHR.z, c45.y\n" // * 4 + "rsq $O_SPHR.z, $O_SPHR.z\n" // m + "mad $O_SPHR.xy, $O_REFL, $O_SPHR.z, c45.w\n" // R/m+0.5 +}; + +// -------------------------------------------------------------------------- +// Texture coordinates + +const ShaderFragment kVS_Load_UV0 = { + (1<<kInputUV0), // input + 0, // constants + 0, // deps + 0, // options + 0, // temps + NULL, // ins + "UV0", // outs + "mov $O_UV0, $IUV0\n" +}; + +const ShaderFragment kVS_Load_UV1 = { + (1<<kInputUV1), // input + 0, // constants + 0, // deps + 0, // options + 0, // temps + NULL, // ins + "UV1", // outs + "mov $O_UV1, $IUV1\n" +}; + +const ShaderFragment kVS_Out_TexCoord = { + 0, // input + 0, // constants + 0, // deps + 0, // options + 0, // temps + "$0", // ins + NULL, // outs + "mov oT$PARAM, $I_0\n" +}; + + +const ShaderFragment kVS_Out_Matrix2 = { + 0, // input + (1<<kConstMatrixTexture), // constants + 0, // deps + kOptionHasTexMatrix, // options + 1, // temps + "$0", // ins + NULL, // outs + "mul $TMP0, $I_0.y, $TMPARAM1\n" + "mad $TMP0, $TMPARAM0, $I_0.x, $TMP0\n" + "add oT$PARAM, $TMPARAM3, $TMP0\n" +}; + +const ShaderFragment kVS_Out_Matrix3 = { + 0, // input + (1<<kConstMatrixTexture), // constants + 0, // deps + kOptionHasTexMatrix, // options + 1, // temps + "$0", // ins + NULL, // outs + "mul $TMP0, $I_0.y, $TMPARAM1\n" + "mad $TMP0, $TMPARAM0, $I_0.x, $TMP0\n" + "mad $TMP0, $TMPARAM2, $I_0.z, $TMP0\n" + "add oT$PARAM, $TMPARAM3, $TMP0\n" 
+}; + +// -------------------------------------------------------------------------- +// Lighting + +const ShaderFragment kVS_Out_Diffuse_VertexColor= { + (1<<kInputColor), // input + 0, // constants + 0, // deps + 0, // options + 0, // temps + NULL, // ins + NULL, // outs + "mov oD0, $ICOL\n" +}; + +const ShaderFragment kVS_Light_Diffuse_Pre = { + 0, // input + (1<<kConstLightMisc), // constants + 0, // deps + 0, // options + 0, // temps + NULL, // ins + "DIFF", // outs + + "mov $O_DIFF, c45.xxxz\n" // diffuse = 0 +}; + +const ShaderFragment kVS_Light_Diffuse_Dir = { + 0, // input + (1<<kConstLightMisc) | (1<<kConstLightIndexes), // constants + (1<<kDep_CamSpaceN), // deps + 0, // options + 1, // temps + "CNOR", // ins + "DIFF", // outs + + "mov $O_CNOR.w, c48.y\n" // CNOR.w is reused as light data index + "rep i1\n" + " mova a0.x, $O_CNOR.w\n" + " dp3 $TMP0.x, $O_CNOR, c61[a0.x]\n" // NdotL + " slt $TMP0.w, c45.x, $TMP0.x\n" // clamp = NdotL > 0 + " mul $TMP0.xyz, $TMP0.x, c62[a0.x]\n" // doff = NdotL * lightColor + " mad $O_DIFF.xyz, $TMP0.w, $TMP0, $O_DIFF\n" // diffuse += diff * clamp + " add $O_CNOR.w, $O_CNOR.w, c45.y\n" // index += 4 + "endrep\n" +}; + +const ShaderFragment kVS_Light_Diffuse_Point = { + 0, // input + (1<<kConstLightMisc) | (1<<kConstLightIndexes), // constants + (1<<kDep_CamSpaceN) | (1<<kDep_CamSpacePos), // deps + 0, // options + 3, // temps + "CNOR CPOS", // ins + "DIFF", // outs + + "mov $O_CNOR.w, c48.z\n" // CNOR.w is reused as light data index + "rep i2\n" + " mova a0.x, $O_CNOR.w\n" + " add $TMP1.xyz, -$O_CPOS, c60[a0.x]\n" // toLight in view space + " dp3 $TMP0.w, $TMP1, $TMP1\n" // lightDirection = normalize(toLight) + " rsq $TMP1.w, $TMP0.w\n" + " mul $TMP1.xyz, $TMP1, $TMP1.w\n" + " dp3 $TMP1.x, $O_CNOR, $TMP1\n" // NdotL + " slt $TMP1.y, c63[a0.x].z, $TMP0.w\n" // range = range2 < toLight2 + " mov $TMP1.z, c45.z\n" // 1 + " mad $TMP0.w, c63[a0.x].w, $TMP0.w, $TMP1.z\n" // 1 + toLight2 * quadAttenuation + " rcp $TMP0.w, 
$TMP0.w\n" // attenuation + " mad $TMP0.w, $TMP1.y, -$TMP0.w, $TMP0.w\n" // attenuation = 0 if out of range + " sge $TMP1.y, $TMP1.x, c45.x\n" // clamp = NdotL > 0 + " mul $TMP2, $TMP1.x, c62[a0.x]\n" // diff = NdotL * lightColor + " mul $TMP2, $TMP0.w, $TMP2\n" // diff *= attenuation + " mad $O_DIFF.xyz, $TMP1.y, $TMP2, $O_DIFF\n" // diffuse += diff * clamp + " add $O_CNOR.w, $O_CNOR.w, c45.y\n" // index += 4 + "endrep\n" +}; + + + +const ShaderFragment kVS_Light_Diffuse_Spot = { + 0, // input + (1<<kConstLightMisc) | (1<<kConstLightIndexes), // constants + (1<<kDep_CamSpaceN) | (1<<kDep_CamSpacePos), // deps + 0, // options + 3, // temps + "CNOR CPOS", // ins + "DIFF", // outs + + "mov $O_CNOR.w, c48.x\n" // CNOR.w is reused as light data index + "rep i0\n" + " mova a0.x, $O_CNOR.w\n" + " add $TMP1.xyz, -$O_CPOS, c60[a0.x]\n" // toLight in view space + " dp3 $TMP0.w, $TMP1, $TMP1\n" // lightDirection = normalize(toLight) + " rsq $TMP1.w, $TMP0.w\n" + " mul $TMP1.xyz, $TMP1, $TMP1.w\n" + " dp3 $TMP1.w, $O_CNOR, $TMP1\n" // NdotL + " dp3 $TMP1.x, $TMP1, c61[a0.x]\n" // rho = dot(L,lightAxisDirection) + " add $TMP1.x, $TMP1.x, -c63[a0.x].y\n" // rho-cos(phi/2) + " mul $TMP1.x, $TMP1.x, c63[a0.x].x\n" // spotAtten = (rho-cos(phi/2)) / (cos(theta/2)-cos(phi/2)) + " mov $TMP1.z, c45.z\n" // 1 + " mad $TMP1.y, c63[a0.x].w, $TMP0.w, $TMP1.z\n" // 1 + toLight2 * quadAttenuation + " rcp $TMP1.y, $TMP1.y\n" // attenuation + " slt $TMP0.w, c63[a0.x].z, $TMP0.w\n" // range = range2 < toLight2 + " mad $TMP0.w, $TMP0.w, -$TMP1.y, $TMP1.y\n" // attenuation = 0 if out of range + " max $TMP1.x, $TMP1.x, c45.x\n" // spotAtten = saturate(spotAtten) + " min $TMP1.x, $TMP1.x, c45.z\n" + " mul $TMP0.w, $TMP0.w, $TMP1.x\n" // attenuation *= spotAtten + " sge $TMP1.x, $TMP1.w, c45.x\n" // clamp = NdotL > 0 + " mul $TMP2, $TMP1.w, c62[a0.x]\n" // diff = NdotL * lightColor + " mul $TMP2, $TMP0.w, $TMP2\n" // diff *= attenuation + " mad $O_DIFF.xyz, $TMP1.x, $TMP2, $O_DIFF\n" // diffuse += 
diff * clamp + " add $O_CNOR.w, $O_CNOR.w, c45.y\n" // index += 4 + "endrep\n" +}; + + +const ShaderFragment kVS_Light_Specular_Pre = { + 0, // input + (1<<kConstLightMisc), // constants + 0, // deps + 0, // options + 0, // temps + NULL, // ins + "DIFF SPEC", // outs + "mov $O_DIFF, c45.xxxz\n" // diffuse = 0 + "mov $O_SPEC, c45.x\n" // specular = 0 +}; + + +const ShaderFragment kVS_Light_Specular_Dir = { + 0, // input + (1<<kConstLightMisc) | (1<<kConstLightIndexes) | (1<<kConstMatSpecular), // constants + (1<<kDep_CamSpaceN) | (1<<kDep_ViewVector), // deps + 0, // options + 2, // temps + "CNOR VIEW", // ins + "DIFF SPEC", // outs + + "mov $O_CNOR.w, c48.y\n" // CNOR.w is reused as light data index + "rep i1\n" + " mova a0.x, $O_CNOR.w\n" + " mov $TMP0.xyz, c61[a0.x]\n" // L = lightDirection + // diffuse + " dp3 $TMP1.x, $O_CNOR, $TMP0\n" // NdotL + " slt $TMP0.w, c45.x, $TMP1.x\n" // clamp = NdotL > 0 + " mul $TMP1, $TMP1.x, c62[a0.x]\n" // diff = NdotL * lightColor + " mad $O_DIFF.xyz, $TMP0.w, $TMP1, $O_DIFF\n" // diffuse += diff * clamp + // spec + " add $TMP0.xyz, $TMP0, $O_VIEW\n" // L + V + " nrm $TMP1.xyz, $TMP0\n" // H = normalize(L + V) + " dp3 $TMP1.w, $TMP1, $O_CNOR\n" // H dot N + " max $TMP1.w, $TMP1.w, c45.x\n" // sp = max(H dot N, 0) + " pow $TMP1.w, $TMP1.w, c47.w\n" // sp = pow(sp, exponent) + " mul $TMP1.w, $TMP1.w, $TMP0.w\n" // sp *= clamp + " mad $O_SPEC.xyz, $TMP1.w, c62[a0.x], $O_SPEC\n" // spec += sp * lightColor + + " add $O_CNOR.w, $O_CNOR.w, c45.y\n" // index += 4 + "endrep\n" +}; + + +const ShaderFragment kVS_Light_Specular_Point = { + 0, // input + (1<<kConstLightMisc) | (1<<kConstLightIndexes) | (1<<kConstMatSpecular), // constants + (1<<kDep_CamSpaceN) | (1<<kDep_CamSpacePos) | (1<<kDep_ViewVector), // deps + 0, // options + 3, // temps + "CNOR CPOS VIEW", // ins + "DIFF SPEC", // outs + + "mov $O_CNOR.w, c48.z\n" // CNOR.w is reused as light data index + "rep i2\n" + " mova a0.x, $O_CNOR.w\n" + " add $TMP1.xyz, -$O_CPOS, 
c60[a0.x]\n" // toLight in view space + " dp3 $TMP0.w, $TMP1, $TMP1\n" // L = normalize(toLight) + " rsq $TMP1.w, $TMP0.w\n" + " mul $TMP1.xyz, $TMP1, $TMP1.w\n" + // diffuse + " dp3 $TMP0.x, $O_CNOR, $TMP1\n" // NdotL + " slt $TMP0.y, c63[a0.x].z, $TMP0.w\n" // range = range2 < toLight2 + " mov $TMP0.z, c45.z\n" // 1 + " mad $TMP0.w, c63[a0.x].w, $TMP0.w, $TMP0.z\n" // 1 + toLight2 * quadAttenuation + " rcp $TMP0.w, $TMP0.w\n" // attenuation + " mad $TMP0.w, $TMP0.y, -$TMP0.w, $TMP0.w\n" // attenuation = 0 if out of range + " sge $TMP0.y, $TMP0.x, c45.x\n" // clamp = NdotL > 0 + " mul $TMP2, $TMP0.x, c62[a0.x]\n" // diff = NdotL * lightColor + " mul $TMP2, $TMP0.w, $TMP2\n" // diff *= attenuation + " mad $O_DIFF.xyz, $TMP0.y, $TMP2, $O_DIFF\n" // diffuse += diff * clamp + // spec + " add $TMP2.xyz, $TMP1, $O_VIEW\n" // L + V + " nrm $TMP1.xyz, $TMP2\n" // H = normalize(L + V) + " dp3 $TMP1.w, $TMP1, $O_CNOR\n" // H dot N + " max $TMP1.w, $TMP1.w, c45.x\n" // sp = max(H dot N, 0) + " pow $TMP1.w, $TMP1.w, c47.w\n" // sp = pow(sp, exponent) + " mul $TMP1.w, $TMP1.w, $TMP0.w\n" // sp *= attenuation + " mul $TMP1.w, $TMP1.w, $TMP0.y\n" // sp *= clamp + " mad $O_SPEC.xyz, $TMP1.w, c62[a0.x], $O_SPEC\n" // spec += sp * lightColor + + " add $O_CNOR.w, $O_CNOR.w, c45.y\n" // index += 4 + "endrep\n" +}; + +const ShaderFragment kVS_Light_Specular_Spot = { + 0, // input + (1<<kConstLightMisc) | (1<<kConstLightIndexes) | (1<<kConstMatSpecular), // constants + (1<<kDep_CamSpaceN) | (1<<kDep_CamSpacePos) | (1<<kDep_ViewVector), // deps + 0, // options + 3, // temps + "CNOR CPOS VIEW", // ins + "DIFF SPEC", // outs + + "mov $O_CNOR.w, c48.x\n" // CNOR.w is reused as light data index + "rep i0\n" + " mova a0.x, $O_CNOR.w\n" + " add $TMP1.xyz, -$O_CPOS, c60[a0.x]\n" // toLight in view space + " dp3 $TMP0.w, $TMP1, $TMP1\n" // lightDirection = normalize(toLight) + " rsq $TMP1.w, $TMP0.w\n" + " mul $TMP1.xyz, $TMP1, $TMP1.w\n" + // diffuse + " dp3 $TMP1.w, $O_CNOR, $TMP1\n" // NdotL 
+ " dp3 $TMP0.x, $TMP1, c61[a0.x]\n" // rho = dot(L,lightAxisDirection) + " add $TMP0.x, $TMP0.x, -c63[a0.x].y\n" // rho-cos(phi/2) + " mul $TMP0.x, $TMP0.x, c63[a0.x].x\n" // spotAtten = (rho-cos(phi/2)) / (cos(theta/2)-cos(phi/2)) + " mov $TMP0.z, c45.z\n" // 1 + " mad $TMP0.y, c63[a0.x].w, $TMP0.w, $TMP0.z\n" // 1 + toLight2 * quadAttenuation + " rcp $TMP0.y, $TMP0.y\n" // attenuation + " slt $TMP0.w, c63[a0.x].z, $TMP0.w\n" // range = range2 < toLight2 + " mad $TMP0.w, $TMP0.w, -$TMP0.y, $TMP0.y\n" // attenuation = 0 if out of range + " max $TMP0.x, $TMP0.x, c45.x\n" // spotAtten = saturate(spotAtten) + " min $TMP0.x, $TMP0.x, c45.z\n" + " mul $TMP0.w, $TMP0.w, $TMP0.x\n" // attenuation *= spotAtten + " sge $TMP0.x, $TMP1.w, c45.x\n" // clamp = NdotL > 0 + " mul $TMP2, $TMP1.w, c62[a0.x]\n" // diff = NdotL * lightColor + " mul $TMP2, $TMP0.w, $TMP2\n" // diff *= attenuation + " mad $O_DIFF.xyz, $TMP0.x, $TMP2, $O_DIFF\n" // diffuse += diff * clamp + // spec + " add $TMP2.xyz, $TMP1, $O_VIEW\n" // L + V + " nrm $TMP1.xyz, $TMP2\n" // H = normalize(L + V) + " dp3 $TMP1.w, $TMP1, $O_CNOR\n" // H dot N + " max $TMP1.w, $TMP1.w, c45.x\n" // sp = max(H dot N, 0) + " pow $TMP2.x, $TMP1.w, c47.w\n" // sp = pow(sp, exponent) + " mul $TMP2.x, $TMP2.x, $TMP0.w\n" // sp *= attenuation + " mul $TMP2.x, $TMP2.x, $TMP0.x\n" // sp *= clamp + " mad $O_SPEC.xyz, $TMP2.x, c62[a0.x], $O_SPEC\n" // spec += sp * lightColor + + " add $O_CNOR.w, $O_CNOR.w, c45.y\n" // index += 4 + "endrep\n" +}; + + +const ShaderFragment kVS_Out_Diffuse_Lighting = { + 0, // input + (1<<kConstAmbient) | (1<<kConstMatDiffuse) | (1<<kConstLightMisc), // constants + 0, // deps + 0, // options + 0, // temps + "DIFF", // ins + NULL, // outs + "mul $O_DIFF, $O_DIFF, c46\n" // diffuse *= materialDiffuse + "add $O_DIFF.xyz, $O_DIFF, c44\n" // diffuse += ambient + "min oD0, $O_DIFF, c45.z\n" // diffuse = max(diffuse,1) +}; + +const ShaderFragment kVS_Out_Specular_Lighting = { + 0, // input + 
// Token helpers for the space-separated register lists stored in
// ShaderFragment::ins / ShaderFragment::outs and in per-fragment input names.

// Returns true for characters that may appear inside a token:
// '$' (parametrized-input marker), upper-case letters and digits.
static bool IsAlNum( char c ) {
	return c=='$' || (c>='A' && c<='Z') || (c>='0' && c<='9');
}

// Advances p past `count` tokens and the separator(s) that follow each one.
// Returns a pointer to the start of the next token, or to the terminating NUL
// if the string runs out first. Never moves past the terminator.
static const char* SkipTokens( const char* p, int count ) {
	while( count-- > 0 ) {
		// token body
		while( IsAlNum(*p) )
			++p;
		// separator(s) between tokens
		while( *p != 0 && !IsAlNum(*p) )
			++p;
		if( *p == 0 )
			return p;
	}
	return p;
}

// Extracts the token at *text and returns it; advances *text past the token
// and past the single separator character that follows it.
// Returns an empty string (and leaves *text untouched) when *text does not
// start with a token character, which is how callers detect end of list.
static std::string ExtractToken( const char** text ) {
	const char* begin = *text;
	const char* end = begin;
	while( IsAlNum(*end) )
		++end;

	if( end == begin )
		return std::string();

	// result
	std::string res(begin, end);

	// skip the separator after the token, but never step over the terminating
	// NUL: the next call must see the end of the string, not whatever happens
	// to follow it in memory.
	if( *end != 0 )
		++end;
	*text = end;

	return res;
}
+struct SavedRegister { + std::string name; + int firstUse; + int lastUse; + int regIndex; +}; +typedef std::vector<SavedRegister> SavedRegisters; + +static inline int FindSavedRegister( const SavedRegisters& regs, const std::string& name ) +{ + int n = regs.size(); + for( int i = 0; i < n; ++i ) + if( regs[i].name == name ) + return i; + return -1; +} + +void ShaderGenerator::GenerateShader( std::string& output, unsigned int& usedConstants ) +{ + unsigned int usedConstantsMask = 0; + + output.clear(); + output.reserve(1024); + //debug.clear(); + + // shader input mappings + int inputMapping[kInputCount]; + for( int i = 0; i < kInputCount; ++i ) + inputMapping[i] = -1; + int usedInputStack[kInputCount]; + int usedInputs = 0; + + // saved registers across fragments + SavedRegisters savedRegisters; + + // go over fragments and figure out inputs, saved registers and used constants + int maxTemps = 0; + for( int fi = 0; fi < m_FragmentCount; ++fi ) { + const ShaderFragment& frag = *m_Fragments[fi].fragment; + + // fragment vertex inputs + for( int i = 0; i < kInputCount; ++i ) { + // does fragment use this input? + if( frag.inputs & (1<<i) ) { + // add to inputs list of in there yet + if( inputMapping[i] == -1 ) { + usedInputStack[usedInputs] = i; + inputMapping[i] = usedInputs; + ++usedInputs; + } + } + } + + // remember output registers + if( frag.outs ) { + const char* outputs = frag.outs; + std::string token; + while( !(token = ExtractToken(&outputs)).empty() ) { + token = "$O_" + token; + //TODO: check that text has that token. 
+ //TODO: check that text has no $O_ tokens that are not in the output + // add to list if not there yet + int savedIndex = FindSavedRegister( savedRegisters, token ); + if( savedIndex == -1 ) + { + SavedRegister r; + r.name = token; + r.firstUse = fi; + r.lastUse = fi; + r.regIndex = -1; + savedRegisters.push_back( r ); + } + else + { + savedRegisters[savedIndex].lastUse = fi; + assert(savedRegisters[savedIndex].firstUse <= savedRegisters[savedIndex].lastUse); + } + } + } + + // from fragment input registers, determine last use of saved registers + if( frag.ins ) { + const char* inputs = frag.ins; + std::string token; + while( !(token = ExtractToken(&inputs)).empty() ) { + // a parametrized token? + if( token[0] == '$' ) { + assert(token.size()==2); + assert(token[1]>='0' && token[1]<='9'); + int index = token[1]-'0'; + const char* inputNames = m_Fragments[fi].inputNames; + inputNames = SkipTokens( inputNames, index ); + token = ExtractToken(&inputNames); + } + token = "$O_" + token; + + //TODO: check that text has that token. + //TODO: check that text has no $O_ tokens that are not in the input + int savedIndex = FindSavedRegister( savedRegisters, token ); + assert(savedIndex != -1); + assert(savedRegisters[savedIndex].lastUse <= fi); + savedRegisters[savedIndex].lastUse = fi; + } + } + + maxTemps = std::max(maxTemps, frag.temps); + + // used constants + usedConstantsMask |= frag.constants; + } + + assert( savedRegisters.size() <= kMaxSavedRegisters ); + + // assign register indices to saved registers + int mapFragmentRegister[kMaxShaderFragments][kMaxTempRegisters]; // [fragment][index] = used or not? 
+ memset(mapFragmentRegister, 0, sizeof(mapFragmentRegister)); + for( size_t i = 0; i < savedRegisters.size(); ++i ) { + // find unused register over whole lifetime, and assign it + SavedRegister& sr = savedRegisters[i]; + assert(sr.regIndex == -1); + for( int regIndex = 0; regIndex < kMaxTempRegisters; ++regIndex ) { + bool unused = true; + for( int fi = sr.firstUse; fi <= sr.lastUse; ++fi ) { + if( mapFragmentRegister[fi][regIndex] != 0 ) { + unused = false; + break; + } + } + if( unused ) { + for( int fi = sr.firstUse; fi <= sr.lastUse; ++fi ) + mapFragmentRegister[fi][regIndex] = 1; + sr.regIndex = regIndex; + break; + } + } + assert(sr.regIndex != -1); + } + + // generate prolog with declarations + output += "vs_2_0\n"; + for( int i = 0; i < usedInputs; ++i ) { + output += kShaderInputDecls[usedInputStack[i]]; + output += " v"; + assert(i<=9); + output += ('0' + i); + output += '\n'; + } + + // go over fragments, transform register names and output + for( int fi = 0; fi < m_FragmentCount; ++fi ) { + const ShaderFragment& frag = *m_Fragments[fi].fragment; + int param = m_Fragments[fi].param; + + output += '\n'; + std::string text = frag.text; + + std::string regname("r0"); + std::string regname2("r00"); + + // input registers + regname[0] = 'v'; + for( int i = 0; i < usedInputs; ++i ) { + int inputIndex = usedInputStack[i]; + assert(i<=9); + regname[1] = '0' + i; + replace_string(text, kShaderInputNames[inputIndex], regname); + } + + // fragment inputs + if( frag.ins ) { + const char* inputs = frag.ins; + std::string token; + while( !(token = ExtractToken(&inputs)).empty() ) { + std::string searchName; + std::string savedName; + // a parametrized token? 
+ if( token[0] == '$' ) { + assert(token.size()==2); + assert(token[1]>='0' && token[1]<='9'); + int index = token[1]-'0'; + const char* inputNames = m_Fragments[fi].inputNames; + inputNames = SkipTokens( inputNames, index ); + token = ExtractToken(&inputNames); + searchName = std::string("$I_") + char('0'+index); + } else { + searchName = "$O_" + token; + } + savedName = "$O_" + token; + + // Assign register index to this saved reg + SavedRegisters::iterator it, itEnd = savedRegisters.end(); + for( it = savedRegisters.begin(); it != itEnd; ++it ) { + const SavedRegister& sr = *it; + if( sr.name == savedName ) + { + // replace with register value + regname[0] = 'r'; + assert(sr.regIndex<=9); + regname[1] = '0' + sr.regIndex; + replace_string(text, searchName, regname); + break; + } + } + assert( it != itEnd ); + } + } + + // saved registers + if( frag.outs ) { + regname[0] = 'r'; + SavedRegisters::iterator it, itEnd = savedRegisters.end(); + for( it = savedRegisters.begin(); it != itEnd; ++it ) { + const SavedRegister& sr = *it; + assert(sr.regIndex<=9); + regname[1] = '0' + sr.regIndex; + replace_string(text, sr.name, regname); + } + } + + // fragment-private temporary registers + regname[0] = 'r'; + regname2[0] = 'r'; + std::string tmpname("$TMP0"); + int regIndex = 0; + for( int i = 0; i < frag.temps; ++i ) { + assert(i<=9); + tmpname[4] = '0' + i; + // find unused register at this fragment + while( regIndex < kMaxTempRegisters && mapFragmentRegister[fi][regIndex] != 0 ) + ++regIndex; + assert(regIndex < kMaxTempRegisters); + if( regIndex > 9 ) { + regname2[1] = '1'; + regname2[2] = '0' + (regIndex-10); + replace_string(text, tmpname, regname2); + } else { + regname[1] = '0' + regIndex; + replace_string(text, tmpname, regname); + } + ++regIndex; + } + + // parameter + if( param >= 0 ) { + std::string paramString("0"); + assert(param<=9); + paramString[0] = '0'+param; + replace_string(text, "$PARAM", paramString); + } + + // texture matrix parameters + if( 
frag.options & kOptionHasTexMatrix ) { + std::string tmpstring("$TMPARAM0"); + std::string paramString("c00"); + for( int i = 0; i < 4; ++i ) { + assert(i<=9); + tmpstring[8] = '0' + i; + int constant = kConstantLocations[kConstMatrixTexture] + param*4 + i; + paramString[1] = '0' + constant/10; + paramString[2] = '0' + constant%10; + replace_string(text, tmpstring, paramString); + } + } + + output += text; + } + + + usedConstants = usedConstantsMask; + + // checks + + // should be no '$' left + assert( output.find('$') == std::string::npos ); + + // debug info + //char buffer[1000]; + //_snprintf_s( buffer, 1000, "Fragments: %i SavedRegs: %i\n", m_FragmentCount, maxTemps ); + //debug += buffer; + //for( size_t i = 0; i < savedRegisters.size(); ++i ) { + // _snprintf_s( buffer, 1000, " saved %s [%i..%i] r%i\n", savedRegisters[i].name.c_str(), savedRegisters[i].firstUse, savedRegisters[i].lastUse, savedRegisters[i].regIndex ); + // debug += buffer; + //} +} diff --git a/Runtime/GfxDevice/d3d/ShaderGenerator.h b/Runtime/GfxDevice/d3d/ShaderGenerator.h new file mode 100644 index 0000000..ccc52b7 --- /dev/null +++ b/Runtime/GfxDevice/d3d/ShaderGenerator.h @@ -0,0 +1,100 @@ +#pragma once +#include <string> + +enum ShaderConstant { + kConstMatrixMVP, // model*view*proj + kConstMatrixMV, // model*view + kConstMatrixMV_IT, // model*view inverse transpose + kConstMatrixTexture,// texture matrix + kConstAmbient, // materialEmissive + sceneAmbient * materialAmbient + kConstColorMatAmbient, // various combos of kConstAmbient, based on color material mode + kConstLightMisc, // 0, 4, 1, 0.5 + kConstMatDiffuse, // material diffuse + kConstMatSpecular, // material specular + kConstLightIndexes, // light start indexes * 4 + kConstCount +}; + +extern const int kConstantLocations[kConstCount]; + + +struct ShaderFragment +{ + unsigned int inputs; + unsigned int constants; + unsigned int dependencies; + unsigned int options; + int temps; + const char* ins; + const char* outs; + const 
char* text; +}; + + +class ShaderGenerator +{ +public: + enum { + kMaxShaderFragments = 32, + kMaxTempRegisters = 12, + kMaxSavedRegisters = 16, + }; + +private: + struct FragmentData { + FragmentData() : fragment(NULL), inputNames(NULL), param(0) { } + FragmentData( const ShaderFragment* f, const char* inames, int p ) : fragment(f), inputNames(inames), param(p) { } + bool operator==( const FragmentData& rhs ) const { + return + fragment==rhs.fragment && + param==rhs.param && + ((inputNames==NULL && rhs.inputNames==NULL) || (inputNames && rhs.inputNames && !strcmp(inputNames, rhs.inputNames))); + } + + const ShaderFragment* fragment; + const char* inputNames; + int param; + }; + +public: + + ShaderGenerator() : m_FragmentCount(0) + { + } + + void AddFragment( const ShaderFragment* fragment, const char* inputNames = NULL, int param = -1 ); + void GenerateShader( std::string& output, unsigned int& usedConstants ); + +private: + int m_FragmentCount; + FragmentData m_Fragments[kMaxShaderFragments]; +}; + + +extern const ShaderFragment kVS_Pos; +extern const ShaderFragment kVS_Light_Diffuse_Pre; +extern const ShaderFragment kVS_Light_Diffuse_Dir; +extern const ShaderFragment kVS_Light_Diffuse_Point; +extern const ShaderFragment kVS_Light_Diffuse_Spot; +extern const ShaderFragment kVS_Light_Specular_Pre; +extern const ShaderFragment kVS_Light_Specular_Dir; +extern const ShaderFragment kVS_Light_Specular_Point; +extern const ShaderFragment kVS_Light_Specular_Spot; +extern const ShaderFragment kVS_Out_Diffuse_Lighting; +extern const ShaderFragment kVS_Out_Specular_Lighting; +extern const ShaderFragment kVS_Out_Diffuse_Lighting_ColorDiffuseAmbient; +extern const ShaderFragment kVS_Out_Diffuse_Lighting_ColorEmission; +extern const ShaderFragment kVS_Out_Diffuse_VertexColor; +extern const ShaderFragment kVS_Out_Diffuse_White; +extern const ShaderFragment kVS_Load_UV0; +extern const ShaderFragment kVS_Load_UV1; +extern const ShaderFragment kVS_Load_Normal; +extern const 
// True for either line-ending character.
static inline bool IsNewline( char c ) { return c == '\n' || c == '\r'; }

// Scans shader assembly text for "dcl_*" declarations whose operand starts
// with `registerName` (e.g. 'v' or 'o') and returns the highest register
// number declared that way, or -1 if there is none.
static int FindMaxUsedDclIndex (const std::string& src, char registerName)
{
	const size_t n = src.size();
	size_t pos = 0;
	int maxDcl = -1;
	while ((pos = src.find("dcl_", pos)) != std::string::npos)
	{
		// skip "dcl_"
		pos += 4;

		// The <cctype> classifiers are undefined for negative values, so plain
		// char must go through unsigned char (shader text can carry non-ASCII
		// bytes, e.g. in comments).

		// skip until end of dcl_*
		while (pos < n && !isspace(static_cast<unsigned char>(src[pos])))
			++pos;
		// skip space
		while (pos < n && isspace(static_cast<unsigned char>(src[pos])))
			++pos;
		// is this the needed register type?
		if (pos < n && src[pos] == registerName) {
			int number = -1;
			// number stays -1 when no digits follow the register letter
			sscanf (src.c_str() + pos + 1, "%d", &number);
			if (number > maxDcl)
				maxDcl = number;
		}
	}
	return maxDcl;
}
fogColor, tmp) + } + else if (fog == kFogExp) + { + // fog = exp(-density*z) + src += Format("mul r%d.x, c%d.y, v%d.x\n", tempReg, fogParamsReg, fogReg); // tmp = (density/ln(2)) * fog + src += Format("exp_sat r%d.x, -r%d.x\n", tempReg, tempReg); // tmp = saturate (exp2 (-tmp)) + src += Format("lrp r%d.rgb, r%d.x, r%d, c%d\n", colorReg, tempReg, colorReg, fogColorReg); // color.rgb = lerp (color, fogColor, tmp) + } + else if (fog == kFogLinear) + { + // fog = (end-z)/(end-start) + src += Format("mad_sat r%d.x, c%d.z, v%d.x, c%d.w\n", tempReg, fogParamsReg, fogReg, fogParamsReg); // tmp = (-1/(end-start)) * fog + (end/(end-start)) + src += Format("lrp r%d.rgb, r%d.x, r%d, c%d\n", colorReg, tempReg, colorReg, fogColorReg); // color.rgb = lerp (color, fogColor, tmp) + } + + + // append final move into oC0 + src += Format("mov oC0, r%d\n", colorReg); + + #if DEBUG_FOG_PATCHING + printf_console ("D3D9 fog patching: after patching, fog mode %d:\n%s\n", fog, src.c_str()); + #endif + + return true; +} + + +bool PatchVertexShaderFogD3D9 (std::string& src) +{ + const bool isVS3 = !strncmp(src.c_str(), "vs_3_0", 6); + if (!isVS3) + return true; // nothing to do + + #if DEBUG_FOG_PATCHING + printf_console ("D3D9 fog patching: original vertex shader:\n%s\n", src.c_str()); + #endif + + // SM3.0 has 12 output registers (o0..o11), but the pixel shader only has 10 input ones. + // Play it safe and let's assume we only have 10 here. 
+ + const int maxDclReg = FindMaxUsedDclIndex (src, 'o'); + if (maxDclReg >= 9) + { + // out of registers + return false; + } + const int fogReg = 9; + std::string fogRegName = Format("o%d", fogReg); + if (!InsertFogDcl (src, fogRegName)) + { + DebugAssert (!"failed to insert fog dcl"); + return false; + } + + // find write to o0, and do the same for oFog + size_t posWrite = src.find ("o0.z,"); + bool writesFullPos = false; + if (posWrite == std::string::npos) + { + posWrite = src.find ("o0,"); + if (posWrite == std::string::npos) + { + DebugAssert (!"couldn't find write to o0"); + return false; + } + writesFullPos = true; + } + + // get whole line + size_t n = src.size(); + size_t posWriteStart = posWrite, posWriteEnd = posWrite; + while (posWriteStart > 0 && !IsNewline(src[posWriteStart])) --posWriteStart; + ++posWriteStart; + while (posWriteEnd < n && !IsNewline(src[posWriteEnd])) ++posWriteEnd; + + std::string instr = src.substr (posWriteStart, posWriteEnd-posWriteStart); + if (writesFullPos) + { + replace_string (instr, "o0", fogRegName, 0); + instr += ".z"; + } + else + { + replace_string (instr, "o0.z", fogRegName, 0); + } + instr += '\n'; + + // insert fog code just after write to position + src.insert (posWriteEnd+1, instr); + + #if DEBUG_FOG_PATCHING + printf_console ("D3D9 fog patching: after patching:\n%s\n", src.c_str()); + #endif + + return true; +} + + +// -------------------------------------------------------------------------- + +#if ENABLE_UNIT_TESTS + +#include "External/UnitTest++/src/UnitTest++.h" + +SUITE (ShaderPatchingD3D9Tests) +{ + +TEST(FindMaxDclIndexNotPresent) +{ + CHECK_EQUAL (-1, FindMaxUsedDclIndex("", 'v')); + CHECK_EQUAL (-1, FindMaxUsedDclIndex("foobar", 'v')); + CHECK_EQUAL (-1, FindMaxUsedDclIndex("dcl_", 'v')); + CHECK_EQUAL (-1, FindMaxUsedDclIndex("dcl_foo", 'v')); + CHECK_EQUAL (-1, FindMaxUsedDclIndex("dcl_foo ", 'v')); + CHECK_EQUAL (-1, FindMaxUsedDclIndex("dcl_foo v", 'v')); +} +TEST(FindMaxDclIndexOne) +{ + 
CHECK_EQUAL (0, FindMaxUsedDclIndex("dcl_foobar v0", 'v')); + CHECK_EQUAL (1, FindMaxUsedDclIndex("dcl_foobar v1", 'v')); + CHECK_EQUAL (2, FindMaxUsedDclIndex("dcl_foobar v2.x", 'v')); + CHECK_EQUAL (3, FindMaxUsedDclIndex("dcl_foobar v3.rgb", 'v')); + CHECK_EQUAL (6, FindMaxUsedDclIndex("dcl_foobar v6", 'v')); + CHECK_EQUAL (10, FindMaxUsedDclIndex("dcl_foobar v10", 'v')); + CHECK_EQUAL (0, FindMaxUsedDclIndex("ps_3_0\ndcl_foobar v0\nmov oC0, v0", 'v')); +} +TEST(FindMaxDclIndexMultiple) +{ + CHECK_EQUAL (2, FindMaxUsedDclIndex("dcl_foobar v0\ndcl_foobar v2", 'v')); + CHECK_EQUAL (3, FindMaxUsedDclIndex("dcl_foobar v3\ndcl_foobar v1", 'v')); +} + +TEST(PatchVSZWrite) +{ + std::string s; + s = "vs_3_0\n" + "dcl_position o0\n" + "dp4 o0.z, c0, c1\n" + ; + CHECK (PatchVertexShaderFogD3D9(s)); + CHECK_EQUAL( + "vs_3_0\n" + "dcl_fog o9\n" + "dcl_position o0\n" + "dp4 o0.z, c0, c1\n" + "dp4 o9, c0, c1\n" + , s); +} +TEST(PatchVSFullWrite) +{ + std::string s; + s = "vs_3_0\n" + "dcl_position o0\n" + "mov o0, c0\n" + ; + CHECK (PatchVertexShaderFogD3D9(s)); + CHECK_EQUAL( + "vs_3_0\n" + "dcl_fog o9\n" + "dcl_position o0\n" + "mov o0, c0\n" + "mov o9, c0.z\n" + , s); +} +TEST(PatchVSWriteNotAtEnd) +{ + std::string s; + s = "vs_3_0\n" + "dcl_position o0\n" + "mov o0, r0\n" + "mov r0, r1\n" + ; + CHECK (PatchVertexShaderFogD3D9(s)); + CHECK_EQUAL( + "vs_3_0\n" + "dcl_fog o9\n" + "dcl_position o0\n" + "mov o0, r0\n" + "mov o9, r0.z\n" + "mov r0, r1\n" + , s); +} +TEST(PatchPSDisjointColorAlphaWrite) +{ + std::string s = + "ps_3_0\n" + "; 31 ALU, 2 TEX\n" + "dcl_2d s0\n" + "dcl_2d s1\n" + "def c5, 0.0, 128.0, 2.0, 0\n" + "dcl_texcoord0 v0.xy\n" + "dcl_texcoord1 v1.xyz\n" + "dcl_texcoord2 v2.xyz\n" + "dcl_texcoord3 v3.xyz\n" + "dcl_texcoord4 v4\n" + "texldp r3.x, v4, s1\n" + "dp3_pp r0.x, v3, v3\n" + "rsq_pp r0.x, r0.x\n" + "mad_pp r0.xyz, r0.x, v3, c0\n" + "dp3_pp r0.w, r0, r0\n" + "rsq_pp r0.w, r0.w\n" + "mul_pp r0.xyz, r0.w, r0\n" + "mov_pp r0.w, c4.x\n" + "dp3_pp r0.x, v1, 
r0\n" + "dp3_pp r2.x, v1, c0\n" + "mul_pp r1.y, c5, r0.w\n" + "max_pp r1.x, r0, c5\n" + "pow r0, r1.x, r1.y\n" + "mov r1.x, r0\n" + "texld r0, v0, s0\n" + "mul r1.w, r0, r1.x\n" + "mul_pp r1.xyz, r0, c3\n" + "mul_pp r0.xyz, r1, c1\n" + "max_pp r2.x, r2, c5\n" + "mul_pp r2.xyz, r0, r2.x\n" + "mov_pp r0.xyz, c1\n" + "mul_pp r0.xyz, c2, r0\n" + "mad r0.xyz, r0, r1.w, r2\n" + "mul_pp r2.w, r3.x, c5.z\n" + "mul r0.xyz, r0, r2.w\n" + "mad_pp oC0.xyz, r1, v2, r0\n" // color RGB + "mov_pp r2.x, c1.w\n" + "mul_pp r0.x, c2.w, r2\n" + "mul_pp r0.y, r0.w, c3.w\n" + "mul r0.x, r1.w, r0\n" + "mad oC0.w, r3.x, r0.x, r0.y\n"; // color A + std::string exps = + "ps_3_0\n" + "dcl_fog v9.x\n" + "; 31 ALU, 2 TEX\n" + "dcl_2d s0\n" + "dcl_2d s1\n" + "def c5, 0.0, 128.0, 2.0, 0\n" + "dcl_texcoord0 v0.xy\n" + "dcl_texcoord1 v1.xyz\n" + "dcl_texcoord2 v2.xyz\n" + "dcl_texcoord3 v3.xyz\n" + "dcl_texcoord4 v4\n" + "texldp r3.x, v4, s1\n" + "dp3_pp r0.x, v3, v3\n" + "rsq_pp r0.x, r0.x\n" + "mad_pp r0.xyz, r0.x, v3, c0\n" + "dp3_pp r0.w, r0, r0\n" + "rsq_pp r0.w, r0.w\n" + "mul_pp r0.xyz, r0.w, r0\n" + "mov_pp r0.w, c4.x\n" + "dp3_pp r0.x, v1, r0\n" + "dp3_pp r2.x, v1, c0\n" + "mul_pp r1.y, c5, r0.w\n" + "max_pp r1.x, r0, c5\n" + "pow r0, r1.x, r1.y\n" + "mov r1.x, r0\n" + "texld r0, v0, s0\n" + "mul r1.w, r0, r1.x\n" + "mul_pp r1.xyz, r0, c3\n" + "mul_pp r0.xyz, r1, c1\n" + "max_pp r2.x, r2, c5\n" + "mul_pp r2.xyz, r0, r2.x\n" + "mov_pp r0.xyz, c1\n" + "mul_pp r0.xyz, c2, r0\n" + "mad r0.xyz, r0, r1.w, r2\n" + "mul_pp r2.w, r3.x, c5.z\n" + "mul r0.xyz, r0, r2.w\n" + "mad_pp r30.xyz, r1, v2, r0\n" + "mov_pp r2.x, c1.w\n" + "mul_pp r0.x, c2.w, r2\n" + "mul_pp r0.y, r0.w, c3.w\n" + "mul r0.x, r1.w, r0\n" + "mad r30.w, r3.x, r0.x, r0.y\n" + "mul r31.x, c7.x, v9.x\n" + "mul r31.x, r31.x, r31.x\n" + "exp_sat r31.x, -r31.x\n" + "lrp r30.rgb, r31.x, r30, c6\n" + "mov oC0, r30\n"; + CHECK (PatchPixelShaderFogD3D9(s, kFogExp2, 6, 7)); + CHECK_EQUAL(exps, s); +} + +} // SUITE + +#endif // 
ENABLE_UNIT_TESTS diff --git a/Runtime/GfxDevice/d3d/ShaderPatchingD3D9.h b/Runtime/GfxDevice/d3d/ShaderPatchingD3D9.h new file mode 100644 index 0000000..e36a619 --- /dev/null +++ b/Runtime/GfxDevice/d3d/ShaderPatchingD3D9.h @@ -0,0 +1,7 @@ +#pragma once + +#include <string> +#include "Runtime/GfxDevice/GfxDeviceTypes.h" + +bool PatchVertexShaderFogD3D9 (std::string& src); +bool PatchPixelShaderFogD3D9 (std::string& src, FogMode fog, int fogColorReg, int fogParamsReg); diff --git a/Runtime/GfxDevice/d3d/TexturesD3D9.cpp b/Runtime/GfxDevice/d3d/TexturesD3D9.cpp new file mode 100644 index 0000000..d2baef9 --- /dev/null +++ b/Runtime/GfxDevice/d3d/TexturesD3D9.cpp @@ -0,0 +1,696 @@ +#include "UnityPrefix.h" +#include "TexturesD3D9.h" +#include "Runtime/Graphics/TextureFormat.h" +#include "Runtime/Graphics/Image.h" +#include "D3D9Context.h" +#include "Runtime/Allocator/FixedSizeAllocator.h" +#include "Runtime/Utilities/BitUtility.h" +#include "Runtime/Graphics/S3Decompression.h" +#include "Runtime/Shaders/GraphicsCaps.h" +#include "D3D9Utils.h" +#include "Runtime/GfxDevice/GfxDevice.h" +#include "Runtime/GfxDevice/VramLimits.h" +#include "Runtime/GfxDevice/TextureUploadUtils.h" +#include "Runtime/GfxDevice/TextureIdMap.h" +#include "External/ProphecySDK/include/prcore/Surface.hpp" +#include "Runtime/Profiler/MemoryProfiler.h" +#include "Runtime/Utilities/InitializeAndCleanup.h" + +struct D3DTexture +{ + explicit D3DTexture( IDirect3DBaseTexture9* tex ) + : texture(tex), wrapMode(D3DTADDRESS_CLAMP), minFilter(D3DTEXF_POINT), magFilter(D3DTEXF_POINT), mipFilter(D3DTEXF_NONE), aniso(1), sRGB(0) { } + + IDirect3DBaseTexture9* texture; + D3DTEXTUREADDRESS wrapMode; + D3DTEXTUREFILTERTYPE minFilter; + D3DTEXTUREFILTERTYPE magFilter; + D3DTEXTUREFILTERTYPE mipFilter; + int aniso; + bool sRGB; +}; + +typedef FixedSizeAllocator<sizeof(D3DTexture)> TextureAllocator; +static TextureAllocator* _TextureAlloc = NULL; + +namespace TextureD3D9Alloc +{ + void StaticInitialize() + { + 
_TextureAlloc = UNITY_NEW_AS_ROOT(TextureAllocator(kMemGfxDevice),kMemGfxDevice, "TextureStructs", ""); + } + + void StaticDestroy() + { + UNITY_DELETE(_TextureAlloc, kMemGfxDevice); + } +} + +static RegisterRuntimeInitializeAndCleanup s_TextureAllocManagerCallbacks(TextureD3D9Alloc::StaticInitialize, TextureD3D9Alloc::StaticDestroy); + +static inline intptr_t AllocD3DTexture(IDirect3DBaseTexture9* tex) +{ + return (intptr_t)(new (_TextureAlloc->alloc()) D3DTexture(tex)); +} + +static inline D3DTexture* QueryD3DTexture(TextureID textureID) +{ + return (D3DTexture*)TextureIdMap::QueryNativeTexture(textureID); +} + + +static D3DCOLOR ColorToD3D( const float color[4] ) +{ + return D3DCOLOR_RGBA( NormalizedToByte(color[0]), NormalizedToByte(color[1]), NormalizedToByte(color[2]), NormalizedToByte(color[3]) ); +} + + +struct FormatDesc { + TextureFormat unityformat; + D3DFORMAT d3dformat; + int bpp; + prcore::PixelFormat prformat; +}; + +const static FormatDesc kTextureFormatTable[kTexFormatPCCount+2] = // +1 for A8L8 case +{ + { kTexFormatPCCount, D3DFMT_UNKNOWN, 0, prcore::PixelFormat() }, + { kTexFormatAlpha8, D3DFMT_A8, 1, prcore::PixelFormat(8,0,0xff) }, // Alpha8 + { kTexFormatARGB4444, D3DFMT_A4R4G4B4, 2, prcore::PixelFormat(16,0x00000f00,0x000000f0,0x0000000f,0x0000f000) }, // ARGB4444 + { kTexFormatRGB24, D3DFMT_X8R8G8B8, 4, prcore::PixelFormat(32,0x00ff0000,0x0000ff00,0x000000ff,0xff000000) }, // RGB24 + { kTexFormatRGBA32, D3DFMT_A8R8G8B8, 4, prcore::PixelFormat(32,0x00ff0000,0x0000ff00,0x000000ff,0xff000000) }, // RGBA32 + { kTexFormatARGB32, D3DFMT_A8R8G8B8, 4, prcore::PixelFormat(32,0x00ff0000,0x0000ff00,0x000000ff,0xff000000) }, // ARGB32 + { kTexFormatARGBFloat, D3DFMT_UNKNOWN, 0, prcore::PixelFormat() }, // ARGBFloat + { kTexFormatRGB565, D3DFMT_R5G6B5, 2, prcore::PixelFormat(16,0x0000f800,0x000007e0,0x0000001f,0x00000000) }, // RGB565 + { kTexFormatBGR24, D3DFMT_X8R8G8B8, 4, prcore::PixelFormat(32,0x00ff0000,0x0000ff00,0x000000ff,0xff000000) }, // BGR24 
+ { kTexFormatAlphaLum16, D3DFMT_L16, 0, prcore::PixelFormat() }, // AlphaLum16 + { kTexFormatDXT1, D3DFMT_DXT1, 0, prcore::PixelFormat() }, // DXT1 + { kTexFormatDXT3, D3DFMT_DXT3, 0, prcore::PixelFormat() }, // DXT3 + { kTexFormatDXT5, D3DFMT_DXT5, 0, prcore::PixelFormat() }, // DXT5 + { kTexFormatRGBA4444, D3DFMT_A4R4G4B4, 2, prcore::PixelFormat(16,0x00000f00,0x000000f0,0x0000000f,0x0000f000) }, // RGBA4444 + + // following are not Unity formats, but might be used as fallbacks for some unsupported formats + { kTexFormatAlphaLum16, D3DFMT_A8L8, 2, prcore::PixelFormat(16,0x00ff,0xff00) }, // A8L8, used on cards that don't support A8; alpha -> alpha +}; + +const static FormatDesc kTextureFormatETC = +{ + kTexFormatETC_RGB4, D3DFMT_X8R8G8B8, 4, prcore::PixelFormat(32,0x00ff0000,0x0000ff00,0x000000ff,0xff000000) +}; + +const static FormatDesc kTextureFormatATC[2] = +{ + { kTexFormatATC_RGB4, D3DFMT_X8R8G8B8, 4, prcore::PixelFormat(32,0x00ff0000,0x0000ff00,0x000000ff,0xff000000) }, // RGB24 + { kTexFormatATC_RGBA8, D3DFMT_A8R8G8B8, 4, prcore::PixelFormat(32,0x00ff0000,0x0000ff00,0x000000ff,0xff000000) }, // RGBA32 +}; + + +D3DFORMAT GetD3D9TextureFormat( TextureFormat inFormat ) +{ + return kTextureFormatTable[inFormat].d3dformat; +} + +static const FormatDesc& GetUploadFormat( TextureFormat inFormat, bool forceFallbackFormat = false ) +{ + if (forceFallbackFormat) + { + return kTextureFormatTable[kTexFormatARGB32]; + } + else if( inFormat == kTexFormatAlpha8 && !gGraphicsCaps.d3d.hasTextureFormatA8 ) + { + // A8 not supported: A8L8 or fallback one depending on support + if( gGraphicsCaps.d3d.hasTextureFormatA8L8 ) + return kTextureFormatTable[ kTexFormatPCCount ]; // return A8L8 option, see table above + else + return kTextureFormatTable[kTexFormatARGB32]; + } + else if( IsCompressedDXTTextureFormat(inFormat) && !gGraphicsCaps.hasS3TCCompression ) + { + // Compressed format not supported: decompress into fallback format + return kTextureFormatTable[kTexFormatARGB32]; 
+ } + else if ( IsCompressedETCTextureFormat(inFormat) ) + { + return kTextureFormatETC; + } + else if ( IsCompressedATCTextureFormat(inFormat) ) + { + return kTextureFormatATC[ HasAlphaTextureFormat(inFormat)? 1 : 0 ]; + } + else if (!gGraphicsCaps.d3d.hasBaseTextureFormat[inFormat]) + { + // This format not supported in general: convert to fallback format + return kTextureFormatTable[kTexFormatARGB32]; + } + + // All ok, return incoming format + return kTextureFormatTable[inFormat]; +} + +intptr_t TexturesD3D9::RegisterNativeTexture(IDirect3DBaseTexture9* texture) const +{ + return AllocD3DTexture(texture); +} + +void TexturesD3D9::UpdateNativeTexture(TextureID textureID, IDirect3DBaseTexture9* texture) +{ + D3DTexture* target = QueryD3DTexture(textureID); + if(target) + target->texture = texture; + else + AddTexture(textureID, texture); +} + +void TexturesD3D9::AddTexture( TextureID textureID, IDirect3DBaseTexture9* texture ) +{ + TextureIdMap::UpdateTexture(textureID, AllocD3DTexture(texture)); +} + +void TexturesD3D9::RemoveTexture( TextureID textureID ) +{ + D3DTexture* target = QueryD3DTexture(textureID); + if(target) + { + target->~D3DTexture(); + _TextureAlloc->free(target); + } + TextureIdMap::RemoveTexture(textureID); +} + +IDirect3DBaseTexture9* TexturesD3D9::GetTexture( TextureID textureID ) const +{ + D3DTexture* target = QueryD3DTexture(textureID); + return target ? target->texture : 0; +} + + + +static void BlitAlphaLum16 (int width, int height, D3DFORMAT d3dFormat, const UInt8* srcData, UInt8* destData, int pitch) +{ + // Handle AlphaLum16 case. ProphecySDK does not support 16 bit/channel formats, + // so we blit manually. 
+ UInt32 rowBytes = GetRowBytesFromWidthAndFormat(width,kTexFormatAlphaLum16); + const UInt8* srcRowData = srcData; + UInt8* destRowData = destData; + if( d3dFormat == D3DFMT_L16 ) + { + for( int r = 0; r < height; ++r ) + { + memcpy( destRowData, srcRowData, rowBytes ); + srcRowData += rowBytes; + destRowData += pitch; + } + } + else if( d3dFormat == D3DFMT_L8 ) + { + for( int r = 0; r < height; ++r ) + { + for( int c = 0; c < width; ++c ) + destRowData[c] = srcRowData[c*2+1]; + srcRowData += rowBytes; + destRowData += pitch; + } + } + else + { + AssertIf( d3dFormat != D3DFMT_A8R8G8B8 ); + for( int r = 0; r < height; ++r ) + { + for( int c = 0; c < width; ++c ) + { + DWORD val = srcRowData[c*2+1]; + ((D3DCOLOR*)destRowData)[c] = 0xFF000000 | (val<<16) | (val<<8) | (val); + } + srcRowData += rowBytes; + destRowData += pitch; + } + } +} + +void InitRGBA32Buffer(int width, int height, UInt8*& buffer, int& srcPitch, prcore::PixelFormat& pf) +{ + int imageSize = CalculateImageSize( width, height, kTexFormatRGBA32 ); + if( buffer == NULL ) + buffer = new UInt8[imageSize]; + srcPitch = GetRowBytesFromWidthAndFormat(width, kTexFormatRGBA32); + pf = GetProphecyPixelFormat(kTexFormatRGBA32); +} + +void TexturesD3D9::UploadTexture2D( + TextureID tid, TextureDimension dimension, UInt8* srcData, int width, int height, + TextureFormat format, int mipCount, UInt32 uploadFlags, int masterTextureLimit, TextureUsageMode usageMode, TextureColorSpace colorSpace ) +{ + IDirect3DDevice9* dev = GetD3DDevice(); + + AssertIf( srcData == NULL ); + AssertIf( (!IsPowerOfTwo(width) || !IsPowerOfTwo(height)) && !IsNPOTTextureAllowed(mipCount > 1) ); + + if( dimension != kTexDim2D ) + { + ErrorString( "Incorrect texture dimension!" ); + return; + } + + // Nothing to do here. Early out instead of failing, empty textures are serialized by dynamic fonts. 
+ if( width == 0 || height == 0 ) + return; + + bool uploadIsCompressed, decompressOnTheFly; + HandleFormatDecompression (format, &usageMode, colorSpace, &uploadIsCompressed, &decompressOnTheFly); + + if( decompressOnTheFly ) + uploadIsCompressed = false; + + const FormatDesc& uploadFormat = GetUploadFormat (decompressOnTheFly ? kTexFormatRGBA32 : format, usageMode != kTexUsageNone); + D3DFORMAT d3dFormat = uploadFormat.d3dformat; + + if( format == kTexFormatAlphaLum16 && !gGraphicsCaps.d3d.hasTextureFormatL16 ) + { + // AlphaLum16 requires some trickery if hardware does not support L16: + // first we try to do L8 instead, then fallback to A8R8G8B8. + if( gGraphicsCaps.d3d.hasTextureFormatL8 ) + d3dFormat = D3DFMT_L8; + else + d3dFormat = D3DFMT_A8R8G8B8; + } + + int baseLevel, maxLevel, texWidth, texHeight; + size_t textureSize; + prcore::Surface::BlitMode blitMode = prcore::Surface::BLIT_COPY; + if (SkipLevelsForMasterTextureLimit (masterTextureLimit, format, uploadFormat.unityformat, mipCount, uploadIsCompressed, &srcData, &width, &height, &baseLevel, &maxLevel, &texWidth, &texHeight, &textureSize)) + blitMode = prcore::Surface::BLIT_SCALE; + + // if we don't support mip maps - don't use them + if( !(gGraphicsCaps.d3d.d3dcaps.TextureCaps & D3DPTEXTURECAPS_MIPMAP) ) + { + mipCount = 1; + baseLevel = 0; + } + + // create texture if it does not exist already + IDirect3DTexture9* texture = NULL; + + D3DTexture* target = QueryD3DTexture(tid); + if(!target) + { + HRESULT hr = dev->CreateTexture( texWidth, texHeight, mipCount - baseLevel, 0, d3dFormat, D3DPOOL_MANAGED, &texture, NULL ); + REGISTER_EXTERNAL_GFX_ALLOCATION_REF(texture, CalculateImageSize(texWidth, texHeight,format)*(mipCount>1?1.33:1),tid.m_ID); + if( FAILED(hr) ) + printf_console( "d3d: failed to create 2D texture id=%i w=%i h=%i mips=%i d3dfmt=%i [%s]\n", tid, texWidth, texHeight, mipCount-baseLevel, d3dFormat, GetD3D9Error(hr) ); + TextureIdMap::UpdateTexture(tid, AllocD3DTexture(texture)); + } + else 
+ { + texture = (IDirect3DTexture9*)target->texture; + } + + if( !texture ) + { + AssertString( "failed to create 2D texture" ); + return; + } + + UInt8* decompressBuffer = NULL; + UInt8* tempBuffer = NULL; + int bufferPitch; + + // Upload the mip levels + for( int level = baseLevel; level <= maxLevel; ++level ) + { + D3DLOCKED_RECT lr; + HRESULT hr = texture->LockRect( level-baseLevel, &lr, NULL, 0 ); + if( FAILED(hr) ) + { + printf_console( "d3d: failed to lock level %i of texture %i [%s]\n", level-baseLevel, tid, GetD3D9Error(hr) ); + if( decompressBuffer ) + delete[] decompressBuffer; + return; + } + + if( decompressOnTheFly ) + { + ConvertCompressedTextureUpload (width, height, format, srcData, decompressBuffer, bufferPitch, usageMode, colorSpace, level); + + prcore::Surface srcSurface( width, height, bufferPitch, GetProphecyPixelFormat(kTexFormatRGBA32), decompressBuffer ); + prcore::Surface dstSurface( texWidth, texHeight, lr.Pitch, uploadFormat.prformat, lr.pBits ); + dstSurface.BlitImage( srcSurface, blitMode ); + } + else if( format == kTexFormatAlphaLum16 ) + { + BlitAlphaLum16( width, height, d3dFormat, srcData, (UInt8*)lr.pBits, lr.Pitch ); + } + else if( !uploadIsCompressed ) + { + prcore::Surface srcSurface( width, height, GetRowBytesFromWidthAndFormat( width,format ), GetProphecyPixelFormat(format), srcData ); + prcore::Surface dstSurface( texWidth, texHeight, lr.Pitch, uploadFormat.prformat, lr.pBits ); + + if (!ConvertUncompressedTextureUpload(srcSurface, dstSurface, blitMode, uploadFormat.unityformat, usageMode, colorSpace, width, height, (UInt8*)lr.pBits, lr.Pitch, uploadFormat.prformat, tempBuffer, bufferPitch)) + { + dstSurface.BlitImage( srcSurface, blitMode ); + } + } + else + { + if( width == texWidth && height == texHeight ) + { + BlitCopyCompressedImage( format, srcData, width, height, (UInt8*)lr.pBits, width, height, false ); + } + else + { + // TODO: fill with garbage? 
+ } + } + + texture->UnlockRect( level-baseLevel ); + + // Go to next level + AssertIf( width == 1 && height == 1 && level != maxLevel ); + AdvanceToNextMipLevel (format, srcData, width, height, texWidth, texHeight); + } + + delete[] decompressBuffer; +} + +void TexturesD3D9::UploadTextureSubData2D( + TextureID tid, UInt8* srcData, int mipLevel, + int x, int y, int width, int height, TextureFormat format, TextureColorSpace colorSpace ) +{ + IDirect3DDevice9* dev = GetD3DDevice(); + if( !dev ) + return; + + // if we don't support mip maps and want to change higher level - don't + if( !(gGraphicsCaps.d3d.d3dcaps.TextureCaps & D3DPTEXTURECAPS_MIPMAP) && mipLevel != 0 ) + return; + + AssertIf( srcData == NULL ); + AssertIf( IsCompressedDXTTextureFormat( format ) ); + + // find the texture + D3DTexture* target = QueryD3DTexture(tid); + if(target == 0) + { + AssertString( "Texture not found" ); + return; + } + + const FormatDesc& uploadFormat = GetUploadFormat( format ); + IDirect3DTexture9* texture = (IDirect3DTexture9*)target->texture; + AssertIf( !texture ); + + RECT rect; + rect.left = x; + rect.top = y; + rect.right = x + width; + rect.bottom = y + height; + D3DLOCKED_RECT lr; + HRESULT hr = texture->LockRect( mipLevel, &lr, &rect, 0 ); + if( FAILED(hr) ) + { + printf_console( "d3d: failed to lock sub level %i of texture %i [%s]\n", mipLevel, tid, GetD3D9Error(hr) ); + return; + } + + // TODO: handle other format conversions + + prcore::Surface srcSurface( width, height, GetRowBytesFromWidthAndFormat(width,format), GetProphecyPixelFormat(format), srcData ); + prcore::Surface dstSurface( width, height, lr.Pitch, uploadFormat.prformat, lr.pBits ); + dstSurface.BlitImage( srcSurface, prcore::Surface::BLIT_COPY ); + + texture->UnlockRect( mipLevel ); +} + + +void TexturesD3D9::UploadTextureCube( + TextureID tid, UInt8* srcData, int faceDataSize, int size, + TextureFormat format, int mipCount, UInt32 uploadFlags, TextureColorSpace colorSpace ) +{ + IDirect3DDevice9* dev 
= GetD3DDevice(); + if (!dev) + return; + + // if we don't support cube mip maps - don't use them + if( !(gGraphicsCaps.d3d.d3dcaps.TextureCaps & D3DPTEXTURECAPS_MIPCUBEMAP) ) + mipCount = 1; + + const FormatDesc& uploadFormat = GetUploadFormat(format); + IDirect3DCubeTexture9* texture = NULL; + + D3DTexture* target = QueryD3DTexture(tid); + if(!target) + { + HRESULT hr = dev->CreateCubeTexture( size, mipCount, 0, uploadFormat.d3dformat, D3DPOOL_MANAGED, &texture, NULL ); + REGISTER_EXTERNAL_GFX_ALLOCATION_REF(texture, 6*CalculateImageSize(size, size, format)*(mipCount>1?1.33:1),tid.m_ID); + if( FAILED(hr) ) + printf_console( "d3d: failed to create cubemap id=%i size=%i mips=%i d3dfmt=%i [%s]\n", tid, size, mipCount, uploadFormat.d3dformat, GetD3D9Error(hr) ); + TextureIdMap::UpdateTexture(tid, AllocD3DTexture(texture)); + } + else + { + texture = (IDirect3DCubeTexture9*)target->texture; + } + if( !texture ) + { + AssertString( "failed to create cubemap" ); + return; + } + + // Upload data + bool uploadIsCompressed = IsCompressedDXTTextureFormat(format); // TODO: handle when we don't have DXT + + static const D3DCUBEMAP_FACES faces[6] = + { + D3DCUBEMAP_FACE_POSITIVE_X, + D3DCUBEMAP_FACE_NEGATIVE_X, + D3DCUBEMAP_FACE_POSITIVE_Y, + D3DCUBEMAP_FACE_NEGATIVE_Y, + D3DCUBEMAP_FACE_POSITIVE_Z, + D3DCUBEMAP_FACE_NEGATIVE_Z, + }; + + int maxLevel = mipCount - 1; + for (int face=0;face<6;face++) + { + int mipSize = size; + UInt8* data = srcData + face * faceDataSize; + + // Upload the mip levels + for( int level = 0; level <= maxLevel; ++level ) + { + D3DLOCKED_RECT lr; + HRESULT hr = texture->LockRect( faces[face], level, &lr, NULL, 0 ); + if( FAILED(hr) ) + { + printf_console( "d3d: failed to lock level %i of face %i of cubemap %i [%s]\n", level, face, tid, GetD3D9Error(hr) ); + return; + } + + // TODO: handle DXT decompression on the fly + // TODO: handle other format conversions + + if( !uploadIsCompressed ) + { + prcore::Surface srcSurface( mipSize, mipSize, 
GetRowBytesFromWidthAndFormat(mipSize,format), GetProphecyPixelFormat(format), data ); + prcore::Surface dstSurface( mipSize, mipSize, lr.Pitch, uploadFormat.prformat, lr.pBits ); + dstSurface.BlitImage( srcSurface, prcore::Surface::BLIT_COPY ); + } + else + { + BlitCopyCompressedImage( format, data, mipSize, mipSize, (UInt8*)lr.pBits, mipSize /* TODO */, mipSize, false ); + } + + texture->UnlockRect( faces[face], level ); + + // Go to next level + data += CalculateImageSize( mipSize, mipSize, format ); + AssertIf( mipSize == 1 && level != maxLevel ); + + mipSize = std::max( mipSize / 2, 1 ); + } + } +} + +void TexturesD3D9::UploadTexture3D( + TextureID tid, UInt8* srcData, int width, int height, int depth, + TextureFormat format, int mipCount, UInt32 uploadFlags ) +{ + IDirect3DDevice9* dev = GetD3DDevice(); + if (!dev || !gGraphicsCaps.has3DTexture) + return; + + // if we don't support volume mip maps - don't use them + if( !(gGraphicsCaps.d3d.d3dcaps.TextureCaps & D3DPTEXTURECAPS_VOLUMEMAP) ) + mipCount = 1; + + + const FormatDesc& uploadFormat = GetUploadFormat( format ); + D3DFORMAT d3dFormat = uploadFormat.d3dformat; + if( format == kTexFormatAlphaLum16 ) + { + // AlphaLum16 requires some trickery if hardware does not support L16: + // first we try to do L8 instead, then fallback to A8R8G8B8. 
+ if( !gGraphicsCaps.d3d.hasTextureFormatL16 && gGraphicsCaps.d3d.hasTextureFormatL8 ) + d3dFormat = D3DFMT_L8; + else + d3dFormat = D3DFMT_A8R8G8B8; + } + + IDirect3DVolumeTexture9* texture = NULL; + + D3DTexture* target = QueryD3DTexture(tid); + if(!target) + { + HRESULT hr = dev->CreateVolumeTexture( width, height, depth, mipCount, 0, d3dFormat, D3DPOOL_MANAGED, &texture, NULL ); + REGISTER_EXTERNAL_GFX_ALLOCATION_REF(texture, depth*CalculateImageSize(width, height, format)*(mipCount>1?1.33:1),tid.m_ID); + if( FAILED(hr) ) + printf_console( "d3d: failed to create 3D texture id=%i w=%i h=%i d=%i mips=%i d3dfmt=%i [%s]\n", tid, width, height, depth, mipCount, d3dFormat, GetD3D9Error(hr) ); + TextureIdMap::UpdateTexture(tid, AllocD3DTexture(texture)); + } + else + { + texture = (IDirect3DVolumeTexture9*)target->texture; + } + if( !texture ) + { + AssertString( "failed to create 3D texture" ); + return; + } + + int maxLevel = mipCount - 1; + for( int level=0; level <= maxLevel; ++level ) + { + D3DLOCKED_BOX lr; + HRESULT hr = texture->LockBox( level, &lr, NULL, 0 ); + if( FAILED(hr) ) + { + printf_console( "d3d: failed to lock level %i of 3D texture %i [%s]\n", level, tid, GetD3D9Error(hr) ); + return; + } + + UInt8* destData = (UInt8*)lr.pBits; + const int sliceSize = CalculateImageSize(width, height, format); + for( int slice = 0; slice < depth; ++slice ) + { + if( format == kTexFormatAlphaLum16 ) + { + BlitAlphaLum16 (width, height, d3dFormat, srcData, destData, lr.RowPitch); + } + else + { + // Regular ProphecySDK blit + prcore::Surface srcSurface( width, height, GetRowBytesFromWidthAndFormat(width,format), GetProphecyPixelFormat(format), srcData ); + prcore::Surface dstSurface( width, height, lr.RowPitch, uploadFormat.prformat, destData ); + dstSurface.BlitImage( srcSurface, prcore::Surface::BLIT_COPY ); + } + srcData += sliceSize; + destData += lr.SlicePitch; + } + + texture->UnlockBox( level ); + + AssertIf( width == 1 && height == 1 && level != maxLevel ); + 
+ width = std::max( width / 2, 1 ); + height = std::max( height / 2, 1 ); + depth = std::max( depth / 2, 1 ); + } +} + + + +bool TexturesD3D9::SetTexture (ShaderType shaderType, int unit, TextureID textureID) +{ + IDirect3DDevice9* dev = GetD3DDevice(); + + D3DTexture* target = QueryD3DTexture(textureID); + if(target) + { + const D3DTexture& texture = *target; + DWORD d3dUnit = GetD3D9SamplerIndex (shaderType, unit); + D3D9_CALL(dev->SetTexture( d3dUnit, texture.texture )); + // TODO: caching of those! + D3D9_CALL(dev->SetSamplerState( d3dUnit, D3DSAMP_ADDRESSU, texture.wrapMode )); + D3D9_CALL(dev->SetSamplerState( d3dUnit, D3DSAMP_ADDRESSV, texture.wrapMode )); + D3D9_CALL(dev->SetSamplerState( d3dUnit, D3DSAMP_ADDRESSW, texture.wrapMode )); + D3D9_CALL(dev->SetSamplerState( d3dUnit, D3DSAMP_MINFILTER, texture.minFilter )); + D3D9_CALL(dev->SetSamplerState( d3dUnit, D3DSAMP_MAGFILTER, texture.magFilter )); + D3D9_CALL(dev->SetSamplerState( d3dUnit, D3DSAMP_MIPFILTER, texture.mipFilter )); + D3D9_CALL(dev->SetSamplerState( d3dUnit, D3DSAMP_MAXANISOTROPY, texture.aniso )); + D3D9_CALL(dev->SetSamplerState( d3dUnit, D3DSAMP_SRGBTEXTURE, texture.sRGB )); + return true; + } + else + { + // Ok, just don't complain here. Mostly with render textures, once in a while it + // happens that RT is not created yet, and someone tries to render with it. + // Just silently ignore that case. 
+ //ErrorString( Format("SetTexture with unknown texture %i", textureID) ); + return false; + } +} + +static D3DTEXTUREADDRESS s_D3DWrapModes[kTexWrapCount] = { + D3DTADDRESS_WRAP, + D3DTADDRESS_CLAMP, +}; +static D3DTEXTUREFILTERTYPE s_D3DMinMagFilters[kTexFilterCount] = { + D3DTEXF_POINT, + D3DTEXF_LINEAR, + D3DTEXF_LINEAR, +}; +static D3DTEXTUREFILTERTYPE s_D3DMipFilters[kTexFilterCount] = { + D3DTEXF_POINT, + D3DTEXF_POINT, + D3DTEXF_LINEAR, +}; + + +void TexturesD3D9::SetTextureParams( TextureID textureID, TextureDimension texDim, TextureFilterMode filter, TextureWrapMode wrap, int anisoLevel, bool hasMipMap, TextureColorSpace colorSpace ) +{ + D3DTexture* target = QueryD3DTexture(textureID); + if(!target) + return; + + D3DTexture& texture = *target; + AssertIf( !texture.texture ); + + if( gGraphicsCaps.hasAnisoFilter && texDim != kTexDim3D ) + texture.aniso = std::min( anisoLevel, gGraphicsCaps.maxAnisoLevel ); + else + texture.aniso = 1; + texture.wrapMode = s_D3DWrapModes[wrap]; + + if( !hasMipMap && filter == kTexFilterTrilinear ) + filter = kTexFilterBilinear; + + texture.minFilter = texture.magFilter = s_D3DMinMagFilters[filter]; + if( texture.aniso > 1 ) + { + texture.minFilter = D3DTEXF_ANISOTROPIC; + // some cards (notably GeForces) can do min anisotropic filter, but not mag anisotropic filter + if( gGraphicsCaps.d3d.d3dcaps.TextureFilterCaps & D3DPTFILTERCAPS_MAGFANISOTROPIC ) + texture.magFilter = D3DTEXF_ANISOTROPIC; + } + texture.mipFilter = s_D3DMipFilters[filter]; + + //sRGB + texture.sRGB = colorSpace == kTexColorSpaceSRGB || colorSpace == kTexColorSpaceSRGBXenon; + // actual setting of sampler states will happen in SetTexture +} + + +void TexturesD3D9::DeleteTexture( TextureID textureID ) +{ + D3DTexture* target = QueryD3DTexture(textureID); + if(!target) + return; + + // texture can be null if texture creation failed. 
At least don't make it crash here + if( target->texture ) + { + REGISTER_EXTERNAL_GFX_DEALLOCATION(target->texture); + ULONG refCount = target->texture->Release(); + AssertIf( refCount != 0 ); + } + TextureIdMap::RemoveTexture(textureID); +} diff --git a/Runtime/GfxDevice/d3d/TexturesD3D9.h b/Runtime/GfxDevice/d3d/TexturesD3D9.h new file mode 100644 index 0000000..113434c --- /dev/null +++ b/Runtime/GfxDevice/d3d/TexturesD3D9.h @@ -0,0 +1,90 @@ +#pragma once + +#include "D3D9Includes.h" +#include "Runtime/Graphics/TextureFormat.h" +#include "Runtime/Graphics/RenderSurface.h" +#include "Runtime/GfxDevice/GfxDeviceTypes.h" +#include "Runtime/Threads/AtomicOps.h" +#include <map> + +class ImageReference; + +class TexturesD3D9 +{ +public: + TexturesD3D9() {} + ~TexturesD3D9() {} + bool SetTexture (ShaderType shaderType, int unit, TextureID textureID); + void SetTextureParams( TextureID texture, TextureDimension texDim, TextureFilterMode filter, TextureWrapMode wrap, int anisoLevel, bool hasMipMap, TextureColorSpace colorSpace ); + + void DeleteTexture( TextureID textureID ); + + void UploadTexture2D( + TextureID tid, TextureDimension dimension, UInt8* srcData, int width, int height, + TextureFormat format, int mipCount, UInt32 uploadFlags, int masterTextureLimit, TextureUsageMode usageMode, TextureColorSpace colorSpace ); + + void UploadTextureSubData2D( + TextureID tid, UInt8* srcData, int mipLevel, + int x, int y, int width, int height, TextureFormat format, TextureColorSpace colorSpace ); + + void UploadTextureCube( + TextureID tid, UInt8* srcData, int faceDataSize, int size, + TextureFormat format, int mipCount, UInt32 uploadFlags, TextureColorSpace colorSpace ); + + void UploadTexture3D( + TextureID tid, UInt8* srcData, int width, int height, int depth, + TextureFormat format, int mipCount, UInt32 uploadFlags ); + + void AddTexture( TextureID textureID, IDirect3DBaseTexture9* texture ); + void RemoveTexture( TextureID textureID ); + IDirect3DBaseTexture9* 
GetTexture( TextureID textureID ) const; + + intptr_t RegisterNativeTexture(IDirect3DBaseTexture9* texture) const; + void UpdateNativeTexture(TextureID textureID, IDirect3DBaseTexture9* texture); +}; + +struct RenderSurfaceD3D9 : RenderSurfaceBase +{ + RenderSurfaceD3D9() + : m_Texture(NULL) + , m_Surface(NULL) + { + RenderSurfaceBase_Init(*this); + } + void Release() { + if (m_Texture) { + REGISTER_EXTERNAL_GFX_DEALLOCATION(m_Texture); + m_Texture->Release(); + m_Texture = NULL; + } + if (m_Surface) { + REGISTER_EXTERNAL_GFX_DEALLOCATION(m_Surface); + m_Surface->Release(); + m_Surface = NULL; + } + } + IDirect3DBaseTexture9* m_Texture; + IDirect3DSurface9* m_Surface; +}; + +struct RenderColorSurfaceD3D9 : public RenderSurfaceD3D9 +{ + RenderColorSurfaceD3D9() + : format(kRTFormatARGB32) + , dim(kTexDim2D) + { + RenderSurfaceBase_InitColor(*this); + } + RenderTextureFormat format; + TextureDimension dim; +}; + +struct RenderDepthSurfaceD3D9 : public RenderSurfaceD3D9 +{ + RenderDepthSurfaceD3D9() + : depthFormat(kDepthFormatNone) + { + RenderSurfaceBase_InitDepth(*this); + } + DepthBufferFormat depthFormat; +}; diff --git a/Runtime/GfxDevice/d3d/TimerQueryD3D9.cpp b/Runtime/GfxDevice/d3d/TimerQueryD3D9.cpp new file mode 100644 index 0000000..70f9ed7 --- /dev/null +++ b/Runtime/GfxDevice/d3d/TimerQueryD3D9.cpp @@ -0,0 +1,196 @@ +#include "UnityPrefix.h" +#if ENABLE_PROFILER +#include "GfxDeviceD3D9.h" +#include "TimerQueryD3D9.h" + + +TimerQueryD3D9::TimerQueryD3D9() + : m_Query(NULL), m_Time(0), m_Active(false) +{ + GetD3DDevice()->CreateQuery(D3DQUERYTYPE_TIMESTAMP, &m_Query); + m_TimeMultiplier = 0.0f; +} + +TimerQueryD3D9::~TimerQueryD3D9() +{ + SAFE_RELEASE(m_Query); +} + +void TimerQueryD3D9::Measure() +{ + // Flush previous result + GetElapsed(kWaitRenderThread); + + TimerQueriesD3D9& queries = GetD3D9GfxDevice().GetTimerQueries(); + if (m_Query && queries.HasFrequencyQuery()) + { + queries.AddActiveTimerQuery(this); + m_Query->Issue(D3DISSUE_END); + m_Active 
= true; + m_Time = kInvalidProfileTime; + } + else + m_Time = 0; + m_TimeMultiplier = 0.0f; +} + +ProfileTimeFormat TimerQueryD3D9::GetElapsed(UInt32 flags) +{ + while (m_Active) + { + bool wait = (flags & kWaitRenderThread) != 0; + if (!GetD3D9GfxDevice().GetTimerQueries().PollNextTimerQuery(wait)) + break; + } + return m_Time; +} + +bool TimerQueryD3D9::PollResult(UInt64& prevTime, bool wait) +{ + for (;;) + { + UINT64 time; + DWORD flags = wait ? D3DGETDATA_FLUSH : 0; + HRESULT hr = m_Query->GetData(&time, sizeof(time), flags); + if (hr == S_OK) + { + UInt64 elapsed = prevTime ? (time - prevTime) : 0; + m_Time = ProfileTimeFormat(elapsed * m_TimeMultiplier); + prevTime = time; + return true; + } + // Stop polling on unknown result (e.g D3DERR_DEVICELOST) + if (hr != S_FALSE) + { + m_Time = 0; + prevTime = 0; + return true; + } + if (!wait) + break; + } + return false; +} + +TimerQueriesD3D9::TimerQueriesD3D9() +{ + m_LastQueryTime = 0; + m_FrequencyQuery = NULL; + memset(m_StartTimeQueries, 0, sizeof(m_StartTimeQueries)); + m_StartTimeQueryIndex = 0; +} + +void TimerQueriesD3D9::ReleaseAllQueries() +{ + SAFE_RELEASE(m_FrequencyQuery); + for (int i = 0; i < kStartTimeQueryCount; i++) + { + delete m_StartTimeQueries[i]; + m_StartTimeQueries[i] = NULL; + } + m_InactiveTimerQueries.append(m_ActiveTimerQueries); + m_InactiveTimerQueries.append(m_PolledTimerQueries); + TimerQueryList& queries = m_InactiveTimerQueries; + for (TimerQueryList::iterator it = queries.begin(); it != queries.end(); ++it) + { + TimerQueryD3D9& query = *it; + query.m_Active = false; + query.m_Time = 0; + SAFE_RELEASE(query.m_Query); + } +} + +void TimerQueriesD3D9::RecreateAllQueries() +{ + Assert(m_ActiveTimerQueries.empty()); + Assert(m_PolledTimerQueries.empty()); + TimerQueryList& queries = m_InactiveTimerQueries; + for (TimerQueryList::iterator it = queries.begin(); it != queries.end(); ++it) + { + TimerQueryD3D9& query = *it; + GetD3DDevice()->CreateQuery(D3DQUERYTYPE_TIMESTAMP, 
&query.m_Query); + } +} + +void TimerQueriesD3D9::BeginTimerQueries() +{ + // Poll queries from previous frames + PollTimerQueries(); + + if (m_FrequencyQuery == NULL) + { + GetD3DDevice()->CreateQuery(D3DQUERYTYPE_TIMESTAMPFREQ, &m_FrequencyQuery); + } + if (m_FrequencyQuery) + m_FrequencyQuery->Issue(D3DISSUE_END); + + int& index = m_StartTimeQueryIndex; + if (m_StartTimeQueries[index] == NULL) + { + m_StartTimeQueries[index] = new TimerQueryD3D9; + } + m_StartTimeQueries[index]->Measure(); + index = (index + 1) % kStartTimeQueryCount; +} + +void TimerQueriesD3D9::EndTimerQueries() +{ + if(m_FrequencyQuery == NULL) + return; + + HRESULT hr; + UINT64 freq; + do + { + hr = m_FrequencyQuery->GetData(&freq, sizeof(freq), D3DGETDATA_FLUSH); + } while (hr == S_FALSE); + if (hr == S_OK) + { + float timeMult = float(1000000000.0 / (double)freq); + TimerQueryList::iterator query, queryEnd = m_ActiveTimerQueries.end(); + for (query = m_ActiveTimerQueries.begin(); query != queryEnd; ++query) + query->SetTimeMultiplier(timeMult); + } + // Move queries from active to polled list + m_PolledTimerQueries.append(m_ActiveTimerQueries); +} + +TimerQueryD3D9* TimerQueriesD3D9::CreateTimerQuery() +{ + TimerQueryD3D9* query = new TimerQueryD3D9; + m_InactiveTimerQueries.push_back(*query); + return query; +} + +void TimerQueriesD3D9::AddActiveTimerQuery(TimerQueryD3D9* query) +{ + query->RemoveFromList(); + m_ActiveTimerQueries.push_back(*query); +} + +void TimerQueriesD3D9::PollTimerQueries() +{ + for (;;) + { + if (!PollNextTimerQuery(false)) + break; + } +} + +bool TimerQueriesD3D9::PollNextTimerQuery(bool wait) +{ + if (m_PolledTimerQueries.empty()) + return false; + + TimerQueryD3D9& query = m_PolledTimerQueries.front(); + if (query.PollResult(m_LastQueryTime, wait)) + { + query.m_Active = false; + query.RemoveFromList(); + m_InactiveTimerQueries.push_back(query); + return true; + } + return false; +} + +#endif diff --git a/Runtime/GfxDevice/d3d/TimerQueryD3D9.h 
b/Runtime/GfxDevice/d3d/TimerQueryD3D9.h new file mode 100644 index 0000000..ecc4a94 --- /dev/null +++ b/Runtime/GfxDevice/d3d/TimerQueryD3D9.h @@ -0,0 +1,67 @@ +#ifndef TIMERQUERYD3D9_H +#define TIMERQUERYD3D9_H + +#if ENABLE_PROFILER + +#include "Runtime/GfxDevice/GfxTimerQuery.h" + +class TimerQueriesD3D9; + +class TimerQueryD3D9 : public GfxTimerQuery +{ +public: + ~TimerQueryD3D9(); + + virtual void Measure(); + virtual ProfileTimeFormat GetElapsed(UInt32 flags); + + bool PollResult(UInt64& prevTime, bool wait); + void SetTimeMultiplier(float tm) { m_TimeMultiplier = tm; } + +private: + friend TimerQueriesD3D9; + TimerQueryD3D9(); + + IDirect3DQuery9* m_Query; + ProfileTimeFormat m_Time; + float m_TimeMultiplier; + bool m_Active; +}; + +class TimerQueriesD3D9 +{ +public: + TimerQueriesD3D9(); + + void ReleaseAllQueries(); + void RecreateAllQueries(); + + void BeginTimerQueries(); + void EndTimerQueries(); + + TimerQueryD3D9* CreateTimerQuery(); + + void AddActiveTimerQuery(TimerQueryD3D9* query); + void PollTimerQueries(); + bool PollNextTimerQuery(bool wait); + + bool HasFrequencyQuery() const { return m_FrequencyQuery != NULL; } + +private: + enum + { + kStartTimeQueryCount = 3 + }; + + UInt64 m_LastQueryTime; + IDirect3DQuery9* m_FrequencyQuery; + TimerQueryD3D9* m_StartTimeQueries[kStartTimeQueryCount]; + int m_StartTimeQueryIndex; + typedef List<TimerQueryD3D9> TimerQueryList; + TimerQueryList m_InactiveTimerQueries; + TimerQueryList m_ActiveTimerQueries; + TimerQueryList m_PolledTimerQueries; +}; + +#endif +#endif diff --git a/Runtime/GfxDevice/d3d/VertexDeclarations.cpp b/Runtime/GfxDevice/d3d/VertexDeclarations.cpp new file mode 100644 index 0000000..180a105 --- /dev/null +++ b/Runtime/GfxDevice/d3d/VertexDeclarations.cpp @@ -0,0 +1,124 @@ +#include "UnityPrefix.h" +#include "VertexDeclarations.h" +#include "D3D9Context.h" +#include "Runtime/GfxDevice/GfxDeviceTypes.h" + +bool VertexDeclarations::KeyType::operator < (const KeyType& rhs) const +{ + 
return memcmp(channels, rhs.channels, sizeof(channels)) < 0; +} + +VertexDeclarations::VertexDeclarations() +{ +} + +VertexDeclarations::~VertexDeclarations() +{ + Clear(); +} + +struct D3DVertexSemantics +{ + UInt8 usage; + UInt8 index; +}; + +static D3DVertexSemantics kChannelVertexSemantics[kShaderChannelCount] = +{ + { D3DDECLUSAGE_POSITION, 0 }, // position + { D3DDECLUSAGE_NORMAL, 0 }, // normal + { D3DDECLUSAGE_COLOR, 0 }, // color + { D3DDECLUSAGE_TEXCOORD, 0 }, // uv + { D3DDECLUSAGE_TEXCOORD, 1 }, // uv2 + { D3DDECLUSAGE_TANGENT, 0 }, // tangent +}; + +static FORCE_INLINE D3DDECLTYPE GetD3DVertexDeclType(const ChannelInfo& info) +{ + switch (info.format) + { + case kChannelFormatFloat: + { + switch (info.dimension) + { + case 1: return D3DDECLTYPE_FLOAT1; + case 2: return D3DDECLTYPE_FLOAT2; + case 3: return D3DDECLTYPE_FLOAT3; + case 4: return D3DDECLTYPE_FLOAT4; + } + break; + } + case kChannelFormatFloat16: + { + switch (info.dimension) + { + case 2: return D3DDECLTYPE_FLOAT16_2; + case 4: return D3DDECLTYPE_FLOAT16_4; + } + break; + } + case kChannelFormatColor: + { + return D3DDECLTYPE_D3DCOLOR; + } + } + Assert("No matching D3D vertex decl type!"); + return D3DDECLTYPE_UNUSED; +} + +IDirect3DVertexDeclaration9* VertexDeclarations::GetVertexDecl( const ChannelInfoArray channels ) +{ + KeyType key; + memcpy(key.channels, channels, sizeof(key.channels)); + + // already have vertex declaration for these formats? 
+ VertexDeclMap::iterator it = m_VertexDeclMap.find( key ); + if( it != m_VertexDeclMap.end() ) + return it->second; + + // don't have this declaration yet - create one + // KD: not sure if elements need to be ordered by stream, playing it safe + D3DVERTEXELEMENT9 elements[kShaderChannelCount+1]; + int elIndex = 0; + for( int stream = 0; stream < kMaxVertexStreams; stream++ ) + { + for( int chan = 0; chan < kShaderChannelCount; chan++ ) + { + if( channels[chan].stream == stream && channels[chan].IsValid() ) + { + DebugAssert(elIndex < kShaderChannelCount); + D3DVERTEXELEMENT9& elem = elements[elIndex]; + elem.Stream = stream; + elem.Offset = channels[chan].offset; + elem.Type = GetD3DVertexDeclType(channels[chan]); + elem.Method = D3DDECLMETHOD_DEFAULT; + elem.Usage = kChannelVertexSemantics[chan].usage; + elem.UsageIndex = kChannelVertexSemantics[chan].index; + ++elIndex; + } + } + } + D3DVERTEXELEMENT9 declEnd = D3DDECL_END(); + elements[elIndex] = declEnd; + + IDirect3DVertexDeclaration9* decl = NULL; + HRESULT hr = GetD3DDevice()->CreateVertexDeclaration( elements, &decl ); + if( FAILED(hr) ) { + // TODO: error! 
+ } + m_VertexDeclMap[key] = decl; + return decl; +} + +void VertexDeclarations::Clear() +{ + VertexDeclMap::iterator it; + for( it = m_VertexDeclMap.begin(); it != m_VertexDeclMap.end(); ++it ) + { + if( it->second ) { + ULONG refCount = it->second->Release(); + AssertIf( refCount != 0 ); + } + } + m_VertexDeclMap.clear(); +} diff --git a/Runtime/GfxDevice/d3d/VertexDeclarations.h b/Runtime/GfxDevice/d3d/VertexDeclarations.h new file mode 100644 index 0000000..f737f5a --- /dev/null +++ b/Runtime/GfxDevice/d3d/VertexDeclarations.h @@ -0,0 +1,26 @@ +#pragma once + +#include "D3D9Includes.h" +#include "Runtime\Filters\Mesh\VertexData.h" +#include <map> + + +class VertexDeclarations +{ +public: + VertexDeclarations(); + ~VertexDeclarations(); + + IDirect3DVertexDeclaration9* GetVertexDecl( const ChannelInfoArray channels ); + void Clear(); + +private: + struct KeyType + { + bool operator < (const KeyType& rhs) const; + ChannelInfoArray channels; + }; + + typedef UNITY_MAP(kMemVertexData, KeyType, IDirect3DVertexDeclaration9*) VertexDeclMap; + VertexDeclMap m_VertexDeclMap; +}; diff --git a/Runtime/GfxDevice/d3d/VertexPipeD3D9.cpp b/Runtime/GfxDevice/d3d/VertexPipeD3D9.cpp new file mode 100644 index 0000000..8a91f74 --- /dev/null +++ b/Runtime/GfxDevice/d3d/VertexPipeD3D9.cpp @@ -0,0 +1,705 @@ +#include "UnityPrefix.h" +#include "VertexPipeD3D9.h" +#include "ShaderGenerator.h" +#include "D3D9Utils.h" +#include "Runtime/GfxDevice/BuiltinShaderParams.h" +#include "External/DirectX/builds/dx9include/d3dx9.h" +#include <map> + + + +#define PRINT_VERTEX_PIPE_STATS 0 + +#define PRINT_AMD_SHADER_ANALYZER_OUTPUT 0 + + + + +// GpuProgramsD3D.cpp +ID3DXBuffer* AssembleD3DShader( const std::string& source ); + + +#if PRINT_AMD_SHADER_ANALYZER_OUTPUT +void PrintAMDShaderAnalyzer( const std::string& source ) +{ + const char* kPath = "C:\\Program Files\\AMD\\GPU ShaderAnalyzer 1.45\\GPUShaderAnalyzer.exe"; + const char* kInputPath = "ShaderInput.txt"; + const char* kOutputPath = 
"ShaderOutput.txt"; + DeleteFileA(kInputPath); + DeleteFileA(kOutputPath); + FILE* fout = fopen(kInputPath, "wt"); + fwrite(source.c_str(), source.size(), 1, fout); + fclose(fout); + + std::string commandLine = std::string(kPath) + " " + kInputPath + " -Analyze " + kOutputPath + " -Module Latest -ASIC HD3870"; + + STARTUPINFOA si; + ZeroMemory( &si, sizeof(si) ); + si.cb = sizeof(si); + + PROCESS_INFORMATION pi; + ZeroMemory( &pi, sizeof(pi) ); + + if( CreateProcessA( + NULL, // name of executable module + (char*)commandLine.c_str(), // command line string + NULL, // process attributes + NULL, // thread attributes + FALSE, // handle inheritance option + 0, // creation flags + NULL, // new environment block + NULL, // current directory name + &si, // startup information + &pi ) ) // process information + { + WaitForSingleObject( pi.hProcess, INFINITE ); + CloseHandle( pi.hProcess ); + CloseHandle( pi.hThread ); + + FILE* fin = fopen(kOutputPath, "rt"); + if( fin ) { + fseek(fin, 0, SEEK_END); + int length = ftell(fin); + fseek(fin, 0, SEEK_SET); + char* buffer = new char[length+1]; + memset(buffer, 0,length+1); + fread(buffer, length, 1, fin); + fclose(fin); + } + } + //DeleteFileA(kInputPath); + //DeleteFileA(kOutputPath); +} +#endif + + + +static inline D3DCOLOR ColorToD3D( const float color[4] ) +{ + return D3DCOLOR_RGBA( NormalizedToByte(color[0]), NormalizedToByte(color[1]), NormalizedToByte(color[2]), NormalizedToByte(color[3]) ); +} + + +static void ResetDeviceVertexPipeStateD3D9 (IDirect3DDevice9* dev, const TransformState& state, const BuiltinShaderParamValues& builtins, const VertexPipeConfig& config, const VertexPipeDataD3D9& data) +{ + DebugAssertIf (!dev); + + data.haveToResetDeviceState = false; + + dev->SetTransform( D3DTS_WORLD, (const D3DMATRIX*)state.worldViewMatrix.GetPtr() ); + Matrix4x4f dummyViewMatrix; + dummyViewMatrix.SetIdentity(); dummyViewMatrix.Get(2,2) = -1.0f; + dev->SetTransform( D3DTS_VIEW, (const D3DMATRIX*)dummyViewMatrix.GetPtr() 
); + dev->SetTransform( D3DTS_PROJECTION, (const D3DMATRIX*)builtins.GetMatrixParam(kShaderMatProj).GetPtr() ); + + dev->SetRenderState( D3DRS_COLORVERTEX, FALSE ); + + for( int i = 0; i < kMaxSupportedVertexLights; ++i ) + dev->LightEnable( i, FALSE ); + + dev->SetRenderState( D3DRS_AMBIENT, 0 ); + dev->SetRenderState( D3DRS_LIGHTING, FALSE ); + dev->SetRenderState( D3DRS_SPECULARENABLE, FALSE ); + + for( int i = 0; i < kMaxSupportedTextureCoords; ++i ) { + dev->SetTextureStageState( i, D3DTSS_TEXCOORDINDEX, i ); + dev->SetTextureStageState( i, D3DTSS_TEXTURETRANSFORMFLAGS, D3DTSS_TCI_PASSTHRU ); + } +} + +void ResetVertexPipeStateD3D9 (IDirect3DDevice9* dev, TransformState& state, BuiltinShaderParamValues& builtins, VertexPipeConfig& config, VertexPipeDataD3D9& data, VertexPipePrevious& previous) +{ + config.Reset(); + data.Reset(); + state.Invalidate(builtins); + previous.Reset(); + + data.haveToResetDeviceState = true; + if (dev) + ResetDeviceVertexPipeStateD3D9 (dev, state, builtins, config, data); +} + + +void SetupFixedFunctionD3D9 ( + IDirect3DDevice9* dev, + TransformState& state, + BuiltinShaderParamValues& builtins, + const VertexPipeConfig& config, + const VertexPipeDataD3D9& data, + VertexPipePrevious& previous, + bool vsActive, bool immediateMode) +{ + if (dev && data.haveToResetDeviceState) + ResetDeviceVertexPipeStateD3D9 (dev, state, builtins, config, data); + + // matrices + if (!vsActive) + { + D3D9_CALL(dev->SetTransform( D3DTS_WORLD, (const D3DMATRIX*)state.worldViewMatrix.GetPtr() )); + } + + // set color material first, then material, then color + if( config.colorMaterial != previous.config.colorMaterial ) + { + if( config.colorMaterial != kColorMatDisabled ) + { + D3DMATERIALCOLORSOURCE srcAmbient, srcDiffuse, srcEmission; + switch( config.colorMaterial ) + { + case kColorMatEmission: + srcAmbient = D3DMCS_MATERIAL; + srcDiffuse = D3DMCS_MATERIAL; + srcEmission = D3DMCS_COLOR1; + break; + case kColorMatAmbientAndDiffuse: + srcAmbient = 
D3DMCS_COLOR1; + srcDiffuse = D3DMCS_COLOR1; + srcEmission = D3DMCS_MATERIAL; + break; + default: + return; + } + D3D9_CALL(dev->SetRenderState( D3DRS_AMBIENTMATERIALSOURCE, srcAmbient )); + D3D9_CALL(dev->SetRenderState( D3DRS_DIFFUSEMATERIALSOURCE, srcDiffuse )); + D3D9_CALL(dev->SetRenderState( D3DRS_SPECULARMATERIALSOURCE, D3DMCS_MATERIAL )); + D3D9_CALL(dev->SetRenderState( D3DRS_EMISSIVEMATERIALSOURCE, srcEmission )); + D3D9_CALL(dev->SetRenderState( D3DRS_COLORVERTEX, TRUE )); + } + else + { + D3D9_CALL(dev->SetRenderState( D3DRS_COLORVERTEX, FALSE )); + } + } + + // material + if( !vsActive && config.hasLighting ) + D3D9_CALL(dev->SetMaterial( &data.material )); + + // lights + D3DLIGHT9 d3dlight; + d3dlight.Ambient.r = d3dlight.Ambient.g = d3dlight.Ambient.b = d3dlight.Ambient.a = 0.0f; + d3dlight.Falloff = 1.0f; + d3dlight.Attenuation0 = 1.0f; + d3dlight.Attenuation1 = 0.0f; + + const UInt32 lightsEnabled = (1<<data.vertexLightCount)-1; + const UInt32 lightsPrevious = (1<<previous.vertexLightCount)-1; + const UInt32 lightsDifferent = lightsPrevious ^ lightsEnabled; + UInt32 lightMask = 1; + for (int i = 0; i < kMaxSupportedVertexLights; ++i, lightMask <<= 1) + { + const UInt32 lightDiff = lightsDifferent & lightMask; + if( lightsEnabled & lightMask ) + { + const GfxVertexLight& l = data.lights[i]; + static D3DLIGHTTYPE kD3DTypes[kLightTypeCount] = { D3DLIGHT_SPOT, D3DLIGHT_DIRECTIONAL, D3DLIGHT_POINT }; + d3dlight.Type = kD3DTypes[l.type]; + d3dlight.Diffuse = *(const D3DCOLORVALUE*)&l.color; + d3dlight.Specular = *(const D3DCOLORVALUE*)&l.color; + d3dlight.Position = *(const D3DVECTOR*)&l.position; + d3dlight.Direction = *(const D3DVECTOR*)&l.spotDirection; + d3dlight.Range = l.range; + d3dlight.Attenuation2 = l.quadAtten; + d3dlight.Theta = Deg2Rad(l.spotAngle) * 0.5f; + d3dlight.Phi = Deg2Rad(l.spotAngle); + D3D9_CALL(dev->SetLight (i,&d3dlight)); + if (lightDiff) + D3D9_CALL(dev->LightEnable (i,TRUE)); + } + else + { + if (lightDiff) + 
D3D9_CALL(dev->LightEnable (i, FALSE)); + } + } + previous.vertexLightCount = data.vertexLightCount; + + + // ambient, lighting & specular + if( data.ambient != previous.ambient ) + { + D3D9_CALL(dev->SetRenderState( D3DRS_AMBIENT, ColorToD3D(data.ambient.GetPtr()) )); + previous.ambient = data.ambient; + } + if( config.hasLighting != previous.config.hasLighting ) + { + D3D9_CALL(dev->SetRenderState( D3DRS_LIGHTING, config.hasLighting ? TRUE : FALSE )); + } + if( config.hasSpecular != previous.config.hasSpecular ) + { + D3D9_CALL(dev->SetRenderState( D3DRS_SPECULARENABLE, config.hasSpecular ? TRUE : FALSE )); + } + if (config.hasNormalization != previous.config.hasNormalization) + { + D3D9_CALL(dev->SetRenderState (D3DRS_NORMALIZENORMALS, config.hasNormalization ? TRUE : FALSE)); + } + + + UInt32 textureMatrixModes = config.textureMatrixModes; + UInt32 projectedTextures = data.projectedTextures; + UInt32 textureSources = config.textureSources; + for( int i = 0; i < config.texCoordCount; ++i ) + { + // texgen + UInt32 texSource = (textureSources >> (i*3)) & 0x7; + if( !vsActive ) + { + static DWORD kTexSourceFlags[kTexSourceTypeCount] = { 0, 1, D3DTSS_TCI_SPHEREMAP, D3DTSS_TCI_CAMERASPACEPOSITION, D3DTSS_TCI_CAMERASPACEPOSITION, D3DTSS_TCI_CAMERASPACEREFLECTIONVECTOR, D3DTSS_TCI_CAMERASPACENORMAL }; + DWORD d3dsource = kTexSourceFlags[texSource]; + if( immediateMode && texSource <= kTexSourceUV1 ) + d3dsource = i; + D3D9_CALL(dev->SetTextureStageState( i, D3DTSS_TEXCOORDINDEX, d3dsource )); + } + else + { + D3D9_CALL(dev->SetTextureStageState( i, D3DTSS_TEXCOORDINDEX, i )); + } + + // matrix + unsigned matmode = (textureMatrixModes >> (i*2)) & 3; + static DWORD kTexFlags[kTexMatrixTypeCount] = { D3DTTFF_DISABLE, D3DTTFF_COUNT2, D3DTTFF_COUNT3, D3DTTFF_COUNT4 }; + DWORD textureTransformFlags = kTexFlags[matmode]; + if (projectedTextures & (1<<i)) + textureTransformFlags |= D3DTTFF_PROJECTED; + if (vsActive) + textureTransformFlags = D3DTTFF_DISABLE; + 
D3D9_CALL(dev->SetTextureStageState( i, D3DTSS_TEXTURETRANSFORMFLAGS, textureTransformFlags )); + + if( !vsActive ) + { + if( texSource == kTexSourceObject ) + { + // D3D has no "object space" texture generation. + // So instead we use camera space, and multiply the matrix so it matches: + // newMatrix = matrix * inverse(modelview) * mirrorZ + // Mirror along Z is required to match OpenGL's generation (eye space Z is negative). + Matrix4x4f mv = state.worldViewMatrix; + mv.Invert_Full(); + // Negate Z axis (mv = mv * Scale(1,1,-1)) + mv.Get(0,2) = -mv.Get(0,2); + mv.Get(1,2) = -mv.Get(1,2); + mv.Get(2,2) = -mv.Get(2,2); + mv.Get(3,2) = -mv.Get(3,2); + Matrix4x4f texmat; + MultiplyMatrices4x4 (&state.texMatrices[i], &mv, &texmat); + D3D9_CALL(dev->SetTransform( (D3DTRANSFORMSTATETYPE)(D3DTS_TEXTURE0 + i), (const D3DMATRIX*)texmat.GetPtr() )); + } + else + { + D3D9_CALL(dev->SetTransform( (D3DTRANSFORMSTATETYPE)(D3DTS_TEXTURE0 + i), (const D3DMATRIX*)state.texMatrices[i].GetPtr() )); + } + } + } + if( config.texCoordCount != previous.config.texCoordCount ) + { + for( int i = config.texCoordCount; i < kMaxSupportedTextureCoords; ++i ) + { + D3D9_CALL(dev->SetTextureStageState( i, D3DTSS_TEXCOORDINDEX, i )); + D3D9_CALL(dev->SetTextureStageState( i, D3DTSS_TEXTURETRANSFORMFLAGS, D3DTTFF_DISABLE )); + } + } + + if( !vsActive ) + D3D9_CALL(dev->SetVertexShader(NULL)); + previous.vertexShader = NULL; + previous.config = config; +} + + + + + +// ---------------------------------------------------------------------- + + + + +struct VSLightData { + Vector4f pos; + Vector4f dir; + Vector4f color; + Vector4f params; +}; + +struct ShaderData { + IDirect3DVertexShader9* shader; + unsigned int usedConstants; + std::string text; + //std::string debug; +}; + + +struct VertexPipeKeyCompare { + union { + VertexPipeConfig key; + UInt64 asint; + } u; + VertexPipeKeyCompare() { u.asint = 0; } + bool operator <( const VertexPipeKeyCompare& r ) const { return u.asint < r.u.asint; } +}; + 
typedef std::map<VertexPipeKeyCompare, ShaderData> ShaderCache;
// Process-wide cache of generated vertex shaders, keyed by pipe configuration.
static ShaderCache g_Shaders;


// Returns the vertex shader emulating the fixed-function pipe for `config`,
// generating, assembling and caching it on first use.
//   config        - pipe configuration; also the cache key
//   dev           - device used to create the shader on a cache miss
//   usedConstants - out: the usedConstants value reported by the shader generator
// The result (including a NULL shader if creation failed) is cached.
static IDirect3DVertexShader9* GetShaderForConfig( const VertexPipeConfig& config, IDirect3DDevice9* dev, unsigned int& usedConstants )
{
	VertexPipeKeyCompare key;
	key.u.key = config;
	ShaderCache::iterator it = g_Shaders.find(key);
	if( it != g_Shaders.end() ) {
		const ShaderData& sdata = it->second;
		usedConstants = sdata.usedConstants;
		return sdata.shader;
	}

	// Cache miss: assemble the shader from fragments. Fragments are added in
	// the order below: position, lighting / color output, texgen sources,
	// texcoord outputs.
	ShaderGenerator gen;
	gen.AddFragment( &kVS_Pos );

	// lighting
	if( config.hasLighting )
	{
		// normalize normals?
		if (config.hasNormalization)
			gen.AddFragment (&kVS_Normalize_Normal);

		// One fragment per light type present (bit per type in hasLightType).
		UInt32 hasLightType = config.hasLightType;
		if( config.hasSpecular )
		{
			gen.AddFragment( &kVS_Light_Specular_Pre );
			if( hasLightType & (1<<kLightDirectional) )
				gen.AddFragment( &kVS_Light_Specular_Dir );
			if( hasLightType & (1<<kLightPoint) )
				gen.AddFragment( &kVS_Light_Specular_Point );
			if( hasLightType & (1<<kLightSpot) )
				gen.AddFragment( &kVS_Light_Specular_Spot );
		}
		else
		{
			gen.AddFragment( &kVS_Light_Diffuse_Pre );
			if( hasLightType & (1<<kLightDirectional) )
				gen.AddFragment( &kVS_Light_Diffuse_Dir );
			if( hasLightType & (1<<kLightPoint) )
				gen.AddFragment( &kVS_Light_Diffuse_Point );
			if( hasLightType & (1<<kLightSpot) )
				gen.AddFragment( &kVS_Light_Diffuse_Spot );
		}

		// Diffuse output variant depends on whether per-vertex color feeds
		// the material (color material mode).
		const ShaderFragment* frag = NULL;
		if( config.hasVertexColor ) {
			switch( config.colorMaterial ) {
			case kColorMatAmbientAndDiffuse: frag = &kVS_Out_Diffuse_Lighting_ColorDiffuseAmbient; break;
			case kColorMatEmission: frag = &kVS_Out_Diffuse_Lighting_ColorEmission; break;
			default: frag = &kVS_Out_Diffuse_Lighting; break;
			}
		} else {
			frag = &kVS_Out_Diffuse_Lighting;
		}
		gen.AddFragment( frag );

		if( config.hasSpecular ) {
			gen.AddFragment( &kVS_Out_Specular_Lighting );
		}

	}
	else
	{
		if( config.hasVertexColor )
			gen.AddFragment( &kVS_Out_Diffuse_VertexColor );
		else
			gen.AddFragment( &kVS_Out_Diffuse_White );
	}
	// texgen
	// Tables below are indexed by TextureSourceMode / TextureMatrixMode.
	static const ShaderFragment* kFragSources[kTexSourceTypeCount] = {
		&kVS_Load_UV0,
		&kVS_Load_UV1,
		&kVS_Temp_SphereMap,
		&kVS_Temp_ObjSpacePos,
		&kVS_Temp_CamSpacePos,
		&kVS_Temp_CamSpaceRefl,
		&kVS_Temp_CamSpaceN,
	};
	static const char* kFragSourceNames[kTexSourceTypeCount] = {
		"UV0",
		"UV1",
		"SPHR",
		"OPOS",
		"CPOS",
		"REFL",
		"CNOR",
	};
	// NOTE(review): the last entry (kTexMatrix4) reuses kVS_Out_Matrix3 - confirm this is intentional.
	static const ShaderFragment* kFragMatrices[kTexMatrixTypeCount] = {
		&kVS_Out_TexCoord,
		&kVS_Out_Matrix2,
		&kVS_Out_Matrix3,
		&kVS_Out_Matrix3
	};
	// First pass: fragments that load/compute each unit's texcoord source
	// (3 bits per unit in textureSources).
	for( int i = 0; i < config.texCoordCount; ++i )
	{
		unsigned src = (config.textureSources >> (i*3)) & 7;
		// normalize normals?
		if (config.hasNormalization)
		{
			if (src == kTexSourceSphereMap || src == kTexSourceCubeReflect || src == kTexSourceCubeNormal)
				gen.AddFragment (&kVS_Normalize_Normal);
		}
		gen.AddFragment( kFragSources[src] );
	}
	// Second pass: fragments that write each unit's texcoord, selected by the
	// unit's matrix mode (2 bits per unit in textureMatrixModes).
	for( int i = 0; i < config.texCoordCount; ++i )
	{
		unsigned src = (config.textureSources >> (i*3)) & 7;
		unsigned matmode = (config.textureMatrixModes >> (i*2)) & 3;
		gen.AddFragment (kFragMatrices[matmode], kFragSourceNames[src], i);
	}
	ShaderData data;
	data.shader = NULL;
	gen.GenerateShader( data.text, data.usedConstants );

	ID3DXBuffer* compiledShader = AssembleD3DShader( data.text );
	if( compiledShader ) {
		dev->CreateVertexShader( (const DWORD*)compiledShader->GetBufferPointer(), &data.shader );
		compiledShader->Release();
	}

	// data.shader stays NULL if assembly or creation failed; it is cached regardless.
	AssertIf(!data.shader);
	g_Shaders.insert( std::make_pair(key, data) );

	#if PRINT_AMD_SHADER_ANALYZER_OUTPUT
	PrintAMDShaderAnalyzer( data.text );
	#endif

	usedConstants = data.usedConstants;
	return data.shader;
}

void SetupVertexShaderD3D9 (
	IDirect3DDevice9* dev,
	TransformState& state,
	const BuiltinShaderParamValues& builtins,
	VertexPipeConfig& config,
	const VertexPipeDataD3D9& data,
	VertexPipePrevious& previous,
	VertexShaderConstantCache& cache,
	bool
vsActive, bool immediateMode) +{ + if( vsActive ) + return; + + D3D9_CALL(dev->SetTransform( D3DTS_WORLD, (const D3DMATRIX*)state.worldViewMatrix.GetPtr() )); + + // figure out which light types do we have + if( !config.hasLighting ) { + config.hasLightType = 0; + } else { + UInt32 hasLightType = 0; + for (int i = 0; i < data.vertexLightCount; ++i) + { + hasLightType |= (1<<data.lights[i].type); + } + config.hasLightType = hasLightType; + } + + // create vertex shader + unsigned int usedConstants; + IDirect3DVertexShader9* shader = GetShaderForConfig(config, dev, usedConstants); + AssertIf(!shader); + + // set shader + if( shader != previous.vertexShader ) + { + D3D9_CALL(dev->SetVertexShader( shader )); + previous.vertexShader = shader; + } + + // matrices + Matrix4x4f mvp; + MultiplyMatrices4x4 (&builtins.GetMatrixParam(kShaderMatProj), &state.worldViewMatrix, &mvp ); + mvp.Transpose(); + cache.SetValues( kConstantLocations[kConstMatrixMVP], mvp.GetPtr(), 4 ); + + const Matrix4x4f& mv = state.worldViewMatrix; + cache.SetValues( kConstantLocations[kConstMatrixMV], mv.GetPtr(), 4 ); + + if( usedConstants & (1<<kConstMatrixMV_IT) ) + { + Matrix4x4f matrixTemp; + Matrix4x4f::Invert_General3D( mv, matrixTemp ); + matrixTemp.Transpose(); + if (data.normalization == kNormalizationScale) + { + // Inverse transpose of modelview is only used to transform the normals + // in our generated shader. We can just stuff mesh scale in there. 
+ float scale = Magnitude (state.worldMatrix.GetAxisX()); + matrixTemp.Get (0, 0) *= scale; + matrixTemp.Get (1, 0) *= scale; + matrixTemp.Get (2, 0) *= scale; + matrixTemp.Get (0, 1) *= scale; + matrixTemp.Get (1, 1) *= scale; + matrixTemp.Get (2, 1) *= scale; + matrixTemp.Get (0, 2) *= scale; + matrixTemp.Get (1, 2) *= scale; + matrixTemp.Get (2, 2) *= scale; + } + cache.SetValues( kConstantLocations[kConstMatrixMV_IT], matrixTemp.GetPtr(), 4 ); + } + + // misc + float misc[4] = { 0, 4, 1, 0.5f }; + cache.SetValues( kConstantLocations[kConstLightMisc], misc, 1 ); + + // if lighting is used: + if( config.hasLighting ) + { + // ambient + if( config.colorMaterial != kColorMatAmbientAndDiffuse ) + { + SimpleVec4 amb; + amb.val[0] = data.ambientClamped.val[0] * data.material.Ambient.r; + amb.val[1] = data.ambientClamped.val[1] * data.material.Ambient.g; + amb.val[2] = data.ambientClamped.val[2] * data.material.Ambient.b; + amb.val[3] = data.ambientClamped.val[3] * data.material.Ambient.a; + if( config.colorMaterial != kColorMatEmission ) { + amb.val[0] += data.material.Emissive.r; + amb.val[1] += data.material.Emissive.g; + amb.val[2] += data.material.Emissive.b; + amb.val[3] += data.material.Emissive.a; + } + cache.SetValues( kConstantLocations[kConstAmbient], amb.GetPtr(), 1 ); + } + else + { + cache.SetValues( kConstantLocations[kConstColorMatAmbient], data.ambientClamped.GetPtr(), 1 ); + cache.SetValues( kConstantLocations[kConstAmbient], &data.material.Emissive.r, 1 ); + } + previous.ambient = data.ambient; + + // material + cache.SetValues( kConstantLocations[kConstMatDiffuse], &data.material.Diffuse.r, 1 ); + D3D9_CALL(dev->SetVertexShaderConstantF( kConstantLocations[kConstMatDiffuse], &data.material.Diffuse.r, 1 )); + if( usedConstants & (1<<kConstMatSpecular) ) + { + D3DCOLORVALUE specAndPower = data.material.Specular; + specAndPower.a = data.material.Power; + cache.SetValues( kConstantLocations[kConstMatSpecular], &specAndPower.r, 1 ); + } + + // pack the 
lights + int lightCounts[kLightTypeCount]; + float lightStart[kLightTypeCount]; + int lightsTotal = 0; + float lightsTotalF = 0; + memset(lightCounts, 0, sizeof(lightCounts)); + memset(lightStart, 0, sizeof(lightStart)); + VSLightData lights[kMaxSupportedVertexLights]; + for( int t = 0; t < kLightTypeCount; ++t ) + { + lightStart[t] = lightsTotalF; + for( int i = 0; i < data.vertexLightCount; ++i ) + { + const GfxVertexLight& src = data.lights[i]; + if( src.type != t ) + continue; + + VSLightData& dst = lights[lightsTotal]; + // position + dst.pos.Set( src.position.x, src.position.y, src.position.z, 1.0f ); + // direction + dst.dir.Set( -src.spotDirection.x, -src.spotDirection.y, -src.spotDirection.z, 0.0f ); + // color + dst.color.Set( src.color.x, src.color.y, src.color.z, 1.0f ); + // params: 1/(cos(theta/2)-cos(phi/2), cos(phi/2), range^2, d^2 attenuation + float sqrRange = src.range * src.range; + if( src.type == kLightSpot ) + { + float cosTheta = cosf(Deg2Rad(src.spotAngle)*0.25f); + float cosPhi = cosf(Deg2Rad(src.spotAngle)*0.5f); + float cosDiff = cosTheta - cosPhi; + dst.params.Set( + cosDiff != 0.0f ? 
1.0f / cosDiff : 0.0f, + cosPhi, + src.range * src.range, + src.quadAtten + ); + } + else + { + dst.params.Set( + 0.0f, + 0.0f, + src.range * src.range, + src.quadAtten + ); + } + + ++lightCounts[t]; + ++lightsTotal; + ++lightsTotalF; + } + } + + // light indices + int miscI[kLightTypeCount][4]; + for( int t = 0; t < kLightTypeCount; ++t ) { + miscI[t][0] = lightCounts[t]; + miscI[t][1] = 0; + miscI[t][2] = 0; + miscI[t][3] = 0; + } + D3D9_CALL(dev->SetVertexShaderConstantI( 0, miscI[0], kLightTypeCount )); + + if (lightsTotal) + cache.SetValues( 60, (const float*)lights, 4*lightsTotal ); + misc[0] = lightStart[0] * 4.0f; + misc[1] = lightStart[1] * 4.0f; + misc[2] = lightStart[2] * 4.0f; + misc[3] = 0.0f; + cache.SetValues(kConstantLocations[kConstLightIndexes], misc, 1); + } + + // texture matrices & transform flags + UInt32 matrixModes = config.textureMatrixModes; + UInt32 projectedTextures = data.projectedTextures; + UInt32 textureSources = config.textureSources; + for( int i = 0; i < config.texCoordCount; ++i ) + { + unsigned matmode = (matrixModes >> (i*2)) & 0x3; + if( matmode != kTexMatrixNone ) + { + cache.SetValues(kConstantLocations[kConstMatrixTexture]+i*4, state.texMatrices[i].GetPtr(), 4); + } + D3D9_CALL(dev->SetTextureStageState( i, D3DTSS_TEXCOORDINDEX, i )); + // projected texture flag + DWORD textureTransformFlags = (projectedTextures & (1<<i)) ? 
D3DTTFF_PROJECTED : D3DTTFF_DISABLE; + D3D9_CALL(dev->SetTextureStageState( i, D3DTSS_TEXTURETRANSFORMFLAGS, textureTransformFlags )); + } + + if( config.texCoordCount != previous.config.texCoordCount ) + { + for( int i = config.texCoordCount; i < kMaxSupportedTextureCoords; ++i ) + { + D3D9_CALL(dev->SetTextureStageState( i, D3DTSS_TEXCOORDINDEX, i )); + D3D9_CALL(dev->SetTextureStageState( i, D3DTSS_TEXTURETRANSFORMFLAGS, D3DTTFF_DISABLE )); + } + } + + previous.config = config; +} + + +void CleanupVertexShadersD3D9 () +{ + #if PRINT_VERTEX_PIPE_STATS + printf_console("Vertex pipe shader cache: %i shaders generated\n", g_Shaders.size()); + #endif + ShaderCache::iterator it, itEnd = g_Shaders.end(); + for( it = g_Shaders.begin(); it != itEnd; ++it ) + { + IDirect3DVertexShader9* vs = it->second.shader; + if( vs ) { + ULONG refCount = vs->Release(); + AssertIf( refCount != 0 ); + } + } + g_Shaders.clear (); +} + diff --git a/Runtime/GfxDevice/d3d/VertexPipeD3D9.h b/Runtime/GfxDevice/d3d/VertexPipeD3D9.h new file mode 100644 index 0000000..af9d8d3 --- /dev/null +++ b/Runtime/GfxDevice/d3d/VertexPipeD3D9.h @@ -0,0 +1,139 @@ +#pragma once + +#include "Runtime/GfxDevice/GfxDeviceTypes.h" +#include "Runtime/GfxDevice/GfxDeviceObjects.h" +#include "Runtime/Math/Vector4.h" +#include "Runtime/Math/Matrix4x4.h" +#include "Runtime/GfxDevice/ShaderConstantCache.h" +#include "Runtime/GfxDevice/TransformState.h" +#include "D3D9Includes.h" + +class BuiltinShaderParamValues; + +enum TextureSourceMode { + kTexSourceUV0, + kTexSourceUV1, + // match the order of TexGenMode! 
kTexSourceSphereMap,
	kTexSourceObject,
	kTexSourceEyeLinear,
	kTexSourceCubeReflect,
	kTexSourceCubeNormal,
	kTexSourceTypeCount
};

// How many components of a unit's texture matrix are applied (none / 2 / 3 / 4).
enum TextureMatrixMode {
	kTexMatrixNone,
	kTexMatrix2,
	kTexMatrix3,
	kTexMatrix4,
	kTexMatrixTypeCount
};

// Compact description of the vertex pipe configuration. All fields are
// bitfields of a single UInt64 (54 bits used); Reset() zeroes the whole struct.
struct VertexPipeConfig {
	// 2 bytes
	UInt64 textureMatrixModes : 16; // TextureMatrixMode: 2 bits for each unit
	// 3 bytes
	UInt64 textureSources : 24; // TextureSourceMode: 3 bits for each unit
	// 1 byte
	UInt64 colorMaterial : 3; // ColorMaterialMode
	UInt64 texCoordCount : 4; // number of texture coordinates
	UInt64 hasVertexColor : 1; // is vertex color coming from per-vertex data?
	// 1 byte
	UInt64 hasLighting : 1; // lighting on?
	UInt64 hasSpecular : 1; // specular on?
	UInt64 hasLightType : 3; // has light of given type? (bit per type)
	UInt64 hasNormalization : 1; // needs to normalize normals?
	// 10 bits left

	void Reset() {
		memset(this, 0, sizeof(*this));
	}

	// Grows texCoordCount so that `unit` is included (never shrinks it).
	void SetTextureUnit( UInt32 unit ) {
		Assert (unit < 8);
		UInt32 tc = texCoordCount;
		if( unit >= tc ) {
			tc = unit+1;
			texCoordCount = tc;
		}
	}
	// Shrinks texCoordCount to `unit` if `unit` is currently in range,
	// dropping that unit and every unit above it.
	void ClearTextureUnit( UInt32 unit ) {
		Assert (unit < 8);
		UInt32 tc = texCoordCount;
		if( unit < tc ) {
			tc = unit;
			texCoordCount = tc;
		}
	}
};


// Per-draw data for the vertex pipe that is not part of the config bitfield.
struct VertexPipeDataD3D9
{
	GfxVertexLight lights[kMaxSupportedVertexLights];
	D3DMATERIAL9 material;
	SimpleVec4 ambient;
	SimpleVec4 ambientClamped;
	int vertexLightCount;
	UInt32 projectedTextures; // 1 bit per unit


	NormalizationMode normalization;

	// mutable so it can be cleared through a const reference once the device state has been reset.
	mutable bool haveToResetDeviceState;

	// Note: does not touch `lights`; vertexLightCount is set to 0 instead.
	void Reset() {
		memset (&material, 0, sizeof(material));
		ambient.set (0,0,0,0);
		ambientClamped.set (0,0,0,0);
		vertexLightCount = 0;
		projectedTextures = 0;
		normalization = kNormalizationUnknown;
		haveToResetDeviceState = false;
	}
};


// Last state applied to the device, used to skip redundant state changes.
struct VertexPipePrevious {
	VertexPipeConfig config;
	SimpleVec4 ambient;
	int vertexLightCount;
	IDirect3DVertexShader9* vertexShader;

	void
Reset() {
		config.Reset ();
		// -1 cannot equal a real ambient color, so the next comparison against it always differs.
		ambient.set(-1,-1,-1,-1);
		vertexLightCount = 0;
		vertexShader = NULL;
	}
};

// Resets config, data, transform state and the previous-state cache; if dev is
// non-NULL the device's fixed-function vertex state is reset immediately,
// otherwise the reset stays pending via data.haveToResetDeviceState.
void ResetVertexPipeStateD3D9 (
	IDirect3DDevice9* dev,
	TransformState& state,
	BuiltinShaderParamValues& builtins,
	VertexPipeConfig& config,
	VertexPipeDataD3D9& data,
	VertexPipePrevious& previous);

// Applies the configuration through the fixed-function pipeline (transforms,
// color material, lights, texgen and texture transforms), changing some device
// state only when it differs from `previous`, and updates `previous`.
void SetupFixedFunctionD3D9 (
	IDirect3DDevice9* dev,
	TransformState& state,
	BuiltinShaderParamValues& builtins,
	const VertexPipeConfig& config,
	const VertexPipeDataD3D9& data,
	VertexPipePrevious& previous,
	bool vsActive, bool immediateMode);

// Applies the configuration through a generated vertex shader: looks up or
// creates the shader for `config` and uploads its constants via `cache`.
// Does nothing when vsActive is true. May modify config.hasLightType.
void SetupVertexShaderD3D9 (
	IDirect3DDevice9* dev,
	TransformState& state,
	const BuiltinShaderParamValues& builtins,
	VertexPipeConfig& config,
	const VertexPipeDataD3D9& data,
	VertexPipePrevious& previous,
	VertexShaderConstantCache& cache,
	bool vsActive, bool immediateMode);

// Releases all generated vertex shaders and clears the shader cache.
void CleanupVertexShadersD3D9 (); |