summaryrefslogtreecommitdiff
path: root/Runtime/GfxDevice/d3d11/D3D11ByteCode.cpp
diff options
context:
space:
mode:
authorchai <chaifix@163.com>2019-08-14 22:50:43 +0800
committerchai <chaifix@163.com>2019-08-14 22:50:43 +0800
commit15740faf9fe9fe4be08965098bbf2947e096aeeb (patch)
treea730ec236656cc8cab5b13f088adfaed6bb218fb /Runtime/GfxDevice/d3d11/D3D11ByteCode.cpp
+Unity Runtime codeHEADmaster
Diffstat (limited to 'Runtime/GfxDevice/d3d11/D3D11ByteCode.cpp')
-rw-r--r--Runtime/GfxDevice/d3d11/D3D11ByteCode.cpp2237
1 files changed, 2237 insertions, 0 deletions
diff --git a/Runtime/GfxDevice/d3d11/D3D11ByteCode.cpp b/Runtime/GfxDevice/d3d11/D3D11ByteCode.cpp
new file mode 100644
index 0000000..8ec6c6a
--- /dev/null
+++ b/Runtime/GfxDevice/d3d11/D3D11ByteCode.cpp
@@ -0,0 +1,2237 @@
+#include "UnityPrefix.h"
+#include "D3D11ByteCode.h"
+#include "D3D11Includes.h"
+#include "Runtime/Utilities/BitUtility.h"
+#include "External/DirectX/builds/dx9include/d3d9.h"
+
+// Some things in this file are based on Mesa3d DX11 state tracker:
+
+/**************************************************************************
+ *
+ * Copyright 2010 Luca Barbieri
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+
+
+enum SM4SystemValue
+{
+ kSM4SV_UNDEFINED,
+ kSM4SV_POSITION,
+ kSM4SV_CLIP_DISTANCE,
+ kSM4SV_CULL_DISTANCE,
+ kSM4SV_RENDER_TARGET_ARRAY_INDEX,
+ kSM4SV_VIEWPORT_ARRAY_INDEX,
+ kSM4SV_VERTEX_ID,
+ kSM4SV_PRIMITIVE_ID,
+ kSM4SV_INSTANCE_ID,
+ kSM4SV_IS_FRONT_FACE,
+ kSM4SV_SAMPLE_INDEX,
+ kSM4SV_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR,
+ kSM4SV_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR,
+ kSM4SV_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR,
+ kSM4SV_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR,
+ kSM4SV_FINAL_QUAD_U_INSIDE_TESSFACTOR,
+ kSM4SV_FINAL_QUAD_V_INSIDE_TESSFACTOR,
+ kSM4SV_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR,
+ kSM4SV_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR,
+ kSM4SV_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR,
+ kSM4SV_FINAL_TRI_INSIDE_TESSFACTOR,
+ kSM4SV_FINAL_LINE_DETAIL_TESSFACTOR,
+ kSM4SV_FINAL_LINE_DENSITY_TESSFACTOR,
+ kSM4SV_COUNT
+};
+
+enum SM4InstrExtype
+{
+ SM4_TOKEN_INSTRUCTION_EXTENDED_TYPE_EMPTY,
+ SM4_TOKEN_INSTRUCTION_EXTENDED_TYPE_SAMPLE_CONTROLS,
+ SM4_TOKEN_INSTRUCTION_EXTENDED_TYPE_RESOURCE_DIM,
+ SM4_TOKEN_INSTRUCTION_EXTENDED_TYPE_RESOURCE_RETURN_TYPE,
+ SM4_TOKEN_INSTRUCTION_EXTENDED_TYPE_COUNT
+};
+
+enum SM4OperExtype
+{
+ SM4_TOKEN_OPERAND_EXTENDED_TYPE_EMPTY,
+ SM4_TOKEN_OPERAND_EXTENDED_TYPE_MODIFIER,
+ SM4_TOKEN_OPERAND_EXTENDED_TYPE_COUNT
+};
+
+enum SM4ReturnType
+{
+ kSM4RetType_UNORM = 1,
+ kSM4RetType_SNORM = 2,
+ kSM4RetType_SINT = 3,
+ kSM4RetType_UINT = 4,
+ kSM4RetType_FLOAT = 5,
+ kSM4RetType_MIXED = 6,
+};
+
+
+const char* kSM4OpcodeNames[kSM4Op_COUNT] = {
+ "add",
+ "and",
+ "break",
+ "breakc",
+ "call",
+ "callc",
+ "case",
+ "continue",
+ "continuec",
+ "cut",
+ "default",
+ "deriv_rtx",
+ "deriv_rty",
+ "discard",
+ "div",
+ "dp2",
+ "dp3",
+ "dp4",
+ "else",
+ "emit",
+ "emitthencut",
+ "endif",
+ "endloop",
+ "endswitch",
+ "eq",
+ "exp",
+ "frc",
+ "ftoi",
+ "ftou",
+ "ge",
+ "iadd",
+ "if",
+ "ieq",
+ "ige",
+ "ilt",
+ "imad",
+ "imax",
+ "imin",
+ "imul",
+ "ine",
+ "ineg",
+ "ishl",
+ "ishr",
+ "itof",
+ "label",
+ "ld",
+ "ldms",
+ "log",
+ "loop",
+ "lt",
+ "mad",
+ "min",
+ "max",
+ "customdata",
+ "mov",
+ "movc",
+ "mul",
+ "ne",
+ "nop",
+ "not",
+ "or",
+ "resinfo",
+ "ret",
+ "retc",
+ "round_ne",
+ "round_ni",
+ "round_pi",
+ "round_z",
+ "rsq",
+ "sample",
+ "sample_c",
+ "sample_c_lz",
+ "sample_l",
+ "sample_d",
+ "sample_b",
+ "sqrt",
+ "switch",
+ "sincos",
+ "udiv",
+ "ult",
+ "uge",
+ "umul",
+ "umad",
+ "umax",
+ "umin",
+ "ushr",
+ "utof",
+ "xor",
+ "dcl_resource",
+ "dcl_constant_buffer",
+ "dcl_sampler",
+ "dcl_index_range",
+ "dcl_gs_output_primitive_topology",
+ "dcl_gs_input_primitive",
+ "dcl_max_output_vertex_count",
+ "dcl_input",
+ "dcl_input_sgv",
+ "dcl_input_siv",
+ "dcl_input_ps",
+ "dcl_input_ps_sgv",
+ "dcl_input_ps_siv",
+ "dcl_output",
+ "dcl_output_sgv",
+ "dcl_output_siv",
+ "dcl_temps",
+ "dcl_indexable_temp",
+ "dcl_global_flags",
+ "d3d10_count",
+ "lod",
+ "gather4",
+ "sample_pos",
+ "sample_info",
+ "d3d10_1_count",
+ "hs_decls",
+ "hs_control_point_phase",
+ "hs_fork_phase",
+ "hs_join_phase",
+ "emit_stream",
+ "cut_stream",
+ "emitthencut_stream",
+ "interface_call",
+ "bufinfo",
+ "deriv_rtx_coarse",
+ "deriv_rtx_fine",
+ "deriv_rty_coarse",
+ "deriv_rty_fine",
+ "gather4_c",
+ "gather4_po",
+ "gather4_po_c",
+ "rcp",
+ "f32tof16",
+ "f16tof32",
+ "uaddc",
+ "usubb",
+ "countbits",
+ "firstbit_hi",
+ "firstbit_lo",
+ "firstbit_shi",
+ "ubfe",
+ "ibfe",
+ "bfi",
+ "bfrev",
+ "swapc",
+ "dcl_stream",
+ "dcl_function_body",
+ "dcl_function_table",
+ "dcl_interface",
+ "dcl_input_control_point_count",
+ "dcl_output_control_point_count",
+ "dcl_tess_domain",
+ "dcl_tess_partitioning",
+ "dcl_tess_output_primitive",
+ "dcl_hs_max_tessfactor",
+ "dcl_hs_fork_phase_instance_count",
+ "dcl_hs_join_phase_instance_count",
+ "dcl_thread_group",
+ "dcl_unordered_access_view_typed",
+ "dcl_unordered_access_view_raw",
+ "dcl_unordered_access_view_structured",
+ "dcl_thread_group_shared_memory_raw",
+ "dcl_thread_group_shared_memory_structured",
+ "dcl_resource_raw",
+ "dcl_resource_structured",
+ "ld_uav_typed",
+ "store_uav_typed",
+ "ld_raw",
+ "store_raw",
+ "ld_structured",
+ "store_structured",
+ "atomic_and",
+ "atomic_or",
+ "atomic_xor",
+ "atomic_cmp_store",
+ "atomic_iadd",
+ "atomic_imax",
+ "atomic_imin",
+ "atomic_umax",
+ "atomic_umin",
+ "imm_atomic_alloc",
+ "imm_atomic_consume",
+ "imm_atomic_iadd",
+ "imm_atomic_and",
+ "imm_atomic_or",
+ "imm_atomic_xor",
+ "imm_atomic_exch",
+ "imm_atomic_cmp_exch",
+ "imm_atomic_imax",
+ "imm_atomic_imin",
+ "imm_atomic_umax",
+ "imm_atomic_umin",
+ "sync",
+ "dadd",
+ "dmax",
+ "dmin",
+ "dmul",
+ "deq",
+ "dge",
+ "dlt",
+ "dne",
+ "dmov",
+ "dmovc",
+ "dtof",
+ "ftod",
+ "eval_snapped",
+ "eval_sample_index",
+ "eval_centroid",
+ "dcl_gs_instance_count",
+};
+
+struct SM4TokVersion
+{
+ unsigned minor : 4;
+ unsigned major : 4;
+ unsigned format : 8;
+ unsigned type : 16;
+};
+
+
+
+struct SM4TokResourceReturnType
+{
+ unsigned x : 4;
+ unsigned y : 4;
+ unsigned z : 4;
+ unsigned w : 4;
+};
+
+
+#define SM4_OPERAND_SEL_MASK(sel) ((sel) & 0xf)
+#define SM4_OPERAND_SEL_SWZ(sel, i) (((sel) >> ((i) * 2)) & 3)
+#define SM4_OPERAND_SEL_SCALAR(sel) ((sel) & 3)
+
+struct SM4TokOperandEx
+{
+ union {
+ UInt32 dword;
+ struct {
+ unsigned type : 6;
+ unsigned neg : 1;
+ unsigned abs : 1;
+ };
+ };
+};
+
+struct SM4TokResourceRetType
+{
+ union {
+ UInt32 dword;
+ struct {
+ unsigned x : 4;
+ unsigned y : 4;
+ unsigned z : 4;
+ unsigned w : 4;
+ unsigned reserved : 16;
+ };
+ };
+};
+
+
+union SM4Any
+{
+ float f32;
+ SInt64 i64;
+ SInt32 i32;
+};
+
+struct SM4Op;
+struct SM4Instr;
+struct SM4Decl;
+struct SM4Program;
+
+
+struct SM4Op
+{
+ UInt8 mode;
+ UInt8 comps;
+ UInt8 mask;
+ UInt8 num_indices;
+ UInt8 swizzle[4];
+ SM4RegFile file;
+ SM4Any imm_values[4];
+ bool neg;
+ bool abs;
+ struct
+ {
+ SInt64 disp;
+ std::auto_ptr<SM4Op> reg;
+ } indices[3];
+
+ bool is_index_simple(unsigned i) const
+ {
+ return !indices[i].reg.get() && indices[i].disp >= 0 && (SInt64)(SInt32)indices[i].disp == indices[i].disp;
+ }
+
+ bool has_simple_index() const
+ {
+ return num_indices == 1 && is_index_simple(0);
+ }
+
+ SM4Op()
+ {
+ memset(this, 0, sizeof(*this));
+ }
+
+private:
+ SM4Op(const SM4Op& op)
+ {}
+};
+
+/* for sample_d */
+#define SM4_MAX_OPS 6
+
+struct SM4Instr : public SM4TokInstruction
+{
+ SInt8 sample_offset[3];
+ UInt8 resource_target;
+ UInt8 resource_return_type[4];
+
+ unsigned num;
+ unsigned num_ops;
+ std::auto_ptr<SM4Op> ops[SM4_MAX_OPS];
+
+ SM4Instr()
+ {
+ memset(this, 0, sizeof(*this));
+ }
+
+private:
+ SM4Instr(const SM4Instr& op)
+ {}
+};
+
+struct SM4Decl : public SM4TokInstruction
+{
+ std::auto_ptr<SM4Op> op;
+ union
+ {
+ unsigned num;
+ float f32;
+ SM4SystemValue sv;
+ struct
+ {
+ unsigned id;
+ unsigned expected_function_table_length;
+ unsigned table_length;
+ unsigned array_length;
+ } intf;
+ unsigned thread_group_size[3];
+ SM4TokResourceReturnType rrt;
+ struct
+ {
+ unsigned num;
+ unsigned comps;
+ } indexable_temp;
+ struct
+ {
+ unsigned stride;
+ unsigned count;
+ } structured;
+ };
+
+ void* data;
+
+ SM4Decl()
+ {
+ memset(this, 0, sizeof(*this));
+ }
+
+ ~SM4Decl()
+ {
+ free(data);
+ }
+
+private:
+ SM4Decl(const SM4Decl& op)
+ {}
+};
+
+struct _D3D11_SIGNATURE_PARAMETER_DESC;
+
+struct SM4Program
+{
+ SM4TokVersion version;
+ dynamic_array<SM4Decl*> dcls;
+ dynamic_array<SM4Instr*> insns;
+
+ SM4Program()
+ {
+ memset(&version, 0, sizeof(version));
+ }
+
+ ~SM4Program()
+ {
+ for(dynamic_array<SM4Decl*>::iterator i = dcls.begin(), e = dcls.end(); i != e; ++i)
+ delete *i;
+ for(dynamic_array<SM4Instr*>::iterator i = insns.begin(), e = insns.end(); i != e; ++i)
+ delete *i;
+ }
+
+private:
+ SM4Program(const SM4Decl& op)
+ {}
+};
+
+SM4Program* sm4_parse(const void* tokens, int size);
+
+
+
+
+struct DXBCHeader
+{
+ UInt32 fourcc;
+ UInt32 hash[4];
+ UInt32 one;
+ UInt32 total_size;
+ UInt32 chunk_count;
+};
+
+
+static inline DXBCChunkHeader* dxbc_find_shader_bytecode(const void* data, int size)
+{
+ DXBCChunkHeader* chunk;
+ chunk = dxbc_find_chunk(data, size, kFOURCC_SHDR);
+ if(!chunk)
+ chunk = dxbc_find_chunk(data, size, kFOURCC_SHEX);
+ return chunk;
+}
+
+#define DXBC_FIND_INPUT_SIGNATURE 0
+#define DXBC_FIND_OUTPUT_SIGNATURE 1
+#define DXBC_FIND_PATCH_SIGNATURE 2
+
+static inline DXBCChunkSig* dxbc_find_signature(const void* data, int size, unsigned kind)
+{
+ unsigned fourcc;
+ switch(kind) {
+ case DXBC_FIND_INPUT_SIGNATURE: fourcc = kFOURCC_ISGN; break;
+ case DXBC_FIND_OUTPUT_SIGNATURE: fourcc = kFOURCC_OSGN; break;
+ case DXBC_FIND_PATCH_SIGNATURE: fourcc = kFOURCC_PCSG; break;
+ default:
+ return NULL;
+ }
+ return (DXBCChunkSig*)dxbc_find_chunk(data, size, fourcc);
+}
+
+std::pair<void*, size_t> dxbc_assemble(struct DXBCChunkHeader** chunks, unsigned num_chunks);
+
+
+DXBCContainer* dxbc_parse(const void* data, int size)
+{
+ std::auto_ptr<DXBCContainer> container(new DXBCContainer());
+ container->data = data;
+ DXBCHeader* header = (DXBCHeader*)data;
+ UInt32* chunk_offsets = (UInt32*)(header + 1);
+ if(header->fourcc != kFOURCC_DXBC)
+ return 0;
+ unsigned num_chunks = header->chunk_count;
+ for(unsigned i = 0; i < num_chunks; ++i)
+ {
+ unsigned offset = chunk_offsets[i];
+ DXBCChunkHeader* chunk = (DXBCChunkHeader*)((char*)data + offset);
+ unsigned fourcc = chunk->fourcc;
+ container->chunks.push_back(chunk);
+ }
+ return container.release();
+}
+
+DXBCChunkHeader* dxbc_find_chunk(const void* data, int size, unsigned fourcc)
+{
+ DXBCHeader* header = (DXBCHeader*)data;
+ UInt32* chunk_offsets = (UInt32*)(header + 1);
+ if(header->fourcc != kFOURCC_DXBC)
+ return 0;
+ unsigned num_chunks = header->chunk_count;
+ for(unsigned i = 0; i < num_chunks; ++i)
+ {
+ unsigned offset = chunk_offsets[i];
+ DXBCChunkHeader* chunk = (DXBCChunkHeader*)((char*)data + offset);
+ if(chunk->fourcc == fourcc)
+ return chunk;
+ }
+ return 0;
+}
+
+static void print_binary_chunk (const DXBCChunkHeader& chk, int perLine = 16)
+{
+ const char* kHex = "0123456789abcdef";
+ const UInt8* ptr = ((const UInt8*)&chk) + sizeof(DXBCChunkHeader);
+ std::string res;
+ for (unsigned i = 0; i < chk.size; ++i)
+ {
+ if (i != 0 && i%perLine == 0)
+ res += '\n';
+ if ((i & 3) == 0)
+ res += ' ';
+ UInt8 b = ptr[i];
+ res += kHex[b>>4];
+ res += kHex[b&0xF];
+ }
+ printf_console ("%s\n", res.c_str());
+}
+
+static void print_sm4_program (const SM4Program& prog)
+{
+}
+
+void dxbc_print(const DXBCContainer* dxbc)
+{
+ printf_console ("DXBC dump:\n");
+ if (!dxbc)
+ {
+ printf_console ("null\n");
+ return;
+ }
+
+ printf_console ("chunk count: %d\n", (int)dxbc->chunks.size());
+ for (size_t i = 0; i < dxbc->chunks.size(); ++i)
+ {
+ const DXBCChunkHeader& chk = *dxbc->chunks[i];
+ printf_console ("chunk #%i: %c%c%c%c size %u\n", (int)i, chk.fourcc&0xFF, (chk.fourcc>>8)&0xFF, (chk.fourcc>>16)&0xFF, (chk.fourcc>>24)&0xFF, chk.size);
+ if (chk.fourcc == kFOURCC_ISGN || chk.fourcc == kFOURCC_OSGN)
+ {
+ print_binary_chunk (chk);
+ const DXBCChunkSig* sig = (const DXBCChunkSig*)&chk;
+ D3D11_SIGNATURE_PARAMETER_DESC* params;
+ int count = dxbc_parse_signature (sig, &params);
+ for (int j = 0; j < count; ++j)
+ {
+ const D3D11_SIGNATURE_PARAMETER_DESC& p = params[j];
+ printf_console (" #%i: %s/%u reg %u sv %u type %u mask %x rwmask %x stream %u\n",
+ j, p.SemanticName, p.SemanticIndex, p.Register, p.SystemValueType, p.ComponentType, p.Mask, p.ReadWriteMask, p.Stream);
+ }
+ free (params);
+ }
+ else if (chk.fourcc == kFOURCC_SHDR || chk.fourcc == kFOURCC_SHEX)
+ {
+ printf_console ("shader code:\n");
+ print_binary_chunk (chk);
+ SM4Program* prog = sm4_parse((&chk)+1, chk.size);
+ if (prog)
+ {
+ print_sm4_program(*prog);
+ delete prog;
+ }
+ }
+ }
+}
+
+
+int dxbc_parse_signature(const DXBCChunkSig* sig, D3D11_SIGNATURE_PARAMETER_DESC** params)
+{
+ unsigned count = sig->count;
+ *params = (D3D11_SIGNATURE_PARAMETER_DESC*)malloc(sizeof(D3D11_SIGNATURE_PARAMETER_DESC) * count);
+
+ for (unsigned i = 0; i < count; ++i)
+ {
+ D3D11_SIGNATURE_PARAMETER_DESC& param = (*params)[i];
+ param.SemanticName = (char*)&sig->count + sig->elements[i].name_offset;
+ param.SemanticIndex = sig->elements[i].semantic_index;
+ param.SystemValueType = (D3D_NAME)sig->elements[i].system_value_type;
+ param.ComponentType = (D3D_REGISTER_COMPONENT_TYPE)sig->elements[i].component_type;
+ param.Register = sig->elements[i].register_num;
+ param.Mask = sig->elements[i].mask;
+ param.ReadWriteMask = sig->elements[i].read_write_mask;
+ param.Stream = sig->elements[i].stream;
+ }
+ return count;
+}
+
+// sm4_parse.cpp
+
+struct SM4Parser
+{
+ const unsigned* tokens;
+ const unsigned* tokens_end;
+ SM4Program& program;
+
+ SM4Parser(SM4Program& program, const void* p_tokens, unsigned size)
+ : program(program)
+ {
+ tokens = (const unsigned*)p_tokens;
+ tokens_end = (const unsigned*)((const char*)p_tokens + size);
+ }
+
+ UInt32 read32()
+ {
+ Assert(tokens < tokens_end);
+ return *tokens++;
+ }
+
+ template<typename T>
+ void read_token(T* tok)
+ {
+ *(unsigned*)tok = read32();
+ }
+
+ UInt64 read64()
+ {
+ unsigned a = read32();
+ unsigned b = read32();
+ return (UInt64)a | ((UInt64)b << 32);
+ }
+
+ void skip(unsigned toskip)
+ {
+ tokens += toskip;
+ }
+
+ bool read_op(SM4Op* pop)
+ {
+ SM4Op& op = *pop;
+ SM4TokOperand optok;
+ read_token(&optok);
+ if (optok.file >= kSM4File_COUNT)
+ {
+ AssertString ("DXBC: unknown register type");
+ return false;
+ }
+ op.swizzle[0] = 0;
+ op.swizzle[1] = 1;
+ op.swizzle[2] = 2;
+ op.swizzle[3] = 3;
+ op.mask = 0xf;
+ switch(optok.comps_enum)
+ {
+ case kSM4OperComp0:
+ op.comps = 0;
+ break;
+ case kSM4OperComp1:
+ op.comps = 1;
+ op.swizzle[1] = op.swizzle[2] = op.swizzle[3] = 0;
+ break;
+ case kSM4OperComp4:
+ op.comps = 4;
+ op.mode = optok.mode;
+ switch(optok.mode)
+ {
+ case SM4_OPERAND_MODE_MASK:
+ op.mask = SM4_OPERAND_SEL_MASK(optok.sel);
+ break;
+ case SM4_OPERAND_MODE_SWIZZLE:
+ op.swizzle[0] = SM4_OPERAND_SEL_SWZ(optok.sel, 0);
+ op.swizzle[1] = SM4_OPERAND_SEL_SWZ(optok.sel, 1);
+ op.swizzle[2] = SM4_OPERAND_SEL_SWZ(optok.sel, 2);
+ op.swizzle[3] = SM4_OPERAND_SEL_SWZ(optok.sel, 3);
+ break;
+ case SM4_OPERAND_MODE_SCALAR:
+ op.swizzle[0] = op.swizzle[1] = op.swizzle[2] = op.swizzle[3] = SM4_OPERAND_SEL_SCALAR(optok.sel);
+ break;
+ }
+ break;
+ case kSM4OperCompN:
+ AssertString("Unhandled operand component type");
+ return false;
+ break;
+ }
+ op.file = (SM4RegFile)optok.file;
+ op.num_indices = optok.num_indices;
+
+ if(optok.extended)
+ {
+ SM4TokOperandEx optokext;
+ read_token(&optokext);
+ if(optokext.type == 0)
+ {}
+ else if(optokext.type == 1)
+ {
+ op.neg = optokext.neg;
+ op.abs= optokext.abs;
+ }
+ else
+ {
+ AssertString("Unhandled extended operand token type");
+ return false;
+ }
+ }
+
+ for(unsigned i = 0; i < op.num_indices; ++i)
+ {
+ unsigned repr;
+ if(i == 0)
+ repr = optok.index0_repr;
+ else if(i == 1)
+ repr = optok.index1_repr;
+ else if(i == 2)
+ repr = optok.index2_repr;
+ else
+ {
+ AssertString("Unhandled operand index representation");
+ return false;
+ }
+ op.indices[i].disp = 0;
+ // TODO: is disp supposed to be signed here??
+ switch(repr)
+ {
+ case SM4_OPERAND_INDEX_REPR_IMM32:
+ op.indices[i].disp = (SInt32)read32();
+ break;
+ case SM4_OPERAND_INDEX_REPR_IMM64:
+ op.indices[i].disp = read64();
+ break;
+ case SM4_OPERAND_INDEX_REPR_REG:
+relative:
+ op.indices[i].reg.reset(new SM4Op());
+ if (!read_op(&*op.indices[i].reg))
+ return false;
+ break;
+ case SM4_OPERAND_INDEX_REPR_REG_IMM32:
+ op.indices[i].disp = (SInt32)read32();
+ goto relative;
+ case SM4_OPERAND_INDEX_REPR_REG_IMM64:
+ op.indices[i].disp = read64();
+ goto relative;
+ }
+ }
+
+ if(op.file == kSM4File_IMMEDIATE32)
+ {
+ for(unsigned i = 0; i < op.comps; ++i)
+ op.imm_values[i].i32 = read32();
+ }
+ else if(op.file == kSM4File_IMMEDIATE64)
+ {
+ for(unsigned i = 0; i < op.comps; ++i)
+ op.imm_values[i].i64 = read64();
+ }
+ return true;
+ }
+
+ bool parse()
+ {
+ read_token(&program.version);
+
+ unsigned lentok = read32();
+ tokens_end = tokens - 2 + lentok;
+
+ while(tokens != tokens_end)
+ {
+ SM4TokInstruction insntok;
+ read_token(&insntok);
+ const unsigned* insn_end = tokens - 1 + insntok.length;
+ SM4Opcode opcode = (SM4Opcode)insntok.opcode;
+ if (opcode >= kSM4Op_COUNT)
+ {
+ AssertString ("Unknown DXBC opcode");
+ return false;
+ }
+
+ if(opcode == kSM4Op_CUSTOMDATA)
+ {
+ // immediate constant buffer data
+ unsigned customlen = read32() - 2;
+
+ SM4Decl& dcl = *new SM4Decl;
+ program.dcls.push_back(&dcl);
+
+ dcl.opcode = kSM4Op_CUSTOMDATA;
+ dcl.num = customlen;
+ dcl.data = malloc(customlen * sizeof(tokens[0]));
+
+ memcpy(dcl.data, &tokens[0], customlen * sizeof(tokens[0]));
+
+ skip(customlen);
+ continue;
+ }
+
+ if(opcode == kSM4Op_HS_FORK_PHASE || opcode == kSM4Op_HS_JOIN_PHASE)
+ {
+ // need to interleave these with the declarations or we cannot
+ // assign fork/join phase instance counts to phases
+ SM4Decl& dcl = *new SM4Decl;
+ program.dcls.push_back(&dcl);
+ dcl.opcode = opcode;
+ }
+
+ if((opcode >= kSM4Op_DCL_RESOURCE && opcode <= kSM4Op_DCL_GLOBAL_FLAGS)
+ || (opcode >= kSM4Op_DCL_STREAM && opcode <= kSM4Op_DCL_RESOURCE_STRUCTURED))
+ {
+ SM4Decl& dcl = *new SM4Decl;
+ program.dcls.push_back(&dcl);
+ (SM4TokInstruction&)dcl = insntok;
+
+ SM4TokInstructionEx exttok;
+ memcpy(&exttok, &insntok, sizeof(exttok));
+ while(exttok.extended)
+ {
+ read_token(&exttok);
+ }
+
+#define READ_OP_ANY dcl.op.reset(new SM4Op()); if (!read_op(&*dcl.op)) return false;
+#define READ_OP(FILE) READ_OP_ANY
+ //Assert(dcl.op->file == kSM4File_##FILE);
+
+ switch(opcode)
+ {
+ case kSM4Op_DCL_GLOBAL_FLAGS:
+ break;
+ case kSM4Op_DCL_RESOURCE:
+ READ_OP(RESOURCE);
+ read_token(&dcl.rrt);
+ break;
+ case kSM4Op_DCL_SAMPLER:
+ READ_OP(SAMPLER);
+ break;
+ case kSM4Op_DCL_INPUT:
+ case kSM4Op_DCL_INPUT_PS:
+ READ_OP(INPUT);
+ break;
+ case kSM4Op_DCL_INPUT_SIV:
+ case kSM4Op_DCL_INPUT_SGV:
+ case kSM4Op_DCL_INPUT_PS_SIV:
+ case kSM4Op_DCL_INPUT_PS_SGV:
+ READ_OP(INPUT);
+ dcl.sv = (SM4SystemValue)(UInt16)read32();
+ break;
+ case kSM4Op_DCL_OUTPUT:
+ READ_OP(OUTPUT);
+ break;
+ case kSM4Op_DCL_OUTPUT_SIV:
+ case kSM4Op_DCL_OUTPUT_SGV:
+ READ_OP(OUTPUT);
+ dcl.sv = (SM4SystemValue)(UInt16)read32();
+ break;
+ case kSM4Op_DCL_INDEX_RANGE:
+ READ_OP_ANY;
+ Assert(dcl.op->file == kSM4File_INPUT || dcl.op->file == kSM4File_OUTPUT);
+ dcl.num = read32();
+ break;
+ case kSM4Op_DCL_TEMPS:
+ dcl.num = read32();
+ break;
+ case kSM4Op_DCL_INDEXABLE_TEMP:
+ READ_OP(INDEXABLE_TEMP);
+ dcl.indexable_temp.num = read32();
+ dcl.indexable_temp.comps = read32();
+ break;
+ case kSM4Op_DCL_CONSTANT_BUFFER:
+ READ_OP(CONSTANT_BUFFER);
+ break;
+ case kSM4Op_DCL_GS_INPUT_PRIMITIVE:
+ case kSM4Op_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY:
+ break;
+ case kSM4Op_DCL_MAX_OUTPUT_VERTEX_COUNT:
+ dcl.num = read32();
+ break;
+ case kSM4Op_DCL_GS_INSTANCE_COUNT:
+ dcl.num = read32();
+ break;
+ case kSM4Op_DCL_INPUT_CONTROL_POINT_COUNT:
+ case kSM4Op_DCL_OUTPUT_CONTROL_POINT_COUNT:
+ case kSM4Op_DCL_TESS_DOMAIN:
+ case kSM4Op_DCL_TESS_PARTITIONING:
+ case kSM4Op_DCL_TESS_OUTPUT_PRIMITIVE:
+ break;
+ case kSM4Op_DCL_HS_MAX_TESSFACTOR:
+ dcl.f32 = read32();
+ break;
+ case kSM4Op_DCL_HS_FORK_PHASE_INSTANCE_COUNT:
+ dcl.num = read32();
+ break;
+ case kSM4Op_DCL_FUNCTION_BODY:
+ dcl.num = read32();
+ break;
+ case kSM4Op_DCL_FUNCTION_TABLE:
+ dcl.num = read32();
+ dcl.data = malloc(dcl.num * sizeof(UInt32));
+ for(unsigned i = 0; i < dcl.num; ++i)
+ ((UInt32*)dcl.data)[i] = read32();
+ break;
+ case kSM4Op_DCL_INTERFACE:
+ dcl.intf.id = read32();
+ dcl.intf.expected_function_table_length = read32();
+ {
+ UInt32 v = read32();
+ dcl.intf.table_length = v & 0xffff;
+ dcl.intf.array_length = v >> 16;
+ }
+ dcl.data = malloc(dcl.intf.table_length * sizeof(UInt32));
+ for(unsigned i = 0; i < dcl.intf.table_length; ++i)
+ ((UInt32*)dcl.data)[i] = read32();
+ break;
+ case kSM4Op_DCL_THREAD_GROUP:
+ dcl.thread_group_size[0] = read32();
+ dcl.thread_group_size[1] = read32();
+ dcl.thread_group_size[2] = read32();
+ break;
+ case kSM4Op_DCL_UNORDERED_ACCESS_VIEW_TYPED:
+ READ_OP(UNORDERED_ACCESS_VIEW);
+ read_token(&dcl.rrt);
+ break;
+ case kSM4Op_DCL_UNORDERED_ACCESS_VIEW_RAW:
+ READ_OP(UNORDERED_ACCESS_VIEW);
+ break;
+ case kSM4Op_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED:
+ READ_OP(UNORDERED_ACCESS_VIEW);
+ dcl.structured.stride = read32();
+ break;
+ case kSM4Op_DCL_THREAD_GROUP_SHARED_MEMORY_RAW:
+ READ_OP(THREAD_GROUP_SHARED_MEMORY);
+ dcl.num = read32();
+ break;
+ case kSM4Op_DCL_THREAD_GROUP_SHARED_MEMORY_STRUCTURED:
+ READ_OP(THREAD_GROUP_SHARED_MEMORY);
+ dcl.structured.stride = read32();
+ dcl.structured.count = read32();
+ break;
+ case kSM4Op_DCL_RESOURCE_RAW:
+ READ_OP(RESOURCE);
+ break;
+ case kSM4Op_DCL_RESOURCE_STRUCTURED:
+ READ_OP(RESOURCE);
+ dcl.structured.stride = read32();
+ break;
+ case kSM4Op_DCL_STREAM:
+ // TODO: dcl_stream is undocumented: what is it?
+ AssertString("DXBC: Unhandled dcl_stream since it's undocumented");
+ return false;
+ break;
+ default:
+ AssertString("DXBC: Unhandled declaration type");
+ return false;
+ }
+
+ if (tokens != insn_end)
+ {
+ AssertString("DXBC: token size mismatch");
+ return false;
+ }
+ }
+ else
+ {
+ SM4Instr& insn = *new SM4Instr;
+ program.insns.push_back(&insn);
+ (SM4TokInstruction&)insn = insntok;
+
+ SM4TokInstructionEx exttok;
+ memcpy(&exttok, &insntok, sizeof(exttok));
+ while(exttok.extended)
+ {
+ read_token(&exttok);
+ if(exttok.type == SM4_TOKEN_INSTRUCTION_EXTENDED_TYPE_SAMPLE_CONTROLS)
+ {
+ insn.sample_offset[0] = exttok.sample_controls.offset_u;
+ insn.sample_offset[1] = exttok.sample_controls.offset_v;
+ insn.sample_offset[2] = exttok.sample_controls.offset_w;
+ }
+ else if(exttok.type == SM4_TOKEN_INSTRUCTION_EXTENDED_TYPE_RESOURCE_DIM)
+ insn.resource_target = exttok.resource_target.target;
+ else if(exttok.type == SM4_TOKEN_INSTRUCTION_EXTENDED_TYPE_RESOURCE_RETURN_TYPE)
+ {
+ insn.resource_return_type[0] = exttok.resource_return_type.x;
+ insn.resource_return_type[1] = exttok.resource_return_type.y;
+ insn.resource_return_type[2] = exttok.resource_return_type.z;
+ insn.resource_return_type[3] = exttok.resource_return_type.w;
+ }
+ }
+
+ switch(opcode)
+ {
+ case kSM4Op_INTERFACE_CALL:
+ insn.num = read32();
+ break;
+ default:
+ break;
+ }
+
+ unsigned op_num = 0;
+ while(tokens != insn_end)
+ {
+ if (tokens >= insn_end)
+ {
+ AssertString ("DXBC: token size mismatch");
+ return false;
+ }
+ if (op_num >= SM4_MAX_OPS)
+ {
+ AssertString ("DXBC: too many operands");
+ return false;
+ }
+ insn.ops[op_num].reset(new SM4Op);
+ if (!read_op(&*insn.ops[op_num]))
+ return false;
+ ++op_num;
+ }
+ insn.num_ops = op_num;
+ }
+ }
+ return true;
+ }
+};
+
+SM4Program* sm4_parse(const void* tokens, int size)
+{
+ SM4Program* program = new SM4Program;
+ SM4Parser parser(*program, tokens, size);
+ if(parser.parse())
+ return program;
+ delete program;
+ return 0;
+}
+
+static void dxbc_create_internal(struct DXBCChunkHeader** chunks, unsigned num_chunks, void* buffer, unsigned total_size)
+{
+ DXBCHeader* header = (DXBCHeader*)buffer;
+
+ header->fourcc = kFOURCC_DXBC;
+ memset(header->hash, 0, sizeof(header->hash));
+
+ header->one = 1;
+ header->total_size = total_size;
+ header->chunk_count = num_chunks;
+
+ UInt32* chunk_offsets = (UInt32*)(header + 1);
+ UInt32 off = sizeof(struct DXBCHeader) + num_chunks * sizeof(UInt32);
+ for(unsigned i = 0; i < num_chunks; ++i)
+ {
+ chunk_offsets[i] = off;
+ unsigned chunk_full_size = sizeof(DXBCChunkHeader) + chunks[i]->size;
+ memcpy((char*)header + off, chunks[i], chunk_full_size);
+ off += chunk_full_size;
+ }
+
+ void D3DHash (const unsigned char* data, unsigned size, unsigned char res[16]);
+ D3DHash ((const UInt8*)&header->one, total_size-20, (UInt8*)header->hash);
+}
+
+
+static std::pair<void*, size_t> dxbc_create(struct DXBCChunkHeader** chunks, unsigned num_chunks)
+{
+ size_t data_size = 0;
+ for(unsigned i = 0; i < num_chunks; ++i)
+ data_size += sizeof(UInt32) + sizeof(DXBCChunkHeader) + chunks[i]->size;
+ const size_t total_size = sizeof(DXBCHeader) + data_size;
+
+ void* buffer = malloc(total_size);
+ if (!buffer)
+ return std::make_pair((void*)0, 0);
+
+ dxbc_create_internal (chunks, num_chunks, buffer, total_size);
+
+ return std::make_pair(buffer, total_size);
+}
+
+
+void dxbc_create(struct DXBCChunkHeader** chunks, unsigned num_chunks, dynamic_array<UInt8>& out)
+{
+ size_t data_size = 0;
+ for(unsigned i = 0; i < num_chunks; ++i)
+ data_size += sizeof(UInt32) + sizeof(DXBCChunkHeader) + chunks[i]->size;
+ const size_t total_size = sizeof(DXBCHeader) + data_size;
+
+ out.resize_uninitialized (total_size);
+
+ dxbc_create_internal (chunks, num_chunks, out.data(), total_size);
+}
+
+
+
+
+// -------------------------------------------------------------------
+
+enum DXBCBuilderChunks {
+ kBuilderChunkInput = 0,
+ kBuilderChunkOutput,
+ kBuilderChunkCode,
+ kBuilderChunkSM20,
+ kBuilderChunkCount
+};
+struct SigElement {
+ const char* name;
+ int index;
+ int reg;
+ int mask;
+};
+
+struct DXBCCodeBuilder
+{
+ DXBCCodeBuilder(dynamic_array<UInt32>& destArray)
+ : insns(destArray)
+ , curInsnIndex(-1)
+ , tempCount(0)
+ , opcode(kSM4Op_MOV)
+ , opcode2(kSM2Op_MOV)
+ , saturate(0)
+ { }
+ dynamic_array<UInt32>& insns; // reference to actual output array!
+ int curInsnIndex;
+ int tempCount;
+
+ SM4Opcode opcode;
+ SM2Opcode opcode2;
+
+ int saturate;
+ char dstRegType;
+ int dstRegIndex;
+ unsigned dstRegMask;
+ unsigned dstRegComps;
+ int tmpSatRegIndex;
+};
+
+struct DXBCBuilder
+{
+ DXBCBuilder() : codeBuilder(insns) { }
+ DXBCCodeBuilder codeBuilder;
+ dynamic_array<SigElement> inputs;
+ dynamic_array<SigElement> outputs;
+ dynamic_array<UInt32> insns;
+ dynamic_array<UInt32> dcls;
+ //sm20 stuff
+ dynamic_array<UInt32> inputs2;
+ dynamic_array<UInt32> defs2;
+ dynamic_array<UInt32> insns2;
+ std::map<UInt32,UInt32> outputMap;
+ std::map<UInt32,UInt32> inputMap;
+
+ int numTextures2;
+ int curInsnIndex2;
+
+ SM4TokVersion version;
+ DXBCChunkHeader* chunks[kBuilderChunkCount];
+};
+
+DXBCCodeBuilder* dxb_create_code(dynamic_array<UInt32>& destArray)
+{
+ DXBCCodeBuilder* b = new DXBCCodeBuilder(destArray);
+ return b;
+}
+
+void dxb_destroy_code(DXBCCodeBuilder* b)
+{
+ delete b;
+}
+
+const UInt32* dxb_get_code(DXBCCodeBuilder* b, size_t* outSize)
+{
+ *outSize = b->insns.size();
+ return b->insns.data();
+}
+
+
+DXBCBuilder* dxb_create(int major, int minor, SM4ShaderType type)
+{
+ DXBCBuilder* b = new DXBCBuilder();
+ memset (b->chunks, 0, sizeof(b->chunks));
+ b->version.major = major;
+ b->version.minor = minor;
+ b->version.type = type;
+ b->version.format = 0; //@TODO?
+ b->numTextures2 = 0;
+
+ return b;
+}
+void dxb_destroy(DXBCBuilder* b)
+{
+ if (b)
+ {
+ for (int i = 0; i < kBuilderChunkCount; ++i)
+ if (b->chunks[i])
+ free(b->chunks[i]);
+ }
+ delete b;
+}
+
+DXBCCodeBuilder* dxb_get_code_builder(DXBCBuilder* b)
+{
+ return &b->codeBuilder;
+}
+
+
+void dxb_dcl_input (DXBCBuilder* b, const char* name, int index, int reg, int mask)
+{
+ SigElement el;
+ el.name = name;
+ el.index = index;
+ el.reg = reg;
+ el.mask = mask;
+ b->inputs.push_back (el);
+
+ dxb_dcl_input2(b,name,index,reg,mask);
+}
+
+void dxb_dcl_output (DXBCBuilder* b, const char* name, int index, int reg, int mask)
+{
+ SigElement el;
+ el.name = name;
+ el.index = index;
+ el.reg = reg;
+ el.mask = mask;
+ b->outputs.push_back (el);
+
+ dxb_dcl_output2(b,name,index,reg,mask);
+}
+
+
+void dxb_dcl_tex (DXBCBuilder* b, int index, SM4Target dim)
+{
+ SM4TokInstruction dcl;
+ dcl.dword = 0;
+ dcl.opcode = kSM4Op_DCL_SAMPLER;
+ dcl.length = 3;
+ b->dcls.push_back (dcl.dword);
+ SM4TokOperand op;
+ op.dword = 0;
+ op.file = kSM4File_SAMPLER;
+ op.num_indices = 1;
+ b->dcls.push_back (op.dword);
+ b->dcls.push_back (index);
+
+ dcl.dword = 0;
+ dcl.opcode = kSM4Op_DCL_RESOURCE;
+ dcl.length = 4;
+ dcl.dcl_resource.target = dim;
+ b->dcls.push_back (dcl.dword);
+ op.dword = 0;
+ op.file = kSM4File_RESOURCE;
+ op.num_indices = 1;
+ SM4TokResourceRetType ret;
+ ret.dword = 0;
+ ret.x = ret.y = ret.z = ret.w = kSM4RetType_FLOAT;
+ b->dcls.push_back (op.dword);
+ b->dcls.push_back (index);
+ b->dcls.push_back (ret.dword);
+
+ dxb_dcl_tex2 (b, index, dim);
+}
+
+
+void dxb_dcl_cb (DXBCBuilder* b, int index, int size)
+{
+ SM4TokInstruction dcl;
+ dcl.dword = 0;
+ dcl.opcode = kSM4Op_DCL_CONSTANT_BUFFER;
+ dcl.length = 4;
+ dcl.dcl_constant_buffer.dynamic = 0;
+ b->dcls.push_back (dcl.dword);
+ SM4TokOperand op;
+ op.dword = 0;
+ op.comps_enum = kSM4OperComp4;
+ op.file = kSM4File_CONSTANT_BUFFER;
+ op.num_indices = 2;
+ b->dcls.push_back (op.dword);
+ b->dcls.push_back (index);
+ b->dcls.push_back (size);
+}
+
+
+static void dxbc_update_insn_length (DXBCCodeBuilder* b, int len)
+{
+ DebugAssert (b->curInsnIndex >= 0 && b->curInsnIndex < b->insns.size());
+ SM4TokInstruction* tok = (SM4TokInstruction*)&b->insns[b->curInsnIndex];
+ tok->length += len;
+}
+
+void dxb_op (DXBCCodeBuilder* b, SM4Opcode op, bool sat)
+{
+ b->opcode = op;
+ b->curInsnIndex = b->insns.size();
+ SM4TokInstruction tok;
+ tok.dword = 0;
+ tok.opcode = op;
+ tok.insn.sat = sat ? 1 : 0;
+ tok.length = 1;
+ if (op == kSM4Op_DISCARD) // discard instructions emitted by HLSL have nz flag set
+ tok.insn.test_nz = 1;
+ b->insns.push_back (tok.dword);
+}
+
+static inline SM4RegFile dxb_reg_type(char c)
+{
+ switch (c)
+ {
+ case 'r': return kSM4File_TEMP;
+ case 'v': return kSM4File_INPUT;
+ case 'o': return kSM4File_OUTPUT;
+ case 'c': return kSM4File_CONSTANT_BUFFER;
+ case 's': return kSM4File_SAMPLER;
+ case 't': return kSM4File_RESOURCE;
+ default: AssertString("unknown register type"); return kSM4File_TEMP;
+ }
+}
+
+
+void dxb_reg (DXBCCodeBuilder* b, char rchar, int reg, unsigned mask)
+{
+ b->dstRegMask = mask;
+ b->dstRegComps = BitsInMask(mask);
+
+ SM4RegFile rtype = dxb_reg_type(rchar);
+ dxbc_update_insn_length (b, 2);
+ SM4TokOperand op;
+ op.dword = 0;
+ op.comps_enum = rtype==kSM4File_SAMPLER ? kSM4OperComp0 : kSM4OperComp4;
+
+ // discard only has 1 parameter which is similar to a source register argument:
+ // if it's one channel only, then emit it as a "scalar channel index"
+ op.mode = (b->dstRegComps==1 && b->opcode==kSM4Op_DISCARD) ? SM4_OPERAND_MODE_SCALAR : SM4_OPERAND_MODE_MASK;
+ op.sel = (b->dstRegComps==1 && b->opcode==kSM4Op_DISCARD) ? HighestBit(mask) : mask;
+ if (rtype==kSM4File_SAMPLER)
+ op.sel = 0;
+ op.file = rtype;
+ op.num_indices = 1;
+ b->insns.push_back (op.dword);
+ b->insns.push_back (reg);
+ if (rtype == kSM4File_TEMP)
+ b->tempCount = std::max (b->tempCount, reg+1);
+}
+
+
+static unsigned adjust_swizzle (DXBCCodeBuilder* b, unsigned swizzle, bool sm2)
+{
+ // For DP3 with "no swizzle", SM2 expects to not have the swizzle, while SM4 expects xyzx
+ if (b->opcode == kSM4Op_DP3 && swizzle == kSM4SwzNone && !sm2)
+ swizzle = kSM4SwzXYZX;
+
+ // arguments for single-channel destinations in SM4 should be emitted as "1 channel index"
+ // instead of "swizzle that replicates all channels"
+ if (!sm2 && b->dstRegComps==1)
+ {
+ if (swizzle == kSM4SwzRepX)
+ swizzle = 0;
+ else if (swizzle == kSM4SwzRepY)
+ swizzle = 1;
+ else if (swizzle == kSM4SwzRepZ)
+ swizzle = 2;
+ else if (swizzle == kSM4SwzRepW)
+ swizzle = 3;
+ }
+ return swizzle;
+}
+
+
+void dxb_swz (DXBCCodeBuilder* b, char rchar, int reg, unsigned swizzle, bool neg)
+{
+ swizzle = adjust_swizzle (b, swizzle, false);
+
+ SM4RegFile rtype = dxb_reg_type(rchar);
+ int len = 2;
+ if (neg) ++len;
+ if (rtype==kSM4File_CONSTANT_BUFFER) ++len;
+ dxbc_update_insn_length (b, len);
+ SM4TokOperand op;
+ op.dword = 0;
+ op.comps_enum = rtype==kSM4File_SAMPLER ? kSM4OperComp0 : kSM4OperComp4;
+ op.mode = (b->dstRegComps==1 && swizzle<4) ? SM4_OPERAND_MODE_SCALAR : SM4_OPERAND_MODE_SWIZZLE;
+ op.sel = swizzle;
+ op.file = rtype;
+ op.num_indices = rtype==kSM4File_CONSTANT_BUFFER ? 2 : 1;
+ op.extended = neg ? 1 : 0;
+ b->insns.push_back (op.dword);
+ if (neg)
+ {
+ SM4TokOperandEx opex;
+ opex.dword = 0;
+ opex.type = 1;
+ opex.neg = 1;
+ b->insns.push_back (opex.dword);
+ }
+ if (rtype==kSM4File_CONSTANT_BUFFER)
+ {
+ b->insns.push_back (reg >> 16);
+ b->insns.push_back (reg & 0xFFFF);
+ }
+ else
+ {
+ b->insns.push_back (reg);
+ }
+ if (rtype == kSM4File_TEMP)
+ b->tempCount = std::max (b->tempCount, reg+1);
+}
+
+void dxb_float1 (DXBCCodeBuilder* b, float v)
+{
+ dxbc_update_insn_length (b, 2);
+ SM4TokOperand op;
+ op.dword = 0;
+ op.comps_enum = kSM4OperComp1;
+ op.file = kSM4File_IMMEDIATE32;
+ b->insns.push_back (op.dword);
+ union { float f; UInt32 i; } f2i;
+ f2i.f = v;
+ b->insns.push_back (f2i.i);
+}
+
+void dxb_int1 (DXBCCodeBuilder* b, int i)
+{
+ dxbc_update_insn_length (b, 2);
+ SM4TokOperand op;
+ op.dword = 0;
+ op.comps_enum = kSM4OperComp1;
+ op.file = kSM4File_IMMEDIATE32;
+ b->insns.push_back (op.dword);
+ b->insns.push_back (i);
+}
+
+
+void dxb_float4 (DXBCCodeBuilder* b, float v0, float v1, float v2, float v3)
+{
+ dxbc_update_insn_length (b, 5);
+ SM4TokOperand op;
+ op.dword = 0;
+ op.comps_enum = kSM4OperComp4;
+ op.file = kSM4File_IMMEDIATE32;
+ b->insns.push_back (op.dword);
+ union { float f; UInt32 i; } f2i;
+ f2i.f = v0; b->insns.push_back (f2i.i);
+ f2i.f = v1; b->insns.push_back (f2i.i);
+ f2i.f = v2; b->insns.push_back (f2i.i);
+ f2i.f = v3; b->insns.push_back (f2i.i);
+}
+
+
+
+static void dxbc_builder_build_sig (DXBCBuilder* b, bool input)
+{
+ int chunkIdx = input ? kBuilderChunkInput : kBuilderChunkOutput;
+ const dynamic_array<SigElement>& sigs = input ? b->inputs : b->outputs;
+ const int nsigs = sigs.size();
+
+ // size = 8 (header) + 8 (sig chunk header) + count * DXBCSignatureElement + names
+ unsigned size = 16 + nsigs * sizeof(DXBCSignatureElement);
+ unsigned namesOffset = size;
+ for (int i = 0; i < nsigs; ++i)
+ size += strlen(sigs[i].name) + 1;
+ size = (size + 3) & ~3; // align to next dword
+
+ UInt8* buf = (UInt8*)malloc(size);
+ DXBCChunkSig* chunk = (DXBCChunkSig*)buf;
+ memset (chunk, 0xAB, size);
+
+ chunk->fourcc = input ? kFOURCC_ISGN : kFOURCC_OSGN;
+ chunk->size = size-8;
+ chunk->count = nsigs;
+ chunk->unk8 = 8;
+ for (int i = 0; i < nsigs; ++i)
+ {
+ DXBCSignatureElement* s = &chunk->elements[i];
+ s->name_offset = namesOffset-8;
+ int len = strlen(sigs[i].name) + 1;
+ memcpy (buf + namesOffset, sigs[i].name, len);
+ namesOffset += len;
+ const bool outputPos = !strcmp(sigs[i].name,"SV_POSITION");
+ unsigned mask = sigs[i].mask;
+ s->semantic_index = sigs[i].index;
+ s->system_value_type = outputPos ? 1 : 0;
+ s->component_type = 3; // float
+ s->register_num = sigs[i].reg;
+ s->mask = mask; // mask of channels used
+ if (input)
+ s->read_write_mask = mask;
+ else
+ s->read_write_mask = ~mask & 0xF;
+ s->stream = 0;
+ s->unused = 0;
+
+
+ SM4TokInstruction dcl;
+ dcl.dword = 0;
+ dcl.opcode = input ?
+ (b->version.type == kSM4Shader_Pixel ? kSM4Op_DCL_INPUT_PS : kSM4Op_DCL_INPUT) :
+ (outputPos ? kSM4Op_DCL_OUTPUT_SIV : kSM4Op_DCL_OUTPUT);
+ dcl.length = outputPos ? 4 : 3;
+ if (dcl.opcode == kSM4Op_DCL_INPUT_PS)
+ dcl.dcl_input_ps.interpolation = kSM4Interp_LINEAR;
+ b->dcls.push_back (dcl.dword);
+ SM4TokOperand op;
+ op.dword = 0;
+ op.comps_enum = kSM4OperComp4;
+ op.mode = SM4_OPERAND_MODE_MASK;
+ op.sel = mask;
+ op.file = input ? kSM4File_INPUT : kSM4File_OUTPUT;
+ op.num_indices = 1;
+ op.index0_repr = SM4_OPERAND_INDEX_REPR_IMM32;
+ b->dcls.push_back (op.dword);
+ b->dcls.push_back (sigs[i].reg);
+ if (outputPos)
+ b->dcls.push_back (s->system_value_type);
+ }
+
+ b->chunks[chunkIdx] = chunk;
+}
+
+
+static void dxbc_builder_build_code (DXBCBuilder* b)
+{
+ const unsigned n = b->dcls.size() + b->insns.size();
+
+ // size = 8 (header) + 8 (code chunk header) + tokens
+ unsigned size = 16 + n*4;
+
+ UInt8* buf = (UInt8*)malloc(size);
+ DXBCChunkCode* chunk = (DXBCChunkCode*)buf;
+
+ chunk->fourcc = kFOURCC_SHDR;
+ chunk->size = size-8;
+ chunk->version = *(UInt32*)&b->version;
+ chunk->length = n+2;
+
+ UInt8* codePtr = (UInt8*)(chunk+1);
+ if (!b->dcls.empty())
+ {
+ size_t size = b->dcls.size()*sizeof(b->dcls[0]);
+ memcpy (codePtr, &b->dcls[0], size);
+ codePtr += size;
+ }
+ if (!b->insns.empty())
+ {
+ size_t size = b->insns.size()*sizeof(b->insns[0]);
+ memcpy (codePtr, &b->insns[0], size);
+ codePtr += size;
+ }
+ DebugAssert (buf + size == codePtr);
+
+ b->chunks[kBuilderChunkCode] = chunk;
+}
+
+void dxbc_builder_build_code2 (DXBCBuilder* b);
+
+void* dxb_build (DXBCBuilder* b, size_t& outSize)
+{
+ dxbc_builder_build_sig (b, true);
+ dxbc_builder_build_sig (b, false);
+ if (b->codeBuilder.tempCount > 0)
+ {
+ SM4TokInstruction tok;
+ tok.dword = 0;
+ tok.opcode = kSM4Op_DCL_TEMPS;
+ tok.length = 2;
+ b->dcls.push_back (tok.dword);
+ b->dcls.push_back (b->codeBuilder.tempCount);
+ }
+ dxbc_builder_build_code (b);
+ dxbc_builder_build_code2(b);
+
+ std::pair<void*,size_t> dxbc = dxbc_create (b->chunks, kBuilderChunkCount);
+ outSize = dxbc.second;
+
+ return dxbc.first;
+}
+
+
+
+//------------------------------------------------------------------------------------
+//SM 2.0 stuff
+
+
+//table format was reversed and is still not 100% known
+static dynamic_array<UInt32> dxbc_builder_build_mapping_table2 (DXBCBuilder* b)
+{
+ const unsigned shader_size = b->inputs2.size() + b->insns2.size() + b->defs2.size() + 1;
+ dynamic_array<UInt32> table;
+
+ const unsigned rmap_offset = 0x24;
+ const unsigned cmap_offset = rmap_offset + b->numTextures2*4;
+ const unsigned imap_offset = cmap_offset + 3*4;
+ const unsigned shdr_offset = imap_offset + (b->version.type == kSM4Shader_Vertex ? 4 : 0);
+
+ unsigned t = b->version.type == kSM4Shader_Pixel ? 4 : 0;
+ table.push_back(b->version.type == kSM4Shader_Pixel ? 0xffff0200 : 0xfffe0200);
+ table.push_back(shader_size*4);
+ table.push_back(shdr_offset);
+ table.push_back(1|(cmap_offset<<16));
+ table.push_back(shdr_offset<<16);
+ table.push_back(shdr_offset<<16);
+ table.push_back(b->numTextures2|(rmap_offset<<16));
+ table.push_back((b->version.type == kSM4Shader_Pixel ? 0 : 1)|(imap_offset<<16));
+
+ // Sampler mapping: 16-23 bits target sampler, 8-15 bits source sampler, 0-7 bits source resource
+ for (int n = 0;n < b->numTextures2;n++)
+ table.push_back(n|(n<<8)|(n<<16));
+
+ //constant to CB mapping
+ //CBn:CBoff; always zero
+ table.push_back(0);
+ //REGnum:REGn
+ table.push_back(b->version.type == kSM4Shader_Pixel ? 10 : 63); //k11VertexSize or k11PixelSize
+ //convert mode 8:8:8:8; always zero
+ table.push_back(0);
+
+ //input mapping; always one constant for vertex shader with pixel offset: oPos.xy += offset.xy * oPos.w;
+ if (b->version.type == kSM4Shader_Vertex)
+ table.push_back(64<<16); //k11VertexPosOffset9x
+
+ return table;
+}
+
+
+static unsigned swizzle_from_mask (unsigned mask)
+{
+ if (mask == 1)
+ return kSM4SwzRepX;
+ if (mask == 2)
+ return kSM4SwzRepY;
+ if (mask == 4)
+ return kSM4SwzRepZ;
+ if (mask == 8)
+ return kSM4SwzRepW;
+ //@TODO: more?
+ return kSM4SwzNone;
+}
+
+static void dxb_handle_saturate2 (DXBCBuilder* b)
+{
+ if (!b->codeBuilder.saturate || b->version.type == kSM4Shader_Pixel)
+ return;
+
+ // previous instruction had saturate on destination, so insert manual min/max instructions
+ const int tmpReg = b->codeBuilder.tmpSatRegIndex;
+ DebugAssert(tmpReg >= 0);
+ b->codeBuilder.saturate = 0;
+
+ const int constReg = dxb_imm_f4 (b, 0, 1, 0, 0);
+ const unsigned mask = b->codeBuilder.dstRegMask;
+ const unsigned swz = swizzle_from_mask(mask);
+
+ dxb_op2(b, kSM2Op_MAX, false);
+ dxb_reg2(b,'r',tmpReg,mask);
+ dxb_swz2(b,'r',tmpReg,swz);
+ dxb_swz2(b,'c',constReg,kSM4SwzRepX);
+ dxb_op2(b, kSM2Op_MIN, false);
+ dxb_reg2(b,b->codeBuilder.dstRegType,b->codeBuilder.dstRegIndex,mask);
+ dxb_swz2(b,'r',tmpReg,swz);
+ dxb_swz2(b,'c',constReg,kSM4SwzRepY);
+}
+
+static void dxbc_builder_build_code2 (DXBCBuilder* b)
+{
+ dxb_handle_saturate2(b); // handle possible saturate on the last instruction
+
+ dynamic_array<UInt32> table = dxbc_builder_build_mapping_table2(b);
+
+ const unsigned shader_size = b->inputs2.size() + b->insns2.size() + b->defs2.size() + 1;
+
+ // size = 8 (header) + 8 (code chunk header) + tokens
+ unsigned size = 12 + 4*table.size() + shader_size*4;
+
+ UInt8* buf = (UInt8*)malloc(size);
+ DXBCChunkSM20* chunk = (DXBCChunkSM20*)buf;
+
+ chunk->fourcc = kFOURCC_SM20;
+ chunk->size = size - 8;
+ chunk->length = chunk->size;
+
+ UInt8* codePtr = (UInt8*)(chunk+1);
+ if (!table.empty())
+ {
+ size_t size = table.size()*sizeof(table[0]);
+ memcpy (codePtr, &table[0], size);
+ codePtr += size;
+ }
+
+ *(UInt32*)codePtr = b->version.type == kSM4Shader_Pixel ? 0xffff0201 : 0xfffe0201;
+ codePtr += 4;
+
+ if (!b->defs2.empty())
+ {
+ size_t size = b->defs2.size()*sizeof(b->defs2[0]);
+ memcpy (codePtr, &b->defs2[0], size);
+ codePtr += size;
+ }
+ if (!b->inputs2.empty())
+ {
+ size_t size = b->inputs2.size()*sizeof(b->inputs2[0]);
+ memcpy (codePtr, &b->inputs2[0], size);
+ codePtr += size;
+ }
+ if (!b->insns2.empty())
+ {
+ size_t size = b->insns2.size()*sizeof(b->insns2[0]);
+ memcpy (codePtr, &b->insns2[0], size);
+ codePtr += size;
+ }
+
+ b->chunks[kBuilderChunkSM20] = chunk;
+}
+
+
+struct SM2TokSrc
+{
+ union {
+ UInt32 dword;
+ struct {
+ unsigned reg_num : 11;
+ unsigned reg_type34 : 2;
+ unsigned _res0 : 1;
+ unsigned _res1 : 2;
+ unsigned swizzle : 8;
+ unsigned src_mod : 4;
+ unsigned reg_type02 : 3;
+ unsigned _one : 1;
+ };
+ };
+};
+
+static inline UInt32 dxb_reg_split2(UInt32 n)
+{
+ return ((n<<D3DSP_REGTYPE_SHIFT)&D3DSP_REGTYPE_MASK)|((n<<(D3DSP_REGTYPE_SHIFT2))&D3DSP_REGTYPE_MASK2);
+}
+
+static inline UInt32 dxb_reg_type2(char c)
+{
+ switch (c)
+ {
+ case 'r': return D3DSPR_TEMP;
+ case 'v': return D3DSPR_INPUT;
+ case 'c': return D3DSPR_CONST;
+ //should be D3DSPR_TEXTURE, however, I'm replacing
+ //t swizzle with sampler source and removing s source reg
+ //to convert sm40 sample to sm20 texld on-fly
+ case 't': return D3DSPR_SAMPLER;
+ // case 's': return D3DSPR_SAMPLER;
+
+ case 'x': return D3DSPR_RASTOUT;
+ case 'y': return D3DSPR_TEXCRDOUT;
+
+ case 'o': return D3DSPR_RASTOUT;
+
+ default: AssertString("unknown register type"); return D3DSPR_TEMP;
+ }
+}
+
+static void dxbc_update_insn_length2 (DXBCBuilder* b, int len)
+{
+ DebugAssert (b->curInsnIndex2 >= 0 && b->curInsnIndex2 < b->insns2.size());
+ SM2TokInstruction* tok = (SM2TokInstruction*)&b->insns2[b->curInsnIndex2];
+ tok->length += len;
+}
+
+struct SNameToID
+{
+ const char* name;
+ UInt32 id;
+};
+
+
+//SM4.0 semantic to SM2.0 VS usage mapping
+static const SNameToID s_VSDeclNames[] =
+{
+ {"POSITION", D3DDECLUSAGE_POSITION},
+ {"NORMAL", D3DDECLUSAGE_NORMAL},
+ {"TEXCOORD", D3DDECLUSAGE_TEXCOORD},
+ {"COLOR", D3DDECLUSAGE_COLOR},
+ {"FOG", D3DDECLUSAGE_FOG},
+
+ {"SV_POSITION", 0},
+
+ {NULL, 0},
+};
+
+
+//SM4.0 semantic to SM2.0 PS register file mapping
+static const SNameToID s_PSDeclNames[] =
+{
+ {"TEXCOORD", D3DSPR_TEXTURE},
+ {"COLOR", D3DSPR_INPUT},
+
+ {"SV_Target", D3DSPR_COLOROUT},
+
+ {NULL, 0},
+};
+
+
+//SM4.0 semantic to SM2.0 register file mapping
+static const SNameToID s_OutNames[] =
+{
+ {"POSITION", D3DSPR_RASTOUT},
+ {"SV_POSITION", D3DSPR_RASTOUT},
+ {"FOG", D3DSPR_RASTOUT},
+ {"TEXCOORD", D3DSPR_TEXCRDOUT},
+ {"COLOR", D3DSPR_ATTROUT},
+
+ {"SV_Target", D3DSPR_COLOROUT},
+
+ {NULL, 0},
+};
+
+
+static UInt32 find_dcl_by_name(const char* name,const SNameToID* p=s_VSDeclNames)
+{
+ while (p->name)
+ {
+ if (0 == strcmp(p->name,name))
+ return p->id;
+ p++;
+ }
+
+ Assert(0 || "DCL name not found!");
+
+ return -1;
+}
+
+void dxb_dcl_tex2 (DXBCBuilder* b, int index, SM4Target dim)
+{
+ UInt32 tok2 = 0x80000000;
+
+ switch (dim)
+ {
+ case kSM4Target_TEXTURE2D:
+ tok2 |= D3DSTT_2D;
+ break;
+ case kSM4Target_TEXTURE3D:
+ tok2 |= D3DSTT_VOLUME;
+ break;
+ case kSM4Target_TEXTURECUBE:
+ tok2 |= D3DSTT_CUBE;
+ break;
+ default:
+ Assert(0 || "Wrong texture type!");
+ };
+
+ SM2TokInstruction tok;
+ tok.dword = 0;
+ tok.opcode = kSM2Op_DCL;
+ tok.length = 2;
+
+ b->inputs2.push_back (tok.dword);
+ b->inputs2.push_back (tok2);
+
+ SM2TokDst op;
+ op.dword = dxb_reg_split2(D3DSPR_SAMPLER);
+ op.reg_num = index;
+ op.write_mask = 0xf;
+ op._one = 1;
+ b->inputs2.push_back (op.dword);
+
+ b->numTextures2++;
+}
+
+//CAVEAT: DX11 feature level 9.x vertex shaders use texcoord semantics for _all_ attributes
+int dxb_find_input_by_name2(DXBCBuilder* b,const char* name, int index)
+{
+ int index2 = 0;
+ while (0 != strcmp(b->inputs[index2].name,name) || b->inputs[index2].index != index)
+ index2++;
+
+ return index2;
+}
+
+int dxb_find_output_by_name2(DXBCBuilder* b,const char* name, int index)
+{
+ int index2 = 0;
+ while (0 != strcmp(b->outputs[index2].name,name) || b->outputs[index2].index != index)
+ index2++;
+
+ return index2;
+}
+
+
+//CAVEAT1: DX11 feature level 9.x vertex shaders use texcoord semantics for _all_ attributes
+//CAVEAT2: [Qualcomm] DX11 feature level 9.x uses texcoord interpolators for _all_ varyings
+void dxb_dcl_input2 (DXBCBuilder* b, const char* name, int index, int reg, int mask)
+{
+ //TODO: this is not needed anymore.
+ UInt32 decl = find_dcl_by_name(name,b->version.type == kSM4Shader_Vertex ? s_VSDeclNames : s_PSDeclNames);
+ UInt32 reg_type = b->version.type == kSM4Shader_Vertex ? dxb_reg_type2('v') : (decl);
+
+ SM2TokInstruction tok;
+ tok.dword = 0;
+ tok.opcode = kSM2Op_DCL;
+ tok.length = 2;
+
+ b->inputs2.push_back (tok.dword);
+
+ UInt32 tok2 = 0x80000000;
+ //CAVEAT1
+ if (b->version.type == kSM4Shader_Vertex)
+ tok2 |= D3DDECLUSAGE_TEXCOORD|(dxb_find_input_by_name2(b,name,index)<<16);
+ b->inputs2.push_back (tok2);
+
+#if 1
+ //CAVEAT2
+ if (b->version.type == kSM4Shader_Pixel)
+ {
+ index = dxb_find_input_by_name2(b,name,index);
+ reg_type = D3DSPR_TEXTURE;
+ }
+#endif
+
+ SM2TokDst op;
+ op.dword = dxb_reg_split2(reg_type);
+ op.reg_num = b->version.type == kSM4Shader_Vertex ? reg : index;
+ op.write_mask = mask;
+ op._one = 1;
+ op._res0 = 0;
+ op._res1 = 0;
+ b->inputs2.push_back (op.dword);
+
+ if (b->version.type == kSM4Shader_Pixel)
+ {
+ op.write_mask = 0;
+ b->inputMap[reg] = op.dword;
+ }
+}
+
+void dxb_dcl_output2 (DXBCBuilder* b, const char* name, int index, int reg, int mask)
+{
+ UInt32 reg_type = find_dcl_by_name(name,s_OutNames);
+#if 1
+ //CAVEAT2. Excluding POSITION
+ if (b->version.type == kSM4Shader_Vertex && NULL == strstr(name,"POSITION"))
+ {
+ index = dxb_find_output_by_name2(b,name,index);
+ reg_type = D3DSPR_TEXCRDOUT;
+ }
+#else
+ //special case in raster reg. file
+ if (0 == strcmp(name,"FOG"))
+ index = 2;
+#endif
+
+ SM2TokDst op;
+ op.dword = dxb_reg_split2(reg_type);
+ op.reg_num = index;
+
+ b->outputMap[reg] = op.dword;
+}
+
+static UInt32 dxb_find_output2 (DXBCBuilder* b, int reg)
+{
+ Assert (b->outputMap.find(reg) != b->outputMap.end());
+ return b->outputMap[reg];
+}
+
+static UInt32 dxb_find_input2 (DXBCBuilder* b, int reg)
+{
+ Assert (b->inputMap.find(reg) != b->inputMap.end());
+ return b->inputMap[reg];
+}
+
+
+void dxb_op2 (DXBCBuilder* b, SM2Opcode op, bool sat, int scratchTmpRegForSat)
+{
+ dxb_handle_saturate2 (b);
+
+ b->codeBuilder.opcode2 = op;
+ b->curInsnIndex2 = b->insns2.size();
+
+ SM2TokInstruction tok;
+ tok.dword = 0;
+ tok.opcode = op;
+ tok.specific = op>>16;
+ tok.length = 0;
+
+ b->insns2.push_back (tok.dword);
+ b->codeBuilder.saturate = sat;
+ b->codeBuilder.tmpSatRegIndex = scratchTmpRegForSat;
+}
+
+void dxb_reg2 (DXBCBuilder* b, char rchar, int reg,unsigned mask)
+{
+ b->codeBuilder.dstRegMask = mask;
+ b->codeBuilder.dstRegComps = BitsInMask(mask);
+ if (b->codeBuilder.saturate)
+ {
+ b->codeBuilder.dstRegType = rchar;
+ b->codeBuilder.dstRegIndex = reg;
+ if (b->version.type != kSM4Shader_Pixel)
+ {
+ rchar = 'r';
+ reg = b->codeBuilder.tmpSatRegIndex;
+ }
+ }
+
+ //for sm20 sampler will be emitted by texture coords swizzle source op
+ if ('s' == rchar && b->version.type == kSM4Shader_Pixel)
+ return;
+
+ dxbc_update_insn_length2 (b, 1);
+ SM2TokDst op;
+
+ if ('o' == rchar)
+ op.dword = dxb_find_output2(b,reg);
+ else if ('v' == rchar && b->version.type == kSM4Shader_Pixel)
+ op.dword = dxb_find_input2(b,reg);
+ else
+ {
+ op.dword = dxb_reg_split2(dxb_reg_type2(rchar));
+ op.reg_num = reg;
+ }
+
+ op.res_mod = 0;
+ if (b->codeBuilder.saturate && b->version.type == kSM4Shader_Pixel)
+ {
+ op.res_mod = D3DSPDM_SATURATE>>D3DSP_DSTMOD_SHIFT;
+ b->codeBuilder.saturate = 0;
+ }
+
+ op.write_mask = mask;
+ op._one = 1;
+ op._res0 = 0;
+ op._res1 = 0;
+
+ b->insns2.push_back (op.dword);
+}
+
+void dxb_swz2 (DXBCBuilder* b, char rchar, int reg, unsigned swizzle, bool neg)
+{
+ swizzle = adjust_swizzle (&b->codeBuilder, swizzle, true);
+
+ UInt32 rtype = dxb_reg_type2(rchar);
+ dxbc_update_insn_length2 (b, 1);
+
+ SM2TokSrc op;
+ if ('o' == rchar)
+ op.dword = dxb_find_output2(b,reg);
+ else if ('v' == rchar && b->version.type == kSM4Shader_Pixel)
+ op.dword = dxb_find_input2(b,reg);
+ else
+ {
+ op.dword = dxb_reg_split2(dxb_reg_type2(rchar));
+ op.reg_num = reg;
+ }
+
+ //texld must not swizzle
+ if ('s' != rchar && 't' != rchar)// && op.reg_type02 != D3DSPR_TEXTURE)
+ {
+ op.swizzle = swizzle;
+ op.src_mod = neg ? 1 : 0;
+ }
+ else
+ op.swizzle = kSM4SwzNone;
+
+ op._one = 1;
+ op._res0 = 0;
+ op._res1 = 0;
+
+ b->insns2.push_back (op.dword);
+}
+
+//find a constant, define a constant if not found
+int dxb_imm_f4 (DXBCBuilder* b, float v0, float v1, float v2, float v3)
+{
+ union { float f; UInt32 i; } f2i;
+ //TODO: do not hardcode!
+ int reg = b->version.type == kSM4Shader_Pixel ? 10 : 63 + 2;
+ dynamic_array<UInt32>::const_iterator it = b->defs2.begin();
+ for (;it != b->defs2.end();it += 6, reg++)
+ {
+ f2i.f = v0;
+ if (f2i.i != *(it + 2))
+ continue;
+ f2i.f = v1;
+ if (f2i.i != *(it + 3))
+ continue;
+ f2i.f = v2;
+ if (f2i.i != *(it + 4))
+ continue;
+ f2i.f = v3;
+ if (f2i.i != *(it + 5))
+ continue;
+
+ break;
+ }
+
+ if (it == b->defs2.end())
+ {
+ SM2TokInstruction tok;
+ tok.dword = 0;
+ tok.opcode = kSM2Op_DEF;
+ tok.specific = 0;
+ tok.length = 5;
+ b->defs2.push_back (tok.dword);
+
+ SM2TokDst op;
+ op.dword = dxb_reg_split2(dxb_reg_type2('c'));
+ op.reg_num = reg;
+ op.write_mask = 0xF;
+ op._one = 1;
+ b->defs2.push_back (op.dword);
+
+ f2i.f = v0; b->defs2.push_back (f2i.i);
+ f2i.f = v1; b->defs2.push_back (f2i.i);
+ f2i.f = v2; b->defs2.push_back (f2i.i);
+ f2i.f = v3; b->defs2.push_back (f2i.i);
+ }
+
+ return reg;
+}
+
+SM2Opcode dxb_to_sm2 (SM4Opcode op)
+{
+ switch (op)
+ {
+ case kSM4Op_SQRT:
+ break;
+ case kSM4Op_DIV:
+ break;
+ case kSM4Op_RSQ:
+ return kSM2Op_RSQ;
+ case kSM4Op_LOG:
+ return kSM2Op_LOG;
+ case kSM4Op_EXP:
+ return kSM2Op_EXP;
+ case kSM4Op_RCP:
+ return kSM2Op_RCP;
+
+ case kSM4Op_MOV:
+ return kSM2Op_MOV;
+ case kSM4Op_MOVC:
+ break;
+
+ case kSM4Op_MAD:
+ return kSM2Op_MAD;
+ case kSM4Op_DP2:
+ break;
+ case kSM4Op_DP3:
+ return kSM2Op_DP3;
+ case kSM4Op_DP4:
+ return kSM2Op_DP4;
+ case kSM4Op_MUL:
+ return kSM2Op_MUL;
+ case kSM4Op_ADD:
+ return kSM2Op_ADD;
+ case kSM4Op_AND:
+ break;
+ case kSM4Op_MIN:
+ return kSM2Op_MIN;
+ case kSM4Op_MAX:
+ return kSM2Op_MAX;
+
+ case kSM4Op_SAMPLE:
+ return kSM2Op_TEX;
+ case kSM4Op_DISCARD:
+ return kSM2Op_TEXKILL;
+
+ case kSM4Op_LT:
+ return kSM2Op_SLT;
+ case kSM4Op_GE:
+ return kSM2Op_SGE;
+ case kSM4Op_NE:
+ break;
+ case kSM4Op_EQ:
+ break;
+
+ case kSM4Op_RET:
+ return kSM2Op_END;
+ }
+
+ AssertString("unknown SM4 opcode");
+ __debugbreak();
+
+ return kSM2Op_NOP;
+}
+
+