12 files changed, 4919 insertions, 0 deletions
diff --git a/Runtime/Camera/RenderLoops/BuiltinShaderParamUtility.cpp b/Runtime/Camera/RenderLoops/BuiltinShaderParamUtility.cpp
new file mode 100644
index 0000000..99e5d22
--- /dev/null
+++ b/Runtime/Camera/RenderLoops/BuiltinShaderParamUtility.cpp
@@ -0,0 +1,19 @@
+#include "UnityPrefix.h"
+#include "BuiltinShaderParamUtility.h"
+#include "Runtime/GfxDevice/GfxDevice.h"
+#include "Runtime/Shaders/ShaderKeywords.h"
+
+static ShaderKeyword gSupportedLODFadeKeyword = keywords::Create("ENABLE_LOD_FADE");
+
+void SetObjectScale (GfxDevice& device, float lodFade, float invScale)
+{
+	// Publishes the per-object inverse scale and LOD fade to the device, then toggles the ENABLE_LOD_FADE keyword.
+	device.SetInverseScale(invScale);
+	/////@TODO: Figure out why inverse scale is implemented in gfxdevice, and decide if we should do the same for lodFade?
+	const Vector4f instanceScale(0, 0, lodFade, invScale);
+	device.GetBuiltinParamValues().SetInstanceVectorParam(kShaderInstanceVecScale, instanceScale);
+	if (lodFade != LOD_FADE_DISABLED) // keyword is on for everything except the exact "disabled" sentinel
+		g_ShaderKeywords.Enable(gSupportedLODFadeKeyword);
+	else
+		g_ShaderKeywords.Disable(gSupportedLODFadeKeyword);
+}
diff --git a/Runtime/Camera/RenderLoops/BuiltinShaderParamUtility.h b/Runtime/Camera/RenderLoops/BuiltinShaderParamUtility.h
new file mode 100644
index 0000000..7b480ed
--- /dev/null
+++ b/Runtime/Camera/RenderLoops/BuiltinShaderParamUtility.h
@@ -0,0 +1,11 @@
+#pragma once
+
+
+///@TODO: This should probably be 0. But for now we don't have proper ifdef support for switching to a different subshader.
+#define LOD_FADE_DISABLED 0.999F // sentinel lodFade value: SetObjectScale disables the ENABLE_LOD_FADE keyword only for exactly this value
+// Tolerance used when two lodFade values are compared to decide whether render states match.
+#define LOD_FADE_BATCH_EPSILON 0.0625 // 1/16
+
+class GfxDevice;
+// Sets the inverse scale on the device, uploads (0, 0, lodFade, invScale) as the instance scale vector and toggles the LOD fade keyword.
+void SetObjectScale (GfxDevice& device, float lodFade, float invScale);
diff --git a/Runtime/Camera/RenderLoops/ForwardShaderRenderLoop.cpp b/Runtime/Camera/RenderLoops/ForwardShaderRenderLoop.cpp
new file mode 100644
index 0000000..44e91b8
--- /dev/null
+++ b/Runtime/Camera/RenderLoops/ForwardShaderRenderLoop.cpp
@@ -0,0 +1,1403 @@
+#include "UnityPrefix.h"
+#include "Runtime/GfxDevice/GfxDeviceConfigure.h"
+
+#include "RenderLoopPrivate.h"
+#include "RenderLoop.h"
+#include "Runtime/Camera/Renderqueue.h"
+#include "Runtime/Camera/Camera.h"
+#include "Runtime/Camera/Renderable.h"
+#include "Runtime/Camera/Light.h"
+#include "Runtime/Camera/RenderSettings.h"
+#include "Runtime/Camera/RenderManager.h"
+#include "Runtime/Camera/Shadows.h"
+#include "Runtime/Camera/LODGroupManager.h"
+#include "Runtime/Graphics/RenderBufferManager.h"
+#include "Runtime/Graphics/GraphicsHelper.h"
+#include "Runtime/Graphics/LightmapSettings.h"
+#include "Runtime/Graphics/Transform.h"
+#include "External/shaderlab/Library/intshader.h"
+#include "External/shaderlab/Library/properties.h"
+#include "Runtime/Misc/QualitySettings.h"
+#include "Runtime/Misc/BuildSettings.h"
+#include "Runtime/Shaders/Shader.h"
+#include "Runtime/GfxDevice/GfxDevice.h"
+#include "Runtime/GfxDevice/BatchRendering.h"
+#include "Runtime/Profiler/Profiler.h"
+#include "Runtime/Profiler/ExternalGraphicsProfiler.h"
+#include "Runtime/Utilities/dynamic_array.h"
+#include "BuiltinShaderParamUtility.h"
+#include "Runtime/Math/ColorSpaceConversion.h"
+#include "Runtime/Camera/LightManager.h"
+#include "External/MurmurHash/MurmurHash2.h"
+
+
+// Enable/disable hash based forward shader render loop sorting functionality.
+#define ENABLE_FORWARD_SHADER_LOOP_HASH_SORTING 0
+
+PROFILER_INFORMATION(gFwdOpaquePrepare, "RenderForwardOpaque.Prepare", kProfilerRender)
+PROFILER_INFORMATION(gFwdOpaqueSort, "RenderForwardOpaque.Sort", kProfilerRender)
+PROFILER_INFORMATION(gFwdOpaqueCollectShadows, "RenderForwardOpaque.CollectShadows", kProfilerRender)
+PROFILER_INFORMATION(gFwdOpaqueRender, "RenderForwardOpaque.Render", kProfilerRender)
+PROFILER_INFORMATION(gFwdAlphaPrepare, "RenderForwardAlpha.Prepare", kProfilerRender)
+PROFILER_INFORMATION(gFwdAlphaSort, "RenderForwardAlpha.Sort", kProfilerRender)
+PROFILER_INFORMATION(gFwdAlphaRender, "RenderForwardAlpha.Render", kProfilerRender)
+
+static SHADERPROP (ShadowMapTexture);
+
+
+static inline bool CompareLights (ForwardLightsBlock const* a, ForwardLightsBlock const* b)
+{
+	// Returns true when both light blocks describe the same lighting setup.
+	// A missing block never matches anything, not even another missing block.
+	if (a == NULL || b == NULL)
+		return false;
+
+	// Cheap scalar fields first: main light plus the per-category light counts.
+	const bool sameMainLight = (a->mainLight == b->mainLight);
+	const bool sameCounts = (a->vertexLightCount == b->vertexLightCount) && (a->addLightCount == b->addLightCount);
+	if (!sameMainLight || !sameCounts)
+		return false;
+
+	// The light pointer arrays must match entry by entry (pointer identity).
+	const ActiveLight* const* lhsLights = a->GetLights();
+	const ActiveLight* const* rhsLights = b->GetLights();
+	const int lightCount = a->vertexLightCount + a->addLightCount;
+	for (int idx = 0; idx < lightCount; ++idx)
+	{
+		if (lhsLights[idx] != rhsLights[idx])
+			return false;
+	}
+
+	// SH data is compared bitwise.
+	if (memcmp(a->sh, b->sh, sizeof(a->sh)) != 0)
+		return false;
+	// Blend factors of the last add/vertex light only have to match approximately.
+	return CompareApproximately(a->lastAddLightBlend, b->lastAddLightBlend) && CompareApproximately(a->lastVertexLightBlend, b->lastVertexLightBlend);
+}
+
+struct RenderObjectDataCold {
+	float		invScale;						// 4 bytes; handed to SetObjectScale() when the object is rendered
+	float		lodFade;						// 4 bytes; handed to SetObjectScale() when the object is rendered
+	size_t		lightsDataOffset;				// sizeof(size_t) bytes (4 only on 32-bit targets); byte offset of this object's ForwardLightsBlock inside m_RenderObjectsLightData
+	int			subshaderIndex;					// 4 bytes; subshader index passed to SetPassWithShader()
+	// 16 bytes in total only where size_t is 4 bytes; expect a larger struct on 64-bit targets
+};
+
+
+namespace ForwardShaderRenderLoop_Enum
+{
+// Render pass data here is 8 bytes each; an index of the render object and "the rest" packed
+// into 4 bytes. The constants below describe the bit layout of RenderPassData::data.
+enum {
+	kPackPassShift = 0,				// pass index is decoded as (data >> kPackPassShift) & kPackPassMask
+	kPackPassMask = 0xFF,
+	kPackTypeShift = 8,				// pass type is decoded as (data >> kPackTypeShift) & kPackTypeMask
+	kPackTypeMask = 0xFF,
+	kPackFirstPassFlag = (1<<24),	// entries with this flag are sorted ahead of entries without it
+	kPackMultiPassFlag = (1<<25),	// multi-pass entries of non-static-batched renderers are not dynamically batched
+};
+
+} // namespace ForwardShaderRenderLoop_Enum
+
+struct RenderPassData {
+	int	roIndex; // index into the render object container (and into the parallel m_RenderObjectsCold array)
+	// Packed into UInt32: pass number, pass type, first pass flag, multipass flag (layout in ForwardShaderRenderLoop_Enum)
+	UInt32 data;
+#if ENABLE_FORWARD_SHADER_LOOP_HASH_SORTING
+	// state hash for optimizing render object sorter
+	UInt32 hash;
+#endif
+};
+typedef dynamic_array<RenderPassData> RenderPasses; // list of passes to render, sorted in place before drawing
+
+
+struct ForwardShaderRenderState // state snapshot of one render pass; PerformRendering compares consecutive snapshots to decide when to break a batch
+{
+	int rendererType;	// result of renderer->GetRendererType()
+	int transformType;	// transform type of the visible node
+	// Per-object scale and LOD fade (see SetObjectScale).
+	float invScale;
+	float lodFade;
+	// Material/shader and the pass being rendered.
+	Material* material;
+	Shader* shader;
+	int subshaderIndex;
+	ShaderPassType passType;
+	int passIndex;
+	// Lights affecting the object; compared by content via CompareLights, not by pointer.
+	const ForwardLightsBlock* lights;
+	int receiveShadows;	// only assigned and compared when ENABLE_SHADOWS is set
+	// Lightmap index and scale/offset used for rendering.
+	int lightmapIndex;
+	Vector4f lightmapST;
+	// Hash of the renderer's custom properties.
+	UInt32 customPropsHash;
+
+	// Resets all fields to values no real pass uses; with lights == 0, CompareLights (and thus operator==) fails against any other state.
+	void Invalidate()
+	{
+		rendererType = -1;
+		transformType = -1;
+		invScale = 0.0f;
+		lodFade = 0.0F;
+		material = 0; shader = 0; subshaderIndex = -1; passType = kShaderPassTypeCount; passIndex = -1;
+		lights = 0;
+		lightmapIndex = -1; lightmapST = Vector4f(0,0,0,0);
+		receiveShadows = -1;
+		customPropsHash = 0;
+	}
+	// Two states are equal when rendering one after the other needs no state change; scale and LOD fade are compared with a tolerance.
+	bool operator == (const ForwardShaderRenderState& rhs) const
+	{
+		if (this == &rhs)
+			return true;
+
+		return (
+				rendererType == rhs.rendererType &&
+				transformType == rhs.transformType &&
+				material == rhs.material &&
+				shader == rhs.shader &&
+				CompareLights(lights, rhs.lights) &&
+				subshaderIndex == rhs.subshaderIndex &&
+				passType == rhs.passType &&
+				passIndex == rhs.passIndex &&
+				CompareApproximately(invScale,rhs.invScale) &&
+				CompareApproximately(lodFade,rhs.lodFade, LOD_FADE_BATCH_EPSILON) &&
+			#if ENABLE_SHADOWS
+				receiveShadows == rhs.receiveShadows &&
+			#endif
+				lightmapIndex == rhs.lightmapIndex &&
+				lightmapST == rhs.lightmapST &&
+				customPropsHash == rhs.customPropsHash
+				);
+	}
+	// Negation of operator==.
+	bool operator != (const ForwardShaderRenderState& rhs) const
+	{
+		return !(rhs == *this);
+	}
+};
+
+
+struct ForwardShadowMap // shadow map state of one shadow casting light in the forward loop
+{
+	ForwardShadowMap() : light(NULL), texture(NULL) {} // only the two pointers are initialized here
+	const ActiveLight* light;		// light this shadow map belongs to
+	RenderTexture* texture;			// shadow map texture; NULL when none was produced; released as a temp buffer at the end of PerformRendering
+	Matrix4x4f shadowMatrix;		// passed to SetLightShadowProps when an object receives this light's shadows
+	MinMaxAABB receiverBounds;		// bounds of the receivers; slightly expanded before the shadow map is rendered
+};
+typedef dynamic_array<ForwardShadowMap> ForwardShadowMaps; // shadow maps of the additional (non-main) shadowed lights
+
+struct CompactShadowCollectorSortData;
+
+struct ForwardShaderRenderLoop // working data of one forward rendering loop; all arrays use the temp allocator
+{
+	const RenderLoopContext*	m_Context;	// camera, cull results and queue range for this loop
+	RenderObjectDataContainer*	m_Objects;	// render objects, indexed by RenderPassData::roIndex
+	// Per-object data parallel to m_Objects, plus the raw bytes holding each object's ForwardLightsBlock.
+	dynamic_array<RenderObjectDataCold> m_RenderObjectsCold;
+	dynamic_array<UInt8>		m_RenderObjectsLightData;
+	// Passes to draw, in draw order once sorted.
+	RenderPasses				m_PlainRenderPasses;
+	#if ENABLE_SHADOWS
+	ForwardShadowMap			m_MainShadowMap;
+	ForwardShadowMaps			m_ShadowMaps;
+	// Render object indices of shadow receivers.
+	// This includes both shadow receivers and objects that have shadows off, but
+	// are within shadow distance. They should still participate in screenspace shadow
+	// gathering, otherwise shadows will be visible through them.
+	dynamic_array<int>			m_ReceiverObjects;
+	#endif
+	// Accumulates compatible draws; flushed whenever the render state changes.
+	BatchRenderer				m_BatchRenderer;
+	// All dynamic arrays are bound to kMemTempAlloc.
+	ForwardShaderRenderLoop()
+		: m_RenderObjectsCold		(kMemTempAlloc)
+		, m_RenderObjectsLightData	(kMemTempAlloc)
+		, m_PlainRenderPasses		(kMemTempAlloc)
+		#if ENABLE_SHADOWS
+		, m_ShadowMaps				(kMemTempAlloc)
+		, m_ReceiverObjects			(kMemTempAlloc)
+		#endif
+	{ }
+	// Renders shadow maps (when needed) and then every pass in m_PlainRenderPasses.
+	void PerformRendering (const ActiveLight* mainDirShadowLight, RenderTexture* existingShadowMap, const ShadowCullData& shadowCullData, bool disableDynamicBatching, bool sRGBrenderTarget, bool clearFrameBuffer);
+	#if ENABLE_SHADOWS
+	RenderTexture* CollectShadows (RenderTexture* inputShadowMap, const Light* light, const Matrix4x4f* shadowMatrices, const float* splitDistances, const Vector4f* splitSphereCentersAndSquaredRadii, bool enableSoftShadows, bool useDualInForward, bool clearFrameBuffer);
+	void RenderLightShadowMaps (ForwardShadowMap& shadowMap, ShadowCameraData& camData, bool enableSoftShadows, bool useDualInForward, bool clearFrameBuffer);
+	int SortShadowCollectorsCompact(CompactShadowCollectorSortData* _resultOrder);
+	#endif
+	// Comparator for std::sort over RenderPasses; 'opaque' selects opaque vs. alpha ordering rules.
+	template <bool opaque>
+	struct RenderObjectSorter
+	{
+		bool operator()( const RenderPassData& ra, const RenderPassData& rb ) const;
+		const ForwardShaderRenderLoop* queue; // loop whose m_Objects the pass indices refer to
+	};
+	// Sorts 'passes' in place with RenderObjectSorter<opaque> bound to this loop.
+	template <bool opaque>
+	void SortRenderPassData( RenderPasses& passes )
+	{
+		RenderObjectSorter<opaque> sorter;
+		sorter.queue = this;
+		std::sort( passes.begin(), passes.end(), sorter );
+	}
+};
+
+
+template <bool opaque>
+bool ForwardShaderRenderLoop::RenderObjectSorter<opaque>::operator() (const RenderPassData& ra, const RenderPassData& rb) const
+{
+	using namespace ForwardShaderRenderLoop_Enum;
+	// Returns true when pass 'ra' must be rendered before pass 'rb'.
+	const RenderObjectData& dataa = (*queue->m_Objects)[ra.roIndex];
+	const RenderObjectData& datab = (*queue->m_Objects)[rb.roIndex];
+
+	// Sort by layering depth.
+	bool globalLayeringResult;
+	if (CompareGlobalLayeringData(dataa.globalLayeringData, datab.globalLayeringData, globalLayeringResult))
+		return globalLayeringResult;
+
+#if ENABLE_FORWARD_SHADER_LOOP_HASH_SORTING
+	// Hash based variant; only compiled when the define at the top of the file is non-zero.
+	if (!opaque)
+	{
+		// Sort by render queues first
+		if( dataa.queueIndex != datab.queueIndex )
+			return dataa.queueIndex < datab.queueIndex;
+
+#if DEBUGMODE
+		DebugAssertIf (dataa.queueIndex >= kQueueIndexMin && dataa.queueIndex <= kGeometryQueueIndexMax); // this is alpha loop!
+#endif
+
+		// Sort strictly by distance unless they are equal
+		if( dataa.distance != datab.distance )
+			return dataa.distance < datab.distance;
+	}
+	// Build 64-bit keys: inverted queue index, first pass flag and inverted lightmap index in the upper half, state hash in the lower half.
+	UInt64 keya = (0x0000ffff-((dataa.queueIndex)&0x0000ffff))<<16;
+	UInt64 keyb = (0x0000ffff-((datab.queueIndex)&0x0000ffff))<<16;
+
+	keya |= (ra.data & kPackFirstPassFlag)>>(24-8);
+	keyb |= (rb.data & kPackFirstPassFlag)>>(24-8);
+	keya |= (0x000000ff-((dataa.lightmapIndex)&0x000000ff));
+	keyb |= (0x000000ff-((datab.lightmapIndex)&0x000000ff));
+	keya = keya << 32;
+	keyb = keyb << 32;
+	keya |= ra.hash;
+	keyb |= rb.hash;
+
+	//Sort keys, TODO try to move the key generation outside the sorting loop
+	if( keya != keyb )
+		return (keya > keyb);
+
+#if DEBUGMODE
+	if (opaque)
+	{
+		DebugAssertIf (dataa.queueIndex < kQueueIndexMin || dataa.queueIndex > kGeometryQueueIndexMax); // this is opaque loop!
+	}
+#endif
+
+	//fall through to distance, TODO insert distance into the key
+	return dataa.distance > datab.distance;
+
+#else
+
+	// Sort by render queues first
+	if( dataa.queueIndex != datab.queueIndex )
+		return dataa.queueIndex < datab.queueIndex;
+
+#if DEBUGMODE
+	if (opaque) {
+		DebugAssertIf (dataa.queueIndex < kQueueIndexMin || dataa.queueIndex > kGeometryQueueIndexMax); // this is opaque loop!
+	} else {
+		DebugAssertIf (dataa.queueIndex >= kQueueIndexMin && dataa.queueIndex <= kGeometryQueueIndexMax); // this is alpha loop!
+	}
+#endif
+
+	if (!opaque)
+	{
+		// Sort strictly by distance unless they are equal
+		if( dataa.distance != datab.distance )
+			return dataa.distance < datab.distance;
+	}
+
+	UInt32 flagsa = ra.data;
+	UInt32 flagsb = rb.data;
+
+	// render all first passes first
+	if( (flagsa & kPackFirstPassFlag) != (flagsb & kPackFirstPassFlag) )
+		return (flagsa & kPackFirstPassFlag) > (flagsb & kPackFirstPassFlag);
+
+	// sort by lightmap index (fine to do it before source material index
+	// since every part of same mesh will have the same lightmap index)
+	if( dataa.lightmapIndex != datab.lightmapIndex )
+		return dataa.lightmapIndex < datab.lightmapIndex;
+
+#if GFX_ENABLE_DRAW_CALL_BATCHING
+	// if part of predefined static batch, then sort by static batch index
+	// prefer static batched first as they usually cover quite a lot
+	if( dataa.staticBatchIndex != datab.staticBatchIndex )
+		return dataa.staticBatchIndex > datab.staticBatchIndex;
+
+	// otherwise sort by material index. Some people are using multiple materials
+	// on a single mesh and expect them to be rendered in order.
+	if( dataa.staticBatchIndex == 0 && dataa.sourceMaterialIndex != datab.sourceMaterialIndex )
+		return dataa.sourceMaterialIndex < datab.sourceMaterialIndex;
+#else
+	// Sort by material index. Some people are using multiple materials
+	// on a single mesh and expect them to be rendered in order.
+	if( dataa.sourceMaterialIndex != datab.sourceMaterialIndex )
+		return dataa.sourceMaterialIndex < datab.sourceMaterialIndex;
+#endif
+
+	// sort by shader
+	if( dataa.shader != datab.shader )
+		return dataa.shader->GetInstanceID() < datab.shader->GetInstanceID(); // just compare instance IDs
+
+	// then sort by material
+	if( dataa.material != datab.material )
+		return dataa.material->GetInstanceID() < datab.material->GetInstanceID(); // just compare instance IDs
+
+	// inside same material: by pass
+	UInt32 passa = (flagsa >> kPackPassShift) & kPackPassMask;
+	UInt32 passb = (flagsb >> kPackPassShift) & kPackPassMask;
+	if( passa != passb )
+		return passa < passb;
+
+	if (opaque)
+	{
+		// Sort by distance in reverse order.
+		// That way we get consistency in render order, and more pixels not rendered due to z-testing,
+		// which benefits performance.
+		if( dataa.distance != datab.distance )
+			return dataa.distance > datab.distance;
+	}
+
+	// fall through: roIndex (keeps the ordering deterministic for otherwise equal passes)
+	return ra.roIndex < rb.roIndex;
+
+#endif // ENABLE_FORWARD_SHADER_LOOP_HASH_SORTING
+}
+
+#if ENABLE_SHADOWS
+static void SetLightShadowProps (const Camera& camera, const Light& light, Texture* shadowMap, const Matrix4x4f& shadowMatrix, bool useDualInForward)
+{
+	const float shadowStrength = light.GetShadowStrength();
+	DebugAssert (shadowMap);
+	// Uploads everything a shadow receiving pass reads: shadow matrix, shadow map texture, fade parameters and PCF offsets.
+	ShaderLab::PropertySheet *props = ShaderLab::g_GlobalProperties;
+	BuiltinShaderParamValues& params = GetGfxDevice().GetBuiltinParamValues();
+
+	// shadow matrix
+	CopyMatrix (shadowMatrix.GetPtr(), params.GetWritableMatrixParam(kShaderMatWorldToShadow).GetPtr());
+
+	props->SetTexture( kSLPropShadowMapTexture, shadowMap );
+	// Point lights additionally publish their world position and 1/range.
+	if (light.GetType() == kLightPoint)
+	{
+		const Vector3f lightPos = light.GetWorldPosition();
+		params.SetVectorParam(kShaderVecLightPositionRange, Vector4f(lightPos.x, lightPos.y, lightPos.z, 1.0f/light.GetRange()));
+	}
+
+	// ambient & shadow fade out
+	Vector4f lightFade;
+	Vector4f fadeCenterAndType;
+	CalculateLightShadowFade (camera, shadowStrength, lightFade, fadeCenterAndType);
+	params.SetVectorParam(kShaderVecLightmapFade, lightFade);
+	if (useDualInForward)
+		lightFade.z = lightFade.w = 0.0f; // only the shadow data copy gets z/w zeroed; the lightmap fade above keeps the full values
+	params.SetVectorParam(kShaderVecLightShadowData, lightFade);
+	params.SetVectorParam(kShaderVecShadowFadeCenterAndType, fadeCenterAndType);
+	// texel offsets for PCF: half a texel in each diagonal direction
+	Vector4f offsets;
+	float offX = 0.5f / shadowMap->GetGLWidth();
+	float offY = 0.5f / shadowMap->GetGLHeight();
+	offsets.z = 0.0f; offsets.w = 0.0f;
+	offsets.x = -offX; offsets.y = -offY; params.SetVectorParam(kShaderVecShadowOffset0, offsets);
+	offsets.x =  offX; offsets.y = -offY; params.SetVectorParam(kShaderVecShadowOffset1, offsets);
+	offsets.x = -offX; offsets.y =  offY; params.SetVectorParam(kShaderVecShadowOffset2, offsets);
+	offsets.x =  offX; offsets.y =  offY; params.SetVectorParam(kShaderVecShadowOffset3, offsets);
+}
+static void SetLightShadowCollectProps (const Camera& camera, const Light& light, Texture* shadowMap, const Matrix4x4f* shadowMatrices, const float* splitDistances, const Vector4f* splitSphereCentersAndSquaredRadii, bool useDualInForward)
+{
+	DebugAssert (shadowMatrices && shadowMap);
+	SetLightShadowProps (camera, light, shadowMap, shadowMatrices[0], useDualInForward); // regular shadow props use the first matrix only
+	SetCascadedShadowShaderParams (shadowMatrices, splitDistances, splitSphereCentersAndSquaredRadii); // then the cascade specific parameters
+}
+#endif // ENABLE_SHADOWS
+
+
+
+void ForwardShaderRenderLoop::PerformRendering (const ActiveLight* mainDirShadowLight, RenderTexture* existingShadowMap, const ShadowCullData& shadowCullData, bool disableDynamicBatching, bool sRGBrenderTarget, bool clearFrameBuffer)
+{
+	using namespace ForwardShaderRenderLoop_Enum;
+	// Renders the needed shadow maps, then draws every entry of m_PlainRenderPasses in array order, invoking camera renderables whenever the render queue advances.
+	const RenderManager::Renderables& renderables = GetRenderManager ().GetRenderables ();
+	RenderManager::Renderables::const_iterator renderablesBegin = renderables.begin(), renderablesEnd = renderables.end();
+
+	SetNoShadowsKeywords();
+
+	GfxDevice& device = GetGfxDevice();
+	// save current scissor params
+	int oldScissorRect[4];
+	device.GetScissorRect(oldScissorRect);
+	const bool oldScissor = device.IsScissorEnabled();
+
+	#if ENABLE_SHADOWS
+	const bool enableSoftShadows = GetSoftShadowsEnabled();
+	ShadowCameraData camData(shadowCullData);
+	ForwardShadowMap mainLightShadowMap;
+	const bool hasAnyShadows = (mainDirShadowLight != 0 || !m_ShadowMaps.empty());
+	const bool useDualInForward = GetLightmapSettings().GetUseDualLightmapsInForward();
+
+	// shadow map of main directional light
+	if (mainDirShadowLight != 0)
+	{
+		// Render shadow map
+		if (!existingShadowMap)
+		{
+			// Prevent receiver bounds to be zero size in any dimension;
+			// causes trouble with calculating intersection of frustum and bounds.
+			mainLightShadowMap.receiverBounds = m_MainShadowMap.receiverBounds;
+			mainLightShadowMap.receiverBounds.Expand (0.01f);
+			mainLightShadowMap.light = mainDirShadowLight;
+
+			// One directional light can have shadows in free version, so temporarily
+			// enable render textures just for that.
+			RenderTexture::SetTemporarilyAllowIndieRenderTexture (true);
+			RenderLightShadowMaps (mainLightShadowMap, camData, enableSoftShadows, useDualInForward, clearFrameBuffer);
+			RenderTexture::SetTemporarilyAllowIndieRenderTexture (false);
+
+			// There were no shadow casters - no shadowmap is produced
+			if (!mainLightShadowMap.texture)
+				mainDirShadowLight = 0;
+		}
+		else
+		{
+			mainLightShadowMap.texture = existingShadowMap; // NOTE(review): shadowMatrix is not assigned on this path - confirm the ForwardBase shadow setup below does not depend on it
+		}
+	}
+
+	// shadow maps of other lights
+	for (ForwardShadowMaps::iterator it = m_ShadowMaps.begin(), itEnd = m_ShadowMaps.end(); it != itEnd; ++it)
+	{
+		ForwardShadowMap& shadowMap = *it;
+
+		// Prevent receiver bounds to be zero size in any dimension;
+		// causes trouble with calculating intersection of frustum and bounds.
+		shadowMap.receiverBounds.Expand (0.01f);
+
+		RenderLightShadowMaps (shadowMap, camData, enableSoftShadows, false, clearFrameBuffer);
+	}
+	// Shadow rendering is followed by re-running the camera's render target setup and resetting the shadow keywords.
+	if (hasAnyShadows)
+	{
+		m_Context->m_Camera->SetupRender (Camera::kRenderFlagSetRenderTarget);
+		SetNoShadowsKeywords ();
+	}
+	#endif
+
+	const RenderSettings& renderSettings = GetRenderSettings();
+	const LightmapSettings& lightmapper = GetLightmapSettings();
+	size_t npasses = m_PlainRenderPasses.size();
+
+	int currentQueueIndex = m_Context->m_RenderQueueStart;
+
+	device.SetViewMatrix( m_Context->m_CurCameraMatrix.GetPtr() );
+	// Start from an invalidated state so the first pass always sets up its state.
+	ForwardShaderRenderState prevRenderState;
+	prevRenderState.Invalidate();
+
+	//If we are in linear lighting enable sRGB writes here...
+	device.SetSRGBWrite(sRGBrenderTarget);
+	if (clearFrameBuffer)
+		m_Context->m_Camera->ClearNoSkybox(false);
+	else
+		device.IgnoreNextUnresolveOnCurrentRenderTarget();
+	
+	const ChannelAssigns* channels = NULL; // channels of the most recently set pass; reused while passes keep batching together
+
+	for( size_t i = 0; i < npasses; ++i )
+	{
+		const RenderPassData& rpData = m_PlainRenderPasses[i];
+		const RenderObjectData& roDataH = (*m_Objects)[rpData.roIndex];
+		const RenderObjectDataCold& roDataC = m_RenderObjectsCold[rpData.roIndex];
+		const ForwardLightsBlock& roDataL = *reinterpret_cast<ForwardLightsBlock*>(&m_RenderObjectsLightData[roDataC.lightsDataOffset]);
+
+		// We're going over all things that need to be rendered in increasing
+		// render queue order. Whenever we switch to the new queue, we must
+		// invoke all "camera renderables" (halos, flares and so on).
+		const int roQueueIndex = roDataH.queueIndex;
+		DebugAssert (roQueueIndex >= currentQueueIndex);
+		if( roQueueIndex > currentQueueIndex )
+		{
+			m_BatchRenderer.Flush();
+			
+			// Draw required renderables
+			if (!m_Context->m_DontRenderRenderables)
+			{
+				while( renderablesBegin != renderablesEnd && renderablesBegin->first <= roQueueIndex )
+				{
+					renderablesBegin->second->RenderRenderable(*m_Context->m_CullResults);
+					++renderablesBegin;
+				}
+			}
+
+			currentQueueIndex = roQueueIndex;
+		}
+
+		const VisibleNode *node = roDataH.visibleNode;
+		const UInt16 subsetIndex = roDataH.subsetIndex;
+		// Gather the state of this pass so it can be compared against the previous one.
+		ForwardShaderRenderState rs;
+		{
+			rs.rendererType = node->renderer->GetRendererType();
+			rs.transformType = node->transformType;
+			rs.invScale = roDataC.invScale;
+			rs.lodFade = roDataC.lodFade;
+
+			rs.material = roDataH.material;
+			rs.shader = roDataH.shader;
+			rs.subshaderIndex = roDataC.subshaderIndex;
+			rs.passType = (ShaderPassType)((rpData.data >> kPackTypeShift) & kPackTypeMask);
+			rs.passIndex = (rpData.data >> kPackPassShift) & kPackPassMask;
+
+			rs.lights = &roDataL;
+			#if ENABLE_SHADOWS
+			rs.receiveShadows = hasAnyShadows && node->renderer->GetReceiveShadows() && IsObjectWithinShadowRange (*m_Context->m_ShadowCullData, node->worldAABB);
+			#endif
+
+			rs.lightmapIndex = roDataH.lightmapIndex;
+			DebugAssert(rs.lightmapIndex == node->renderer->GetLightmapIndex());
+			rs.lightmapST = node->renderer->GetLightmapSTForRendering();
+			rs.customPropsHash = node->renderer->GetCustomPropertiesHash();
+		}
+
+
+		// multi-pass requires vertex position values to be EXACTLY the same for all passes
+		// therefore do NOT batch dynamic multi-pass nodes
+		// same for shadow casters
+		const bool multiPass = (rpData.data & kPackMultiPassFlag) == kPackMultiPassFlag;
+		const bool dynamicShouldNotBatch = (node->renderer->GetStaticBatchIndex() == 0) && (multiPass || disableDynamicBatching);
+
+		#if ENABLE_SHADOWS
+		const bool dynamicAndShadowCaster = (node->renderer->GetStaticBatchIndex() == 0) && (mainDirShadowLight != 0) && node->renderer->GetCastShadows();
+		#else
+		const bool dynamicAndShadowCaster = false;
+		#endif
+		// shouldResetPass: true when lights and pass state have to be set up again for this entry.
+		bool shouldResetPass;
+		if (rs.passType == kPassForwardAdd || // rendering multiple different lights in a row - impossible to batch
+			prevRenderState != rs)
+		{
+			// break the batch
+			m_BatchRenderer.Flush();
+			prevRenderState = rs;
+			shouldResetPass = true;
+		}
+		// We can not use dynamic batching for shadow casting renderers or multipass renderers,
+		// because that will lead to zfighting due to slightly different vertex positions
+		else if (dynamicAndShadowCaster || dynamicShouldNotBatch)
+		{
+			m_BatchRenderer.Flush();
+			shouldResetPass = false;
+		}
+		else
+			shouldResetPass = false;
+
+		renderSettings.SetupAmbient();
+		SetObjectScale(device, roDataC.lodFade, roDataC.invScale);
+
+		node->renderer->ApplyCustomProperties(*roDataH.material, rs.shader, rs.subshaderIndex);
+
+		// non batchable and generally inefficient multi-pass path
+		if (rs.passType == kPassForwardAdd)
+		{
+			const int lightCount = rs.lights->addLightCount;
+			const ActiveLight* const* addLights = rs.lights->GetLights();
+			for( int lightNo = 0; lightNo < lightCount; ++lightNo )
+			{
+				const ActiveLight& activeLight = *addLights[lightNo];
+				Light* light = activeLight.light;
+				LightManager::SetupForwardAddLight (light, lightNo==lightCount-1 ? rs.lights->lastAddLightBlend : 1.0f); // only the last additive light uses the stored blend factor
+
+				if (light->GetType() != kLightDirectional)
+					SetLightScissorRect (activeLight.screenRect, m_Context->m_CameraViewport, false, device);
+
+
+				#if ENABLE_SHADOWS
+				if (rs.receiveShadows && light->GetShadows() != kShadowNone)
+				{
+					// find light among additional shadow lights
+					ForwardShadowMaps::iterator sl, slEnd = m_ShadowMaps.end();
+					for (sl = m_ShadowMaps.begin(); sl != slEnd; ++sl)
+					{
+						if (sl->light == &activeLight && sl->texture)
+						{
+							const Light& light = *activeLight.light; // note: shadows the outer 'light' pointer within this scope
+							SetLightShadowProps (*m_Context->m_Camera, light, sl->texture, sl->shadowMatrix, false);
+							SetShadowsKeywords (light.GetType(), light.GetShadows(), light.GetType()==kLightDirectional, enableSoftShadows);
+							break;
+						}
+					}
+				}
+				#endif
+
+				channels = rs.material->SetPassWithShader(rs.passIndex, rs.shader, rs.subshaderIndex);
+				if (channels)
+				{
+					SetupObjectMatrix (node->worldMatrix, rs.transformType);
+					node->renderer->Render( subsetIndex, *channels );
+				}
+
+				#if ENABLE_SHADOWS
+				if (rs.receiveShadows && light->GetShadows() != kShadowNone)
+				{
+					SetNoShadowsKeywords ();
+				}
+				#endif
+
+				if (light->GetType() != kLightDirectional)
+					ClearScissorRect (oldScissor, oldScissorRect, device);
+			}
+		}
+		else
+		{
+			// only set up lights & pass state when they differ from the previous entry
+			if (shouldResetPass)
+			{
+				// the setup depends on the pass type
+				switch( rs.passType )
+				{
+					case kPassAlways:
+					{
+						// Disable all fixed function lights for consistency (so if user
+						// has accidentally Lighting On in an Always pass, it will not produce
+						// random results)
+						device.DisableLights (0);
+
+						// Reset SH lighting
+						float blackSH [9][3];
+						memset (blackSH, 0, (9 * 3)* sizeof(float));
+						SetSHConstants (blackSH, GetGfxDevice().GetBuiltinParamValues());
+
+						SetupObjectLightmaps (lightmapper, rs.lightmapIndex, rs.lightmapST, false);
+					}
+					break;
+
+					case kPassForwardBase:
+					{
+						// NOTE: identity matrix has to be set for GLSL & OpenGLES before vertex lights are set
+						// as lighting is specified in World space
+						device.SetWorldMatrix( Matrix4x4f::identity.GetPtr() );
+
+						LightManager::SetupForwardBaseLights (*rs.lights);
+						SetupObjectLightmaps (lightmapper, rs.lightmapIndex, rs.lightmapST, false);
+
+					#if ENABLE_SHADOWS
+						if (rs.receiveShadows && mainDirShadowLight && rs.lights->mainLight == mainDirShadowLight)
+						{
+							const Light& light = *mainDirShadowLight->light;
+							SetLightShadowProps (*m_Context->m_Camera, light, mainLightShadowMap.texture, mainLightShadowMap.shadowMatrix, false);
+							SetShadowsKeywords (light.GetType(), light.GetShadows(), true, enableSoftShadows);
+						}
+					#endif
+					}
+					break;
+
+					case kPassVertex:
+					case kPassVertexLM:
+					case kPassVertexLMRGBM:
+					{
+						// NOTE: identity matrix has to be set for GLSL & OpenGLES before vertex lights are set
+						// as lighting is specified in World space
+						device.SetWorldMatrix( Matrix4x4f::identity.GetPtr() );
+
+						SetupObjectLightmaps (lightmapper, rs.lightmapIndex, rs.lightmapST, true);
+						LightManager::SetupVertexLights( rs.lights->vertexLightCount, rs.lights->GetLights() );
+					}
+					break;
+
+					default:
+					{
+						AssertString ("This pass type should not happen");
+						break;
+					}
+				}
+
+				channels = roDataH.material->SetPassWithShader(rs.passIndex, rs.shader, rs.subshaderIndex);
+			}
+			// A NULL 'channels' means the last SetPassWithShader call returned nothing to draw with; the entry is skipped.
+			if (channels)
+				m_BatchRenderer.Add(node->renderer, subsetIndex, channels, node->worldMatrix, rs.transformType);
+
+			if (ENABLE_SHADOWS && rs.passType == kPassForwardBase)
+				SetNoShadowsKeywords ();
+		}
+	}
+
+	m_BatchRenderer.Flush();
+
+	SetNoShadowsKeywords ();
+
+	// restore scissor
+	ClearScissorRect (oldScissor, oldScissorRect, device);
+
+	#if ENABLE_SHADOWS
+	if (mainLightShadowMap.texture && mainLightShadowMap.texture != existingShadowMap) // a caller-provided shadow map is not released here
+		GetRenderBufferManager().ReleaseTempBuffer( mainLightShadowMap.texture );
+	for (ForwardShadowMaps::iterator it = m_ShadowMaps.begin(), itEnd = m_ShadowMaps.end(); it != itEnd; ++it)
+	{
+		ForwardShadowMap& sl = *it;
+		if (sl.texture)
+			GetRenderBufferManager().ReleaseTempBuffer (sl.texture);
+	}
+	#endif
+
+	// After everything we might still have renderables that should be drawn at the
+	// very end. Do it.
+	if (!m_Context->m_DontRenderRenderables)
+	{
+		while (renderablesBegin != renderablesEnd && renderablesBegin->first < m_Context->m_RenderQueueStart)
+			++renderablesBegin;
+		while( renderablesBegin != renderablesEnd && renderablesBegin->first < m_Context->m_RenderQueueEnd )
+		{
+			renderablesBegin->second->RenderRenderable(*m_Context->m_CullResults);
+			++renderablesBegin;
+		}
+	}
+	GetGfxDevice().SetSRGBWrite(false);
+	device.SetViewMatrix( m_Context->m_CurCameraMatrix.GetPtr() );
+}
+
+
+// ------------------------------------------------------------------------
+//  collect cascaded shadows into screen-space texture; apply blur
+
+#if ENABLE_SHADOWS
+
+struct ShadowCollectorSorter // comparator over render object indices, used to sort m_ReceiverObjects
+{
+	bool operator() (int raIndex, int rbIndex) const;
+	const ForwardShaderRenderLoop* queue; // loop whose m_Objects the indices refer to
+};
+
+bool ShadowCollectorSorter::operator()(int raIndex, int rbIndex) const
+{
+	// Orders two receiver object indices: global layering decides first, distance breaks the tie.
+	RenderObjectDataContainer& objects = *queue->m_Objects;
+	const RenderObjectData& lhs = objects[raIndex];
+	const RenderObjectData& rhs = objects[rbIndex];
+
+	// Sort by layering depth. //@TODO:should this be here?
+	bool layeringOrder;
+	const bool decidedByLayering = CompareGlobalLayeringData(lhs.globalLayeringData, rhs.globalLayeringData, layeringOrder);
+	// Sort front to back
+	return decidedByLayering ? layeringOrder : (lhs.distance > rhs.distance);
+}
+
+struct CompactShadowCollectorSortData
+{
+	UInt64 key;			// 64b sort key: bits 63..32 = _instanceID, 31..16 = low 16 bits of _smallMeshIndex, 15..14 = transform type, 13..0 = quantized depth
+	int collectorIndex;	// caller supplied index, stored unchanged
+	// Packs the arguments into 'key'. _depth is scaled by 16383 and masked to 14 bits, so it is expected to be in [0,1].
+	CompactShadowCollectorSortData(UInt32 _smallMeshIndex, UInt32 _instanceID, TransformType _transformType, float _depth, int _collectorIndex )
+	{
+		key=0;
+		UInt32 transformType = static_cast<UInt32>(_transformType);
+		UInt32 z = (UInt32)(16383.0f*_depth);
+		// upper 32 bits
+		key |= (_instanceID);
+		key = key << 32;
+		key |= ((_smallMeshIndex&0x0000ffff)<<16)|((transformType&0x00000003)<<14)|(z&0x00003fff);
+
+		collectorIndex = _collectorIndex;
+	}
+};
+
+struct CompactShadowCollectorKeySorter // orders sort data by ascending key; operator() is const like the other comparators in this file
+{
+	inline bool operator()(const CompactShadowCollectorSortData& a, const CompactShadowCollectorSortData& b) const
+	{
+		return a.key < b.key;
+	}
+};
+
+// Shadow collector sorting
+// Builds one sort key per receiver object whose shader has a shadow collector pass, then sorts the keys ascending.
+// Output:
+//		_resultOrder		- Sorted shadow collector sort data; must have room for m_ReceiverObjects.size() entries
+// Returns:
+//		Number of active collectors
+int ForwardShaderRenderLoop::SortShadowCollectorsCompact(CompactShadowCollectorSortData* _resultOrder)
+{
+	int activeShadowCollectors = 0;
+	// The camera matrix is the same for every receiver, so fetch it once instead of once per object.
+	const Matrix4x4f& worldToClipMatrix = m_Context->m_Camera->GetWorldToClipMatrix();
+	// Generate key array for sorting
+	for( size_t i = 0; i < m_ReceiverObjects.size(); ++i )
+	{
+		int roIndex = m_ReceiverObjects[i];
+		const RenderObjectData& roDataH = (*m_Objects)[roIndex];
+		Shader* shader = roDataH.shader;
+
+		if( shader->HasShadowCollectorPass() )
+		{
+			const TransformInfo& xformInfo = roDataH.visibleNode->renderer->GetTransformInfo();
+
+			const Vector3f& worldPos = roDataH.visibleNode->worldAABB.GetCenter();
+			float z = worldToClipMatrix.Get (2, 0) * worldPos.x + worldToClipMatrix.Get (2, 1) * worldPos.y + worldToClipMatrix.Get (2, 2) * worldPos.z + worldToClipMatrix.Get (2, 3);
+			float w = worldToClipMatrix.Get (3, 0) * worldPos.x + worldToClipMatrix.Get (3, 1) * worldPos.y + worldToClipMatrix.Get (3, 2) * worldPos.z + worldToClipMatrix.Get (3, 3);
+			float z_proj = z/w; // NOTE(review): w == 0 is not guarded - confirm it cannot occur for visible receivers
+			z_proj = max(z_proj,0.0f);
+			z_proj = min(z_proj,1.0f);
+			// Key layout is defined by the CompactShadowCollectorSortData constructor; the render object index rides along as collectorIndex.
+			_resultOrder[activeShadowCollectors++] = CompactShadowCollectorSortData( roDataH.visibleNode->renderer->GetMeshIDSmall(), roDataH.material->GetShadowCollectorHash(),
+				xformInfo.transformType, z_proj, roIndex );
+
+		}
+	}
+
+	std::sort( _resultOrder, _resultOrder + activeShadowCollectors, CompactShadowCollectorKeySorter() );
+
+	return activeShadowCollectors;
+}
+
+// Renders every receiver that has a shadow collector pass into a screen-space temp render texture.
+// Releases inputShadowMap; returns the screen-space map, or its blurred version when soft shadows apply.
+RenderTexture* ForwardShaderRenderLoop::CollectShadows (RenderTexture* inputShadowMap, const Light* light, const Matrix4x4f* shadowMatrices, const float* splitDistances, const Vector4f* splitSphereCentersAndSquaredRadii, bool enableSoftShadows, bool useDualInForward, bool clearFrameBuffer)
+{
+	PROFILER_AUTO_GFX(gFwdOpaqueCollectShadows, m_Context->m_Camera)
+		GPU_AUTO_SECTION(kGPUSectionShadowPass);
+
+	DebugAssert (shadowMatrices && inputShadowMap && light && splitDistances);
+
+	//Sort shadow collectors
+#if GFX_ENABLE_SHADOW_BATCHING
+	CompactShadowCollectorSortData* sortOrder;
+	ALLOC_TEMP(sortOrder, CompactShadowCollectorSortData, m_ReceiverObjects.size());
+	int shadowColectors = SortShadowCollectorsCompact(sortOrder);
+#else
+	ShadowCollectorSorter sorter;
+	sorter.queue = this;
+	std::sort (m_ReceiverObjects.begin(), m_ReceiverObjects.end(), sorter);
+#endif
+
+	// If camera is rendering into a texture, we can share its depth buffer while collecting shadows.
+	// This doesn't apply if the target texture is antialiased (case 559079).
+	bool shareDepthBuffer = false;
+	RenderTexture* cameraRT = m_Context->m_Camera->GetCurrentTargetTexture();
+	if (cameraRT && cameraRT->GetDepthFormat() != kDepthFormatNone && !cameraRT->IsAntiAliased())
+	{
+		shareDepthBuffer = true;
+		if (!cameraRT->IsCreated())
+			cameraRT->Create();
+	}
+
+	// create screen-space render texture and collect shadows into it
+	RenderTexture* screenShadowMap = GetRenderBufferManager().GetTempBuffer (RenderBufferManager::kFullSize, RenderBufferManager::kFullSize, shareDepthBuffer ? kDepthFormatNone : kDepthFormat24, kRTFormatARGB32, 0, kRTReadWriteLinear);
+	if (shareDepthBuffer)
+	{
+		if (!screenShadowMap->IsCreated())
+			screenShadowMap->Create();
+		RenderSurfaceHandle rtSurfaceColor = screenShadowMap->GetColorSurfaceHandle();
+		RenderSurfaceHandle rtSurfaceDepth = cameraRT->GetDepthSurfaceHandle();
+		RenderTexture::SetActive (1, &rtSurfaceColor, rtSurfaceDepth, screenShadowMap);
+	}
+	else
+	{
+		RenderTexture::SetActive (screenShadowMap);
+	}
+
+	GfxDevice& device = GetGfxDevice();
+	// (case 555375)
+	// Clear all, except in cases where depth buffer is shared and forward path is used to render deferred path shadow receiving objects
+	// (clearFrameBuffer variable is false in those cases)
+	bool clearColorOnly = shareDepthBuffer && !clearFrameBuffer;
+	device.Clear (clearColorOnly ? kGfxClearColor : kGfxClearAll, ColorRGBAf(1,1,1,0).GetPtr(), 1.0f, 0);
+	if (clearColorOnly)
+		device.IgnoreNextUnresolveOnCurrentRenderTarget();
+	GPU_TIMESTAMP();
+	m_Context->m_Camera->SetupRender ();
+
+	SetLightShadowCollectProps (*m_Context->m_Camera, *light, inputShadowMap, shadowMatrices, splitDistances, splitSphereCentersAndSquaredRadii, useDualInForward);
+	light->SetPropsToShaderLab (1.0f);
+
+	device.SetViewMatrix( m_Context->m_CurCameraMatrix.GetPtr() );
+
+#if GFX_ENABLE_SHADOW_BATCHING
+	device.SetInverseScale(1.0f);
+	m_BatchRenderer.Flush();
+
+	if (shadowColectors > 0)
+	{
+		UInt64 previousKey = ((sortOrder[0].key)&0xFFFFFFFFFFFFC000ULL); // depth component does not affect state change boundaries
+		UInt32 previousHash = 0;
+		int roIndex = sortOrder[0].collectorIndex;
+		const ChannelAssigns* channels = (*m_Objects)[roIndex].material->SetShadowCollectorPassWithShader ((*m_Objects)[roIndex].shader, m_RenderObjectsCold[roIndex].subshaderIndex);
+
+		for(int i=0; i<shadowColectors;i++)
+		{
+			UInt64 currentKey = ((sortOrder[i].key)&0xFFFFFFFFFFFFC000ULL);
+
+			roIndex = sortOrder[i].collectorIndex;
+			const RenderObjectData& roDataH = (*m_Objects)[roIndex];
+			Shader* shader = roDataH.shader;
+			const TransformInfo& xformInfo = roDataH.visibleNode->renderer->GetTransformInfo ();
+			const RenderObjectDataCold& roDataC = m_RenderObjectsCold[roIndex];
+
+			roDataH.visibleNode->renderer->ApplyCustomProperties(*roDataH.material, shader, roDataC.subshaderIndex);
+			UInt32 currentHash = roDataH.visibleNode->renderer->GetCustomPropertiesHash();
+
+			// different property hash or shared depth buffer cause Flush(), state setup, and one non-batched draw call
+			if (currentHash != previousHash || shareDepthBuffer)
+			{
+				m_BatchRenderer.Flush();	// empty BatchRenderer
+				channels = roDataH.material->SetShadowCollectorPassWithShader(shader, roDataC.subshaderIndex);
+				if (channels)	// NULL channels: nothing to draw, same check as the batched branch below
+				{
+					SetupObjectMatrix(xformInfo.worldMatrix, xformInfo.transformType);
+					roDataH.visibleNode->renderer->Render( roDataH.subsetIndex, *channels );
+				}
+			}
+			else
+			{
+				if (previousKey != currentKey)	// Flush() and update state when key changes
+				{
+					m_BatchRenderer.Flush();
+					channels = roDataH.material->SetShadowCollectorPassWithShader (shader, roDataC.subshaderIndex);
+				}
+
+				// if this pass needs to be rendered
+				if (channels)
+					m_BatchRenderer.Add(roDataH.visibleNode->renderer, roDataH.subsetIndex, channels, xformInfo.worldMatrix, xformInfo.transformType);
+			}
+			previousKey = currentKey;
+			previousHash = currentHash;
+		}
+		m_BatchRenderer.Flush();
+	}
+
+#else // GFX_ENABLE_SHADOW_BATCHING
+	size_t npasses = m_ReceiverObjects.size();
+	for( size_t i = 0; i < npasses; ++i )
+	{
+		int roIndex = m_ReceiverObjects[i];
+		const RenderObjectData& roDataH = (*m_Objects)[roIndex];
+		const RenderObjectDataCold& roDataC = m_RenderObjectsCold[roIndex];
+
+		Shader* shader = roDataH.shader;
+		if( !shader->HasShadowCollectorPass() )
+			continue;
+
+		const VisibleNode* node = roDataH.visibleNode;
+		BaseRenderer* renderer = node->renderer;
+		SetObjectScale(device, roDataC.lodFade, roDataC.invScale);
+		renderer->ApplyCustomProperties(*roDataH.material, shader, roDataC.subshaderIndex);
+
+		const ChannelAssigns* channels = roDataH.material->SetShadowCollectorPassWithShader(shader, roDataC.subshaderIndex);
+		if (!channels)	// NULL channels: nothing to draw for this object
+			continue;
+		SetupObjectMatrix (node->worldMatrix, node->transformType);
+		renderer->Render( roDataH.subsetIndex, *channels );
+	}
+#endif	// GFX_ENABLE_SHADOW_BATCHING
+
+	GetRenderBufferManager().ReleaseTempBuffer( inputShadowMap );
+
+	// possibly blur into another screen-space render texture
+	if( IsSoftShadow(light->GetShadows()) && enableSoftShadows )
+	{
+		return BlurScreenShadowMap (screenShadowMap, light->GetShadows(), m_Context->m_Camera->GetFar(), light->GetShadowSoftness(), light->GetShadowSoftnessFade());
+	}
+
+	return screenShadowMap;
+}
+#endif // ENABLE_SHADOWS
+
+// ------------------------------------------------------------------------
+//  render shadow maps for a single light
+
+
+#if ENABLE_SHADOWS
+
+// Renders the shadow map of one light; for a directional light (when a collector pass is supported) the
+// result is then replaced by the screen-space map from CollectShadows. Otherwise no-shadows keywords are set.
+void ForwardShaderRenderLoop::RenderLightShadowMaps (ForwardShadowMap& shadowMap, ShadowCameraData& camData, bool enableSoftShadows, bool useDualInForward, bool clearFrameBuffer)
+{
+	const Light* light = shadowMap.light->light;
+	// Set correct keywords before rendering casters (caster passes use keywords for shader selection)
+	SetShadowsKeywords( light->GetType(), light->GetShadows(), false, enableSoftShadows );
+	GetGfxDevice().SetViewMatrix( m_Context->m_CurCameraMatrix.GetPtr() );
+
+	Matrix4x4f cascadeMatrices[kMaxShadowCascades];
+
+	// Outside the editor lightmaps are always treated as in use.
+#if UNITY_EDITOR
+	const bool lightmapsInUse = GetLightmapVisualization ().GetUseLightmapsForRendering ();
+#else
+	const bool lightmapsInUse = true;
+#endif
+
+	shadowMap.texture = RenderShadowMaps (camData, *shadowMap.light, shadowMap.receiverBounds, !useDualInForward && lightmapsInUse, cascadeMatrices);
+	CopyMatrix (cascadeMatrices[0].GetPtr(), shadowMap.shadowMatrix.GetPtr());
+
+	// Shadow map can be null if out of memory; or no shadow casters present
+	const bool collect = gGraphicsCaps.hasShadowCollectorPass && shadowMap.texture && light->GetType() == kLightDirectional;
+	if (!collect)
+	{
+		// Not collecting (no texture, no collector pass support, or a non-directional light): set the no-shadows keywords.
+		SetNoShadowsKeywords();
+		return;
+	}
+	SetShadowsKeywords( light->GetType(), light->GetShadows(), false, enableSoftShadows );
+	shadowMap.texture = CollectShadows (shadowMap.texture, light, cascadeMatrices, camData.splitDistances, camData.splitSphereCentersAndSquaredRadii, enableSoftShadows, useDualInForward, clearFrameBuffer);
+}
+
+#endif // ENABLE_SHADOWS
+
+
+// ------------------------------------------------------------------------
+//  rendering entry points
+
+ForwardShaderRenderLoop* CreateForwardShaderRenderLoop()	// heap-allocates a render loop with new; the caller owns the returned pointer
+{
+	return new ForwardShaderRenderLoop();
+}
+
+void DeleteForwardShaderRenderLoop (ForwardShaderRenderLoop* queue)	// destroys a render loop with delete; counterpart of CreateForwardShaderRenderLoop
+{
+	delete queue;
+}
+
+static bool IsPassSuitable (UInt32 currentRenderOptions, UInt32 passRenderOptions, ShaderPassType passType,
+							 bool isLightmapped, bool useRGBM, bool useVertexLights, bool hasAddLights)
+{
+	// Decides whether a shader pass should be rendered for an object in the current configuration.
+	// A pass is rejected as soon as one of the options it requires is switched off.
+	const bool requiredOptionsOn = (currentRenderOptions & passRenderOptions) == passRenderOptions;
+	if (!requiredOptionsOn)
+		return false;
+
+	// "Always" passes are accepted by both the vertex-lit and the forward path.
+	if (passType == kPassAlways)
+		return true;
+
+	if (useVertexLights)
+	{
+		// Vertex-lit path: pick the variant matching the lightmapped state and the lightmap encoding.
+		switch (passType)
+		{
+		case kPassVertex:		return !isLightmapped;
+		case kPassVertexLM:		return isLightmapped && !useRGBM;
+		case kPassVertexLMRGBM:	return isLightmapped && useRGBM;
+		default:				return false;
+		}
+	}
+
+	// Forward path: base pass always, additive pass only when there are additive lights.
+	switch (passType)
+	{
+	case kPassForwardBase:	return true;
+	case kPassForwardAdd:	return hasAddLights;
+	default:				return false;	// pass does not belong to forward loop
+	}
+}
+
+// A point or spot light might be completely behind shadow distance, so there's no point in doing
+// shadows on it. Directional lights are never reported as beyond the shadow distance.
+static bool IsLightBeyondShadowDistance (const Light& light, const Matrix4x4f& cameraMatrix, float shadowDistance)
+{
+	if (light.GetType() == kLightDirectional)
+		return false;
+
+	// Beyond when the negated z of the light position under cameraMatrix, reduced by the light's range, exceeds shadowDistance.
+	const Vector3f lightPos = light.GetComponent(Transform).GetPosition();
+	const float negatedCameraZ = -cameraMatrix.MultiplyPoint3 (lightPos).z;
+	return (negatedCameraZ - light.GetRange()) > shadowDistance;
+}
+
+
+// Registers `bounds` with every shadow-casting additive light in `lights`: an existing entry in
+// outShadowMaps has its receiver bounds grown, otherwise a new entry (texture NULL) is appended.
+static void PutAdditionalShadowLights (const AABB& bounds, ForwardLightsBlock& lights, const Matrix4x4f& cameraMatrix, float shadowDistance, ForwardShadowMaps& outShadowMaps)
+{
+	const int lightCount = lights.addLightCount;
+	const ActiveLight* const* addLights = lights.GetLights();
+	for (int lightNo = 0; lightNo < lightCount; ++lightNo)
+	{
+		const ActiveLight* activeLight = addLights[lightNo];
+		if (activeLight->light->GetShadows() == kShadowNone)
+			continue;
+
+		// Look up the shadow map entry that already belongs to this light, if any
+		ForwardShadowMap* existing = NULL;
+		for (ForwardShadowMaps::iterator it = outShadowMaps.begin(), itEnd = outShadowMaps.end(); it != itEnd; ++it)
+		{
+			if (it->light != activeLight)
+				continue;
+			existing = &(*it);
+			break;
+		}
+
+		if (existing)
+		{
+			existing->receiverBounds.Encapsulate (bounds);
+			continue;
+		}
+
+		// Point/Spot light beyond shadow distance: no need to add
+		if (IsLightBeyondShadowDistance (*activeLight->light, cameraMatrix, shadowDistance))
+			continue;
+
+		ForwardShadowMap& created = outShadowMaps.push_back ();
+		created.light = activeLight;
+		created.receiverBounds = bounds;
+		created.texture = NULL;
+	}
+}
+
+#if ENABLE_FORWARD_SHADER_LOOP_HASH_SORTING
+template<typename T>
+static UInt8* InsertIntoHashBuffer(const T* p, UInt8* buffer)	// appends the bytes of *p at buffer; returns the next write position
+{
+	Assert((sizeof(T) % 4) == 0);	// only values with 32b size granularity may be inserted
+	for (size_t i = 0; i < sizeof(T); ++i) buffer[i] = reinterpret_cast<const UInt8*>(p)[i];	// bytewise copy: a UInt8 buffer has no alignment guarantee for a typed store of T
+	return buffer + sizeof(T);
+}
+#endif
+
+void DoForwardShaderRenderLoop (	// gathers per-object passes, sorts them and renders them through a stack-local ForwardShaderRenderLoop
+	RenderLoopContext& ctx,	// supplies the camera, cull results, shadow cull data and camera matrix used below
+	RenderObjectDataContainer& objects,	// objects to render; passes refer to them by index
+	bool opaque,	// selects opaque vs. transparent GPU section, profiler markers and sort variant
+	bool disableDynamicBatching,	// forwarded to PerformRendering
+	RenderTexture* mainShadowMap,	// forwarded to PerformRendering
+	ActiveLights& activeLights,	// handed to FindForwardLightsForObject
+	bool linearLighting,	// combined with the target texture's sRGB state before being passed to PerformRendering
+	bool clearFrameBuffer)	// forwarded to PerformRendering
+{
+	GPU_AUTO_SECTION(opaque ? kGPUSectionOpaquePass : kGPUSectionTransparentPass);
+	using namespace ForwardShaderRenderLoop_Enum;
+
+	const QualitySettings::QualitySetting& quality = GetQualitySettings().GetCurrent();
+
+	// figure out whether shadows can be received at all: opaque pass, build/platform support, quality setting, positive shadow distance
+	#if ENABLE_SHADOWS
+	float shadowDistance = QualitySettings::GetShadowDistanceForRendering();
+	const bool receiveShadows =
+		opaque &&
+		GetBuildSettings().hasShadows &&
+		CheckPlatformSupportsShadows() &&
+		(quality.shadows != QualitySettings::kShadowsDisable) &&
+		(shadowDistance > 0.0f);
+	const bool localLightShadows = receiveShadows && GetBuildSettings().hasLocalLightShadows;	// shadows from additive lights additionally need build support
+	#endif
+
+	bool useRGBM = gGraphicsCaps.SupportsRGBM();
+
+	// Allocated on the stack each time, uses temp allocators
+	ForwardShaderRenderLoop queue;
+	queue.m_Context = &ctx;
+	queue.m_Objects = &objects;
+	queue.m_RenderObjectsCold.reserve(objects.size());
+	const int kEstimatedLightDataPerObject = sizeof(ForwardLightsBlock) + kEstimatedLightsPerObject * sizeof(Light*);
+	queue.m_RenderObjectsLightData.reserve(objects.size() * kEstimatedLightDataPerObject);
+
+	const ActiveLight* mainDirShadowLight = NULL;
+
+	// figure out current rendering options
+	UInt32 currentRenderOptions = GetCurrentRenderOptions ();
+
+	RenderSettings& renderSettings = GetRenderSettings();
+	LightManager& lightManager = GetLightManager();
+	const int pixelLightCount = quality.pixelLightCount;
+	const bool dualLightmapsMode = (GetLightmapSettings().GetLightmapsMode() == LightmapSettings::kDualLightmapsMode);
+
+#if UNITY_EDITOR
+	bool useLightmaps = GetLightmapVisualization ().GetUseLightmapsForRendering ();
+#endif
+
+	const CullResults& cullResults = *ctx.m_CullResults;
+
+	// Figure everything out: per object choose a subshader, gather its lights, register shadow receivers, emit its passes
+	{
+		PROFILER_AUTO((opaque?gFwdOpaquePrepare:gFwdAlphaPrepare), ctx.m_Camera);
+
+		RenderObjectDataContainer::iterator itEnd = objects.end();
+		size_t roIndex = 0;
+		for (RenderObjectDataContainer::iterator it = objects.begin(); it != itEnd; ++it, ++roIndex)
+		{
+			RenderObjectData& odata = *it;
+			const VisibleNode *node = odata.visibleNode;
+			size_t visibleNodeIndex = node - cullResults.nodes.begin();
+
+			BaseRenderer* renderer = node->renderer;
+
+#if UNITY_EDITOR
+			const bool isLightmapped = renderer->IsLightmappedForRendering() && useLightmaps;
+#else
+			const bool isLightmapped = renderer->IsLightmappedForRendering();
+#endif
+
+			ShaderLab::IntShader& slshader = *odata.shader->GetShaderLabShader();
+			RenderObjectDataCold& roDataC = queue.m_RenderObjectsCold.push_back();	// pushed even for objects skipped below, keeping cold data indexed by roIndex
+			bool useVertexLights = false;
+			if (odata.subShaderIndex == -1)
+			{
+				int ss = slshader.GetDefaultSubshaderIndex (kRenderPathExtForward);
+				if (ss == -1)	// no forward subshader: fall back to a vertex-lit one
+				{
+					ss = slshader.GetDefaultSubshaderIndex (isLightmapped ? kRenderPathExtVertexLM : kRenderPathExtVertex);
+					useVertexLights = true;
+				}
+				if (ss == -1)
+					continue;	// no usable subshader at all: the object emits no passes
+				roDataC.subshaderIndex = ss;
+			}
+			else
+			{
+				roDataC.subshaderIndex = odata.subShaderIndex;
+			}
+			ShaderLab::SubShader& subshader = slshader.GetSubShader(roDataC.subshaderIndex);
+
+			bool disableAddLights = false;
+			if (!useVertexLights)
+			{
+				// If we only have ForwardBase pass and no ForwardAdd,
+				// disable additive lights completely. Only support main directional,
+				// vertex & SH.
+				disableAddLights = !subshader.GetSupportsForwardAddLights();
+			}
+
+			size_t objectLightsOffset = queue.m_RenderObjectsLightData.size();	// presumably the call below appends this object's light block here (it is read back from this offset)
+			roDataC.lightsDataOffset = objectLightsOffset;
+
+			lightManager.FindForwardLightsForObject (
+				queue.m_RenderObjectsLightData,
+				GetObjectLightIndices(cullResults, visibleNodeIndex),
+				GetObjectLightCount(cullResults, visibleNodeIndex),
+				activeLights,
+				*node,
+				isLightmapped,
+				dualLightmapsMode,
+				useVertexLights,
+				pixelLightCount,
+				disableAddLights,
+				renderSettings.GetAmbientLightInActiveColorSpace());
+
+			ForwardLightsBlock& roDataL = *reinterpret_cast<ForwardLightsBlock*>(&queue.m_RenderObjectsLightData[objectLightsOffset]);
+			const bool hasAddLights = (roDataL.addLightCount != 0);
+
+			#if ENABLE_SHADOWS
+			bool objectReceivesShadows = renderer->GetReceiveShadows();
+			bool withinShadowDistance = IsObjectWithinShadowRange (*ctx.m_ShadowCullData, node->worldAABB);
+			if (receiveShadows && withinShadowDistance)
+			{
+				queue.m_ReceiverObjects.push_back (roIndex);	// recorded whether or not the renderer itself receives shadows
+
+				if (objectReceivesShadows)
+				{
+					// deal with main directional shadow light: the first one encountered becomes the main shadow light,
+					if (roDataL.mainLight && roDataL.mainLight->light->GetShadows() != kShadowNone)
+					{
+						if (!mainDirShadowLight)
+							mainDirShadowLight = roDataL.mainLight;
+						if (mainDirShadowLight == roDataL.mainLight)	// and only objects lit by that same light grow its receiver bounds
+							queue.m_MainShadowMap.receiverBounds.Encapsulate (node->worldAABB);
+					}
+
+					// deal with additive shadow lights if needed
+					if (localLightShadows && subshader.GetSupportsFullForwardShadows())
+					{
+						PutAdditionalShadowLights (node->worldAABB, roDataL, ctx.m_CurCameraMatrix, shadowDistance, queue.m_ShadowMaps);
+					}
+				}
+			}
+			#endif
+
+			roDataC.invScale = node->invScale;
+			roDataC.lodFade = node->lodFade;
+
+			int shaderPassCount = subshader.GetValidPassCount();
+
+			// Determine if we will need more than a single pass (counting stops at 2, more is not needed to know)
+			int suitablePasses = 0;
+			for( int pass = 0; pass < shaderPassCount && suitablePasses < 2; ++pass )
+			{
+				ShaderPassType passType; UInt32 passRenderOptions;
+				subshader.GetPass(pass)->GetPassOptions( passType, passRenderOptions );
+
+				if (IsPassSuitable (currentRenderOptions, passRenderOptions, passType, isLightmapped, useRGBM, useVertexLights, hasAddLights))
+					++suitablePasses;
+			}
+
+			// Go over all passes in the shader; only the first suitable one carries the first-pass flag
+			UInt32 firstPassFlag = kPackFirstPassFlag;
+			const UInt32 multiPassFlag = (suitablePasses > 1)? kPackMultiPassFlag: 0;
+			for( int pass = 0; pass < shaderPassCount; ++pass )
+			{
+				ShaderPassType passType; UInt32 passRenderOptions;
+				subshader.GetPass(pass)->GetPassOptions( passType, passRenderOptions );
+
+				if (!IsPassSuitable (currentRenderOptions, passRenderOptions, passType, isLightmapped, useRGBM, useVertexLights, hasAddLights))
+					continue; // skip this pass
+
+				RenderPassData rpData;
+				rpData.roIndex = roIndex;
+				rpData.data =
+					((pass & kPackPassMask) << kPackPassShift) |
+					(passType << kPackTypeShift) |
+					firstPassFlag |
+					multiPassFlag;
+
+#if ENABLE_FORWARD_SHADER_LOOP_HASH_SORTING
+
+				//hash state information for render object sorter
+				const int kHashBufferSize = 64;
+				UInt8 hashBuffer[kHashBufferSize];
+				UInt8* hashPtr = hashBuffer;
+
+				// Always write 32b granularity into the hash buffer to avoid unaligned writes
+				if (opaque)
+					hashPtr = InsertIntoHashBuffer(&node->invScale, hashPtr);
+				int materialID = odata.material->GetInstanceID();
+				hashPtr = InsertIntoHashBuffer(&materialID, hashPtr);
+				hashPtr = InsertIntoHashBuffer(&roDataC.subshaderIndex, hashPtr);
+				UInt32 shaderPassType = (ShaderPassType)((rpData.data >> kPackTypeShift) & kPackTypeMask);
+				hashPtr = InsertIntoHashBuffer(&shaderPassType, hashPtr);
+				UInt32 passIndex = (rpData.data >> kPackPassShift) & kPackPassMask;
+				hashPtr = InsertIntoHashBuffer(&passIndex, hashPtr);
+#if GFX_ENABLE_DRAW_CALL_BATCHING
+				hashPtr = InsertIntoHashBuffer(&odata.staticBatchIndex, hashPtr);
+#endif
+				Assert(hashPtr-hashBuffer <= kHashBufferSize);
+
+				rpData.hash = MurmurHash2A(hashBuffer, hashPtr-hashBuffer, 0x9747b28c);
+#endif
+				queue.m_PlainRenderPasses.push_back( rpData );
+
+				firstPassFlag = 0;	// later passes of the same object are not "first"
+			}
+		}
+	}
+
+	// sort everything
+	{
+		PROFILER_AUTO((opaque?gFwdOpaqueSort:gFwdAlphaSort), ctx.m_Camera);
+		if (opaque)
+			queue.SortRenderPassData<true> (queue.m_PlainRenderPasses);
+		else
+			queue.SortRenderPassData<false> (queue.m_PlainRenderPasses);
+	}
+
+	// Render everything. When transitioning to render queues,
+	// it will invoke camera renderables (halos, and so on)
+	{
+		PROFILER_AUTO_GFX((opaque?gFwdOpaqueRender:gFwdAlphaRender), ctx.m_Camera);
+		RenderTexture* rtMain = ctx.m_Camera->GetCurrentTargetTexture ();
+		queue.PerformRendering (mainDirShadowLight, mainShadowMap, *ctx.m_ShadowCullData, disableDynamicBatching, linearLighting && (!rtMain || rtMain->GetSRGBReadWrite()), clearFrameBuffer);
+	}
+}
diff --git a/Runtime/Camera/RenderLoops/ForwardVertexRenderLoop.cpp b/Runtime/Camera/RenderLoops/ForwardVertexRenderLoop.cpp
new file mode 100644
index 0000000..60889ad
--- /dev/null
+++ b/Runtime/Camera/RenderLoops/ForwardVertexRenderLoop.cpp
@@ -0,0 +1,637 @@
+#include "UnityPrefix.h"
+#include "RenderLoopPrivate.h"
+#include "Runtime/Camera/Camera.h"
+#include "Runtime/Camera/Renderqueue.h"
+#include "Runtime/Graphics/Transform.h"
+#include "External/shaderlab/Library/intshader.h"
+#include "Runtime/Camera/Renderable.h"
+#include "Runtime/Shaders/Shader.h"
+#include "Runtime/Camera/RenderSettings.h"
+#include "Runtime/Camera/RenderManager.h"
+#include "Runtime/GfxDevice/GfxDevice.h"
+#include "RenderLoop.h"
+#include "Runtime/GfxDevice/GfxDeviceConfigure.h"
+#include "Runtime/Utilities/dynamic_array.h"
+#include "Runtime/Graphics/LightmapSettings.h"
+#include "Runtime/GfxDevice/BatchRendering.h"
+#include "Runtime/Profiler/Profiler.h"
+#include "Runtime/Camera/LightManager.h"
+#if UNITY_EDITOR
+#include "Editor/Src/LightmapVisualization.h"
+#endif
+#include "BuiltinShaderParamUtility.h"
+#include "External/MurmurHash/MurmurHash2.h"
+
+// Enable/disable hash based sorting of render passes in the forward vertex render loop.
+#define ENABLE_VERTEX_LOOP_HASH_SORTING 0
+
+// Returns true only when both blocks are non-NULL and list the same lights in the same order.
+static inline bool CompareLights (VertexLightsBlock const* a, VertexLightsBlock const* b)
+{
+	// A missing block never matches, not even another missing block.
+	if (a == NULL || b == NULL || a->lightCount != b->lightCount)
+		return false;
+
+	const ActiveLight* const* lhs = a->GetLights();
+	const ActiveLight* const* rhs = b->GetLights();
+	for (int index = 0; index < a->lightCount; ++index)
+	{
+		if (lhs[index] != rhs[index])
+			return false;
+	}
+	return true;
+}
+
+
+struct RODataVLit {	// per render object data kept by the vertex-lit loop (element type of m_RenderObjectsCold)
+	// help the compiler here a bit...
+	RODataVLit() { }	// empty: fields are left uninitialized
+	RODataVLit( const RODataVLit& rhs ) { memcpy(this, &rhs, sizeof(*this)); }	// raw byte copy of the whole struct
+
+	float		invScale;						// 4
+	float		lodFade;						// 4
+	size_t		lightsDataOffset;				// sizeof(size_t): 4 on 32-bit targets, 8 on 64-bit; into memory block with all light data chunks
+	int			subshaderIndex;					// 4
+
+	// 16 bytes when size_t is 4 bytes wide; larger where size_t is 8 bytes
+};
+
+namespace ForwardVertexRenderLoop_Enum
+{
+// Bit layout of RPDataVLit::data. Render pass data is 8 bytes each: the index of the render
+// object plus "the rest" packed into these 4 bytes.
+enum {
+	kPackPassShift		= 0,		// the pass number starts at bit 0 ...
+	kPackPassMask		= 0xFF,		// ... and is 8 bits wide
+	kPackFirstPassFlag	= 0x10000,	// bit 16: first pass flag
+	kPackMultiPassFlag	= 0x20000,	// bit 17: multi pass flag
+};
+} // namespace ForwardVertexRenderLoop_Enum
+
+struct RPDataVLit {	// one entry of the render pass list that gets sorted and rendered
+	int	roIndex;	// index of the render object this pass belongs to
+	// Packed into UInt32: pass number, first pass flag, multi pass flag (see ForwardVertexRenderLoop_Enum)
+	UInt32 data;
+#if ENABLE_VERTEX_LOOP_HASH_SORTING
+	UInt32 hash;	// compared by the sorter when hash sorting is enabled
+#endif
+};
+typedef dynamic_array<RPDataVLit> RenderPassesVLit;
+
+
+// Per-draw state snapshot; PerformRendering flushes the batch whenever it differs from the previous draw's.
+struct ForwardVertexRenderState
+{
+	int rendererType;
+	int transformType;
+	float invScale;
+	float lodFade;
+
+	Material* material;
+	Shader* shader;
+	int subshaderIndex;
+	int passIndex;
+
+	const VertexLightsBlock* lights;
+
+	int lightmapIndex;
+	Vector4f lightmapST;
+
+	UInt32 customPropsHash;
+
+	// Resets every field to a placeholder value (-1, 0 or a null pointer).
+	void Invalidate()
+	{
+		rendererType = -1;
+		transformType = -1;
+		invScale = 0.0f;
+		lodFade = 0.0f;
+		material = 0; shader = 0;
+		subshaderIndex = -1;
+		passIndex = -1;
+		lights = 0;
+		lightmapIndex = -1;
+		lightmapST = Vector4f(0,0,0,0);
+		customPropsHash = 0;
+	}
+
+	// Field-by-field equality; lodFade is compared with the LOD_FADE_BATCH_EPSILON tolerance.
+	bool operator == (const ForwardVertexRenderState& rhs) const
+	{
+		if (this == &rhs)
+			return true;
+		if (rendererType != rhs.rendererType || transformType != rhs.transformType)
+			return false;
+		if (material != rhs.material || shader != rhs.shader || !CompareLights(lights, rhs.lights))
+			return false;
+		if (subshaderIndex != rhs.subshaderIndex || passIndex != rhs.passIndex)
+			return false;
+		if (!CompareApproximately(invScale,rhs.invScale) || !CompareApproximately(lodFade,rhs.lodFade, LOD_FADE_BATCH_EPSILON))
+			return false;
+		if (lightmapIndex != rhs.lightmapIndex || !CompareMemory(lightmapST, rhs.lightmapST))
+			return false;
+		return customPropsHash == rhs.customPropsHash;
+	}
+
+	bool operator != (const ForwardVertexRenderState& rhs) const { return !(rhs == *this); }
+};
+	
+
+struct ForwardVertexRenderLoop	// working state of one vertex-lit render loop; its arrays use kMemTempAlloc
+{
+	ForwardVertexRenderLoop()
+		: m_Context (NULL), m_Objects (NULL)	// pointers start out NULL instead of indeterminate
+		, m_RenderObjectsCold		(kMemTempAlloc)
+		, m_RenderObjectsLightData	(kMemTempAlloc)
+		, m_PlainRenderPasses		(kMemTempAlloc)
+	{ }
+
+	const RenderLoopContext*	m_Context;
+	RenderObjectDataContainer*	m_Objects;
+	dynamic_array<RODataVLit>	m_RenderObjectsCold;	// indexed by render object index
+	dynamic_array<UInt8>		m_RenderObjectsLightData;	// raw bytes holding VertexLightsBlock data, addressed via RODataVLit::lightsDataOffset
+	RenderPassesVLit			m_PlainRenderPasses;	// passes to draw, in draw order once sorted
+	BatchRenderer				m_BatchRenderer;
+
+	void PerformRendering (bool sSRGBRenderTarget, bool clearFrameBuffer);
+	template<bool opaque>
+	struct RenderObjectSorter	// std::sort comparator; `queue` gives it access to the render object data
+	{
+		bool operator()( const RPDataVLit& ra, const RPDataVLit& rb ) const;
+		const ForwardVertexRenderLoop* queue;
+	};
+	template<bool opaque>
+	void SortRenderPassData( RenderPassesVLit& passes )	// sorts `passes` in place with RenderObjectSorter<opaque>
+	{
+		RenderObjectSorter<opaque> sorter;
+		sorter.queue = this;
+		std::sort( passes.begin(), passes.end(), sorter );
+	}
+};
+
+
+template<bool opaque>	// comparator used with std::sort: returns true when pass ra sorts before pass rb
+bool ForwardVertexRenderLoop::RenderObjectSorter<opaque>::operator() (const RPDataVLit& ra, const RPDataVLit& rb) const
+{
+	using namespace ForwardVertexRenderLoop_Enum;
+
+	const RenderObjectData& dataa = (*queue->m_Objects)[ra.roIndex];
+	const RenderObjectData& datab = (*queue->m_Objects)[rb.roIndex];
+
+	// Sort by layering depth. The helper's return value tells whether it produced a decision.
+	bool globalLayeringResult;
+	if (CompareGlobalLayeringData(dataa.globalLayeringData, datab.globalLayeringData, globalLayeringResult))
+		return globalLayeringResult;
+
+#if ENABLE_VERTEX_LOOP_HASH_SORTING
+
+	// Sort by render queues first
+	if( dataa.queueIndex != datab.queueIndex )
+		return dataa.queueIndex < datab.queueIndex;
+
+	if (opaque) {
+		DebugAssertIf (dataa.queueIndex < kQueueIndexMin || dataa.queueIndex > kGeometryQueueIndexMax); // this is opaque loop!
+	} else {
+		DebugAssertIf (dataa.queueIndex >= kQueueIndexMin && dataa.queueIndex <= kGeometryQueueIndexMax); // this is alpha loop!
+	}
+
+	if (!opaque)	// alpha loop: distance decides before any state grouping
+	{
+		if( dataa.distance != datab.distance )
+			return dataa.distance < datab.distance;
+	}
+
+	UInt32 flagsa = ra.data;
+	UInt32 flagsb = rb.data;
+
+	// render all first passes first
+	if( (flagsa & kPackFirstPassFlag) != (flagsb & kPackFirstPassFlag) )
+		return (flagsa & kPackFirstPassFlag) > (flagsb & kPackFirstPassFlag);
+
+	if (ra.hash != rb.hash)	// group passes by their hash value
+		return ra.hash < rb.hash;
+
+	// then sort by material
+	if( dataa.material != datab.material )
+		return dataa.material->GetInstanceID() < datab.material->GetInstanceID(); // just compare instance IDs
+
+	// inside same material: by pass
+	UInt32 passa = (flagsa >> kPackPassShift) & kPackPassMask;
+	UInt32 passb = (flagsb >> kPackPassShift) & kPackPassMask;
+	if( passa != passb )
+		return passa < passb;
+
+	// Sort by distance in reverse order.
+	// That way we get consistency in render order, and more pixels not rendered due to z-testing,
+	// which benefits performance.
+	if (opaque)
+	{
+		if( dataa.distance != datab.distance )
+			return dataa.distance > datab.distance;
+	}
+
+	// fall through: roIndex (final tie-breaker)
+	return ra.roIndex < rb.roIndex;
+
+#else
+
+	// Sort by render queues first
+	if( dataa.queueIndex != datab.queueIndex )
+		return dataa.queueIndex < datab.queueIndex;
+
+	if (opaque) {
+		DebugAssertIf (dataa.queueIndex < kQueueIndexMin || dataa.queueIndex > kGeometryQueueIndexMax); // this is opaque loop!
+	} else {
+		DebugAssertIf (dataa.queueIndex >= kQueueIndexMin && dataa.queueIndex <= kGeometryQueueIndexMax); // this is alpha loop!
+	}
+
+	if (!opaque)	// alpha loop: distance decides before any state grouping
+	{
+		if( dataa.distance != datab.distance )
+			return dataa.distance < datab.distance;
+	}
+
+	UInt32 flagsa = ra.data;
+	UInt32 flagsb = rb.data;
+
+	// render all first passes first
+	if( (flagsa & kPackFirstPassFlag) != (flagsb & kPackFirstPassFlag) )
+		return (flagsa & kPackFirstPassFlag) > (flagsb & kPackFirstPassFlag);
+
+	// sort by lightmap index (fine to do it before source material index
+	// since every part of same mesh will have the same lightmap index)
+	if( dataa.lightmapIndex != datab.lightmapIndex )
+		return dataa.lightmapIndex < datab.lightmapIndex;
+
+#if GFX_ENABLE_DRAW_CALL_BATCHING
+	// if part of predefined static batch, then sort by static batch index
+	if( dataa.staticBatchIndex != datab.staticBatchIndex )
+		return dataa.staticBatchIndex < datab.staticBatchIndex;
+
+	// otherwise sort by material index. Some people are using multiple materials
+	// on a single mesh and expect them to be rendered in order.
+	if( dataa.staticBatchIndex == 0 && dataa.sourceMaterialIndex != datab.sourceMaterialIndex )
+		return dataa.sourceMaterialIndex < datab.sourceMaterialIndex;
+#else
+	// Sort by material index. Some people are using multiple materials
+	// on a single mesh and expect them to be rendered in order.
+	if( dataa.sourceMaterialIndex != datab.sourceMaterialIndex )
+		return dataa.sourceMaterialIndex < datab.sourceMaterialIndex;
+#endif
+
+	// then sort by material
+	if( dataa.material != datab.material )
+		return dataa.material->GetInstanceID() < datab.material->GetInstanceID(); // just compare instance IDs
+
+	// inside same material: by pass
+	UInt32 passa = (flagsa >> kPackPassShift) & kPackPassMask;
+	UInt32 passb = (flagsb >> kPackPassShift) & kPackPassMask;
+	if( passa != passb )
+		return passa < passb;
+
+	// Sort by distance in reverse order.
+	// That way we get consistency in render order, and more pixels not rendered due to z-testing,
+	// which benefits performance.
+	if (opaque)
+	{
+		if( dataa.distance != datab.distance )
+			return dataa.distance > datab.distance;
+	}
+
+	// fall through: roIndex (final tie-breaker)
+	return ra.roIndex < rb.roIndex;
+
+#endif
+}
+
+void ForwardVertexRenderLoop::PerformRendering (bool sSRGBRenderTarget, bool clearFrameBuffer)
+{	/* Draws every entry of m_PlainRenderPasses in its current order, batching
+	   consecutive entries whose ForwardVertexRenderState compares equal.
+	   sSRGBRenderTarget: value handed to device.SetSRGBWrite() before drawing;
+	                      sRGB write is switched back to false at the end.
+	   clearFrameBuffer:  when true, the camera's ClearNoSkybox(false) is called
+	                      before any pass is drawn.
+	   Camera renderables are drawn whenever the queue index of the passes
+	   advances, and the remaining ones are drawn after the last pass. */
+	using namespace ForwardVertexRenderLoop_Enum;
+
+	GfxDevice& device = GetGfxDevice();
+	const RenderSettings& renderSettings = GetRenderSettings();
+
+	const RenderManager::Renderables& renderables = GetRenderManager ().GetRenderables ();
+	RenderManager::Renderables::const_iterator renderablesBegin = renderables.begin(), renderablesEnd = renderables.end(); // renderablesBegin is advanced as renderables get drawn
+
+	const LightmapSettings& lightmapper = GetLightmapSettings();
+
+	size_t npasses = m_PlainRenderPasses.size();
+
+	int currentQueueIndex = m_Context->m_RenderQueueStart;
+	device.SetViewMatrix( m_Context->m_CurCameraMatrix.GetPtr() );
+	
+	ForwardVertexRenderState prevRenderState; // state of the batch currently being accumulated
+	prevRenderState.Invalidate();
+
+	// sRGB write state used while the vertex-lit passes are drawn
+	device.SetSRGBWrite(sSRGBRenderTarget);
+	if (clearFrameBuffer)
+		m_Context->m_Camera->ClearNoSkybox(false);
+	
+	const ChannelAssigns* channels = NULL; // result of the most recent SetPassWithShader; reused while batching
+	int canBatch = 0; // number of consecutive passes that matched prevRenderState; 0 means full state setup is needed
+	StartRenderLoop();
+	for( size_t i = 0; i < npasses; ++i )
+	{
+		const RPDataVLit& rpData = m_PlainRenderPasses[i];
+		DebugAssertIf (rpData.roIndex < 0 || rpData.roIndex >= m_Objects->size() || rpData.roIndex >= m_RenderObjectsCold.size());
+		const RenderObjectData& roDataH = (*m_Objects)[rpData.roIndex];
+		const RODataVLit& roDataC = m_RenderObjectsCold[rpData.roIndex];
+
+		const VertexLightsBlock& roDataL = *reinterpret_cast<VertexLightsBlock*>(&m_RenderObjectsLightData[roDataC.lightsDataOffset]); // light block located at this object's offset in the shared light data buffer
+
+		const int roQueueIndex = roDataH.queueIndex;
+		DebugAssertIf( roQueueIndex < currentQueueIndex ); // queue indices are expected to be non-decreasing along the pass list
+		if( roQueueIndex > currentQueueIndex )
+		{
+			m_BatchRenderer.Flush();
+			canBatch = 0;
+			EndRenderLoop();
+			// Draw the camera renderables whose queue index is at or below the queue we are entering
+			if (!m_Context->m_DontRenderRenderables)
+			{
+				while( renderablesBegin != renderablesEnd && renderablesBegin->first <= roQueueIndex )
+				{
+					renderablesBegin->second->RenderRenderable(*m_Context->m_CullResults);
+
+					++renderablesBegin;
+				}
+			}
+
+			currentQueueIndex = roQueueIndex;
+			StartRenderLoop();
+		}
+
+		const VisibleNode* node = roDataH.visibleNode;
+		const UInt16 subsetIndex = roDataH.subsetIndex;
+		
+		ForwardVertexRenderState rs; // everything that must match the previous pass for the two to be batched
+		{
+			rs.rendererType = node->renderer->GetRendererType();
+			rs.transformType = node->transformType;
+			rs.invScale = roDataC.invScale;
+			rs.lodFade = roDataC.lodFade;
+			
+			rs.material = roDataH.material;
+			rs.shader = roDataH.shader;
+			rs.subshaderIndex = roDataC.subshaderIndex;
+			rs.passIndex = (rpData.data >> kPackPassShift) & kPackPassMask;
+			
+			rs.lights = &roDataL;
+			
+			rs.lightmapIndex = roDataH.lightmapIndex;
+			DebugAssert(rs.lightmapIndex == node->renderer->GetLightmapIndex());
+			rs.lightmapST = node->renderer->GetLightmapSTForRendering();
+			rs.customPropsHash = node->renderer->GetCustomPropertiesHash();
+		}
+		
+		// multi-pass requires vertex position values to be EXACTLY the same for all passes
+		// therefore do NOT batch dynamic multi-pass nodes
+		const bool multiPass = (rpData.data & kPackMultiPassFlag) == kPackMultiPassFlag;
+		const bool dynamicAndMultiPass = (node->renderer->GetStaticBatchIndex() == 0) && multiPass; // a static batch index of 0 is treated as "dynamic" here
+		
+		if (dynamicAndMultiPass ||
+			prevRenderState != rs)
+		{
+			m_BatchRenderer.Flush();
+			prevRenderState = rs;
+			canBatch = 0;
+		}
+		else
+			++canBatch;
+
+		// NOTE: identity matrix has to be set on OpenGLES before lights are set
+		// as lighting is specified in World space
+		device.SetWorldMatrix( Matrix4x4f::identity.GetPtr() );
+
+		renderSettings.SetupAmbient ();
+		SetObjectScale(device, roDataC.lodFade, roDataC.invScale);
+
+		node->renderer->ApplyCustomProperties(*rs.material, rs.shader, rs.subshaderIndex);
+
+		// only setup lights & pass when not batching
+		if (canBatch < 1)
+		{
+			SetupObjectLightmaps (lightmapper, rs.lightmapIndex, rs.lightmapST, true);
+
+			LightManager::SetupVertexLights(rs.lights->lightCount, rs.lights->GetLights());
+			channels = rs.material->SetPassWithShader(rs.passIndex, rs.shader, rs.subshaderIndex);
+		}
+		if (channels) // a NULL result from SetPassWithShader means nothing is added for this pass
+		{
+			m_BatchRenderer.Add(node->renderer, subsetIndex, channels, node->worldMatrix, rs.transformType);
+		}
+	}
+
+	m_BatchRenderer.Flush();
+	EndRenderLoop();
+	device.SetSRGBWrite(false);
+	device.SetViewMatrix( m_Context->m_CurCameraMatrix.GetPtr() );
+	
+
+	// After everything we might still have renderables that should be drawn at the
+	// very end. Do it.
+	if (!m_Context->m_DontRenderRenderables)
+	{
+		while (renderablesBegin != renderablesEnd && renderablesBegin->first < m_Context->m_RenderQueueStart) // skip renderables that belong before this loop's queue range
+			++renderablesBegin;
+		while( renderablesBegin != renderablesEnd && renderablesBegin->first < m_Context->m_RenderQueueEnd )
+		{
+			renderablesBegin->second->RenderRenderable(*m_Context->m_CullResults);
+
+			++renderablesBegin;
+		}
+	}
+}
+
+
+// Allocates a ForwardVertexRenderLoop with operator new and hands ownership to the caller.
+ForwardVertexRenderLoop* CreateForwardVertexRenderLoop()
+{
+	ForwardVertexRenderLoop* const loop = new ForwardVertexRenderLoop();
+	return loop;
+}
+
+// Destroys a heap-allocated ForwardVertexRenderLoop. A NULL pointer is accepted and ignored.
+void DeleteForwardVertexRenderLoop (ForwardVertexRenderLoop* queue)
+{
+	if (queue == NULL)
+		return;
+	delete queue;
+}
+
+
+// Decides whether a shader pass should be drawn by the vertex-lit loop.
+//   currentRenderOptions: option bits that are currently enabled.
+//   passRenderOptions:    option bits the pass requires; every one of them must be enabled.
+//   passType:             only kPassAlways, kPassVertex, kPassVertexLM and kPassVertexLMRGBM qualify.
+//   isLightmapped:        chooses between the plain vertex pass and the lightmapped variants.
+//   useRGBM:              chooses between the kPassVertexLM and kPassVertexLMRGBM variants.
+static bool IsPassSuitable (UInt32 currentRenderOptions, UInt32 passRenderOptions, ShaderPassType passType,
+							 bool isLightmapped, bool useRGBM)
+{
+	// Any required option that is not currently enabled rules the pass out.
+	const UInt32 missingOptions = passRenderOptions & ~currentRenderOptions;
+	if (missingOptions != 0)
+		return false;
+
+	switch (passType)
+	{
+		case kPassAlways:
+			// Drawn regardless of lightmapping.
+			return true;
+
+		case kPassVertex:
+			// Plain vertex-lit pass: only for objects without a lightmap.
+			return !isLightmapped;
+
+		case kPassVertexLM:
+			// Lightmapped pass for lightmaps that are not RGBM encoded.
+			return isLightmapped && !useRGBM;
+
+		case kPassVertexLMRGBM:
+			// Lightmapped pass that decodes RGBM lightmaps.
+			return isLightmapped && useRGBM;
+
+		default:
+			// Every other pass type is unsuitable for this loop.
+			return false;
+	}
+}
+
+#if ENABLE_VERTEX_LOOP_HASH_SORTING
+// Appends a copy of *p to a byte buffer used as hash input and returns the next
+// write position (buffer + sizeof(T)).
+//   p:      value to append; sizeof(T) is expected to be a multiple of 4.
+//   buffer: destination; the caller must guarantee sizeof(T) writable bytes.
+template<typename T>
+static UInt8* InsertIntoHashBufferVtx(const T* p, UInt8* buffer)
+{
+	// Entries are kept at 32-bit granularity.
+	Assert((sizeof(T) % 4) == 0);
+
+	// Copy byte by byte instead of storing through a T*: the destination is a plain
+	// byte array with no alignment guarantee, so a typed store could be misaligned.
+	// NOTE(review): assumes UInt8 is a character type (unsigned char) - confirm.
+	const UInt8* src = reinterpret_cast<const UInt8*>(p);
+	for (size_t i = 0; i < sizeof(T); ++i)
+		buffer[i] = src[i];
+
+	return buffer + sizeof(T);
+}
+#endif
+
+// Builds and executes the vertex-lit forward render loop for one list of objects.
+//   ctx:              render loop context; the camera and cull results are read from it.
+//   objects:          objects to draw. Per-object cold data is stored at the same index.
+//   opaque:           selects the GPU profiling section and the sort variant.
+//   activeLights:     forwarded to the light manager when gathering per-object vertex lights.
+//   linearLighting:   sRGB write is requested only when this is true and the camera either has
+//                     no target texture or its target texture reports sRGB read/write.
+//   clearFrameBuffer: forwarded to PerformRendering().
+void DoForwardVertexRenderLoop (RenderLoopContext& ctx, RenderObjectDataContainer& objects, bool opaque, ActiveLights& activeLights, bool linearLighting, bool clearFrameBuffer)
+{
+	GPU_AUTO_SECTION(opaque ? kGPUSectionOpaquePass : kGPUSectionTransparentPass);
+
+	using namespace ForwardVertexRenderLoop_Enum;
+
+	// Allocated on the stack each time, uses temp allocators
+	ForwardVertexRenderLoop queue;
+	queue.m_Context = &ctx;
+	queue.m_Objects = &objects;
+	queue.m_RenderObjectsCold.reserve(objects.size());
+	queue.m_PlainRenderPasses.reserve(objects.size());
+	const int kEstimatedLightDataPerObject = sizeof(VertexLightsBlock) + kEstimatedLightsPerObject * sizeof(Light*);
+	queue.m_RenderObjectsLightData.reserve(objects.size() * kEstimatedLightDataPerObject);
+
+	const CullResults& cullResults = *ctx.m_CullResults;
+
+	// figure out current rendering options
+	UInt32 currentRenderOptions = GetCurrentRenderOptions ();
+
+	//RenderSettings& renderSettings = GetRenderSettings();
+	const LightmapSettings& lightmapper = GetLightmapSettings();
+#if UNITY_EDITOR
+	bool useLightmaps = GetLightmapVisualization().GetUseLightmapsForRendering();
+#endif
+
+	bool useRGBM = gGraphicsCaps.SupportsRGBM();
+
+	// Figure everything out
+	RenderObjectDataContainer::iterator itEnd = objects.end();
+	size_t roIndex = 0;
+	for (RenderObjectDataContainer::iterator it = objects.begin(); it != itEnd; ++it, ++roIndex)
+	{
+		RenderObjectData& odata = *it;
+
+		const VisibleNode* node = odata.visibleNode;
+		// The cold-data slot is pushed before any early-out below, so that
+		// m_RenderObjectsCold stays index-aligned with 'objects'.
+		RODataVLit& roDataC = queue.m_RenderObjectsCold.push_back();
+		size_t visibleNodeIndex = node - cullResults.nodes.begin();
+
+		LightmapSettings::TextureTriple lmTextures = lightmapper.GetLightmapTexture (node->renderer->GetLightmapIndex());
+#if UNITY_EDITOR
+		bool isLightmapped = useLightmaps && lmTextures.first.m_ID;
+#else
+		bool isLightmapped = lmTextures.first.m_ID;
+#endif
+		ShaderLab::IntShader& slshader = *odata.shader->GetShaderLabShader();
+		int vlitSS = odata.subShaderIndex;
+		if (vlitSS == -1)
+		{
+			// No subshader chosen yet: ask the shader for its default one for this render path.
+			vlitSS = slshader.GetDefaultSubshaderIndex (isLightmapped ? kRenderPathExtVertexLM : kRenderPathExtVertex);
+			if (vlitSS == -1)
+				continue; // no usable subshader; the object contributes no passes
+		}
+		roDataC.subshaderIndex = vlitSS;
+
+		// Remember where this object's light block starts in the shared light data buffer.
+		size_t objectLightsOffset = queue.m_RenderObjectsLightData.size();
+		roDataC.lightsDataOffset = objectLightsOffset;
+
+		GetLightManager().FindVertexLightsForObject (
+			queue.m_RenderObjectsLightData,
+			GetObjectLightIndices(cullResults, visibleNodeIndex),
+			GetObjectLightCount(cullResults, visibleNodeIndex),
+			activeLights, *node);
+
+		roDataC.invScale = node->invScale;
+		roDataC.lodFade = node->lodFade;
+
+		// Go over all passes in the shader and add suitable ones for rendering
+		ShaderLab::SubShader& subshader = slshader.GetSubShader(roDataC.subshaderIndex);
+		int shaderPassCount = subshader.GetValidPassCount();
+		
+		// Determine if we will need more than a single pass
+		// (counting stops at two, which is all the multi-pass flag needs to know)
+		int suitablePasses = 0;
+		for( int pass = 0; pass < shaderPassCount && suitablePasses < 2; ++pass )
+		{
+			ShaderPassType passType; UInt32 passRenderOptions;
+			subshader.GetPass(pass)->GetPassOptions( passType, passRenderOptions );
+			
+			if (IsPassSuitable (currentRenderOptions, passRenderOptions, passType, isLightmapped, useRGBM))
+				++suitablePasses;
+		}
+		
+		// Go over all passes in the shader
+		UInt32 firstPassFlag = kPackFirstPassFlag; // set only on the first suitable pass of the object
+		const UInt32 multiPassFlag = (suitablePasses > 1)? kPackMultiPassFlag: 0;
+		for (int pass = 0; pass < shaderPassCount; ++pass)
+		{
+			ShaderPassType passType;
+			UInt32 passRenderOptions;
+			subshader.GetPass(pass)->GetPassOptions( passType, passRenderOptions );
+
+			if (!IsPassSuitable (currentRenderOptions, passRenderOptions, passType, isLightmapped, useRGBM))
+				continue;
+
+			RPDataVLit& rpData = queue.m_PlainRenderPasses.push_back();
+			rpData.roIndex = roIndex;
+			rpData.data = 
+				((pass & kPackPassMask) << kPackPassShift) |
+				firstPassFlag |
+				multiPassFlag;
+			firstPassFlag = 0;
+
+#if ENABLE_VERTEX_LOOP_HASH_SORTING
+
+			//hash state information for render object sorter
+			const int kHashBufferSize = 64;
+			UInt8 hashBuffer[kHashBufferSize];
+			UInt8* hashPtr = hashBuffer;
+
+			// Always write 32b granularity into the hash buffer to avoid unaligned writes
+			UInt32 rendererType = static_cast<UInt32>(node->renderer->GetRendererType());
+			hashPtr = InsertIntoHashBufferVtx(&rendererType, hashPtr);
+			UInt32 lightmapIndex = odata.lightmapIndex;
+			hashPtr = InsertIntoHashBufferVtx(&lightmapIndex, hashPtr);
+			UInt32 sourceMaterialIndex = 0;
+#if GFX_ENABLE_DRAW_CALL_BATCHING
+			// The source material index only takes part in the hash when the
+			// static batch index is 0.
+			hashPtr = InsertIntoHashBufferVtx(&odata.staticBatchIndex, hashPtr);
+			if (odata.staticBatchIndex == 0)
+				sourceMaterialIndex = odata.sourceMaterialIndex;
+#else
+			sourceMaterialIndex = odata.sourceMaterialIndex;
+#endif
+			hashPtr = InsertIntoHashBufferVtx(&sourceMaterialIndex, hashPtr);
+			
+			Assert(hashPtr-hashBuffer <= kHashBufferSize);
+
+			rpData.hash = MurmurHash2A(hashBuffer, hashPtr-hashBuffer, 0x9747b28c);
+#endif
+		}
+	}
+
+	// sort everything
+	if (opaque)
+		queue.SortRenderPassData<true> (queue.m_PlainRenderPasses);
+	else
+		queue.SortRenderPassData<false> (queue.m_PlainRenderPasses);
+
+	// Render everything. When transitioning to render queues,
+	// it will invoke camera renderables (halos, and so on).
+	RenderTexture* rtMain = ctx.m_Camera->GetCurrentTargetTexture ();
+	queue.PerformRendering (linearLighting && (!rtMain || rtMain->GetSRGBReadWrite()), clearFrameBuffer);
+}
diff --git a/Runtime/Camera/RenderLoops/GlobalLayeringData.h b/Runtime/Camera/RenderLoops/GlobalLayeringData.h
new file mode 100644
index 0000000..176edde
--- /dev/null
+++ b/Runtime/Camera/RenderLoops/GlobalLayeringData.h
@@ -0,0 +1,26 @@
+#pragma once
+
+struct GlobalLayeringData
+{
+	// Per-renderer sorting data. CompareGlobalLayeringData orders by layer first, then by order.
+	SInt16 layer; // Layer order (primary sort key).
+	SInt16 order; // In-layer order (secondary sort key, used when the layers are equal).
+};
+
+// Returns layering data with both the layer and the in-layer order set to zero.
+inline GlobalLayeringData GlobalLayeringDataCleared ()
+{
+	GlobalLayeringData cleared;
+	cleared.layer = 0;
+	cleared.order = 0;
+	return cleared;
+}
+
+// Orders two GlobalLayeringData values by layer, then by in-layer order.
+// Returns true and stores (lhs < rhs) in 'result' when the two values differ.
+// Returns false and leaves 'result' untouched when they are equal.
+inline bool CompareGlobalLayeringData(const GlobalLayeringData& lhs, const GlobalLayeringData& rhs, bool& result)
+{
+	const bool sameLayer = (lhs.layer == rhs.layer);
+	if (sameLayer && lhs.order == rhs.order)
+		return false;
+
+	// The layer decides when it differs; otherwise the in-layer order does.
+	result = sameLayer ? (lhs.order < rhs.order) : (lhs.layer < rhs.layer);
+	return true;
+}
diff --git a/Runtime/Camera/RenderLoops/PrePassRenderLoop.cpp b/Runtime/Camera/RenderLoops/PrePassRenderLoop.cpp
new file mode 100644
index 0000000..8aa3ab6
--- /dev/null
+++ b/Runtime/Camera/RenderLoops/PrePassRenderLoop.cpp
@@ -0,0 +1,1958 @@
+#include "UnityPrefix.h"
+#include "Runtime/GfxDevice/GfxDeviceConfigure.h"
+
+#if GFX_SUPPORTS_RENDERLOOP_PREPASS
+#include "RenderLoopPrivate.h"
+#include "Runtime/Camera/Renderqueue.h"
+#include "Runtime/Camera/BaseRenderer.h"
+#include "Runtime/Filters/Renderer.h"
+#include "Runtime/Graphics/Transform.h"
+#include "Runtime/Camera/Camera.h"
+#include "Runtime/Camera/ImageFilters.h"
+#include "Runtime/Geometry/Intersection.h"
+#include "External/shaderlab/Library/intshader.h"
+#include "External/shaderlab/Library/properties.h"
+#include "External/shaderlab/Library/shaderlab.h"
+#include "Runtime/Shaders/Shader.h"
+#include "Runtime/Shaders/ShaderNameRegistry.h"
+#include "Runtime/Camera/RenderSettings.h"
+#include "Runtime/Misc/ResourceManager.h"
+#include "Runtime/Shaders/Material.h"
+#include "Runtime/Camera/CameraUtil.h"
+#include "Runtime/Graphics/RenderBufferManager.h"
+#include "Runtime/Graphics/GraphicsHelper.h"
+#include "Runtime/GfxDevice/GfxDevice.h"
+#include "Runtime/Camera/Light.h"
+#include "Runtime/Camera/Shadows.h"
+#include "Runtime/Graphics/LightmapSettings.h"
+#include "External/shaderlab/Library/texenv.h"
+#include "Runtime/Misc/QualitySettings.h"
+#include "Runtime/Misc/BuildSettings.h"
+#include "Runtime/Profiler/Profiler.h"
+#include "Runtime/Graphics/GeneratedTextures.h"
+#include "Runtime/Utilities/BitUtility.h"
+#include "Runtime/GfxDevice/BatchRendering.h"
+#include "Runtime/Profiler/ExternalGraphicsProfiler.h"
+#include "ReplacementRenderLoop.h"
+#if UNITY_EDITOR
+#include "Runtime/BaseClasses/Tags.h"
+#endif
+#include "BuiltinShaderParamUtility.h"
+#include "Runtime/Math/ColorSpaceConversion.h"
+#include "Runtime/Math/SphericalHarmonics.h"
+#include "Runtime/Camera/LightManager.h"
+#include "External/MurmurHash/MurmurHash2.h"
+#include "Runtime/Filters/Mesh/LodMesh.h"
+#include "Runtime/Graphics/DrawUtil.h"
+
+// Enable/disable hash based pre pass render loop sorting functionality.
+#define ENABLE_PRE_PASS_LOOP_HASH_SORTING 0
+
+#define SEPERATE_PREPASS_SPECULAR UNITY_XENON
+
+PROFILER_INFORMATION(gPrepassSort, "RenderPrePass.Sort", kProfilerRender)
+PROFILER_INFORMATION(gPrepassGeom, "RenderPrePass.GeometryPass", kProfilerRender)
+PROFILER_INFORMATION(gPrepassLighting, "RenderPrePass.Lighting", kProfilerRender)
+PROFILER_INFORMATION(gPrepassLight, "RenderPrePass.Light", kProfilerRender)
+PROFILER_INFORMATION(gPrepassFinal, "RenderPrePass.FinalPass", kProfilerRender)
+PROFILER_INFORMATION(gPrepassFwdDepth, "RenderPrePass.ForwardObjectsToDepth", kProfilerRender)
+PROFILER_INFORMATION(gPrepassCombineDepthNormals, "RenderPrePass.CombineDepthNormals", kProfilerRender)
+
+
+static SHADERPROP (LightPos);
+static SHADERPROP (LightDir);
+static SHADERPROP (LightColor);
+static SHADERPROP (LightTexture0);
+static SHADERPROP (LightBuffer);
+static SHADERPROP (LightAsQuad);
+
+// ShadowMapTexture must be in namespace or otherwise it conflicts with property in
+// ForwardShaderRenderLoop.cpp in batched Android build.
+namespace PrePassPrivate
+{
+static SHADERPROP (ShadowMapTexture);
+}
+
+#if SEPERATE_PREPASS_SPECULAR
+static SHADERPROP (LightSpecBuffer);
+#endif
+
+static Material* s_LightMaterial = NULL;
+static Material* s_CollectMaterial = NULL;
+
+static ShaderKeyword kKeywordHDRLightPrepassOn = keywords::Create ("HDR_LIGHT_PREPASS_ON");
+
+static PPtr<Mesh> s_Icosahedron = NULL;
+static PPtr<Mesh> s_Icosphere = NULL;
+static PPtr<Mesh> s_Pyramid = NULL;
+
+
+enum {
+	kLightingLayerCount = 4, // number of stencil bits used for lighting layers
+	
+	// Bits 7 down to 4 are used for excluding lights for other reasons.
+	kStencilMaskSomething = (1<<7),	// any object (i.e. not background)
+	kStencilMaskNonLightmapped = (1<<6), // non-lightmapped object
+	kStencilMaskBeyondShadowDistace = (1<<5), // beyond shadow distance
+	kStencilMaskLightBackface = (1<<4), // don't render light where its backface passes z test
+	
+	// The remaining 4 bits (3 down to 0) are used for lighting layers.
+	kStencilBitLayerStart = 0, // start of lighting layer bits
+	kStencilMaskLayers = ((1<<kLightingLayerCount)-1) << kStencilBitLayerStart, // 0x0F with the values above
+
+	kStencilGeomWriteMask = kStencilMaskSomething | kStencilMaskNonLightmapped | kStencilMaskBeyondShadowDistace | kStencilMaskLayers, // every bit above except kStencilMaskLightBackface
+};
+
+
+
+// Lights can illuminate arbitrary layer masks. Say we have several lights:
+//	La = XXXXXXXX
+//	Lb = XXXXXXX-
+//	Lc = XXXX-XXX
+//	Ld = XXXX-X--
+// Layers used for excluding lights are then:
+//       ----O-OO (3 in total)
+// In stencil buffer, we allocate 3 consecutive bits to handle this:
+// LaS = ---
+// LbS = --O
+// LcS = O--
+// LdS = OOO
+//
+// When rendering an object, set its layer's stencil bit if the object belongs to one of the excluded layers.
+//
+// When drawing a light, set stencil mask to light layer stencil mask, and stencil
+// test should be equal to zero in those bits.
+
+// Maps object layers to the stencil bits reserved for lighting layers.
+// Each layer set in the constructor's mask gets one stencil bit, handed out from
+// the highest reserved bit downwards. Only the first kLightingLayerCount set
+// layers receive a bit; the rest map to -1, as do layers absent from the mask.
+struct LightingLayers
+{
+	enum { kLayerCount = 32 };
+
+	LightingLayers (UInt32 lightMask)
+		: lightingLayerMask(lightMask)
+		, lightLayerCount(0)
+	{
+		const int topStencilBit = kStencilBitLayerStart + kLightingLayerCount - 1;
+		for (int layer = 0; layer < kLayerCount; ++layer)
+		{
+			layerToStencil[layer] = -1;
+
+			const UInt32 layerBit = UInt32(1) << layer;
+			if ((lightMask & layerBit) == 0)
+				continue;
+
+			// The n-th layer found (counting from zero) takes the n-th bit below the top one.
+			if (lightLayerCount < kLightingLayerCount)
+				layerToStencil[layer] = topStencilBit - lightLayerCount;
+			++lightLayerCount;
+		}
+	}
+
+	UInt32 lightingLayerMask;        // the mask passed to the constructor
+	int layerToStencil[kLayerCount]; // stencil bit index per layer, or -1
+	int lightLayerCount;             // number of layers set in the mask (may exceed kLightingLayerCount)
+};
+
+struct PrePassRenderData {	// one sortable entry of the pre-pass object list
+	int	roIndex;	// index into PrePassRenderLoop::m_Objects (see RenderPrePassObjectSorterHash)
+#if ENABLE_PRE_PASS_LOOP_HASH_SORTING
+	UInt32 hash;	// sort key compared by RenderPrePassObjectSorterHash after layering data and queue index
+#endif
+};
+typedef dynamic_array<PrePassRenderData> PreRenderPasses;
+
+
+struct PrePassRenderLoop
+{	/* Working state of one pre-pass render loop run: the context and object list
+	   it operates on, the sortable pass entries, and the stage functions.
+	   The stage functions are only declared here. */
+	const RenderLoopContext*	m_Context;
+	RenderObjectDataContainer*	m_Objects;	// indexed by PrePassRenderData::roIndex
+
+	#if GFX_ENABLE_DRAW_CALL_BATCHING
+	BatchRenderer				m_BatchRenderer;
+	#endif
+	
+	PreRenderPasses				m_PlainRenderPasses;
+
+	RenderTexture* RenderBasePass (RenderTexture* rtMain, const LightingLayers& lightingLayers, RenderObjectDataContainer& outRemainingObjects, MinMaxAABB& receiverBounds);
+	
+	void RenderLighting (
+								   ActiveLights& activeLights,
+								   RenderTexture* rtMain, 
+								   TextureID depthTextureID, 
+								   RenderTexture* rtNormalsSpec, 
+								   RenderTexture*& rtLight,
+								   
+#if SEPERATE_PREPASS_SPECULAR
+								   RenderTexture*& rtLightSpec,
+#endif
+								   const Vector4f& lightFade, 
+								   const LightingLayers& lightingLayers, 
+								   MinMaxAABB& receiverBounds, 
+								   RenderTexture** outMainShadowMap);
+	
+	void RenderFinalPass (RenderTexture* rtMain, 
+						  RenderTexture* rtLight,
+#if SEPERATE_PREPASS_SPECULAR
+						  RenderTexture* rtLightSpec,
+#endif 
+						  bool hdr, 
+						  bool linearLighting);
+
+	struct RenderPrePassObjectSorterHash
+	{	// NOTE(review): the visible definition of operator() is wrapped in #if ENABLE_PRE_PASS_LOOP_HASH_SORTING
+		bool operator()( const PrePassRenderData& ra, const PrePassRenderData& rb ) const;
+		const PrePassRenderLoop* queue;	// loop whose m_Objects the roIndex values refer to
+	};
+
+	void SortPreRenderPassData( PreRenderPasses& passes )
+	{	// sorts 'passes' in place with RenderPrePassObjectSorterHash bound to this loop
+		RenderPrePassObjectSorterHash sorter;
+		sorter.queue = this;
+		std::sort( passes.begin(), passes.end(), sorter );
+	}
+};
+
+
+struct RenderPrePassObjectSorter {	// comparison functor over RenderObjectData
+	bool operator()( const RenderObjectData& ra, const RenderObjectData& rb ) const;	// true when ra is ordered before rb
+};
+
+	
+
+
+// Ordering used for pre-pass objects. Keys, in decreasing priority:
+//   1. global layering data (layer, then in-layer order)
+//   2. render queue index (ascending)
+//   3. lightmap index (ascending)
+//   4. static batch index (descending; only with GFX_ENABLE_DRAW_CALL_BATCHING)
+//   5. material instance ID (ascending)
+//   6. distance (larger value first; intended as front to back)
+bool RenderPrePassObjectSorter::operator()( const RenderObjectData& ra, const RenderObjectData& rb ) const
+{
+	// 1. Layering depth wins over everything else.
+	bool layeringOrder;
+	const bool layeringDiffers = CompareGlobalLayeringData(ra.globalLayeringData, rb.globalLayeringData, layeringOrder);
+	if (layeringDiffers)
+		return layeringOrder;
+
+	// 2. Render queue.
+	const bool sameQueue = (ra.queueIndex == rb.queueIndex);
+	if (!sameQueue)
+		return ra.queueIndex < rb.queueIndex;
+
+	// 3. Lightmap index.
+	const bool sameLightmap = (ra.lightmapIndex == rb.lightmapIndex);
+	if (!sameLightmap)
+		return ra.lightmapIndex < rb.lightmapIndex;
+
+#if GFX_ENABLE_DRAW_CALL_BATCHING
+	// 4. Static batch index, higher first: statically batched geometry is assumed
+	//    to occlude more, so it is drawn earlier.
+	const bool sameStaticBatch = (ra.staticBatchIndex == rb.staticBatchIndex);
+	if (!sameStaticBatch)
+		return rb.staticBatchIndex < ra.staticBatchIndex;
+#endif
+
+	// 5. Material, compared through instance IDs (sorting by shader might be better).
+	if( ra.material != rb.material )
+		return ra.material->GetInstanceID() < rb.material->GetInstanceID();
+
+	// 6. Distance.
+	return rb.distance < ra.distance;
+}
+
+#if ENABLE_PRE_PASS_LOOP_HASH_SORTING
+// Hash-based ordering for pre-pass entries. Keys, in decreasing priority:
+// global layering data, render queue index (ascending), precomputed hash
+// (ascending), distance (larger value first; intended as front to back).
+bool PrePassRenderLoop::RenderPrePassObjectSorterHash::operator()( const PrePassRenderData& ra, const PrePassRenderData& rb ) const
+{
+	RenderObjectDataContainer& objects = *queue->m_Objects;
+	const RenderObjectData& dataa = objects[ra.roIndex];
+	const RenderObjectData& datab = objects[rb.roIndex];
+
+	// Layering depth wins over everything else.
+	bool layeringOrder;
+	const bool layeringDiffers = CompareGlobalLayeringData(dataa.globalLayeringData, datab.globalLayeringData, layeringOrder);
+	if (layeringDiffers)
+		return layeringOrder;
+
+	// Then the render queue.
+	const bool sameQueue = (dataa.queueIndex == datab.queueIndex);
+	if (!sameQueue)
+		return dataa.queueIndex < datab.queueIndex;
+
+	// Then the state hash.
+	const bool sameHash = (ra.hash == rb.hash);
+	if (!sameHash)
+		return ra.hash < rb.hash;
+
+	// Finally the distance.
+	return datab.distance < dataa.distance;
+}
+#endif
+
+static Texture* defaultSpotCookie = NULL;
+
+// Binds the light's cookie texture to the light material's LightTexture0 property.
+// A light without a cookie leaves the material untouched, except for spot lights,
+// which get the default spot cookie from the render settings. That default is
+// fetched once and cached in defaultSpotCookie.
+static void AssignCookieToMaterial(const Light& light, Material* lightMaterial)
+{
+	//@TODO: when positions are computed from screen space, mipmapping of the cookie works
+	// against us: adjacent pixels can end up with very similar UVs, so high (mostly black)
+	// mip levels get sampled.
+	// A proper fix would be manual derivatives based on something else in the shader, but that
+	// needs SM3.0 on D3D and GLSL in GL. So just use bad mip bias for now.
+
+	Texture* const lightCookie = light.GetCookie();
+	if (lightCookie != NULL)
+	{
+		lightMaterial->SetTexture (kSLPropLightTexture0, lightCookie);
+		return;
+	}
+
+	if (light.GetType() != kLightSpot)
+		return;
+
+	if (defaultSpotCookie == NULL)
+		defaultSpotCookie = (Texture*)GetRenderSettings().GetDefaultSpotCookie();
+
+	lightMaterial->SetTexture (kSLPropLightTexture0, defaultSpotCookie);
+}
+
+
+// To properly collect & blur directional light's screen space shadow map,
+// we need to have shadow receivers that are forward-rendered in the depth buffer.
+// Also, if camera needs a depth texture, forward-rendered objects should be there
+// as well.
+//   ctx:                     its camera is used for the profiler scope.
+//   rt, rtColorSurface,
+//   rtDepthSurface:          render target activated before drawing.
+//   forwardRenderedObjects:  candidates; NULL or empty means there is nothing to do.
+//   width, height:           not used by this function.
+//   cameraNeedsDepthTexture: when false, only shadow receivers that have a forward
+//                            subshader are drawn.
+static void RenderForwardObjectsIntoDepth (
+	const RenderLoopContext& ctx,
+	RenderTexture* rt,
+	RenderObjectDataContainer* forwardRenderedObjects,
+	RenderSurfaceHandle rtColorSurface,
+	RenderSurfaceHandle rtDepthSurface,
+	int width, int height,
+	bool cameraNeedsDepthTexture)
+{
+	Assert (rt);
+
+	const bool haveObjects = forwardRenderedObjects && forwardRenderedObjects->size() != 0;
+	if (!haveObjects)
+		return;
+
+	PROFILER_AUTO_GFX(gPrepassFwdDepth, ctx.m_Camera);
+	GPU_AUTO_SECTION(kGPUSectionOpaquePass);
+
+	Shader* depthShader = GetCameraDepthTextureShader ();
+	if (depthShader == NULL)
+		return;
+
+	// Without a camera depth texture only potential shadow receivers matter,
+	// so filter the list down instead of drawing every forward object.
+	RenderObjectDataContainer shadowReceivers;
+	if (!cameraNeedsDepthTexture)
+	{
+		const size_t objectCount = forwardRenderedObjects->size();
+		shadowReceivers.reserve (objectCount / 4);
+		for (size_t idx = 0; idx != objectCount; ++idx)
+		{
+			RenderObjectData& roData = (*forwardRenderedObjects)[idx];
+			DebugAssert (roData.visibleNode);
+			BaseRenderer* renderer = roData.visibleNode->renderer;
+			DebugAssert (renderer);
+
+			if (renderer->GetReceiveShadows())
+			{
+				// Keep the object only if its shader has a forward subshader.
+				const int forwardSubshader = roData.shader->GetShaderLabShader()->GetDefaultSubshaderIndex (kRenderPathExtForward);
+				if (forwardSubshader != -1)
+					shadowReceivers.push_back (roData);
+			}
+		}
+
+		if (shadowReceivers.size() == 0)
+			return; // every candidate was filtered out
+		forwardRenderedObjects = &shadowReceivers;
+	}
+
+	RenderTexture::SetActive (1, &rtColorSurface, rtDepthSurface, rt);
+	RenderSceneShaderReplacement (*forwardRenderedObjects, depthShader, "RenderType");
+}
+
+// Collects a directional light's shadow map into a full-size screen-space texture.
+//   ctx:        its camera supplies the far plane and the corner rays.
+//   shadowMap:  source shadow map; it is released back to the render buffer
+//               manager once the collect pass has been drawn.
+//   blurWidth,
+//   blurFade:   forwarded to BlurScreenShadowMap when soft shadows are in use.
+//   shadowType: selects the shadow keywords and whether the result is blurred.
+// Returns the screen-space shadow texture, or the result of BlurScreenShadowMap.
+static RenderTexture* ComputeScreenSpaceShadowMap (
+	const RenderLoopContext& ctx,
+	RenderTexture* shadowMap,
+	float blurWidth,
+	float blurFade,
+	ShadowType shadowType)
+{
+	Assert (shadowMap);
+
+	GfxDevice& device = GetGfxDevice();
+
+	// Lazily create the material used for the collect pass.
+	if (s_CollectMaterial == NULL)
+	{
+		Shader* collectShader = GetScriptMapper().FindShader ("Hidden/Internal-PrePassCollectShadows");
+		s_CollectMaterial = Material::CreateMaterial (*collectShader, Object::kHideAndDontSave);
+	}
+
+	SetShadowsKeywords (kLightDirectional, shadowType, false, false);
+	RenderBufferManager& rbm = GetRenderBufferManager ();
+
+	RenderTexture* screenShadowMap = rbm.GetTempBuffer (RenderBufferManager::kFullSize, RenderBufferManager::kFullSize, kDepthFormatNone, kRTFormatARGB32, 0, kRTReadWriteLinear);
+	RenderTexture::SetActive (screenShadowMap);
+
+	// Clear so that tiled and multi-GPU systems don't do a RT unresolve
+	float clearColor[4] = {1,0,1,0};
+	device.Clear(kGfxClearColor, clearColor, 1.0f, 0);
+
+	LoadFullScreenOrthoMatrix ();
+	s_CollectMaterial->SetTexture (PrePassPrivate::kSLPropShadowMapTexture, shadowMap);
+	s_CollectMaterial->SetPass (0);
+
+	device.ImmediateBegin (kPrimitiveQuads);
+
+	const float f = ctx.m_Camera->GetProjectionFar();
+
+	const Transform& camtr = ctx.m_Camera->GetComponent(Transform);
+	Matrix4x4f cameraWorldToLocalNoScale = camtr.GetWorldToLocalMatrixNoScale();
+
+	// Full-screen quad: (0,0), (1,0), (1,1), (0,1). Each vertex carries, as its
+	// normal, the camera-local position of the matching far-plane corner.
+	const float cornerX[4] = { 0.0f, 1.0f, 1.0f, 0.0f };
+	const float cornerY[4] = { 0.0f, 0.0f, 1.0f, 1.0f };
+	for (int corner = 0; corner < 4; ++corner)
+	{
+		const float x = cornerX[corner];
+		const float y = cornerY[corner];
+
+		device.ImmediateTexCoord (0, x, y, 0.0f);
+		const Vector3f ray = cameraWorldToLocalNoScale.MultiplyPoint3(ctx.m_Camera->ViewportToWorldPoint (Vector3f(x, y, f)));
+		device.ImmediateNormal (ray.x, ray.y, ray.z);
+		device.ImmediateVertex (x, y, 0.1f);
+	}
+
+	device.ImmediateEnd ();
+	GPU_TIMESTAMP();
+
+	rbm.ReleaseTempBuffer (shadowMap);
+
+	// possibly blur into another screen-space render texture
+	SetShadowsKeywords (kLightDirectional, shadowType, true, true);
+	const bool blurResult = IsSoftShadow(shadowType) && GetSoftShadowsEnabled();
+	if (!blurResult)
+		return screenShadowMap;
+
+	return BlurScreenShadowMap (screenShadowMap, shadowType, f, blurWidth, blurFade);
+}
+
+// Draws the geometry that covers the screen area a light can affect.
+//   ctx:          its camera supplies the near/far planes and the corner rays.
+//   light:        the active light; its screenRect bounds the quad path.
+//   lightPos:     world position used for point light geometry.
+//   lightMatrix:  base transform for the spot light pyramid.
+//   renderAsQuad: forces the quad path for point and spot lights.
+static void RenderLightGeom (const RenderLoopContext& ctx, const ActiveLight& light, const Vector3f& lightPos, const Matrix4x4f& lightMatrix, const bool renderAsQuad)
+{
+	// Spot and point lights are drawn as tight geometry. When that geometry intersects
+	// neither the near nor the far plane, the stencil optimisation applies: z tested back
+	// faces go into stencil, and front faces then only pass for those pixels.
+	// When it intersects near, back faces are drawn with z test greater. That must not be
+	// used otherwise, since objects could sit between the camera and the light without
+	// touching the light.
+	// When it intersects far, front faces are drawn without any gimmicks.
+	// When it intersects both near and far, it is drawn as a quad.
+
+	GfxDevice& device = GetGfxDevice();
+	Light& l = *light.light;
+	const float lightRange = l.GetRange();
+	const float nearDist = ctx.m_Camera->GetProjectionNear() * 1.001f;
+
+	if (l.GetType() == kLightPoint && !renderAsQuad)
+	{
+		#if GFX_USE_SPHERE_FOR_POINT_LIGHT
+			ChannelAssigns channels;
+			channels.Bind (kShaderChannelVertex, kVertexCompVertex);
+
+			// Older content might have included/overridden the old Internal-PrePassLighting.shader,
+			// which relied on normals being zeros here. Light .fbx files have zero normals just for that.
+			if (!IS_CONTENT_NEWER_OR_SAME(kUnityVersion4_3_a1))
+				channels.Bind (kShaderChannelNormal, kVertexCompNormal);
+
+			// Translation to the light position with a uniform scale of the light range.
+			Matrix4x4f meshMatrix;
+			meshMatrix.SetTranslate (lightPos);
+			meshMatrix.Get (0, 0) = lightRange;
+			meshMatrix.Get (1, 1) = lightRange;
+			meshMatrix.Get (2, 2) = lightRange;
+			// Point lights bigger than 0.25 of the screen height can be rendered with high-poly, but tighter geometry.
+			DrawUtil::DrawMesh (channels, light.screenRect.height > 0.25f ? *s_Icosphere : *s_Icosahedron, meshMatrix, -1);
+		#else
+			// PS3 is not the best at vertex processing, so stick to low-poly meshes
+			device.ImmediateShape(lightPos.x, lightPos.y, lightPos.z, lightRange, GfxDevice::kShapeCube);
+		#endif
+	}
+	else if (l.GetType() == kLightSpot && !renderAsQuad)
+	{
+		Matrix4x4f meshMatrix (lightMatrix);
+		ChannelAssigns channels;
+		channels.Bind (kShaderChannelVertex, kVertexCompVertex);
+		if (!IS_CONTENT_NEWER_OR_SAME(kUnityVersion4_3_a1))
+			channels.Bind (kShaderChannelNormal, kVertexCompNormal);
+		const float sideLength = lightRange / l.GetCotanHalfSpotAngle ();
+		meshMatrix.Scale (Vector3f(sideLength, sideLength, lightRange));
+		DrawUtil::DrawMesh (channels, *s_Pyramid, meshMatrix, -1);
+	}
+	else // Directional light or spot/point that needs to be rendered as a quad
+	{ 
+		DeviceViewProjMatricesState preserveViewProj;
+
+		const Camera* camera = ctx.m_Camera;
+		const float nearPlane = 0;
+
+		const float x1 = light.screenRect.x;
+		const float x2 = light.screenRect.x + light.screenRect.width;
+		const float y1 = light.screenRect.y;
+		const float y2 = light.screenRect.y + light.screenRect.height;
+
+		// Quad corners in drawing order.
+		const float cornerX[4] = { x1, x1, x2, x2 };
+		const float cornerY[4] = { y1, y2, y2, y1 };
+
+		// Rays pointing from the camera to the corners on the near plane, in camera space
+		Vector3f rays[4];
+		for (int corner = 0; corner < 4; ++corner)
+			rays[corner] = camera->ViewportToCameraPoint (Vector3f(cornerX[corner], cornerY[corner], nearDist));
+
+		// Set up orthographic projection not to have to deal with precision problems
+		// that show up when drawing a full screen quad in perspective projection in world space.
+		LoadFullScreenOrthoMatrix (nearPlane, camera->GetProjectionFar(), true);
+
+		// Draw the quad on the near plane; every vertex carries its ray as the normal.
+		device.ImmediateBegin (kPrimitiveQuads);
+		for (int corner = 0; corner < 4; ++corner)
+		{
+			device.ImmediateNormal (rays[corner].x, rays[corner].y, rays[corner].z);
+			device.ImmediateVertex (cornerX[corner], cornerY[corner], nearPlane);
+		}
+		device.ImmediateEnd ();
+		GPU_TIMESTAMP();
+	}
+}
+
+// Builds the stencil mask for a light: one bit for every layer that is missing
+// from the light's culling mask and has a stencil bit assigned in lightingLayers.
+static UInt32 LightMask (const Light& l, const LightingLayers& lightingLayers)
+{
+	UInt32 mask = 0U;
+
+	// Walk the excluded layers from bit 0 upwards until none are left.
+	UInt32 remainingExcluded = ~l.GetCullingMask();
+	for (int layer = 0; remainingExcluded != 0; ++layer, remainingExcluded >>= 1)
+	{
+		if ((remainingExcluded & 1) == 0)
+			continue;
+
+		const int stencilBit = lightingLayers.layerToStencil[layer];
+		if (stencilBit != -1)
+			mask |= 1 << stencilBit;
+	}
+
+	return mask;
+}
+
+static RenderTexture* RenderLight (
+						 const RenderLoopContext& ctx,
+						 const ShadowCullData& shadowCullData,
+						 QualitySettings::ShadowQuality shadowQuality,
+						 const LightmapSettings::LightmapsMode lightmapsMode,
+						 RenderTexture*& rtLight,
+						 RenderTexture* rtMain,
+						 int width, int height,
+						 DeviceStencilState* devStDisabled,
+						 const MinMaxAABB& receiverBounds,
+						 const DeviceMVPMatricesState& mvpState,
+						 const Vector4f& lightFade,
+						 const LightingLayers& lightingLayers,
+						 const ActiveLight& light,
+#if SEPERATE_PREPASS_SPECULAR
+						 bool specularPass,
+#endif
+						 bool returnShadowMap)
+{	/* Renders a single active light into the lighting buffer.
+	 *
+	 * In order, this function: reads the light's transform and shadow settings; obtains and
+	 * clears rtLight if it is still NULL; uploads the light matrix, position, direction and
+	 * colour; renders the light's shadow map when the light is shadowed; optionally writes a
+	 * stencil mask from the light geometry; then draws the light geometry once per lighting pass.
+	 *
+	 * rtLight          in/out. When NULL on entry a full-size temp buffer is fetched, made active
+	 *                  together with rtMain's depth surface, and cleared (black when the camera
+	 *                  uses HDR, white otherwise).
+	 * rtMain           supplies the depth surface that is bound while lighting is rendered.
+	 * width, height    not referenced anywhere in this body.
+	 * devStDisabled    stencil state applied before the shadow map is rendered.
+	 * receiverBounds   must be valid for shadows to be rendered; forwarded to RenderShadowMaps.
+	 * mvpState         view and projection matrices restored after the shadow branch.
+	 * lightFade        base value for kShaderVecLightmapFade / kShaderVecLightShadowData.
+	 * lightingLayers   layer-to-stencil mapping used to build this light's stencil mask.
+	 * light            the light to render; asserted to be visible in the prepass with a
+	 *                  non-empty screen rect.
+	 * specularPass     (SEPERATE_PREPASS_SPECULAR only) selects material pass 2 when the
+	 *                  material has more than two passes and the camera uses HDR.
+	 * returnShadowMap  when true the shadow map (possibly NULL) is returned without being
+	 *                  released; when false it is released here and NULL is returned.
+	 */
+	Light& l = *light.light;
+
+	PROFILER_AUTO_GFX(gPrepassLight, &l);
+
+	const Light::Lightmapping lightmappingMode = l.GetLightmappingForRender();
+	const Transform& trans = l.GetComponent(Transform);
+	Matrix4x4f lightMatrix = trans.GetLocalToWorldMatrixNoScale();
+	Vector3f lightPos = lightMatrix.GetPosition();
+
+	Assert(light.isVisibleInPrepass);
+	Assert(!light.screenRect.IsEmpty());
+
+	ShadowType lightShadows = l.GetShadows();
+	// Shadows on local lights are Pro only
+	if (lightShadows != kShadowNone && l.GetType() != kLightDirectional &&
+		!GetBuildSettings().hasLocalLightShadows)
+		lightShadows = kShadowNone;
+
+	// Check if soft shadows are allowed by license, quality settings etc.
+	if (IS_CONTENT_NEWER_OR_SAME(kUnityVersion4_1_a1) &&
+		lightShadows > kShadowHard && !GetSoftShadowsEnabled())
+		lightShadows = kShadowHard;
+
+	GfxDevice& device = GetGfxDevice();
+	BuiltinShaderParamValues& params = device.GetBuiltinParamValues();
+
+	RenderSurfaceHandle rtSurfaceColor;
+	RenderSurfaceHandle rtSurfaceDepth = rtMain->GetDepthSurfaceHandle(); // re-use depth from final target
+	RenderSurfaceHandle rtSurfaceMainColor = rtMain->GetColorSurfaceHandle(); // will allocate color later (if any lights will actually be present)
+
+	bool hdr = ctx.m_Camera->GetUsingHDR();
+	float white[] = {1,1,1,1};
+	float black[] = {0,0,0,0};
+	UInt32 rtFlags = RenderTexture::kFlagDontRestoreColor;
+	// Lighting buffer has not been obtained yet: fetch it, bind it with the main depth surface and clear it.
+	if (!rtLight)
+	{
+		rtLight = GetRenderBufferManager().GetTempBuffer (RenderBufferManager::kFullSize, RenderBufferManager::kFullSize, kDepthFormatNone, hdr ? GetGfxDevice().GetDefaultHDRRTFormat() : kRTFormatARGB32, 0, kRTReadWriteLinear);
+
+		if (!rtLight->IsCreated())
+			rtLight->Create();
+		
+		rtLight->SetFilterMode (kTexFilterNearest);
+		rtSurfaceColor = rtLight->GetColorSurfaceHandle();
+
+
+		RenderTexture::SetActive (1, &rtSurfaceColor, rtSurfaceDepth, rtLight, 0, kCubeFaceUnknown, rtFlags);
+		GraphicsHelper::Clear(kGfxClearColor, hdr ? black : white, 1.0f, 0);
+		GPU_TIMESTAMP();
+	}
+	
+	rtSurfaceColor = rtLight->GetColorSurfaceHandle();
+
+	l.SetLightKeyword();
+
+	Vector3f lightDir = lightMatrix.GetAxisZ();
+	ColorRGBAf lightCol = GammaToActiveColorSpace (l.GetColor()) * l.GetIntensity() * 2.0f;
+	// Fill kShaderMatLightMatrix: always for spot lights; for point and directional lights only when a cookie is set.
+	Matrix4x4f temp1, temp2, temp3;
+	if (l.GetType() == kLightSpot)
+	{
+		Matrix4x4f worldToLight = l.GetWorldToLocalMatrix();
+		{
+			temp1.SetScale (Vector3f (-.5f, -.5f, 1.0f));
+			temp2.SetTranslate (Vector3f (.5f, .5f, 0.0f));
+			temp3.SetPerspectiveCotan( l.GetCotanHalfSpotAngle(), 0.0f, l.GetRange() );
+			// temp2 * temp3 * temp1 * worldToLight
+			Matrix4x4f temp4;
+			MultiplyMatrices4x4 (&temp2, &temp3, &temp4);
+			MultiplyMatrices4x4 (&temp4, &temp1, &temp2);
+			MultiplyMatrices4x4 (&temp2, &worldToLight, &params.GetWritableMatrixParam(kShaderMatLightMatrix));
+		}
+	}
+	else if (l.GetCookie())
+	{
+		if (l.GetType() == kLightPoint)
+		{
+			params.SetMatrixParam(kShaderMatLightMatrix, l.GetWorldToLocalMatrix());
+		}
+		else if (l.GetType() == kLightDirectional)
+		{
+			float scale = 1.0f / l.GetCookieSize();
+			temp1.SetScale (Vector3f (scale, scale, 0));
+			temp2.SetTranslate (Vector3f (.5f, .5f, 0));
+			// temp2 * temp1 * l.GetWorldToLocalMatrix()
+			MultiplyMatrices4x4 (&temp2, &temp1, &temp3);
+			MultiplyMatrices4x4 (&temp3, &l.GetWorldToLocalMatrix(), &params.GetWritableMatrixParam(kShaderMatLightMatrix));
+		}
+	}
+
+	AssignCookieToMaterial(l, s_LightMaterial);
+	// Directional lights, and lights flagged as intersecting both near and far, are passed to RenderLightGeom with renderAsQuad set.
+	const bool renderAsQuad = light.intersectsNear && light.intersectsFar || l.GetType() == kLightDirectional;
+	ShaderLab::g_GlobalProperties->SetFloat(kSLPropLightAsQuad, renderAsQuad ? 1.0f : 0.0f);
+	ShaderLab::g_GlobalProperties->SetVector (kSLPropLightPos, lightPos.x, lightPos.y, lightPos.z, 1.0f / (l.GetRange() * l.GetRange())); // w = 1 / range^2
+	ShaderLab::g_GlobalProperties->SetVector (kSLPropLightDir, lightDir.x, lightDir.y, lightDir.z, 0.0f);
+	ShaderLab::g_GlobalProperties->SetVector (kSLPropLightColor, lightCol.GetPtr());
+	///@TODO: cleanup, remove this from Internal-PrePassLighting shader
+	s_LightMaterial->SetTexture (ShaderLab::Property("_LightTextureB0"), builtintex::GetAttenuationTexture());
+
+	RenderTexture* shadowMap = NULL;
+	ShadowCameraData camData(shadowCullData);
+
+	// Shadow branch: only taken for shadowed lights with valid receiver bounds while shadows are enabled in quality settings.
+	if (light.shadowedLight != NULL && receiverBounds.IsValid() && shadowQuality != QualitySettings::kShadowsDisable)
+	{
+		Assert(light.insideShadowRange);
+
+		ShadowType lightShadows = l.GetShadows(); // NOTE(review): shadows the outer lightShadows; the adjustments made to the outer variable above are never read after this point - confirm this is intended
+
+		if (IS_CONTENT_NEWER_OR_SAME(kUnityVersion4_1_a1))
+		{
+			if (shadowQuality == QualitySettings::kShadowsHardOnly && lightShadows != kShadowNone)
+				lightShadows = kShadowHard;
+		}
+
+		SetShadowsKeywords (l.GetType(), lightShadows, false, true);
+
+		Matrix4x4f shadowMatrices[kMaxShadowCascades];
+		device.SetViewMatrix (ctx.m_CurCameraMatrix.GetPtr());
+		device.SetStencilState (devStDisabled, 0);
+
+		// Rendering shadowmaps will switch away from the lighting buffer and then will switch back.
+		// Nothing we can do about it, so don't produce the warning.
+		device.IgnoreNextUnresolveOnCurrentRenderTarget();
+
+		shadowMap = RenderShadowMaps (camData, light, receiverBounds, false, shadowMatrices);
+		
+		if (!shadowMap)
+		{
+			// If shadow map could not actually be created (no casters, out of VRAM, whatever),
+			// set the no shadows keywords and proceed. So there will be no shadows,
+			// but otherwise it will be ok.
+			SetNoShadowsKeywords();
+		}
+		else
+		{
+			Vector4f data;
+			
+			// ambient & shadow fade out
+			data.x = 1.0f - l.GetShadowStrength(); // R = 1-strength
+			data.y = data.z = data.w = 0.0f;
+			params.SetVectorParam(kShaderVecLightShadowData, data);
+			
+			if (l.GetType() == kLightDirectional)
+			{
+				params.SetMatrixParam(kShaderMatWorldToShadow, shadowMatrices[0]);
+				SetCascadedShadowShaderParams (shadowMatrices, camData.splitDistances, camData.splitSphereCentersAndSquaredRadii);
+
+				shadowMap = ComputeScreenSpaceShadowMap (
+					ctx,
+					shadowMap,
+					l.GetShadowSoftness(),
+					l.GetShadowSoftnessFade(),
+					lightShadows);
+			}
+			else if (l.GetType() == kLightSpot)
+			{
+				params.SetMatrixParam(kShaderMatWorldToShadow, shadowMatrices[0]);
+			}
+
+			// texel offsets for PCF
+			float offX = 0.5f / shadowMap->GetGLWidth();
+			float offY = 0.5f / shadowMap->GetGLHeight();
+			data.z = 0.0f; data.w = 0.0f;
+			data.x = -offX; data.y = -offY; params.SetVectorParam(kShaderVecShadowOffset0, data);
+			data.x =  offX; data.y = -offY; params.SetVectorParam(kShaderVecShadowOffset1, data);
+			data.x = -offX; data.y =  offY; params.SetVectorParam(kShaderVecShadowOffset2, data);
+			data.x =  offX; data.y =  offY; params.SetVectorParam(kShaderVecShadowOffset3, data);
+			s_LightMaterial->SetTexture (PrePassPrivate::kSLPropShadowMapTexture, shadowMap);
+			// Switch back to the lighting buffer after shadow rendering.
+			if (rtLight != NULL) // NOTE(review): rtLight has already been dereferenced above, so the else branch looks unreachable - confirm
+				RenderTexture::SetActive (1, &rtSurfaceColor, rtSurfaceDepth, rtLight);
+			else
+				RenderTexture::SetActive (1, &rtSurfaceMainColor, rtSurfaceDepth, rtMain);
+		}
+		device.SetViewMatrix(mvpState.GetView().GetPtr());
+		device.SetProjectionMatrix(mvpState.GetProj());
+		SetClippingPlaneShaderProps();
+
+		// restore the cull mode, since it could be changed by a shadow caster with odd-negative scale
+		device.SetNormalizationBackface( kNormalizationDisabled, false );
+	}
+	else
+	{
+		SetNoShadowsKeywords ();
+	}
+	
+	// Draw each light in two passes: illuminate non-lightmapped objects; illuminate lightmapped objects.
+	int lightPassCount = 2;
+	int lightPassAddBits[2] = { kStencilMaskNonLightmapped, 0 };
+	if (lightmappingMode == Light::kLightmappingRealtimeOnly)
+	{
+		// If light is realtime only, it's enough to draw one pass; that illuminates any object
+		// be it lightmapped or not.
+		lightPassCount = 1;
+		lightPassAddBits[0] = 0;
+	}
+	else if (lightmappingMode == Light::kLightmappingAuto && lightmapsMode != LightmapSettings::kDualLightmapsMode)
+	{
+		// If it's an auto light but we're in single lightmaps mode, draw in one pass only to illuminate
+		// non-lightmapped objects
+		// TODO: realtime shadows from auto lights won't be received by lightmapped objects. Do we want to fix it?
+		lightPassCount = 1;
+		lightPassAddBits[0] = kStencilMaskNonLightmapped;
+	}
+
+	//TODO: skip if smaller than certain size
+	const bool useStencilMask = !light.intersectsNear && !light.intersectsFar &&
+		(lightmappingMode == Light::kLightmappingRealtimeOnly) &&
+		(l.GetType() == kLightSpot || l.GetType() == kLightPoint );
+
+	const UInt32 lightmask = LightMask (l, lightingLayers); // stencil bits of layers excluded by the light's culling mask
+
+	// Render stencil mask, to discard all light pixels, at which the light is fully in front of scene geometry.
+	if (useStencilMask)
+	{
+		Material::GetDefault ()->SetPass (0);
+		#if UNITY_XENON
+		device.SetNullPixelShader ();
+		#endif
+
+		GfxBlendState blendstate;
+		blendstate.renderTargetWriteMask = 0U; // no colour writes while the mask is laid down
+		device.SetBlendState (device.CreateBlendState(blendstate), 0);
+
+		GfxRasterState rasterstate;
+		rasterstate.cullMode = kCullOff;
+		device.SetRasterState (device.CreateRasterState(rasterstate));
+
+		GfxDepthState depthstate;
+		depthstate.depthWrite = false;
+		depthstate.depthFunc = kFuncLEqual;
+		device.SetDepthState (device.CreateDepthState(depthstate));
+
+		GfxStencilState lightStencil;
+		lightStencil.stencilEnable = true;
+		lightStencil.readMask = 0xFFU;
+		lightStencil.writeMask = kStencilMaskLightBackface; // only the light-backface bit is modified
+		lightStencil.stencilZFailOpBack = kStencilOpInvert;
+		lightStencil.stencilZFailOpFront = kStencilOpInvert;
+		lightStencil.stencilPassOpBack = kStencilOpKeep;
+		lightStencil.stencilPassOpFront = kStencilOpKeep;
+		lightStencil.stencilFuncBack = (lightmask != 0 ) ? kFuncNotEqual : kFuncAlways;
+		lightStencil.stencilFuncFront = (lightmask != 0 ) ? kFuncNotEqual : kFuncAlways;
+		device.SetStencilState (device.CreateStencilState(lightStencil), lightmask|kStencilMaskSomething|kStencilMaskNonLightmapped);
+
+		#if UNITY_XENON
+		// Clear within light-geom, sets all HiS to cull.
+		// Set to cull where equal to background (to deal with lightmasks), unoptimal but works
+		if (useStencilMask)
+			device.SetHiStencilState (false, true, kStencilMaskSomething|kStencilMaskNonLightmapped, kFuncEqual);
+		#endif
+
+		RenderLightGeom (ctx, light, lightPos, lightMatrix, renderAsQuad);
+
+		blendstate.renderTargetWriteMask = KColorWriteAll; // re-enable colour writes for the lighting passes
+		device.SetBlendState (device.CreateBlendState(blendstate), 0);
+
+		#if UNITY_XENON
+		device.HiStencilFlush (kHiSflush_sync);
+		#endif
+	}
+	// One draw of the light geometry per lighting pass (see lightPassCount above).
+	for (int pp = 0; pp < lightPassCount; ++pp)
+	{
+		Vector4f lightingFade = lightFade;
+		Vector4f shadowFade = lightFade;
+		shadowFade.x = 1.0f - l.GetShadowStrength();
+		if (pp == 0 || lightmappingMode == Light::kLightmappingRealtimeOnly)
+			lightingFade.z = lightingFade.w = 0.0f;
+		else
+			shadowFade.z = shadowFade.w = 0.0f;
+		params.SetVectorParam(kShaderVecLightmapFade, lightingFade);
+		params.SetVectorParam(kShaderVecLightShadowData, shadowFade);
+
+		// Disable mipmapping on light cookies
+		ShaderLab::TexEnv* cookieEnv = s_LightMaterial->GetProperties().GetTexEnv(kSLPropLightTexture0);
+		if (cookieEnv)
+		{
+			cookieEnv->TextureMipBiasChanged (-8);
+		}
+
+		#if SEPERATE_PREPASS_SPECULAR
+		if (s_LightMaterial->GetPassCount () > 2 && ctx.m_Camera->GetUsingHDR() && specularPass)
+			s_LightMaterial->SetPass (2);
+		else
+		#endif
+		if (s_LightMaterial->GetPassCount () > 1 && ctx.m_Camera->GetUsingHDR())
+			s_LightMaterial->SetPass (1);
+		else 
+			s_LightMaterial->SetPass (0);
+
+		// Construct stencil read mask
+		GfxStencilState stencil;
+		stencil.stencilEnable = true;
+		stencil.stencilFuncFront = stencil.stencilFuncBack = kFuncEqual;
+		stencil.readMask = kStencilMaskSomething;
+		// Check lightmapped vs. non-lightmapped unless it's a realtime light that
+		// only cares about not illuminating non-something.
+		if (lightmappingMode != Light::kLightmappingRealtimeOnly)
+			stencil.readMask |= kStencilMaskNonLightmapped;
+
+		if (pp != 0 && lightmappingMode != Light::kLightmappingRealtimeOnly)
+			stencil.readMask |= kStencilMaskBeyondShadowDistace;
+
+		stencil.readMask |= lightmask;
+		int stencilRef = kStencilMaskSomething + lightPassAddBits[pp];
+
+		if (useStencilMask)
+		{
+			// Clear stencil while rendering
+			stencil.writeMask = kStencilMaskLightBackface;
+			stencil.stencilZFailOpBack = kStencilOpZero;
+			stencil.stencilZFailOpFront = kStencilOpZero;
+			stencil.stencilPassOpBack = kStencilOpZero;
+			stencil.stencilPassOpFront = kStencilOpZero;
+			// Clear the kStencilMaskLightBackface bit even if rejecting pixel due to stencil layer mask
+			stencil.stencilFailOpBack = kStencilOpZero;
+			stencil.stencilFailOpFront = kStencilOpZero;
+
+			stencil.readMask |= kStencilMaskLightBackface;
+			stencilRef |= kStencilMaskLightBackface;
+		}
+
+		DeviceStencilState* devStCheck = device.CreateStencilState (stencil);
+		device.SetStencilState (devStCheck, stencilRef);
+
+		#if UNITY_XENON
+		// Set to cull when all == background (to deal with lightmasks), unoptimal but works
+		if (useStencilMask)
+			device.SetHiStencilState (true, true, kStencilMaskSomething|kStencilMaskNonLightmapped, kFuncEqual);
+		#endif
+
+		// Draw light shape
+		GfxRasterState rasterstate;
+		GfxDepthState depthstate;
+		depthstate.depthWrite = false;
+		if (light.intersectsNear && !light.intersectsFar && (l.GetType() == kLightSpot || l.GetType() == kLightPoint))
+		{
+			// When near (but not far) plane intersects the light, render back faces (tighter than rendering a bounding quad).
+			// Can't use this when not intersecting, since it would waste processing for objects between
+			// the light and the cam, even when they don't touch the light.
+			rasterstate.cullMode = kCullFront;
+			depthstate.depthFunc = kFuncGreater;
+		}
+		else
+		{
+			depthstate.depthFunc = kFuncLEqual;
+			#if UNITY_XENON
+			device.SetHiZEnable (kHiZEnable);
+			#endif
+		}
+		device.SetRasterState (device.CreateRasterState (rasterstate));
+		device.SetDepthState (device.CreateDepthState (depthstate));
+
+		RenderLightGeom (ctx, light, lightPos, lightMatrix, renderAsQuad);
+
+		#if UNITY_XENON
+		device.SetHiZEnable (kHiZAuto);
+		if (useStencilMask) 
+			device.HiStencilFlush (kHiSflush_async);
+		#endif
+	}
+	// Release the shadow map here unless the caller asked to receive it.
+	if (shadowMap && !returnShadowMap)
+		GetRenderBufferManager().ReleaseTempBuffer (shadowMap);
+
+	return returnShadowMap ? shadowMap : NULL;
+}
+
+
+void PrePassRenderLoop::RenderLighting (
+									  ActiveLights& activeLights,
+									  RenderTexture* rtMain,
+									  TextureID depthTextureID,
+									  RenderTexture* rtNormalsSpec,
+									  RenderTexture*& rtLight,
+										
+#if SEPERATE_PREPASS_SPECULAR
+									  RenderTexture*& rtLightSpec,
+#endif
+									  const Vector4f& lightFade,
+									  const LightingLayers& lightingLayers,
+									  MinMaxAABB& receiverBounds,
+									  RenderTexture** outMainShadowMap)
+{	/* Renders every light that is visible in the prepass into the lighting buffer(s).
+	 *
+	 * All lights except the main active light are rendered first; the main light is rendered
+	 * last with returnShadowMap = true so that its shadow map can be handed back.
+	 *
+	 * activeLights      lights to iterate; entries with isVisibleInPrepass == false are skipped.
+	 * rtMain            passed on to RenderLight; also the source of texel size / UV scale for
+	 *                   the camera depth texture property.
+	 * depthTextureID    bound as _CameraDepthTexture when gGraphicsCaps.hasStencilInDepthTexture.
+	 * rtNormalsSpec     bound as _CameraNormalsTexture; its GL size is used as width/height.
+	 * rtLight           in/out, asserted NULL on entry. Filled by RenderLight; on non-Xenon
+	 *                   builds a 16x16 cleared buffer is substituted if it is still NULL at the end.
+	 * rtLightSpec       (SEPERATE_PREPASS_SPECULAR only) in/out, asserted NULL on entry; target of
+	 *                   the second light loop iteration when the camera uses HDR.
+	 * lightFade         forwarded to RenderLight and used for the final kShaderVecLightmapFade.
+	 * receiverBounds    expanded by 0.01 in place before use.
+	 * outMainShadowMap  set to NULL first, then to the main light's shadow map if one is returned;
+	 *                   that shadow map is also registered as a render loop temp buffer.
+	 */
+	PROFILER_AUTO_GFX(gPrepassLighting, m_Context->m_Camera);
+	GPU_AUTO_SECTION(kGPUSectionDeferedLighting);
+	*outMainShadowMap = NULL;
+
+	Assert(rtLight == NULL);
+#if SEPERATE_PREPASS_SPECULAR
+	Assert(rtLightSpec == NULL);
+#endif
+	const QualitySettings::ShadowQuality shadowQuality = static_cast<QualitySettings::ShadowQuality>(GetQualitySettings().GetCurrent().shadows);
+	const LightmapSettings::LightmapsMode lightmapsMode = static_cast<LightmapSettings::LightmapsMode>(GetLightmapSettings().GetLightmapsMode());
+
+	ShadowCameraData camData(*m_Context->m_ShadowCullData);
+
+	// Prevent receiver bounds to be zero size in any dimension;
+	// causes trouble with calculating intersection of frustum and bounds.
+	receiverBounds.Expand( 0.01f );	
+
+	const Rectf screenRect = m_Context->m_Camera->GetScreenViewportRect(); // NOTE(review): not referenced again in this function
+	// Create the shared lighting material and light-shape meshes on first use.
+	if (!s_LightMaterial) {
+		Shader* shader = GetScriptMapper().FindShader ("Hidden/Internal-PrePassLighting");
+		s_LightMaterial = Material::CreateMaterial (*shader, Object::kHideAndDontSave); // NOTE(review): shader is dereferenced without a NULL check - confirm FindShader cannot fail here
+	}
+
+	if (s_Icosahedron.IsNull ())
+		s_Icosahedron = GetBuiltinResource<Mesh> ("icosahedron.fbx");
+	if (s_Icosphere.IsNull ())
+		s_Icosphere = GetBuiltinResource<Mesh> ("icosphere.fbx");
+	if (s_Pyramid.IsNull ())
+		s_Pyramid = GetBuiltinResource<Mesh> ("pyramid.fbx");
+	
+	static SHADERPROP (CameraDepthTexture);
+	static SHADERPROP (CameraNormalsTexture);
+	const int width = rtNormalsSpec->GetGLWidth();
+	const int height = rtNormalsSpec->GetGLHeight();
+	if (gGraphicsCaps.hasStencilInDepthTexture)
+	{
+		ShaderLab::g_GlobalProperties->SetRectTextureID (
+														 kSLPropCameraDepthTexture,
+														 depthTextureID,
+														 width,
+														 height,
+														 rtMain->GetTexelSizeX(),
+														 rtMain->GetTexelSizeY(),
+														 rtMain->GetUVScaleX(),
+														 rtMain->GetUVScaleY()
+														 );
+	}
+	
+	// set as _CameraNormalsTexture for external access
+	ShaderLab::g_GlobalProperties->SetTexture (kSLPropCameraNormalsTexture, rtNormalsSpec);
+
+	GfxDevice& device = GetGfxDevice();
+
+	SetAndRestoreWireframeMode setWireframeOff(false); // turn off wireframe; will restore old value in destructor
+	device.SetNormalizationBackface( kNormalizationDisabled, false );
+
+	DeviceStencilState* devStDisabled = device.CreateStencilState (GfxStencilState());
+
+
+	DeviceMVPMatricesState preserveMVP; // passed to RenderLight, which reads view/projection back from it after shadow rendering
+
+	device.SetWorldMatrix (Matrix4x4f::identity.GetPtr());
+
+	RenderTexture** currentLightTex = &rtLight;
+#if SEPERATE_PREPASS_SPECULAR
+	//Do 2 passes for HDR prepass lighting on xenon
+	for (int lp = 0; lp < (m_Context->m_Camera->GetUsingHDR() ? 2 : 1); ++lp)
+	{
+		if (lp == 0)
+			currentLightTex = &rtLight;
+		else
+			currentLightTex = &rtLightSpec;
+#endif
+	
+	const ActiveLight* mainActiveLight = GetMainActiveLight(activeLights);
+	ActiveLights::Array::iterator it, itEnd = activeLights.lights.end();
+	for (it = activeLights.lights.begin(); it != itEnd; ++it)
+	{
+		if (!it->isVisibleInPrepass)
+			continue;
+		if (&*it == mainActiveLight)
+		{
+			// skip main light now; will render it last
+			continue;
+		}
+		RenderLight (*m_Context, camData, shadowQuality, lightmapsMode,
+			*currentLightTex,
+			rtMain,
+			width, height, devStDisabled, receiverBounds,
+			preserveMVP, lightFade, lightingLayers, *it,
+#if SEPERATE_PREPASS_SPECULAR
+			lp == 1, 
+#endif
+			false);
+	}
+
+	#if UNITY_XENON
+	device.SetStencilState (devStDisabled, 0);
+	device.SetHiStencilState (false, false, 0, kFuncEqual);
+	#endif
+
+	// render main light
+	if (mainActiveLight)
+	{
+		RenderTexture* shadowMap = RenderLight (
+			*m_Context, camData, shadowQuality, lightmapsMode,
+			*currentLightTex,
+			rtMain,
+			width, height, devStDisabled, receiverBounds,
+			preserveMVP, lightFade, lightingLayers, *mainActiveLight,
+#if SEPERATE_PREPASS_SPECULAR
+			lp == 1, 
+#endif
+			true);
+		if (shadowMap)
+		{
+			AddRenderLoopTempBuffer (m_Context->m_RenderLoop, shadowMap);
+			*outMainShadowMap = shadowMap;
+		}
+	}
+#if SEPERATE_PREPASS_SPECULAR
+	}
+#endif
+	SetNoShadowsKeywords ();
+
+	Vector4f lightmapFade = lightFade;
+	// if we're not in dual lightmaps mode, always use the far lightmap, i.e. lightmapFade = 1
+	if (GetLightmapSettings().GetLightmapsMode() != LightmapSettings::kDualLightmapsMode) // NOTE(review): same query as the lightmapsMode local computed above
+		lightmapFade.z = lightmapFade.w = 1.0f;
+
+	device.GetBuiltinParamValues().SetVectorParam(kShaderVecLightmapFade, lightmapFade);
+
+	device.SetStencilState (devStDisabled, 0);
+	
+	#if !UNITY_XENON
+	// Ok, we didn't really have any lights worth rendering.
+	// Create a small render texture and clear it to white and pass it as the lighting buffer.
+	// Don't do that on 360; pointless and saves a resolve.
+	if (!rtLight)
+	{
+		rtLight = GetRenderBufferManager().GetTempBuffer (16, 16, kDepthFormatNone, kRTFormatARGB32, 0, kRTReadWriteLinear);
+		RenderTexture::SetActive (rtLight);
+		float white[] = {1,1,1,1};
+		float black[] = {0,0,0,0};
+		GraphicsHelper::Clear (kGfxClearColor, m_Context->m_Camera->GetUsingHDR() ? black : white, 1.0f, 0); // black is used instead of white for HDR cameras
+		GPU_TIMESTAMP();
+
+		// We just switched away from a Z buffer (only in case when no lights were there!),
+		// and we'll switch back to it. So ignore the unresolve warning on it.
+		device.IgnoreNextUnresolveOnRS(rtMain->GetDepthSurfaceHandle());
+	}
+	#endif
+}
+
+
+static RenderTexture* CombineDepthNormalsTexture (const RenderLoopContext& ctx, RenderObjectDataContainer& remainingObjects)
+{
+	// Blits through the internal combine shader into a full-size ARGB32 temp buffer and
+	// publishes the result as the global _CameraDepthNormalsTexture property.
+	// Returns NULL when the combine material cannot be created.
+	// remainingObjects is not used by this function.
+	PROFILER_AUTO_GFX(gPrepassCombineDepthNormals, ctx.m_Camera);
+
+	// The material is built once and then kept for all later calls.
+	static Material* s_CombineMaterial = NULL;
+	if (s_CombineMaterial == NULL)
+	{
+		Shader* combineShader = GetScriptMapper ().FindShader ("Hidden/Internal-CombineDepthNormals");
+		if (combineShader != NULL)
+			s_CombineMaterial = Material::CreateMaterial (*combineShader, Object::kHideAndDontSave);
+
+		// Either the shader lookup or the material creation came back empty.
+		if (s_CombineMaterial == NULL)
+		{
+			AssertString ("Coult not find depth+normals combine shader");
+			return NULL;
+		}
+	}
+
+	// Target for the combined data; cleared before the blit.
+	RenderTexture* combinedRT = GetRenderBufferManager().GetTempBuffer (
+		RenderBufferManager::kFullSize, RenderBufferManager::kFullSize,
+		kDepthFormatNone, kRTFormatARGB32, 0, kRTReadWriteLinear);
+	RenderTexture::SetActive (combinedRT);
+	GraphicsHelper::Clear (kGfxClearColor, ColorRGBAf(0.5f,0.5f,1.0f,1.0f).GetPtr(), 1.0f, 0);
+	GPU_TIMESTAMP();
+
+	// Combine depth & normals into single texture
+	ImageFilters::Blit (NULL, combinedRT, s_CombineMaterial, 0, false);
+
+	// The render loop takes care of the temp buffer from here on.
+	AddRenderLoopTempBuffer (ctx.m_RenderLoop, combinedRT);
+
+	static SHADERPROP (CameraDepthNormalsTexture);
+	ShaderLab::g_GlobalProperties->SetTexture (kSLPropCameraDepthNormalsTexture, combinedRT);
+
+	return combinedRT;
+}
+
+
+
+// Separate pass to render depth into a separate target. Only used on Macs with Radeon HDs, since
+// only there doing it the regular way is broken.
+#if GFX_SUPPORTS_OPENGL
+static RenderTexture* RenderBasePassDepth (const RenderLoopContext& ctx, RenderObjectDataContainer& renderData, PreRenderPasses& plainRenderPasses)
+{
+	// Draws every kPassLightPrePassBase pass of the listed objects into a dedicated
+	// full-size depth render texture and returns that texture. The texture is registered
+	// as a render loop temp buffer before anything is drawn.
+	GPU_AUTO_SECTION(kGPUSectionDeferedPrePass);
+
+	GfxDevice& device = GetGfxDevice();
+
+	RenderTexture* depthRT = GetRenderBufferManager().GetTempBuffer (
+		RenderBufferManager::kFullSize, RenderBufferManager::kFullSize,
+		kDepthFormat24, kRTFormatDepth, 0, kRTReadWriteLinear);
+	depthRT->SetFilterMode (kTexFilterNearest);
+	if (!depthRT->IsCreated())
+		depthRT->Create();
+	RenderTexture::SetActive (depthRT);
+	AddRenderLoopTempBuffer (ctx.m_RenderLoop, depthRT);
+
+	float clearColor[] = {0,0,0,0};
+	GraphicsHelper::Clear (kGfxClearAll, clearColor, 1.0f, 0);
+	GPU_TIMESTAMP();
+
+	device.SetViewMatrix (ctx.m_CurCameraMatrix.GetPtr());
+
+	const size_t objectCount = renderData.size();
+	for (size_t objIdx = 0; objIdx < objectCount; ++objIdx)
+	{
+		const PrePassRenderData& passData = plainRenderPasses[objIdx];
+		const RenderObjectData& object = renderData[passData.roIndex];
+		Shader* shader = object.shader;
+
+		// Objects whose shader has no subshader for the prepass path are not drawn here.
+		const int subshaderIndex = shader->GetShaderLabShader()->GetDefaultSubshaderIndex(kRenderPathExtPrePass);
+		if (subshaderIndex == -1)
+			continue;
+
+		const VisibleNode* visible = object.visibleNode;
+
+		SetObjectScale (device, visible->lodFade, visible->invScale);
+
+		//@TODO: if this returns true and we have any sort of batching, we'd have to break batches here
+		visible->renderer->ApplyCustomProperties(*object.material, shader, subshaderIndex);
+
+		ShaderLab::SubShader& subshader = shader->GetShaderLabShader()->GetSubShader (subshaderIndex);
+		const int passCount = subshader.GetValidPassCount();
+		for (int passIdx = 0; passIdx < passCount; ++passIdx)
+		{
+			ShaderPassType passType;
+			UInt32 passRenderOptions;
+			subshader.GetPass(passIdx)->GetPassOptions (passType, passRenderOptions);
+
+			// Only base prepass passes are rendered.
+			if (passType == kPassLightPrePassBase)
+			{
+				const ChannelAssigns* channels = object.material->SetPassWithShader(passIdx, shader, subshaderIndex);
+				if (!channels)
+					continue;
+
+				SetupObjectMatrix (visible->worldMatrix, visible->transformType);
+				visible->renderer->Render( object.subsetIndex, *channels );
+			}
+		}
+	}
+
+	return depthRT;
+}
+#endif
+
+inline float MultiplyAbsVectorZ (const Matrix4x4f& m, const Vector3f& v)
+{
+	// Weights the components of v by the absolute values of matrix elements 2, 6 and 10
+	// and returns the sum.
+	const float weightX = Abs(m.m_Data[2]);
+	const float weightY = Abs(m.m_Data[6]);
+	const float weightZ = Abs(m.m_Data[10]);
+	return weightX * v.x + weightY * v.y + weightZ * v.z;
+}
+
+
+RenderTexture* PrePassRenderLoop::RenderBasePass (
+									  RenderTexture* rtMain,
+									  const LightingLayers& lightingLayers,
+									  RenderObjectDataContainer& outRemainingObjects,
+									  MinMaxAABB& receiverBounds
+									 )
+{	/* Renders the kPassLightPrePassBase passes of every object in m_Objects.
+	 *
+	 * Output goes to a full-size ARGB32 temp buffer that is bound together with rtMain's
+	 * depth surface. While drawing, a per-object stencil reference is written that encodes
+	 * lightmapped / beyond-shadow-distance / lighting-layer information.
+	 *
+	 * rtMain               supplies the depth surface that is bound for this pass.
+	 * lightingLayers       layer-to-stencil-bit table used when building the stencil reference.
+	 * outRemainingObjects  receives every object whose subshader index resolves to -1; those
+	 *                      objects are not drawn here.
+	 * receiverBounds       grown by the world AABB of drawn objects, and of skipped objects that
+	 *                      are within shadow distance and receive shadows.
+	 *
+	 * Returns the normals/spec render texture, which has already been registered as a
+	 * render loop temp buffer.
+	 */
+	PROFILER_AUTO_GFX(gPrepassGeom, m_Context->m_Camera);
+	GPU_AUTO_SECTION(kGPUSectionDeferedPrePass);
+
+	const float shadowDistance = m_Context->m_ShadowCullData->shadowDistance;
+
+	GfxDevice& device = GetGfxDevice();
+	device.SetNormalizationBackface( kNormalizationDisabled, false );
+	// Stencil state shared by all draws: replace on pass, restricted to kStencilGeomWriteMask.
+	GfxStencilState stRender;
+	stRender.writeMask = kStencilGeomWriteMask;
+	stRender.stencilEnable = true;
+	stRender.stencilPassOpFront = stRender.stencilPassOpBack = kStencilOpReplace;
+	DeviceStencilState* devStRender = device.CreateStencilState (stRender);
+	
+	RenderTexture* rtNormalsSpec = GetRenderBufferManager().GetTempBuffer (RenderBufferManager::kFullSize, RenderBufferManager::kFullSize, kDepthFormatNone, kRTFormatARGB32, 0, kRTReadWriteLinear);
+	rtNormalsSpec->SetFilterMode (kTexFilterNearest);
+	if (!rtNormalsSpec->IsCreated())
+		rtNormalsSpec->Create();
+	RenderSurfaceHandle rtSurfaceColor = rtNormalsSpec->GetColorSurfaceHandle();
+	RenderSurfaceHandle rtSurfaceDepth = rtMain->GetDepthSurfaceHandle(); // reuse depth buffer from final pass
+
+	UInt32 rtFlags = RenderTexture::kFlagDontRestoreColor;
+	UInt32 gfxClearFlags = kGfxClearAll;
+	// do not clear depth/stencil if camera set to DontClear
+	if (m_Context->m_Camera->GetClearFlags() == Camera::kDontClear)
+	{
+		gfxClearFlags &= ~kGfxClearDepthStencil;
+	}
+	else
+	{
+		rtFlags |= RenderTexture::kFlagDontRestoreDepth;
+	}
+
+	// set base pass render texture
+	RenderTexture::SetActive (1, &rtSurfaceColor, rtSurfaceDepth, rtNormalsSpec, 0, kCubeFaceUnknown, rtFlags);
+
+	AddRenderLoopTempBuffer (m_Context->m_RenderLoop, rtNormalsSpec);
+
+	float black[] = {0,0,0,0};
+	GraphicsHelper::Clear (gfxClearFlags, black, 1.0f, 0);
+	GPU_TIMESTAMP();
+
+	device.SetViewMatrix (m_Context->m_CurCameraMatrix.GetPtr());
+
+	const ChannelAssigns* channels = NULL;
+
+	#if GFX_ENABLE_DRAW_CALL_BATCHING
+	// State of the previously submitted object; any difference flushes the current batch.
+	int prevTransformType = -1;
+	Material* prevMaterial = 0;
+	Shader* prevShader = 0;
+	int prevSubshaderIndex = -1;
+	float prevInvScale = 0.0f;
+	float prevLodFade = 0.0f;
+	UInt32 prevCustomPropsHash = 0;
+	int prevPassIndex = -1;
+	int prevStencilRef = 0;
+
+	int canBatch = 0; // reset to 0 on every flush, incremented for each object that matches the previous state
+
+	#endif
+
+	const bool directLightBakedInLightProbes = LightProbes::AreBaked() && GetLightmapSettings().GetLightmapsMode() != LightmapSettings::kDualLightmapsMode;
+
+	size_t ndata = m_Objects->size();
+	outRemainingObjects.reserve (ndata / 16);
+
+	for( size_t i = 0; i < ndata; ++i )
+	{
+		const PrePassRenderData& rpData = m_PlainRenderPasses[i];
+		const RenderObjectData& roData = (*m_Objects)[rpData.roIndex];
+		Shader* shader = roData.shader;
+
+		int ss = roData.subShaderIndex;
+		if (ss == -1)
+			ss = shader->GetShaderLabShader()->GetDefaultSubshaderIndex(kRenderPathExtPrePass);
+
+		const VisibleNode *node = roData.visibleNode;
+		
+		bool withinShadowDistance = true;
+		float distanceAlongView = roData.distanceAlongView;
+		if (distanceAlongView > shadowDistance)
+		{
+			// check whether its bounds is actually further than shadow distance
+			// project extents onto camera forward axis
+			float z = MultiplyAbsVectorZ (m_Context->m_CurCameraMatrix, node->worldAABB.GetExtent());
+			Assert(z >= 0.0f);
+			if (distanceAlongView - z > shadowDistance)
+				withinShadowDistance = false;
+		}
+		// Subshader index is still -1: hand the object to the caller instead of drawing it here.
+		if (ss == -1)
+		{
+			if (withinShadowDistance && node->renderer->GetReceiveShadows())
+				receiverBounds.Encapsulate (node->worldAABB);
+			outRemainingObjects.push_back() = roData;
+			continue;
+		}
+
+
+		const float invScale = node->invScale;
+		const float lodFade = node->lodFade;
+		const int transformType = node->transformType;
+		const UInt32 customPropsHash = node->renderer->GetCustomPropertiesHash();
+
+		#if GFX_ENABLE_DRAW_CALL_BATCHING
+		// Flush when the renderer has static batch index 0 or when any tracked state differs from the previous object.
+		if (
+			node->renderer->GetStaticBatchIndex() == 0 ||
+			prevTransformType != transformType ||
+			prevMaterial != roData.material ||
+			prevShader != shader ||
+			prevSubshaderIndex != ss ||
+			!CompareApproximately(prevInvScale,invScale) || 
+			!CompareApproximately(prevLodFade,lodFade, LOD_FADE_BATCH_EPSILON) ||
+			prevCustomPropsHash != customPropsHash)
+		{
+			m_BatchRenderer.Flush();
+			
+			prevTransformType = transformType;
+			prevMaterial = roData.material;
+			prevShader = shader;
+			prevSubshaderIndex = ss;
+			prevInvScale = invScale;
+			prevLodFade = lodFade;
+			prevCustomPropsHash = customPropsHash;
+			
+			canBatch = 0;
+		}
+		else
+			++canBatch;
+
+		#endif
+
+		SetObjectScale (device, lodFade, invScale);
+
+		node->renderer->ApplyCustomProperties(*roData.material, shader, ss);
+
+		const bool lightmapped = node->renderer->IsLightmappedForRendering();
+		const Renderer* renderer = static_cast<Renderer*>(node->renderer);
+		const bool directLightFromLightProbes = directLightBakedInLightProbes && node->renderer->GetRendererType() != kRendererIntermediate && renderer->GetUseLightProbes();
+
+		ShaderLab::SubShader& subshader = shader->GetShaderLabShader()->GetSubShader (ss);
+		int shaderPassCount = subshader.GetValidPassCount();
+		for (int p = 0; p < shaderPassCount; ++p)
+		{
+			ShaderPassType passType;
+			UInt32 passRenderOptions;
+			subshader.GetPass(p)->GetPassOptions (passType, passRenderOptions);
+			if (passType != kPassLightPrePassBase)
+				continue;
+			// Stencil reference: "something" bit, plus non-lightmapped / beyond-shadow-distance bits, plus the layer's stencil bit if it has one.
+			int stencilRef = kStencilMaskSomething;
+			if (!lightmapped && !directLightFromLightProbes)
+			{
+				stencilRef += kStencilMaskNonLightmapped;
+			}
+
+			if (!withinShadowDistance)
+				stencilRef += kStencilMaskBeyondShadowDistace;
+
+			int layerStencilBit = lightingLayers.layerToStencil[node->renderer->GetLayer()];
+			if (layerStencilBit != -1)
+				stencilRef |= 1<<layerStencilBit;
+
+			#if GFX_ENABLE_DRAW_CALL_BATCHING
+			// A different pass index or stencil reference also breaks the batch.
+			if ((p != prevPassIndex) ||
+				(stencilRef != prevStencilRef))
+			{
+				m_BatchRenderer.Flush();
+				prevPassIndex = p;
+				prevStencilRef = stencilRef;
+				canBatch = 0;
+			}
+
+			if (canBatch <= 1) // stencil state and material pass are only (re)applied while canBatch is 0 or 1
+			#endif
+			{
+				device.SetStencilState (devStRender, stencilRef);
+				channels = roData.material->SetPassWithShader(p, shader, ss);
+			#if GFX_ENABLE_DRAW_CALL_BATCHING
+				prevPassIndex = p;
+				prevStencilRef = stencilRef;
+			#endif
+			}
+
+			receiverBounds.Encapsulate (node->worldAABB); // NOTE(review): unlike the ss == -1 path above, this is not conditioned on shadow distance or GetReceiveShadows - confirm intended
+
+			if (channels)
+			{
+				#if GFX_ENABLE_DRAW_CALL_BATCHING
+				m_BatchRenderer.Add (node->renderer, roData.subsetIndex, channels, node->worldMatrix, transformType);
+				#else
+				SetupObjectMatrix (node->worldMatrix, transformType);
+				node->renderer->Render (roData.subsetIndex, *channels);
+				#endif
+			}
+		}
+	}
+
+	#if GFX_ENABLE_DRAW_CALL_BATCHING
+	m_BatchRenderer.Flush();
+	#endif
+	
+	return rtNormalsSpec;
+}
+
+// Final geometry pass of the light pre-pass loop.
+// Makes rtMain the active target, clears it with the camera background (depth is kept),
+// publishes the light buffer(s) as global shader properties and then draws the
+// kPassLightPrePassFinal passes of every entry in m_PlainRenderPasses, batching draw
+// calls when GFX_ENABLE_DRAW_CALL_BATCHING is enabled.
+//   rtMain         - target to render into; sRGB write is only enabled when it is NULL or
+//                    reports GetSRGBReadWrite().
+//   rtLight        - light buffer; when NULL a black (hdr) or white (non-hdr) builtin
+//                    texture is bound as kSLPropLightBuffer instead.
+//   rtLightSpec    - same handling for kSLPropLightSpecBuffer (SEPERATE_PREPASS_SPECULAR only).
+//   hdr            - toggles kKeywordHDRLightPrepassOn and disables sRGB write.
+//   linearLighting - required (together with !hdr) for sRGB write to be enabled.
+// sRGB write is switched off again before returning.
+void PrePassRenderLoop::RenderFinalPass (RenderTexture* rtMain, 
+										 RenderTexture* rtLight, 
+#if SEPERATE_PREPASS_SPECULAR
+										 RenderTexture* rtLightSpec,
+#endif
+										 bool hdr, 
+										 bool linearLighting)
+{
+	PROFILER_AUTO_GFX(gPrepassFinal, m_Context->m_Camera);
+	GPU_AUTO_SECTION(kGPUSectionOpaquePass);
+
+	GfxDevice& device = GetGfxDevice();
+	device.SetNormalizationBackface( kNormalizationDisabled, false );
+
+	RenderTexture::SetActive (rtMain);
+
+	// Clear with background. Do not clear depth since we need the already
+	// filled one from the base pass.
+	device.SetSRGBWrite(!hdr && linearLighting && (!rtMain || rtMain->GetSRGBReadWrite()) );
+	m_Context->m_Camera->ClearNoSkybox(true);
+
+	// Bind the light buffer, or a neutral builtin texture when there is none.
+	if(rtLight)
+		rtLight->SetGlobalProperty (kSLPropLightBuffer);
+	else
+	{
+		ShaderLab::TexEnv *te = ShaderLab::g_GlobalProperties->SetTexture (kSLPropLightBuffer, hdr ? builtintex::GetBlackTexture() : builtintex::GetWhiteTexture());
+		te->ClearMatrix();
+	}
+	
+#if SEPERATE_PREPASS_SPECULAR
+	if(rtLightSpec)
+		rtLightSpec->SetGlobalProperty (kSLPropLightSpecBuffer);
+	else
+	{
+		ShaderLab::TexEnv *te = ShaderLab::g_GlobalProperties->SetTexture (kSLPropLightSpecBuffer, hdr ? builtintex::GetBlackTexture() : builtintex::GetWhiteTexture());
+		te->ClearMatrix();
+	}	
+#endif
+
+	// Vertex channels returned by the most recent SetPassWithShader(); NULL means "do not draw".
+	const ChannelAssigns* channels = NULL;
+	const LightmapSettings& lightmapper = GetLightmapSettings();
+
+	#if GFX_ENABLE_DRAW_CALL_BATCHING
+
+	// State of the previously processed object/pass. A difference in any of these
+	// flushes the batch renderer and starts a new run.
+	int prevPassIndex = -1;
+
+	int prevLightmapIndex = -1;
+	Vector4f prevLightmapST (0,0,0,0);
+	int prevTransformType = -1;
+	Material* prevMaterial = 0;
+	Shader* prevShader = 0;
+	int prevSubshaderIndex = -1;
+	float prevInvScale = 0.0f;
+	float prevLodFade = 0.0f;
+	UInt32 prevCustomPropsHash = 0;
+
+	// Count of consecutive objects that matched the previous state; reset to 0 on every flush.
+	int canBatch = 0;
+
+	#endif
+
+	if (hdr)
+		g_ShaderKeywords.Enable (kKeywordHDRLightPrepassOn);
+	else
+		g_ShaderKeywords.Disable (kKeywordHDRLightPrepassOn);
+
+	LightProbes* lightProbes = GetLightProbes();
+	const bool areLightProbesBaked =  LightProbes::AreBaked();
+	BuiltinShaderParamValues& builtinParamValues = GetGfxDevice().GetBuiltinParamValues();
+	// Ambient light term, evaluated once and added to the first SH coefficient of every object below.
+	Vector3f ambientSH;
+	SHEvalAmbientLight(GetRenderSettings().GetAmbientLightInActiveColorSpace(), &ambientSH[0]);
+
+	// m_PlainRenderPasses is indexed in parallel with the object count; each entry
+	// points back into m_Objects through roIndex.
+	size_t ndata = m_Objects->size();
+	for( size_t i = 0; i < ndata; ++i )
+	{
+		const PrePassRenderData& rpData = m_PlainRenderPasses[i];
+		const RenderObjectData& roData = (*m_Objects)[rpData.roIndex];
+		
+		const VisibleNode *node = roData.visibleNode;
+		Shader* shader = roData.shader;
+		
+		// Use the explicitly chosen subshader, else the shader's default for the pre-pass path;
+		// objects without one are skipped.
+		int ss = roData.subShaderIndex;
+		if (ss == -1)
+			ss = shader->GetShaderLabShader()->GetDefaultSubshaderIndex(kRenderPathExtPrePass);
+		if (ss == -1)
+			continue;
+
+		const Vector4f lightmapST = node->renderer->GetLightmapSTForRendering();
+		const int lightmapIndex = roData.lightmapIndex;
+		DebugAssert(lightmapIndex == node->renderer->GetLightmapIndex());
+
+		const float invScale = node->invScale;
+		const float lodFade = node->lodFade;
+		const int transformType = node->transformType;
+		const UInt32 customPropsHash = node->renderer->GetCustomPropertiesHash();
+
+		#if GFX_ENABLE_DRAW_CALL_BATCHING
+
+		// Break the current batch when the renderer has static batch index 0 or when any
+		// piece of per-object render state differs from the previous object.
+		if (
+			node->renderer->GetStaticBatchIndex() == 0 ||
+			prevTransformType != transformType ||
+			prevMaterial != roData.material ||
+			prevShader != shader ||
+			prevSubshaderIndex != ss ||
+			prevLightmapIndex != lightmapIndex ||
+			!CompareMemory(prevLightmapST, lightmapST) ||
+			!CompareApproximately(prevInvScale,invScale) ||
+			!CompareApproximately(prevLodFade,lodFade) ||
+			prevCustomPropsHash != customPropsHash)
+		{
+			m_BatchRenderer.Flush();
+			
+			prevLightmapIndex = lightmapIndex;
+			prevLightmapST = lightmapST;
+			prevTransformType = transformType;
+			prevMaterial = roData.material;
+			prevShader = shader;
+			prevSubshaderIndex = ss;
+			prevInvScale = invScale;
+			prevLodFade = lodFade;
+			prevCustomPropsHash = customPropsHash;
+
+			canBatch = 0;
+		}
+		else
+			++canBatch;
+
+		#endif
+
+		SetObjectScale (device, lodFade, invScale);
+
+		node->renderer->ApplyCustomProperties(*roData.material, roData.shader, ss);
+
+		// Draw only the passes of the subshader that are tagged as the pre-pass final pass.
+		ShaderLab::SubShader& subshader = roData.shader->GetShaderLabShader()->GetSubShader (ss);
+		int shaderPassCount = subshader.GetValidPassCount();
+		for (int p = 0; p < shaderPassCount; ++p)
+		{
+			ShaderPassType passType;
+			UInt32 passRenderOptions;
+			subshader.GetPass(p)->GetPassOptions (passType, passRenderOptions);
+			if (passType != kPassLightPrePassFinal)
+				continue;
+
+			#if GFX_ENABLE_DRAW_CALL_BATCHING
+			// A different pass index than last time also ends the batch.
+			if (p != prevPassIndex)
+			{
+				m_BatchRenderer.Flush();
+				canBatch = 0;
+			}
+
+			// With batching, the state setup below runs only while canBatch is 0 or 1;
+			// later objects of the same run reuse what is already bound.
+			// Without batching this block runs for every object.
+			if (canBatch <= 1)
+			#endif
+			{
+				// lightmap
+				SetupObjectLightmaps (lightmapper, lightmapIndex, lightmapST, false);
+
+				// light probes
+				// TODO: figure how does that interact with lightmaps and with batching;
+				// if we are about to use light probes and the renderer gets different coeffs (maybe a simpler check?) => can't batch
+				float lightProbeCoeffs[9][3];
+				memset (lightProbeCoeffs, 0, sizeof(lightProbeCoeffs));
+				if (areLightProbesBaked && node->renderer->GetRendererType() != kRendererIntermediate)
+				{
+					Renderer* renderer = static_cast<Renderer*>(node->renderer);
+					if (renderer && renderer->GetUseLightProbes())
+						lightProbes->GetInterpolatedLightProbe(renderer->GetLightProbeInterpolationPosition(node->worldAABB), renderer, &(lightProbeCoeffs[0][0]));
+				}
+				// Ambient is always added, whether or not probe coefficients were fetched.
+				lightProbeCoeffs[0][0] += ambientSH[0];
+				lightProbeCoeffs[0][1] += ambientSH[1];
+				lightProbeCoeffs[0][2] += ambientSH[2];
+				SetSHConstants (lightProbeCoeffs, builtinParamValues);
+				
+				// set pass
+				channels = roData.material->SetPassWithShader(p, shader, ss);
+			}
+
+			#if GFX_ENABLE_DRAW_CALL_BATCHING
+			prevPassIndex = p;
+			#endif
+			
+			if (channels)
+			{
+				#if GFX_ENABLE_DRAW_CALL_BATCHING
+				m_BatchRenderer.Add (node->renderer, roData.subsetIndex, channels, node->worldMatrix, transformType);
+				#else
+				SetupObjectMatrix (node->worldMatrix, transformType);
+				node->renderer->Render (roData.subsetIndex, *channels);
+				#endif
+			}
+		}
+	}
+
+	#if GFX_ENABLE_DRAW_CALL_BATCHING
+	// Submit whatever is still queued from the last run.
+	m_BatchRenderer.Flush();
+	#endif
+
+	GetGfxDevice().SetSRGBWrite(false);
+}
+
+
+// Heap-allocates a default-constructed PrePassRenderLoop.
+// The returned object is released with DeletePrePassRenderLoop().
+PrePassRenderLoop* CreatePrePassRenderLoop()
+{
+	PrePassRenderLoop* created = new PrePassRenderLoop();
+	return created;
+}
+
+// Destroys a PrePassRenderLoop that was allocated with new (see CreatePrePassRenderLoop()).
+// Passing NULL is harmless, as with any delete expression.
+void DeletePrePassRenderLoop (PrePassRenderLoop* queue)
+{
+	delete queue;
+}
+
+
+// Returns a bit mask of the layers that at least one light excludes from its culling mask.
+// All non-zero culling masks are ANDed together and the result is inverted; lights whose
+// culling mask is zero do not contribute. With no contributing lights the result is 0.
+static UInt32 CalculateLightingLayers ()
+{
+	// TODO: Use active lights instead
+	const LightManager::Lights& allLights = GetLightManager().GetAllLights();
+
+	// Bits that are set in every contributing light's culling mask.
+	UInt32 commonMask = ~0;
+	const LightManager::Lights::const_iterator lastLight = allLights.end();
+	for (LightManager::Lights::const_iterator light = allLights.begin(); light != lastLight; ++light)
+	{
+		const UInt32 cullingMask = light->GetCullingMask();
+		if (cullingMask != 0)
+			commonMask &= cullingMask;
+	}
+
+	return ~commonMask;
+}
+
+
+#if UNITY_EDITOR
+// Editor diagnostic for the number of layers used to exclude objects from lighting.
+// Whenever the "within limit" state differs from the previous call, any earlier message
+// (identified by the QualitySettings instance ID) is removed from the console; if the
+// limit kLightingLayerCount is now exceeded, a warning listing the layers is logged.
+static void CheckLightLayerUsage (const LightingLayers& layers)
+{
+	// Result of the previous call; the first call compares against "ok".
+	static bool s_UsageWasOK = true;
+
+	const bool withinLimit = (layers.lightLayerCount <= kLightingLayerCount);
+	if (withinLimit == s_UsageWasOK)
+		return; // nothing changed, leave the console alone
+	s_UsageWasOK = withinLimit;
+
+	// The instance ID of QualitySettings serves as the log identifier.
+	RemoveErrorWithIdentifierFromConsole (GetQualitySettings().GetInstanceID());
+	if (withinLimit)
+		return;
+
+	std::string warning = Format(
+		"Too many layers used to exclude objects from lighting. Up to %i layers can be used to exclude lights, while your lights use %i:",
+		kLightingLayerCount,
+		layers.lightLayerCount);
+
+	// Append the name of every layer that is set in the lighting layer mask.
+	for (int layer = 0; layer < LightingLayers::kLayerCount; ++layer)
+	{
+		if ((layers.lightingLayerMask & (1<<layer)) == 0)
+			continue;
+
+		std::string layerName = LayerToString (layer);
+		if (layerName.empty())
+			layerName = "Unnamed " + IntToString (layer);
+		warning += " '" + layerName + "'";
+	}
+
+	DebugStringToFile (warning, 0, __FILE__, __LINE__, kScriptingWarning, 0, GetQualitySettings().GetInstanceID());
+}
+#endif
+
+// On renderers that need it, resolves the current depth buffer into a temporary depth
+// render texture so it can be sampled later.
+// When a resolve happens, the temporary texture is registered with renderLoop,
+// outDepthRT and *outDepthTextureID are pointed at it and *outDepthWasCopied is set to true.
+// When no resolve is needed (or none of the listed graphics APIs is compiled in) the
+// outputs are left untouched.
+static void ResolveDepthIntoTextureIfNeeded (
+	GfxDevice& device,
+	RenderLoop& renderLoop,
+	DepthBufferFormat depthFormat,
+	RenderTexture*& outDepthRT,
+	TextureID* outDepthTextureID,
+	bool* outDepthWasCopied)
+{
+	// TODO FIXME!! Should add GLES20 here as well, but it's missing GfxDevice::ResolveDepthIntoTexture!
+
+#if GFX_SUPPORTS_D3D9 || GFX_SUPPORTS_D3D11 || GFX_SUPPORTS_OPENGL || GFX_SUPPORTS_OPENGLES30
+	bool resolveRequired = false;
+
+#if GFX_SUPPORTS_D3D9
+	// D3D9: when sampling an INTZ depth texture while depth testing is very slow,
+	// resolve into a separate texture first.
+	if (device.GetRenderer() == kGfxRendererD3D9 && gGraphicsCaps.hasStencilInDepthTexture && gGraphicsCaps.d3d.hasDepthResolveRESZ && gGraphicsCaps.d3d.slowINTZSampling)
+		resolveRequired = true;
+#endif
+#if GFX_SUPPORTS_D3D11
+	// D3D11: always resolve.
+	if (device.GetRenderer() == kGfxRendererD3D11)
+		resolveRequired = true;
+#endif
+#if GFX_SUPPORTS_OPENGL
+	// OpenGL: resolve unless the slow RenderBasePassDepth() path was used.
+	// TODO: get rid of buggyPackedDepthStencil
+	if ((device.GetRenderer() == kGfxRendererOpenGL) && !gGraphicsCaps.gl.buggyPackedDepthStencil)
+		resolveRequired = true;
+#endif
+#if GFX_SUPPORTS_OPENGLES30
+	// GLES30: always resolve.
+	if (device.GetRenderer() == kGfxRendererOpenGLES30)
+		resolveRequired = true;
+#endif
+
+	if (!resolveRequired)
+		return;
+
+	DebugAssert (depthFormat != kDepthFormatNone);
+
+	// Full-size, sample-only depth texture that lives until the render loop is cleaned up.
+	RenderTexture* resolvedDepth = GetRenderBufferManager().GetTempBuffer (RenderBufferManager::kFullSize, RenderBufferManager::kFullSize, depthFormat, kRTFormatDepth, RenderBufferManager::kRBSampleOnlyDepth, kRTReadWriteLinear);
+	resolvedDepth->SetFilterMode (kTexFilterNearest);
+	if (!resolvedDepth->IsCreated())
+		resolvedDepth->Create();
+	AddRenderLoopTempBuffer (&renderLoop, resolvedDepth);
+
+	device.ResolveDepthIntoTexture (resolvedDepth->GetColorSurfaceHandle (), resolvedDepth->GetDepthSurfaceHandle ());
+
+	outDepthRT = resolvedDepth;
+	*outDepthTextureID = resolvedDepth->GetTextureID ();
+	*outDepthWasCopied = true;
+
+#endif
+}
+
+#if ENABLE_PRE_PASS_LOOP_HASH_SORTING
+// Appends the bytes of *p to the hash scratch buffer at 'buffer' and returns the position
+// just past the written value, ready for the next append.
+// The value is copied with memcpy rather than stored through a casted T*: the buffer is a
+// plain UInt8 array with no alignment guarantee for T, and writing through a
+// reinterpret_cast'ed pointer would also break the aliasing rules.
+// sizeof(T) must be a multiple of 4 so successive entries stay at 32-bit granularity.
+// The caller is responsible for the buffer having room for sizeof(T) more bytes.
+template<typename T>
+static UInt8* InstertToHashBufferPreLoop(const T* p, UInt8* buffer)
+{
+	Assert((sizeof(T) % 4) == 0);	// only whole 32-bit words are appended
+	memcpy(buffer, p, sizeof(T));
+	return buffer + sizeof(T);
+}
+#endif
+
+// Runs the whole light pre-pass sequence for one camera's opaque objects:
+//   1. build (and sort) the per-object pass list,
+//   2. geometry base pass (RenderBasePass),
+//   3. optional depth work: separate depth pass on OpenGL with buggy packed depth/stencil,
+//      rendering of the remaining (forward) objects into depth, depth resolve,
+//   4. lighting pass (RenderLighting),
+//   5. final geometry pass (RenderFinalPass),
+//   6. optional combined depth+normals texture.
+// Parameters:
+//   ctx                 - render loop context; the camera's current target texture must be non-NULL.
+//   objects             - objects to render; sorted in place when hash sorting is disabled.
+//   outRemainingObjects - handed to RenderBasePass; also rendered into depth / depth+normals here.
+//   outDepthRT          - receives the texture holding depth (normals/spec buffer, separate
+//                         depth target or resolved copy, depending on the path taken).
+//   outDepthNormalsRT   - receives the combined depth+normals texture when the camera asks for it, else NULL.
+//   outMainShadowMap    - passed by address to RenderLighting.
+//   activeLights        - lights used for the lighting pass.
+//   linearLighting      - forwarded to RenderFinalPass.
+//   outDepthWasCopied   - set to true when depth ended up in a separate pass target or resolved copy.
+void DoPrePassRenderLoop (
+	RenderLoopContext& ctx,
+	RenderObjectDataContainer& objects,
+	RenderObjectDataContainer& outRemainingObjects,
+	RenderTexture*& outDepthRT,
+	RenderTexture*& outDepthNormalsRT,
+	RenderTexture*& outMainShadowMap,
+	ActiveLights& activeLights,
+	bool linearLighting,
+	bool* outDepthWasCopied)
+{
+	outDepthRT = NULL;
+	outDepthNormalsRT = NULL;
+	*outDepthWasCopied = false;
+
+	// Allocated on the stack each time, uses temp allocators
+	PrePassRenderLoop loop;
+	loop.m_Context = &ctx;
+	loop.m_Objects = &objects;
+
+	loop.m_PlainRenderPasses.resize_uninitialized(0);
+	
+	// One PrePassRenderData per object, referring back to it by index.
+	RenderObjectDataContainer::iterator itEnd = objects.end();
+	size_t roIndex = 0;
+	for (RenderObjectDataContainer::iterator it = objects.begin(); it != itEnd; ++it, ++roIndex)
+	{
+		RenderObjectData& odata = *it;
+		const VisibleNode *node = odata.visibleNode;
+		BaseRenderer* renderer = node->renderer;
+
+		PrePassRenderData rpData;
+		rpData.roIndex = roIndex;
+
+#if ENABLE_PRE_PASS_LOOP_HASH_SORTING
+
+		//hash state information for render object sorter
+		const int kHashBufferSize = 64;
+		UInt8 hashBuffer[kHashBufferSize];
+		UInt8* hashPtr = hashBuffer;
+
+		// Always write 32b granularity into the hash buffer to avoid unaligned writes
+		UInt32 transformType = static_cast<UInt32>(renderer->GetTransformInfo().transformType);	
+		hashPtr = InstertToHashBufferPreLoop(&transformType, hashPtr);
+		hashPtr = InstertToHashBufferPreLoop(&node->invScale, hashPtr);
+		hashPtr = InstertToHashBufferPreLoop(&node->lodFade, hashPtr);
+		int materialID = odata.material->GetInstanceID();
+		hashPtr = InstertToHashBufferPreLoop(&materialID, hashPtr);
+		int shaderID = odata.shader->GetInstanceID();
+		hashPtr = InstertToHashBufferPreLoop(&shaderID, hashPtr);
+		// NOTE(review): this hashes the shader's default pre-pass subshader and does not look at
+		// odata.subShaderIndex - confirm that is intended.
+		int ss = odata.shader->GetShaderLabShader()->GetDefaultSubshaderIndex(kRenderPathExtPrePass);
+		hashPtr = InstertToHashBufferPreLoop(&ss, hashPtr);
+		#if GFX_ENABLE_DRAW_CALL_BATCHING
+		hashPtr = InstertToHashBufferPreLoop(&odata.staticBatchIndex, hashPtr);
+		#endif
+		// Checked after the fact: the writes above have already happened when this runs.
+		Assert(hashPtr-hashBuffer <= kHashBufferSize);
+
+		rpData.hash = MurmurHash2A(hashBuffer, hashPtr-hashBuffer, 0x9747b28c);
+#endif
+		loop.m_PlainRenderPasses.push_back( rpData );
+	}
+		
+	// Sort objects
+	// With hash sorting the pass list is sorted; otherwise the object container itself is
+	// sorted and the pass list keeps its sequential indices into it.
+	{
+			PROFILER_AUTO(gPrepassSort, ctx.m_Camera);
+#if ENABLE_PRE_PASS_LOOP_HASH_SORTING 			
+			loop.SortPreRenderPassData(loop.m_PlainRenderPasses);
+#else		
+			std::sort (objects.begin(), objects.end(), RenderPrePassObjectSorter());
+#endif
+	}
+
+	// Setup shadow distance, fade and ambient parameters
+	BuiltinShaderParamValues& params = GetGfxDevice().GetBuiltinParamValues();
+	Vector4f lightFade;
+	Vector4f fadeCenterAndType;
+	CalculateLightShadowFade (*ctx.m_Camera, 1.0f, lightFade, fadeCenterAndType);
+	params.SetVectorParam(kShaderVecLightmapFade, lightFade);
+	params.SetVectorParam(kShaderVecShadowFadeCenterAndType, fadeCenterAndType);
+	params.SetVectorParam(kShaderVecUnityAmbient, Vector4f(GetRenderSettings().GetAmbientLightInActiveColorSpace().GetPtr()));
+
+	GfxDevice& device = GetGfxDevice();
+
+	// Prepare for rendering
+	RenderTexture* rtMain = ctx.m_Camera->GetCurrentTargetTexture ();
+	Assert (rtMain);
+	if (!rtMain->IsCreated())
+		rtMain->Create();
+
+	LightingLayers lightingLayers (CalculateLightingLayers ());
+	#if UNITY_EDITOR
+	CheckLightLayerUsage (lightingLayers);
+	#endif
+	
+	// Don't allow shaders to set their own stencil state from base pass until
+	// the end of light pass, since it would screw them up.
+	ShaderLab::g_GlobalAllowShaderStencil = false;
+	
+	// Render Geometry base pass
+	MinMaxAABB receiverBounds;
+	RenderTexture* rtNormalsSpec = loop.RenderBasePass (rtMain, lightingLayers, outRemainingObjects, receiverBounds);
+	outDepthRT = rtNormalsSpec;
+	
+	// By default depth is taken from rtMain's depth surface / secondary texture ID;
+	// the paths below may redirect these to a separate target.
+	// NOTE(review): rtNormalsSpec is dereferenced without a NULL check - confirm RenderBasePass never returns NULL.
+	RenderSurfaceHandle colorSurfaceHandle = rtNormalsSpec->GetColorSurfaceHandle();
+	RenderSurfaceHandle depthTextureHandle = rtMain->GetDepthSurfaceHandle();
+	TextureID depthTextureID = rtMain->GetSecondaryTextureID();
+	DepthBufferFormat depthFormat = rtMain->GetDepthFormat();
+	
+	#if GFX_SUPPORTS_OPENGL
+	if (device.GetRenderer() == kGfxRendererOpenGL && gGraphicsCaps.gl.buggyPackedDepthStencil)
+	{
+		// Separate pass to render depth into a separate target. And then use that texture to read depth
+		// in the lighting pass.
+		RenderTexture* rtDepth = RenderBasePassDepth (ctx, objects, loop.m_PlainRenderPasses);
+		depthTextureID = rtDepth->GetTextureID();
+		outDepthRT = rtDepth;
+		colorSurfaceHandle = rtDepth->GetColorSurfaceHandle();
+		depthTextureHandle = rtDepth->GetDepthSurfaceHandle();
+		*outDepthWasCopied = true;
+	}
+	#endif
+
+	// Render the remaining objects into depth as well when either the main directional
+	// light casts shadows or the camera requested a depth texture.
+	if (gGraphicsCaps.hasStencilInDepthTexture)
+	{
+		const ActiveLight* mainActiveLight = GetMainActiveLight(activeLights);
+		Light* mainLight = mainActiveLight ? mainActiveLight->light : NULL;
+		const bool mainLightHasShadows = mainLight && mainLight->GetType() == kLightDirectional && mainLight->GetShadows() != kShadowNone;
+		const bool cameraNeedsDepthTexture = (ctx.m_Camera->GetDepthTextureMode() & Camera::kDepthTexDepthBit);
+		if (mainLightHasShadows || cameraNeedsDepthTexture)
+		{
+			RenderForwardObjectsIntoDepth (
+				ctx,
+				rtMain,
+				&outRemainingObjects,
+				colorSurfaceHandle,
+				depthTextureHandle,
+				rtMain->GetWidth(),
+				rtMain->GetHeight(),
+				cameraNeedsDepthTexture
+			);
+		}
+	}
+
+	// May replace outDepthRT / depthTextureID with a resolved copy and set *outDepthWasCopied.
+	ResolveDepthIntoTextureIfNeeded (device, *(ctx.m_RenderLoop), depthFormat, outDepthRT, &depthTextureID, outDepthWasCopied);
+	
+	// Render Lighting pass
+	// rtLight (and rtLightSpec) are passed in as NULL and checked for NULL afterwards.
+	RenderTexture* rtLight = NULL;
+#if SEPERATE_PREPASS_SPECULAR
+	RenderTexture* rtLightSpec = NULL;
+#endif
+	loop.RenderLighting (activeLights,
+						 rtMain, 
+						 depthTextureID, 
+						 rtNormalsSpec,
+						 rtLight,
+#if SEPERATE_PREPASS_SPECULAR
+						 rtLightSpec,
+#endif		
+						 lightFade, 
+						 lightingLayers, 
+						 receiverBounds, 
+						 &outMainShadowMap);
+
+	// It's again ok for shaders to set their stencil state now.
+	ShaderLab::g_GlobalAllowShaderStencil = true;
+
+	if (ctx.m_Camera->GetClearStencilAfterLightingPass())
+	{
+		float black[] = {0,0,0,0};
+		device.Clear (kGfxClearStencil, black, 1.0f, 0);
+	}
+	
+	// Render final Geometry pass
+	loop.RenderFinalPass (rtMain, 
+						  rtLight,
+#if SEPERATE_PREPASS_SPECULAR
+						  rtLightSpec,
+#endif
+						  ctx.m_Camera->GetUsingHDR(), 
+						  linearLighting);
+	
+	if (rtLight)
+	{
+		// Do not release the light buffer yet; so that image effects or whatever can access it later
+		// if needed (via _LightBuffer)
+		device.SetSurfaceFlags(rtLight->GetColorSurfaceHandle(), GfxDevice::kSurfaceDefault, ~GfxDevice::kSurfaceRestoreMask);
+		device.SetSurfaceFlags(rtLight->GetDepthSurfaceHandle(), GfxDevice::kSurfaceDefault, ~GfxDevice::kSurfaceRestoreMask);
+		AddRenderLoopTempBuffer (ctx.m_RenderLoop, rtLight);
+	}
+	
+#if SEPERATE_PREPASS_SPECULAR
+	// Same deferred release for the specular light buffer.
+	if (rtLightSpec)
+	{
+		device.SetSurfaceFlags(rtLightSpec->GetColorSurfaceHandle(), GfxDevice::kSurfaceDefault, ~GfxDevice::kSurfaceRestoreMask);
+		device.SetSurfaceFlags(rtLightSpec->GetDepthSurfaceHandle(), GfxDevice::kSurfaceDefault, ~GfxDevice::kSurfaceRestoreMask);
+		AddRenderLoopTempBuffer (ctx.m_RenderLoop, rtLightSpec);
+	}
+#endif
+	
+	// Combine depth+normals if needed
+	if (ctx.m_Camera->GetDepthTextureMode() & Camera::kDepthTexDepthNormalsBit)
+	{
+		outDepthNormalsRT = CombineDepthNormalsTexture (ctx, outRemainingObjects);
+		RenderTexture::SetActive (rtMain);
+	}
+	
+	// Leave the device with the camera's view matrix and normalization disabled.
+	device.SetViewMatrix( ctx.m_CurCameraMatrix.GetPtr() );
+	device.SetNormalizationBackface( kNormalizationDisabled, false );	
+}
+
+#endif // GFX_SUPPORTS_RENDERLOOP_PREPASS
diff --git a/Runtime/Camera/RenderLoops/RenderLoop.h b/Runtime/Camera/RenderLoops/RenderLoop.h
new file mode 100644
index 0000000..8c54ed8
--- /dev/null
+++ b/Runtime/Camera/RenderLoops/RenderLoop.h
@@ -0,0 +1,25 @@
+#pragma once
+
+#include "RenderLoopEnums.h"
+
+class Shader;
+struct RenderLoop;
+class Camera;
+class ImageFilters;
+class RenderTexture;
+struct ShadowCullData;
+struct CullResults;
+
+
+// Allocates a render loop bound to the given camera. Destroy it with DeleteRenderLoop().
+RenderLoop* CreateRenderLoop (Camera& camera);
+// Destroys a render loop obtained from CreateRenderLoop().
+void DeleteRenderLoop (RenderLoop* loop);
+// Renders the culled scene contents for the loop's camera using the given rendering path.
+// Temporary buffers created along the way stay alive until CleanupAfterRenderLoop().
+void DoRenderLoop (
+	RenderLoop& loop,
+	RenderingPath renderPath,
+	CullResults& contents,
+	// used in the editor for material previews - those should not render projectors, halos etc.
+	bool dontRenderRenderables
+);
+// Releases the temporary buffers registered with the loop during rendering.
+void CleanupAfterRenderLoop (RenderLoop& loop);
+// Gives access to the image filter stack owned by the loop.
+ImageFilters& GetRenderLoopImageFilters (RenderLoop& loop);
+// Runs the loop's image filters; afterOpaque selects which stage of filters is rendered.
+void RenderImageFilters (RenderLoop& loop, RenderTexture* targetTexture, bool afterOpaque);
diff --git a/Runtime/Camera/RenderLoops/RenderLoopEnums.h b/Runtime/Camera/RenderLoops/RenderLoopEnums.h
new file mode 100644
index 0000000..e6ec07e
--- /dev/null
+++ b/Runtime/Camera/RenderLoops/RenderLoopEnums.h
@@ -0,0 +1,29 @@
+#pragma once
+
+// Rendering path used for a camera's render loop.
+enum RenderingPath {
+	kRenderPathVertex = 0,	// forward, vertex-lit loop
+	kRenderPathForward,		// forward shader loop
+	kRenderPathPrePass,		// light pre-pass loop (only on platforms that support it)
+	kRenderPathCount		// number of paths, not a valid path itself
+};
+
+// Occlusion query flavour: trade-off between accuracy and speed.
+// NOTE(review): no use of this enum is visible here; meaning is taken from the value names only.
+enum OcclusionQueryType {
+	kOcclusionQueryTypeMostAccurate = 0,
+	kOcclusionQueryTypeFastest,
+	kOcclusionQueryTypeCount	// number of types, not a valid type itself
+};
+
+// Named render queue indices and the valid index range.
+enum
+{
+	kBackgroundRenderQueue	= 1000,
+	kGeometryRenderQueue	= 2000,
+	kAlphaTestRenderQueue	= 2450, // we want it to be in the end of geometry queue
+	kTransparentRenderQueue	= 3000,
+	kOverlayRenderQueue		= 4000,
+
+	// Overall range of queue indices.
+	kQueueIndexMin = 0,
+	kQueueIndexMax = 5000,
+
+	// Range around the geometry queue (1500..2500). Queue indices up to
+	// kGeometryQueueIndexMax are treated as opaque by the render loop.
+	kGeometryQueueIndexMin = kGeometryRenderQueue-500,
+	kGeometryQueueIndexMax = kGeometryRenderQueue+500,
+};
diff --git a/Runtime/Camera/RenderLoops/RenderLoopPrivate.cpp b/Runtime/Camera/RenderLoops/RenderLoopPrivate.cpp
new file mode 100644
index 0000000..823c8fa
--- /dev/null
+++ b/Runtime/Camera/RenderLoops/RenderLoopPrivate.cpp
@@ -0,0 +1,469 @@
+#include "UnityPrefix.h"
+#include "RenderLoopPrivate.h"
+#include "RenderLoop.h"
+#include "Runtime/Camera/UnityScene.h"
+#include "Runtime/Camera/Camera.h"
+#include "Runtime/Camera/ShadowCulling.h"
+#include "Runtime/Camera/RenderSettings.h"
+#include "Runtime/Camera/ImageFilters.h"
+#include "Runtime/Camera/CameraUtil.h"
+#include "Runtime/Graphics/Transform.h"
+#include "Runtime/Camera/BaseRenderer.h"
+#include "Runtime/Shaders/Material.h"
+#include "Runtime/Camera/Renderqueue.h"
+#include "Runtime/Graphics/RenderTexture.h"
+#include "Runtime/Shaders/GraphicsCaps.h"
+#include "Runtime/GfxDevice/GfxDevice.h"
+#include "Runtime/Misc/BuildSettings.h"
+#include "Runtime/Profiler/Profiler.h"
+#include "Runtime/Profiler/ExternalGraphicsProfiler.h"
+#include "Runtime/GfxDevice/GfxDeviceConfigure.h"
+#include "Runtime/Shaders/Shader.h"
+#include "External/shaderlab/Library/intshader.h"
+#include "Runtime/Graphics/RenderBufferManager.h"
+#include "External/shaderlab/Library/shaderlab.h"
+#include "ReplacementRenderLoop.h"
+
+#if UNITY_EDITOR
+	#include "Editor/Src/EditorUserBuildSettings.h"
+#endif
+
+PROFILER_INFORMATION(gRenderPrepareObjects, "Render.Prepare", kProfilerRender)
+PROFILER_INFORMATION(gRenderOpaque, "Render.OpaqueGeometry", kProfilerRender)
+PROFILER_INFORMATION(gRenderTransparent, "Render.TransparentGeometry", kProfilerRender)
+PROFILER_INFORMATION(gPrePassFwdDepthTex, "RenderPrePass.FwdObjectsIntoDepth", kProfilerRender)
+PROFILER_INFORMATION(gPrePassFwdDepthNormalsTex, "RenderPrePass.FwdObjectsIntoDepthNormals", kProfilerRender)
+PROFILER_INFORMATION(gCameraResolveProfile, "Camera.AAResolve", kProfilerRender)
+
+
+namespace ShaderLab { void ClearGrabPassFrameState (); } // pass.cpp
+
+
+// Per-camera render loop state: the shared context, the object lists collected for the
+// current frame and the temporary render textures that must outlive a single pass.
+struct RenderLoop {
+public:
+	RenderLoop (Camera& camera);
+	~RenderLoop ();
+
+	// Refreshes m_Context from the camera and empties the object lists for a new frame.
+	void PrepareFrame (bool dontRenderRenderables, bool renderingShaderReplace);
+
+public:
+	RenderLoopContext m_Context;
+	ShadowCullData m_ShadowCullData;
+	// Objects to render, split into opaque and after-opaque parts.
+	RenderObjectDataContainer m_Objects[kPartCount];
+	ImageFilters	m_ImageFilters;
+
+	// Temporary buffers registered through AddRenderLoopTempBuffer() and released by
+	// CleanupAfterRenderLoop(); only the first m_TempBufferCount entries are in use.
+	enum { kMaxCreatedTempBuffers = 8 };
+	RenderTexture* m_TempBuffers[kMaxCreatedTempBuffers];
+	int m_TempBufferCount;
+};
+
+
+// Heap-allocates a RenderLoop bound to 'camera'. Release it with DeleteRenderLoop().
+RenderLoop* CreateRenderLoop (Camera& camera)
+{
+	RenderLoop* created = new RenderLoop(camera);
+	return created;
+}
+
+// Destroys a RenderLoop allocated by CreateRenderLoop(). The destructor asserts that no
+// temporary buffers are still registered, so CleanupAfterRenderLoop() should have run first.
+void DeleteRenderLoop (RenderLoop* loop)
+{
+	delete loop;
+}
+
+// Returns a reference to the image filter stack stored inside the loop (no copy is made).
+ImageFilters& GetRenderLoopImageFilters (RenderLoop& loop)
+{
+	return loop.m_ImageFilters;
+}
+
+
+// Binds the loop to its camera, makes the context point back at this loop and starts
+// with an empty list of temporary buffers.
+RenderLoop::RenderLoop(Camera& camera)
+{
+	// No temporary buffers are registered yet.
+	m_TempBufferCount = 0;
+	for (int slot = kMaxCreatedTempBuffers - 1; slot >= 0; --slot)
+		m_TempBuffers[slot] = NULL;
+
+	m_Context.m_RenderLoop = this;
+	m_Context.m_Camera = &camera;
+}
+
+// The loop does not release temporary buffers itself; it only checks that none are
+// still registered when it is destroyed.
+RenderLoop::~RenderLoop()
+{
+	Assert (m_TempBufferCount == 0);
+}
+
+// Returns only the Z component of transforming point v by matrix m: elements 2, 6 and 10
+// of m.m_Data weight x, y and z, and element 14 is added as the translation term.
+// Saves computing the X and Y components when only depth is needed.
+inline float MultiplyPointZ (const Matrix4x4f& m, const Vector3f& v)
+{
+	return m.m_Data[2] * v.x + m.m_Data[6] * v.y + m.m_Data[10] * v.z + m.m_Data[14];
+}
+
+// Computes two depth measures for the center of an object's world-space bounding box.
+// Both values become smaller (more negative) the further the object is in front of the camera.
+//   outDistanceForSort   - meant for sorting only. With orthographic sorting it equals
+//                          outDistanceAlongView; otherwise it is the negated squared distance
+//                          between the center and the camera position.
+//   outDistanceAlongView - projection of the center along the camera's view direction
+//                          (Z of the center transformed by the current camera matrix).
+static void EvaluateObjectDepth (const RenderLoopContext& ctx, const TransformInfo& info, float& outDistanceForSort, float& outDistanceAlongView)
+{
+	const Vector3f aabbCenter = info.worldAABB.GetCenter();
+
+	// The view-axis projection is needed for either sort mode.
+	outDistanceAlongView = MultiplyPointZ (ctx.m_CurCameraMatrix, aabbCenter);
+
+	if (!ctx.m_SortOrthographic)
+	{
+		Vector3f cameraToCenter = aabbCenter;
+		cameraToCenter -= ctx.m_CurCameraPos;
+		outDistanceForSort = -SqrMagnitude(cameraToCenter);
+	}
+	else
+	{
+		outDistanceForSort = outDistanceAlongView;
+	}
+
+	// A distance of NaN can cause inconsistent sorting results, if input order is inconsistent.
+	Assert(IsFinite(outDistanceForSort));
+	Assert(IsFinite(outDistanceAlongView));
+}
+
+
+// Prepares the loop for a new frame: snapshots camera matrix, position, viewport and
+// sort mode into the context, stores the two flags, empties every object list and
+// resets the temporary buffer count.
+void RenderLoop::PrepareFrame (bool dontRenderRenderables, bool renderingShaderReplace)
+{
+	RenderLoopContext& ctx = m_Context;
+	Camera& cam = *ctx.m_Camera;
+
+	ctx.m_CurCameraMatrix = cam.GetWorldToCameraMatrix();
+	ctx.m_CurCameraPos = cam.GetComponent(Transform).GetPosition();
+	ctx.m_CameraViewport = cam.GetRenderRectangle();
+
+	// An explicit sort mode wins; anything else follows the camera's projection.
+	bool sortOrthographic;
+	switch (cam.GetSortMode())
+	{
+	case Camera::kSortPerspective:
+		sortOrthographic = false;
+		break;
+	case Camera::kSortOrthographic:
+		sortOrthographic = true;
+		break;
+	default:
+		sortOrthographic = cam.GetOrthographic();
+		break;
+	}
+	ctx.m_SortOrthographic = sortOrthographic;
+
+	ctx.m_DontRenderRenderables = dontRenderRenderables;
+	ctx.m_RenderingShaderReplace = renderingShaderReplace;
+
+	// Drop last frame's objects.
+	for (int part = 0; part != kPartCount; ++part)
+	{
+		m_Objects[part].resize_uninitialized(0);
+	}
+
+	#if DEBUGMODE
+	// Every temp buffer slot is expected to have been cleared by the previous cleanup.
+	for (int slot = 0; slot != kMaxCreatedTempBuffers; ++slot)
+		Assert (m_TempBuffers[slot] == NULL);
+	#endif
+	m_TempBufferCount = 0;
+}
+
+
+// When image filters exist and the camera composites them from the screen, grabs the
+// camera's viewport pixels into the matching image filter target texture.
+//   beforeOpaqueImageFx - true selects GetTargetBeforeOpaque(), false GetTargetAfterOpaque().
+// Returns the texture that received the pixels, or NULL when no resolve was done
+// (screen compositing not in use, or no target texture available).
+static RenderTexture* ResolveScreenToTextureIfNeeded (RenderLoop& loop, bool forceIntoRT, bool beforeOpaqueImageFx)
+{
+	Camera* cam = loop.m_Context.m_Camera;
+	ImageFilters& filters = loop.m_ImageFilters;
+
+	// If we use screen to composite image effects, resolve screen into the render texture now
+	const bool usingScreenToComposite = filters.HasImageFilter() && cam->GetUsesScreenForCompositing(forceIntoRT);
+	if (!usingScreenToComposite)
+		return NULL;
+
+	// Pick the texture the screen contents go into.
+	RenderTexture* grabTarget;
+	if (beforeOpaqueImageFx)
+		grabTarget = filters.GetTargetBeforeOpaque ();
+	else
+		grabTarget = filters.GetTargetAfterOpaque (forceIntoRT, usingScreenToComposite);
+	if (!grabTarget)
+		return NULL;
+
+	PROFILER_AUTO_GFX(gCameraResolveProfile, cam)
+	GPU_AUTO_SECTION(kGPUSectionPostProcess);
+
+	// We should insert proper discard/clear/... on backbuffer when doing MSAA
+	// resolved off it. However that's for the future (case 549705),
+	// for now just silence the RT unresolve warning.
+	GetGfxDevice().IgnoreNextUnresolveOnCurrentRenderTarget();
+
+	// Copy the camera's physical viewport rectangle from the screen.
+	Rectf viewportRect = cam->GetPhysicalViewportRect();
+	int viewport[4];
+	RectfToViewport( viewportRect, viewport );
+	Assert (viewport[2] == grabTarget->GetGLWidth() && viewport[3] == grabTarget->GetGLHeight());
+	grabTarget->GrabPixels (viewport[0], viewport[1], viewport[2], viewport[3]);
+
+	// D3D and GL use different notions of how Y texture coordinates go.
+	// In effect, we have to flip any sampling from the first texture in the image filters
+	// stack on D3D.
+	grabTarget->CorrectVerticalTexelSize(false);
+
+	return grabTarget;
+}
+
+
+// Renders the loop's image filters into targetTexture.
+// First resolves the screen into a texture if the filters composite from the screen,
+// then runs the filters. For the after-opaque stage without screen compositing, the
+// camera's current target is switched to the filters' after-opaque target.
+void RenderImageFilters (RenderLoop& loop, RenderTexture* targetTexture, bool afterOpaque)
+{
+	Camera* cam = loop.m_Context.m_Camera;
+	ImageFilters& filters = loop.m_ImageFilters;
+
+	const bool forceIntoRT = cam->CalculateNeedsToRenderIntoRT();
+	ResolveScreenToTextureIfNeeded (loop, forceIntoRT, afterOpaque);
+
+	const bool usingScreenToComposite = filters.HasImageFilter() && cam->GetUsesScreenForCompositing(forceIntoRT);
+	filters.DoRender (targetTexture, forceIntoRT, afterOpaque, usingScreenToComposite, cam->GetUsingHDR());
+
+	if (!afterOpaque || usingScreenToComposite)
+		return;
+
+	cam->SetCurrentTargetTexture (filters.GetTargetAfterOpaque(forceIntoRT,usingScreenToComposite));
+}
+
+
+// Renders 'objects' with replacement shaders into the camera's depth texture and/or
+// depth+normals texture, then re-applies the camera's render setup after each one.
+// Does nothing when there is no depth texture or no objects.
+//   rtDepth          - depth texture to update (required).
+//   rtDepthNormals   - depth+normals texture to update; may be NULL.
+//   depthWasCopied   - true when rtDepth is a separate copy that is rendered into directly;
+//                      otherwise rtDepth's color surface is paired with the final target's depth surface.
+//   skipDepthTexture - true skips the depth texture update.
+//   afterOpaque      - false additionally passes kRenderFlagSetRenderTargetFinal to SetupRender.
+static void UpdateCameraDepthTextures (Camera& camera, RenderTexture* rtDepth, RenderTexture* rtDepthNormals, RenderObjectDataContainer& objects, bool depthWasCopied, bool skipDepthTexture, bool afterOpaque)
+{
+	if (rtDepth == NULL || objects.size() == 0)
+		return;
+
+	// use depth buffer from final target
+	RenderTexture* finalTarget = camera.GetCurrentTargetTexture();
+	Assert (finalTarget);
+	RenderSurfaceHandle finalDepthSurface = finalTarget->GetDepthSurfaceHandle();
+
+	int setupFlags = Camera::kRenderFlagSetRenderTarget;
+	if (afterOpaque == false)
+		setupFlags |= Camera::kRenderFlagSetRenderTargetFinal;
+
+	// Depth texture
+	const bool wantsDepth = !skipDepthTexture && gGraphicsCaps.hasStencilInDepthTexture && (camera.GetDepthTextureMode() & Camera::kDepthTexDepthBit);
+	Shader* depthShader = wantsDepth ? GetCameraDepthTextureShader () : NULL;
+	if (depthShader)
+	{
+		PROFILER_AUTO_GFX(gPrePassFwdDepthTex, &camera);
+		// If we did separate pass or depth resolve in deferred to work around depth+stencil texture bugs,
+		// render into the copy in that case.
+		if (!depthWasCopied)
+		{
+			RenderSurfaceHandle depthColorSurface = rtDepth->GetColorSurfaceHandle();
+			RenderTexture::SetActive (1, &depthColorSurface, finalDepthSurface, rtDepth);
+		}
+		else
+		{
+			RenderTexture::SetActive (rtDepth);
+		}
+
+		RenderSceneShaderReplacement (objects, depthShader, "RenderType");
+		camera.SetupRender (setupFlags);
+	}
+
+	// Depth+normals texture
+	const bool wantsDepthNormals = rtDepthNormals && (camera.GetDepthTextureMode() & Camera::kDepthTexDepthNormalsBit);
+	Shader* depthNormalsShader = wantsDepthNormals ? GetCameraDepthNormalsTextureShader () : NULL;
+	if (depthNormalsShader)
+	{
+		PROFILER_AUTO_GFX(gPrePassFwdDepthNormalsTex, &camera);
+		RenderSurfaceHandle normalsColorSurface = rtDepthNormals->GetColorSurfaceHandle();
+		RenderTexture::SetActive (1, &normalsColorSurface, finalDepthSurface, rtDepthNormals);
+
+		RenderSceneShaderReplacement (objects, depthNormalsShader, "RenderType");
+		camera.SetupRender (setupFlags);
+	}
+}
+
+// Global flag tracking whether a render loop is currently in progress.
+// StartRenderLoop()/EndRenderLoop() must be called in matching, non-nested pairs;
+// the asserts catch a second Start without End and an End without Start.
+bool gInsideRenderLoop = false;
+// Marks the beginning of a render loop.
+void StartRenderLoop()
+{
+	Assert (!gInsideRenderLoop);
+	gInsideRenderLoop = true;
+}
+// Marks the end of a render loop.
+void EndRenderLoop()
+{
+	Assert (gInsideRenderLoop);
+	gInsideRenderLoop = false;
+}
+// Returns true between StartRenderLoop() and EndRenderLoop().
+bool IsInsideRenderLoop()
+{
+	return gInsideRenderLoop;
+}
+
+// Renders one camera's culled scene.
+// Steps:
+//   1. Collect one RenderObjectData per (visible renderer, material) pair and sort it into
+//      the opaque or after-opaque list by render queue.
+//   2. Render the opaque part with the requested path (pre-pass, forward or vertex).
+//   3. Render the skybox, then the after-opaque image filters.
+//   4. Render the after-opaque part with a forward path and update the camera depth textures.
+// Parameters:
+//   loop                  - render loop of the camera; must have no temp buffers registered.
+//   renderPath            - path used for the opaque part.
+//   contents              - cull results (visible nodes, active lights, shader replacement data);
+//                           contents.shadowCullData must be non-NULL.
+//   dontRenderRenderables - stored into the loop context for the render passes.
+void DoRenderLoop (
+	RenderLoop& loop,
+	RenderingPath renderPath,
+	CullResults& contents,
+	bool dontRenderRenderables)
+{
+	Assert (loop.m_TempBufferCount == 0);
+	Assert (contents.shadowCullData);
+
+	// Only valid for the duration of this call; reset to NULL at the end.
+	loop.m_Context.m_ShadowCullData = contents.shadowCullData;
+	loop.m_Context.m_CullResults = &contents;
+
+	// save wireframe state, restore at exit
+	SetAndRestoreWireframeMode saveAndRestoreWireframe;
+
+	const bool licenseAllowsStaticBatching = GetBuildSettings().hasAdvancedVersion;
+	Camera& camera = *loop.m_Context.m_Camera;
+
+	Shader* replacementShader = contents.shaderReplaceData.replacementShader;
+	const bool replacementTagSet = contents.shaderReplaceData.replacementTagSet;
+	const int replacementTagID = contents.shaderReplaceData.replacementTagID;
+
+
+	{
+		PROFILER_AUTO(gRenderPrepareObjects, &camera);
+
+		// The Shader* converts to bool: true when a replacement shader is in use.
+		loop.PrepareFrame (dontRenderRenderables, replacementShader);
+
+		const bool useOldRenderQueueLogic = !IS_CONTENT_NEWER_OR_SAME (kUnityVersion4_2_a1);
+
+		// sort out objects into opaque & alpha parts
+		VisibleNodes::iterator itEnd = contents.nodes.end();
+		for (VisibleNodes::iterator it = contents.nodes.begin(); it != itEnd; ++it)
+		{
+			// Nodes with a non-finite inverse scale are not rendered at all.
+			if (!IsFinite(it->invScale))
+				continue;
+
+			BaseRenderer* renderer = it->renderer;
+
+			float distanceForSort, distanceAlongView;
+			EvaluateObjectDepth (loop.m_Context, *it, distanceForSort, distanceAlongView);
+			distanceForSort -= renderer->GetSortingFudge ();
+			distanceAlongView = -distanceAlongView; // make that so increases with distance
+
+			const int matCount = renderer->GetMaterialCount();
+			// Static batching is only honoured when the license allows it; 0 otherwise.
+			const int batchIndex = (licenseAllowsStaticBatching)? renderer->GetStaticBatchIndex(): 0;
+			const UInt16 lightmapIndex = renderer->GetLightmapIndex();
+
+			// One render object per material slot of the renderer.
+			for (int mi = 0; mi < matCount; ++mi)
+			{
+				Material* mat = renderer->GetMaterial (mi);
+				if( mat == NULL )
+					mat = Material::GetDefault();
+				Shader* shader = mat->GetShader();
+
+				// With shader replacement, pick the replacement subshader. When a replacement
+				// tag is set, objects whose shader lacks the tag, or for whose tag value the
+				// replacement shader has no subshader, are skipped.
+				int usedSubshaderIndex = -1;
+				if (replacementShader)
+				{
+					if (replacementTagSet)
+					{
+						int subshaderTypeID = shader->GetShaderLabShader()->GetTag (replacementTagID, true);
+						if (subshaderTypeID < 0)
+							continue; // skip rendering
+						usedSubshaderIndex = replacementShader->GetSubShaderWithTagValue (replacementTagID, subshaderTypeID);
+						if (usedSubshaderIndex == -1)
+							continue; // skip rendering
+					}
+					else
+					{
+						usedSubshaderIndex = 0;
+					}
+				}
+
+				const int matIndex = renderer->GetSubsetIndex(mi);
+
+				// Figure out rendering queue to use
+				int queueIndex = mat->GetCustomRenderQueue(); // any per-material overriden queue takes priority
+				if (queueIndex < 0)
+				{
+					// When no shader replacement or old content, take queue from the shader
+					if (!replacementShader || useOldRenderQueueLogic)
+					{
+						queueIndex = shader->GetShaderLabShader()->GetRenderQueue();
+					}
+					// Otherwise take from replacement shader
+					else
+					{
+						queueIndex = replacementShader->GetShaderLabShader()->GetRenderQueue(usedSubshaderIndex);
+					}
+				}
+
+				// Everything up to the end of the geometry queue range counts as opaque.
+				RenderPart part;
+				if (queueIndex <= kGeometryQueueIndexMax)
+					part = kPartOpaque;
+				else
+					part = kPartAfterOpaque;
+
+				RenderObjectData& odata = loop.m_Objects[part].push_back ();
+				DebugAssertIf (!mat);
+				odata.material = mat;
+				odata.queueIndex = queueIndex;
+				odata.subsetIndex = matIndex;
+				odata.subShaderIndex = usedSubshaderIndex;
+				odata.sourceMaterialIndex = (UInt16)mi;
+				odata.lightmapIndex = lightmapIndex;
+				odata.staticBatchIndex = batchIndex;
+				odata.distance = distanceForSort;
+				odata.distanceAlongView = distanceAlongView;
+				odata.visibleNode = &*it;
+				odata.shader = replacementShader ? replacementShader : shader;
+				odata.globalLayeringData = renderer->GetGlobalLayeringData();
+			}
+		}
+	}
+
+	// want linear lighting?
+	bool linearLighting = GetActiveColorSpace() == kLinearColorSpace;
+
+	// opaque: deferred or forward
+	RenderTexture *rtDepth = NULL, *rtDepthNormals = NULL;
+	bool prepassDepthWasCopied = false;
+	{
+		PROFILER_AUTO_GFX(gRenderOpaque, &camera);
+
+		loop.m_Context.m_RenderQueueStart = kQueueIndexMin; loop.m_Context.m_RenderQueueEnd = kGeometryQueueIndexMax+1;
+		if (renderPath == kRenderPathPrePass)
+		{
+			#if GFX_SUPPORTS_RENDERLOOP_PREPASS
+			RenderTexture *rtShadowMap = NULL;
+			RenderObjectDataContainer remainingObjects;
+			DoPrePassRenderLoop (loop.m_Context, loop.m_Objects[kPartOpaque], remainingObjects, rtDepth, rtDepthNormals, rtShadowMap, contents.activeLights, linearLighting, &prepassDepthWasCopied);
+			if (remainingObjects.size() != 0)
+			{
+				// Objects/shaders that don't handle deferred: render with forward path, and pass main shadowmap to it
+				// Also disable dynamic batching of those objects. They are already rendered into
+				// the depth buffer, and dynamic batching would make them be rendered at slightly
+				// different positions, failing depth test at places.
+				DoForwardShaderRenderLoop (loop.m_Context, remainingObjects, true, true, rtShadowMap, contents.activeLights, linearLighting, false);
+
+				// The depth texture itself is skipped here (skipDepthTexture = true).
+				UpdateCameraDepthTextures (camera, rtDepth, rtDepthNormals, remainingObjects, prepassDepthWasCopied, true, true);
+			}
+			#else
+			ErrorString ("Pre-pass rendering loop should never happen on this platform!");
+			#endif
+		}
+		else if (renderPath == kRenderPathForward)
+		{
+			DoForwardShaderRenderLoop (loop.m_Context, loop.m_Objects[kPartOpaque], true, false, NULL, contents.activeLights, linearLighting, true);
+		}
+		else
+		{
+			DoForwardVertexRenderLoop (loop.m_Context, loop.m_Objects[kPartOpaque], true, contents.activeLights, linearLighting, true);
+		}
+	}
+
+	// render skybox after opaque (sRGB conversions needed if using linear rendering)
+	{
+		GetGfxDevice().SetSRGBWrite(linearLighting);
+		camera.RenderSkybox();
+		GetGfxDevice().SetSRGBWrite(false);
+	}
+
+	RenderImageFilters (loop, camera.GetTargetTexture(), true);
+
+	// after opaque: forward
+	{
+		PROFILER_AUTO_GFX(gRenderTransparent, &camera);
+
+		loop.m_Context.m_RenderQueueStart = kGeometryQueueIndexMax+1; loop.m_Context.m_RenderQueueEnd = kQueueIndexMax;
+		// The pre-pass path also renders its after-opaque objects with the forward shader loop.
+		if (renderPath != kRenderPathVertex)
+		{
+			DoForwardShaderRenderLoop (loop.m_Context, loop.m_Objects[kPartAfterOpaque], false, false, NULL, contents.activeLights, linearLighting, false);
+		}
+		else
+		{
+			DoForwardVertexRenderLoop (loop.m_Context, loop.m_Objects[kPartAfterOpaque], false, contents.activeLights, linearLighting, false);
+		}
+
+		// No-op unless the pre-pass path produced a depth texture (rtDepth stays NULL otherwise).
+		UpdateCameraDepthTextures (camera, rtDepth, rtDepthNormals, loop.m_Objects[kPartAfterOpaque], prepassDepthWasCopied, false, false);
+	}
+
+	loop.m_Context.m_ShadowCullData = NULL;
+	loop.m_Context.m_CullResults = NULL;
+}
+
+// Releases every temporary render texture that was registered with the loop through
+// AddRenderLoopTempBuffer(), clears the slots and resets the count, then clears the
+// grab pass frame state.
+void CleanupAfterRenderLoop (RenderLoop& loop)
+{
+	// AddRenderLoopTempBuffer() accepts a buffer while the count is below
+	// kMaxCreatedTempBuffers, so a completely full list (count == kMaxCreatedTempBuffers)
+	// is a valid state here and must not trip the assert.
+	Assert (loop.m_TempBufferCount >= 0 && loop.m_TempBufferCount <= RenderLoop::kMaxCreatedTempBuffers);
+	RenderBufferManager& rbm = GetRenderBufferManager();
+	for (int i = 0; i < loop.m_TempBufferCount; ++i) {
+		Assert (loop.m_TempBuffers[i]);
+		rbm.ReleaseTempBuffer (loop.m_TempBuffers[i]);
+		// Slots must be NULL again before the next frame is prepared.
+		loop.m_TempBuffers[i] = NULL;
+	}
+	loop.m_TempBufferCount = 0;
+	ShaderLab::ClearGrabPassFrameState();
+}
+
+// Registers a temporary render texture with the loop so that it stays alive until
+// CleanupAfterRenderLoop() releases it.
+//   loop - render loop to register with; must not be NULL.
+//   rt   - temporary render texture; must not be NULL.
+// At most RenderLoop::kMaxCreatedTempBuffers buffers can be registered per frame.
+void AddRenderLoopTempBuffer (RenderLoop* loop, RenderTexture* rt)
+{
+	Assert (loop && rt);
+	if (!loop || !rt)
+		return;
+
+	Assert (loop->m_TempBufferCount < RenderLoop::kMaxCreatedTempBuffers);
+	// Guard the write even where Assert does not stop execution: storing past the end of
+	// m_TempBuffers would corrupt the members that follow the array. The buffer is then
+	// simply not tracked by this loop.
+	if (loop->m_TempBufferCount >= RenderLoop::kMaxCreatedTempBuffers)
+	{
+		ErrorString ("Too many temporary render loop buffers; buffer will not be tracked by the render loop");
+		return;
+	}
+
+	loop->m_TempBuffers[loop->m_TempBufferCount++] = rt;
+}
diff --git a/Runtime/Camera/RenderLoops/RenderLoopPrivate.h b/Runtime/Camera/RenderLoops/RenderLoopPrivate.h
new file mode 100644
index 0000000..c64927c
--- /dev/null
+++ b/Runtime/Camera/RenderLoops/RenderLoopPrivate.h
@@ -0,0 +1,86 @@
+#pragma once
+
+#include "Runtime/Math/Matrix4x4.h"
+#include "Runtime/Math/Vector3.h"
+#include "Runtime/Math/Rect.h"
+#include "Runtime/Camera/CullResults.h"
+#include "GlobalLayeringData.h"
+
+namespace Unity { class Material; }
+class Camera;
+class Shader;
+class RenderTexture;
+struct ShadowCullData;
+struct RenderLoop;
+
+struct RenderObjectData {	// One render loop entry: a material of a visible node's renderer. Trailing numbers are per-field byte sizes (they assume 4-byte pointers).
+	Unity::Material*	material;	// 4
+	SInt16		queueIndex;			// 2
+	UInt16		subsetIndex;		// 2
+	SInt16		subShaderIndex;		// 2
+	UInt16		sourceMaterialIndex;// 2	index passed to the renderer's GetMaterial() by the replacement render loop
+	UInt16		lightmapIndex;		// 2
+	int			staticBatchIndex;	// 4
+	float		distance;			// 4	reused as the sort distance by the replacement render loop
+
+	//@TODO: cold?
+	float		 distanceAlongView;	// 4
+	VisibleNode* visibleNode;		// 4	node whose renderer is used to draw this entry
+	Shader*		shader;				// 4	shader to use
+	GlobalLayeringData
+				globalLayeringData; // 4
+	// NOTE(review): the sizes listed above add up to 38 bytes (40 if staticBatchIndex is 4-byte aligned), not the 36 originally stated here — confirm.
+};
+
+enum RenderPart { kPartOpaque, kPartAfterOpaque, kPartCount };	// indices into the render loop's per-part object lists (loop.m_Objects[...])
+
+typedef dynamic_array<RenderObjectData> RenderObjectDataContainer;	// object list type taken by the Do*RenderLoop functions declared below
+
+struct RenderLoopContext	// Shared state handed (by reference) to the Do*RenderLoop functions declared in this header.
+{
+	Camera*			m_Camera;
+	
+	const CullResults*    m_CullResults;	// set back to NULL by the code driving the render loop once rendering is done
+	const ShadowCullData* m_ShadowCullData;	// set back to NULL by the code driving the render loop once rendering is done
+	Matrix4x4f		m_CurCameraMatrix;
+	Rectf			m_CameraViewport;
+	Vector3f		m_CurCameraPos;
+	bool			m_SortOrthographic;
+	bool			m_DontRenderRenderables;
+	bool			m_RenderingShaderReplace;
+
+	int				m_RenderQueueStart;	// render queue index range of the current pass, e.g. kGeometryQueueIndexMax+1 ...
+	int				m_RenderQueueEnd;	// ... to kQueueIndexMax for the after-opaque pass
+	
+	RenderLoop*		m_RenderLoop;	// NOTE(review): presumably the RenderLoop that owns this context (loop.m_Context) — confirm
+};
+
+void AddRenderLoopTempBuffer (RenderLoop* loop, RenderTexture* rt);	// registers rt in loop's temp buffer list; both pointers must be non-NULL (asserted)
+
+void DoForwardVertexRenderLoop (RenderLoopContext& ctx, RenderObjectDataContainer& objects, bool opaque, ActiveLights& activeLights, bool linearLighting, bool clearFrameBuffer);	// forward loop used when the render path is kRenderPathVertex
+void DoForwardShaderRenderLoop (	// forward loop used when the render path is not kRenderPathVertex
+	RenderLoopContext& ctx,
+	RenderObjectDataContainer& objects,
+	bool opaque,	// the after-opaque pass (kPartAfterOpaque objects) passes false
+	bool disableDynamicBatching,
+	RenderTexture* mainShadowMap,	// callers may pass NULL (the after-opaque pass does)
+	ActiveLights& activeLights,
+	bool linearLighting,
+	bool clearFrameBuffer);	// the after-opaque pass passes false
+
+void DoPrePassRenderLoop (	// NOTE(review): definition is not in this header; the notes below are read off the signature only — confirm
+	  RenderLoopContext& ctx,
+	  RenderObjectDataContainer& objects,
+	  RenderObjectDataContainer& outRemainingObjects,	// presumably receives the objects this loop did not render
+	  RenderTexture*& outDepthRT,	// pointer passed by reference so the callee can hand a texture back
+	  RenderTexture*& outDepthNormalsRT,	// pointer passed by reference so the callee can hand a texture back
+	  RenderTexture*& outMainShadowMap,	// pointer passed by reference so the callee can hand a texture back
+	  ActiveLights& activeLights,
+	  bool linearLighting,
+	  bool* outDepthWasCopied);	// written through the pointer; whether NULL is accepted is not visible here
+
+// These are only usable by GfxDeviceGLES. Per the original note, GfxDeviceGLES only supports ForwardVertexRenderLoop, so these functions are only seen there.
+// While IsInsideRenderLoop() returns true, GfxDeviceGLES performs no state caching.
+void StartRenderLoop();
+void EndRenderLoop();
+bool IsInsideRenderLoop();
diff --git a/Runtime/Camera/RenderLoops/ReplacementRenderLoop.cpp b/Runtime/Camera/RenderLoops/ReplacementRenderLoop.cpp
new file mode 100644
index 0000000..fcba391
--- /dev/null
+++ b/Runtime/Camera/RenderLoops/ReplacementRenderLoop.cpp
@@ -0,0 +1,245 @@
+#include "UnityPrefix.h"
+#include "ReplacementRenderLoop.h"
+#include "Runtime/Camera/Renderqueue.h"
+#include "Runtime/Shaders/Material.h"
+#include "Runtime/Camera/BaseRenderer.h"
+#include "Runtime/Graphics/Transform.h"
+#include "Runtime/Camera/Camera.h"
+#include "External/shaderlab/Library/intshader.h"
+#include "External/shaderlab/Library/shaderlab.h"
+#include "Runtime/Shaders/Shader.h"
+#include "Runtime/Camera/RenderManager.h"
+#include "Runtime/Camera/UnityScene.h"
+#include "Runtime/GfxDevice/GfxDevice.h"
+
+
+struct RODataReplacement {	// One queued draw for the replacement render loop; filled in by AddReplacementObject.
+	float				distance;	// sort distance supplied by the caller
+	int					subshaderIndex;	// subshader of the replacement shader used for this draw
+	Material*			material;	// object's material (the default material if the object had none)
+	const VisibleNode*  visibleNode;	// node providing the renderer and the object matrix
+	Shader*				shader;	// the replacement shader
+	int					materialIndex;	// material index on the renderer; mapped to a subset index when rendering
+	GlobalLayeringData  globalLayeringData;	// first sort key (see ROSorterReplacement)
+};
+
+typedef UNITY_TEMP_VECTOR(RODataReplacement) RenderObjects;	// list of queued replacement draws
+
+struct ROSorterReplacement {	// comparison functor handed to std::sort by PerformRenderingReplacement
+	bool operator()( const RODataReplacement& ra, const RODataReplacement& rb ) const;	// true when ra is ordered before rb
+};
+
+
+bool ROSorterReplacement::operator()( const RODataReplacement& ra, const RODataReplacement& rb ) const
+{
+	// Sort keys in priority order: global layering data, subshader index, distance.
+	bool layeringOrder;
+	const bool layeringDecides = CompareGlobalLayeringData(ra.globalLayeringData, rb.globalLayeringData, layeringOrder);
+	if (layeringDecides)	//@TODO:should this be here?
+		return layeringOrder;
+
+	// Same subshader: the entry with the larger distance value goes first (noted as "front to back" by the original author).
+	if (ra.subshaderIndex == rb.subshaderIndex)
+		return ra.distance > rb.distance;
+	// Otherwise the lower subshader index goes first.
+	return ra.subshaderIndex < rb.subshaderIndex;
+}
+
+
+static inline float EvaluateObjectDepth (const Matrix4x4f& cameraMatrix, const TransformInfo& info)
+{
+	// Sort depth: z of the center of info.worldAABB after transforming it with cameraMatrix.
+	const float d = cameraMatrix.MultiplyPoint3( info.worldAABB.GetCenter() ).z;
+	Assert(IsFinite(d));
+	return d;
+}
+
+
+static void PerformRenderingReplacement (Camera& camera, const Matrix4x4f& curCameraMatrix, RenderObjects& renderData)
+{
+	// Sorts renderData in place with ROSorterReplacement, then draws every entry with all valid passes
+	// of its replacement subshader. The camera argument is not used by this function.
+	std::sort (renderData.begin(), renderData.end(), ROSorterReplacement());
+
+	GfxDevice& device = GetGfxDevice();
+	device.SetViewMatrix (curCameraMatrix.GetPtr());
+
+	const size_t objectCount = renderData.size();
+	for (size_t objectIndex = 0; objectIndex < objectCount; ++objectIndex)
+	{
+		const RODataReplacement& entry = renderData[objectIndex];
+		Shader* const replacementShader = entry.shader;
+		const int subshaderIndex = entry.subshaderIndex;
+
+		const VisibleNode* node = entry.visibleNode;
+		Assert (node);
+		BaseRenderer* renderer = node->renderer;
+		Assert (renderer);
+
+		device.SetInverseScale(1.0f);
+
+		//@TODO: if this returns true and we have any sort of batching, we'd have to break batches here
+		renderer->ApplyCustomProperties(*entry.material, replacementShader, subshaderIndex);
+
+		// One draw per valid pass; a pass whose setup yields no channel assignments is skipped.
+		const int passCount = replacementShader->GetShaderLabShader()->GetSubShader (subshaderIndex).GetValidPassCount();
+		for (int pass = 0; pass < passCount; ++pass)
+		{
+			const ChannelAssigns* channels = entry.material->SetPassWithShader(pass, replacementShader, subshaderIndex);
+			if (channels == NULL)
+				continue;
+			SetupObjectMatrix (node->worldMatrix, node->transformType);
+			renderer->Render( renderer->GetSubsetIndex(entry.materialIndex), *channels );
+		}
+	}
+}
+
+// Queues one (visible node, material) pair for rendering with replacementShader.
+//
+// A NULL mat falls back to Material::GetDefault(). The subshader of replacementShader is chosen as follows:
+//  - noReplacementTag set: subshader 0 is always used.
+//  - otherwise: the material's own shader is asked for its value of the tag replacementTagID and the
+//    replacement subshader carrying the same tag value is used. If the tag is missing, or no replacement
+//    subshader has that value, nothing is queued and the object is not rendered.
+//
+// Render queue ranges are deliberately not checked here; that is left to the replacement shader.
+// E.g. terrain billboard shaders are beyond the geometry queue, but still can output meaningful
+// depth/normals information.
+//
+// visibleNode, distanceForSort, materialIndex and globalLayeringData are copied onto the queued entry as is.
+static void AddReplacementObject (
+	RenderObjects& renderObjects,
+	Material* mat,
+	Shader* replacementShader,
+	bool noReplacementTag,
+	int replacementTagID,
+	const VisibleNode* visibleNode,
+	float distanceForSort,
+	int materialIndex,
+	GlobalLayeringData globalLayeringData)
+{
+	// Fall back to the default material when none was supplied.
+	if (mat == NULL)
+		mat = Material::GetDefault();
+	Shader* sourceShader = mat->GetShader();
+
+	// Pick the subshader of the replacement shader; bail out when the object cannot be matched.
+	int usedSubshaderIndex = 0;
+	if (!noReplacementTag)
+	{
+		// Tag value declared by the object's own shader (negative: the tag is not present).
+		const int subshaderTypeID = sourceShader->GetShaderLabShader()->GetTag (replacementTagID, true);
+		if (subshaderTypeID < 0)
+			return; // skip rendering
+
+		// Replacement subshader carrying the same tag value (-1: there is none).
+		usedSubshaderIndex = replacementShader->GetSubShaderWithTagValue (replacementTagID, subshaderTypeID);
+		if (usedSubshaderIndex == -1)
+			return; // skip rendering
+	}
+
+	// Debug-only sanity check that a material is present.
+	DebugAssertIf( !mat );
+
+	// Every field of the entry is assigned before it is appended.
+	RODataReplacement entry;
+	entry.distance = distanceForSort;
+	entry.subshaderIndex = usedSubshaderIndex;
+	entry.material = mat;
+	entry.visibleNode = visibleNode;
+	entry.shader = replacementShader;
+	entry.materialIndex = materialIndex;
+	entry.globalLayeringData = globalLayeringData;
+	renderObjects.push_back(entry);
+}
+
+void RenderSceneShaderReplacement (const VisibleNodes& contents, Shader* shader, const std::string& shaderReplaceTag)
+{
+	// Packs the shader and the tag name into a ShaderReplaceData and forwards to the overload taking one.
+	ShaderReplaceData replaceData;
+	replaceData.replacementTagID = ShaderLab::GetShaderTagID(shaderReplaceTag);
+	replaceData.replacementTagSet = (shaderReplaceTag.empty() == false);
+	replaceData.replacementShader = shader;
+	RenderSceneShaderReplacement(contents, replaceData);
+}
+
+	
+// Renders the visible nodes in contents with shaderReplace.replacementShader instead of their own shaders.
+// One candidate is queued per material of each node's renderer; candidates are filtered by
+// AddReplacementObject and then sorted and drawn by PerformRenderingReplacement using the current camera.
+void RenderSceneShaderReplacement (const VisibleNodes& contents, const ShaderReplaceData& shaderReplace)
+{
+	Assert (shaderReplace.replacementShader != NULL);
+
+	Shader* replacementShader = shaderReplace.replacementShader;
+	const int replacementTagID = shaderReplace.replacementTagID;
+	const bool noReplacementTag = !shaderReplace.replacementTagSet;
+
+	Camera& camera = GetRenderManager().GetCurrentCamera();
+	Matrix4x4f curCameraMatrix = camera.GetWorldToCameraMatrix();
+
+	RenderObjects renderObjects;
+	renderObjects.reserve (contents.size()/4);
+
+	// Queue a candidate for every material slot of every visible node.
+	const VisibleNodes::const_iterator nodesEnd = contents.end();
+	for (VisibleNodes::const_iterator node = contents.begin(); node != nodesEnd; ++node)
+	{
+		// All materials of a node share one sort distance, computed from the node itself.
+		const float distanceForSort = EvaluateObjectDepth (curCameraMatrix, *node);
+
+		// NOTE(review): unlike the RenderObjectDataContainer overload, renderer is not asserted before use here.
+		const BaseRenderer* renderer = node->renderer;
+		const int matCount = renderer->GetMaterialCount();
+		for (int mi = 0; mi < matCount; ++mi)
+		{
+			Material* mat = renderer->GetMaterial(mi);
+			AddReplacementObject (
+				renderObjects, mat, replacementShader,
+				noReplacementTag, replacementTagID,
+				&*node, distanceForSort, mi,
+				renderer->GetGlobalLayeringData());
+		}
+	}
+
+	// Render
+	PerformRenderingReplacement (camera, curCameraMatrix, renderObjects);
+}
+
+// Variant working on an already built render object list: every entry of contents is re-queued with
+// replacementShader, reusing the entry's visible node, sort distance and source material index.
+// An empty replacementTag disables tag matching (see AddReplacementObject).
+void RenderSceneShaderReplacement (const RenderObjectDataContainer& contents, Shader* replacementShader, const std::string& replacementTag)
+{
+	Assert (replacementShader);
+
+	const int replacementTagID = ShaderLab::GetShaderTagID(replacementTag);
+	const bool noReplacementTag = replacementTag.empty();
+
+	Camera& camera = GetRenderManager().GetCurrentCamera();
+	Matrix4x4f curCameraMatrix = camera.GetWorldToCameraMatrix();
+
+	RenderObjects renderObjects;
+	renderObjects.reserve (contents.size()/4);
+
+	const RenderObjectDataContainer::const_iterator objectsEnd = contents.end();
+	for (RenderObjectDataContainer::const_iterator it = contents.begin(); it != objectsEnd; ++it)
+	{
+		const VisibleNode* node = it->visibleNode;
+		const BaseRenderer* renderer = node->renderer;
+		Assert (renderer);
+
+		// The material is looked up again on the renderer; a NULL result is handled by AddReplacementObject.
+		const int materialIndex = it->sourceMaterialIndex;
+		Material* mat = renderer->GetMaterial(materialIndex);
+		AddReplacementObject (
+			renderObjects, mat, replacementShader,
+			noReplacementTag, replacementTagID,
+			node, it->distance, materialIndex,
+			renderer->GetGlobalLayeringData());
+	}
+
+	// Render
+	PerformRenderingReplacement (camera, curCameraMatrix, renderObjects);
+}
+
diff --git a/Runtime/Camera/RenderLoops/ReplacementRenderLoop.h b/Runtime/Camera/RenderLoops/ReplacementRenderLoop.h
new file mode 100644
index 0000000..0adce81
--- /dev/null
+++ b/Runtime/Camera/RenderLoops/ReplacementRenderLoop.h
@@ -0,0 +1,11 @@
+#pragma once
+
+#include "Runtime/Camera/CullResults.h"
+#include "RenderLoopPrivate.h"
+#include <string>
+
+class Shader;
+
+void RenderSceneShaderReplacement (const VisibleNodes& contents, const ShaderReplaceData& shaderReplace);	// renders contents with shaderReplace.replacementShader (asserted non-NULL)
+void RenderSceneShaderReplacement (const VisibleNodes& contents, Shader* shader, const std::string& shaderReplaceTag);	// convenience overload: builds the ShaderReplaceData from shader and tag name
+void RenderSceneShaderReplacement (const RenderObjectDataContainer& contents, Shader* shader, const std::string& shaderReplaceTag);	// same for an existing render object list; an empty tag disables tag matching