#version 430

#ifndef SPIRV_ENABLED
#extension GL_NV_gpu_shader5 : enable
#endif

#extension GL_EXT_conservative_depth: enable

#define GEOMETRY_INFORMATION_STATIC 1

#include <shaders/materials/commons.glsl>
#include <shaders/commons_hlsl.glsl>
#include <shaders/materials/noise/noise3d.glsl>
#include <shaders/materials/commons_instancing_buffers.h>

uniform sampler2D s_NoiseRGBA;

// Per-fragment inputs. The same member list is declared twice: as a named
// interface block when SPIRV_ENABLED is not defined, and as a struct with
// explicit locations when it is. Both variants expose the data as `vtx_input`
// plus a flat (non-interpolated) `instanceID`.
#ifndef SPIRV_ENABLED
in Vertex
{
	vec3 vCoords;
	f16vec3 vNorm;
	f16vec3 vWorldNorm;
	vec3 vLocalPos;
	vec3 vWorldPos;
	f16vec4 vColor;
	f16vec2 vUV0;
} vtx_input;
in flat uint instanceID;
#else
// SPIR-V path: explicit locations, instanceID at 0 and the struct starting at 1.
layout(location = 1) in struct
{
	vec3 vCoords;
	f16vec3 vNorm;
	f16vec3 vWorldNorm;
	vec3 vLocalPos;
	vec3 vWorldPos;
	f16vec4 vColor;
	f16vec2 vUV0;
} vtx_input;
layout(location = 0) in flat uint instanceID;
#endif

layout(std140, row_major) uniform TransformParamsBuffer{
	EntityTransformParams transform_params;
};

// Parameters controlling the raymarched function; uploaded through
// RaymarchParamsBuffer (std140).
struct RaymarchParams
{
	vec3  function_scale;        // multiplies the object-space ray origin; also sizes the clamp box (scale * 0.5 + 0.01)
	int   show_bounding_box;     // non-zero: fragments whose ray misses get a debug colour instead of being discarded
	vec3  function_origin;       // offset added to the ray origin before evaluating the function
	int   clamp_to_volume;       // non-zero: limit the trace distance with raymarchBBox()
	float param1;                // drives the fractal power in DE(): 3 + 4 * (clamp(param1, -1.3, 20) + 1)
	float param2;                // added to theta in every DE() iteration
	float param3;                // added to phi in every DE() iteration
	float param4;                // blend factor between white and the fractal colour in the albedo output
	int   use_instance_origin;   // non-zero: function_origin is transformed by the instance matrix and scaled by 0.001
	int   txt1_flip_y;           // not read by the code visible in this file
	int   txt2_flip_y;           // not read by the code visible in this file
	int   gradient_idx;          // not read by the code visible in this file
	vec4  near_far_plane;        // not read by the code visible in this file
	int   trace_inside;          // not read by the code visible in this file
	int   _pad0;                 // presumably padding to keep the std140 size a multiple of 16 bytes -- TODO confirm against the host struct
	int   _pad1;
	int   _pad2;
};

#note rename param1 Fractal Power
#note rename param2 Fractal Theta Shift
#note rename param3 Fractal Phi Shift
#note rename param4 rm_param4

layout(std140, row_major) uniform RaymarchParamsBuffer{
	RaymarchParams raymarch_params;
};

// Base material properties. With SPIRV_ENABLED they live in a std140 uniform
// block; otherwise they are plain uniforms with the same names.
//   colorDiffuse   - used as material.diffuse when MATERIAL_PROPERTIES_BINDING is not defined
//   materialId     - passed to encode_normal_material()
//   materialIndex  - index into materials.material_properties and passed to
//                    encode_metalness_roughness_material()
//   componentTags  - passed to encode_component_tags()
//   gUseDerivedNormal, gMaterialMode - not read by the code visible in this file
#ifdef SPIRV_ENABLED
layout(std140, row_major) uniform BaseMaterialPropertiesBuffer
{
	vec4 colorDiffuse;
	int gUseDerivedNormal;
	int gMaterialMode;
	int materialId;
	int materialIndex;
	int componentTags;
};
#else
uniform int gUseDerivedNormal = 0;
uniform int gMaterialMode = 0;
uniform vec4 colorDiffuse;
uniform int materialId;
uniform int materialIndex;
uniform int componentTags;
#endif

// NOTE: we try to always render using base object front faces so we have conservative depth
//       when inside the object we will just pass the define and have 2 shaders and use the second
//       one to not use the conservative depth

// render-target outputs (four attachments); not declared when SHADOWMAP_PASS is defined
#ifndef SHADOWMAP_PASS
layout(location = 0) out vec4 outAlbedo;
layout(location = 1) out uint outNormalMaterial;
layout(location = 2) out uvec4 outMetalnessRoughnessMeterialTags;
layout(location = 3) out uint outEmissive;
#endif

// basic template based on the shadertoy framework template

// Signed distance from point p to an axis-aligned box centred at the origin
// with half-extents b. Negative inside the box, positive outside.
float sdBox(vec3 p, vec3 b)
{
	vec3 d = abs(p) - b;
	float outside_part = length(max(d, 0.0));              // distance when p is outside the box
	float inside_part = min(max(d.x, max(d.y, d.z)), 0.0); // (negative) depth when p is inside the box
	return outside_part + inside_part;
}

// Signed distance from point p to a torus lying in the XZ plane and centred
// at the origin. t.x is the ring radius, t.y the tube radius.
float sdTorus( vec3 p, vec2 t )
{
	float ring_offset = length(p.xz) - t.x;
	return length(vec2(ring_offset, p.y)) - t.y;
}

// Sphere-traces from ro along rd against the field
//     max(sdBox(p, 1000), -sdBox(p, function_scale * 0.5 + 0.01))
// i.e. a very large box with the function's bounding box carved out of it.
//
// Returns the distance travelled when the march stops below the 2000 unit
// limit (surface reached, or the 32 step budget ran out), otherwise -1.0.
float raymarchBBox(vec3 ro, vec3 rd, vec3 function_scale)
{
	const float max_distance = 2000.0;  // max trace distance
	const float hit_epsilon = 0.001;    // precision of the intersection

	// half-extents of the carved-out bounding box
	vec3 inner_half_extents = function_scale * 0.5 + 0.01;

	float step_len = hit_epsilon * 2.0;
	float travelled = 0.0;
	for (int step = 0; step < 32; ++step)  // at most 32 marching steps
	{
		if (step_len < hit_epsilon || travelled > max_distance)
			break;

		vec3 p = ro + rd * travelled;
		float outer = sdBox(p, vec3(1000.0));
		float inner = sdBox(p, inner_half_extents);
		step_len = max(outer, -inner);
		travelled += step_len;
	}

	return (travelled < max_distance) ? travelled : -1.0;
}

#if 0 // sphere test

// Test scene (this region is compiled out by the surrounding #if 0):
// distance to a torus with ring radius 0.4 and tube radius 0.1.
float doModel(vec3 p)
{
	// The sphere term is disabled by pinning its distance to a large constant;
	// the original expression was length(p - vec3(0.0, 0.0, 0.0)) - 0.5.
	float sphere_dist = 99999.0;
	return min(sphere_dist, sdTorus(p, vec2(0.4, 0.1)));
}


// Sphere-traces doModel() from ro along rd (test variant, compiled out by the
// surrounding #if 0).
//
//   ro, rd  - ray origin and direction
//   max_t   - hits farther than this are rejected
//   color   - always written as vec4(1.0); previously it was left unwritten
//             when the ray missed, so callers read an undefined value
//
// Returns the hit distance, or -1.0 when the ray went past max_t or past the
// 2000 unit trace limit.
float calcIntersection(in vec3 ro, in vec3 rd, float max_t, out vec4 color)
{
	// define the out parameter on every path
	color = vec4(1.0);

	const float maxd = 2000.0;         // max trace distance
	const float precis = 0.001;        // precision of the intersection
	float h = precis*2.0;
	float t = 0.0;
	for(int i = 0; i < 900; i++)       // at most 900 marching steps
	{
		if(h<precis || t > maxd) break;
		h = doModel(ro + rd * t);
		t += h;
	}

	if (t > max_t)
		return -1.0;

	return (t < maxd) ? t : -1.0;
}

// Surface normal of doModel() at p from one-sided (backward) differences
// (test variant, compiled out by the surrounding #if 0).
// The unused `eps` constant was removed: it claimed a 0.002 step while the
// differences actually use 0.0001.
vec3 doModelNormal(vec3 p)
{
	const vec2 epsilon = vec2(.0001,0);  // x = finite-difference step, y = 0 for swizzling
	float d0 = doModel(p);
	vec3 d1 = vec3(
		doModel(p-epsilon.xyy),
		doModel(p-epsilon.yxy),
		doModel(p-epsilon.yyx));
	return normalize(d0 - d1);
}
#endif

#if 1

#define MARCHINGITERATIONS 64

#define MARCHINGSTEP 0.5
#define SMALLESTSTEP 0.1

#define DISTANCE 3.0

#define MAXMANDELBROTDIST 1.5

#ifdef DEFERRED_PASS
#define MANDELBROTSTEPS 64
#else
#define MANDELBROTSTEPS 32
#endif

#ifndef RAYMARCH_STEPS
#ifdef DEFERRED_PASS
#define RAYMARCH_STEPS 128
#else
#define RAYMARCH_STEPS 64
#endif
#endif

// Cheap sine-based pseudo-random hash: maps p to a value in [0, 1).
float hash(float p)
{
	float phase = dot(vec2(p), vec2(12.9898, 78.233));
	return fract(sin(phase) * 43758.5453);
}

// Distance estimator to a mandelbulb set.
//
//   pos - sample position in function space
//
// Returns the distance estimate to the set in .x and a colour value derived
// from the running derivative in .y.
//
// The fractal power is 3 + 4 * (clamp(param1, -1.3, 20) + 1), i.e. in
// [1.8, 87]; param2 / param3 are added to theta / phi on every iteration.
//
// Degenerate inputs are guarded so the result stays finite:
//   * r == 0 (sample exactly at the origin) no longer divides 0/0 or takes log(0)
//   * the acos() argument is clamped to [-1, 1] against rounding overshoot
//   * atan(0, 0), undefined in GLSL, is replaced by 0 for points on the z axis
vec2 DE(vec3 pos)
{
	const float MIN_RADIUS = 1e-8;  // lower bound used only where r is a divisor or log argument

	float Power = 3.0+4.0*(clamp(raymarch_params.param1, -1.3, 20.0) + 1.0);
	vec3 z = pos;
	float dr = 1.0;
	float r = 0.0;
	for (int i = 0; i < MANDELBROTSTEPS ; i++) {
		r = length(z);
		if (r>MAXMANDELBROTDIST) break;

		// convert to polar coordinates
		float safe_r = max(r, MIN_RADIUS);
		float theta = acos(clamp(z.z/safe_r, -1.0, 1.0));
		float phi = (z.x == 0.0 && z.y == 0.0) ? 0.0 : atan(z.y,z.x);
		dr =  pow( r, Power-1.0)*Power*dr + 1.0;

		// scale and rotate the point
		float zr = pow( r,Power);
		theta = theta * Power + raymarch_params.param2;
		phi   = phi * Power + raymarch_params.param3;

		// convert back to cartesian coordinates
		z = zr*vec3(sin(theta)*cos(phi), sin(phi)*sin(theta), cos(theta));
		z+=pos;
	}

	// r * log(r) tends to 0 as r -> 0; bounding the log argument keeps that limit finite
	float dist = 0.5*log(max(r, MIN_RADIUS))*r/dr;
	return vec2(dist,50.0*pow(dr,0.128/float(MARCHINGITERATIONS)));
}

// Scene mapping function.
// Returns the distance to the nearest surface from p in .x and the colour
// value in .y. The scene is just the DE() fractal; the second parameter is
// accepted but not used.
vec2 map( in vec3 p, vec3 dummy )
{
	return DE(p);
}

// Sphere-traces map() from ro along rd for at most RAYMARCH_STEPS steps.
//
//   ro, rd  - ray origin and direction in function space
//   max_t   - trace limit; a ray that reaches or passes it counts as a miss
//   color   - alpha is 1.0; rgb is the colour value of the last sample, with
//             fract() applied unless the ray went strictly past max_t
//
// Returns the distance travelled when it is below max_t, otherwise -1.0.
float calcIntersection(in vec3 ro, in vec3 rd, float max_t, out vec4 color)
{
	const float hit_epsilon = 0.0002;  // precision of the intersection

	color = vec4(1.0);

	float step_len = hit_epsilon * 2.0;
	float travelled = 0.0;
	for (int step = 0; step < RAYMARCH_STEPS; ++step)
	{
		if (step_len < hit_epsilon || travelled > max_t)
			break;

		vec3 unused_trap;
		vec2 estimate = map(ro + rd * travelled, unused_trap);
		step_len = estimate.x;
		color.rgb = vec3(estimate.y);
		travelled += step_len;
	}

	// overshoot: miss, colour left as written by the last sample
	if (travelled > max_t)
		return -1.0;

	color.rgb = fract(color.rgb);
	return (travelled < max_t) ? travelled : -1.0;
}

// Surface normal of map() at pos, using the four-sample tetrahedron technique
// (https://iquilezles.org/articles/normalsSDF).
vec3 doModelNormal(vec3 pos)
{
	const float h = 0.009;  // sampling offset along each tetrahedron direction

	// tetrahedron vertex directions
	const vec3 e0 = vec3( 1.0, -1.0, -1.0);
	const vec3 e1 = vec3(-1.0, -1.0,  1.0);
	const vec3 e2 = vec3(-1.0,  1.0, -1.0);
	const vec3 e3 = vec3( 1.0,  1.0,  1.0);

	vec3 unused;
	vec3 gradient = e0 * map(pos + e0 * h, unused).x;
	gradient += e1 * map(pos + e1 * h, unused).x;
	gradient += e2 * map(pos + e2 * h, unused).x;
	gradient += e3 * map(pos + e3 * h, unused).x;

	return normalize(gradient);
}

#endif

const int gWriteDepth = 1;

// Transpose of a 3x3 matrix: column j of the result collects component j of
// every input column.
mat3 transpose(mat3 matrix)
{
    vec3 c0 = matrix[0];
    vec3 c1 = matrix[1];
    vec3 c2 = matrix[2];
    return mat3(
        c0.x, c1.x, c2.x,
        c0.y, c1.y, c2.y,
        c0.z, c1.z, c2.z
    );
}

// Determinant of a 2x2 matrix.
float det(mat2 matrix) {
    float main_diagonal = matrix[0][0] * matrix[1][1];
    float anti_diagonal = matrix[0][1] * matrix[1][0];
    return main_diagonal - anti_diagonal;
}

// Inverse of a 3x3 matrix via the adjugate: the rows of the adjugate are the
// cross products of the input columns, and the determinant is the scalar
// triple product. No singularity check is made (a zero determinant divides
// by zero).
mat3 inverse(mat3 matrix)
{
    vec3 c0 = matrix[0];
    vec3 c1 = matrix[1];
    vec3 c2 = matrix[2];

    // cofactor vectors; each 2x2 minor is one component of a cross product
    vec3 cof0 = cross(c1, c2);
    vec3 cof1 = cross(c2, c0);
    vec3 cof2 = cross(c0, c1);

    float determinant = dot(c0, cof0);
    mat3 adjugate = transpose(mat3(cof0, cof1, cof2));

    return (1.0 / determinant) * adjugate;
}

layout (depth_greater) out float gl_FragDepth;

// Fragment entry point.
//
// 1. Builds the per-instance model matrix (identity, or rows fetched from
//    instance_transform when instancing is active), combines it with the
//    entity model matrix and inverts it.
// 2. Transforms the rasterised surface point and the camera into object
//    space to get the ray, then moves the ray into function space
//    (function_scale, function origin).
// 3. Sphere-traces the function, optionally limited by the bounding volume.
// 4. On a hit: writes albedo (plus the encoded normal in DEFERRED_PASS) and
//    the depth of the hit point. On a miss: discards, or draws a debug
//    colour when show_bounding_box is set.
//
// gl_FragDepth is declared depth_greater and is written on every
// non-discarding path. The debug path used to leave it unset, which makes
// the fragment depth undefined.
void main() 
{
#ifdef MATERIAL_PROPERTIES_BINDING
	MaterialPropertiesGPU material = materials.material_properties[materialIndex];
#else
	MaterialPropertiesGPU material;
	material.diffuse = colorDiffuse;
	material.emmisive = vec4(0.0f);
	material.metalness = 0.0f;
	material.roughness = 0.5f;
	material.transparency = 0.0f;
	material.refraction = 0.0f;
	material.flags = 0;
#endif

#ifndef DEFERRED_PASS
	vec4 outAlbedo = vec4(1.0);	// this is dummy, will be optimized out
#endif

	// NOTE: Whole instancing support is pretty expensive, but maybe it doesn't matter when the marching code is going to cost 10x of it?
	mat4 mat_instance_model = mat4(1.0);

	if (instance_params.stride > 0)
	{
		// the instance buffer stores three vec4 rows per instance; scatter them into columns
		vec4 inst_m0 = instance_transform[instanceID * instance_params.stride + 0];
		vec4 inst_m1 = instance_transform[instanceID * instance_params.stride + 1];
		vec4 inst_m2 = instance_transform[instanceID * instance_params.stride + 2];

		mat_instance_model[0].xyz = vec3(inst_m0.x, inst_m1.x, inst_m2.x);
		mat_instance_model[1].xyz = vec3(inst_m0.y, inst_m1.y, inst_m2.y);
		mat_instance_model[2].xyz = vec3(inst_m0.z, inst_m1.z, inst_m2.z);
		mat_instance_model[3].xyz = vec3(inst_m0.w, inst_m1.w, inst_m2.w);
	}

	// if we want to have each instance to be selfcontained. in case of continuous function this can be left out
	vec3 instance_function_origin = raymarch_params.function_origin;
	if (raymarch_params.use_instance_origin != 0)
	{
		instance_function_origin = (mat_instance_model * vec4(raymarch_params.function_origin, 1.0)).xyz * 0.001;
	}

	mat_instance_model = transform_params.mModel * mat_instance_model;

	// Inverse of the combined model matrix: invert the upper 3x3 and
	// back-transform the translation. The w components are copied unchanged
	// from the forward matrix.
	mat4 mat_instance_model_inv = mat_instance_model;
	{
		mat3 inv = inverse(mat3(mat_instance_model));
		mat_instance_model_inv[0].xyz = inv[0];
		mat_instance_model_inv[1].xyz = inv[1];
		mat_instance_model_inv[2].xyz = inv[2];
		mat_instance_model_inv[3].xyz = -(inv * mat_instance_model[3].xyz);
	}

#ifdef DEFERRED_PASS
	outMetalnessRoughnessMeterialTags.rg = encode_metalness_roughness_material(material.metalness, material.roughness, materialIndex);
	outMetalnessRoughnessMeterialTags.ba = encode_component_tags(componentTags);
#endif

	// NOTE: when rendering to the shadowmap we should be rendering backfaces
	//       so maybe just mirror the camera and render 'back' towards the real one
	// (a non-instanced variant would use transform_params.mModelInv here instead)
	vec3 ro = (mat_instance_model_inv * vec4(vtx_input.vWorldPos.xyz, 1.0)).xyz; // start tracing at the cube surface. still should clamp at the "outgoing face"
	vec3 camera_local = (mat_instance_model_inv * vec4(transform_params.vCameraPosition, 1.0)).xyz;
	vec3 rd = normalize(ro - camera_local);

	// object space -> function space
	ro *= raymarch_params.function_scale;
	vec3 ro_bounding_volume = ro;
	ro += instance_function_origin;

	vec4 color = vec4(1.0);
	float bounding_volume_intersection = 100000.0;
	if (raymarch_params.clamp_to_volume != 0)
		bounding_volume_intersection = raymarchBBox(ro_bounding_volume , rd, raymarch_params.function_scale);	// NOTE: yeah, we could just intersect analytically, but i want to have possibility to use custom shapes here

	float intersection = calcIntersection(ro, rd, bounding_volume_intersection, color);
	if (intersection > 0.0)
	{
		vec3 ri = ro + intersection * rd;

#ifdef DEFERRED_PASS
		// NOTE(review): the normal is transformed with the forward model matrix and
		// function_scale is ignored; this looks exact only for rotation + uniform scale -- confirm
		vec3 normal = doModelNormal(ri);
		vec3 world_normal = (mat_instance_model * vec4(normal, 0.0)).xyz;
		outNormalMaterial = encode_normal_material(normalize(world_normal), materialId);
#endif

		// function space -> object space -> view space
		ri -= instance_function_origin;
		ri /= raymarch_params.function_scale;

		ri = (transform_params.mView * mat_instance_model * vec4(ri, 1.0)).xyz;
		outAlbedo.rgb = mix(vec3(1.0), color.rgb, raymarch_params.param4);
	
		if (gWriteDepth != 0)
		{
			float near = -1.0;	// this is depth range, not the projection
			float far  =  1.0;
			vec4 clip = transform_params.mProjection * vec4(ri, 1.0);	// projected once, reused for z and w
			float depth = clip.z / clip.w;
			depth = (((far - near) * depth) + near + far) / 2.0;
			gl_FragDepth = depth;
		}
	}
	else
	{
		if (raymarch_params.show_bounding_box != 0)
		{
			// debug visualisation of the bounding mesh (alpha 1.0, as before)
			outAlbedo = vec4(fract(vtx_input.vCoords.xyz * 0.01), 1.0);

			// keep the rasterised depth: once a shader statically writes gl_FragDepth,
			// any path that leaves it unset produces an undefined depth
			if (gWriteDepth != 0)
				gl_FragDepth = gl_FragCoord.z;
		}
		else
			discard;
	}
}

