#pragma OPENCL EXTENSION cl_amd_printf : enable

// Component-wise x modulo 289, computed as x - 289 * floor(x / 289).
// The literals carry an 'f' suffix so the scalar operands are float, matching
// the float3 element type instead of being double-typed constants.
float3 mod289f3(float3 x)
{
  return x - floor(x * (1.0f / 289.0f)) * 289.0f;
}

// Component-wise x modulo 289, computed as x - 289 * floor(x / 289).
// The literals carry an 'f' suffix so the scalar operands are float, matching
// the float2 element type instead of being double-typed constants.
float2 mod289f2(float2 x)
{
  return x - floor(x * (1.0f / 289.0f)) * 289.0f;
}

// Permutation polynomial evaluated per component: (34 * x^2 + x) mod 289.
// snoise() applies it twice to scramble the lattice cell indices.
// Float-suffixed literals keep the scalar operands in single precision.
float3 permute(float3 x)
{
  return mod289f3(((x * 34.0f) + 1.0f) * x);
}

// 2D gradient noise evaluated on a skewed (triangular) grid: the three
// corners of the cell containing v each contribute a gradient weighted by a
// radial falloff, and the sum is scaled by 130.
//
// v      : sample position.
// return : 130 * dot(falloff weights, corner gradient contributions).
//          NOTE(review): presumably about [-1, 1] -- fBm_noise() remaps its
//          octave sum as if that were the case; confirm.
//
// All literals are float-suffixed so scalar operands stay in single
// precision and match the float vector element type.
float snoise(float2 v)
{
  const float4 C = (float4)(0.211324865405187f,  // (3.0-sqrt(3.0))/6.0
                            0.366025403784439f,  // 0.5*(sqrt(3.0)-1.0)
                           -0.577350269189626f,  // -1.0 + 2.0 * C.x
                            0.024390243902439f); // 1.0 / 41.0

// First corner: skew v by C.y to find the containing cell i, then take the
// offset x0 of v from that cell's origin (unskewed by C.x).
  float2 i  = floor(v + dot(v, C.yy));
  float2 x0 = v - i + dot(i, C.xx);

// Other corners: i1 is (1,0) when x0.x > x0.y, otherwise (0,1).
  float2 i1 = (x0.x > x0.y) ? (float2)(1.0f, 0.0f) : (float2)(0.0f, 1.0f);
  // x0 = x0 - 0.0 + 0.0 * C.xx ;
  // x1 = x0 - i1  + 1.0 * C.xx ;
  // x2 = x0 - 1.0 + 2.0 * C.xx ;
  // x12 holds x1 in .xy and x2 in .zw (C.z == -1.0 + 2.0 * C.x).
  float4 x12 = x0.xyxy + C.xxzz;
  x12.xy -= i1;

// Permutations
  i = mod289f2(i); // Avoid truncation effects in permutation
  float3 p = permute( permute( i.y + (float3)(0.0f, i1.y, 1.0f ))
                      + i.x + (float3)(0.0f, i1.x, 1.0f ));

// Radial falloff per corner: max(0.5 - |offset|^2, 0) raised to the 4th power.
  float3 m = max(0.5f - (float3)(dot(x0,x0), dot(x12.xy,x12.xy), dot(x12.zw,x12.zw)), 0.0f);
  m = m*m ;
  m = m*m ;

// Gradients: 41 points uniformly over a line, mapped onto a diamond.
// The ring size 17*17 = 289 is close to a multiple of 41 (41*7 = 287)

  // fract() also writes floor(p * C.www) through its pointer argument; that
  // value is not needed here.
  float3 discardedFloor;
  float3 x = 2.0f * fract(p * C.www, &discardedFloor) - 1.0f;

  float3 h = fabs(x) - 0.5f;
  float3 ox = floor(x + 0.5f);
  float3 a0 = x - ox;

// Normalise gradients implicitly by scaling m
// Approximation of: m *= inversesqrt( a0*a0 + h*h );
  m *= 1.79284291400159f - 0.85373472095314f * ( a0*a0 + h*h );

// Compute final noise value at P
  float3 g;
  g.x  = a0.x  * x0.x  + h.x  * x0.y;
  g.yz = a0.yz * x12.xz + h.yz * x12.yw;
  return 130.0f * dot(m, g);
}

// Sums four octaves of snoise(): the frequency doubles and the amplitude
// halves with each octave (1, 0.5, 0.25, 0.125).
// The sum is divided by 1.875 (the total of the four amplitudes) and then
// remapped with * 0.5 + 0.5.
// All constants are float-suffixed so the scalar arithmetic stays in single
// precision.
float fBm_noise(float2 x)
{
  float sum = 0.0f;
  float frequency = 1.0f;
  float amplitude = 1.0f;

  for (int octave = 0; octave < 4; ++octave)
  {
    sum += snoise(frequency * x) * amplitude;
    frequency *= 2.0f;
    amplitude *= 0.5f;
  }

  return (sum / 1.875f) * 0.5f + 0.5f;
}

// Assigns a noise-perturbed value to attributes[].x for particles that pass
// the limiter test, and marks them by setting attributes[].w to 10.
//
// positions     : read; .x is compared against limiter.x, .xy seeds the noise.
// attributes    : .w is read as a "do not touch" marker (> 1); .x and .w are written.
// newAttribute  : .x is the base value the noise offset is added to.
// limiter       : particles with positions[].x > limiter.x are skipped.
// workLimit     : work items with a global id at or above this do nothing.
// segmentEnd,
// segmentLength : an index at or past segmentEnd is shifted back by segmentLength.
// velocities, forces and deltaTime are not used by this kernel.
__kernel void SetLifeRandomizedLimiter(__global float4 *positions, __global float4 *velocities, __global float4 *attributes, __global float4 *forces, float deltaTime, float4 newAttribute, float4 limiter, int workLimit, int segmentEnd, int segmentLength)
{
	const size_t workItem = get_global_id(0);
	if(workItem >= workLimit) return;

	uint index = workItem;
	if(index >= segmentEnd) index -= segmentLength;

	// Skip particles beyond the limiter and particles already marked (w > 1).
	// Writing w = 10 below means a particle is only processed once until
	// something else lowers w again.
	if(positions[index].x > limiter.x || attributes[index].w > 1.0f) return;

	// fBm_noise() * 2 - 1 re-centres the noise value around zero.
	attributes[index].x = newAttribute.x + (fBm_noise(positions[index].xy) * 2.0f - 1.0f);
	attributes[index].w = 10.0f;
}

// Adds paramOne to the forces[] entry of each processed particle.
//
// workLimit     : work items with a global id at or above this do nothing.
// segmentEnd,
// segmentLength : an index at or past segmentEnd is shifted back by segmentLength.
// positions, velocities, attributes, deltaTime and paramTwo are not used here.
__kernel void LinearForce(__global float4 *positions, __global float4 *velocities, __global float4 *attributes, __global float4 *forces, float deltaTime, float4 paramOne, float4 paramTwo, int workLimit, int segmentEnd, int segmentLength)
{
	const size_t workItem = get_global_id(0);
	if(workItem >= workLimit) return;

	uint particle = workItem;
	if(particle >= segmentEnd) particle -= segmentLength;

	forces[particle] = forces[particle] + paramOne;
}

// Packs three 8-bit channel values into a single float:
//   ((x << 16) | (y << 8) | z) / 2^24
// which lies in [0, 1) and is exact, because a float represents every
// integer below 2^24 exactly.
//
// Each channel is saturated to 255 before packing, so an out-of-range
// argument cannot spill into a neighbouring channel or push the packed
// integer past 24 bits.
float PackToFloat(unsigned int x, unsigned int y, unsigned int z)
{
	const unsigned int channelMax = 255u;

	if (x > channelMax) x = channelMax;
	if (y > channelMax) y = channelMax;
	if (z > channelMax) z = channelMax;

	const unsigned int packedColor = (x << 16) | (y << 8) | z;

	return (float)packedColor / 16777216.0f; // 16777216 == 1 << 24
}

// Writes a packed colour into positions[].w, derived from attributes[].x.
//
// When attributes[].w > 1 the first colour channel is
// clamp(attributes[].x / maxLife.x, 0, 1) * 255; otherwise it is 255.
// The other two channels are always 0.
//
// workLimit     : work items with a global id at or above this do nothing.
// segmentEnd,
// segmentLength : an index at or past segmentEnd is shifted back by segmentLength.
// velocities, forces, deltaTime and limiter are not used by this kernel.
// NOTE(review): maxLife.x == 0 is not guarded against -- confirm callers
// never pass it.
__kernel void SetColorFromLife(__global float4 *positions, __global float4 *velocities, __global float4 *attributes, __global float4 *forces, float deltaTime, float4 maxLife, float4 limiter, int workLimit, int segmentEnd, int segmentLength)
{
	const size_t workItem = get_global_id(0);
	if(workItem >= workLimit) return;

	uint index = workItem;
	if(index >= segmentEnd) index -= segmentLength;

	uint level = 255u;
	if(attributes[index].w > 1.0f)
	{
		level = convert_uint(clamp(attributes[index].x / maxLife.x, 0.0f, 1.0f) * 255.0f);
	}

	positions[index].w = PackToFloat(level, 0, 0);
}

// Writes a packed two-channel value into positions[].w:
//   first channel  = attributes[].y * 255
//   second channel = 240 - (attributes[].x / 20) * 220
// Both are clamped to [0, 255] before the integer conversion, so a negative
// or oversized input cannot produce an out-of-range convert_uint() or
// corrupt the neighbouring channel in the packed value.
//
// workLimit     : work items with a global id at or above this do nothing.
// segmentEnd,
// segmentLength : an index at or past segmentEnd is shifted back by segmentLength.
// velocities, forces, deltaTime, maxLife and limiter are not used by this kernel.
__kernel void SetSizeColorTrails(__global float4 *positions, __global float4 *velocities, __global float4 *attributes, __global float4 *forces, float deltaTime, float4 maxLife, float4 limiter, int workLimit, int segmentEnd, int segmentLength)
{
	const size_t workItem = get_global_id(0);
	if(workItem >= workLimit) return;

	uint index = workItem;
	if(index >= segmentEnd) index -= segmentLength;

	const float firstChannel  = clamp(attributes[index].y * 255.0f, 0.0f, 255.0f);
	const float secondChannel = clamp(240.0f - (attributes[index].x / 20.0f) * 220.0f, 0.0f, 255.0f);

	positions[index].w = PackToFloat(convert_uint(firstChannel), convert_uint(secondChannel), 0);
}