/*
 * Expands each particle into a four-vertex quad.
 *
 * One work-item handles one particle: the work-item's global id (dimension 0)
 * selects the entry of inParticlesData and the four consecutive entries of
 * outVbo starting at index * 4.
 *
 * Input particle lanes read by this kernel:
 *   s0..s2  base position (wrapped into [0,1) per axis after advection)
 *   s3      per-particle multiplier applied to the time value
 *   s4      quad half-size ("radius")
 *   s5..s7  not read here
 *
 * Output vertex lanes written by this kernel:
 *   s0..s2  corner position
 *   s3      1.0
 *   s4..s5  corner coordinate in {0,1} x {0,1}
 *   s6..s7  0.0
 *
 * Parameters:
 *   volume           3D image sampled for a per-particle height value
 *   volumeSampler    sampler used for that lookup
 *                    NOTE(review): coordinates passed are in [0,1), so this
 *                    presumably expects a normalized-coordinate sampler - confirm
 *                    against the host code.
 *   inParticlesData  one float8 per particle (layout above)
 *   outVbo           four float8 vertices per particle (layout above)
 *   dimenTime        xyz = scale applied to the [-1,1) position, w = time
 *   windDir          xyz = direction the particle position moves over time
 *   deltaDir         xyz = direction scaled by the sampled height and added
 *                    to the final position
 */
__kernel void main(
					  __read_only image3d_t volume, 
					  sampler_t volumeSampler, 
					  __global float8 *inParticlesData, 
					  __global float8 *outVbo,
					  float4 dimenTime, // (x,y,z, time)
					  float4 windDir, // (x,y,z, 0.0)
					  float4 deltaDir // (x,y,z, 0.0)
					  )
{
	const uint index = get_global_id(0);

	const float3 dimen = dimenTime.xyz;
	const float time = dimenTime.w;

	// Unpack the particle record.
	const float8 particleData = inParticlesData[index];
	const float3 basePos = particleData.xyz;
	const float particleTimeCoeff = particleData.s3;
	const float particleRadius = particleData.s4;

	// Volume lookup: the sample point scrolls along -X over time, independently
	// of windDir, and is wrapped into [0,1) with x - floor(x).
	float3 samplePos = basePos;
	samplePos += (float3)(-1.0f, 0.0f, 0.0f) * time * particleTimeCoeff;
	samplePos = samplePos - floor(samplePos);

	// Only the first channel of the texel is used; it is halved to get the height.
	const float heightCoeff = 0.5f;
	const float4 texel = read_imagef(volume, volumeSampler, (float4)(samplePos, 1.0f));
	const float height = texel.x * heightCoeff;

	// Particle position: advect along windDir, wrap into [0,1), remap to
	// [-1,1), then scale by the per-axis dimensions.
	float3 wrappedPos = basePos + windDir.xyz * time * particleTimeCoeff;
	wrappedPos = wrappedPos - floor(wrappedPos);
	float3 particlePos = (wrappedPos * 2.0f - (float3)(1.0f)) * dimen;

	// Displace along deltaDir by the sampled height.
	particlePos += deltaDir.xyz * height;

	// Emit the quad: four corners in the XY plane around particlePos, in the
	// order (-,-), (+,-), (+,+), (-,+), each tagged with its {0,1} corner coordinate.
	const uint vboBase = index * 4;
	const float r = particleRadius;

	outVbo[vboBase + 0] = (float8)( particlePos + (float3)(-r, -r, 0.0f), 1.0f, (float2)(0.0f, 0.0f), (float2)(0.0f) );
	outVbo[vboBase + 1] = (float8)( particlePos + (float3)( r, -r, 0.0f), 1.0f, (float2)(1.0f, 0.0f), (float2)(0.0f) );
	outVbo[vboBase + 2] = (float8)( particlePos + (float3)( r,  r, 0.0f), 1.0f, (float2)(1.0f, 1.0f), (float2)(0.0f) );
	outVbo[vboBase + 3] = (float8)( particlePos + (float3)(-r,  r, 0.0f), 1.0f, (float2)(0.0f, 1.0f), (float2)(0.0f) );
}