#line 2 99

// One point of the point cloud in array-of-structs form. addPoint() fills
// these fields when SOA_LAYOUT is 0.
struct RgbPoint {
    vec3 xyz;                 // world-space position
    uint normalSpecularSun;   // packUnorm4x8(encoded normal.xy, shininess, sun)
    uint rgba;                // packUnorm4x8 of the RGBM-encoded color
    uint weightSpanTrunk;     // packUnorm4x8(weight, span, plantID, 0)
    float sec;                // addPoint() stores the sceneID uniform here
};

// Camera description used by the std140 `cameraArray` uniform block.
// Every vec3 is followed by a float so each pair fills one 16-byte slot.
struct CameraParams {
    vec3 pos;             // camera origin (ray origin in getCameraProjection)
    float nearplane_rcp;  // scale applied by distanceToDepth(); presumably 1/nearplane - TODO confirm on the host side
    vec3 dir;             // view axis; the image plane center is pos + dir
    float nearplane;      // scale applied by depthToDistance()
    vec3 up;              // image plane vertical extent, weighted by (uv.y - 0.5)
    float aspect;
    vec3 right;           // image plane horizontal extent, weighted by (uv.x - 0.5)
    float padding3;
    vec3 startPos;  // camera world position at the beginning of the shot
    float padding4;
};


#define PI 3.14159265
// Full turn in radians.
#define TAU (2*PI)
// Golden ratio, (1 + sqrt(5)) / 2.
#define PHI (sqrt(5)*0.5 + 0.5)

// Point buffer layout written by addPoint(): 1 = one array per attribute,
// 0 = one RgbPoint struct per point.
#define SOA_LAYOUT 1
// Close to the largest finite 32-bit float.
#define INFINITE_DEPTH (3.4e38)

// Clamp to [0,1] - this operation is free under certain circumstances.
// For further information see
// http://www.humus.name/Articles/Persson_LowLevelThinking.pdf and
// http://www.humus.name/Articles/Persson_LowlevelShaderOptimization.pdf
#define saturate(x) clamp(x, 0, 1)

// Primary colors.
const vec3 RED = vec3(1., 0., 0.);
const vec3 GREEN = vec3(0., 1., 0.);
const vec3 BLUE = vec3(0., 0., 1.);

// Host-provided uniforms. Only a few of them are read in this part of the
// file; the rest are presumably consumed by the shaders that include it.
uniform float deltat;
uniform float shotSecs;

// Sun and fog colors: read by applyFog() (sunDirection, fogColor,
// fogScatterColor) and by the impostor shading (sunColor).
uniform vec3 sunDirection;
uniform vec3 sunColor;
uniform vec3 fogColor;
uniform vec3 fogScatterColor;

// skybox is sampled by sampleSky(), skyiem by sampleSkyIrradiance().
uniform sampler2D skyMap;
uniform samplerCube skybox;
uniform samplerCube skyiem;
uniform sampler2D skybg;
uniform sampler2D skyIrradiance;
uniform sampler2D texLogo;
// prop_* values look like externally tweakable properties - TODO confirm.
uniform int prop_geometry;
uniform float prop_inner_coverage_boost;
uniform int prop_mandelbox_iters;
uniform float prop_marcher_y_bias;
uniform float prop_marcher_impostor_angle;
uniform float prop_mandelbox_roll;
uniform float prop_mandelbox_height;
uniform float prop_mandelbox_glow;
uniform float prop_plot_strobe;
uniform float prop_plot_sweep;
uniform float prop_plot_sweep_bias;
uniform float prop_plot_scan;
uniform float prop_plot_headlamp_shading;
uniform float prop_plot_motionblur_spread; // slope used by mapMotionBlurCurve()
uniform int prop_water_render;
uniform float prop_water_gain;
uniform float prop_water_distort;
uniform float prop_water_fog_scale_pre;
uniform float prop_water_fog_scale_post;
uniform float prop_water_hue;
uniform float prop_water_value;
uniform float prop_water_ydarken;
uniform float prop_water_y_gradient;
uniform float prop_fog_c;    // fog amount scale in applyFog()
uniform float prop_fog_gain; // multiplies both fog colors in applyFog()
uniform float prop_bloom_strength;
uniform float prop_bloom_strobe;
uniform float prop_post_size;
uniform float prop_post_nightvision;
uniform float prop_post_sepia;
uniform float prop_post_gain;
uniform float prop_post_over_x;
uniform float prop_post_over_y;
uniform float prop_post_over_alpha;
uniform float prop_post_wobble;
uniform float prop_post_satgain;
uniform float prop_post_gamma;
uniform float prop_post_strobe;
uniform float prop_post_strobe_freq;
uniform float prop_post_chromabs_falloff;
uniform float prop_post_chromabs_size;
uniform float prop_post_blulift;
uniform float prop_post_lift;
uniform float prop_post_hueshift;
uniform float prop_post_gb_power;

// Sky controls: gamma is applied in sampleSky()/sampleSkyIrradiance(), gain
// and ydarken in sampleSkyDome(), the horizon fog pair in applyFog().
uniform float prop_sky_gamma;
uniform float prop_sky_gain;
uniform float prop_sky_horizon_fog;
uniform float prop_sky_ydarken;
uniform float prop_sky_horizon_fog_max;

uniform float prop_bg_alpha;
uniform float prop_bg_reflection_alpha;
uniform float prop_bg_dist;
uniform float prop_bg_uvscale;
uniform float prop_bg_theta;
uniform float prop_bg_slope;
uniform float prop_bg_uv_x;
uniform float prop_bg_uv_y;
uniform float prop_bg_gain;
uniform float prop_bg_gamma;

// Indices into cameras[]. The names suggest previous / current / next frame
// cameras - TODO confirm against the host code that fills the block.
#define CAM_PAST (0)
#define CAM_NOW (1)
#define CAM_NEXT (2)

// The three cameras, uploaded as one std140 uniform block.
layout(std140) uniform cameraArray {
    CameraParams cameras[3];
};

// Converts a distance `t` travelled along the ray direction `dir` into a depth
// value: the travelled vector is projected onto the camera view axis and
// scaled by cam.nearplane_rcp.
float distanceToDepth(float t, CameraParams cam, vec3 dir)
{
    vec3 travelled = dir * t;
    float alongViewAxis = dot(cam.dir, travelled);
    return alongViewAxis * cam.nearplane_rcp;
}

// Inverse of distanceToDepth(): turns the depth value `z` back into a distance
// along the ray direction `dir`.
float depthToDistance(float z, CameraParams cam, vec3 dir)
{
    float viewAxisDistance = z * cam.nearplane;
    float cosToViewAxis = dot(cam.dir, dir);
    return viewAxisDistance / cosToViewAxis;
}

// Blends a fog color over `rgb`.
//
// For regular hits the fog amount follows an exponential height fog
// (density falloff b, scale prop_fog_c) evaluated along the ray. For sky rays
// (distance >= 1e9) only a horizon band is fogged, controlled by
// prop_sky_horizon_fog and prop_sky_horizon_fog_max. The fog color is blended
// between fogColor and fogScatterColor by how closely the ray points at the
// sun.
vec3 applyFog( in vec3  rgb,      // original color of the pixel
        in float distance,        // camera to point distance
        in vec3  rayOri,          // camera position
        in vec3  rayDir )         // camera to point vector
{
    const float b = 1e-3;
    const float h = 1e2;
    float c = prop_fog_c;
    rayOri.y *= -1;
    float dy = -rayDir.y;

    float fogAmount;
    if (distance >= 1e9) {
        // Fog only skybox horizon
        fogAmount = max(0., min(prop_sky_horizon_fog_max, prop_sky_horizon_fog * pow(1. - abs(rayDir.y), 128.)));
    } else {
        // (1 - exp(-x)) / dy is 0/0 for horizontal rays and loses all its
        // precision for tiny x, so use the series distance*b*(1 - x/2) there.
        float x = distance * dy * b;
        float pathTerm = abs(x) < 1e-4
            ? distance * b * (1.0 - 0.5 * x)
            : (1.0 - exp(-x)) / dy;
        fogAmount = clamp(c * exp(-(rayOri.y + h)*b) * pathTerm, 0., 1.);
    }

    // Tint the fog towards the scatter color when looking at the sun.
    float scatter = pow(max(0., dot(rayDir, sunDirection)), 10.);
    vec3  col = mix(fogColor * prop_fog_gain, fogScatterColor * prop_fog_gain, scatter);

    return mix( rgb, col, fogAmount );
}

// Approximately (1 / (2*pi), 1 / pi): maps (longitude, latitude) to [-0.5, 0.5].
const vec2 invAtan = vec2(0.1591, 0.3183);

// Maps a direction to equirectangular UV coordinates centered on (0.5, 0.5).
// `v` is expected to be normalized; v.y is clamped because asin() is undefined
// outside [-1, 1] and normalization can overshoot by a rounding error.
vec2 dirToSpherical(vec3 v)
{
    float longitude = atan(v.z, v.x);
    float latitude = asin(clamp(v.y, -1., 1.));
    return vec2(longitude, latitude) * invAtan + 0.5;
}

// Samples the skyiem cube map in direction -dir and applies the
// prop_sky_gamma power curve.
vec3 sampleSkyIrradiance(vec3 dir) {
    vec3 texel = texture(skyiem, -dir).rgb;
    return pow(texel, vec3(prop_sky_gamma));
}

// Samples mip level 0 of the skybox cube map in direction -dir and applies the
// prop_sky_gamma power curve.
vec3 sampleSky(vec3 dir) {
    //if (dot(dir, sunDirection) > 0.99) return RED;
    vec3 texel = textureLod(skybox, -dir, 0.).rgb;
    return pow(texel, vec3(prop_sky_gamma));
}


// Sky color for a view direction: skybox sample, horizon fog, an optional
// darkening towards the horizon (prop_sky_ydarken) and the overall sky gain.
vec3 sampleSkyDome(vec3 dir) {
    vec3 sky = sampleSky(dir);
    vec3 fogged = applyFog(sky, 1e9, cameras[CAM_NOW].pos, dir);

    // Limits every channel to |dir.y|, i.e. black exactly at the horizon.
    vec3 darkened = clamp(fogged, vec3(0.), vec3(abs(dir.y) * 1.));
    return prop_sky_gain * mix(fogged, darkened, prop_sky_ydarken);
}

// Irradiance counterpart of sampleSkyDome(): irradiance map sample with
// horizon fog and sky gain, without the horizon darkening.
vec3 sampleSkyDomeIrradiance(vec3 dir) {
    vec3 irradiance = sampleSkyIrradiance(dir);
    vec3 fogged = applyFog(irradiance, 1e9, cameras[CAM_NOW].pos, dir);
    return prop_sky_gain * fogged;
}

// HSV functions, source: http://lolengine.net/blog/2013/07/27/rgb-to-hsv-in-glsl
// All components are in the range [0…1], including hue.

// Converts (hue, saturation, value) to RGB.
vec3 hsv2rgb(vec3 c)
{
    const vec3 channelOffsets = vec3(1.0, 2.0 / 3.0, 1.0 / 3.0);
    vec3 ramp = abs(fract(vec3(c.x) + channelOffsets) * 6.0 - vec3(3.0));
    vec3 pureHue = clamp(ramp - vec3(1.0), 0.0, 1.0);
    return c.z * mix(vec3(1.0), pureHue, c.y);
}

// Converts RGB to (hue, saturation, value), all in [0, 1]. The small epsilon
// keeps both divisions finite for grey and black inputs.
vec3 rgb2hsv(vec3 c)
{
    const vec4 K = vec4(0.0, -1.0 / 3.0, 2.0 / 3.0, -1.0);
    const float tiny = 1.0e-10;

    // Branch-free ordering of the channels; the largest ends up in .x.
    vec4 gbSorted = mix(vec4(c.bg, K.wz), vec4(c.gb, K.xy), step(c.b, c.g));
    vec4 sorted = mix(vec4(gbSorted.xyw, c.r), vec4(c.r, gbSorted.yzx), step(gbSorted.x, c.r));

    float chroma = sorted.x - min(sorted.w, sorted.y);
    float hue = abs(sorted.z + (sorted.w - sorted.y) / (6.0 * chroma + tiny));
    float saturation = chroma / (sorted.x + tiny);
    return vec3(hue, saturation, sorted.x);
}

#define USE_SWIZZLED_FB 1

// Maps a 2D pixel coordinate to a linear buffer index.
// With USE_SWIZZLED_FB the buffer is stored as 32x32 pixel tiles: tiles are
// laid out row-major and pixels inside a tile are row-major as well.
// Otherwise the index is a plain row-major index.
int linearToBlock(ivec2 c, ivec2 size) {
#if USE_SWIZZLED_FB
    const int TILE_SHIFT = 5;
    const int TILE_SIZE = 1 << TILE_SHIFT;
    const int TILE_MASK = TILE_SIZE - 1;
    const int TILE_AREA = TILE_SIZE * TILE_SIZE;

    ivec2 tile = c >> TILE_SHIFT;
    ivec2 inTile = c & TILE_MASK;
    int tilesPerRow = size.x >> TILE_SHIFT;

    int tileIndex = tile.y * tilesPerRow + tile.x;
    return tileIndex * TILE_AREA + inTile.y * TILE_SIZE + inTile.x;
#else
    return c.y * size.x + c.x;
#endif
}

// Packs a color (8 bits per channel) into the low 32 bits and the raw bits of
// the depth `z` into the high 32 bits of one 64-bit value.
uint64_t packDepthColorSample(vec4 color, float z)
{
    uint colorBits = packUnorm4x8(color);
    uint depthBits = floatBitsToUint(z);
    return packUint2x32(uvec2(colorBits, depthBits));
}

// Inverse of packDepthColorSample(): returns the color and writes the depth
// to `sampleZ`.
vec4 unpackDepthColorSample(in uint64_t depthColor64, out float sampleZ)
{
    uvec2 halves = unpackUint2x32(depthColor64);
    sampleZ = uintBitsToFloat(halves.y);
    return unpackUnorm4x8(halves.x);
}

// Linear remap of `x`: scaled by prop_plot_motionblur_spread and offset so
// that x = 0 maps to 0.5.
float mapMotionBlurCurve(float x)
{
    float spread = x * prop_plot_motionblur_spread;
    return spread + .5;
}

#ifdef USE_RANDOM
// State of the Jenkins generator: xyz hold the mixed words, w counts the
// calls to jenkins_rand().
uvec4 rndseed;

// One round of subtract/xor/shift mixing over rndseed.xyz. rndseed.w is not
// touched. The statement order matters; do not reorder.
void jenkins_mix()
{
	rndseed.x -= rndseed.y; rndseed.x -= rndseed.z; rndseed.x ^= rndseed.z >> 13;
	rndseed.y -= rndseed.z; rndseed.y -= rndseed.x; rndseed.y ^= rndseed.x << 8;
	rndseed.z -= rndseed.x; rndseed.z -= rndseed.y; rndseed.z ^= rndseed.y >> 13;
	rndseed.x -= rndseed.y; rndseed.x -= rndseed.z; rndseed.x ^= rndseed.z >> 12;
	rndseed.y -= rndseed.z; rndseed.y -= rndseed.x; rndseed.y ^= rndseed.x << 16;
	rndseed.z -= rndseed.x; rndseed.z -= rndseed.y; rndseed.z ^= rndseed.y >> 5;
	rndseed.x -= rndseed.y; rndseed.x -= rndseed.z; rndseed.x ^= rndseed.z >> 3;
	rndseed.y -= rndseed.z; rndseed.y -= rndseed.x; rndseed.y ^= rndseed.x << 10;
	rndseed.z -= rndseed.x; rndseed.z -= rndseed.y; rndseed.z ^= rndseed.y >> 15;
}
// Seeds the Jenkins generator from three words, resets the call counter and
// runs two mixing rounds.
void jenkins_srand(uint A, uint B, uint C)
{
    rndseed = uvec4(A, B, C, 0);
    for (int round = 0; round < 2; round++)
        jenkins_mix();
}

// Returns the next Jenkins random number as a float (32-bit word divided by
// 2^32). The state is re-mixed on every third call; between mixes the three
// state words are rotated so each one is handed out once.
float jenkins_rand()
{
    uint callIndex = rndseed.w;
    rndseed.w = callIndex + 1u;
    if (callIndex % 3u == 0u)
        jenkins_mix();

    rndseed.xyz = rndseed.yzx;
    return float(rndseed.x) / pow(2., 32.);
}

// MurMurHash 3 finalizer. Implementation is in public domain.
// Note that hash(0) == 0.
uint hash( uint h )
{
    uint x = h;
    x = (x ^ (x >> 16)) * 0x85ebca6bu;
    x = (x ^ (x >> 13)) * 0xc2b2ae35u;
    return x ^ (x >> 16);
}

// Hashes three words into one: y and z are hashed individually, xor-ed into
// x, and the result is hashed once more.
uint hash( uvec3 h )
{
    uint folded = h.x ^ hash( h.y ) ^ hash( h.z );
    return hash( folded );
}

// State of the xorshift generator. Must never be zero: xorshift maps 0 to 0.
uint rng_state;

// Seeds the xorshift generator from three words.
// The xor of the three hashes can be zero (hash(0) == 0, so e.g. A == B with
// C == 0 cancels out), which would make every later random number zero. In
// that case an arbitrary non-zero constant is used instead.
void xorshift_srand(uint A, uint B, uint C)
{
    uint seed = hash(A) ^ hash(B) ^ hash(C);
    rng_state = (seed != 0u) ? seed : 0x9E3779B9u;
}

// Advances rng_state by one xorshift32 step (shifts 13, 17, 5 from George
// Marsaglia's paper) and returns the new state.
uint xorshift_rand()
{
    uint s = rng_state;
    s ^= s << 13;
    s ^= s >> 17;
    s ^= s << 5;
    rng_state = s;
    return s;
}

// Next xorshift random number scaled by 2^-32.
float xorshift_frand()
{
    const float INV_2_POW_32 = 1.0 / 4294967296.0;
    uint bits = xorshift_rand();
    return float(bits) * INV_2_POW_32;
}

// Random function using the idea of StackOverflow user "Spatial" https://stackoverflow.com/a/17479300
// Creates random 23 bits and puts them into the fraction bits of an 32-bit float.
// With the exponent bits of 1.0 this gives a float in [1, 2); subtracting 1
// yields the result in [0, 1).
float noise3u( uvec3 h )
{
    uint folded = h.x ^ hash( h.y ) ^ hash( h.z );
    uint fraction = hash( folded ) & 0x007FFFFFu;
    float oneToTwo = uintBitsToFloat( fraction | 0x3f800000u );
    return oneToTwo - 1.;
}

// Two-word variant of noise3u(): hashes (x, y) and returns a float in [0, 1)
// built from 23 random fraction bits.
float noise2u( uvec2 h )
{
    uint folded = h.x ^ hash( h.y );
    uint fraction = hash( folded ) & 0x007FFFFFu;
    float oneToTwo = uintBitsToFloat( fraction | 0x3f800000u );
    return oneToTwo - 1.;
}

// White noise in [0, 1) keyed on the exact bit patterns of the components of v.
float noise( vec3 v )
{
    uvec3 bits = floatBitsToUint( v );
    return noise3u( bits );
}

// White noise in [0, 1) for an integer coordinate. floatBitsToUint() takes
// floating-point arguments, so `v` goes through the implicit int-to-float
// conversion before its bits are hashed.
float noise( ivec3 v )
{
    uvec3 bits = floatBitsToUint( v );
    return noise3u( bits );
}

// White noise in [0, 1) keyed on the exact bit pattern of v.
// The hash is turned into a float the same way as in noise2u()/noise3u():
// 23 random fraction bits with the exponent of 1.0, minus 1. Returning the
// raw hash would yield values up to 2^32 instead of the [0, 1) range of the
// other noise() overloads.
float noise( float v )
{
    uint m = hash(floatBitsToUint(v));
    return uintBitsToFloat( ( m & 0x007FFFFFu ) | 0x3f800000u ) - 1.;
}

// White noise in [0, 1) keyed on the exact bit patterns of the components of v.
float noise( vec2 v )
{
    uvec2 bits = floatBitsToUint( v );
    return noise2u( bits );
}

// Allow toggling between more expensive Jenkins random and cheaper Xorshift
// srand(A, B, C) seeds the selected generator and rand() returns its next
// float. Change the `#if 0` below to `#if 1` to select Jenkins.

#if 0
#define srand jenkins_srand
#define rand jenkins_rand
#define USE_HASH_JENKINS 1
#else
#define srand xorshift_srand
#define rand xorshift_frand
#define USE_HASH_XORSHIFT 1
#endif

#endif



// Common raymarcher code

#ifdef USE_MARCHER

// Material IDs reported by the scene function and the march routines.
// MATERIAL_SKY is what march()/depth_march() report when the ray leaves the
// scene; MATERIAL_OTHER is their initial value.
const int MATERIAL_SKY = -1;
const int MATERIAL_OTHER = 1;
const int MATERIAL_KLEINIAN = 2;
const int MATERIAL_GRASSLAND = 3;
const int MATERIAL_JUNGLEFLOOR = 4;
const int MATERIAL_DIRT = 5;
const int MATERIAL_ISLAND = 6;
const int MATERIAL_SILVERSCREEN = 7;
const int MATERIAL_STONE = 8;
const int MATERIAL_METAL = 9;
const int MATERIAL_GOLD = 10;
const int MATERIAL_MARBLE = 11;

// Scene geometry IDs; presumably compared against the prop_geometry uniform -
// TODO confirm in the scene shaders.
const int GEOMETRY_MANDELBOX = 0;
const int GEOMETRY_TEMPLECUBE = 1;
const int GEOMETRY_KLEINIAN = 2;
const int GEOMETRY_LABYRINTH = 3;
const int GEOMETRY_JUNGLE = 4;
const int GEOMETRY_SANCTUARY = 5;
const int GEOMETRY_ISLANDCUBE = 6;
const int GEOMETRY_BROKENCUBE = 7;
const int GEOMETRY_CAVE = 8;
const int GEOMETRY_CAVE2 = 9;
const int GEOMETRY_RUINS = 10;
const int GEOMETRY_MANDELBOX2 = 11;
const int GEOMETRY_JUNGLE_CASTLE = 12;


// Per-ray cone parameters used by march() and depth_march() to turn world
// space distances into screen space error.
struct ConeSetup {
    float pixelwidth;       // side length of a square pixel in world units
    float radius;           // cone radius in world units
    float proj_plane_dist;  // distance from cam origin to rays start position
    float nearplane;        // nearplanes orthogonal distance to camera origin
    float step_factor;      // multiplies t in depth_march() when USE_ANALYTIC_CONE_STEP is on
};

// Description of a shaded ray hit, handed to the point spawning and material
// hooks.
struct SurfaceInfo {
    int material;           // material ID
    vec3 p;                 // worldspace hit position
    vec3 normal;            // hit point normal
    vec3 baseColor;         // baseColor of the surface
    vec3 color;             // shaded surface color
    float ambient;          // ambient light factor, [0, 1], where 0 is darkness
    vec3 toLight;           // normalized vector towards the sun
    float sun;              // sun visibility
    vec3 campos;            // camera world position
};

// Per-material shading parameters passed to the impostor spawners.
struct MaterialInfo {
    vec3 tint;          // multiplied into the impostor texel color in spawnImpostor()
    float shininess;
};

// Optional hooks. Only the prototypes are declared here; a shader that
// defines one of the USE_* macros has to supply the matching definition.
#ifdef USE_SPAWN_POINTS
bool spawnPoints(in SurfaceInfo surf, in ConeSetup cone, in CameraParams cam);
#endif

#ifdef USE_MATERIAL_HANDLER
void materialHandler(in SurfaceInfo surf, inout vec3 base, inout float shininess, inout float ambient, inout float sun, inout float facing, inout vec3 suncol);
#endif

#ifdef USE_SHADING_HANDLER
void shadingHandler(in vec3 p, in float t, in int hitmat, in CameraParams cam, inout vec3 normal, inout vec3 roughNormal,
inout vec3 to_light, inout vec3 to_camera, inout float maxShadowDist);
#endif

// Capacity of the point buffers; addPoint() wraps its write offset by it.
uniform int pointBufferMaxElements;
//uniform int jumpBufferMaxElements;
uniform int rayIndexBufferMaxElements;

// Shared counters. currentWriteOffset is advanced atomically by addPoint().
layout(std430) buffer pointBufferHeader {
    int currentWriteOffset;
    int pointsSplatted;
    int nextRayIndex;
};

// Array-of-structs point storage, written by addPoint() when SOA_LAYOUT is 0.
layout(std430) buffer pointBuffer {
    RgbPoint points[];
};

// Struct-of-arrays point storage, written by addPoint() when SOA_LAYOUT is 1.
// xyz = world position, w = sceneID.
layout (std430) buffer pointPosShotBuffer {
    vec4 pointPosShots[];
};

// RGBM-encoded color, packed with packUnorm4x8.
layout (std430) buffer pointColorBuffer {
    uint pointColors[];
};

layout (std430) buffer pointShadingBuffer {
    uint pointNormalShininessSun[]; // NOTE: 16-bit normal, shininess, sun light
};

layout (std430) buffer pointInfoBuffer {
    uint pointWeightSpanTrunk[]; // NOTE: 8-bit weight, 8-bit span, 16-bit plant data
};

coherent layout(std430) buffer jumpBuffer {
    float jumps[];
};

layout(std430) buffer rayIndexBuffer {
    int rayIndices[];
};

layout(std430) buffer radiusBuffer {
    float radiuses[];
};

// Debug outputs. i2ray() fills debug_i, debug_parent, debug_size, debug_b,
// debug_start and debug_parent_size for ray index 599.
layout(std430) buffer debugBuffer {
    int debug_i;
    int debug_parent;
    int debug_size;
    int debug_b;
    int debug_start;
    int debug_parent_size;
    float debug_pixelRadius;
    float debug_zdepth;
    float debug_parentDepth;
    float debug_parent_t;
    float debug_child_t;
    float debug_nearPlane;
    float debug_projPlaneDist;
    float debug_parentUVx;
    float debug_parentUVy;
    float debug_childUVx;
    float debug_childUVy;
};

layout(std430) buffer stepBuffer {
    float debug_steps[];
};

// Per-dispatch state provided by the host.
uniform int source;
uniform int frame;        // also used by the ZERO macro to keep loops from being unrolled
uniform float secs;
uniform float sceneID;    // stored with every point by addPoint()
uniform ivec2 screenSize;
uniform vec2 screenBoundary;
uniform vec2 cameraJitter;
uniform sampler2D whiteNoise;
uniform sampler2D perlinNoise;
uniform sampler2D texGrass;
uniform sampler2D texPalm;

// Texture set of one pre-rendered impostor sprite plus the values needed to
// turn its depth map back into world space (see spawnImpostor()).
struct Impostor {
    sampler2D color;
    sampler2D depth;
    sampler2D normal;
    float distance;     // object distance from camera
    float scale;        // object scale in world space
};

// Indices into impostors[] and impostorSizes[].
#define IMPOSTOR_SMALL_PALM (0)
#define IMPOSTOR_CURRY_TREE1 (1)
#define IMPOSTOR_CURRY_TREE2 (2)
#define IMPOSTOR_CURRY_TREE2B (3)
#define IMPOSTOR_AESCULUS1 (4)
#define IMPOSTOR_CHINABERRY2 (5)
#define IMPOSTOR_CHINABERRY4 (6)
#define IMPOSTOR_AESCULUS2 (7)
#define IMPOSTOR_SPHERE (8)
#define IMPOSTOR_STATUE (9)
#define IMPOSTOR_COUNT (10)
uniform Impostor impostors[IMPOSTOR_COUNT];

// One size entry per impostor ID, in IMPOSTOR_* order.
// NOTE(review): the meaning of the two components is not visible here.
const vec2 impostorSizes[] = {
    vec2(0.2), // SMALL_PALM
    vec2(0.2, 0.2), // CURRY_TREE1
    vec2(0.2), // CURRY_TREE2
    vec2(0.2), // CURRY_TREE2B
    vec2(0.22, 0.20), // AESCULUS1
    vec2(0.2, 0.3), // CHINABERRY2
    vec2(0.2, 0.2), // CHINABERRY4
    vec2(0.1, 0.3), // AESCULUS2
    vec2(0.2, 0.0), // SPHERE
    vec2(0.2, 0.0), // STATUE
};

// Lookup table of impostor IDs; judging by the name it is indexed by an
// altitude band - TODO confirm at the use site.
const int impostorAltitudeTtable[] = {
    IMPOSTOR_CHINABERRY2,
    IMPOSTOR_CHINABERRY2,
    IMPOSTOR_CURRY_TREE1,
    IMPOSTOR_CURRY_TREE1,
    IMPOSTOR_AESCULUS1,
    IMPOSTOR_AESCULUS2,
    IMPOSTOR_CHINABERRY4,
    IMPOSTOR_CHINABERRY4,
    IMPOSTOR_CURRY_TREE2B,
    IMPOSTOR_CURRY_TREE2B,
    IMPOSTOR_AESCULUS1,
    IMPOSTOR_AESCULUS2,
    IMPOSTOR_CURRY_TREE2,
    IMPOSTOR_CURRY_TREE2,
};

// iq's hack to avoid driver unrolling a for loop
// Evaluates to 0 whenever `frame` is non-negative, but the compiler cannot
// know that at compile time.
#define ZERO (min(frame, 0))

//layout(r32f) uniform image2D zbuffer;
layout(r8) uniform image2D edgebuffer;
layout(rgba16f) uniform image2DArray samplebuffer;
layout(rg8) uniform image2DArray jitterbuffer;

// Compile-time feature switches.
#define USE_ANALYTIC_CONE_STEP 0
#define USE_HIT_REFINEMENT 0
#define USE_TREE 1

// This factor controls how many pixel radii of screen space error we allow
// in the "near geometry snapping" at the end of the "march" loop. Without it
// the skybox color leaks through at low grazing angles.
const float SNAP_INFLATE_FACTOR = 3.;

// Ray's maximum travel distance.
const float MAX_DISTANCE = 1e9;
const float TOO_FAR = 400.;

const int PARENT_INDEX  = 16;
const int CHILD_INDEX = 112;
int globalMyIdx;
float global_y_axis_flip = 1.; // Mirroring applied to every point in addPoint. Used to render the planar reflection.

// Builds the primary ray for screen coordinate `uv` (0.5 is the image center).
// outPos is the point on the image plane, outDir the normalized direction from
// the camera origin through it.
void getCameraProjection(CameraParams cam, vec2 uv, out vec3 outPos, out vec3 outDir)
{
    vec2 centered = uv - vec2(0.5);
    outPos = cam.pos + cam.dir + centered.x * cam.right + centered.y * cam.up;
    outDir = normalize(outPos - cam.pos);
}

// Maps a ray index "i" into a bin index.
// Bin b starts at ray index (4^b - 1) / 3 and holds 4^b rays.
int tobin(int i)
{
    int highestBit = findMSB(3*i + 1);
    return highestBit >> 1;
}

// Maps a bin index into a starting ray index. Inverse of "tobin(i)."
// Computes (4**b - 1) / 3 with a single shift, since 4**b == 1 << (2*b).
// Valid for b in [0, 15] (4**15 is the largest power of four that fits in a
// signed 32-bit int); non-positive b yields 0.
int binto(int b)
{
    if (b <= 0)
        return 0;
    int shift = 2 * b;
    return ((1 << shift) - 1) / 3;
}

// Gathers the even-numbered bits of x (bits 0, 2, 4, ...) into the low 16
// bits of the result.
uint z2x_1(uint x)
{
    uint bits = x & 0x55555555u;
    bits = (bits | (bits >> 1)) & 0x33333333u;
    bits = (bits | (bits >> 2)) & 0x0F0F0F0Fu;
    bits = (bits | (bits >> 4)) & 0x00FF00FFu;
    bits = (bits | (bits >> 8)) & 0x0000FFFFu;
    return bits;
}

// Maps 32-bit Z-order index into 16-bit (x, y)
// x comes from the even bits of z, y from the odd bits.
uvec2 z2xy(uint z)
{
    uint xBits = z2x_1(z);
    uint yBits = z2x_1(z >> 1);
    return uvec2(xBits, yBits);
}

// Maps ray index `i` to its cell inside the square grid of its bin.
// `idim` receives the grid side length (2^bin).
uvec2 i2gridCoord(int i, out int idim) {
    int bin = tobin(i);
    int zIndex = i - binto(bin);
    idim = 1 << bin;
    return z2xy(uint(zIndex));
}

// Fold used by the normal encoding for the lower hemisphere: swaps and
// mirrors the components, keeping the sign of each original component
// (zero counts as positive).
vec2 octWrap( vec2 v )
{
    vec2 signs = vec2(v.x >= 0. ? 1. : -1.,
                      v.y >= 0. ? 1. : -1.);
    vec2 folded = 1.0 - abs( v.yx );
    return folded * signs;
}

// Encodes a direction as two values in [0, 1]: the vector is scaled to unit
// L1 length, the lower hemisphere (n.z < 0) is folded with octWrap(), and the
// result is remapped from [-1, 1] to [0, 1].
vec2 encodeNormal( vec3 n )
{
    float l1Length = abs( n.x ) + abs( n.y ) + abs( n.z );
    n /= l1Length;

    vec2 packed2 = n.xy;
    if (!(n.z >= 0.))
        packed2 = octWrap( n.xy );

    return packed2 * 0.5 + vec2(0.5);
}

// Inverse of encodeNormal(): rebuilds a unit vector from the two encoded
// values in [0, 1].
vec3 decodeNormal( vec2 f )
{
    vec2 signedF = f * 2.0 - vec2(1.0);

    // https://twitter.com/Stubbesaurus/status/937994790553227264
    float z = 1.0 - abs( signedF.x ) - abs( signedF.y );
    float t = clamp( -z, 0., 1. );

    // Undo the lower hemisphere fold: move each component towards zero by t.
    vec2 xy = signedF;
    xy.x += (xy.x >= 0.) ? -t : t;
    xy.y += (xy.y >= 0.) ? -t : t;

    return normalize( vec3( xy, z ) );
}

// Maps ray index `i` to the screen UV of its cell center.
// Rays are grouped into bins; bin b is a 2^b x 2^b grid whose cells are stored
// in Z-order. Outputs:
//   squareCoord - cell coordinate inside the bin's grid
//   parentIdx   - index of the ray in the previous bin that covers this cell
//   sideLength  - grid side length of the bin
vec2 i2ray(int i, out ivec2 squareCoord, out int parentIdx, out int sideLength)
{
    int bin = tobin(i);
    int binStart = binto(bin);
    int zIndex = i - binStart;
    int side = 1 << bin;
    int cellCount = side * side;

    // The previous bin has a quarter of the cells and ends where this one starts.
    int parentCellCount = cellCount / 4;
    int parentRay = int(binStart - parentCellCount) + (zIndex/4);

    uvec2 cell = z2xy(uint(zIndex));
    vec2 cellF = vec2(cell);
    float sideF = float(side);

    squareCoord = ivec2(cellF + vec2(.5));
    parentIdx = parentRay;
    sideLength = side;

    // Debug capture for one hard-coded ray.
    if (i == 599) {
        debug_i = i;
        debug_parent = parentRay;
        debug_size = side;
        debug_b = bin;
        debug_start = binStart;
        debug_parent_size = parentCellCount;
    }

    return vec2(0.5/sideF) + cellF / vec2(sideF);
}

// http://graphicrants.blogspot.com/2009/04/rgbm-color-encoding.html
// Encodes an HDR color with a range of [0, 6] as RGB plus a shared multiplier
// in alpha. The multiplier is rounded up to the next 8-bit step so that the
// stored RGB never exceeds 1 for in-range colors.
vec4 RGBMEncode( vec3 color ) {
    color *= 1.0 / 6.0;

    float peak = max(max(color.r, color.g), max(color.b, 1e-6 ));
    float multiplier = clamp(peak, 0., 1.);
    multiplier = ceil(multiplier * 255.0) / 255.0;

    return vec4(color / multiplier, multiplier);
}


// Cheap hash of a float into [0, 1) built from fract() and multiplications
// only.
float sinehash1( float n )
{
    float scrambled = fract( n*0.3183099 );
    return fract( n*17.0*scrambled );
}

// Cheap hash of a 2D point into [0, 1) built from fract() and multiplications
// only.
float sinehash1( vec2 p )
{
    vec2 q = 50.0*fract( p*0.3183099 );
    return fract( q.x*q.y*(q.x+q.y) );
}


// Fractional Brownian Motion (fBM)

// iq's value noise fBM

// 2D rotation applied to the sample position between octaves.
const mat2 fbm_m2 = mat2(  0.80,  0.60,
                          -0.60,  0.80 );

// Transpose (= inverse) of fbm_m2; fbmd_9() uses it to rotate derivatives back.
const mat2 fbm_m2i = mat2( 0.80, -0.60,
                       0.60,  0.80 );

// Orthonormal 3D matrix applied to the sample position between octaves.
const mat3 fbm_m3  = mat3( 0.00,  0.80,  0.60,
                      -0.80,  0.36, -0.48,
                      -0.60, -0.48,  0.64 );
// 3D value noise in [-1, 1]: hashed values at the eight corners of the unit
// cell around x, blended with a quintic fade curve.
float smooth_noise(in vec3 x)
{
    vec3 cell = floor(x);
    vec3 t = fract(x);

    // Quintic fade 6t^5 - 15t^4 + 10t^3.
    vec3 u = t*t*t*(t*(t*6.0-15.0)+10.0);

    // Linearized cell index; +1, +317 and +157 step along x, y and z.
    float n = cell.x + 317.0*cell.y + 157.0*cell.z;

    float v000 = sinehash1(n+0.0);
    float v100 = sinehash1(n+1.0);
    float v010 = sinehash1(n+317.0);
    float v110 = sinehash1(n+318.0);
    float v001 = sinehash1(n+157.0);
    float v101 = sinehash1(n+158.0);
    float v011 = sinehash1(n+474.0);
    float v111 = sinehash1(n+475.0);

    // Trilinear interpolation expanded into polynomial coefficients.
    float c0   =   v000;
    float cx   =   v100 - v000;
    float cy   =   v010 - v000;
    float cz   =   v001 - v000;
    float cxy  =   v000 - v100 - v010 + v110;
    float cyz  =   v000 - v010 - v001 + v011;
    float czx  =   v000 - v100 - v001 + v101;
    float cxyz = - v000 + v100 + v010 - v110 + v001 - v101 - v011 + v111;

    return -1.0+2.0*(c0 + cx*u.x + cy*u.y + cz*u.z + cxy*u.x*u.y + cyz*u.y*u.z + czx*u.z*u.x + cxyz*u.x*u.y*u.z);
}

// 2D value noise in [-1, 1]: hashed values at the four corners of the unit
// cell around x, blended with a quintic fade curve.
float smooth_noise( in vec2 x )
{
    vec2 cell = floor(x);
    vec2 t = fract(x);

    // Quintic fade 6t^5 - 15t^4 + 10t^3.
    vec2 u = t*t*t*(t*(t*6.0-15.0)+10.0);

    float v00 = sinehash1(cell+vec2(0,0));
    float v10 = sinehash1(cell+vec2(1,0));
    float v01 = sinehash1(cell+vec2(0,1));
    float v11 = sinehash1(cell+vec2(1,1));

    float bilinear = v00 + (v10-v00)*u.x + (v01-v00)*u.y + (v00 - v10 - v01 + v11)*u.x*u.y;
    return -1.0+2.0*( bilinear );
}

// 2D value noise with analytic derivatives.
// Returns (value in [-1, 1], d/dx, d/dy).
vec3 smooth_noised( in vec2 x )
{
    vec2 cell = floor(x);
    vec2 t = fract(x);

    // Quintic fade 6t^5 - 15t^4 + 10t^3 and its derivative 30t^2(t - 1)^2.
    vec2 u = t*t*t*(t*(t*6.0-15.0)+10.0);
    vec2 du = 30.0*t*t*(t*(t-2.0)+1.0);

    float v00 = sinehash1(cell+vec2(0,0));
    float v10 = sinehash1(cell+vec2(1,0));
    float v01 = sinehash1(cell+vec2(0,1));
    float v11 = sinehash1(cell+vec2(1,1));

    // Bilinear interpolation expanded into polynomial coefficients.
    float c0 = v00;
    float cx = v10 - v00;
    float cy = v01 - v00;
    float cxy = v00 - v10 - v01 + v11;

    float value = -1.0+2.0*(c0 + cx*u.x + cy*u.y + cxy*u.x*u.y);
    vec2 gradient = 2.0* du * vec2( cx + cxy*u.y,
                                    cy + cxy*u.x );
    return vec3( value, gradient );
}
// Sums five octaves of noise(vec3): every octave doubles the frequency,
// offsets the position by 100 and halves the amplitude (starting at 0.5).
float fbm(vec3 x) {
    const int NUM_OCTAVES = 5;
    const vec3 shift = vec3(100);

    float sum = 0.0;
    float amplitude = 0.5;
    int octave = ZERO;
    while (octave < NUM_OCTAVES) {
        sum += amplitude * noise(x);
        x = x * 2.0 + shift;
        amplitude *= 0.5;
        ++octave;
    }
    return sum;
}

// Three-octave variant of fbm(): every octave doubles the frequency, offsets
// the position by 100 and halves the amplitude (starting at 0.5).
float fbm_lowfreq(vec3 x) {
    const int NUM_OCTAVES = 3;
    const vec3 shift = vec3(100);

    float sum = 0.0;
    float amplitude = 0.5;
    int octave = ZERO;
    while (octave < NUM_OCTAVES) {
        sum += amplitude * noise(x);
        x = x * 2.0 + shift;
        amplitude *= 0.5;
        ++octave;
    }
    return sum;
}

// Octave count of the 2D terrain fBM functions fbm_9() and fbmd_9().
const int TERRAIN_FBM_ITERS = 3;

// Four octaves of 3D value noise. Between octaves the position is rotated by
// fbm_m3 and scaled by 2 while the amplitude is halved (starting at 0.5).
float fbm_4( in vec3 x )
{
    const float lacunarity = 2.0;
    const float gain = 0.5;

    float sum = 0.0;
    float amplitude = 0.5;
    for( int octave=ZERO; octave<4; octave++ )
    {
        sum += amplitude*smooth_noise(x);
        amplitude *= gain;
        x = lacunarity*fbm_m3*x;
    }
    return sum;
}

// TERRAIN_FBM_ITERS octaves of 2D value noise. Between octaves the position is
// rotated by fbm_m2 and scaled by 1.9 while the amplitude is multiplied by
// 0.55 (starting at 0.5).
float fbm_9(in vec2 x)
{
    const float lacunarity = 1.9;
    const float gain = 0.55;

    float sum = 0.0;
    float amplitude = 0.5;
    for(int octave=ZERO; octave<TERRAIN_FBM_ITERS; octave++)
    {
        sum += amplitude*smooth_noise(x);
        amplitude *= gain;
        x = lacunarity*fbm_m2*x;
    }
    return sum;
}

// Same octave scheme as fbm_9(), additionally accumulating the derivative.
// Returns (value, d/dx, d/dy).
vec3 fbmd_9(in vec2 x)
{
    const float lacunarity = 1.9;
    const float gain = 0.55;

    float sum = 0.0;
    float amplitude = 0.5;
    vec2 gradient = vec2(0.0);
    // Maps an octave's derivative back to the input space; starts as identity.
    mat2 toInput = mat2(1.0,0.0,0.0,1.0);
    for(int octave=ZERO; octave<TERRAIN_FBM_ITERS; octave++)
    {
        vec3 n = smooth_noised(x);
        sum += amplitude*n.x;
        gradient += amplitude*toInput*n.yz;
        amplitude *= gain;
        x = lacunarity*fbm_m2*x;
        toInput = lacunarity*fbm_m2i*toInput;
    }
    return vec3(sum, gradient);
}


// space wrapping operators from hg_sdf

// Repeat space along one axis. Use like this to repeat along the x axis:
// <float cell = pMod1(p.x,5);> - using the return value is optional.
// p is wrapped into [-size/2, size/2); the return value is the cell index.
float pMod1(inout float p, float size) {
    float halfsize = size*0.5;
    float shifted = p + halfsize;
    float cell = floor(shifted/size);
    p = mod(shifted, size) - halfsize;
    return cell;
}

// Repeat in two dimensions
// p is wrapped into one cell centered on the origin; returns the cell index.
vec2 pMod2(inout vec2 p, vec2 size) {
    vec2 shifted = p + size*0.5;
    vec2 cell = floor(shifted/size);
    p = mod(shifted,size) - size*0.5;
    return cell;
}

// Same, but mirror every second cell so all boundaries match
// Cells with an even index are flipped (factor -1), odd ones are kept.
vec2 pModMirror2(inout vec2 p, vec2 size) {
    vec2 halfsize = size*0.5;
    vec2 shifted = p + halfsize;
    vec2 cell = floor(shifted/size);
    vec2 flip = mod(cell,vec2(2))*2 - vec2(1);
    p = (mod(shifted, size) - halfsize) * flip;
    return cell;
}

// Other SDF building helpers

// Rotates p by theta radians. With this matrix a positive theta turns the
// +y axis towards +x.
vec2 rot2d(in vec2 p, float theta) {
    float c = cos(theta);
    float s = sin(theta);
    return mat2(c, -s, s, c) * p;
}

// triplanar mapping by iq (MIT), source: https://www.shadertoy.com/view/MtsGWH

// "p" point apply texture to
// "n" normal at "p"
// "k" controls the sharpness of the blending in the
//     transitions areas.
// "s" texture sampler
vec4 boxmap( in sampler2D s, in vec3 p, in vec3 n, in float k )
{
    // one projection per axis
    vec4 alongX = texture( s, p.yz );
    vec4 alongY = texture( s, p.zx );
    vec4 alongZ = texture( s, p.xy );

    // weight each projection by how much the normal faces its axis
    vec3 w = pow( abs(n), vec3(k) );
    vec4 weighted = alongX*w.x + alongY*w.y + alongZ*w.z;
    return weighted / (w.x + w.y + w.z);
}
// Ray marching logic

// Evaluates the scene distance function at p and reports the material there.
// The gradient magnitude output of SCENE_WITH_GRAD is discarded.
float scene(vec3 p, out int material) {
    float ignoredGradMag = 1.;
    float dist = SCENE_WITH_GRAD(p, material, ignoredGradMag);
    return dist;
}

// Turns the gradient magnitude reported by the scene function into a step
// scale: 1 at mag = 0, falling with 4*mag^2, never below 0.2.
float gradMagToFactor(float mag) {
    float shortened = 1. - mag*mag * 4.;
    return max(0.2, shortened);
}

// Conservative cone march used for depth rays.
//
// Marches from `p` along `rd`, starting at start_t, until the scene distance
// drops below the cone radius at the current distance, the ray passes end_t,
// or num_iters steps were taken.
//
// Returns the last "safe" distance (before the cone touched geometry), or
// MAX_DISTANCE with material = MATERIAL_SKY when the ray passed end_t.
// material is MATERIAL_OTHER otherwise; the material of the hit is not
// reported. out_iters receives the number of completed iterations. `p` is not
// modified. `restart` is not computed here; it is set to the same default
// march() uses so the caller never reads an undefined value.
float depth_march(inout vec3 p, vec3 rd, ConeSetup cone, out int material, out vec2 restart, int num_iters, out int out_iters, float start_t, float end_t) {
    vec3 ro = p;
    int i;
    float t = start_t;
    int mat;
    float last_t = t;
    material = MATERIAL_OTHER;
    restart = vec2(0, cone.radius);

    for (i = 0; i < num_iters; i++) {
        float gradmag = 0.;
        float d = SCENE_WITH_GRAD(ro + t * rd, mat, gradmag);
        // The cone radius grows linearly with the distance from the camera.
        float coneWorldRadius = cone.radius * (t + cone.proj_plane_dist) / cone.proj_plane_dist;

        if (d <= coneWorldRadius) {
            // In depth rays we write the earlier, "safe", z value to the buffer.
            t = last_t;
            break;
        }

        // Furthest distance at which the cone is still known to be empty.
        last_t = (t + d) - coneWorldRadius;

        float factor = gradMagToFactor(gradmag);

        #if USE_ANALYTIC_CONE_STEP
        t = (t + factor*d) * cone.step_factor;
        #else
        t = t + factor*d;
        #endif

        if (t >= end_t) {
            break;
        }
    }

    out_iters = i;

    if (t >= end_t) {
        material = MATERIAL_SKY;
        t = MAX_DISTANCE;
    }

    return t;
}

// Raymarching loop based on techniques of Keinert et al. "Enhanced Sphere Tracing", 2014.
//
// Marches from `p` along `rd` between start_t and end_t for at most num_iters
// steps, using over-relaxed steps (omega = 1.3) that fall back to plain sphere
// tracing once a relaxed step turns out to be too long.
//
// Outcomes:
//  - ray passed end_t: material = MATERIAL_SKY, returns MAX_DISTANCE, `p` unchanged.
//  - iterations exhausted and the best candidate is further than
//    SNAP_INFLATE_FACTOR cone radii from a surface (in screen space):
//    material = MATERIAL_SKY, returns the last t, `p` unchanged.
//  - otherwise: returns the sample distance with the smallest screen space
//    error and moves `p` there. material is the hit material when the loop
//    ended on a hit, MATERIAL_OTHER when the iterations ran out.
// out_iters receives the number of completed iterations.
// restart is (0, cone.radius) for the two sky outcomes and
// (restart_t, restart_error) otherwise.
float march(inout vec3 p, vec3 rd, ConeSetup cone, out int material, out vec2 restart, int num_iters, out int out_iters, float start_t, float end_t) {
    vec3 ro = p;
    int i;
    float omega = 1.3;
    float t = start_t;
    float restart_t = t;
    // NOTE(review): starts at 0, so the `error < restart_error` test below can
    // only pass for a negative error (sample inside geometry) - confirm this
    // is intended.
    float restart_error = 0.;
    float candidate_error = 1e9;
    float candidate_t = t;
    int mat;
    float last_d = 0.;
    float step = 0.;
    material = MATERIAL_OTHER;

    for (i = 0; i < num_iters; i++) {
        float gradmag = 0.;
        float d = SCENE_WITH_GRAD(ro + t * rd, mat, gradmag);

        // The relaxed step was too long if the spheres of the previous and the
        // current sample do not overlap. Step back and disable relaxation.
        bool sorFail = omega > 1. && (d + last_d) < step;
        if (sorFail) {
            step -= omega * step;
            omega = 1.;
        } else {
            step = d * omega;
        }

        // Note: the step above uses the unscaled d; only the error estimate
        // and last_d use the scaled distance.
        float factor = gradMagToFactor(gradmag);
        d *= factor;

        // Worst case distance to surface in screen space.
        float error = d / (t + cone.proj_plane_dist);

        if (d > last_d && error < restart_error) {
            restart_t = t;
            restart_error = error;
        }

        last_d = d;

        // Remember the sample that came closest to a surface.
        if (!sorFail && error < candidate_error) {
            candidate_t = t;
            candidate_error = error;
        }

        // Stop on a hit (error below one cone radius) or when leaving the range.
        if (!sorFail && error < 1. * cone.radius || t >= end_t) {
            material = mat;
            break;
        }

        t += step;
    }

    restart = vec2(0, cone.radius);
    out_iters = i;

    if (t >= end_t) {
        material = MATERIAL_SKY;
        t = MAX_DISTANCE;
        return t;
    }

    // Write out sky color if snapping the hit point to nearest geometry would introduce too much screen space error.
    if (i == num_iters && candidate_error > cone.radius * SNAP_INFLATE_FACTOR) {
        material = MATERIAL_SKY;
        return t;
    }

    restart = vec2(restart_t, restart_error);
    t = candidate_t;
    p = ro + t * rd;

#if USE_HIT_REFINEMENT
    // See "Enhanced Sphere Tracing" section 3.4. and
    // section 3.1.1 in "Efficient Antialiased Rendering of 3-D Linear Fractals"
    for (int i = 0; i < 2; i++) {
        int temp;
        float e = t * 2. * cone.radius;
        t += scene(ro + t*rd, temp) - e;
    }
#endif

    return t;
}

// Surface normal at p from central differences of the scene distance with a
// fixed offset of 1e-5 along each axis.
vec3 evalnormal(vec3 p) {
    const float h = 1e-5;
    vec3 dx = vec3(h, 0.f, 0.f);
    vec3 dy = vec3(0.f, h, 0.f);
    vec3 dz = vec3(0.f, 0.f, h);
    int m;

    float gx = scene(p + dx, m) - scene(p - dx, m);
    float gy = scene(p + dy, m) - scene(p - dy, m);
    float gz = scene(p + dz, m) - scene(p - dz, m);
    return normalize(vec3(gx, gy, gz));
}

// Surface normal at p from central differences of the scene distance with a
// caller-chosen offset `size` along each axis.
vec3 evalnormal_sized(vec3 p, float size) {
    vec3 dx = vec3(size, 0.f, 0.f);
    vec3 dy = vec3(0.f, size, 0.f);
    vec3 dz = vec3(0.f, 0.f, size);
    int m;

    float gx = scene(p + dx, m) - scene(p - dx, m);
    float gy = scene(p + dy, m) - scene(p - dy, m);
    float gz = scene(p + dz, m) - scene(p - dz, m);
    return normalize(vec3(gx, gy, gz));
}

// Converts a shadowMarch() result into a sun visibility factor.
// result.x is the distance the shadow ray travelled. The travelled fraction of
// maxShadowDist is squared and scaled by 4, so the factor reaches 1 once the
// ray got half way. The square is written as a product because pow() is
// undefined for a negative base.
float shadowResultToSun(vec2 result, float maxShadowDist) {
        float sun = min(result.x, maxShadowDist) / maxShadowDist;
        sun *= sun;
        sun *= 4.;
        return min(1., sun);
}

// Marches a shadow ray from `p` along `rd` between mint and maxt using the
// same over-relaxed stepping as march().
//
// Returns (t, closest): t is the distance reached when the ray hit something
// (d < 1e-5), passed maxt or ran out of iterations; closest is the minimum of
// 0.5 + 0.5*d/(w*t) over all samples, clamped to be non-negative, where `w`
// scales how quickly near misses darken the result.
vec2 shadowMarch(in vec3 p, vec3 rd, int num_iters, float w, float mint, float maxt) {
    vec3 ro = p;
    float omega = 1.3;
    float t = mint;
    int mat;
    float last_d = 0.;
    float step = 0.;
    float closest = MAX_DISTANCE;

    for (int i = ZERO; i < num_iters; i++) {
        float gradmag=0.;
        float d = SCENE_WITH_GRAD(ro + t * rd, mat, gradmag);

        // The relaxed step was too long if the spheres of the previous and the
        // current sample do not overlap. Step back and disable relaxation.
        bool sorFail = omega > 1. && (d + last_d) < step;
        if (sorFail) {
            step -= omega * step;
            omega = 1.;
        } else {
            step = d * omega;
        }

        //closest = min(closest, d);
        closest = min(closest, 0.5+0.5*d/(w*t) );

        last_d = d;

        if (d < 1e-5) {
            break;
        }

        if (t >= maxt) {
            break;
        }

        t += step;
    }

    closest = max(0., closest);
    //return vec2(t, closest*closest*(3.-2.*closest));
    return vec2(t, closest);
}

// Number of samples taken by sampleAO(); can be overridden before this point.
#ifndef AMBIENT_STEPS
#define AMBIENT_STEPS (10)
#endif

// Ambient occlusion estimate for a point `ro` looking along `rd`.
// Samples the scene distance at AMBIENT_STEPS points spaced 0.05 apart and
// averages how much each distance falls short of the sample's own distance
// from `ro`. A sample whose scene distance is at least its own distance
// contributes no occlusion.
float sampleAO(vec3 ro, vec3 rd)
{
    const float spacing = 0.05;
    int mat=0;
    float occlusionSum = 0.;
    float t = spacing;

    for (int i=ZERO; i < AMBIENT_STEPS; i++) {
        float d = scene(ro + t * rd, mat);
        float shortfall = max(0., t - d);
        occlusionSum += shortfall / t;
        t += spacing;
    }

    return 1. - occlusionSum / AMBIENT_STEPS;
}

// Appends one point to the point buffer, which is used as a ring buffer of
// pointBufferMaxElements entries.
//
// p        world space location
// color    (R, G, B, weight)
// normal   surface normal, stored with encodeNormal()
// shininess, sun, span, plantID   stored as 8-bit unorm values
//
// Points below the water plane (p.y < 0) are dropped before a slot is
// reserved, so clipped points do not use up ring buffer entries.
void addPoint(vec3 p, vec4 color, vec3 normal, float shininess, float sun, float span, float plantID)
{
    // y-axis clipping plane: planar reflection mirroring is applied below so any point that is spawned
    // on the wrong side of the water plane should be removed.
    if (p.y < 0.)
         return;

    // Reserve a slot. The wrap is done on unsigned values so the index stays
    // in range after the signed counter overflows.
    uint ticket = uint(atomicAdd(currentWriteOffset, 1));
    int myPointOffset = int(ticket % uint(pointBufferMaxElements));
    float weight = color.a;

    p.y = global_y_axis_flip * p.y;

    // The color is stored as the RGBM encoding of its square root; the alpha
    // channel is replaced by the RGBM multiplier, the weight is stored separately.
    vec2 packedNormal = encodeNormal(normal);
    color.rgb = sqrt(color.rgb);
    color = RGBMEncode(color.rgb);
    uint packedColor = packUnorm4x8(color);

    #if SOA_LAYOUT
    pointPosShots[myPointOffset] = vec4(p, sceneID);
    pointColors[myPointOffset] = packedColor;
    pointNormalShininessSun[myPointOffset] = packUnorm4x8(vec4(packedNormal, shininess, sun));
    pointWeightSpanTrunk[myPointOffset] = packUnorm4x8(vec4(weight, span, plantID, 0.));
    #else
    points[myPointOffset].xyz = p;
    points[myPointOffset].rgba = packedColor;
    points[myPointOffset].normalSpecularSun = packUnorm4x8(vec4(packedNormal, shininess, sun));
    points[myPointOffset].sec = sceneID;
    points[myPointOffset].weightSpanTrunk = packUnorm4x8(vec4(weight, span, plantID, 0.));
    #endif
}

// Impostor helpers

// Debug stand-in for spawnImpostor(): adds a single point at a random position
// inside the box spanned by the three basis vectors around `vo`, colored by
// its position in the box. All other parameters are ignored.
void spawnCubeImpostor(
        int ID,
        uint plantID,
        float mip,
        in SurfaceInfo surf,
        int numSamples,
        vec3 vo,
        vec3 basisx,
        vec3 basisy,
        vec3 basisz,
        in MaterialInfo matInfo,
        float tipSun)
{
    float rx = rand();
    float ry = rand();
    float rz = rand();
    vec3 local = vec3(rx, ry, rz) - vec3(.5);

    vec3 offset = local.x * basisx + local.y * basisy + local.z * basisz;
    vec4 debugColor = vec4(.1*(vec3(.5)+local), 1.);
    addPoint(vo + offset, debugColor, vec3(1.), 0., 1., 0., 0.);
}

//#define spawnImpostor spawnCubeImpostor

// Scatters up to numSamples points over the impostor sprite impostors[ID].
//
// Each sample picks a random sprite coordinate, is stochastically rejected by the sprite's
// alpha, is displaced along basisz by the sprite's depth map, lit with sky irradiance plus
// sun light, and finally written with addPoint().
//
// vo                      sprite origin; the sprite spans [-0.5, 0.5] along basisx and [0, 1] along basisy
// basisx, basisy, basisz  world space sprite axes
// tipSun                  sun visibility blended in towards the top of the sprite (st.y == 1)
// yDarken                 blend factor towards a height-based sun term (1.5 * st.y)
void spawnImpostor(
        int ID,
        uint plantID,
        float mip,
        in SurfaceInfo surf,
        int numSamples,
        vec3 vo,
        vec3 basisx,
        vec3 basisy,
        vec3 basisz,
        in MaterialInfo matInfo,
        float tipSun,
        float yDarken
        )
{
    for (int sampleIdx = 0; sampleIdx < numSamples; sampleIdx++) {
        // Random location on the sprite.
        vec2 spriteUV = vec2(rand(), rand());
        vec3 worldPos = vo + ((-.5 + spriteUV.x) * basisx + spriteUV.y * basisy);

        vec4 texel = textureLod(impostors[ID].color, spriteUV, mip);

        // Colors are stored premultiplied but blending uses postmultiplied alpha, which would
        // give black fringes. Work around it with a stochastic alpha test instead of blending.
        // (No manual gamma step on texel.rgb: the texture is sRGB already.)
        bool rejectedByAlpha = rand() > texel.a;
        if (rejectedByAlpha) {
            continue;
        }

        // Normal map is stored in [0,1]; expand to [-1,1].
        vec3 encodedNormal = textureLod(impostors[ID].normal, spriteUV, mip).rgb;
        vec3 spriteNormal = vec3(-1.) + encodedNormal * vec3(2.);

        // A zero depth texel marks a sample to skip.
        float encodedDepth = textureLod(impostors[ID].depth, spriteUV, mip).r;
        if (encodedDepth == 0.) {
            continue;
        }

        // Convert [0,1] into the blender scene's depth range...
        float sceneDepth = (encodedDepth * encodedDepth) * 100.;
        // ...then to centered "width" space where 1 unit = sprite width...
        float spriteDepth = (sceneDepth - impostors[ID].distance) / impostors[ID].scale;
        // ...and apply it in world space along the z basis axis.
        worldPos += spriteDepth * basisz;

        // Rotate the sprite-space normal into world space.
        vec3 worldNormal = -normalize(spriteNormal.x * basisx + spriteNormal.y * -basisz + spriteNormal.z * basisy);
        vec3 skyLight = sampleSkyDomeIrradiance(worldNormal);
        float NdotL = max(0., dot(worldNormal, surf.toLight));

        // Sun visibility fades from the surface value at the base to tipSun at the top.
        float sampleSun = mix(surf.sun, tipSun, spriteUV.y);
        // Low parts are occluded from sunlight.
        float sunLightFactor = mix(sampleSun, 1.5 * spriteUV.y, yDarken);

        vec3 diffuse = surf.ambient * skyLight + sunLightFactor * sampleSun * sunColor * NdotL;

        // Tint first, then light, keeping the original multiplication order.
        vec3 shaded = texel.rgb * matInfo.tint;
        shaded *= diffuse;

        // Transparency has been taken care of by the stochastic check above so alpha=1 is written.
        // Shininess is written as 0 rather than matInfo.shininess.
        addPoint(worldPos, vec4(shaded, 1.), worldNormal, 0., sampleSun, spriteUV.y, float(plantID)/255.);
    }
}

// Snaps p onto a regular voxel grid with voxelDensity cells per world unit.
//
// Returns the integer cell index containing p and writes the world space center of that
// cell to out_quantp.
ivec3 getQuantizedPoint(in vec3 p, float voxelDensity, out vec3 out_quantp)
{
    // floor() before the integer conversion: a plain ivec3() conversion drops the fractional
    // part (rounds toward zero), which would merge the cells on both sides of zero into one
    // and give negative coordinates a cell center that lies outside their own cell.
    ivec3 quant = ivec3(floor(p * voxelDensity));
    out_quantp = (vec3(quant) + vec3(.5)) / voxelDensity;
    return quant;
}

// Builds two tangent vectors perpendicular to normal.
// out_tangent_x is normalized; out_tangent_y = cross(normal, out_tangent_x).
void makeOrthoFrame(vec3 normal, out vec3 out_tangent_x, out vec3 out_tangent_y)
{
    // Use world Y as the reference axis unless the normal is (nearly) parallel to it,
    // in which case the cross product would degenerate and world Z is used instead.
    vec3 reference;
    if (abs(normal.y) < 0.999) {
        reference = vec3(0., 1., 0.);
    } else {
        reference = vec3(0., 0., 1.);
    }

    vec3 tangentX = normalize(cross(reference, normal));
    out_tangent_x = tangentX;
    out_tangent_y = cross(normal, tangentX);
}

// Evaluates the scene normal at quantp with a sample size of one voxel (1/voxelDensity)
// and flips it, if necessary, so that it lies in the same hemisphere as surfNormal.
vec3 getOrientedNormal(vec3 quantp, vec3 surfNormal, float voxelDensity)
{
    vec3 voxelNormal = evalnormal_sized(quantp, 1./voxelDensity);
    bool pointsAway = dot(voxelNormal, surfNormal) < 0;
    return pointsAway ? voxelNormal * -1. : voxelNormal;
}

// Produces a full tangent frame at quantp: the normal oriented towards surfNormal
// (see getOrientedNormal) plus two tangents from makeOrthoFrame.
void getQuantizedFrame(vec3 quantp, vec3 surfNormal, float voxelDensity, out vec3 out_normal, out vec3 out_tangent_x, out vec3 out_tangent_y)
{
    out_normal = getOrientedNormal(quantp, surfNormal, voxelDensity);
    makeOrthoFrame(out_normal, out_tangent_x, out_tangent_y);
}

// Moves quantp onto the scene surface: starts slightly off the point along normal (0.01 units)
// and takes three steps against the normal, each as long as the scene() value at the
// current position. Returns the final position.
vec3 snapPointToSurface(vec3 quantp, vec3 normal)
{
    vec3 pos = quantp + normal * 0.01;
    for (int iter = 0; iter < 3; iter++) {
        int ignoredMaterial; // scene() wants a material argument; its value is not used here
        float dist = scene(pos, ignoredMaterial);
        pos += -normal * dist;
    }
    return pos;
}

// Jitters vo inside its voxel along the two tangent directions.
// The jitter is derived from noise() of the cell index, so it depends only on quant.
// Returns the jitter (noise values shifted by -0.5); vo is displaced by jitter / voxelDensity.
vec2 offsetGridPoint(in ivec3 quant, in float voxelDensity, in vec3 tangent_x, in vec3 tangent_y, inout vec3 vo)
{
    // Two different integer scalings of the cell index decorrelate the two axes.
    ivec3 seedU = quant*ivec3(9, 2, 31);
    ivec3 seedV = quant*ivec3(11, 31, 2);

    vec2 jitter = vec2(-.5) + vec2(noise(seedU), noise(seedV));

    vec3 tangentOffset = jitter.x * tangent_x + jitter.y * tangent_y;
    vo += tangentOffset / voxelDensity;
    return jitter;
}

// Returns a value in [0,1] derived from the ratio between the texture's texel count and the
// squared projected radius (in pixels) of an object of size worldSize located at quantp.
// The ratio is scaled by 10 before clamping.
float pickMip(ivec2 texSize, float worldSize, vec3 quantp, in ConeSetup cone, vec3 campos)
{
    float camDistance = length(quantp - campos);

    // Projected radius in pixels: world size over distance, divided by the pixel width.
    float radiusPx = (worldSize / (camDistance + cone.proj_plane_dist)) / cone.pixelwidth;
    float radiusPxSq = radiusPx * radiusPx;

    float texelCount = texSize.x * texSize.y;
    float texelsPerRadiusSq = texelCount / radiusPxSq;
    return clamp(10. * texelsPerRadiusSq, 0., 1.);
}

// Computes the in-plane axes of a billboard that turns towards the camera in discrete steps.
//
// voxelToCam            vector from the billboard to the camera (need not be normalized)
// ofs                   extra rotation in radians added before snapping
// normal                axis the billboard rotates around
// tangent_x, tangent_y  frame spanning the plane perpendicular to normal
// basisx, basisz        resulting billboard axes inside that plane
// steps                 number of discrete orientations per full turn
void makeBillboardbasis(
    in vec3 voxelToCam,
    float ofs,
    in vec3 normal,
    in vec3 tangent_x,
    in vec3 tangent_y,
    out vec3 basisx,
    out vec3 basisz,
    in float steps)
{
    // Project the view direction onto the tangent plane and express it in tangent coordinates.
    vec3 viewDir = normalize(voxelToCam);
    vec3 viewInPlane = viewDir - dot(viewDir, normal) * normal;
    float planeX = dot(tangent_x, viewInPlane);
    float planeY = dot(tangent_y, viewInPlane);

    float angle = atan(planeY, planeX) + PI*.5 + ofs;

    // Snap to one of `steps` sectors. The extra half sector (PI/steps, i.e. PI/8 for 8 steps)
    // is needed to get perfect alignment when the billboard is viewed head-on.
    float sector = floor(angle/TAU*steps);
    float snapped = sector/steps*TAU + PI/steps;

    float c = cos(snapped);
    float s = sin(snapped);

    basisx = c * tangent_x + s * tangent_y;
    basisz = -s * tangent_x + c * tangent_y;
}


// Raymarching main function

// Global orbit trap value. In this part of the file it is only referenced by commented-out
// shading experiments in main(); presumably it is written during scene evaluation -- TODO confirm.
vec4 orbitTrap;

// Shader entry point.
//
// Every invocation repeatedly grabs a ray index from the shared counter nextRayIndex until the
// counter reaches rayIndexBufferMaxElements. For each ray it
//   1. derives the screen position (through the ray hierarchy when USE_TREE is set),
//   2. marches the scene, starting from the parent's stored depth when USE_TREE is set,
//   3. stores its own depth in jumps[] for child rays (USE_TREE only),
//   4. and, for lowest-level rays only, shades the hit and emits points via addPoint()
//      and, when enabled, spawnPoints().
void main() {
    ivec2 res = screenSize;

    // NOTE(review): not referenced below in this function; kept in case SCENE_INIT() uses it.
    const int maxRayIndex = res.x * res.y;

    while (nextRayIndex < rayIndexBufferMaxElements) {
        int arrayIdx = atomicAdd(nextRayIndex, 1);

        // Local camera copy with the marcher bias applied: raised by prop_marcher_y_bias and
        // pulled back along the view direction by an amount proportional to the same property.
        CameraParams cam = cameras[CAM_NOW];
        cam.pos.y += prop_marcher_y_bias;
        cam.pos -= cam.dir * cam.nearplane_rcp * prop_marcher_y_bias * 4.;

#if USE_TREE
        // The counter may have moved past the end between the loop test and the atomicAdd.
        if (arrayIdx >= rayIndexBufferMaxElements)
            return;
        int myIdx = rayIndices[arrayIdx];
        globalMyIdx = myIdx;
        int idim; // NOTE(review): not referenced below in this function
        int parentIdx = -2, sideLength = -1;

        ivec2 squareCoord;
        vec2 squareUV = i2ray(myIdx, squareCoord, parentIdx, sideLength);

        // Skip rays that fall outside the visible part of the square ray grid.
        if (squareUV.x > screenBoundary.x || squareUV.y > screenBoundary.y) {
            continue;
        }
        squareUV /= screenBoundary.xy;

        ivec2 pixelCoord = ivec2(squareUV * res.xy);
#else
        // Flat mode: one ray per pixel, row-major.
        int myIdx = arrayIdx;
        if (myIdx >= res.x * res.y)
            continue;
        int parentIdx = 0;
        int sideLength = res.x;
        ivec2 pixelCoord = ivec2(myIdx % res.x, myIdx / res.x);
        vec2 squareUV = pixelCoord / vec2(res.xy);
#endif


        vec2 uv = squareUV;

#if USE_TREE
        // Read the depth the parent ray reached; the barrier is there to observe the parent's write.
        memoryBarrierBuffer();
        float parentDepth = jumps[parentIdx];
        // parentDepth = 0.; // Setting this forces child rays to always start from the beginning.
        if (parentDepth >= MAX_DISTANCE * 0.9) {
            // Parent already ran out of range: pass its depth on and skip marching.
            jumps[myIdx] = parentDepth;
            memoryBarrierBuffer();
            continue;
        }
#else
        float parentDepth = 0.;
#endif

        // Only rays at full screen resolution are shaded; coarser ones just produce depth.
        bool isLowestLevel = sideLength >= max(res.x, res.y);

        if (isLowestLevel) {
            // Per-pixel, per-frame random seed followed by sub-pixel jitter of the ray.
            srand(frame, uint(pixelCoord.x), uint(pixelCoord.y));
#ifdef USE_HASH_JENKINS
            jenkins_mix();
            jenkins_mix();
#endif

            vec2 jitter = vec2(rand() - 0.5, rand() - 0.5);
            jitter /= res;
            jitter *= 0.9;
            uv += jitter;
        }

        vec3 start_pos, dir;
        getCameraProjection(cam, uv, start_pos, dir);

        vec3 p = start_pos;
        vec3 rp = p - cam.pos;

        // Cone footprint of this ray; coarser hierarchy levels (smaller sideLength) get wider cones.
        ConeSetup cone;
        cone.pixelwidth = length(cam.right) / float(sideLength);
        cone.radius = .5 * sqrt(2.) * cone.pixelwidth;
        cone.proj_plane_dist = length(rp);
        cone.nearplane = cam.nearplane;
        cone.step_factor = 1.;

#if USE_ANALYTIC_CONE_STEP
        {
            float aperture = 2. * cone.radius;
            float C = sqrt(aperture * aperture + 1.);
            cone.step_factor = C / (C - aperture);
        }
#endif

        SCENE_INIT();

        int hitmat = MATERIAL_SKY;
        vec2 restart;
        int iters=0;
        float t = -1.;

        // Full march (up to 400 iterations) for shaded rays, depth-only march (up to 100) otherwise.
        if (isLowestLevel) {
            t = march(p, dir, cone, hitmat, restart, 400, iters, depthToDistance(parentDepth, cam, dir), TOO_FAR);
        } else {
            t = depth_march(p, dir, cone, hitmat, restart, 100, iters, depthToDistance(parentDepth, cam, dir), TOO_FAR);
        }

        // Debug taps for two selected rays.
        if (myIdx == CHILD_INDEX) {
            debug_pixelRadius = iters;
            debug_nearPlane = cone.nearplane;
            debug_projPlaneDist = cone.proj_plane_dist;
            debug_zdepth = t;
            debug_parentDepth = parentDepth;
            debug_childUVx = uv.x;
            debug_childUVy = uv.y;
        }

        if (myIdx == PARENT_INDEX) {
            debug_parentUVx = uv.x;
            debug_parentUVy = uv.y;
        }

#if USE_TREE
        // Publish this ray's depth for its children.
        jumps[myIdx] = distanceToDepth(t, cam, dir);
        memoryBarrierBuffer();
#endif

        if (!isLowestLevel) {
            continue;
        }

        // Initialized because the SurfaceInfo constructed below reads it before the final
        // shading result is assigned; leaving it undefined would feed garbage to materialHandler().
        vec3 color = vec3(0.);

        if (hitmat == MATERIAL_SKY) {
            color = vec3(0.);
        } else {
            // Distance along the ray to the y = 0 water plane; hits beyond it are reflections.
            float water_z = -start_pos.y / dir.y;
            //if (p.y < 0) {
            if (water_z > 0. && t > water_z) {
                global_y_axis_flip = -1.;
                p.y = -p.y;
            } else {
                global_y_axis_flip = 1.;
            }

            vec3 normal = evalnormal(p);
            vec3 roughNormal = evalnormal_sized(p, 1e-3);
            vec3 to_camera = normalize(cam.pos - p);
            vec3 to_light = sunDirection;

            const float sunDimmingDist = 30.;
            float maxShadowDist = 30.;

            #ifdef USE_SHADING_HANDLER
            // NOTE(review): receives the unbiased cameras[CAM_NOW], not the biased local `cam`.
            shadingHandler(p, t, hitmat, cameras[CAM_NOW], normal, roughNormal, to_light, to_camera, maxShadowDist);
            #endif

            // Lift the shadow ray origin slightly off the surface towards the camera.
            vec3 shadowRayPos = p + to_camera * 1e-4;

            if (hitmat == MATERIAL_JUNGLEFLOOR) {
                shadowRayPos += 0.1 * roughNormal;

            }

            float sun = 0.;

            #ifdef NO_SHADOWS
            #else
            // Only surfaces facing the sun need a shadow ray.
            if (dot(normal, to_light) > 0.) {
                vec2 shadowResult = shadowMarch(shadowRayPos, to_light, 200, 9e-2, 5e-3, maxShadowDist);
                sun = shadowResultToSun(shadowResult, sunDimmingDist);
            }
            #endif

            float ambient = max(0., sampleAO(p, normal));
            ambient = .2 * pow(ambient, 1.3);

            float facing = max(0., dot(normal, to_light));

            // Material defaults, overridden per material below.
            vec3 base = vec3(0.7);
            float shininess = 0.;
            vec3 suncol = sunColor;
            vec3 irradiance = sampleSkyIrradiance(normal);
            float alpha = 1.;

            if (hitmat == MATERIAL_KLEINIAN) {
                //base = mix(vec3(1., 0.4, 0.), orbitTrap.xyz, vec3(.3));
                //base = hsv2rgb(vec3(orbitTrap.z*0.1, 1.0, 1));
                base = vec3(.5);
                //shininess = min(0.92, max(0., 0.8 * length(orbitTrap.xyz)));
                ambient = 2. * max(0.2, ambient);
                shininess = 0.0;
                alpha = 1.0;
            } else if (hitmat == MATERIAL_GRASSLAND || hitmat == MATERIAL_GOLD) {
                base = vec3(.8) + .2 * vec3(cos(vec3(p.x) + vec3(p.y) + vec3(.0, 1., 2.)));
                // Ambient light fades out towards y = 0.
                float shade = clamp(p.y*0.1, 0.0, 1.0);
                //base = vec3(shade);
                ambient *= shade;

                //ambient = max(0.05, ambient);
                //base = vec3(1.0, 0.9, 0.8);
                shininess = 0.2;
                irradiance = mix(irradiance, .8*vec3(0.4, 0.5, 1.), .5 * abs(normal.y));
                suncol *= vec3(1., 0.9, 0.5);
            } else if (hitmat == MATERIAL_JUNGLEFLOOR) {
                ambient *= 0.2;
                base = vec3(0.002)*vec3(1., 0.8, 0.2);
                normal = roughNormal;
                shininess = 0.0;
            } else if (hitmat == MATERIAL_DIRT) {
                ambient = 0.2;
                base = vec3(0.002)*vec3(1., 0.8, 0.2);
                normal = roughNormal;
                shininess = 0.0;
            } else if (hitmat == MATERIAL_SILVERSCREEN) {
                // Projects texLogo onto the surface along -z using a fixed x/y frame.
                //makeOrthoFrame(sunDirection, sun_tangent_x, sun_tangent_y);
                //vec3 sun_tangent_x, sun_tangent_y;
                vec3 proj_dir = vec3(0., 0., -1);
                vec3 sun_tangent_x = vec3(1., 0., 0);
                vec3 sun_tangent_y = vec3(0., 1., 0);
                //makeOrthoFrame(proj_dir, sun_tangent_x, sun_tangent_y);
                vec2 sunUv = vec2(dot(p, sun_tangent_x), dot(p, sun_tangent_y));
                sunUv.y *= 3.33;
                float projScale = 40.;
                vec2 projOffset = vec2(0, -20);
                float projGain = 4.;
                sunUv += projOffset;
                sunUv = sunUv / projScale;
                sunUv += vec2(.5);

                base = vec3(0.10001) * hsv2rgb(vec3(.2 + .1*sin(p.y/8.), .3, 0.05));// * saturate(p.y / 20.);

                //ambient *= 10.;
                // Only sample the logo inside its unit square.
                if (sunUv.x >= 0 && sunUv.x < 1. && sunUv.y >= 0 && sunUv.y < 1.) {
                    vec4 pic = texture(texLogo, sunUv);
                    pic.rgb *= projGain;
                    //base = mix(base, pic.rgb, pic.a * min(1., sun));
                    // Use red channel for logo alpha. Named separately from the point `alpha`
                    // above, which this branch must not change.
                    float logoAlpha = pic.r;
                    base = mix(base, vec3(0.90), logoAlpha * min(1., sun));
                    shininess = mix(shininess, 0.9, logoAlpha * min(1., sun));
                }
                // NOTE(review): this overwrites the logo shininess computed just above.
                shininess = 0.0;
            }

            SurfaceInfo surf = SurfaceInfo(hitmat, p, normal, base, color, ambient, to_light, sun,  cam.pos);

            #ifdef USE_MATERIAL_HANDLER
            materialHandler(surf, base, shininess, ambient, sun, facing, suncol);
            #endif

            //color = base * (ambient * skycolor + facing * sun * suncol);
            color = base * (ambient * irradiance + 0.1 * facing * sun * suncol);

            color = clamp(color, vec3(0.), vec3(2.));

            bool pointVisible = true;
            #ifdef USE_SPAWN_POINTS
            #ifdef USE_MATERIAL_HANDLER
            // recreate modified SurfaceInfo if it was modified
            surf = SurfaceInfo(hitmat, p, normal, base, color, ambient, to_light, sun,  cam.pos);
            #endif
            pointVisible = spawnPoints(surf, cone, cam);
            #endif

            // Emit the surface point itself unless spawnPoints() asked to hide it.
            if (pointVisible) {
                //alpha /= t+1e-3;
                addPoint(p, vec4(color, alpha), normal, shininess, sun, 0., 0.);
            }
        }
    }
}

#endif
