
#include "testbench.h"
#include "mp3music.h"
#include "cameras.h"
#include "camerashake.h"
#include "shot.h"
#include <cinttypes>
#include <cassert>
#include <cmath>
#include <deque>
#include <chrono>

// Build switches.
// FINALBUILD: compiles dprintf() away and makes main() request a fullscreen window.
// BENCHMARK:  only consulted when FINALBUILD is 0; pulls in benchmark.h.
// LONG_HISTORY: main() uses it to pick samplesPerPixel and the tracing resolution.
#define FINALBUILD 1
#define BENCHMARK 0
#define LONG_HISTORY 1


// Default output resolution; main() overrides it when given two command line arguments.
#if FINALBUILD 
int screenw = 1920, screenh = 1080;
#define dprintf(...)
#elif BENCHMARK
#include "benchmark.h"
#define dprintf printf
int screenw = 1920, screenh = 1080;
#else
#define dprintf printf
//int screenw = 1280, screenh = 720;
int screenw = 1920, screenh = 1080;
#endif
// Internal format main() passes to glTextureStorage3D for the jitter buffer.
static constexpr GLuint JITTER_BUFFER_TYPE = GL_RG8;
static bool showDebugInfo = false;
// When true, main() creates the matching buffers with GL_DYNAMIC_STORAGE_BIT
// (step buffer, jump/radius buffers and point buffer header respectively).
static constexpr bool STEP_BUFFER_DEBUGGING = false;
static constexpr bool JUMP_BUFFER_DEBUGGING = false;
static constexpr bool POINT_BUFFER_HEADER_DEBUGGING = false;

// Sets both the S and the T wrap mode of texture `tex` to GL_CLAMP_TO_EDGE.
static void setWrapToClamp(GLuint tex) {
	const GLenum wrapAxes[] = { GL_TEXTURE_WRAP_S, GL_TEXTURE_WRAP_T };
	for (GLenum axis : wrapAxes) {
		glTextureParameteri(tex, axis, GL_CLAMP_TO_EDGE);
	}
}

// One element of the point buffer: main() sizes pointBuffer as
// sizeof(RgbPoint) * maxPoints and derives the element count from the same size.
// NOTE(review): the field names suggest several values packed into each uint32_t;
// the exact bit layout is not visible here — confirm against the shaders.
struct RgbPoint {
	vec3 xyz;
	uint32_t normalSpecularSun;
	uint32_t rgba;
	uint32_t weightSpanTrunk;
	float sec;
};

// Maps a ray index to its bin index: half of floor(log2(3*i + 1)).
// binto() is the documented inverse (bin index -> first ray index of that bin).
int tobin(int i)
{
	const int wholeBits = int(log2(3 * i + 1));
	return wholeBits >> 1;
}

// Maps a bin index into a starting ray index. Inverse of "tobin(i)."
int binto(int b)
{
	// (4**b - 1) / 3 is the sum 1 + 4 + ... + 4**(b-1); build 4**b by repeated multiplication.
	int power = 1;
	for (int remaining = b; remaining > 0; --remaining) {
		power *= 4;
	}
	return (power - 1) / 3;
}

// Gathers the even-numbered bits of x (bits 0, 2, 4, ...) into the low 16 bits
// of the result, keeping their relative order.
uint z2x_1(uint x)
{
	// After keeping only the even bits, each round folds neighbouring groups
	// together; the shift distance doubles every round.
	static const uint foldMasks[] = { 0x33333333, 0x0F0F0F0F, 0x00FF00FF, 0x0000FFFF };

	uint bits = x & 0x55555555;
	int shift = 1;
	for (uint mask : foldMasks) {
		bits = (bits | (bits >> shift)) & mask;
		shift *= 2;
	}
	return bits;
}

// Maps 32-bit Z-order index into 16-bit (x, y):
// x comes from the even bits of z, y from the odd bits.
uvec2 z2xy(uint z)
{
	const uint column = z2x_1(z);
	const uint row = z2x_1(z >> 1);
	return uvec2(column, row);
}

// Node count of a full quadtree whose leaf level is the smallest power-of-two
// grid that is at least "dim" cells wide.
int dim2nodecount(int dim)
{
	const int leafLevel = int(ceil(log2(dim)));
	return binto(leafLevel + 1);
}

// Returns the next offset from a fixed, repeating jitter sequence (not actually random).
// Each component is (table value - 10) / 20 with table values between 0 and 20, so the
// result lies in [-0.5, 0.5]^2 (not [-1, 1]^2 as previously documented).
// The x and y tables have different lengths (29 and 41 entries) and are stepped
// independently, so the (x, y) pairs only repeat after both indices wrap together.
// Keeps its position in function-local statics, so successive calls advance the sequence.
vec2 getRandomJitter()
{
	// Tables are static so they are not rebuilt on every call.
	static const int xs[] = { 4, 5, 13, 16, 0, 13, 18, 6, 14, 9, 15, 16, 9, 10, 17, 17, 6, 10, 18, 9, 9, 11, 15, 7, 8, 12, 19, 9, 0 };
	static const int ys[] = { 1, 18, 9, 19, 4, 0, 14, 6, 1, 5, 9, 2, 2, 13, 7, 4, 13, 14, 12, 6, 7, 1, 10, 17, 5, 3, 6, 17, 20, 19, 13, 19, 2, 0, 12, 1, 20, 15, 6, 1, 7 };
	static int xi;
	static int yi;

	const int x = xs[xi];
	const int y = ys[yi];
	xi = (xi + 1) % (sizeof(xs) / sizeof(int));
	yi = (yi + 1) % (sizeof(ys) / sizeof(int));
	return vec2(x - 10, y - 10) / 20.f;
}

// Hermite ramp: 0 at or below edge0, 1 at or above edge1, 3t^2 - 2t^3 in between.
// A zero-width interval (edge0 == edge1) yields 0.
static float smoothstep(float edge0, float edge1, float x) {
	if (edge0 == edge1) {
		return 0.f;
	}
	const float span = (edge1 - edge0);
	// Normalised, saturated position of x inside [edge0, edge1].
	const float t = clamp((x - edge0) / span, 0.0, 1.0);
	return t * t * (3 - 2 * t);
}

// Per-camera property set as built by loadProps(): scene selection plus named
// float/int values, optional keyframed float tracks and an optional list of
// camera pose names forming a path.
struct CamProps {
	// One keyframe of an animated float property.
	struct Key {
		float time;
		float value;
	};

	std::string geometry;
	std::string sky;
	std::map<std::string, float> floats;
	std::map<std::string, int> ints;
	std::map<std::string, std::vector<Key>> floatKeys;
	std::vector<std::string> camPath;

	// Returns the float property `name` at time `t`.
	// If a non-empty key track exists for `name`, the value is clamped to the first/last
	// key outside the keyed range and smoothstep-blended between the two surrounding keys
	// inside it (keys are assumed to be stored in ascending time order). Otherwise, or if
	// no key matched, the static value from `floats` is returned.
	// Throws std::out_of_range (from std::map::at) when the fallback name is not in `floats`.
	float getf(const std::string& name, float t) const {
		const auto track = floatKeys.find(name);
		// The emptiness check keeps front()/back() below well-defined.
		if (track != floatKeys.end() && !track->second.empty()) {
			const std::vector<Key>& keys = track->second;
			if (t <= keys.front().time) return keys.front().value;
			if (t >= keys.back().time) return keys.back().value;
			// we must have at least two keys so let's find the later one and interpolate between it and its predecessor
			for (std::size_t i = 1; i < keys.size(); i++) {
				if (t < keys[i].time) {
					const float w = smoothstep(keys[i - 1].time, keys[i].time, t);
					return (1.f - w) * keys[i - 1].value + w * keys[i].value;
				}
			}
		}

		return floats.at(name);
	}

	// Returns the int property `name`; `t` is accepted for symmetry with getf() but unused.
	// The declared return type is float, so the stored int is converted on return.
	// Throws std::out_of_range (from std::map::at) when `name` is not in `ints`.
	float geti(const std::string& name, float t) const {
		(void)t;
		return static_cast<float>(ints.at(name));
	}
};

// Lighting environment. Instances live in the global `skies` map and are uploaded
// to the current shader by setSkyUniforms().
struct Sky {
	Texture<GL_TEXTURE_CUBE_MAP> skybox;      // bound as "skybox"
	Texture<GL_TEXTURE_CUBE_MAP> irradiance;  // bound as "skyiem"
	vec3 sunDirection;                        // uploaded as "sunDirection"
	vec3 sunColor;                            // uploaded as "sunColor"
	// Default fog colours; both start from the RGB triple (254, 198, 123) scaled by 2/255,
	// with the green and blue channels of fogColor additionally scaled by 0.75.
	vec3 fogColor = 2.f * vec3(254.f, 198.f*0.75f, 123.f*0.75f)/255.f;
	vec3 fogScatterColor = 2.f * vec3(254.f, 198.f, 123.f) / 255.f;
	Texture<GL_TEXTURE_2D> background;        // optional; bound as "skybg" only when set
};

// Animation state shared by the functions below.
// shots, cameraPoses, cameraMoves and camProps are replaced by reloadAnimations()
// whenever the corresponding loader returns a non-empty result.
static std::vector<Shot> shots;
// Iterated by the benchmark code in main(); holds references, not copies.
static std::map<std::string, Shot&> shotNames;
static std::vector<CameraPose> cameraPoses;
// Keyed by camera name (looked up with Shot::camName in cameraPath()).
static std::map<std::string, CameraMove> cameraMoves;
// Keyed by camera name; "defaultprops" is the fallback entry used by findProps().
static std::map<std::string, CamProps> camProps;
// Indexed by the CAMSHAKE_* constants; filled by main() via loadCameraShakeTrack().
static CameraShakeTrack shakeTracks[CAMSHAKE_MAX];
// Keyed by sky name; populated in main().
static std::map<std::string, Sky> skies;

// Looks up the properties registered for camera `name`, falling back to the
// "defaultprops" entry. Throws std::out_of_range if that fallback is missing too.
static const CamProps& findProps(const std::string& name) {
	const auto hit = camProps.find(name);
	if (hit != camProps.end()) {
		return hit->second;
	}
	return camProps.at("defaultprops");
}


// Returns a pointer to the first camera pose called `name`, or nullptr if none matches.
static CameraPose* findPose(const std::string& name) {
	for (size_t index = 0; index < cameraPoses.size(); ++index) {
		CameraPose& candidate = cameraPoses[index];
		if (candidate.name == name) {
			return &candidate;
		}
	}
	return nullptr;
}

static std::vector<Shot> loadShots()
{
	std::vector<Shot> news;
	FILE* fp = fopen("assets/shots.txt", "r");
	if (fp) {
		int num = 10;
		int idx = 0;
		while (num >= 2) {
			Shot s = {};
			char camname[128] = { '\0' };
			char shotname[128] = { '\0' };
			num = fscanf(fp, "%f %127s\n",
				&s.start,
				camname
			);
			if (num >= 2) {
				s.camName = camname;
				//printf("loaded shot %s\n", name);
			}
			if (num >= 3) {
				s.name = shotname;
			}
			else {
				s.name = "shot" + std::to_string(idx);
			}
			if (num >= 2) {
				news.push_back(s);
				idx++;
			}

		}
		fclose(fp);
	}
	return news;
}

static std::map<std::string, CamProps> loadProps()
{
	std::map<std::string, CamProps> props;
	std::deque<CamProps> stack;

	stack.push_back(CamProps());
	std::vector<std::string> camNames;
	camNames.push_back("defaultprops"); // a fallback name used when a shot has no properties

	FILE* fp = fopen("assets/props.txt", "r");

    if (!fp) {
        return props;
    }

    int num = 10;
    int idx = 0;
    int line = 1;
    while (!feof(fp)) {
        char cmd[128] = { '\0' };
        char param1[128] = { '\0' };
        char param2[128] = { '\0' };
        char param3[128] = { '\0' };
        num = fscanf(fp, "%s", cmd);
        if (num == 1) {
            if (strncmp(cmd, "push", sizeof(cmd)) == 0) {
				stack.push_back(stack.back());
            }
            else if (strncmp(cmd, "pop", sizeof(cmd)) == 0) {
				if (stack.size() == 0) {
                    printf("Stack underflow at line %d\n", line);
                    break;
				}
				stack.pop_back();
				camNames.clear();
            }
            else {
                int params = fscanf(fp, "%s", param1);
                if (params != 1) {
                    printf("Invalid command at line %d: '%s'!\n", line, cmd);
                    break;
				}

				if (strncmp(cmd, "cam", sizeof(cmd)) == 0) {
					camNames.push_back(param1);
				} else if (strncmp(cmd, "geometry", sizeof(cmd)) == 0) {
					stack.back().geometry = param1;
				} else if (strncmp(cmd, "sky", sizeof(cmd)) == 0) {
					stack.back().sky = param1;
				} else if (strncmp(cmd, "uf", sizeof(cmd)) == 0) {
                    int value = fscanf(fp, "%s", param2);
					if (value == 0) {
						printf("Expected uniform float value at line %d.\n", line);
						break;
					}
					stack.back().floats[param1] = atof(param2);
				} else if (strncmp(cmd, "ui", sizeof(cmd)) == 0) {
                    int value = fscanf(fp, "%s", param2);
					if (value == 0) {
						printf("Expected uniform int value at line %d.\n", line);
						break;
					}
					stack.back().ints[param1] = atoi(param2);
				} else if (strncmp(cmd, "animf", sizeof(cmd)) == 0) {
					int keyRet = fscanf(fp, "%s", param2);
					if (keyRet == 0) {
						printf("Expected animation time at line %d.\n", line);
						break;
					}
					int valueRet = fscanf(fp, "%s", param3);
					if (valueRet == 0) {
						printf("Expected animation value at line %d.\n", line);
						break;
					}
					stack.back().floatKeys[param1].push_back({ float(atof(param2)), float(atof(param3)) });
				} else if (strncmp(cmd, "campath", sizeof(cmd)) == 0) {
					//int value = fscanf(fp, "%s", param2);
					//if (value == 0) {
					//	printf("Expected camera name at line %d.\n", line);
					//	break;
					//}
					//int pathindex = atoi(param1);
					//printf("cam path: %d '%s'\n", pathindex, param2);
					stack.back().camPath.push_back(param1);
				}

				// apply the updated stack state to all active cams
                for (auto& name : camNames) {
                    props[name] = stack.back();
                }
            }
        }

        int newline = fgetc(fp);
        if (newline != '\n' && newline != EOF) {
            printf("Expected a newline at line %d!", line);
        }

        line++;
    }
    fclose(fp);
    return props;
}

// Uploads the camera properties in `props`, evaluated at time `secs`, as uniforms:
//   - "prop_geometry": integer id of props.geometry (0 when the name is unknown)
//   - "prop_<name>" for every static float, every animated float and every int property.
// `program` is not referenced in this function; the uniform helpers are called without it.
// Throws std::out_of_range if an animated float has no matching static fallback and
// CamProps::getf() has to fall back to it.
void applyCamProperties(float secs, Program& program, const CamProps& props) {
	(void)program;

	// Geometry name -> shader-side id. Built once instead of on every call.
	static const std::map<std::string, int> mapping{
		{"mandelbox", 0},
		{"templecube", 1},
		{"kleinian", 2},
		{"labyrinth", 3},
		{"jungle", 4},
		{"sanctuary", 5},
		{"islandcube", 6},
		{"brokencube", 7},
		{"cave", 8},
		{"cave2", 9},
		{"ruins", 10},
		{"mandelbox2", 11},
		{"jungle_castle", 12},
	};

	int prop_geometry = 0;
	const auto found = mapping.find(props.geometry);
	if (found != mapping.end())
		prop_geometry = found->second;

	glUniform1i("prop_geometry", prop_geometry);

	// static uniforms (getf() returns the animated value when a key track of the same name exists)
	for (const auto& pair : props.floats) {
		const std::string name = "prop_" + pair.first;
		glUniform1f(name.c_str(), props.getf(pair.first, secs));
	}

	// animated uniforms
	for (const auto& pair : props.floatKeys) {
		const std::string name = "prop_" + pair.first;
		glUniform1f(name.c_str(), props.getf(pair.first, secs));
	}

	for (const auto& pair : props.ints) {
		const std::string name = "prop_" + pair.first;
		glUniform1i(name.c_str(), props.geti(pair.first, secs));
	}
}

// Returns a copy of the shot that is active at time `secs`, with its timing fields
// filled in: end (start of the next shot), length, relative (time since the shot
// started) and ratio (relative / length).
// If `secs` is at or past the start of the last entry, that entry is returned as stored.
// Returns a value-initialised Shot when no shots are loaded.
Shot shotAtTime(float secs) {
	Shot s = {};
	if (shots.empty()) {
		// Nothing loaded; avoids the unsigned underflow of size() - 1.
		return s;
	}

	for (size_t i = 0; i + 1 < shots.size(); i++) {
		if (shots[i + 1].start > secs) {
			s = shots[i];
			s.end = shots[i + 1].start;
			s.length = s.end - shots[i].start;
			s.relative = secs - shots[i].start;
			s.ratio = s.relative / s.length;
			return s;
		}
	}

	return shots.back();
}

// Finds the camera pose called `name`.
// On success writes its index to *out_poseid (when non-null) and returns the pose.
// On failure prints an error, leaves *out_poseid untouched (callers pre-set it to -1 to
// detect this) and returns the first pose as a placeholder; if no poses are loaded at
// all, a reference to a static value-initialised pose is returned instead.
const CameraPose& findPose(const std::string& name, int* out_poseid)
{
	for (size_t i = 0; i < cameraPoses.size(); i++) {
		if (cameraPoses[i].name == name) {
			if (out_poseid) {
				*out_poseid = static_cast<int>(i);
			}
			return cameraPoses[i];
		}
	}

	printf("Error! No camera found: %s\n", name.c_str());

	if (cameraPoses.empty()) {
		// cameraPoses[0] would be out of bounds here.
		static const CameraPose noPose = {};
		return noPose;
	}
	return cameraPoses[0];
}

static void cameraPath(const Shot& shot, CameraPose& outPose)
{
	CameraPose pose = {};
	int poseid = -1;

	pose = findPose(shot.camName, &poseid);
	if (poseid == -1) return;
	
	const CamProps& props = findProps(shot.camName);

	CameraMove move = {};
	if (cameraMoves.find(shot.camName) != cameraMoves.end()) {
		move = cameraMoves[shot.camName];
	}

	float t = shot.relative + props.getf("time_offset", shot.relative);
	float tuniq = t + poseid * 31.3;

	if (props.camPath.size() > 0) {
		float id = props.getf("cam_id", t);
		if (id < 0 || id > props.camPath.size() - 1) {
			printf("Invalid cam_id %f!\n", id);
		} else {
			int a_id = -1, b_id = -1;
			int a_point = int(id);
			int b_point = min(int(props.camPath.size()) - 1, int(id) + 1);
			const CameraPose& a = findPose(props.camPath[a_point], &a_id);
			const CameraPose& b = findPose(props.camPath[b_point], &b_id);
			if (a_id != -1 || b_id != -1) {
				pose = interpolateCamera(a, b, std::fmodf(id, 1.f));
			}
		}
	}

	pose.startPos = pose.pos;
	pose.pos += t * move.axis;
	pose.pos += t * pose.dir * move.forward;

	// apply camera shake
	int trackIdx = max(0, min(CAMSHAKE_MAX - 1, move.shakeType));
	CameraShakeTrack& track = shakeTracks[trackIdx];
    CameraShakePoint shake = interpolateTrack(track, tuniq);

	pose.dir = rotation_x(props.getf("cam_pitch", t)) * pose.dir;
	pose.dir = rotation_y(props.getf("cam_yaw", t)) * pose.dir;

	vec3 right;
	
	if (abs(pose.dir.y) < 0.999) {
		right = cross(pose.dir, vec3(0.f, 1.f, 0.f));
	} else {
		right = cross(pose.dir, vec3(0.f, 0.f, 1.f));
	}
    vec3 up = cross(right, pose.dir);

	float xsine = props.getf("cam_horiz_sin_amp", t) * sin(props.getf("cam_horiz_sin_freq", t) * t + props.getf("cam_horiz_sin_phase", t));
	pose.pos += right * xsine;
	float ysine = props.getf("cam_vert_sin_amp", t) * sin(props.getf("cam_vert_sin_freq", t) * t + props.getf("cam_vert_sin_phase", t));
	pose.pos += right * xsine;
	pose.pos += up * ysine;
	pose.dir = normalize(pose.dir
		+ props.getf("cam_horiz_sin_turn", t) * right * xsine
		+ props.getf("cam_vert_sin_turn", t) * up * ysine
	);

	pose.zoom += props.getf("cam_zoom", t);

	// could use focal length and angular displacements instead of just moving the dir vector
	float shakeamp = 0.25 * 1.0 / 1920. * (2. * 3.14159) * (90. / 360.); // HACK!!
	vec3 dirdelta = shakeamp * (shake.x * right + shake.y * up);

	pose.dir += move.shake * dirdelta;
	pose.dir = normalize(pose.dir);

	float roll = move.shake * shake.roll;
	pose.up = right * -sin(roll) + up * cos(roll);

	outPose = pose;
}

static void trackCameraPath(const CameraPose& inPose, CameraPose& outPose) {
	outPose = inPose;
	vec3 oldPos = inPose.pos;
	// new position for lookAt
    vec3 right = cross(inPose.dir, vec3(0.f, 1.f, 0.f));
	vec3 ofs = -4.f * inPose.dir - 4.f * right + 1.f * vec3(0.f, 6.f, 0.f);
	ofs *= 2.f;
	outPose.pos += ofs;

	// camera looks from new to old position
	outPose.dir = normalize(oldPos - outPose.pos);

	// translation without affecting dir
	outPose.pos += 3.f * inPose.dir;

    vec3 right2 = cross(outPose.dir, vec3(0.f, 1.f, 0.f));
	outPose.up = cross(right2, outPose.dir);
	outPose.zoom *= 2.f;
}

// Reloads poses, shots, moves and camera properties from their asset files.
// Each global is replaced only when its loader returned at least one entry, so a
// missing or empty file keeps the previously loaded data.
static void reloadAnimations(Music& music)
{
	std::vector<CameraPose> freshPoses = loadPoses();
	if (!freshPoses.empty()) {
		cameraPoses = freshPoses;
	}

	std::vector<Shot> freshShots = loadShots();
	if (!freshShots.empty()) {
		shots = freshShots;
		// Append a copy of the last shot that starts at the end of the music, so the
		// real last shot has a successor to compute its local time against.
		Shot terminator = shots.back();
		terminator.start = music.getDuration();
		shots.push_back(terminator);
	}

	std::map<std::string, CameraMove> freshMoves = loadMoves();
	if (!freshMoves.empty()) {
		cameraMoves = freshMoves;
	}

	std::map<std::string, CamProps> freshProps = loadProps();
	if (!freshProps.empty()) {
		camProps = freshProps;
	}
}

// Texture set for one impostor, created by loadImpostor() and uploaded by
// bindImpostor() as the members of "impostors[slot]".
struct Impostor {
    Texture<GL_TEXTURE_2D> color;   // loaded from <dir>/basecolor0000.png
    Texture<GL_TEXTURE_2D> depth;   // loaded from <dir>/depth0000.png as 16-bit grayscale
    Texture<GL_TEXTURE_2D> normal;  // loaded from <dir>/normal0000.png
	float distance = 10.f;          // uploaded as ".distance"
	float scale = 8.f;              // uploaded as ".scale"
};

// Generates mipmaps for `tex` and sets its filtering: GL_LINEAR_MIPMAP_NEAREST for
// minification, GL_LINEAR for magnification.
// Side effect: leaves `tex` bound to GL_TEXTURE_2D (needed by glGenerateMipmap).
static void setDepthSpriteFilter(Texture<GL_TEXTURE_2D>& tex) {
    glBindTexture(GL_TEXTURE_2D, tex);
    glGenerateMipmap(GL_TEXTURE_2D);
    glTextureParameteri(tex, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_NEAREST);
    glTextureParameteri(tex, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
}

// Loads the colour, depth and normal images of an impostor from directory `dir`
// (basecolor0000.png, depth0000.png, normal0000.png), applies mipmapped filtering to
// the depth texture, clamps all three textures at the edges and stores `distance`
// and `scale` alongside them.
static Impostor loadImpostor(const std::string& dir, float distance, float scale)
{
	Impostor result;
	result.distance = distance;
	result.scale = scale;

	std::string colorFile = dir + "/basecolor0000.png";
	std::string depthFile = dir + "/depth0000.png";
	std::string normalFile = dir + "/normal0000.png";

	result.color = loadImage(colorFile, true, true);
	result.depth = load16bitGrayscaleImage(depthFile);
	result.normal = loadImage(normalFile, true, false);

	setDepthSpriteFilter(result.depth);

	setWrapToClamp(result.color);
	setWrapToClamp(result.depth);
	setWrapToClamp(result.normal);

	return result;
}

// Uploads one impostor into element `slot` of the shader-side "impostors" array:
// its three textures plus the distance and scale values.
void bindImpostor(const Impostor& imp, int slot)
{
	// Common "impostors[<slot>]." prefix of every member uniform.
	const std::string member = "impostors[" + std::to_string(slot) + "].";

	bindTexture(member + "color", imp.color);
	bindTexture(member + "depth", imp.depth);
	bindTexture(member + "normal", imp.normal);
	glUniform1f(member + "distance", imp.distance);
	glUniform1f(member + "scale", imp.scale);
}

void setSkyUniforms(const std::string& wantedSkyName)
{
    std::string skyName = "cliff_top";
    if (skies.find(wantedSkyName) != skies.end()) {
        skyName = wantedSkyName;
    }
    const Sky& sky = skies.at(skyName);

    vec3 sunDirection = sky.sunDirection;
    vec3 sunColor = sky.sunColor;
    vec3 fogColor = sky.fogColor;
    vec3 fogScatterColor = sky.fogScatterColor;

    glUniform3f("sunDirection", sunDirection.x, sunDirection.y, sunDirection.z);
    glUniform3f("sunColor", sunColor.x, sunColor.y, sunColor.z);
    glUniform3f("fogColor", fogColor.x, fogColor.y, fogColor.z);
    glUniform3f("fogScatterColor", fogScatterColor.x, fogScatterColor.y, fogScatterColor.z);

    bindTexture("skybox", sky.skybox);
    bindTexture("skyiem", sky.irradiance);

	if (sky.background) {
		bindTexture("skybg", sky.background);
	}
}

int main(int argc, char** argv) {

	if (argc == 3) {
		screenw = atoi(argv[1]);
		screenh = atoi(argv[2]);
	}

	printf("Using resolution %dx%d\n", screenw, screenh);

	// create window and context. can also do fullscreen and non-visible window for compute only
	bool full = false;
#if FINALBUILD
	full = true;
#endif
	OpenGL context(screenw, screenh, "Project Eldorado", full);
    glClear(GL_COLOR_BUFFER_BIT);
    swapBuffers();
	
	// load a font to draw text with -- any system font or local .ttf file should work
	Font font(L"Consolas");

	bool shakeSuccess = true;
	shakeSuccess = shakeSuccess && loadCameraShakeTrack("assets/shaketracks/still.bin", &shakeTracks[CAMSHAKE_STILL]);
	shakeSuccess = shakeSuccess && loadCameraShakeTrack("assets/shaketracks/onehand.bin", &shakeTracks[CAMSHAKE_SHAKY]);
	shakeSuccess = shakeSuccess && loadCameraShakeTrack("assets/shaketracks/slowwalk.bin", &shakeTracks[CAMSHAKE_WALK]);
	shakeSuccess = shakeSuccess && loadCameraShakeTrack("assets/shaketracks/fastwalk.bin", &shakeTracks[CAMSHAKE_RUN]);

	if (!shakeSuccess) {
		puts("failed to load camera shake data");
		return 1;
	}

	Music music(L"assets/music.wav");
	reloadAnimations(music);

	Program::reloadLibIfRequired();

	// shader variables; could also initialize them here, but it's often a good idea to
	// do that at the callsite (so input/output declarations are close to the bind code)
	Program edgeDetect, draw, blurFilter, sampleResolve, headerUpdate, pointSplat, bloom, clearSamples, composite, present;
	std::map<std::string, Program> marcherShaders;
    std::map<std::string, std::string> geometryShaderPaths{
		{"labyrinth", "shaders/scene_labyrinth.glsl"},
		{"templecube", "shaders/scene_labyrinth.glsl"},
		{"islandcube", "shaders/scene_islandcube.glsl"},
		{"brokencube", "shaders/scene_brokencube.glsl"},
		{"mandelbox", "shaders/scene_mandelbox.glsl"},
		{"kleinian", "shaders/scene_kleinian.glsl"},
		{"jungle", "shaders/scene_jungle.glsl"},
		{"sanctuary", "shaders/scene_sanctuary.glsl"},
		{"cave", "shaders/scene_cave.glsl"},
		{"cave2", "shaders/scene_cave.glsl"},
		{"ruins", "shaders/scene_labyrinth.glsl"},
		{"mandelbox2", "shaders/scene_mandelbox.glsl"},
		{"jungle_castle", "shaders/scene_jungle_castle.glsl"},
	};

	for (const auto& kv : geometryShaderPaths) {
		dprintf("Compiling %s\n", kv.second.c_str());
		marcherShaders[kv.first] = createProgram(kv.second);
	}

	// warmup
	for (auto& kv : marcherShaders) {
		dprintf("Warming up shader %s...\n", kv.first.c_str());
		glUseProgram(kv.second);
		glDispatchCompute(1, 1, 1);
	}



	std::wstring noisePaths[] = {
		L"assets/bluenoise/LDR_RGB1_0.png",
		L"assets/bluenoise/LDR_RGB1_1.png",
		L"assets/bluenoise/LDR_RGB1_2.png",
		L"assets/bluenoise/LDR_RGB1_3.png",
		L"assets/bluenoise/LDR_RGB1_4.png",
		L"assets/bluenoise/LDR_RGB1_5.png",
		L"assets/bluenoise/LDR_RGB1_6.png",
		L"assets/bluenoise/LDR_RGB1_7.png",
		L"assets/bluenoise/LDR_RGB1_8.png",
		L"assets/bluenoise/LDR_RGB1_9.png",
		L"assets/bluenoise/LDR_RGB1_10.png",
		L"assets/bluenoise/LDR_RGB1_11.png",
		L"assets/bluenoise/LDR_RGB1_12.png",
		L"assets/bluenoise/LDR_RGB1_13.png",
		L"assets/bluenoise/LDR_RGB1_14.png",
		L"assets/bluenoise/LDR_RGB1_15.png",
		L"assets/bluenoise/LDR_RGB1_16.png",
		L"assets/bluenoise/LDR_RGB1_17.png",
		L"assets/bluenoise/LDR_RGB1_18.png",
		L"assets/bluenoise/LDR_RGB1_19.png",
		L"assets/bluenoise/LDR_RGB1_20.png",
		L"assets/bluenoise/LDR_RGB1_21.png",
		L"assets/bluenoise/LDR_RGB1_22.png",
		L"assets/bluenoise/LDR_RGB1_23.png",
		L"assets/bluenoise/LDR_RGB1_24.png",
		L"assets/bluenoise/LDR_RGB1_25.png",
		L"assets/bluenoise/LDR_RGB1_26.png",
		L"assets/bluenoise/LDR_RGB1_27.png",
		L"assets/bluenoise/LDR_RGB1_28.png",
		L"assets/bluenoise/LDR_RGB1_29.png",
		L"assets/bluenoise/LDR_RGB1_30.png",
		L"assets/bluenoise/LDR_RGB1_31.png",
		L"assets/bluenoise/LDR_RGB1_32.png",
		L"assets/bluenoise/LDR_RGB1_33.png",
		L"assets/bluenoise/LDR_RGB1_34.png",
		L"assets/bluenoise/LDR_RGB1_35.png",
		L"assets/bluenoise/LDR_RGB1_36.png",
		L"assets/bluenoise/LDR_RGB1_37.png",
		L"assets/bluenoise/LDR_RGB1_38.png",
		L"assets/bluenoise/LDR_RGB1_39.png",
		L"assets/bluenoise/LDR_RGB1_40.png",
		L"assets/bluenoise/LDR_RGB1_41.png",
		L"assets/bluenoise/LDR_RGB1_42.png",
		L"assets/bluenoise/LDR_RGB1_43.png",
		L"assets/bluenoise/LDR_RGB1_44.png",
		L"assets/bluenoise/LDR_RGB1_45.png",
		L"assets/bluenoise/LDR_RGB1_46.png",
		L"assets/bluenoise/LDR_RGB1_47.png",
		L"assets/bluenoise/LDR_RGB1_48.png",
		L"assets/bluenoise/LDR_RGB1_49.png",
		L"assets/bluenoise/LDR_RGB1_50.png",
		L"assets/bluenoise/LDR_RGB1_51.png",
		L"assets/bluenoise/LDR_RGB1_52.png",
		L"assets/bluenoise/LDR_RGB1_53.png",
		L"assets/bluenoise/LDR_RGB1_54.png",
		L"assets/bluenoise/LDR_RGB1_55.png",
		L"assets/bluenoise/LDR_RGB1_56.png",
		L"assets/bluenoise/LDR_RGB1_57.png",
		L"assets/bluenoise/LDR_RGB1_58.png",
		L"assets/bluenoise/LDR_RGB1_59.png",
		L"assets/bluenoise/LDR_RGB1_60.png",
		L"assets/bluenoise/LDR_RGB1_61.png",
		L"assets/bluenoise/LDR_RGB1_62.png",
		L"assets/bluenoise/LDR_RGB1_63.png",
	};

	Texture<GL_TEXTURE_2D_ARRAY> noiseTextures = loadImageArray(noisePaths, sizeof(noisePaths)/sizeof(std::wstring));
	//std::wstring white[] = {
	Texture<GL_TEXTURE_2D> logo = loadImage(L"assets/plates/peisik.png", true, true);
	Texture<GL_TEXTURE_2D> whiteNoise = loadImage(L"assets/whitenoise/noise256.png", false);
	Texture<GL_TEXTURE_2D> perlinNoise = loadImage(L"assets/perlinnoise/rgb_perlin_256.png", false);
	Texture<GL_TEXTURE_2D> waterNormals = loadImage(L"assets/water/Water_002_NORM.jpg", false);
	Texture<GL_TEXTURE_2D> rockDiffuse = loadImage(L"assets/rocks/rock_05_diff_1k.png", true);
	Texture<GL_TEXTURE_2D> rockNormals = loadImage(L"assets/rocks/rock_05_nor_1k.png", false);
	Texture<GL_TEXTURE_2D> rockRoughness = loadImage(L"assets/rocks/rock_05_rough_1k.png", false);
	Texture<GL_TEXTURE_2D> marbleDiffuse = loadImage(L"assets/marble/marble_0008_base_color_2k.jpg", true);
	Texture<GL_TEXTURE_2D> marbleRoughness = loadImage(L"assets/marble/marble_0008_roughness_2k.jpg", true);

    Impostor impostors[] = {
     loadImpostor("assets/impostors/small_palm", 10.f, 7.21429f),
     loadImpostor("assets/impostors/curry_tree1", 10.f, 8.f),
     loadImpostor("assets/impostors/curry_tree2", 10.f, 8.f),
     loadImpostor("assets/impostors/curry_tree2b", 10.f, 8.f),
     loadImpostor("assets/impostors/aesculus1", 10.f, 8.f),
     loadImpostor("assets/impostors/chinaberry02", 10.f, 8.f),
     loadImpostor("assets/impostors/chinaberry04", 10.f, 8.f),
     loadImpostor("assets/impostors/aesculus2", 10.f, 8.f),
     loadImpostor("assets/impostors/sphere", 10.f, 8.f),
     loadImpostor("assets/impostors/bigmammoth", 10.f, 8.f),
    };

	std::wstring cubePaths[] = {
		L"assets/white_cliff_top/_posx.png",
		L"assets/white_cliff_top/_negx.png",
		L"assets/white_cliff_top/_negy.png",
		L"assets/white_cliff_top/_posy.png",
		L"assets/white_cliff_top/_posz.png",
		L"assets/white_cliff_top/_negz.png"
	};

	std::wstring cubePaths2[] = {
		L"assets/white_cliff_top/bright/_posx.png",
		L"assets/white_cliff_top/bright/_negx.png",
		L"assets/white_cliff_top/bright/_negy.png",
		L"assets/white_cliff_top/bright/_posy.png",
		L"assets/white_cliff_top/bright/_posz.png",
		L"assets/white_cliff_top/bright/_negz.png"
	};

	std::wstring cubeIemPaths[] = {
		L"assets/white_cliff_top/linear_iem/iem_posx.png",
		L"assets/white_cliff_top/linear_iem/iem_negx.png",
		L"assets/white_cliff_top/linear_iem/iem_negy.png",
		L"assets/white_cliff_top/linear_iem/iem_posy.png",
		L"assets/white_cliff_top/linear_iem/iem_posz.png",
		L"assets/white_cliff_top/linear_iem/iem_negz.png"
	};

	std::wstring greenCubePaths[] = {
		L"assets/green_sanctuary/_posx.png",
		L"assets/green_sanctuary/_negx.png",
		L"assets/green_sanctuary/_negy.png",
		L"assets/green_sanctuary/_posy.png",
		L"assets/green_sanctuary/_posz.png",
		L"assets/green_sanctuary/_negz.png"
	};

	std::wstring greenIemPaths[] = {
		L"assets/green_sanctuary/linear_iem/iem_posx.png",
		L"assets/green_sanctuary/linear_iem/iem_negx.png",
		L"assets/green_sanctuary/linear_iem/iem_negy.png",
		L"assets/green_sanctuary/linear_iem/iem_posy.png",
		L"assets/green_sanctuary/linear_iem/iem_posz.png",
		L"assets/green_sanctuary/linear_iem/iem_negz.png"
	};

	std::wstring fireCubePaths[] = {
		L"assets/the_sky_is_on_fire/output_skybox_posx.png",
		L"assets/the_sky_is_on_fire/output_skybox_negx.png",
		L"assets/the_sky_is_on_fire/output_skybox_negy.png",
		L"assets/the_sky_is_on_fire/output_skybox_posy.png",
		L"assets/the_sky_is_on_fire/output_skybox_posz.png",
		L"assets/the_sky_is_on_fire/output_skybox_negz.png"
	};

	std::wstring fireIemPaths[] = {
		L"assets/the_sky_is_on_fire/iem_posx.png",
		L"assets/the_sky_is_on_fire/iem_negx.png",
		L"assets/the_sky_is_on_fire/iem_negy.png",
		L"assets/the_sky_is_on_fire/iem_posy.png",
		L"assets/the_sky_is_on_fire/iem_posz.png",
		L"assets/the_sky_is_on_fire/iem_negz.png"
	};
	Texture<GL_TEXTURE_2D> skyMap; // = loadImage(L"assets/white_cliff_top/white_cliff_top_8k_top_gamma_0_5.png", false);

	skies["cliff_top"].skybox = loadCubeMap(cubePaths, sizeof(cubePaths) / sizeof(cubePaths[0]), true);
	skies["cliff_top"].irradiance = loadCubeMap(cubeIemPaths, sizeof(cubeIemPaths) / sizeof(cubeIemPaths[0]), true);
    skies["cliff_top"].sunDirection = normalize(vec3(-5.0, 3.5, -5.0));
    skies["cliff_top"].sunColor = 2.f * vec3(1., 0.8, 0.5);
	skies["cliff_top"].background = loadImage("assets/backgrounds/IMG_0320.jpg", true);
	skies["cliff_top2"].skybox = loadCubeMap(cubePaths2, sizeof(cubePaths) / sizeof(cubePaths[0]), true);
	skies["cliff_top2"].irradiance = loadCubeMap(cubeIemPaths, sizeof(cubeIemPaths) / sizeof(cubeIemPaths[0]), true);
    skies["cliff_top2"].sunDirection = normalize(vec3(-5.0, 3.5, -5.0));
    skies["cliff_top2"].sunColor = 2.f * vec3(1., 0.8, 0.5);
	skies["cliff_top2"].background = loadImage("assets/backgrounds/IMG_0320.jpg", true);
	skies["studio"].skybox = loadCubeMap(cubePaths, sizeof(cubePaths) / sizeof(cubePaths[0]), true);
	skies["studio"].irradiance = loadCubeMap(cubeIemPaths, sizeof(cubeIemPaths) / sizeof(cubeIemPaths[0]), true);
    skies["studio"].sunDirection = normalize(normalize(vec3(-5.0, 3.5, -5.0)) + vec3(0., 0.2, 0.));
    skies["studio"].sunColor = 2.f * vec3(1., 0.8, 0.5);
	skies["sanctuary"].skybox = loadCubeMap(greenCubePaths, sizeof(greenCubePaths) / sizeof(greenCubePaths[0]), true);
	skies["sanctuary"].irradiance = loadCubeMap(greenIemPaths, sizeof(greenIemPaths) / sizeof(greenIemPaths[0]), true);
    skies["sanctuary"].sunDirection = normalize(vec3(-5.0, 3.5, -5.0)); // TODO change
    skies["sanctuary"].sunColor = 2.f * vec3(1., 0.8, 0.5);
	skies["inferno"].skybox = loadCubeMap(cubePaths, sizeof(cubePaths) / sizeof(cubePaths[0]), true);
	skies["inferno"].irradiance = loadCubeMap(cubeIemPaths, sizeof(cubeIemPaths) / sizeof(cubeIemPaths[0]), true);
    skies["inferno"].sunDirection = normalize(vec3(-5.0, 3.5, -5.0));
    skies["inferno"].sunColor = 4.f * vec3(1., 1.0, 1.0);
	skies["inferno"].fogColor = vec3(0.f, 0.f, 0.f);
	skies["inferno"].fogScatterColor = vec3(0.f, 0.f, 0.f);
	skies["fire"].skybox = loadCubeMap(fireCubePaths, sizeof(fireCubePaths) / sizeof(fireCubePaths[0]), true);
	skies["fire"].irradiance = loadCubeMap(fireIemPaths, sizeof(fireIemPaths) / sizeof(fireIemPaths[0]), true);
	skies["fire"].sunDirection = normalize(vec3(-5.8, 2.0, -5.0));
	skies["fire"].sunColor = 2.f * vec3(1., 0.09, 0.02);
	skies["fire"].fogScatterColor = skies["fire"].sunColor;
	skies["fire"].fogColor = vec3(0.1f);
	skies["fire"].background = loadImage("assets/backgrounds/IMG_0831b.jpg", true, false);
	skies["fire2"].skybox = loadCubeMap(fireCubePaths, sizeof(fireCubePaths) / sizeof(fireCubePaths[0]), true);
	skies["fire2"].irradiance = loadCubeMap(fireIemPaths, sizeof(fireIemPaths) / sizeof(fireIemPaths[0]), true);
	skies["fire2"].sunDirection = normalize(vec3(-5.8, 2.0, -5.0));
	skies["fire2"].sunColor = 2.f * vec3(1., 0.09, 0.02);
	skies["fire2"].fogScatterColor = skies["fire"].sunColor;
	skies["fire2"].fogColor = vec3(0.1f);
	skies["fire2"].background = loadImage("assets/backgrounds/IMG_0824b.jpg", true, false);
	skies["cliff_top_grid"].skybox = loadCubeMap(cubePaths, sizeof(cubePaths) / sizeof(cubePaths[0]), true);
	skies["cliff_top_grid"].irradiance = loadCubeMap(cubeIemPaths, sizeof(cubeIemPaths) / sizeof(cubeIemPaths[0]), true);
    skies["cliff_top_grid"].sunDirection = normalize(vec3(-5.0, 3.5, -5.0));
    skies["cliff_top_grid"].sunColor = 2.f * vec3(1., 0.8, 0.5);
	skies["cliff_top_grid"].background = loadImage("assets/backgrounds/grad.png", true, true);
	
	std::vector<Texture<GL_TEXTURE_2D>> plates;
	plates.push_back(loadImage(L"assets/plates/hello.png", true, true));
	plates.push_back(loadImage(L"assets/plates/credits1.png", true, true));
	plates.push_back(loadImage(L"assets/plates/credits2.png", true, true));
	plates.push_back(loadImage(L"assets/plates/halo.png", true, true));

	Texture<GL_TEXTURE_2D> gbuffer;
	Texture<GL_TEXTURE_2D> zbuffer;
	Texture<GL_TEXTURE_2D> resolved;
	Texture<GL_TEXTURE_2D> reflected;
	Texture<GL_TEXTURE_2D> composited;
	Texture<GL_TEXTURE_2D> bloombuffer;
	Texture<GL_TEXTURE_2D> bloombuffer2;
	Texture<GL_TEXTURE_2D_ARRAY> jitterbuffer;
	Buffer cameraData;
	Buffer pointBufferHeader;
	Buffer pointBuffer;
	Buffer pointPosShotBuffer, pointColorBuffer, pointShadingBuffer, pointInfoBuffer;
	Buffer jumpbuffer;
	Buffer radiusbuffer;
	Buffer debugBuffer;
	Buffer stepBuffer;
	Buffer rayIndexBuffer;
	Buffer sampleBuffer;
	Buffer sampleBuffer2;
	Buffer sampleBufferEdges;

	setWrapToClamp(gbuffer);
	setWrapToClamp(zbuffer);
	setWrapToClamp(resolved);
	setWrapToClamp(reflected);
	setWrapToClamp(composited);
	setWrapToClamp(bloombuffer);
	setWrapToClamp(bloombuffer2);

	int renderw = screenw, renderh = screenh;
	int superw = renderw * 2, superh = renderh * 2;

	glTextureStorage2D(zbuffer, 1, GL_R32F, superw, superh);
	glTextureStorage2D(gbuffer, 1, GL_RGBA32F, renderw, renderh);
	glTextureStorage2D(resolved, 1, GL_RGBA16F, renderw, renderh);
	glTextureStorage2D(reflected, 1, GL_RGBA16F, renderw, renderh);
	glTextureStorage2D(composited, 1, GL_RGBA16F, renderw, renderh);
	glTextureStorage2D(bloombuffer, 1, GL_RGBA16F, renderw/2, renderh/2);
	glTextureStorage2D(bloombuffer2, 1, GL_RGBA16F, renderw/2, renderh/2);
	glTextureStorage3D(jitterbuffer, 1, JITTER_BUFFER_TYPE, renderw, renderh, 1);

#if LONG_HISTORY
	const int samplesPerPixel = 15;
#else
	const int samplesPerPixel = 10;
#endif
    const int maxPoints = samplesPerPixel * screenw * screenh;
    dprintf("Max points: %.3f million\n", maxPoints / 1e6);

	int sampleBufferSize = superw * (superh + 128);
	int pointsSplatted = 0;
	int frame = 0;
	int noiseLayer = -1;
	CameraParameters cameras[3] = {};
	glNamedBufferStorage(cameraData, sizeof(cameras), NULL, GL_DYNAMIC_STORAGE_BIT);
	glNamedBufferStorage(pointBufferHeader, 3 * sizeof(int), NULL, POINT_BUFFER_HEADER_DEBUGGING ? GL_DYNAMIC_STORAGE_BIT : 0);
	glNamedBufferStorage(pointBuffer, sizeof(RgbPoint) * maxPoints, NULL, 0);
	glNamedBufferStorage(pointPosShotBuffer, 4 * sizeof(float) * maxPoints, NULL, 0);
	glNamedBufferStorage(pointColorBuffer, 4 * sizeof(float) * maxPoints, NULL, 0);
	glNamedBufferStorage(pointShadingBuffer, 1 * sizeof(unsigned int) * maxPoints, NULL, 0);
	glNamedBufferStorage(pointInfoBuffer, 1 * sizeof(unsigned int) * maxPoints, NULL, 0);
	glNamedBufferStorage(debugBuffer, 1024 * sizeof(int), NULL, 0);
	glNamedBufferStorage(stepBuffer, 50000 * sizeof(float), NULL, STEP_BUFFER_DEBUGGING ? GL_DYNAMIC_STORAGE_BIT : 0);
	glNamedBufferStorage(sampleBuffer, sampleBufferSize * sizeof(uint64_t), NULL, 0);
	glNamedBufferStorage(sampleBuffer2, sampleBufferSize * sizeof(uint64_t), NULL, 0);
	glNamedBufferStorage(sampleBufferEdges, sampleBufferSize * sizeof(uint32_t), NULL, 0);

	int zero = 0;
	glClearNamedBufferData(pointBufferHeader, GL_R32I, GL_RED_INTEGER, GL_INT, &zero);
	glClearNamedBufferData(pointBuffer, GL_R32I, GL_RED_INTEGER, GL_INT, &zero);
	glClearNamedBufferData(debugBuffer, GL_R32I, GL_RED_INTEGER, GL_INT, &zero);
	glClearNamedBufferData(sampleBuffer, GL_R32I, GL_RED_INTEGER, GL_INT, &zero);
	glClearNamedBufferData(sampleBuffer2, GL_R32I, GL_RED_INTEGER, GL_INT, &zero);
	glClearNamedBufferData(sampleBufferEdges, GL_R32I, GL_RED_INTEGER, GL_INT, &zero);
	float minusone = -1.f;
	glClearNamedBufferData(stepBuffer, GL_R32F, GL_RED, GL_FLOAT, &minusone);
	glClearNamedBufferData(pointPosShotBuffer, GL_R32F, GL_RED, GL_FLOAT, &minusone);

	int headerSize = -1;
	glGetNamedBufferParameteriv(pointBufferHeader, GL_BUFFER_SIZE, &headerSize);
	dprintf("pointBufferHeader size: %d bytes\n", headerSize);
	GLint64 pointBufferSize = -1;
	glGetNamedBufferParameteri64v(pointBuffer, GL_BUFFER_SIZE, &pointBufferSize);
	int pointBufferMaxElements = static_cast<int>(pointBufferSize / sizeof(RgbPoint));
	dprintf("pointBuffer size: %" PRId64 " bytes = %.3f MiB\n", pointBufferSize, pointBufferSize / 1024. / 1024.);
	dprintf("pointBufferMaxElements: %d\n", pointBufferMaxElements);

#if LONG_HISTORY
	ivec2 tracingRez(screenw * 0.75, screenh * 0.75);
#else
	ivec2 tracingRez(screenw, screenh);
#endif

	int jumpBufferMaxElements = dim2nodecount(max(tracingRez.x, tracingRez.y));
	int rayIndexBufferMaxElements = 0;
	int maxdim = 1 << (int(log2(max(tracingRez.x, tracingRez.y))) + 1);
	vec2 screenBoundary(tracingRez.x / float(maxdim), tracingRez.y / float(maxdim));

	// Build index array for the raymarcher
	//
	// Walks every node index of the jump-buffer quadtree and keeps the ones
	// whose cell centre lies inside the on-screen part of the square
	// [0, 1]^2 domain (bounded by "screenBoundary"). The surviving indices
	// are uploaded once into the immutable rayIndexBuffer.
	{
		std::vector<int> indexArray;
		// Upper bound: at most every node passes the test below.
		indexArray.reserve(jumpBufferMaxElements);

		for (int i = 0; i < jumpBufferMaxElements; i++) {
			const int level = tobin(i);                      // quadtree level (bin) of node i
			const int levelStart = binto(level);             // index of the first node on that level
			const uvec2 coord = z2xy(uint(i - levelStart));  // Z-order offset within the level -> (x, y) cell
			const float dim = float(1 << level);             // number of cells per side on this level

			// Centre of the cell in [0, 1]^2
			const vec2 uv = vec2(0.5/dim) + vec2(coord) / vec2(dim);
			//float margin = 0.5f / dim; // no effect?
			const float margin = 0.f;
			if (uv.x <= screenBoundary.x + margin && uv.y <= screenBoundary.y + margin) {
				indexArray.push_back(i);
			}
		}
		// The element count is passed to the shader as an int uniform, so narrow explicitly.
		rayIndexBufferMaxElements = static_cast<int>(indexArray.size());
		dprintf("rayIndexBuffer elements: %d\n", rayIndexBufferMaxElements);
		glNamedBufferStorage(rayIndexBuffer, indexArray.size() * sizeof(int), indexArray.data(), 0);
	}

	glNamedBufferStorage(jumpbuffer, jumpBufferMaxElements * sizeof(float), NULL, JUMP_BUFFER_DEBUGGING ? GL_DYNAMIC_STORAGE_BIT : 0);
	glNamedBufferStorage(radiusbuffer, jumpBufferMaxElements * sizeof(float), NULL, JUMP_BUFFER_DEBUGGING ? GL_DYNAMIC_STORAGE_BIT : 0);

	GLint64 jumpBufferSize = -1;
	glGetNamedBufferParameteri64v(jumpbuffer, GL_BUFFER_SIZE, &jumpBufferSize);
	dprintf("jumpBuffer size: %" PRId64 " bytes = %.3f MiB\n", jumpBufferSize, jumpBufferSize / 1024. / 1024.);

	bool interactive = false;
	bool controls = true;
	struct {
		float start=0.f;
		float end=1.f;
		bool on=false;
	} looping;
	CameraPose editPose = *findPose("edit");
	std::deque<double> frameTimes{ 1. / 30., 1. / 30., 1. / 30. };
	std::string lastShotName;

#if BENCHMARK
	printf("Running benchmark");
	Benchmark bench;
	for (auto& kv : shotNames) {
		bench.reserve(kv.second.camName.c_str(), music.getDuration());
	}
#endif

#if FINALBUILD
	while (ShowCursor(false) >= 0);
	Sleep(1000);
#endif

#if FINALBUILD
	music.play();
	controls = false;
#elif BENCHMARK
	music.setVolume(-100.);
	music.play();
	controls = false;
#else
	music.setVolume(-100.);
	music.seek(80.);
#endif

	auto lastFrameTime = std::chrono::high_resolution_clock::now();

	while (loop()) // loop() stops if esc pressed or window closed
	{
		TimeStamp start;
		float secs = music.getTime();
		double dt = 0.;
		double rawdt = 0.;

		{
			// Frame timing: measure the wall-clock time since the previous
			// frame and smooth it with a moving average over the last three
			// frames. "rawdt" keeps the unsmoothed value.
			const auto now = std::chrono::high_resolution_clock::now();
			const auto elapsedNs = std::chrono::duration_cast<std::chrono::nanoseconds>(now - lastFrameTime).count();
			const double dtnew = elapsedNs / 1e9;
			rawdt = dtnew;
			//if (dtnew > 1.) dtnew = 1. / 30.;
			if (frameTimes.size() >= 3) {
				frameTimes.pop_front();
			}
			frameTimes.push_back(dtnew);
			for (double ft : frameTimes) { dt += ft; }
			dt /= frameTimes.size();
			// Reuse the timestamp sampled above. Reading the clock a second
			// time would silently drop the time spent between the two reads
			// from the next frame's delta.
			lastFrameTime = now;
		}


		int mouseWheelDelta = getMouseWheelDelta();

		if (controls) {
			// While controls are enabled, reloadAnimations() is called on
			// every fourth frame (frame 0 included) and
			// Program::reloadLibIfRequired() on every frame.
			const bool isReloadFrame = (frame % 4) == 0;
			if (isReloadFrame) {
				reloadAnimations(music);
			}

			Program::reloadLibIfRequired();
		}

		if (looping.on && secs > looping.end) {
			music.seek(looping.start);
		}

		float futureInterval = dt * 6;
		double futureSecs = double(secs) + double(futureInterval);
		double nextFrameDelta = dt;

		if (controls)
		{
			// keep deltatime even when paused
			static double last_dt;
			if (music.playing) {
				last_dt = nextFrameDelta;
			} else {
				if (last_dt == 0.) last_dt = 1. / 30.;
				nextFrameDelta = last_dt;
			}
		}

		Shot currentShot = shotAtTime(secs);
		Shot nextShot = currentShot; // the same shot but with a predicted camera position
		nextShot.relative += nextFrameDelta;
		Shot futureShot = shotAtTime(futureSecs);

        #if BENCHMARK
            bench.addDelta(currentShot.camName, rawdt);
        #endif

		const CamProps& props = findProps(currentShot.camName);
		const CamProps& futureProps = findProps(futureShot.camName);

		CameraPose pose, nextPose, futurePose;

		static POINT lastMouse;

		if (interactive) {
			POINT mouse = getMouse();
			int dx = mouse.x - lastMouse.x;
			int dy = mouse.y - lastMouse.y;

			float yaw_diff = dx / float( screenw );
			double pitch_diff = dy / double( screenw );

			float movespeed = keyDown(VK_LSHIFT) ? 5.0f : (keyDown(VK_LCONTROL) ? 0.2f : 1.f);
			vec3 right = cross( vec3( 0.f, 1.f, 0.f ), editPose.dir );

			if (mouseDown(0)) {
                double pitch = asin( double(editPose.dir.y) ) + 2. * pitch_diff;
				vec3 d = vec3( editPose.dir.x, sin(pitch), editPose.dir.z );
				editPose.dir = normalize(rotation_y(  yaw_diff ) * d);
			}

			if (mouseDown(1)) {
              editPose.pos -= 1.f * yaw_diff * float( movespeed ) * right;
              editPose.pos.y += 1.f * pitch_diff * float( movespeed );
			}

            if ( keyDown( 'W' ) )
              editPose.pos += float( movespeed * dt ) * editPose.dir;
            if ( keyDown( 'S' ) )
              editPose.pos -= float( movespeed * dt ) * editPose.dir;
            if ( keyDown( 'D' ) )
              editPose.pos += float( movespeed * dt ) * right;
            if ( keyDown( 'A' ) )
              editPose.pos -= float( movespeed * dt ) * right;
            if ( keyDown( 'E' ) )
              editPose.pos.y += float( movespeed * dt );
            if ( keyDown( 'Q' ) )
              editPose.pos.y -= float( movespeed * dt );

			editPose.zoom *= 1. + 1e-6 * (mouseWheelDelta/120.f);

			pose = editPose;
			futurePose = editPose;
			nextPose = editPose;
		}
		else {
			cameraPath(currentShot, pose);
			cameraPath(nextShot, nextPose);
			cameraPath(futureShot, futurePose);
		}
		makeCamera(futurePose, cameras[1]);
		makeCamera(nextPose, cameras[2]);

		lastMouse = getMouse();

		glNamedBufferSubData(cameraData, 0, sizeof(cameras), &cameras);

		if (controls) {
			// Editing / transport hotkeys. Only handled while "controls" is
			// enabled (F1 toggles it further below).

			// Left/right arrows scrub the music while held; either shift key
			// makes the step five times larger.
			float seekTime = 1.f;
			if (keyDown(VK_LSHIFT) || keyDown(VK_RSHIFT)) {
				seekTime *= 5.f;
			}
			if (keyDown(VK_LEFT)) {
				music.seek(music.getTime() - seekTime);
			}
			else if (keyDown(VK_RIGHT)) {
				music.seek(music.getTime() + seekTime);
			}
			if (keyHit(VK_SPACE)) music.togglePlaying();
			if (keyHit(VK_BACK)) music.seek(0.);
			// 0x4D is the virtual-key code of 'M': toggle the volume between -100 and 0.
			if (keyHit(0x4D)) music.setVolume(music.getVolume() > -100. ? -100. : 0.);
			if (keyHit(VK_F2)) {
				// Toggle the free camera. When entering, start from the current
				// pose but with a fixed view direction and up vector.
				if (!interactive) {
					editPose = pose;
					editPose.dir = vec3(0.f, 0.f, -1.f);
					editPose.up = vec3(0.f, 1.f, 0.f);
				}
				interactive = !interactive;
			}
			if (keyHit(VK_RETURN)) {
				music.seek(currentShot.start);
			}
			if (keyHit('C')) {
				puts("Clearing point buffer");
				const int zero = 0;
				glClearNamedBufferData(pointBufferHeader, GL_R32I, GL_RED_INTEGER, GL_INT, &zero);
				glClearNamedBufferData(pointBuffer, GL_R32I, GL_RED_INTEGER, GL_INT, &zero);
				// The splat shader's SOA path decides whether a point is live from
				// pointPosShotBuffer alone, so reset it too, using the same -1 fill
				// it receives at start-up.
				const float minusone = -1.f;
				glClearNamedBufferData(pointPosShotBuffer, GL_R32F, GL_RED, GL_FLOAT, &minusone);
			}

			if (keyHit('L')) {
				// Toggle looping over the current shot's time range.
				looping.on = !looping.on;
				looping.start = currentShot.start;
				looping.end = currentShot.end;
			}

			if (keyHit(VK_F6)) {
				// Save the edit camera pose under "<camName>_<random number>".
				std::string name = currentShot.camName + "_" + std::to_string(rand());
				if (savePose(editPose, name)) {
					printf("Wrote pose %s\n", name.c_str());
				} else {
					puts("Couldn't write pose!");
				}
			}

			// Numpad 0-9 jump to the start of shot 0-9. VK_NUMPAD0..VK_NUMPAD9
			// are consecutive virtual-key codes. Keys that refer to a shot
			// that does not exist are ignored instead of indexing out of bounds.
			for (int i = 0; i < 10; i++) {
				if (keyHit(VK_NUMPAD0 + i) && static_cast<size_t>(i) < shots.size())
					music.seek(shots[i].start);
			}

			// Numpad '+': jump to the shot that follows the current one.
			if (keyHit(VK_ADD)) {
				for (size_t i = 1; i < shots.size(); i++)
					if (currentShot.name == shots[i - 1].name) {
						music.seek(shots[i].start);
						break;
					}
			}
			// Numpad '-': jump to the shot that precedes the current one.
			// "i + 1 < size()" avoids the unsigned underflow of "size() - 1"
			// when the shot list is empty.
			if (keyHit(VK_SUBTRACT)) {
				for (size_t i = 0; i + 1 < shots.size(); i++)
					if (currentShot.name == shots[i + 1].name) {
						music.seek(shots[i].start);
						break;
					}
			}
		}
		if (keyHit(VK_F1)) {
			controls = !controls;
		}
		if (keyHit(VK_F3)) {
			showDebugInfo = !showDebugInfo;
		}

		float zeroFloat = 0.f;
		glClearNamedBufferData(jumpbuffer, GL_R32F, GL_RED, GL_FLOAT, &zeroFloat);

		{
			int layer;
			while ((layer = rand() % 64) == noiseLayer);
			noiseLayer = layer;
			//printf("noiseLayer: %d\n", noiseLayer);
		}

		glDisable(GL_BLEND);

		std::string geometry = "labyrinth";
		if (marcherShaders.find(futureProps.geometry) != marcherShaders.end()) {
			geometry = futureProps.geometry;
		} else {
			printf("Couldn't find a shader for geometry '%s'!\n", futureProps.geometry.c_str());
		}

		Program& marcher = marcherShaders[geometry];
		if (!marcher)
			marcher = createProgram(geometryShaderPaths.at(geometry));

		glUseProgram(marcher);

		vec2 cameraJitter = getRandomJitter() / float(max(tracingRez.x, tracingRez.y));

		glUniform1i("frame", frame);
		glUniform1f("secs", futureSecs);
		glUniform1f("shotSecs", futureShot.relative);
		glUniform1f("deltat", dt);
		glUniform1f("sceneID", futureShot.start);
		glUniform2i("screenSize", tracingRez.x, tracingRez.y);
		glUniform2f("screenBoundary", screenBoundary.x, screenBoundary.y);
		glUniform2f("cameraJitter", cameraJitter.x, cameraJitter.y);
		setSkyUniforms(futureProps.sky);
		glUniform1i("pointBufferMaxElements", pointBufferMaxElements);
		glUniform1i("jumpBufferMaxElements", jumpBufferMaxElements);
		glUniform1i("rayIndexBufferMaxElements", rayIndexBufferMaxElements);
		bindBuffer("cameraArray", cameraData);
		bindBuffer("pointBufferHeader", pointBufferHeader);
		bindBuffer("pointBuffer", pointBuffer);
		bindBuffer("pointPosShotBuffer", pointPosShotBuffer);
		bindBuffer("pointColorBuffer", pointColorBuffer);
		bindBuffer("pointShadingBuffer", pointShadingBuffer);
		bindBuffer("pointInfoBuffer", pointInfoBuffer);
		bindBuffer("jumpBuffer", jumpbuffer);
		bindBuffer("rayIndexBuffer", rayIndexBuffer);
		bindBuffer("radiusBuffer", radiusbuffer);
		bindBuffer("debugBuffer", debugBuffer);
		bindBuffer("stepBuffer", stepBuffer);
		glUniform3i("noiseOffset", rand() % 64, rand() % 64, noiseLayer);
		//bindTexture("skyMap", skyMap);
		bindTexture("noiseTextures", noiseTextures);
		bindTexture("whiteNoise", whiteNoise);
		bindTexture("texLogo", logo);
		bindTexture("texRockDiffuse", rockDiffuse);
		bindTexture("texRockNormals", rockNormals);
		bindTexture("texRockRoughness", rockRoughness);
		bindTexture("texMarbleDiffuse", marbleDiffuse);
		bindTexture("texMarbleRoughness", marbleRoughness);

		for (int i = 0; i < sizeof(impostors)/sizeof(impostors[0]); i++) {
			bindImpostor(impostors[i], i);
		}
		
		bindImage("zbuffer", 0, zbuffer, GL_WRITE_ONLY, GL_R32F);
		bindImage("jitterbuffer", 0, jitterbuffer, GL_WRITE_ONLY, JITTER_BUFFER_TYPE);

		applyCamProperties(futureShot.relative, marcher, futureProps);

		const int groupsize_x = 32;
		const int groupsize_y = 1;
		glDispatchCompute((tracingRez.x + groupsize_x - 1) / groupsize_x, (tracingRez.y + groupsize_y - 1) / groupsize_y, 1);

		glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT | GL_SHADER_STORAGE_BARRIER_BIT | GL_ATOMIC_COUNTER_BARRIER_BIT);

		TimeStamp drawTime;

		if (!headerUpdate) {
			headerUpdate = createProgram(
				GLSL(460,
					layout(local_size_x = 1, local_size_y = 1) in;

					layout(std430) buffer pointBufferHeader {
						int currentWriteOffset;
						int pointsSplatted;
						int nextRayIndex;
					};

					uniform int pointBufferMaxElements;

					// Per-frame housekeeping of the point buffer header.
					void main() {
						// Reset the two per-frame counters.
						nextRayIndex = 0;
						pointsSplatted = 0;
						// Fold the write cursor back by the buffer capacity.
						currentWriteOffset %= pointBufferMaxElements;
					}
				)
			);
		}

		glUseProgram(headerUpdate);
		glUniform1i("pointBufferMaxElements", pointBufferMaxElements);
		bindBuffer("pointBufferHeader", pointBufferHeader);
		glDispatchCompute(1, 1, 1);
		glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);

		TimeStamp headerUpdateTime;

		if (!pointSplat) {
			pointSplat = createProgram(
				GLSL(460,
                    #extension GL_EXT_shader_atomic_int64 : enable
                    #extension GL_NV_shader_atomic_int64 : enable
                    #extension GL_ARB_gpu_shader_int64 : enable
                    #extension GL_NV_gpu_shader5 : enable
                    //#extension GL_EXT_shader_image_int64 : require
					layout(local_size_x = 128, local_size_y = 1) in;

            #define USE_RANDOM 1

            #includelib

			// Unpacks a unit normal from two values in [0, 1].
			// Decode trick from https://twitter.com/Stubbesaurus/status/937994790553227264
			vec3 decodeNormal( vec2 f )
			{
				// Remap [0, 1] to [-1, 1]
				vec2 oct = f * 2.0 - vec2(1.0);

				vec3 n = vec3( oct, 1.0 - abs( oct.x ) - abs( oct.y ) );
				// "fold" is non-zero only when n.z is negative. It is subtracted from
				// the non-negative components of n.xy and added to the negative ones.
				float fold = clamp( -n.z, 0., 1. );
				bvec2 nonNegative = greaterThanEqual(n.xy, vec2(0.));
				n.xy += mix(vec2(fold), vec2(-fold), nonNegative);
				return normalize( n );
			}

            // Uniform inputs of the splat shader.
            layout(r32f) uniform image2D zbuffer; // not referenced by the code visible in this shader
            uniform float secs;  // time value driving the wind, strobe and scan animation below
            uniform int frame;   // frame counter, mixed into the dither noise
			uniform int pointBufferMaxElements; // capacity used for wrapping point indices
			uniform int numberOfPointsToSplat;  // how many of the most recently written points to process
			uniform ivec2 screenSize;           // target resolution used to map points to pixels
			uniform ivec3 noiseOffset;          // .x selects the white noise channel in getWhiteNoise
			uniform float currentShotStart;     // points whose stored shot value differs are skipped
			uniform sampler2DArray noiseTextures; // only referenced by the commented-out getNoise
            uniform sampler2D whiteNoise;         // sampled by getWhiteNoise

            // Fetches one channel of the "whiteNoise" texture.
            // coord: texel coordinate. ofs: offset added to both axes.
            // Both axes are wrapped modulo 64 and the channel is selected by noiseOffset.x % 3.
            float getWhiteNoise(ivec2 coord, int ofs)
            {
                return texelFetch(whiteNoise,
						ivec2((coord.x + ofs) % 64, (coord.y + ofs) % 64), 0)[noiseOffset.x % 3];
            }

            // Overload without an offset. Standard GLSL has no default arguments,
            // so the former "ofs = 0" default is expressed as a second overload.
            float getWhiteNoise(ivec2 coord)
            {
                return getWhiteNoise(coord, 0);
            }

			// vec3 getNoise(ivec2 coord, int ofs = 0)
			// {
			// 	return texelFetch(noiseTextures,
			// 			ivec3((coord.x + noiseOffset.x) % 64, (coord.y + noiseOffset.y) % 64, (noiseOffset.z + ofs) % 64), 0).rgb;
			// }

			// Shared header of the point ring buffer. main() reads currentWriteOffset
			// and atomically increments pointsSplatted.
			layout(std430) buffer pointBufferHeader {
				int currentWriteOffset;
				int pointsSplatted;
				int nextRayIndex;
			};
			
			// Array-of-structs point storage, read by main() when SOA_LAYOUT is off.
			layout(std430) buffer pointBuffer {
				RgbPoint points[];
			};

            // The buffers below are the per-attribute arrays read by main() when SOA_LAYOUT is on.

            // xyz = point position, w = shot value compared against currentShotStart.
            layout (std430) buffer pointPosShotBuffer {
                vec4 pointPosShots[];
            };

             // Packed color, unpacked with unpackUnorm4x8 and RGBMDecode in main().
             layout (std430) buffer pointColorBuffer {
                 uint pointColors[];
             };

            layout (std430) buffer pointShadingBuffer {
                uint pointNormalShininessSun[]; // NOTE: 16-bit normal, shininess, sun light
            };

            layout (std430) buffer pointInfoBuffer {
                uint pointWeightSpanTrunk[]; // NOTE: 8-bit weight, 8-bit span, 16-bit trunk direction
            };

			// Output: one packed 64-bit depth+color sample per supersampled pixel,
			// written with atomicMin in main().
			layout(std430) buffer sampleBuffer {
				uint64_t depthColorSamples[];
			};

			// Not referenced by the code visible in this shader.
			layout(rgba32f) uniform image2D gbuffer;

            // Builds the view ray through image-plane coordinate "uv", where
            // uv = (0.5, 0.5) maps to cam.pos + cam.dir.
            // outPos: point on the image plane, outDir: normalized direction from the camera.
            void getCameraProjection(CameraParams cam, vec2 uv, out vec3 outPos, out vec3 outDir) {
                vec3 planeCenter = cam.pos + cam.dir;
                outPos = planeCenter + (uv.x - 0.5) * cam.right + (uv.y - 0.5) * cam.up;
                outDir = normalize(outPos - cam.pos);
            }

            // Projects world-space point "p" with camera "cam".
            // Returns (plane coordinates offset by 0.5, depth along cam.dir scaled by nearplane_rcp).
            // out_fromCamToPoint receives the unnormalized vector from the camera to the point.
            vec3 projectPoint(CameraParams cam, vec3 p, out vec3 out_fromCamToPoint) {
                vec3 toPoint = p - cam.pos;
                out_fromCamToPoint = toPoint;

                float depth = dot(cam.dir, toPoint) * cam.nearplane_rcp;
                vec3 onPlane = (toPoint * cam.nearplane) / depth;

                // Coordinates along the (not necessarily unit length) right and up axes.
                float u = dot(onPlane, cam.right) / dot(cam.right, cam.right);
                float v = dot(onPlane, cam.up) / dot(cam.up, cam.up);

                return vec3(vec2(u, v) + vec2(0.5), depth);
            }

            // Returns the world-space point along "dir" from the camera whose depth,
            // measured the same way as in projectPoint, equals "orthogonalZ".
            vec3 unprojectPoint(CameraParams cam, vec3 dir, float orthogonalZ) {
                float depthPerUnit = dot(cam.dir, dir) * cam.nearplane_rcp;
                return cam.pos + (orthogonalZ / depthPerUnit) * dir;
            }

            // RGB to YCoCg conversion. Returns vec3(Y, Co, Cg).
            // https://software.intel.com/en-us/node/503873
            vec3 RGB_YCoCg(vec3 c)
            {
                float y  = c.x / 4.0 + c.y / 2.0 + c.z / 4.0;
                float co = c.x / 2.0 - c.z / 2.0;
                float cg = -c.x / 4.0 + c.y / 2.0 - c.z / 4.0;
                return vec3(y, co, cg);
            }

            // Decodes an RGBM color: rgb scaled by the multiplier in alpha and a fixed range of 6.
            // http://graphicrants.blogspot.com/2009/04/rgbm-color-encoding.html
            vec3 RGBMDecode( vec4 rgbm ) {
                vec3 ranged = 6.0 * rgbm.rgb;
                return ranged * rgbm.a;
            }

			int sampleNum = 0;

			// GGX-shading model code by Bart Wronski (MIT licensed)
			// https://github.com/bartwronski/CSharpRenderer/blob/2e1800ed44a86e5ecb40d902a3b9d44c7ae23491/shaders/optimized-ggx.hlsl#L22

			// One factor of the visibility term used by the GGX reference functions:
			// 1 / (dotNV * (1 - k) + k).
			float G1V(float dotNV, float k)
			{
				float denom = dotNV*(1.0f-k)+k;
				return 1.0f/denom;
			}

            // Reference GGX specular term.
            // N: surface normal, V: direction to the viewer, L: direction to the light,
            // roughness: squared to get alpha, F0: Fresnel reflectance at normal incidence.
            // Returns dotNL * D * F * vis.
            float LightingFuncGGX_REF(vec3 N, vec3 V, vec3 L, float roughness, float F0)
            {
                vec3 H = normalize(V+L);

                float dotNL = clamp(dot(N,L), 0., 1.);
                float dotNV = clamp(dot(N,V), 0., 1.);
                float dotNH = clamp(dot(N,H), 0., 1.);
                float dotLH = clamp(dot(L,H), 0., 1.);

                float alpha = roughness*roughness;

                // D: normal distribution
                const float pi = 3.14159f;
                float alphaSqr = alpha*alpha;
                float denom = dotNH * dotNH *(alphaSqr-1.0) + 1.0f;
                float D = alphaSqr/(pi * denom * denom);

                // F: Fresnel with a fifth power falloff
                float dotLH5 = pow(1.0f-dotLH,5.);
                float F = F0 + (1.0-F0)*(dotLH5);

                // V: visibility
                float k = alpha/2.0f;
                float vis = G1V(dotNL,k)*G1V(dotNV,k);

                return dotNL * D * F * vis;
            }

            // GGX specular term specialised for a light at the viewer position.
            // The half vector then equals the light direction, so dotNV and dotNH
            // both collapse to dotNL and the Fresnel term reduces to F0.
            // V is kept in the signature for parity with LightingFuncGGX_REF but is not read.
            // Returns dotNL * D * F0 * vis.
            float LightingFuncGGX_REF_HeadLight(vec3 N, vec3 V, vec3 L, float roughness, float F0)
            {
                float alpha = roughness*roughness;

                float dotNL = clamp(dot(N,L), 0., 1.);
                float dotNV = dotNL;
                float dotNH = dotNL;

                float F, D, vis;

                // D
                float alphaSqr = alpha*alpha;
                float pi = 3.14159f;
                float denom = dotNH * dotNH *(alphaSqr-1.0) + 1.0f;
                D = alphaSqr/(pi * denom * denom);

                // F: with dotLH equal to one the falloff term is zero
                F = F0;

                // V
                float k = alpha/2.0f;
                vis = G1V(dotNL,k)*G1V(dotNV,k);

                float specular = dotNL * D * F * vis;
                return specular;
            }

            // GGX specular variant whose visibility term depends only on dotLH.
            // Same parameters as LightingFuncGGX_REF. Not called from main() in this shader.
            float LightingFuncGGX_OPT2(vec3 N, vec3 V, vec3 L, float roughness, float F0)
            {
                vec3 H = normalize(V+L);

                float dotNL = saturate(dot(N,L));
                float dotLH = saturate(dot(L,H));
                float dotNH = saturate(dot(N,H));

                float alpha = roughness*roughness;

                // D
                const float pi = 3.14159f;
                float alphaSqr = alpha*alpha;
                float denom = dotNH * dotNH *(alphaSqr-1.0) + 1.0f;
                float D = alphaSqr/(pi * denom * denom);

                // F
                float dotLH5 = pow(1.0f-dotLH,5);
                float F = F0 + (1.0-F0)*(dotLH5);

                // V
                float k = alpha/2.0f;
                float k2 = k*k;
                float invK2 = 1.0f-k2;
                float vis = 1./(dotLH*dotLH*invK2 + k2);

                return dotNL * D * F * vis;
            }

            // Combined Fresnel and visibility helper for LightingFuncGGX_OPT3.
            // Returns vec2(vis, (1 - dotLH)^5 * vis). The caller blends the two with F0.
            vec2 LightingFuncGGX_FV(float dotLH, float roughness)
            {
                // V
                float alpha = roughness*roughness;
                float k = alpha/2.0f;
                float k2 = k*k;
                float invK2 = 1.0f-k2;
                float vis = 1./(dotLH*dotLH*invK2 + k2);

                // F: the constant part has weight one, the falloff part is the fifth power
                float dotLH5 = pow(1.0f-dotLH,5);

                return vec2(1.0f*vis, dotLH5*vis);
            }

            // GGX normal distribution term for a given dotNH and roughness.
            float LightingFuncGGX_D(float dotNH, float roughness)
            {
                const float pi = 3.14159f;

                float alpha = roughness*roughness;
                float alphaSqr = alpha*alpha;
                float denom = dotNH * dotNH *(alphaSqr-1.0) + 1.0f;

                return alphaSqr/(pi * denom * denom);
            }

            // GGX specular assembled from LightingFuncGGX_D and LightingFuncGGX_FV.
            // Same parameters as LightingFuncGGX_REF. Not called from main() in this shader.
            float LightingFuncGGX_OPT3(vec3 N, vec3 V, vec3 L, float roughness, float F0)
            {
                vec3 H = normalize(V+L);

                float dotNL = saturate(dot(N,L));
                float dotLH = saturate(dot(L,H));
                float dotNH = saturate(dot(N,H));

                vec2 FV_helper = LightingFuncGGX_FV(dotLH,roughness);
                float D = LightingFuncGGX_D(dotNH,roughness);

                // Blend the two Fresnel parts with F0.
                float FV = F0*FV_helper.x + (1.0f-F0)*FV_helper.y;

                return dotNL * D * FV;
            }
            // Reshapes a uniformly distributed value into a triangular distribution.
            // remapping by demofox in comments of https://www.shadertoy.com/view/4t2SDh
            float ReshapeUniformToTriangle(float rnd)
            {
                // Shift by half a period and remap to [-1, 1)
                float centered = fract(rnd + 0.5f) * 2.0f - 1.0f;

                // Guard the division: use -1 when the centered value is exactly zero
                float shaped = -1.0f;
                if (centered != 0.0f) {
                    shaped = centered / sqrt(abs(centered));
                }

                return shaped - sign(centered); // + 0.5f;
            }


            // Triangularly distributed noise value derived from the bit pattern of "v".
            // range [-1, 1]
            float tri_random( vec3 v )
            {
                float uniformNoise = noise(floatBitsToUint(v));
                return ReshapeUniformToTriangle(uniformNoise);
            }

            // Displaces a point along its normal with a radial ripple around a fixed
            // center and bends the normal towards the radial direction.
            //   pos, normal: input point position and normal
            //   t:           time offset in units of deltat, added to secs
            //   strobe_weight: NOTE(review): not read. The body uses the global
            //                  prop_plot_strobe instead, which is also what both
            //                  visible call sites pass in. Confirm before relying on it.
            //   out_normal:  receives the warped, normalized normal
            // Returns the displaced position. Calls rand(), so it advances the random state.
            vec3 getStrobePos(in vec3 pos, in vec3 normal, in float t, in float strobe_weight, out vec3 out_normal)
            {
                float rsecs = secs + t * deltat;
                const vec2 center = vec2(-0.6, 3.);

                // Distance from the ripple center in the xy plane, compressed with a square root
                float travel = length(pos.xy - center);
                travel = 2. + sqrt(travel);
                const float fspat = 10.;
                const float ftemp = -.8;
                const float amplitude = 15.;
                float phase = travel*fspat + rsecs*ftemp;
                float ring = .5*(1. + sin(phase));
                float amp = prop_plot_strobe * ring;
                amp /= 1.+travel;
                // Keep the undamped ring strength for the normal warp below
                float ramp = amp;
                amp *= amplitude;
                // Displacement grows quadratically with time
                amp *= (secs*secs)*2e-2;
                //amp *= amp;

                // amp = amp * 2.;
                //amp *= 1. + strobe * strobe * 0.5;
                amp *= 1. + rand() * 0.5;
                vec3 ofs = 1e-1 * pow(normal, vec3(3.)) * amp;
                pos += ofs;

                // Radial direction in the xy plane, flipped by the sign of -cos(phase)
                vec3 twisted = normalize(sign(-cos(phase)) * (pos - vec3(center.xy, pos.z)));

                float absring = abs(cos(phase)) * ramp;
                const float normalWarpStrength = 1.5;
                out_normal = normalize(mix(normal, twisted, saturate(absring * normalWarpStrength)));
                // out_normal = vec3(absring, 1., 0.);
                return pos;
            }

			// Splat entry point. Each invocation handles one stored point:
			//  1. load the point and skip it if it is empty or from another shot,
			//  2. apply the optional wind, strobe and sweep effects,
			//  3. project it with the past and next cameras and pick a position
			//     between the two for motion blur,
			//  4. shade, fog and tone map it,
			//  5. merge the packed depth+color sample into the sample buffer with atomicMin.
			void main()
			{
				uint invocationIdx =
					(gl_GlobalInvocationID.y * (gl_WorkGroupSize.x * gl_NumWorkGroups.x)
						+ gl_GlobalInvocationID.x);
				uint baseIdx;

				if (invocationIdx >= pointBufferMaxElements)
					return;

				// We want to process "numberOfPointsToSplat" indices in a way that wraps around the buffer.
				if (currentWriteOffset >= numberOfPointsToSplat) {
					baseIdx = currentWriteOffset - numberOfPointsToSplat;
				}
				else {
					baseIdx = pointBufferMaxElements - (numberOfPointsToSplat - currentWriteOffset);
				}

				uint index = (baseIdx + invocationIdx) % pointBufferMaxElements;
                #if SOA_LAYOUT
				vec3 pos = pointPosShots[index].xyz;
                float pointShot = pointPosShots[index].w;
                #else
				vec3 pos = points[index].xyz;
                float pointShot = points[index].sec;
                #endif

				// Raymarcher never produces pure (0, 0, 0) hits.
				if (pos == vec3(0.) || pointShot != currentShotStart)
					return;

                #if SOA_LAYOUT
				vec4 encodedColor = unpackUnorm4x8(pointColors[index]);
                vec4 weightSpanTrunk = unpackUnorm4x8(pointWeightSpanTrunk[index]);
                #else
				vec4 encodedColor = unpackUnorm4x8(points[index].rgba);
                // Index the array element first and then read its member.
                vec4 weightSpanTrunk = unpackUnorm4x8(points[index].weightSpanTrunk);
                #endif

                #if SOA_LAYOUT
				vec4 normalSpecularSun = unpackUnorm4x8(pointNormalShininessSun[index]);
                #else
				vec4 normalSpecularSun = unpackUnorm4x8(points[index].normalSpecularSun);
                #endif
				vec3 normal = decodeNormal(normalSpecularSun.xy);

                vec3 color = RGBMDecode(encodedColor);
                //vec3 color = encodedColor.rgb;
                color = color * color;

				float weight = weightSpanTrunk.x;
                float span = weightSpanTrunk.y;
                float plantID = weightSpanTrunk.z;

                // Wind sway: only points with a non-zero span are moved.
                if (span > 0.) {
                    /*
                    float wind = 2. * secs;
                    float move = 5e-3 * span;
                    float strength = 0.2 * (.5 + .5 * cos(secs * 1. + pos.x * 4.1) * cos(secs * 0.9 + pos.y*5.3));
                    vec2 slow = 0. * wind * vec2(1., .8) + 2. * sin(pos.xz * 100);
                    vec2 fast = .4 * sin(3. * wind * vec2(2.2, 1.7) + pos.xz * vec2(220., 210));
                    vec2 phase_ofs = strength * (slow + fast) + pos.xz * 0.2;
                    pos.xz += move * sin(pos.xz * vec2(8., 7.) + phase_ofs);
                    */
                    float move = 5e-3 * span;
                    float dir = secs * 0.1 + sin(pos.x) + sin(pos.y) + sin(pos.z);

                    float windt = 2. * secs;
                    float freq = 4.;
                    float strength = 2e-3 * span * span;

                    // The high frequency layer is currently scaled by zero.
                    float histrength = 0. * strength;
                    float hifreq = freq * 8.;
                    float hiwindt = 3.;

                    float phase = plantID * 4.;

                    pos.x += strength * sin(windt + pos.z * freq + phase);
                    pos.y += strength * sin(windt + pos.x * freq + phase);
                    pos.z += strength * sin(windt + pos.y * freq + phase);

                    pos.x += histrength * sin(hiwindt + pos.z * hifreq * freq + phase);
                    pos.y += histrength * sin(hiwindt + pos.x * hifreq * freq + phase);
                    pos.z += histrength * sin(hiwindt + pos.y * hifreq * freq + phase);
                    //color = hsv2rgb(vec3(plantID, 1, 0.2));
                }

                // Per-invocation noise value, used as the blend factor for the strobe,
                // as jitter for the sweep and as the input of the motion blur curve.
                float noiset = getWhiteNoise(ivec2(gl_GlobalInvocationID.x % 64, gl_GlobalInvocationID.x / 64));

                if (prop_plot_strobe > 0.) {
                    vec3 normal1, normal2;
                    vec3 newpos1 = getStrobePos(pos, normal, 0., prop_plot_strobe, normal1);
                    vec3 newpos2 = getStrobePos(pos, normal, 1., prop_plot_strobe, normal2);
                    pos = mix(newpos1, newpos2, noiset);
                    normal = normalize(mix(normal1, normal2, noiset));
                }

                // Sweep: drop points outside a sphere whose radius grows with the shot time.
                if (prop_plot_sweep > 0.) {
                    if (length(pos-vec3(0., 0.4, -0.5)) > prop_plot_sweep * shotSecs + 2. * (noiset-.5) * deltat + prop_plot_sweep_bias)
                        return;
                }

				vec3 fromCamToPoint0, fromCamToPoint1;

				vec3 screenSpace0 = projectPoint(cameras[CAM_PAST], pos, fromCamToPoint0);
				vec3 screenSpace1 = projectPoint(cameras[CAM_NEXT], pos, fromCamToPoint1);

                float t = mapMotionBlurCurve(noiset);

                // Ignore points that would blur around screen edges. This introduces "vignetting" at high camera speeds.
                if (screenSpace0.z <= cameras[CAM_PAST].nearplane) {
                    return;
                }

                if (screenSpace1.z <= cameras[CAM_NEXT].nearplane) {
                    return;
                }

                vec3 screenSpace = mix(screenSpace0, screenSpace1, t);
                vec3 fromCamToPoint = mix(fromCamToPoint0, fromCamToPoint1, t);

				vec2 windowSpace = screenSpace.xy * vec2(screenSize.xy);

				int x = int(windowSpace.x);
				int y = int(windowSpace.y);

				if (x < 0 || y < 0 || x >= screenSize.x || y >= screenSize.y)
					return;

                if (screenSpace.z <= cameras[CAM_NEXT].nearplane)
                    return;


				vec3 toCamera = normalize(-fromCamToPoint);

                // backface culling
                //if (dot(normal, toCamera) < 0.)
                //    return;

				vec3 diffuse = color.rgb;
				float materialShininess = normalSpecularSun.z;
				float sun = normalSpecularSun.w;

                if (prop_plot_scan > 0.) {
                    float scan = max(0., sin(pos.x * 30. - secs * 5.) - .98);
                    diffuse = mix(diffuse,
                            mix(diffuse * 3. * vec3(0., 1., 1.), 4. * vec3(0., 20., 8.), scan),
                            prop_plot_scan);
                }

				int pixelIdx = screenSize.x * y + x;

				// NOTE: H and specular are not read by the code below.
				vec3 H = normalize(sunDirection + toCamera);
				float specular = 10. * pow(max(0., dot(normal, H)), 50.);

				const float F0 = 1.0;
				const float roughness = 1.0 - materialShininess;

                // Mirror the sun direction for points below y = 0.
                vec3 sunDir = sunDirection;
                if (pos.y < -0.) {
                    sunDir.y = -sunDir.y;
                }

                float radialDistance = length(fromCamToPoint);

                vec3 specularLight = vec3(0.);
                vec3 c;
                float specularLobe = 0.;

                vec3 lightDir = sunDir;

                if (prop_plot_headlamp_shading > 0.) {
                    // Light placed at the camera, attenuated with the squared distance.
                    specularLobe = LightingFuncGGX_REF_HeadLight(normal, toCamera, toCamera, roughness, F0);
                    specularLight = prop_plot_headlamp_shading * specularLobe * sunColor * diffuse;
                    c = specularLight / (1e-6 + radialDistance * radialDistance);
                } else {
                    // Sun lighting: the lobe is evaluated only when the stored sun value is non-zero.
                    if (sun > 0.) {
                        specularLobe = LightingFuncGGX_REF(normal, toCamera, lightDir, roughness, F0);
                    }
                    specularLight = sun * specularLobe * sunColor * diffuse;
                    c = mix(diffuse, specularLight, materialShininess);
                }


                //vec3 c = mix(diffuse, specular2 * sunColor * diffuse, materialShininess);
				//c = specular2 * sunColor * c;
				//c = mix(specular2, 1., 0.9) * sunColor * c; // "disables" specular for debugging but keeps the same perf hit


                float fogDistance = radialDistance;
                if (pos.y < 0.) {
                    fogDistance *= prop_water_fog_scale_pre;
                }

                c = applyFog( c,        // original color of the pixel
                        fogDistance,     // camera to point distance
                        cameras[CAM_NOW].pos,     // camera position
                        -toCamera);         // camera to point vector

                // Samples are stored in a buffer with twice the screen resolution per axis.
                const int SUPERSAMPLING = 2;
                ivec2 fbSize = SUPERSAMPLING * screenSize;
                vec2 superCoord = SUPERSAMPLING * windowSpace.xy;
                ivec2 superCoordInt = ivec2(superCoord);

                float fadeDist = 400.;
                float fadeSlope = 0.008;
                //weight *= min(1., (fadeDist - radialDistance)*fadeSlope);

				// Tone map with c / (1 + c) followed by a square root.
				c = c / (vec3(1.) + c);
                c = sqrt(c);

                // Dither by a fraction of 1/256 before the color gets packed.
                float dither = tri_random( uvec3( floatBitsToUint( uvec2(superCoordInt) ), frame ) );
                c += vec3(1./256 * dither);

				c = clamp(c, vec3(0.), vec3(1.));
                c.rgb *= weight; // pre-multiply alpha


                //c = color.rgb;
                // c = vec3(.5) + .5 * normal;
                // c *= 0.1;

                int sampleBufferIndex = linearToBlock(superCoordInt, fbSize);

                uint64_t depthColor64 = packDepthColorSample(vec4(c, weight), screenSpace.z);

                // The atomic path is the active one. The plain store is kept as a switchable alternative.
                if (true) {
                    atomicMin(depthColorSamples[sampleBufferIndex], depthColor64);
                    //atomicMax(depthColorSamples[sampleBufferIndex + 1], depthColor64);
                    //atomicMax(depthColorSamples[sampleBufferIndex + fbSize.x], depthColor64);
                    //atomicMax(depthColorSamples[sampleBufferIndex + fbSize.x + 1], depthColor64);
                } else {
                    depthColorSamples[sampleBufferIndex] = depthColor64;
                    //depthColorSamples[sampleBufferIndex + 1] = depthColor64;
                    //depthColorSamples[sampleBufferIndex + fbSize.x] = depthColor64;
                    //depthColorSamples[sampleBufferIndex + fbSize.x + 1] = depthColor64;
                }


				// Count the points that made it all the way to the sample buffer.
				atomicAdd(pointsSplatted, 1);
			}
			));
		}

		// Number of points handed to the splat pass; used below both as a uniform and
		// to size the compute dispatch.
		int numberOfPointsToSplat = maxPoints;
		// Blend factor between the authored camera poses and the "tracking" poses,
		// read from the "cam_tracking" property at the current shot-relative time.
		float camTracking = props.getf("cam_tracking", currentShot.relative);

		// Fill cameras[1] from the current pose and cameras[2] from the next pose.
		// (presumably these slots are what the shaders address as CAM_NOW / CAM_NEXT — confirm)
		if (camTracking > 0.f) {
            CameraPose trackingPose, trackingNextPose, trackingFuturePose;

			trackCameraPath(pose, trackingPose);
			trackCameraPath(nextPose, trackingNextPose);
			// NOTE(review): trackingFuturePose is computed but never read in the visible code;
			// the call is kept because trackCameraPath may have side effects — confirm.
			trackCameraPath(futurePose, trackingFuturePose);

			// Mix each authored pose with its tracking counterpart by camTracking.
			CameraPose mixedPose = interpolateCamera(pose, trackingPose, camTracking);
			CameraPose mixedNextPose = interpolateCamera(nextPose, trackingNextPose, camTracking);

			makeCamera(mixedPose, cameras[1]);
			makeCamera(mixedNextPose, cameras[2]);
		} else {
			// Tracking disabled: use the authored poses as they are.
			makeCamera(pose, cameras[1]);
			makeCamera(nextPose, cameras[2]);
		}

		//printf("%f, %f, %f\n", pose.up.x, pose.up.y, pose.up.z);

		// handle missing past camera data on camera cuts

		if (lastShotName != currentShot.name)
		{
			// The shot changed since the previous frame, so there is no usable "past"
			// camera for it. Rewind a copy of the current shot by nextFrameDelta,
			// evaluate the camera path there and store the result in cameras[0]
			// as the estimated past pose.
			Shot rewoundShot = currentShot;
			rewoundShot.relative -= nextFrameDelta;

			CameraPose rewoundPose;
			cameraPath(rewoundShot, rewoundPose);
			makeCamera(rewoundPose, cameras[0]);

			// Remember the shot so the estimate is only made once per cut.
			lastShotName = currentShot.name;
		}
		

		// Upload the whole cameras array to the cameraData buffer before any pass reads it.
		glNamedBufferSubData(cameraData, 0, sizeof(cameras), &cameras);

		// ---- Point splat compute pass ----
		glUseProgram(pointSplat);

		applyCamProperties(currentShot.relative, pointSplat, props);

		// The glUniform*/bindBuffer/bindImage/bindTexture calls below take the shader-side
		// name as a string, so they are project wrappers rather than the raw GL entry points
		// (presumably they resolve the name against the program made current above — confirm).
		bindImage("gbuffer", 0, gbuffer, GL_READ_WRITE, GL_RGBA32F);
		glUniform1i("pointBufferMaxElements", pointBufferMaxElements);
		glUniform1i("numberOfPointsToSplat", numberOfPointsToSplat);
		// NOTE(review): this local is not read anywhere in the visible code; the uniform
		// below is set from screenw/screenh directly.
		int screenSize[] = { screenw, screenh };
		glUniform2i("screenSize", screenw, screenh);
		glUniform1f("secs", secs);
		glUniform1f("shotSecs", currentShot.relative);
		glUniform1f("deltat", dt);
		glUniform1i("frame", frame);
		bindBuffer("pointBufferHeader", pointBufferHeader);
		bindBuffer("pointBuffer", pointBuffer);
		bindBuffer("pointPosShotBuffer", pointPosShotBuffer);
		bindBuffer("pointColorBuffer", pointColorBuffer);
		bindBuffer("pointShadingBuffer", pointShadingBuffer);
		bindBuffer("pointInfoBuffer", pointInfoBuffer);
		bindBuffer("sampleBuffer", sampleBuffer);
		bindImage("zbuffer", 0, zbuffer, GL_WRITE_ONLY, GL_R32F);
		// Random x/y offset in [0, 63] each frame plus the current noise layer.
		glUniform3i("noiseOffset", rand() % 64, rand() % 64, noiseLayer);
		bindTexture("noiseTextures", noiseTextures);
		bindTexture("whiteNoise", whiteNoise);
		setSkyUniforms(props.sky);
		glUniform1f("currentShotStart", currentShot.start);
		bindBuffer("cameraArray", cameraData);
		// One-dimensional dispatch of numberOfPointsToSplat / 128 work groups.
		// NOTE(review): integer division drops any remainder, so points beyond the last
		// full group of 128 are not dispatched; assumes the shader local size is 128 — confirm.
		glDispatchCompute(numberOfPointsToSplat / 128 / 1, 1, 1);

		// Make the image, storage buffer and atomic counter writes of the splat pass
		// visible to the commands that follow.
		glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT | GL_SHADER_STORAGE_BARRIER_BIT | GL_ATOMIC_COUNTER_BARRIER_BIT);

		// Marks the end of the splat stage (presumably a profiling timestamp — confirm).
		TimeStamp splatTime;

		// Optional debug readback: copies GPU-side debug buffers to the CPU and prints them.
		// Only runs when showDebugInfo is set; the nested sections are further gated by the
		// compile-time *_DEBUGGING switches.
		if (showDebugInfo) {
			// CPU-side view of the first bytes of debugBuffer.
			// NOTE(review): field order and sizes must match what the shaders write — confirm.
			struct DebugData {
				int i;
				int parent;
				int size;
				int b;
				int start;
				int parent_size;
				float pixelRadius;
				float zdepth;
				float parentDepth;
				float parent_t;
				float child_t;
				float nearPlane;
				float projPlaneDist;
				float parentUVx;
				float parentUVy;
				float childUVx;
				float childUVy;
			};
			DebugData debugData = {};
			glGetNamedBufferSubData(debugBuffer, 0, sizeof(DebugData), &debugData);

			printf("debugData: i: %d, parent: %d, size: %d, b: %d, start: %d, parent_size: %d,\nradius: %f, zdepth: %f, parentDepth: %f\n",
				debugData.i, debugData.parent, debugData.size, debugData.b, debugData.start,
				debugData.parent_size,
				debugData.pixelRadius, debugData.zdepth, debugData.parentDepth);
			printf("nearPlane: %f, projPlaneDist: %f\n", debugData.nearPlane, debugData.projPlaneDist);
			printf("parent UV: (%f, %f), child UV: (%f, %f)\n",
				debugData.parentUVx, debugData.parentUVy, debugData.childUVx, debugData.childUVy);


			if (POINT_BUFFER_HEADER_DEBUGGING) {
				// The first two ints of the point buffer header; the second one is
				// reported as the number of points splatted.
				int data[2];
				glGetNamedBufferSubData(pointBufferHeader, 0, 8, data);
				//printf("currentWriteOffset: %d\n", data[0]);
				pointsSplatted = data[1];
				printf("pointsSplatted: %d\t(%.3f million)\n", data[1], data[1] / 1000000.);
			}

			if (STEP_BUFFER_DEBUGGING) {
				GLint64 size = -1;
				glGetNamedBufferParameteri64v(stepBuffer, GL_BUFFER_SIZE, &size);

				// If the query failed, size is still negative; treat that as an empty buffer
				// instead of letting it wrap to a huge unsigned element count.
				const size_t stepFloatCount = size > 0 ? size_t(size) / sizeof(float) : 0;
				std::vector<float> steps(stepFloatCount, -1.f);

				// glGetNamedBufferSubData takes a size in BYTES. Passing the element count
				// (as the code used to) only fetched a quarter of the buffer.
				if (!steps.empty())
					glGetNamedBufferSubData(stepBuffer, 0, steps.size() * sizeof(float), steps.data());

				// Records are 4 floats each (t, d, cone, pix). A record whose first float is
				// -1 ends a list; the loops also stop at the end of the buffer so a missing
				// terminator cannot cause an out-of-range read.
				const size_t stepRecordCount = steps.size() / 4;

				for (size_t i = 0; i < stepRecordCount && steps[4 * i] != -1.f; i++) {
					printf("parent step [%d], t=%f\td=%f, cone: %f, pix: %f\n", int(i), steps[4 * i], steps[4 * i + 1], steps[4 * i + 2], steps[4 * i + 3]);
				}

				// Child steps are stored starting at record 1000.
				for (size_t i = 1000; i < stepRecordCount && steps[4 * i] != -1.f; i++) {
					printf("child step [%d], t=%f\td=%f, cone: %f, pix: %f\n", int(i), steps[4 * i], steps[4 * i + 1], steps[4 * i + 2], steps[4 * i + 3]);
				}

				printf("parent vs child final t: %f vs %f\n", debugData.parent_t, debugData.child_t);
			}
		}

		// Debug-only consistency check of the jump and radius buffers, which are laid out
		// as a quadtree: level b starts at binto(b) and its nodes are stored in Z-order.
		// For every node it verifies the parent index arithmetic, that the node radius is
		// exactly half of the parent radius, and that the node depth is not in front of
		// the parent depth.
		if (JUMP_BUFFER_DEBUGGING) {
			std::vector<float> jumpValues(jumpBufferMaxElements, 0.f);
			std::vector<float> radiusValues(jumpBufferMaxElements, 0.f);
			glGetNamedBufferSubData(jumpbuffer, 0, jumpBufferSize, jumpValues.data());
			glGetNamedBufferSubData(radiusbuffer, 0, jumpBufferSize, radiusValues.data());

			for (int node = 0; node < jumpBufferMaxElements; ++node) {
				// Locate the node inside its quadtree level.
				const int level = tobin(node);
				const int levelStart = binto(level);
				const int zIndex = node - levelStart;
				const int levelDim = 1 << level;
				const int levelNodeCount = levelDim * levelDim;

				// The parent level has a quarter of the nodes and sits right before this
				// level; four consecutive Z-order children share one parent.
				const int parentLevelNodeCount = levelNodeCount >> 2;
				const int parent = int(levelStart - parentLevelNodeCount) + (zIndex / 4);

				// Decode both positions and make sure the parent really covers the child.
				const int parentZIndex = parent - binto(tobin(parent));
				const uvec2 coord = z2xy(uint(zIndex));
				const uvec2 pcoord = z2xy(uint(parentZIndex));

				assert(coord.x / 2 == pcoord.x);
				assert(coord.y / 2 == pcoord.y);

				const float nodeDepth = jumpValues[node];
				const float parentDepth = jumpValues[parent];
				const float nodeRadius = radiusValues[node];
				const float parentRadius = radiusValues[parent];

				// The root is its own parent, so the radius rule does not apply to it.
				const bool radiusMismatch = node > 0 && nodeRadius != .5f*parentRadius;
				if (radiusMismatch) {
					puts("fail");
				}

				if (nodeDepth < parentDepth) {
					printf("z[%d] %f < z[%d] %f!\n", node, nodeDepth, parent, parentDepth);
					puts("fail");
				}
			}
		}

		TimeStamp blurTime;

		if (!blurFilter)
			blurFilter = createProgram(
					GLSL(460,
                    #extension GL_EXT_shader_atomic_int64 : enable
                    #extension GL_NV_shader_atomic_int64 : enable
                    #extension GL_ARB_gpu_shader_int64 : enable
                    #extension GL_NV_gpu_shader5 : enable
                    //#extension GL_EXT_shader_image_int64 : require
					layout(local_size_x = 16, local_size_y = 16) in;

                    #includelib

                    layout(std430) buffer sampleBuffer1 {
                        uint64_t depthColorSamples1[];
                    };
                    layout(std430) buffer sampleBuffer2 {
                        uint64_t depthColorSamples2[];
                    };
                    layout(std430) buffer sampleBufferEdges {
                        uint edgeSamples[];
                    };

					// out vec4 outColor;
					uniform ivec2 screenSize;
					uniform int frame;
					uniform ivec3 noiseOffset;
					uniform sampler2DArray noiseTextures;
					uniform int sceneID;
                    uniform int direction;

					// https://gamedev.stackexchange.com/a/148088
					vec3 linearToSRGB(vec3 linearRGB)
					{
						// Piecewise sRGB encoding, evaluated per channel: a straight line below
						// the 0.0031308 threshold and a 1/2.4 power curve above it.
						vec3 linearSegment = linearRGB * vec3(12.92);
						vec3 powerSegment = vec3(1.055)*pow(linearRGB, vec3(1.0 / 2.4)) - vec3(0.055);

						// mix with a bvec3 selects the third argument wherever the flag is true
						return mix(powerSegment, linearSegment, lessThan(linearRGB, vec3(0.0031308)));
					}

                    // One unpacked framebuffer sample.
                    // NOTE(review): this struct is declared but not referenced by the main of this shader.
                    struct Sample {
                        vec4 c;     // color (presumably premultiplied rgb plus alpha — confirm)
                        float z;    // depth
                        float w;    // filter weight
                    };

                    // One pass of a separable 5-tap blur over the supersampled sample buffer.
                    // Reads depthColorSamples1 and writes depthColorSamples2. The "direction"
                    // uniform selects the axis the taps are spread along (0 = x, 1 = y).
                    // Only color is filtered; the depth of the center sample is passed through.
                    void main() {
                        const int SUPERSAMPLING = 2;
                        ivec2 coord = ivec2(gl_GlobalInvocationID.xy);
                        ivec2 fbSize = SUPERSAMPLING * screenSize;

                        // The host rounds the dispatch up to whole 16x16 groups, so invocations
                        // past the framebuffer edge have nothing to do.
                        if (coord.x < 0 || coord.x >= fbSize.x || coord.y < 0 || coord.y >= fbSize.y)
                            return;

                        float wsum = 0.;
                        vec4 colorsum = vec4(0.);
                        float minZ = INFINITE_DEPTH;
                        float centerZ = INFINITE_DEPTH;
                        ivec2 icenter = coord;

                        float weights5_sigma15[] = {0.122581,0.233062,0.288713,0.233062,0.122581};
                        float weights7_sigma2[] = {0.071303,0.131514,0.189879,0.214607,0.189879,0.131514,0.071303};
                        float weights3_box[] = {0.33333, 0.33333, 0.33333};
                        float bias = 0.;
                        int numInside = 0;

                        // Half width of the kernel. The weight table index is always i + RADIUS,
                        // so the table and RADIUS must be changed together:
                        //   RADIUS 1 -> weights3_box, RADIUS 2 -> weights5_sigma15, RADIUS 3 -> weights7_sigma2
                        const int RADIUS = 2;

                        for (int i=-RADIUS; i<=RADIUS; i++) {
                            // Move along the blur axis and clamp so edge pixels reuse the border sample.
                            ivec2 tap = icenter;
                            tap[direction] += i;
                            tap[direction] = clamp(tap[direction], 0, fbSize[direction]-1);
                            int sidx = linearToBlock(tap, fbSize);

                            float sampleZ;
                            vec4 sampleColor = unpackDepthColorSample(depthColorSamples1[sidx], sampleZ);

                            // Fixed: the index used to be i+1, which is a leftover from the 3-tap
                            // loop. With i starting at -2 that read element -1 and shifted the
                            // whole kernel off center.
                            float w = weights5_sigma15[i + RADIUS];
                            //float w=1.;

                            wsum += w;
                            colorsum += w * sampleColor;
                            minZ = min(minZ, sampleZ);

                            if (sampleZ < INFINITE_DEPTH) {
                                bias += i;
                                numInside++;
                            }

                            // The center tap is never moved by the clamp, so this is the same
                            // sample the output slot refers to. Keeping its depth here avoids
                            // fetching and unpacking it a second time after the loop.
                            if (i == 0) {
                                centerZ = sampleZ;
                            }
                        }

                        colorsum /= wsum;

                        {
                            int outidx = linearToBlock(coord, fbSize);

                            /*
                            //bias /= 3.;
                            bias /= float(weights7_sigma2.length);
                            //bias = (1. + bias)*.5;

                            vec2 biases = unpackSnorm2x16(edgeSamples[outidx]);
                            biases[direction] = bias;

                            uint nu = packSnorm2x16(biases);
                            edgeSamples[outidx] = nu;

                            if (direction == 1) {
                                //colorsum = vec4(length(nu).xxx, 1.);
                                //colorsum = vec4(length(biases).xxx, 1.);
                                //colorsum = vec4(vec3(numInside/7. > 0.8 ? 1: 0), 1.);
                            }
                            */

                            // Blurred color, unblurred center depth.
                            //centerZ = minZ;
                            uint64_t newSample = packDepthColorSample(colorsum, centerZ);
                            depthColorSamples2[outidx] = newSample;
                        }
                    }
				)
			);

		// ---- Separable blur over the supersampled sample buffer (two compute passes) ----
		glUseProgram(blurFilter);
		//applyCamProperties(currentShot.relative, draw, props);
		glUniform1i("frame", frame);
		glUniform3i("noiseOffset", rand() % 64, rand() % 64, noiseLayer);
		// NOTE(review): "secs", "shotSecs" and "deltat" are not declared in the visible
		// blur shader text; they may come from the #includelib block or be ignored — confirm.
		glUniform1f("secs", secs);
		glUniform1f("shotSecs", currentShot.relative);
		glUniform1f("deltat", dt);
		glUniform2i("screenSize", screenw, screenh);
		glUniform1i("sceneID", int(currentShot.start));
		bindTexture("noiseTextures", noiseTextures);
		bindBuffer("sampleBufferEdges", sampleBufferEdges);

		// Pass 1: direction 0, reads sampleBuffer and writes sampleBuffer2.
		bindBuffer("sampleBuffer1", sampleBuffer);
		bindBuffer("sampleBuffer2", sampleBuffer2);
		glUniform1i("direction", 0);
		// Round up to whole 16x16 work groups; the shader discards the overhang itself.
		// (superw/superh are presumably the supersampled framebuffer size — confirm)
        glDispatchCompute((superw+15)/16, (superh+15)/16, 1);
		// The second pass reads what the first one wrote through a storage buffer.
		glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);

		// Pass 2: direction 1 with the buffers swapped, so the final blurred result
		// ends up back in sampleBuffer.
		bindBuffer("sampleBuffer1", sampleBuffer2);
		bindBuffer("sampleBuffer2", sampleBuffer);
		glUniform1i("direction", 1);
        glDispatchCompute((superw+15)/16, (superh+15)/16, 1);
		glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);

		TimeStamp resolveTime;

		if (!draw)
			// the graphics program version of createProgram() takes 5 sources; vertex, control, evaluation, geometry, fragment
			draw = createProgram(
				GLSL(460,
					// Builds one oversized triangle from gl_VertexID alone, without vertex buffers:
					// vertex 1 is pushed out to x = 4 and vertex 2 to y = 4, every other x/y stays
					// at -1 (z = -.5, w = 1). The triangle therefore covers the whole [-1, 1] viewport.
					void main() {
						gl_Position = vec4(gl_VertexID == 1 ? 4. : -1., gl_VertexID == 2 ? 4. : -1., -.5, 1.);
					}
				),
				"", "", "",
					GLSL(460,
                    #extension GL_ARB_gpu_shader_int64 : enable
                    #extension GL_NV_gpu_shader5 : enable

                    #includelib

					layout(rgba16f) uniform image2D resolved;
					layout(rgba16f) uniform image2D reflected;
                    layout(r32f) uniform image2D zbuffer;
					layout(std430) buffer jumpBuffer { float jumps[]; };
					layout(std430) buffer radiusBuffer { float radiuses[]; };
                    layout(std430) buffer sampleBuffer {
                        uint64_t depthColorSamples[];
                    };

					// out vec4 outColor;
					uniform ivec2 screenSize;
					uniform int frame;
					uniform ivec3 noiseOffset;
					uniform sampler2DArray noiseTextures;
					uniform int sceneID;

					// https://gamedev.stackexchange.com/a/148088
					vec3 linearToSRGB(vec3 linearRGB)
					{
						// Piecewise sRGB encoding, evaluated per channel: a straight line below
						// the 0.0031308 threshold and a 1/2.4 power curve above it.
						vec3 linearSegment = linearRGB * vec3(12.92);
						vec3 powerSegment = vec3(1.055)*pow(linearRGB, vec3(1.0 / 2.4)) - vec3(0.055);

						// mix with a bvec3 selects the third argument wherever the flag is true
						return mix(powerSegment, linearSegment, lessThan(linearRGB, vec3(0.0031308)));
					}

					// Maps a screen uv in [0, 1]^2 to a point on the camera projection plane
					// (outPos) and the unit ray direction from the camera through it (outDir).
					void getCameraProjection(CameraParams cam, vec2 uv, out vec3 outPos, out vec3 outDir) {
						// offsets from the screen center along the right and up axes
						float alongRight = uv.x - 0.5;
						float alongUp = uv.y - 0.5;

						outPos = cam.pos + cam.dir + alongRight * cam.right + alongUp * cam.up;
						outDir = normalize(outPos - cam.pos);
					}

					// Fetches an rgb texel from the noise texture array. The pixel coordinate
					// and layer are shifted by noiseOffset (ofs adds to the layer) and each
					// component is wrapped with % 64.
					vec3 getNoise(ivec2 coord, int ofs = 0)
					{
						ivec3 texel = ivec3(coord.x + noiseOffset.x, coord.y + noiseOffset.y, noiseOffset.z + ofs);
						texel %= 64;
						return texelFetch(noiseTextures, texel, 0).rgb;
					}

                    // One unpacked sub-pixel sample as collected by the resolve main.
                    struct Sample {
                        vec4 c;     // sample color, converted to linear space, alpha scaled by the filter weight
                        float z;    // sample depth as returned by unpackDepthColorSample
                        float w;    // filter weight derived from the distance to the pixel
                    };

                    // Averages an accumulated color by the total sample weight.
                    //
                    // avgColor            sum of weighted sample colors
                    // samplesTotalWeight  sum of the sample weights; 0 yields transparent black
                    // linearAlpha         unused, kept so existing call sites keep compiling
                    // diffCentroidLength  unused, kept so existing call sites keep compiling
                    //
                    // The boundary/inner alpha heuristic that used the last two parameters sat
                    // behind an unconditional return and could never run, so it was removed.
                    vec4 computeRGBA(
                                vec4 avgColor,
                                float samplesTotalWeight,
                                float linearAlpha,
                                float diffCentroidLength)
                    {
                        if (samplesTotalWeight == 0.) {
                            return vec4(0.);
                        }

                        return avgColor / samplesTotalWeight; // average color samples
                    }

					// Resolve pass: gathers a 3x3 window of supersampled depth/color samples
					// around each output pixel, splits them at the water plane (y = 0) into an
					// "above water" and an "under water" sum, and stores the two colors plus the
					// nearest sample depth into the resolved, reflected and zbuffer images.
					void main() {
                        // Read sub-pixel samples into "samples".

                        int numSamples = 0;
                        int numCentroidSamples = 0;
                        const int MAX_SAMPLES = 16;
                        Sample samples[MAX_SAMPLES];

                        const int SUPERSAMPLING = 2;
                        ivec2 fbSize = SUPERSAMPLING * screenSize;

                        // interpolate camera position based on screen space noise
                        vec2 uv = gl_FragCoord.xy / screenSize.xy;

                        float noiset = getNoise(ivec2(gl_FragCoord.xy), 0).x;
                        float blurmix = mapMotionBlurCurve(noiset);

                        vec3 p0, p1, dir0, dir1;

                        getCameraProjection(cameras[CAM_PAST], uv, p0, dir0);
                        getCameraProjection(cameras[CAM_NEXT], uv, p1, dir1);
                        vec3 dir = normalize(mix(dir0, dir1, blurmix));
                        vec3 campos_noisy = mix(cameras[CAM_PAST].pos, cameras[CAM_NEXT].pos, blurmix);
                        float len = length(cameras[CAM_PAST].dir);
                        vec3 camdir_noisy = len * normalize(mix(cameras[CAM_PAST].dir, cameras[CAM_NEXT].dir, blurmix));

                        // plane intersection's radial distance from camera origin
                        float plane_t = -campos_noisy.y / dir.y;
                        // orthogonal distance to camera origin (length of projection to cam direction vector)
                        float plane_z = dot(camdir_noisy, dir* plane_t) * cameras[CAM_NOW].nearplane_rcp;

                        vec2 center = gl_FragCoord.xy * SUPERSAMPLING;
                        ivec2 icenter = ivec2(center);
                        vec2 diffCentroid = vec2(0.);
                        float z = INFINITE_DEPTH;
                        //const float totalArea = 2.5;
                        /*
                        const float totalArea = 1.0;

                        {
                            int sidx = linearToBlock(icenter, fbSize);

                            float sampleZ;
                            vec4 sampleColor = unpackDepthColorSample(depthColorSamples[sidx], sampleZ);

                            sampleColor.rgb *= sampleColor.rgb; // from gamma to linear space
                            samples[numSamples].c = sampleColor;
                            samples[numSamples].z = sampleZ;
                            samples[numSamples].w = 1.;
                            z = min(z, sampleZ);
                            numSamples++;
                        }
                        */

                        //const float totalArea = 2.5 * ((2.*2.) / (4.*4.));
                        const float totalArea = 1.0;

                        for (int y=-1; y<=1; y++) {
                            for (int x=-1; x<=1; x++) {
                                ivec2 o = ivec2(x, y);

                                // icenter is odd (2 * pixel + 1), so icenter + 1 equals fbSize on the
                                // last pixel column and row. Clamp like the blur pass does so the
                                // sample buffer is never indexed outside the framebuffer.
                                ivec2 coord = clamp(icenter + o, ivec2(0), fbSize - ivec2(1));
                                int sidx = linearToBlock(coord, fbSize);

                                float sampleZ;
                                vec4 sampleColor = unpackDepthColorSample(depthColorSamples[sidx], sampleZ);

                                 {
                                    // float w = 1 - dist_to_center / sqrt(8)
                                    vec2 diff = vec2(o)-vec2(.5,.5);

                                    float l = length(diff);
                                    //float w = 1.0; // box filter
                                    //float w = 1.0 - 0.35 * length(diff); // 4x4 window
                                    //float w = 1.0 - 0.4 * l; // 4x4 window
                                    float w = 1.0 - 0.5 * l; // 3x3 window
                                    //float w = 1.0 - 0.51 * l; // 2x2 window
                                    //float w = exp(-0.35*3*length(diff)); // 4x4 window
                                    //float w = exp(-0.35*2.4*length(diff)); // 4x4 window
                                    // float w = 1.0 - 0.2357 * length(diff); // 6x6 window
                                    //float w = exp(-0.2357*4*length(diff)); // 6x6 window

                                    sampleColor.rgb *= sampleColor.rgb; // from gamma to linear space
                                    sampleColor.a *= w;                 // reduce sample alpha if it is far from pixel
                                    samples[numSamples].c = sampleColor;
                                    samples[numSamples].z = sampleZ;
                                    samples[numSamples].w = w;
                                    z = min(z, sampleZ);
                                    numSamples++;

                                    // only samples that actually hold a point contribute to the centroid
                                    if (sampleZ < INFINITE_DEPTH) {
                                        diffCentroid += diff;
                                        numCentroidSamples++;
                                    }
                                }
                            }
                        }

                        if (numCentroidSamples > 0) {
                            diffCentroid /= float(numCentroidSamples);
                            //diffCentroid *= 0.5; // scale so that length(diffCentroid) == 1 equals one pixel radius (for 4x4 window)
                            diffCentroid *= 0.666; // same scale but for 3x3
                        }

                        vec4 sumColor1 = vec4(0.);  // points above water surface
                        vec4 sumColor2 = vec4(0.);  // points below water surface

                        // A sample counts as "above water" when the view ray never hits the water
                        // plane in front of the camera or the sample is nearer than that hit.
                        for (int i=0; i<numSamples; i++) {
                            if (plane_z < 0. || samples[i].z < plane_z) {
                                vec4 premult = samples[i].c;
                                //premult.rgb *= premult.a;
                                sumColor1 += samples[i].w * premult;
                            } else {
                                vec4 premult = samples[i].c;
                                //premult.rgb *= premult.a;
                                sumColor2 += samples[i].w * premult;
                            }
                        }

                        vec4 aboveWater = sumColor1 / totalArea;
                        vec4 underWater = sumColor2 / totalArea;

                        // "interior" pixels need fewer samples to achieve 100% coverage
                        if (length(diffCentroid) > 0.55) {
                            aboveWater *= prop_inner_coverage_boost;
                            underWater *= prop_inner_coverage_boost;
                            //aboveWater = underWater = vec4(1., 0., 0., 1.);
                        }

                        // renormalize so that coverage never exceeds 1
                        if (aboveWater.a > 1.) aboveWater /= aboveWater.a;
                        if (underWater.a > 1.) underWater /= underWater.a;

                        // fill empty reflection pixels with solid colors. this avoids sky leaking through in water blur
                        // this is the "over" alpha blend operation
                        underWater = underWater + (1. - underWater.a) * aboveWater;


						imageStore(resolved, ivec2(gl_FragCoord.xy), aboveWater);
						imageStore(reflected, ivec2(gl_FragCoord.xy), underWater);
						imageStore(zbuffer, ivec2(gl_FragCoord.xy), vec4(z));
					}
				)
			);

		// ---- Resolve pass: full-screen draw that collapses the sample buffer into images ----
		glUseProgram(draw);

		applyCamProperties(currentShot.relative, draw, props);

		glUniform1i("frame", frame);
		glUniform3i("noiseOffset", rand() % 64, rand() % 64, noiseLayer);
		glUniform1f("secs", secs);
		glUniform1f("shotSecs", currentShot.relative);
		glUniform1f("deltat", dt);
		glUniform2i("screenSize", screenw, screenh);
		glUniform1i("sceneID", int(currentShot.start));

		setSkyUniforms(props.sky);

		bindTexture("noiseTextures", noiseTextures);
		// Output images written by the fragment shader with imageStore.
		// NOTE(review): all three calls pass 0 as the second argument; whether that is a
		// mip level or an image unit depends on the bindImage wrapper — confirm.
		bindImage("resolved", 0, resolved, GL_WRITE_ONLY, GL_RGBA16F);
		bindImage("reflected", 0, reflected, GL_WRITE_ONLY, GL_RGBA16F);
        bindImage("zbuffer", 0, zbuffer, GL_WRITE_ONLY, GL_R32F);
		// NOTE(review): jumpBuffer and radiusBuffer are declared by the shader but its
		// visible main never reads them.
		bindBuffer("jumpBuffer", jumpbuffer);
		bindBuffer("radiusBuffer", radiusbuffer);
		bindBuffer("cameraArray", cameraData);
		// sampleBuffer holds the result of the second blur pass.
		bindBuffer("sampleBuffer", sampleBuffer);
		// Three vertices, no vertex buffers: the vertex shader builds a screen-covering triangle.
		glDrawArrays(GL_TRIANGLES, 0, 3);
		// Make the image stores visible to the passes that sample these images next.
		glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT | GL_SHADER_STORAGE_BARRIER_BIT);

        // composite


		if (!composite) {
			composite = createProgram(
				GLSL(460,
					// Builds one oversized triangle from gl_VertexID alone, without vertex buffers:
					// vertex 1 is pushed out to x = 4 and vertex 2 to y = 4, every other x/y stays
					// at -1 (z = -.5, w = 1). The triangle therefore covers the whole [-1, 1] viewport.
					void main() {
						gl_Position = vec4(gl_VertexID == 1 ? 4. : -1., gl_VertexID == 2 ? 4. : -1., -.5, 1.);
					}
				),
				"", "", "",
					GLSL(460,
                    #extension GL_ARB_gpu_shader_int64 : enable
                    #extension GL_NV_gpu_shader5 : enable

                    #includelib

                    layout(rgba16f) uniform image2D outputImage;

                    layout(r32f) uniform image2D zbuffer;

					// out vec4 outColor;
                    uniform float secs;
					uniform ivec2 screenSize;
					uniform int frame;
					uniform ivec3 noiseOffset;
                    uniform sampler2D waterNormalTexture;
					uniform sampler2DArray noiseTextures;
					uniform sampler2D resolvedTexture;
					uniform sampler2D reflectedTexture;
					uniform sampler2D whiteNoise;
					uniform int sceneID;

					// Maps a screen uv in [0, 1]^2 to a point on the camera projection plane
					// (outPos) and the unit ray direction from the camera through it (outDir).
					void getCameraProjection(CameraParams cam, vec2 uv, out vec3 outPos, out vec3 outDir) {
						// offsets from the screen center along the right and up axes
						float alongRight = uv.x - 0.5;
						float alongUp = uv.y - 0.5;

						outPos = cam.pos + cam.dir + alongRight * cam.right + alongUp * cam.up;
						outDir = normalize(outPos - cam.pos);
					}

                    // Reconstructs the world position that lies along ray direction "dir" at the
                    // given orthogonal depth, i.e. the depth measured along the camera axis.
                    vec3 unprojectPoint(CameraParams cam, vec3 dir, float orthogonalZ) {
                        // orthogonal depth gained per unit travelled along dir
                        float depthPerUnit = dot(cam.dir, dir) * cam.nearplane_rcp;
                        float distanceAlongRay = orthogonalZ / depthPerUnit;
                        return cam.pos + distanceAlongRay * dir;
                    }

					// Fetches an rgb texel from the noise texture array. The pixel coordinate
					// and layer are shifted by noiseOffset (ofs adds to the layer) and each
					// component is wrapped with % 64.
					vec3 getNoise(ivec2 coord, int ofs = 0)
					{
						ivec3 texel = ivec3(coord.x + noiseOffset.x, coord.y + noiseOffset.y, noiseOffset.z + ofs);
						texel %= 64;
						return texelFetch(noiseTextures, texel, 0).rgb;
					}

                    // Fetches one channel of the white noise texture. Both coordinates are
                    // shifted by ofs and wrapped with % 64; the channel is picked by noiseOffset.x % 3.
                    float getWhiteNoise(ivec2 coord, int ofs = 0)
                    {
                        ivec2 texel = ivec2(coord.x + ofs, coord.y + ofs) % 64;
                        vec4 noise = texelFetch(whiteNoise, texel, 0);
                        return noise[noiseOffset.x % 3];
                    }

                    // Projects world point p with camera cam.
                    // Returns (u, v, z): u and v are the projection plane coordinates offset by 0.5,
                    // z is the depth along the camera axis. out_fromCamToPoint receives the
                    // unnormalized vector from the camera position to p.
                    vec3 projectPoint(CameraParams cam, vec3 p, out vec3 out_fromCamToPoint) {
                        vec3 toPoint = p - cam.pos;
                        out_fromCamToPoint = toPoint;

                        float depth = dot(cam.dir, toPoint) * cam.nearplane_rcp;
                        vec3 onPlane = (toPoint * cam.nearplane) / depth;

                        // express the plane hit in units of the right and up axis lengths
                        float u = dot(onPlane, cam.right) / dot(cam.right, cam.right);
                        float v = dot(onPlane, cam.up) / dot(cam.up, cam.up);

                        return vec3(vec2(u, v) + vec2(0.5), depth);
                    }

                    // Same projection as projectPoint, except that the camera position is taken
                    // from campos instead of cam.pos. The axes still come from cam.
                    vec3 projectPoint2(CameraParams cam, vec3 campos, vec3 p, out vec3 out_fromCamToPoint) {
                        vec3 toPoint = p - campos;
                        out_fromCamToPoint = toPoint;

                        float depth = dot(cam.dir, toPoint) * cam.nearplane_rcp;
                        vec3 onPlane = (toPoint * cam.nearplane) / depth;

                        // express the plane hit in units of the right and up axis lengths
                        float u = dot(onPlane, cam.right) / dot(cam.right, cam.right);
                        float v = dot(onPlane, cam.up) / dot(cam.up, cam.up);

                        return vec3(vec2(u, v) + vec2(0.5), depth);
                    }


                    // Returns the perturbed water surface normal at world position "surface".
                    //
                    // surface  point on the water plane; its xz drives the waves and the normal map uv
                    // V        unused, kept so existing call sites keep compiling
                    // plane_t  distance to the surface along the view ray; the perturbation is
                    //          scaled down with its square so distant water turns into a flat mirror
                    //
                    // The perturbation is the scrolling normal map plus a small analytic ripple.
                    // An earlier analytic "large waves" term was computed here as well, but its
                    // result was overwritten before being read, so that dead code was removed.
                    vec3 getWarpedNormal(vec3 surface, vec3 V, float plane_t)
                    {
                        vec3 small_waves = vec3(
                                0.1 + 0.1 * sin(32. * surface.z + secs*1.0 + sin(secs)),
                                0.,
                                0.1 + 0.1 * sin(18. * surface.x + secs*1.5));

                        small_waves = pow(small_waves, vec3(3.));

                        // normal map scrolls diagonally over time
                        vec2 waterUv = 2. * surface.xz + vec2(.03 * secs);
                        vec3 normalmap = texture(waterNormalTexture, waterUv).rgb;

                        // remap the sampled x and z from [0, 1] to [-1, 1]; the y component is only scaled
                        vec3 N_ofs = 0.10 * (vec3(-1., 0., -1) + 2. * normalmap.xzy);
                        N_ofs += .5 * small_waves;

                        // fade to a planar reflection when far away
                        N_ofs = N_ofs / max(1., (plane_t*plane_t)*3e-2);

                        vec3 N = normalize(vec3(0., 1., 0.) + N_ofs);
                        return N;
                    }

                    // A textured backdrop plane, described by the equation dot(n, x) + d = 0.
                    struct MattePlane {
                        vec3 n;         // plane normal
                        float d;        // plane offset term of the equation above
                        float uvScale;  // multiplier applied to the in-plane coordinates to get texture uv
                    };

                    // Intersects the ray O + t * D with the plane dot(N, x) + d = 0.
                    // Returns t and writes the hit point to out_plane_p. A negative t means the
                    // plane is behind the ray origin; a ray parallel to the plane divides by zero.
                    float intersectRayPlane(vec3 O, vec3 N, vec3 D, float d, out vec3 out_plane_p)
                    {
                        float originOffset = dot(O,N)+d;
                        float t = -originOffset/dot(D,N);
                        out_plane_p = O + t * D;
                        return t;
                    }

                    // Returns the 2D coordinates of p in a tangent frame built around normal N.
                    vec2 getPlaneUV(vec3 N, vec3 p)
                    {
                        // Use world up as the reference axis unless N is almost vertical,
                        // in which case fall back to world z to keep the cross product stable.
                        vec3 reference;
                        if (abs(N.y) < 0.999) {
                            reference = vec3(0., 1., 0.);
                        } else {
                            reference = vec3(0., 0., 1.);
                        }

                        vec3 tangent = normalize(cross(reference, N));
                        vec3 bitangent = cross(N, tangent);
                        return vec2(dot(tangent, p), dot(bitangent, p));
                    }

                    // Blends the backdrop image mapped onto "plane" over otherColor.
                    //
                    // img         backdrop texture
                    // plane       plane the texture is mapped on
                    // rayOrigin   view ray origin
                    // rayDir      view ray direction
                    // xform       per-component scale applied to both the origin and the direction
                    // otherColor  color returned unchanged when the backdrop does not contribute
                    //
                    // The backdrop is skipped when prop_bg_alpha is zero, when the hit point is
                    // at or below y = 0, or when the plane is behind the ray origin.
                    vec4 sampleMattePlane(
                            in sampler2D img,
                            in MattePlane plane,
                            vec3 rayOrigin,
                            vec3 rayDir,
                            vec3 xform,
                            vec4 otherColor)
                    {
                        if (prop_bg_alpha == 0.)
                            return otherColor;

                        vec3 hit;
                        float bg_t = intersectRayPlane(
                                xform * rayOrigin,
                                plane.n,
                                xform * rayDir,
                                plane.d,
                                hit);

                        bool belowGround = hit.y <= 0;
                        bool behindOrigin = bg_t < 0.;
                        if (belowGround || behindOrigin) {
                            return otherColor;
                        }

                        // in-plane coordinates, corrected for the image aspect ratio and then offset
                        ivec2 imgSize = textureSize(img, 0);
                        vec2 bg_uv = plane.uvScale * getPlaneUV(plane.n, hit);
                        bg_uv.x *= float(imgSize.y) / float(imgSize.x);
                        bg_uv += vec2(prop_bg_uv_x, prop_bg_uv_y);

                        vec4 backdrop = textureLod(img, bg_uv, 0.); // we have to take LOD0 samples to avoid blurring during camera pan
                        backdrop.rgb *= prop_bg_gain;
                        backdrop.rgb = pow(backdrop.rgb, vec3(prop_bg_gamma));

                        //fogAmount = max(0., min(1., prop_sky_horizon_fog * pow(1. - abs(rayDir.y), 128.)));
                        //backdrop.rgb = applyFog( backdrop.rgb,        // original color of the pixel
                        //        1e9,     // camera to point distance
                        //        xform * rayOrigin,     // camera position
                        //        xform * rayDir);         // camera to point vector

                        float blend = backdrop.a * prop_bg_alpha;
                        return mix(otherColor, backdrop, blend);
                    }


					// Composite fragment stage: merges the resolved geometry color with the
					// water-plane reflection and the sky / matte backdrop, applies a highlight
					// burn, and stores the result for this pixel into outputImage.
					void main() {
                        vec2 uv = gl_FragCoord.xy / screenSize.xy;
                        vec2 coord = uv;

						vec3 p, dir;
                        vec2 screenSpace = uv;
						getCameraProjection(cameras[1], screenSpace, p, dir);

                        // "noisy" = motion-blurred variants; they start out equal to the cameras[1] ray
                        // NOTE(review): p_noisy is assigned but never read in this function
                        vec3 p_noisy = p, dir_noisy = dir;
                        vec3 campos_noisy = cameras[1].pos, camdir_noisy = cameras[1].dir;

                        vec3 planeNormal = normalize(vec3( cos( prop_bg_theta ), prop_bg_slope, sin( prop_bg_theta ) ));

                        MattePlane bgPlane = {
                            planeNormal,
                            prop_bg_dist,
                            prop_bg_uvscale};

                        // motion blur: pick a per-pixel blend factor from noise and interpolate the
                        // ray and the camera between the CAM_PAST and CAM_NEXT cameras
                        if (true) {
                            float noiset = getNoise(ivec2(gl_FragCoord.xy), 10).x;
                            //float noiset = getWhiteNoise(ivec2(gl_FragCoord.xy), frame+1);
                            float blurmix = mapMotionBlurCurve(noiset);

                            vec3 p0, p1, dir0, dir1;

                            getCameraProjection(cameras[CAM_PAST], screenSpace, p0, dir0);
                            getCameraProjection(cameras[CAM_NEXT], screenSpace, p1, dir1);
                            p_noisy = mix(p0, p1, blurmix);
                            dir_noisy = normalize(mix(dir0, dir1, blurmix));
                            campos_noisy = mix(cameras[CAM_PAST].pos, cameras[CAM_NEXT].pos, blurmix);
                            // keep the length of the past camera direction vector after blending
                            float len = length(cameras[CAM_PAST].dir);
                            camdir_noisy = len * normalize(mix(cameras[CAM_PAST].dir, cameras[CAM_NEXT].dir, blurmix));
                        }

                        // plane intersection's radial distance from camera origin
                        // (solves pos.y + t * dir.y = 0, i.e. the water plane is y = 0)
                        float plane_t = -cameras[1].pos.y / dir.y;
                        float plane_t_noisy = -campos_noisy.y / dir_noisy.y;

                        // orthogonal distance to camera origin (length of projection to cam direction vector)
                        float plane_z = distanceToDepth(plane_t, cameras[1], dir);
                        float plane_z_noisy = dot(camdir_noisy, dir_noisy * plane_t_noisy) * cameras[1].nearplane_rcp;

                        // orthogonal distance to camera origin
                        float z = imageLoad(zbuffer, ivec2(gl_FragCoord.xy)).r;

                        // geometry intersection's radial distance from camera origin
                        // NOTE(review): radial_z is not read anywhere below
                        float radial_z = depthToDistance(z, cameras[1], dir);

                        // solid geometry color and alpha
                        vec4 sampledColor = texture(resolvedTexture, coord);

                        vec3 blurColor = {0., 0., 0.};
                        float blurAlpha = 0.;
                        vec3 skyColor = {0., 0., 0.};
                        bool hitWater = false;
                        // NOTE(review): showsReflection is set below but never read
                        bool showsReflection = false;
                        // hitObject is never cleared by active code (the only assignment is commented out)
                        bool hitObject = true;
                        vec3 normal = vec3(0., 1., 0.);
                        vec3 surface;

                        const vec3 waterTint = .2e-2 * hsv2rgb(vec3(prop_water_hue, 0.8, prop_water_value));
                        const float clearness = 0.8;

                        const float plane_test_bias = 35.;
                        bool takeBlurSample = prop_water_render > 0;

                        // if ray hit the water plane
                        if (plane_z > 0. && sampledColor.a < 1. && takeBlurSample) {
                            hitWater = true;

                            vec3 V = -dir;
                            vec3 V_noisy = -dir_noisy;
                            surface = cameras[1].pos + plane_t * dir;
                            vec3 surface_noisy = campos_noisy + plane_t_noisy * dir_noisy;


                            vec3 N = getWarpedNormal(surface, V, plane_t);
                            // NOTE(review): this passes V, not V_noisy, together with the noisy surface point - confirm intended
                            vec3 N_noisy = getWarpedNormal(surface_noisy, V, plane_t_noisy);
                            normal = N;
                            // mirror reflection of the view vector about the normal
                            // NOTE(review): R is only referenced by the commented-out R_t variants below
                            vec3 R = 2. * dot(V, N) * N - V;
                            vec3 R_noisy = 2. * dot(V_noisy, N_noisy) * N_noisy - V_noisy;
                            skyColor = sampleSkyDome(R_noisy);

                            // reflected sky, optionally with the matte plane blended into it
                            vec3 skyColorWithPlane = sampleMattePlane(skybg, bgPlane,
                                    surface,
                                    R_noisy, vec3(1,1,1), vec4(skyColor, 1.)).rgb;
                            skyColor = mix(skyColor, skyColorWithPlane, prop_bg_reflection_alpha);

                            float R_t = prop_water_distort; // planar reflection
                             //R_t = mix((surface.y + 0.10) / R.y, 10.0, 0.0);
                             // R_t = (surface.y + 0.02) / R.y;

                            // if ray hit a water plane and the z-buffer after that --> we've got a reflection here
                            // HACK: bias the check so that reflection alpha exists under solid pixels as well
                            if (plane_z_noisy < z + plane_test_bias) {
                                showsReflection = true;

                                const int SAMPLES = 8;

                                // average SAMPLES noise-jittered lookups into the reflected image
                                for (int i=0;i<SAMPLES;i++) {
                                    vec3 noise = getNoise(ivec2(gl_FragCoord.xy), i);
                                    noise -= vec3(.5, 0.0, .5);
                                    // jitter magnitude scales with the water plane depth
                                    float ybias = plane_z_noisy;
                                    vec3 ofs = R_noisy * (R_t) + 1e-3 * (noise * vec3(1., -4., 1.)) * ybias;
                                    vec3 hit_new = surface + ofs;
                                    vec3 tempCamToPoint;
                                    // note: shadows the outer vec2 screenSpace inside this loop body
                                    vec3 screenSpace = projectPoint(cameras[1], hit_new, tempCamToPoint);

                                    vec2 ruv = screenSpace.xy;

                                    // mirror the y-coordinate
                                    ruv.y = 1. - abs(mod(ruv.y, 2.) - 1.);
                                    ruv.y -= 2./screenSize.y;

                                    // only used by the disabled test below
                                    float pz = imageLoad(zbuffer, ivec2(ruv * screenSize.xy)).r;

                                    // uncomment this to skip samples behind object
                                    // if (pz <= plane_z * 0.9) { hitObject = false; }

                                    vec4 warpedColor = texture(reflectedTexture, ruv);

                                    blurColor += warpedColor.rgb;
                                    blurAlpha += warpedColor.a;
                                    //blurAlpha += warpedColor.a;
                                }

                                blurColor /= SAMPLES;
                                blurAlpha /= SAMPLES;
                           }
                        }

                        vec3 finalColor;

                        // hitWater can only be true when takeBlurSample is true, so the second test is redundant
                        if (hitWater && takeBlurSample) {
                            if (!hitObject) { blurAlpha = 0.; }

                            // composite mirrored object against a mirrored sky

                            vec3 reflection = blurColor + (1. - blurAlpha) * skyColor;
                            // Schlick-style Fresnel term with base reflectance R0
                            const float R0 = 0.001;
                            const float cos_alpha = -dot(dir_noisy, normal);
                            float fresnel = R0 + (1. - R0) * pow(1 - cos_alpha, 5.);

                            // optional darkening of the reflection towards the bottom of the screen
                            reflection *= mix(1., coord.y * coord.y, prop_water_y_gradient);

                            // blend reflection with water tint based on viewing angle

                            vec3 waterRadiance = clearness * mix(waterTint, reflection, fresnel);
                            waterRadiance = applyFog(waterRadiance, plane_t * prop_water_fog_scale_post, cameras[1].pos, dir_noisy);
                            waterRadiance *= prop_water_gain;

                            waterRadiance = clamp(waterRadiance, vec3(0.), vec3(mix(2., abs(dir.y), prop_water_ydarken)));
                            // lay the matte plane over the solid geometry color, then add the water behind it
                            sampledColor = sampleMattePlane(skybg, bgPlane, cameras[1].pos, dir_noisy, vec3(1.), sampledColor);
                            finalColor = waterRadiance * (1. - sampledColor.a) + sampledColor.rgb;

                            //finalColor = texture(reflectedTexture, coord).rgb;
                            //finalColor = sampledColor.rgb; // DEBUG HACK
                        } else  {
                            // this local skyColor shadows the outer one
                            vec3 skyColor = sampleSkyDome(dir_noisy);
                            // composite the infinitely far away lying plane on top of skydome
                            skyColor = sampleMattePlane(skybg, bgPlane, cameras[1].pos, dir_noisy, vec3(1.), vec4(skyColor,1.)).rgb;
                            finalColor = sampledColor.rgb + (1. - sampledColor.a) * skyColor.rgb;
                            //finalColor = skyColor.rgb;

                            //finalColor = sampledColor.rgb;
                        }

                        // saturate bright colors to white
                        float burn = 1.5 * max(finalColor.r * 0.299, max(finalColor.g * 0.587, finalColor.b * 0.114));

                        finalColor += vec3(1.) * burn * burn;


						imageStore(outputImage, ivec2(gl_FragCoord.xy), vec4(finalColor, 1.0));
					}
				)
			);
        }

		// Run the composite pass. The shader writes `composited` with imageStore();
		// the bloom and present passes later read that texture through samplers.
		glUseProgram(composite);

		applyCamProperties(currentShot.relative, composite, props);

		glUniform1i("frame", frame);
		glUniform3i("noiseOffset", rand() % 64, rand() % 64, noiseLayer);
		glUniform1f("secs", secs);
		glUniform1f("shotSecs", currentShot.relative);
		glUniform1f("deltat", dt);
		glUniform2i("screenSize", screenw, screenh);
		glUniform1i("sceneID", int(currentShot.start));

		setSkyUniforms(props.sky);

		bindTexture("resolvedTexture", resolved);
		bindTexture("reflectedTexture", reflected);
		bindTexture("noiseTextures", noiseTextures);
		bindTexture("whiteNoise", whiteNoise);
		bindTexture("waterNormalTexture", waterNormals);

		bindImage("outputImage", 0, composited, GL_WRITE_ONLY, GL_RGBA16F);

		bindImage("zbuffer", 0, zbuffer, GL_READ_WRITE, GL_R32F);
		bindBuffer("cameraArray", cameraData);
		glDrawArrays(GL_TRIANGLES, 0, 3);
		// GL_SHADER_IMAGE_ACCESS_BARRIER_BIT only orders later image load/store.
		// Sampler reads of an image-written texture need GL_TEXTURE_FETCH_BARRIER_BIT
		// to be guaranteed to see the stored texels.
		glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT | GL_SHADER_STORAGE_BARRIER_BIT | GL_TEXTURE_FETCH_BARRIER_BIT);

        // clear sample buffer

		// Lazily build the compute program that resets every 64-bit slot of the sample
		// buffer to (zero color, INFINITE_DEPTH).
		if (!clearSamples) {
			clearSamples = createProgram(
				GLSL(460,
					#extension GL_ARB_gpu_shader_int64 : enable
					#extension GL_NV_gpu_shader5 : enable
					#extension GL_EXT_shader_atomic_int64 : enable
					#extension GL_NV_shader_atomic_int64 : enable

					#includelib

					layout(local_size_x = 1024, local_size_y = 1) in;

					layout(std430) buffer sampleBuffer {
						uint64_t depthColorSamples[];
					};

					// number of elements in depthColorSamples
					uniform int size;

					void main() {
						// uint is the GLSL type name; the C spelling is not part of the language
						uint invocationIdx = gl_GlobalInvocationID.x;
						// the last work group may extend past the end of the buffer
						if (invocationIdx < uint(size)) {
							// Note that in packUint2x32 the most significant word is the second argument.
							uint64_t depthColor64 = packUint2x32(uvec2(packUnorm4x8(vec4(0.)), floatBitsToUint(INFINITE_DEPTH)));
							depthColorSamples[invocationIdx] = depthColor64;
						}
					}
				)
			);
		}

		glUseProgram(clearSamples);
		glUniform1i("size", sampleBufferSize);
		bindBuffer("sampleBuffer", sampleBuffer);
		// total postproc became 13% faster (at superw=4*screenw) when this dispatch was made 1D
		// Round up to whole 1024-wide groups; no spare group when the size is an exact multiple.
		glDispatchCompute((sampleBufferSize + 1023) / 1024, 1, 1);
		glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);

        // apply bloom

		// Lazily build the separable blur program used for bloom. One draw blurs along a
		// single axis selected by the `dir` uniform (0 = x, 1 = y).
		if (!bloom)
			bloom = createProgram(
				GLSL(460,
					// one oversized triangle that covers the whole viewport
					void main() {
						gl_Position = vec4(gl_VertexID == 1 ? 4. : -1., gl_VertexID == 2 ? 4. : -1., -.5, 1.);
					}
				),
				"", "", "",
				GLSL(460,
					uniform ivec2 screenSize;
					uniform sampler2D inputTexture;
					uniform float secs;
					uniform int sceneID;
					uniform int dir;
					layout(rgba16f) uniform image2D outputImage;

					void main() {
						//float weights[] = {0.000229, 0.005977, 0.060598, 0.241732, 0.382928, 0.241732, 0.060598, 0.005977, 0.000229};
						float weights[] = {0.035822, 0.05879, 0.086425, 0.113806, 0.13424, 0.141836, 0.13424, 0.113806, 0.086425, 0.05879, 0.035822};
						vec2 uv = gl_FragCoord.xy / vec2(screenSize);
						// input and output may differ in size; both are addressed through uv
						ivec2 base = ivec2(textureSize(inputTexture, 0) * uv);
						vec3 c = vec3(0.);
						float sum = 0.;
						// length() must be called as a method in GLSL
						const int taps = weights.length();
						for (int i = 0; i < taps; i++) {
							ivec2 ofs = ivec2(i - taps / 2, 0);
							if (dir == 1) ofs = ofs.yx;
							float w = weights[i];
							// NOTE(review): taps near the border fetch outside the texture; the result of
							// an out-of-range texelFetch is not defined without robust buffer access
							vec3 s = texelFetch(inputTexture, base + ofs, 0).rgb;
							c += w * s;
							sum += w;
						}
						c /= sum;

						ivec2 outputCoord = ivec2(imageSize(outputImage) * uv);
						imageStore(outputImage, outputCoord, vec4(c, 1.));
					}
				));

		// Bloom blur chain. The final result ends up in bloombuffer2, which the present
		// pass samples as "bloom".
		glUseProgram(bloom);

		glUniform2i("screenSize", screenw, screenh);
		glUniform1f("secs", secs);
		glUniform1f("shotSecs", currentShot.relative);
		glUniform1f("deltat", dt);
		glUniform1i("sceneID", int(currentShot.start));
		glUniform1i("frame", frame);

		// Every pass writes with imageStore() and the next pass reads through a sampler,
		// so each barrier needs GL_TEXTURE_FETCH_BARRIER_BIT in addition to the image bit.

		// horizontal pass: composited -> bloombuffer2
		bindImage("outputImage", 0, bloombuffer2, GL_WRITE_ONLY, GL_RGBA16F);
		bindTexture("inputTexture", composited);
		glUniform1i("dir", 0);
		glDrawArrays(GL_TRIANGLES, 0, 3);
		glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT | GL_TEXTURE_FETCH_BARRIER_BIT);

		// A vertical pass bloombuffer2 -> bloombuffer used to run here. Its output was
		// completely overwritten by the next pass before anything read it, so it was
		// dead GPU work and removing it leaves the final image unchanged.
		// NOTE(review): the old pass labels suggest H,V,H,V was intended. That would need
		// "horizontal pass 2" to read the vertical result instead of bloombuffer2, and it
		// would change the look of the bloom - decide deliberately.

		// horizontal pass 2: bloombuffer2 -> bloombuffer
		bindImage("outputImage", 0, bloombuffer, GL_WRITE_ONLY, GL_RGBA16F);
		bindTexture("inputTexture", bloombuffer2);
		glUniform1i("dir", 0);
		glDrawArrays(GL_TRIANGLES, 0, 3);
		glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT | GL_TEXTURE_FETCH_BARRIER_BIT);

		// vertical pass: bloombuffer -> bloombuffer2
		bindImage("outputImage", 0, bloombuffer2, GL_WRITE_ONLY, GL_RGBA16F);
		bindTexture("inputTexture", bloombuffer);
		glUniform1i("dir", 1);
		glDrawArrays(GL_TRIANGLES, 0, 3);
		glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT | GL_TEXTURE_FETCH_BARRIER_BIT);


		if (!present)
			present = createProgram(
				GLSL(460,
					// Emits one oversized triangle with clip-space corners (-1,-1), (4,-1), (-1,4),
					// picked by gl_VertexID, so that it covers the whole viewport.
					void main() {
						gl_Position = vec4(gl_VertexID == 1 ? 4. : -1., gl_VertexID == 2 ? 4. : -1., -.5, 1.);
					}
				),
				"", "", "",
					GLSL(460,
                #extension GL_EXT_shader_atomic_int64 : enable
                #extension GL_NV_shader_atomic_int64 : enable
                #extension GL_ARB_gpu_shader_int64 : enable
                #extension GL_NV_gpu_shader5 : enable
                #includelib


				uniform sampler2DArray noiseTextures;
				uniform ivec3 noiseOffset;
                uniform sampler2D perlinNoise;

					out vec4 outColor;
					uniform ivec2 screenSize;
					uniform sampler2D composited;
					uniform sampler2D bloom;
					uniform sampler2D overlay;
					uniform float secs;
					uniform int sceneID;
					//
					// https://gamedev.stackexchange.com/a/148088
					// Converts linear RGB to sRGB per channel: a linear segment below the
					// 0.0031308 cutoff and a 1/2.4 power curve at or above it.
					vec3 linearToSRGB(vec3 linearRGB)
					{
						vec3 toe = 12.92 * linearRGB;
						vec3 curve = 1.055 * pow(linearRGB, vec3(1.0 / 2.4)) - 0.055;
						bvec3 belowCutoff = lessThan(linearRGB, vec3(0.0031308));

						// the boolean mix() picks toe where belowCutoff is true, curve elsewhere
						return mix(curve, toe, belowCutoff);
					}

			// Fetches an RGB texel from the noise texture array. The pixel coordinate is
			// shifted by noiseOffset.xy and the layer by noiseOffset.z + ofs, each wrapped
			// to 64. GLSL has no default arguments, so ofs must always be passed.
			vec3 getNoise(ivec2 coord, int ofs)
			{
				ivec3 texel = ivec3(
						(coord.x + noiseOffset.x) % 64,
						(coord.y + noiseOffset.y) % 64,
						(noiseOffset.z + ofs) % 64);
				return texelFetch(noiseTextures, texel, 0).rgb;
			}

					// Present fragment stage: samples the composited image (optionally wobbled and
					// with chromatic aberration), adds bloom, applies the color grading properties
					// and the overlay plate, converts to sRGB and adds a small amount of noise.
					void main() {
                        vec2 fragCoord = gl_FragCoord.xy;
                        vec2 halfSize = vec2(screenSize)*.5;
                        // scale the picture around the screen center
                        fragCoord = (fragCoord - halfSize) * prop_post_size + halfSize;
                        vec2 uv = fragCoord.xy / vec2(screenSize);
                        vec2 distorted_uv = uv;

                        // optional UV wobble driven by a scrolling noise texture
                        if (prop_post_wobble > 0.) {
                            vec2 waveUV = 0.5 * fragCoord.xy/screenSize.x + vec2(0.2*secs);
                            vec3 waves = texture(perlinNoise, waveUV, 0).rgb;

                            vec2 distort = vec2(-.5) + waves.xy;
                            float distort_amp = 0.005 * prop_post_wobble;
                            // NOTE(review): distort_blur is only referenced by a commented-out line further down
                            float distort_blur = min(1., pow(length(distort) * 2., 2.));

                            distorted_uv = uv + distort_amp * distort;
                        }
                        vec3 c;

                        // chromatic aberration: each channel is averaged along a line towards the
                        // screen center, with a per-channel step length (1x, 2x, 4x)
                        if (prop_post_chromabs_size > 0.) {
                            c = vec3(0.);
                            vec2 away = vec2(.5) - distorted_uv;
                            const int chromsamples = 10;
                            const float falloff = prop_post_chromabs_falloff;
                            const float size = prop_post_chromabs_size * pow(length(away), falloff);
                            const float step = size * 1e-1 / chromsamples;
                            const float inv = 1./chromsamples;
                            const vec2 uvOfs = away * step;
                            vec2 uvs[3] = {distorted_uv, distorted_uv, distorted_uv};
                            // stays at 1 because the decay line in the loop is commented out
                            vec3 factors = vec3(1.);
                            for (int i=0;i<chromsamples;i++) {
                                c.r += factors.r * inv * texture(composited, uvs[0]).r;
                                c.g += factors.g * inv * texture(composited, uvs[1]).g;
                                c.b += factors.b * inv * texture(composited, uvs[2]).b;
                                uvs[0] += uvOfs * 1;
                                uvs[1] += uvOfs * 2.0;
                                uvs[2] += uvOfs * 4.0;
                                //factors *= vec3(0.998, 0.999, 0.9995);
                            }
                        } else {
                            c = texture(composited, distorted_uv, 0).rgb;
                        }



                        // additive bloom, optionally strobed at 24 cycles per second of `secs`
                        vec3 blurred = texture(bloom, distorted_uv, 0).rgb;
                        // c = mix(c, blurred, distort_blur);

                        float strobe = mix(1., fract(secs*24.), prop_bloom_strobe);
						c += strobe * prop_bloom_strength * pow(blurred, vec3(1.2));

                        // saturation gain and hue shift in HSV; the result is clamped to [0, 1]
                        c = hsv2rgb(clamp(
                                    rgb2hsv(c) * vec3(1., prop_post_satgain, 1.) + vec3(prop_post_hueshift, 0., 0.),
                                    vec3(0.0), vec3(1.)));

                        c = mix(c, vec3(length(c)) * vec3(1.0, 0.7, 0.5), prop_post_sepia);
                        c = mix(c, c * vec3(0.2, 1., 0.8), prop_post_nightvision);

                        // overlay plate: added on top of the image, with a small noise-driven shake
                        if (prop_post_over_alpha > 0.)
                        {
                            vec2 shake = texture(perlinNoise, vec2(1.0*secs, 0.9*secs), 0).rg*vec2(1.,9./16);

                            vec2 uv2 = uv - vec2(prop_post_over_x, prop_post_over_y);
                            uv2 += 0.00125*shake;
                            // rescale so that the overlay keeps its own pixel size on screen
                            vec2 imgUv = uv2 * (vec2(screenSize) / textureSize(overlay, 0));
                            if (imgUv.x >= 0 && imgUv.x < 1. && imgUv.y >= 0 && imgUv.y < 1.) {
                                vec4 imgc = texture(overlay, imgUv);
                                // first half of the alpha range fades the dark red tint out,
                                // alpha itself saturates at 0.8 from the midpoint on
                                float tint = min(1., 2. * (1.-prop_post_over_alpha));
                                float alpha = min(1., 2. * prop_post_over_alpha);
                                alpha *= 0.8;
                                imgc.rgb = mix(imgc.rgb, vec3(0.20, 0., 0.), tint);
                                //c = mix(c, imgc.rgb, imgc.a * alpha);
                                c = c + imgc.rgb * imgc.a * alpha;
                            }
                        }

                        // final grading: gain, strobe, gamma, lifts and a green/blue power curve
                        c *= prop_post_gain;
                        c *= mix(1., fract(secs * prop_post_strobe_freq), prop_post_strobe);
                        c = pow(c, vec3(prop_post_gamma));

                        c += vec3(0.1 * prop_post_lift);
                        c += vec3(0.02, 0.03, 0.1) * prop_post_blulift;
                        c.gb = pow(c.gb, vec2(prop_post_gb_power));


						vec3 srgb = linearToSRGB(c.rgb);
						// sub-LSB noise added after the sRGB conversion (presumably dithering against banding)
						vec3 noise = 1./255. * (vec3(-.3) + .5*getNoise(ivec2(gl_FragCoord.xy), 1));
						srgb += noise;

						outColor = vec4(srgb,1.);
					}
					));

		// Run the present pass: draws the final image with the fullscreen triangle.
		glUseProgram(present);

		applyCamProperties(currentShot.relative, present, props);

		// Pick the overlay plate by the animated "post_over_id" property.
		// NOTE(review): the index is only limited from above. Confirm that plates is never
		// empty (size()-1 would wrap if size() is unsigned) and that post_over_id is never negative.
		float over = props.getf("post_over_id", currentShot.relative);
		Texture<GL_TEXTURE_2D>& plateTex = plates[min(plates.size()-1, int(over))];

		bindTexture("composited", composited);
		// bloombuffer2 holds the output of the last bloom pass
		bindTexture("bloom", bloombuffer2);
		bindTexture("overlay", plateTex);
		bindTexture("noiseTextures", noiseTextures);
		bindTexture("perlinNoise", perlinNoise);
		glUniform2i("screenSize", screenw, screenh);
		// a fresh random noise tile offset every frame
		glUniform3i("noiseOffset", rand() % 64, rand() % 64, noiseLayer);
		glUniform1i("frame", frame);
		glUniform1f("secs", secs);
		glUniform1f("shotSecs", currentShot.relative);
		glUniform1f("deltat", dt);
		glUniform1i("sceneID", int(currentShot.start));
		glDrawArrays(GL_TRIANGLES, 0, 3);


		// Timestamp taken after the present draw; used as the end point of the frame timings below.
		TimeStamp end;

		// On-screen debug overlay, only drawn when `controls` is set.
		if (controls) {
			// print the timing (word of warning; this forces a cpu-gpu synchronization)
			font.drawText(L"Total: " + std::to_wstring(end - start), 10.f, 10.f, 15.f); // text, x, y, font size
			font.drawText(L"Draw: " + std::to_wstring(drawTime - start), 10.f, 25.f, 15.f);
			font.drawText(L"Plot: " + std::to_wstring(splatTime - headerUpdateTime), 10.f, 40.f, 15.f);
			font.drawText(L"PostProc: " + std::to_wstring(end - splatTime), 10.f, 55.f, 15.f);
			font.drawText(L"Blur: " + std::to_wstring(resolveTime - blurTime), 10.f, 70.f, 15.f);
			//font.drawText(L"Points: " + std::to_wstring(pointsSplatted / 1000. / 1000.) + L" M", 100.f, 10.f, 15.f);
			font.drawText(L"Music: " + std::to_wstring(music.getTime()) + L" s", 200.f, 25.f, 15.f);
			{
				// widen the narrow camera / shot names element by element for the wide-string font API
				std::wstring ws, ws2;
				ws.assign(currentShot.camName.begin(), currentShot.camName.end());
				ws2.assign(currentShot.name.begin(), currentShot.name.end());
				font.drawText(L"Cam/Shot: " + ws + L"/" + ws2 + L" @ " + std::to_wstring(currentShot.relative) + L" s" + (looping.on ? L" LOOP" : L""),
					200.f, 40.f, 15.f);
			}
			font.drawText(interactive ? L"Mode: Interactive" : L"Mode: Cam track", 200.f, 55.f, 15.f);
			font.drawText(L"dt: " + std::to_wstring(dt) + L" s", 200.f, 70.f, 15.f);
			// the second figure is the measured rate scaled by this fixed factor
			const float RTX2070SPEEDUP = 2.5f; // vs GTX 1060 6 GB
			font.drawText(std::to_wstring(1./dt) + L" Hz" + L" (" + std::to_wstring(1./(dt/RTX2070SPEEDUP)) + L" Hz)", 200.f, 10.f, 15.f);
		}


		// End of the show: once `secs` is within 0.1 s of the music duration the frame is
		// cleared. BENCHMARK builds then save and show the results and leave the loop;
		// FINALBUILD builds leave the loop. With neither flag set the loop keeps running.
		if (secs >= music.getDuration() - 0.1) {
		//if (secs >= 5. - 0.1) {
			glClear(GL_COLOR_BUFFER_BIT);

			#if BENCHMARK
			auto shots2 = shots;
			shots2.resize(shots2.size() - 1); // drop the last duplicated shot
			int score = bench.saveResults(shots2, "report.txt", "frametimes.tsv");
			// a negative score is treated as a failure
			if (score >= 0) {
				std::string msg = "Your Peisik GPU Score is " + std::to_string(score);
				MessageBoxA(NULL, msg.c_str(), "kiitos", MB_OK);
				ShellExecuteA(NULL, "open", "report.txt", NULL, NULL, SW_SHOW);
			} else {
				MessageBoxA(NULL, "An error occurred during benchmark", "kiitos", MB_OK);
			}
            break;
			#endif
			#if FINALBUILD 
			break;
			#endif
		}

		// this actually displays the rendered image
		swapBuffers();

		// keep the old camera the same if we paused a frame
		if (music.playing || interactive) {
			std::swap(cameras[0], cameras[1]);
		}
		frame++;
	}

	return 0;
}
