#include <tamtypes.h>
#include <kernel.h>
#include <sifrpc.h>
#include <stdlib.h>
#include <math.h>

#include "gfx.h"
#include "math.h"
#include "vu.h"

void dumpmem(volatile void* beg_, volatile void* end_)
{	u8* beg=(u8*)beg_,*end=(u8*)end_;
u32 cnt=0;
nprintf("0000: ");
while(beg<end)
{
	nprintf("%2.2X ",*beg);
	if(++cnt % 16 == 0) nprintf("\n%4.4X: ",cnt);
	++beg;
}
nprintf("\n");
}

void comparemem(volatile void* beg_, volatile void* end_, volatile void* beg2_)
{
	u8* beg=(u8*)beg_,*end=(u8*)end_,*beg2=(u8*)beg2_;
	u32 cnt=0;
	while(beg<end)
	{
		if(*beg!=*beg2)
			nprintf("%d != %d, cnt = %d\n", (int)*beg,(int)*beg2,cnt);
		++beg;
		++beg2;
		++cnt;
	}
}



/****************** VU *****************************************************************************/

// VIF command codes (the CMD field of a VIFcode, see VIF_CODE below)
#define VIF_NOP				0x00
#define VIF_STCYL			0x01
#define VIF_FLUSH			0x11		// 0010001 (postpone VIF operation until memory transfers for VU instruction and data memory have finished and the previous VU program has finished executing)
#define VIF_FLUSHA			0x13		// 0010011 (postpone VIF operation until memory transfers for VU instruction and data memory have finished)
#define VIF_FLUSHE			0x10		// 0010000 (postpone VIF operation until the previous VU program has finished executing)
#define VIF_MSCALF			0x15		// 0010101 (execute a VU program residing in address without swapping the double buffer registers)
#define VIF_MPG				0x4A		// 1001010 (size dwords to the VU program memory)
#define VIF_UNPACK_V4_32	0x6C		// 11m1100 m=0

// Packs one 32bit VIFcode: CMD goes to bits 24 and up, NUM to bits 16 and up,
// IMMEDIATE to the low bits. The arguments are not masked, so the caller must
// keep NUM within 8 bits and IMMEDIATE within 16 bits.
#define VIF_CODE(CMD,NUM,IMMEDIATE) ((((u32)(CMD))<<24)|(((u32)(NUM))<<16)|((u32)(IMMEDIATE)))

#define ADDRESS_OF(a) ((u32)(a))
// Convert a size in bytes to a count of units, rounding up
#define WORD_COUNT(a) (((a)+3)/4)		// 32bit
#define DWORD_COUNT(a) (((a)+7)/8)		// 64bit
#define QWORD_COUNT(a) (((a)+15)/16)	// 128bit

// NOTE(review): an array whose element type is over-aligned is rejected by newer GCC
// versions; aligning the array object itself would be the portable spelling - confirm
// against the toolchain in use.
typedef u32 __attribute__((aligned(16))) u32_align128;

// Staging buffer for the VIF packet under construction (in 32bit words) and the
// index of the next free word. The vu_* functions below append to it without
// any bounds checking.
static u32_align128  vutemp[1024*4+8];
static u32 vutemp_ptr;

// Initialisation hook for the VU helpers; currently it only logs that it was called.
void vu_init()
{
	nprintf("vu_init()\n");
}

// Starts a new VIF packet by rewinding the write index of vutemp to 0.
// Anything previously appended to vutemp is overwritten by later vu_* calls.
void vu_reset()
{
	vutemp_ptr = 0;
}

/*
 * Append VIF codes to vutemp that unpack qw_count quadwords (4 x 32bit each)
 * from data to address addr.
 *
 * The packet starts with FLUSH and STCYL(0x0404); the payload is then split
 * into UNPACK V4-32 commands of at most 255 quadwords each, and addr advances
 * by the number of quadwords written so far. Bit 15 is set in every UNPACK
 * immediate.
 *
 * No check is made that the packet fits into vutemp.
 */
void vu_data_v4_32(void* data, u32 qw_count, int addr)
{
	const u32 *words = (const u32*)data;
	u32 left;

	vutemp[vutemp_ptr++] = VIF_CODE(VIF_FLUSH,0,0);
	vutemp[vutemp_ptr++] = VIF_CODE(VIF_STCYL,0,0x0404);

	for (left = qw_count; left != 0; )
	{
		const u32 chunk = (left < 255) ? left : 255;
		const u32 *chunk_end = words + chunk*4;

		vutemp[vutemp_ptr++] = VIF_CODE(VIF_UNPACK_V4_32,chunk,addr|(1<<15));
		while (words != chunk_end)
			vutemp[vutemp_ptr++] = *words++;

		addr += chunk;
		left -= chunk;
	}
}

/*
 * Append VIF MPG commands to vutemp that upload a VU microprogram.
 *
 * data               first instruction of the program (each instruction is
 *                    64 bits, i.e. two 32bit words)
 * instruction_count  number of 64bit instructions to upload
 * addr               destination address in VU program memory, in 64bit
 *                    instruction units
 *
 * The program is split into chunks of at most 256 instructions, because the
 * NUM field of a VIFcode is only 8 bits wide and the value 0 stands for 256.
 * No check is made that the packet fits into vutemp.
 */
void vu_mpg(void* data, u32 instruction_count, int addr)
{
	u32* src = (u32*)data;
	u32 missing = instruction_count;

	while(missing>0)
	{
		// The real chunk size stays in 'now'; only the value written into the
		// NUM field is reduced to 8 bits. Masking 'now' itself would turn a
		// full 256 instruction chunk into 0 and the loop would never advance.
		u32 now = missing>256?256:missing;
		u32 cnt;

		if(!(vutemp_ptr&1)) vutemp[vutemp_ptr++] = VIF_CODE(VIF_NOP,0,0); // VIF_MPG data must start on a 64bit boundary
		vutemp[vutemp_ptr++] = VIF_CODE(VIF_MPG,now&255,addr);
		for(cnt=0; cnt<now*2; ++cnt)
			vutemp[vutemp_ptr++] = *src++;

		// the load address is counted in 64bit instructions, the same unit as 'now'
		addr += now;
		missing -= now;
	}
}

void vu_mscalf()
{
	vutemp[vutemp_ptr++] = VIF_CODE(VIF_FLUSH,0,0);
	vutemp[vutemp_ptr++] = VIF_CODE(VIF_MSCALF,0,0);
	vutemp[vutemp_ptr++] = VIF_CODE(VIF_FLUSH,0,0);
}

/*
 * Finish the packet in vutemp and hand it to DMA01_SEND.
 *
 * The packet is padded with VIF NOPs to a multiple of four words, so that
 * vutemp_ptr/4 is its exact length in quadwords. The cache is flushed before
 * the transfer is started and the function returns after DMA01_WAIT().
 */
void vu_send()
{
	while ((vutemp_ptr & 3) != 0)
		vutemp[vutemp_ptr++] = VIF_CODE(VIF_NOP,0,0);

	flush_cache(0);
	DMA01_SEND(vutemp, vutemp_ptr/4, 0x101);
	DMA01_WAIT();
}

/***************************************************************************************************/

// In-memory layout of a 3d object as filled in by read_file() in effect2_load().
typedef struct
{
	int num_triangles;				// number of triangles; drawobj() sends three vertices[] entries per triangle
	int has_texture_coordinates;	// not read anywhere in this file
	int filler[2];					// pads the header to four ints
	Vector3d vertices[1];			// first element of the vertex data; indexed past [0] by drawobj()
} Object;

static Object* sphere;	// object loaded from "12k.3d" in effect2_load()
static texture font;	// font texture loaded from "font.pcx" / "font_alpha.pcx"

// Symbols placed in the .vudata section. Effect2VU / Effect2VUEnd delimit the VU
// program that drawobj() uploads; Effect2Data / Effect2DataEnd are only referenced
// by a commented-out line in effect2_load().
extern unsigned int Effect2VU  __attribute__((section(".vudata")));
extern unsigned int Effect2VUEnd  __attribute__((section(".vudata")));
extern unsigned int Effect2Data  __attribute__((section(".vudata")));
extern unsigned int Effect2DataEnd  __attribute__((section(".vudata")));

/*
 * Load the assets of this effect: the 3d object "12k.3d" into sphere and the
 * font texture (colour image plus separate alpha image) into font.
 * The results of read_file() are not checked.
 */
void effect2_load()
{
	int sphere_bytes;
	void* pcx_data;
	int pcx_bytes;

	/* 3d object */
	read_file("12k.3d",(void**)&sphere,&sphere_bytes);
	nprintf("load3ds: loaded 3d object with %d faces\n", sphere->num_triangles);
	vu1_cleardata();

	/* font: colour image first, then the alpha channel from a second file */
	read_file("font.pcx",&pcx_data,&pcx_bytes);
	texture_load(&font,pcx_data);

	read_file("font_alpha.pcx",&pcx_data,&pcx_bytes);
	texture_loadalpha(&font,pcx_data);
}

// Called before the effect runs: passes the font texture to texture_allocupload().
void effect2_prerun()
{
	texture_allocupload(&font);
}

inline static Vector2d point4i(int x, int y, int z, int w)
{
	Vector2d p={x,y,z,w};
	return p;
}

inline static Vector3d point4f(float x, float y, float z, float w)
{
	Vector3d p={x,y,z,w};
	return p;
}

/*
 * Read a Vector3d from the memory f points to.
 * f must point to storage that is valid to read as a whole Vector3d.
 */
static inline Vector3d point4fp(float* f)
{
	const Vector3d *vec = (const Vector3d*)f;
	return *vec;
}

// Scratch area in which drawobj_inner() assembles the header quadwords of a batch.
// It is accessed through int128 / Vector3d / Vector2d pointers, so it is aligned
// to a quadword instead of relying on where the linker happens to place an s32 array.
static s32 buffer_[4096] __attribute__((aligned(16)));

/*
 * Send one batch of triangles to the VU and run the VU program on it.
 *
 * data       vertex data of the batch, three quadwords per triangle
 * mat        matrix whose four rows (xu, xv, xw, xx) are passed to the VU program
 * tlightpos  light position passed to the VU program
 * count      number of triangles in the batch
 *
 * A header of 12 quadwords (count, GIF tag, matrix, translation, scale,
 * light position, diffuse, ambient, clamp) is unpacked to address 0, the
 * vertex data directly behind it; then the program at address 0 is started.
 */
void drawobj_inner(Vector3d* data, Matrix* mat, Vector3d* tlightpos, u32 count)
{
	int128 *VU1_int128 = (int128*)buffer_;
	Vector3d *VU1_vec3d = (Vector3d *)buffer_;
	Vector2d *VU1_vec2d = (Vector2d *)buffer_;

	u32 pt=0;	// index of the next header quadword

	// count
	VU1_vec2d[pt++] = point4i(count,0,0,0);

	// giftag
	// NLOOP (the low bits) is left 0 here; count is written by the vu program.
	// All shifts are done in u64 so they do not depend on the width of long.
	VU1_int128[pt].lo =
		((u64)1<<15) |									// EOP=1
		((u64)1<<46) |									// PRE=1 (the PRIM field below is used)
		((u64)(PRIM_TRI|PRIM_ANTIALIAS1)<<47) |			// PRIM=TRI
		((u64)4<<60);									// NREG=4
	// the four registers written per triangle: one colour, three vertices
	VU1_int128[pt].hi = (u64)GIF_RGBAQ | ((u64)GIF_XYZF2 << 4) | ((u64)GIF_XYZF2 << 8) | ((u64)GIF_XYZF2 << 12);
	pt++;

	// worldmat
	VU1_vec3d[pt++] = point4fp(&mat->xu);
	VU1_vec3d[pt++] = point4fp(&mat->xv);
	VU1_vec3d[pt++] = point4fp(&mat->xw);
	VU1_vec3d[pt++] = point4fp(&mat->xx);

	// translation
	VU1_vec3d[pt++] = point4f(2320.0, 2128.0, 0.0, 0.0);

	// scale
	VU1_vec3d[pt++] = point4f(386.2828632246680438846623223933, 231.76971793480082633079739343595, 1000.0, 0.0);

	// lightpos
	VU1_vec3d[pt++] = point4fp((float*)tlightpos);

	// diffuse
	VU1_vec3d[pt++] = point4f(255,255,255,0);

	// ambient
	VU1_vec3d[pt++] = point4f(5,50,80,0);

	// clamp
	VU1_vec3d[pt++] = point4f(15,160,255,0);

	// header first, then the vertices straight from the caller's memory
	vu_reset();
	vu_data_v4_32(buffer_,pt,0);
	vu_data_v4_32(data,count*3,pt);

	vu_mscalf();
	vu_send();
}

/*
 * Draw a whole object: upload the VU program once, then feed the triangles
 * to drawobj_inner() in batches of at most batch_size triangles.
 */
void drawobj(Object* obj, Matrix* mat, Vector3d* tlightpos)
{
	const int batch_size = 144; // memory usage: 12 + triangles*7, so the max is (1024-12)/7 = 144
	const int total = obj->num_triangles;
	int first;	// index of the first triangle of the current batch

	/* upload the program located between Effect2VU and Effect2VUEnd to address 0 */
	vu_reset();
	vu_mpg(&Effect2VU,DWORD_COUNT((ADDRESS_OF(&Effect2VUEnd)-ADDRESS_OF(&Effect2VU))),0);
	vu_send();

	for (first = 0; first < total; )
	{
		int batch = total - first;
		if (batch > batch_size)
			batch = batch_size;

		/* three vertices[] entries per triangle */
		drawobj_inner(&obj->vertices[first*3], mat, tlightpos, batch);
		first += batch;
	}
}

/*
 * Build the matrix of one object for time t: a look-at matrix from
 * (0,0,-5) towards the origin, combined with a rotation about x and z that
 * depends on t, then moved by a sine/cosine offset of amplitude 1.5.
 */
Matrix calcmat(float t)
{
	const float k = 1.5;
	Vector3d eye={0,0,-5,0};
	Vector3d focus={0,0,0,0};
	Matrix spin;
	Matrix result;
	double dx, dy, dz;

	/* rotation */
	math_matrix_identity(&spin);
	math_matrix_rotatex(&spin,t*1.5);
	math_matrix_rotatez(&spin,t*-2.2);

	/* camera, combined with the rotation */
	math_matrix_lookat(&result,&eye,&focus);
	math_matrix_multiply(&result,&spin,&result);

	/* wobble */
	dx = k*sin(t*4.3+540);
	dy = k*cos(t*2.3+1234);
	dz = k*sin(t*-0.43f+121);
	math_matrix_move(&result, dx, dy, dz);

	return result;
}

// State variables named after the GIF packet builder. Nothing in this file touches
// them by name - presumably the gif_* / g* macros from gfx.h expand to code that
// does; verify against gfx.h before removing or renaming any of them.
static int GIF_i; static int128 *GIF_p,*GIF_beg;s64 GIF_nreg;s64 GIF_flg,GIF_prim,GIF_pre; // gif_env !!!!


static void drawchar(int x, int y, int c, int alpha)
{
	int u,v;

	x = 32000+x*16;
	y = 32000+y*16;

	if(c>='a' && c<='z')
		c+='A'-'a';
	if(c>='0' && c <='9')
	{
		c+='Z'-'0'+1;
	}

	if(c>='A' && c<=('Z'+10))
	{
		c-='A';
		u = 32*16*(c%10);
		v = 32*16*(c/10);
	}
	else
	{
		switch(c)
		{
		case '?': u = 32*16*6; v = 32*16*3; break;
		case '!': u = 32*16*7; v = 32*16*3; break;
		case '/': u = 32*16*8; v = 32*16*3; break;
		case ':': u = 32*16*9; v = 32*16*3; break;
		case '(': u = 16*0; v = 16*174; break;
		case ')': u = 16*32; v = 16*174; break;
		case '\'': u = 16*64; v = 16*174; break;
		case '-': u = 16*218; v = 16*140; break;
		case ',': u = 16*250; v = 16*140; break;
		case '.': u = 16*282; v = 16*140; break;
		default:
			return;
		}
	}

	gRGBAQ(0x3F80000000808080+(alpha<<24));

	gUV(u,v);
	gXYZ3(x,y,0);

	gUV(u,v+31*16);
	gXYZ3(x,y+16*16,0);

	gUV(u+31*16,v);
	gXYZ2(x+32*16,y,0);

	gUV(u+31*16,v+31*16);
	gXYZ2(x+32*16,y+16*16,0);
}

/*
 * Draw the zero-terminated string s starting at (x, y) with the given alpha.
 * The pen advances by 16 for a space and by 32 for every other character.
 */
void drawline(int x, int y, const char* s, int alpha)
{
	const char *p;

	for (p = s; *p != '\0'; ++p)
	{
		drawchar(x, y, *p, alpha);
		if (*p == ' ')
			x += 16;
		else
			x += 32;
	}
}

// Selector values for the A, B, C and D inputs of the blend equation below.
// The value 2 means "zero" for A, B and D, and "the FIX value" for C.
#define ALPHA_SRC 0
#define ALPHA_DST 1
#define ALPHA_ZERO 2
#define ALPHA_FIX 2

// Blend equation: ((A - B)*C >> 7) + D
// With A=SRC, B=DST, C=SRC, D=DST this is the usual
// d[0]=d[0]+((alpha*(e[0]-d[0]))>>8);

// Builds the value of the ALPHA register: A, B, C and D are 2bit selectors in
// bits 0-7, FIX is an 8bit constant placed at bit 32.
#define ALPHA(A,B,C,D,FIX) ( (((u64)(A))&3) | ((((u64)(B))&3)<<2) | ((((u64)(C))&3)<<4) | ((((u64)(D))&3)<<6) | ((((u64)(FIX))&0xFF)<<32) )

// One screen of the end text: eight lines, drawn top to bottom by drawscreen().
typedef const char* text_screen[8];

// The screens shown in turn by effect2_run(); after the last one the text
// starts over with the first. Most lines are padded with spaces to 19 characters.
static text_screen text[] =
{
	{
		"You've reached the ",
		"end of my first ps2",
		"mini-demo, sorry it",
		"isn't longer, but i",
		"haven't had much   ",
		"free time, and that",
		"VU was a bitch to  ",
		"get to work."
	},
	{
		"But i have success-",
		"fuly completed my  ",
		"objective of       ",
		"actually finishing ",
		"something for ttc19",
		"hopefuly my next   ",
		"contribution will  ",
		"be something better",
	},
	{
		"Thanks to:         ",
		"craft/fudge        ",
		"adresd             ",
		"sparky             ",
		"blackdroid         ",
		"drakonite 	    ",
		"and jules for      ",
		"the help you gave  ",
	},
	{
		"and ps2dev         ",
		"in general         ",
		"                   ",
		"i'm still amazed   ",
		"at how such a small",
		"community can      ",
		"accomplish so much ",
		"                   ",
	},
	{
		"well, i've included",
		"the source, just   ",
		"for good measure   ",
		"i can't garranti   ",
		"that's it bug free ",
		"or good to learn   ",
		"from though        ",
		"                   ",
	},
	{
		"credits for this   ",
		"intro:             ",
		" code: jar/ap      ",
		" music: biotek/dmg ",
		"                   ",
		"sorry i didn't ask ",
		"permission to use  ",
		" the music :)      ",
	},
	{
		"bah, i suck at     ",
		"writing scrolltexts",
		"i've gotten all    ",
		"soppy now.         ",
		"                   ",
		"i want to sleep....",
		"so, the text will  ",
		"just loop.....     ",
	},
};

// number of screens in text[]
#define ntext (sizeof(text) / sizeof(text_screen))

// vertical distance between two text lines
#define yspace 20
extern int gfx_pal;
//#define starty (256-8*yspace)
// y position of the first text line: the block of 8 lines ends at 256 when
// gfx_pal is non-zero and at 244 otherwise
#define starty ((gfx_pal?256:244)-8*yspace)

/*
 * Draw one screen of eight text lines at time t of its lifetime end.
 *
 * During the first 80% of the lifetime the lines appear one after another:
 * every line whose index is <= t*16/end is drawn with full alpha (0x80) and
 * the line after those is faded in with the fractional part of that value.
 * During the last 20% all eight lines are faded out together.
 */
void drawscreen(const char** text, float t, float end)
{
	int row;
	float step=t*16.0/end;

	if(!(t/end < 0.8))
	{
		/* fade out: the factor goes from 1 down to 0 over the last 20% */
		float fade= 1.0f - (( t/end )-0.8)/0.2;
		for(row=0; row<8; ++row)
			drawline(0,starty+row*yspace,text[row],0x80*fade);
		return;
	}

	/* lines that are already fully visible */
	row = 0;
	while(row<8 && row<=step)
	{
		drawline(0,starty+row*yspace,text[row],0x80);
		++row;
	}

	/* the line that is currently fading in, if any is left */
	if(row<8)
	{
		step -= (int)step;
		drawline(0,starty+row*yspace,text[row],0x80*step);
	}
}

/*
 * Render one frame of the effect at time t: five copies of the sphere, and
 * once t is above 5 the end text on top of them.
 * Always returns 1.
 */
int effect2_run(float t)
{
	gfx_clearscreen(0);
	gfx_set_register(GIF_TEST_1, TEST_DEPTHTEST_GEQUAL);

	{
		Matrix mat, lrotmat;
		Vector3d lightpos={0,0,5,0},tlightpos;
		u32 cnt;

		// rotate the light around z and y over time
		math_matrix_identity(&lrotmat);
		math_matrix_rotatez(&lrotmat,t*-0.5);
		math_matrix_rotatey(&lrotmat,t*1.5);
		math_transform_vertices(&lightpos,&lightpos,&lrotmat,1);

		// The light is transformed once for all objects, before any object
		// matrix exists (it's placed outside the loop on purpose). mat is
		// therefore set to identity first, so that it is never read
		// uninitialized.
		math_matrix_identity(&mat);
		math_transform_vertices(&tlightpos,&lightpos,&mat,1);

		for(cnt=0; cnt<5; ++cnt)
		{
			mat = calcmat((t+cnt*0.8+sin(cnt*0.3+t*0.23f+(t+5)*cnt*0.78f))*0.6f);
			drawobj(sphere,&mat,&tlightpos);
		}
	}

	// text overlay: alpha blended, textured with the font
	gif_begin(dma_list);
	gif_tag(0xe,1,0,0,0);

	gPRMODECONT(1);				// refer to prim attributes

	gPRIM(PRIM_TRI_STRIP | PRIM_TEXTUREMAP | PRIM_NOT_PERSPECTIVE | PRIM_ALPHABLEND);
	gALPHA_1(ALPHA(ALPHA_SRC,ALPHA_DST,ALPHA_SRC,ALPHA_DST,0));
	gPABE(0);
	gTEST_1(TEST_DEPTHTEST_ALWAYS);
	gTEX0_1(texture_TEX0(&font));
	gTEX1_1(0);
	gRGBAQ(0x3f00000080808080);

	// duration of one text screen, in the same unit as t
#define GAAAB 12
	if(t > 5)
	{
		int blah;	// number of the screen to show, before wrapping around

		t-=5;

		blah= ((int)(t/GAAAB));

		drawscreen(text[blah%ntext],t-blah*GAAAB,GAAAB);
	}

	gif_send(dma_list);

	return 1;
}

