#include <tamtypes.h>
#include <kernel.h>
#include "gif.h"
#include "misc.h"
#include "vu.h"

typedef u32 __attribute__((aligned(16))) u32_align128;

static u32_align128  vutemp[1024*4+8];
static u32 vutemp_ptr;
static u32 vutemp_header_size;
static u32 vutemp_header_addr;
static u32 vutemp_header_tag_ptr;
static u32 is_dbl_buf;

void vu_get_header(void** start, void** end)
{
	*start = (vutemp+vutemp_header_tag_ptr+1);
	*end   = (vutemp+vutemp_header_size*4+vutemp_header_tag_ptr+1);
}

/*
 * Start a new packet by rewinding the write index of the staging buffer.
 *
 * dbl_buf - accepted but not consulted; the double-buffer flag is only
 *           changed by vu_dbl_buf().
 */
void vu_reset(int dbl_buf)
{
	(void)dbl_buf;
	vutemp_ptr = 0;
}

/*
 * Switch double-buffer mode on or off.
 *
 * A negative base or offset turns the mode off and appends nothing to the
 * packet. Otherwise a VIF_BASE code carrying base and a VIF_OFFSET code
 * carrying offset are appended and the mode is turned on.
 */
void vu_dbl_buf(int base, int offset)
{
	if(base<0 || offset<0)
	{
		is_dbl_buf = 0;
		return;
	}

	vutemp[vutemp_ptr++] = VIF_CODE(VIF_BASE,0,base);
	vutemp[vutemp_ptr++] = VIF_CODE(VIF_OFFSET,0,offset);
	is_dbl_buf = 1;
}

/*
 * Append qw_count 4-word vectors from data to the packet as one or more
 * VIF_UNPACK_V4_32 transfers.
 *
 * data     - source words, read as u32; 4 words are consumed per vector.
 * qw_count - number of vectors to copy.
 * addr     - destination address placed in the first unpack code; it is
 *            advanced by the chunk length for every following code.
 *
 * A VIF_FLUSH code is emitted first unless double-buffer mode is on, then a
 * VIF_STCYL code with value 0x0404. The data is split into chunks of at
 * most 255 vectors. When double-buffer mode is on, bit 15 is set in the
 * address field of every unpack code.
 */
void vu_data_v4_32(void* data, u32 qw_count, int addr)
{
	u32* in = (u32*)data;
	u32 remaining;

	if(is_dbl_buf == 0)
		vutemp[vutemp_ptr++] = VIF_CODE(VIF_FLUSH,0,0);
	vutemp[vutemp_ptr++] = VIF_CODE(VIF_STCYL,0,0x0404);

	for(remaining = qw_count; remaining != 0; )
	{
		u32 chunk = remaining;
		u32 words;
		int target = addr;

		if(chunk > 255)
			chunk = 255;
		if(is_dbl_buf)
			target |= (1<<15);

		vutemp[vutemp_ptr++] = VIF_CODE(VIF_UNPACK_V4_32,chunk,target);

		/* payload: four words per vector in this chunk */
		for(words = chunk*4; words != 0; --words)
			vutemp[vutemp_ptr++] = *in++;

		addr += chunk;
		remaining -= chunk;
	}
}

/*
 * Append a microprogram upload to the packet as one or more VIF_MPG
 * transfers.
 *
 * data              - microcode, read as u32; two words (64 bits) are
 *                     consumed per instruction.
 * instruction_count - number of 64-bit instructions to upload.
 * addr              - load address of the first instruction, in the same
 *                     64-bit units the VIF_MPG code expects.
 *
 * The program is split into chunks of at most 256 instructions; the count
 * field is 8 bits wide, so a full chunk is encoded as 0 (256 & 0xff).
 */
void vu_mpg(void* data, u32 instruction_count, int addr)
{
	const u32* src = (const u32*)data;
	u32 missing = instruction_count;
	while(missing>0)
	{
		u32 now = missing>256?256:missing;
		u32 cnt;
		/* VIF_MPG data must start on a 64bit boundary, so the MPG code itself
		 * has to land on an odd word index. */
		if(!(vutemp_ptr&1)) vutemp[vutemp_ptr++] = VIF_CODE(VIF_NOP,0,0);
		vutemp[vutemp_ptr++] = VIF_CODE(VIF_MPG,(now&0xff),addr);
		for(cnt=0; cnt<now*2; ++cnt)
			vutemp[vutemp_ptr++] = *src++;
		/* The MPG load address counts 64-bit instructions, so the next chunk
		 * starts `now` slots further on. Advancing by now/2 made the second
		 * chunk overwrite the back half of the first. */
		addr += now;
		missing -= now;
	}
}

void vu_mscalf()
{
	vutemp[vutemp_ptr++] = VIF_CODE(VIF_FLUSH,0,0); 
	vutemp[vutemp_ptr++] = VIF_CODE(VIF_MSCALF,0,0);
	vutemp[vutemp_ptr++] = VIF_CODE(VIF_FLUSH,0,0); 
}


void vu_flush()
{
	vutemp[vutemp_ptr++] = VIF_CODE(VIF_FLUSH,0,0); 
	vutemp[vutemp_ptr++] = VIF_CODE(VIF_FLUSHA,0,0); 
	vutemp[vutemp_ptr++] = VIF_CODE(VIF_FLUSHE,0,0); 
}

void vu_mscal()
{
	vutemp[vutemp_ptr++] = VIF_CODE(VIF_MSCAL,0,0);
}

/*
 * Finish the packet and hand it to the DMA macros.
 *
 * The packet is padded with VIF_NOP codes until its length is a multiple of
 * four words, the cache is flushed, any transfer still in flight is waited
 * for, and the buffer is then sent with its length expressed in 4-word
 * units. The write index is left untouched; call vu_reset() before building
 * the next packet.
 */
void vu_send()
{
	while((vutemp_ptr & 3) != 0)
	{
		vutemp[vutemp_ptr] = VIF_CODE(VIF_NOP,0,0); // pad
		++vutemp_ptr;
	}

	flush_cache(0);
	DMA01_WAIT();
	DMA01_SEND(vutemp,vutemp_ptr/4,0x101);
}

/*
 * Send a caller-built packet instead of the internal staging buffer.
 *
 * pck       - start of the packet; passed straight to DMA01_SEND.
 * qword_cnt - length handed to DMA01_SEND (vu_send() passes its word count
 *             divided by four in the same position).
 *
 * The sequence matches vu_send(): flush the cache, wait on the channel,
 * then start the transfer. No padding is added here.
 */
void vu_send_vif(void* pck, int qword_cnt)
{
	flush_cache(0);
	DMA01_WAIT();
	DMA01_SEND(pck,qword_cnt,0x101);
}

/*
 * Open a header block whose vectors are added with the vu_header_add*()
 * functions and closed with vu_header_end().
 *
 * addr - destination address, remembered until vu_header_end() builds the
 *        unpack code.
 *
 * Emits a VIF_FLUSH code (skipped in double-buffer mode) and a VIF_STCYL
 * code with value 0x0404, then reserves one word for the unpack code. That
 * word cannot be written yet because the vector count is not known.
 */
void vu_header_begin(int addr)
{
	if(is_dbl_buf == 0)
		vutemp[vutemp_ptr++] = VIF_CODE(VIF_FLUSH,0,0);
	vutemp[vutemp_ptr++] = VIF_CODE(VIF_STCYL,0,0x0404);

	/* placeholder, overwritten by vu_header_end() */
	vutemp_header_tag_ptr = vutemp_ptr;
	vutemp[vutemp_ptr] = 0;
	vutemp_ptr++;

	vutemp_header_addr = addr;
	vutemp_header_size = 0;
}

/*
 * Copy nvec 4-word vectors from p into the open header.
 *
 * p    - source words, read as u32.
 * nvec - number of vectors; 4*nvec words are copied and the header's vector
 *        count grows by nvec.
 */
void vu_header_add(void* p, int nvec)
{
	u32 * in = (u32*)p;
	int left = 4*nvec;

	while(left > 0)
	{
		vutemp[vutemp_ptr++] = *in++;
		--left;
	}
	vutemp_header_size += nvec;
}

/*
 * Reinterpret the storage of lvalue x as a u32.
 *
 * The whole expansion is parenthesized so that a postfix operator applied
 * to the macro binds to the dereferenced value, not to the pointer cast.
 *
 * NOTE(review): when x is not itself a u32 this reads the object through an
 * incompatible pointer type, which strict-aliasing optimizations may
 * miscompile; prefer a union or memcpy for such conversions.
 */
#define CAST_TO_U32(x) (*((u32*)(&(x))))

void vu_header_add_vecf(float x, float y, float z, float w)
{
	vutemp[vutemp_ptr++] = CAST_TO_U32(x);
	vutemp[vutemp_ptr++] = CAST_TO_U32(y);
	vutemp[vutemp_ptr++] = CAST_TO_U32(z);
	vutemp[vutemp_ptr++] = CAST_TO_U32(w);
	vutemp_header_size++;
}

/*
 * Append one vector of four 32-bit integers (x, y, z, w in that order) to
 * the open header and count it.
 */
void vu_header_add_veci(u32 x, u32 y, u32 z, u32 w)
{
	u32 at = vutemp_ptr;

	vutemp[at]     = x;
	vutemp[at + 1] = y;
	vutemp[at + 2] = z;
	vutemp[at + 3] = w;

	vutemp_ptr = at + 4;
	vutemp_header_size += 1;
}

/*
 * Append one vector built from two 64-bit values to the open header and
 * count it. Each value is stored low word first: x low, x high, y low,
 * y high.
 */
void vu_header_add_u64_2(u64 x, u64 y)
{
	u32 x_lo = (u32)(x & 0xffffffff);
	u32 x_hi = (u32)(x >> 32);
	u32 y_lo = (u32)(y & 0xffffffff);
	u32 y_hi = (u32)(y >> 32);

	vutemp[vutemp_ptr++] = x_lo;
	vutemp[vutemp_ptr++] = x_hi;
	vutemp[vutemp_ptr++] = y_lo;
	vutemp[vutemp_ptr++] = y_hi;
	vutemp_header_size++;
}

int vu_header_end()
{
	vutemp[vutemp_header_tag_ptr] = VIF_CODE(VIF_UNPACK_V4_32,vutemp_header_size,vutemp_header_addr|(is_dbl_buf?(1<<15):0));
	return vutemp_header_size;
}

/*
 * Debug aid: write two 16 KB memory windows to files on the host.
 *
 * The window at 0x11008000 goes to "host:VU1MICROMEM" and the window at
 * 0x1100c000 goes to "host:VU1MEM".
 * NOTE(review): the file names suggest these are the VU1 micro memory and
 * VU1 data memory; confirm against the platform memory map.
 */
void vu_dump_to_file()
{
	void* micro_mem = ((void*)0x11008000);
	void* data_mem  = ((void*)0x1100c000);

	write_file("host:VU1MICROMEM",micro_mem,1024*16);
	write_file("host:VU1MEM", data_mem,1024*16);
}
