#include <math.h>
#include <new>
#include "tri.h"

// Pi as a double-precision literal; used when generating the phong
// light-map in tri_init().
#define	PI		3.1415926535897932384626433832795

// Row stride, in elements, of both the back buffer and the z-buffer:
// scanline y starts at buffer + y * WIDTH.
#define	WIDTH		1024

/*
 * SORT_VERTICES( s0, s1, s2, d0, d1, d2 )
 *
 * Orders three vertex pointers by ascending ->y: afterwards d0 holds the
 * vertex with the smallest y, d1 the middle one and d2 the largest.
 * s0..s2 are only read; d0..d2 must be assignable.
 *
 * The expansion is one do { } while ( 0 ) statement, so the y temporaries
 * stay private to the macro: it can be used several times in one scope and
 * as the body of an unbraced if/else.  Arguments are parenthesised; each
 * may be evaluated more than once, so pass side-effect-free expressions.
 */
#define	SORT_VERTICES( s0, s1, s2, d0, d1, d2 )			\
	do {							\
		const float	sort_y0_ = (s0)->y;		\
		const float	sort_y1_ = (s1)->y;		\
		const float	sort_y2_ = (s2)->y;		\
		if ( sort_y0_ < sort_y1_ ) {			\
			if ( sort_y0_ < sort_y2_ ) {		\
				(d0) = (s0);			\
				if ( sort_y1_ < sort_y2_ ) {	\
					(d1) = (s1);		\
					(d2) = (s2);		\
				} else {	/* y1 >= y2 */	\
					(d1) = (s2);		\
					(d2) = (s1);		\
				}				\
			} else {		/* y2 <= y0 */	\
				(d0) = (s2);			\
				(d1) = (s0);			\
				(d2) = (s1);			\
			}					\
		} else {			/* y0 >= y1 */	\
			if ( sort_y1_ < sort_y2_ ) {		\
				(d0) = (s1);			\
				if ( sort_y2_ < sort_y0_ ) {	\
					(d1) = (s2);		\
					(d2) = (s0);		\
				} else {	/* y2 >= y0 */	\
					(d1) = (s0);		\
					(d2) = (s2);		\
				}				\
			} else {		/* y1 >= y2 */	\
				(d0) = (s2);			\
				(d1) = (s1);			\
				(d2) = (s0);			\
			}					\
		}						\
	} while ( 0 )

/*
 * INTERP_VARS()
 *
 * Advances the caller's edge state by one scanline: both edges' x, z,
 * phong (pu, pv) and texture (tu, tv) values move by their per-row deltas,
 * the row pointers p and z move down by WIDTH elements and y is
 * incremented.  All of these names must exist in the calling scope.
 *
 * Expands to a single do { } while ( 0 ) statement so that the whole step
 * is governed by an enclosing unbraced if/while, not just its first line.
 */
#define	INTERP_VARS()			\
	do {				\
		xx0 += dx0;		\
		xx1 += dx1;		\
		zz0 += dz0;		\
		zz1 += dz1;		\
		pu0 += dpu0;		\
		pu1 += dpu1;		\
		pv0 += dpv0;		\
		pv1 += dpv1;		\
		tu0 += dtu0;		\
		tu1 += dtu1;		\
		tv0 += dtv0;		\
		tv1 += dtv1;		\
		p += WIDTH;		\
		z += WIDTH;		\
		y++;			\
	} while ( 0 )


/*
 * CALC_DXS( N, M, K )
 *
 * Computes the per-scanline deltas of edge N, which runs from vertex vK to
 * vertex vM: for x, z, pu, pv, tu and tv the difference (vM - vK) is
 * divided by the edge's height in y and stored in dxN, dzN, dpuN, dpvN,
 * dtuN and dtvN.  x and y are read from the vertex structs; the other
 * attributes come from the caller's locals vMz, vMpu, ... / vKz, vKpu, ...
 *
 * The caller must make sure vM->y != vK->y, otherwise this divides by zero.
 *
 * Expands to a single do { } while ( 0 ) statement so it is safe as the
 * body of an unbraced if/else.
 */
#define	CALC_DXS( N, M, K )					\
	do {							\
		const float	dy = v##M->y - v##K->y;		\
		dx##N = (v##M->x - v##K->x) / dy;		\
		dz##N = (v##M##z - v##K##z) / dy;		\
		dpu##N = (v##M##pu - v##K##pu) / dy;		\
		dpv##N = (v##M##pv - v##K##pv) / dy;		\
		dtu##N = (v##M##tu - v##K##tu) / dy;		\
		dtv##N = (v##M##tv - v##K##tv) / dy;		\
	} while ( 0 )

/*
 * START_VARS_SPA( N, M )
 *
 * Initialises the running values of edge N (xxN, zzN, puN, pvN, tuN, tvN)
 * at vertex vM and pre-steps each of them by spa rows along the edge,
 * using the deltas dxN, dzN, ... that CALC_DXS() produced.  spa and the
 * per-vertex locals vMz, vMpu, ... must exist in the calling scope.
 *
 * Expands to a single do { } while ( 0 ) statement so that every
 * assignment is governed by an enclosing unbraced if/else.
 */
#define	START_VARS_SPA( N, M )					\
	do {							\
		xx##N = v##M->x + dx##N * spa;			\
		zz##N = v##M##z + dz##N * spa;			\
		pu##N = v##M##pu + dpu##N * spa;		\
		pv##N = v##M##pv + dpv##N * spa;		\
		tu##N = v##M##tu + dtu##N * spa;		\
		tv##N = v##M##tv + dtv##N * spa;		\
	} while ( 0 )

/*
 * INNER_INTERP_VARS()
 *
 * Advances the caller's span state by one pixel: z0, pu0, pv0, tu0 and tv0
 * move by their per-pixel deltas (dz, dpu, dpv, dtu, dtv) and the pixel and
 * depth pointers p and z move to the next element.  All of these names
 * must exist in the calling scope.
 *
 * Expands to a single do { } while ( 0 ) statement so that the whole step
 * is governed by an enclosing unbraced if/while.
 */
#define	INNER_INTERP_VARS()		\
	do {				\
		z0 += dz;		\
		pu0 += dpu;		\
		pv0 += dpv;		\
		tu0 += dtu;		\
		tv0 += dtv;		\
		p++;			\
		z++;			\
	} while ( 0 )

// Cubic blend weight v*v*(3 - 2v): yields 0 for v = 0 and 1 for v = 1.
// filter() applies it to the fractional part of a coordinate before
// blending neighbouring samples.  The argument is expanded three times, so
// it must be free of side effects.
#define	CUBIC( v )	((v) * (v) * (3.0f - 2.0f * (v)))

// Rasteriser state shared by the functions in this file.
long *		tri_back_buffer;	// colour target, set by tri_init(); rows are WIDTH elements apart
float *		tri_z_buffer;		// depth target, set by tri_init(); same layout as the back buffer
char *		tri_phong_map;		// 256x256 light-map allocated and filled by tri_init()
long *		tri_texture;		// current texture, set by tri_set_texture(); indexed with a row stride of 256

// Binds the rasteriser to its render targets and builds the phong
// light-map.
//
//   back_buffer - colour buffer that tri() draws into (stored, not copied)
//   z_buffer    - depth buffer that tri() tests and updates (stored, not copied)
//
// The light-map is a 256x256 table of intensities 0..255 computed as
// 255 * (sin(i*pi/256) * sin(j*pi/256))^4.  It is allocated on the first
// call and reused on later calls, so calling tri_init() again (for example
// with new buffers) does not leak the previous map.  If the allocation
// fails the function returns with tri_phong_map still null.
void tri_init( long *back_buffer, float *z_buffer )
{
	const float	phong_amp = 255.0f;	// peak light-map intensity

	tri_back_buffer = back_buffer;
	tri_z_buffer = z_buffer;

	// generate phong light-map
	if ( !tri_phong_map ) {
		// nothrow form: plain new reports failure by throwing, which
		// would make the null check below unreachable.
		tri_phong_map = new (std::nothrow) char [256 * 256];
		if ( !tri_phong_map ) return;
	}
	for ( int i = 0; i < 256; i++ ) {
		// the i-dependent factor is constant across the inner loop
		const double	si = sin( i * PI / 256.0 );

		for ( int j = 0; j < 256; j++ ) {
			int	s = (int)(phong_amp * pow( si * sin( j * PI / 256.0 ), 4 ));

			if ( s < 0 ) s = 0; else if ( s > 255 ) s = 255;
			// Stored as a raw byte; values above 127 only read back
			// correctly through unsigned char.
			tri_phong_map[(j << 8) + i] = (char)s;
		}
	}
}

// Selects the texture sampled by subsequent drawing.  Only the pointer is
// stored; the texel data is not copied, so it must stay valid while it is
// in use.  filter() addresses it with a row stride of 256 texels.
void tri_set_texture( long *texture )
{
	tri_texture = texture;
}

// pixel filter
//
// Computes the shaded colour of one pixel and returns it packed as
// (r << 16) | (g << 8) | b, each channel clamped to 0..255.
//
//   pu, pv - position in the 256x256 phong light-map
//   tu, tv - position in the current texture (row stride 256 texels)
//
// Both lookups blend the four samples surrounding the position, weighting
// the fractional parts with CUBIC().  The texture colour is brightened by
// 127, floored at the light intensity, then scaled by intensity / 255.
//
// Light-map addressing wraps at 256 in both directions, so the "+1"
// neighbours on the last row/column and any out-of-range coordinate stay
// inside the 256*256 table.  Samples are read as unsigned bytes so
// intensities above 127 are not turned negative where char is signed.
inline long filter( float pu, float pv, float tu, float tv )
{
	long	i;
	int	p0, p1, p2, p3;
	float	mp0, mp1, fp;
	int	pui, pvi, tui, tvi;
	int	pu_next, row0, row1;
	float	puf, pvf, tuf, tvf;
	float	wu, wv;
	const unsigned char *	phong = (const unsigned char *)tri_phong_map;

	// phong lighting
	puf = floor( pu );
	pui = (int)puf & 0xff;
	pvf = floor( pv );
	pvi = (int)pvf & 0xff;
	pu_next = (pui + 1) & 0xff;		// right neighbour, wrapped
	row0 = pvi << 8;
	row1 = ((pvi + 1) & 0xff) << 8;		// row below, wrapped
	p0 = phong[row0 + pui];
	p1 = phong[row0 + pu_next];
	p2 = phong[row1 + pui];
	p3 = phong[row1 + pu_next];
	puf = pu - puf;			// get fraction parts
	pvf = pv - pvf;
	wu = CUBIC( puf );
	wv = CUBIC( pvf );
	mp0 = (float)p0 + (float)(p2 - p0) * wv;
	mp1 = (float)p1 + (float)(p3 - p1) * wv;
	fp = mp0 + (mp1 - mp0) * wu;

	// texture map
	long	c0, c1, c2, c3;
	float	r0, g0, b0;
	float	r1, g1, b1;
	float	r2, g2, b2;
	float	r3, g3, b3;
	float	mr0, mr1;
	float	mg0, mg1;
	float	mb0, mb1;
	float	fr, fg, fb;

	// NOTE(review): the texture's height is not visible from here, so its
	// addressing is left unwrapped; i + 256 / i + 257 can run past the end
	// on the last row -- confirm the texture dimensions with the caller.
	tuf = floor( tu );
	tui = (int)tuf;
	tvf = floor( tv );
	tvi = (int)tvf;
	i = (tvi << 8) + tui;
	c0 = tri_texture[i];
	c1 = tri_texture[i + 1];
	c2 = tri_texture[i + 256];
	c3 = tri_texture[i + 257];
	r0 = (float)((c0 >> 16) & 0xff);
	g0 = (float)((c0 >> 8) & 0xff);
	b0 = (float)(c0 & 0xff);
	r1 = (float)((c1 >> 16) & 0xff);
	g1 = (float)((c1 >> 8) & 0xff);
	b1 = (float)(c1 & 0xff);
	r2 = (float)((c2 >> 16) & 0xff);
	g2 = (float)((c2 >> 8) & 0xff);
	b2 = (float)(c2 & 0xff);
	r3 = (float)((c3 >> 16) & 0xff);
	g3 = (float)((c3 >> 8) & 0xff);
	b3 = (float)(c3 & 0xff);
	tuf = tu - tuf;			// get fraction parts
	tvf = tv - tvf;
	wu = CUBIC( tuf );
	wv = CUBIC( tvf );
	mr0 = r0 + (r2 - r0) * wv;
	mr1 = r1 + (r3 - r1) * wv;
	fr = mr0 + (mr1 - mr0) * wu;
	mg0 = g0 + (g2 - g0) * wv;
	mg1 = g1 + (g3 - g1) * wv;
	fg = mg0 + (mg1 - mg0) * wu;
	mb0 = b0 + (b2 - b0) * wv;
	mb1 = b1 + (b3 - b1) * wv;
	fb = mb0 + (mb1 - mb0) * wu;

	// combine: brighten, floor at the light intensity, scale by the light
	fr += 127.0f;
	fg += 127.0f;
	fb += 127.0f;
	if ( fr < fp ) fr = fp;
	if ( fg < fp ) fg = fp;
	if ( fb < fp ) fb = fp;
	fp /= 255.0f;
	fr *= fp;
	fg *= fp;
	fb *= fp;

	if ( fr < 0.0f ) fr = 0.0f; else if ( fr > 255.0f ) fr = 255.0f;
	if ( fg < 0.0f ) fg = 0.0f; else if ( fg > 255.0f ) fg = 255.0f;
	if ( fb < 0.0f ) fb = 0.0f; else if ( fb > 255.0f ) fb = 255.0f;

	return ((long)fr << 16) | ((long)fg << 8) | (long)fb;
}

// Draws one horizontal span of a triangle.
//
//   p, z         - start of the scanline in the colour and depth buffers
//   x0, x1       - left and right span ends; pixels ceil(x0) up to, but not
//                  including, ceil(x1) are visited
//   z0, z1       - depth at the two ends
//   pu*, pv*     - light-map coordinates at the two ends
//   tu*, tv*     - texture coordinates at the two ends
//
// Every attribute is stepped linearly across the span.  A pixel is written
// only when the depth already stored for it is greater than the span's
// depth there; the colour comes from filter(), called with the stepped
// coordinates multiplied by that depth, and the stored depth is replaced.
inline void inner( long *p, float *z, float x0, float x1, float z0, float z1,
				float pu0, float pu1, float pv0, float pv1,
				float tu0, float tu1, float tv0, float tv1 )
{
	const float	first = ceil( x0 );
	const float	last = ceil( x1 );

	// nothing to draw when the span covers no pixel column
	if ( last <= first ) return;

	const int	start = (int)first;
	long *		pix = p + start;
	float *		depth = z + start;

	// per-pixel gradients along the span
	const float	span = x1 - x0;
	const float	dz = (z1 - z0) / span;
	const float	dtu = (tu1 - tu0) / span;
	const float	dtv = (tv1 - tv0) / span;
	const float	dpu = (pu1 - pu0) / span;
	const float	dpv = (pv1 - pv0) / span;

	// pre-step from the exact left end to the first pixel column
	const float	sta = first - x0;
	z0 += dz * sta;
	tu0 += dtu * sta;
	tv0 += dtv * sta;
	pu0 += dpu * sta;
	pv0 += dpv * sta;

	for ( int remaining = (int)(last - first); remaining > 0; remaining-- ) {
		if ( *depth > z0 ) {
			const float	pu = pu0 * z0;
			const float	pv = pv0 * z0;
			const float	tu = tu0 * z0;
			const float	tv = tv0 * z0;

			*pix = filter( pu, pv, tu, tv );
			*depth = z0;
		}

		// advance to the next pixel
		z0 += dz;
		pu0 += dpu;
		pv0 += dpv;
		tu0 += dtu;
		tv0 += dtv;
		pix++;
		depth++;
	}
}

// Rasterises one triangle into tri_back_buffer / tri_z_buffer.
//
//   sv0, sv1, sv2 - the three vertices, in any order.  The fields read here
//                   are x, y, z, pu, pv (light-map coordinates) and tu, tv
//                   (texture coordinates).
//
// The vertices are sorted by y, then the triangle is walked top to bottom
// in two parts: scanlines from v0 down to v1, then from v1 down to v2.
// Edge 0 is always the long edge v0 -> v2; edge 1 is v0 -> v1 in the first
// part and v1 -> v2 in the second.  Each scanline is drawn by inner().
//
// NOTE(review): no clipping against the buffer bounds is visible in this
// function -- presumably the caller guarantees on-screen coordinates and
// non-zero vertex z; confirm.
void tri( tri_vertex *sv0, tri_vertex *sv1, tri_vertex *sv2 )
{
	int		y, h;
	long *		p;
	float *		z;
	tri_vertex *	v0;
	tri_vertex *	v1;
	tri_vertex *	v2;
	float		spa;
	float		y0c, y1c, y2c;
	// Running values (xx, zz, pu, pv, tu, tv) and per-scanline deltas
	// (d*) for edge 0 and edge 1.  The names are fixed by the
	// token-pasting macros CALC_DXS / START_VARS_SPA / INTERP_VARS.
	float		xx0, xx1, dx0, dx1;
	float		zz0, zz1, dz0, dz1;
	float		pu0, pu1, dpu0, dpu1;
	float		pv0, pv1, dpv0, dpv1;
	float		tu0, tu1, dtu0, dtu1;
	float		tv0, tv1, dtv0, dtv1;
	// Per-vertex attributes as they are interpolated (see below).
	float		v0z, v0pu, v0pv, v0tu, v0tv;
	float		v1z, v1pu, v1pv, v1tu, v1tv;
	float		v2z, v2pu, v2pv, v2tu, v2tv;

	// v0 = smallest y, v1 = middle, v2 = largest y
	SORT_VERTICES( sv0, sv1, sv2, v0, v1, v2 );
	// first scanline at or below each vertex
	y0c = ceil( v0->y );
	y1c = ceil( v1->y );
	y2c = ceil( v2->y );
	// the triangle does not reach any scanline
	if ( y2c <= y0c ) return;

	// For each vertex: divide the light-map and texture coordinates by z
	// (via the reciprocal), then invert the reciprocal again so that vNz
	// holds the depth itself.  inner() multiplies the interpolated
	// coordinates by the interpolated depth before sampling.
	v0z = 1.0f / v0->z;
	v0pu = v0->pu * v0z;
	v0pv = v0->pv * v0z;
	v0tu = v0->tu * v0z;
	v0tv = v0->tv * v0z;
	v0z = 1.0f / v0z;

	v1z = 1.0f / v1->z;
	v1pu = v1->pu * v1z;
	v1pv = v1->pv * v1z;
	v1tu = v1->tu * v1z;
	v1tv = v1->tv * v1z;
	v1z = 1.0f / v1z;

	v2z = 1.0f / v2->z;
	v2pu = v2->pu * v2z;
	v2pv = v2->pv * v2z;
	v2tu = v2->tu * v2z;
	v2tv = v2->tv * v2z;
	v2z = 1.0f / v2z;

	// start at the first scanline; p and z point at the start of that row
	y = (int)y0c;
	p = tri_back_buffer + y * WIDTH;
	z = tri_z_buffer + y * WIDTH;
	// distance from v0 down to the first scanline, used to pre-step the edges
	spa = y0c - v0->y;			// SPA value
	// edge 0: the long edge v0 -> v2 (y2c > y0c was checked above)
	CALC_DXS( 0, 2, 0 );
	START_VARS_SPA( 0, 0 );
	// first part: scanlines y0c .. y1c-1, skipped when it covers no scanline
	if ( y0c < y1c ) {
		// edge 1: v0 -> v1
		CALC_DXS( 1, 1, 0 );
		START_VARS_SPA( 1, 0 );
		h = (int)y1c;
		// Both edges start at v0, so the smaller slope is the left edge;
		// inner() expects its first end to be the left one.
		if ( dx0 < dx1 ) { while ( y < h ) {
			inner( p, z, xx0, xx1, zz0, zz1,
				pu0, pu1, pv0, pv1, tu0, tu1, tv0, tv1 );
			INTERP_VARS();
		} } else { while ( y < h ) {
			inner( p, z, xx1, xx0, zz1, zz0,
				pu1, pu0, pv1, pv0, tu1, tu0, tv1, tv0 );
			INTERP_VARS();
		} }
	}
	// second part covers no scanline
	if ( y2c <= y1c ) return;		// clip bottom
	// edge 1 restarts at v1 and runs to v2; edge 0 simply continues
	spa = y1c - v1->y;			// SPA value
	CALC_DXS( 1, 2, 1 );
	START_VARS_SPA( 1, 1 );
	h = (int)y2c;
	// The edges now meet at the bottom, so left/right is decided from
	// their current x positions instead of their slopes.
	if ( xx0 < xx1 ) { while ( y < h ) {
		inner( p, z, xx0, xx1, zz0, zz1,
			pu0, pu1, pv0, pv1, tu0, tu1, tv0, tv1 );
		INTERP_VARS();
	} } else { while ( y < h ) {
		inner( p, z, xx1, xx0, zz1, zz0,
			pu1, pu0, pv1, pv0, tu1, tu0, tv1, tv0 );
		INTERP_VARS();
	} }
}
