/**************************************************************************
 *                                                                        *
 *                            bil.c                                       *
 *                                                                        *
 *  fixed 4x4 dithering applied to bilinear filtering.                    *
 *                                                                        *
 * Skal/Bomb!                                                             *
 * skal@planet-d.net                                                      *
 **************************************************************************/

// Uncomment one of the following flags: 'DOS', 'X11' or 'SVGALIB'.

///////////////////////////////////////////////////////////////////////////
// MS-DOS flag (DJGPP or WATCOM) (or Dosemu:).
//  Compile with:
// gcc -o bil bil.c -O6 -lm
//  or:
// wcc386 -j -ei -ot -or -5s -fp5 bil.c
// wcl386 /x -fe=bil bil.obj
///////////////////////////////////////////////////////////////////////////

#define DOS

///////////////////////////////////////////////////////////////////////////
// Unix-Linux/X11 flag.
//  Compile with:
// gcc -o bil bil.c -O6 -lm -lX11 -lXext
///////////////////////////////////////////////////////////////////////////

//#define X11

///////////////////////////////////////////////////////////////////////////
// Linux/SVGALIB flag.
//  Compile with:
// gcc -o bil bil.c -O6 -lm -lvgagl -lvga
///////////////////////////////////////////////////////////////////////////

//#define SVGALIB 

///////////////////////////////////////////////////////////////////////////
// here we go
///////////////////////////////////////////////////////////////////////////

#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

/* Short integer aliases used by all the fixed-point code below.
 * NOTE(review): the wrap-around arithmetic on SHORT/USHORT appears to
 * assume they are exactly 16 bits wide -- confirm for the target compiler. */
typedef unsigned char       BYTE;
typedef short int           SHORT;
typedef unsigned short int  USHORT;

/* Size of the rendered picture: the mapping routines emit Width bytes
 * per scanline, for Height scanlines. */
#define Width   320
#define Height  200

///////////////////////////////////////////////////////////////////////////

/* we need this... */
/* Video back-end entry points used by this file.  They are only declared
 * here; presumably "video.h" (included below) supplies them for the
 * selected DOS / X11 / SVGALIB flag -- TODO confirm. */

/* main() treats a non-zero return value as failure. */
extern int  Video_On(int Nb_Colors);
extern void Video_Off( );
/* main() brackets the drawing of every frame with Lock/Unlock. */
extern void Video_Lock( );
extern void Video_Unlock( );
/* main() passes Color_Map here, viewed as a flat BYTE array. */
extern void Video_Store_Colors(int Nb, BYTE *Colors);
/* main() keeps rendering while this returns zero. */
extern int Get_Key();

/* Start of the frame buffer the renderers write to.  It is never assigned
 * in this file; presumably the video layer sets it -- TODO confirm. */
static BYTE *Vid_Mem;
/* Distance, in bytes, between two consecutive scanlines in Vid_Mem. */
static int Stride;

#include "video.h"    /* ... and it's there */

///////////////////////////////////////////////////////////////////////////

// Size, in pixels, of one interpolation cell: U/V are only computed at the
// corners of BLK_X x BLK_Y cells and interpolated in between.
#define BLK_X 8 // interpolation will be performed on a BLK_X x BLK_Y basis.
#define BLK_Y 8 // try using different values...

// Number of grid nodes per row (BW) and per column (BH): one node per cell
// corner, hence the "+1".
#define BW  (Width /BLK_X+1)
#define BH  (Height/BLK_Y+1)
// Texture coordinates at every grid node, stored row after row.
SHORT U_Map[BW*BH];
SHORT V_Map[BW*BH];
// Size of the precomputed plasma source.  The plasma routines read
// BW x BH windows (plus neighbours) out of it at moving offsets.
#define BW2 (2*BW-1)
#define BH2 (2*BH-1)
SHORT Plasma_Base[BW2*BH2];

///////////////////////////////////////////////////////////////////////////

// 16x16 texture, one byte per texel; indexed by the mappers with a byte
// whose high nibble comes from V and low nibble from U.
BYTE Texture[16*16];
// Palette handed to Video_Store_Colors(); only the first Nb_Colors entries
// are announced to the video layer.
struct { BYTE R, G, B; } Color_Map[256];
int Nb_Colors;
// Two scanline buffers of Width bytes each (the smoothing mapper uses both
// halves; the other mappers only use the first one).
BYTE Scan[2*Width]; // We do *not* write bytes directly into video!
                    // Instead, we wait for the whole line to be ready,
                    // and transfer it 32bits at a time...

///////////////////////////////////////////////////////////////////////////

   // Build a BW x BH grid of plasma values in Dst[] by adding two windows
   // of Plasma_Base[], each one sampled with bilinear interpolation.
   //
   //   Dst    : receives BW*BH values, row after row.
   //   X1,Y1  : top-left corner of the first window, 8.8 fixed point
   //            (high byte = Plasma_Base cell, low byte = fraction).
   //   X2,Y2  : same thing for the second window.
   //
   // Nothing is range-checked: starting at its integer corner, each window
   // reads BW+1 columns and BH+1 rows of Plasma_Base[].

void Do_Plasma_Interp(SHORT *Dst, USHORT X1, USHORT Y1, USHORT X2, USHORT Y2)
{
    // upper row currently sampled in each window
  const SHORT *Top1 = Plasma_Base + BW2*(Y1>>8) + (X1>>8);
  const SHORT *Top2 = Plasma_Base + BW2*(Y2>>8) + (X2>>8);

    // fractional part (0..255) of both corners
  const int fx1 = X1 & 0xff, fy1 = Y1 & 0xff;
  const int fx2 = X2 & 0xff, fy2 = Y2 & 0xff;

    // bilinear weights: tl/tr = upper left/right, bl/br = lower left/right
  const int w1_tl = (256-fx1)*(256-fy1) / 256;
  const int w1_tr =      fx1 *(256-fy1) / 256;
  const int w1_bl = (256-fx1)*     fy1  / 256;
  const int w1_br =      fx1 *     fy1  / 256;

  const int w2_tl = (256-fx2)*(256-fy2) / 256;
  const int w2_tr =      fx2 *(256-fy2) / 256;
  const int w2_bl = (256-fx2)*     fy2  / 256;
  const int w2_br =      fx2 *     fy2  / 256;

  int Row, Col;

  for (Row = 0; Row < BH; ++Row) {
    const SHORT *Bot1 = Top1 + BW2;   // row just below, in each window
    const SHORT *Bot2 = Top2 + BW2;

    for (Col = 0; Col < BW; ++Col) {
      int Sum;
      Sum  = w1_tl*Top1[Col] + w1_tr*Top1[Col+1];
      Sum += w1_bl*Bot1[Col] + w1_br*Bot1[Col+1];
      Sum += w2_tl*Top2[Col] + w2_tr*Top2[Col+1];
      Sum += w2_bl*Bot2[Col] + w2_br*Bot2[Col+1];
      *Dst++ = Sum>>8;                // drop the 8 bits of weight scaling
    }
    Top1 = Bot1;
    Top2 = Bot2;
  }
}

  // Same job as Do_Plasma_Interp(), but the fractional part of the window
  // corners is ignored: Dst[] simply receives the sum of the two BW x BH
  // windows of Plasma_Base[] starting at the integer part (high byte) of
  // (X1,Y1) and (X2,Y2).
  // Try this one instead of Do_Plasma_Interp(), and enjoy the difference!

void Do_Plasma_Non_Interp(SHORT *Dst, USHORT X1, USHORT Y1, USHORT X2, USHORT Y2)
{
  const SHORT *Row1 = Plasma_Base + BW2*(Y1>>8) + (X1>>8);
  const SHORT *Row2 = Plasma_Base + BW2*(Y2>>8) + (X2>>8);
  int Rows_Left = BH;

  while (Rows_Left-- > 0) {
    int Col;
    for (Col = 0; Col < BW; ++Col)
      Dst[Col] = Row1[Col] + Row2[Col];
    Dst  += BW;
    Row1 += BW2;
    Row2 += BW2;
  }
}

// First pair of moving window corners, as a function of time x.
// Each output is a fraction of the grid size (BW or BH) oscillating with
// x, converted to 8.8 fixed point and rounded down.
void Make_Move1(float x, SHORT *X1, SHORT *Y1, SHORT *X2, SHORT *Y2)
{
  const double Wave_X1 = sin(x*3.14+.42);
  const double Wave_Y1 = cos(x*3.14-.53);
  const double Wave_X2 = sin(x*4.03+.72);
  const double Wave_Y2 = cos(x*2.09-.13);

  *X1 = (SHORT)floor((0.53+Wave_X1*0.32)*BW*256.0);
  *Y1 = (SHORT)floor((0.65+Wave_Y1*0.26)*BH*256.0);
  *X2 = (SHORT)floor((0.42+Wave_X2*0.42)*BW*256.0);
  *Y2 = (SHORT)floor((0.45+Wave_Y2*0.23)*BH*256.0);
}

// Second pair of moving window corners, as a function of time x.
// Same construction as the first pair, with other frequencies, phases and
// amplitudes; results are 8.8 fixed point, rounded down.
void Make_Move2(float x, SHORT *X1, SHORT *Y1, SHORT *X2, SHORT *Y2)
{
  const double Wave_X1 = sin(x*4.05+.42);
  const double Wave_Y1 = cos(x*3.14-.53);
  const double Wave_X2 = sin(x*3.92+.12);
  const double Wave_Y2 = cos(x*2.75+.76);

  *X1 = (SHORT)floor((0.42-Wave_X1*0.14)*BW*256.0);
  *Y1 = (SHORT)floor((0.43-Wave_Y1*0.40)*BH*256.0);
  *X2 = (SHORT)floor((0.53+Wave_X2*0.35)*BW*256.0);
  *Y2 = (SHORT)floor((0.33-Wave_Y2*0.29)*BH*256.0);
}

   // Compute the U_Map[] and V_Map[] texture coordinates of the plasma, on
   // the small BW x BH grid, for time x: U uses the window positions given
   // by Make_Move1(), V the ones given by Make_Move2().

void Make_Plasma(float x)
{
  SHORT Ux1, Uy1, Ux2, Uy2;   // windows feeding U_Map
  SHORT Vx1, Vy1, Vx2, Vy2;   // windows feeding V_Map

  Make_Move1(x, &Ux1, &Uy1, &Ux2, &Uy2);
  Make_Move2(x, &Vx1, &Vy1, &Vx2, &Vy2);

  Do_Plasma_Interp(U_Map, Ux1, Uy1, Ux2, Uy2);
  Do_Plasma_Interp(V_Map, Vx1, Vy1, Vx2, Vy2);
}

///////////////////////////////////////////////////////////////////////////

   // Alternative source of texture coordinates: fill U_Map[] and V_Map[]
   // (small BW x BH grid) with a rotated and zoomed lattice.
   // The rotation angle is t modulo 6.28 and the zoom oscillates with
   // sin(3*t).  Coordinates are stepped in 16.16 fixed point and only the
   // low 16 bits survive the store into the SHORT maps.

void Make_Rotozoom(float t)
{
  float Angle, Scale, Cos_Z, Sin_Z, U_Org, V_Org;
  int Row_U, Row_V;      // coordinates of the first node of the current row
  int Row_dU, Row_dV;    // step from one grid row to the next
  int Col_dU, Col_dV;    // step from one grid column to the next
  int Row, Col, Base;

  Angle = fmod(t,6.28);
  Scale = (0.45*sin(t*3.0) + 0.5) / 16.0;
  Cos_Z = cos( Angle )*Scale;
  Sin_Z = sin( Angle )*Scale;
  U_Org = (BW*Scale - Cos_Z)/2.0;
    // NOTE(review): BW (not BH) is used for V as well -- confirm intended.
  V_Org = (BW*Scale + Sin_Z)/2.0;

   // let's go fixed-point

#define FIX_SCALE 65536.0

  Col_dU = (int)floor(Cos_Z*FIX_SCALE);
  Col_dV = (int)floor(Sin_Z*FIX_SCALE);
    // the row step is the column step turned by 90 degrees, corrected
    // for the cell aspect ratio
  Row_dU = -Col_dV*BLK_Y/BLK_X;
  Row_dV =  Col_dU*BLK_Y/BLK_X;
  Row_U  = (int)floor(U_Org*FIX_SCALE);
  Row_V  = (int)floor(V_Org*FIX_SCALE);

  for (Row = 0, Base = 0; Row < BH; ++Row, Base += BW) {
    int U = Row_U;
    int V = Row_V;
    for (Col = 0; Col < BW; ++Col) {
      U_Map[Base+Col] = U;  U += Col_dU;
      V_Map[Base+Col] = V;  V += Col_dV;
    }
    Row_U += Row_dU;
    Row_V += Row_dV;
  }
}

///////////////////////////////////////////////////////////////////////////
// interpolation + mapping stuff
///////////////////////////////////////////////////////////////////////////

// Per-column state of the vertical interpolation, set up by UV_Mapping()
// or Show_Interpolation() and advanced by the per-band drawing code:
//   U0[], V0[]   : U/V value at each of the BW grid columns, for the
//                  scanline about to be drawn;
//   dU0[], dV0[] : amount added to U0[]/V0[] after every scanline.
SHORT  U0[BW],  V0[BW];
SHORT dU0[BW], dV0[BW];
// One texture index per pixel of the current scanline (V nibble in the
// high 4 bits, U nibble in the low 4 bits).
BYTE UV_Coord[Width];
// Dither offsets for a band of BLK_Y scanlines of Width pixels; filled in
// by Init().
USHORT Dither[BLK_Y*Width];
// Write cursor into the frame buffer: start of the next scanline to emit.
BYTE *Out;

  // interpolation/mapping for U_Map[] and V_Map[]

void Map_UV()
{
  int i, j, k;
  for(j=BLK_Y; j>0; --j) {
    BYTE *Coord = UV_Coord;

    USHORT U, V;
    U = U0[0]; U0[0] = U + dU0[0];
    V = V0[0]; V0[0] = V + dV0[0];

    for(i=1; i<BW; i++) {
      USHORT Uo, Vo, dU, dV;
      Uo = U; Vo = V;
      U = U0[i]; U0[i] += dU0[i]; dU = (SHORT)(U-Uo)/BLK_X;
      V = V0[i]; V0[i] += dV0[i]; dV = (SHORT)(V-Vo)/BLK_X;
      Coord += BLK_X;
      for(k=-BLK_X;k<0;++k) {
        Coord[k] = ((Vo>>8)&0xf0) | (Uo>>12);
        Uo += dU; Vo += dV;
      }
    }
    for(i=-Width; i<0; ++i)
      Scan[Width+i] = Texture[UV_Coord[Width+i]];
    memcpy(Out, Scan, Width);  // this will move 32bits at a time
    Out += Stride;
  }
}

  // interpolation/mapping from U_Map[] and V_Map[]
  // with dithering

void Map_UV_Dithered() {
  int i, j, k;
  USHORT *Dith = Dither;

  for(j=BLK_Y; j>0; --j) {
    BYTE *Dst = UV_Coord;

    USHORT U, V;
    U = U0[0]; U0[0] += dU0[0];
    V = V0[0]; V0[0] += dV0[0];

    for(i=1; i<BW; i++) {
      USHORT Uo, Vo, dU, dV;
      Uo = U; Vo = V;
      U = U0[i]; U0[i] += dU0[i]; dU = (SHORT)(U-Uo)/BLK_X;
      V = V0[i]; V0[i] += dV0[i]; dV = (SHORT)(V-Vo)/BLK_X;
      Dst  += BLK_X;
      Dith += BLK_X;
      for(k=-BLK_X; k<0;++k) {
        USHORT u,v;
        u = Uo+Dith[k]; Uo += dU; // dither...
        v = Vo+Dith[k]; Vo += dV;
        Dst[k] = ((v>>12)<<4) | (u>>12); // (v&0xff00) | (u>>8);
      }
    }
      // ...and map.
    for(i=-Width; i<0; ++i)
      Scan[Width+i] = Texture[UV_Coord[Width+i]];
    memcpy(Out, Scan, Width);
    Out += Stride;
  }
}

  // interpolation/mapping from U_Map[] and V_Map[]
  // with dithering and on-the-fly averaging
  // (yes! this is an ugly trick)
  //
  // Draws one band of BLK_Y scanlines at Out.  Each scanline is first
  // produced by dithered interpolation and texture lookup, then every pixel
  // but the last one is replaced by the mean of itself, its right-hand
  // neighbour, and the two matching pixels of the line emitted just before.

void Map_UV_Dithered_Smoothed() {
  int i, j, k;
  USHORT *Dith = Dither;   // the dither band is walked from its start on every call
  // Buf1 = line being built, Buf2 = line emitted just before.  Both are
  // halves of Scan[]; they are static so that the previous line is still
  // available when the next call starts.
  static BYTE* Buf1 = Scan, *Buf2 = Scan+Width;

  for(j=BLK_Y; j>0; --j) {
    BYTE *Dst = UV_Coord;

    // U/V at the left edge of the line; the column state is advanced to
    // the next scanline as soon as it has been read.
    USHORT U, V;
    U = U0[0]; U0[0] += dU0[0];
    V = V0[0]; V0[0] += dV0[0];

    for(i=1; i<BW; i++) {
      USHORT Uo, Vo, dU, dV;
      Uo = U; Vo = V;   // left corner of the cell; U/V become its right corner
      // per-pixel steps; the SHORT cast makes the difference signed
      U = U0[i]; U0[i] += dU0[i]; dU = (SHORT)(U-Uo)/BLK_X;
      V = V0[i]; V0[i] += dV0[i]; dV = (SHORT)(V-Vo)/BLK_X;
      // point past the cell and fill it with negative indices
      Dst  += BLK_X;
      Dith += BLK_X;
      for(k=-BLK_X; k<0;++k) {
        USHORT u,v;
        u = Uo+Dith[k]; Uo += dU; // dither...
        v = Vo+Dith[k]; Vo += dV;
        // top 4 bits of v in the high nibble, top 4 bits of u in the low one
        Dst[k] = ((v>>12)<<4) | (u>>12); // (v&0xff00) | (u>>8);
      }
    }
      // ... map,
    for(i=-Width; i<0; ++i)
      Buf1[Width+i] = Texture[UV_Coord[Width+i]];
    // ... and smooth'em all.
    // Done in place, left to right: Buf1[i+1] has not been averaged yet
    // when it is read, and the last pixel of the line is left untouched.
    for(i=0; i<Width-1; ++i)
       Buf1[i] = ( Buf1[i]+Buf1[i+1]+Buf2[i]+Buf2[i+1] ) / 4;
    memcpy(Out, Buf1, Width);
    { BYTE *Tmp = Buf1; Buf1=Buf2; Buf2=Tmp; } // The Swap Thing
    Out += Stride;
  }
}

///////////////////////////////////////////////////////////////////////////

  // Main call: render the whole picture from U_Map[] / V_Map[].
  //
  // The first grid row initialises U0[]/V0[].  Then, for every band of
  // BLK_Y scanlines, the per-scanline increments dU0[]/dV0[] are derived
  // from the next grid row and Map() is called to draw that band.
  // Map is expected to emit BLK_Y scanlines at Out and to advance
  // U0[]/V0[] and Out accordingly.

void UV_Mapping( void(*Map)() )
{
  const SHORT *Next_U = U_Map + BW;   // grid row closing the current band
  const SHORT *Next_V = V_Map + BW;
  int Col, Lines_Left;

  for (Col = 0; Col < BW; ++Col) {
    U0[Col] = U_Map[Col];
    V0[Col] = V_Map[Col];
  }

  Out = Vid_Mem;
  for (Lines_Left = Height; Lines_Left > 0; Lines_Left -= BLK_Y) {
    for (Col = 0; Col < BW; ++Col) {
      dU0[Col] = (SHORT)(Next_U[Col]-U0[Col]) / BLK_Y;
      dV0[Col] = (SHORT)(Next_V[Col]-V0[Col]) / BLK_Y;
    }
    Next_U += BW;
    Next_V += BW;
    Map();
  }
}

///////////////////////////////////////////////////////////////////////////
// debugging 
///////////////////////////////////////////////////////////////////////////

   // Construct and show the BLK_X x BLK_Y-based interpolation of the given
   // grid u[] (supposed to be 8:8 fixed-point): the high byte of every
   // interpolated value is written as a pixel.
   //
   //   u : grid of BW columns; row 0 plus one further row per band of
   //       BLK_Y scanlines is read, i.e. the layout of U_Map[] / V_Map[].
   //       (The parameter used to be declared with a BLK_X*BLK_Y extent,
   //       which did not match what is read; it is a plain pointer now,
   //       which is the same type as far as callers are concerned.)
   //
   // Uses U0[]/dU0[] and Scan[] as scratch space and writes Width bytes
   // per scanline, starting at Vid_Mem.

void Show_Interpolation( SHORT *u )
{
  int Col, Lines_Left;

  for (Col = 0; Col < BW; ++Col)
    U0[Col] = u[Col];

  Out = Vid_Mem;
  for (Lines_Left = Height; Lines_Left > 0; Lines_Left -= BLK_Y)
  {
    int Line;

      // vertical increments towards the next grid row
    u += BW;
    for (Col = 0; Col < BW; ++Col)
      dU0[Col] = (SHORT)(u[Col]-U0[Col]) / BLK_Y;

    for (Line = 0; Line < BLK_Y; ++Line) {
      BYTE *Ptr = Scan;
      USHORT U = U0[0];
      int Node;

      U0[0] += dU0[0];

      for (Node = 1; Node < BW; ++Node, Ptr += BLK_X) {
        USHORT Uo = U;       // left corner of the cell
        USHORT dU;
        int n;

        U = U0[Node];        // right corner of the cell
        U0[Node] += dU0[Node];
          // signed difference, so that wrapping values step correctly
        dU = (SHORT)(U-Uo)/BLK_X;

        for (n = 0; n < BLK_X; ++n) {
          Ptr[n] = (Uo>>8);
          Uo += dU;
        }
      }
      memcpy( Out, Scan, Width);
      Out += Stride;
    }
  }
}

///////////////////////////////////////////////////////////////////////////

  // Show the two grids U_Map[] and V_Map[], as well as the original
  // Plasma_Base[] they were 'plasmoized' from, the four windows of
  // Plasma_Base[] selected for time x, and the colormap.
  // Every grid value is drawn as a 2x2 block of pixels showing its high
  // byte.
  //
  // Warning: there is no bound check when writing into Vid_Mem.

void Show_The_Making_Of_Plasma(float x)
{
  BYTE *Ptr;
  const SHORT *Src1 = U_Map;
  const SHORT *Src2 = V_Map;
  const SHORT *Src3 = Plasma_Base;
  SHORT X1, Y1, X2, Y2;
  int i,j;

// show U_Map (top-left) and V_Map (2*BH scanlines below it)

  Ptr = Vid_Mem;
  for(j=0; j<BH; ++j)
  {
    BYTE *Ptr1 = Ptr;
    BYTE *Ptr2 = Ptr1 + 2*BH*Stride;
    for(i=0; i<BW; i++)
    {
      const BYTE Cu = Src1[i]>>8;
      const BYTE Cv = Src2[i]>>8;
      Ptr1[2*i] = Ptr1[2*i+1] = Ptr1[2*i+Stride] = Ptr1[2*i+Stride+1] = Cu;
      Ptr2[2*i] = Ptr2[2*i+1] = Ptr2[2*i+Stride] = Ptr2[2*i+Stride+1] = Cv;
    }
    Src1 += BW;
    Src2 += BW;
    Ptr += 2*Stride;
  }

// show Plasma_Base, to the right of the two maps

  Ptr = Vid_Mem + 2*BW;
  for(j=0; j<BH2; ++j)
  {
    for(i=0; i<BW2; i++)
    {
      const BYTE C = Src3[i]>>8;
      Ptr[2*i] = Ptr[2*i+1] = Ptr[2*i+Stride] = Ptr[2*i+Stride+1] = C;
    }
    Src3 += BW2;
    Ptr += 2*Stride;
  }

// ..and the moving zones U_Map/V_Map resulted from the add of

#define COL1  0x00
#define COL2  0x10
#define COL3  0x30
#define COL4  0x40
  // Outline, in color C, the window of Plasma_Base whose top-left cell is
  // (X,Y).  Uses the locals Ptr and i.  Wrapped in do/while(0) and with
  // parenthesized arguments so that it behaves as a single statement.
#define DRAW(X,Y,C) \
  do { \
    Ptr = Vid_Mem + 2*BW + 2*Stride*(Y) + 2*(X); \
    for(i=0; i<2*BW; ++i) { Ptr[i] = Ptr[i+BH*2*Stride] = (C); } \
    for(i=0; i<2*BH; ++i) { Ptr[i*Stride] = Ptr[i*Stride+2*BW] = (C); } \
  } while(0)

  Make_Move1(x, &X1, &Y1, &X2, &Y2);
  X1>>=8; Y1>>=8; X2>>=8; Y2>>=8;     // keep the integer cell only
  DRAW(X1,Y1,COL1);
  DRAW(X2,Y2,COL2);

  Make_Move2(x, &X1, &Y1, &X2, &Y2);
  X1>>=8; Y1>>=8; X2>>=8; Y2>>=8;
  DRAW(X1,Y1,COL3);
  DRAW(X2,Y2,COL4);

// don't forget to show the colormap: 5 scanlines holding colors 0..255

  Ptr = Vid_Mem + Stride*(4*BH+2);
  for(j=0; j<5; ++j) {
    for(i=0; i<256; ++i) Ptr[i] = i;
    Ptr += Stride;
  }
}

///////////////////////////////////////////////////////////////////////////
// initialization of data
///////////////////////////////////////////////////////////////////////////

// Side length and number of entries of the dithering matrix.
#define DITH_LEN 4
#define DITH_SIZE (DITH_LEN*DITH_LEN)
// NOTE(review): DITH_OFF is not referenced anywhere in the visible part of
// this file -- confirm whether it is still needed.
#define DITH_OFF (DITH_SIZE/2 + DITH_LEN/2)

// 4x4 threshold matrix: the 16 multiples of 0x10 from 0x00 to 0xf0, each
// appearing exactly once.  Init() expands it into the Dither[] band.
static BYTE Dither_Matrix[DITH_SIZE] = {
  0x00, 0x80, 0x20, 0xa0,
  0xc0, 0x40, 0xe0, 0x60,
  0x30, 0xb0, 0x10, 0x90,
  0xf0, 0x70, 0xd0, 0x50
};

// One-time set-up of the data that does not depend on the selected effect:
// the Plasma_Base[] source, the Dither[] band and a default 256-entry
// colormap.  The What argument is currently not used.
void Init(int What) {
  SHORT *Dst = Plasma_Base;
  int Row, Col;

  (void)What;

      // some sinuses for the base plasma: four waves averaged, then
      // scaled to the SHORT range

  for (Row = 0; Row < BH2; ++Row) {
    float y = 1.0*Row/BH2;
    for (Col = 0; Col < BW2; ++Col) {
      float x = 1.0*Col/BW2;
      float s;
      s  = sin(x*8.3+.53) + cos(y*6.4+.5);
      s += cos((x+y)*7.3+.243) + sin((x-y)*5.36+.32);
      s /= 4.0;
      *Dst++ = (SHORT)floor(s*32767.0);
    }
  }

// Prepare a Width x BLK_Y dithering 'screen-band' for the interpolated
// data, based on the original dithering matrix.
// As the texture is 16x16, and since we use 4:12 fixed-point format for
// U-V coordinates, we need a "<< 4" shift of values.
// Along a scanline the 16 matrix entries are used one after the other, in
// storage order; each new scanline starts DITH_LEN entries further on.

  for (Row = 0; Row < BLK_Y; ++Row)
    for (Col = 0; Col < Width; ++Col)
      Dither[Row*Width + Col] =
        Dither_Matrix[(Col+DITH_LEN*Row) % DITH_SIZE] << 4;

// By default, let's have some "Trauma" [(c) Haplo 1996] colors.
// No doubt, this nifty colormap is guaranteed to emanate from a coder. bwaha:)

  Nb_Colors = 256;
  for (Col = 0; Col < 256; ++Col) {
    Color_Map[Col].R = Col*3;
    Color_Map[Col].G = Col;
    Color_Map[Col].B = Col;
  }
}

// Set-up for the textured effects: fill the 16x16 Texture[] with random
// values in 0..15 (the generator is seeded from the current time) and
// install a 16-entry color ramp, which differs between the rotozoom
// (Use_Rotozoom != 0) and the plasma (Use_Rotozoom == 0).
void Init_Texture(int Use_Rotozoom)
{
  int i;

  srand(time(0));
  for (i = 0; i < 16*16; ++i)
    Texture[i] = (rand()>>4)&0x0f;   // what a nice texture we have!!

// we need 16 colors on a ramp

  for (i = 0; i < 16; ++i) {
    if (Use_Rotozoom) {
      Color_Map[i].R = i*3;
      Color_Map[i].G = i*6;
      Color_Map[i].B = i*5;
    }
    else {
      Color_Map[i].R = i*5.4;
      Color_Map[i].G = i*1.1;
      Color_Map[i].B = i*1.6;
    }
  }
  Nb_Colors = 16;
}

///////////////////////////////////////////////////////////////////////////
// main
///////////////////////////////////////////////////////////////////////////

#if defined(X11) || defined(SVGALIB)
   // we don't want some pending shm after a SIGINT, do we?
#include <signal.h>
// SIGINT handler, installed by main() in X11 and SVGALIB builds: shuts the
// video layer down, then terminates the program with exit status 0.
// Dummy receives the signal number and is ignored.
// NOTE(review): exit() is not on the POSIX list of async-signal-safe
// functions, and Video_Off() cannot be checked from here -- confirm that
// calling them from a handler is acceptable.
static void User_Abort( int Dummy )
{
   Video_Off( );
   exit( 0 );      
}
#endif

///////////////////////////////////////////////////////////////////////////
//   Usage:
//
//  bil   -> dithered and smoothed plasma
//  bil 1 -> dithered plasma
//  bil 2 -> non-dithered plasma
//  bil 3 -> dithered and smoothed rotozoom
//  bil 4 -> dithered rotozoom
//  bil 5 -> non-dithered rotozoom
//  bil 6 -> show the making of the plasma
//  bil 7 -> show the U-plasma
//  bil 8 -> show the V-plasma


// Entry point.  The optional first argument selects the display mode:
//
//   0 (or none)  dithered and smoothed plasma
//   1 / 2        dithered / non-dithered plasma
//   3 / 4 / 5    dithered+smoothed / dithered / non-dithered rotozoom
//   6            the making of the plasma
//   7 / 8        the interpolated U / V plasma grid
//
// Any other value behaves like 0.  Frames are rendered until a key is
// pressed or the frame limit runs out, then the frame rate is printed.
// Returns 1 if the video mode could not be set up, 0 otherwise.
//
// Two fixes compared with the previous version:
//  - modes 7 and 8 were swapped (7 showed V_Map and 8 showed U_Map);
//  - the frame count reported was one too high when the loop ended on the
//    frame limit rather than on a key press.
int main( int argc, char **argv )
{
  int What = 0;
  int Use_Rotozoom = 0;
  int Count = 100000;   // frame limit (the loop stops when it reaches 0)
  int Frames = 0;       // frames actually rendered
  clock_t Time;

  if (argc>1) What = atoi(argv[1]);

  if (What>=3 && What<6) Use_Rotozoom = 1;

  Init(What);
    // the debug modes keep the 256-color map installed by Init()
  if (What<6) Init_Texture(Use_Rotozoom);

  if ( Video_On(Nb_Colors) ) return 1;
  Video_Store_Colors( Nb_Colors, (BYTE*)Color_Map );

#if defined(X11) || defined(SVGALIB)
  signal( SIGINT, User_Abort );
#endif
#if defined(X11)
  {
      // translate the texture through the X11_Pixels[] table
    int i;
    for(i=0; i<16*16; ++i)
      Texture[i] = X11_Pixels[Texture[i]];
  }
#endif

  Time = clock();
  while( (!Get_Key()) && (--Count) )
  {
    static float cnt=0;
    float x = 1.0*cnt/1000.0;
    cnt += 3.0;

    if (Use_Rotozoom) Make_Rotozoom(x);
    else Make_Plasma(x);

    Video_Lock();

    switch (What) {
        // debugging
      case 7: Show_Interpolation(U_Map); break;
      case 8: Show_Interpolation(V_Map); break;
      case 6: Show_The_Making_Of_Plasma(x); break;

        // non-dithered rotozoom / plasma
      case 5:
      case 2: UV_Mapping( Map_UV ); break;

        // dithered rotozoom / plasma
      case 4:
      case 1: UV_Mapping( Map_UV_Dithered ); break;

        // dithered and smoothed rotozoom (3) / plasma (anything else)
      default: UV_Mapping( Map_UV_Dithered_Smoothed ); break;
    }

    Video_Unlock();
    ++Frames;
  }
  Time = clock()-Time;

  Video_Off( );

  if (Frames && Time) {
    float t = 1.0f*Time/CLOCKS_PER_SEC;
    printf( "%d frames in %.2f sec -> %.2f FPS\n", Frames, t, Frames/t );
  }
  return 0;
}

///////////////////////////////////////////////////////////////////////////
