/**
 * Math routines for 4K intros using inline assembler.
 *
 * Coded by:
 *  chaos & ryg of Farbrausch (http://xyzw.de/c190.html)
 */

/* Local declaration of size_t so that no CRT header has to be included.
   NOTE(review): unsigned int only matches the real size_t on 32-bit targets - confirm the build is x86-only. */
typedef unsigned int size_t;

/* Short type aliases used by the rest of this file. The U/S/F prefix and the
   number in each name give signedness/float and the width in bits as they are
   on the 32-bit MSVC target this file is written for. */
typedef unsigned char             sU8;    // unsigned byte
typedef signed char               sS8;    // signed byte
typedef unsigned short            sU16;   // unsigned 16-bit integer
typedef short                     sS16;   // signed 16-bit integer
typedef unsigned int              sU32;   // unsigned 32-bit integer
typedef int                       sS32;   // signed 32-bit integer
typedef float                     sF32;   // single-precision float
typedef unsigned __int64          sU64;   // unsigned 64-bit integer (MSVC __int64)
typedef signed __int64            sS64;   // signed 64-bit integer (MSVC __int64)
typedef double                    sF64;   // double-precision float
typedef int                       sInt;   // general-purpose signed integer
typedef char                      sChar;  // character type used for strings
typedef signed char               sBool;  // boolean stored in one signed byte
typedef void*                     sPtr;   // untyped pointer; note that "const sPtr" is a const pointer, not pointer-to-const

/* Hand-written prototypes for the C runtime routines used below. They are
   declared here instead of including the CRT headers, and are then handed to
   #pragma intrinsic so the compiler may expand them in place of a call. */
int __cdecl abs(int);

/* Math routines listed under "true intrinsic" below (plus exp, which the
   wrappers in this file do not use). */
double __cdecl atan(double);
double __cdecl atan2(double,double);
double __cdecl cos(double);
double __cdecl exp(double);
double __cdecl fabs(double);
double __cdecl log(double);
double __cdecl log10(double);
double __cdecl sin(double);
double __cdecl sqrt(double);
double __cdecl tan(double); 

/* Math routines listed under "fake intrinsic" below. */
double __cdecl acos(double);
double __cdecl asin(double);
double __cdecl cosh(double);
double __cdecl fmod(double,double);
double __cdecl pow(double,double);
double __cdecl sinh(double);
double __cdecl tanh(double);

/* Memory and string routines. */
void * __cdecl memset( void *dest, int c, size_t count );
void * __cdecl memcpy( void *dest, const void *src, size_t count );
int __cdecl memcmp( const void *buf1, const void *buf2, size_t count );
size_t __cdecl strlen( const char *string );

/* NOTE(review): the "true" / "fake" split appears to follow the MSVC /Oi
   documentation: the first group can be expanded to inline FPU code, while the
   second group still calls a CRT helper that merely takes its arguments on the
   FPU stack - confirm against the compiler version in use. */
#pragma intrinsic (abs)                                       // int intrinsic
#pragma intrinsic (memset,memcpy,memcmp,strlen)               // memory intrinsic
#pragma intrinsic (atan,atan2,cos,exp,log,log10,sin,sqrt,tan,fabs) // true intrinsic
#pragma intrinsic (acos,asin,cosh,fmod,pow,sinh,tanh)         // fake intrinsic

/* Returns the absolute value of i (wrapper around the abs intrinsic). */
__forceinline sInt sAbs(sInt i)
{
  const sInt magnitude = abs(i);
  return magnitude;
}
/* Fills c bytes starting at dd with the byte value s (wrapper around memset;
   the pointer memset returns is discarded). */
__forceinline void sSetMem(sPtr dd,sInt s,sInt c)
{
  memset(dd, s, c);
}
/* Copies c bytes from ss to dd (wrapper around memcpy; the pointer memcpy
   returns is discarded). */
__forceinline void sCopyMem(sPtr dd,const void *ss,sInt c)
{
  memcpy(dd, ss, c);
}
/* Compares the first c bytes of dd and ss with memcmp and returns its result
   (zero when equal, otherwise negative or positive).
   dd is declared as "const void *" rather than "const sPtr": the latter is a
   const POINTER to writable memory (void * const), which made it impossible
   to pass read-only data as the first buffer without a cast. Callers that pass
   a plain void * are unaffected. */
__forceinline sInt sCmpMem(const void *dd,const void *ss,sInt c)
{
  return (sInt)memcmp(dd,ss,c);
}
/* Returns the length of the zero-terminated string s, narrowed from size_t
   to sInt (wrapper around strlen). */
__forceinline sInt sGetStringLen(const sChar *s)
{
  const size_t length = strlen(s);
  return (sInt)length;
}

/* Double-precision wrappers, each forwarding straight to one math routine
   from the "true intrinsic" list. */

/* Arc tangent of f. */
__forceinline sF64 sFATan(sF64 f)
{
  return atan(f);
}

/* Two-argument arc tangent; a and b are passed to atan2 in that order. */
__forceinline sF64 sFATan2(sF64 a,sF64 b)
{
  return atan2(a,b);
}

/* Cosine of f. */
__forceinline sF64 sFCos(sF64 f)
{
  return cos(f);
}

/* Absolute value of f. */
__forceinline sF64 sFAbs(sF64 f)
{
  return fabs(f);
}

/* Natural logarithm of f. */
__forceinline sF64 sFLog(sF64 f)
{
  return log(f);
}

/* Base-10 logarithm of f. */
__forceinline sF64 sFLog10(sF64 f)
{
  return log10(f);
}

/* Sine of f. */
__forceinline sF64 sFSin(sF64 f)
{
  return sin(f);
}

/* Square root of f. */
__forceinline sF64 sFSqrt(sF64 f)
{
  return sqrt(f);
}

/* Double-precision wrappers, each forwarding straight to one math routine
   from the "fake intrinsic" list. */

/* Arc cosine of f. */
__forceinline sF64 sFACos(sF64 f)
{
  return acos(f);
}

/* Arc sine of f. */
__forceinline sF64 sFASin(sF64 f)
{
  return asin(f);
}

/* Hyperbolic cosine of f. */
__forceinline sF64 sFCosH(sF64 f)
{
  return cosh(f);
}

/* Hyperbolic sine of f. */
__forceinline sF64 sFSinH(sF64 f)
{
  return sinh(f);
}

/* Hyperbolic tangent of f. */
__forceinline sF64 sFTanH(sF64 f)
{
  return tanh(f);
}

/* Reciprocal square root of f, computed as a plain division by sqrt(f). */
__forceinline sF64 sFInvSqrt(sF64 f)
{
  return 1.0/sqrt(f);
}

/* Forward declarations. These three are not wrappers around the CRT routines
   declared above; their definitions further down in this file are written
   with x87 inline assembly. */
sF64 sFPow(sF64 a,sF64 b);   // a raised to the power b
sF64 sFMod(sF64 a,sF64 b);   // floating-point remainder of a / b
sF64 sFExp(sF64 f);          // e raised to the power f

/****************************************************************************/
/***                                                                      ***/
/***   asm                                                                ***/
/***                                                                      ***/
/****************************************************************************/

// Silence MSVC warning C4035 ("no return value"): several routines below have
// a non-void return type but no return statement, because their __asm block
// leaves the result in eax or st(0).
#pragma warning (disable : 4035) 

/* Clears pending x87 exceptions and loads the FPU control word 0x103f
   (round to nearest even, single precision, per the original comment). */
__forceinline void sFloatFix()
{
  __asm
  {
    fclex;                  // clear pending FPU exception flags
    push    0103fh; // round to nearest even + single precision
    fldcw   [esp];          // load the control word from the value just pushed
    pop     eax;            // release the stack slot (eax is overwritten)
  }
}

/* Clears pending x87 exceptions and loads the FPU control word 0x123f
   (round to nearest even, double precision, per the original comment). */
__forceinline void sFloatDouble()
{
  __asm
  {
    fclex;                  // clear pending FPU exception flags
    push    0123fh; // round to nearest even + double precision
    fldcw   [esp];          // load the control word from the value just pushed
    pop     eax;            // release the stack slot (eax is overwritten)
  }
}

/* Clears pending x87 exceptions and loads the FPU control word 0x141f.
   NOTE(review): decoded with the standard x87 control-word layout, 0x141f
   selects single precision, rounding toward minus infinity, and leaves the
   precision (inexact) exception unmasked - confirm that this is intended and
   what "Den" stands for. */
__forceinline void sFloatDen1()
{
  __asm
  {
    fclex;                  // clear pending FPU exception flags
    push    0141fh;         // new control word (see note above)
    fldcw   [esp];          // load the control word from the value just pushed
    pop     eax;            // release the stack slot (eax is overwritten)
  }
}
/* Clears pending x87 exceptions and loads the FPU control word 0x143f.
   NOTE(review): decoded with the standard x87 control-word layout, 0x143f
   selects single precision and rounding toward minus infinity with all six
   exceptions masked; it differs from sFloatDen1 only in bit 5 - confirm the
   intended meaning. */
__forceinline void sFloatDen0()
{
  __asm
  {
    fclex;                  // clear pending FPU exception flags
    push    0143fh;         // new control word (see note above)
    fldcw   [esp];          // load the control word from the value just pushed
    pop     eax;            // release the stack slot (eax is overwritten)
  }
}

/* Converts f to a 32-bit integer with fistp and returns it in eax (there is
   no return statement; see the C4035 pragma above).
   fistp rounds according to the rounding mode currently set in the FPU
   control word, so the result is not necessarily a truncation. */
__forceinline sInt sFtol (const float f)
{
  __asm 
  {
    fld f                   // st(0) = f
    push eax                // reserve 4 bytes of stack as scratch (pushed value is irrelevant)
    fistp dword ptr [esp]   // store st(0) as a 32-bit integer and pop the FPU stack
    pop eax                 // eax = converted value, which is the return value
  }
}

/* Rounds f to an integral value with frndint and returns it in st(0) (there
   is no return statement; see the C4035 pragma above).
   frndint rounds according to the rounding mode currently set in the FPU
   control word. */
__forceinline sF32 sFRound (const float f)
{
  __asm 
  {
    fld f                   // st(0) = f
    frndint                 // st(0) = f rounded to an integral value
  }
}

/* Computes the sine and cosine of x with a single fsincos instruction and
   stores them as single-precision floats through the two output pointers.
   sine and cosine must both point to writable sF32 storage; neither is
   checked for null. */
__forceinline void sFSinCos(const float x, sF32 *sine, sF32 *cosine)
{
  __asm
  {
    fld x;                  // st(0) = x
    fsincos;                // st(0) = cos(x), st(1) = sin(x)
    mov eax,[cosine];       // eax = cosine output pointer
    fstp dword ptr [eax];   // *cosine = cos(x), pop
    mov eax,[sine];         // eax = sine output pointer
    fstp dword ptr [eax];   // *sine = sin(x), pop
  }
}

/* Returns var_a * var_b / var_c, keeping the full 64-bit product in edx:eax
   between the multiply and the divide. The quotient is returned in eax (there
   is no return statement; see the C4035 pragma above).
   idiv raises a CPU divide error when var_c is zero or the quotient does not
   fit in 32 bits; nothing here guards against that. */
__forceinline sInt sMulDiv(sInt var_a,sInt var_b,sInt var_c)
{
  __asm
  {
    mov eax,var_a           // eax = var_a
    imul var_b              // edx:eax = var_a * var_b (signed, 64-bit)
    idiv var_c              // eax = edx:eax / var_c, edx = remainder
  }
}

/* Returns bits 16..47 of the signed 64-bit product var_a * var_b, i.e.
   (var_a * var_b) >> 16 truncated to 32 bits. The result is returned in eax
   (there is no return statement; see the C4035 pragma above).
   NOTE(review): this looks like a 16.16 fixed-point multiply - confirm with
   the callers. */
__forceinline sInt sMulShift(sInt var_a,sInt var_b)
{
  __asm
  {
    mov eax, var_a          // eax = var_a
    imul var_b              // edx:eax = var_a * var_b (signed, 64-bit)
    shrd eax, edx, 16       // shift eax right by 16, filling from the low bits of edx
  }
}

/* Returns (var_a << 16) / var_b, with the shifted dividend held as a signed
   64-bit value in edx:eax. The quotient is returned in eax (there is no
   return statement; see the C4035 pragma above).
   idiv raises a CPU divide error when var_b is zero or the quotient does not
   fit in 32 bits; nothing here guards against that.
   NOTE(review): this looks like a 16.16 fixed-point divide - confirm with the
   callers. */
__forceinline sInt sDivShift(sInt var_a,sInt var_b)
{
  __asm
  {
    mov eax,var_a           // eax = var_a
    mov edx,eax             // edx = var_a
    shl eax,16              // low dword of dividend  = var_a << 16
    sar edx,16              // high dword of dividend = var_a >> 16 (sign-extending)
    idiv var_b              // eax = edx:eax / var_b, edx = remainder
  }
}

// Put warning C4035 back to its default state; from here on a non-void
// function without a return statement is reported again.
#pragma warning (default : 4035) 

/**
 * Floating-point remainder of a / b, computed with the x87 fprem instruction.
 * The result has the sign of a (fprem truncates the quotient toward zero).
 *
 * fprem only performs a PARTIAL reduction when the exponents of a and b are
 * far apart, and signals this by leaving status flag C2 set. The instruction
 * is therefore repeated until C2 is clear; a single fprem would return a
 * wrong value for large |a / b|.
 *
 * Overwrites eax and the stack copy of a.
 */
__forceinline sF64 sFMod(sF64 a,sF64 b)
{
  __asm
  {
    fld   qword ptr [b];     // st(0) = b
    fld   qword ptr [a];     // st(0) = a, st(1) = b

sfmod_reduce:
    fprem;                   // st(0) = partial remainder of st(0) / st(1)
    fstsw ax;                // ax = FPU status word
    sahf;                    // C2 (status bit 10) lands in the parity flag
    jp    sfmod_reduce;      // C2 set: reduction incomplete, go again

    fstp  st(1);             // drop b, keep the remainder in st(0)
    fstp  qword ptr [a];     // hand the result back through a
  }

  return a;
}

/**
 * Raises a to the power b on the x87 FPU as 2^(b * log2(a)).
 *
 * The exponent b * log2(a) is split into an integer part n and a fraction r;
 * 2^r comes from f2xm1 and 2^n is built directly as an 80-bit extended value
 * on the stack, then the two are multiplied.
 *
 * Special cases, as implemented:
 *  - a == 0 returns a unchanged, whatever b is (so sFPow(0,0) yields 0).
 *  - n + 0x3fff <= 0 returns 0 (underflow).
 *  - n + 0x3fff >= 0x8000 returns +infinity (overflow).
 *  NOTE(review): a negative a makes fyl2x invalid, and an exponent too large
 *  for a 32-bit integer makes fist store its "indefinite" value; both cases
 *  appear to end in the underflow branch and return 0 - confirm whether
 *  callers ever pass such values.
 *
 * The result is stored to a and returned explicitly, matching sFMod and
 * sFExp, instead of being left in st(0) with no return statement: warning
 * C4035 is no longer suppressed at this point in the file.
 *
 * Overwrites eax and the stack copy of a, and temporarily moves esp.
 */
__forceinline sF64 sFPow(sF64 a,sF64 b)
{
  // faster pow based on code by agner fog
  __asm
  {
    fld   qword ptr [b];            // st(0) = b
    fld   qword ptr [a];            // st(0) = a, st(1) = b

    ftst;                           // compare a with 0.0
    fstsw ax;
    sahf;
    jz    zero;                     // a == 0: skip the logarithm

    fyl2x;                          // st(0) = b * log2(a), pops one register
    fist  dword ptr [a];            // n = integer part, kept in the low dword of a as scratch
    sub   esp, 12;                  // room for one 80-bit value
    mov   dword ptr [esp],0;        // mantissa, low dword
    mov   dword ptr [esp+4],0x80000000; // mantissa, high dword: 1.0
    fisub dword ptr [a];            // st(0) = r = fraction left after removing n
    mov   eax, dword ptr [a];
    add   eax, 0x3fff;              // biased exponent for 2^n
    mov   [esp+8], eax;             // mov leaves the flags of the add intact
    jle   underflow;                // biased exponent <= 0
    cmp   eax, 0x8000;
    jge   overflow;                 // biased exponent does not fit
    f2xm1;                          // st(0) = 2^r - 1
    fld1;
    fadd;                           // no-operand form: add and pop, st(0) = 2^r
    fld   tbyte ptr [esp];          // st(0) = 2^n
    add   esp, 12;
    fmul;                           // no-operand form: multiply and pop, st(0) = 2^(n+r)
    jmp   end;

underflow:
    fstp  st;                       // discard r
    fldz;                           // result = 0
    add   esp, 12;
    jmp   end;

overflow:
    push  0x7f800000;               // bit pattern of single-precision +infinity
    fstp  st;                       // discard r
    fld   dword ptr [esp];          // result = +infinity
    add   esp, 16;                  // 12 bytes of scratch + the 4 just pushed
    jmp   end;

zero:
    fstp  st(1);                    // drop b, keep a (which is zero) as the result

end:
    fstp  qword ptr [a];            // hand the result back through a
  }

  return a;
}

/**
 * Computes e^f on the x87 FPU as 2^(f * log2(e)).
 *
 * The exponent y = f * log2(e) is split with fprem into a fraction (same sign
 * as y, magnitude below 1) that f2xm1 can handle, and the remaining integer
 * part, which fscale applies as a power of two.
 *
 * Overwrites the stack copy of f.
 */
__forceinline sF64 sFExp(sF64 f)
{
  __asm
  {
    fld   qword ptr [f];     // st(0) = f
    fldl2e;                  // st(0) = log2(e), st(1) = f
    fmulp st(1), st;         // st(0) = y = f * log2(e)

    fld1;                    // st(0) = 1, st(1) = y
    fld   st(1);             // st(0) = y, st(1) = 1, st(2) = y
    fprem;                   // st(0) = remainder of y / 1, i.e. the fraction of y
    f2xm1;                   // st(0) = 2^fraction - 1
    faddp st(1), st;         // st(0) = 2^fraction, st(1) = y
    fscale;                  // st(0) = 2^fraction * 2^trunc(y)

    fstp  st(1);             // drop y, keep the result in st(0)
    fstp  qword ptr [f];     // hand the result back through f
  }

  return f;
}
