// ---------------------------- VSISQRT.H -----------------------
// VSpace 3d library.
// Designed and written by Javier Arvalo Baeza.
// Adaptation of the Fast inverse square root algorithm from Ken
// Turkowski, Graphics Gems V
// And the square root algo from Gems 1 by Paul Lalonde and Robert Dawson
// Make sure the float parameters are really float and not double.

//#define SQRTAPP

#ifndef _VSISQRT_H_
#define _VSISQRT_H_

#ifndef _VSBASE_H_
#include "vsbase.h"
#endif

#ifdef __cplusplus
extern "C" {
#endif

    // General parameters. The exponent position and bias match the layout of
    // an IEEE-754 single-precision float viewed as a 32-bit word.
#define VSISQ_LOOKUP_BITS   8   // Number of mantissa MSBs used for the table lookup
#define VSISQ_EXP_POS      23   // Bit position of the exponent field's LSB
#define VSISQ_EXP_BIAS    127   // Bias added to the stored exponent

    // Floating-point type returned by VSISQ_Sqrt and VSISQ_ISqrt.
typedef float VSISQ_TFloat;

    // Derived parameters, all computed from the general parameters above.
    // In these macros `a` is expected to be the float's bit pattern held in an
    // integer (NOTE(review): confirm against the users of these macros).
    // The table index consists of the VSISQ_LOOKUP_BITS mantissa MSBs plus the
    // bit directly above them (bit VSISQ_EXP_POS, the exponent's LSB), which is
    // why the table has 2 << VSISQ_LOOKUP_BITS entries.
#define VSISQ_LOOKUP_POS    (VSISQ_EXP_POS - VSISQ_LOOKUP_BITS) // Bit position of the lowest mantissa bit used for lookup
//#define VSISQ_SEED_POS      (VSISQ_EXP_POS - 8)                 // Position of mantissa seed
#define VSISQ_SEED_POS      (0)                                 // Bit position of the mantissa seed (currently no shift)
#define VSISQ_TABLE_SIZE    (2 << VSISQ_LOOKUP_BITS)            // Number of table entries (lookup bits + exponent LSB)
#define VSISQ_LOOKUP_MASK   (VSISQ_TABLE_SIZE - 1)              // Mask for table index
#define VSISQ_GET_EXP(a)    (((a) >> VSISQ_EXP_POS) & 0xFF)     // Extract the 8-bit biased exponent field
#define VSISQ_SET_EXP(a)    ((a) << VSISQ_EXP_POS)              // Move an exponent value into the exponent field
#define VSISQ_GET_EMANT(a)  (((a) >> VSISQ_LOOKUP_POS) & VSISQ_LOOKUP_MASK) // Extended mantissa: exponent LSB + mantissa MSBs (table index)
#define VSISQ_SET_MANTSEED(a)   (((dword)(a)) << VSISQ_SEED_POS)            // Place a mantissa seed; with VSISQ_SEED_POS == 0 this is just a cast to dword

#ifdef SQRTAPP

    // Lookup tables read by the inline-assembly versions of VSISQ_ISqrt and
    // VSISQ_Sqrt further down in this header. They are only declared here;
    // their definitions live in another translation unit.
extern long VSISQ_LookupTable[VSISQ_TABLE_SIZE];
extern long VSISQ_SqrtTab[0x100];

    // Init the tables.
    // NOTE(review): presumably this must run before the first call to
    // VSISQ_Sqrt / VSISQ_ISqrt -- confirm against the implementation.
extern void VSISQ_Init(void);

#else

    // Without SQRTAPP no tables are declared, so initialization is a no-op
    // expression that can still be written as a statement: VSISQ_Init();
#define VSISQ_Init() ((void)0)

#endif

    // Calculate sqrt(a). Note a MUST be >= 0.
    // The argument must really be a float (not a double): when SQRTAPP is
    // defined the body is supplied by the #pragma aux below, which receives
    // the argument in a 32-bit register. When SQRTAPP is not defined, a
    // function-like macro of the same name (defined below) replaces calls.
extern VSISQ_TFloat VSISQ_Sqrt(float a);

    // Calculate 1/sqrt(x).
    // The argument must really be a float (not a double): when SQRTAPP is
    // defined the body is supplied by the #pragma aux below, which receives
    // the argument in a 32-bit register. When SQRTAPP is not defined, a
    // function-like macro of the same name (defined below) replaces calls.
extern VSISQ_TFloat VSISQ_ISqrt(float x);


#ifdef SQRTAPP

// Watcom inline implementation of VSISQ_ISqrt, working on the raw bits of
// the float argument x:
//   - x arrives in EAX and the result is left in EAX (parm [EAX],
//     value no8087); EBX and EDX are scratch (modify [EBX EDX]).
//   - EDX = (5F000000h - ((x >> 1) & 3FC00000h)) & 7F800000h: the result's
//     exponent field, (380 - e) / 2 rounded down, where e is the biased
//     exponent of x.
//   - EAX = (x >> 0Fh) & 1FFh: the 9-bit table index (exponent LSB plus the
//     8 mantissa MSBs), the same value VSISQ_GET_EMANT yields.
//   - DB 0x8B, 0x04, 0x83 is the hand-encoded MOV EAX,[EBX+EAX*4], loading
//     VSISQ_LookupTable[index]; that word is then added to the exponent
//     (presumably the table holds the result's mantissa bits).
// The sign bit of x does not take part in the computation.
#pragma aux VSISQ_ISqrt modify nomemory [EBX EDX] parm [EAX] value no8087 = \
"   MOV     EBX,EAX                 " \
"   SHR     EBX,1                   " \
"   MOV     EDX,5F000000h           " \
"   SHR     EAX,0Fh                 " \
"   AND     EBX,3FC00000h           " \
"   SUB     EDX,EBX                 " \
"   AND     EAX,1FFh                " \
"   AND     EDX,7F800000h           " \
"   MOV     EBX,OFFSET VSISQ_LookupTable" \
"   DB      0x8B, 0x04, 0x83        " \
"   ADD     EAX,EDX                 "

// Watcom inline implementation of VSISQ_Sqrt, working on the raw bits of
// the float argument a:
//   - a arrives in EAX and the result is left in EAX (parm [EAX],
//     value no8087); EBX and EDX are scratch (modify [EBX EDX]).
//   - EAX keeps only the 23 mantissa bits (AND 0x007FFFFF).
//   - EDX = a - 0x3F800000 removes the exponent bias. Bit 23 (the exponent's
//     LSB) is saved in EBX; the bits above it are halved with an arithmetic
//     shift (SAR) and the bias is added back, giving the result's exponent.
//   - (mantissa | saved exponent LSB) >> 16 is the 8-bit index into the
//     0x100-entry VSISQ_SqrtTab.
//   - DB 0x8B, 0x04, 0x83 is the hand-encoded MOV EAX,[EBX+EAX*4], loading
//     VSISQ_SqrtTab[index]; that word is then added to the exponent
//     (presumably the table holds the result's mantissa bits).
#pragma aux VSISQ_Sqrt modify nomemory [EBX EDX] parm [EAX] value no8087 = \
"   MOV     EDX,EAX              " \
"   AND     EAX,0x007FFFFF       " \
"   SUB     EDX,0x3F800000       " \
"   MOV     EBX,EDX              " \
"   AND     EDX,0xFF000000       " \
"   SAR     EDX,1                " \
"   AND     EBX,0x00800000       " \
"   ADD     EDX,0x3F800000       " \
"   OR      EAX,EBX              " \
"   SHR     EAX,16               " \
"   MOV     EBX,OFFSET VSISQ_SqrtTab" \
"   DB      0x8B, 0x04, 0x83     " \
"   ADD     EAX,EDX              "

// Note that the DB 0x8B, 0x04, 0x83 bytes encode MOV EAX,[4*EAX+EBX], which
// Watcom C won't accept as an instruction :(
// Unfortunately, it won't accept "MOV EAX,[4*EAX+sqrttab]" encoded via DB
// either :(( It doesn't generate the relocation info for DD OFFSET sqrttab.
// Yes, Watcom inline ASM sucks big time, but it's better than nothing.
// Will 2 AGIs (address generation interlocks) be generated because both EBX
// and EAX are assigned right before the address generation? I don't know,
// but I can't fix this for inline code.

#else

// Portable fallback used when SQRTAPP is not defined: both routines are
// function-like macros built on the C library sqrt(). The result is cast to
// VSISQ_TFloat so the macros have the same result type as the prototypes
// declared above (and as the SQRTAPP inline-assembly versions); without the
// cast the expressions would have type double.
// The argument is evaluated exactly once. sqrt() must already be declared
// (<math.h>) at the point of use.
// NOTE(review): presumably vsbase.h pulls in <math.h> -- confirm.
#define VSISQ_Sqrt(a)  ((VSISQ_TFloat)sqrt(a))
#define VSISQ_ISqrt(a) ((VSISQ_TFloat)(1.0 / sqrt(a)))

#endif

#ifdef __cplusplus
}
#endif

#endif

// ---------------------------- VSISQRT.H -----------------------
