; Trailball 256b intro by LBi/Bitbandit - 2021.08.14
; Tested on real HW (FreeDos)

        org 0x100                 ; code is assembled to run at offset 0x100

; Entry point. Nothing is initialised explicitly; the code relies on the
; register state noted below.
; assume ax=0, si=0x100
;   ax = 0     : 'mov al' below leaves ah = 0, selecting int 0x10 function 0
;                (set video mode); ah is read later as a constant zero byte.
;   si = 0x100 : [si] and [si+2] point at the first bytes of this program,
;                which serve as scratch memory once they have been executed.

        mov al, 0x13              ; set gfx mode to 320x200, 256 color
        int 0x10

        push 0xa000               ; set es to VGA screen segment
        pop es

        fldz                      ; [cnt=0] - animation counter lives in st0 (no finit first: assumes the FPU stack starts empty)
; Main: start of one frame. Advances the animation counter, then walks the
; 320x200 screen; Xloop evaluates one pixel per iteration.
; Registers: cx = rows left (200..1), bx = columns left (320..1),
;            es:di = next screen byte, si = scratch pointer (0x100).
; FPU stack comments list st0 first.
Main:
        fadd dword [f8]           ; [increment cnt] - the dword at f8 is 0x3d000008, a float just above 1/32
        fst dword [si+2]          ; store cnt - kept in memory so it can be reloaded after the fninit at .2

f200    equ $+1                   ; address of the immediate word (200) inside the next instruction
        mov cx, 200               ; screen height (Y)
        xor di, di                ; screen start

Yloop:
        mov bx, 320               ; screen width (X)

Xloop:
        fild word [f4]            ; [4] [cnt]
        fild word [f8]            ; [8] [4] [cnt]

        mov dx, 100
        sub dx, cx                ; Y = 100 - cx (row counter), so Y runs from -100 to 99
        mov [si], dx              ; go through memory: fild cannot load from a register
        fild word [si]            ; [Y] [8] [4] [cnt]
        lea dx, [bx-160]
        shl dx, 2                 ; X = 4*(bx-160), so X runs from 640 down to -636
        mov [si], dx
        fild word [si]            ; [X] [Y] [8] [4] [cnt]
        mov [si], ah              ; default output byte is 0 (relies on ah == 0)

; Checked floor idea: https://www.shadertoy.com/view/Xd33W8 thanks to jt and FabriceNeyret2
;
; Floor: for pixels that pass both tests below, stores
; trunc(sin(Rx)*Ry + 8) as a word at [si]; all other pixels keep the
; byte that is already there.
; FPU stack on entry (st0 first): [X] [Y] [8] [4] [cnt].
; After SkipFloor the temporary Rx is dropped, restoring that layout.

        fld st4                   ; [cnt] [X] [Y] [8] [4] [cnt]
        fsin                      ; [sin(cnt)] [X] [Y] [8] [4] [cnt]
        fimul word [f200]         ; [sin(cnt)*200] [X] [Y] [8] [4] [cnt]
        fadd st1                  ; [X+sin(cnt)*200] [X] [Y] [8] [4] [cnt]
        fdiv st2                  ; [Rx=(X+sin(cnt)*200)/Y] [X] [Y] [8] [4] [cnt]

        fld st0                   ; [Rx] [Rx] [X] [Y] [8] [4] [cnt]
        fabs                      ; [|Rx|] [Rx] [X] [Y] [8] [4] [cnt]
        fucomip st4               ; [Rx] [X] [Y] [8] [4] [cnt] - compare |Rx| with 8 and pop; CF=1 when |Rx| < 8
        jnc SkipFloor             ; skip if |Rx| >= 8

        cmp cx, 80
        jg SkipFloor              ; skip if cx > 80, i.e. Y = 100-cx < 20

        fsin                      ; [Rx=sin(Rx)] [X] [Y] [8] [4] [cnt]
        fild word [f200]          ; [200] [Rx] [X] [Y] [8] [4] [cnt]
        fdiv st3                  ; [200/Y] [Rx] [X] [Y] [8] [4] [cnt]
        fld st6                   ; [cnt] [200/Y] [Rx] [X] [Y] [8] [4] [cnt] - all 8 FPU registers now in use
        fmul st6                  ; [cnt*4] [200/Y] [Rx] [X] [Y] [8] [4] [cnt]
        faddp                     ; [200/Y+cnt*4] [Rx] [X] [Y] [8] [4] [cnt]
        fmul st5                  ; [4*(200/Y+cnt*4)] [Rx] [X] [Y] [8] [4] [cnt]
        fsin                      ; [Ry=sin(4*(200/Y+cnt*4))] [Rx] [X] [Y] [8] [4] [cnt]
        fmul st1                  ; [Rx*Ry] [Rx] [X] [Y] [8] [4] [cnt]
        fadd st4                  ; [Rx*Ry+8] [Rx] [X] [Y] [8] [4] [cnt] - value lies in 7..9
        fisttp word [si]          ; [Rx] [X] [Y] [8] [4] [cnt] - truncate, store word at [si], pop (fisttp is an SSE3 instruction)
         
SkipFloor:
        fstp st0                  ; [X] [Y] [8] [4] [cnt]   - drop [Rx]

; Ball inspired by: https://www.shadertoy.com/view/XlfBW7 thanks to FabriceNeyret2
;
; Ball: when both Asine calls succeed (CF=1), overwrites the word at [si]
; with 12 - (Ux+1)*(Uy+1)/4; otherwise jumps to .2 and leaves [si] as is.
; FPU stack on entry (st0 first): [X] [Y] [8] [4] [cnt].
; The early exits leave extra values on the FPU stack; that is harmless
; because .2 begins with fninit.

        fidiv word [f200]         ; [X/200] [Y] [8] [4] [cnt]
        call Asine                ; [Ux=ArcSin(X/200)] [Y] [8] [4] [cnt]
        jnc .2                    ; CF=0: |X/200| >= 1, no arcsine computed - keep the current pixel

        fld st0                   ; [Ux] [Ux] [Y] [8] [4] [cnt]
        fcos                      ; [cos(Ux)] [Ux] [Y] [8] [4] [cnt]

        fxch                      ; [Ux] [cos(Ux)] [Y] [8] [4] [cnt]
        fxch st2                  ; [Y] [cos(Ux)] [Ux] [8] [4] [cnt]
        fidiv word [f200]         ; [Y/200] [cos(Ux)] [Ux] [8] [4] [cnt]
        fld st5                   ; [cnt] [Y/200] [cos(Ux)] [Ux] [8] [4] [cnt]
        fadd st0                  ; [cnt*2] [Y/200] [cos(Ux)] [Ux] [8] [4] [cnt]
        fsincos                   ; [cos(cnt*2)] [sin(cnt*2)] [Y/200] [cos(Ux)] [Ux] [8] [4] [cnt] - all 8 FPU registers in use
        fabs                      ; [|cos|] [sin] [Y/200] [cos(Ux)] [Ux] [8] [4] [cnt]
        fxch                      ; [sin] [|cos|] [Y/200] [cos(Ux)] [Ux] [8] [4] [cnt]
        fabs                      ; [|sin|] [|cos|] [Y/200] [cos(Ux)] [Ux] [8] [4] [cnt]
        fmulp                     ; [|sin|*|cos|] [Y/200] [cos(Ux)] [Ux] [8] [4] [cnt]
        fld1                      ; [1] [|sin|*|cos|] [Y/200] [cos(Ux)] [Ux] [8] [4] [cnt]
        fdiv st5                  ; [1/8] [|sin|*|cos|] [Y/200] [cos(Ux)] [Ux] [8] [4] [cnt]
        fsubrp                    ; [1/8-|sin|*|cos|] [Y/200] [cos(Ux)] [Ux] [8] [4] [cnt]
        fsubrp                    ; [1/8-|sin|*|cos|-Y/200] [cos(Ux)] [Ux] [8] [4] [cnt]
        fmul st4                  ; [Uy=4*(1/8-|sin|*|cos|-Y/200)] [cos(Ux)] [Ux] [8] [4] [cnt]
        fdivrp                    ; [Uy=Uy/cos(Ux)] [Ux] [8] [4] [cnt]

        call Asine                ; [Uy=ArcSin(Uy)] [Ux] [8] [4] [cnt]
        jnc .2                    ; CF=0: |Uy| >= 1, no arcsine computed - keep the current pixel

        fld st4                   ; [cnt] [Uy] [Ux] [8] [4] [cnt]
        fadd st0                  ; [cnt*2] [Uy] [Ux] [8] [4] [cnt]
        fsubp                     ; [Uy=Uy-cnt*2] [Ux] [8] [4] [cnt]

        fmul st2                  ; [8*Uy] [Ux] [8] [4] [cnt]
        fsin                      ; [Uy=sin(8*Uy)] [Ux] [8] [4] [cnt]
        fld1                      ; [1] [Uy] [Ux] [8] [4] [cnt]
        faddp                     ; [Uy+1] [Ux] [8] [4] [cnt]
        fxch                      ; [Ux] [Uy+1] [8] [4] [cnt]
        fmul st2                  ; [8*Ux] [Uy+1] [8] [4] [cnt]
        fldpi                     ; [PI] [8*Ux] [Uy+1] [8] [4] [cnt]
        fadd st0                  ; [2*PI] [8*Ux] [Uy+1] [8] [4] [cnt]
        fdiv st4                  ; [2*PI/4 = PI/2] [8*Ux] [Uy+1] [8] [4] [cnt]
        faddp                     ; [8*Ux+PI/2] [Uy+1] [8] [4] [cnt]
        fsin                      ; [Ux=sin(8*Ux+PI/2)] [Uy+1] [8] [4] [cnt] - equals cos(8*Ux)
        fld1                      ; [1] [Ux] [Uy+1] [8] [4] [cnt]
        faddp                     ; [Ux+1] [Uy+1] [8] [4] [cnt]
        fmulp                     ; [(Ux+1)*(Uy+1)] [8] [4] [cnt]
        fdiv st2                  ; [P=(Ux+1)*(Uy+1)/4] [8] [4] [cnt] - P lies in 0..1
        fsubp st2                 ; [8] [4-P] [cnt]
        faddp                     ; [12-P] [cnt]
        fistp word [si]           ; [cnt] - store 12-P (11..12) as an integer word at [si], rounded per the FPU rounding mode

; .2: end of the per-pixel work. Every path through Xloop arrives here,
; with a varying number of values left on the FPU stack.
.2:
        fninit                    ; clear FPU stack and reload cnt
        fld dword [si+2]          ; [cnt] - the value saved by 'fst dword [si+2]' at Main

        movsb                     ; copy the color byte at [ds:si] to [es:di]; advances si and di (assumes DF=0)
        dec si                    ; undo the si increment so si keeps pointing at the scratch bytes

        dec bx
        jnz Xloop                 ; next column of this row

        dec cx
        jnz Yloop                 ; next row

; check for ESC
; Runs once per frame, after the last pixel has been written.
        in al, 0x60               ; read a byte from the keyboard data port
        dec ax                    ; 1 byte shorter than 'dec al'; ax becomes 0 only when al was 1 (ESC) and ah is 0
        jnz Main
; NOTE(review): if port 0x60 ever returns 0, 'dec ax' borrows into ah (ah = 0xff),
; which breaks the ah == 0 assumption used by 'mov [si], ah' and by this test.

; Restore text mode
        mov al, 3                 ; ax is 0 here, so ah=0 / al=3: int 0x10 function 0, video mode 3
        int 0x10
;        ret                      ; fallthrough, save 1 byte
; Execution continues into Asine below and leaves through its ret
; (presumably returning to DOS - depends on the loader's initial stack).

; Asine: replace st0 with its arcsine when |st0| < 1.
; In:    st0 = c
; Out:   CF=1 : st0 = arcsin(c), computed as atan2(c, sqrt(1-c^2));
;               FPU stack depth is unchanged.
;        CF=0 : |c| >= 1, nothing computed; |c| and 1 are left on top of
;               the stack above c (two extra entries).
; Needs two free FPU registers. CF comes from fucomi; the x87 arithmetic
; that follows does not modify EFLAGS, so it is still valid at ret.
; Callers branch with jnc and rely on a later fninit to discard leftovers.
; Also entered by fall-through from the exit path, where only the ret matters.
Asine:                            ; [c] [v] [8] [4] [cnt]
        fld1                      ; [1] [c] [v] [8] [4] [cnt]
        fld st1                   ; [c] [1] [c] [v] [8] [4] [cnt]
        fabs                      ; [|c|] [1] [c] [v] [8] [4] [cnt]
        fucomi st1                ; [|c|] [1] [c] [v] [8] [4] [cnt] - CF=1 when |c| < 1
        jnc .1                    ; jump if |c| >= 1 !!! +2 items left on FPU stack !!!
        fmul st0                  ; [c^2] [1] [c] [v] [8] [4] [cnt]
        fsubp                     ; [1-c^2] [c] [v] [8] [4] [cnt]
        fsqrt                     ; [sqrt(1-c^2)] [c] [v] [8] [4] [cnt]
        fpatan                    ; [asine(c)] [v] [8] [4] [cnt] - atan2(st1, st0), pops one entry
.1:     ret

; constants
; The order matters: the two words starting at f8 are also read together as
; one dword by 'fadd dword [f8]'.
f4:     dw 4                      ; integer 4, loaded with 'fild word [f4]'
f8:     dw 8                      ; integer 8 for 'fild word [f8]'; also the low word of the float below
        dw 0x3d00 ; 1/32 - save 2 bytes (high word: dword at f8 = 0x3d000008, a float just above 1/32)
