; First of all: _everything_ in this intro is FAKE. No 3D rendering. No polygons. No lights. No ray casting / marching.
; Though disassembling a 256b intro is not rocket science, this is the first time I "officially" publish source code...
; ...because I had enough time to clean it up and add comments.
; Finally you have to know: Usually I prefer coding my next idea to optimizing or fixing the previous one ;-)
; So please feel free to save bytes.

; Feature switches (comment a line out to drop the feature and save its bytes)
%define SHADING                                 ; To draw "body" below the top squares to make "piles".
%define LIGHTING                                ; To make the body of the piles darker than the top. The left side of the piles must be even darker than the right side.
%define DEPTH_OF_FIELD                          ; To apply blur in several rounds to achieve a depth-of-field effect.
%define TIMING                                  ; To pace the main loop with HLT (one iteration per interrupt).
%define COMPATIBILITY                           ; I can save some bytes in DosBOX because of its "better" defaults. Real DOS requires more initializations.
;%define LAMP_EFFECT                            ; Didn't look good enough so finally I turned it off. But you can play with it ;-)

; Constants
PILES_IN_A_ROW      equ 64                      ; In fact this is the maximum. It doesn't make sense to decrease it to 32, 16, etc...
SHIFT_TO_PRECALC_TL equ 2                       ; (2 ^ SHIFT_TO_PRECALC_TL) * PILES_IN_A_ROW must be 256.
MAX_MATRIX_XY_INDEX equ PILES_IN_A_ROW-1        ; Highest pile index along one axis; also used as an AND mask for the index.
MAX_PILE_SIZE_IN_PX equ 256/PILES_IN_A_ROW      ; Edge length of one top square in pixels of the 256x256 frame.
MAX_PILE_XY_INDEX   equ MAX_PILE_SIZE_IN_PX-1   ; Highest pixel index inside a top square; also used as an AND mask.

org 100h                                        ; Code is assembled for offset 100h (DOS .COM layout).

		; Initializing 320x200 resolution screen with 256 colors
		mov     al, 13h                         ; AH = 0 at program start
		int     10h

		; Generating the palette (0..63 black..white)
		; The loop below needs AL = 0 as the first index/component value.
		salc                                    ; Undocumented opcode: AL = 0 when CF is clear, FFh when set. NOTE(review): relies on CF being clear after INT 10h - confirm.
%ifdef COMPATIBILITY
		mov     dx, 3C8h                        ; Presumably the VGA DAC write-index port: explicitly start at color index AL (= 0).
		out     dx, al
		inc     dx                              ; DX = 3C9h, presumably the DAC data port.
%else
		mov     dx, 3C9h                        ; Skips the index write and relies on the environment's default start index.
%endif
palout:
		; Three writes of the same value = one grey palette entry (R = G = B = AL).
		out     dx, al
		out     dx, al
		out     dx, al

		inc     ax                              ; 1 byte smaller than incrementing AL only.
		jns     palout                          ; Loops until AX reaches 8000h. This way it will fill much more color indices than I need ... but who cares? ;-)

mainloop:
		; Head of the per-frame loop: optional pacing, DS/SI setup, frame counting and,
		; whenever the 8-bit frame counter wraps to 0, a new set of random pile speeds.

		; Timing
%ifdef TIMING
		hlt                                     ; Small but tricky timing! Waits for any (but usually the timer) interrupt. Even a key press can speed the intro up ;-)
%endif

		; Initializing some registers
		push    cs                              ; DS = CS, because it's smaller to use [offset] instead of [cs:offset] used several times below.
		pop     ds
		mov     si, SPEEDS_AND_VPOS             ; The next 2 loops will use SI as a base offset in DS(==CS).

		; Incrementing frame counter
		inc     byte [FRAME_COUNTER]
		jnz     skipnewrandoms                  ; Change the random moving speed of piles only in the 1st then every 256th frame.

		; Generating new random speeds
		xor     bp, bp                          ; In an unusual way BP will be the index added to SI as a base (because I want to combine SI with BX below, and it wouldn't be possible to use BP+BX indexing while SI+BP and SI+BX work).
ogenc:
		rdtsc                                   ; Loads the time-stamp counter; only AL (random bits) and AH (delay count) are used below.
		                                        ; NOTE(review): the original comment said the counter "changes approx. 18 times in a second"; per the Intel SDM
		                                        ; the TSC advances at CPU-clock rate (18.2 Hz is the BIOS timer tick rate). Behaviour under emulators may differ - confirm.
sleep:                                          ; Small trick to get a better next random. Without this, on fast CPUs RDTSC will tend to give the same value in AL several times.
		dec     ah                              ; Busy-waits a variable number of iterations and leaves AH = 0.
		jnz     sleep

		and     al, 111b                        ; Limits the speed of movement to 0..7
		inc     ax                              ; Sets a new minimum and maximum speed of movement 1..8 (AH is 0 here, so incrementing AX is not a risk, but has smaller op. code)

		mov     [si+bp], al                     ; Sets the first byte only (the speed and direction, second byte is the current vertical position)

		; Grouping effect - overwriting the values in (smaller and smaller) groups with the values of the pile in the group's corner
		mov     bx, bp                          ; BX = offset of the current entry...
		and     bl, [GROUP_MASK]                ; ...with both offset bytes masked, giving the offset of the group's corner entry.
		and     bh, [GROUP_MASK]
		mov     ax, [si+bx]                     ; Copies the corner's speed and position word over the current entry.
		mov     [si+bp], ax

		inc     bp                              ; Smaller than ADD BP, 2
		inc     bp

		jns     ogenc                           ; Loops until BP reaches 8000h. It will run much more times than I really need (max 64*64*2 bytes) but again the code is smaller this way.

		; Next time, after the next 256 frames I will connect smaller and smaller groups
		sar     byte [GROUP_MASK], 1            ; Arithmetic shift keeps the top bit set, so one more mask bit becomes 1 each time.

skipnewrandoms:

		; (Re)initializing DS segment to be used as 256x256 frame buffer
		push    word 8000h
		pop     ds

		; Drawing top squares of the piles (I really mean squares ;-)
		; Outer loop - matrix of the piles, BH:BL = pile Y:X index, counted down from 63:63.
		; On exit BX = FFFFh and CX = FFFFh; later code relies on both.
		mov     bx, MAX_MATRIX_XY_INDEX*256+MAX_MATRIX_XY_INDEX
mxy:
		and     bl, MAX_MATRIX_XY_INDEX         ; Wraps BL back to 63 after DEC BX borrowed from BH.
		push    bx                              ; Keeps the pile index; BX is reused as an offset below.

		; Changing the position with the speed and direction value
		shl     bx, 1                           ; Two bytes per table entry.
		mov     ax, [cs:si+bx]                  ; AL is the speed and direction, AH is the current vertical position.
		add     ah, al
		jns     notturn                         ; Reached top or bottom position?
		not     ax                              ; Reverses the direction of the pile (and restores the position to the same end).
notturn:
		mov     [cs:si+bx], ax                  ; Store new values

		; Mapping the position from 0..127 to 0..63 because I will use the position as a color index directly
		; But I need 0..127 to calculate the positions without CMP AH, 63 above (that would require many more bytes)
		shr     ah, 1

		; Precalc bottom left pixel coordinate of the top square
		shl     bx, SHIFT_TO_PRECALC_TL-1       ; Together with the SHL above: pile index * 4 = pixel Y:X in BH:BL.
		mov     al, ah
		neg     ah
		add     bx, ax                          ; Offsets the square by the height: +height in X (low byte), -height in Y (high byte).

		; Adding a flag to the roof color to skip it from lighting later
		or      al, 10000000b

		; Inner loop - top of a pile in a matrix, CH:CL = Y:X inside the square, counted down from 3:3.
		mov     cx, MAX_PILE_XY_INDEX*256+MAX_PILE_XY_INDEX
sxy:
		and     cl, MAX_PILE_XY_INDEX           ; Wraps CL back to 3 after DEC CX borrowed from CH.
		mov     di, cx

		cmp     [di+bx], al                     ; A higher color value at the same pixel means there is a higher pile there.
		jnb     skippx                          ; Keeps the existing pixel when it is not lower than the new one.
		mov     [di+bx], al
skippx:
		dec     cx
		jns     sxy                             ; Ends with CX = FFFFh.

		pop     bx                              ; Restores the pile index.

		dec     bx
		jns     mxy                             ; Ends with BX = FFFFh.

		; Setting ES to a temporary buffer for lighting the two sides of the piles
		push    word 7000h
		pop     es

		; Very simple/stupid/fake rotation by 45 degrees
		; color at x:y = color at x-y:y+x
		; Copies the frame buffer (DS) into the temporary buffer (ES), one byte per iteration.
		; NOTE(review): MOVSB is expected to increment SI/DI, i.e. DF must be clear here - confirm for the first frame.
		;xor        di, di                      ; Starting value (almost) doesn't matter, it will run 65535 times.
		;xor        cx, cx                      ; CX is ffffh here.
rotate:
		mov     ax, di                          ; DI = destination offset, high byte = Y, low byte = X.
		xchg    al, ah                          ; AL = Y, AH = X
		neg     al                              ; AL = -Y
		mov     si, di
		add     si, ax                          ; Source offset: low byte = X-Y, high byte = Y+X (plus any carry from the low byte).
		movsb
		loop    rotate                          ; Leaves CX = 0.

		; Switching the purpose of the segments
		; Afterwards GS = the frame buffer (old DS) and DS = ES = the temporary buffer holding the rotated image.
		push    ds
		pop     gs
		push    es
		pop     ds

		; Adding "body" to piles in place in the temporary buffer (DS = ES), while clearing the frame buffer (GS).
		; BX = pixel offset (high byte = Y, low byte = X). The loop walks down a column first, then moves to the next column.
		inc     bx                              ; BX arrives with FFFFh value here, it's smaller than XOR.
		mov     ah, 0                           ; Default color of the pixel "above" for the first row.
pbxy:

%ifdef SHADING
		; First step: decreasing the color( = lightness) from above downwards
		mov     al, [bx]                        ; Reads the current value.
		and     al, 01111111b                   ; Removes the flag from the color of the top squares.

		cmp     al, ah                          ; Comparing to the pixel above (kept in AH, while 0 default for the first row almost doesn't matter) .
		jnb     untouchjustsave                 ; Reached something taller or at the same vertical position?

		mov     al, ah
		dec     ax                              ; Decreasing the color of the pixel above. AH = AL >= 1 here (AL was below AH before the MOV),
		                                        ; so this only lowers AL, cannot go below 0 and never yields AX = 0.
		                                        ; The former "JNZ cok / INC AX" clamp was therefore unreachable and was removed (3 bytes saved).
		mov     [bx], al                        ; Sets the pixel to the new darker value.
untouchjustsave:
		mov     ah, al                          ; Saves the value as "above" pixel
%endif

		; Second step: add lighting comparing current and the pixel to left (so simple, if increasing that must be the left side, otherwise the right side)
		; Current, unmasked value is in the AH!
		mov     di, bx
		dec     di                              ; Getting the left pixel; its final value is written back to the same place by STOSB below.
		mov     al, [di]

%ifdef LIGHTING
		test    al, al                          ; Masked? Then that is the top of a pile, skip!
		js      next

		cmp     al, ah                          ; Divide the color index of body pixels by 4 on the left side and by 2 on the (not so darker) right side
		jg      rightside
		shr     al, 1
rightside:
		shr     al, 1
next:
%endif
		and     al, 01111111b                   ; Removing the mask finally.

		stosb                                   ; Storing the final pixel in ES (same segment as DS here); DI becomes BX again.
		mov     byte [gs:di], 0                 ; Clearing the frame buffer for the next frame

		xchg    bl, bh                          ; Stepping to next row first then to the next column
		inc     bx
		xchg    bl, bh                          ; XCHG leaves the flags alone, so JNZ still tests the result of INC.
		jnz     pbxy                            ; Ends after all 65536 pixels, when BX wraps to 0.


		; Creating depth of field effect
		; Basically this is Rrrola's solution but executed in more rounds for the upper part to increase the "distance"
		; Each round averages every byte with a neighbour, in four passes: offsets +257, -257, +2 and -2.
		; DX is used both as the start offset and as the byte count; only DH is set here, DL is whatever earlier code left in it.
%ifdef DEPTH_OF_FIELD
		mov     dh, 120                         ; Starting bottom Y position for blur
blur0:
		mov     bx, 257                         ; First neighbour offset of the round.
blur1:
		std                                     ; Sets DF for index decrementation in string functions.
		mov     si, dx                          ; Starts at the bottom of the region to blur.
blur2:
		mov     cx, dx                          ; Number of bytes to process in this pass.
blur3:
		lodsb
		add     [si+bx], al                     ; Sum of the two pixels; CF holds the 9th bit...
		rcr     byte[si+bx], 1                  ; Rotates 9 bits (CF + 8 bits) right once, i.e. halves the 9-bit sum.
		loop    blur3

		cld                                     ; Clears DF to turn the direction in string functions.
		neg     bx                              ; Switches to opposite neighbour (+257/-257, +2/-2).
		js      blur2                           ; To run with -257 after 257 and -2 after 2 but only once (stops when sign bit is 0)

		shr     bx, 7                           ; Converts 257 to 2 (and 2 to 0, which ends the round).
		jnz     blur1

		sub     dh, 24                          ; Moves the bottom Y position up for the next round.
		jnz     blur0                           ; One more round of blur till the top (DH = 120, 96, 72, 48, 24).
%endif


		; Projecting the 256x256 frame to the 320x200 screen
		; Copies one byte per screen offset DI from the temporary buffer (DS) to video memory (ES).
		push    word 09FFFh                     ; Should be 0A000h but moved left a bit (by 16 pixels to make the first row/column error less visible ;-)
		pop     es

		;dec        cx                          ; CX is 0 here, perfect for loop through a whole fragment (256x256 frame)
		;xor        di, di                      ; Starting value in DI doesn't matter.
dxy:
		; 0CCCDh was Rrrola's original trick to stretch 256x256 to 320x256,
		; but I want to skew the result a bit to simulate better perspective.
		mov     ax, 0ccbdh

		mul     di                              ; After it DH:DL can be used as Y:X source coordinate in the 256x256 frame (without further multiplication).
		mov     si, dx

%ifdef LAMP_EFFECT
		lodsb
		cmp     al, 63-7                        ; Only the brightest colors keep their value...
		jnb     lok
		shr     al, 1                           ; ...everything else is halved.
lok:
		stosb
%else                                           ; Was a condition-less %elif; %else is the directive that takes no condition.
		movsb
%endif

		loop    dxy                             ; CX = 0 on entry, so this runs 65536 times.

		; Checking ESC
		in      al, 60h                         ; Reading the keyboard port.
		cbw                                     ; Sign-extends AL so the whole AX can be tested with a 1-byte DEC.
		dec     ax                              ; ESC? (AX becomes 0 only when the byte read was 01h)
		jnz     mainloop

		; Restoring the text mode screen
%ifdef COMPATIBILITY
		mov     al, 03h                         ; AX = 0 after the DEC above, so AH = 0 again, as for the mode set at program start.
		int     10h
%endif

		; Quit
		ret                                     ; NOTE(review): presumably the usual .COM exit (returns through the 0000h word DOS leaves on the stack to INT 20h at PSP:0) - confirm.

GROUP_MASK      db  -MAX_MATRIX_XY_INDEX                ; It helps to group the piles. Starts as 0C1h; ANDed with both bytes of a table offset, then SAR-ed by 1 after each new set of speeds.
FRAME_COUNTER   db  0ffh                                ; Incremented at the beginning of main loop, so it will be 0 before drawing the first frame
SPEEDS_AND_VPOS:									; To store the speed, direction and current vertical positions of the piles
                                                        ; Two bytes per entry: speed and direction first, then vertical position.
                                                        ; No storage is reserved here: the code uses the memory that follows the program image.