; Hugi Compo 22 - NotTetris?
; By Exophase
;
; This is a MESS. Definitely not a simple competition by any stretch of the
; imagination... oh well ^_^
;
; Throughout June I worked on it to get it around ~550 bytes but it had serious
; problems with the test suite and other implementation issues. With other things
; on my mind I kinda burned out on it, but then with only a few days remaining
; and some new ideas, I got back up on it again. Passing the test suite was a
; NIGHTMARE. There are a ton of quirks that you have to get just right that just
; made the experience borderline traumatic :p But, here it is. It's not a compo
; winner kind of entry, THAT'S for sure, but I'm still proud of my accomplishments.
; So there. :D
;
; And on another note.. I can't believe I never realized until now that there are
; 1 byte xchg's with (e)ax...

; Build with NASM. To get a version that works on its own, simply build:
; nasm entry.asm -f bin -o entry.com
; And to get a version that works with testerx, build:
; nasm entry.asm -f bin -Dtester_version -o entry.com

BITS 16                                 ; 16-bit code

align 1

org 100h                                ; labels are offset by 100h

; Interrupt numbers used for keyboard and timer access.
; A plain build (no -Dtester_version) uses the BIOS services 16h / 1Ah;
; a build with -Dtester_version uses interrupts 60h / 61h instead.
%ifndef tester_version
  %define int_get_key 16h
  %define int_get_ticks 1Ah
%else
  %define int_get_key 60h
  %define int_get_ticks 61h
%endif

start:
  ; Program entry. Switches to video mode 13h, parks the random seed on the
  ; stack and builds the playfield: everything is first flooded with the well
  ; colour, then 23 rows of 10 empty cells are carved out of it.
  ;
  ; NOTE(review): only AL is loaded before int 10h, so this relies on AH
  ; already being 0 when the program starts - confirm for the target DOS.
  mov al, 13h                           ; screen mode 13h
  int 10h                               ; set/clear screen

  ; The seed lives on the stack; .new_piece pops it, advances it and pushes
  ; it back. Restarting a game jumps to .beginning, so the seed is NOT reset.
  push word 12345

 .beginning:

  mov al, 100                           ; tile colour of the well (highlight shade)
  mov di, field_underrun + (12 * 1) + 2 ; start of the flood fill

  mov ch, 2                             ; cx = 200h + whatever cl held: at least 512 bytes
  rep stosb                             ; flood with the well colour, leaves cx = 0

  mov di, field_underrun + (12 * 3) + 3 ; = field + 1, first empty cell of the top row
  xchg ax, cx                           ; ax <- 0 (cx is 0 after rep); al = 0 means "no tile".
                                        ; cx receives the old ax, so the rep below needs the
                                        ; old ah to have been 0 (ch must stay 0).
  ;mov al, 0
  mov bp, 23                            ; 23 rows to carve; bp ends at 0, which is also the
                                        ; initial score
 .wloop:
  mov cl, 10
  rep stosb                             ; carve out 10 empty cells
  inc di                                ; skip the two wall bytes to reach the next row
  inc di
  dec bp
  jnz .wloop

 .new_piece:
  ; Spawns the next tetromino.
  ;   1. zero the 5x5 piece map (current_piece)
  ;   2. advance the seed kept on the stack: seed = seed * 9421 + 1
  ;   3. piece index = seed mod 7, colour = 80 + 2 * index
  ;   4. expand the 8-bit encoding into rows 2 and 3 of the 5x5 map
  ;   5. test the spawn position; a collision there means game over
  ; Relies on ah = 0 and ch = 0 on entry (both cwd and the rep/div below
  ; use the full 16-bit registers).
  mov al, 0
  mov di, current_piece                 ; location of the tetromino map
  mov si, di                            ; si = current_piece for test_collision / xor_blit
  mov cl, (5 * 5)
  rep stosb                             ; zero it out

  cwd                                   ; ax = 0 here, so dx = 0 for the division below
  pop ax                                ; current random value
  imul ax, 9421                         ; multiply (signed, low 16 bits are all that is kept)
  inc ax                                ; ax = new random value
  push ax                               ; store it back on the stack

  mov cl, 7                             ; cx = 7 (ch is 0 after the rep above)
  div cx                                ; dx = random value mod 7

  mov di, dx                            ; di = tetromino index 0..6
  xchg ax, dx                           ; ax = index, dx = quotient (discarded below)
  shl ax, 1
  add al, 80                            ; ax = highlight colour of this tetromino (80..92)

  mov dl, [piece_0 + di]                ; dl = tetromino encoding; dh is leftover quotient

  mov di, current_piece + (2 * 5)       ; expand into row 2, column 0 of the 5x5 map
  mov cl, 128                           ; cx = bit mask, walks from bit 7 down to bit 0
 .expand_loop:
  test dx, cx                           ; is this bit set? (cx <= 128, so dh never matters)
  push ax                               ; push does not touch the flags
  jnz .expand_loop2                     ; bit set: store the colour
  mov al, 0                             ; bit clear: store an empty cell
 .expand_loop2:
  stosb                                 ; store the cell
  pop ax                                ; restore the colour
  test cl, 16                           ; just stored the 4th cell of the first scanline?
  jz .ex2
  inc di                                ; skip column 4 so the second nibble starts row 3
 .ex2:
  shr cx, 1                             ; next bit; zero once bit 0 has been handled
  jnz .expand_loop

  mov di, field_underrun + 12 + 6       ; field position where the 5x5 map spawns
  cwd                                   ; ah = 0, so dx = 0: dl = 0 means speedup is off

  call test_collision                   ; does the fresh piece already overlap something?
  jnc .keep_going
  dec dx                                ; dx = 0FFFFh: sign bit of dl marks "game over"
  jmp short .key_loop                   ; only wait for keys from now on
 .keep_going:
  call xor_blit                         ; draw the piece into the field

  mov bl, 10                            ; ticks per drop = 10 - score / 256
  mov ax, bp
  sub bl, ah                            ; ah is a cheap ax / 256
  mov dh, bl                            ; dh keeps the reload value for the tick counter

  ; Confirmation - ah is under 128 because score can't go over 2560
                            
 .main_loop:

  ; One iteration per clock tick: draw_screen repaints the field and does not
  ; return until the tick counter has changed.
  ;
  ; Register roles while a piece is falling:
  ;   si = current_piece (5x5 map)      di = field position of the map
  ;   bl = ticks left until next drop   dh = reload value for bl
  ;   dl = 0 normal, non-zero speedup   bp = score
  ;
  ; Confirmation - ah is under 128 because there aren't any keycodes that will trigger it
  ; for over 128. (draw_screen zeroes dx with cwd, which needs bit 7 of ah clear.)

  call draw_screen
   
  call xor_blit                         ; erase the piece (XOR of the same data removes it)

  push di                               ; remember the current position; every path below
                                        ; leaves exactly one word on the stack for .no_key
      
  test dl, dl                           ; if the speedup is set then it's going to drop
                                        ; regardless of the tick count
  jnz .do_drop
  dec bx                                ; decrement remaining ticks (16-bit: this presumably
                                        ; relies on bh being 0 - verify)
  jnz .no_drop                          ; if not zero yet, don't drop
 .do_drop: 
  add di, byte 12                       ; move the piece to the next row (rows are 12 bytes)
  mov bl, dh                            ; reload the tick counter
  call test_collision                   ; see if it collides
  jnc .no_drop2                         ; no collision: the drop stands
  pop di                                ; collision: restore the old field position..
  call xor_blit                         ; and blit the piece back; it is now part of the field
  
 .line_clear:
  ; Scans the 23 playfield rows from the top down. Each complete row is
  ; removed by shifting everything above it down one row and blanking the
  ; top row; the score is increased and the game pauses for dh ticks.
  ;   ah = rows left to scan            al = 0 (scasb pattern / stosb fill)
  ;   bl = points for the next cleared row: 20, then doubled per row
  mov ax, 1700h                         ; ah = 17h = 23 rows, al = 0

  mov di, field + 1                     ; first cell of the top row
  mov bl, 20                            ; the initial amount of points to add
 .line_clear2:
  push di                               ; save the start of this row
  mov cl, 11                            ; 10 cells plus the right wall (always non-zero)
  repne scasb                           ; stop at the first empty cell, if there is one
  dec cl                                ; cl was 0 only if all 11 bytes were non-zero
  jns .lc_next                          ; an empty cell was found: not a complete line

 .clear:
  ; di now points at the left wall of the NEXT row (row start + 11).
  ; Copy backwards so the overlapping move towards higher addresses is safe.
  ; The count is measured from `field`, so the source range is
  ; field+1 .. si: the very first cell (field+1) must be moved down as well,
  ; otherwise the leftmost cell of row 1 would keep its old contents.
  std                                   ; string ops run backwards from here
  lea si, [di - 12]                     ; si = left wall of the completed row
  mov cx, si
  sub cx, field                         ; cx = number of bytes in field+1 .. si
                                        ; (0 when the top row itself is complete)
  rep movsb                             ; shift everything above down by one row;
                                        ; ends with di = field + 12 and cx = 0
  dec di                                ; di = field + 10, last cell of the top row
  dec di
  mov cl, 10
  rep stosb                             ; blank the top row right-to-left (al = 0)
  cld                                   ; back to forward string ops

  add bp, bx                            ; update the score
  shl bl, 1                             ; the next cleared row is worth double

  mov cl, dh                            ; cx = dh (ch is 0 after the rep above): pause for
                                        ; current_speed ticks
 .wait_loop:
  call draw_screen                      ; update the screen and wait a tick
  loop .wait_loop

 .lc_next:
  pop di                                ; back to the start of this row..
  add di, byte 12                       ; ..and on to the next one down
 .lc_next2:
  dec ah                                ; one row fewer to scan
  jnz .line_clear2

  ; ah can be confirmed to be 0 here.

  jmp .new_piece                        ; and start a new piece

 .no_drop2:
  ; The drop succeeded: replace the saved position on the stack with the new one.
  pop ax                                ; discard the old di
  push di
 .no_drop:
  ; Stack top = position to fall back to if a key move turns out to collide.

  test dl, dl                           ; speedup active?
  jz .sw_next2
  inc bp                                ; then every tick is worth a point
 .sw_next2:

  mov ah, 1                             ; key service, function 1 (key status)
  int int_get_key
  jz .no_key                            ; zf set: nothing waiting

 .key_loop:
  ; Also entered directly from .new_piece in the game-over state (dl negative).
  cbw                                   ; ah = 0 as long as al < 128: function 0 (get key)
  int int_get_key
  cbw                                   ; zero out ah (again assumes al < 128)
  cmp al, 1Bh                           ; escape?
  jne .next
  mov al, 3                             ; ax = 0003h: set video mode 3 (80x25 text)
  int 10h
  int 21h                               ; exit to DOS; relies on ah still being 0 here.
                                        ; ret is not an option, the stack holds the seed.

 .next:
  cmp al, ' '                           ; space?
  jne .next2
  test dl, dl
  js .beginning                         ; game over: space starts a new game
  inc dx                                ; otherwise it switches the speedup on (dl != 0)
 .next2:
  test dl, dl                           ; game over: only space / escape do anything,
  js short .key_loop                    ; so go back and wait for another key
  cmp al, 'j'
  je short .key_l                       ; j: move left
  cmp al, 'k'
  je short .key_rotate                  ; k: rotate
  cmp al, 'l'
  jne short .no_key                     ; anything but l: ignore the key

 .key_r:                                ; l: move right = +2 followed by the shared -1
  inc di
  inc di
 .key_l:
  dec di
  call test_collision                   ; is the new column free?
  jnc .no_key                           ; yes: keep the new di
  jmp short .unroll                     ; no: restore di from the stack
  
 .key_rotate:
  ; Rotates the 5x5 piece map a quarter turn about its centre cell (2, 2) into
  ; current_piece_alt and adopts the result only if it does not collide.
  cmp [si + 12], byte 80                ; centre cell holds colour 80 only for the square
  jz .no_key                            ; the square never rotates

  pusha                                 ; everything below is scratch
  push si                               ; current_piece, popped later as the copy destination

  push di                               ; field position, needed for the collision test
  mov di, current_piece_alt + (5 * 4)   ; bottom-left cell of the alternate map

  ; Source is read row by row; destination is written column by column from
  ; the bottom up, so source (row, col) lands at destination (4 - col, row).
  mov bl, 5                             ; 5 source rows
 .l0:
  mov cl, 5                             ; 5 source columns
 .l1:
  movsb                                 ; copy one cell (si + 1, di + 1)
  sub di, byte 6                        ; net effect: destination moves up one row
  loop .l1
  add di, byte 26                       ; bottom of the next destination column
  dec bx                                ; 16-bit dec: presumably relies on bh = 0 - verify
  jnz .l0

  mov si, current_piece_alt             ; test the rotated map..
  pop di                                ; ..at the unchanged field position

  call test_collision
  pop di                                ; di = current_piece (the si pushed above);
                                        ; pop leaves the carry flag alone
  jc .u2                                ; collision: keep the old orientation

  mov cl, 5 * 5                         ; no collision: adopt the rotated map
  rep movsb

 .u2:
  popa                                  ; restore the registers
                                        ; falling through to .unroll is fine: a rotation
                                        ; never changes di
 .unroll:
  pop di                                ; undo a rejected move: reload the saved position
  push ax                               ; filler so .no_key still has a word to discard

 .no_key:

  pop ax                                ; drop the saved position (or the filler)
  call xor_blit                         ; draw the piece at its final position

  jmp .main_loop                        ; next tick

; draw_screen: paints the 12 x 24 tile field as 8x8 pixel tiles, prints the
; score as four decimal digits, then waits until the tick counter changes.
; In:   bp = score; `field` holds one colour byte per tile (0 = empty)
; Out:  all general registers restored by popa; es = ds.
;       Execution then FALLS THROUGH into test_collision, so the carry flag
;       on return is test_collision's result for the caller's si / di.
; NOTE(review): the cl-only counter loads below assume ch = 0 on entry, and
; the first cwd assumes bit 7 of ah is clear - confirm for every call site.
draw_screen:

  push word 0A0A7h                      ; segment A0A7h = A000h + 2672 bytes, i.e. pixel
                                        ; (112, 8) with the 320-byte rows used below
  pop es                                ; load into es
 
  pusha                                 ; everything is scratch until the popa at the end
  
  mov si, field                         ; source: tile colours
  xor di, di                            ; destination offset within es

  mov bh, 24                            ; 24 rows
 
 .sloop1: 
  mov bl, 12                            ; 12 columns

 .sloop2:  
  lodsb                                 ; load current tile color
  ; Putting the ax unsigned confirmations to work to zero out dx...
  ; Yes, all this for one byte ^_^
  ;xor dx, dx
  cwd                                   ; dx = 0 (ah is below 128)
  cmp al, 0                             ; is the color 0?
  jz .go                                ; if so, keep dx zero'd
  dec dx                                ; dx is -1 if color is non-zero
 .go:

  mov ah, al                            ; keep the highlight colour in ah
                        
  and dx, 3048h                         ; non-empty tile: dh = 30h, dl = 48h; empty: both 0
                                        ; centre = highlight - dh, shadow = centre + dl

  mov cl, 7                             ; 7 runs for the top 7 rows of the tile
  push si                               ; we do still need si for other reasons, but..
  mov si, di                            ; si walks along the tile's top pixel row
 .tloop1: 
  push cx                               ; save loop counter so cx can be used for rep
  mov al, ah                            ; load highlight color
  mov [es:si], al                       ; write highlight into the top row, one pixel per run
  inc si                                ; next pixel on upper row
  stosb                                 ; write highlight, left column
  sub al, dh                            ; adjust to center color
  mov cl, 6                             ; write 6 center pixels
  rep stosb
  add al, dl                            ; adjust to shadow color
  stosb                                 ; write the right-hand shadow pixel
  add di, 320 - 8                       ; go to next row
  pop cx                                ; restore loop counter
  loop .tloop1

  mov cl, 8                             ; get the last row (8 pixels shadow)
  rep stosb

  pop si                                ; restore si

  sub di, (320 * 7)                     ; back up 7 pixel rows: top-left of the next tile
  dec bl                                
  jnz .sloop2                           ; loop
  
  add di, ((320 * 8) - 8) - (11 * 8)    ; go to the next row of tiles
  dec bh                                ; loop
  jnz .sloop1

  ; Update score
  mov ah, 2                             ; int 10h function 2: position the text cursor
  mov dx, 18                            ; dh = row 0, dl = column 18 (bh is 0 after the loop)
  int 10h                               ; interrupt 10, 2 (position text cursor)

  mov bl, 10                            ; bx is the divider, 10
  ;mov ax, bp                            ; get the score ready to be divided
  xchg ax, bp                           ; we can xchg because bp is restored soon
  mov cl, 4                             ; four iterations
 .store_score:
  cwd                                   ; sign-extend ax into dx (zero for scores < 32768)
  idiv bx                               ; ax = quotient, dx = lowest remaining digit
  or dl, 30h                            ; adjust to an ascii character
  push dx                               ; digits are pushed least significant first
  loop .store_score

  mov cl, 4                             ; write back the four digits
 .write_score:
  pop ax                                ; pops most significant digit first
  mov ah, 0Eh
  mov bl, 7                             ; foreground color is 7.. it's light-grey.
  int 10h                               ; interrupt 10, Eh (write text character) 
  loop .write_score

  ; Wait for clock tick

  cbw                                   ; zeroes ah, assuming al still holds the last digit
  int int_get_ticks                     ; interrupt 1Ah, 0 (cycles elapsed)
  mov bl, dl                            ; remember the low byte of the tick count
 .wait_loop:
  int int_get_ticks                     ; ask again; ah is not reloaded, so this presumably
                                        ; relies on the handler leaving ah = 0 - verify
  cmp bl, dl                            ; did it change?
  je .wait_loop                         ; no? Then keep trying.

  popa                                  ; restore all registers

  push ds                               ; restore es as ds
  pop es                                ; then fall through.. test for collision? Sure, why not.

; These were of course two functions, but so much of their code was the same that I could
; actually afford to turn them into one. The carry flag is what differentiates them (easy to
; switch on)

; test_collision returns carry as true if there's a collision between the current piece and its
; current place on the field. A collision is detected when both tile positions contain a color.
; This is checked with a bitwise test of the two bytes; it works because of the way the highlight
; color is stored: every color in use has bit 6 set. If the tetromino number alone were being
; stored, this wouldn't work because those numbers don't always share a set bit...

; xor_blit blits the current piece against its current position on the field using exclusive or..
; The reason XOR's are used is so that the piece can be blitted on top of itself to do a clear.

; test_collision / xor_blit share one loop over the 5x5 piece map; the carry
; flag selects the mode and has to survive every instruction in the loop.
; In:   si = 5x5 piece map, di = field position of its top-left cell
; Out:  test_collision: cf = 1 if any piece cell and field cell are both
;                       non-zero, cf = 0 otherwise; the field is not modified
;       xor_blit:       the piece is XORed into the field; returns cf = 0
;       All general registers are preserved (pusha / popa).
; NOTE(review): `loop` and `dec bx` use the full cx / bx, so ch = 0 and
; bh = 0 are assumed on entry - confirm for every call site.
test_collision:                         
  clc                                   ; test collision is carry off
  jmp short tc_internal                 ; we'll have to jump into the main part...

xor_blit:
  stc                                   ; and xor blit is carry on.
tc_internal:
  pusha                                 ; save off the registers
  mov bl, 5                             ; 5 rows
 .l0:
  mov cl, 5                             ; 5 columns
 .l1: 
  ;lahf                                  ; save off the flags. pushf/popf is not used here
                                        ; because then we'd have to popf on early exit to
                                        ; correct the stack. Fortunately ah isn't important here.
  lodsb                                 ; grab the current tetromino tile
  jc .xor_blit                          ; carry set: we are blitting
  test [di], al                         ; carry clear: check for collision. test leaves cf = 0.
  jnz collision                         ; both cells non-zero: collide (cf = 0, cmc makes it 1)
  jmp short .next                       ; moving right along...
 .xor_blit:                             ; blit it.
  xor [di], al                          ; xor clears cf..
  stc                                   ; ..so set it again to stay in blit mode
 .next:                                 
  ;sahf                                  ; restore the flags. We need to keep the carry to see
                                        ; which function we're doing, afterall. 
  inc di                                ; move to the next "cell"
  loop .l1                              ; loop
  lea di, [di + (12 - 5)]               ; move to the next row.. lea is used because it doesn't
                                        ; mess up the flags. This is equivalent to add di, 7
                                        ; Fortunately, it's the same byte size.
  dec bx                                
  jnz .l0                               ; loop
  stc                                   ; normal exit: cf = 1 so the cmc below returns cf = 0

collision:
  popa                                  ; restore registers
  cmc                                   ; collision path arrives with cf = 0 -> returns cf = 1
  ret
      
; Tetromino encodings:
; Only the starting orientation is encoded. Each one is encoded in a 1 byte
; bitmask where each nibble corresponds to a 4 wide scanline; thus the
; encoding is 4x2 tiles. It's always copied to row 2, column 0 of a 5x5 block,
; so the position of the piece within its 4x2 encoding determines how it's
; rotated, because the 5x5 block is always rotated about its center of (2, 2).
; The 0 block isn't actually rotated at all since its rotation point isn't
; at whole coordinates...

;       _ _       _ _     _ _ _     _ _       _ _ _ _     _ _ _     _ _ _
;      |_|_|    _|+|_|   |_|+|_|   |_|+|_    |_|_|+|_|   |_|+|_|   |_|+|_|
;      |_|_|   |_|_|         |_|     |_|_|               |_|         |_|

; One byte per tetromino, indexed from piece_0. High nibble = row 2 of the
; 5x5 piece map, low nibble = row 3; bit 7 / bit 3 is column 0, and column 4
; is never set by the encoding. The pictures show the expanded 5x5 map.

piece_0: db 0110_0110b                            ;  .....
                                                  ;  .....
                                                  ;  .##..
                                                  ;  .##..
                                                  ;  .....

piece_1: db 0011_0110b                            ;  .....
                                                  ;  .....
                                                  ;  ..##.
                                                  ;  .##..
                                                  ;  .....

piece_2: db 0111_0001b                            ;  .....
                                                  ;  .....
                                                  ;  .###.
                                                  ;  ...#.
                                                  ;  .....

piece_3: db 0110_0011b                            ;  .....
                                                  ;  .....
                                                  ;  .##..
                                                  ;  ..##.
                                                  ;  .....

piece_4: db 1111_0000b                            ;  .....
                                                  ;  .....
                                                  ;  ####.
                                                  ;  .....
                                                  ;  .....

piece_5: db 0111_0100b                            ;  .....
                                                  ;  .....
                                                  ;  .###.
                                                  ;  .#...
                                                  ;  .....

piece_6: db 0111_0010b                            ;  .....
                                                  ;  .....
                                                  ;  .###.
                                                  ;  ..#..
                                                  ;  .....

section .bss

; Uninitialised working memory. Rows of the playfield are 12 bytes wide.
;
; The start-up flood fill begins inside field_underrun and writes at least
; 512 bytes, so it runs through field and current_piece and on into buffer;
; buffer is what keeps that overrun harmless.
; The author notes that, after several revisions, most of this layout is
; obsolete. Of the second group only current_piece_alt is referenced by the
; code in this file.

field_underrun:     resb 12 * 3 + 2     ; slack in front of the field; pieces spawn here
field:              resb 12 * 24        ; 24 rows x 12 columns, one colour byte per tile
current_piece:      resb 5 * 5          ; 5x5 map of the falling tetromino
buffer:             resb 1024           ; absorbs the flood-fill overrun

field_underrun_alt: resb 12 * 3 + 2
field_alt:          resb 12 * 24
current_piece_alt:  resb 5 * 5          ; scratch 5x5 map used while rotating
buffer2:            resb 1024

ticks:              resw 1
speedup:            resb 1
