; Bonz's third attempt at an entry for HC #16...
;
; 229:
; My first attempt used clever instruction sequences to implement the
; moving rules on the grid, precisely as explained in the rules.  This was
; basically an inferno of aam & aad :-)
;
; 202:
; Starting with this one instead I preprocess the command line into a more
; friendly 8x8 grid of partially overlapping hexagons at a distance of 36
; pixels and 18 scanlines.
;
; To map the 4x8 grid onto it, I use every other hexagon (since there
; must be 72 pixels between them), and in particular even hexagons for
; the even rows and odd hexagons for the odd rows (since they must be
; shifted to the right).
;
; Once we do this, movement becomes a lot easier: 123789 respectively
; are +7 +16 +9 -9 -16 -7 in the 8x8 grid.  Testing out of bounds moves
; is also easier because they end on unused hexagons in the 8x8 grid, which
; are encoded like black hexagons.  This saves ~20 bytes even though about the
; same amount is spent preprocessing the command line.  Storing the grid as
; words saved a few more bytes because then it is exactly 128 bytes long.
;
; 172:
; This was done in two parts.  First I rewrote the hexagon code to do filled
; hexagons.  I had completely failed to estimate the size of the filled-hexagon
; routine and in my two previous attempts I had drawn hollow hexagons.  I
; can see why that approach was wrong: not only can the drawing be optimized
; a lot more, but I also needed an extra loop to draw a filled hexagon out
; of hollow ones, and a 6-byte table containing the steps for the different
; sides.
;
; I got 7 more bytes by removing the short loop that I used to find the word
; holding the cursor.  Previously, I could not store the cursor position while
; drawing the grid: not on the stack because I stored hexagon data there (and
; then popped hexagons as I drew them, in a separate loop), and not in a
; register because none was left.  Now I have both, so I think the
; filled-hexagon optimization was important also because it gave me more
; freedom.

	org	100h

	; Startup: expand the command-line tail into a grid of words stored at
	; offset 0A000h of the current segment.  Each grid word holds either a
	; command-line character (used cell) or 0 (unused cell) in its low byte.
	;
	; Nothing below initialises SI, BH, CX, AH or ES; the code depends on the
	; values present at .COM entry (presumably SI=100h, BH=0, CX=00FFh, AH=0
	; and ES = program segment -- TODO confirm for the target DOS).
	
	mov	bl, [si-80h]		; BL = byte at SI-80h (the command-tail length
					; at PSP:80h, if SI=100h as assumed)
	lea	si, [bx+61h]		; SI = 81h + (length - 20h): skips the leading
					; spaces, assuming 20h hexagon characters follow
	mov	di, 0a000h		; DI = offset of the workspace
	mov	dx, 0aa55h		; alternation mask: 1 = used cell, 0 = unused
	push	di			; same value is popped into ES after the loop

;	mov	cl, 40h			; Left out: looping 255 times does no harm
copy:
	rol	dx, 1			; CF = next mask bit; the 16-bit pattern repeats
					; every 16 cells (even cells used in one row of
					; 8, odd cells used in the next)
	salc				; undocumented opcode: AL = 0 if CF clear,
					; 0FFh if set (then overwritten by LODSB)
	jnc	store			; mask bit 0: unused cell, store AL = 0
	lodsb				; mask bit 1: AL = next command-line character
store:
	stosw				; append AX to the grid, DI += 2
	loop	copy			; CX is not set here, see the note above
	pop	es			; ES = 0A000h (pushed DI): video segment
	
gameloop:
	; Redraw the whole board, then fall through into the key loop.
	; AH must be 0 on entry: the first time this relies on the startup value
	; (the code above never writes AH), afterwards AAD in the key loop has
	; cleared it.
	mov	al, 13h			; AX = 0013h: BIOS "set video mode 13h"
	int	10h			; (the board is redrawn from scratch after it)
	
	mov	si, es			; SI = 0A000h = offset of the grid in DS
	cwd				; DX = 0 (sign-extension of AX, assumed to be
					; non-negative here); DL collects the colours
	mov	bp, 320*2+18		; BP = VRAM offset of the first cell's top-left

	; Cell encoding (low 3 bits of the low byte): bits 0-1 = colour,
	; bit 2 = cursor is on this cell, 0 = black / unused (not drawn).
draw:
	mov	ax, 708h		; AL = 8: border colour when there is no cursor
					; AH = 7: mask for the cell bits
	and	ah, [si]		; AH = cell bits; ZF set if the cell is empty
	jz	nohex
	sahf				; SAHF copies AH bit 2 into PF: PF = cursor bit
	jnp	nocursor
	pusha				; cursor cell: save all registers (SI = address
					; of this cell); matched by the POPA after the
					; loop, so exactly one cursor cell is assumed
	add	ax, 0fc07h		; AH -= 4 (clear cursor bit, no carry out of AL)
					; AL = 0Fh: border colour for the cursor cell
nocursor:

	mov	bx, 1107h		; BL = 7  = side/2 - 1 (first line is 8 words)
					; BH = 17 = diagonal + 1 (widening lines)
	mov	di, bp
	call	hex			; outer hexagon in the border colour
					; returns AL = 0, BX = 1007h, AH unchanged
	lea	di, [bp+320]		; one scanline lower, one line shorter per half:
	call	hex			; black interior, leaving a 1-pixel border
					; returns BX = 0f07h
	mov	al, ah			; AL = colour
	add	ah, ah			; AH = 2*colour, so AX = 201h*colour
	sub	bx, ax			; BH = 15 - 2*colour, BL = 7 - colour
	or	dl, al			; accumulate the colours seen so far

	; We must compute DI=AL*281h.  We can save a CBW by observing
	; that AX=201h*AL so AX*81h = AL*10281h (whose low 16-bits are
	; what we want).  281h = 2*320 + 1: two scanlines down and one
	; pixel right per colour unit.

	imul	di, ax, 81h		; DI = colour * 281h (low 16 bits)
	lea	di, [bp+di+640]		; plus two more scanlines below the cell top
	call	hex			; coloured hexagon; smaller for larger colours

nohex:
	add	bp, byte 36		; VRAM address of the next cell (36 pixels right)
	lodsw				; SI += 2; the loaded word itself is not used

	; Note: XCHG+TEST AL+XCHG is as big as TEST SI (4 bytes),
	; but gives us the ability to test bit 7 of SI with a JNS
	; (XCHG does not modify the flags set by TEST).

	xchg	ax, si
	test	al, 08fh		; ZF: low nibble and bit 7 of SI's low byte are 0
					;     (start of a row of 8 words = 16 bytes)
	xchg	ax, si			; SF: bit 7 of SI's low byte (past the 128 bytes)
	jnz	nostart
	lea	bp, [bp+320*18-288]	; next row: 18 scanlines down, 8*36 pixels back
					; (LEA keeps the flags from TEST)
nostart:
	jns	draw			; loop until SI's low byte reaches 80h

	dec	dx			; DX = OR of the colours: 0 becomes 0FFFFh, SF=1
	popa				; restore the registers saved at the cursor cell
					; (SI = cursor cell); flags are not affected
	mov	dl, 3			; DL = 3: colour bitmask for the key loop and
					; text mode number for quit (MOV keeps flags)
	js	quit			; every drawn cell had colour 0: game over

key:
	; Read a key and try to move the cursor.  Expects SI = address of the
	; cursor cell and DL = 3.  Keys 1,2,3,7,8,9 move by +7,+16,+9,-9,-16,-7
	; grid words; 4,5,6 are ignored; key codes below 2Ah quit.
	mov	ah, 8			; DOS function 08h: read a character into AL
	int	21h			; (AH is assumed to still be 8 afterwards)

	add	al, 7-'1'		; '1'..'9' -> 07h..0Fh; CF set if code >= 2Ah
	jnc	quit			; Note that AL >= 10 for keys 4-9, and
					; AL & 15 >= 7 for valid keys

	aas				; low nibble > 9 (keys 4-9): AL = 4..9, AH = 7
					; otherwise (keys 1-3): AL = 7..9, AH stays 8
	cmp	al, 7			; AL = 4..6 means key 4-6: not a move
	jb	key
	aad	10h			; AL = AH*16 + AL, AH = 0:
					; keys 1-3 -> 87h..89h, keys 7-9 -> 77h..79h
					; (80h + 7..9 and 80h - 9..7 respectively)

	test	al, 1			; odd AL = diagonal move (7 or 9 words)
	jnz	diagonal
	shl	al, 1			; vertical move: 8 -> 16 words (two rows);
					; 88h -> 10h, 78h -> 0F0h
diagonal:

	mov	bx, si			; BX = cursor cell; only BL is changed below
	add	bl, al
	add	bl, al			; BL += 2*AL bytes = AL words; the 80h bias
					; doubles to 100h and drops out of the byte
	js	key			; low byte >= 80h: outside the 128-byte grid

	test	[bx], dl		; colour bits (DL = 3) of the target cell
	jz	key			; zero: black or unused cell, bad move
	and	[si], dl		; old cell: keep the colour, drop the cursor bit
	add	[bx], dl		; new cell: +3 = -1 for the move, +4 for the cursor
	jmp	short gameloop		; AH = 0 here (cleared by AAD), as gameloop needs

quit:
	; Return to text mode and terminate.  DL = 3 on both paths that reach
	; this label; DH is expected to be 0 so that AH = 0 (set video mode).
	xchg	ax, dx			; AX = 0003h: BIOS "set video mode 3" (text)
	int	10h
	; There is deliberately no RET here: execution falls through into the
	; hex routine below and leaves through its RET.  Presumably this relies
	; on the return word DOS leaves on a .COM program's stack -- TODO confirm.
;	ret				; and quit by falling through

	; hex: draw a filled hexagon with flat top and bottom at ES:DI.
	;
	; Input:  BL = side/2 - 1 (the first line is BL+1 words wide)
	;         BH = diagonal + 1 (number of lines in the widening half)
	;         DI = address of the left end of the first line
	;         AL = color
	;         CH must be 0 (only CL is loaded), the direction flag must be
	;         clear and ES must address the target memory; none of these is
	;         set up inside the routine.
	; Output: CL=BL, BH decremented by one, AL=0, DI destroyed, CF=1;
	;         AH is preserved.
	;
	; For BH > 0 the routine draws BH lines growing from BL+1 to BL+BH words,
	; each starting one pixel further left, followed by BH-1 lines shrinking
	; back to BL+1 words, each starting one pixel further right.
hex:
	mov	cl, bl

line:
	inc	cx			; widening half: one more word (2 pixels)
line2:
	pusha				; keep AX, CX and DI across the fill
	mov	ah, al			; both bytes of AX = color
	rep	stosw			; draw CX words = 2*CX pixels
	popa
	add	di, 319			; next scanline, one pixel to the left
	dec	bh			; BH times
	jg	line			; In the bottom half (BH<=0)?
					; If not, loop and increase the side
	scasw				; used only for DI += 2: the next line starts
					; one pixel to the right (319 + 2 = 321)
	dec	cx			; and decrease the side
	cmp	bl, cl			; stop once CL is back to BL, i.e. after a
	jb	line2			; line as wide as the topmost one was drawn

	salc				; CF = 0 after the final CMP, so AL = 0
	neg	bh			; BH was -(original - 1): BH = original - 1,
					; and NEG of a non-zero value sets CF
	ret
