; Assembler set-up and shorthand equates for a tiny DOS .COM program
; (origin 100h, CS = DS = the single segment `code`).
; NOTE(review): `segment code` (directive first) is paired with `code ends`
; (name first) at the bottom of the file - confirm which assembler / mode
; this is meant to build with.
        segment code
        assume cs:code,ds:code
        org 100h
        ; 186+ instruction forms are used below (push imm16, shift by imm8).
        ; Presumably placed after SEGMENT so the segment stays 16-bit - confirm.
        .586

; Typing shortcuts used throughout the file.
b       equ byte ptr
w       equ word ptr
s       equ short
o       equ offset

        ; Real-mode offset used as the disk transfer address for the file
        ; read and, afterwards, as the base (DI) of all emulated memory.
        source equ 0fffh

; Emulated CPU registers, addressed relative to SI.  These are only valid
; after the LODSW in @emulate, i.e. while SI = source+0f00h+2, which makes
;   A   = word at emulated address 0f00h (low byte A, high byte status)
;   STK = word at emulated address 0f02h
;   PC  = word at emulated address 0f04h
; The trailing numbers look like usage counts kept by the author - unverified.
A       equ [si-2h] ;3
STK     equ [si]    ;8
PC      equ [si+2]  ;5

        ; Non-zero assembles the register pre-load at @start (debugger aid).
        debug     =0

; Program entry point.  All jump-table vectors are stored as one-byte
; offsets from this label.
; When `debug` is non-zero the registers are forced to the values the
; following code takes for granted at entry:
;   ax=0000 bx=0000 cx=00ff si=0100 di=fffe
@start:
        IF debug
                xor ax,ax
                mov bx,ax
                mov cx,0ffh
                mov si,100h
                mov di,0fffeh
        ENDIF

; +----------------------------------------------------------------------+
; |  ax=0000  bx=0000  cx=00ff  dx=xxxx  di=fffe  si=0100  bp=0000       |
; +----------------------------------------------------------------------+
; |  read the .spu file                                      023 bytes   |
; |   This one was stolen from int-e's hc6 entry, thanx for              |
; |   that, it saved me many bytes.. To understand either                |
; |   read int-e's commented source or debug the program..               |
; +----------------------------------------------------------------------+
;
; Summary of the trick: set the DTA to `source`, open the file through the
; FCB at DS:005Ch, enlarge the FCB record size and read a single record.
; Relies on bx=0 on entry (see the register list above) and on the first
; INT 21h leaving AL untouched - both assumed, not checked.

	pop	ES		;;;TAD	ES=0000, sp=0000 (ES=1 byte opcode)
				; assumes the usual .COM start state: a zero
				; word on top of the stack

        mov     dx,source
        mov     ax,1A5Ch        ; AH=1Ah: set DTA to DS:DX (=source); AL=5Ch is
                                ; carried along for the next step
        int     21h
        cbw                     ; ax = 005Ch (AL is positive, so AH becomes 0)
        xchg    dx,ax           ;dx=005ch (FCB address), ax=source=0fffh

        mov     di,ax           ; di = source = base of emulated memory

        int     21h             ; AH=0Fh (high byte of source): open file via FCB at DS:DX
        mov     ah,14h
        mov     [6Bh+bx],ah     ; byte 6Bh = FCB+0Fh = high byte of the record
                                ; size field, so one record is now 14xxh bytes
        int     21h             ; AH=14h: sequential read of one record to the DTA

; +----------------------------------------------------------------------+
; |  ax=1403  bx=0000  cx=00ff  dx=005c  di=0fff  si=0100  bp=xxxx       |
; +----------------------------------------------------------------------+
; |  fill 000..0ffh, again TAD showed me a better way        006 bytes   |
; +----------------------------------------------------------------------+
;
; Writes the byte value N to emulated address N for N = 00h..0FFh
; (real address di+N).  Needs bh=0 on entry; the loop ends when bl wraps
; back to 0, so bx is 0000 again afterwards.

@fillloop:
	mov	[di+bx],bl	;;;TAD	mem[N] = N
	inc	bl		;;;TAD	next N, sets ZF on wrap to 0
	jnz	short @fillloop ;;;TAD


; +----------------------------------------------------------------------+
; |  ax=1403  bx=0000  cx=00ff  dx=005c  di=0fff  si=0100  bp=xxxx       |
; +----------------------------------------------------------------------+
; |  the emulator. read the comments to understand..         xxx bytes   |
; +----------------------------------------------------------------------+
;
; Fetch/decode/dispatch for one emulated instruction.
;  * Pushes its own address first, so every handler's final RET loops back
;    here.  With that word on the stack sp = -2 (sp was 0 after POP ES),
;    which is why `sub ...,sp` / `add ...,sp` are used as +2 / -2.
;  * Dispatch is done by pushing the handler address and executing RET.
; State handed to the handlers:
;    ax = status*256 + A     bx = 0bcd (opcode AND 0fffh)
;    cx = 00ab (opcode high byte)   bp = 0fffh   di = 0fffh
;    si = source+0f02h (so A/STK/PC equates are valid), CPU flags = status
; Layout dependency: `mov bl,[bx]` reads from 0180h+index, so @jumptable
; must assemble at offset 0180h; the code before it must not change size.

@emulate:
        push    o @emulate             ; return address for the handler's RET
        mov     si,source+0f00h        ;si=ptr to A

        mov     bp,di                  ; bp = 0fffh, 12-bit address mask

        mov     bx,bp
        and     bx,[si+4]              ; bx = PC and 0fffh ([si+4] is PC before the LODSW)
        mov     bx,[bx+di]              ; bx=abcd (new opcode)

        mov     cl,bh                   ; cx = 00ab     (opcode high-byte)

        lodsw                          ;al=a, ah=status , si=ptr to stk
        xchg    ax,dx                   ; dx = STATUS & A

        mov     ax,18D1h                ;;;TAD  \
        xor     al,bh                   ;;;TAD    prep. JPcc opcode #=o)
        mov     b @spu_jp_mod,al        ;;;TAD  /  for opcode Axcd this is 70h+(x xor 1),
                                        ;          i.e. a Jcc with the inverted condition
        shr     ax,4                    ; ax = 0180h + (opcode top nibble xor 0Dh)
        xchg    ax,bx                   ; ax = opcode, bx = address of the table entry

        cmp     ax,1000h                ; opcode 1000h is handled as "return"
        je      s @spu_return

        sub     w PC,sp			; PC - -2	(PC+2)
        and     ax,bp                   ; ax = 0bcd (operand part of the opcode)

        mov     bl,[bx]     ;       bx = 01xx --> vector

	xchg	ax,bx			; ax = vector, bx = 0bcd
        mov     b [@alu],al                  ;self-mod ADCA...XORA
                                             ; (written for every opcode; only
                                             ; meaningful when @alu is executed)

        cmp     al,32h                    ; is it a true low-byte vector ?
        ja      s @keep                   ; > 32h: a handler offset, use as is
        mov     al,(@alu-@start)              ; jump to same vector --> _alu
@keep:
        push    ax                      ; handler address 01xxh, taken by the RET below

; extract the flags.. TADs Version of it..
; The two shifts exist only for their effect on OF: after SHL dh,4 the top
; bit of dh is status bit 3, and SHR by 1 copies the operand's old top bit
; into OF.  SAHF then loads SF/ZF/AF/PF/CF from AH and leaves OF alone.

        mov     ax,dx			; \  ah = status, al = A
	shl	dh,4			;  \ extract flags
	shr	dh,1			;  /  OF = status bit 3
	sahf				; /
        ret                             ; dispatch: pops the vector pushed at @keep

;;; TAD - note each entry position is XOR'd with 0D hex !!


; Now the command handlers follow. All you need to know for them is:
; ax=status*256+A , bx=0bcd (the opcode AND 0FFFh), di=0FFFh (start of the
; spew program which will NEVER be changed), bp=0fffh (will be changed
; sometimes) and cx=00ab (first byte of the opcode. ch=00. ALWAYS)
; The implementation is quite straightforward: pack together as much as
; possible and try to avoid jumps.

; OS call and ALU handlers plus the shared "store result" tail.
;  @spu_OScall : reached from @spu_jump when cx=0 (opcode 00cd); calls
;                INT 21h with AH = cd and AL = A, then falls into @alu.
;  @alu        : its first byte is overwritten by @emulate with the jump
;                table byte of the current opcode (0Ah/12h/1Ah/22h/32h =
;                OR/ADC/SBB/AND/XOR al,[bx+di]).  On the OScall path that
;                byte is the vector of @spu_jump, which must equal 0B1h so
;                the two bytes decode as `mov cl,1` - layout dependent.
;  @packflags  : rebuilds the status byte in AH from the CPU flags.
;  @wrtax      : stores AX to A (al = accumulator, ah = status) and returns
;                to @emulate.
@spu_OScall:                            ;17
        mov     ah,bl
        int     21h
@alu:
        adc     al,[bx+di]   ; transforms either to mov cl,1 (if oscall)
                             ; or to the alucommand we need!
@packflags:
        lahf                 ; ah = SF ZF 0 AF 0 PF 1 CF
        pushf
        pop     bx           ; bx = full flags word
        and     bx,bp        ; keep bits 0..11 (bp is still 0fffh on this path)
        xor     ah,bh        ; bh bit 3 = OF -> status bit 3.  Bits 0..2 get
                             ; TF/IF/DF xor'ed in; presumably TF=DF=0 and IF=1
                             ; so only the always-1 bit 1 is cleared - confirm
@wrtax:
        mov     a,ax
        ret

; Return handler, entered directly from @emulate for opcode 1000h (before
; the vector is pushed, so sp = -2 here as well).
; Loads the word at emulated address STK into bx, adds 2 to STK and lets
; @spu_ret2 store bx into PC.
; Note: [bp+di] is a bp-based address, which defaults to segment SS - this
; relies on SS = DS (usual for a .COM program; not set up explicitly here).
@spu_return:                            ;8
        and     bp,STK                  ; bp = STK and 0fffh
        mov     bx,[bp+di]              ; bx = word on top of the emulated stack
        sub     w STK,sp		; STK - -2	(STK+2)
        jmp     s @spu_ret2             ; PC = bx, back to @emulate

; Three load handlers sharing one tail; each ends by storing AX through
; @wrtax, so A receives the loaded byte and status (still in AH) is kept.
;  @spu_rdi   : indirect - bx = (word at emulated address bcd) and 0fffh,
;               then continues as @spu_lda.
;  @spu_lda   : al = byte at emulated address bx (real address bx+di, DS).
;  @spu_rdsys : al = byte at ES:bx, i.e. offset bcd in the segment loaded
;               into ES by the POP ES at program start.
; Byte trick: 0B6h is the opcode of MOV DH,imm8.  When execution arrives
; from @spu_lda, the ES: prefix byte (26h) of the following instruction is
; consumed as that immediate, so the load is done without the override.
; dh is not used afterwards on this path.
@spu_rdi:                       ;12
        xchg    bp,bx                   ; bp = 0bcd, bx = 0fffh
        and     bx,[bp+di]              ; bp-based address: assumes SS = DS
@spu_lda:
	add	bx,di			; emulated address -> real offset
	db	0B6h			; MOV DH,xx
@spu_rdsys:
	mov	al,es:[bx]
	jmp	short @wrtax		; AH = STATUS	 ** do not modify **

; Dispatch table, one byte per opcode group.
;  * Index = (top nibble of the opcode) xor 0Dh, as computed in @emulate.
;  * Bytes <= 32h are not addresses: they are x86 opcode bytes patched into
;    @alu (OR/SBB/XOR/AND/ADC al,[bx+di]); @emulate then dispatches to @alu.
;  * Bytes > 32h are handler offsets from @start (handler = 0100h + byte).
; The table must be located at offset 0180h, because @emulate addresses it
; as 0180h+index without referring to this label.
@jumptable:
        db 0Ah                          ;[0] OR opcode   = Dxxx
        db 1Ah                          ;[1] SBB opcode  = Cxxx
        db 32h                          ;[2] XOR opcode  = Fxxx
        db 22h                          ;[3] AND opcode  = Exxx
        db (@spu_addw-@start)           ;[4]	= 9xxx
        db (@spu_rdsys-@start)          ;[5]	= 8xxx
        db 12h                          ;[6] ADC opcode  = Bxxx
        db @spu_jp-@start		;[7]	= Axxx
        db @spu_sta-@start		;[8]	= 5xxx
        db @spu_lda-@start		;[9]	= 4xxx
        db @spu_wri-@start		;[A]	= 7xxx
        db @spu_rdi-@start		;[B]	= 6xxx
        db @spu_gosub-@start		;[C]	= 1xxx
        db @spu_jump-@start		;[D]	= 0xxx
        db @spu_popb-@start		;[E]	= 3xxx
        db @spu_pushb-@start		;[F]	= 2xxx

; Three store handlers sharing one tail (entry: al = A, bx = 0bcd,
; bp = 0fffh).
;  @spu_pushb : STK = STK-1, al = byte at emulated address bcd, then bx is
;               pointed at STK so the @spu_wri code stores al at the new
;               emulated stack address.
;  @spu_wri   : indirect store - target = (word at emulated address bx)
;               and 0fffh, al is written there.
;  @spu_sta   : direct store - al is written to emulated address bx.
; A and status are not written back by these handlers.
@spu_pushb:                             ;14
        dec     w STK
        mov     al,[bx+di]
        mov     bx,0f02h	       ; bx points to STK
@spu_wri:
        and     bp,[bx+di]             ; bp = pointer value and 0fffh
        xchg    bp,bx                  ; bx = target address
@spu_sta:
        mov     [bx+di],al
        ret                            ; back to @emulate

; Conditional relative jump and "add A to word" handlers.
;  @spu_jp     : bx = 0f04h (emulated address of PC), ax = 0bcd.  The
;                opcode byte at @spu_jp_mod is rewritten by @emulate for
;                every instruction; for opcode Axcd it becomes the Jcc with
;                opcode 70h+(x xor 1), which skips the add when the emulated
;                condition is false.  Otherwise PC += sign-extended cd.
;                The `jo` written here is only a placeholder for that byte.
;  @spu_addw   : entered with al = A, bx = 0bcd; adds the sign-extended A
;                to the word at emulated address bcd.
;  @return     : shared RET back to @emulate.
; MOV and XCHG do not alter flags, so the flags loaded by SAHF in @emulate
; are still intact when the patched Jcc executes.
@spu_jp:                        ;10
        mov     ax,0f04h
        xchg    ax,bx
@spu_jp_mod:
        jo      s @return
@spu_addw:
        cbw
        add     [bx+di],ax
@return:
        ret

; Subroutine call and absolute jump handlers (entry: bx = 0bcd, cx = 00ab,
; bp = 0fffh, sp = -2).
;  @spu_gosub : STK = STK-2, the current PC (already advanced past this
;               instruction by @emulate) is stored at the new emulated
;               stack address, then continues as a jump.
;  @spu_jump  : if cx = 0 (opcode 00cd) the instruction is an OS call and
;               goes to @spu_OScall; otherwise falls into @spu_ret2.
;               This label must sit at offset 0B1h from @start, because its
;               jump-table byte doubles as an opcode at @alu (see @alu).
;  @spu_ret2  : PC = bx, back to @emulate.
; [di+bp] is a bp-based address (segment SS by default): assumes SS = DS.
@spu_gosub:
	mov	ax,PC
	add	w STK, sp		; STK + -2	(ie. STK - 2)
	and	bp,STK			; bp = STK and 0fffh
	mov	[di+bp],ax		; push return address
@spu_jump:
        jcxz    s @spu_OScall               ; opcode 00xx ?
@spu_ret2:
        mov     pc,bx
        ret

; Pop-byte handler (entry: bx = 0bcd, bp = 0fffh).
; Copies the byte at emulated address STK to emulated address bcd, then
; increments STK by one.  A and status are not written back.
; [di+bp] is a bp-based address (segment SS by default): assumes SS = DS.
@spu_popb:                      ;9
        and     bp,STK                  ; bp = STK and 0fffh
        mov     al,[di+bp]              ; byte on top of the emulated stack
        mov     [bx+di],al
        inc     w STK
        ret                             ; back to @emulate

        ; End of the single segment; execution starts at @start.
        ; Everything after END is ignored by the assembler (changelog notes).
        code ends
        end @start


; a small comment: my asm-programs have no names, they only have numbers.
; So the 4 digits in front is number of the asm program, the others the
; bytes.. If someone needs the sourcecodepack write to:
; blick@fmi.uni-passau.de

; Strike!! Did the 300 bytes limit..                   [3011 = 298 bytes]
; kicking the 'dw's out of the jumptable brought me to [3013 = 285 bytes]
; using other registers...                             [3014 = 282 bytes] 
; did something, earned 3 bytes                        [3019 = 279 bytes]
; there was redundant code at the oscall..
; found the mov problem..                              [3020 = 278 bytes] 
; a useless mov.. tststs                               [3021 = 276 bytes]
; implemented chuts xor at jpcc                        [3022 = 275 bytes]
; optimized the use of the jumptable a bit             [3024 = 273 bytes] 
; eliminated a 'sub pc,2'                              [3026 = 269 bytes]
; changed 'or dx,ax' to 'or ax,dx'                     [3028 = 268 bytes]
; changed some registers again..                       [3030 = 267 bytes]
; did the 'mov dx,' thing - thx chut..                 [4002 = 261 bytes] 
; it took me 5 hours to find out that a-b is NOT b-a... FUCK!
; between: i got my math diploma with 2.3.. :) (proud as hell)
; move the jumptable around..                          [4003 = 255 bytes]

; right now i think i have to send a big THANX to CHUT! His public entry
; showed me many things to optimize at my code. But believe me: i'll get
; you.. :)

; Sunday, 101099
; moving...                                            [4004 = 253 bytes]
; this and that. nothing important                     [4008 = 251 bytes]
; copied the fcb thing from int-e's hc6 entry..        [4009 = 250 bytes]
; di+x00h needs one more byte than di+x0h. shit.       [4010 = 247 bytes]
; another one..                                        [4011 = 246 bytes]
; and yet another one..                                [4012 = 245 bytes]
; 'stosb;inc al;jnz' -> 'stosb;inc ax;loop'            [4013 = 244 bytes]
; i am missing one byte. it has just gone away.        [4014 = 243 bytes]
; [si+2] is as long as [bp].. kicks 'mov bp,1f04'      [4015 = 240 bytes]
; bp=0fffh                                             [4018 = 238 bytes]
; revisioned chuts 'jpxx xor' code..                   [4020 = 237 bytes]

; Monday, 111099
; kicked one 'xchg bp,bx'                              [4022 = 235 bytes]
; changed 'mov al,cl' to 'xchg ax,cx'                  [4023 = 234 bytes]
; changed si to 1f00, and 'mov ax,a' to 'lodsw'        [4025 = 232 bytes]

; Sunday, 171099
; did the 'flags <-- status' hint tad gave me          [4028 = 227 bytes]
; changed the jumptable and the calc. of the jump..    [4029 = 226 bytes]

; Monday, 181099
; al= a, ah=[bx+di] , instead of cx...                 [4031 = 223 bytes]
; moved some code and kicked a popf                    [4032 = 222 bytes]
; killed a useless move..                              [4033 = 220 bytes]
; again inserted chuts xor 71..                        [4034 = 219 bytes]
; changed pushb a bit..                                [4035 = 218 bytes]
; implemented a mod. version of TADs status<--flags    [4036 = 215 bytes]
; tried the new test program.. it failed. damn.
; 000 - 0ff are not correct. inserted an 'inc cx'      [4037 = 216 bytes]

; Tuesday, 191099
; BIG THANX TO TAD! he saved for me another 2 bytes..  [4038 = 214 bytes]

; Saturday, 231099
; may be incorrect, moved the @spu_rdsys a bit.        [4042 = 213 bytes]
;   jumped into a 'daa'. works fine with the testsuite, don't know if it
;   might cause bugs..

;;;TAD Sunday, 241099
;;;
;;; recoded 'init' code using 'MOV [DI+BX]=BL'		[= 212 bytes]
;;; moved JPcc self-mod to remove PUSHF+POPF		[= 210 bytes]
;;; changed JPcc to load AX=00xx + BX=0F04		[= 209 bytes]

;claw Monday, 251099
; tested TADs init. works. saves one byte..             [4050 = 212 bytes]
; moved the JPcc like TAD did. Saves again one byte     [4050 = 211 bytes]
; TADs 'lodsw, xchg' does not work because bx=1f04, we
; need bx=0f04

;;; TAD Monday, 250199
;;; non-working 204 bytes !! version...

;;; TAD Tuesday, 260199 (2.18am)
;;; got the f*cking thing to work (damn that XOR)	[204 bytes]
;;; changed @spu_rdsys to remove 'daa' thing.
;;; Remembered a nice way to GS=0000 (Thanx Ruud !!)	[201 bytes]
;;; used ES: instead of GS: (POP ES saves 1 byte)	[200 bytes]
;;; added '@wrtax'  for RDI,LDA,RDSYS			[198 bytes]

; moved around the code to make CMP BX,1000h to CMP AX,1000h. This saves
; one byte.. :) STRIKE! One byte optimized by myself..  [5022 = 197 bytes]

;TAD Thursday, 280199
;  he showed me how to fake the @spu_xora .. @spu_addc stuff in the table.
;  saves me three bytes, not 2 as tad thought..         [5027 = 194 bytes]

; claw Sunday, 8 hours before deadline
;  found a lousy one: WARNING: Don't try this at home!  [5045 = 193 bytes]
;  (TAD: the 'db xx' at oscall is not needed if you move the code tricky.. :))

; la mer  (nine inch nails)
;
; and when the day arrives
; i'll become the sky
; and i'll become the sea
;
; and the sea will come to kiss me
; for i am going home
;
; nothing can stop me now


