        Here is some more info about claw's entry...

        Firstly, I didn't want to take part in the compo
        but found the transformation/translation problem
        far too interesting to ignore. I do still consider
        this as claw's entry.. with a few bytes saved by me.

                                                TAD


;-========== Welcome to: ==========-
;
;o888888888o o888888888o 888888888o
;88888888888 88888888888 888     888
;    888     888     888 888     888
;    888     888     888 888     888
;    888     88888888888 888     888
;    888     888     888 888     888
;    888     888     888 888888888's
;
;888888 88   88 888888 88888  88  88
;88     8888 88   88   88  88  8888
;888    88 8888   88   88888    88
;888888 88   88   88   88  88   88
;
;-= with small contributions from =-
;
;o888888 888    o8888888 888 888 888
;888     888    888  888 888 888 888
;888     888 88 88888888 888 888 888
;8888888 888888 888  888  888888888
;
;-========== explanation: =========-
;
; TAD did  again  most of the  work
; but did not  agree to do a  joint
; entry. so i  think he must be the
; main  person in the  credits  and
; not me.
; but  nonetheless  i have to greet
; some people, especially  Bonz who
; 'paid back' for  some help i gave
; him in another  compo.. thanks!
; also  greets to Sniper  and Ruud!
; special  thanks  to adok  and all
; those  who worked  on example and
; testsuite.
;
;-= now to something completely.. =-
;

        ;;-----------------------------------------------------------
        ;; Assembler setup and constants for a tiny-model program
        ;; whose code starts at offset 256 (0100h), i.e. `go` = 0100h.
        ;;-----------------------------------------------------------
        JUMPS
        .model tiny

; Number of bytes transferred by the file read and the file write.
; 17462 = 36h + 1024 + (128*128): the two offsets used by `bmpimage`
; below plus one byte per pixel of the 128 x 128 image.
BMP_FILESIZE    equ     17462
; Text macro that emits the single byte 0D6h. It is not referenced
; anywhere in the code visible in this file.
salc            equ     <db 0d6h>
; Address of the first pixel byte inside the loaded file. The 36h and
; 1024 are presumably the BMP header and a 256-entry palette -- the
; code only ever uses their sum as an offset.
bmpimage        equ     load_to + 36h + 1024
; Offset the file is loaded to / saved from. The same value is what
; the draw loop has in DX when it executes OUT DX,AL, so one constant
; serves as both buffer address and I/O port number.
load_to         equ     03c8h

        .code
        .386
        org     256
;---------------------------------------------------------------------
; Program entry and the shared two-pass file routine.
;
; WriteBMP is executed twice:
;   <1> by falling through from `go`: open + read the source file,
;       set video mode 13h, then fall into the main loop;
;   <2> via `jz WriteBMP` after a space key: create + write the
;       result file, set video mode 03h, then terminate.
; The same instructions serve both passes; only the register contents
; differ. The register values quoted in the banners are the ones the
; authors rely on (the pass 1 values are the state at program start).
; No error checking is done on any of the INT 21h calls.
;---------------------------------------------------------------------
go:
        mov     bx, 3F13h               ; bh=3Fh (read file), bl=13h (video mode)

        ;;-----------------------------------------------------------
        ;; this is the outer 2-pass loop
        ;;
        ;; <1>: pass 1 read file
        ;; ax=0000  bx=3F13  cx=00ff  dx=????
        ;; si=0100  di=????  bp=????  sp=fffe
        ;;
        ;; <2>: pass 2 write file
        ;; ax=0100  bx=4003  cx=0000  dx=????
        ;; si=????  di=????  bp=????  sp=????
        ;;-----------------------------------------------------------
WriteBMP:
smc:    mov     di, 0082h               ; start of filename to use; the 16-bit
                                        ; operand is patched in pass 1 (SMC)
        xor     ax, 3D20h               ; <1> ax=3D20 (ah=3Dh open file)
                                        ; <2> ax=3C20 (ah=3Ch create file)
                                        ; al=20h is also the space character
                                        ; that SCASB compares against below
        mov     dx, di                  ; ds:dx --> filename for INT 21h
@findnospace:
        ; Record the current scan position inside the `mov di,xxxx`
        ; instruction at `smc`. This only hits the instruction while
        ; si=0100h (= go), i.e. in pass 1. In pass 2 si has been left at
        ; the end of the image by the draw loop, so the store lands
        ; elsewhere (just past the load buffer).
        mov [si+(smc+1-go)], di         ; self modify MOV DI,xxxx
@findspace:
        scasb                           ; compare al (20h) with es:[di], di++
        jc  short @findspace            ; byte > 20h: not a space, keep scanning
        mov byte ptr [di-1],ch          ; byte <= 20h: overwrite it with ch (00)
                                        ; (MOV leaves the SCASB flags intact)
        je short @findnospace           ; it was exactly a space: remember the
                                        ; next position and continue scanning;
                                        ; anything lower ends the scan

        ; After pass 1 the whole command tail has been scanned: every
        ; separator is now 00 and the operand at `smc` holds the position
        ; following the last space, which pass 2 uses as its filename.
        ; dx still points at the first name (0082h) for the open call.

        ;;-----------------------------------------------------------
        ;; read file
        ;; <1> ax=3D20  bx=3F13  cx=00FF  dx=0082
        ;;     si=0100  di=????  bp=????  sp=????
        ;;-----------------------------------------------------------
        ;; write file
        ;; <2> ax=3C20  bx=4003  cx=0000  dx=nnnn
        ;;     si=????  di=????  bp=????  sp=????
        ;;-----------------------------------------------------------

        int     21h                     ; <1> open  <2> create (cx=0 attributes)
                                        ; ax = file handle on return
        xchg    ax, bx                  ; bx = handle, ax = 3F13h / 4003h

        ;;-----------------------------------------------------------
        ;; <1> ax=3F13  bx=file  cx=00FF  dx=????
        ;;     si=0100  di=????  bp=????  sp=????
        ;;-----------------------------------------------------------
        ;; <2> ax=4003  bx=file  cx=0000  dx=????
        ;;     si=47FE  di=????  bp=????  sp=????
        ;;-----------------------------------------------------------

        mov     dx, load_to             ; ds:dx --> buffer at 03C8h
        pusha                           ; keep ax & cx (the call changes ax,
                                        ; and cx is about to be overwritten)
        mov     cx, BMP_FILESIZE        ; byte count to read / write
termin:
        int     21h                     ; <1> read file  (ah=3Fh)
                                        ; <2> write file (ah=40h)
                                        ; also re-entered from JCXZ below
        popa                            ; ax = 3F13h / 4003h, cx = 00FF / 0000,
                                        ; dx = 03C8h again
        cbw                             ; al is 13h or 03h, so ah becomes 00h
        int     10h                     ; <1> vid-mode 13h
                                        ; <2> vid-mode 03h
        jcxz    short termin            ; cx=0 only in pass 2: loop back to the
                                        ; INT 21h above, this time as the
                                        ; program exit (assumes ah is still 00h
                                        ; after INT 10h). Pass 1 (cx=00FF)
                                        ; falls through into the main loop.

        ;;-----------------------------------------------------------
        ;; this is the main draw, waitkey, trans[form/late] loop
        ;;-----------------------------------------------------------
        ;; ax=????  bx=????  cx=00xx  dx=????
        ;; si=????  di=????  bp=????  sp=????
        ;;-----------------------------------------------------------
;---------------------------------------------------------------------
; mainloop / drawbmp / copy_x -- draw the 128 x 128 image and, as a side
; effect, push every pixel onto the stack as a temporary copy that the
; transform loop later pops.
;
; For each pixel: read it from the image, push it, point ES at the
; screen, program one palette entry, store the pixel, restore ES.
; The `matrix` table sits in the middle of the instruction stream and
; is EXECUTED as code on every pixel; the mnemonics in its comments are
; what the bytes decode to. The same bytes are later read as data by
; the transform loop.
;
; Relies on: ch=0 (so `mov cl,128` gives cx=128), dx=03C8h, and
; ES=DS=CS on entry.
;---------------------------------------------------------------------
mainloop:
        mov     bx, 4003h               ; bh=write file,  bl=mode 3
                                        ; (consumed by pass 2 of WriteBMP)
        mov     di, (320*128)           ; [ES:DI] --> 128 screen lines below
                                        ; the ES origin set per pixel below
        mov     si, bmpimage            ; [DS:SI] --> bmp image
drawbmp:
        mov     cl, 128                 ; cl=128 pixels per horz
copy_x:
        lodsb                           ; al = image pixel
        push    ax                      ; use stack as 'temp' image
                                        ; (one word per pixel, ah is don't-care)

        ; Segment value for ES: A000h plus the paragraph offset of pixel
        ; x=96, y=35 ((320*35+96)/16 = 706, an exact division). With DI
        ; counting down from 320*128 to 320 the rows land on screen lines
        ; 163 down to 36.
        push 0A000h+(((320*35)+96)/16)  ; *NOTE y=36 - 1

        ;;-----------------------------------------------------------
        ;; here is the nice 10-byte trans 'opcode' table
        ;; after many, many attempts to improve the old table
        ;; we came up with this one. sadly only 3 bytes reused
        ;; (so our trans+table = 19 bytes (12+10-3) #:o)
        ;;-----------------------------------------------------------
        ;; Executed as instructions here. The entries marked with '-'
        ;; do real work for the draw loop: pop ES takes the screen
        ;; segment pushed above, out dx,al sends the pixel value to
        ;; port dx (the palette index), push cs saves a segment for
        ;; the `pop ES` further down. The two CMPs only read memory
        ;; and set flags; sbb bp,ax changes bp, which is not live
        ;; here.
        ;;-----------------------------------------------------------
matrix:
        db      007h                    ;  [0]        -pop ES
        db      0EEh                    ;  [1]        -out dx,al
        db      039h,018h               ;  [2][3]      cmp [bx+si],bx
        db      00Eh                    ;  [4]        -push cs
        db      03Ah,069h,0EEh          ;  [5][6][7]   cmp ch,[bx+di-12h]
        db      01Bh,0E8h               ;  [8][9]      sbb bp,ax

        stosb                           ; write pixel to es:[di], di++
        shr     al,2                    ; 8bit --> 6bit colour
        inc     dx                      ; dx = next port (palette data)
        out     dx, al                  ; slow and ugly as hell
        out     dx, al                  ; (but it saves bytes ;)
        out     dx, al                  ; same value 3 times = grey level
        dec     dx                      ; creditz to Bonz for this
                                        ; (dx back to the index port)

        pop     ES                      ; restore ES = DS
                                        ; (need to scan 2nd filename)
                                        ; -- takes the CS pushed by matrix[4]

        loop    copy_x                  ; 128 pixels per row; leaves cx=0
        sub     di, (320+128)           ; up one screen line
                                        ; (undo the 128 stores, minus one row)
        jnz     short drawbmp           ; di reaches 0 after 128 rows

        ;;-----------------------------------------------------------
        ;; ax=????  bx=4003  cx=0000  dx=03C8
        ;; si=47fe  di=0000  bp=????  sp=7ffe (temp image)
        ;;-----------------------------------------------------------

        ;;-----------------------------------------------------------
        ;; Wait for a key and dispatch on it.
        ;;   space (20h)        --> save the image and quit (pass 2)
        ;;   '0'..'9' (30h-39h) --> apply transform n, then redraw
        ;; The key is not validated: any code that is a multiple of
        ;; 20h takes the save path, and any other code is used as a
        ;; table index as if it were a digit.
        ;;-----------------------------------------------------------
        mov     ah, 08h                 ; DOS: read one character into al
        int     21h

        aam     20h                     ; ah = al / 20h, al = al MOD 20h
                                        ; split al=20 --> ax=0100
                                        ;       al=3n --> ax=011n
                                        ; ZF is set when the new al is 0

        ;;-----------------------------------------------------------
        ;; [ ] pressed
        ;; ax=0100  bx=4003  cx=0000  dx=03C8
        ;; si=47FE  di=0000  bp=????  sp=7FFE
        ;;-----------------------------------------------------------

        jz      WriteBMP                ; space: run the file routine again
                                        ; with the pass 2 register values

        xchg    ax,bp                   ; [ss:bp-10h] --> opcode byte
                                        ; bp = 011nh; the transform loop turns
                                        ; this into the address matrix + n

        ;;-----------------------------------------------------------
        ;; n=[0]..[9] pressed
        ;; ax=????  bx=4003  cx=0000  dx=03C8
        ;; si=47FE  di=0000  bp=011n  sp=7FFE
        ;;-----------------------------------------------------------
;---------------------------------------------------------------------
; y_loop / trans -- apply the selected transform to the whole image.
;
; DI counts 0, 2, 4 ... 7FFEh (16384 steps, one per pixel). Each step
; takes DI as a (y, x) pair, runs it through the `trans` bit-pair
; interpreter controlled by one byte of the `matrix` table, then pops
; one pixel from the temporary copy on the stack and stores it at the
; resulting (y, x) in the image. Pixels come off the stack in the
; reverse of the order the draw loop pushed them.
;
; On entry: di=0, bp=011nh (n = key digit), si = bmpimage + 128*128,
; and the stack holds the 16384 words pushed by the draw loop.
;---------------------------------------------------------------------
y_loop:
        mov     bx, di                  ; di = 0...7FFFh (inc 2)
                                        ;    = 0yyyyyyy xxxxxxx0 <-- even!

        shr     bl,1                    ; bx = 0yyyyyyy 0xxxxxxx
                                        ; cf = 0 (due to even loop count)
                                        ;        nice one claw !

        ; bp-10h = 010nh = go+n, so the address is matrix+n. The label is
        ; spelled `matrix` where it is defined; this reference presumably
        ; relies on the assembler ignoring symbol case. MOV leaves the
        ; cf=0 from the SHR above intact for the first SBB.
        mov     al, [bp+(Matrix-go)-10h] ; get trans 'opcode'
trans:
        ; One pass consumes the top two bits of al:
        ;   ah = 00h/FFh from bit 7 (CBW), cf = the bit shifted out by
        ;   the previous SHL (old bit 6; 0 on the first pass).
        ; The XCHG swaps the two coordinates on every pass, so an odd
        ; number of passes leaves x and y exchanged.
        cbw                     ; yes folks, here is that nice 12-byte
        sbb     bl, ah          ; trans routine I mentioned on the group
        xchg    bl, bh          ; mailing list.
        xor     bl, ah          ;
        shl     al, 2           ; (see EOF for more info)
        jnz     short trans     ; stop once no opcode bits remain

        ; Fold the result back to 7 bits per coordinate and turn it into
        ; a negative offset from si: bx = -(128*128) + y*128 + x.
        or      bh, 80h         ; remap from end of bmpimage
                                ; bx = 1yyyyyyy ?xxxxxxx
        shl     bl, 1           ; bx = 1yyyyyyy xxxxxxx0
        sar     bx, 1           ; bx = 11yyyyyy yxxxxxxx

        pop     ax              ; pixel <--- temp stack

                                ; [DS:SI] --> bmpimage + (128*128)
        mov     [bx+si], al     ; remap temp pixel --> image(x,y)

        inc     di              ; \ advance loop count by +2
        inc     di              ; / so the SHR BL,1 makes CF=0
        jns     short y_loop    ; until di reaches 8000h
        jmp     mainloop        ; redraw the transformed image

        end     go

__________________________________________________________________________
        v v v  v v v v v  v v  v v v v  v v v v v  v v  v v v
        T-H-E  F-U-N-K-Y  1-2  B-Y-T-E  T-R-A-N-S  B-Y  T-A-D
        ^ ^ ^  ^ ^ ^ ^ ^  ^ ^  ^ ^ ^ ^  ^ ^ ^ ^ ^  ^ ^  ^ ^ ^

        As you have all probably worked out, the trans uses
        2-bits per loop to perform a translate/transform step.
        This continues until the <opcode> byte is 0. As a very
        nice bonus this 'nz loop count' allows an odd or even
        number of loop iterations to be done and so can easily
        perform a 'xchg x,y' by doing an odd number of loops.


          7      6     5     4     3     2     1     0
        +-----+-----+-----+-----+-----+-----+-----+-----+
        | xor |  cf | xor |  cf | xor |  cf | xor |  cf |
        +-----+-----+-----+-----+-----+-----+-----+-----+
          <1>  <----2----> <----3----> <----4---->

07 hex     0     0     0     0     0     1     1     1  [0] y flip
EE hex     1     1     1     0     1     1     1     0  [1] 180'
39 hex     0     0     1     1     1     0     0     1  [2] move down
18 hex     0     0     0     1     1     0     0     0  [3] -90'
0E hex     0     0     0     0     1     1     1     0  [4] move left
3A hex     0     0     1     1     1     0     1     0  [5] x flip
69 hex     0     1     1     0     1     0     0     1  [6] move right
EE hex     1     1     1     0     1     1     1     0  [7] 180'
1B hex     0     0     0     1     1     0     1     1  [8] move up
E8 hex     1     1     1     0     1     0     0     0  [9] +90'

        There are many opcodes for each of the [0]...[9]
        operations. We wrote a 'try' program to check all
        256 byte values and print those which gave the
        correct transformation/translations.

        One thing to note about the above table is that the
        image is stored on the stack in an upside-down and
        back-to-front format. Also the trans loop counters
        are reversed too... confused?  yeah.. so I am.... ;)

Bonz' fix:
^^^^^^^^^
        Here is an explanation of the +/-1 fix in the example
        program (Thanks again to Bonz for finding and fixing
        this nasty bug).

        The bmp size is 128 x 128 so calculations for a pixel
        address are MOD 128 (x=0...127 and y=0...127). For an
        x flip operation you might think a NEG x will do, but
        in fact the answer will be 1 out.

                mov     al, 0           ; x = 0
                neg     al              ; x = -x
                and     al, 127         ; x = x MOD 128
                al = 0                  ; this should be 127

        (remember 0 = first pixel, 127 = last pixel)

        so, to fix this perform a decrement.

                mov     al, 0           ; x = 0
                neg     al              ; x = -x
                dec     al              ; x = x - 1
                and     al, 127         ; x = x MOD 128

                al = 127

        A NEG can be performed using a NOT followed by an INC,
        or, by a XOR <dst>,-1 followed by an INC. The entry
        used the SBB BL,AH instruction to perform the INC step.

        Here is an expanded loop with the 'xchg bl,bh' removed:-

        cf = 0

trans:                          ; ah  cf
        cbw                     ; 00   0
        sbb     bl, ah          ; 00   1        x = x - 1
                                ; FF   0        x = x + 1
                                ; FF   1

        xor     bh, ah          ; 00
                                ; FF            y = -y - 1
        shl     al, 2           ;
        jz      short done      ;

        cbw                     ; 00   0
        sbb     bh, ah          ; 00   1        y = y - 1
                                ; FF   0        y = y + 1
                                ; FF   1
                                ;

        xor     bl, ah          ; 00
                                ; FF            x = -x - 1

        shl     al, 2           ;
        jnz     short trans     ;
done:


        Creditz,
^^^^^^^^^^^^^^^^
        > Many thanks to Bonz for his bug fix and helping
          to get this compo up and running.

        > Tnx to claw for making this coding fun.

        > Tnx to everyone else (especially Alexione, respect!)
          for making this compo a nice challenge.

        > Tnx to Ruud, who wasn't here in person,
            but his spirit of fun and 'chewing bytes'
              made this so enjoyable.

               (hope to see you all in the next compo)

                regards
                        TAD  ( tad_uk@bigfoot.com )
