;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
;  Hugi 28: Hugi USB
;
;  Date: 25 June 2009
;  Size: 352
;
;  Assembled with: NBASM 00.26.22  http://www.frontiernet.net/~fys/nbasm.html
;          Author:  Ben Lunt (Sniper)
;

;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
; we are a .com file
.model tiny

;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
; Structs

; UHCI_Q: an 8-byte queue head made of two dword link fields.
;  The single instance in this file (Queue) is assembled with
;  horz = 00000001h and vert = address of Setup_TD.
UHCI_Q     struct
           horz      dword     ; first link dword (00000001h in Queue)
           vert      dword     ; second link dword (points at the first TD)
UHCI_Q     ends

; UHCI_TD: one transfer descriptor, declared here as four dwords (16 bytes).
;  The TDs in the data section are laid out back to back, 16 bytes apart.
UHCI_TD    struct
           link_ptr  dword     ; address of the next TD, or 00000001h to end the chain
           stats     dword     ; control/status dword (C_ERR, LS, IOC, STATUS fields)
           info      dword     ; token dword (length, data toggle, PID)
           buffer    dword     ; address of the data buffer for this TD
           ; there is no reason why we can't use the remaining 16 bytes
           ;  as the next TD, since it is para aligned *and* the controller
           ;  won't modify them on this TD.
           ;resv     dup 16
UHCI_TD    ends


;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
; code starts here

.code         ;
.386          ; we will use (and assume) we are a .386 or better

           org 100h  ; .com files start at 100h


           ; =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
           ; First thing would be to find the UHCI controller on the PCI Bus
           ; We will do this using the PCI alone.  We could use the PCI
           ; BIOS services, ax = 0B1xxh, but I prefer to use the PCI itself.
           ; Using ports 0xCF8 and 0xCFC, we can read/write to the PCI Bus.
           ; We first write to the ADDRess port using the following bits:
           ;  Bit(s) 1: 0 reserved (should be written as zeros)
           ;         7: 2 config register number (see #00878 in RBIL)
           ;        10: 8 function number
           ;        15:11 device number
           ;        23:16 bus number
           ;        30:24 reserved (should be written as zeros)
           ;        31    enable configuration space mapping

           ; the PCI specs, ver 2.3, section 6.1, paragraph 3 states that
           ;  any read to a configuration space register of a nonexistent
           ;  device may return 0xFFFFFFFF.  We will use this assumption.

           ; Start one step (100h) below 80000008h so that the first 'add'
           ;  in the loop yields bus 0 / dev 0 / func 0, register 08h, with
           ;  the enable bit (31) set.
           ; NOTE(review): there is no end-of-scan check; if no matching
           ;  controller exists this loop does not terminate.
           mov  ebx,7FFFFF08h  ; ebx = PCI bus/dev/func dword (8 = 2nd dword)
pci_main_loop:
           add  ebx,00000100h  ; move to next bus/dev/func

           ; =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
           ; read in the class/subclass/protocol
           call read_pci           ; eax = config dword at register 08h

           shr  eax,8              ; clear out the low byte
           cmp  eax,000C0300h      ; The UHCI class = 0C, sub = 03, proto = 00
           jne  short pci_main_loop

           ; read in the base address
           mov  bl,(8<<2)    ; select config register 20h of the same function
           call read_pci
           dec  ax           ; clear bit 0 (assumes bit 0 is set in the value read)
           push ax           ; save the base address

           ; write 0005h to the access register
           ;  read_pci leaves dx = 0CFCh, and the upper half of eax still
           ;  holds the upper word just read from register 04h.
           mov  bl,(1<<2)    ; select config register 04h
           call read_pci
           mov  ax,0005h     ; bits 0 and 2 set
           out  dx,eax

;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
; reset the controller by setting bit 2 (GRESET) in the command register
;  waiting at least 10ms, then resetting the bit.
           pop  dx                  ; bit 2 in ax is set above
                                    ; dx = controller I/O base from here on
           out  dx,ax               ; COMMAND reg (ax = 0005h, so bit 0 is written too)

           ; now wait for at least 10ms
           call delay55ms           ; clobbers ax, leaves cx = 0; dx is untouched

           ; clear the register (setting it to default values)
           xor  eax,eax   ; clear eax too for below
           out  dx,ax

           ; =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
           ; find a 4k aligned address
           ; convert to 32-bit and write to the controller
           mov  ax,cs
           shl  eax,4           ; eax = linear address of cs:0000 ('base address')
           push eax             ; save for later
           add  eax,(4095+4096) ; + 4096 to go past our code
           and  ax,(~4095)      ; round down to a 4k boundary
           add  dx,8            ; dx = base+08h
           out  dx,eax          ; hand the frame list address to the controller
           shr  eax,4           ; convert back to 16:0000h
           mov  es,ax           ; es:0000 is our frame address

           ; =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
           ; Set the IOC interrupt bit enable
           ; *We must set all four bit to get Bochs to work*
           ; (Ben: Fix Bochs)
           sub  dx,4            ; dx = base+04h
           mov  ax,000Fh
           out  dx,ax

           ; =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
           ; mark each frame as 'T'erminated
           ;  2048 words = 4096 bytes are filled with 000Fh, so every
           ;  dword entry reads 000F000Fh (bit 0 set).
           xor  di,di
           push di       ; save for later
           mov  ch,8     ; cx = 2048 (08xx and cl = zero from delay above)
           rep           ; ax = 0x000F above (T = 1)
           stosw         ; saves us from using the 66h prefix
           pop  di       ; di = 0 again, used by the stosd further down

           ; =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
           ; Start the UHCI controller.
           sub  dx,4    ; dx = base+00h
           mov  al,01   ; ah = 0 from above
           out  dx,ax   ; COMMAND

           ; =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
           ; set up the address in dx
           add  dx,16   ; first port

           ; =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
           ; first, reset the port
           mov  ah,(1<<1)   ; ax = 0201h: bit 9 of the port register
           out  dx,ax

           ; now wait for at least 50ms
           call delay55ms

           ; clear the reset
           in   ax,dx
           and  ah,(~(1<<1))   ; drop bit 9, keep the other bits as read
           out  dx,ax

           ; now wait for at least 50ms
           call delay55ms      ; also leaves cx = 0 for the 'mov cl,4' below

           ; =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
           ; enable the device
           in   ax,dx
           or   al,(1<<2)      ; set bit 2 of the port register
           out  dx,ax

           ; =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
           ; set up our frame TD's
           ; We already have them hard coded in our data section,
           ;  but need to fix up the addresses.
           pop  eax            ; eax = 'base address' (cs << 4) saved earlier

           ; NBASM doesn't assemble the two lines below correctly, darn it.
           ;  I will have to fix this...
           ; The loop patches link_ptr and buffer of four consecutive TDs
           ;  (Setup_TD, TD0, TD1, TD2); Status_TD is left as assembled.
           mov  cl,4           ; ch = 0 from the last delay55ms, so cx = 4
           mov  bx,offset Setup_TD.link_ptr
@@:        db 66h, 01h, 07h       ; add  [bx],eax
           db 66h, 01h, 47h, 0Ch  ; add  [bx+12],eax
           add  bl,16  ; bx = 0220h or less so adding to only bl is fine
           loop @b

           ; =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
           ; Set up the Queue Head
           add  Queue.vert,eax     ; make the Setup_TD pointer a linear address

           ; =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
           ; point the first frame in the list to our queue.
           add  eax,offset Queue   ; eax still = 'base address'
           or   al,(1<<1)          ; is a queue
           stosd                   ; es:[di]  (di = 0)

           ; =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
           ; wait for the interrupt to happen
           ;  (polled: spin until bit 0 of the register at base+02h is set)
           ; NOTE(review): no timeout and no error-bit check; if that bit
           ;  never gets set this loop does not terminate.
           sub  dx,14              ; dx = base+10h-14 = base+02h
@@:        in   ax,dx
           test al,1
           jz   short @b

           ; =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
           ; stop the controller
           dec  dx
           dec  dx       ; dx = base+00h
           xor  ax,ax
           out  dx,ax    ; COMMAND

           ; =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
           ; now print it out.
           ;  The text is built at es:di = cs:(I/O base value), i.e. the
           ;  port number is reused as a scratch buffer offset, and dx keeps
           ;  pointing at its start for the DOS print call.
           ;  The bytes dumped are read from ds:si = 0100h onward, which is
           ;  where the IN TDs' buffers (0100h+0/+8/+16) point.
           ; NOTE(review): assumes the I/O base, used as an offset, does not
           ;  overlap this program's code, data or stack.
           push cs
           pop  es
           mov  di,dx  ; dx = base io address. is it <= (10000h-3Ah)? For sure.
           xchg bx,ax  ; ax = 0 above, so bx = 0 now
           ; si = 100h on .com startup

do_crlf:   mov  ax,0D0Ah   ; stosw stores al (0Ah) first, then ah (0Dh)
           stosw

main_prt:  lodsb           ; al = next byte to dump

           mov  cl,2       ; ch = 00
           aam  10h        ; ah = high nibble, al = low nibble
main_prt1: xchg al,ah      ; pass 1: al = high nibble, pass 2: al = low nibble
           cmp  al,0Ah     ; carry set for 0..9
           sbb  al,69h     ; cmp/sbb/das turns the nibble into '0'-'9','A'-'F'
           das
           stosb
           loop main_prt1

           inc  bx         ; bx = number of bytes printed so far

           mov  al,' '     ; default separator
           test bl,7
           jnz  short @f
           mov  al,'-'     ; after every 8th byte use '-' instead
@@:        test bl,0Fh
           jz   short do_crlf   ; after every 16th byte: new line, no separator
           stosb

           cmp  bl,18      ; 18 bytes in total
           jb   short main_prt

           mov  ax,0924h   ; ah = 09h (DOS print string), al = '$' terminator
           dec  di          ; overwrite the last space with a '$'
           stosb            ;
           
           int  21h        ; print the string at ds:dx

           ; =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
           ; done, so exit
           ;ret
           ;  we can just fall through and wait 55ms for the exit
           ;  (the 'ret' at the end of delay55ms then acts as the program's
           ;   exit; every push above has been popped by this point)

;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
; delay55ms: busy-wait on the BIOS tick word at 0000:046Ch.
;  Two changes of that word are awaited: the first one only syncs us
;  to a tick edge, the second one is a complete tick.  That gives a
;  delay of at least 1/18.2 of a second (55ms) and at most 1/9.1 of
;  a second (109ms).
;   in:   nothing
;   out:  cx = 0, gs = 0, ax = clobbered
;         (the main code depends on cx being 0 after each call)
;  The main code also falls into this routine when it is done, so
;  the 'ret' below doubles as the program's exit.
delay55ms  proc near
           xor  cx,cx
           mov  gs,cx           ; gs = 0000h, segment of the BIOS data
           mov  cl,2            ; cx = 2 tick changes to wait for
next_tick: mov  ax,gs:[046Ch]   ; snapshot the current tick count
same_tick: cmp  ax,gs:[046Ch]   ; spin for as long as it is unchanged
           je   short same_tick
           loop next_tick       ; first pass = partial tick, second = full tick
           ret
delay55ms  endp

;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
; read_pci: select a PCI configuration dword and read it.
;   in:   ebx = selector written to the PCI address port (0CF8h)
;   out:  eax = dword read from the PCI data port (0CFCh)
;         dx  = 0CFCh (a caller may 'out dx,eax' to write the dword back)
;   ebx is left unchanged.
read_pci   proc near
           mov  dx,0CF8h        ; dx = PCI address port
           mov  eax,ebx         ; eax = bus/dev/func/register selector
           out  dx,eax          ; latch the selector
           mov  dl,0FCh         ; dh is already 0Ch, so dx = 0CFCh (data port)
           in   eax,dx          ; fetch the selected dword
           ret
read_pci   endp


;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
; data

.para ; must be paragraph aligned

; Everything below is handed to the controller by linear address.
;  The values assembled here are offsets within our segment; the start-up
;  code adds the segment's linear base (cs << 4) to Queue.vert and to the
;  link_ptr and buffer fields of Setup_TD, TD0, TD1 and TD2.
;  Status_TD is not patched.

; Queue is 8 bytes
;  horz = 00000001h, vert = Setup_TD (first TD of the chain)

Queue      st  UHCI_Q  uses  00000001h \
                             Setup_TD
; Setup_Packet is 8 bytes
;  (Queue + Setup_Packet = 16 bytes, so the TDs that follow keep the
;   paragraph alignment)

Setup_Packet  db 80h    ; dev->host, type=standard, recipient=device
              db  6     ; get descriptor
              db  0     ; index = 0
              db  1     ;  type = device
              dw  0     ; wIndex = 0 (not used)
              dw  18    ; 18 bytes

; still para aligned
; The three IN TDs below request 8 + 8 + 2 = 18 bytes into 0100h..0111h
;  of our segment, i.e. over the first bytes of this program's own code.

Setup_TD   st  UHCI_TD uses  TD0                            \ ; base added at run time
                             ((3<<27) | (1<<26)| (80h<<16)) \ ; C_ERR | LS | STATUS
                             ((7<<21) | (0<<19) | 2Dh) \ ; Len = 8, Data0, Setup
                             Setup_Packet               ; base added at run time

TD0        st  UHCI_TD uses  TD1                      \ ; base added at run time
                             ((3<<27) | (1<<26)| (80h<<16)) \ ; C_ERR | LS | STATUS
                             ((7<<21) | (1<<19) | 69h) \ ; Len = 8, Data1, In
                             (0100h+0)                  ; base added at run time

TD1        st  UHCI_TD uses  TD2                      \ ; base added at run time
                             ((3<<27) | (1<<26)| (80h<<16)) \ ; C_ERR | LS | STATUS
                             ((7<<21) | (0<<19) | 69h) \ ; Len = 8, Data0, In
                             (0100h+8)                  ; base added at run time

TD2        st  UHCI_TD uses  Status_TD                \ ; base added at run time
                             ((3<<27) | (1<<26)| (80h<<16)) \ ; C_ERR | LS | STATUS
                             ((1<<21) | (1<<19) | 69h) \ ; Len = 2, Data1, In
                             (0100h+16)                 ; base added at run time

Status_TD  st  UHCI_TD uses  00000001h                \ ; terminate
                             ((3<<27) | (1<<26) | (1<<24) | (80h<<16)) \ ; C_ERR | LS | IOC | STATUS
                             ((7FFh<<21) | (1<<19) | 0E1h) \ ; Len = 0, Data1, Out
                             00000000h                  ; no buffer (Len = 0); not patched at run time


.end
