@ vim: set ft=armv4 noet:

@ Pack three 8-bit colour channels into one 16-bit value: each channel keeps
@ its top 5 bits (first argument in bits 0-4, second in bits 5-9, third in
@ bits 10-14) and bit 15 is always set.
#define RGB888(r,g,b) (((r)>>3)|(((g)>>3)<<5)|(((b)>>3)<<10)|0x8000)

@ Reverse the byte order of a 32-bit value, so that a little-endian .4byte
@ ends up big-endian in the file (used for the DOL header fields below).
#define BE4(x) ((((x)&0xFF)<<24)|(((x)&0xFF00)<<8)|(((x)&0xFF0000)>>8)|(((x)&0xFF000000)>>24))
@ Address written (byte-swapped) as DOL load address and DOL entry point.
#define DOL_LMA 0x81010000

@ Load and entry addresses placed in the NDS header for the ARM7 / ARM9 images.
#define A7_LMA 0x03800000
#define A9_LMA 0x02000000

@ C64 PRG load address; emitted as the first two bytes of the image.
#define C64_LMA 0x0801

@ Emits the fixed word 0xE7F000F0 (presumably an always-undefined ARM encoding
@ used as a debugger trap -- TODO confirm). Not used in the visible code.
.macro break
	.4byte 0xE7F000F0
.endm

.section .text.crt0,"ax"

.arch armv4
.cpu  arm7tdmi
.arm

.globl ndsheader
@ ndsheader: image bytes 0x00-0x1F. The same bytes are laid out so that several
@ loaders can each read them as their own header or first instructions:
@   NDS_*  labels mark NDS ROM header fields,
@   GBA_*  labels mark GBA cartridge header fields,
@   DOL_*  labels mark DOL header fields (big-endian, hence the BE4 wrapper),
@   DOS_*  and the C64 notes mark bytes that are executed as x86 / 6502 code.
@ Every directive below has a fixed size; adding or removing a byte shifts all
@ later fields, so edit with care.
ndsheader:
NDS_GAMETITLE:
GBA_CARTENTRY:
DOL_TEXT0_OFF:
DOS_start: @ everything here is nonsense in 16-bit x86, but it works fine :)
	@.4byte 0, 0, 0 @ gametitle
_start_gba_cart:
	@b main_gba @ DOL: text seg 0 file off
	.2byte C64_LMA, 0xea00 @ C64 PRG: load address $0801, garbage
	                       @ GBA: b 0x0800200C
	                       @ (the four bytes read as one ARM word form a branch)
DOL_TEXT1_OFF:
	@ C64: stuff
	@.byte 0xEE,0x20,0xD0,0x60 @ inc $d020; rts
	@ jsr stuff_c64
	.byte 0x20 @ 0x4C: jmp
	@ jsr operand: the address stuff_c64 has on the C64 once everything after
	@ the 2-byte load address has been placed at the load address
	.2byte (C64_LMA+stuff_c64-(ndsheader+2))
	.byte 0x60 @ rts
DOL_TEXT2_OFF:
	.4byte BE4(stuff_ogc-ndsheader) @ DOL: text seg 2 file off

NDS_GAMECODE:
	@ DOS: jump to the actual code
	.byte 0x90 @ align (nop, could be part of the previous (garbage) insn)
	.byte 0xE9 @ jmp
	@ 16-bit displacement, measured from the byte right after the jump
	.2byte (stuff_dos-NDS_MAKERCODE)
	@.4byte 0 @.ascii "####"  @ gamecode

NDS_MAKERCODE:
	@.2byte 0 @ maker code
	.byte ')',0
	.byte 0 @ unit code (0: NDS)
	.byte 0 @ enc. seed select
	.byte 0 @ device cap (128 kB)
	.byte 0,0,0,0,0,0,0 @ padding
	.byte 0 @ reserved ("used on dsi"?)
	.byte 0 @ region
	.byte 0 @ rom version
	@ NOTE(review): this label lands on the last byte before DOL_DATA1_OFF; if the
	@ DOL field is 4 bytes like its neighbours it starts 3 bytes earlier, and the
	@ 4 below is then its low-order byte when read big-endian -- TODO confirm.
DOL_DATA0_OFF:
	.byte 4 @ autostart

@ Image bytes 0x20-0x67: NDS ARM9/ARM7 image descriptors, followed by fields
@ that are shared with the DOL load-address table and with code bytes for the
@ CGB payload (see the CGB_IRQ_STAT label).
NDS_A9_ROMOFF:
DOL_DATA1_OFF:
	.4byte _start_a9 - ndsheader @ A9 romoff
	.4byte A9_LMA @ A9 entry
	.4byte A9_LMA @ A9 addr
	.4byte _start_a9_end-_start_a9 @ A9 size

	.4byte _start_a7 - ndsheader @ A7 romoff
	.4byte A7_LMA @ A7 entry
	.4byte A7_LMA @ A7 addr
	.4byte _start_a7_end-_start_a7 @ A7 size (a single branch-to-self insn)

	.4byte 0 @END - ndsheader @ FNT off
	.4byte 0 @ FNT size
DOL_TEXT0_LMA:
CGB_IRQ_STAT:
	@ These four bytes occupy the slot commented out below as FAT off; they are
	@ meant to be executed by the CGB CPU: jump to the hblank payload, then reti.
	.byte 0xC3 @ jp hblank
	.2byte (stuff_cgb_hbl-ndsheader)
	.byte 0xD9 @ reti
	@.4byte 0 @END - ndsheader @ FAT off
DOL_TEXT1_LMA:
	@.4byte BE4(DOL_LMA) @ FAT size @ DOL: load addr
	.4byte 0
DOL_TEXT2_LMA:
	.4byte BE4(DOL_LMA) @0 @ A9 overlay off @ DOL: load addr
	.4byte 0 @ A9 overlay size
	.4byte 0 @ A7 overlay off
	.4byte 0 @ A7 overlay size

	@ NOTE(review): the next two words match the values GBATEK lists as the usual
	@ gamecard bus control settings (normal commands, then KEY1 commands) rather
	@ than anything AES related -- TODO confirm.
	.4byte 0x00586000 @ AES stuff?
DOL_DATA0_LMA:
	.4byte 0x001808F8 @ AES stuff?

	@ Image bytes 0x68-0xBF: remaining NDS header fields, overlapping the DOL
	@ segment-size table and the GBA header magic / checksum bytes.
	@ The bare hex comments give the image offset of the directive that follows.
	@ 0x068
DOL_DATA1_LMA:
	.4byte BE4(0x81020000) @0 @ icon off
	.2byte 0 @ secure area crc16
	.2byte 0 @ secure area 'delay'?
	@ 0x070
	.4byte 0 @ A9 'auto load list hook addr'
	.4byte 0 @ A7 'auto load list hook addr'
	.8byte 0 @ 'secure area disable'
	.4byte 0 @ "total used ROM size"
	.4byte 0x1A0 @HDRend - ndsheader - 1 @ ROM header size
	@ 0x088
	.8byte 0 @ reserved
	@ 0x090
DOL_TEXT0_SIZE:
	.4byte 0
DOL_TEXT1_SIZE:
	@.4byte BE4(stuff_ogc_end-stuff_ogc) @ DOL: text seg 1 file size
	.4byte 0
DOL_TEXT2_SIZE:
	@ only text segment 2 is populated; it covers the whole stuff_ogc blob
	.4byte BE4(stuff_ogc_end-stuff_ogc) @ DOL: text seg 2 file size
	.4byte 0
	@ the at-sign below starts a comment, so exactly two 8-byte zeros are emitted
	.8byte 0,0@,0 @ reserved
	@.8byte 0,0 @ reserved
DOL_DATA1_SIZE:
	.2byte 0
GBA_MAGIC:
	.byte 0x96 @ reqd (GBA)
	.byte 0
	.4byte 0,0
DOL_DATA4_SIZE:
	.byte 0
	@ 0x0BD
GBA_CHECKSUM:
	.byte 0
	.2byte 0

@ Image bytes 0xC0-0x15F: the NDS logo area (left zeroed here), the tail of the
@ DOL header (bss fields and entry point), and from 0x100 on the start of the
@ ARM9 image, whose first bytes are also meant to be run by the CGB CPU.
DOL_DATA5_SIZE:
NDS_NINTENDOLOGO:
	@ uuughhh
	@.incbin "nintendo.bin"
GBA_MB_ENTRY:
_start_gba_mb:
	.4byte 0 @b main_gba @ would run GBA stuff in multiboot mode (in EWRAM),
	.4byte 0             @ but throws a wrench in dolphin's DOL parser
	.8byte 0,0
DOL_BSS_ADDR_SIZE:
	.8byte 0
	@ 0x0E0
DOL_ENTRY:
	.4byte BE4(DOL_LMA) @ DOL: entrypoint
	.4byte 0
	.8byte 0,0,0
	@ 0x100
_start_a9:
	@ First word of the ARM9 image (the NDS header points its ARM9 ROM offset
	@ here). As ARM code it branches to main_a9. The CGB comments indicate the
	@ same bytes are stepped through by the CGB CPU until the jp below;
	@ presumably the 0,0xc0 pair serves as an operand for the last byte of the
	@ branch word -- TODO confirm against the assembled bytes.
	b main_a9 @ CGB: fallthru
	.byte 0,0xc0
	.byte 0xc3 @ jp always
	.2byte (stuff_cgb-ndsheader)
	.byte 0,0,0
	.8byte 0,0,0,0,0,0
	.4byte 0x140
	@ 0x140
	.byte 0,0,0,0xC0
	@ 0x144
	.8byte 0,0,0
	@ 0x15C
NDS_LOGOCRC:
	.2byte 0xCF56 @ 9E1Ah?
NDS_HDRCRC:
	.2byte 0 @ header CRC
HDRend:

@ DTCM placement constants. In the visible code they are referenced only from
@ the commented-out part of mpu_setup.
@ NOTE(review): the address in the next comment differs from the base defined
@ below; its meaning is not evident from this file.
@ 0x0027C000
#define DTCM_BASE (0x0b000000)
#define DTCM_IF_OFF  (0x3ff8)
#define DTCM_ISR_OFF (0x3ffc)

.globl mainloop
@ mainloop: endless polling loop, entered by a plain branch from the
@ non-GBA path of main_a9. Waits for VCOUNT to change, then calls common_color.
@ In:    r0 = 0x04000000 (I/O base). It must still hold that value when
@        common_color returns, because every iteration reads VCOUNT through it.
@ Uses:  r3 = VCOUNT sampled at the top of an iteration
@        r4 = latest VCOUNT (passed to common_color)
@        r7 = halfword loaded from isds (passed to common_color)
@        lr is overwritten by the bl; the loop never returns.
mainloop:
	@ r0 = 0x0400_0000
	@ pc reads as this instruction + 8, which is exactly label 1 below, so the
	@ offset isds-1f addresses isds. Do not insert instructions before label 1.
	ldrh r7, [pc, #(isds-1f)]
	@ wait for new scanline
	ldrh r3, [r0, #6]  @ VCOUNT
1:
2:	@strh r2, [r1]
	ldrh r4, [r0, #6]  @ VCOUNT
	cmp  r3, r4
	beq  2b            @ spin while the line counter is unchanged
	bl common_color
	b mainloop


.globl isr
@ isr: interrupt handler for the GBA path. main_a9 stores its address at
@ 0x03FFFFFC and enables only the hblank interrupt source.
@ Acknowledges the pending interrupt bits, recolours via common_color and
@ returns with bx lr.
@ Clobbers r0-r4, r6, r7, r8 and flags without saving them. The interrupted
@ code on that path is the branch-to-self at _start_a7, which uses no
@ registers. (Presumably the BIOS dispatcher saves r0-r3 itself -- TODO confirm.)
isr:
	mov r0, #0x04000000
	mov r6, r0
	mov r1, #0
	str r1, [r6, #0x208]! @ disable ints; write-back leaves r6 = 0x04000208

	@ pc reads as this instruction + 8 = label 1; the nop exists to keep label 1
	@ exactly two instructions after the load.
	ldrh r7, [pc, #(isds-1f)]
	nop
	@cmp  r7, #0x42
1:

	ldrh r1, [r6,  #-6] @ +202h: GBA IF
	@ OR the pending bits into the word at 0x03FFFFF8 (presumably the flag word
	@ the BIOS wait-for-interrupt calls look at -- TODO confirm)
	ldr  r2, [r0,  #-8]
	orr  r2, r1
	str  r2, [r0,  #-8] @ wtf? (bios stuff)
	strh r1, [r6,  #-6] @ ack current int

	ldrh r4, [r0, #6]   @ r4 = VCOUNT, the argument of common_color
	mov r8, lr          @ bl overwrites lr, keep the return address in r8
	bl common_color
	mov lr, r8

	mov r1, #1
	str r1, [r6]        @ write 1 back to 0x04000208: interrupts enabled again
	bx lr

.globl main_gba
@ main_gba: a run of ten identical ARM branches, every one of them going to
@ main_a9, so entering at any of the ten words ends up in main_a9.
@ NOTE(review): the visible code only mentions this label in commented-out
@ lines; why ten copies are needed is not evident from this file.
main_gba:
	.rept 10
	b main_a9
	.endr

.globl common_color
@ common_color: pick the stripe colour for a scanline and store it as a
@ halfword at 0x05000000.
@ In:    r4 = VCOUNT
@        r7 = halfword the caller loaded from isds; 0x42 selects the scaled
@             (192-line) index, any other value selects the plain shift
@ Out:   nothing in registers
@ Clobb: r1, r2, r3, flags. r0 is preserved on both paths: mainloop keeps the
@        I/O base in r0 across this call and reads VCOUNT through it afterwards.
@ Index: r7 != 0x42: r2 = r4 >> 5              (32 lines per stripe)
@        r7 == 0x42: r2 = high word of r4 * (5<<32)/192, about r4*5/192
@ The index is not range-checked; values past the six collut entries read the
@ data that follows the table.
common_color:
	@ new scanline - set color now
	cmp r7, #0x42
	mov r1, #0x05000000

	@ r4 = VCOUNT
	lsrne   r2, r4, #5      @ GBA: div by 160/5 == 32
	ldreq   r3, =((5<<32)/192)  @ 0x06AAAAAA
	@ The low product word is not needed. It used to be written to r0, which
	@ destroyed the I/O base that mainloop relies on; it now goes to r3, which
	@ the add below overwrites anyway. r3 is the fourth operand here, which may
	@ coincide with the low destination on this core; only the third may not.
	smulleq r3, r2, r4, r3  @ NDS: div by 192/5 == 48  --> fixed point mult., keep the integer (high) word
	@and    r2, #3
	@ pc reads as this instruction + 8 = label 1, so r3 = address of collut
	add     r3, pc, #(collut-1f)
	@ Word load at a halfword-granular address: register-offset ldrh cannot
	@ scale the index. This relies on the core rotating unaligned word loads so
	@ that the addressed halfword ends up in the low 16 bits.
	ldr     r2, [r3, r2, lsl #1]
1:
	strh    r2, [r1]  @ write back color

	bx lr



	.globl collut
@ collut: six 16-bit stripe colours, indexed top to bottom by common_color.
@ Entries 0-4 are mirror symmetric; entry 5 repeats entry 4.
collut:
	.2byte RGB888(0x59,0xC8,0xF3), RGB888(0xED,0xA5,0xB3), RGB888(0xFF,0xFF,0xFF)
	.2byte RGB888(0xED,0xA5,0xB3), RGB888(0x59,0xC8,0xF3), RGB888(0x59,0xC8,0xF3)
@ isds: one zero-initialised word living in the code section. main_a9 stores a
@ halfword here; mainloop and isr load it into r7 for common_color.
isds:
	.4byte 0

	@ literal pool for the code above (an explicit .align stays disabled)
	.pool

	@ lol
	.globl main_a9
@ main_a9: shared ARM entry point (target of the branch at _start_a9 and of
@ every branch in main_gba). Sets up display and stacks, probes the memory
@ map to tell the two ARM targets apart, then either
@   - programs the display registers and branches to mainloop (polling), or
@   - installs isr, enables the hblank interrupt and falls through into the
@     branch-to-self at _start_a7.
@ Never returns. r0 is kept as a pointer into the I/O area throughout:
@ 0x04000000 at first, 0x04000208 after the first write-back store.
main_a9:
	mov  r0, #0x04000000
	mov  r1, #(1<<4)
	strh r1, [r0, #4]  @ enable hbl in DISPSTAT
	mov  r1, #0
	str  r1, [r0, #0x208]! @ disable interrupts; r0 = 0x04000208 from here on

	@ NDS: allocate EWRAM to ARM9
	@ (byte store of 0 to 0x04000247)
	mov  r1, #0
	strb r1, [r0, #(0x247-0x208)]

	@ init stack
	@ one stack pointer per CPU mode, all derived from r1; ends in sys mode
	ldr r1, =0x03003E00
	mov r3, #0x12 @ irq mode
	msr cpsr, r3
	add sp, r1, #0x100
	mov r3, #0x13 @ svc mode
	msr cpsr, r3
	add sp, r1, #0x1F0
	mov r3, #0x1F @ sys mode
	msr cpsr, r3
	mov sp, r1

	@ are we a gba or an nds?
	@ Probe: write one value at 0x02040000 and another at 0x02000000, then read
	@ 0x02040000 back. If the second write shows up there the two addresses
	@ alias (the GBA case per the comments), otherwise they are distinct.
	mov  r1, #0x42
	mov  r2, #0x02000000
	orr  r7, r2, #0x00040000  @ end of GBA RAM
	str  r2, [r7]          @ make sure eors result later will be 0x42 on NDS
	str  r1, [r2]          @ canary memory write  @ also clobbers our entrypoint
	ldr  r3, [r7]          @ read back at mirror (GBA) or other addr (NDS)
	eors r4, r3, r1        @ Z set <=> the canary was read back
	@ NOTE(review): r4 is not read again in the visible code, so the mvn has no
	@ effect; it and the store below leave the eors flags intact for the beq.
	mvn  r4, r4  @ is_DS <=> r1 != r3 <=> !(r3 ^ r1)
	@ pc reads as this instruction + 8 = label 1, so this stores to isds.
	@ NOTE(review): the value stored is r3 (the read-back word), not the eors
	@ result. On the path that does not branch, r3 should be the 0x02000000
	@ written above, whose low halfword is 0, while common_color tests for
	@ 0x42 -- confirm which value was intended.
	strh r3, [pc, #(isds-1f)]
	beq .Lprolly_gba
1:

	@bl mpu_setup

	@ NDS codepath - set display mode
	ldr r1, =0x8203
	ldr r2, =((0<<8)|(1<<16)|(6<<0)) @ 1<<17
	mov r3, #0x81
	str r1, [r0,  #0x0FC]  @ POWERCNT
	str r2, [r0, #-0x208]! @ DISPCNT; write-back restores r0 = 0x04000000 for mainloop
	str r3, [r0,  #0x240]  @ VRAMCNT_A

	b mainloop

.Lprolly_gba:
	@ GBA codepath - set IRQ & display mode
	@ pc reads as this instruction + 8 = label 2, so r7 = address of isr
	add  r7, pc, #(isr-2f)
	mov  r1, #(1<<1) @ hblank
2:
	strh r1, [r0, #-8] @ enable selected ints
	str  r7, [r0, #-0x20C] @ set up interrupt handler (pointer at 0x03FFFFFC)

	ldr  r1, =(0|(1<<8))@=0x0403
	str  r1, [r0, #-0x208]! @ set DISPCNT; r0 = 0x04000000 afterwards

	mov r1, #1
	str r1, [r0, #0x208] @ enable ints

@ The NDS header lists _start_a7.._start_a7_end as the entire ARM7 image: one
@ branch-to-self. The GBA path above falls through into it and idles here
@ while isr does the per-scanline work.
_start_a7: b _start_a7
_start_a7_end:

@ NOTE: on ARM targets GNU as reads the .align operand as a power of two, so
@ this pads to a 16-byte boundary before the literal pool.
.align 4
.pool

.globl stuff_pdf
@ stuff_pdf .. stuff_pdf_end: the file flag.pdf embedded verbatim. It sits
@ between _start_a9 and _start_a9_end, so it is counted in the ARM9 image size
@ given in the header.
stuff_pdf:
	.incbin "flag.pdf"
stuff_pdf_end:

.align 4
.globl mpu_setup
@ mpu_setup: CP15 reset sequence for the ARM9. In its current form it writes
@ the control register, invalidates both caches, drains the write buffer and
@ returns; the TCM and protection-region programming is commented out.
@ Not called in the visible code (the call in main_a9 is commented out).
@ In:    lr = return address
@ Clobb: r8
mpu_setup: @ kinda stolen from libnds
@	ldr r1, =0x02083E00
@	mov r3, #0x12 @ irq mode
@	msr cpsr, r3
@	add sp, r1, #0x100
@	mov r3, #0x13 @ svc mode
@	msr cpsr, r3
@	add sp, r1, #0x1F0
@	mov r3, #0x1F @ sys mode
@	msr cpsr, r3
@	mov sp, r1

	ldr r8, =0x2078 @ disable TCMs, PU
	mcr p15, 0, r8, c1, c0    @ write the CP15 control register

	@ NOTE: per the ARM9 CP15 register map the two c7 operations below
	@ invalidate the caches; the earlier comments called them disables. The
	@ enable bits live in the control register written above.
	mov r8, #0
	mcr p15, 0, r8, c7, c5, 0 @ invalidate entire instruction cache
	mcr p15, 0, r8, c7, c6, 0 @ invalidate entire data cache

	mcr p15, 0, r8, c7, c10, 4 @ drain write buffer

@	ldr r8, =(DTCM_BASE  | 0x0a)
@	mcr p15, 0, r8, c9, c1, 0 @ set DTCM base & size (16k)
@	mov r8, #0x20
@	mcr p15, 0, r8, c9, c1, 1 @ set ITCM base & size (32k)
@
@	ldr r8, =(0x04000000 | 1 | 0b110010) @ 64M
@	mcr p15, 0, r8, c6, c0, 0 @ region 0: IO regs
@	ldr r8, =(0xFFFF0000 | 1 | 0b011110) @ 64k
@	mcr p15, 0, r8, c6, c1, 0 @ region 1: system ROM
@	ldr r8, =(0x00000000 | 1 | 0b010110) @ 4k
@	mcr p15, 0, r8, c6, c2, 0 @ region 2: alt. vectors
@	ldr r8, =(0x01000000 | 1 | 0b101110) @ 16M
@	mcr p15, 0, r8, c6, c4, 0 @ region 4: ITCM
@	ldr r8, =(DTCM_BASE  | 1 | 0b011010) @ 16k
@	mcr p15, 0, r8, c6, c5, 0 @ region 5: DTCM
@
@	@swi 0xf0000 @ ???
@
@	ldr r8, =(0x08000000 | 1 | 0b110100) @ 128M
@	mcr p15, 0, r8, c6, c3, 0 @ region 3: Slot2
@	ldr r8, =(0x0c000000 | 1 | 0b101110) @ 16M
@	mcr p15, 0, r8, c6, c6, 0 @ region 6: MRAM (uncached)
@	ldr r8, =(0x02000000 | 1 | 0b101010) @ 4M
@	mcr p15, 0, r8, c6, c7, 0 @ region 7: MRAM (cached)
@
@	ldr r8, =0b10000000
@	mcr p15, 0, r8, c3, c0, 0 @ enable write buffer
@
@	@ldr r8, =0b10000010
@	@mcr p15, 0, r8, c2, c0, 0 @ enable DC
@	@mcr p15, 0, r8, c2, c0, 1 @ enable IC
@	ldr r8, =0x33333363
@	mcr p15, 0, r8, c5, c0, 3 @ "IAccess"
@	mcr p15, 0, r8, c5, c0, 2 @ "DAccess"
@
@	mrc p15, 0, r8, c1, c0, 0
@	@ldr r9, =((1<<18)|(1<<16)|(1<<12)|(1<<2)|(1<<0)) @ enable ITCM, DTCM, IC, DC, PU
@	ldr r9, =((1<<18)|(1<<16)|(0<<12)|(0<<2)|(1<<0)) @ enable ITCM, DTCM, IC, DC, PU
@	orr r8, r9
@	mcr p15, 0, r8, c1, c0, 0

	bx lr

	@ literal pool holding the 0x2078 constant loaded above
	.pool

@ End of the range the NDS header reports as the ARM9 image.
_start_a9_end:

@ Everything below is prebuilt payload included byte for byte. Each blob is
@ bracketed by a start label and an _end label; the header at the top of the
@ file refers to these labels by their offset from ndsheader.

.globl stuff_cgb
@ CGB main code: target of the jp placed right after _start_a9
stuff_cgb:
	.incbin "cgbstuff/bin/code.bin.main"
stuff_cgb_end:
.globl stuff_cgb_hbl
@ CGB handler: target of the jp at CGB_IRQ_STAT
stuff_cgb_hbl:
	.incbin "cgbstuff/bin/code.bin.hbl"
stuff_cgb_hbl_end:

.globl stuff_c64
@ C64 code: target of the jsr near the start of the image
stuff_c64:
	.incbin "c64stuff/bin/test.prg", 2 @ skip the original load address
stuff_c64_end:

	@ only this blob is explicitly aligned; the ones above follow each other
	@ without padding
	.align
.globl stuff_ogc
@ DOL text segment 2: file offset and size come from these two labels
stuff_ogc:
	.incbin "obj/ogc.S.bin"
stuff_ogc_end:

.globl stuff_dos
@ DOS program: target of the x86 jmp at NDS_GAMECODE
stuff_dos:
	.incbin "dosstuff/bin/test.com"
stuff_dos_end:

.globl END
@ END: end of the whole image
END:

