;
; Intro stub (c)MX^Addict
;
BITS 32

;
; Resolution: size of the render target, and the display mode requested
; when FULLSCREEN is defined.
;
WIDTH  equ 1280
HEIGHT equ  720

;
; Feature switches.
; NOTE: every switch below except USE_4KLANG is tested with %ifdef/%ifndef,
; so its value is ignored - comment the %define out to disable the feature
; (setting it to 0 does NOT disable it). USE_4KLANG is tested with %if, so
; there the value (0 or 1) is what counts.
;

;
; Fullscreen ? (switch the display mode and create a maximized popup window)
;
%define FULLSCREEN 1

;
; Use postprocessing ? (second full-screen pass over a copy of the frame)
;
%define POSTPROCESS 1

;
; Use compute pass (extra shader pass rendered into a 128x1 float texture)
;
%define USECOMPUTE 1

;
; Clamp to edge inside postprocess (wrap mode of the copied frame texture)
;
%define POSTCLAMPTOEDGE 1

;
; Use beat detector ? (feeds beat counters to the shaders as uniforms)
;
%define BEATDETECTOR 1

;
; Clinkster or 4klang ? (1 = 4klang, 0 = Clinkster)
;
%define USE_4KLANG 1

;
; Use fonts (create the GDI font / display lists and the greetings text)
;
%define FONTSMSG 1

;
; Use fonts to render texture (draw the text once at startup and keep it
; in a texture; when undefined the text is drawn every frame instead)
;
%define FONTSTXT 1

;
; Music
;
; Select the synth back end. Both branches must end up providing
; SAMPLE_RATE and TOTAL_SAMPLES (and optionally SAMPLE_FORMAT_SHORT), which
; the wave format/header and the beat detector below rely on.
%if USE_4KLANG
%include "4klang.inc"
extern __4klang_render@4
; 4klang names its total length MAX_SAMPLES (presumably defined in 4klang.inc);
; alias it so the rest of the file can use TOTAL_SAMPLES for both back ends.
%define TOTAL_SAMPLES MAX_SAMPLES
%else
%include "clinkster.inc"
%include "musiclen.inc"
%endif

;
; Misc defines
; (presumably the FNCALL macro, the GL_* constants and the import names used
; below come from here - they are not defined anywhere in this file)
;
%include "misc.inc"

; Uniform location that receives (time, beat values) in the main pass.
; With the compute pass enabled the compute program uses location 0 and the
; main program uses location 1; without it the main program uses location 0.
%ifdef USECOMPUTE
%define BEATS_UNIFORM_INDEX 1
%else
%define BEATS_UNIFORM_INDEX 0
%endif

;
; Data
;

;
; Screen mode settings (only in fullscreen)
;
; Hand-packed 156-byte DEVMODEA passed to ChangeDisplaySettingsA:
;   dmSize       = 0x9c (156)
;   dmFields     = 0x001c0000 (DM_BITSPERPEL | DM_PELSWIDTH | DM_PELSHEIGHT)
;   dmBitsPerPel = 32, dmPelsWidth = WIDTH, dmPelsHeight = HEIGHT
; Every other field is zero.
;
%ifdef FULLSCREEN
section _devmode data align=1
devmode:
	times 36 db 0				; dmDeviceName[32], dmSpecVersion, dmDriverVersion
	dw 0x009c, 0				; dmSize, dmDriverExtra
	dd 0x001c0000				; dmFields
	times 60 db 0				; fields between dmFields and dmBitsPerPel
	dd 32, WIDTH, HEIGHT		; dmBitsPerPel, dmPelsWidth, dmPelsHeight
	times 40 db 0				; remaining fields
%endif

;
; Pixel format descriptor
;
; Hand-packed 40-byte PIXELFORMATDESCRIPTOR used by ChoosePixelFormat and
; SetPixelFormat: RGBA, 32 colour bits, 8 alpha bits, 32 depth bits,
; dwFlags = PFD_DRAW_TO_WINDOW | PFD_SUPPORT_OPENGL | PFD_DOUBLEBUFFER.
;
section _pfd data align=1
pfd:
	dw	40, 1					; nSize, nVersion
	dd	0x25					; dwFlags
	db	0, 32					; iPixelType (PFD_TYPE_RGBA), cColorBits
	times 6 db 0				; cRedBits .. cBlueShift
	db	8, 0					; cAlphaBits, cAlphaShift
	times 5 db 0				; cAccumBits .. cAccumAlphaBits
	db	32						; cDepthBits
	times 4 db 0				; cStencilBits, cAuxBuffers, iLayerType (PFD_MAIN_PLANE), bReserved
	times 3 dd 0				; dwLayerMask, dwVisibleMask, dwDamageMask

;
; Wave format
;
; WAVEFORMATEX handed to waveOutOpen: stereo at SAMPLE_RATE, either 16-bit
; PCM (SAMPLE_FORMAT_SHORT defined) or 32-bit IEEE float samples.
;
section _wavefmt data align=1
wavefmt:
%ifdef SAMPLE_FORMAT_SHORT
	dw 1, 2						; wFormatTag = WAVE_FORMAT_PCM, nChannels
	dd SAMPLE_RATE				; nSamplesPerSec
	dd SAMPLE_RATE * 4			; nAvgBytesPerSec = rate * 2 bytes * 2 channels
	dw 4, 16					; nBlockAlign, wBitsPerSample
%else
	dw 3, 2						; wFormatTag = WAVE_FORMAT_IEEE_FLOAT, nChannels
	dd SAMPLE_RATE				; nSamplesPerSec
	dd SAMPLE_RATE * 8			; nAvgBytesPerSec = rate * 4 bytes * 2 channels
	dw 8, 32					; nBlockAlign, wBitsPerSample
%endif
	dw 0						; cbSize

;
; Wave header
;
; Pre-filled WAVEHDR for waveOutWrite: points at the whole music buffer and
; has dwFlags already set to WHDR_PREPARED, so no waveOutPrepareHeader call
; is made. wavehdr_size is the byte count passed to waveOutWrite.
; NOTE(review): ten dwords (40 bytes) are emitted here; confirm the trailing
; dwords beyond the documented WAVEHDR fields are intentional.
;
section _wavehdr data align=1
wavehdr:
%if USE_4KLANG
	dd sound_buffer							; lpData
%else
	dd PUBLIC_DATA(Clinkster_MusicBuffer)	; lpData
%endif
%ifdef SAMPLE_FORMAT_SHORT
	dd TOTAL_SAMPLES * 4					; dwBufferLength: 2 bytes * 2 channels per sample frame
%else
	dd TOTAL_SAMPLES * 8					; dwBufferLength: 4 bytes * 2 channels per sample frame
%endif
	dd 0, 0									; dwBytesRecorded, dwUser
	dd 2									; dwFlags = WHDR_PREPARED
	times 5 dd 0							; dwLoops and the remaining (unused) dwords
	wavehdr_size EQU ($ - wavehdr)


%ifdef FONTSMSG
;
; Text(s)
;
; Greetings text stored as 8 fixed-width rows with no terminators. The text
; loops draw 42 bytes per row (glCallLists count) and step the pointer by 42,
; so every row must stay exactly 42 characters long and the row count must
; match the loop counter (8).
;
section _txt00s data align=1
_txt00d: 
	db '                        GREETINGS FLY TO: '
	db 'ANADUNE-ANDROMEDA-ALTAIR-COCOON           '
	db '   CONSPIRACY-CNCD-ELUDE-FAIRLIGHT        '
	db '          FARBRAUSCH-FLOPPY-FUTURIS       '
	db '          GHOSTOWN-HAUJOBB-LAMERS         '
	db 'MADWIZARDS-MERCURY-MYSTIC BYTES           '
	db '        THE BLACK LOTUS-WANTED TEAM       '
	db '                                 AND YOU !'
; Zero-terminated face name passed as the last argument of CreateFontA.
_fontface: 	
	db 'Impact', 0
%endif

;
; BSS sections
;
; mmtime       - 12-byte MMTIME filled by waveOutGetPosition
; waveout      - receives the waveOut device handle from waveOutOpen
; sound_buffer - (4klang only) stereo render target for the whole song
;
section _mmtime bss align=1
mmtime:
	resd 3

section _waveout bss align=1
waveout:
	resd 8

%if USE_4KLANG
section _sndbuf bss align=1
sound_buffer:
%ifdef SAMPLE_FORMAT_SHORT
	resw TOTAL_SAMPLES * 2		; 16-bit samples, 2 channels
%else
	resd TOTAL_SAMPLES * 2		; 32-bit float samples, 2 channels
%endif
%endif

;
; Shaders
;
; All passes share one GLSL source (_fragment_frag, presumably emitted by
; fragment.inc); each pass appends its own one-line main() that forwards to
; amain / bmain / cmain. The src_* tables are the two-entry string arrays
; handed to glCreateShaderProgramv with count = 2.
;
section _shader data align=1
%include "fragment.inc"
src_main_entry:
	db 'void main(){amain();}', 0
%ifdef POSTPROCESS
src_post_entry:
	db 'void main(){bmain();}', 0
%endif
%ifdef USECOMPUTE
src_comp_entry:
	db 'void main(){cmain();}', 0
%endif

section _shdrptr data align=1
src_main:
	dd _fragment_frag, src_main_entry
%ifdef POSTPROCESS
src_post:
	dd _fragment_frag, src_post_entry
%endif
%ifdef USECOMPUTE
src_comp:
	dd _fragment_frag, src_comp_entry
%endif
	
%ifdef USECOMPUTE
;
; Compute-pass object names. They are used whenever USECOMPUTE is defined,
; so they must not depend on BEATDETECTOR being enabled.
;
section _cmpids bss align=1
fb_identifier: resd 1
cp_identifier: resd 1
%endif

%ifdef BEATDETECTOR
%ifndef SAMPLE_FORMAT_SHORT
section _btdtcd data align=1
const_beat_treshold:
	dd 0x3ECD0000 ; ~0.4f (low mantissa bits zeroed)
%endif

section _btdtcb bss align=1
old_slow_beat: resd 1		; smoothed beat count
old_fast_beat: resd 1		; beat count at the current play position
res_sums_beat: resd 1		; running sum of all old_fast_beat values

;
; Beat detector
;
; Counts how many of 200 left-channel samples, taken every 16th sample frame
; starting 800 frames before the given position, exceed the threshold
; (13107 for 16-bit samples, const_beat_treshold for float samples).
;
; in -> ebx = position in sample frames (signed; callers pass positions that
;             were offset below zero near the start of the song)
; out-> edx = number of samples over the threshold (0..200)
;       all other general purpose registers are preserved; flags are clobbered
;
; The position is clamped to [2560, TOTAL_SAMPLES-2560] with SIGNED compares,
; so a negative position maps to the start of the buffer instead of wrapping
; around to its end, and the scanned window always stays inside the buffer.
;
section _btdtc text align=1
_detect_beats:
%ifdef SAMPLE_FORMAT_SHORT

	push   eax
	push   ebx
	push   ecx
	push   esi

	; Clamp ebx to >= 2560 && <= TOTAL_SAMPLES-2560 (signed)

	mov    ecx, 2560
	cmp    ebx, ecx
	cmovl  ebx, ecx

	mov    ecx, TOTAL_SAMPLES-2560
	cmp    ebx, ecx
	cmovg  ebx, ecx

	; Reset beat indicator

	xor    edx, edx

	; Prepare loop for 200 iterations; esi = first sample frame of the window
	; (4 bytes per frame: 2 channels * 16 bit)

	mov    ecx, 200
%if USE_4KLANG
	lea	   esi, sound_buffer[ebx*4-(50*16*4)]
%else
	lea	   esi, PUBLIC_DATA(Clinkster_MusicBuffer)[ebx*4-(50*16*4)]
%endif

	; Main loop
.beatloop:
	mov    ax, [esi]   ; Left channel only

	mov    bx, ax      ; keep the original sample (ebx is restored on exit)
	neg    ax
	cmovl  ax,  bx     ; negation went negative -> sample was positive, take it back: ax = abs(sample)

	cmp    ax, 13107   ; if (abs(sample) > threshold) edx++  (unsigned, so 0x8000 counts too)
	jbe	   SHORT .beatnoinc
	inc    edx
.beatnoinc:

	add	   esi, 16*4   ; advance 16 sample frames
	dec    ecx
	jne	   SHORT .beatloop

	pop    esi
	pop    ecx
	pop    ebx
	pop    eax

%else

	push   ebx
	push   ecx

	; Clamp ebx to >= 2560 && <= TOTAL_SAMPLES-2560 (signed)

	mov    ecx, 2560
	cmp    ebx, ecx
	cmovl  ebx, ecx

	mov    ecx, TOTAL_SAMPLES-2560
	cmp    ebx, ecx
	cmovg  ebx, ecx

	; Reset beat indicator

	xor    edx, edx

	; Prepare loop for 200 iterations; ebx = first sample frame of the window
	; (8 bytes per frame: 2 channels * 32-bit float)

	mov    cl, 200
%if USE_4KLANG
	lea	   ebx, sound_buffer[ebx*8-(50*16*8)]
%else
	lea	   ebx, PUBLIC_DATA(Clinkster_MusicBuffer)[ebx*8-(50*16*8)]
%endif
	fld    dword [const_beat_treshold]	; threshold stays on the FPU stack for the whole loop

	; Main loop
.beatloop:

	fld    dword [ebx]					; Left channel only
	fabs

	; if (abs(sample) >= threshold) edx++   (compare pops the sample again)

	fcomip st0, st1
	jb 	   SHORT .beatnoinc

	inc    edx
.beatnoinc:

	add	   ebx, 16*8					; advance 16 sample frames
	dec    cl
	jne	   SHORT .beatloop

	fstp   st0							; drop the threshold

	pop    ecx
	pop    ebx

%endif
	ret
%endif

;
; Inner loop - renders and presents one frame
;
; in:
;   ebx - time (position in samples), forwarded to the shaders
;   edi - post shader pid
;   esi - main shader pid
;   ebp - HDC
; eax, ecx, edx are scratch (they are not preserved across the API calls).
;
; Passes, in order: optional compute pass into framebuffer 1 (128x1),
; main pass, optional post pass over a copy of the frame, optional per-frame
; text, then SwapBuffers and one PeekMessageA.
;
; With FONTSTXT and POSTPROCESS both defined, the glUniform4i / glUniform1i
; entry points fetched for the main pass are kept on the stack and reused by
; the post pass. They are only pushed when the post pass exists to pop them,
; so the stack is balanced at 'ret' for every switch combination.
;
section _inloop text align=1
_inner_loop:

	%ifdef USECOMPUTE
		;
		; Use compute shader
		;
		FNCALL	wglGetProcAddress, glBindFramebuffer
		FNCALL	eax, GL_FRAMEBUFFER, 1
		FNCALL  glViewport, 0, 0, 128, 1
		FNCALL	wglGetProcAddress, glUseProgram
		FNCALL	eax, dword [cp_identifier]
		FNCALL	wglGetProcAddress, glUniform4i
		%ifdef BEATDETECTOR
			FNCALL	eax, 0, ebx, dword [old_fast_beat], dword [old_slow_beat], dword [res_sums_beat] ; ebx = #samples, Beat, Accumulated beats, Sum of all beats
		%else
			FNCALL	eax, 0, ebx, 0, 0, 0															 ; ebx = #samples, rest is zero
		%endif
		FNCALL	glRects, byte -1, byte -1, byte 1, byte 1
		FNCALL	wglGetProcAddress, glBindFramebuffer
		FNCALL	eax, GL_FRAMEBUFFER, 0
		FNCALL  glViewport, 0, 0, WIDTH, HEIGHT
	%endif
		
	;
	; Use main shader
	;
	FNCALL	wglGetProcAddress, glUseProgram
	FNCALL	eax, esi
	FNCALL	wglGetProcAddress, glUniform4i		
	%ifdef FONTSTXT
	%ifdef POSTPROCESS
		push	eax								; keep glUniform4i for the post pass
	%endif
	%endif
	%ifdef BEATDETECTOR
		FNCALL	eax, BEATS_UNIFORM_INDEX, ebx, dword [old_fast_beat], dword [old_slow_beat], dword [res_sums_beat] ; ebx = #samples, Beat, Accumulated beats, Sum of all beats
	%else
		FNCALL	eax, BEATS_UNIFORM_INDEX, ebx, 0, 0, 0															   ; ebx = #samples, rest is zero
	%endif
	%ifdef USECOMPUTE
		FNCALL	glBindTexture, GL_TEXTURE_2D, 2	; texture 2 = compute pass output
		FNCALL	wglGetProcAddress, glUniform1i
		%ifdef FONTSTXT
		%ifdef POSTPROCESS
			push	eax							; keep glUniform1i for the post pass
		%endif
		%endif
		FNCALL	eax, 0, 0
	%endif

	FNCALL	glRects, byte -1, byte -1, byte 1, byte 1

	%ifdef POSTPROCESS
		;
		; Use post shader: copy the rendered frame into texture 1 and draw
		; a full-screen quad with the post program
		;
		FNCALL	glBindTexture, GL_TEXTURE_2D, 1
		FNCALL	glTexParameteri, GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR
	%ifdef POSTCLAMPTOEDGE
		FNCALL	glTexParameteri, GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE
		FNCALL	glTexParameteri, GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE
	%endif
		FNCALL	glCopyTexImage2D, GL_TEXTURE_2D, 0, GL_RGBA8, 0, 0, WIDTH, HEIGHT, 0
		FNCALL	wglGetProcAddress, glUseProgram
		FNCALL	eax, edi
		FNCALL	wglGetProcAddress, glUniform1i
		%ifdef FONTSTXT
		%ifndef USECOMPUTE
			push    eax							; no copy was saved by the main pass, keep this one
		%endif
		%endif
		FNCALL	eax, 0, 0
		%ifdef FONTSTXT
			pop		eax							; eax = glUniform1i
			FNCALL	eax, 1, 1
			pop		eax							; eax = glUniform4i
			FNCALL	eax, 2, ebx, 0, 0, 0																		; ebx = #samples, rest is zero
		%endif
		FNCALL	glRects, byte -1, byte -1, byte 1, byte 1
	%endif
	%ifdef FONTSMSG
	%ifndef FONTSTXT
		;
		; Prepare for write
		;
		FNCALL	wglGetProcAddress, glUseProgram
		FNCALL	eax, 0
		FNCALL	glListBase, 1
		FNCALL	glPushMatrix
		FNCALL	glScalef, 0x3a830000, 0x3a830000, 0x3a830000 ; 0.000999451 x 3, to use raster position as int (from -1000 to 1000, both X and Y)

		;
		; Write text(s) at location(s)
		;
		; ebx - #Samples
		push    ecx
		mov		ecx, 8							; Number of text lines
		push	esi
		mov		esi, _txt00d
.txtloop:
		push	ecx								; ecx is not preserved by the calls below
		imul	eax, ecx, 120					; Lines spacing

		; Calculate scrolling offset from #Samples

		mov		edx, ebx
		shr		edx, 6							; Scroll speed
		sub		edx, 83000						; Scroll start time
		add		eax, edx

		FNCALL	glRasterPos2i, -475, eax		; First argument is XOffset, then YOffset
		FNCALL	glCallLists, 42, GL_UNSIGNED_BYTE, esi

		add		esi, 42							; next 42-character row
		pop		ecx
		dec		ecx
		jnz		.txtloop
		pop		esi
		pop     ecx

		FNCALL	glPopMatrix
	%endif
	%endif

	;
	; Swap, process messages
	;
	FNCALL	SwapBuffers, ebp						; ebp = HDC 
	FNCALL	PeekMessageA, 0, 0, 0, 0, 1				; PM_REMOVE = 1

	ret
	
;
; Code, Main entry
;
; Starts music generation, creates the window and GL context, optionally
; renders the greetings text into texture 8, builds the shader programs and
; the compute framebuffer, renders one warm-up frame, starts playback and
; then renders frames until the song ends or ESC is pressed.
;
; Register roles once set up (all survive the stdcall API calls):
;   ebp - HDC
;   esi - main shader pid
;   edi - post shader pid
;   ebx - current play position in samples (inside the main loop)
;
section _text text align=1
_start:

	;
	; Calculate music
	;
%if USE_4KLANG
    FNCALL	CreateThread, 0, 0, __4klang_render@4, sound_buffer, 0, 0 
%else
	call	PUBLIC_FN(Clinkster_GenerateMusic, 0)
;	FNCALL	CreateThread, 0, 0, PUBLIC_FN(Clinkster_GenerateMusic, 0), 0, 0, 0 ; may do this in async ...
%endif
	
	;
	; Switch to fullscreen & create window
	;
%ifdef FULLSCREEN
	FNCALL	ChangeDisplaySettingsA, devmode, 4
	FNCALL	ShowCursor, 0
	FNCALL	CreateWindowExA, 0, 0x0000c018, 0, 0x91000000, 0, 0, 0, 0, 0, 0, 0, 0
%else
	FNCALL	CreateWindowExA, 0, 0x0000c018, 0, 0x90000000, 0, 0, WIDTH, HEIGHT, 0, 0, 0, 0
%endif

	;
	; Initialize OpenGL
	;
	FNCALL	GetDC, eax
	mov		ebp, eax ; ebp = HDC
	FNCALL	ChoosePixelFormat, ebp, pfd
	FNCALL	SetPixelFormat, ebp, eax, pfd
	FNCALL	wglCreateContext, ebp
	FNCALL	wglMakeCurrent, ebp, eax

%ifdef FONTSMSG
		;
		; Create font and build one display list per character (lists 1..255)
		;
		FNCALL	CreateFontA, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, _fontface ; First argument is font height, last is font face name (may be null)
		FNCALL	SelectObject, ebp, eax
		FNCALL	wglUseFontBitmapsA, ebp, 0, 255, 1
	%ifdef FONTSTXT
			FNCALL	glListBase, 1
			FNCALL	glPushMatrix
			FNCALL	glScalef, 0x3a830000, 0x3a830000, 0x3a830000 ; 0.000999451 x 3, to use raster position as int (from -1000 to 1000, both X and Y)

			;
			; Write text
			;
			mov		ecx, 8							; Number of text lines
			mov		esi, _txt00d
.txtloop:
			push	ecx								; ecx is not preserved by the calls below
			imul	eax, ecx, 120					; Lines spacing
			sub		eax, 475						; Pixels offset from top

			FNCALL	glRasterPos2i, -475, eax		; First argument is XOffset, then YOffset
			FNCALL	glCallLists, 42, GL_UNSIGNED_BYTE, esi

			add		esi, 42							; next 42-character row
			pop		ecx
			dec		ecx
			jnz		.txtloop

			;
			; Copy the drawn text into texture 8 and leave it bound on texture unit 1
			;
			FNCALL	glPopMatrix
			FNCALL	glBindTexture, GL_TEXTURE_2D, 8
			FNCALL	glTexParameteri, GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR
			FNCALL	glCopyTexImage2D, GL_TEXTURE_2D, 0, GL_RGBA8, 0, 0, WIDTH, HEIGHT, 0
			FNCALL	wglGetProcAddress, glActiveTexture
			push	eax								; keep glActiveTexture for the second call
			FNCALL	eax, GL_TEXTURE1
			FNCALL	glBindTexture, GL_TEXTURE_2D, 8
			pop		eax
			FNCALL	eax, GL_TEXTURE0
	%endif
%endif

	;
	; Create main shader and store it in esi
	;
	FNCALL	wglGetProcAddress, glCreateShaderProgramv
	FNCALL	eax, GL_FRAGMENT_SHADER, 2, src_main
	mov		esi, eax ; esi = main shader

	%ifdef POSTPROCESS
		;
		; Create post shader and store it in edi
		;
		FNCALL	wglGetProcAddress, glCreateShaderProgramv
		FNCALL	eax, GL_FRAGMENT_SHADER, 2, src_post
		mov		edi, eax ; edi = post shader
	%endif

	%ifdef USECOMPUTE
		;
		; Create compute shader and store it in cp_identifier (cannot be in ecx, because ecx is not preserved during subsequent calls)
		;
		FNCALL	wglGetProcAddress, glCreateShaderProgramv
		FNCALL	eax, GL_FRAGMENT_SHADER, 2, src_comp
		mov		dword [cp_identifier], eax ; comp shader

		;
		; Create texture and framebuffer bindings: a 128x1 RGBA32F texture (name 2)
		; attached as colour target of the framebuffer.
		; NOTE(review): the framebuffer is bound by the literal name 1 rather than
		; the value stored in fb_identifier - this assumes the first generated name is 1.
		;
		FNCALL	wglGetProcAddress, glGenFramebuffers
		FNCALL	eax, 1, fb_identifier
		FNCALL	wglGetProcAddress, glBindFramebuffer
		FNCALL	eax, GL_FRAMEBUFFER, 1
		FNCALL	glBindTexture, GL_TEXTURE_2D, 2
		FNCALL	glTexParameteri, GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST
		FNCALL	glTexParameteri, GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST
		FNCALL  glTexImage2D, GL_TEXTURE_2D, 0, GL_RGBA32F, 128, 1, 0, GL_RGBA, GL_FLOAT, 0
		FNCALL	wglGetProcAddress, glFramebufferTexture
		FNCALL  eax, GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, 2, 0	
	%endif

	;
	; Pre-run one frame at time 0 before the music plays (the shader takes a
	; long time to compile), then wait 4 seconds
	;
	xor		ebx, ebx
	call	_inner_loop
	FNCALL  Sleep, 4000

	;
	; Play music
	;
	FNCALL	waveOutOpen, waveout, byte -1, wavefmt, 0, 0, 0
	FNCALL	waveOutWrite, dword [waveout], wavehdr, wavehdr_size

	;
	; Main loop
	;
	.mainloop:
		;
		; Query position of music & terminate if reached end.
		; The value at mmtime+4 is treated as a byte position.
		; NOTE(review): mmtime.wType is left at 0, so this relies on the driver
		; answering in bytes - confirm.
		;
		FNCALL	waveOutGetPosition, dword [waveout], mmtime, 12
		mov		ebx, dword [mmtime + 4]
		%ifdef SAMPLE_FORMAT_SHORT
			shr		ebx, 2 ; divide by 4 (sizeof(SAMPLE) * nChannels) to get #samples
		%else
			shr		ebx, 3 ; divide by 8 (sizeof(SAMPLE) * nChannels) to get #samples
		%endif
		cmp		ebx, TOTAL_SAMPLES
		jge		.exit

		%ifdef BEATDETECTOR
			;
			; Accumulate beats history & calc single beat.
			; Slow beat: sum the detector over four windows at -4096, -2048,
			; +2048 and +4096 samples around the play position, add the previous
			; slow value and halve. Fast beat: detector at the play position.
			;
			push	ebx	
			xor		ecx, ecx
			sub		ebx, 4096
			call	_detect_beats                      ; ebx = #samples, return beat in edx
			add		ecx, edx
			add		ebx, 2048
			call	_detect_beats                      ; ebx = #samples, return beat in edx
			add		ecx, edx
			add		ebx, 4096
			call	_detect_beats                      ; ebx = #samples, return beat in edx
			add		ecx, edx
			add		ebx, 2048
			call	_detect_beats                      ; ebx = #samples, return beat in edx
			add		ecx, edx
			add		ecx, dword [old_slow_beat]
			shr		ecx, 1
			mov		dword [old_slow_beat], ecx
			pop		ebx			
			call	_detect_beats                      ; ebx = #samples, return beat in edx
			add		dword [res_sums_beat], edx		   ; add to sum of all beats
			mov     dword [old_fast_beat], edx
		%endif

		call _inner_loop

		;
		; Check ESC: keep looping while GetAsyncKeyState reports 0.
		; The result (a 16-bit value in ax) is tested explicitly because the
		; flags left behind by an API call are not defined.
		;
		FNCALL	GetAsyncKeyState, 27 ; VK_ESCAPE = 27
		test	ax, ax
	jz	.mainloop
.exit:
	call ExitProcess
