
-------------------------------------------------------------------------------
-------------------------------------------------------------------------------
--
--           ______              _        _             _ _
--          |  ____|            | |      | |           | | |
--          | |__ _ __ __ _  ___| |_ __ _| |___    __ _| | |
--          |  __| '__/ _` |/ __| __/ _` | / __|  / _` | | |
--          | |  | | | (_| | (__| || (_| | \__ \ | (_| | | |
--          |_|  |_|  \__,_|\___|\__\__,_|_|___/  \__,_|_|_|
--  _   _                                      _
-- | | | |                                    | |
-- | |_| |__   ___  __      ____ _ _   _    __| | _____      ___ __
-- | __| '_ \ / _ \ \ \ /\ / / _` | | | |  / _` |/ _ \ \ /\ / / '_ \
-- | |_| | | |  __/  \ V  V / (_| | |_| | | (_| | (_) \ V  V /| | | |
--  \__|_| |_|\___|   \_/\_/ \__,_|\__, |  \__,_|\___/ \_/\_/ |_| |_|
--                                  __/ |
--                                 |___/
--
-------------------------------------------------------------------------------
--
--   An FPGA demo by doz from crtc, music by mr_lou, font by tunk.
--
--       Presented at Sundown 2013, 2nd place in Wild Compo.
--
-------------------------------------------------------------------------------
-------------------------------------------------------------------------------
--
-- The demo was running on my custom FPGA board, which I designed in order
-- to create an Amstrad CPC emulator. It's not yet available to anyone else
-- so there's not really any point releasing a binary version, so I'm making
-- the source VHDL available and the script to create the data file.
--
-- The source is (c) 2013 Ranulf Doswell
--
-- If you have any questions, please e-mail doz@ranulf.net
--
-------------------------------------------------------------------------------
-------------------------------------------------------------------------------
--

-- fractmain.vhd
library IEEE;
use ieee.std_logic_1164.all;
use ieee.std_logic_arith.all;
use ieee.std_logic_unsigned.all;

-- Top-level entity of the demo: renders into an external SRAM frame buffer,
-- generates RGB video with sync, accepts a byte stream of commands/data on
-- the din port and produces two PWM audio outputs.
entity fractmain is
	port(
		clk16				: in	std_logic;	-- clocks the frame counter and the PSG pwm_clk input
		clk96				: in	std_logic;	-- main clock for every other process and RAM port

		-- external frame buffer, driven by the memory_arbiter process
		sram_address			: out	std_logic_vector(18 downto 0);
		sram_data			: inout std_logic_vector(7 downto 0);	-- tri-stated ('Z') while a read is set up
		sram_we				: out	std_logic;	-- driven '0' during write-back cycles
		sram_oe				: out	std_logic;	-- could even tie this low

		-- video output
		red, green, blue		: out	std_logic_vector(3 downto 0);
		hsync, vsync			: out	std_logic;

		-- byte-wide data input feeding the input FIFO
		din_nreset			: in    std_logic;	-- '0' resets the FIFO read and write addresses
		din				: in	std_logic_vector(7 downto 0);
		din_latch			: in	std_logic;	-- a byte is taken when a 0->1 change is seen on clk96
		din_can_accept			: out	std_logic;	-- '0' when the next write word would reach the read address

		-- PWM outputs of the ay8192_internal sound generator
		audio_left			: out	std_logic;
		audio_right			: out	std_logic);
end fractmain;

architecture impl of fractmain is
	-- video timing and pixel path
	signal pal				: std_logic := '0';	-- '1' selects the PAL line numbers in video_clock, '0' the NTSC ones
	signal vde, hde, vsync_o, hsync_o	: std_logic;		-- vertical/horizontal display enable, internal sync signals
	signal xcoord				: std_logic_vector(8 downto 0);	-- hcounter(9 downto 1) from video_clock
	signal ycoord				: std_logic_vector(7 downto 0);	-- vcounter(7 downto 0) from video_clock
	signal video_rw				: std_logic;		-- hcounter(0): '0' = set up SRAM read, '1' = write back
	signal pixel				: std_logic_vector(7 downto 0);	-- last byte read from SRAM for display

	signal pixel_wr				: std_logic_vector(2 downto 0);	-- colour bits placed in bits 7..5 of a plotted pixel
	signal advance_wr			: std_logic := '1';	-- written by memory_arbiter; not read anywhere in this file

	signal transform_block			: std_logic_vector(2 downto 0) := "000";	-- advanced by video_clock, top 3 bits of the probability address
	signal transform_block_sel		: std_logic_vector(2 downto 0);
	signal block_changed			: std_logic := '1';	-- 1 don't render ifs

	signal picture				: std_logic_vector(5 downto 0);	-- written by frame_counter; not read in this file

	signal	frame_count			: std_logic_vector(10 downto 0);	-- written by frame_counter; not read in this file

	signal	clk96_div_6			: std_logic;	-- one-cycle strobe raised in phase "010" of the six-phase sequencer
	signal	clk96_n				: std_logic;	-- not clk96

        -- Component declarations for the block RAM and multiplier primitives
        -- instantiated below. The names look like Xilinx library primitives
        -- (NOTE(review): confirm against the target device's library guide);
        -- the port widths must match the primitive exactly.

        -- dual-port RAM, both ports 11-bit address x 8-bit data (+1 parity bit)
        component RAMB16_S9_S9 
--		generic ( WRITE_MODE : string := "READ_FIRST" );
		port (	ADDRA : in    std_logic_vector (10 downto 0); 
			ADDRB : in    std_logic_vector (10 downto 0); 
			CLKA  : in    std_logic; 
			CLKB  : in    std_logic; 
			DIA   : in    std_logic_vector (7 downto 0); 
			DIB   : in    std_logic_vector (7 downto 0); 
			DIPA  : in    std_logic_vector (0 downto 0); 
			DIPB  : in    std_logic_vector (0 downto 0); 
			ENA   : in    std_logic; 
			ENB   : in    std_logic; 
			SSRA  : in    std_logic; 
			SSRB  : in    std_logic; 
			WEA   : in    std_logic; 
			WEB   : in    std_logic; 
			DOA   : out   std_logic_vector (7 downto 0); 
			DOB   : out   std_logic_vector (7 downto 0); 
			DOPA  : out   std_logic_vector (0 downto 0); 
			DOPB  : out   std_logic_vector (0 downto 0));
	end component;

	-- dual-port RAM, both ports 10-bit address x 16-bit data (+2 parity bits)
	component RAMB16_S18_S18 
--		generic ( WRITE_MODE : string := "READ_FIRST" );
		port (	ADDRA : in    std_logic_vector (9 downto 0); 
			ADDRB : in    std_logic_vector (9 downto 0); 
			CLKA  : in    std_logic; 
			CLKB  : in    std_logic; 
			DIA   : in    std_logic_vector (15 downto 0); 
			DIB   : in    std_logic_vector (15 downto 0); 
			DIPA  : in    std_logic_vector (1 downto 0); 
			DIPB  : in    std_logic_vector (1 downto 0); 
			ENA   : in    std_logic; 
			ENB   : in    std_logic; 
			SSRA  : in    std_logic; 
			SSRB  : in    std_logic; 
			WEA   : in    std_logic; 
			WEB   : in    std_logic; 
			DOA   : out   std_logic_vector (15 downto 0); 
			DOB   : out   std_logic_vector (15 downto 0); 
			DOPA  : out   std_logic_vector (1 downto 0); 
			DOPB  : out   std_logic_vector (1 downto 0));
	end component;

	-- asymmetric dual-port RAM: port A is 11-bit address x 8-bit data,
	-- port B is 9-bit address x 32-bit data (used as the byte-in/word-out input FIFO)
	component RAMB16_S9_S36
--		generic ( WRITE_MODE : string := "READ_FIRST" );
		port (	ADDRA : in    std_logic_vector (10 downto 0); 
			ADDRB : in    std_logic_vector (8 downto 0); 
			CLKA  : in    std_logic; 
			CLKB  : in    std_logic; 
			DIA   : in    std_logic_vector (7 downto 0); 
			DIB   : in    std_logic_vector (31 downto 0); 
			DIPA  : in    std_logic_vector (0 downto 0); 
			DIPB  : in    std_logic_vector (3 downto 0); 
			ENA   : in    std_logic; 
			ENB   : in    std_logic; 
			SSRA  : in    std_logic; 
			SSRB  : in    std_logic; 
			WEA   : in    std_logic; 
			WEB   : in    std_logic; 
			DOA   : out   std_logic_vector (7 downto 0); 
			DOB   : out   std_logic_vector (31 downto 0); 
			DOPA  : out   std_logic_vector (0 downto 0); 
			DOPB  : out   std_logic_vector (3 downto 0));
	end component;

	-- 18 x 18 multiplier with a 36-bit product and no clock port
	component MULT18X18 port (
		P : out std_logic_vector (35 downto 0);
		A : in std_logic_vector (17 downto 0);
		B : in std_logic_vector (17 downto 0));
	end component;

	-- probability management
	-- prob_mem_wr_* form a fill request (start address, byte value, length) written
	-- through the special registers and executed by the transform_seed process.
	signal prob_mem_wr_addr			: std_logic_vector(10 downto 0);
	signal prob_mem_wr_len			: std_logic_vector(11 downto 0);
	signal prob_mem_wr_data			: std_logic_vector(7 downto 0);
	signal prob_mem_wr_start		: std_logic := '0';
	signal prob_mem_wr_busy			: std_logic;

	-- port A of probability_ram
	signal prob_mem_addr			: std_logic_vector(10 downto 0);
	signal prob_mem_in			: std_logic_vector(7 downto 0);
	signal prob_mem_out			: std_logic_vector(7 downto 0);
	signal prob_mem_out_parity		: std_logic_vector(0 downto 0);	-- connected to DOPA only; otherwise unused
	signal prob_mem_we			: std_logic;

	-- transform_phase is the sub pixel clock for the transform logic
	signal transform_phase			: std_logic_vector(2 downto 0) := "000";
	signal transform_draw_add		: std_logic_vector(4 downto 0) := "00101";	-- intensity added per plotted point (register "0100")

	-- transform is the index into the transformation table
	signal transform_selection		: std_logic_vector(7 downto 0) := (others=>'0');	-- not referenced elsewhere in this file
	signal transform_choice			: std_logic_vector(7 downto 0);	-- byte looked up in probability_ram
	signal transform_addrhi			: std_logic_vector(6 downto 0) := (others=>'0');	-- transform_ram ADDRA(9 downto 3)
	signal transform_addrlo			: std_logic_vector(2 downto 0) := (others=>'0');	-- transform_ram ADDRA(2 downto 0)
	signal transform_mem_out		: std_logic_vector(15 downto 0);
	signal transform_mem_out_parity		: std_logic_vector(1 downto 0);	-- connected to DOPA only; otherwise unused

	-- multiplier
	signal transform_sum			: std_logic_vector (29 downto 0);	-- running accumulator
	signal transform_sum_plus_p		: std_logic_vector (29 downto 0);	-- transform_sum + low 30 bits of the product

	signal transform_p			: std_logic_vector (35 downto 0);	-- MULT18X18 product
	signal transform_a			: std_logic_vector (17 downto 0);
	signal transform_b			: std_logic_vector (17 downto 0);

	-- transform current coords
	signal transform_colour			: std_logic_vector (3 downto 0);
	signal transform_x			: std_logic_vector (17 downto 0);
	signal transform_y			: std_logic_vector (19 downto 0);
	signal transform_x_new			: std_logic_vector (17 downto 0);	-- not referenced elsewhere in this file
	signal transform_address		: std_logic_vector (17 downto 0);	-- SRAM address of the point being plotted

	-- cpu memory interface
	-- (write bus fed from the input FIFO; see the chip-enable decode for the address map)
	signal cpu_mem_addr			: std_logic_vector (12 downto 0);
	signal cpu_mem_din			: std_logic_vector (15 downto 0);
	signal cpu_mem_pin			: std_logic_vector (1 downto 0);	-- never assigned in this file; feeds transform_ram DIPB
	signal cpu_mem_pout			: std_logic_vector (1 downto 0);	-- transform_ram DOPB; otherwise unused
	signal cpu_mem_we			: std_logic;
	signal cpu_mem_ce_prb, cpu_mem_ce_tr	: std_logic;
	signal cpu_mem_ce_txt			: std_logic;

	-- source data
	-- (input FIFO: bytes are written at an 11-bit byte address, 32-bit words are read at a 9-bit word address)
	signal	input_data			: std_logic_vector(7 downto 0) := x"00";	-- last byte latched; not read in this file
	signal	input_ram_read_addr		: std_logic_vector(8 downto 0) := (others=>'0');
	signal	input_ram_write_addr		: std_logic_vector(10 downto 0) := (others=>'0');
	signal	input_ram_write_addr_plus_one	: std_logic_vector(8 downto 0);	-- word part of the write address, plus one
	signal	input_ram_write			: std_logic;
	signal	input_ram_in			: std_logic_vector(7 downto 0);
	signal	input_ram_out			: std_logic_vector(31 downto 0);
	signal	input_ram_can_read		: std_logic;	-- '1' while write word address differs from read address
	signal	input_ram_read_advance		: std_logic;	-- one-cycle strobe: current word was consumed

	-- font
	signal text_mem_addr			: std_logic_vector(10 downto 0);
	signal font_mem_addr			: std_logic_vector(10 downto 0);
	signal text_mem_out			: std_logic_vector(7 downto 0);
	signal font_mem_out			: std_logic_vector(7 downto 0);
	signal font_pixel			: std_logic;	-- '1' where a font pixel is overlaid on the picture
	signal mred, mgreen, mblue		: std_logic_vector(3 downto 0);	-- picture colour before the font overlay
	-- audio
	-- Component declaration for the sound generator, defined in another file.
	-- The register-style inputs are driven directly from signals in this
	-- architecture rather than through a bus interface.
	component ay8192_internal is port(
		nRESET			: in	std_logic;
		clk			: in	std_logic;
		pwm_clk			: in	std_logic;
		
		-- registers
		tone_a, tone_b, tone_c			: in std_logic_vector(11 downto 0);
		noise					: in std_logic_vector( 4 downto 0);
		ioadir, iobdir				: in std_logic;
		en_noise_a, en_noise_b, en_noise_c	: in std_logic;
		en_tone_a,  en_tone_b,  en_tone_c	: in std_logic;
		amp_a, amp_b, amp_c			: in std_logic_vector( 4 downto 0);
		env_period				: in std_logic_vector(15 downto 0);
		env_shape				: in std_logic_vector( 3 downto 0);
		env_restart				: in std_logic;
	
		-- sound
		tape_noise		: in	std_logic;
		is_mono			: in	std_logic;
		pwm_left, pwm_right	: out	std_logic);
	end component;

	-- these are the internal state of the PSG
	-- (tone/noise/amp values are loaded by the special-address register process)
	signal	tone_a, tone_b, tone_c			: std_logic_vector(11 downto 0) := (others=>'0');
	signal	noise					: std_logic_vector( 4 downto 0) := (others=>'0');
	signal	en_noise_a, en_noise_b, en_noise_c	: std_logic := '0';
	signal	en_tone_a,  en_tone_b,  en_tone_c	: std_logic := '0';
	signal	amp_a, amp_b, amp_c			: std_logic_vector( 4 downto 0) := (others=>'0');
	-- the envelope signals are never assigned in this file, so they keep their initial values
	signal	env_period				: std_logic_vector(15 downto 0) := (others=>'0');
	signal	env_shape				: std_logic_vector( 3 downto 0) := (others=>'0');
	signal	env_restart				: std_logic := '0';
	signal	psg_clk					: std_logic;	-- divided clock for the PSG, see the last process in this file

	-- frame sync
	-- start is raised by a write to special register "0011"; busy then holds off
	-- further loader writes until the next rising edge of vsync_o
	signal	frame_sync_busy				: std_logic := '0';
	signal	frame_sync_start			: std_logic := '0';
begin
	-- Frame-sync latch.
	-- frame_sync_busy goes high as soon as frame_sync_start is seen and drops
	-- again on the first 0->1 transition of vsync_o observed while start is low.
	-- A pending start always wins over a vsync edge in the same cycle.
	frame_sync: process(clk96)
		variable	prev_vsync	: std_logic := '0';	-- vsync_o as sampled on the previous clk96 edge
		variable	vsync_rose	: boolean;		-- combinational edge flag, recomputed every cycle
	begin
		if rising_edge(clk96) then
			vsync_rose	:= (vsync_o = '1') and (prev_vsync = '0');
			prev_vsync	:= vsync_o;

			if frame_sync_start = '1' then
				frame_sync_busy	<= '1';
			elsif vsync_rose then
				frame_sync_busy	<= '0';
			end if;
		end if;
	end process frame_sync;

	-- Probability table: 11-bit address x 8-bit dual-port RAM.
	--   port A  lookup / fill port driven by transform_seed, clocked from clk96_n
	--   port B  loader write port, enabled by cpu_mem_ce_prb, clocked from clk96
	probability_ram : RAMB16_S9_S9 port map (
		-- port A
		CLKA	=> clk96_n,
		ENA	=> '1',
		SSRA	=> '0',
		WEA	=> prob_mem_we,
		ADDRA	=> prob_mem_addr,
		DIA	=> prob_mem_in,
		DIPA	=> "0",
		DOA	=> prob_mem_out,
		DOPA	=> prob_mem_out_parity,

		-- port B
		CLKB	=> clk96,
		ENB	=> cpu_mem_ce_prb,
		SSRB	=> '0',
		WEB	=> cpu_mem_we,
		ADDRB	=> cpu_mem_addr(10 downto 0),
		DIB	=> cpu_mem_din(7 downto 0),
		DIPB	=> "0" );

	-- the block used for lookups simply follows the block chosen by video_clock
	transform_block_sel <= transform_block;

	-- Random transform selection and probability-RAM fill engine.
	--
	-- Two free-running XNOR-feedback LFSRs (63 and 47 bits) supply the random
	-- part of the probability_ram address. On every clk96_div_6 strobe the
	-- previous lookup result is latched into transform_choice and a new random
	-- address inside the current transform_block_sel block is presented.
	-- On the remaining cycles a pending fill request (prob_mem_wr_addr /
	-- prob_mem_wr_len / prob_mem_wr_data, kicked off by prob_mem_wr_start)
	-- writes one byte per cycle; prob_mem_wr_busy is high while it runs.
	--
	-- All process variables carry an explicit zero initial value: without it
	-- the LFSRs stay 'U' for ever in simulation and the "wr_len /= 0" test is
	-- evaluated on metavalues. All-zero is a legal state for XNOR feedback
	-- (the lock-up state is all-ones).
	transform_seed: process(clk96)
		variable lfsr1			: std_logic_vector(63 downto 1) := (others=>'0');
		variable lfsr2			: std_logic_vector(47 downto 1) := (others=>'0');

		-- working copy of the fill request
		variable wr_addr		: std_logic_vector(10 downto 0) := (others=>'0');
		variable wr_len			: std_logic_vector(11 downto 0) := (others=>'0');
		variable wr_data		: std_logic_vector(7 downto 0)  := (others=>'0');
	begin
		if rising_edge(clk96) then
			-- update shift register at 96MHz so no bits are common
			-- between usages every 16MHz
			lfsr1 := lfsr1(62 downto 1) & (lfsr1(63) xnor lfsr1(62));
			lfsr2 := lfsr2(46 downto 1) & (lfsr2(47) xnor lfsr2(42));

			-- (re)load the fill request for as long as start is asserted;
			-- start is dropped by the register process once busy is seen
			if prob_mem_wr_start = '1' then
				wr_addr			:= prob_mem_wr_addr;
				wr_len			:= prob_mem_wr_len;
				wr_data			:= prob_mem_wr_data;
				prob_mem_wr_busy	<= '1';
			end if;

			-- on 16MHz, do a single probability lookup
			if clk96_div_6 = '1' then
				transform_choice	<= prob_mem_out;
				prob_mem_addr		<= transform_block_sel
							 & lfsr2(47 downto 46)
							 & lfsr1(63 downto 60)
							 & lfsr2(43 downto 42);
				prob_mem_we		<= '0';
			elsif wr_len /= 0 then
			-- on the other 5 cycles we can populate the prob RAM
				prob_mem_addr		<= wr_addr;
				prob_mem_in		<= wr_data;
				prob_mem_we		<= '1';
				
				wr_addr			:= wr_addr + 1;
				wr_len			:= wr_len  - 1;
			else
			-- no more data to write (this assignment overrides the
			-- busy <= '1' above when a zero-length request is started)
				prob_mem_we		<= '0';
				prob_mem_wr_busy	<= '0';
			end if;
		end if;
	end process;

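	-- transform schedule: for each phase, what is accumulated (P = multiplier
	-- product, T/R = running sums) and which multiplier operands are loaded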
-- 0  			A <= mem(c), B<=1 | 0 00 -> 10
-- 1  T <= P		A <= mem(a), B<=x | 0 01 -> 00
-- 2  T <= P+T		A <= mem(b), B<=y | 0 10 -> 01
-- 3  R <= P+T		A <= mem(f), B<=1 | 1 00
-- 4  T <= P		A <= mem(d), B<=x | 1 01
-- 5  T <= P+T		A <= mem(e), B<=y | 1 10
-- 6  y <= P+T, x <= R	A <= mem(f), B<=1 | 0 00 (and continue to 1)

	-- Six-phase transform sequencer, one phase per clk96 cycle.
	-- transform_phase steps 000 -> 001 -> 010 -> 100 -> 101 -> 110 -> 000
	-- (the unused codes 011 and 111 fall back to 000). Each phase loads the
	-- multiplier operands and either restarts or extends the accumulator
	-- transform_sum; new y and x values are taken from transform_sum_plus_p in
	-- phases 001 and 101. Phase 010 also computes the SRAM address of the
	-- point to plot and raises clk96_div_6 for one cycle, which is the
	-- pixel-rate strobe used by the other processes.
	phase_transition:process(clk96)
	begin
		if rising_edge(clk96) then

			-- default; only phase "010" overrides this
			clk96_div_6					<= '0';

			-- multiplier operand B: the 16-bit table word sign-extended to 18 bits
			transform_b(17)					<= transform_mem_out(15);
			transform_b(16)					<= transform_mem_out(15);
			transform_b(15 downto 0)			<= transform_mem_out;
			-- low address bits of the table follow the current phase
			transform_addrlo				<= transform_phase(2 downto 0);

			-- multiplier operand A: a constant (bit 14 set), x, or the top 18 bits of y,
			-- chosen by the two low phase bits
			case transform_phase(1 downto 0) is
			when "00" =>
					transform_a			<= (14=>'1', others=>'0');
			when "01" =>
					transform_a			<= transform_x;
			when "10" =>
					transform_a			<= transform_y(19 downto 2);
			when others =>
					transform_a			<= (others=>'0');
			end case;

			case transform_phase(2 downto 0) is
			when "000" =>	-- f has been read from memory
					transform_phase			<= "001";
					-- first mem transition
					-- pick the table row from the random choice; the colour chosen on the
					-- previous iteration becomes the colour of the point being plotted
					transform_addrhi		<= transform_choice(6 downto 0);
					pixel_wr			<= transform_colour(2 downto 0);
					transform_colour		<= transform_choice(3 downto 0);
					-- dy has been multiplied
					transform_sum			<= transform_sum + transform_p(29 downto 0);

			when "001" =>	-- a has been read from memory
					transform_phase			<= "010";
					-- f has been multiplied
--					transform_y			<= transform_sum + transform_p(29 downto 0);
--					transform_y			<= transform_y + "00000000000000001";
					-- new y = top 20 bits of the completed sum
					transform_y			<= transform_sum_plus_p(29 downto 10);

			when "010" =>	-- b has been read from memory
					transform_phase			<= "100";
					-- ax has been multiplied
					transform_sum			<= transform_p(29 downto 0);

					-- transform y address (PAL = NTSC y*1.25)
					-- (the PAL scaling is commented out, so the NTSC mapping is always used)
--					if pal = '0' then
						transform_address(17) <= '0';
						transform_address(16 downto 9) <= transform_y(15 downto 8);
--					else
--						transform_address(17 downto 8) <= transform_y(16 downto 7)
--										+ transform_y(18 downto 9);
--					end if;
					-- transform x address: if any coord is out of range, or the block has
					-- just changed, only bits 8..6 are forced to '1' (bits 5..0 keep their
					-- previous value), giving an x of 448..511, which is beyond x=368
					-- where hde is dropped
					if transform_y(19 downto 16)="0000" and transform_x(17 downto 15)="000" and block_changed = '0' then
						transform_address( 8 downto 6) <= transform_x(14 downto 12);
						transform_address( 5 downto 0) <= transform_x(11 downto 6);
					else
						transform_address( 8 downto 6) <= (others=>'1');
					end if;
					-- pixel-rate strobe, once per six clk96 cycles
					clk96_div_6			<= '1';

			when "100" =>	-- e has been read from memory
					transform_phase			<= "101";
					-- by has been multiplied
					transform_sum			<= transform_sum + transform_p(29 downto 0);

			when "101" =>	-- c has been read from memory
					transform_phase			<= "110";
					-- e has been multiplied
--					transform_x			<= transform_sum + transform_p(29 downto 0);
--					transform_x			<= transform_x + "00000000100000000";
					-- new x = top 18 bits of the completed sum
					transform_x			<= transform_sum_plus_p(29 downto 12);

			when "110" =>	-- d has been read from memory
					transform_phase			<= "000";
					-- cx has been multiplied
					transform_sum			<= transform_p(29 downto 0);

			when others =>	
					-- unused phase codes: restart the sequence
					transform_phase			<= "000";
			end case;

		end if;
	end process;

	-- Transform coefficient table: 10-bit address x 16-bit dual-port RAM.
	--   port A  read-only, addressed by {transform_addrhi, transform_addrlo},
	--           clocked from the inverted clock clk96_n
	--   port B  loader write port, enabled by cpu_mem_ce_tr, clocked from clk96
	transform_ram: RAMB16_S18_S18 port map (
		-- port A
		CLKA			=> clk96_n,
		ENA			=> '1',
		SSRA			=> '0',
		WEA			=> '0',
		ADDRA(9 downto 3)	=> transform_addrhi(6 downto 0),
		ADDRA(2 downto 0)	=> transform_addrlo,
		DIA			=> (others=>'0'),
		DIPA			=> "00",
		DOA			=> transform_mem_out,
		DOPA			=> transform_mem_out_parity,

		-- port B
		CLKB			=> clk96,
		ENB			=> cpu_mem_ce_tr,
		SSRB			=> '0',
		WEB			=> cpu_mem_we,
		ADDRB			=> cpu_mem_addr(9 downto 0),
		DIB			=> cpu_mem_din,
		DIPB			=> cpu_mem_pin,
		DOPB			=> cpu_mem_pout );

	-- inverted clock used by the port-A side of the block RAMs
	clk96_n <= not clk96;

	-- multiplier: transform_p = transform_a * transform_b
	transform_mult: MULT18X18 port map (
		A	=> transform_a,
		B	=> transform_b,
		P	=> transform_p );

	-- accumulator plus the low 30 bits of the current product
	transform_sum_plus_p <= transform_sum + transform_p(29 downto 0);

	-- Loader write issue.
	-- On a clk96_div_6 strobe, if the input FIFO holds a word and neither a
	-- probability fill nor a frame-sync wait is in progress, the word is
	-- turned into one write on the cpu_mem bus: bits 12..0 are the address,
	-- bits 31..16 the data. cpu_mem_we and input_ram_read_advance are
	-- one-cycle strobes.
	cpu_write_issue: process(clk96)
		variable	issue	: boolean;	-- combinational, recomputed every cycle
	begin
		if rising_edge(clk96) then
			issue	:= clk96_div_6='1' and input_ram_can_read='1'
				   and prob_mem_wr_busy='0' and frame_sync_busy='0';

			-- strobes idle unless a word is issued below
			cpu_mem_we		<= '0';
			input_ram_read_advance	<= '0';

			if issue then
				cpu_mem_addr		<= input_ram_out(12 downto 0);
				cpu_mem_din		<= input_ram_out(31 downto 16);
				cpu_mem_we		<= '1';
				input_ram_read_advance	<= '1';
			end if;
		end if;
	end process cpu_write_issue;

	-- chip-enable decode for the loader write bus (13-bit cpu_mem_addr):
	--   addr(12 downto 10) = "000"  transform_ram     (cpu_mem_ce_tr)
	--   addr(12 downto 10) = "001"  special registers (decoded in the register process)
	--   addr(12 downto 11) = "01"   probability_ram   (cpu_mem_ce_prb)
	--   addr(12 downto 11) = "10"   text_ram          (cpu_mem_ce_txt)
	cpu_mem_ce_txt	<= '1' when cpu_mem_addr(12 downto 11)="10" else '0';
	cpu_mem_ce_prb	<= '1' when cpu_mem_addr(12 downto 11)="01" else '0';
	cpu_mem_ce_tr	<= '1' when cpu_mem_addr(12 downto 10)="000" else '0';

	-- Special (write-only) registers.
	-- A loader write with cpu_mem_addr(12 downto 10) = "001" is decoded on
	-- cpu_mem_addr(3 downto 0):
	--   0  probability fill start address
	--   1  probability fill data byte
	--   2  probability fill length (writing it also starts the fill)
	--   3  wait for frame sync
	--   4  intensity added per plotted point
	--   8, 9, A  tone A/B/C period (bits 11..0) and tone enable (bit 12)
	--   B  noise period (bits 4..0) and noise enables A/B/C (bits 7/6/5)
	--   C, D, E  amplitude A/B/C
	--   F  PAL select (bit 0)
	-- The envelope settings (env_period, env_shape, env_restart) have no
	-- register; they aren't used for this demo.
	-- On every cycle without such a write, the two start flags are dropped as
	-- soon as the matching busy signal is seen high.
	special_regs: process(clk96)
	begin
		if rising_edge(clk96) then
			if cpu_mem_we = '1' and cpu_mem_addr(12 downto 10)="001" then

				case cpu_mem_addr(3 downto 0) is
				-- probability RAM fill request
				when "0000" =>	prob_mem_wr_addr	<= cpu_mem_din(10 downto 0);
				when "0001" =>	prob_mem_wr_data	<= cpu_mem_din(7 downto 0);
				when "0010" =>	prob_mem_wr_len		<= cpu_mem_din(11 downto 0);
						prob_mem_wr_start	<= '1';

				-- frame sync and rendering control
				when "0011" =>	frame_sync_start	<= '1';
				when "0100" =>	transform_draw_add	<= cpu_mem_din(4 downto 0);

				-- sound generator
				when "1000" =>	tone_a			<= cpu_mem_din(11 downto 0);
						en_tone_a		<= cpu_mem_din(12);
				when "1001" =>	tone_b			<= cpu_mem_din(11 downto 0);
						en_tone_b		<= cpu_mem_din(12);
				when "1010" =>	tone_c			<= cpu_mem_din(11 downto 0);
						en_tone_c		<= cpu_mem_din(12);
				when "1011" =>	noise			<= cpu_mem_din(4 downto 0);
						en_noise_a		<= cpu_mem_din(7);
						en_noise_b		<= cpu_mem_din(6);
						en_noise_c		<= cpu_mem_din(5);
				when "1100" =>	amp_a			<= cpu_mem_din(4 downto 0);
				when "1101" =>	amp_b			<= cpu_mem_din(4 downto 0);
				when "1110" =>	amp_c			<= cpu_mem_din(4 downto 0);

				-- video standard
				when "1111" =>	pal			<= cpu_mem_din(0);

				when others =>	null;
				end case;

			else
				-- start acknowledge
				if prob_mem_wr_busy = '1' then
					prob_mem_wr_start	<= '0';
				end if;
				if frame_sync_busy = '1' then
					frame_sync_start	<= '0';
				end if;
			end if;
		end if;
	end process special_regs;

	-- Video output.
	-- A frame-buffer byte holds three colour-enable bits (7 = red, 6 = green,
	-- 5 = blue) and an intensity; bits 4..1 of the intensity drive every
	-- enabled gun, a disabled gun is black.
	mred	<= (others=>'0') when pixel(7) /= '1' else pixel(4 downto 1);
	mgreen	<= (others=>'0') when pixel(6) /= '1' else pixel(4 downto 1);
	mblue	<= (others=>'0') when pixel(5) /= '1' else pixel(4 downto 1);

	-- Font overlay: where a font pixel is set, each gun shows "11" followed by
	-- the top two bits of the picture colour; elsewhere the picture is shown as is.
	red	<= ("11" & mred  (3 downto 2)) when font_pixel /= '0' else mred;
	green	<= ("11" & mgreen(3 downto 2)) when font_pixel /= '0' else mgreen;
	blue	<= ("11" & mblue (3 downto 2)) when font_pixel /= '0' else mblue;

	-- sync outputs are copies of the internal sync signals
	vsync	<= vsync_o;
	hsync	<= hsync_o;

	-- update screen memory or allow store
	--
	-- SRAM arbiter, stepped on every clk96_div_6 strobe. Each access is a
	-- read-modify-write spread over two strobes, selected by video_rw:
	--   video_rw = '0'  present the address, release the data bus, enable output
	--   video_rw = '1'  take the byte read, modify it and drive it back with we low
	--
	-- While the display is active (vde and hde high) the address is
	-- {ycoord, xcoord}; the byte read is shown (pixel) and written back faded:
	-- the 5-bit intensity is reduced by 2, or the whole byte is cleared once
	-- intensity bits 4..1 are zero.
	-- During blanking the address is transform_address; the colour bits 7..5
	-- are replaced by pixel_wr and transform_draw_add is added to the
	-- intensity, saturating at 31.
	--
	-- pixel_add is 6 bits wide: it receives the sum of two 6-bit operands
	-- ('0' & 5 bits each), whose result is 6 bits, and bit 5 is the carry.
	-- (It was previously declared 7 bits wide, which does not match the
	-- length of the value assigned to it.)
	memory_arbiter: process(clk96)
		variable pixel_local		: std_logic_vector(7 downto 0);
		variable pixel_add		: std_logic_vector(5 downto 0);
	begin
		if rising_edge(clk96) then
		    if clk96_div_6 = '1' then
			if vde='1' and hde='1' then
				-- display fetch with fade
				if video_rw='0' then
					sram_address		<= (others=>'0');
					sram_address(8 downto 0)<= xcoord;
					sram_address(16 downto 9)<= ycoord;
					sram_data		<= (others=>'Z');
					sram_we			<= '1';
					sram_oe			<= '0';
				else
					pixel_local		:= sram_data;
					pixel			<= sram_data;
					if pixel_local(4 downto 1) /= "0000" then
						pixel_local(4 downto 0) := pixel_local(4 downto 0) - 2;
					else
						pixel_local	:= (others=>'0');
					end if;
					sram_data		<= pixel_local;
					sram_oe			<= '1';
					sram_we			<= '0';
				end if;
				advance_wr			<= '0';
			else
				-- blanking: show black and plot the current transform point
				if video_rw='1' then
					pixel			<= (others=>'0');
				end if;

				if video_rw='0' then
					-- colour bits are kept in the variable until the write-back strobe
					pixel_local(7 downto 5)	:= pixel_wr;
					sram_address		<= (others=>'0');
					sram_address(17 downto 0)<= transform_address;
					sram_data		<= (others=>'Z');
					sram_we			<= '1';
					sram_oe			<= '0';
				else
					pixel_local(4 downto 0)	:= sram_data(4 downto 0);

					-- saturating add of the draw increment
					pixel_add		:= ('0'&pixel_local(4 downto 0)) + ('0'&transform_draw_add);
					if pixel_add(5)='0' then
						pixel_local(4 downto 0) := pixel_add(4 downto 0);
					else
						pixel_local(4 downto 0) := (others=>'1');
					end if;

					sram_data		<= pixel_local;
					sram_oe			<= '1';
					sram_we			<= '0';
					advance_wr		<= '1';
				end if;
			end if;
		    end if;
		end if;
	end process;

	-- Frame counter (clk16 domain).
	-- Counts rising edges of vsync_o in an 11-bit counter. frame_count receives
	-- the incremented value on each edge; picture follows the top six bits of
	-- the counter as it stood at the start of the cycle.
	frame_counter: process(clk16)
		variable	vsync_prev	: std_logic := '0';
		variable	frames		: std_logic_vector(10 downto 0);
		variable	new_frame	: boolean;	-- combinational, recomputed every cycle
	begin
		if rising_edge(clk16) then
			new_frame	:= (vsync_prev = '0') and (vsync_o = '1');
			vsync_prev	:= vsync_o;

			-- sampled before the counter is advanced
			picture		<= frames(10 downto 5);

			if new_frame then
				frames		:= frames + 1;
				frame_count	<= frames;
			end if;
		end if;
	end process frame_counter;

	-- video clock
	-- Video timing generator, stepped on every clk96_div_6 strobe. Produces
	-- xcoord/ycoord, the display enables hde/vde, the sync signals and
	-- video_rw, and advances transform_block as lines go by.
	video_clock: process(clk96)
		variable	hcounter		: std_logic_vector(10 downto 0);
		variable	vcounter		: std_logic_vector(8 downto 0);
		-- hcounter bit 0	sub pixel counter (16MHz), becomes video_rw
		-- hcounter bits 1-9	0..511 pixel counter (8MHz), becomes xcoord
		-- hcounter bit 10	end-of-line carry, tested and then cleared every step
		-- vcounter		line counter (15.625kHz), reset at line 262 (NTSC) or 312 (PAL)
	begin
		if rising_edge(clk96) then
		    if clk96_div_6 = '1' then
			-- update counter
			hcounter				:= hcounter + 1;

			-- vertical timing
			-- (carry out of the 10-bit horizontal count marks the start of a new line)
			if hcounter(10)='1' then
				vcounter			:= vcounter + 1;

				-- change ifs every line without penalty in visible area, in vblank
				-- change every 8 lines
				if vcounter(8)='0' or (vcounter(2 downto 0)="000") then
					transform_block		<= transform_block + 1;
					block_changed		<= '1';
				end if;

				-- do line events
				-- (selector is the pal flag followed by the 9-bit line number)
				case (pal & vcounter) is
					when "0011010000"    -- 208 NTSC
					   | "1100000000" => -- 256 PAL
						vde		<= '0';
					when "0011011110"    -- 222 NTSC
					   | "1100010000" => -- 272 PAL
						vsync_o		<= '1';
					when "0011110110"    -- 246 NTSC
					   | "1100101000" => -- 296 PAL
						vsync_o		<= '0';
					when "0100000110"    -- 262 NTSC
					   | "1100111000" => -- 312 PAL
						-- end of frame: restart line count and block sequence
						-- (this assignment overrides the increment above)
						transform_block	<= (others=>'0');
						vcounter	:= (others=>'0');
						vde		<= '1';
					when others =>
						null;
				end case;
				ycoord				<= vcounter(7 downto 0);
				hde				<= '1';
			end if;
			-- drop the carry so the horizontal count wraps to zero
			hcounter(10)				:= '0';

			-- horizontal timing
			case hcounter(9 downto 1) is
				when "101110000" =>		-- 368
					-- end of the visible part of the line; rendering with the
					-- new block is allowed again from here
					hde			<= '0';
					block_changed		<= '0';
				when "110010100" =>		-- 404
					hsync_o			<= '1';
				when "111001100" =>		-- 460
					hsync_o			<= '0';
				when others => null;
			end case;
			xcoord					<= hcounter(9 downto 1);

			-- for rewrite
			-- (selects the read or write-back half of each SRAM access in memory_arbiter)
			video_rw				<= hcounter(0);
		    end if;
		end if;
	end process;

	-- data input port
	--
	-- Takes one byte from din whenever din_latch is seen to change from '0' to
	-- '1' (as sampled on clk96) and stores it in the input FIFO.
	--   * The write address is an 11-bit byte address; it is advanced on the
	--     cycle after a write. Bits 1..0 select the byte within a 32-bit word,
	--     bits 10..2 are the word address compared against the read address.
	--   * The read address advances by one word per input_ram_read_advance strobe.
	--   * din_nreset = '0' resets both addresses.
	--   * Resynchronisation: consecutive 0xff bytes are counted. The first six
	--     are stored normally, the seventh is dropped, and every further 0xff
	--     resets both addresses. Any other byte clears the count.
	--
	-- Changes from the previous version: the write-only variable advance_last
	-- has been removed, and sync_count has an explicit "000" initial value so
	-- that the run counter is defined from the first byte in simulation.
	--
	-- NOTE(review): din_latch and din are sampled directly; if they are not
	-- already synchronous to clk96 they need a synchroniser - confirm against
	-- the board-level design.
	din_port: process(clk96)
		variable latch_last		: std_logic := '1';	-- din_latch on the previous clock
		variable has_written		: std_logic := '0';	-- a byte was written, address still to be advanced
		variable sync_count		: std_logic_vector(2 downto 0) := "000";	-- run length of 0xff bytes, saturates at 7

	begin
		if rising_edge(clk96) then
			-- write strobe lasts a single cycle
			input_ram_write		<= '0';

			if din_nreset='0' then
				input_ram_write_addr	<= (others=>'0');
				input_ram_read_addr	<= (others=>'0');
			else
				if latch_last='0' and din_latch='1' then
					input_data	<= din;

					if din/=x"ff" then
						sync_count	:= "000";
					elsif sync_count/="111" then
						sync_count	:= sync_count+1;
					else
						-- eighth or later 0xff in a row: restart the FIFO
						input_ram_write_addr	<= (others=>'0');
						input_ram_read_addr	<= (others=>'0');
					end if;

					-- tested after the update above, so the byte that takes
					-- the count to 7 is not stored
					if sync_count/="111" then
						input_ram_in	<= din;
						input_ram_write	<= '1';
						has_written	:= '1';
					end if;

				elsif has_written='1' then
					-- advance the byte address; carry into the word address
					input_ram_write_addr(1 downto 0) <= input_ram_write_addr(1 downto 0)+1;
					if input_ram_write_addr(1 downto 0)="11" then
						input_ram_write_addr(10 downto 2) <= input_ram_write_addr_plus_one;
					end if;
					has_written	:= '0';
				end if;
				latch_last		:= din_latch;

				if input_ram_read_advance='1' then
					input_ram_read_addr	<= input_ram_read_addr+1;
				end if;
			end if;
		end if;
	end process din_port;

	-- Input FIFO storage: bytes go in on port A, 32-bit words come out on port B.
	--   port A  byte write port, clocked from clk96_n
	--   port B  read-only word port, clocked from clk96
	input_ram : RAMB16_S9_S36 port map (
		-- port A (write)
		CLKA	=> clk96_n,
		ENA	=> '1',
		SSRA	=> '0',
		WEA	=> input_ram_write,
		ADDRA	=> input_ram_write_addr,
		DIA	=> input_ram_in,
		DIPA	=> "0",

		-- port B (read)
		CLKB	=> clk96,
		ENB	=> '1',
		SSRB	=> '0',
		WEB	=> '0',
		ADDRB	=> input_ram_read_addr,
		DIB	=> (others=>'0'),
		DIPB	=> "0000",
		DOB	=> input_ram_out );

	-- word address that follows the one currently being filled
	input_ram_write_addr_plus_one <= input_ram_write_addr(10 downto 2) + 1;

	-- a complete word is available while the write word address differs from the read address
	input_ram_can_read <= '1' when input_ram_write_addr(10 downto 2) /= input_ram_read_addr else '0';

	-- stop accepting data once the next word to be filled would be the one being read
	din_can_accept <= '1' when input_ram_write_addr_plus_one /= input_ram_read_addr else '0';

	-- Text overlay memories.
	--
	-- text_ram holds one byte per character cell; port A is read with the
	-- cell address, port B is the loader write port enabled by cpu_mem_ce_txt.
	text_ram : RAMB16_S9_S9 port map (
		-- port A (read)
		CLKA	=> clk96_n,
		ENA	=> '1',
		SSRA	=> '0',
		WEA	=> '0',
		ADDRA	=> text_mem_addr,
		DIA	=> (others=>'0'),
		DIPA	=> "0",
		DOA	=> text_mem_out,

		-- port B (loader write)
		CLKB	=> clk96,
		ENB	=> cpu_mem_ce_txt,
		SSRB	=> '0',
		WEB	=> cpu_mem_we,
		ADDRB	=> cpu_mem_addr(10 downto 0),
		DIB	=> cpu_mem_din(7 downto 0),
		DIPB	=> "0" );

	-- font_ram holds eight rows of eight pixels per character. Port B is wired
	-- to the loader bus but permanently disabled (ENB = '0'), so the contents
	-- are never written from here.
	font_ram : RAMB16_S9_S9 port map (
		-- port A (read)
		CLKA	=> clk96,
		ENA	=> '1',
		SSRA	=> '0',
		WEA	=> '0',
		ADDRA	=> font_mem_addr,
		DIA	=> (others=>'0'),
		DIPA	=> "0",
		DOA	=> font_mem_out,

		-- port B (disabled)
		CLKB	=> clk96,
		ENB	=> '0',
		SSRB	=> '0',
		WEB	=> cpu_mem_we,
		ADDRB	=> cpu_mem_addr(10 downto 0),
		DIB	=> cpu_mem_din(7 downto 0),
		DIPB	=> "0" );

	-- character cell = (row of 8 lines, column of 8 pixels)
	text_mem_addr	<= ycoord(7 downto 3) & xcoord(8 downto 3);
	-- glyph row = (character code, line within the cell)
	font_mem_addr	<= text_mem_out & ycoord(2 downto 0);

	-- Font pixel select.
	-- Picks one bit of the glyph row in font_mem_out, most significant bit
	-- first, using the pixel column registered on the previous clock (font
	-- read is 1 clock later). Outside the displayed area the overlay is off.
	font_shift: process(clk96)
		variable column		: std_logic_vector(2 downto 0);	-- xcoord(2 downto 0), delayed by one clock
		variable glyph_bit	: std_logic;			-- combinational, recomputed every cycle
	begin
		if rising_edge(clk96) then
			-- column 0 is bit 7 of the row, column 7 is bit 0
			case column is
				when "000" =>	glyph_bit := font_mem_out(7);
				when "001" =>	glyph_bit := font_mem_out(6);
				when "010" =>	glyph_bit := font_mem_out(5);
				when "011" =>	glyph_bit := font_mem_out(4);
				when "100" =>	glyph_bit := font_mem_out(3);
				when "101" =>	glyph_bit := font_mem_out(2);
				when "110" =>	glyph_bit := font_mem_out(1);
				when others=>	glyph_bit := font_mem_out(0);
			end case;
			column	:= xcoord(2 downto 0);

			if vde='1' and hde='1' then
				font_pixel <= glyph_bit;
			else
				font_pixel <= '0';
			end if;
		end if;
	end process font_shift;

	-- Sound generator instance.
	-- Reset is held inactive; the register inputs come straight from the
	-- signals loaded by the special-register process.
	ay8192_0 : ay8192_internal port map (
		-- clocks and reset
		nRESET		=> '1',
		clk		=> psg_clk,
		pwm_clk		=> clk16,

		-- tone and noise generators
		tone_a		=> tone_a,
		tone_b		=> tone_b,
		tone_c		=> tone_c,
		noise		=> noise,
		en_tone_a	=> en_tone_a,
		en_tone_b	=> en_tone_b,
		en_tone_c	=> en_tone_c,
		en_noise_a	=> en_noise_a,
		en_noise_b	=> en_noise_b,
		en_noise_c	=> en_noise_c,

		-- amplitudes and envelope
		amp_a		=> amp_a,
		amp_b		=> amp_b,
		amp_c		=> amp_c,
		env_period	=> env_period,
		env_shape	=> env_shape,
		env_restart	=> env_restart,

		-- inputs tied off
		ioadir		=> '0',
		iobdir		=> '0',
		tape_noise	=> '0',
		is_mono		=> '0',

		-- PWM audio out
		pwm_left	=> audio_left,
		pwm_right	=> audio_right );

	-- PSG clock divider.
	-- A 16-bit pattern of eight ones and eight zeros is rotated left by one
	-- position on each clk96_div_6 strobe; psg_clk is the bit that was at the
	-- top before the rotation, so it completes one period every 16 strobes.
	psg_clock_div: process(clk96)
		variable pattern	: std_logic_vector(15 downto 0) := x"ff00";
		variable top_bit	: std_logic;	-- combinational, recomputed every cycle
	begin
		if rising_edge(clk96) then
			if clk96_div_6 = '1' then
				top_bit	:= pattern(15);
				psg_clk	<= top_bit;
				pattern	:= pattern(14 downto 0) & top_bit;
			end if;
		end if;
	end process psg_clock_div;
end impl;
