#include <stdio.h>
#include <stdint.h>
#include <math.h>
#include <memory.h>
#include <stdlib.h>

#define WMAX 256
#define HMAX 256
#define CHARH 8
#define RL32(arr, ofs) (arr[ofs] + (arr[ofs+1] << 8) + (arr[ofs+2] << 16) + (arr[ofs+3] << 24))
#define PACKBITS 7

typedef struct {
    int count;
    int non_tip;            //zero if this is the tip of a subtree (no parent)
    int zero, one;          //indexes of zero and one branch
    int nleaves;            //non-leaf node if non-zero
    union {
        union {
            int leaves[256];//indexes to leaves belonging to this entry
        } n;
        struct {
            int sym;        //huffman symbol if leaf
            int len;        //length in bits
        } l;
    } u;
} huffsym;

huffsym syms[512];
int nglyphs = 0;
char hufftab[256] = {0};

void print_bits(FILE *f, int sym, int len) {
    while (len) {
        fprintf(f, "%i", sym&1);
        sym >>= 1;
        len--;
    }
}

void set_hufftab(int x, int v) {
    if (hufftab[x]) {
        fprintf(stderr, "ERROR: hufftab[%i] == %i\n", x, hufftab[x]);
        exit(1);
    }

    hufftab[x] = v;
}

int curglyph = 0;
char font[256][CHARH][8];
//unsigned char glyphdata[256];   //bytes to be stuffed into the font
uint8_t textures[4][16*16];
int ntex = 0;

void print_leaf(int x, int bits, int nbits, int pass) {
    int y;

    if (pass == 1) {
    syms[x].u.l.sym = bits;
    syms[x].u.l.len = nbits;
    } else {
    printf("Glyph%i\t\t\t\t;%c = ", curglyph, x);
    print_bits(stdout, syms[x].u.l.sym, syms[x].u.l.len);
    printf("\n");
    if (x == '!')
        printf("GlyphExcl\n");
    if (x == ' ')
        printf("GlyphSpace\n");

    /* upside-down as is typical for sprites */
    for (y = CHARH-1; y >= 0; y--) {
        int b = 0;//(glyphdata[curglyph*2+(7-y)/4] >> (((7-y)<<1)&7))&3;
        printf("\t.byte %%%i%i%i%i%i%i%i%i\n",
                font[x][y][0], font[x][y][1], font[x][y][2], font[x][y][3],
                font[x][y][4], font[x][y][5], b >> 1, b & 1);
    }

    curglyph++;
    }
}

void fill_hufftab(int sym, int ofs, int symleft, int bits, int nbits, int pass) {
    int zero = syms[sym].zero;
    int one = syms[sym].one;
    int zero_children = syms[zero].nleaves ? syms[zero].nleaves : 1;
    int one_children = syms[one].nleaves ? syms[one].nleaves : 1;

    if (pass == 1)
        set_hufftab(ofs, one_children);

    if (one_children > 1)
        fill_hufftab(one, ofs+1, one_children, bits | (1 << nbits), nbits+1, pass);
    else
        print_leaf(one, bits | (1 << nbits), nbits+1, pass);

    symleft -= one_children;

    if (symleft > 1)
        fill_hufftab(zero, ofs+one_children, symleft, bits, nbits+1, pass);
        //fill_hufftab(zero, ofs+one_children-1, symleft);          //two bytes smaller
        //fill_hufftab(zero, ofs+(one_children*2+2)/3, symleft);    //27% smaller, requires tricky division
    else
        print_leaf(zero, bits, nbits+1, pass);
}

/*void append_bit(int sym, int bit) {
    int x;
    if (syms[sym].nleaves) {
        fprintf(stderr, "%i leaves\n", syms[sym].nleaves);
        for (x = 0; x < syms[sym].nleaves; x++) {
            syms[syms[sym].u.n.leaves[x]].u.l.sym |= bit << syms[syms[sym].u.n.leaves[x]].u.l.len;
            syms[syms[sym].u.n.leaves[x]].u.l.len++;
            fprintf(stderr, "%c (%3i): %2i bits x %i\n", syms[sym].u.n.leaves[x], sym, syms[syms[sym].u.n.leaves[x]].u.l.len, syms[syms[sym].u.n.leaves[x]].count);
        }
    } else {
        syms[sym].u.l.sym = bit;
        syms[sym].u.l.len = 1;
        fprintf(stderr, "%c (%3i): %2i bits x %i\n", sym, sym, syms[sym].u.l.len, syms[sym].count);
    }
    syms[sym].non_tip = 1;
}*/

//parse BMP of expected size
int parse_bmp(const char *filename, uint8_t *image, int W, int H) {
    uint8_t header[54];
    int x, y, w, h, bpp;
    FILE *f;

    /* read 8-bit images as they are, RGB images as grayscale */
    if (!(f = fopen(filename, "rb"))) {
        fprintf(stderr, "failed to open %s\n" , filename);
        return 1;
    }
    
    fprintf(stderr, "Parsing %s\n", filename);

    fread(header, 54, 1, f);

    w = RL32(header, 18);
    h = RL32(header, 22);
    if (w != W || h != H) {
        fprintf(stderr, "%s expected %ix%i got %ix%i\n" , filename, W, H, w, h);
        return 1;
    }
    bpp = header[28] + (header[29] << 8);
    fseek(f, RL32(header, 10), SEEK_SET);

    fprintf(stderr, "%ix%i %i bpp\n", w, h, bpp);

    if (w > WMAX || h > HMAX) {
        fprintf(stderr, "image too large\n");
        return -1;
    }

    for (y = h-1; y >= 0; y--) {
        switch(bpp) {
        case 8:
            fread(&image[y*w], w, 1, f);
            break;
        case 24:
        case 32:
            for (x = 0; x < w; x++) {
                int temp = getc(f) + getc(f) + getc(f);
                if (bpp == 32) getc(f);
                image[y*w+x] = temp / 3;
            }
            break;
        default:
            fprintf(stderr, "can't handle %i-bit BMPs\n", bpp);
        }

        /* align */
        if (w*(bpp/8) & 3)
            fseek(f, 4 - (w*(bpp/8) & 3), SEEK_CUR);
    }

    fclose(f);
}

int main(int argc, char **argv) {
    uint8_t fontimage[128*128];
    char text[65536];
    int x, y;
    int c, x2, y2, textlen = 0;
    double entropy = 0;
    int bits = 0, symofs = 256, lowest, nlowest, lcount, nlcount, g;
    int curbyte = 0, curbits = 0;
    int outbits = 0;   //output size vs. uncompressed (does not include font)

    memset(syms, 0, sizeof(syms));

    int ret = parse_bmp(argv[1], fontimage, 128, 128);
    if (ret) return ret;
    
    for (y2 = 0; y2 < 16; y2++)
        for (x2 = 0; x2 < 16; x2++)
            for (y = 0; y < CHARH; y++)
                for (x = 0; x < 8; x++)
                    font[y2*16+x2][y][x] = fontimage[(y+y2*CHARH)*128+x+x2*8] < 128;


    ntex = argc - 2;
    for (int i = 0; i < ntex; i++) {
        int ret = parse_bmp(argv[2+i], textures[i], 16, 16);
        if (ret) return ret;
        for (y = 0; y < 16; y++) {
            for (x = 0; x < 16; x++) {
                textures[i][y*16+x] = textures[i][y*16+x] >= 128 ? 0x80 : 0x00;
            }
        }
    }

    for (;;) {
        if ((c = getc(stdin)) < 0)
            break;

        if (c == '\n')
            c = ' ';
        else
        if (c < ' ')
            continue;
        text[textlen] = c;
        if (!syms[text[textlen]].count++)
            nglyphs++;
        textlen++;
    }

    //build symbols
    for (;;) {
        int lleaves, nlleaves;
        //find the two entries in syms with lowest count
        lowest = -1;
        for (x = 0; x < symofs; x++)
            if (syms[x].count && (lowest == -1 || syms[x].count < syms[lowest].count) && !syms[x].non_tip)
                lowest = x;

        nlowest = -1;
        for (x = 0; x < symofs; x++)
            if (x != lowest && syms[x].count && (nlowest == -1 || syms[x].count < syms[nlowest].count) && !syms[x].non_tip)
                nlowest = x;

        if (nlowest == -1)
            break;

        /*append_bit(lowest, 0);
        append_bit(nlowest, 1);*/
        syms[lowest].non_tip = 1;
        syms[nlowest].non_tip = 1;
        syms[symofs].zero = lowest;
        syms[symofs].one = nlowest;

        syms[symofs].count = syms[lowest].count + syms[nlowest].count;
        lleaves  =  syms[lowest].nleaves ?  syms[lowest].nleaves : 1;
        nlleaves = syms[nlowest].nleaves ? syms[nlowest].nleaves : 1;
        syms[symofs].nleaves = lleaves + nlleaves;

        if (syms[lowest].nleaves)
            memcpy(syms[symofs].u.n.leaves, syms[lowest].u.n.leaves, lleaves*sizeof(int));
        else
            syms[symofs].u.n.leaves[0] = lowest;

        if (syms[nlowest].nleaves)
            memcpy(&syms[symofs].u.n.leaves[lleaves], syms[nlowest].u.n.leaves, nlleaves*sizeof(int));
        else
            syms[symofs].u.n.leaves[lleaves] = nlowest;

        //fprintf(stderr, "%i: %i,%i have %i,%i leaves\n", symofs, syms[symofs].zero, syms[symofs].one, syms[lowest].nleaves, syms[nlowest].nleaves);
        symofs++;
    }

    printf("CHARH equ %i\n", CHARH);
    printf("NSYM equ %i\n", nglyphs);

    fill_hufftab(lowest, 0, nglyphs, 0, 0, 1);

    //printf("\tMAC TEXT\n");
    //printf("Text\n");
    //pack

    //number of bits we can pack
    int packbits = PACKBITS*ntex*16*16;
    //number of texture bytes we've put packed data into
    int texofs = 0;
    for (x = 0; x < textlen; x++) {
        printf(";");
        print_bits(stdout, curbyte, curbits);
        printf(" + ");
        print_bits(stdout, syms[text[x]].u.l.sym, syms[text[x]].u.l.len);

        curbyte |= syms[text[x]].u.l.sym << curbits;
        curbits += syms[text[x]].u.l.len;

        printf(" = ");
        print_bits(stdout, curbyte, curbits);
        printf("\n");
        while (curbits > PACKBITS) {
            if (outbits >= packbits) {
                fprintf(stderr, "not enough space for text data\n");
                return 1;
            }
            /*if (outbytes < nglyphs*2)
                glyphdata[outbytes] = curbyte & 0xFF;
            else
            printf("\tbyte $%02X\n", curbyte & 0xFF);*/
            int I = texofs/256;
            int Y = 15-((texofs/16)%16);
            int X = texofs%16;
            int texbyte = curbyte & ((1<<PACKBITS)-1);
            textures[I][Y*16+X] |= texbyte;

            curbyte >>= PACKBITS;
            curbits -= PACKBITS;
            outbits += PACKBITS;
            texofs++;
        }
        
        /*printf("\tbyte $%02X ;%c ", ((c << 3) & 0xFF) | (c >> 5), text[x]);
        print_bits(stdout, syms[text[x]].u.l.sym, syms[text[x]].u.l.len);
        printf("\n");*/
    }
    //printf("TextEnd\n");
    //printf("\tENDM\n");

    printf("\tMAC FONT\n");
    fill_hufftab(lowest, 0, nglyphs, 0, 0, 2);

    for (x = g = 0; x < 256; x++) {
        if (!syms[x].count)
            continue;

        entropy += syms[x].count*log2((double)textlen/syms[x].count);
        bits += syms[x].count*syms[x].u.l.len;
    }
    printf("FontEnd\n");
    printf("\tENDM\n");

    //find length of hufftab and output it
    for (x = y = 0; x < 256; x++)
        if (hufftab[x])
            y = x;

    printf("\tMAC HUFFTAB\n");
    printf("HuffTab\t;%i bytes\n", y+1);
    for (x = 0; x <= y; x++) {
        printf("\tbyte %i\n", hufftab[x]);
        outbits += 8;
    }
    printf("\tENDM\n");
    
    printf("\tMAC TEXTURES\n");
    printf("Textures\n");
    printf("TextStart\n");
    for (int i = 0; i < ntex; i++) {
        for (y = 15; y >= 0; y--) {
            for (x = 0; x < 16; x++) {
                if (texofs-- == 0) {
                    printf("TextEnd\n");
                }
                printf("\t.byte $%02x\n", textures[i][y*16+x]);
            }
        }
    }
    printf("\tENDM\n");

    //add size of decoder
    outbits += 66*8;

    //subtract stuffed bytes
    //outbytes -= nglyphs*2;

    printf(";entropy = %.2lf bits vs %i bits vs %i huffman bits, ratio = %.2lf\n", entropy, 8*textlen, bits, 8*textlen/entropy);
    printf(";huff: %i B -> %i B, ratio = %.2lf\n", textlen, (bits+7)/8, (double)textlen / ((bits+7)/8));
    fprintf(stderr, "ratio = %.2lf\n", (double)textlen / ((bits+7)/8));
    int outbytes = (outbits+7)/8;
    printf(";huff total: %i B vs %i B uncompressed\n", outbytes, textlen);
    if (nglyphs > 64)
        printf(";simple 7/8 packed + stuffed size: %i B + decoder\n", textlen*7/8 - nglyphs*2);
    else
    printf(";simple 6/8 packed + stuffed size: %i B + decoder\n", textlen*6/8 - nglyphs*2);

    if (outbytes > textlen) {
        fprintf(stderr, "%i > %i - Huffman encoding not worth it (TODO: make program choose the most efficient method)\n", outbytes, textlen);
        //return 1;
    }

    return 0;
}
