#include <cstdint>
#include <cstring>
#include <fstream>
#include <iostream>
#include <vector>

using namespace std;

// Flag-bit stream built by add_bit(). Bytes are stored newest-first: the byte
// currently being filled is always bits[0], older bytes follow it.
std::vector<uint8_t> bits;
// Position (7..0) of the next free bit inside bits[0]; a negative value means
// a new byte has to be started before the next bit can be stored.
int nextbit = -1;
// Running counters: source bytes consumed, bytes written to the packed file,
// and flag bits appended to the bit stream.
int total_src = 0, total_packed = 0, total_bits = 0;

// Appends one flag bit to the bit stream.
//
// Bits fill each byte from the most significant bit (7) down to the least
// significant bit (0). When the current byte is full, or no byte exists yet,
// a zero byte is inserted at the FRONT of `bits` and becomes the current one.
// Every call increments total_bits.
void add_bit(bool bit)
{
    if (nextbit < 0) {
        // Front insertion keeps the newest byte at index 0.
        bits.insert(bits.begin(), uint8_t{0});
        nextbit = 7;
    }

    const int position = nextbit--;
    if (bit) {
        uint8_t& current = bits.front();
        current = static_cast<uint8_t>(current | (1 << position));
    }
    total_bits += 1;
}

// Entry point of the packer.
//
// Command line: AVX_PACKER sourcefile packedfile bitfile [/2]
//
// Reads `sourcefile` byte by byte and splits it into two outputs:
//   * `packedfile` - the bytes that could not be predicted;
//   * `bitfile`    - the flag bits collected through add_bit().
//
// For every source byte one flag bit tells whether it is a VEX prefix
// (0xC4 or 0xC5). After a prefix, a second bit distinguishes 0xC4 from 0xC5,
// and each of the next two bytes is replaced by a single "same as previous
// occurrence" bit when it repeats the value seen at that position last time.
// Unless "/2" is given, the byte following 0xC4 must be 0xE2 and is dropped
// (the usage text implies the unpacker re-adds it).
//
// Returns 0 on success, 1 on a usage error, file error, unexpected byte after
// 0xC4, or a source file that ends in the middle of a VEX-prefixed sequence.
int main(int argc, char* argv[])
{
    // Check command line parameters
    if (argc < 4) {
        std::cout << "Usage: AVX_PACKER sourcefile packedfile bitfile [/2]\n";
        std::cout << "/2 - don't add 0xE2 byte after VEX prefix 0xC4.\n";
        return 1;
    }

    bool add_e2 = true;
    if (argc > 4) {
        if (std::strcmp(argv[4], "/2") == 0) {
            add_e2 = false;
        } else {
            std::cout << "Unknown option \"" << argv[4] << "\"!\n";
            return 1;
        }
    }

    // Open files
    std::ifstream fi(argv[1], std::ios::binary);
    if (!fi) {
        std::cout << "Source file open error!\n";
        return 1;
    }
    std::ofstream fop(argv[2], std::ios::binary);
    if (!fop) {
        std::cout << "Destination (packed bytes) file open error!\n";
        return 1;
    }
    std::ofstream fob(argv[3], std::ios::binary);
    if (!fob) {
        std::cout << "Destination (bits) file open error!\n";
        return 1;
    }

    std::uint8_t byte = 0;

    // Reads the next source byte into `byte`; false on end of file or error.
    const auto read_byte = [&fi, &byte]() -> bool {
        char raw = 0;
        if (!fi.get(raw)) { return false; }
        byte = static_cast<std::uint8_t>(raw);
        return true;
    };

    // Writes `byte` to the packed file and counts it.
    const auto store_byte = [&fop, &byte]() {
        fop.put(static_cast<char>(byte));
        ++total_packed;
    };

    // Reads one byte and emits a "same as `prev`" flag bit; the byte itself is
    // stored (and remembered in `prev`) only when it differs.
    // Returns false when the source ends before the byte could be read.
    const auto pack_predicted = [&](int& prev) -> bool {
        if (!read_byte()) { return false; }
        ++total_src;
        const bool same = (byte == prev);
        add_bit(same);
        if (!same) {
            prev = byte;
            store_byte();
        }
        return true;
    };

    // Compress source file
    int prev2 = -1, prev3 = -1;   // -1 never equals a byte value, so the first occurrence is always stored
    bool truncated = false;       // set when the source ends inside a VEX-prefixed sequence
    while (read_byte()) {
        // Raw or first VEX prefix byte
        ++total_src;
        const bool is_vex = (byte == 0xC4 || byte == 0xC5);
        add_bit(is_vex);

        if (is_vex) {
            const bool is_c4 = (byte == 0xC4);
            add_bit(is_c4);
            if (is_c4 && add_e2) {
                // Second prefix byte
                if (!read_byte()) { truncated = true; break; }
                if (byte != 0xE2) {
                    std::cout << "Wrong 2nd VEX prefix byte (not 0xE2) after 0xC4 is detected at offset " << total_src << "!\n";
                    return 1;
                }
                ++total_src;
            }

            // Last (2nd if "/2" is specified) VEX prefix byte
            if (!pack_predicted(prev2)) { truncated = true; break; }

            // Opcode (3rd VEX prefix byte for 0xC4 if "/2" is specified) byte
            if (!pack_predicted(prev3)) { truncated = true; break; }

            // Next byte (opcode byte for 0xC4 if "/2" is specified)
            if (!read_byte()) { truncated = true; break; }
            ++total_src;
        }

        // Store raw byte
        store_byte();
    }

    if (fi.bad()) {
        std::cout << "Source file read error!\n";
        return 1;
    }
    if (truncated) {
        // Flag bits for the unfinished sequence were already emitted, so the
        // bit stream and the packed bytes no longer describe the same data.
        std::cout << "Unexpected end of source file inside a VEX-prefixed sequence at offset " << total_src << "!\n";
        return 1;
    }

    // Flush before checking: a buffered write may only fail when it reaches the file.
    fop.flush();
    if (!fop) {
        std::cout << "Destination (packed bytes) file write error!\n";
        return 1;
    }

    // Create bitmap file
    fob.write(reinterpret_cast<const char*>(bits.data()), static_cast<std::streamsize>(bits.size()));
    fob.flush();
    if (!fob) {
        std::cout << "Destination (bits) file write error!\n";
        return 1;
    }

    // Statistics
    std::cout << "Source size = " << total_src << " bytes\n";
    std::cout << "Packed size = " << total_packed << " bytes\n";
    std::cout << "Bitmap size = " << bits.size() << " bytes (" << total_bits << " bits)\n";
    const int total = total_packed + static_cast<int>(bits.size());
    // Avoid dividing by zero for an empty source file (percentages print as 0).
    const double percent_base = (total_src > 0) ? total_src : 1;
    std::cout << "Result size = " << total << " bytes (" << 100.0 * total / percent_base << "%)\n";
    std::cout << "Profit size = " << total_src - total << " bytes (" << 100.0 * (total_src - total) / percent_base << "%)\n";
    return 0;
}
