/* Build switch: when defined, main() runs an extra search pass over the
   critlit value (see the #ifdef __CRITLIT__ section in main). */
#define __CRITLIT__
/**********************************************************
 COMPRESS
 
 (C)(P) 1998 Powered by Petschy
 
 for the hugi compo2
 
 mod history:
 1998.06.13 started
 1998.06.18 forked to comp2, simpler encoding
 1998.07.01 change critlit search by TomCat
**********************************************************/

#include <stdio.h>
#include <math.h>

/* Compile-time defaults; most of them only seed the run-time globals of the
   same (lower-case) name, which main() then tunes by brute force. */
#define PSIZ		8192	/* size in bytes of ibuf, obuf and tbuf */
#define MATCH1DL	5	/* default for match1dl */
#define LBL		3	/* default bit width of the match length field */
#define DBL		9	/* default bit width of the match distance field */
#define MINMATCH	3	/* default shortest match worth encoding */
/* Longest encodable match. Fully parenthesized so the macro can be used
   inside larger expressions without precedence surprises. */
#ifdef __INACC__
#define MAXMATCH	((1<<LBL)-1)
#else
#define MAXMATCH	(MINMATCH+(1<<LBL)-1)
#endif
#define BYTES2CHK	1024	/* default for b2c, the backward search window */
#define DISTIDXBL	3	/* default for distidxbl */
#define	SYMSIZ		5	/* bits per literal symbol */

/* --- buffers ------------------------------------------------------- */
unsigned char ibuf[PSIZ];	/* input: optional prefix, then the text */
unsigned char obuf[PSIZ];	/* bit stream written by writebit / read by readbit */
unsigned char tbuf[PSIZ];	/* decompress() output, compared against ibuf */
unsigned char codetab[256];	/* loaded from text.chr; re-sorted in debug builds */

/* --- bit stream state ---------------------------------------------- */
unsigned char bitcnt;		/* bits left in the current obuf byte (8..1) */
int obufidx;			/* index of the current obuf byte */

/* --- input extent and results -------------------------------------- */
int red;			/* end index of valid data in ibuf */
int startpos;			/* number of prefix bytes; compression starts here */
int crulen;			/* compressed length in bytes, set by flushbits */
int crubits;			/* compressed length in bits before padding */
int critlit;			/* positions below this are stored as flag-less literals */
int mxdis;			/* largest encoded distance of the last pass */
int mxmat;			/* largest encoded length of the last pass */

/* --- tunable coding parameters ------------------------------------- */
int lbl=LBL;			/* bits of the length field */
int dbl=DBL;			/* bits of the distance field */
int match1dl=MATCH1DL;		/* distance bits assumed for length-1 matches */
int minmatch=MINMATCH;		/* shortest accepted match */
int maxmatch=MAXMATCH;		/* longest match; recomputed by compress() */
int b2c=BYTES2CHK;		/* how far back findmatch searches */
int distidxbl=DISTIDXBL;	/* not referenced by the functions in this file */
int dists;			/* only used as a loop bound in the debug statistics */

/* --- files and mode flags ------------------------------------------ */
FILE *ifile;
FILE *ofile;
char output=1;			/* non-zero: really write to ofile (and trace in debug) */
char overlap=0;			/* non-zero: match source may overlap the target */

/* --- tables not referenced by the functions in this file ----------- */
char distbl[16]={3,5,4,6,6,6,7,10,0,0,0,0,0,0,0,0};
int distfu[16];

/* --- statistics ---------------------------------------------------- */
int matfreq[PSIZ];		/* histogram of encoded lengths */
int disfreq[PSIZ];		/* histogram of encoded distances */
int idxfreq[16];
int disbits;
int decbits;			/* number of literal/match decision bits written */
int matbits;
int idxbits;
int totlit;			/* literals emitted with a decision bit */
int totmat;			/* matches emitted */
#ifdef __DEBUG__
int chrfreq[256];		/* literal byte histogram */
#endif
/*********************************************************/
/* Rewind the bit reader to the first bit of obuf. */
void initrbits(void)
{
	bitcnt = 8;	/* a whole byte is still unread */
	obufidx = 0;
}
/*********************************************************/
/* Reset the bit writer: start at byte 0 of obuf with that byte cleared. */
void initwbits(void)
{
	bitcnt = 8;	/* eight free bit slots in the current byte */
	obufidx = 0;
	obuf[obufidx] = 0;
}
/*********************************************************/
/* Return the next bit (0 or 1) of obuf and advance the read position.
   Bits are consumed from the least significant end of each byte, which is
   where writebit leaves the first bit it was given. */
char readbit(void)
{
	char bit = (char)((obuf[obufidx] >> (8 - bitcnt)) & 0x1);

	if (--bitcnt == 0) {
		/* byte exhausted: move on to the next one */
		bitcnt = 8;
		obufidx++;
	}
	return bit;
}
/*********************************************************/
/* Append the low bit of b to the output stream.
   Each new bit enters at bit 7 and earlier bits are shifted down, so after
   eight calls the first bit written sits in bit 0 of the byte. */
void writebit(unsigned char b)
{
	unsigned char cur = obuf[obufidx];

	obuf[obufidx] = (cur >> 1) | ((b & 0x01) << 7);

	if (--bitcnt)
		return;		/* current byte still has room */

	/* byte complete: advance, spilling the buffer to disk when it is full */
	bitcnt = 8;
	if (++obufidx == PSIZ) {
		if (output)
			fwrite(obuf, 1, PSIZ, ofile);
		obufidx = 0;
	}
	obuf[obufidx] = 0;
}
/*********************************************************/
/* Read (l & 0x1f) bits from the stream, most significant bit first, and
   return them as an integer. */
int readbits(char l)
{
	unsigned int acc = 0;
	int left;

	for (left = l & 0x1f; left > 0; left--)
		acc = (acc << 1) + readbit();
	return acc;
}
/*********************************************************/
/* Write the low (len & 0x1f) bits of val to the stream, most significant
   bit first. A masked length of zero writes nothing. If val does not fit
   into the requested width a diagnostic is printed and the value is
   truncated to its low bits.
   The shifting below assumes unsigned int is 32 bits wide. */
void writebits(unsigned int val, char len)
{
	/* Mask first: testing the raw length and masking afterwards would let
	   a length of 32 through as 0 and then shift by the full type width. */
	unsigned int n = (unsigned int)len & 0x1f;

	if (n == 0)
		return;

	/* unsigned shift: 1<<31 would overflow a signed int */
	if (val >= (1u << n))
		printf("***FATAL : bits 0verfl0w in writebits!\n");

	val <<= (0x20 - n);	/* left-align the field; n>=1 so the shift is <=31 */
	while (n) {
		writebit(val >> 31);
		val <<= 1;
		n--;
	}
}
/*********************************************************/
/* Finish the bit stream: record its size in crubits (bits, before padding)
   and crulen (bytes), pad the last byte with zero bits, and write the
   buffered bytes to ofile when output is enabled.
   The contents of obuf are deliberately left intact: decompress() reads the
   stream back from obuf[0] right after the final pass, and initwbits()
   already clears the first byte before every new pass. */
void flushbits(void)
{
	crubits=obufidx*8+8-bitcnt;
	if(bitcnt<8)
		writebits(0,bitcnt);	/* pad; this completes the byte and advances obufidx */
	if(output)
		fwrite(obuf,1,obufidx,ofile);
	crulen=obufidx;
	obufidx=0;
}
/*********************************************************/
/* Emit a match length. Despite the name, the value is written as a plain
   fixed-width field of lbl bits. */
void writeelias(unsigned int val)
{
	writebits(val, (char)lbl);
}
/*********************************************************/
/* Cost in bits of encoding val as a match length with writeelias.
   Returns lbl when val fits into the lbl-bit field, otherwise 65536 as an
   "impossible" cost so that the caller rejects the candidate.
   The limit is the capacity of the field itself; a negative value passed by
   the caller arrives here as a huge unsigned number and is rejected too. */
int testelias(unsigned int val)
{
	if (val >= (1u << lbl))
		return 65536;

	return lbl;
}
/*********************************************************/
/* Emit a match distance as a fixed-width field of dbl bits. */
void writedist(int d)
{
	writebits((unsigned int)d, (char)dbl);
}
/*********************************************************/
/* Cost in bits of encoding distance d with writedist: dbl when d is below
   1<<dbl, otherwise 65536 so that the caller rejects the candidate. */
int testdist(int d)
{
	return (d >= (1 << dbl)) ? 65536 : dbl;
}
/*********************************************************/
/* Search backwards from position cur in ibuf for the most profitable match.
   Candidates start at most b2c-1 bytes back; when overlap is 0 the source
   may not run into the target, so a candidate is never longer than its
   distance. Each candidate is scored as the bits saved compared with
   storing the bytes as flagged literals; only candidates of at least
   minmatch bytes whose score beats the best so far are kept.
   On success *len and *dis receive the raw length and distance and the
   score (>=0) is returned; otherwise *len and *dis are 0 and -1 is
   returned. */
int findmatch (int cur, int *len, int *dis)
{
int pos, run, limit, dist, best, gain, bestdist, bestrun;
#ifdef __DEBUG__
int j;
#endif

	best=-1;
	bestdist=0;
	bestrun=0;
	*len=0;
	*dis=0;

	for(pos=cur-minmatch*(overlap^0x1)-overlap; pos>=0 && (cur-pos)<b2c; pos--){
		if(ibuf[cur]!=ibuf[pos])
			continue;		/* first byte differs: no candidate here */

		dist=cur-pos;

		/* upper bound for the run: end of data, distance, maxmatch */
		limit=red-cur;
		if (!overlap && limit>dist)
			limit=dist;
		if (limit>maxmatch)
			limit=maxmatch;

		for(run=1; run<limit && ibuf[cur+run]==ibuf[pos+run]; run++)
			;

		/* bits saved: literals cost SYMSIZ+1 each, a match costs one flag
		   bit plus its length and distance fields */
		if (run>1){
#ifdef __INACC__
			gain=run*(SYMSIZ+1)-(1+testelias(run)+
				       testdist(dist));
#else
			gain=run*(SYMSIZ+1)-(1+testelias(run-minmatch)+
				       testdist(dist-overlap-run*(overlap^0x1)));
#endif
		}else if(dist-1 >= 1<<match1dl){
			gain=-2;
		}else{
			gain=(SYMSIZ+1)-(1+1+match1dl);
		}
#ifdef __DEBUG__
if(output){
if(run>=minmatch){
printf("tmatch\t>>");
for(j=0;j<run;j++)
 if (ibuf[cur+j]>31)
   printf("%c",ibuf[cur+j]);
 else
   printf("(%d)", ibuf[cur+j]);
printf("<< len %d, dis %d, sho %d\n", run, dist, gain);
}
}
#endif

		if (gain>best && run>=minmatch){
			best=gain;
			bestdist=dist;
			bestrun=run;
		}
	}

	if (best>=0){
		*len=bestrun;
		*dis=bestdist;
	}
	return best;
}
/*********************************************************/
void decompress(void)
{
int midx, cidx, len, dis, i;
char sym;
	
	initrbits();
	midx=startpos;
	while(midx<red){
		if(readbits(1)){
			dis=readbits(dbl);
#ifdef __INACC__
			len=readbits(lbl);
#else
			len=readbits(lbl)+minmatch;
			dis+=len;
#endif
			cidx=midx-dis;
			for(i=0; i<len; i++)
				tbuf[midx+i]=tbuf[cidx+i];
			midx+=len;
		}else{
			sym=readbits(SYMSIZ);
			if (sym>4)
				sym+='a'-5;
			else switch(sym){
				case 0:sym=' ';break;
				case 1:sym='.';break;
				case 2:sym=',';break;
				case 3:sym='?';break;
				case 4:sym='-';break;
				default:printf("DECOMPRESSOR LITERAL PANIC : unknown symbol %c(%d)\n",sym,sym);
			}	
			tbuf[midx++]=sym;
		}
	}
	for(i=startpos, len=0; i<red; i++)
		if (tbuf[i]!=ibuf[i])
			len++;
	if(len)
		printf("DECOMPRESSOR PANIC : decompressed and original data mismatch! (%d)\n", len);
	else
		printf("Decompression succeeded.\n");
	
}
/*********************************************************/
void compress(void)
{
int	cpos, rpos, sho, len, l, dis;
int	tsho, tlen, tdis, mxlen;
int 	i, stb, dummy, l2, s2;
char 	spec;
unsigned char c,cc;

#ifdef __DEBUG__
double	entropy, p, bsum=0;
int	f, totfreq, j;
#endif
	
	initwbits();
	
	for(i=0; i<PSIZ; i++){
		disfreq[i]=0;
		matfreq[i]=0;
	} 
	for(i=0;i<16;i++)
		idxfreq[i]=0;

#ifdef __DEBUG__
for(i=0;i<256;i++){
 chrfreq[i]=0; 
 codetab[i]=i;
} 
#endif	

#ifdef __INACC__
	maxmatch=(1<<lbl)-1;
#else
	maxmatch=minmatch+(1<<lbl)-1;
#endif
		
	mxdis=0; mxmat=0;
	totlit=0; totmat=0;
	disbits=decbits=matbits=idxbits=0;

	for(cpos=startpos; cpos<red; cpos++){
		
		if(cpos<critlit){

			c=ibuf[cpos];
			if (SYMSIZ<8){
				if (c>'@')
					cc=c-'a'+5;
				else switch(c){
					case ' ':cc=0;break;
					case '.':cc=1;break;
					case ',':cc=2;break;
					case '?':cc=3;break;
					case '-':cc=4;break;
					default:printf("LITERAL PANIC : unknown symbol %c(%d)\n",c,c);
				}	
				writebits(cc,SYMSIZ);
			}else
				writebits(c,SYMSIZ);

			continue;
		}
			
		sho=findmatch(cpos, &len, &dis);
#if 1	
		for(i=1; i<2 && sho>0; i++){	// lazy eval
			stb=findmatch(cpos+i, &l2, &dummy);
			s2=stb+i*(8-(SYMSIZ));
		     	if (len < l2){
		  		sho=-1;
		     		break;
		     	}
		}
#endif		
		if(sho>0){			// store match
			spec=(len==1 ? 1:0);
			l=len;
#ifdef __INACC__
#else
			dis-=overlap+len*(overlap^0x1);
			len-=minmatch;
#endif

			writebit(1);		// encode decision bit : literal/match
			decbits++;

#if 0
			writeelias(len);	// encode length
			writedist(dis); 	// encode distance
#else
			writedist(dis); 	// encode distance
			writeelias(len);	// encode length
#endif
			if (mxdis<(dis))
				mxdis=dis;
			if (mxmat<(len))
				mxmat=len;	
			matfreq[len]++;
			disfreq[dis]++;
			totmat++;
			
#ifdef __DEBUG__
if(output){
printf("match\t>>");
for(i=0;i<l;i++)
 if (ibuf[cpos+i]>31)
   printf("%c",ibuf[cpos+i]);
 else
   printf("(%d)", ibuf[cpos+i]);  
printf("<< len %d, dis %d\n", l, dis);
}
#endif 	

			cpos+=l-1;			
		}else{
			writebit(0);
			decbits++;
			totlit++;
 #ifdef __DEBUG__			
  chrfreq[ibuf[cpos]]++;
 #endif
			c=ibuf[cpos];
			if (SYMSIZ<8){
				if (c>'@')
					cc=c-'a'+5;
				else switch(c){
					case ' ':cc=0;break;
					case '.':cc=1;break;
					case ',':cc=2;break;
					case '?':cc=3;break;
					case '-':cc=4;break;
					default:printf("LITERAL PANIC : unknown symbol %c(%d)\n",c,c);
				}	
				writebits(cc,SYMSIZ);
			}else
				writebits(c,SYMSIZ);


#ifdef __DEBUG__
if(output){
if (c>31)
 printf("literal\t>>%c<< (%d)/%d\n",c,cc, totlit);
else
 printf("literal\t>>(%d)<< (%d)/%d\n",c,cc,totlit);
}
#endif   			
		}			
				
	}
	flushbits();	


#ifdef __DEBUG__
if(output){
	for(i=0, totfreq=0;i<256;i++)
		totfreq+=chrfreq[i];
		
	for (i=0; i<255; i++)	
		for(j=i+1; j<256; j++)
			if (chrfreq[i]<chrfreq[j]){
				f=chrfreq[i];
				chrfreq[i]=chrfreq[j];
				chrfreq[j]=f;
				c=codetab[i];
				codetab[i]=codetab[j];
				codetab[j]=c;
			}
printf("maxdis %d, maxmat %d, totlit %d, totmat %d\n", mxdis, mxmat, totlit, totmat);

for (i=0, entropy=0.0; i<256; i++){
	if (chrfreq[i]==0)
		break;
	p=(double)chrfreq[i]/(double)totfreq;
	entropy+=p*log(p)/log(2.0); 
}
entropy=-entropy;
printf("sym entropy=%f, %f bits total, %d bits written, %d freq\n", entropy,(entropy*(double)totfreq),SYMSIZ*totfreq, totfreq);
bsum=(entropy*(double)totfreq);

for (i=0, entropy=0.0; i<mxmat; i++){
	if (matfreq[i]==0)
		continue;
	p=(double)matfreq[i]/(double)totmat;
	entropy+=p*log(p)/log(2.0); 
}
entropy=-entropy;
printf("mat entropy=%f, %f bits total, %d bits written, %d freq\n", entropy,(entropy*(double)totmat), matbits,totmat);
bsum+=(entropy*(double)totmat);

for (i=0, entropy=0.0; i<mxdis; i++){
	if (disfreq[i]==0)
		continue;
	p=(double)disfreq[i]/(double)totmat;
	entropy+=p*log(p)/log(2.0); 
}
entropy=-entropy;
printf("dis entropy=%f, %f bits total, %d bits written, %d freq\n", entropy,(entropy*(double)totmat), disbits, totmat);
bsum+=(entropy*(double)totmat);

for (i=0, entropy=0.0; i<dists; i++){
	if (idxfreq[i]==0)
		continue;
	p=(double)idxfreq[i]/(double)totmat;
	entropy+=p*log(p)/log(2.0); 
}
entropy=-entropy;
printf("idx entropy=%f, %f bits total, %d bits written, %d freq\n", entropy,(entropy*(double)totmat), idxbits,totmat);
bsum+=(entropy*(double)totmat);

entropy=0;
	p=(double)totmat/(double)(totmat+totlit);
	entropy+=p*log(p)/log(2.0); 
	p=(double)totlit/(double)(totmat+totlit);
	entropy+=p*log(p)/log(2.0); 
entropy=-entropy;
printf("dec entropy=%f, %f bits total, %d bits written, %d freq\n", entropy,(entropy*(double)(totmat+totlit)),decbits, totmat+totlit);
bsum+=(entropy*(double)(totmat+totlit));
printf("%f bits total, %d bytes, %d bits written, %d bytes, %d really\n", bsum, (int)(bsum/8.0+0.9999), 
				SYMSIZ*totfreq+disbits+decbits+matbits+idxbits,
				(SYMSIZ*totfreq+disbits+decbits+matbits+idxbits+7)/8,
				crulen );
for(i=0,l=0;i<mxdis;i++)				
	if (disfreq[i]==0)
		l++;
printf("%d dists, %d zeros\n", mxdis, l);		
}
#endif
	
}
/*********************************************************/
int main(int argc, char **argv)
{
int progress=0, i, bestlen=65536, rem, bestov, bestmm, bestdl, bestcr;
char bestbl[16];

	ifile=fopen("text.chr","rb");
	if(ifile){
		fread(codetab,1,256,ifile);
		fclose(ifile);
	}
	
	ifile=fopen("xxxdeco2.com","rb");
	if(ifile){
		startpos=fread(ibuf,1,256,ifile);
		fclose(ifile);

	}else
		startpos=0;

	if(argc>1)			
		ifile=fopen(argv[1], "rb");
	else	
		ifile=fopen("text.txt", "rb");
	if(!ifile){
		printf("***ERROR : Can't open infile!\n\n");
		printf("USAGE : comp2 [infile [outfile]]\n");
		printf("infile defaults to TEXT.TXT, outfile defaults to TEXT.CRU\n\n");
		return -1;
	}
	red=fread(&ibuf[startpos],1,PSIZ, ifile);
	fclose(ifile);
	red+=startpos;

	output=0;critlit=0;
	for(minmatch=2; minmatch<6; minmatch++)
		for(dbl=1; dbl<11; dbl++)
			for(lbl=1;lbl<5;lbl++)
			{
				compress();
				if (bestlen>crulen){
					bestlen=crulen;
					bestmm=lbl;
					bestdl=dbl;
					bestov=minmatch;
					printf("lbl %d, dbl %d, minmatch %d, critlit %d, crulen %d, \n", 
						lbl, dbl, minmatch, critlit, crulen);
				}	
			}
	lbl=bestmm;
	dbl=bestdl;	
	minmatch=bestov;

#ifdef __CRITLIT__
	for(critlit=0; critlit<800; critlit++)
	{
		compress();
		if (bestlen>crulen){
			bestlen=crulen;
			bestcr=critlit;
			printf("lbl %d, dbl %d, minmatch %d, critlit %d, crulen %d, \n", 
				lbl, dbl, minmatch, critlit, crulen);
		}	
	}
	critlit=bestcr;
#endif

	output=1;
	if(argc>2)
		ofile=fopen(argv[2],"wb");
	else
		ofile=fopen("text.cru","wb");		
	compress();
	fclose(ofile);

	printf("bestlen %d\n",bestlen);
	printf("bitszam %d\n",crubits);
	decompress();
	return 0;
}
