(* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
 *                                                                           *
 * TITLE   : Hugi Compo #27 Compressor                                       *
 * AUTHOR  : Ang Chin Keong                                                  *
 * DATE    : October 2008                                                    *
 *                                                                           *
 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *

 Pseudo Code
 -----------
 
 { Dictionary Phase }
 While still got data
 Do Begin
       If this byte is not in the dictionary
       Then add it to the dictionary
 End;
  
 { Run Length Encoding Phase }
 While still got data
 Do Begin
       If first 3 bytes are same
       Then begin
               Repeat
                 Count
               Until next byte are not same
               Write (RLE Marker, Data, Count)
            end
       Else write data without compress.
 End;
 
 { Convert to Bits Steam Phase }
 While still got data
 Do Begin
       If is RLE Marker
       Then begin
               Write the marker as 2 bits (11b)                             
               If counter used more than 4 bits, write (N extra bits * 2 bits of 11b) infront.
               Write the data as 4 bits with reference to the dictionary.
               Write the counter as 4 bits.
            end
       Else Write the data as 4 bits with reference to the dictionary.
 End;

*)

Program Hugi_Compo_27_Compressor;
{$DEFINE Counter}
Const
   BufSize=64512;
   WorkingBufSize=6;
Type
   DataBuf=Array[1..BufSize] Of Byte;
   WorkingBuf=Array[1..WorkingBufSize] Of Byte;
Var
   InputFile,OutputFile:File;
   DataIn,DataOut:^DataBuf;
   WorkBuf:^WorkingBuf;
   DataInPos:Word; { pointer to current data(have been use) in the buffer }
   WorkBufUsed:Word; { pointer to current data(been use) in the buffer }
   DataInSize,DataOutSize,WorkBufSize:Word; { Total size of data loaded in buffers }
   OriginalTotalData,TotalData,DataUsed:LongInt;
   RunLength,CompressedFileSize:LongInt;
   RepeatedByte,FirstByte,LastByte,NextByte:Byte;
   RunLengthData:Array[1..4] of Byte; { Change Longint into 4 bytes of data }
   BitUsed:Byte;
   ByteData:Byte;
   
   {$IFDEF Counter}
   LoopCount:LongInt;
   SRLEByteCount:LongInt;
   NotCmpCount:Longint;
   {$ENDIF}

Function ReadDataIn(Var ByteOut:Byte):Boolean; { Read Next byte of data from file }
Begin
   ReadDataIn:=True; { Will set to false later if no more data left }
   Inc(DataInPos); { Point to next data byte }
   If DataInPos>DataInSize { If no more data in buffer, read from file }
   Then Begin
           If Not(EOF(InputFile)) { Is there any data left in file ? }
           Then Begin { Yes, read it into DataIn buffer }
                   BlockRead(InputFile,DataIn^,BufSize,DataInSize);
                   DataInPos:=1; { Point to first data byte }
                End
           Else ReadDataIn:=False;
        End;
   ByteOut:=DataIn^[DataInPos]; { Return the data }
End;

Procedure UpdateWorkBuf;
Var
   Pos,BlankPos:Word;
Begin
   { Move data from back to front to replace the used date }
   Move(WorkBuf^[WorkBufUsed+1],WorkBuf^[1],WorkBufSize-WorkBufUsed);
   WorkBufSize:=WorkBufSize-WorkBufUsed; { New Workbuf Size }
   WorkBufUsed:=0; { New Workbuf Pos }
   { Move data from DataIn buffer to fill in the blank }
   BlankPos:=WorkBufSize+1; {Point to the first blank in work buffer }
   For Pos:=BlankPos to WorkingBufSize Do
   Begin
      If ReadDataIn(WorkBuf^[Pos]) { Fill in the back }
      Then Inc(WorkBufSize)    { If successful then increase buffer size }
      Else break;              { stop if no data to read }
   End;
End;

Procedure WriteData(Dta:Byte);
Begin
   If DataOutSize=BufSize { Ouput buffer full ? }
   Then Begin { Write data to disk }
           BlockWrite(OutputFile,DataOut^,DataOutSize);
           DataOutSize:=0;
        End;
   Inc(DataOutSize); { Add data to ouput buffer }
   DataOut^[DataOutSize]:=Dta;
End;

Procedure WriteBitData(Dta:Byte; Bit:Byte);
Var
   BitLeft:Byte;
   ShlCount:Byte;
   TmpDta:Byte;
   RemainingBit:Byte;
Begin
   If DataOutSize=BufSize { Ouput buffer full ? }
   Then Begin { Write data to disk }
           BlockWrite(OutputFile,DataOut^,DataOutSize);
           DataOutSize:=0;
        End;
        
   BitLeft := (8 - BitUsed);
   TmpDta  := $FF SHR (8 - Bit);
   Dta     := (Dta AND TmpDta);

   If (BitLeft = Bit) Then
   Begin
     ByteData := ByteData OR Dta;
     Inc(DataOutSize); { Add data to ouput buffer }
     DataOut^[DataOutSize]:=ByteData;
     ByteData := 0;
     BitUsed  := 0;
   End Else
   If (BitLeft > Bit) Then
   Begin
     ShlCount := BitLeft - Bit;
     TmpDta   := Dta SHL ShlCount;
     ByteData := ByteData OR TmpDta;
     BitUsed  := BitUsed + Bit;
   End Else
   If (BitLeft < Bit) Then
   Begin
     TmpDta   := Dta SHR (Bit - BitLeft);
     ByteData := ByteData OR TmpDta;
     Inc(DataOutSize); { Add data to ouput buffer }
     DataOut^[DataOutSize]:=ByteData;
     RemainingBit := Bit - BitLeft;
     ShlCount     := 8 - RemainingBit;
     ByteData     := Dta SHL ShlCount;
     BitUsed      := RemainingBit;
   End;
End;


Var
   DictBuf:Array[1..20000] Of Byte;
   DictUsed:Byte;
   A:Integer;
   B:Integer;
   F:Boolean;
Begin
   BitUsed := 0;
   ByteData := 0;

   {$IFDEF Counter}
   LoopCount:=0;
   SRLEByteCount:=0;
   NotCmpCount:=0;
   {$ENDIF}
   
   Writeln;
   WriteLn('-=====================================-');
   WriteLn(' Hugi Compo #27 ACK Entry''s Compressor');
   WriteLn('-=====================================-');
   Writeln;

   IF ParamCount <> 2
   Then Begin WriteLn('Usage: H27CMP [InputFile]  [OutputFile]');Writeln;Halt(1); End;

   WriteLn('Building Dictionary......');

   Assign(InputFIle,ParamStr(1)); Assign(OutputFIle, 'Dict.bin');
   Reset(InputFile,1);Rewrite(OutputFile,1);

   DataIn:=Nil;DataOut:=Nil;WorkBuf:=Nil;
   New(DataIn);New(DataOut);New(WorkBuf);
   DataInPos:=0;WorkBufUsed:=0;
   DataInSize:=0;DataOutSize:=0;WorkBufSize:=0;

   TotalData:=FileSize(InputFile);
   OriginalTotalData:=TotalData;
   DataUsed:=0;
   DictUsed:=0;

   { Start Main Loop }
   While DataUsed<TotalData DO
   Begin
      
       {$IFDEF Counter}
       Inc(LoopCount);
       {$ENDIF}

       UpdateWorkBuf;

       F:=False;
       For A:=1 To DictUsed Do
       Begin
          If (DictBuf[A]=WorkBuf^[1]) Then
          Begin
            F:=True;
          End;
       End;

       If (F=False)
       Then Begin
               DictUsed := DictUsed + 1;
               DictBuf[DictUsed]:=WorkBuf^[1];
               WriteData(WorkBuf^[1]);
            End;

       Inc(DataUsed,1);
       WorkBufUsed:=1;

   End; { End of Main Loop }

   BlockWrite(OutputFile,DataOut^,DataOutSize);
   CompressedFileSize:=FilePos(OutputFile);
   Close(InputFile); Close(OutputFile);

   Writeln;
   Writeln('Dictionary size is ',CompressedFileSize,' bytes.');
   Writeln('Dictionary saved to file Dict.bin');
   Writeln;
   
   WriteLn('Phase One Compressing......');
   
   If (CompressedFileSize > 12) Then
   Begin
     Writeln;
     Writeln('ERROR! Dictionary size is too large. MAX allowed is 12 bytes.');
     Writeln;
     EXIT;
   End;

   Assign(InputFIle,ParamStr(1)); Assign(OutputFIle,'temp.cmp');
   Reset(InputFile,1);Rewrite(OutputFile,1);

   DataInPos:=0;WorkBufUsed:=0;
   DataInSize:=0;DataOutSize:=0;WorkBufSize:=0;

   TotalData:=FileSize(InputFile);
   DataUsed:=0;

   { Start Main Loop }
   While DataUsed<TotalData DO
   Begin

   {$IFDEF Counter}
   Inc(LoopCount);
   {$ENDIF}

   UpdateWorkBuf;

   F:=False;
   For A:=1 To DictUsed Do
   Begin
      If ( (DictBuf[A]=WorkBuf^[1]) And (DictBuf[A]=WorkBuf^[2]) And
           (DictBuf[A]=WorkBuf^[3]) )
      Then
      Begin
           F:=True;
           WorkBufUsed:=3;
           Inc(DataUsed,3);
           RunLength:=0;
           UpdateWorkBuf;

           While (
                   (WorkBufSize<>0) And
                   (DictBuf[A]=WorkBuf^[1]) And
                   (RunLength < $FF)
                 )
           Do Begin
                 Inc(RunLength); { Increase counter } { Bugs if RunLength reach max!(impossible) }
                 WorkBufUsed:=1;
                 UpdateWorkBuf;
              End;

           Inc(DataUsed,RunLength);
           WriteData($FF);
           WriteData(A-1);
           WriteData(RunLength);

          {$IFDEF Counter} Inc(SRLEByteCount);  {$ENDIF}
      End

      Else
      If (DictBuf[A]=WorkBuf^[1]) AND (DictBuf[A]=WorkBuf^[2]) Then
      Begin
          F:=True;
          Begin
            WorkBufUsed:=2;
            Inc(DataUsed,2);
            WriteData(A-1);
            WriteData(A-1);
          End;
      End

      Else
      If (DictBuf[A]=WorkBuf^[1]) Then
      Begin
          F:=True;
          Begin
            WorkBufUsed:=1;
            Inc(DataUsed,1);
            WriteData(A-1);
            {$IFDEF Counter} Inc(NotCmpCount);  {$ENDIF}
          End;
      End;
   End;

   If (F=False)
   Then Begin
           Writeln('Not Found in Dictionary Error!');
        End;

   End; { End of Main Loop }

   BlockWrite(OutputFile,DataOut^,DataOutSize);

   CompressedFileSize:=FilePos(OutputFile);
   Close(InputFile); Close(OutputFile);

   WriteLn('Phase Two Compressing......');

   Assign(InputFIle,'Temp.cmp'); Assign(OutputFIle,ParamStr(2));
   Reset(InputFile,1);Rewrite(OutputFile,1);

   DataInPos:=0;WorkBufUsed:=0;
   DataInSize:=0;DataOutSize:=0;WorkBufSize:=0;

   TotalData:=FileSize(InputFile);
   DataUsed:=0;

   { Start Main Loop }
   While DataUsed<TotalData DO
   Begin

   {$IFDEF Counter}
   Inc(LoopCount);
   {$ENDIF}

   UpdateWorkBuf;

   { If RLE Compressed }
   If (WorkBuf^[1] >= $7E) Then
   Begin
        { If 18 bytes to a nibble method }
        If (WorkBuf^[1] = $FF) Then
        Begin
           WriteBitData($FF, 2);
           If (WorkBuf^[3] > $0F) Then
             WriteBitData($FF, 2);
           If (WorkBuf^[3] > $1F) Then
             WriteBitData($FF, 2);
           If (WorkBuf^[3] > $3F) Then
             WriteBitData($FF, 2);
           If (WorkBuf^[3] > $7F) Then
             WriteBitData($FF, 2);

           WriteBitData(WorkBuf^[2], 4);
           WriteBitData(WorkBuf^[3], 4);
           Inc(DataUsed,3);
           WorkBufUsed:=3;
        End;
   End
   { Else Not RLE Compressd }
   Else Begin
           WriteBitData(WorkBuf^[1], 4);
           Inc(DataUsed,1);
           WorkBufUsed:=1;
        End;

   End; { End of Main Loop }

   WriteBitData($FE, 6);{ 6 bits END Marker }

   If (BitUsed <> 0) Then
     Begin
       Writeln(8 - BitUsed,' unused bits filled with 1s');
       WriteBitData($FF, 8 - BitUsed);
     END
   Else
     Writeln('No partially used byte left.');

   BlockWrite(OutputFile,DataOut^,DataOutSize);
   CompressedFileSize:=FilePos(OutputFile);
   Close(InputFile); Close(OutputFile);

   Writeln('Done.');
   {$IFDEF Counter}
   Writeln;
   Writeln('Compression Method             Count');
   Writeln('------------------------------------' );
   Writeln('RLE Compressed          ',SRLEByteCount:12);
   Writeln('Not RLE Compressed      ',NotCmpCount:12);
   Writeln('====================================');
   {Writeln('Total Compression Loops ',LoopCount:12);}
   {$ENDIF}
   Writeln;
   Writeln('Result:');
   Writeln('Total ',OriginalTotalData,' bytes compressed to ',CompressedFileSize,' bytes.');
   Write  (CompressedFileSize/OriginalTotalData*100:3:2,'% of original size. ');
   Writeln(OriginalTotalData-CompressedFileSize,' bytes saved.');
End.
