/*  dmc.c										T.L. Yu, Feb. 95
 *  uses dynamic markov model to compress data.
 *  The program has been compiled with both TURBO C and WATCOM C;
 *  so far, the resulting "exe" files from both compilers work well.
 *  When using TURBO C, the huge memory model should be chosen.
 *  This program is for demonstration use.
 *
 *  Compression improvement can be obtained by adjusting min_cnt1, min_cnt2
 *  and the way of reconstructing model when memory is full.
 *  Usage --  to compress :   dmc  input_file   output_file
 *            to expand   :   dmc -e input_file output_file
 *
 */

#include	<stdlib.h>
#include	<stdio.h>
#include <io.h>

#include "dmc.h"
#include "iodmc.fun"			/* io functions */

   /*
    * Register geometry of the arithmetic coder.
    * Because we only have two symbols, we do not need higher precision:
    * low, high and code are treated as 15-bit quantities.
    */
#define NBITS  15    	/* # of bits used in high, low */
#define MSBIT	0x4000   /* most significant bit of a 15-bit value */
#define MSMASK	0x7FFF	/* consider 15 bits: keeps the low 15 bits of a value */
#define MASK2  0x1FFF	/* for underflow use: keeps the low 13 bits (drops the top two) */

/*
 * Bit-level I/O state: one byte under construction plus a one-bit cursor.
 */
typedef struct {
	unsigned char mask;		/* single-bit mask marking the current bit position in `code` */
	unsigned char code;		/* byte currently being assembled or consumed */
} IO_CONTROL;

/* Output side: output_bits() fills `code` from bit 0x80 downwards and
   writes the byte out when the mask reaches 0. */
static IO_CONTROL	out_control = { 0x80, 0 };
/* Input side: presumably driven by the input_bit helper from iodmc.fun,
   which is not visible here -- TODO confirm. */
static IO_CONTROL	in_control  = { 0x01, 0 };

	/* Arithmetic encoder/decoder registers (all handled as NBITS-bit values) */
typedef struct {
	uint16 code;				/* decoder only: window of bits read from the input */
   uint16 low;				/* lower bound of the current coding interval */
   uint16 high;				/* upper bound of the current coding interval */
   uint16 underflow_bits;	/* encoder only: underflow bits still owed to the output */
} Arith;
/* The single coder instance shared by the encode and decode routines. */
static Arith gArith;

	/* Information / progress reporting */
/* Not referenced by the functions visible in this file (output_bits()
   declares its own static of the same name); possibly used by code pulled
   in from iodmc.fun -- TODO confirm before removing. */
static uint16 comforter = 0;
/* Number printed (and then incremented) by each initialize_model() call. */
static uint16 times = 1;

	/*
	 * maxstates: capacity of the model's state tables, chosen per compiler.
	 * int and unsigned int are 32 bits for WATCOM C.
	 */
#ifdef	__WIN32__
#	define maxstates 100001L     /* size_t is 32 bits */
#else
#ifdef   __TURBOC__
#	define maxstates 32760		/* TURBO C can access low DOS mem only */
#else
#	define maxstates 500000L	/* for WATCOM C */
#endif
#endif

/*
 *	Model tables (d is the input bit, 0 or 1; s is a state number):
 *
 *	next_state[d][s]  = state reached from s after transition d
 *	trans_cnt[d][s]   = number of observations of input d when in state s
 *	state             = number of current state
 *	cTrans (min_cnt1) = minimum # of transitions from the current state
 *	                    to state s before s is eligible for cloning
 *	cVisit (min_cnt2) = minimum # of visits to a state s from all
 *	                    predecessors of s other than the current state
 *	                    before s is eligible for cloning.
 *	                    A simple choice for min_cnt1 and min_cnt2 values
 *	                    is to set them to 2, 2.
 *
 *	NOTE(review): next_state entries are uint16, while maxstates exceeds
 *	65535 in two of the three build configurations.  If uint16 is 16 bits
 *	wide, state numbers above 65535 are truncated when stored -- confirm
 *	whether that is intended.
 */
typedef struct {
   size_t state;				/* current state */
   size_t last_state;		/* last state used (highest state number handed out) */
   size_t total_states;    /* total states; not referenced by the code visible here */
   uint16 *next_state[2];  /* index to next state from this bit (index) */
   uint16 *trans_cnt[2];   /* observation count for this bit */
   uint16 cTrans;	 /* min # of trans. from current state to s -> cloning */
   uint16 cVisit;  /* min # of visits from all preds to s before cloning */
} Model;

/* The single model instance shared by the encoder and the decoder. */
static Model gModel;

/*
 * put_file_length -- write `length` to `output` as four bytes, most
 * significant byte first (the layout get_file_length() reads back).
 *
 * If `length` does not fit in 32 bits, "File size too big" is printed and
 * the program exits with status 1 before anything is written.
 *
 * The previous version tested `length` only after shifting it left by
 * 8 bits four times.  With a 32-bit uint32 that value is always zero, so
 * the test could never fire; with a wider uint32 it fired whenever the
 * low 32 bits were non-zero, i.e. for ordinary file sizes.
 */
void put_file_length(uint32 length, FILE *output)
{
	int shift;

	/* Two 16-bit shifts rather than one 32-bit shift: shifting by the full
	   width of the type would be undefined when uint32 is exactly 32 bits. */
	if ((length >> 16) >> 16) {
		printf("File size too big\n");
		exit(1);
	}

	for (shift = 24; shift >= 0; shift -= 8) {
		putc((int)((length >> shift) & 0xFF), output);
	}
}



/*
 * initialize_encoder -- reset the encoder registers: the interval becomes
 * [0, MSMASK] and no underflow bits are pending.
 */
void initialize_encoder(void)
{
	gArith.underflow_bits = 0;
	gArith.high = MSMASK;
	gArith.low = 0;
}

/*
 * initialize_decoder -- prime the decoder from `input`.
 *
 * Shifts the first NBITS input bits into gArith.code (first bit read ends
 * up most significant), then sets the interval to [0, MSMASK].
 */
void initialize_decoder( FILE *input )
{
	int bit;
	int remaining = NBITS;

	gArith.code = 0;
	while (remaining-- > 0) {
		gArith.code <<= 1;
		input_bit(input, bit);
		gArith.code += bit;
	}

	gArith.low = 0;
	gArith.high = MSMASK;
}



	/*
	 * initialize_model -- (re)build the starting state machine.
	 *
	 * First call: allocates the four state tables (check_mem_error() is
	 * given each pointer).  Later calls, made by encode()/decode() when the
	 * tables are full: clears every transition count instead.
	 * In both cases the first n*m = 8*256 states are then wired up with
	 * both counts set to 1, and last_state is set to n*m - 1.
	 *
	 * Side effects: prints a progress message, increments `times`, and
	 * leaves gModel.state at the last state written by the set-up loop.
	 *
	 * The tables hold maxstates + 1 entries.  encode() and decode() only
	 * re-initialize once last_state == maxstates, so update_count() can
	 * create -- and get_mp() can then read -- a state numbered exactly
	 * maxstates.  With only maxstates entries that access landed one
	 * element past the end of every table.
	 */
void initialize_model(void)
{
	const size_t table_len = (size_t)maxstates + 1;
	uint16 j, k, m, n;
	size_t i;
	static bool initialized = false;

	gModel.cTrans = gModel.cVisit = 2;
	if (!initialized) {
		gModel.next_state[0] = (uint16 *) malloc(table_len * sizeof(uint16));
		check_mem_error( gModel.next_state[0] );
		gModel.next_state[1] = (uint16 *) malloc(table_len * sizeof(uint16));
		check_mem_error( gModel.next_state[1] );
		gModel.trans_cnt[0] = (uint16 *) malloc(table_len * sizeof(uint16));
		check_mem_error( gModel.trans_cnt[0] );
		gModel.trans_cnt[1] = (uint16 *) malloc(table_len * sizeof(uint16));
		check_mem_error( gModel.trans_cnt[1] );
		initialized = true;
	} else {
		for (i = 0; i < table_len; ++i) {
			gModel.trans_cnt[0][i] = gModel.trans_cnt[1][i] = 0;
		}
	}

	n = 8;
	printf(" initialize_model %d times ", times++);
	m = (uint16)(1U << n);			/* m = 2^n = 256 */

	for (i = 0; i < n; ++i) {
		for (j = 0; j < m; ++j) {
			/* gModel.state doubles as the loop's working index; its final
			   value is visible to callers, so it is kept that way. */
			gModel.state = i + n * j;
			k = ( i + 1 ) % n;
			gModel.next_state[0][gModel.state] = k + (( 2*j ) % m ) * n;
			gModel.next_state[1][gModel.state] = k + ((2*j+1) % m ) * n;
			gModel.trans_cnt[0][gModel.state] = 1;   /* force this to 1 to avoid overflow */
			gModel.trans_cnt[1][gModel.state] = 1;
		}
	}
	gModel.last_state = n * m - 1;
}


/*
 * update_count -- account for input bit `x` seen in the current state and
 * move the model to its next state.
 *
 * Steps:
 *   1. If the count for `x` in the current state exceeds 0xfff1, halve
 *      both counts of that state, then increment the count for `x`.
 *   2. Look up the target state and the total of its two counts.
 *   3. If the transition count exceeds cTrans and the target's remaining
 *      total exceeds cVisit, clone the target into a fresh state numbered
 *      ++last_state, redirect this transition to the clone, and split the
 *      target's counts between the clone and the target.
 *   4. gModel.state becomes the clone if one was made, else the target.
 */
void update_count(int x /* bit */)
{
	unsigned int target, target_total, clone;
	int side;

	if (gModel.trans_cnt[x][gModel.state] > 0xfff1) {
		/* rescale counts to avoid overflow */
		gModel.trans_cnt[0][gModel.state] /= 2;
		gModel.trans_cnt[1][gModel.state] /= 2;
	}
	gModel.trans_cnt[x][gModel.state] += 1;

	target = gModel.next_state[x][gModel.state];
	/* total transitions out of the target on receiving 0 or 1 */
	target_total = gModel.trans_cnt[0][target] + gModel.trans_cnt[1][target];

	/* Not eligible for cloning: simply follow the existing transition. */
	if (gModel.trans_cnt[x][gModel.state] <= gModel.cTrans ||
	    (int)(target_total - gModel.trans_cnt[x][gModel.state]) <= gModel.cVisit) {
		gModel.state = target;
		return;
	}

	clone = ++gModel.last_state;		/* obtain a new state # */
	gModel.next_state[x][gModel.state] = clone;
	for (side = 0; side < 2; ++side) {
		gModel.next_state[side][clone] = gModel.next_state[side][target];
		/* The current transition count is re-read on every pass: when the
		   target is the current state, the subtraction below changes it. */
		gModel.trans_cnt[side][clone] = (uint16) ((uint32)gModel.trans_cnt[side][target] *
			gModel.trans_cnt[x][gModel.state] / target_total );
		gModel.trans_cnt[side][target] -= gModel.trans_cnt[side][clone];
	}
	gModel.state = clone;
}


/*
 * close_output -- write the partially filled output byte, if any, and
 * close `output`.  A mask other than 0x80 means at least one bit has been
 * placed in out_control.code since the last byte was written.
 */
static void close_output (FILE *output)
{
	const int byte_pending = (out_control.mask != 0x80);

	if (byte_pending) {
		putc(out_control.code, output);
	}
	fclose(output);
}

/*
 * flush_encoder -- finish the arithmetic-coded stream and close `output`.
 *
 * Emits the second-most-significant bit of `low`, then
 * (underflow_bits + 1) copies of its complement, then 16 zero bits, and
 * finally calls close_output().
 *
 * Every bit handed to output_bit() is normalized to 0 or 1, as in the
 * other call sites; the first bit used to be passed as the raw mask
 * result (0x2000 or 0).  underflow_bits is left at 0 on return instead of
 * wrapping around to 0xFFFF.
 */
void	flush_encoder( FILE	*output )
{
	int	b, i;

	b = (gArith.low & (MSBIT >> 1)) ? 1 : 0;
	output_bit( output, b );

	b = b ? 0 : 1;					/* complement of the bit just written */
	gArith.underflow_bits++;
	while ( gArith.underflow_bits > 0 ) {
		output_bit( output, b );
		gArith.underflow_bits--;
	}

	b = 0;
	for ( i = 0; i < 16; ++i ) {
		output_bit( output, b );
	}
	close_output(output);
}


	/* get midpoint of high-low interval */
uint16  get_mp (void)
{
	uint16 p0, p1, mp;
	uint32 ps, range;

   p0 = gModel. trans_cnt[0][gModel. state] + 1;
	p1 = gModel. trans_cnt[1][gModel. state] + 1;
	ps = p0 + p1;
	ps = (uint32)p0 + (uint32)p1;          /* ps is unsigned long */

   range = (uint32)(gArith. high - gArith. low) + 1;
	mp = gArith. low + (uint16)((range * p0) / ps);
	if (mp >= gArith. high) {
   	mp = gArith. high - 1;         /* take care of roundoff error*/
   }
	return mp;
}

/*
 * shift_out_encoded_bits -- renormalize the encoder interval.
 *
 * Repeats until neither case applies:
 *   - low and high agree in their top bit: emit that bit followed by all
 *     pending underflow bits (each the opposite value), or
 *   - underflow threatens (second-highest bit set in low, clear in high):
 *     count one more underflow bit and discard the second-highest bits.
 * After either case both bounds are shifted left by one within 15 bits,
 * with a 0 entering low and a 1 entering high.
 */
void	shift_out_encoded_bits( FILE *output )
{
	int	b;

	while ( 1 ) {
		if ((gArith.low & MSBIT) != (gArith.high & MSBIT)) {
			/* Top bits differ: only the underflow case keeps us going. */
			if (!(gArith.low & (MSBIT >> 1)) || (gArith.high & (MSBIT >> 1))) {
				return;
			}
			gArith.underflow_bits += 1;
			gArith.low &= MASK2;
			gArith.high |= (MSBIT >> 1);
		} else {
			/* Shift out the matched MSB, then the owed underflow bits. */
			b = ( gArith.high & MSBIT ) ? 1 : 0;
			output_bit(output, b);
			b = !b;
			for ( ; gArith.underflow_bits > 0; gArith.underflow_bits-- ) {
				output_bit( output, b );
			}
		}

		gArith.low = ( gArith.low << 1 ) & MSMASK;				/* shift in 0s */
		gArith.high = (( gArith.high << 1 ) & MSMASK) | 1;		/* shift in 1s */
	}
}   /* shift_out_encoded_bits()	*/


/*
 * encode -- compress `input` to `output`, one bit at a time.
 *
 * Starting from model state 0, every byte is fed most-significant bit
 * first.  For each bit: the split point is taken from get_mp(), the model
 * is re-initialized if last_state has reached maxstates, the model is
 * updated, the interval is narrowed to the half that matches the bit, and
 * settled bits are shifted out.
 *
 * The EOF value returned by getc() is encoded too, as eight 1 bits,
 * before the loop ends; this matches what the format has always
 * contained.  The bits are taken from an unsigned copy of the low eight
 * bits of the value read, because left-shifting the negative EOF value in
 * a signed int is undefined behaviour.
 *
 * The caller is expected to finish the stream (see flush_encoder()).
 */
void encode(FILE *input,FILE *output)
{
	int c, k, b;
	unsigned int bits;
	uint16 mp;

	gModel.state = 0;
	do {
		c = getc( input );
		bits = (unsigned int)c & 0xFFu;		/* EOF contributes 0xFF */

		for ( k = 0; k < 8; ++k ) {
			b = ( bits & 0x80u ) ? 1 : 0;
			bits <<= 1;

			/* The split point must be taken before the model moves on. */
			mp = get_mp();
			if ( gModel.last_state == maxstates )
				initialize_model();
			update_count( b );

			if ( b == 1 )
				gArith.low = mp;			/* pick upper part of range */
			else
				gArith.high = mp - 1;		/* pick lower part of range */
			shift_out_encoded_bits( output );
		}	/* for k */
	} while ( c != EOF );
}	/* encode */


/*
 * remove_and_get_bits -- renormalize the decoder interval, pulling one new
 * input bit into gArith.code for every bit shifted out of low/high.
 *
 * Repeats until neither case applies:
 *   - low and high agree in their top bit: nothing extra to do, or
 *   - underflow threatens (second-highest bit set in low, clear in high):
 *     flip that bit in code and discard it from low and high.
 * After either case low, high and code are shifted left by one within
 * 15 bits; low takes a 0, high takes a 1, code takes the next input bit.
 */
void	remove_and_get_bits( FILE	*input )
{
	int	bit;

	while ( 1 ) {
		if ((gArith.high & MSBIT) != (gArith.low & MSBIT)) {
			/* Top bits differ: stop unless the underflow case applies. */
			if (!(gArith.low & (MSBIT >> 1)) || (gArith.high & (MSBIT >> 1))) {
				return;
			}
			/* Throw away the 2nd MSB to prevent underflow. */
			gArith.code ^= (MSBIT >> 1);
			gArith.low &= MASK2;
			gArith.high |= (MSBIT >> 1);
		}

		gArith.low = (gArith.low << 1) & MSMASK;
		gArith.high = ((gArith.high << 1) & MSMASK) | 1;
		gArith.code = (gArith.code << 1) & MSMASK;
		input_bit(input, bit);
		gArith.code += bit;
	}
}   /* remove_and_get_bits() */


/*
 * decode -- expand `flen` bytes from `input` to `output`.
 *
 * Starting from model state 0, each pass decides one bit by comparing
 * gArith.code with the split point from get_mp(), narrows the interval to
 * match, and writes the bit.  After every eighth bit the byte count goes
 * up, and decoding stops as soon as it equals `flen`.  Otherwise the model
 * is re-initialized when last_state has reached maxstates, then updated,
 * and the decoder registers are renormalized.
 *
 * The decoder registers must already be primed (see
 * initialize_decoder()).
 *
 * When `flen` is 0 nothing is written.  The loop emits a bit before it
 * tests the length, so an empty original used to produce one stray bit.
 */
void decode(uint32 flen, FILE *input, FILE *output)
{
	uint16 b, k = 0;
	uint32 len = 0;
	uint16 mp;

	gModel.state = 0;
	if ( flen == 0 )
		return;

	for ( ; ; ) {
		mp = get_mp();
		if ( gArith.code >= mp ) {		/* determine if symbol is 0 or 1 */
			b = 1;
			gArith.low = mp;
		} else {
			b = 0;
			gArith.high = mp - 1;
		}
		output_bit(output, b);			/* output a bit */

		if ( ++k == 8 ) {
			++len;
			k = 0;
		}
		if ( len == flen )
			break;

		if ( gModel.last_state == maxstates )
			initialize_model();
		update_count(b);				/* update state */

		/* Next, remove matched or underflow bits. */
		remove_and_get_bits( input );
	}
}	/* decode */

/*
 * get_file_length -- read a four-byte length from `input`, most
 * significant byte first, and return it.
 *
 * If the stream ends before four bytes have been read, a message is
 * printed and the program exits with status 1.  The EOF value used to be
 * OR-ed into the result, which produced a garbage length.
 */
long	get_file_length(FILE	*input)
{
	unsigned long length = 0;
	int i, byte;

	/* input msbs first */
	for (i = 0; i < 4; i++) {
		byte = getc(input);
		if (byte == EOF) {
			printf("Unexpected end of file\n");
			exit(1);
		}
		length = (length << 8) | (unsigned long)byte;
	}
	return (long)length;
}

/*
 * output_bits -- append the low `count` bits of `value` to `output`,
 * most significant of those bits first.
 *
 * Bits are gathered in out_control.code; each completed byte is written
 * with putc().  A '.' is printed to stdout for the first byte written and
 * then once per 0x1000 bytes written through this function.
 *
 * `count` must be between 1 and the bit width of uint32.  A count of 0
 * writes nothing; it used to shift by -1, which is undefined.  The
 * starting mask is built in uint32 rather than long, so selecting the top
 * bit no longer overflows a signed 32-bit long.
 */
void	output_bits(FILE *output, uint32 value, uint16 count)
{
	uint32 p;
	static uint16 comforter = 0;

	if ( count == 0 )
		return;

	p = (uint32)1 << ( count - 1 );
	while ( p != 0 ) {
		if ( p & value )
			out_control.code |= out_control.mask;	/* non-zero bit */
		p >>= 1;
		out_control.mask >>= 1;
		if ( out_control.mask == 0 ) {
			/* byte complete: write it and start the next one */
			putc( (char)out_control.code, output );
			out_control.mask = 0x80;
			out_control.code = 0;
			if ( !(comforter++ & 0x0fff) ) {
				putc('.', stdout );
				fflush( stdout );
			}
		}
	}
}	/* output_bits() */

