/*
 * Build-time switches.  All four are tested with #if, so they must
 * remain preprocessor macros with integer values.
 */

/* Nonzero: compare every decoded bit against bitsdata and print the
 * decoded word plus a running error count after each output word. */
#define DEBUG 1
/* Trace level for printf diagnostics in main(); the code tests for
 * levels >1 (path metrics), >3 (traceback state) and >5 (inputs and
 * code bits).  0 disables all of them. */
#define VERBOSE 0
/* Nonzero: start traceback from the state with the smallest metric
 * instead of always starting from state 0. */
#define BESTTB 0
/* Number of outer-loop passes (one pass consumes WLEN input symbols).
 * 0 selects an endless while(1) loop instead. */
#define OLOOP 3128

/* Shorthand used for every metric, state and traceback word below.
 * NOTE(review): the traceback logic shifts WLEN (32) decisions into one
 * uint, so it appears to assume uint is at least 32 bits - confirm for
 * the target. */
typedef unsigned int uint;

#include "bithdr.h"
#include "bittbl.h"

/* Symbols processed per outer-loop pass, and decisions stored per
 * traceback word (one bit per symbol). */
#define WLEN 32

/* Number of trellis states (size of each metric array), and half of it:
 * the add-compare-select loop handles states i and i+NDIV2 together. */
#define N 64
#define NDIV2 32

/* SRMASK keeps a state index within 6 bits (0..N-1).
 * PUSHST is the top state bit; traceback emits a 1 when it is set. */
#define SRMASK 0x3f
#define PUSHST 0x20

/* Octal constants.  G0/G1 are not referenced in this file's visible
 * code; presumably they are the two generator polynomials of the
 * code - TODO confirm. */
#define G0 0171
#define G1 0133

/* Masks applied to the state index before the codetable lookup.
 * Numerically these are G0>>1 and G1>>1. */
#define G0S 074
#define G1S 055

/* Two path-metric arrays, one entry per state.  main() reads one and
 * writes the other for each symbol, then swaps the roles.  Being
 * file-scope objects they start out zeroed. */
uint metrics0[N];
uint metrics1[N];

/*
 * Traceback storage: SIZETB words per state, each word holding WLEN
 * one-bit survivor decisions, i.e. SIZETB*32 bits of history per state.
 * SIZETBMASK is SIZETB-1 and is used to wrap the word index with '&';
 * if it is defined as 0 the code falls back to '% SIZETB'.
 */
#define SIZETB 4
#define SIZETBMASK 0x03
uint tbbuf[SIZETB][N];


#if (DEBUG)
/* Self-check state, only built when DEBUG is nonzero. */
/* Next expected bit; walks forward through bitsdata (declared outside
 * this file's visible code) as decoded bits are compared. */
uint *chkbit=bitsdata;
/* Running count of decoded bits that differed from the expected bit. */
uint errs=0;
#endif


/*
 * Port stand-ins: each pointer refers to an ordinary variable here.
 * main() stores every decoded word through outport.  inporti/inportq
 * are only referenced inside a disabled (#if 0) block in main(), which
 * reads input samples from softdata instead.
 */
uint outport_r;
uint *outport = &outport_r;
uint inporti_r;
uint *inporti = &inporti_r;
uint inportq_r;
uint *inportq = &inportq_r;


/*
 * Viterbi decoder test driver.
 *
 * Each outer pass:
 *   1. consumes WLEN pairs of input samples from softdata and, for each
 *      pair, runs add-compare-select over all N states, shifting one
 *      survivor decision per state into tbbuf[thisbuf][state];
 *   2. advances to the next traceback word;
 *   3. once all SIZETB words have been filled at least once, traces
 *      back through SIZETB-1 words without output and emits the WLEN
 *      bits recovered from the oldest word through *outport.
 *
 * With DEBUG set, every emitted bit is compared against bitsdata and a
 * running error count is printed.
 *
 * Returns 0 when OLOOP passes have completed (never returns if OLOOP
 * is 0).
 *
 * NOTE(review): path metrics are never renormalised; whether they can
 * wrap within OLOOP*WLEN symbols depends on the sqptable/sqmtable
 * values, which are not visible here.
 */
int main(void) {
  register uint *om = metrics0;   /* metrics read for the current symbol */
  register uint *nm = metrics1;   /* metrics written for the current symbol */
  register uint *tptr;            /* scratch pointer (traceback word / swap) */

  register uint thisbuf=0;        /* traceback word currently being filled */
  uint startoutput=0;             /* becomes 1 once every word has been filled */

  register uint bitno;

  uint *inptr=softdata;

  uint im, qm, ip, qp;
  register uint d0, d1;

  register uint i;
  uint k;

  register uint cg, cg0, cg1;

#if (OLOOP)
  uint outerloop;
#endif


#if (OLOOP)
  for(outerloop=OLOOP; outerloop--; ) {
#else
  while (1) {
#endif

    for (bitno=WLEN; bitno--; ) {

      /* this shouldn't really be here: test input comes from a table */
      im = *inptr++;
      qm = *inptr++;
#if (VERBOSE>5)
      printf("im=%d qm=%d\n", im, qm);
#endif
      /* this is the correct stuff: read the two input ports */
#if 0
      im = *inporti;
      qm = *inportq;
#endif

      /* Turn each raw sample into its two branch-metric contributions.
       * The "p" lookups must happen before the "m" lookups overwrite
       * the raw sample values. */
      ip = sqptable[im];
      qp = sqptable[qm];
      im = sqmtable[im];
      qm = sqmtable[qm];

      /* UNROLL WORKS SINCE MSB(G0)=MSB(G1)=1:
       * states i and i+NDIV2 share the predecessors 2i and 2i+1, and
       * their branch metrics are the same terms with p and m swapped. */
      for (i=NDIV2; i--; ) {

        /* metrics of the two predecessor states */
        d0 = (i << 1) & SRMASK;
        d1 = d0 | 1;
        d0 = om[d0];
        d1 = om[d1];

        /* code bits for this state, looked up from the masked index */
        cg0 = (i & G0S);
        cg1 = (i & G1S);
        cg0 = codetable[cg0];
        cg1 = codetable[cg1];
#if (VERBOSE>5)
        printf("    cg0=%d, cg1=%d\n", cg0, cg1);
#endif

        /* UNROLL: MSB=0 (state i) */

        if(cg0) {
          d0 += im;
          d1 += ip;
        }
        else {
          d0 += ip;
          d1 += im;
        }
        if(cg1) {
          d0 += qm;
          d1 += qp;
        }
        else {
          d0 += qp;
          d1 += qm;
        }

#if (VERBOSE>1)
        printf("    i=%d d0=%d d1=%d\n", i, d0, d1);
#endif

        tptr = &tbbuf[thisbuf][i];
        cg = *tptr;

        cg = cg<<1;               /* serialize traceback */
        if (d0 < d1) {            /* easy since metrics positive! */
          nm[i] = d0;
        }
        else {
          cg |= 1;                /* decision bit 1: odd predecessor won */
          nm[i] = d1;
        }
        *tptr = cg;

        /* UNROLL: MSB=1 (state i+NDIV2)
         * Rather than recomputing from om[], swap the p/m term already
         * added to each candidate by adding or subtracting (p - m).
         * The differences may wrap; unsigned arithmetic is modular, so
         * the sums still come out right. */

        cg = ip-im;
        if(cg0) {
          d0 += cg;               /* had +im, now +ip */
          d1 -= cg;               /* had +ip, now +im */
        }
        else {
          d0 -= cg;               /* had +ip, now +im */
          d1 += cg;               /* had +im, now +ip */
        }
        cg = qp-qm;
        if(cg1) {
          d0 += cg;               /* had +qm, now +qp */
          d1 -= cg;               /* had +qp, now +qm */
        }
        else {
          d0 -= cg;               /* had +qp, now +qm */
          d1 += cg;               /* had +qm, now +qp */
        }

#if (VERBOSE>1)
        printf("    i=%d d0=%d d1=%d\n", i+NDIV2, d0, d1);
#endif

        k = i+NDIV2;
        tptr = &tbbuf[thisbuf][k];
        cg = *tptr;

        cg = cg<<1;               /* serialize traceback */
        if (d0 < d1) {            /* easy since metrics positive! */
          nm[k] = d0;
        }
        else {
          cg |= 1;
          nm[k] = d1;
        }
        *tptr = cg;
      }

      /* swap metric buffers */
      tptr = om;
      om = nm;
      nm = tptr;

    }

    qm = thisbuf;                 /* save traceback buffer number */
#if (SIZETBMASK)
    thisbuf = (thisbuf+1)&SIZETBMASK;
#else
    thisbuf = (thisbuf+1)%SIZETB;
#endif

    /* index wrapped to 0: every traceback word now holds valid history */
    if(!thisbuf)
      startoutput = 1;

    if(startoutput) {

      /* choose arbitrary start point for traceback, output last 32 bits */
      im = 0;                     /* state */
      tptr = tbbuf[qm];
#if (BESTTB)
      /* this does smarter (costlier) traceback */
      d0 = om[0];
      for (d1=N; d1--; ) {
        if(om[d1]<d0) {
          d0 = om[d1];
          im = d1;
        }
      }
#endif

      /* Walk back through the newest SIZETB-1 words without emitting
       * anything; the last (oldest) word is handled separately below
       * (last iteration unrolled for efficiency). */
#if (SIZETBMASK)
      for (i=SIZETBMASK; i--; ) {
#else
      for (i=SIZETB-1; i--; ) {
#endif
        for (cg=0; cg<WLEN; cg++) {
          /* bit cg of a word is the decision made cg symbols before
           * the end of that word */
          /* this is bad on anything that doesn't have a barrel shifter */
          im = ((im<<1)&SRMASK) | ((tptr[im]>>cg)&1);
        }
#if (SIZETBMASK)
        qm = (qm-1)&SIZETBMASK;
#else
        qm = (SIZETB+qm-1)%SIZETB;
#endif
        tptr = tbbuf[qm];
      }

      /* Start the output word from a known value instead of relying on
       * WLEN shifts to push out whatever d0 held before. */
      d0 = 0;
      for (cg=0; cg<WLEN; cg++) {
#if (VERBOSE>3)
        printf("=== tbim=%x\n", im);
#endif
        /* top state bit is the decoded bit; the earliest bit in time is
         * shifted in last and so ends up in bit 0 */
        d0 = (d0<<1);
        if (im & PUSHST)
        {
          d0 |= 1;
        }
        /* this is bad on anything that doesn't have a barrel shifter */
        im = ((im<<1)&SRMASK) | ((tptr[im]>>cg)&1);
      }

      *outport = d0;
#if (DEBUG)
      /* check bits in time order, lowest bit first */
      for (i=WLEN; i--;) {
        printf("%d", (int)(d0 & 1));
        if ((d0 & 1) != *chkbit++) {
          errs++;
        }
        d0 = (d0>>1);
      }
      printf("\n");
      /* pointer difference is ptrdiff_t; convert so it matches %d */
      printf("After %d, errs=%d\n", (int)(chkbit-bitsdata), (int)errs);
#endif

    }

  }

  return 0;
}