#define N 64 #define SRMASK 0x3f #define G0 0171 #define G1 0133 /* this falls out of favour against a simpler table lookup */ /* as long as the table can be kept in cache */ /* may need to revert to shift and look method if this isn't so */ #if 0 /* quick lookup method for number of 1 bits in a 5-bit quantity */ /* nth bit == parity for quantity n */ /* only useful on StrongArm with arbitrary shift */ /* 1001 0110 0110 1001 0110 1001 1001 0110 */ #define PARITYBIT 0x96696996 /* CODE finds parity of 6-bit quantity */ #if 0 /* this is the simple approach */ #define CODE(n) ( ((n)&1) ^ ((PARITYBIT >> ((n)>>1)) & 1) ) #endif /* this is 1 instruction more efficient */ #define CODE(n) ( ((n) ^ (PARITYBIT >> ((n)>>1))) & 1) #endif int codetable[N]; /* constant TBD */ #define FPONE FIXED_POINT_ONE /* fixed point mpy */ #define FPTIMES(a,b) ((a)*(b)>>FP_SHIFT) int metrics[2][N]; /* this is 4*32 bit traceback */ #define SIZETB 4 int tbbuf[SIZETB][N]; int *om, *nm; /* if it was hard to write, it should be hard to read */ void nextbit(int i, int q, int *tb) { /* rework this to avoid pipeline stall on the mpy */ #if 0 im1 = i - FPONE; qm1 = q - FPONE; im1 = FPTIMES(im1, im1); qm1 = FPTIMES(qm1, qm1); /* note (x+1)^2 = (x-1)^2 + 4x */ /* save us the computation cause shifts are quick */ fouri = i<<2; fourq = q<<2; #endif im1 = i - FPONE; im1 = im1*im1; qm1 = q - FPONE; qm1 = qm1*qm1; fouri = i<<2; fourq = q<<2; im1 = im1>>FP_SHIFT; qm1 = qm1>>FP_SHIFT; /* CCC=8 */ /* swap metric buffers */ /* using pointers avoids index arithmetic */ t = om; om = nm; nm = t; /* CCC=11 */ for (i=N; i--; ) { /* loop overhd, CCC=12+3N */ /* one loop unroll across MSB of i should give giant win */ /* since MSB(G0)=MSB(G1)=1 */ /* do another inversion job like the one below */ /* too complicated for now; need sleep */ /* ONLY A NOTE: DO NOT USE prev0 = (i << 1); cg0 = prev0 & G0; cg0 = CODE(cg0); <- can simplify since LSB=0 (invalid anyways, 7 bits) */ /* table lookup replaces the following */ #if 0 cg0 = i & (G0>>1); /* G0>>1 should be precomputed */ cg0 = CODE(cg0); cg1 = i & (G1>>1); cg1 = CODE(cg1); #endif cg0 = codetable[i & (G0>>1)]; cg1 = codetable[i & (G1>>1)]; /* CCC=12+9N */ d0 = im1 + qm1; d1 = d0; /* CCC=12+11N */ /* ONLY VALID SINCE LSB(G0)=LSB(G1)=1 */ /* ensures codes are inverses between m=0 and m=1 */ /* think about it if you don't get it */ if (cg0) d1 += fouri; /* check this; may be backwards */ else d0 += fouri; /* CCC=12+14N */ if (cg1) d1 += fourq; /* check this; may be backwards */ else d0 += fourq; /* CCC=12+17N */ prev0 = (i&SRMASK) << 1; prev1 = prev0 | 1; /* CCC=12+20N */ /* t = &tb[i]; */ t = tb+i; td = *t; d0 += om[prev0]; d1 += om[prev1]; /* CCC=12+26N */ td=td<<1; /* serialize traceback */ if (d0 < d1) { /* easy since metrics positive! */ /* CCC=12+28N */ nm[i] = d0; } else { td|=1; nm[i] = d1; /* worst case: CCC=12+31N */ } *t = td; /* CCC=12+32N */ } } void traceback(int tbnum) { /* CCC=1 for pass by value */ /* choose arbitrary start point for TB, output trailing 32 bits */ state=0; for (i=SIZETB; i--; ) { /* CCC=3+4*3=15 */ for (j=0; j<32; j++) { /* CCC=15+4*(1+32*3)=403 */ state = (state&SRMASK)>>1; /* CCC=403+2*4*32=659 */ if((tbbuf[tbnum][state]>>j)&1) { /* CCC=659+6*32*4=1427 */ if(!i) OUTPUT 1; /* assume one cycle for this */ state |= SRMASK+1; /* SRMASK+1 should be preevaluated */ /* CCC=1555+3*32*4=1811 */ } else if(!i) OUTPUT 0; /* CCC=1811+2*32*4=2067 */ } tbnum=(SIZETB+tbnum-1)%SIZETB; /* CCC=2067+2*4=2075 */ } } main() { thisbuf=0; startoutput=0; /* CCC for this section is cycles to decode 32 bits */ while (1) { /* CCC=1 */ for (i=32; i--; ) { /* CCC=2+3*32=98 */ READ(&i,&q); /* CCC=98+2*32=162 */ nextbit(i,q,tbbuf[thisbuf]); /* CCC=162+32*2060+32*1=65952 */ } thisbuf = (thisbuf+1)%SIZETB; /* CCC=65954 */ if(!thisbuf) startoutput=1; if(startoutput) /* CCC=65957 */ traceback(thisbuf); /* CCC=65957+2075=68032 */ } }