Statistics
| Branch: | Revision:

napa-baselibs / ml / fec / RSfec.c @ 98141414

History | View | Annotate | Download (23.7 KB)

1
/*
2
 * fec.c -- forward error correction based on Vandermonde matrices
3
 * 980624
4
 * (C) 1997-98 Luigi Rizzo (luigi@iet.unipi.it)
5
 *
6
 * Portions derived from code by Phil Karn (karn@ka9q.ampr.org),
7
 * Robert Morelos-Zaragoza (robert@spectra.eng.hawaii.edu) and Hari
8
 * Thirumoorthy (harit@spectra.eng.hawaii.edu), Aug 1995
9
 *
10
 * Redistribution and use in source and binary forms, with or without
11
 * modification, are permitted provided that the following conditions
12
 * are met:
13
 *
14
 * 1. Redistributions of source code must retain the above copyright
15
 *    notice, this list of conditions and the following disclaimer.
16
 * 2. Redistributions in binary form must reproduce the above
17
 *    copyright notice, this list of conditions and the following
18
 *    disclaimer in the documentation and/or other materials
19
 *    provided with the distribution.
20
 *
21
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND
22
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
23
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
24
 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS
25
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
26
 * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
28
 * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
30
 * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
32
 * OF SUCH DAMAGE.
33
 */
34

    
35
/*
36
 * The following parameter defines how many bits are used for
37
 * field elements. The code supports any value from 2 to 16
38
 * but fastest operation is achieved with 8 bit elements
39
 * This is the only parameter you may want to change.
40
 */
41
#ifndef GF_BITS
42
#define GF_BITS  8        /* code over GF(2**GF_BITS) - change to suit */
43
#endif
44

    
45
#include <stdio.h>
46
#include <stdlib.h>
47
#include <string.h>
48

    
49
/*
50
 * compatibility stuff
51
 */
52
#ifdef MSDOS        /* but also for others, e.g. sun... */
53
#define NEED_BCOPY
54
#define bcmp(a,b,n) memcmp(a,b,n)
55
#endif
56

    
57
#ifdef NEED_BCOPY
58
#define bcopy(s, d, siz)        memcpy((d), (s), (siz))
59
#define bzero(d, siz)   memset((d), '\0', (siz))
60
#endif
61

    
62
/*
63
 * stuff used for testing purposes only
64
 */
65

    
66
#ifdef        TEST
67
#define DEB(x)
68
#define DDB(x) x
69
#define        DEBUG        0        /* minimal debugging */
70
#ifdef        MSDOS
71
#include <time.h>
72
struct timeval {
73
    unsigned long ticks;
74
};
75
#define gettimeofday(x, dummy) { (x)->ticks = clock() ; }
76
#define DIFF_T(a,b) (1+ 1000000*(a.ticks - b.ticks) / CLOCKS_PER_SEC )
77
typedef unsigned long u_long ;
78
typedef unsigned short u_short ;
79
#else /* typically, unix systems */
80
#include <sys/time.h>
81
#define DIFF_T(a,b) \
82
        (1+ 1000000*(a.tv_sec - b.tv_sec) + (a.tv_usec - b.tv_usec) )
83
#endif
84

    
85
#define TICK(t) \
86
        {struct timeval x ; \
87
        gettimeofday(&x, NULL) ; \
88
        t = x.tv_usec + 1000000* (x.tv_sec & 0xff ) ; \
89
        }
90
#define TOCK(t) \
91
        { u_long t1 ; TICK(t1) ; \
92
          if (t1 < t) t = 256000000 + t1 - t ; \
93
          else t = t1 - t ; \
94
          if (t == 0) t = 1 ;}
95
        
96
u_long ticks[10];        /* vars for timekeeping */
97
#else
98
#define DEB(x)
99
#define DDB(x)
100
#define TICK(x)
101
#define TOCK(x)
102
#endif /* TEST */
103

    
104
/*
105
 * You should not need to change anything beyond this point.
106
 * The first part of the file implements linear algebra in GF.
107
 *
108
 * gf is the type used to store an element of the Galois Field.
109
 * Must constain at least GF_BITS bits.
110
 *
111
 * Note: unsigned char will work up to GF(256) but int seems to run
112
 * faster on the Pentium. We use int whenever have to deal with an
113
 * index, since they are generally faster.
114
 */
115
#if (GF_BITS < 2  && GF_BITS >16)
116
#error "GF_BITS must be 2 .. 16"
117
#endif
118
#if (GF_BITS <= 8)
119
typedef unsigned char gf;
120
#else
121
typedef unsigned short gf;
122
#endif
123

    
124
#define        GF_SIZE ((1 << GF_BITS) - 1)        /* powers of \alpha */
125

    
126
/*
127
 * Primitive polynomials - see Lin & Costello, Appendix A,
128
 * and  Lee & Messerschmitt, p. 453.
129
 */
130
static char *allPp[] = {    /* GF_BITS        polynomial                */
131
    NULL,                    /*  0        no code                        */
132
    NULL,                    /*  1        no code                        */
133
    "111",                    /*  2        1+x+x^2                        */
134
    "1101",                    /*  3        1+x+x^3                        */
135
    "11001",                    /*  4        1+x+x^4                        */
136
    "101001",                    /*  5        1+x^2+x^5                */
137
    "1100001",                    /*  6        1+x+x^6                        */
138
    "10010001",                    /*  7        1 + x^3 + x^7                */
139
    "101110001",            /*  8        1+x^2+x^3+x^4+x^8        */
140
    "1000100001",            /*  9        1+x^4+x^9                */
141
    "10010000001",            /* 10        1+x^3+x^10                */
142
    "101000000001",            /* 11        1+x^2+x^11                */
143
    "1100101000001",            /* 12        1+x+x^4+x^6+x^12        */
144
    "11011000000001",            /* 13        1+x+x^3+x^4+x^13        */
145
    "110000100010001",            /* 14        1+x+x^6+x^10+x^14        */
146
    "1100000000000001",            /* 15        1+x+x^15                */
147
    "11010000000010001"            /* 16        1+x+x^3+x^12+x^16        */
148
};
149

    
150

    
151
/*
152
 * To speed up computations, we have tables for logarithm, exponent
153
 * and inverse of a number. If GF_BITS <= 8, we use a table for
154
 * multiplication as well (it takes 64K, no big deal even on a PDA,
155
 * especially because it can be pre-initialized an put into a ROM!),
156
 * otherwhise we use a table of logarithms.
157
 * In any case the macro gf_mul(x,y) takes care of multiplications.
158
 */
159

    
160
static gf gf_exp[2*GF_SIZE];        /* index->poly form conversion table        */
161
static int gf_log[GF_SIZE + 1];        /* Poly->index form conversion table        */
162
static gf inverse[GF_SIZE+1];        /* inverse of field elem.                */
163
                                /* inv[\alpha**i]=\alpha**(GF_SIZE-i-1)        */
164

    
165
/*
166
 * modnn(x) computes x % GF_SIZE, where GF_SIZE is 2**GF_BITS - 1,
167
 * without a slow divide.
168
 */
169
static inline gf
170
modnn(int x)
171
{
172
    while (x >= GF_SIZE) {
173
        x -= GF_SIZE;
174
        x = (x >> GF_BITS) + (x & GF_SIZE);
175
    }
176
    return x;
177
}
178

    
179
#define SWAP(a,b,t) {t tmp; tmp=a; a=b; b=tmp;}
180

    
181
/*
182
 * gf_mul(x,y) multiplies two numbers. If GF_BITS<=8, it is much
183
 * faster to use a multiplication table.
184
 *
185
 * USE_GF_MULC, GF_MULC0(c) and GF_ADDMULC(x) can be used when multiplying
186
 * many numbers by the same constant. In this case the first
187
 * call sets the constant, and others perform the multiplications.
188
 * A value related to the multiplication is held in a local variable
189
 * declared with USE_GF_MULC . See usage in addmul1().
190
 */
191
#if (GF_BITS <= 8)
192
static gf gf_mul_table[GF_SIZE + 1][GF_SIZE + 1];
193

    
194
#define gf_mul(x,y) gf_mul_table[x][y]
195

    
196
#define USE_GF_MULC register gf * __gf_mulc_
197
#define GF_MULC0(c) __gf_mulc_ = gf_mul_table[c]
198
#define GF_ADDMULC(dst, x) dst ^= __gf_mulc_[x]
199

    
200
static void
201
init_mul_table()
202
{
203
    int i, j;
204
    for (i=0; i< GF_SIZE+1; i++)
205
        for (j=0; j< GF_SIZE+1; j++)
206
            gf_mul_table[i][j] = gf_exp[modnn(gf_log[i] + gf_log[j]) ] ;
207

    
208
    for (j=0; j< GF_SIZE+1; j++)
209
            gf_mul_table[0][j] = gf_mul_table[j][0] = 0;
210
}
211
#else        /* GF_BITS > 8 */
212
static inline gf
213
gf_mul(x,y)
214
{
215
    if ( (x) == 0 || (y)==0 ) return 0;
216
     
217
    return gf_exp[gf_log[x] + gf_log[y] ] ;
218
}
219
#define init_mul_table()
220

    
221
#define USE_GF_MULC register gf * __gf_mulc_
222
#define GF_MULC0(c) __gf_mulc_ = &gf_exp[ gf_log[c] ]
223
#define GF_ADDMULC(dst, x) { if (x) dst ^= __gf_mulc_[ gf_log[x] ] ; }
224
#endif
225

    
226
/*
227
 * Generate GF(2**m) from the irreducible polynomial p(X) in p[0]..p[m]
228
 * Lookup tables:
229
 *     index->polynomial form                gf_exp[] contains j= \alpha^i;
230
 *     polynomial form -> index form        gf_log[ j = \alpha^i ] = i
231
 * \alpha=x is the primitive element of GF(2^m)
232
 *
233
 * For efficiency, gf_exp[] has size 2*GF_SIZE, so that a simple
234
 * multiplication of two numbers can be resolved without calling modnn
235
 */
236

    
237
/*
238
 * i use malloc so many times, it is easier to put checks all in
239
 * one place.
240
 */
241
static void *
242
my_malloc(int sz, char *err_string)
243
{
244
    void *p = malloc( sz );
245
    if (p == NULL) {
246
        fprintf(stderr, "-- malloc failure allocating %s\n", err_string);
247
        exit(1) ;
248
    }
249
    return p ;
250
}
251

    
252
#define NEW_GF_MATRIX(rows, cols) \
253
    (gf *)my_malloc(rows * cols * sizeof(gf), " ## __LINE__ ## " )
254

    
255
/*
256
 * initialize the data structures used for computations in GF.
257
 */
258
static void
259
generate_gf(void)
260
{
261
    int i;
262
    gf mask;
263
    char *Pp =  allPp[GF_BITS] ;
264

    
265
    mask = 1;        /* x ** 0 = 1 */
266
    gf_exp[GF_BITS] = 0; /* will be updated at the end of the 1st loop */
267
    /*
268
     * first, generate the (polynomial representation of) powers of \alpha,
269
     * which are stored in gf_exp[i] = \alpha ** i .
270
     * At the same time build gf_log[gf_exp[i]] = i .
271
     * The first GF_BITS powers are simply bits shifted to the left.
272
     */
273
    for (i = 0; i < GF_BITS; i++, mask <<= 1 ) {
274
        gf_exp[i] = mask;
275
        gf_log[gf_exp[i]] = i;
276
        /*
277
         * If Pp[i] == 1 then \alpha ** i occurs in poly-repr
278
         * gf_exp[GF_BITS] = \alpha ** GF_BITS
279
         */
280
        if ( Pp[i] == '1' )
281
            gf_exp[GF_BITS] ^= mask;
282
    }
283
    /*
284
     * now gf_exp[GF_BITS] = \alpha ** GF_BITS is complete, so can als
285
     * compute its inverse.
286
     */
287
    gf_log[gf_exp[GF_BITS]] = GF_BITS;
288
    /*
289
     * Poly-repr of \alpha ** (i+1) is given by poly-repr of
290
     * \alpha ** i shifted left one-bit and accounting for any
291
     * \alpha ** GF_BITS term that may occur when poly-repr of
292
     * \alpha ** i is shifted.
293
     */
294
    mask = 1 << (GF_BITS - 1 ) ;
295
    for (i = GF_BITS + 1; i < GF_SIZE; i++) {
296
        if (gf_exp[i - 1] >= mask)
297
            gf_exp[i] = gf_exp[GF_BITS] ^ ((gf_exp[i - 1] ^ mask) << 1);
298
        else
299
            gf_exp[i] = gf_exp[i - 1] << 1;
300
        gf_log[gf_exp[i]] = i;
301
    }
302
    /*
303
     * log(0) is not defined, so use a special value
304
     */
305
    gf_log[0] =        GF_SIZE ;
306
    /* set the extended gf_exp values for fast multiply */
307
    for (i = 0 ; i < GF_SIZE ; i++)
308
        gf_exp[i + GF_SIZE] = gf_exp[i] ;
309

    
310
    /*
311
     * again special cases. 0 has no inverse. This used to
312
     * be initialized to GF_SIZE, but it should make no difference
313
     * since noone is supposed to read from here.
314
     */
315
    inverse[0] = 0 ;
316
    inverse[1] = 1;
317
    for (i=2; i<=GF_SIZE; i++)
318
        inverse[i] = gf_exp[GF_SIZE-gf_log[i]];
319
}
320

    
321
/*
322
 * Various linear algebra operations that i use often.
323
 */
324

    
325
/*
326
 * addmul() computes dst[] = dst[] + c * src[]
327
 * This is used often, so better optimize it! Currently the loop is
328
 * unrolled 16 times, a good value for 486 and pentium-class machines.
329
 * The case c=0 is also optimized, whereas c=1 is not. These
330
 * calls are unfrequent in my typical apps so I did not bother.
331
 * 
332
 * Note that gcc on
333
 */
334
#define addmul(dst, src, c, sz) \
335
    if (c != 0) addmul1(dst, src, c, sz)
336

    
337
#define UNROLL 16 /* 1, 4, 8, 16 */
338
static void
339
addmul1(gf *dst1, gf *src1, gf c, int sz)
340
{
341
    USE_GF_MULC ;
342
    register gf *dst = dst1, *src = src1 ;
343
    gf *lim = &dst[sz - UNROLL + 1] ;
344

    
345
    GF_MULC0(c) ;
346

    
347
#if (UNROLL > 1) /* unrolling by 8/16 is quite effective on the pentium */
348
    for (; dst < lim ; dst += UNROLL, src += UNROLL ) {
349
        GF_ADDMULC( dst[0] , src[0] );
350
        GF_ADDMULC( dst[1] , src[1] );
351
        GF_ADDMULC( dst[2] , src[2] );
352
        GF_ADDMULC( dst[3] , src[3] );
353
#if (UNROLL > 4)
354
        GF_ADDMULC( dst[4] , src[4] );
355
        GF_ADDMULC( dst[5] , src[5] );
356
        GF_ADDMULC( dst[6] , src[6] );
357
        GF_ADDMULC( dst[7] , src[7] );
358
#endif
359
#if (UNROLL > 8)
360
        GF_ADDMULC( dst[8] , src[8] );
361
        GF_ADDMULC( dst[9] , src[9] );
362
        GF_ADDMULC( dst[10] , src[10] );
363
        GF_ADDMULC( dst[11] , src[11] );
364
        GF_ADDMULC( dst[12] , src[12] );
365
        GF_ADDMULC( dst[13] , src[13] );
366
        GF_ADDMULC( dst[14] , src[14] );
367
        GF_ADDMULC( dst[15] , src[15] );
368
#endif
369
    }
370
#endif
371
    lim += UNROLL - 1 ;
372
    for (; dst < lim; dst++, src++ )                /* final components */
373
        GF_ADDMULC( *dst , *src );
374
}
375

    
376
/*
377
 * computes C = AB where A is n*k, B is k*m, C is n*m
378
 */
379
static void
380
matmul(gf *a, gf *b, gf *c, int n, int k, int m)
381
{
382
    int row, col, i ;
383

    
384
    for (row = 0; row < n ; row++) {
385
        for (col = 0; col < m ; col++) {
386
            gf *pa = &a[ row * k ];
387
            gf *pb = &b[ col ];
388
            gf acc = 0 ;
389
            for (i = 0; i < k ; i++, pa++, pb += m )
390
                acc ^= gf_mul( *pa, *pb ) ;
391
            c[ row * m + col ] = acc ;
392
        }
393
    }
394
}
395

    
396
#ifdef DEBUG
397
/*
398
 * returns 1 if the square matrix is identiy
399
 * (only for test)
400
 */
401
static int
402
is_identity(gf *m, int k)
403
{
404
    int row, col ;
405
    for (row=0; row<k; row++)
406
        for (col=0; col<k; col++)
407
            if ( (row==col && *m != 1) ||
408
                 (row!=col && *m != 0) )
409
                 return 0 ;
410
            else
411
                m++ ;
412
    return 1 ;
413
}
414
#endif /* debug */
415

    
416
/*
417
 * invert_mat() takes a matrix and produces its inverse
418
 * k is the size of the matrix.
419
 * (Gauss-Jordan, adapted from Numerical Recipes in C)
420
 * Return non-zero if singular.
421
 */
422
DEB( int pivloops=0; int pivswaps=0 ; /* diagnostic */)
423
static int
424
invert_mat(gf *src, int k)
425
{
426
    gf c, *p ;
427
    int irow, icol, row, col, i, ix ;
428

    
429
    int error = 1 ;
430
    int *indxc = my_malloc(k*sizeof(int), "indxc");
431
    int *indxr = my_malloc(k*sizeof(int), "indxr");
432
    int *ipiv = my_malloc(k*sizeof(int), "ipiv");
433
    gf *id_row = NEW_GF_MATRIX(1, k);
434
    gf *temp_row = NEW_GF_MATRIX(1, k);
435

    
436
    bzero(id_row, k*sizeof(gf));
437
    DEB( pivloops=0; pivswaps=0 ; /* diagnostic */ )
438
    /*
439
     * ipiv marks elements already used as pivots.
440
     */
441
    for (i = 0; i < k ; i++)
442
        ipiv[i] = 0 ;
443

    
444
    for (col = 0; col < k ; col++) {
445
        gf *pivot_row ;
446
        /*
447
         * Zeroing column 'col', look for a non-zero element.
448
         * First try on the diagonal, if it fails, look elsewhere.
449
         */
450
        irow = icol = -1 ;
451
        if (ipiv[col] != 1 && src[col*k + col] != 0) {
452
            irow = col ;
453
            icol = col ;
454
            goto found_piv ;
455
        }
456
        for (row = 0 ; row < k ; row++) {
457
            if (ipiv[row] != 1) {
458
                for (ix = 0 ; ix < k ; ix++) {
459
                    DEB( pivloops++ ; )
460
                    if (ipiv[ix] == 0) {
461
                        if (src[row*k + ix] != 0) {
462
                            irow = row ;
463
                            icol = ix ;
464
                            goto found_piv ;
465
                        }
466
                    } else if (ipiv[ix] > 1) {
467
                        fprintf(stderr, "singular matrix\n");
468
                        goto fail ; 
469
                    }
470
                }
471
            }
472
        }
473
        if (icol == -1) {
474
            fprintf(stderr, "XXX pivot not found!\n");
475
            goto fail ;
476
        }
477
found_piv:
478
        ++(ipiv[icol]) ;
479
        /*
480
         * swap rows irow and icol, so afterwards the diagonal
481
         * element will be correct. Rarely done, not worth
482
         * optimizing.
483
         */
484
        if (irow != icol) {
485
            for (ix = 0 ; ix < k ; ix++ ) {
486
                SWAP( src[irow*k + ix], src[icol*k + ix], gf) ;
487
            }
488
        }
489
        indxr[col] = irow ;
490
        indxc[col] = icol ;
491
        pivot_row = &src[icol*k] ;
492
        c = pivot_row[icol] ;
493
        if (c == 0) {
494
            fprintf(stderr, "singular matrix 2\n");
495
            goto fail ;
496
        }
497
        if (c != 1 ) { /* otherwhise this is a NOP */
498
            /*
499
             * this is done often , but optimizing is not so
500
             * fruitful, at least in the obvious ways (unrolling)
501
             */
502
            DEB( pivswaps++ ; )
503
            c = inverse[ c ] ;
504
            pivot_row[icol] = 1 ;
505
            for (ix = 0 ; ix < k ; ix++ )
506
                pivot_row[ix] = gf_mul(c, pivot_row[ix] );
507
        }
508
        /*
509
         * from all rows, remove multiples of the selected row
510
         * to zero the relevant entry (in fact, the entry is not zero
511
         * because we know it must be zero).
512
         * (Here, if we know that the pivot_row is the identity,
513
         * we can optimize the addmul).
514
         */
515
        id_row[icol] = 1;
516
        if (bcmp(pivot_row, id_row, k*sizeof(gf)) != 0) {
517
            for (p = src, ix = 0 ; ix < k ; ix++, p += k ) {
518
                if (ix != icol) {
519
                    c = p[icol] ;
520
                    p[icol] = 0 ;
521
                    addmul(p, pivot_row, c, k );
522
                }
523
            }
524
        }
525
        id_row[icol] = 0;
526
    } /* done all columns */
527
    for (col = k-1 ; col >= 0 ; col-- ) {
528
        if (indxr[col] <0 || indxr[col] >= k)
529
            fprintf(stderr, "AARGH, indxr[col] %d\n", indxr[col]);
530
        else if (indxc[col] <0 || indxc[col] >= k)
531
            fprintf(stderr, "AARGH, indxc[col] %d\n", indxc[col]);
532
        else
533
        if (indxr[col] != indxc[col] ) {
534
            for (row = 0 ; row < k ; row++ ) {
535
                SWAP( src[row*k + indxr[col]], src[row*k + indxc[col]], gf) ;
536
            }
537
        }
538
    }
539
    error = 0 ;
540
fail:
541
    free(indxc);
542
    free(indxr);
543
    free(ipiv);
544
    free(id_row);
545
    free(temp_row);
546
    return error ;
547
}
548

    
549
/*
550
 * fast code for inverting a vandermonde matrix.
551
 * XXX NOTE: It assumes that the matrix
552
 * is not singular and _IS_ a vandermonde matrix. Only uses
553
 * the second column of the matrix, containing the p_i's.
554
 *
555
 * Algorithm borrowed from "Numerical recipes in C" -- sec.2.8, but
556
 * largely revised for my purposes.
557
 * p = coefficients of the matrix (p_i)
558
 * q = values of the polynomial (known)
559
 */
560

    
561
int
562
invert_vdm(gf *src, int k)
563
{
564
    int i, j, row, col ;
565
    gf *b, *c, *p;
566
    gf t, xx ;
567

    
568
    if (k == 1)         /* degenerate case, matrix must be p^0 = 1 */
569
        return 0 ;
570
    /*
571
     * c holds the coefficient of P(x) = Prod (x - p_i), i=0..k-1
572
     * b holds the coefficient for the matrix inversion
573
     */
574
    c = NEW_GF_MATRIX(1, k);
575
    b = NEW_GF_MATRIX(1, k);
576

    
577
    p = NEW_GF_MATRIX(1, k);
578
   
579
    for ( j=1, i = 0 ; i < k ; i++, j+=k ) {
580
        c[i] = 0 ;
581
        p[i] = src[j] ;    /* p[i] */
582
    }
583
    /*
584
     * construct coeffs. recursively. We know c[k] = 1 (implicit)
585
     * and start P_0 = x - p_0, then at each stage multiply by
586
     * x - p_i generating P_i = x P_{i-1} - p_i P_{i-1}
587
     * After k steps we are done.
588
     */
589
    c[k-1] = p[0] ;        /* really -p(0), but x = -x in GF(2^m) */
590
    for (i = 1 ; i < k ; i++ ) {
591
        gf p_i = p[i] ; /* see above comment */
592
        for (j = k-1  - ( i - 1 ) ; j < k-1 ; j++ )
593
            c[j] ^= gf_mul( p_i, c[j+1] ) ;
594
        c[k-1] ^= p_i ;
595
    }
596

    
597
    for (row = 0 ; row < k ; row++ ) {
598
        /*
599
         * synthetic division etc.
600
         */
601
        xx = p[row] ;
602
        t = 1 ;
603
        b[k-1] = 1 ; /* this is in fact c[k] */
604
        for (i = k-2 ; i >= 0 ; i-- ) {
605
            b[i] = c[i+1] ^ gf_mul(xx, b[i+1]) ;
606
            t = gf_mul(xx, t) ^ b[i] ;
607
        }
608
        for (col = 0 ; col < k ; col++ )
609
            src[col*k + row] = gf_mul(inverse[t], b[col] );
610
    }
611
    free(c) ;
612
    free(b) ;
613
    free(p) ;
614
    return 0 ;
615
}
616

    
617
static int fec_initialized = 0 ;
618
static void
619
init_fec()
620
{
621
    TICK(ticks[0]);
622
    generate_gf();
623
    TOCK(ticks[0]);
624
    DDB(fprintf(stderr, "generate_gf took %ldus\n", ticks[0]);)
625
    TICK(ticks[0]);
626
    init_mul_table();
627
    TOCK(ticks[0]);
628
    DDB(fprintf(stderr, "init_mul_table took %ldus\n", ticks[0]);)
629
    fec_initialized = 1 ;
630
}
631

    
632
/*
633
 * This section contains the proper FEC encoding/decoding routines.
634
 * The encoding matrix is computed starting with a Vandermonde matrix,
635
 * and then transforming it into a systematic matrix.
636
 */
637

    
638
#define FEC_MAGIC        0xFECC0DEC
639

    
640
struct fec_parms {
641
    u_long magic ;
642
    int k, n ;                /* parameters of the code */
643
    gf *enc_matrix ;
644
} ;
645

    
646
void
647
fec_free(struct fec_parms *p)
648
{
649
    if (p==NULL ||
650
       p->magic != ( ( (FEC_MAGIC ^ p->k) ^ p->n) ^ (int)(p->enc_matrix)) ) {
651
        fprintf(stderr, "bad parameters to fec_free\n");
652
        return ;
653
    }
654
    free(p->enc_matrix);
655
    free(p);
656
}
657

    
658
/*
659
 * create a new encoder, returning a descriptor. This contains k,n and
660
 * the encoding matrix.
661
 */
662
struct fec_parms *
663
fec_new(int k, int n)
664
{
665
    int row, col ;
666
    gf *p, *tmp_m ;
667

    
668
    struct fec_parms *retval ;
669

    
670
    if (fec_initialized == 0)
671
        init_fec();
672

    
673
    if (k > GF_SIZE + 1 || n > GF_SIZE + 1 || k > n ) {
674
        fprintf(stderr, "Invalid parameters k %d n %d GF_SIZE %d\n",
675
                k, n, GF_SIZE );
676
        return NULL ;
677
    }
678
    retval = my_malloc(sizeof(struct fec_parms), "new_code");
679
    retval->k = k ;
680
    retval->n = n ;
681
    retval->enc_matrix = NEW_GF_MATRIX(n, k);
682
    retval->magic = ( ( FEC_MAGIC ^ k) ^ n) ^ (int)(retval->enc_matrix) ;
683
    tmp_m = NEW_GF_MATRIX(n, k);
684
    /*
685
     * fill the matrix with powers of field elements, starting from 0.
686
     * The first row is special, cannot be computed with exp. table.
687
     */
688
    tmp_m[0] = 1 ;
689
    for (col = 1; col < k ; col++)
690
        tmp_m[col] = 0 ;
691
    for (p = tmp_m + k, row = 0; row < n-1 ; row++, p += k) {
692
        for ( col = 0 ; col < k ; col ++ )
693
            p[col] = gf_exp[modnn(row*col)];
694
    }
695

    
696
    /*
697
     * quick code to build systematic matrix: invert the top
698
     * k*k vandermonde matrix, multiply right the bottom n-k rows
699
     * by the inverse, and construct the identity matrix at the top.
700
     */
701
    TICK(ticks[3]);
702
    invert_vdm(tmp_m, k); /* much faster than invert_mat */
703
    matmul(tmp_m + k*k, tmp_m, retval->enc_matrix + k*k, n - k, k, k);
704
    /*
705
     * the upper matrix is I so do not bother with a slow multiply
706
     */
707
    bzero(retval->enc_matrix, k*k*sizeof(gf) );
708
    for (p = retval->enc_matrix, col = 0 ; col < k ; col++, p += k+1 )
709
        *p = 1 ;
710
    free(tmp_m);
711
    TOCK(ticks[3]);
712

    
713
    DDB(fprintf(stderr, "--- %ld us to build encoding matrix\n",
714
            ticks[3]);)
715
    DEB(pr_matrix(retval->enc_matrix, n, k, "encoding_matrix");)
716
    return retval ;
717
}
718

    
719
/*
720
 * fec_encode accepts as input pointers to n data packets of size sz,
721
 * and produces as output a packet pointed to by fec, computed
722
 * with index "index".
723
 */
724
void
725
fec_encode(struct fec_parms *code, gf *src[], gf *fec, int index, int sz)
726
{
727
    int i, k = code->k ;
728
    gf *p ;
729

    
730
    if (GF_BITS > 8)
731
        sz /= 2 ;
732

    
733
    if (index < k)
734
         bcopy(src[index], fec, sz*sizeof(gf) ) ;
735
    else if (index < code->n) {
736
        p = &(code->enc_matrix[index*k] );
737
        bzero(fec, sz*sizeof(gf));
738
        for (i = 0; i < k ; i++)
739
            addmul(fec, src[i], p[i], sz ) ;
740
    } else
741
        fprintf(stderr, "Invalid index %d (max %d)\n",
742
            index, code->n - 1 );
743
}
744

    
745
/*
746
 * shuffle move src packets in their position
747
 */
748
static int
749
shuffle(gf *pkt[], int index[], int k)
750
{
751
    int i;
752

    
753
    for ( i = 0 ; i < k ; ) {
754
        if (index[i] >= k || index[i] == i)
755
            i++ ;
756
        else {
757
            /*
758
             * put pkt in the right position (first check for conflicts).
759
             */
760
            int c = index[i] ;
761

    
762
            if (index[c] == c) {
763
                DEB(fprintf(stderr, "\nshuffle, error at %d\n", i);)
764
                return 1 ;
765
            }
766
            SWAP(index[i], index[c], int) ;
767
            SWAP(pkt[i], pkt[c], gf *) ;
768
        }
769
    }
770
    DEB( /* just test that it works... */
771
    for ( i = 0 ; i < k ; i++ ) {
772
        if (index[i] < k && index[i] != i) {
773
            fprintf(stderr, "shuffle: after\n");
774
            for (i=0; i<k ; i++) fprintf(stderr, "%3d ", index[i]);
775
            fprintf(stderr, "\n");
776
            return 1 ;
777
        }
778
    }
779
    )
780
    return 0 ;
781
}
782

    
783
/*
784
 * build_decode_matrix constructs the encoding matrix given the
785
 * indexes. The matrix must be already allocated as
786
 * a vector of k*k elements, in row-major order
787
 */
788
static gf *
789
build_decode_matrix(struct fec_parms *code, gf *pkt[], int index[])
790
{
791
    int i , k = code->k ;
792
    gf *p, *matrix = NEW_GF_MATRIX(k, k);
793

    
794
    TICK(ticks[9]);
795
    for (i = 0, p = matrix ; i < k ; i++, p += k ) {
796
#if 1 /* this is simply an optimization, not very useful indeed */
797
        if (index[i] < k) {
798
            bzero(p, k*sizeof(gf) );
799
            p[i] = 1 ;
800
        } else
801
#endif
802
        if (index[i] < code->n )
803
            bcopy( &(code->enc_matrix[index[i]*k]), p, k*sizeof(gf) ); 
804
        else {
805
            fprintf(stderr, "decode: invalid index %d (max %d)\n",
806
                index[i], code->n - 1 );
807
            free(matrix) ;
808
            return NULL ;
809
        }
810
    }
811
    TICK(ticks[9]);
812
    if (invert_mat(matrix, k)) {
813
        free(matrix);
814
        matrix = NULL ;
815
    }
816
    TOCK(ticks[9]);
817
    return matrix ;
818
}
819

    
820
/*
821
 * fec_decode receives as input a vector of packets, the indexes of
822
 * packets, and produces the correct vector as output.
823
 *
824
 * Input:
825
 *        code: pointer to code descriptor
826
 *        pkt:  pointers to received packets. They are modified
827
 *              to store the output packets (in place)
828
 *        index: pointer to packet indexes (modified)
829
 *        sz:    size of each packet
830
 */
831
int
832
fec_decode(struct fec_parms *code, gf *pkt[], int index[], int sz)
833
{
834
    gf *m_dec ; 
835
    gf **new_pkt ;
836
    int row, col , k = code->k ;
837

    
838
    if (GF_BITS > 8)
839
        sz /= 2 ;
840

    
841
    if (shuffle(pkt, index, k))        /* error if true */
842
        return 1 ;
843
    m_dec = build_decode_matrix(code, pkt, index);
844

    
845
    if (m_dec == NULL)
846
        return 1 ; /* error */
847
    /*
848
     * do the actual decoding
849
     */
850
    new_pkt = my_malloc (k * sizeof (gf * ), "new pkt pointers" );
851
    for (row = 0 ; row < k ; row++ ) {
852
        if (index[row] >= k) {
853
            new_pkt[row] = my_malloc (sz * sizeof (gf), "new pkt buffer" );
854
            bzero(new_pkt[row], sz * sizeof(gf) ) ;
855
            for (col = 0 ; col < k ; col++ )
856
                addmul(new_pkt[row], pkt[col], m_dec[row*k + col], sz) ;
857
        }
858
    }
859
    /*
860
     * move pkts to their final destination
861
     */
862
    for (row = 0 ; row < k ; row++ ) {
863
        if (index[row] >= k) {
864
            bcopy(new_pkt[row], pkt[row], sz*sizeof(gf));
865
            free(new_pkt[row]);
866
        }
867
    }
868
    free(new_pkt);
869
    free(m_dec);
870

    
871
    return 0;
872
}
873

    
874
/*********** end of FEC code -- beginning of test code ************/
875

    
876
#if (TEST || DEBUG)
877
void
878
test_gf()
879
{
880
    int i ;
881
    /*
882
     * test gf tables. Sufficiently tested...
883
     */
884
    for (i=0; i<= GF_SIZE; i++) {
885
        if (gf_exp[gf_log[i]] != i)
886
            fprintf(stderr, "bad exp/log i %d log %d exp(log) %d\n",
887
                i, gf_log[i], gf_exp[gf_log[i]]);
888

    
889
        if (i != 0 && gf_mul(i, inverse[i]) != 1)
890
            fprintf(stderr, "bad mul/inv i %d inv %d i*inv(i) %d\n",
891
                i, inverse[i], gf_mul(i, inverse[i]) );
892
        if (gf_mul(0,i) != 0)
893
            fprintf(stderr, "bad mul table 0,%d\n",i);
894
        if (gf_mul(i,0) != 0)
895
            fprintf(stderr, "bad mul table %d,0\n",i);
896
    }
897
}
898
#endif /* TEST */