Statistics
| Branch: | Revision:

iof-bird / bird-2.0.1 / lib / sha512.c @ 6b3f1a54

History | View | Annotate | Download (12.1 KB)

1
/*
2
 *        BIRD Library -- SHA-512 and SHA-384 Hash Functions
3
 *
4
 *        (c) 2015 CZ.NIC z.s.p.o.
5
 *
6
 *        Based on the code from libgcrypt-1.6.0, which is
7
 *        (c) 2003, 2006, 2008, 2009 Free Software Foundation, Inc.
8
 *
9
 *        Can be freely distributed and used under the terms of the GNU GPL.
10
 */
11

    
12
#include "lib/sha512.h"
13
#include "lib/unaligned.h"
14

    
15

    
16
// #define SHA512_UNROLLED
17

    
18
void
19
sha512_init(struct hash_context *CTX)
20
{
21
  struct sha512_context *ctx = (void *) CTX;
22

    
23
  ctx->h0 = U64(0x6a09e667f3bcc908);
24
  ctx->h1 = U64(0xbb67ae8584caa73b);
25
  ctx->h2 = U64(0x3c6ef372fe94f82b);
26
  ctx->h3 = U64(0xa54ff53a5f1d36f1);
27
  ctx->h4 = U64(0x510e527fade682d1);
28
  ctx->h5 = U64(0x9b05688c2b3e6c1f);
29
  ctx->h6 = U64(0x1f83d9abfb41bd6b);
30
  ctx->h7 = U64(0x5be0cd19137e2179);
31

    
32
  ctx->nblocks = 0;
33
  ctx->count = 0;
34
}
35

    
36
void
37
sha384_init(struct hash_context *CTX)
38
{
39
  struct sha384_context *ctx = (void *) CTX;
40

    
41
  ctx->h0 = U64(0xcbbb9d5dc1059ed8);
42
  ctx->h1 = U64(0x629a292a367cd507);
43
  ctx->h2 = U64(0x9159015a3070dd17);
44
  ctx->h3 = U64(0x152fecd8f70e5939);
45
  ctx->h4 = U64(0x67332667ffc00b31);
46
  ctx->h5 = U64(0x8eb44a8768581511);
47
  ctx->h6 = U64(0xdb0c2e0d64f98fa7);
48
  ctx->h7 = U64(0x47b5481dbefa4fa4);
49

    
50
  ctx->nblocks = 0;
51
  ctx->count = 0;
52
}
53

    
54
static inline u64
55
ROTR(u64 x, u64 n)
56
{
57
  return ((x >> n) | (x << (64 - n)));
58
}
59

    
60
static inline u64
61
Ch(u64 x, u64 y, u64 z)
62
{
63
  return ((x & y) ^ ( ~x & z));
64
}
65

    
66
static inline u64
67
Maj(u64 x, u64 y, u64 z)
68
{
69
  return ((x & y) ^ (x & z) ^ (y & z));
70
}
71

    
72
static inline u64
73
sum0(u64 x)
74
{
75
  return (ROTR(x, 28) ^ ROTR(x, 34) ^ ROTR(x, 39));
76
}
77

    
78
static inline u64
79
sum1(u64 x)
80
{
81
  return (ROTR(x, 14) ^ ROTR(x, 18) ^ ROTR(x, 41));
82
}
83

    
84
static const u64 k[] =
85
{
86
    U64(0x428a2f98d728ae22), U64(0x7137449123ef65cd),
87
    U64(0xb5c0fbcfec4d3b2f), U64(0xe9b5dba58189dbbc),
88
    U64(0x3956c25bf348b538), U64(0x59f111f1b605d019),
89
    U64(0x923f82a4af194f9b), U64(0xab1c5ed5da6d8118),
90
    U64(0xd807aa98a3030242), U64(0x12835b0145706fbe),
91
    U64(0x243185be4ee4b28c), U64(0x550c7dc3d5ffb4e2),
92
    U64(0x72be5d74f27b896f), U64(0x80deb1fe3b1696b1),
93
    U64(0x9bdc06a725c71235), U64(0xc19bf174cf692694),
94
    U64(0xe49b69c19ef14ad2), U64(0xefbe4786384f25e3),
95
    U64(0x0fc19dc68b8cd5b5), U64(0x240ca1cc77ac9c65),
96
    U64(0x2de92c6f592b0275), U64(0x4a7484aa6ea6e483),
97
    U64(0x5cb0a9dcbd41fbd4), U64(0x76f988da831153b5),
98
    U64(0x983e5152ee66dfab), U64(0xa831c66d2db43210),
99
    U64(0xb00327c898fb213f), U64(0xbf597fc7beef0ee4),
100
    U64(0xc6e00bf33da88fc2), U64(0xd5a79147930aa725),
101
    U64(0x06ca6351e003826f), U64(0x142929670a0e6e70),
102
    U64(0x27b70a8546d22ffc), U64(0x2e1b21385c26c926),
103
    U64(0x4d2c6dfc5ac42aed), U64(0x53380d139d95b3df),
104
    U64(0x650a73548baf63de), U64(0x766a0abb3c77b2a8),
105
    U64(0x81c2c92e47edaee6), U64(0x92722c851482353b),
106
    U64(0xa2bfe8a14cf10364), U64(0xa81a664bbc423001),
107
    U64(0xc24b8b70d0f89791), U64(0xc76c51a30654be30),
108
    U64(0xd192e819d6ef5218), U64(0xd69906245565a910),
109
    U64(0xf40e35855771202a), U64(0x106aa07032bbd1b8),
110
    U64(0x19a4c116b8d2d0c8), U64(0x1e376c085141ab53),
111
    U64(0x2748774cdf8eeb99), U64(0x34b0bcb5e19b48a8),
112
    U64(0x391c0cb3c5c95a63), U64(0x4ed8aa4ae3418acb),
113
    U64(0x5b9cca4f7763e373), U64(0x682e6ff3d6b2b8a3),
114
    U64(0x748f82ee5defb2fc), U64(0x78a5636f43172f60),
115
    U64(0x84c87814a1f0ab72), U64(0x8cc702081a6439ec),
116
    U64(0x90befffa23631e28), U64(0xa4506cebde82bde9),
117
    U64(0xbef9a3f7b2c67915), U64(0xc67178f2e372532b),
118
    U64(0xca273eceea26619c), U64(0xd186b8c721c0c207),
119
    U64(0xeada7dd6cde0eb1e), U64(0xf57d4f7fee6ed178),
120
    U64(0x06f067aa72176fba), U64(0x0a637dc5a2c898a6),
121
    U64(0x113f9804bef90dae), U64(0x1b710b35131c471b),
122
    U64(0x28db77f523047d84), U64(0x32caab7b40c72493),
123
    U64(0x3c9ebe0a15c9bebc), U64(0x431d67c49c100d4c),
124
    U64(0x4cc5d4becb3e42b6), U64(0x597f299cfc657e2a),
125
    U64(0x5fcb6fab3ad6faec), U64(0x6c44198c4a475817)
126
};
127

    
128
/*
129
 * Transform the message W which consists of 16 64-bit-words
130
 */
131
static uint
132
sha512_transform(struct sha512_context *ctx, const byte *data)
133
{
134
  u64 a, b, c, d, e, f, g, h;
135
  u64 w[16];
136
  uint t;
137

    
138
  /* get values from the chaining vars */
139
  a = ctx->h0;
140
  b = ctx->h1;
141
  c = ctx->h2;
142
  d = ctx->h3;
143
  e = ctx->h4;
144
  f = ctx->h5;
145
  g = ctx->h6;
146
  h = ctx->h7;
147

    
148
  for (t = 0; t < 16; t++)
149
    w[t] = get_u64(data + t * 8);
150

    
151
#define S0(x) (ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7))
152
#define S1(x) (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
153

    
154
  for (t = 0; t < 80 - 16; )
155
  {
156
    u64 t1, t2;
157

    
158
    /* Performance on a AMD Athlon(tm) Dual Core Processor 4050e
159
         with gcc 4.3.3 using gcry_md_hash_buffer of each 10000 bytes
160
         initialized to 0,1,2,3...255,0,... and 1000 iterations:
161

162
         Not unrolled with macros:  440ms
163
         Unrolled with macros:      350ms
164
         Unrolled with inline:      330ms
165
     */
166
#ifndef SHA512_UNROLLED
167
    t1 = h + sum1(e) + Ch(e, f, g) + k[t] + w[t%16];
168
    w[t%16] += S1(w[(t - 2)%16]) + w[(t - 7)%16] + S0(w[(t - 15)%16]);
169
    t2 = sum0(a) + Maj(a, b, c);
170
    h = g;
171
    g = f;
172
    f = e;
173
    e = d + t1;
174
    d = c;
175
    c = b;
176
    b = a;
177
    a = t1 + t2;
178
    t++;
179
#else /* Unrolled */
180
    t1 = h + sum1(e) + Ch(e, f, g) + k[t] + w[0];
181
    w[0] += S1(w[14]) + w[9] + S0(w[1]);
182
    t2 = sum0(a) + Maj(a, b, c);
183
    d += t1;
184
    h = t1 + t2;
185

    
186
    t1 = g + sum1(d) + Ch(d, e, f) + k[t+1] + w[1];
187
    w[1] += S1(w[15]) + w[10] + S0(w[2]);
188
    t2 = sum0(h) + Maj(h, a, b);
189
    c += t1;
190
    g  = t1 + t2;
191

    
192
    t1 = f + sum1(c) + Ch(c, d, e) + k[t+2] + w[2];
193
    w[2] += S1(w[0]) + w[11] + S0(w[3]);
194
    t2 = sum0(g) + Maj(g, h, a);
195
    b += t1;
196
    f  = t1 + t2;
197

    
198
    t1 = e + sum1(b) + Ch(b, c, d) + k[t+3] + w[3];
199
    w[3] += S1(w[1]) + w[12] + S0(w[4]);
200
    t2 = sum0(f) + Maj(f, g, h);
201
    a += t1;
202
    e  = t1 + t2;
203

    
204
    t1 = d + sum1(a) + Ch(a, b, c) + k[t+4] + w[4];
205
    w[4] += S1(w[2]) + w[13] + S0(w[5]);
206
    t2 = sum0(e) + Maj(e, f, g);
207
    h += t1;
208
    d  = t1 + t2;
209

    
210
    t1 = c + sum1(h) + Ch(h, a, b) + k[t+5] + w[5];
211
    w[5] += S1(w[3]) + w[14] + S0(w[6]);
212
    t2 = sum0(d) + Maj(d, e, f);
213
    g += t1;
214
    c  = t1 + t2;
215

    
216
    t1 = b + sum1(g) + Ch(g, h, a) + k[t+6] + w[6];
217
    w[6] += S1(w[4]) + w[15] + S0(w[7]);
218
    t2 = sum0(c) + Maj(c, d, e);
219
    f += t1;
220
    b  = t1 + t2;
221

    
222
    t1 = a + sum1(f) + Ch(f, g, h) + k[t+7] + w[7];
223
    w[7] += S1(w[5]) + w[0] + S0(w[8]);
224
    t2 = sum0(b) + Maj(b, c, d);
225
    e += t1;
226
    a  = t1 + t2;
227

    
228
    t1 = h + sum1(e) + Ch(e, f, g) + k[t+8] + w[8];
229
    w[8] += S1(w[6]) + w[1] + S0(w[9]);
230
    t2 = sum0(a) + Maj(a, b, c);
231
    d += t1;
232
    h  = t1 + t2;
233

    
234
    t1 = g + sum1(d) + Ch(d, e, f) + k[t+9] + w[9];
235
    w[9] += S1(w[7]) + w[2] + S0(w[10]);
236
    t2 = sum0(h) + Maj(h, a, b);
237
    c += t1;
238
    g  = t1 + t2;
239

    
240
    t1 = f + sum1(c) + Ch(c, d, e) + k[t+10] + w[10];
241
    w[10] += S1(w[8]) + w[3] + S0(w[11]);
242
    t2 = sum0(g) + Maj(g, h, a);
243
    b += t1;
244
    f  = t1 + t2;
245

    
246
    t1 = e + sum1(b) + Ch(b, c, d) + k[t+11] + w[11];
247
    w[11] += S1(w[9]) + w[4] + S0(w[12]);
248
    t2 = sum0(f) + Maj(f, g, h);
249
    a += t1;
250
    e  = t1 + t2;
251

    
252
    t1 = d + sum1(a) + Ch(a, b, c) + k[t+12] + w[12];
253
    w[12] += S1(w[10]) + w[5] + S0(w[13]);
254
    t2 = sum0(e) + Maj(e, f, g);
255
    h += t1;
256
    d  = t1 + t2;
257

    
258
    t1 = c + sum1(h) + Ch(h, a, b) + k[t+13] + w[13];
259
    w[13] += S1(w[11]) + w[6] + S0(w[14]);
260
    t2 = sum0(d) + Maj(d, e, f);
261
    g += t1;
262
    c  = t1 + t2;
263

    
264
    t1 = b + sum1(g) + Ch(g, h, a) + k[t+14] + w[14];
265
    w[14] += S1(w[12]) + w[7] + S0(w[15]);
266
    t2 = sum0(c) + Maj(c, d, e);
267
    f += t1;
268
    b  = t1 + t2;
269

    
270
    t1 = a + sum1(f) + Ch(f, g, h) + k[t+15] + w[15];
271
    w[15] += S1(w[13]) + w[8] + S0(w[0]);
272
    t2 = sum0(b) + Maj(b, c, d);
273
    e += t1;
274
    a  = t1 + t2;
275

    
276
    t += 16;
277
#endif
278
  }
279

    
280
  for (; t < 80; )
281
  {
282
    u64 t1, t2;
283

    
284
#ifndef SHA512_UNROLLED
285
    t1 = h + sum1(e) + Ch(e, f, g) + k[t] + w[t%16];
286
    t2 = sum0(a) + Maj(a, b, c);
287
    h = g;
288
    g = f;
289
    f = e;
290
    e = d + t1;
291
    d = c;
292
    c = b;
293
    b = a;
294
    a = t1 + t2;
295
    t++;
296
#else /* Unrolled */
297
    t1 = h + sum1(e) + Ch(e, f, g) + k[t] + w[0];
298
    t2 = sum0(a) + Maj(a, b, c);
299
    d += t1;
300
    h  = t1 + t2;
301

    
302
    t1 = g + sum1(d) + Ch(d, e, f) + k[t+1] + w[1];
303
    t2 = sum0(h) + Maj(h, a, b);
304
    c += t1;
305
    g  = t1 + t2;
306

    
307
    t1 = f + sum1(c) + Ch(c, d, e) + k[t+2] + w[2];
308
    t2 = sum0(g) + Maj(g, h, a);
309
    b += t1;
310
    f  = t1 + t2;
311

    
312
    t1 = e + sum1(b) + Ch(b, c, d) + k[t+3] + w[3];
313
    t2 = sum0(f) + Maj(f, g, h);
314
    a += t1;
315
    e  = t1 + t2;
316

    
317
    t1 = d + sum1(a) + Ch(a, b, c) + k[t+4] + w[4];
318
    t2 = sum0(e) + Maj(e, f, g);
319
    h += t1;
320
    d  = t1 + t2;
321

    
322
    t1 = c + sum1(h) + Ch(h, a, b) + k[t+5] + w[5];
323
    t2 = sum0(d) + Maj(d, e, f);
324
    g += t1;
325
    c  = t1 + t2;
326

    
327
    t1 = b + sum1(g) + Ch(g, h, a) + k[t+6] + w[6];
328
    t2 = sum0(c) + Maj(c, d, e);
329
    f += t1;
330
    b  = t1 + t2;
331

    
332
    t1 = a + sum1(f) + Ch(f, g, h) + k[t+7] + w[7];
333
    t2 = sum0(b) + Maj(b, c, d);
334
    e += t1;
335
    a  = t1 + t2;
336

    
337
    t1 = h + sum1(e) + Ch(e, f, g) + k[t+8] + w[8];
338
    t2 = sum0(a) + Maj(a, b, c);
339
    d += t1;
340
    h  = t1 + t2;
341

    
342
    t1 = g + sum1(d) + Ch(d, e, f) + k[t+9] + w[9];
343
    t2 = sum0(h) + Maj(h, a, b);
344
    c += t1;
345
    g  = t1 + t2;
346

    
347
    t1 = f + sum1(c) + Ch(c, d, e) + k[t+10] + w[10];
348
    t2 = sum0(g) + Maj(g, h, a);
349
    b += t1;
350
    f  = t1 + t2;
351

    
352
    t1 = e + sum1(b) + Ch(b, c, d) + k[t+11] + w[11];
353
    t2 = sum0(f) + Maj(f, g, h);
354
    a += t1;
355
    e  = t1 + t2;
356

    
357
    t1 = d + sum1(a) + Ch(a, b, c) + k[t+12] + w[12];
358
    t2 = sum0(e) + Maj(e, f, g);
359
    h += t1;
360
    d  = t1 + t2;
361

    
362
    t1 = c + sum1(h) + Ch(h, a, b) + k[t+13] + w[13];
363
    t2 = sum0(d) + Maj(d, e, f);
364
    g += t1;
365
    c  = t1 + t2;
366

    
367
    t1 = b + sum1(g) + Ch(g, h, a) + k[t+14] + w[14];
368
    t2 = sum0(c) + Maj(c, d, e);
369
    f += t1;
370
    b  = t1 + t2;
371

    
372
    t1 = a + sum1(f) + Ch(f, g, h) + k[t+15] + w[15];
373
    t2 = sum0(b) + Maj(b, c, d);
374
    e += t1;
375
    a  = t1 + t2;
376

    
377
    t += 16;
378
#endif
379
  }
380

    
381
  /* Update chaining vars.  */
382
  ctx->h0 += a;
383
  ctx->h1 += b;
384
  ctx->h2 += c;
385
  ctx->h3 += d;
386
  ctx->h4 += e;
387
  ctx->h5 += f;
388
  ctx->h6 += g;
389
  ctx->h7 += h;
390

    
391
  return /* burn_stack */ (8 + 16) * sizeof(u64) + sizeof(u32) + 3 * sizeof(void*);
392
}
393

    
394
void
395
sha512_update(struct hash_context *CTX, const byte *buf, uint len)
396
{
397
  struct sha512_context *ctx = (void *) CTX;
398

    
399
  if (ctx->count)
400
  {
401
    /* Fill rest of internal buffer */
402
    for (; len && ctx->count < SHA512_BLOCK_SIZE; len--)
403
      ctx->buf[ctx->count++] = *buf++;
404

    
405
    if (ctx->count < SHA512_BLOCK_SIZE)
406
      return;
407

    
408
    /* Process data from internal buffer */
409
    sha512_transform(ctx, ctx->buf);
410
    ctx->nblocks++;
411
    ctx->count = 0;
412
  }
413

    
414
  if (!len)
415
    return;
416

    
417
  /* Process data from input buffer */
418
  while (len >= SHA512_BLOCK_SIZE)
419
  {
420
    sha512_transform(ctx, buf);
421
    ctx->nblocks++;
422
    buf += SHA512_BLOCK_SIZE;
423
    len -= SHA512_BLOCK_SIZE;
424
  }
425

    
426
  /* Copy remaining data to internal buffer */
427
  memcpy(ctx->buf, buf, len);
428
  ctx->count = len;
429
}
430

    
431
/*
432
 * The routine final terminates the computation and returns the digest. The
433
 * handle is prepared for a new cycle, but adding bytes to the handle will the
434
 * destroy the returned buffer.
435
 *
436
 * Returns: 64 bytes representing the digest. When used for sha384, we take the
437
 * first 48 of those bytes.
438
 */
439
byte *
440
sha512_final(struct hash_context *CTX)
441
{
442
  struct sha512_context *ctx = (void *) CTX;
443
  u64 t, th, msb, lsb;
444

    
445
  sha512_update(CTX, NULL, 0);        /* flush */
446

    
447
  t = ctx->nblocks;
448
  th = 0;
449

    
450
  /* multiply by 128 to make a byte count */
451
  lsb = t << 7;
452
  msb = (th << 7) | (t >> 57);
453
  /* add the count */
454
  t = lsb;
455
  if ((lsb += ctx->count) < t)
456
    msb++;
457
  /* multiply by 8 to make a bit count */
458
  t = lsb;
459
  lsb <<= 3;
460
  msb <<= 3;
461
  msb |= t >> 61;
462

    
463
  if (ctx->count < 112)
464
  {
465
    /* enough room */
466
    ctx->buf[ctx->count++] = 0x80;        /* pad */
467
    while(ctx->count < 112)
468
      ctx->buf[ctx->count++] = 0;        /* pad */
469
  }
470
  else
471
  {
472
    /* need one extra block */
473
    ctx->buf[ctx->count++] = 0x80;        /* pad character */
474
    while(ctx->count < 128)
475
      ctx->buf[ctx->count++] = 0;
476
    sha512_update(CTX, NULL, 0);         /* flush */
477
    memset(ctx->buf, 0, 112);                /* fill next block with zeroes */
478
  }
479

    
480
  /* append the 128 bit count */
481
  put_u64(ctx->buf + 112, msb);
482
  put_u64(ctx->buf + 120, lsb);
483
  sha512_transform(ctx, ctx->buf);
484

    
485
  byte *p = ctx->buf;
486
#define X(a) do { put_u64(p, ctx->h##a); p += 8; } while(0)
487
  X(0);
488
  X(1);
489
  X(2);
490
  X(3);
491
  X(4);
492
  X(5);
493
  X(6);
494
  X(7);
495
#undef X
496

    
497
  return ctx->buf;
498
}