/*
 *	BIRD Library -- SHA-512 and SHA-384 Hash Functions
 *
 *	(c) 2015 CZ.NIC z.s.p.o.
 *
 *	Based on the code from libgcrypt-1.6.0, which is
 *	(c) 2003, 2006, 2008, 2009 Free Software Foundation, Inc.
 *
 *	Can be freely distributed and used under the terms of the GNU GPL.
 */

11  
12 
#include "lib/sha512.h" 
13 
#include "lib/unaligned.h" 
14  
15  
16 
// #define SHA512_UNROLLED

17  
18 
void

19 
sha512_init(struct hash_context *CTX)

20 
{ 
21 
struct sha512_context *ctx = (void *) CTX; 
22  
23 
ctx>h0 = U64(0x6a09e667f3bcc908);

24 
ctx>h1 = U64(0xbb67ae8584caa73b);

25 
ctx>h2 = U64(0x3c6ef372fe94f82b);

26 
ctx>h3 = U64(0xa54ff53a5f1d36f1);

27 
ctx>h4 = U64(0x510e527fade682d1);

28 
ctx>h5 = U64(0x9b05688c2b3e6c1f);

29 
ctx>h6 = U64(0x1f83d9abfb41bd6b);

30 
ctx>h7 = U64(0x5be0cd19137e2179);

31  
32 
ctx>nblocks = 0;

33 
ctx>count = 0;

34 
} 
35  
36 
void

37 
sha384_init(struct hash_context *CTX)

38 
{ 
39 
struct sha384_context *ctx = (void *) CTX; 
40  
41 
ctx>h0 = U64(0xcbbb9d5dc1059ed8);

42 
ctx>h1 = U64(0x629a292a367cd507);

43 
ctx>h2 = U64(0x9159015a3070dd17);

44 
ctx>h3 = U64(0x152fecd8f70e5939);

45 
ctx>h4 = U64(0x67332667ffc00b31);

46 
ctx>h5 = U64(0x8eb44a8768581511);

47 
ctx>h6 = U64(0xdb0c2e0d64f98fa7);

48 
ctx>h7 = U64(0x47b5481dbefa4fa4);

49  
50 
ctx>nblocks = 0;

51 
ctx>count = 0;

52 
} 
53  
54 
static inline u64 
55 
ROTR(u64 x, u64 n) 
56 
{ 
57 
return ((x >> n)  (x << (64  n))); 
58 
} 
59  
60 
static inline u64 
61 
Ch(u64 x, u64 y, u64 z) 
62 
{ 
63 
return ((x & y) ^ ( ~x & z));

64 
} 
65  
66 
static inline u64 
67 
Maj(u64 x, u64 y, u64 z) 
68 
{ 
69 
return ((x & y) ^ (x & z) ^ (y & z));

70 
} 
71  
72 
static inline u64 
73 
sum0(u64 x) 
74 
{ 
75 
return (ROTR(x, 28) ^ ROTR(x, 34) ^ ROTR(x, 39)); 
76 
} 
77  
78 
static inline u64 
79 
sum1(u64 x) 
80 
{ 
81 
return (ROTR(x, 14) ^ ROTR(x, 18) ^ ROTR(x, 41)); 
82 
} 
83  
84 
static const u64 k[] = 
85 
{ 
86 
U64(0x428a2f98d728ae22), U64(0x7137449123ef65cd), 
87 
U64(0xb5c0fbcfec4d3b2f), U64(0xe9b5dba58189dbbc), 
88 
U64(0x3956c25bf348b538), U64(0x59f111f1b605d019), 
89 
U64(0x923f82a4af194f9b), U64(0xab1c5ed5da6d8118), 
90 
U64(0xd807aa98a3030242), U64(0x12835b0145706fbe), 
91 
U64(0x243185be4ee4b28c), U64(0x550c7dc3d5ffb4e2), 
92 
U64(0x72be5d74f27b896f), U64(0x80deb1fe3b1696b1), 
93 
U64(0x9bdc06a725c71235), U64(0xc19bf174cf692694), 
94 
U64(0xe49b69c19ef14ad2), U64(0xefbe4786384f25e3), 
95 
U64(0x0fc19dc68b8cd5b5), U64(0x240ca1cc77ac9c65), 
96 
U64(0x2de92c6f592b0275), U64(0x4a7484aa6ea6e483), 
97 
U64(0x5cb0a9dcbd41fbd4), U64(0x76f988da831153b5), 
98 
U64(0x983e5152ee66dfab), U64(0xa831c66d2db43210), 
99 
U64(0xb00327c898fb213f), U64(0xbf597fc7beef0ee4), 
100 
U64(0xc6e00bf33da88fc2), U64(0xd5a79147930aa725), 
101 
U64(0x06ca6351e003826f), U64(0x142929670a0e6e70), 
102 
U64(0x27b70a8546d22ffc), U64(0x2e1b21385c26c926), 
103 
U64(0x4d2c6dfc5ac42aed), U64(0x53380d139d95b3df), 
104 
U64(0x650a73548baf63de), U64(0x766a0abb3c77b2a8), 
105 
U64(0x81c2c92e47edaee6), U64(0x92722c851482353b), 
106 
U64(0xa2bfe8a14cf10364), U64(0xa81a664bbc423001), 
107 
U64(0xc24b8b70d0f89791), U64(0xc76c51a30654be30), 
108 
U64(0xd192e819d6ef5218), U64(0xd69906245565a910), 
109 
U64(0xf40e35855771202a), U64(0x106aa07032bbd1b8), 
110 
U64(0x19a4c116b8d2d0c8), U64(0x1e376c085141ab53), 
111 
U64(0x2748774cdf8eeb99), U64(0x34b0bcb5e19b48a8), 
112 
U64(0x391c0cb3c5c95a63), U64(0x4ed8aa4ae3418acb), 
113 
U64(0x5b9cca4f7763e373), U64(0x682e6ff3d6b2b8a3), 
114 
U64(0x748f82ee5defb2fc), U64(0x78a5636f43172f60), 
115 
U64(0x84c87814a1f0ab72), U64(0x8cc702081a6439ec), 
116 
U64(0x90befffa23631e28), U64(0xa4506cebde82bde9), 
117 
U64(0xbef9a3f7b2c67915), U64(0xc67178f2e372532b), 
118 
U64(0xca273eceea26619c), U64(0xd186b8c721c0c207), 
119 
U64(0xeada7dd6cde0eb1e), U64(0xf57d4f7fee6ed178), 
120 
U64(0x06f067aa72176fba), U64(0x0a637dc5a2c898a6), 
121 
U64(0x113f9804bef90dae), U64(0x1b710b35131c471b), 
122 
U64(0x28db77f523047d84), U64(0x32caab7b40c72493), 
123 
U64(0x3c9ebe0a15c9bebc), U64(0x431d67c49c100d4c), 
124 
U64(0x4cc5d4becb3e42b6), U64(0x597f299cfc657e2a), 
125 
U64(0x5fcb6fab3ad6faec), U64(0x6c44198c4a475817) 
126 
}; 
127  
128 
/*

129 
* Transform the message W which consists of 16 64bitwords

130 
*/

131 
static uint

132 
sha512_transform(struct sha512_context *ctx, const byte *data) 
133 
{ 
134 
u64 a, b, c, d, e, f, g, h; 
135 
u64 w[16];

136 
uint t; 
137  
138 
/* get values from the chaining vars */

139 
a = ctx>h0; 
140 
b = ctx>h1; 
141 
c = ctx>h2; 
142 
d = ctx>h3; 
143 
e = ctx>h4; 
144 
f = ctx>h5; 
145 
g = ctx>h6; 
146 
h = ctx>h7; 
147  
148 
for (t = 0; t < 16; t++) 
149 
w[t] = get_u64(data + t * 8);

150  
151 
#define S0(x) (ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7)) 
152 
#define S1(x) (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6)) 
153  
154 
for (t = 0; t < 80  16; ) 
155 
{ 
156 
u64 t1, t2; 
157  
158 
/* Performance on a AMD Athlon(tm) Dual Core Processor 4050e

159 
with gcc 4.3.3 using gcry_md_hash_buffer of each 10000 bytes

160 
initialized to 0,1,2,3...255,0,... and 1000 iterations:

161 

162 
Not unrolled with macros: 440ms

163 
Unrolled with macros: 350ms

164 
Unrolled with inline: 330ms

165 
*/

166 
#ifndef SHA512_UNROLLED

167 
t1 = h + sum1(e) + Ch(e, f, g) + k[t] + w[t%16];

168 
w[t%16] += S1(w[(t  2)%16]) + w[(t  7)%16] + S0(w[(t  15)%16]); 
169 
t2 = sum0(a) + Maj(a, b, c); 
170 
h = g; 
171 
g = f; 
172 
f = e; 
173 
e = d + t1; 
174 
d = c; 
175 
c = b; 
176 
b = a; 
177 
a = t1 + t2; 
178 
t++; 
179 
#else /* Unrolled */ 
180 
t1 = h + sum1(e) + Ch(e, f, g) + k[t] + w[0];

181 
w[0] += S1(w[14]) + w[9] + S0(w[1]); 
182 
t2 = sum0(a) + Maj(a, b, c); 
183 
d += t1; 
184 
h = t1 + t2; 
185  
186 
t1 = g + sum1(d) + Ch(d, e, f) + k[t+1] + w[1]; 
187 
w[1] += S1(w[15]) + w[10] + S0(w[2]); 
188 
t2 = sum0(h) + Maj(h, a, b); 
189 
c += t1; 
190 
g = t1 + t2; 
191  
192 
t1 = f + sum1(c) + Ch(c, d, e) + k[t+2] + w[2]; 
193 
w[2] += S1(w[0]) + w[11] + S0(w[3]); 
194 
t2 = sum0(g) + Maj(g, h, a); 
195 
b += t1; 
196 
f = t1 + t2; 
197  
198 
t1 = e + sum1(b) + Ch(b, c, d) + k[t+3] + w[3]; 
199 
w[3] += S1(w[1]) + w[12] + S0(w[4]); 
200 
t2 = sum0(f) + Maj(f, g, h); 
201 
a += t1; 
202 
e = t1 + t2; 
203  
204 
t1 = d + sum1(a) + Ch(a, b, c) + k[t+4] + w[4]; 
205 
w[4] += S1(w[2]) + w[13] + S0(w[5]); 
206 
t2 = sum0(e) + Maj(e, f, g); 
207 
h += t1; 
208 
d = t1 + t2; 
209  
210 
t1 = c + sum1(h) + Ch(h, a, b) + k[t+5] + w[5]; 
211 
w[5] += S1(w[3]) + w[14] + S0(w[6]); 
212 
t2 = sum0(d) + Maj(d, e, f); 
213 
g += t1; 
214 
c = t1 + t2; 
215  
216 
t1 = b + sum1(g) + Ch(g, h, a) + k[t+6] + w[6]; 
217 
w[6] += S1(w[4]) + w[15] + S0(w[7]); 
218 
t2 = sum0(c) + Maj(c, d, e); 
219 
f += t1; 
220 
b = t1 + t2; 
221  
222 
t1 = a + sum1(f) + Ch(f, g, h) + k[t+7] + w[7]; 
223 
w[7] += S1(w[5]) + w[0] + S0(w[8]); 
224 
t2 = sum0(b) + Maj(b, c, d); 
225 
e += t1; 
226 
a = t1 + t2; 
227  
228 
t1 = h + sum1(e) + Ch(e, f, g) + k[t+8] + w[8]; 
229 
w[8] += S1(w[6]) + w[1] + S0(w[9]); 
230 
t2 = sum0(a) + Maj(a, b, c); 
231 
d += t1; 
232 
h = t1 + t2; 
233  
234 
t1 = g + sum1(d) + Ch(d, e, f) + k[t+9] + w[9]; 
235 
w[9] += S1(w[7]) + w[2] + S0(w[10]); 
236 
t2 = sum0(h) + Maj(h, a, b); 
237 
c += t1; 
238 
g = t1 + t2; 
239  
240 
t1 = f + sum1(c) + Ch(c, d, e) + k[t+10] + w[10]; 
241 
w[10] += S1(w[8]) + w[3] + S0(w[11]); 
242 
t2 = sum0(g) + Maj(g, h, a); 
243 
b += t1; 
244 
f = t1 + t2; 
245  
246 
t1 = e + sum1(b) + Ch(b, c, d) + k[t+11] + w[11]; 
247 
w[11] += S1(w[9]) + w[4] + S0(w[12]); 
248 
t2 = sum0(f) + Maj(f, g, h); 
249 
a += t1; 
250 
e = t1 + t2; 
251  
252 
t1 = d + sum1(a) + Ch(a, b, c) + k[t+12] + w[12]; 
253 
w[12] += S1(w[10]) + w[5] + S0(w[13]); 
254 
t2 = sum0(e) + Maj(e, f, g); 
255 
h += t1; 
256 
d = t1 + t2; 
257  
258 
t1 = c + sum1(h) + Ch(h, a, b) + k[t+13] + w[13]; 
259 
w[13] += S1(w[11]) + w[6] + S0(w[14]); 
260 
t2 = sum0(d) + Maj(d, e, f); 
261 
g += t1; 
262 
c = t1 + t2; 
263  
264 
t1 = b + sum1(g) + Ch(g, h, a) + k[t+14] + w[14]; 
265 
w[14] += S1(w[12]) + w[7] + S0(w[15]); 
266 
t2 = sum0(c) + Maj(c, d, e); 
267 
f += t1; 
268 
b = t1 + t2; 
269  
270 
t1 = a + sum1(f) + Ch(f, g, h) + k[t+15] + w[15]; 
271 
w[15] += S1(w[13]) + w[8] + S0(w[0]); 
272 
t2 = sum0(b) + Maj(b, c, d); 
273 
e += t1; 
274 
a = t1 + t2; 
275  
276 
t += 16;

277 
#endif

278 
} 
279  
280 
for (; t < 80; ) 
281 
{ 
282 
u64 t1, t2; 
283  
284 
#ifndef SHA512_UNROLLED

285 
t1 = h + sum1(e) + Ch(e, f, g) + k[t] + w[t%16];

286 
t2 = sum0(a) + Maj(a, b, c); 
287 
h = g; 
288 
g = f; 
289 
f = e; 
290 
e = d + t1; 
291 
d = c; 
292 
c = b; 
293 
b = a; 
294 
a = t1 + t2; 
295 
t++; 
296 
#else /* Unrolled */ 
297 
t1 = h + sum1(e) + Ch(e, f, g) + k[t] + w[0];

298 
t2 = sum0(a) + Maj(a, b, c); 
299 
d += t1; 
300 
h = t1 + t2; 
301  
302 
t1 = g + sum1(d) + Ch(d, e, f) + k[t+1] + w[1]; 
303 
t2 = sum0(h) + Maj(h, a, b); 
304 
c += t1; 
305 
g = t1 + t2; 
306  
307 
t1 = f + sum1(c) + Ch(c, d, e) + k[t+2] + w[2]; 
308 
t2 = sum0(g) + Maj(g, h, a); 
309 
b += t1; 
310 
f = t1 + t2; 
311  
312 
t1 = e + sum1(b) + Ch(b, c, d) + k[t+3] + w[3]; 
313 
t2 = sum0(f) + Maj(f, g, h); 
314 
a += t1; 
315 
e = t1 + t2; 
316  
317 
t1 = d + sum1(a) + Ch(a, b, c) + k[t+4] + w[4]; 
318 
t2 = sum0(e) + Maj(e, f, g); 
319 
h += t1; 
320 
d = t1 + t2; 
321  
322 
t1 = c + sum1(h) + Ch(h, a, b) + k[t+5] + w[5]; 
323 
t2 = sum0(d) + Maj(d, e, f); 
324 
g += t1; 
325 
c = t1 + t2; 
326  
327 
t1 = b + sum1(g) + Ch(g, h, a) + k[t+6] + w[6]; 
328 
t2 = sum0(c) + Maj(c, d, e); 
329 
f += t1; 
330 
b = t1 + t2; 
331  
332 
t1 = a + sum1(f) + Ch(f, g, h) + k[t+7] + w[7]; 
333 
t2 = sum0(b) + Maj(b, c, d); 
334 
e += t1; 
335 
a = t1 + t2; 
336  
337 
t1 = h + sum1(e) + Ch(e, f, g) + k[t+8] + w[8]; 
338 
t2 = sum0(a) + Maj(a, b, c); 
339 
d += t1; 
340 
h = t1 + t2; 
341  
342 
t1 = g + sum1(d) + Ch(d, e, f) + k[t+9] + w[9]; 
343 
t2 = sum0(h) + Maj(h, a, b); 
344 
c += t1; 
345 
g = t1 + t2; 
346  
347 
t1 = f + sum1(c) + Ch(c, d, e) + k[t+10] + w[10]; 
348 
t2 = sum0(g) + Maj(g, h, a); 
349 
b += t1; 
350 
f = t1 + t2; 
351  
352 
t1 = e + sum1(b) + Ch(b, c, d) + k[t+11] + w[11]; 
353 
t2 = sum0(f) + Maj(f, g, h); 
354 
a += t1; 
355 
e = t1 + t2; 
356  
357 
t1 = d + sum1(a) + Ch(a, b, c) + k[t+12] + w[12]; 
358 
t2 = sum0(e) + Maj(e, f, g); 
359 
h += t1; 
360 
d = t1 + t2; 
361  
362 
t1 = c + sum1(h) + Ch(h, a, b) + k[t+13] + w[13]; 
363 
t2 = sum0(d) + Maj(d, e, f); 
364 
g += t1; 
365 
c = t1 + t2; 
366  
367 
t1 = b + sum1(g) + Ch(g, h, a) + k[t+14] + w[14]; 
368 
t2 = sum0(c) + Maj(c, d, e); 
369 
f += t1; 
370 
b = t1 + t2; 
371  
372 
t1 = a + sum1(f) + Ch(f, g, h) + k[t+15] + w[15]; 
373 
t2 = sum0(b) + Maj(b, c, d); 
374 
e += t1; 
375 
a = t1 + t2; 
376  
377 
t += 16;

378 
#endif

379 
} 
380  
381 
/* Update chaining vars. */

382 
ctx>h0 += a; 
383 
ctx>h1 += b; 
384 
ctx>h2 += c; 
385 
ctx>h3 += d; 
386 
ctx>h4 += e; 
387 
ctx>h5 += f; 
388 
ctx>h6 += g; 
389 
ctx>h7 += h; 
390  
391 
return /* burn_stack */ (8 + 16) * sizeof(u64) + sizeof(u32) + 3 * sizeof(void*); 
392 
} 
393  
394 
void

395 
sha512_update(struct hash_context *CTX, const byte *buf, uint len) 
396 
{ 
397 
struct sha512_context *ctx = (void *) CTX; 
398  
399 
if (ctx>count)

400 
{ 
401 
/* Fill rest of internal buffer */

402 
for (; len && ctx>count < SHA512_BLOCK_SIZE; len)

403 
ctx>buf[ctx>count++] = *buf++; 
404  
405 
if (ctx>count < SHA512_BLOCK_SIZE)

406 
return;

407  
408 
/* Process data from internal buffer */

409 
sha512_transform(ctx, ctx>buf); 
410 
ctx>nblocks++; 
411 
ctx>count = 0;

412 
} 
413  
414 
if (!len)

415 
return;

416  
417 
/* Process data from input buffer */

418 
while (len >= SHA512_BLOCK_SIZE)

419 
{ 
420 
sha512_transform(ctx, buf); 
421 
ctx>nblocks++; 
422 
buf += SHA512_BLOCK_SIZE; 
423 
len = SHA512_BLOCK_SIZE; 
424 
} 
425  
426 
/* Copy remaining data to internal buffer */

427 
memcpy(ctx>buf, buf, len); 
428 
ctx>count = len; 
429 
} 
430  
431 
/*

432 
* The routine final terminates the computation and returns the digest. The

433 
* handle is prepared for a new cycle, but adding bytes to the handle will the

434 
* destroy the returned buffer.

435 
*

436 
* Returns: 64 bytes representing the digest. When used for sha384, we take the

437 
* first 48 of those bytes.

438 
*/

439 
byte * 
440 
sha512_final(struct hash_context *CTX)

441 
{ 
442 
struct sha512_context *ctx = (void *) CTX; 
443 
u64 t, th, msb, lsb; 
444  
445 
sha512_update(CTX, NULL, 0); /* flush */ 
446  
447 
t = ctx>nblocks; 
448 
th = 0;

449  
450 
/* multiply by 128 to make a byte count */

451 
lsb = t << 7;

452 
msb = (th << 7)  (t >> 57); 
453 
/* add the count */

454 
t = lsb; 
455 
if ((lsb += ctx>count) < t)

456 
msb++; 
457 
/* multiply by 8 to make a bit count */

458 
t = lsb; 
459 
lsb <<= 3;

460 
msb <<= 3;

461 
msb = t >> 61;

462  
463 
if (ctx>count < 112) 
464 
{ 
465 
/* enough room */

466 
ctx>buf[ctx>count++] = 0x80; /* pad */ 
467 
while(ctx>count < 112) 
468 
ctx>buf[ctx>count++] = 0; /* pad */ 
469 
} 
470 
else

471 
{ 
472 
/* need one extra block */

473 
ctx>buf[ctx>count++] = 0x80; /* pad character */ 
474 
while(ctx>count < 128) 
475 
ctx>buf[ctx>count++] = 0;

476 
sha512_update(CTX, NULL, 0); /* flush */ 
477 
memset(ctx>buf, 0, 112); /* fill next block with zeroes */ 
478 
} 
479  
480 
/* append the 128 bit count */

481 
put_u64(ctx>buf + 112, msb);

482 
put_u64(ctx>buf + 120, lsb);

483 
sha512_transform(ctx, ctx>buf); 
484  
485 
byte *p = ctx>buf; 
486 
#define X(a) do { put_u64(p, ctx>h##a); p += 8; } while(0) 
487 
X(0);

488 
X(1);

489 
X(2);

490 
X(3);

491 
X(4);

492 
X(5);

493 
X(6);

494 
X(7);

495 
#undef X

496  
497 
return ctx>buf;

498 
} 