1
/* ====================================================================
2
* Copyright (c) 2010 The OpenSSL Project. All rights reserved.
4
* Redistribution and use in source and binary forms, with or without
5
* modification, are permitted provided that the following conditions
8
* 1. Redistributions of source code must retain the above copyright
9
* notice, this list of conditions and the following disclaimer.
11
* 2. Redistributions in binary form must reproduce the above copyright
12
* notice, this list of conditions and the following disclaimer in
13
* the documentation and/or other materials provided with the
16
* 3. All advertising materials mentioning features or use of this
17
* software must display the following acknowledgment:
18
* "This product includes software developed by the OpenSSL Project
19
* for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
21
* 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
22
* endorse or promote products derived from this software without
23
* prior written permission. For written permission, please contact
24
* openssl-core@openssl.org.
26
* 5. Products derived from this software may not be called "OpenSSL"
27
* nor may "OpenSSL" appear in their names without prior written
28
* permission of the OpenSSL Project.
30
* 6. Redistributions of any form whatsoever must retain the following
32
* "This product includes software developed by the OpenSSL Project
33
* for use in the OpenSSL Toolkit (http://www.openssl.org/)"
35
* THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
36
* EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
38
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
39
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
41
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
42
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
43
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
44
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
45
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
46
* OF THE POSSIBILITY OF SUCH DAMAGE.
47
* ====================================================================
50
#define OPENSSL_FIPSAPI
52
#include <openssl/crypto.h>
53
#include "modes_lcl.h"
63
/*
 * Byte-order and reduction helper macros.
 * When BSWAP4 and STRICT_ALIGNMENT are both defined, GETU32/PUTU32 are
 * redefined as one 32-bit memory access combined with BSWAP4.
 * NOTE(review): both forms cast p to a u32 pointer, so p must be suitably
 * aligned; PUTU32 expands to an unparenthesized assignment.
 */
#if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
64
/* redefine, because alignment is ensured */
66
#define GETU32(p) BSWAP4(*(const u32 *)(p))
68
#define PUTU32(p,v) *(u32 *)(p) = BSWAP4(v)
71
/* PACK(s): move the 16-bit constant s into the top 16 bits of a size_t. */
#define PACK(s) ((size_t)(s)<<(sizeof(size_t)*8-16))
72
/*
 * REDUCE1BIT(V): shift the 128-bit value V (fields hi and lo) right by one
 * bit; when the bit shifted out of V.lo was set, XOR 0xe1 into the top byte
 * of V.hi. The mask T is all-ones or zero depending on that bit. Two
 * variants are visible: one builds T as a 64-bit constant, the other as a
 * 32-bit constant that is then shifted left by 32.
 */
#define REDUCE1BIT(V) do { \
73
if (sizeof(size_t)==8) { \
74
u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
75
V.lo = (V.hi<<63)|(V.lo>>1); \
76
V.hi = (V.hi>>1 )^T; \
79
u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
80
V.lo = (V.hi<<63)|(V.lo>>1); \
81
V.hi = (V.hi>>1 )^((u64)T<<32); \
86
* Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
87
* never be set to 8. 8 is effectively reserved for testing purposes.
88
* TABLE_BITS>1 are lookup-table-driven implementations referred to as
89
* "Shoup's" in GCM specification. In other words OpenSSL does not cover
90
* whole spectrum of possible table driven implementations. Why? In
91
* non-"Shoup's" case memory access pattern is segmented in such manner,
92
* that it's trivial to see that cache timing information can reveal
93
* fair portion of intermediate hash value. Given that ciphertext is
94
* always available to attacker, it's possible for him to attempt to
95
* deduce secret parameter H and if successful, tamper with messages
96
* [which is nothing but trivial in CTR mode]. In "Shoup's" case it's
97
* not as trivial, but there is no reason to believe that it's resistant
98
* to cache-timing attack. And the thing about "8-bit" implementation is
99
* that it consumes 16 (sixteen) times more memory, 4KB per individual
100
* key + 1KB shared. Well, on pros side it should be twice as fast as
101
* "4-bit" version. And for gcc-generated x86[_64] code, "8-bit" version
102
* was observed to run ~75% faster, closer to 100% for commercial
103
* compilers... Yet "4-bit" procedure is preferred, because it's
104
* believed to provide better security-performance balance and adequate
105
* all-round performance. "All-round" refers to things like:
107
* - shorter setup time effectively improves overall timing for
108
* handling short messages;
109
* - larger table allocation can become unbearable because of VM
110
* subsystem penalties (for example on Windows large enough free
111
* results in VM working set trimming, meaning that a subsequent
112
* malloc would immediately incur working set expansion);
113
* - larger table has larger cache footprint, which can affect
114
* performance of other code paths (not necessarily even from same
115
* thread in Hyper-Threading world);
117
* Value of 1 is not appropriate for performance reasons.
121
/*
 * Fill the 256-entry table Htable used by the 8-bit multiplication routine.
 * Htable: output table, written in place.
 * H:      input key words (how V is derived from H is on lines not visible
 *         here).
 */
static void gcm_init_8bit(u128 Htable[256], u64 H[2])
131
/* Seed entry 128 with V, then visit the power-of-two indices 64, 32, ... 1
 * (the loop body is not visible in this view). */
for (Htable[128]=V, i=64; i>0; i>>=1) {
136
/* For every power of two i, entry i+j (0 < j < i) is the XOR of entry i
 * and entry j. */
for (i=2; i<256; i<<=1) {
137
u128 *Hi = Htable+i, H0 = *Hi;
138
for (j=1; j<i; ++j) {
139
Hi[j].hi = H0.hi^Htable[j].hi;
140
Hi[j].lo = H0.lo^Htable[j].lo;
145
/*
 * 8-bit table-driven variant of the GHASH multiplication step.
 * Xi:     16-byte value, read byte by byte and overwritten with the result.
 * Htable: 256-entry table, read only.
 */
static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256])
148
/* xi starts at the last byte (index 15) of Xi. */
const u8 *xi = (const u8 *)Xi+15;
150
/* Run-time endianness probe: little is 1 on little-endian hosts. */
const union { long one; char little; } is_endian = {1};
151
static const size_t rem_8bit[256] = {
152
PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
153
PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
154
PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
155
PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
156
PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
157
PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
158
PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
159
PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
160
PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
161
PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
162
PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
163
PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
164
PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
165
PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
166
PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
167
PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
168
PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
169
PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
170
PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
171
PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
172
PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
173
PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
174
PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
175
PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
176
PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
177
PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
178
PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
179
PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
180
PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
181
PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
182
PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
183
PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
184
PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
185
PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
186
PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
187
PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
188
PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
189
PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
190
PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
191
PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
192
PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
193
PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
194
PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
195
PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
196
PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
197
PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
198
PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
199
PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
200
PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
201
PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
202
PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
203
PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
204
PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
205
PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
206
PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
207
PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
208
PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
209
PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
210
PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
211
PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
212
PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
213
PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
214
PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
215
PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE) };
218
/* Accumulate the table entry selected by n into Z (n is set on lines not
 * visible here; presumably the current byte of Xi). */
Z.hi ^= Htable[n].hi;
219
Z.lo ^= Htable[n].lo;
221
/* Stop once xi has reached the first byte of Xi. */
if ((u8 *)Xi==xi) break;
225
/* Shift Z right by eight bits; rem holds the byte shifted out of Z.lo and
 * selects the reduction constant. */
rem = (size_t)Z.lo&0xff;
226
Z.lo = (Z.hi<<56)|(Z.lo>>8);
228
/* rem_8bit entries are PACK()-ed into the top 16 bits of a size_t, so with
 * a 32-bit size_t they need a further shift by 32 to reach the top of the
 * 64-bit Z.hi. */
if (sizeof(size_t)==8)
229
Z.hi ^= rem_8bit[rem];
231
Z.hi ^= (u64)rem_8bit[rem]<<32;
234
/* Store Z back into Xi: byte-swapped 64-bit words in the BSWAP8 variant,
 * or four PUTU32 stores in the other visible variant. */
if (is_endian.little) {
236
Xi[0] = BSWAP8(Z.hi);
237
Xi[1] = BSWAP8(Z.lo);
241
v = (u32)(Z.hi>>32); PUTU32(p,v);
242
v = (u32)(Z.hi); PUTU32(p+4,v);
243
v = (u32)(Z.lo>>32); PUTU32(p+8,v);
244
v = (u32)(Z.lo); PUTU32(p+12,v);
252
#define GCM_MUL(ctx,Xi) gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)
256
/*
 * Fill the 16-entry table Htable used by the 4-bit routines.
 * Htable: output table, written in place.
 * H:      input key words (how V is derived from H is on lines not visible
 *         here).
 * Two variants are visible: a loop form under OPENSSL_SMALL_FOOTPRINT and a
 * fully unrolled form.
 */
static void gcm_init_4bit(u128 Htable[16], u64 H[2])
259
#if defined(OPENSSL_SMALL_FOOTPRINT)
268
#if defined(OPENSSL_SMALL_FOOTPRINT)
269
/* Loop form: seed entry 8 with V and visit indices 4, 2, 1 (loop body not
 * visible), then build entry i+j as entry i XOR entry j. */
for (Htable[8]=V, i=4; i>0; i>>=1) {
274
for (i=2; i<16; i<<=1) {
277
for (V=*Hi, j=1; j<i; ++j) {
278
Hi[j].hi = V.hi^Htable[j].hi;
279
Hi[j].lo = V.lo^Htable[j].lo;
290
/* Unrolled form: each non-power-of-two entry is V XOR a lower entry.
 * NOTE(review): V is presumably reloaded with entries 4 and 8 between the
 * groups below; those assignments are on lines not visible here. */
Htable[3].hi = V.hi^Htable[2].hi, Htable[3].lo = V.lo^Htable[2].lo;
292
Htable[5].hi = V.hi^Htable[1].hi, Htable[5].lo = V.lo^Htable[1].lo;
293
Htable[6].hi = V.hi^Htable[2].hi, Htable[6].lo = V.lo^Htable[2].lo;
294
Htable[7].hi = V.hi^Htable[3].hi, Htable[7].lo = V.lo^Htable[3].lo;
296
Htable[9].hi = V.hi^Htable[1].hi, Htable[9].lo = V.lo^Htable[1].lo;
297
Htable[10].hi = V.hi^Htable[2].hi, Htable[10].lo = V.lo^Htable[2].lo;
298
Htable[11].hi = V.hi^Htable[3].hi, Htable[11].lo = V.lo^Htable[3].lo;
299
Htable[12].hi = V.hi^Htable[4].hi, Htable[12].lo = V.lo^Htable[4].lo;
300
Htable[13].hi = V.hi^Htable[5].hi, Htable[13].lo = V.lo^Htable[5].lo;
301
Htable[14].hi = V.hi^Htable[6].hi, Htable[14].lo = V.lo^Htable[6].lo;
302
Htable[15].hi = V.hi^Htable[7].hi, Htable[15].lo = V.lo^Htable[7].lo;
304
/* Only when building with GHASH_ASM on ARM: rewrite the table entries in
 * the layout the assembler code expects. */
#if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
306
* ARM assembler expects specific dword order in Htable.
310
const union { long one; char little; } is_endian = {1};
312
if (is_endian.little)
321
/* Exchange hi and lo and swap the 32-bit halves of each 64-bit word.
 * Which endianness branch these two stores belong to is not visible. */
Htable[j].hi = V.lo<<32|V.lo>>32;
322
Htable[j].lo = V.hi<<32|V.hi>>32;
329
/*
 * Reduction constants for the 4-bit routines. After Z is shifted right by
 * four bits, the nibble shifted out of Z.lo indexes this table and the
 * selected entry is XORed into Z.hi. Every constant is PACK()-ed, i.e.
 * placed in the top 16 bits of a size_t.
 */
static const size_t rem_4bit[16] = {
330
PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
331
PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
332
PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
333
PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0) };
335
/*
 * 4-bit table-driven variant of the GHASH multiplication step.
 * Xi:     16-byte value; read starting from byte 15 and overwritten with
 *         the result.
 * Htable: 16-entry table, read only.
 * NOTE(review): nlo and nhi presumably hold the low and high nibble of the
 * current byte; the split itself is on lines not visible here.
 */
static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
339
size_t rem, nlo, nhi;
340
/* Run-time endianness probe: little is 1 on little-endian hosts. */
const union { long one; char little; } is_endian = {1};
342
nlo = ((const u8 *)Xi)[15];
346
/* Z starts out as the table entry selected by nlo. */
Z.hi = Htable[nlo].hi;
347
Z.lo = Htable[nlo].lo;
350
/* Shift Z right by four bits; rem is the nibble shifted out of Z.lo and
 * selects the reduction constant. */
rem = (size_t)Z.lo&0xf;
351
Z.lo = (Z.hi<<60)|(Z.lo>>4);
353
/* rem_4bit entries sit in the top 16 bits of a size_t; with a 32-bit
 * size_t they are shifted by a further 32 bits to reach the top of Z.hi. */
if (sizeof(size_t)==8)
354
Z.hi ^= rem_4bit[rem];
356
Z.hi ^= (u64)rem_4bit[rem]<<32;
358
Z.hi ^= Htable[nhi].hi;
359
Z.lo ^= Htable[nhi].lo;
363
/* Next byte of Xi, selected by cnt (cnt is updated on lines not visible). */
nlo = ((const u8 *)Xi)[cnt];
367
rem = (size_t)Z.lo&0xf;
368
Z.lo = (Z.hi<<60)|(Z.lo>>4);
370
if (sizeof(size_t)==8)
371
Z.hi ^= rem_4bit[rem];
373
Z.hi ^= (u64)rem_4bit[rem]<<32;
375
Z.hi ^= Htable[nlo].hi;
376
Z.lo ^= Htable[nlo].lo;
379
/* Store Z back into Xi: byte-swapped 64-bit words in the BSWAP8 variant,
 * or four PUTU32 stores in the other visible variant. */
if (is_endian.little) {
381
Xi[0] = BSWAP8(Z.hi);
382
Xi[1] = BSWAP8(Z.lo);
386
v = (u32)(Z.hi>>32); PUTU32(p,v);
387
v = (u32)(Z.hi); PUTU32(p+4,v);
388
v = (u32)(Z.lo>>32); PUTU32(p+8,v);
389
v = (u32)(Z.lo); PUTU32(p+12,v);
398
#if !defined(OPENSSL_SMALL_FOOTPRINT)
400
* Streamed gcm_gmult_4bit, see CRYPTO_gcm128_[en|de]crypt for
401
* details... Compiler-generated code doesn't seem to give any
402
* performance improvement, at least not on x86[_64]. It's here
403
* mostly as reference and a placeholder for possible future
404
* non-trivial optimization[s]...
406
/*
 * Multi-block counterpart of the 4-bit multiplication routine.
 * Xi:     16-byte running hash value, updated in place.
 * Htable: 16-entry table, read only.
 * inp:    input data.
 * len:    input length in bytes.
 * NOTE(review): how inp is mixed into the bytes read from Xi, and how nlo
 * and nhi are split into nibbles, is on lines not visible here.
 * This first part repeats the shift / reduce / accumulate steps of the
 * single-block 4-bit routine.
 */
static void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],
407
const u8 *inp,size_t len)
411
size_t rem, nlo, nhi;
412
/* Run-time endianness probe: little is 1 on little-endian hosts. */
const union { long one; char little; } is_endian = {1};
417
nlo = ((const u8 *)Xi)[15];
422
Z.hi = Htable[nlo].hi;
423
Z.lo = Htable[nlo].lo;
426
/* Shift Z right by four bits; rem is the nibble shifted out. */
rem = (size_t)Z.lo&0xf;
427
Z.lo = (Z.hi<<60)|(Z.lo>>4);
429
if (sizeof(size_t)==8)
430
Z.hi ^= rem_4bit[rem];
432
Z.hi ^= (u64)rem_4bit[rem]<<32;
434
Z.hi ^= Htable[nhi].hi;
435
Z.lo ^= Htable[nhi].lo;
439
nlo = ((const u8 *)Xi)[cnt];
444
rem = (size_t)Z.lo&0xf;
445
Z.lo = (Z.hi<<60)|(Z.lo>>4);
447
if (sizeof(size_t)==8)
448
Z.hi ^= rem_4bit[rem];
450
Z.hi ^= (u64)rem_4bit[rem]<<32;
452
Z.hi ^= Htable[nlo].hi;
453
Z.lo ^= Htable[nlo].lo;
457
* Extra 256+16 bytes per-key plus 512 bytes shared tables
458
* [should] give ~50% improvement... One could have PACK()-ed
459
* the rem_8bit even here, but the priority is to minimize
462
u128 Hshr4[16]; /* Htable shifted right by 4 bits */
463
u8 Hshl4[16]; /* Htable shifted left by 4 bits */
464
static const unsigned short rem_8bit[256] = {
465
0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E,
466
0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E,
467
0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E,
468
0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E,
469
0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E,
470
0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E,
471
0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E,
472
0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E,
473
0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE,
474
0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE,
475
0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE,
476
0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE,
477
0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E,
478
0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E,
479
0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE,
480
0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE,
481
0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E,
482
0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E,
483
0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E,
484
0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E,
485
0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E,
486
0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E,
487
0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E,
488
0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E,
489
0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE,
490
0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE,
491
0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE,
492
0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE,
493
0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E,
494
0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E,
495
0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE,
496
0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE };
498
* This pre-processing phase slows down procedure by approximately
499
* same time as it makes each loop spin faster. In other words
500
* single block performance is approximately same as straightforward
501
* "4-bit" implementation, and then it goes only faster...
503
/* Pre-processing: for each of the 16 table entries store the entry shifted
 * right by four bits in Hshr4 and the low byte of (lo shifted left by
 * four) in Hshl4. */
for (cnt=0; cnt<16; ++cnt) {
504
Z.hi = Htable[cnt].hi;
505
Z.lo = Htable[cnt].lo;
506
Hshr4[cnt].lo = (Z.hi<<60)|(Z.lo>>4);
507
Hshr4[cnt].hi = (Z.hi>>4);
508
Hshl4[cnt] = (u8)(Z.lo<<4);
512
/* Main pass: bytes 15 down to 1 of Xi, one whole byte per iteration. */
for (Z.lo=0, Z.hi=0, cnt=15; cnt; --cnt) {
513
nlo = ((const u8 *)Xi)[cnt];
518
Z.hi ^= Htable[nlo].hi;
519
Z.lo ^= Htable[nlo].lo;
521
/* Shift Z right by eight bits; rem is the byte shifted out of Z.lo. */
rem = (size_t)Z.lo&0xff;
523
Z.lo = (Z.hi<<56)|(Z.lo>>8);
526
Z.hi ^= Hshr4[nhi].hi;
527
Z.lo ^= Hshr4[nhi].lo;
528
/* This rem_8bit holds plain 16-bit values, hence the shift by 48 to reach
 * the top of Z.hi. */
Z.hi ^= (u64)rem_8bit[rem^Hshl4[nhi]]<<48;
531
/* Byte 0 of Xi is handled separately with a four-bit shift. */
nlo = ((const u8 *)Xi)[0];
536
Z.hi ^= Htable[nlo].hi;
537
Z.lo ^= Htable[nlo].lo;
539
rem = (size_t)Z.lo&0xf;
541
Z.lo = (Z.hi<<60)|(Z.lo>>4);
544
Z.hi ^= Htable[nhi].hi;
545
Z.lo ^= Htable[nhi].lo;
546
Z.hi ^= ((u64)rem_8bit[rem<<4])<<48;
549
/* Store Z back into Xi: byte-swapped 64-bit words in the BSWAP8 variant,
 * or four PUTU32 stores in the other visible variant. */
if (is_endian.little) {
551
Xi[0] = BSWAP8(Z.hi);
552
Xi[1] = BSWAP8(Z.lo);
556
v = (u32)(Z.hi>>32); PUTU32(p,v);
557
v = (u32)(Z.hi); PUTU32(p+4,v);
558
v = (u32)(Z.lo>>32); PUTU32(p+8,v);
559
v = (u32)(Z.lo); PUTU32(p+12,v);
566
/* Advance one 16-byte block per iteration and stop when len reaches zero.
 * NOTE(review): len is a size_t, so the loop only terminates cleanly when
 * len is a non-zero multiple of 16; any other value wraps around. */
} while (inp+=16, len-=16);
570
void gcm_gmult_4bit(u64 Xi[2],const u128 Htable[16]);
571
void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
574
#define GCM_MUL(ctx,Xi) gcm_gmult_4bit(ctx->Xi.u,ctx->Htable)
575
#if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
576
#define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len)
577
/* GHASH_CHUNK is "stride parameter" missioned to mitigate cache
578
* thrashing effect. In other words, the idea is to hash data while it's
579
* still in L1 cache after encryption pass... */
580
#define GHASH_CHUNK (3*1024)
583
#else /* TABLE_BITS */
585
/*
 * Table-free, bit-at-a-time variant of the GHASH multiplication step.
 * Xi: 16-byte value, read one long at a time and overwritten with the
 *     result.
 * H:  key words in host byte order, read only.
 */
static void gcm_gmult_1bit(u64 Xi[2],const u64 H[2])
590
/* NOTE(review): Xi is a u64 array accessed through a long pointer here. */
const long *xi = (const long *)Xi;
591
/* Run-time endianness probe: little is 1 on little-endian hosts. */
const union { long one; char little; } is_endian = {1};
593
V.hi = H[0]; /* H is in host byte order, no byte swapping */
596
/* One iteration per long-sized word of Xi. */
for (j=0; j<16/sizeof(long); ++j) {
597
/* On little-endian hosts the word is converted to big-endian order
 * before use, via BSWAP8 or via two GETU32 loads. */
if (is_endian.little) {
598
if (sizeof(long)==8) {
600
X = (long)(BSWAP8(xi[j]));
602
const u8 *p = (const u8 *)(xi+j);
603
X = (long)((u64)GETU32(p)<<32|GETU32(p+4));
607
const u8 *p = (const u8 *)(xi+j);
614
/* One iteration per bit of X, most significant bit first. M is derived
 * from the top bit of X.
 * NOTE(review): X appears to be a signed long (see the casts above), so
 * the right shift relies on arithmetic-shift behaviour and the left shift
 * can overflow a signed value; confirm against the declaration of X. */
for (i=0; i<8*sizeof(long); ++i, X<<=1) {
615
u64 M = (u64)(X>>(8*sizeof(long)-1));
623
/* Store Z back into Xi: byte-swapped 64-bit words in the BSWAP8 variant,
 * or four PUTU32 stores in the other visible variant. */
if (is_endian.little) {
625
Xi[0] = BSWAP8(Z.hi);
626
Xi[1] = BSWAP8(Z.lo);
630
v = (u32)(Z.hi>>32); PUTU32(p,v);
631
v = (u32)(Z.hi); PUTU32(p+4,v);
632
v = (u32)(Z.lo>>32); PUTU32(p+8,v);
633
v = (u32)(Z.lo); PUTU32(p+12,v);
641
#define GCM_MUL(ctx,Xi) gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)
645
#if TABLE_BITS==4 && defined(GHASH_ASM)
646
# if !defined(I386_ONLY) && \
647
(defined(__i386) || defined(__i386__) || \
648
defined(__x86_64) || defined(__x86_64__) || \
649
defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64))
650
# define GHASH_ASM_X86_OR_64
651
# define GCM_FUNCREF_4BIT
652
extern unsigned int OPENSSL_ia32cap_P[2];
654
void gcm_init_clmul(u128 Htable[16],const u64 Xi[2]);
655
void gcm_gmult_clmul(u64 Xi[2],const u128 Htable[16]);
656
void gcm_ghash_clmul(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
658
# if defined(__i386) || defined(__i386__) || defined(_M_IX86)
659
# define GHASH_ASM_X86
660
void gcm_gmult_4bit_mmx(u64 Xi[2],const u128 Htable[16]);
661
void gcm_ghash_4bit_mmx(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
663
void gcm_gmult_4bit_x86(u64 Xi[2],const u128 Htable[16]);
664
void gcm_ghash_4bit_x86(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
666
# elif defined(__arm__) || defined(__arm)
667
# include "arm_arch.h"
669
# define GHASH_ASM_ARM
670
# define GCM_FUNCREF_4BIT
671
void gcm_gmult_neon(u64 Xi[2],const u128 Htable[16]);
672
void gcm_ghash_neon(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
677
#ifdef GCM_FUNCREF_4BIT
679
# define GCM_MUL(ctx,Xi) (*gcm_gmult_p)(ctx->Xi.u,ctx->Htable)
682
# define GHASH(ctx,in,len) (*gcm_ghash_p)(ctx->Xi.u,ctx->Htable,in,len)
686
/*
 * Initialise a GCM context for the given key and block function.
 * ctx:   context to initialise; it is zeroed first.
 * key:   opaque key passed through to block.
 * block: block function, used here to derive the hash key H.
 * After H is derived, the multiplication table is built and, in the
 * assembler-enabled builds, ctx->gmult and ctx->ghash are set to the
 * implementation selected from the CPU capability bits.
 * NOTE(review): storing key and block in ctx is presumably done on lines
 * not visible here (later functions read ctx->key and ctx->block).
 */
void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block)
688
/* Run-time endianness probe: little is 1 on little-endian hosts. */
const union { long one; char little; } is_endian = {1};
690
memset(ctx,0,sizeof(*ctx));
694
/* H is the result of running block in place on the just-zeroed ctx->H. */
(*block)(ctx->H.c,ctx->H.c,key);
696
if (is_endian.little) {
697
/* H is stored in host byte order */
699
ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
700
ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
704
/* Variant without BSWAP8: assemble both words from big-endian bytes. */
hi = (u64)GETU32(p) <<32|GETU32(p+4);
705
lo = (u64)GETU32(p+8)<<32|GETU32(p+12);
712
/* Table set-up; which call is compiled depends on preprocessor conditions
 * on lines not visible here. */
gcm_init_8bit(ctx->Htable,ctx->H.u);
714
# if defined(GHASH_ASM_X86_OR_64)
715
# if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
716
/* Use the carry-less multiply routines when both capability bits are set. */
if (OPENSSL_ia32cap_P[0]&(1<<24) && /* check FXSR bit */
717
OPENSSL_ia32cap_P[1]&(1<<1) ) { /* check PCLMULQDQ bit */
718
gcm_init_clmul(ctx->Htable,ctx->H.u);
719
ctx->gmult = gcm_gmult_clmul;
720
ctx->ghash = gcm_ghash_clmul;
724
gcm_init_4bit(ctx->Htable,ctx->H.u);
725
# if defined(GHASH_ASM_X86) /* x86 only */
726
# if defined(OPENSSL_IA32_SSE2)
727
if (OPENSSL_ia32cap_P[0]&(1<<25)) { /* check SSE bit */
729
if (OPENSSL_ia32cap_P[0]&(1<<23)) { /* check MMX bit */
731
ctx->gmult = gcm_gmult_4bit_mmx;
732
ctx->ghash = gcm_ghash_4bit_mmx;
734
ctx->gmult = gcm_gmult_4bit_x86;
735
ctx->ghash = gcm_ghash_4bit_x86;
738
ctx->gmult = gcm_gmult_4bit;
739
ctx->ghash = gcm_ghash_4bit;
741
# elif defined(GHASH_ASM_ARM)
742
/* ARM: NEON routines when the capability flag is set. */
if (OPENSSL_armcap_P & ARMV7_NEON) {
743
ctx->gmult = gcm_gmult_neon;
744
ctx->ghash = gcm_ghash_neon;
746
gcm_init_4bit(ctx->Htable,ctx->H.u);
747
ctx->gmult = gcm_gmult_4bit;
748
ctx->ghash = gcm_ghash_4bit;
751
gcm_init_4bit(ctx->Htable,ctx->H.u);
756
/*
 * Start a new message: reset the length counters, derive the initial
 * counter block Yi from the IV, and compute EK0 by running the block
 * function on Yi.
 * ctx: GCM context.
 * iv:  initialisation vector bytes.
 * len: IV length in bytes.
 * Two derivations are visible: a direct 12-byte copy of the IV, and a path
 * that XORs the IV into Yi 16 bytes at a time followed by the value len0.
 * NOTE(review): the condition choosing between them and the definition of
 * len0 are on lines not visible here.
 */
void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx,const unsigned char *iv,size_t len)
758
/* Run-time endianness probe: little is 1 on little-endian hosts. */
const union { long one; char little; } is_endian = {1};
760
#ifdef GCM_FUNCREF_4BIT
761
/* Local copy of the function pointer used by GCM_MUL in this build. */
void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
768
ctx->len.u[0] = 0; /* AAD length */
769
ctx->len.u[1] = 0; /* message length */
774
memcpy(ctx->Yi.c,iv,12);
783
/* Full 16-byte IV block. */
for (i=0; i<16; ++i) ctx->Yi.c[i] ^= iv[i];
789
/* Remaining IV bytes, fewer than a full block. */
for (i=0; i<len; ++i) ctx->Yi.c[i] ^= iv[i];
793
/* XOR len0 into the last eight bytes of Yi in big-endian order; three
 * equivalent forms are visible (BSWAP8, byte by byte, and a direct XOR). */
if (is_endian.little) {
795
ctx->Yi.u[1] ^= BSWAP8(len0);
797
ctx->Yi.c[8] ^= (u8)(len0>>56);
798
ctx->Yi.c[9] ^= (u8)(len0>>48);
799
ctx->Yi.c[10] ^= (u8)(len0>>40);
800
ctx->Yi.c[11] ^= (u8)(len0>>32);
801
ctx->Yi.c[12] ^= (u8)(len0>>24);
802
ctx->Yi.c[13] ^= (u8)(len0>>16);
803
ctx->Yi.c[14] ^= (u8)(len0>>8);
804
ctx->Yi.c[15] ^= (u8)(len0);
808
ctx->Yi.u[1] ^= len0;
812
/* The 32-bit counter is the last four bytes of Yi, big-endian. */
if (is_endian.little)
813
ctr = GETU32(ctx->Yi.c+12);
818
/* EK0 is the block function applied to the initial counter block. */
(*ctx->block)(ctx->Yi.c,ctx->EK0.c,ctx->key);
820
/* Write the counter back into Yi (its update is on lines not visible). */
if (is_endian.little)
821
PUTU32(ctx->Yi.c+12,ctr);
826
/*
 * Feed additional authenticated data into the running hash Xi.
 * ctx: GCM context.
 * aad: AAD bytes.
 * len: number of AAD bytes.
 * Returns -2 when message data has already been processed (non-zero
 * ctx->len.u[1]). Other return values are on lines not visible here.
 */
int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx,const unsigned char *aad,size_t len)
830
u64 alen = ctx->len.u[0];
831
#ifdef GCM_FUNCREF_4BIT
832
/* Local copies of the function pointers used by GCM_MUL and GHASH. */
void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
834
void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
835
const u8 *inp,size_t len) = ctx->ghash;
839
/* AAD must be supplied before any message data. */
if (ctx->len.u[1]) return -2;
842
/* Length limit of 2^61 bytes plus a wrap-around check.
 * NOTE(review): alen is presumably incremented by len just before this
 * test, on a line not visible here. */
if (alen>(U64(1)<<61) || (sizeof(len)==8 && alen<len))
844
ctx->len.u[0] = alen;
849
/* Complete a partially filled block one byte at a time; multiply once n
 * is back at zero. */
ctx->Xi.c[n] ^= *(aad++);
853
if (n==0) GCM_MUL(ctx,Xi);
861
/* i is len rounded down to a multiple of 16: the whole-block part. */
if ((i = (len&(size_t)-16))) {
868
for (i=0; i<16; ++i) ctx->Xi.c[i] ^= aad[i];
875
/* Trailing partial block: XOR it in and keep its byte count in n. */
n = (unsigned int)len;
876
for (i=0; i<len; ++i) ctx->Xi.c[i] ^= aad[i];
883
/*
 * Encrypt data in counter mode and fold the resulting ciphertext into the
 * running hash Xi.
 * ctx: GCM context.
 * in:  plaintext input.
 * out: ciphertext output.
 * (remaining parameters and the return statements are on lines not visible
 * in this view)
 * The visible code keeps the total message length in ctx->len.u[1],
 * produces key stream blocks with block(Yi) into EKi, and writes the
 * 32-bit counter back into the last four bytes of Yi.
 */
int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
884
const unsigned char *in, unsigned char *out,
887
/* Run-time endianness probe: little is 1 on little-endian hosts. */
const union { long one; char little; } is_endian = {1};
890
u64 mlen = ctx->len.u[1];
891
block128_f block = ctx->block;
892
void *key = ctx->key;
893
#ifdef GCM_FUNCREF_4BIT
894
/* Local copies of the function pointers used by GCM_MUL and GHASH. */
void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
896
void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
897
const u8 *inp,size_t len) = ctx->ghash;
902
n = (unsigned int)mlen%16; /* alternative to ctx->mres */
905
/* Message length limit of 2^36 - 32 bytes plus a wrap-around check.
 * NOTE(review): mlen is presumably incremented by len just before this
 * test, on a line not visible here. */
if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
907
ctx->len.u[1] = mlen;
910
/* First call to encrypt finalizes GHASH(AAD) */
915
/* The 32-bit counter is the last four bytes of Yi, big-endian. */
if (is_endian.little)
916
ctr = GETU32(ctx->Yi.c+12);
921
#if !defined(OPENSSL_SMALL_FOOTPRINT)
922
if (16%sizeof(size_t) == 0) do { /* always true actually */
925
/* Finish a partially used key stream block byte by byte; each ciphertext
 * byte is also XORed into Xi. */
ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
929
if (n==0) GCM_MUL(ctx,Xi);
935
#if defined(STRICT_ALIGNMENT)
936
/* Word-wise processing below needs in and out aligned to size_t. */
if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
939
#if defined(GHASH) && defined(GHASH_CHUNK)
940
/* Encrypt GHASH_CHUNK bytes, then hash the ciphertext just produced. */
while (len>=GHASH_CHUNK) {
941
size_t j=GHASH_CHUNK;
944
size_t *out_t=(size_t *)out;
945
const size_t *in_t=(const size_t *)in;
947
(*block)(ctx->Yi.c,ctx->EKi.c,key);
949
if (is_endian.little)
950
PUTU32(ctx->Yi.c+12,ctr);
953
for (i=0; i<16/sizeof(size_t); ++i)
954
out_t[i] = in_t[i] ^ ctx->EKi.t[i];
959
GHASH(ctx,out-GHASH_CHUNK,GHASH_CHUNK);
962
/* Remaining whole blocks: i is len rounded down to a multiple of 16. */
if ((i = (len&(size_t)-16))) {
966
size_t *out_t=(size_t *)out;
967
const size_t *in_t=(const size_t *)in;
969
(*block)(ctx->Yi.c,ctx->EKi.c,key);
971
if (is_endian.little)
972
PUTU32(ctx->Yi.c+12,ctr);
975
for (i=0; i<16/sizeof(size_t); ++i)
976
out_t[i] = in_t[i] ^ ctx->EKi.t[i];
985
/* Another word-wise block loop; presumably the variant compiled when
 * GHASH is not available (the preprocessor lines are not visible). */
size_t *out_t=(size_t *)out;
986
const size_t *in_t=(const size_t *)in;
988
(*block)(ctx->Yi.c,ctx->EKi.c,key);
990
if (is_endian.little)
991
PUTU32(ctx->Yi.c+12,ctr);
994
for (i=0; i<16/sizeof(size_t); ++i)
996
out_t[i] = in_t[i]^ctx->EKi.t[i];
1004
/* Trailing partial block: one more key stream block, consumed bytewise. */
(*block)(ctx->Yi.c,ctx->EKi.c,key);
1006
if (is_endian.little)
1007
PUTU32(ctx->Yi.c+12,ctr);
1011
ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
1020
/* Byte-at-a-time loop over the whole input. */
for (i=0;i<len;++i) {
1022
(*block)(ctx->Yi.c,ctx->EKi.c,key);
1024
if (is_endian.little)
1025
PUTU32(ctx->Yi.c+12,ctr);
1029
ctx->Xi.c[n] ^= out[i] = in[i]^ctx->EKi.c[n];
1039
/*
 * Decrypt data in counter mode; the ciphertext input is what gets hashed.
 * ctx: GCM context.
 * in:  ciphertext input.
 * out: plaintext output.
 * (remaining parameters and the return statements are on lines not visible
 * in this view)
 * The visible code keeps the total message length in ctx->len.u[1],
 * produces key stream blocks with block(Yi) into EKi, and writes the
 * 32-bit counter back into the last four bytes of Yi.
 */
int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
1040
const unsigned char *in, unsigned char *out,
1043
/* Run-time endianness probe: little is 1 on little-endian hosts. */
const union { long one; char little; } is_endian = {1};
1044
unsigned int n, ctr;
1046
u64 mlen = ctx->len.u[1];
1047
block128_f block = ctx->block;
1048
void *key = ctx->key;
1049
#ifdef GCM_FUNCREF_4BIT
1050
/* Local copies of the function pointers used by GCM_MUL and GHASH. */
void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
1052
void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1053
const u8 *inp,size_t len) = ctx->ghash;
1058
/* Message length limit of 2^36 - 32 bytes plus a wrap-around check.
 * NOTE(review): mlen is presumably incremented by len just before this
 * test, on a line not visible here. */
if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
1060
ctx->len.u[1] = mlen;
1063
/* First call to decrypt finalizes GHASH(AAD) */
1068
/* The 32-bit counter is the last four bytes of Yi, big-endian. */
if (is_endian.little)
1069
ctr = GETU32(ctx->Yi.c+12);
1074
#if !defined(OPENSSL_SMALL_FOOTPRINT)
1075
if (16%sizeof(size_t) == 0) do { /* always true actually */
1079
/* Finish a partially used key stream block byte by byte; c is presumably
 * the ciphertext byte just read (its definition is not visible). */
*(out++) = c^ctx->EKi.c[n];
1084
if (n==0) GCM_MUL (ctx,Xi);
1090
#if defined(STRICT_ALIGNMENT)
1091
/* Word-wise processing below needs in and out aligned to size_t. */
if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
1094
#if defined(GHASH) && defined(GHASH_CHUNK)
1095
/* Hash GHASH_CHUNK bytes of ciphertext first, then decrypt them. */
while (len>=GHASH_CHUNK) {
1096
size_t j=GHASH_CHUNK;
1098
GHASH(ctx,in,GHASH_CHUNK);
1100
size_t *out_t=(size_t *)out;
1101
const size_t *in_t=(const size_t *)in;
1103
(*block)(ctx->Yi.c,ctx->EKi.c,key);
1105
if (is_endian.little)
1106
PUTU32(ctx->Yi.c+12,ctr);
1109
for (i=0; i<16/sizeof(size_t); ++i)
1110
out_t[i] = in_t[i]^ctx->EKi.t[i];
1117
/* Remaining whole blocks: i is len rounded down to a multiple of 16. */
if ((i = (len&(size_t)-16))) {
1120
size_t *out_t=(size_t *)out;
1121
const size_t *in_t=(const size_t *)in;
1123
(*block)(ctx->Yi.c,ctx->EKi.c,key);
1125
if (is_endian.little)
1126
PUTU32(ctx->Yi.c+12,ctr);
1129
for (i=0; i<16/sizeof(size_t); ++i)
1130
out_t[i] = in_t[i]^ctx->EKi.t[i];
1138
/* Another word-wise block loop; presumably the variant compiled when
 * GHASH is not available (the preprocessor lines are not visible). */
size_t *out_t=(size_t *)out;
1139
const size_t *in_t=(const size_t *)in;
1141
(*block)(ctx->Yi.c,ctx->EKi.c,key);
1143
if (is_endian.little)
1144
PUTU32(ctx->Yi.c+12,ctr);
1147
for (i=0; i<16/sizeof(size_t); ++i) {
1149
/* NOTE(review): out_t and in_t are declared above, yet this store goes
 * through out (an unsigned char pointer) while indexing EKi.t word-wise,
 * unlike the sibling loops which store via out_t. This looks like a bug
 * that truncates each word to one byte; confirm and compare with the
 * definition of c on the line not visible here. */
out[i] = c^ctx->EKi.t[i];
1159
/* Trailing partial block: one more key stream block, consumed bytewise. */
(*block)(ctx->Yi.c,ctx->EKi.c,key);
1161
if (is_endian.little)
1162
PUTU32(ctx->Yi.c+12,ctr);
1168
out[n] = c^ctx->EKi.c[n];
1177
/* Byte-at-a-time loop over the whole input. */
for (i=0;i<len;++i) {
1180
(*block)(ctx->Yi.c,ctx->EKi.c,key);
1182
if (is_endian.little)
1183
PUTU32(ctx->Yi.c+12,ctr);
1188
out[i] = c^ctx->EKi.c[n];
1199
/*
 * Encrypt using a caller-supplied multi-block counter-mode routine and fold
 * the ciphertext into the running hash Xi.
 * ctx:    GCM context.
 * in:     plaintext input.
 * out:    ciphertext output.
 * len:    number of bytes to process.
 * stream: routine called as stream(in, out, blocks, key, Yi) for runs of
 *         whole 16-byte blocks.
 * Return statements are on lines not visible in this view.
 */
int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
1200
const unsigned char *in, unsigned char *out,
1201
size_t len, ctr128_f stream)
1203
/* Run-time endianness probe: little is 1 on little-endian hosts. */
const union { long one; char little; } is_endian = {1};
1204
unsigned int n, ctr;
1206
u64 mlen = ctx->len.u[1];
1207
void *key = ctx->key;
1208
#ifdef GCM_FUNCREF_4BIT
1209
/* Local copies of the function pointers used by GCM_MUL and GHASH. */
void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
1211
void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1212
const u8 *inp,size_t len) = ctx->ghash;
1217
/* Message length limit of 2^36 - 32 bytes plus a wrap-around check.
 * NOTE(review): mlen is presumably incremented by len just before this
 * test, on a line not visible here. */
if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
1219
ctx->len.u[1] = mlen;
1222
/* First call to encrypt finalizes GHASH(AAD) */
1227
/* The 32-bit counter is the last four bytes of Yi, big-endian. */
if (is_endian.little)
1228
ctr = GETU32(ctx->Yi.c+12);
1235
/* Finish a partially used key stream block byte by byte; each ciphertext
 * byte is also XORed into Xi. */
ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
1239
if (n==0) GCM_MUL(ctx,Xi);
1245
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1246
/* Encrypt GHASH_CHUNK bytes with stream, advance the counter by the
 * number of blocks, then hash the ciphertext. */
while (len>=GHASH_CHUNK) {
1247
(*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c);
1248
ctr += GHASH_CHUNK/16;
1249
if (is_endian.little)
1250
PUTU32(ctx->Yi.c+12,ctr);
1253
GHASH(ctx,out,GHASH_CHUNK);
1259
/* Remaining whole blocks: i is len rounded down to a multiple of 16; j is
 * presumably the block count (its definition is not visible). */
if ((i = (len&(size_t)-16))) {
1262
(*stream)(in,out,j,key,ctx->Yi.c);
1263
ctr += (unsigned int)j;
1264
if (is_endian.little)
1265
PUTU32(ctx->Yi.c+12,ctr);
1275
/* Hashing without GHASH: XOR one ciphertext block into Xi. */
for (i=0;i<16;++i) ctx->Xi.c[i] ^= out[i];
1282
/* Trailing partial block: single key stream block from ctx->block. */
(*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
1284
if (is_endian.little)
1285
PUTU32(ctx->Yi.c+12,ctr);
1289
ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
1298
/*
 * Decrypt using a caller-supplied multi-block counter-mode routine; the
 * ciphertext input is what gets hashed.
 * ctx:    GCM context.
 * in:     ciphertext input.
 * out:    plaintext output.
 * len:    number of bytes to process.
 * stream: routine called as stream(in, out, blocks, key, Yi) for runs of
 *         whole 16-byte blocks.
 * Return statements are on lines not visible in this view.
 */
int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
1299
const unsigned char *in, unsigned char *out,
1300
size_t len,ctr128_f stream)
1302
/* Run-time endianness probe: little is 1 on little-endian hosts. */
const union { long one; char little; } is_endian = {1};
1303
unsigned int n, ctr;
1305
u64 mlen = ctx->len.u[1];
1306
void *key = ctx->key;
1307
#ifdef GCM_FUNCREF_4BIT
1308
/* Local copies of the function pointers used by GCM_MUL and GHASH. */
void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
1310
void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1311
const u8 *inp,size_t len) = ctx->ghash;
1316
/* Message length limit of 2^36 - 32 bytes plus a wrap-around check.
 * NOTE(review): mlen is presumably incremented by len just before this
 * test, on a line not visible here. */
if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
1318
ctx->len.u[1] = mlen;
1321
/* First call to decrypt finalizes GHASH(AAD) */
1326
/* The 32-bit counter is the last four bytes of Yi, big-endian. */
if (is_endian.little)
1327
ctr = GETU32(ctx->Yi.c+12);
1335
/* Finish a partially used key stream block byte by byte; c is presumably
 * the ciphertext byte just read (its definition is not visible). */
*(out++) = c^ctx->EKi.c[n];
1340
if (n==0) GCM_MUL (ctx,Xi);
1346
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1347
/* Hash GHASH_CHUNK bytes of ciphertext, then decrypt them with stream and
 * advance the counter by the number of blocks. */
while (len>=GHASH_CHUNK) {
1348
GHASH(ctx,in,GHASH_CHUNK);
1349
(*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c);
1350
ctr += GHASH_CHUNK/16;
1351
if (is_endian.little)
1352
PUTU32(ctx->Yi.c+12,ctr);
1360
/* Remaining whole blocks: i is len rounded down to a multiple of 16. */
if ((i = (len&(size_t)-16))) {
1368
/* Hashing without GHASH: XOR one ciphertext block into Xi. */
for (k=0;k<16;++k) ctx->Xi.c[k] ^= in[k];
1375
/* j is presumably the block count (its definition is not visible). */
(*stream)(in,out,j,key,ctx->Yi.c);
1376
ctr += (unsigned int)j;
1377
if (is_endian.little)
1378
PUTU32(ctx->Yi.c+12,ctr);
1386
/* Trailing partial block: single key stream block from ctx->block. */
(*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
1388
if (is_endian.little)
1389
PUTU32(ctx->Yi.c+12,ctr);
1395
out[n] = c^ctx->EKi.c[n];
1404
/*
 * Finalise the authentication tag in ctx->Xi and optionally compare it
 * with an expected tag.
 * ctx: GCM context.
 * tag: expected tag, or NULL.
 * (the remaining parameter, referred to below as len, is declared on a
 * line not visible in this view)
 * When tag is non-NULL and len does not exceed sizeof(ctx->Xi), returns the
 * memcmp result of the computed and expected tag (0 on match). The return
 * value for the other case is on lines not visible here.
 */
int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx,const unsigned char *tag,
1407
/* Run-time endianness probe: little is 1 on little-endian hosts. */
const union { long one; char little; } is_endian = {1};
1408
/* Byte counts converted to bit counts. */
u64 alen = ctx->len.u[0]<<3;
1409
u64 clen = ctx->len.u[1]<<3;
1410
#ifdef GCM_FUNCREF_4BIT
1411
/* Local copy of the function pointer used by GCM_MUL in this build. */
void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
1414
/* A partial block is still pending (the statement it guards is not
 * visible in this view). */
if (ctx->mres || ctx->ares)
1417
/* Convert both bit lengths to big-endian byte order, either with BSWAP8
 * or by storing them in ctx->len and re-reading them with GETU32. */
if (is_endian.little) {
1419
alen = BSWAP8(alen);
1420
clen = BSWAP8(clen);
1424
ctx->len.u[0] = alen;
1425
ctx->len.u[1] = clen;
1427
alen = (u64)GETU32(p) <<32|GETU32(p+4);
1428
clen = (u64)GETU32(p+8)<<32|GETU32(p+12);
1432
/* Fold the length block into the hash. */
ctx->Xi.u[0] ^= alen;
1433
ctx->Xi.u[1] ^= clen;
1436
/* Mask the hash with EK0 to obtain the tag. */
ctx->Xi.u[0] ^= ctx->EK0.u[0];
1437
ctx->Xi.u[1] ^= ctx->EK0.u[1];
1439
if (tag && len<=sizeof(ctx->Xi))
1440
/* NOTE(review): memcmp is not guaranteed to run in constant time, so
 * comparing an authentication tag with it can leak timing information;
 * consider a constant-time comparison such as CRYPTO_memcmp if this
 * OpenSSL version provides it. */
return memcmp(ctx->Xi.c,tag,len);
1445
/*
 * Finalise the tag and copy it to the caller.
 * ctx: GCM context.
 * tag: output buffer.
 * len: number of tag bytes wanted; at most sizeof(ctx->Xi.c) bytes are
 *      copied.
 */
void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
1447
/* Called only to finalise ctx->Xi; the return value is ignored. */
CRYPTO_gcm128_finish(ctx, NULL, 0);
1448
memcpy(tag, ctx->Xi.c, len<=sizeof(ctx->Xi.c)?len:sizeof(ctx->Xi.c));
1451
/*
 * Allocate a GCM context with OPENSSL_malloc and, when the allocation
 * succeeds, initialise it for the given key and block function.
 * key:   opaque key passed to CRYPTO_gcm128_init.
 * block: block function passed to CRYPTO_gcm128_init.
 * The return statement is on a line not visible here; presumably ret.
 */
GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
1453
GCM128_CONTEXT *ret;
1455
/* Initialise only when the allocation did not return NULL. */
if ((ret = (GCM128_CONTEXT *)OPENSSL_malloc(sizeof(GCM128_CONTEXT))))
1456
CRYPTO_gcm128_init(ret,key,block);
1461
/*
 * Wipe a GCM context with OPENSSL_cleanse.
 * ctx: context to wipe.
 * NOTE(review): any NULL check or release of the memory is on lines not
 * visible in this view.
 */
void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
1464
OPENSSL_cleanse(ctx,sizeof(*ctx));
1469
#if defined(SELFTEST)
1471
#include <openssl/aes.h>
1474
static const u8 K1[16],
1479
T1[]= {0x58,0xe2,0xfc,0xce,0xfa,0x7e,0x30,0x61,0x36,0x7f,0x1d,0x57,0xa4,0xe7,0x45,0x5a};
1485
static const u8 P2[16],
1486
C2[]= {0x03,0x88,0xda,0xce,0x60,0xb6,0xa3,0x92,0xf3,0x28,0xc2,0xb9,0x71,0xb2,0xfe,0x78},
1487
T2[]= {0xab,0x6e,0x47,0xd4,0x2c,0xec,0x13,0xbd,0xf5,0x3a,0x67,0xb2,0x12,0x57,0xbd,0xdf};
1491
static const u8 K3[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
1492
P3[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1493
0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1494
0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1495
0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1496
IV3[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1497
C3[]= {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
1498
0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
1499
0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
1500
0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91,0x47,0x3f,0x59,0x85},
1501
T3[]= {0x4d,0x5c,0x2a,0xf3,0x27,0xcd,0x64,0xa6,0x2c,0xf3,0x5a,0xbd,0x2b,0xa6,0xfa,0xb4};
1506
static const u8 P4[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1507
0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1508
0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1509
0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1510
A4[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1511
0xab,0xad,0xda,0xd2},
1512
C4[]= {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
1513
0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
1514
0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
1515
0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91},
1516
T4[]= {0x5b,0xc9,0x4f,0xbc,0x32,0x21,0xa5,0xdb,0x94,0xfa,0xe9,0x5a,0xe7,0x12,0x1a,0x47};
1522
static const u8 IV5[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1523
C5[]= {0x61,0x35,0x3b,0x4c,0x28,0x06,0x93,0x4a,0x77,0x7f,0xf5,0x1f,0xa2,0x2a,0x47,0x55,
1524
0x69,0x9b,0x2a,0x71,0x4f,0xcd,0xc6,0xf8,0x37,0x66,0xe5,0xf9,0x7b,0x6c,0x74,0x23,
1525
0x73,0x80,0x69,0x00,0xe4,0x9f,0x24,0xb2,0x2b,0x09,0x75,0x44,0xd4,0x89,0x6b,0x42,
1526
0x49,0x89,0xb5,0xe1,0xeb,0xac,0x0f,0x07,0xc2,0x3f,0x45,0x98},
1527
T5[]= {0x36,0x12,0xd2,0xe7,0x9e,0x3b,0x07,0x85,0x56,0x1b,0xe1,0x4a,0xac,0xa2,0xfc,0xcb};
1533
static const u8 IV6[]= {0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1534
0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1535
0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1536
0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1537
C6[]= {0x8c,0xe2,0x49,0x98,0x62,0x56,0x15,0xb6,0x03,0xa0,0x33,0xac,0xa1,0x3f,0xb8,0x94,
1538
0xbe,0x91,0x12,0xa5,0xc3,0xa2,0x11,0xa8,0xba,0x26,0x2a,0x3c,0xca,0x7e,0x2c,0xa7,
1539
0x01,0xe4,0xa9,0xa4,0xfb,0xa4,0x3c,0x90,0xcc,0xdc,0xb2,0x81,0xd4,0x8c,0x7c,0x6f,
1540
0xd6,0x28,0x75,0xd2,0xac,0xa4,0x17,0x03,0x4c,0x34,0xae,0xe5},
1541
T6[]= {0x61,0x9c,0xc5,0xae,0xff,0xfe,0x0b,0xfa,0x46,0x2a,0xf4,0x3c,0x16,0x99,0xd0,0x50};
1544
static const u8 K7[24],
1549
T7[]= {0xcd,0x33,0xb2,0x8a,0xc7,0x73,0xf7,0x4b,0xa0,0x0e,0xd1,0xf3,0x12,0x57,0x24,0x35};
1555
static const u8 P8[16],
1556
C8[]= {0x98,0xe7,0x24,0x7c,0x07,0xf0,0xfe,0x41,0x1c,0x26,0x7e,0x43,0x84,0xb0,0xf6,0x00},
1557
T8[]= {0x2f,0xf5,0x8d,0x80,0x03,0x39,0x27,0xab,0x8e,0xf4,0xd4,0x58,0x75,0x14,0xf0,0xfb};
1561
static const u8 K9[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
1562
0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c},
1563
P9[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1564
0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1565
0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1566
0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1567
IV9[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1568
C9[]= {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
1569
0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
1570
0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
1571
0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10,0xac,0xad,0xe2,0x56},
1572
T9[]= {0x99,0x24,0xa7,0xc8,0x58,0x73,0x36,0xbf,0xb1,0x18,0x02,0x4d,0xb8,0x67,0x4a,0x14};
1577
static const u8 P10[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1578
0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1579
0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1580
0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1581
A10[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1582
0xab,0xad,0xda,0xd2},
1583
C10[]= {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
1584
0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
1585
0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
1586
0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10},
1587
T10[]= {0x25,0x19,0x49,0x8e,0x80,0xf1,0x47,0x8f,0x37,0xba,0x55,0xbd,0x6d,0x27,0x61,0x8c};
1593
static const u8 IV11[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1594
C11[]= {0x0f,0x10,0xf5,0x99,0xae,0x14,0xa1,0x54,0xed,0x24,0xb3,0x6e,0x25,0x32,0x4d,0xb8,
1595
0xc5,0x66,0x63,0x2e,0xf2,0xbb,0xb3,0x4f,0x83,0x47,0x28,0x0f,0xc4,0x50,0x70,0x57,
1596
0xfd,0xdc,0x29,0xdf,0x9a,0x47,0x1f,0x75,0xc6,0x65,0x41,0xd4,0xd4,0xda,0xd1,0xc9,
1597
0xe9,0x3a,0x19,0xa5,0x8e,0x8b,0x47,0x3f,0xa0,0xf0,0x62,0xf7},
1598
T11[]= {0x65,0xdc,0xc5,0x7f,0xcf,0x62,0x3a,0x24,0x09,0x4f,0xcc,0xa4,0x0d,0x35,0x33,0xf8};
1604
static const u8 IV12[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1605
0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1606
0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1607
0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1608
C12[]= {0xd2,0x7e,0x88,0x68,0x1c,0xe3,0x24,0x3c,0x48,0x30,0x16,0x5a,0x8f,0xdc,0xf9,0xff,
1609
0x1d,0xe9,0xa1,0xd8,0xe6,0xb4,0x47,0xef,0x6e,0xf7,0xb7,0x98,0x28,0x66,0x6e,0x45,
1610
0x81,0xe7,0x90,0x12,0xaf,0x34,0xdd,0xd9,0xe2,0xf0,0x37,0x58,0x9b,0x29,0x2d,0xb3,
1611
0xe6,0x7c,0x03,0x67,0x45,0xfa,0x22,0xe7,0xe9,0xb7,0x37,0x3b},
1612
T12[]= {0xdc,0xf5,0x66,0xff,0x29,0x1c,0x25,0xbb,0xb8,0x56,0x8f,0xc3,0xd3,0x76,0xa6,0xd9};
1615
static const u8 K13[32],
1620
T13[]={0x53,0x0f,0x8a,0xfb,0xc7,0x45,0x36,0xb9,0xa9,0x63,0xb4,0xf1,0xc4,0xcb,0x73,0x8b};
1625
static const u8 P14[16],
1627
C14[]= {0xce,0xa7,0x40,0x3d,0x4d,0x60,0x6b,0x6e,0x07,0x4e,0xc5,0xd3,0xba,0xf3,0x9d,0x18},
1628
T14[]= {0xd0,0xd1,0xc8,0xa7,0x99,0x99,0x6b,0xf0,0x26,0x5b,0x98,0xb5,0xd4,0x8a,0xb9,0x19};
1632
static const u8 K15[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
1633
0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
1634
P15[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1635
0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1636
0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1637
0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1638
IV15[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1639
C15[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
1640
0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
1641
0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
1642
0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62,0x89,0x80,0x15,0xad},
1643
T15[]= {0xb0,0x94,0xda,0xc5,0xd9,0x34,0x71,0xbd,0xec,0x1a,0x50,0x22,0x70,0xe3,0xcc,0x6c};
1648
static const u8 P16[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1649
0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1650
0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1651
0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1652
A16[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1653
0xab,0xad,0xda,0xd2},
1654
C16[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
1655
0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
1656
0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
1657
0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62},
1658
T16[]= {0x76,0xfc,0x6e,0xce,0x0f,0x4e,0x17,0x68,0xcd,0xdf,0x88,0x53,0xbb,0x2d,0x55,0x1b};
1664
static const u8 IV17[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1665
C17[]= {0xc3,0x76,0x2d,0xf1,0xca,0x78,0x7d,0x32,0xae,0x47,0xc1,0x3b,0xf1,0x98,0x44,0xcb,
1666
0xaf,0x1a,0xe1,0x4d,0x0b,0x97,0x6a,0xfa,0xc5,0x2f,0xf7,0xd7,0x9b,0xba,0x9d,0xe0,
1667
0xfe,0xb5,0x82,0xd3,0x39,0x34,0xa4,0xf0,0x95,0x4c,0xc2,0x36,0x3b,0xc7,0x3f,0x78,
1668
0x62,0xac,0x43,0x0e,0x64,0xab,0xe4,0x99,0xf4,0x7c,0x9b,0x1f},
1669
T17[]= {0x3a,0x33,0x7d,0xbf,0x46,0xa7,0x92,0xc4,0x5e,0x45,0x49,0x13,0xfe,0x2e,0xa8,0xf2};
1675
static const u8 IV18[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1676
0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1677
0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1678
0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1679
C18[]= {0x5a,0x8d,0xef,0x2f,0x0c,0x9e,0x53,0xf1,0xf7,0x5d,0x78,0x53,0x65,0x9e,0x2a,0x20,
1680
0xee,0xb2,0xb2,0x2a,0xaf,0xde,0x64,0x19,0xa0,0x58,0xab,0x4f,0x6f,0x74,0x6b,0xf4,
1681
0x0f,0xc0,0xc3,0xb7,0x80,0xf2,0x44,0x45,0x2d,0xa3,0xeb,0xf1,0xc5,0xd8,0x2c,0xde,
1682
0xa2,0x41,0x89,0x97,0x20,0x0e,0xf8,0x2e,0x44,0xae,0x7e,0x3f},
1683
T18[]= {0xa4,0x4a,0x82,0x66,0xee,0x1c,0x8e,0xb0,0xc8,0xb5,0xd4,0xcf,0x5a,0xe9,0xf1,0x9a};
1690
static const u8 A19[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1691
0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1692
0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1693
0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55,
1694
0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
1695
0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
1696
0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
1697
0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62,0x89,0x80,0x15,0xad},
1698
T19[]= {0x5f,0xea,0x79,0x3a,0x2d,0x6f,0x97,0x4d,0x37,0xe6,0x8e,0x0c,0xb8,0xff,0x94,0x92};
1703
static const u8 IV20[64]={0xff,0xff,0xff,0xff}, /* this results in 0xff in counter LSB */
1705
C20[]= {0x56,0xb3,0x37,0x3c,0xa9,0xef,0x6e,0x4a,0x2b,0x64,0xfe,0x1e,0x9a,0x17,0xb6,0x14,
1706
0x25,0xf1,0x0d,0x47,0xa7,0x5a,0x5f,0xce,0x13,0xef,0xc6,0xbc,0x78,0x4a,0xf2,0x4f,
1707
0x41,0x41,0xbd,0xd4,0x8c,0xf7,0xc7,0x70,0x88,0x7a,0xfd,0x57,0x3c,0xca,0x54,0x18,
1708
0xa9,0xae,0xff,0xcd,0x7c,0x5c,0xed,0xdf,0xc6,0xa7,0x83,0x97,0xb9,0xa8,0x5b,0x49,
1709
0x9d,0xa5,0x58,0x25,0x72,0x67,0xca,0xab,0x2a,0xd0,0xb2,0x3c,0xa4,0x76,0xa5,0x3c,
1710
0xb1,0x7f,0xb4,0x1c,0x4b,0x8b,0x47,0x5c,0xb4,0xf3,0xf7,0x16,0x50,0x94,0xc2,0x29,
1711
0xc9,0xe8,0xc4,0xdc,0x0a,0x2a,0x5f,0xf1,0x90,0x3e,0x50,0x15,0x11,0x22,0x13,0x76,
1712
0xa1,0xcd,0xb8,0x36,0x4c,0x50,0x61,0xa2,0x0c,0xae,0x74,0xbc,0x4a,0xcd,0x76,0xce,
1713
0xb0,0xab,0xc9,0xfd,0x32,0x17,0xef,0x9f,0x8c,0x90,0xbe,0x40,0x2d,0xdf,0x6d,0x86,
1714
0x97,0xf4,0xf8,0x80,0xdf,0xf1,0x5b,0xfb,0x7a,0x6b,0x28,0x24,0x1e,0xc8,0xfe,0x18,
1715
0x3c,0x2d,0x59,0xe3,0xf9,0xdf,0xff,0x65,0x3c,0x71,0x26,0xf0,0xac,0xb9,0xe6,0x42,
1716
0x11,0xf4,0x2b,0xae,0x12,0xaf,0x46,0x2b,0x10,0x70,0xbe,0xf1,0xab,0x5e,0x36,0x06,
1717
0x87,0x2c,0xa1,0x0d,0xee,0x15,0xb3,0x24,0x9b,0x1a,0x1b,0x95,0x8f,0x23,0x13,0x4c,
1718
0x4b,0xcc,0xb7,0xd0,0x32,0x00,0xbc,0xe4,0x20,0xa2,0xf8,0xeb,0x66,0xdc,0xf3,0x64,
1719
0x4d,0x14,0x23,0xc1,0xb5,0x69,0x90,0x03,0xc1,0x3e,0xce,0xf4,0xbf,0x38,0xa3,0xb6,
1720
0x0e,0xed,0xc3,0x40,0x33,0xba,0xc1,0x90,0x27,0x83,0xdc,0x6d,0x89,0xe2,0xe7,0x74,
1721
0x18,0x8a,0x43,0x9c,0x7e,0xbc,0xc0,0x67,0x2d,0xbd,0xa4,0xdd,0xcf,0xb2,0x79,0x46,
1722
0x13,0xb0,0xbe,0x41,0x31,0x5e,0xf7,0x78,0x70,0x8a,0x70,0xee,0x7d,0x75,0x16,0x5c},
1723
T20[]= {0x8b,0x30,0x7f,0x6b,0x33,0x28,0x6d,0x0a,0xb0,0x26,0xa9,0xed,0x3f,0xe1,0xe8,0x5f};
1725
/*
 * TEST_CASE(n): known-answer check for vector set n, built by token-pasting
 * onto K##n, IV##n, A##n, P##n, C##n and T##n.
 *
 * Encrypt pass: set the AES key from K##n (sizeof(K##n)*8 bits), initialise
 * the GCM context, set the IV, zero `out`, feed A##n and P##n when they are
 * non-null, then fail the test if CRYPTO_gcm128_finish() against the 16-byte
 * T##n returns non-zero or `out` differs from C##n (when C##n is non-null).
 *
 * Decrypt pass: reset the IV, zero `out`, feed A##n and C##n, and fail if
 * the tag check fails or `out` differs from P##n (when P##n is non-null).
 *
 * Each failure increments `ret` and prints a message. The macro relies on
 * `key`, `ctx` and `ret` being declared in the enclosing scope.
 *
 * NOTE(review): `out` is sized with sizeof(P##n), which is the data length
 * only when P##n is an array; several P/A/C declarations are not visible in
 * this extract -- confirm. The macro's closing line is also not visible.
 */
#define TEST_CASE(n) do { \
1726
u8 out[sizeof(P##n)]; \
1727
AES_set_encrypt_key(K##n,sizeof(K##n)*8,&key); \
1728
CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt); \
1729
CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n)); \
1730
memset(out,0,sizeof(out)); \
1731
if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n)); \
1732
if (P##n) CRYPTO_gcm128_encrypt(&ctx,P##n,out,sizeof(out)); \
1733
if (CRYPTO_gcm128_finish(&ctx,T##n,16) || \
1734
(C##n && memcmp(out,C##n,sizeof(out)))) \
1735
ret++, printf ("encrypt test#%d failed.\n",n); \
1736
CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n)); \
1737
memset(out,0,sizeof(out)); \
1738
if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n)); \
1739
if (C##n) CRYPTO_gcm128_decrypt(&ctx,C##n,out,sizeof(out)); \
1740
if (CRYPTO_gcm128_finish(&ctx,T##n,16) || \
1741
(P##n && memcmp(out,P##n,sizeof(out)))) \
1742
ret++, printf ("decrypt test#%d failed.\n",n); \
1772
#ifdef OPENSSL_CPUID_OBJ
1774
size_t start,stop,gcm_t,ctr_t,OPENSSL_rdtsc();
1775
union { u64 u; u8 c[1024]; } buf;
1778
AES_set_encrypt_key(K1,sizeof(K1)*8,&key);
1779
CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt);
1780
CRYPTO_gcm128_setiv(&ctx,IV1,sizeof(IV1));
1782
CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
1783
start = OPENSSL_rdtsc();
1784
CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
1785
gcm_t = OPENSSL_rdtsc() - start;
1787
CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
1788
&key,ctx.Yi.c,ctx.EKi.c,&ctx.mres,
1789
(block128_f)AES_encrypt);
1790
start = OPENSSL_rdtsc();
1791
CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
1792
&key,ctx.Yi.c,ctx.EKi.c,&ctx.mres,
1793
(block128_f)AES_encrypt);
1794
ctr_t = OPENSSL_rdtsc() - start;
1796
printf("%.2f-%.2f=%.2f\n",
1797
gcm_t/(double)sizeof(buf),
1798
ctr_t/(double)sizeof(buf),
1799
(gcm_t-ctr_t)/(double)sizeof(buf));
1802
void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1803
const u8 *inp,size_t len) = ctx.ghash;
1805
GHASH((&ctx),buf.c,sizeof(buf));
1806
start = OPENSSL_rdtsc();
1807
for (i=0;i<100;++i) GHASH((&ctx),buf.c,sizeof(buf));
1808
gcm_t = OPENSSL_rdtsc() - start;
1809
printf("%.2f\n",gcm_t/(double)sizeof(buf)/(double)i);