~ubuntu-branches/ubuntu/wily/openssl/wily

« back to all changes in this revision

Viewing changes to .pc/power8-optimisations.patch/crypto/modes/gcm128.c

  • Committer: Package Import Robot
  • Author(s): Colin Watson
  • Date: 2014-09-26 11:32:32 UTC
  • Revision ID: package-import@ubuntu.com-20140926113232-ds6gavd9wl43wft5
Tags: 1.0.1f-1ubuntu8
Backport collected POWER8 optimisations from upstream (LP: #1290579).

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/* ====================================================================
 
2
 * Copyright (c) 2010 The OpenSSL Project.  All rights reserved.
 
3
 *
 
4
 * Redistribution and use in source and binary forms, with or without
 
5
 * modification, are permitted provided that the following conditions
 
6
 * are met:
 
7
 *
 
8
 * 1. Redistributions of source code must retain the above copyright
 
9
 *    notice, this list of conditions and the following disclaimer. 
 
10
 *
 
11
 * 2. Redistributions in binary form must reproduce the above copyright
 
12
 *    notice, this list of conditions and the following disclaimer in
 
13
 *    the documentation and/or other materials provided with the
 
14
 *    distribution.
 
15
 *
 
16
 * 3. All advertising materials mentioning features or use of this
 
17
 *    software must display the following acknowledgment:
 
18
 *    "This product includes software developed by the OpenSSL Project
 
19
 *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
 
20
 *
 
21
 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
 
22
 *    endorse or promote products derived from this software without
 
23
 *    prior written permission. For written permission, please contact
 
24
 *    openssl-core@openssl.org.
 
25
 *
 
26
 * 5. Products derived from this software may not be called "OpenSSL"
 
27
 *    nor may "OpenSSL" appear in their names without prior written
 
28
 *    permission of the OpenSSL Project.
 
29
 *
 
30
 * 6. Redistributions of any form whatsoever must retain the following
 
31
 *    acknowledgment:
 
32
 *    "This product includes software developed by the OpenSSL Project
 
33
 *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
 
34
 *
 
35
 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
 
36
 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 
37
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 
38
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
 
39
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 
40
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 
41
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 
42
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 
43
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 
44
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 
45
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 
46
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 
47
 * ====================================================================
 
48
 */
 
49
 
 
50
#define OPENSSL_FIPSAPI
 
51
 
 
52
#include <openssl/crypto.h>
 
53
#include "modes_lcl.h"
 
54
#include <string.h>
 
55
 
 
56
#ifndef MODES_DEBUG
 
57
# ifndef NDEBUG
 
58
#  define NDEBUG
 
59
# endif
 
60
#endif
 
61
#include <assert.h>
 
62
 
 
63
#if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
 
64
/* redefine, because alignment is ensured */
 
65
#undef  GETU32
 
66
#define GETU32(p)       BSWAP4(*(const u32 *)(p))
 
67
#undef  PUTU32
 
68
#define PUTU32(p,v)     *(u32 *)(p) = BSWAP4(v)
 
69
#endif
 
70
 
 
71
#define PACK(s)         ((size_t)(s)<<(sizeof(size_t)*8-16))
 
72
#define REDUCE1BIT(V)   do { \
 
73
        if (sizeof(size_t)==8) { \
 
74
                u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
 
75
                V.lo  = (V.hi<<63)|(V.lo>>1); \
 
76
                V.hi  = (V.hi>>1 )^T; \
 
77
        } \
 
78
        else { \
 
79
                u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
 
80
                V.lo  = (V.hi<<63)|(V.lo>>1); \
 
81
                V.hi  = (V.hi>>1 )^((u64)T<<32); \
 
82
        } \
 
83
} while(0)
 
84
 
 
85
/*
 
86
 * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
 
87
 * never be set to 8. 8 is effectively reserved for testing purposes.
 
88
 * TABLE_BITS>1 are lookup-table-driven implementations referred to as
 
89
 * "Shoup's" in GCM specification. In other words OpenSSL does not cover
 
90
 * whole spectrum of possible table driven implementations. Why? In
 
91
 * non-"Shoup's" case memory access pattern is segmented in such manner,
 
92
 * that it's trivial to see that cache timing information can reveal
 
93
 * fair portion of intermediate hash value. Given that ciphertext is
 
94
 * always available to attacker, it's possible for him to attempt to
 
95
 * deduce secret parameter H and if successful, tamper with messages
 
96
 * [which is nothing but trivial in CTR mode]. In "Shoup's" case it's
 
97
 * not as trivial, but there is no reason to believe that it's resistant
 
98
 * to cache-timing attack. And the thing about "8-bit" implementation is
 
99
 * that it consumes 16 (sixteen) times more memory, 4KB per individual
 
100
 * key + 1KB shared. Well, on pros side it should be twice as fast as
 
101
 * "4-bit" version. And for gcc-generated x86[_64] code, "8-bit" version
 
102
 * was observed to run ~75% faster, closer to 100% for commercial
 
103
 * compilers... Yet "4-bit" procedure is preferred, because it's
 
104
 * believed to provide better security-performance balance and adequate
 
105
 * all-round performance. "All-round" refers to things like:
 
106
 *
 
107
 * - shorter setup time effectively improves overall timing for
 
108
 *   handling short messages;
 
109
 * - larger table allocation can become unbearable because of VM
 
110
 *   subsystem penalties (for example on Windows large enough free
 
111
 *   results in VM working set trimming, meaning that consequent
 
112
 *   malloc would immediately incur working set expansion);
 
113
 * - larger table has larger cache footprint, which can affect
 
114
 *   performance of other code paths (not necessarily even from same
 
115
 *   thread in Hyper-Threading world);
 
116
 *
 
117
 * Value of 1 is not appropriate for performance reasons.
 
118
 */
 
119
#if     TABLE_BITS==8
 
120
 
 
121
/*
 * Build the 256-entry lookup table used by the "8-bit" GHASH variant.
 * Htable[n] holds the product n*H in GF(2^128); H is in host byte
 * order (hi = H[0], lo = H[1]).  Power-of-two entries come from
 * repeated halving via REDUCE1BIT, the rest from XOR combinations.
 */
static void gcm_init_8bit(u128 Htable[256], u64 H[2])
{
    int  k, idx;
    u128 P;

    Htable[0].hi = 0;
    Htable[0].lo = 0;
    P.hi = H[0];
    P.lo = H[1];

    /* entries at power-of-two indices, by repeated field halving */
    Htable[128] = P;
    for (k = 64; k > 0; k >>= 1) {
        REDUCE1BIT(P);
        Htable[k] = P;
    }

    /* fill the gaps: Htable[base + j] = Htable[base] ^ Htable[j] */
    for (k = 2; k < 256; k <<= 1) {
        u128 *base = Htable + k, anchor = *base;
        for (idx = 1; idx < k; ++idx) {
            base[idx].hi = anchor.hi ^ Htable[idx].hi;
            base[idx].lo = anchor.lo ^ Htable[idx].lo;
        }
    }
}
 
144
 
 
145
/*
 * One GHASH multiplication, Xi = Xi * H, using the 256-entry table
 * produced by gcm_init_8bit.  Xi is consumed one byte at a time from
 * the last (least significant) byte towards the first; after each byte
 * the accumulator is shifted right by 8 bits and the spilled bits are
 * folded back in through the rem_8bit reduction table.
 */
static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256])
{
    u128 acc = { 0, 0 };
    const u8 *byte_p = (const u8 *)Xi + 15;
    size_t carry, idx = *byte_p;
    const union { long one; char little; } is_endian = { 1 };
    static const size_t rem_8bit[256] = {
        PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
        PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
        PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
        PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
        PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
        PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
        PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
        PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
        PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
        PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
        PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
        PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
        PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
        PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
        PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
        PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
        PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
        PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
        PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
        PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
        PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
        PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
        PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
        PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
        PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
        PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
        PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
        PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
        PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
        PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
        PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
        PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
        PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
        PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
        PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
        PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
        PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
        PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
        PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
        PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
        PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
        PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
        PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
        PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
        PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
        PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
        PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
        PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
        PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
        PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
        PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
        PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
        PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
        PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
        PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
        PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
        PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
        PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
        PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
        PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
        PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
        PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
        PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
        PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE) };

    for (;;) {
        acc.hi ^= Htable[idx].hi;
        acc.lo ^= Htable[idx].lo;

        if ((u8 *)Xi == byte_p)
            break;                      /* first byte just consumed */

        idx = *(--byte_p);

        /* shift right one byte; fold shifted-out bits back in */
        carry  = (size_t)acc.lo & 0xff;
        acc.lo = (acc.hi << 56) | (acc.lo >> 8);
        acc.hi = (acc.hi >> 8);
        if (sizeof(size_t) == 8)
            acc.hi ^= rem_8bit[carry];
        else
            acc.hi ^= (u64)rem_8bit[carry] << 32;
    }

    if (is_endian.little) {
#ifdef BSWAP8
        Xi[0] = BSWAP8(acc.hi);
        Xi[1] = BSWAP8(acc.lo);
#else
        u8 *p = (u8 *)Xi;
        u32 v;
        v = (u32)(acc.hi >> 32);    PUTU32(p, v);
        v = (u32)(acc.hi);          PUTU32(p + 4, v);
        v = (u32)(acc.lo >> 32);    PUTU32(p + 8, v);
        v = (u32)(acc.lo);          PUTU32(p + 12, v);
#endif
    } else {
        Xi[0] = acc.hi;
        Xi[1] = acc.lo;
    }
}
 
252
#define GCM_MUL(ctx,Xi)   gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)
 
253
 
 
254
#elif   TABLE_BITS==4
 
255
 
 
256
/*
 * Build the 16-entry lookup table for the "4-bit" (Shoup) GHASH
 * variant: Htable[n] = n*H in GF(2^128), with H in host byte order.
 */
static void gcm_init_4bit(u128 Htable[16], u64 H[2])
{
    u128 V;
#if defined(OPENSSL_SMALL_FOOTPRINT)
    int  i;
#endif

    Htable[0].hi = 0;
    Htable[0].lo = 0;
    V.hi = H[0];
    V.lo = H[1];

#if defined(OPENSSL_SMALL_FOOTPRINT)
    /* power-of-two entries via repeated halving, then XOR fill-in */
    Htable[8] = V;
    for (i = 4; i > 0; i >>= 1) {
        REDUCE1BIT(V);
        Htable[i] = V;
    }

    for (i = 2; i < 16; i <<= 1) {
        u128 *Hi = Htable + i;
        int   j;
        for (V = *Hi, j = 1; j < i; ++j) {
            Hi[j].hi = V.hi ^ Htable[j].hi;
            Hi[j].lo = V.lo ^ Htable[j].lo;
        }
    }
#else
    /* fully unrolled equivalent of the loops above */
    Htable[8] = V;
    REDUCE1BIT(V);
    Htable[4] = V;
    REDUCE1BIT(V);
    Htable[2] = V;
    REDUCE1BIT(V);
    Htable[1] = V;
    Htable[3].hi  = V.hi ^ Htable[2].hi,  Htable[3].lo  = V.lo ^ Htable[2].lo;
    V = Htable[4];
    Htable[5].hi  = V.hi ^ Htable[1].hi,  Htable[5].lo  = V.lo ^ Htable[1].lo;
    Htable[6].hi  = V.hi ^ Htable[2].hi,  Htable[6].lo  = V.lo ^ Htable[2].lo;
    Htable[7].hi  = V.hi ^ Htable[3].hi,  Htable[7].lo  = V.lo ^ Htable[3].lo;
    V = Htable[8];
    Htable[9].hi  = V.hi ^ Htable[1].hi,  Htable[9].lo  = V.lo ^ Htable[1].lo;
    Htable[10].hi = V.hi ^ Htable[2].hi,  Htable[10].lo = V.lo ^ Htable[2].lo;
    Htable[11].hi = V.hi ^ Htable[3].hi,  Htable[11].lo = V.lo ^ Htable[3].lo;
    Htable[12].hi = V.hi ^ Htable[4].hi,  Htable[12].lo = V.lo ^ Htable[4].lo;
    Htable[13].hi = V.hi ^ Htable[5].hi,  Htable[13].lo = V.lo ^ Htable[5].lo;
    Htable[14].hi = V.hi ^ Htable[6].hi,  Htable[14].lo = V.lo ^ Htable[6].lo;
    Htable[15].hi = V.hi ^ Htable[7].hi,  Htable[15].lo = V.lo ^ Htable[7].lo;
#endif
#if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
    /*
     * ARM assembler expects specific dword order in Htable.
     */
    {
        int j;
        const union { long one; char little; } is_endian = { 1 };

        if (is_endian.little)
            for (j = 0; j < 16; ++j) {
                V = Htable[j];
                Htable[j].hi = V.lo;
                Htable[j].lo = V.hi;
            }
        else
            for (j = 0; j < 16; ++j) {
                V = Htable[j];
                Htable[j].hi = V.lo << 32 | V.lo >> 32;
                Htable[j].lo = V.hi << 32 | V.hi >> 32;
            }
    }
#endif
}
 
327
 
 
328
#ifndef GHASH_ASM
 
329
static const size_t rem_4bit[16] = {
 
330
        PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
 
331
        PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
 
332
        PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
 
333
        PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0) };
 
334
 
 
335
/*
 * One GHASH multiplication, Xi = Xi * H, consuming Xi one nibble at a
 * time from the last byte upwards (Shoup's 4-bit table method).  The
 * loop body is doubled so each iteration handles a whole byte: first
 * the high nibble of the current byte, then — after the cnt check —
 * both nibbles of the next byte.
 */
static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
{
    u128 Z;
    int cnt = 15;
    size_t rem, nlo, nhi;
    const union { long one; char little; } is_endian = { 1 };

    nlo  = ((const u8 *)Xi)[15];
    nhi  = nlo >> 4;
    nlo &= 0xf;

    Z.hi = Htable[nlo].hi;
    Z.lo = Htable[nlo].lo;

    while (1) {
        /* shift out 4 bits and fold the remainder back in */
        rem  = (size_t)Z.lo & 0xf;
        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
        Z.hi = (Z.hi >> 4);
        if (sizeof(size_t) == 8)
            Z.hi ^= rem_4bit[rem];
        else
            Z.hi ^= (u64)rem_4bit[rem] << 32;

        Z.hi ^= Htable[nhi].hi;
        Z.lo ^= Htable[nhi].lo;

        if (--cnt < 0)
            break;

        nlo  = ((const u8 *)Xi)[cnt];
        nhi  = nlo >> 4;
        nlo &= 0xf;

        rem  = (size_t)Z.lo & 0xf;
        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
        Z.hi = (Z.hi >> 4);
        if (sizeof(size_t) == 8)
            Z.hi ^= rem_4bit[rem];
        else
            Z.hi ^= (u64)rem_4bit[rem] << 32;

        Z.hi ^= Htable[nlo].hi;
        Z.lo ^= Htable[nlo].lo;
    }

    if (is_endian.little) {
#ifdef BSWAP8
        Xi[0] = BSWAP8(Z.hi);
        Xi[1] = BSWAP8(Z.lo);
#else
        u8 *p = (u8 *)Xi;
        u32 v;
        v = (u32)(Z.hi >> 32);  PUTU32(p, v);
        v = (u32)(Z.hi);        PUTU32(p + 4, v);
        v = (u32)(Z.lo >> 32);  PUTU32(p + 8, v);
        v = (u32)(Z.lo);        PUTU32(p + 12, v);
#endif
    } else {
        Xi[0] = Z.hi;
        Xi[1] = Z.lo;
    }
}
 
397
 
 
398
#if !defined(OPENSSL_SMALL_FOOTPRINT)
 
399
/*
 
400
 * Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for
 
401
 * details... Compiler-generated code doesn't seem to give any
 
402
 * performance improvement, at least not on x86[_64]. It's here
 
403
 * mostly as reference and a placeholder for possible future
 
404
 * non-trivial optimization[s]...
 
405
 */
 
406
static void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],
 
407
                                const u8 *inp,size_t len)
 
408
{
 
409
    u128 Z;
 
410
    int cnt;
 
411
    size_t rem, nlo, nhi;
 
412
    const union { long one; char little; } is_endian = {1};
 
413
 
 
414
#if 1
 
415
    do {
 
416
        cnt  = 15;
 
417
        nlo  = ((const u8 *)Xi)[15];
 
418
        nlo ^= inp[15];
 
419
        nhi  = nlo>>4;
 
420
        nlo &= 0xf;
 
421
 
 
422
        Z.hi = Htable[nlo].hi;
 
423
        Z.lo = Htable[nlo].lo;
 
424
 
 
425
        while (1) {
 
426
                rem  = (size_t)Z.lo&0xf;
 
427
                Z.lo = (Z.hi<<60)|(Z.lo>>4);
 
428
                Z.hi = (Z.hi>>4);
 
429
                if (sizeof(size_t)==8)
 
430
                        Z.hi ^= rem_4bit[rem];
 
431
                else
 
432
                        Z.hi ^= (u64)rem_4bit[rem]<<32;
 
433
 
 
434
                Z.hi ^= Htable[nhi].hi;
 
435
                Z.lo ^= Htable[nhi].lo;
 
436
 
 
437
                if (--cnt<0)            break;
 
438
 
 
439
                nlo  = ((const u8 *)Xi)[cnt];
 
440
                nlo ^= inp[cnt];
 
441
                nhi  = nlo>>4;
 
442
                nlo &= 0xf;
 
443
 
 
444
                rem  = (size_t)Z.lo&0xf;
 
445
                Z.lo = (Z.hi<<60)|(Z.lo>>4);
 
446
                Z.hi = (Z.hi>>4);
 
447
                if (sizeof(size_t)==8)
 
448
                        Z.hi ^= rem_4bit[rem];
 
449
                else
 
450
                        Z.hi ^= (u64)rem_4bit[rem]<<32;
 
451
 
 
452
                Z.hi ^= Htable[nlo].hi;
 
453
                Z.lo ^= Htable[nlo].lo;
 
454
        }
 
455
#else
 
456
    /*
 
457
     * Extra 256+16 bytes per-key plus 512 bytes shared tables
 
458
     * [should] give ~50% improvement... One could have PACK()-ed
 
459
     * the rem_8bit even here, but the priority is to minimize
 
460
     * cache footprint...
 
461
     */ 
 
462
    u128 Hshr4[16];     /* Htable shifted right by 4 bits */
 
463
    u8   Hshl4[16];     /* Htable shifted left  by 4 bits */
 
464
    static const unsigned short rem_8bit[256] = {
 
465
        0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E,
 
466
        0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E,
 
467
        0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E,
 
468
        0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E,
 
469
        0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E,
 
470
        0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E,
 
471
        0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E,
 
472
        0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E,
 
473
        0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE,
 
474
        0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE,
 
475
        0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE,
 
476
        0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE,
 
477
        0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E,
 
478
        0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E,
 
479
        0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE,
 
480
        0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE,
 
481
        0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E,
 
482
        0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E,
 
483
        0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E,
 
484
        0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E,
 
485
        0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E,
 
486
        0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E,
 
487
        0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E,
 
488
        0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E,
 
489
        0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE,
 
490
        0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE,
 
491
        0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE,
 
492
        0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE,
 
493
        0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E,
 
494
        0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E,
 
495
        0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE,
 
496
        0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE };
 
497
    /*
 
498
     * This pre-processing phase slows down procedure by approximately
 
499
     * same time as it makes each loop spin faster. In other words
 
500
     * single block performance is approximately same as straightforward
 
501
     * "4-bit" implementation, and then it goes only faster...
 
502
     */
 
503
    for (cnt=0; cnt<16; ++cnt) {
 
504
        Z.hi = Htable[cnt].hi;
 
505
        Z.lo = Htable[cnt].lo;
 
506
        Hshr4[cnt].lo = (Z.hi<<60)|(Z.lo>>4);
 
507
        Hshr4[cnt].hi = (Z.hi>>4);
 
508
        Hshl4[cnt]    = (u8)(Z.lo<<4);
 
509
    }
 
510
 
 
511
    do {
 
512
        for (Z.lo=0, Z.hi=0, cnt=15; cnt; --cnt) {
 
513
                nlo  = ((const u8 *)Xi)[cnt];
 
514
                nlo ^= inp[cnt];
 
515
                nhi  = nlo>>4;
 
516
                nlo &= 0xf;
 
517
 
 
518
                Z.hi ^= Htable[nlo].hi;
 
519
                Z.lo ^= Htable[nlo].lo;
 
520
 
 
521
                rem = (size_t)Z.lo&0xff;
 
522
 
 
523
                Z.lo = (Z.hi<<56)|(Z.lo>>8);
 
524
                Z.hi = (Z.hi>>8);
 
525
 
 
526
                Z.hi ^= Hshr4[nhi].hi;
 
527
                Z.lo ^= Hshr4[nhi].lo;
 
528
                Z.hi ^= (u64)rem_8bit[rem^Hshl4[nhi]]<<48;
 
529
        }
 
530
 
 
531
        nlo  = ((const u8 *)Xi)[0];
 
532
        nlo ^= inp[0];
 
533
        nhi  = nlo>>4;
 
534
        nlo &= 0xf;
 
535
 
 
536
        Z.hi ^= Htable[nlo].hi;
 
537
        Z.lo ^= Htable[nlo].lo;
 
538
 
 
539
        rem = (size_t)Z.lo&0xf;
 
540
 
 
541
        Z.lo = (Z.hi<<60)|(Z.lo>>4);
 
542
        Z.hi = (Z.hi>>4);
 
543
 
 
544
        Z.hi ^= Htable[nhi].hi;
 
545
        Z.lo ^= Htable[nhi].lo;
 
546
        Z.hi ^= ((u64)rem_8bit[rem<<4])<<48;
 
547
#endif
 
548
 
 
549
        if (is_endian.little) {
 
550
#ifdef BSWAP8
 
551
                Xi[0] = BSWAP8(Z.hi);
 
552
                Xi[1] = BSWAP8(Z.lo);
 
553
#else
 
554
                u8 *p = (u8 *)Xi;
 
555
                u32 v;
 
556
                v = (u32)(Z.hi>>32);    PUTU32(p,v);
 
557
                v = (u32)(Z.hi);        PUTU32(p+4,v);
 
558
                v = (u32)(Z.lo>>32);    PUTU32(p+8,v);
 
559
                v = (u32)(Z.lo);        PUTU32(p+12,v);
 
560
#endif
 
561
        }
 
562
        else {
 
563
                Xi[0] = Z.hi;
 
564
                Xi[1] = Z.lo;
 
565
        }
 
566
    } while (inp+=16, len-=16);
 
567
}
 
568
#endif
 
569
#else
 
570
void gcm_gmult_4bit(u64 Xi[2],const u128 Htable[16]);
 
571
void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
 
572
#endif
 
573
 
 
574
#define GCM_MUL(ctx,Xi)   gcm_gmult_4bit(ctx->Xi.u,ctx->Htable)
 
575
#if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
 
576
#define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len)
 
577
/* GHASH_CHUNK is "stride parameter" missioned to mitigate cache
 
578
 * trashing effect. In other words idea is to hash data while it's
 
579
 * still in L1 cache after encryption pass... */
 
580
#define GHASH_CHUNK       (3*1024)
 
581
#endif
 
582
 
 
583
#else   /* TABLE_BITS */
 
584
 
 
585
/*
 * Bit-at-a-time GHASH multiplication, Xi = Xi * H.  Used only when
 * TABLE_BITS==1; slow, but needs no precomputed tables.  H is in host
 * byte order (see CRYPTO_gcm128_init).
 *
 * Fix: the scratch word X is now unsigned long.  The original used a
 * signed long and relied on (a) left-shifting a value whose sign bit
 * may be set (X<<=1) and (b) arithmetic right shift of the sign bit —
 * the former is undefined behaviour and the latter implementation-
 * defined in C.  The unsigned formulation below computes the same
 * all-ones/all-zeroes mask portably and is otherwise identical.
 */
static void gcm_gmult_1bit(u64 Xi[2], const u64 H[2])
{
    u128 V, Z = { 0, 0 };
    unsigned long X;
    int  i, j;
    const unsigned long *xi = (const unsigned long *)Xi;
    const union { long one; char little; } is_endian = { 1 };

    V.hi = H[0];    /* H is in host byte order, no byte swapping */
    V.lo = H[1];

    for (j = 0; j < 16 / sizeof(unsigned long); ++j) {
        /* load the j-th word of Xi as a big-endian value */
        if (is_endian.little) {
            if (sizeof(unsigned long) == 8) {
#ifdef BSWAP8
                X = (unsigned long)(BSWAP8(xi[j]));
#else
                const u8 *p = (const u8 *)(xi + j);
                X = (unsigned long)((u64)GETU32(p) << 32 | GETU32(p + 4));
#endif
            } else {
                const u8 *p = (const u8 *)(xi + j);
                X = (unsigned long)GETU32(p);
            }
        } else
            X = xi[j];

        /*
         * Classic shift-and-add: for each bit of X, MSB first,
         * conditionally accumulate V into Z, then halve V in the field.
         * M is all-ones when the current MSB is set, all-zeroes
         * otherwise, so the accumulation stays branch-free.
         */
        for (i = 0; i < 8 * sizeof(unsigned long); ++i, X <<= 1) {
            u64 M = 0 - (u64)(X >> (8 * sizeof(unsigned long) - 1));
            Z.hi ^= V.hi & M;
            Z.lo ^= V.lo & M;

            REDUCE1BIT(V);
        }
    }

    if (is_endian.little) {
#ifdef BSWAP8
        Xi[0] = BSWAP8(Z.hi);
        Xi[1] = BSWAP8(Z.lo);
#else
        u8 *p = (u8 *)Xi;
        u32 v;
        v = (u32)(Z.hi >> 32);  PUTU32(p, v);
        v = (u32)(Z.hi);        PUTU32(p + 4, v);
        v = (u32)(Z.lo >> 32);  PUTU32(p + 8, v);
        v = (u32)(Z.lo);        PUTU32(p + 12, v);
#endif
    } else {
        Xi[0] = Z.hi;
        Xi[1] = Z.lo;
    }
}
 
641
#define GCM_MUL(ctx,Xi)   gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)
 
642
 
 
643
#endif
 
644
 
 
645
#if     TABLE_BITS==4 && defined(GHASH_ASM)
 
646
# if    !defined(I386_ONLY) && \
 
647
        (defined(__i386)        || defined(__i386__)    || \
 
648
         defined(__x86_64)      || defined(__x86_64__)  || \
 
649
         defined(_M_IX86)       || defined(_M_AMD64)    || defined(_M_X64))
 
650
#  define GHASH_ASM_X86_OR_64
 
651
#  define GCM_FUNCREF_4BIT
 
652
extern unsigned int OPENSSL_ia32cap_P[2];
 
653
 
 
654
void gcm_init_clmul(u128 Htable[16],const u64 Xi[2]);
 
655
void gcm_gmult_clmul(u64 Xi[2],const u128 Htable[16]);
 
656
void gcm_ghash_clmul(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
 
657
 
 
658
#  if   defined(__i386) || defined(__i386__) || defined(_M_IX86)
 
659
#   define GHASH_ASM_X86
 
660
void gcm_gmult_4bit_mmx(u64 Xi[2],const u128 Htable[16]);
 
661
void gcm_ghash_4bit_mmx(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
 
662
 
 
663
void gcm_gmult_4bit_x86(u64 Xi[2],const u128 Htable[16]);
 
664
void gcm_ghash_4bit_x86(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
 
665
#  endif
 
666
# elif defined(__arm__) || defined(__arm)
 
667
#  include "arm_arch.h"
 
668
#  if __ARM_ARCH__>=7
 
669
#   define GHASH_ASM_ARM
 
670
#   define GCM_FUNCREF_4BIT
 
671
void gcm_gmult_neon(u64 Xi[2],const u128 Htable[16]);
 
672
void gcm_ghash_neon(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
 
673
#  endif
 
674
# endif
 
675
#endif
 
676
 
 
677
#ifdef GCM_FUNCREF_4BIT
 
678
# undef  GCM_MUL
 
679
# define GCM_MUL(ctx,Xi)        (*gcm_gmult_p)(ctx->Xi.u,ctx->Htable)
 
680
# ifdef GHASH
 
681
#  undef  GHASH
 
682
#  define GHASH(ctx,in,len)     (*gcm_ghash_p)(ctx->Xi.u,ctx->Htable,in,len)
 
683
# endif
 
684
#endif
 
685
 
 
686
/*
 * Initialise a GCM128 context: remember the block cipher and key,
 * derive the hash subkey H = E_K(0^128), convert H to host byte order,
 * and select/precompute whichever GHASH implementation this build
 * supports (table-driven C, PCLMULQDQ, MMX/SSE or NEON assembler).
 */
void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block)
{
    const union { long one; char little; } is_endian = { 1 };

    memset(ctx, 0, sizeof(*ctx));
    ctx->block = block;
    ctx->key   = key;

    /* H = E_K(0^128); ctx->H.c is all-zero after the memset above */
    (*block)(ctx->H.c, ctx->H.c, key);

    if (is_endian.little) {
        /* H is stored in host byte order */
#ifdef BSWAP8
        ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
        ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
#else
        u8 *p = ctx->H.c;
        u64 hword, lword;
        hword = (u64)GETU32(p) << 32 | GETU32(p + 4);
        lword = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
        ctx->H.u[0] = hword;
        ctx->H.u[1] = lword;
#endif
    }

#if     TABLE_BITS==8
    gcm_init_8bit(ctx->Htable, ctx->H.u);
#elif   TABLE_BITS==4
# if    defined(GHASH_ASM_X86_OR_64)
#  if   !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
    if (OPENSSL_ia32cap_P[0] & (1 << 24) &&     /* check FXSR bit */
        OPENSSL_ia32cap_P[1] & (1 << 1)) {      /* check PCLMULQDQ bit */
        gcm_init_clmul(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_clmul;
        ctx->ghash = gcm_ghash_clmul;
        return;
    }
#  endif
    gcm_init_4bit(ctx->Htable, ctx->H.u);
#  if   defined(GHASH_ASM_X86)                  /* x86 only */
#   if  defined(OPENSSL_IA32_SSE2)
    if (OPENSSL_ia32cap_P[0] & (1 << 25)) {     /* check SSE bit */
#   else
    if (OPENSSL_ia32cap_P[0] & (1 << 23)) {     /* check MMX bit */
#   endif
        ctx->gmult = gcm_gmult_4bit_mmx;
        ctx->ghash = gcm_ghash_4bit_mmx;
    } else {
        ctx->gmult = gcm_gmult_4bit_x86;
        ctx->ghash = gcm_ghash_4bit_x86;
    }
#  else
    ctx->gmult = gcm_gmult_4bit;
    ctx->ghash = gcm_ghash_4bit;
#  endif
# elif  defined(GHASH_ASM_ARM)
    if (OPENSSL_armcap_P & ARMV7_NEON) {
        ctx->gmult = gcm_gmult_neon;
        ctx->ghash = gcm_ghash_neon;
    } else {
        gcm_init_4bit(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_4bit;
        ctx->ghash = gcm_ghash_4bit;
    }
# else
    gcm_init_4bit(ctx->Htable, ctx->H.u);
# endif
#endif
}
 
755
 
 
756
/*
 * Set the IV and reset all per-message state (Xi, lengths, residues).
 * A 12-byte IV is used directly as Y0 = IV || 0^31 || 1; any other
 * length is GHASHed per the GCM spec: Y0 = GHASH(IV || pad || len(IV)).
 * Also precomputes EK0 = E_K(Y0), used later to mask the tag, and
 * leaves Yi holding the first counter block for en/decryption.
 */
void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx,const unsigned char *iv,size_t len)
{
	const union { long one; char little; } is_endian = {1};
	unsigned int ctr;
#ifdef GCM_FUNCREF_4BIT
	/* Local copy of the gmult pointer; GCM_MUL expands to call it. */
	void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])	= ctx->gmult;
#endif

	ctx->Yi.u[0]  = 0;
	ctx->Yi.u[1]  = 0;
	ctx->Xi.u[0]  = 0;
	ctx->Xi.u[1]  = 0;
	ctx->len.u[0] = 0;	/* AAD length */
	ctx->len.u[1] = 0;	/* message length */
	ctx->ares = 0;
	ctx->mres = 0;

	if (len==12) {
		/* Recommended IV size: Y0 = IV || 0x00000001, no hashing. */
		memcpy(ctx->Yi.c,iv,12);
		ctx->Yi.c[15]=1;
		ctr=1;
	}
	else {
		size_t i;
		u64 len0 = len;

		/* GHASH the IV, 16 bytes at a time. */
		while (len>=16) {
			for (i=0; i<16; ++i) ctx->Yi.c[i] ^= iv[i];
			GCM_MUL(ctx,Yi);
			iv += 16;
			len -= 16;
		}
		/* Partial final block is implicitly zero-padded. */
		if (len) {
			for (i=0; i<len; ++i) ctx->Yi.c[i] ^= iv[i];
			GCM_MUL(ctx,Yi);
		}
		/* Append the IV bit length as the final GHASH block. */
		len0 <<= 3;
		if (is_endian.little) {
#ifdef BSWAP8
			ctx->Yi.u[1]  ^= BSWAP8(len0);
#else
			ctx->Yi.c[8]  ^= (u8)(len0>>56);
			ctx->Yi.c[9]  ^= (u8)(len0>>48);
			ctx->Yi.c[10] ^= (u8)(len0>>40);
			ctx->Yi.c[11] ^= (u8)(len0>>32);
			ctx->Yi.c[12] ^= (u8)(len0>>24);
			ctx->Yi.c[13] ^= (u8)(len0>>16);
			ctx->Yi.c[14] ^= (u8)(len0>>8);
			ctx->Yi.c[15] ^= (u8)(len0);
#endif
		}
		else
			ctx->Yi.u[1]  ^= len0;

		GCM_MUL(ctx,Yi);

		/* Extract the initial 32-bit counter from Y0. */
		if (is_endian.little)
			ctr = GETU32(ctx->Yi.c+12);
		else
			ctr = ctx->Yi.d[3];
	}

	/* EK0 = E_K(Y0); XORed into Xi at finish() to form the tag. */
	(*ctx->block)(ctx->Yi.c,ctx->EK0.c,ctx->key);
	++ctr;
	/* Store the incremented counter back so encryption starts at Y1. */
	if (is_endian.little)
		PUTU32(ctx->Yi.c+12,ctr);
	else
		ctx->Yi.d[3] = ctr;
}
 
825
 
 
826
/*
 * Feed additional authenticated data (AAD) into the GHASH state.
 * May be called repeatedly, but only before any en/decryption
 * (returns -2 once message data has been processed).  Returns -1 if
 * the accumulated AAD length exceeds the GCM limit (2^64 bits),
 * 0 on success.  ctx->ares tracks the byte offset within a
 * partially-filled GHASH block between calls.
 */
int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx,const unsigned char *aad,size_t len)
{
	size_t i;
	unsigned int n;
	u64 alen = ctx->len.u[0];
#ifdef GCM_FUNCREF_4BIT
	void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])	= ctx->gmult;
# ifdef GHASH
	void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
				const u8 *inp,size_t len)	= ctx->ghash;
# endif
#endif

	/* AAD must precede all message data. */
	if (ctx->len.u[1]) return -2;

	alen += len;
	/* Enforce the spec's AAD length bound; also catch u64 wraparound. */
	if (alen>(U64(1)<<61) || (sizeof(len)==8 && alen<len))
		return -1;
	ctx->len.u[0] = alen;

	/* First, top up any partial block left over from a previous call. */
	n = ctx->ares;
	if (n) {
		while (n && len) {
			ctx->Xi.c[n] ^= *(aad++);
			--len;
			n = (n+1)%16;
		}
		if (n==0) GCM_MUL(ctx,Xi);
		else {
			/* Still partial: stash the residue and stop. */
			ctx->ares = n;
			return 0;
		}
	}

#ifdef GHASH
	/* Bulk path: hash all whole 16-byte blocks in one call. */
	if ((i = (len&(size_t)-16))) {
		GHASH(ctx,aad,i);
		aad += i;
		len -= i;
	}
#else
	while (len>=16) {
		for (i=0; i<16; ++i) ctx->Xi.c[i] ^= aad[i];
		GCM_MUL(ctx,Xi);
		aad += 16;
		len -= 16;
	}
#endif
	/* XOR in the trailing partial block; it is hashed later, either
	 * when more AAD arrives or when encryption finalizes GHASH(AAD). */
	if (len) {
		n = (unsigned int)len;
		for (i=0; i<len; ++i) ctx->Xi.c[i] ^= aad[i];
	}

	ctx->ares = n;
	return 0;
}
 
882
 
 
883
/*
 * Encrypt len bytes from in to out (may be called repeatedly for a
 * streaming message).  CTR-mode encrypts with successive counter
 * blocks E_K(Yi) while GHASHing the produced ciphertext into Xi.
 * Returns 0 on success, -1 if the total message length would exceed
 * the GCM limit of 2^36-32 bytes.  ctx->mres carries the byte offset
 * inside a partially-consumed keystream block across calls.
 */
int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
		const unsigned char *in, unsigned char *out,
		size_t len)
{
	const union { long one; char little; } is_endian = {1};
	unsigned int n, ctr;
	size_t i;
	u64        mlen  = ctx->len.u[1];
	block128_f block = ctx->block;
	void      *key   = ctx->key;
#ifdef GCM_FUNCREF_4BIT
	void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])	= ctx->gmult;
# ifdef GHASH
	void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
				const u8 *inp,size_t len)	= ctx->ghash;
# endif
#endif

#if 0
	n = (unsigned int)mlen%16; /* alternative to ctx->mres */
#endif
	mlen += len;
	/* Spec limit on plaintext size; also catch u64 wraparound. */
	if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
		return -1;
	ctx->len.u[1] = mlen;

	if (ctx->ares) {
		/* First call to encrypt finalizes GHASH(AAD) */
		GCM_MUL(ctx,Xi);
		ctx->ares = 0;
	}

	if (is_endian.little)
		ctr = GETU32(ctx->Yi.c+12);
	else
		ctr = ctx->Yi.d[3];

	n = ctx->mres;
#if !defined(OPENSSL_SMALL_FOOTPRINT)
	if (16%sizeof(size_t) == 0) do {	/* always true actually */
		/* Consume leftover keystream bytes from the previous call. */
		if (n) {
			while (n && len) {
				ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
				--len;
				n = (n+1)%16;
			}
			if (n==0) GCM_MUL(ctx,Xi);
			else {
				ctx->mres = n;
				return 0;
			}
		}
#if defined(STRICT_ALIGNMENT)
		/* Word-sized loads below require aligned buffers; fall back
		 * to the byte loop at the bottom otherwise. */
		if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
			break;
#endif
#if defined(GHASH) && defined(GHASH_CHUNK)
		/* Fast path: encrypt a whole chunk, then GHASH it in one
		 * bulk call over the just-written ciphertext. */
		while (len>=GHASH_CHUNK) {
		    size_t j=GHASH_CHUNK;

		    while (j) {
			size_t *out_t=(size_t *)out;
			const size_t *in_t=(const size_t *)in;

			(*block)(ctx->Yi.c,ctx->EKi.c,key);
			++ctr;
			if (is_endian.little)
				PUTU32(ctx->Yi.c+12,ctr);
			else
				ctx->Yi.d[3] = ctr;
			for (i=0; i<16/sizeof(size_t); ++i)
				out_t[i] = in_t[i] ^ ctx->EKi.t[i];
			out += 16;
			in  += 16;
			j   -= 16;
		    }
		    GHASH(ctx,out-GHASH_CHUNK,GHASH_CHUNK);
		    len -= GHASH_CHUNK;
		}
		/* Remaining whole blocks (less than a chunk). */
		if ((i = (len&(size_t)-16))) {
		    size_t j=i;

		    while (len>=16) {
			size_t *out_t=(size_t *)out;
			const size_t *in_t=(const size_t *)in;

			(*block)(ctx->Yi.c,ctx->EKi.c,key);
			++ctr;
			if (is_endian.little)
				PUTU32(ctx->Yi.c+12,ctr);
			else
				ctx->Yi.d[3] = ctr;
			for (i=0; i<16/sizeof(size_t); ++i)
				out_t[i] = in_t[i] ^ ctx->EKi.t[i];
			out += 16;
			in  += 16;
			len -= 16;
		    }
		    GHASH(ctx,out-j,j);
		}
#else
		/* No bulk GHASH: multiply Xi after every block. */
		while (len>=16) {
			size_t *out_t=(size_t *)out;
			const size_t *in_t=(const size_t *)in;

			(*block)(ctx->Yi.c,ctx->EKi.c,key);
			++ctr;
			if (is_endian.little)
				PUTU32(ctx->Yi.c+12,ctr);
			else
				ctx->Yi.d[3] = ctr;
			for (i=0; i<16/sizeof(size_t); ++i)
				ctx->Xi.t[i] ^=
				out_t[i] = in_t[i]^ctx->EKi.t[i];
			GCM_MUL(ctx,Xi);
			out += 16;
			in  += 16;
			len -= 16;
		}
#endif
		/* Trailing partial block: generate one more keystream block
		 * and remember the residue offset in n (== ctx->mres). */
		if (len) {
			(*block)(ctx->Yi.c,ctx->EKi.c,key);
			++ctr;
			if (is_endian.little)
				PUTU32(ctx->Yi.c+12,ctr);
			else
				ctx->Yi.d[3] = ctr;
			while (len--) {
				ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
				++n;
			}
		}

		ctx->mres = n;
		return 0;
	} while(0);
#endif
	/* Portable byte-at-a-time fallback. */
	for (i=0;i<len;++i) {
		if (n==0) {
			(*block)(ctx->Yi.c,ctx->EKi.c,key);
			++ctr;
			if (is_endian.little)
				PUTU32(ctx->Yi.c+12,ctr);
			else
				ctx->Yi.d[3] = ctr;
		}
		ctx->Xi.c[n] ^= out[i] = in[i]^ctx->EKi.c[n];
		n = (n+1)%16;
		if (n==0)
			GCM_MUL(ctx,Xi);
	}

	ctx->mres = n;
	return 0;
}
 
1038
 
 
1039
/*
 * Decrypt len bytes from in to out (streaming-capable, mirror of
 * CRYPTO_gcm128_encrypt).  The key difference from encryption is
 * that GHASH is computed over the incoming CIPHERTEXT, so here the
 * input is hashed before/while it is decrypted.  Returns 0 on
 * success, -1 if the total length exceeds the GCM message limit.
 */
int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
		const unsigned char *in, unsigned char *out,
		size_t len)
{
	const union { long one; char little; } is_endian = {1};
	unsigned int n, ctr;
	size_t i;
	u64        mlen  = ctx->len.u[1];
	block128_f block = ctx->block;
	void      *key   = ctx->key;
#ifdef GCM_FUNCREF_4BIT
	void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])	= ctx->gmult;
# ifdef GHASH
	void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
				const u8 *inp,size_t len)	= ctx->ghash;
# endif
#endif

	mlen += len;
	if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
		return -1;
	ctx->len.u[1] = mlen;

	if (ctx->ares) {
		/* First call to decrypt finalizes GHASH(AAD) */
		GCM_MUL(ctx,Xi);
		ctx->ares = 0;
	}

	if (is_endian.little)
		ctr = GETU32(ctx->Yi.c+12);
	else
		ctr = ctx->Yi.d[3];

	n = ctx->mres;
#if !defined(OPENSSL_SMALL_FOOTPRINT)
	if (16%sizeof(size_t) == 0) do {	/* always true actually */
		/* Consume leftover keystream from the previous call; note
		 * the ciphertext byte c is saved before out is written, so
		 * in-place operation (in == out) is safe. */
		if (n) {
			while (n && len) {
				u8 c = *(in++);
				*(out++) = c^ctx->EKi.c[n];
				ctx->Xi.c[n] ^= c;
				--len;
				n = (n+1)%16;
			}
			if (n==0) GCM_MUL (ctx,Xi);
			else {
				ctx->mres = n;
				return 0;
			}
		}
#if defined(STRICT_ALIGNMENT)
		if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
			break;
#endif
#if defined(GHASH) && defined(GHASH_CHUNK)
		/* Fast path: GHASH the ciphertext chunk first, then
		 * CTR-decrypt it. */
		while (len>=GHASH_CHUNK) {
		    size_t j=GHASH_CHUNK;

		    GHASH(ctx,in,GHASH_CHUNK);
		    while (j) {
			size_t *out_t=(size_t *)out;
			const size_t *in_t=(const size_t *)in;

			(*block)(ctx->Yi.c,ctx->EKi.c,key);
			++ctr;
			if (is_endian.little)
				PUTU32(ctx->Yi.c+12,ctr);
			else
				ctx->Yi.d[3] = ctr;
			for (i=0; i<16/sizeof(size_t); ++i)
				out_t[i] = in_t[i]^ctx->EKi.t[i];
			out += 16;
			in  += 16;
			j   -= 16;
		    }
		    len -= GHASH_CHUNK;
		}
		/* Remaining whole blocks. */
		if ((i = (len&(size_t)-16))) {
		    GHASH(ctx,in,i);
		    while (len>=16) {
			size_t *out_t=(size_t *)out;
			const size_t *in_t=(const size_t *)in;

			(*block)(ctx->Yi.c,ctx->EKi.c,key);
			++ctr;
			if (is_endian.little)
				PUTU32(ctx->Yi.c+12,ctr);
			else
				ctx->Yi.d[3] = ctr;
			for (i=0; i<16/sizeof(size_t); ++i)
				out_t[i] = in_t[i]^ctx->EKi.t[i];
			out += 16;
			in  += 16;
			len -= 16;
		    }
		}
#else
		/* No bulk GHASH: hash and decrypt block by block. */
		while (len>=16) {
			size_t *out_t=(size_t *)out;
			const size_t *in_t=(const size_t *)in;

			(*block)(ctx->Yi.c,ctx->EKi.c,key);
			++ctr;
			if (is_endian.little)
				PUTU32(ctx->Yi.c+12,ctr);
			else
				ctx->Yi.d[3] = ctr;
			for (i=0; i<16/sizeof(size_t); ++i) {
				size_t c = in[i];
				out[i] = c^ctx->EKi.t[i];
				ctx->Xi.t[i] ^= c;
			}
			GCM_MUL(ctx,Xi);
			out += 16;
			in  += 16;
			len -= 16;
		}
#endif
		/* Trailing partial block. */
		if (len) {
			(*block)(ctx->Yi.c,ctx->EKi.c,key);
			++ctr;
			if (is_endian.little)
				PUTU32(ctx->Yi.c+12,ctr);
			else
				ctx->Yi.d[3] = ctr;
			while (len--) {
				u8 c = in[n];
				ctx->Xi.c[n] ^= c;
				out[n] = c^ctx->EKi.c[n];
				++n;
			}
		}

		ctx->mres = n;
		return 0;
	} while(0);
#endif
	/* Portable byte-at-a-time fallback. */
	for (i=0;i<len;++i) {
		u8 c;
		if (n==0) {
			(*block)(ctx->Yi.c,ctx->EKi.c,key);
			++ctr;
			if (is_endian.little)
				PUTU32(ctx->Yi.c+12,ctr);
			else
				ctx->Yi.d[3] = ctr;
		}
		c = in[i];
		out[i] = c^ctx->EKi.c[n];
		ctx->Xi.c[n] ^= c;
		n = (n+1)%16;
		if (n==0)
			GCM_MUL(ctx,Xi);
	}

	ctx->mres = n;
	return 0;
}
 
1198
 
 
1199
/*
 * Encrypt using a caller-supplied hardware/asm CTR routine (stream)
 * that processes whole 16-byte blocks, instead of calling ctx->block
 * once per block.  GHASH is still computed here over the produced
 * ciphertext.  Semantics otherwise match CRYPTO_gcm128_encrypt:
 * returns 0 on success, -1 on message-length overflow.
 */
int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
		const unsigned char *in, unsigned char *out,
		size_t len, ctr128_f stream)
{
	const union { long one; char little; } is_endian = {1};
	unsigned int n, ctr;
	size_t i;
	u64   mlen = ctx->len.u[1];
	void *key  = ctx->key;
#ifdef GCM_FUNCREF_4BIT
	void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])	= ctx->gmult;
# ifdef GHASH
	void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
				const u8 *inp,size_t len)	= ctx->ghash;
# endif
#endif

	mlen += len;
	if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
		return -1;
	ctx->len.u[1] = mlen;

	if (ctx->ares) {
		/* First call to encrypt finalizes GHASH(AAD) */
		GCM_MUL(ctx,Xi);
		ctx->ares = 0;
	}

	if (is_endian.little)
		ctr = GETU32(ctx->Yi.c+12);
	else
		ctr = ctx->Yi.d[3];

	n = ctx->mres;
	/* Consume leftover keystream bytes from the previous call. */
	if (n) {
		while (n && len) {
			ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
			--len;
			n = (n+1)%16;
		}
		if (n==0) GCM_MUL(ctx,Xi);
		else {
			ctx->mres = n;
			return 0;
		}
	}
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
	/* Chunked fast path: stream-encrypt, then bulk-GHASH the output. */
	while (len>=GHASH_CHUNK) {
		(*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c);
		ctr += GHASH_CHUNK/16;
		if (is_endian.little)
			PUTU32(ctx->Yi.c+12,ctr);
		else
			ctx->Yi.d[3] = ctr;
		GHASH(ctx,out,GHASH_CHUNK);
		out += GHASH_CHUNK;
		in  += GHASH_CHUNK;
		len -= GHASH_CHUNK;
	}
#endif
	/* Remaining whole blocks. */
	if ((i = (len&(size_t)-16))) {
		size_t j=i/16;

		(*stream)(in,out,j,key,ctx->Yi.c);
		ctr += (unsigned int)j;
		if (is_endian.little)
			PUTU32(ctx->Yi.c+12,ctr);
		else
			ctx->Yi.d[3] = ctr;
		in  += i;
		len -= i;
#if defined(GHASH)
		GHASH(ctx,out,i);
		out += i;
#else
		while (j--) {
			for (i=0;i<16;++i) ctx->Xi.c[i] ^= out[i];
			GCM_MUL(ctx,Xi);
			out += 16;
		}
#endif
	}
	/* Trailing partial block: one direct block call for keystream. */
	if (len) {
		(*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
		++ctr;
		if (is_endian.little)
			PUTU32(ctx->Yi.c+12,ctr);
		else
			ctx->Yi.d[3] = ctr;
		while (len--) {
			ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
			++n;
		}
	}

	ctx->mres = n;
	return 0;
}
 
1297
 
 
1298
/*
 * Decrypt using a caller-supplied CTR routine (stream), mirror of
 * CRYPTO_gcm128_encrypt_ctr32.  GHASH is computed over the incoming
 * ciphertext, so the input is hashed before it is stream-decrypted.
 * Returns 0 on success, -1 on message-length overflow.
 */
int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
		const unsigned char *in, unsigned char *out,
		size_t len,ctr128_f stream)
{
	const union { long one; char little; } is_endian = {1};
	unsigned int n, ctr;
	size_t i;
	u64   mlen = ctx->len.u[1];
	void *key  = ctx->key;
#ifdef GCM_FUNCREF_4BIT
	void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])	= ctx->gmult;
# ifdef GHASH
	void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
				const u8 *inp,size_t len)	= ctx->ghash;
# endif
#endif

	mlen += len;
	if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
		return -1;
	ctx->len.u[1] = mlen;

	if (ctx->ares) {
		/* First call to decrypt finalizes GHASH(AAD) */
		GCM_MUL(ctx,Xi);
		ctx->ares = 0;
	}

	if (is_endian.little)
		ctr = GETU32(ctx->Yi.c+12);
	else
		ctr = ctx->Yi.d[3];

	n = ctx->mres;
	/* Consume leftover keystream bytes; c is saved before the write
	 * so in-place operation (in == out) is safe. */
	if (n) {
		while (n && len) {
			u8 c = *(in++);
			*(out++) = c^ctx->EKi.c[n];
			ctx->Xi.c[n] ^= c;
			--len;
			n = (n+1)%16;
		}
		if (n==0) GCM_MUL (ctx,Xi);
		else {
			ctx->mres = n;
			return 0;
		}
	}
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
	/* Chunked fast path: bulk-GHASH the ciphertext, then decrypt it. */
	while (len>=GHASH_CHUNK) {
		GHASH(ctx,in,GHASH_CHUNK);
		(*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c);
		ctr += GHASH_CHUNK/16;
		if (is_endian.little)
			PUTU32(ctx->Yi.c+12,ctr);
		else
			ctx->Yi.d[3] = ctr;
		out += GHASH_CHUNK;
		in  += GHASH_CHUNK;
		len -= GHASH_CHUNK;
	}
#endif
	/* Remaining whole blocks. */
	if ((i = (len&(size_t)-16))) {
		size_t j=i/16;

#if defined(GHASH)
		GHASH(ctx,in,i);
#else
		while (j--) {
			size_t k;
			for (k=0;k<16;++k) ctx->Xi.c[k] ^= in[k];
			GCM_MUL(ctx,Xi);
			in += 16;
		}
		/* Rewind: the loop above consumed j and advanced in. */
		j   = i/16;
		in -= i;
#endif
		(*stream)(in,out,j,key,ctx->Yi.c);
		ctr += (unsigned int)j;
		if (is_endian.little)
			PUTU32(ctx->Yi.c+12,ctr);
		else
			ctx->Yi.d[3] = ctr;
		out += i;
		in  += i;
		len -= i;
	}
	/* Trailing partial block. */
	if (len) {
		(*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
		++ctr;
		if (is_endian.little)
			PUTU32(ctx->Yi.c+12,ctr);
		else
			ctx->Yi.d[3] = ctr;
		while (len--) {
			u8 c = in[n];
			ctx->Xi.c[n] ^= c;
			out[n] = c^ctx->EKi.c[n];
			++n;
		}
	}

	ctx->mres = n;
	return 0;
}
 
1403
 
 
1404
int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx,const unsigned char *tag,
 
1405
                        size_t len)
 
1406
{
 
1407
        const union { long one; char little; } is_endian = {1};
 
1408
        u64 alen = ctx->len.u[0]<<3;
 
1409
        u64 clen = ctx->len.u[1]<<3;
 
1410
#ifdef GCM_FUNCREF_4BIT
 
1411
        void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])    = ctx->gmult;
 
1412
#endif
 
1413
 
 
1414
        if (ctx->mres || ctx->ares)
 
1415
                GCM_MUL(ctx,Xi);
 
1416
 
 
1417
        if (is_endian.little) {
 
1418
#ifdef BSWAP8
 
1419
                alen = BSWAP8(alen);
 
1420
                clen = BSWAP8(clen);
 
1421
#else
 
1422
                u8 *p = ctx->len.c;
 
1423
 
 
1424
                ctx->len.u[0] = alen;
 
1425
                ctx->len.u[1] = clen;
 
1426
 
 
1427
                alen = (u64)GETU32(p)  <<32|GETU32(p+4);
 
1428
                clen = (u64)GETU32(p+8)<<32|GETU32(p+12);
 
1429
#endif
 
1430
        }
 
1431
 
 
1432
        ctx->Xi.u[0] ^= alen;
 
1433
        ctx->Xi.u[1] ^= clen;
 
1434
        GCM_MUL(ctx,Xi);
 
1435
 
 
1436
        ctx->Xi.u[0] ^= ctx->EK0.u[0];
 
1437
        ctx->Xi.u[1] ^= ctx->EK0.u[1];
 
1438
 
 
1439
        if (tag && len<=sizeof(ctx->Xi))
 
1440
                return memcmp(ctx->Xi.c,tag,len);
 
1441
        else
 
1442
                return -1;
 
1443
}
 
1444
 
 
1445
/*
 * Finalize the GCM computation and copy out up to len bytes of the
 * authentication tag.  Output is truncated to the 16-byte tag size
 * if the caller asks for more.
 */
void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
{
	size_t avail = sizeof(ctx->Xi.c);

	/* Passing a NULL tag skips verification; this only finalizes
	 * the tag value into ctx->Xi. */
	CRYPTO_gcm128_finish(ctx, NULL, 0);

	if (len > avail)
		len = avail;
	memcpy(tag, ctx->Xi.c, len);
}
 
1450
 
 
1451
/*
 * Allocate and initialize a fresh GCM128 context for the given key
 * and block function.  Returns NULL on allocation failure; the
 * caller owns the result and must release it with
 * CRYPTO_gcm128_release().
 */
GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
{
	GCM128_CONTEXT *ctx =
		(GCM128_CONTEXT *)OPENSSL_malloc(sizeof(GCM128_CONTEXT));

	if (ctx != NULL)
		CRYPTO_gcm128_init(ctx, key, block);
	return ctx;
}
 
1460
 
 
1461
/*
 * Destroy a context created by CRYPTO_gcm128_new(): scrub the key
 * material before freeing.  Safe to call with NULL.
 */
void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
{
	if (ctx == NULL)
		return;
	OPENSSL_cleanse(ctx, sizeof(*ctx));
	OPENSSL_free(ctx);
}
 
1468
 
 
1469
#if defined(SELFTEST)
 
1470
#include <stdio.h>
 
1471
#include <openssl/aes.h>
 
1472
 
 
1473
/* NIST GCM validation vectors (AES-128), used only when SELFTEST is
 * defined.  K = key, P = plaintext, A = AAD, IV = nonce,
 * C = expected ciphertext, T = expected tag.  #defines alias data
 * shared between consecutive test cases. */

/* Test Case 1: empty plaintext/AAD, all-zero key and 96-bit IV. */
static const u8 K1[16],
		*P1=NULL,
		*A1=NULL,
		IV1[12],
		*C1=NULL,
		T1[]=  {0x58,0xe2,0xfc,0xce,0xfa,0x7e,0x30,0x61,0x36,0x7f,0x1d,0x57,0xa4,0xe7,0x45,0x5a};

/* Test Case 2: single all-zero plaintext block. */
#define K2 K1
#define A2 A1
#define IV2 IV1
static const u8 P2[16],
		C2[]=  {0x03,0x88,0xda,0xce,0x60,0xb6,0xa3,0x92,0xf3,0x28,0xc2,0xb9,0x71,0xb2,0xfe,0x78},
		T2[]=  {0xab,0x6e,0x47,0xd4,0x2c,0xec,0x13,0xbd,0xf5,0x3a,0x67,0xb2,0x12,0x57,0xbd,0xdf};

/* Test Case 3: 4-block plaintext, no AAD, 96-bit IV. */
#define A3 A2
static const u8 K3[]=  {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
		P3[]=  {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
			0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
			0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
			0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
		IV3[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
		C3[]=  {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
			0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
			0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
			0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91,0x47,0x3f,0x59,0x85},
		T3[]=  {0x4d,0x5c,0x2a,0xf3,0x27,0xcd,0x64,0xa6,0x2c,0xf3,0x5a,0xbd,0x2b,0xa6,0xfa,0xb4};

/* Test Case 4: partial final block plus AAD. */
#define K4 K3
#define IV4 IV3
static const u8 P4[]=  {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
			0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
			0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
			0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
		A4[]=  {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
			0xab,0xad,0xda,0xd2},
		C4[]=  {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
			0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
			0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
			0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91},
		T4[]=  {0x5b,0xc9,0x4f,0xbc,0x32,0x21,0xa5,0xdb,0x94,0xfa,0xe9,0x5a,0xe7,0x12,0x1a,0x47};

/* Test Case 5: short (64-bit) IV, exercising the GHASH(IV) path. */
#define K5 K4
#define P5 P4
#define A5 A4
static const u8 IV5[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
		C5[]=  {0x61,0x35,0x3b,0x4c,0x28,0x06,0x93,0x4a,0x77,0x7f,0xf5,0x1f,0xa2,0x2a,0x47,0x55,
			0x69,0x9b,0x2a,0x71,0x4f,0xcd,0xc6,0xf8,0x37,0x66,0xe5,0xf9,0x7b,0x6c,0x74,0x23,
			0x73,0x80,0x69,0x00,0xe4,0x9f,0x24,0xb2,0x2b,0x09,0x75,0x44,0xd4,0x89,0x6b,0x42,
			0x49,0x89,0xb5,0xe1,0xeb,0xac,0x0f,0x07,0xc2,0x3f,0x45,0x98},
		T5[]=  {0x36,0x12,0xd2,0xe7,0x9e,0x3b,0x07,0x85,0x56,0x1b,0xe1,0x4a,0xac,0xa2,0xfc,0xcb};

/* Test Case 6: long (>96-bit) IV. */
#define K6 K5
#define P6 P5
#define A6 A5
 
1533
static const u8 IV6[]= {0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
 
1534
                        0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
 
1535
                        0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
 
1536
                        0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
 
1537
                C6[]=  {0x8c,0xe2,0x49,0x98,0x62,0x56,0x15,0xb6,0x03,0xa0,0x33,0xac,0xa1,0x3f,0xb8,0x94,
 
1538
                        0xbe,0x91,0x12,0xa5,0xc3,0xa2,0x11,0xa8,0xba,0x26,0x2a,0x3c,0xca,0x7e,0x2c,0xa7,
 
1539
                        0x01,0xe4,0xa9,0xa4,0xfb,0xa4,0x3c,0x90,0xcc,0xdc,0xb2,0x81,0xd4,0x8c,0x7c,0x6f,
 
1540
                        0xd6,0x28,0x75,0xd2,0xac,0xa4,0x17,0x03,0x4c,0x34,0xae,0xe5},
 
1541
                T6[]=  {0x61,0x9c,0xc5,0xae,0xff,0xfe,0x0b,0xfa,0x46,0x2a,0xf4,0x3c,0x16,0x99,0xd0,0x50};
 
1542
 
 
1543
/* Test Case 7 */
 
1544
static const u8 K7[24],
 
1545
                *P7=NULL,
 
1546
                *A7=NULL,
 
1547
                IV7[12],
 
1548
                *C7=NULL,
 
1549
                T7[]=  {0xcd,0x33,0xb2,0x8a,0xc7,0x73,0xf7,0x4b,0xa0,0x0e,0xd1,0xf3,0x12,0x57,0x24,0x35};
 
1550
 
 
1551
/* Test Case 8 */
 
1552
#define K8 K7
 
1553
#define IV8 IV7
 
1554
#define A8 A7
 
1555
static const u8 P8[16],
 
1556
                C8[]=  {0x98,0xe7,0x24,0x7c,0x07,0xf0,0xfe,0x41,0x1c,0x26,0x7e,0x43,0x84,0xb0,0xf6,0x00},
 
1557
                T8[]=  {0x2f,0xf5,0x8d,0x80,0x03,0x39,0x27,0xab,0x8e,0xf4,0xd4,0x58,0x75,0x14,0xf0,0xfb};
 
1558
 
 
1559
/* Test Case 9 */
 
1560
#define A9 A8
 
1561
static const u8 K9[]=  {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
 
1562
                        0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c},
 
1563
                P9[]=  {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
 
1564
                        0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
 
1565
                        0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
 
1566
                        0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
 
1567
                IV9[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
 
1568
                C9[]=  {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
 
1569
                        0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
 
1570
                        0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
 
1571
                        0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10,0xac,0xad,0xe2,0x56},
 
1572
                T9[]=  {0x99,0x24,0xa7,0xc8,0x58,0x73,0x36,0xbf,0xb1,0x18,0x02,0x4d,0xb8,0x67,0x4a,0x14};
 
1573
 
 
1574
/* Test Case 10 */
 
1575
#define K10 K9
 
1576
#define IV10 IV9
 
1577
static const u8 P10[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
 
1578
                        0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
 
1579
                        0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
 
1580
                        0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
 
1581
                A10[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
 
1582
                        0xab,0xad,0xda,0xd2},
 
1583
                C10[]= {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
 
1584
                        0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
 
1585
                        0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
 
1586
                        0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10},
 
1587
                T10[]= {0x25,0x19,0x49,0x8e,0x80,0xf1,0x47,0x8f,0x37,0xba,0x55,0xbd,0x6d,0x27,0x61,0x8c};
 
1588
 
 
1589
/* Test Case 11 */
 
1590
#define K11 K10
 
1591
#define P11 P10
 
1592
#define A11 A10
 
1593
static const u8 IV11[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
 
1594
                C11[]= {0x0f,0x10,0xf5,0x99,0xae,0x14,0xa1,0x54,0xed,0x24,0xb3,0x6e,0x25,0x32,0x4d,0xb8,
 
1595
                        0xc5,0x66,0x63,0x2e,0xf2,0xbb,0xb3,0x4f,0x83,0x47,0x28,0x0f,0xc4,0x50,0x70,0x57,
 
1596
                        0xfd,0xdc,0x29,0xdf,0x9a,0x47,0x1f,0x75,0xc6,0x65,0x41,0xd4,0xd4,0xda,0xd1,0xc9,
 
1597
                        0xe9,0x3a,0x19,0xa5,0x8e,0x8b,0x47,0x3f,0xa0,0xf0,0x62,0xf7},
 
1598
                T11[]= {0x65,0xdc,0xc5,0x7f,0xcf,0x62,0x3a,0x24,0x09,0x4f,0xcc,0xa4,0x0d,0x35,0x33,0xf8};
 
1599
 
 
1600
/* Test Case 12 */
 
1601
#define K12 K11
 
1602
#define P12 P11
 
1603
#define A12 A11
 
1604
static const u8 IV12[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
 
1605
                        0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
 
1606
                        0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
 
1607
                        0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
 
1608
                C12[]= {0xd2,0x7e,0x88,0x68,0x1c,0xe3,0x24,0x3c,0x48,0x30,0x16,0x5a,0x8f,0xdc,0xf9,0xff,
 
1609
                        0x1d,0xe9,0xa1,0xd8,0xe6,0xb4,0x47,0xef,0x6e,0xf7,0xb7,0x98,0x28,0x66,0x6e,0x45,
 
1610
                        0x81,0xe7,0x90,0x12,0xaf,0x34,0xdd,0xd9,0xe2,0xf0,0x37,0x58,0x9b,0x29,0x2d,0xb3,
 
1611
                        0xe6,0x7c,0x03,0x67,0x45,0xfa,0x22,0xe7,0xe9,0xb7,0x37,0x3b},
 
1612
                T12[]= {0xdc,0xf5,0x66,0xff,0x29,0x1c,0x25,0xbb,0xb8,0x56,0x8f,0xc3,0xd3,0x76,0xa6,0xd9};
 
1613
 
 
1614
/* Test Case 13 */
 
1615
static const u8 K13[32],
 
1616
                *P13=NULL,
 
1617
                *A13=NULL,
 
1618
                IV13[12],
 
1619
                *C13=NULL,
 
1620
                T13[]={0x53,0x0f,0x8a,0xfb,0xc7,0x45,0x36,0xb9,0xa9,0x63,0xb4,0xf1,0xc4,0xcb,0x73,0x8b};
 
1621
 
 
1622
/* Test Case 14 */
 
1623
#define K14 K13
 
1624
#define A14 A13
 
1625
static const u8 P14[16],
 
1626
                IV14[12],
 
1627
                C14[]= {0xce,0xa7,0x40,0x3d,0x4d,0x60,0x6b,0x6e,0x07,0x4e,0xc5,0xd3,0xba,0xf3,0x9d,0x18},
 
1628
                T14[]= {0xd0,0xd1,0xc8,0xa7,0x99,0x99,0x6b,0xf0,0x26,0x5b,0x98,0xb5,0xd4,0x8a,0xb9,0x19};
 
1629
 
 
1630
/* Test Case 15 */
 
1631
#define A15 A14
 
1632
static const u8 K15[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
 
1633
                        0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
 
1634
                P15[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
 
1635
                        0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
 
1636
                        0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
 
1637
                        0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
 
1638
                IV15[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
 
1639
                C15[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
 
1640
                        0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
 
1641
                        0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
 
1642
                        0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62,0x89,0x80,0x15,0xad},
 
1643
                T15[]= {0xb0,0x94,0xda,0xc5,0xd9,0x34,0x71,0xbd,0xec,0x1a,0x50,0x22,0x70,0xe3,0xcc,0x6c};
 
1644
 
 
1645
/* Test Case 16 */
 
1646
#define K16 K15
 
1647
#define IV16 IV15
 
1648
static const u8 P16[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
 
1649
                        0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
 
1650
                        0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
 
1651
                        0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
 
1652
                A16[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
 
1653
                        0xab,0xad,0xda,0xd2},
 
1654
                C16[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
 
1655
                        0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
 
1656
                        0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
 
1657
                        0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62},
 
1658
                T16[]= {0x76,0xfc,0x6e,0xce,0x0f,0x4e,0x17,0x68,0xcd,0xdf,0x88,0x53,0xbb,0x2d,0x55,0x1b};
 
1659
 
 
1660
/* Test Case 17 */
 
1661
#define K17 K16
 
1662
#define P17 P16
 
1663
#define A17 A16
 
1664
static const u8 IV17[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
 
1665
                C17[]= {0xc3,0x76,0x2d,0xf1,0xca,0x78,0x7d,0x32,0xae,0x47,0xc1,0x3b,0xf1,0x98,0x44,0xcb,
 
1666
                        0xaf,0x1a,0xe1,0x4d,0x0b,0x97,0x6a,0xfa,0xc5,0x2f,0xf7,0xd7,0x9b,0xba,0x9d,0xe0,
 
1667
                        0xfe,0xb5,0x82,0xd3,0x39,0x34,0xa4,0xf0,0x95,0x4c,0xc2,0x36,0x3b,0xc7,0x3f,0x78,
 
1668
                        0x62,0xac,0x43,0x0e,0x64,0xab,0xe4,0x99,0xf4,0x7c,0x9b,0x1f},
 
1669
                T17[]= {0x3a,0x33,0x7d,0xbf,0x46,0xa7,0x92,0xc4,0x5e,0x45,0x49,0x13,0xfe,0x2e,0xa8,0xf2};
 
1670
 
 
1671
/* Test Case 18 */
 
1672
#define K18 K17
 
1673
#define P18 P17
 
1674
#define A18 A17
 
1675
static const u8 IV18[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
 
1676
                        0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
 
1677
                        0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
 
1678
                        0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
 
1679
                C18[]= {0x5a,0x8d,0xef,0x2f,0x0c,0x9e,0x53,0xf1,0xf7,0x5d,0x78,0x53,0x65,0x9e,0x2a,0x20,
 
1680
                        0xee,0xb2,0xb2,0x2a,0xaf,0xde,0x64,0x19,0xa0,0x58,0xab,0x4f,0x6f,0x74,0x6b,0xf4,
 
1681
                        0x0f,0xc0,0xc3,0xb7,0x80,0xf2,0x44,0x45,0x2d,0xa3,0xeb,0xf1,0xc5,0xd8,0x2c,0xde,
 
1682
                        0xa2,0x41,0x89,0x97,0x20,0x0e,0xf8,0x2e,0x44,0xae,0x7e,0x3f},
 
1683
                T18[]= {0xa4,0x4a,0x82,0x66,0xee,0x1c,0x8e,0xb0,0xc8,0xb5,0xd4,0xcf,0x5a,0xe9,0xf1,0x9a};
 
1684
 
 
1685
/* Test Case 19 */
 
1686
#define K19 K1
 
1687
#define P19 P1
 
1688
#define IV19 IV1
 
1689
#define C19 C1
 
1690
static const u8 A19[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
 
1691
                        0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
 
1692
                        0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
 
1693
                        0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55,
 
1694
                        0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
 
1695
                        0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
 
1696
                        0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
 
1697
                        0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62,0x89,0x80,0x15,0xad},
 
1698
                T19[]= {0x5f,0xea,0x79,0x3a,0x2d,0x6f,0x97,0x4d,0x37,0xe6,0x8e,0x0c,0xb8,0xff,0x94,0x92};
 
1699
 
 
1700
/* Test Case 20 */
 
1701
#define K20 K1
 
1702
#define A20 A1
 
1703
static const u8 IV20[64]={0xff,0xff,0xff,0xff}, /* this results in 0xff in counter LSB */
 
1704
                P20[288],
 
1705
                C20[]= {0x56,0xb3,0x37,0x3c,0xa9,0xef,0x6e,0x4a,0x2b,0x64,0xfe,0x1e,0x9a,0x17,0xb6,0x14,
 
1706
                        0x25,0xf1,0x0d,0x47,0xa7,0x5a,0x5f,0xce,0x13,0xef,0xc6,0xbc,0x78,0x4a,0xf2,0x4f,
 
1707
                        0x41,0x41,0xbd,0xd4,0x8c,0xf7,0xc7,0x70,0x88,0x7a,0xfd,0x57,0x3c,0xca,0x54,0x18,
 
1708
                        0xa9,0xae,0xff,0xcd,0x7c,0x5c,0xed,0xdf,0xc6,0xa7,0x83,0x97,0xb9,0xa8,0x5b,0x49,
 
1709
                        0x9d,0xa5,0x58,0x25,0x72,0x67,0xca,0xab,0x2a,0xd0,0xb2,0x3c,0xa4,0x76,0xa5,0x3c,
 
1710
                        0xb1,0x7f,0xb4,0x1c,0x4b,0x8b,0x47,0x5c,0xb4,0xf3,0xf7,0x16,0x50,0x94,0xc2,0x29,
 
1711
                        0xc9,0xe8,0xc4,0xdc,0x0a,0x2a,0x5f,0xf1,0x90,0x3e,0x50,0x15,0x11,0x22,0x13,0x76,
 
1712
                        0xa1,0xcd,0xb8,0x36,0x4c,0x50,0x61,0xa2,0x0c,0xae,0x74,0xbc,0x4a,0xcd,0x76,0xce,
 
1713
                        0xb0,0xab,0xc9,0xfd,0x32,0x17,0xef,0x9f,0x8c,0x90,0xbe,0x40,0x2d,0xdf,0x6d,0x86,
 
1714
                        0x97,0xf4,0xf8,0x80,0xdf,0xf1,0x5b,0xfb,0x7a,0x6b,0x28,0x24,0x1e,0xc8,0xfe,0x18,
 
1715
                        0x3c,0x2d,0x59,0xe3,0xf9,0xdf,0xff,0x65,0x3c,0x71,0x26,0xf0,0xac,0xb9,0xe6,0x42,
 
1716
                        0x11,0xf4,0x2b,0xae,0x12,0xaf,0x46,0x2b,0x10,0x70,0xbe,0xf1,0xab,0x5e,0x36,0x06,
 
1717
                        0x87,0x2c,0xa1,0x0d,0xee,0x15,0xb3,0x24,0x9b,0x1a,0x1b,0x95,0x8f,0x23,0x13,0x4c,
 
1718
                        0x4b,0xcc,0xb7,0xd0,0x32,0x00,0xbc,0xe4,0x20,0xa2,0xf8,0xeb,0x66,0xdc,0xf3,0x64,
 
1719
                        0x4d,0x14,0x23,0xc1,0xb5,0x69,0x90,0x03,0xc1,0x3e,0xce,0xf4,0xbf,0x38,0xa3,0xb6,
 
1720
                        0x0e,0xed,0xc3,0x40,0x33,0xba,0xc1,0x90,0x27,0x83,0xdc,0x6d,0x89,0xe2,0xe7,0x74,
 
1721
                        0x18,0x8a,0x43,0x9c,0x7e,0xbc,0xc0,0x67,0x2d,0xbd,0xa4,0xdd,0xcf,0xb2,0x79,0x46,
 
1722
                        0x13,0xb0,0xbe,0x41,0x31,0x5e,0xf7,0x78,0x70,0x8a,0x70,0xee,0x7d,0x75,0x16,0x5c},
 
1723
                T20[]= {0x8b,0x30,0x7f,0x6b,0x33,0x28,0x6d,0x0a,0xb0,0x26,0xa9,0xed,0x3f,0xe1,0xe8,0x5f};
 
1724
 
 
1725
/*
 * Run known-answer test case <n> in both directions.
 *
 * Encrypts P<n> under K<n>/IV<n> with AAD A<n>, checking the output
 * against C<n> and the tag against T<n>; then decrypts C<n> and checks
 * the recovered plaintext and tag.  NULL P/A/C pointers denote empty
 * inputs.  On any mismatch, increments "ret" and prints a diagnostic.
 * Relies on "ctx", "key" and "ret" being in the calling scope.
 */
#define TEST_CASE(n)    do {                                            \
        u8 out[sizeof(P##n)];                                           \
        AES_set_encrypt_key(K##n, sizeof(K##n)*8, &key);                \
        CRYPTO_gcm128_init(&ctx, &key, (block128_f)AES_encrypt);        \
        /* encrypt direction */                                         \
        CRYPTO_gcm128_setiv(&ctx, IV##n, sizeof(IV##n));                \
        memset(out, 0, sizeof(out));                                    \
        if (A##n) CRYPTO_gcm128_aad(&ctx, A##n, sizeof(A##n));          \
        if (P##n) CRYPTO_gcm128_encrypt(&ctx, P##n, out, sizeof(out)); \
        if (CRYPTO_gcm128_finish(&ctx, T##n, 16) ||                     \
            (C##n && memcmp(out, C##n, sizeof(out)))) {                 \
                ret++;                                                  \
                printf ("encrypt test#%d failed.\n", n);                \
        }                                                               \
        /* decrypt direction */                                         \
        CRYPTO_gcm128_setiv(&ctx, IV##n, sizeof(IV##n));                \
        memset(out, 0, sizeof(out));                                    \
        if (A##n) CRYPTO_gcm128_aad(&ctx, A##n, sizeof(A##n));          \
        if (C##n) CRYPTO_gcm128_decrypt(&ctx, C##n, out, sizeof(out)); \
        if (CRYPTO_gcm128_finish(&ctx, T##n, 16) ||                     \
            (P##n && memcmp(out, P##n, sizeof(out)))) {                 \
                ret++;                                                  \
                printf ("decrypt test#%d failed.\n", n);                \
        }                                                               \
        } while(0)
 
1744
 
 
1745
int main()
 
1746
{
 
1747
        GCM128_CONTEXT ctx;
 
1748
        AES_KEY key;
 
1749
        int ret=0;
 
1750
 
 
1751
        TEST_CASE(1);
 
1752
        TEST_CASE(2);
 
1753
        TEST_CASE(3);
 
1754
        TEST_CASE(4);
 
1755
        TEST_CASE(5);
 
1756
        TEST_CASE(6);
 
1757
        TEST_CASE(7);
 
1758
        TEST_CASE(8);
 
1759
        TEST_CASE(9);
 
1760
        TEST_CASE(10);
 
1761
        TEST_CASE(11);
 
1762
        TEST_CASE(12);
 
1763
        TEST_CASE(13);
 
1764
        TEST_CASE(14);
 
1765
        TEST_CASE(15);
 
1766
        TEST_CASE(16);
 
1767
        TEST_CASE(17);
 
1768
        TEST_CASE(18);
 
1769
        TEST_CASE(19);
 
1770
        TEST_CASE(20);
 
1771
 
 
1772
#ifdef OPENSSL_CPUID_OBJ
 
1773
        {
 
1774
        size_t start,stop,gcm_t,ctr_t,OPENSSL_rdtsc();
 
1775
        union { u64 u; u8 c[1024]; } buf;
 
1776
        int i;
 
1777
 
 
1778
        AES_set_encrypt_key(K1,sizeof(K1)*8,&key);
 
1779
        CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt);
 
1780
        CRYPTO_gcm128_setiv(&ctx,IV1,sizeof(IV1));
 
1781
 
 
1782
        CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
 
1783
        start = OPENSSL_rdtsc();
 
1784
        CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
 
1785
        gcm_t = OPENSSL_rdtsc() - start;
 
1786
 
 
1787
        CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
 
1788
                        &key,ctx.Yi.c,ctx.EKi.c,&ctx.mres,
 
1789
                        (block128_f)AES_encrypt);
 
1790
        start = OPENSSL_rdtsc();
 
1791
        CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
 
1792
                        &key,ctx.Yi.c,ctx.EKi.c,&ctx.mres,
 
1793
                        (block128_f)AES_encrypt);
 
1794
        ctr_t = OPENSSL_rdtsc() - start;
 
1795
 
 
1796
        printf("%.2f-%.2f=%.2f\n",
 
1797
                        gcm_t/(double)sizeof(buf),
 
1798
                        ctr_t/(double)sizeof(buf),
 
1799
                        (gcm_t-ctr_t)/(double)sizeof(buf));
 
1800
#ifdef GHASH
 
1801
        {
 
1802
        void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
 
1803
                                const u8 *inp,size_t len)       = ctx.ghash;
 
1804
 
 
1805
        GHASH((&ctx),buf.c,sizeof(buf));
 
1806
        start = OPENSSL_rdtsc();
 
1807
        for (i=0;i<100;++i) GHASH((&ctx),buf.c,sizeof(buf));
 
1808
        gcm_t = OPENSSL_rdtsc() - start;
 
1809
        printf("%.2f\n",gcm_t/(double)sizeof(buf)/(double)i);
 
1810
        }
 
1811
#endif
 
1812
        }
 
1813
#endif
 
1814
 
 
1815
        return ret;
 
1816
}
 
1817
#endif