#define ALIGNED_CIPHER_DATA(ctx) ((struct padlock_cipher_data *)\
	NEAREST_ALIGNED(ctx->cipher_data))

#define EVP_CIPHER_block_size_ECB	AES_BLOCK_SIZE
#define EVP_CIPHER_block_size_CBC	AES_BLOCK_SIZE
#define EVP_CIPHER_block_size_OFB	1
#define EVP_CIPHER_block_size_CFB	1

/* Declaring so many ciphers by hand would be a pain.
   Instead introduce a bit of preprocessor magic :-) */
#define DECLARE_AES_EVP(ksize,lmode,umode)	\
static const EVP_CIPHER padlock_aes_##ksize##_##lmode = {	\
	NID_aes_##ksize##_##lmode,	\
	EVP_CIPHER_block_size_##umode,	\
	AES_KEY_SIZE_##ksize,		\
	AES_BLOCK_SIZE,			\
	0 | EVP_CIPH_##umode##_MODE,	\
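/* For illustration only (a sketch, not necessarily the engine's exact
 * instantiation list): DECLARE_AES_EVP is meant to be expanded once per
 * key size and mode, yielding one EVP_CIPHER table per combination, e.g.: */
DECLARE_AES_EVP(128,ecb,ECB);
DECLARE_AES_EVP(128,cbc,CBC);
DECLARE_AES_EVP(128,cfb,CFB);
DECLARE_AES_EVP(128,ofb,OFB);

DECLARE_AES_EVP(192,ecb,ECB);
DECLARE_AES_EVP(192,cbc,CBC);
DECLARE_AES_EVP(192,cfb,CFB);
DECLARE_AES_EVP(192,ofb,OFB);

DECLARE_AES_EVP(256,ecb,ECB);
DECLARE_AES_EVP(256,cbc,CBC);
DECLARE_AES_EVP(256,cfb,CFB);
DECLARE_AES_EVP(256,ofb,OFB);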
	memset(cdata, 0, sizeof(struct padlock_cipher_data));

	/* Prepare Control word. */
	if (EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_OFB_MODE)
		cdata->cword.b.encdec = 0;
	else
		cdata->cword.b.encdec = (ctx->encrypt == 0);
	cdata->cword.b.rounds = 10 + (key_len - 128) / 32;
	cdata->cword.b.ksize = (key_len - 128) / 64;
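/* A minimal standalone sketch (not engine code; main() and the loop are
 * assumptions for illustration) showing what the two formulas above
 * evaluate to for the key lengths the hardware supports:
 *   key_len = 128 -> rounds = 10, ksize = 0
 *   key_len = 192 -> rounds = 12, ksize = 1
 *   key_len = 256 -> rounds = 14, ksize = 2 */
#include <stdio.h>

int main(void)
{
	int key_len;

	for (key_len = 128; key_len <= 256; key_len += 64)
		printf("key_len=%d rounds=%d ksize=%d\n",
		       key_len, 10 + (key_len - 128) / 32, (key_len - 128) / 64);
	return 0;
}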
	/* ... and is listed as hardware errata. They most
	   likely will fix it at some point and then
	   a check for stepping would be due here. */
	if (EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_CFB_MODE ||
	    EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_OFB_MODE ||
	    enc)
		AES_set_encrypt_key(key, key_len, &cdata->ks);
	else
		AES_set_decrypt_key(key, key_len, &cdata->ks);
	int  inp_misaligned, out_misaligned, realign_in_loop;
	size_t chunk, allocated=0;

	/* ctx->num is maintained in byte-oriented modes,
	   such as CFB and OFB... */
	if ((chunk = ctx->num)) {	/* borrow chunk variable */
		unsigned char *ivp=ctx->iv;

		switch (EVP_CIPHER_CTX_mode(ctx)) {
		case EVP_CIPH_CFB_MODE:
			if (chunk >= AES_BLOCK_SIZE)
				return 0; /* bogus value */

			if (ctx->encrypt)
				while (chunk<AES_BLOCK_SIZE && nbytes!=0) {
					ivp[chunk] = *(out_arg++) = *(in_arg++) ^ ivp[chunk];
					chunk++, nbytes--;
				}
			else	while (chunk<AES_BLOCK_SIZE && nbytes!=0) {
					unsigned char c = *(in_arg++);
					*(out_arg++) = c ^ ivp[chunk];
					ivp[chunk++] = c, nbytes--;
				}

			ctx->num = chunk%AES_BLOCK_SIZE;
			break;

		case EVP_CIPH_OFB_MODE:
			if (chunk >= AES_BLOCK_SIZE)
				return 0; /* bogus value */

			while (chunk<AES_BLOCK_SIZE && nbytes!=0) {
				*(out_arg++) = *(in_arg++) ^ ivp[chunk];
				chunk++, nbytes--;
			}

			ctx->num = chunk%AES_BLOCK_SIZE;
			break;
		}
	}
#if 0
	if (nbytes % AES_BLOCK_SIZE)
		return 0; /* are we expected to do tail processing? */
#else
	/* nbytes is always multiple of AES_BLOCK_SIZE in ECB and CBC
	   modes and arbitrary value in byte-oriented modes, such as
	   CFB and OFB... */
#endif
	/* VIA promises CPUs that won't require alignment in the future.
	   For now padlock_aes_align_required is initialized to 1 and
	   the condition is never met... */
	/* C7 core is capable to manage unaligned input in non-ECB[!]
	   mode, but performance penalties appear to be approximately
	   same as for software alignment below or ~3x. They promise to
	   improve it in the future, but for now we can just as well
	   pretend that it can only handle aligned input... */
	if (!padlock_aes_align_required && (nbytes%AES_BLOCK_SIZE)==0)
		return padlock_aes_cipher_omnivorous(ctx, out_arg, in_arg, nbytes);
	inp_misaligned = (((size_t)in_arg) & 0x0F);
	out_misaligned = (((size_t)out_arg) & 0x0F);

	/* Note that even if output is aligned and input not, I still
	   prefer to loop instead of copy the whole input and then
	   encrypt in one stroke. This is done in order to improve
	   L1 cache utilization... */
	realign_in_loop = out_misaligned|inp_misaligned;

	if (!realign_in_loop && (nbytes%AES_BLOCK_SIZE)==0)
		return padlock_aes_cipher_omnivorous(ctx, out_arg, in_arg, nbytes);

	/* this takes one "if" out of the loops */
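/* A standalone sketch of the realign-in-loop idea (assumptions only: the
 * names cipher_with_realign/process_chunk_aligned and the 1KB scratch size
 * are illustrative, not the engine's). Each chunk of a misaligned buffer is
 * staged through a 16-byte-aligned scratch area before the aligned-only
 * primitive touches it. This simplified version always stages through the
 * scratch buffer; the engine only does so when the corresponding pointer is
 * actually misaligned. */
#include <stdio.h>
#include <string.h>
#include <stdint.h>

#define ALIGNMENT 16

/* stand-in for the aligned-only hardware call (here: XOR with 0xAA) */
static void process_chunk_aligned(unsigned char *buf, size_t len)
{
	size_t i;

	for (i = 0; i < len; i++)
		buf[i] ^= 0xAA;
}

static void cipher_with_realign(unsigned char *out, const unsigned char *in,
                                size_t nbytes, size_t chunk_max)
{
	unsigned char storage[1024 + ALIGNMENT];
	unsigned char *scratch = (unsigned char *)
	    (((uintptr_t)storage + ALIGNMENT - 1) & ~(uintptr_t)(ALIGNMENT - 1));

	if (chunk_max > 1024)
		chunk_max = 1024;

	while (nbytes) {
		size_t chunk = nbytes < chunk_max ? nbytes : chunk_max;

		memcpy(scratch, in, chunk);            /* realign input */
		process_chunk_aligned(scratch, chunk); /* work on aligned copy */
		memcpy(out, scratch, chunk);           /* realign output */

		in += chunk;
		out += chunk;
		nbytes -= chunk;
	}
}

int main(void)
{
	unsigned char inbuf[37], outbuf[37];
	size_t i;

	for (i = 0; i < sizeof(inbuf); i++)
		inbuf[i] = (unsigned char)i;
	cipher_with_realign(outbuf, inbuf, sizeof(inbuf), 16);
	printf("outbuf[0]=0x%02x\n", outbuf[0]); /* prints 0xaa */
	return 0;
}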
	case EVP_CIPH_CFB_MODE:
		memcpy (iv = cdata->iv, ctx->iv, AES_BLOCK_SIZE);
		chunk &= ~(AES_BLOCK_SIZE-1);
		if (chunk)	goto cfb_shortcut;
		else		goto cfb_skiploop;

		do {
			if (iv != cdata->iv)
				memcpy(cdata->iv, iv, AES_BLOCK_SIZE);
			chunk = PADLOCK_CHUNK;
		cfb_shortcut:
			/* ... realign input if needed, run the hardware CFB
			   xcrypt over the chunk; misaligned-output handling
			   elided ... */
			out = out_arg+=chunk;

			nbytes -= chunk;
		} while (nbytes >= AES_BLOCK_SIZE);

	cfb_skiploop:
		if (nbytes) {
			unsigned char *ivp = cdata->iv;

			memcpy(ivp, iv, AES_BLOCK_SIZE);
			ctx->num = nbytes;
			if (cdata->cword.b.encdec) {
				cdata->cword.b.encdec=0;
				padlock_reload_key();
				padlock_xcrypt_ecb(1,cdata,ivp,ivp);
				cdata->cword.b.encdec=1;
				padlock_reload_key();
				while (nbytes) {
					unsigned char c = *(in_arg++);
					*(out_arg++) = c ^ *ivp;
					*(ivp++) = c, nbytes--;
				}
			}
			else {	padlock_reload_key();
				padlock_xcrypt_ecb(1,cdata,ivp,ivp);
				padlock_reload_key();
				while (nbytes) {
					*ivp = *(out_arg++) = *(in_arg++) ^ *ivp;
					ivp++, nbytes--;
				}
			}
		}

		memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
		break;
	case EVP_CIPH_OFB_MODE:
		memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
		chunk &= ~(AES_BLOCK_SIZE-1);
		if (chunk) do {
			if (inp_misaligned)
				inp = padlock_memcpy(out, in_arg, chunk);
			else
				inp = in_arg;
			/* ... run the hardware OFB xcrypt over the chunk;
			   misaligned-output handling elided ... */
			nbytes -= chunk;
			chunk   = PADLOCK_CHUNK;
		} while (nbytes >= AES_BLOCK_SIZE);

		if (nbytes) {
			unsigned char *ivp = cdata->iv;

			ctx->num = nbytes;
			padlock_reload_key();	/* empirically found */
			padlock_xcrypt_ecb(1,cdata,ivp,ivp);
			padlock_reload_key();	/* empirically found */
			while (nbytes) {
				*(out_arg++) = *(in_arg++) ^ *ivp;
				ivp++, nbytes--;
			}
		}

		memcpy(ctx->iv, cdata->iv, AES_BLOCK_SIZE);
		break;