~ubuntu-branches/ubuntu/vivid/unrar-nonfree/vivid

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
// Based on public domain code written in 2012 by Samuel Neves

#include "rar.hpp"

#ifdef USE_SSE
#include "blake2s_sse.cpp"
#endif

// Forward declarations of the BLAKE2s primitives that are defined further
// down in this file. They are placed ahead of the blake2sp.cpp include that
// follows, presumably because that file calls them -- TODO confirm.
static void blake2s_init_param( blake2s_state *S, uint32 node_offset, uint32 node_depth);
static void blake2s_update( blake2s_state *S, const byte *in, size_t inlen );
static void blake2s_final( blake2s_state *S, byte *digest );

#include "blake2sp.cpp"

// Eight initialization constants. blake2s_init_param() copies them into the
// chaining value S->h before XORing in the parameters, and blake2s_compress()
// uses them for the upper half (v[8..15]) of its working vector.
// NOTE(review): values look like the BLAKE2s IV from RFC 7693 -- verify.
static const uint32 blake2s_IV[8] =
{
  0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL,
  0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL
};

// Message word schedule: one row per round (10 rounds), 16 indices per row.
// The G macro reads entries [r][2*i] and [r][2*i+1] to pick which of the
// 16 message words are mixed in by step i of round r.
static const byte blake2s_sigma[10][16] =
{
  {  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 } ,
  { 14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3 } ,
  { 11,  8, 12,  0,  5,  2, 15, 13, 10, 14,  3,  6,  7,  1,  9,  4 } ,
  {  7,  9,  3,  1, 13, 12, 11, 14,  2,  6,  5, 10,  4,  0, 15,  8 } ,
  {  9,  0,  5,  7,  2,  4, 10, 15, 14,  1, 11, 12,  6,  8,  3, 13 } ,
  {  2, 12,  6, 10,  0, 11,  8,  3,  4, 13,  7,  5, 15, 14,  1,  9 } ,
  { 12,  5,  1, 15, 14, 13,  4, 10,  0,  7,  6,  3,  9,  2,  8, 11 } ,
  { 13, 11,  7, 14, 12,  1,  3,  9,  5,  0, 15,  4,  8,  6,  2, 10 } ,
  {  6, 15, 14,  9, 11,  3,  0,  8, 12,  2, 13,  7,  1,  4, 10,  5 } ,
  { 10,  2,  8,  4,  7,  6,  1,  5, 15, 11,  9, 14,  3, 12, 13 , 0 } ,
};

// Raises the "last node" finalization flag by setting every bit of S->f[1].
// blake2s_compress() mixes this word into v[15].
static inline void blake2s_set_lastnode( blake2s_state *S )
{
  const unsigned all_ones = ~0U;
  S->f[1] = all_ones;
}


/* Some helper functions, not necessarily useful */
// Raises the "last block" finalization flag (all bits of S->f[0]) and, when
// the state is marked as the last node, the "last node" flag as well.
static inline void blake2s_set_lastblock( blake2s_state *S )
{
  S->f[0] = ~0U;

  if( S->last_node )
  {
    blake2s_set_lastnode( S );
  }
}


// Adds inc to the two-word counter kept in S->t: t[0] is the low word and
// t[1] receives the carry.
static inline void blake2s_increment_counter( blake2s_state *S, const uint32 inc )
{
  S->t[0] += inc;

  // After an unsigned addition the sum is smaller than the addend exactly
  // when the low word wrapped around, so propagate a carry of one.
  if( S->t[0] < inc )
    S->t[1] += 1;
}


/* Resets S and loads the chaining value S->h with the IV XORed with the
   parameter words used by this implementation.

   node_offset is XORed into h[2]; node_depth is shifted into bits 16 and up
   of the word XORed into h[3]. */
void blake2s_init_param( blake2s_state *S, uint32 node_offset, uint32 node_depth)
{
#ifdef USE_SSE
  if (_SSE_Version>=SSE_SSE2)
    blake2s_init_sse();
#endif

  S->init(); // Clean data.

  // We use BLAKE2sp parameters block.
  // NOTE(review): 0x02080020 presumably packs digest length 32, key length 0,
  // fanout 8 and depth 2, and 0x20000000 an inner hash length of 32 -- verify
  // against the BLAKE2 parameter block layout.
  S->h[0] = blake2s_IV[0] ^ 0x02080020;
  S->h[1] = blake2s_IV[1];
  S->h[2] = blake2s_IV[2] ^ node_offset;
  S->h[3] = blake2s_IV[3] ^ ( (node_depth<<16)|0x20000000 );

  // The remaining words take the IV unchanged.
  for( int word = 4; word < 8; ++word )
    S->h[word] = blake2s_IV[word];
}


// Rotates the 32-bit value w right by c bit positions and returns the result.
//
// Both shift counts are reduced modulo 32. Without that, c == 0 would make
// the left shift a shift by the full width of the type, which is undefined
// behaviour. For c in 1..31 the result is identical to the unmasked form,
// and with a constant c the masks fold away at compile time.
static _forceinline uint32 rotr32( const uint32 w, const unsigned c )
{
  const unsigned right = c & 31;
  const unsigned left = ( 32 - right ) & 31;
  return ( w >> right ) | ( w << left );
}


// BLAKE2s mixing step number i of round r. It updates the four working
// vector words a, b, c, d in place, mixing in the two message words of m
// that blake2s_sigma selects for this round and step.
//
// The body is wrapped in do { } while(0) so that an invocation followed by
// ';' is exactly one statement, and every argument is parenthesized so that
// expression arguments expand safely. a, b, c and d must be lvalues.
#define G(r,i,m,a,b,c,d) \
  do { \
    (a) = (a) + (b) + (m)[blake2s_sigma[(r)][2*(i)+0]]; \
    (d) = rotr32((d) ^ (a), 16); \
    (c) = (c) + (d); \
    (b) = rotr32((b) ^ (c), 12); \
    (a) = (a) + (b) + (m)[blake2s_sigma[(r)][2*(i)+1]]; \
    (d) = rotr32((d) ^ (a), 8); \
    (c) = (c) + (d); \
    (b) = rotr32((b) ^ (c), 7); \
  } while(0)


/* Compresses one input block into the chaining value S->h.

   The block is read as 16 words through RawGet4. S->t (counter) and S->f
   (finalization flags) are read but not modified; only S->h is written. */
static void blake2s_compress( blake2s_state *S, const byte block[BLAKE2S_BLOCKBYTES] )
{
  uint32 m[16]; // Message words of this block.
  uint32 v[16]; // Working vector.

  for( size_t word = 0; word < 16; ++word )
    m[word] = RawGet4( block + 4 * word );

  // Lower half of the working vector: the current chaining value.
  for( size_t k = 0; k < 8; ++k )
    v[k] = S->h[k];

  // Upper half: the IV, with the counter and the finalization flags XORed
  // into its last four words.
  for( size_t k = 0; k < 4; ++k )
    v[8 + k] = blake2s_IV[k];
  v[12] = blake2s_IV[4] ^ S->t[0];
  v[13] = blake2s_IV[5] ^ S->t[1];
  v[14] = blake2s_IV[6] ^ S->f[0];
  v[15] = blake2s_IV[7] ^ S->f[1];

  // Ten rounds, one row of blake2s_sigma each. Kept as a loop on purpose:
  // no gain on i7 if unrolled, but exe size grows.
  uint rnd = 0;
  while( rnd < 10 )
  {
    // First four steps work on the columns of the 4x4 word matrix.
    G(rnd,0,m,v[ 0],v[ 4],v[ 8],v[12]);
    G(rnd,1,m,v[ 1],v[ 5],v[ 9],v[13]);
    G(rnd,2,m,v[ 2],v[ 6],v[10],v[14]);
    G(rnd,3,m,v[ 3],v[ 7],v[11],v[15]);
    // Last four steps work on its diagonals.
    G(rnd,4,m,v[ 0],v[ 5],v[10],v[15]);
    G(rnd,5,m,v[ 1],v[ 6],v[11],v[12]);
    G(rnd,6,m,v[ 2],v[ 7],v[ 8],v[13]);
    G(rnd,7,m,v[ 3],v[ 4],v[ 9],v[14]);
    ++rnd;
  }

  // Fold both halves of the working vector back into the chaining value.
  for( size_t k = 0; k < 8; ++k )
    S->h[k] ^= v[k] ^ v[k + 8];
}


/* Feeds inlen bytes starting at in into the hash state S.

   S->buf holds up to two blocks. Input is appended to it, and one block is
   compressed only when more input remains than the buffer can take, so the
   most recent data always stays buffered for blake2s_final(). */
void blake2s_update( blake2s_state *S, const byte *in, size_t inlen )
{
  while( inlen > 0 )
  {
    const size_t used = S->buflen;
    const size_t room = 2 * BLAKE2S_BLOCKBYTES - used;

    if( inlen <= room )
    {
      // The rest of the input fits: store it and stop without compressing.
      memcpy( S->buf + used, in, inlen );
      S->buflen += inlen;
      break;
    }

    // Top the buffer up to two full blocks and compress the first of them.
    memcpy( S->buf + used, in, room );
    S->buflen += room;
    blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES );

#ifdef USE_SSE
#ifdef _WIN_32 // We use SSSE3 _mm_shuffle_epi8 only in x64 mode.
    if (_SSE_Version>=SSE_SSE2)
#else
    if (_SSE_Version>=SSE_SSSE3)
#endif
      blake2s_compress_sse( S, S->buf );
    else
      blake2s_compress( S, S->buf );
#else
    blake2s_compress( S, S->buf );
#endif

    // Move the second block to the front of the buffer and advance the input.
    memcpy( S->buf, S->buf + BLAKE2S_BLOCKBYTES, BLAKE2S_BLOCKBYTES );
    S->buflen -= BLAKE2S_BLOCKBYTES;
    in += room;
    inlen -= room;
  }
}


/* Finishes the hash and writes the eight words of S->h to digest through
   RawPut4 (4 bytes per word, 32 bytes in total).

   Any data still buffered in S->buf is compressed here; the final block is
   zero padded and processed with the last-block flag set. */
void blake2s_final( blake2s_state *S, byte *digest )
{
  // More than one block buffered: compress the first one as an ordinary
  // block and move the remainder to the front of the buffer.
  if( S->buflen > BLAKE2S_BLOCKBYTES )
  {
    blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES );
    blake2s_compress( S, S->buf );
    S->buflen -= BLAKE2S_BLOCKBYTES;
    memcpy( S->buf, S->buf + BLAKE2S_BLOCKBYTES, S->buflen );
  }

  // Count the remaining bytes, then flag this as the last block.
  blake2s_increment_counter( S, ( uint32 )S->buflen );
  blake2s_set_lastblock( S );

  // Zero everything after the buffered bytes (padding).
  byte *pad_start = S->buf + S->buflen;
  const size_t pad_len = 2 * BLAKE2S_BLOCKBYTES - S->buflen;
  memset( pad_start, 0, pad_len );

  blake2s_compress( S, S->buf );

  // Output full hash.
  for( int word = 0; word < 8; ++word )
    RawPut4( S->h[word], digest + word * 4 );
}