~ubuntu-branches/ubuntu/vivid/unrar-nonfree/vivid

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
/*
   BLAKE2 reference source code package - reference C implementations

   Written in 2012 by Samuel Neves <sneves@dei.uc.pt>

   To the extent possible under law, the author(s) have dedicated all copyright
   and related and neighboring rights to this software to the public domain
   worldwide. This software is distributed without any warranty.

   You should have received a copy of the CC0 Public Domain Dedication along with
   this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
*/

#define PARALLELISM_DEGREE 8

void blake2sp_init( blake2sp_state *S )
{
  memset( S->buf, 0, sizeof( S->buf ) );
  S->buflen = 0;

  blake2s_init_param( &S->R, 0, 1 ); // Init root.

  for( uint i = 0; i < PARALLELISM_DEGREE; ++i )
    blake2s_init_param( &S->S[i], i, 0 ); // Init leaf.

  S->R.last_node = 1;
  S->S[PARALLELISM_DEGREE - 1].last_node = 1;
}


struct Blake2ThreadData
{
  void Update();
  blake2s_state *S;
  const byte *in;
  size_t inlen;
};


void Blake2ThreadData::Update()
{
  size_t inlen__ = inlen;
  const byte *in__ = ( const byte * )in;

  while( inlen__ >= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES )
  {
#ifdef USE_SSE
    // We gain 5% in i7 SSE mode by prefetching next data block.
    if (_SSE_Version>=SSE_SSE && inlen__ >= 2 * PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES)
      _mm_prefetch((char*)(in__ +  PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES), _MM_HINT_T0);
#endif
    blake2s_update( S, in__, BLAKE2S_BLOCKBYTES );
    in__ += PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES;
    inlen__ -= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES;
  }
}

#ifdef RAR_SMP
THREAD_PROC(Blake2Thread)
{
  Blake2ThreadData *td=(Blake2ThreadData *)Data;
  td->Update();
}
#endif


void blake2sp_update( blake2sp_state *S, const byte *in, size_t inlen )
{
  size_t left = S->buflen;
  size_t fill = sizeof( S->buf ) - left;

  if( left && inlen >= fill )
  {
    memcpy( S->buf + left, in, fill );

    for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
      blake2s_update( &S->S[i], S->buf + i * BLAKE2S_BLOCKBYTES, BLAKE2S_BLOCKBYTES );

    in += fill;
    inlen -= fill;
    left = 0;
  }

  Blake2ThreadData btd_array[PARALLELISM_DEGREE];

#ifdef RAR_SMP
  uint ThreadNumber = inlen < 0x1000 ? 1 : S->MaxThreads;
  
  if (ThreadNumber==6 || ThreadNumber==7) // 6 and 7 threads work slower than 4 here.
    ThreadNumber=4;
#else
  uint ThreadNumber=1;
#endif

  for (size_t id__=0;id__<PARALLELISM_DEGREE;)
  {
    for (uint Thread=0;Thread<ThreadNumber && id__<PARALLELISM_DEGREE;Thread++)
    {
      Blake2ThreadData *btd=btd_array+Thread;

      btd->inlen = inlen;
      btd->in = in + id__ * BLAKE2S_BLOCKBYTES;
      btd->S = &S->S[id__];
  
#ifdef RAR_SMP
      if (ThreadNumber>1)
        S->ThPool->AddTask(Blake2Thread,(void*)btd);
      else
        btd->Update();
#else
      btd->Update();
#endif
      id__++;
    }
#ifdef RAR_SMP
    if (S->ThPool!=NULL) // Can be NULL in -mt1 mode.
      S->ThPool->WaitDone();
#endif // RAR_SMP
  }

  in += inlen - inlen % ( PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES );
  inlen %= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES;

  if( inlen > 0 )
    memcpy( S->buf + left, in, (size_t)inlen );

  S->buflen = left + (size_t)inlen;
}


void blake2sp_final( blake2sp_state *S, byte *digest )
{
  byte hash[PARALLELISM_DEGREE][BLAKE2S_OUTBYTES];

  for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
  {
    if( S->buflen > i * BLAKE2S_BLOCKBYTES )
    {
      size_t left = S->buflen - i * BLAKE2S_BLOCKBYTES;

      if( left > BLAKE2S_BLOCKBYTES ) left = BLAKE2S_BLOCKBYTES;

      blake2s_update( &S->S[i], S->buf + i * BLAKE2S_BLOCKBYTES, left );
    }

    blake2s_final( &S->S[i], hash[i] );
  }

  for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
    blake2s_update( &S->R, hash[i], BLAKE2S_OUTBYTES );

  blake2s_final( &S->R, digest );
}