1
// Copyright 2016 The Go Authors. All rights reserved.
2
// Use of this source code is governed by a BSD-style
3
// license that can be found in the LICENSE file.
7
// This code is a port of the public domain, “ref10” implementation of ed25519
10
// FieldElement represents an element of the field GF(2^255 - 19). An element
11
// t, entries t[0]...t[9], represents the integer t[0]+2^26 t[1]+2^51 t[2]+2^77
12
// t[3]+2^102 t[4]+...+2^230 t[9]. Bounds on each t[i] vary depending on context.
14
type FieldElement [10]int32
18
func FeZero(fe *FieldElement) {
22
func FeOne(fe *FieldElement) {
27
func FeAdd(dst, a, b *FieldElement) {
40
func FeSub(dst, a, b *FieldElement) {
53
func FeCopy(dst, src *FieldElement) {
57
// Replace (f,g) with (g,g) if b == 1;
58
// replace (f,g) with (f,g) if b == 0.
60
// Preconditions: b in {0,1}.
61
func FeCMove(f, g *FieldElement, b int32) {
63
f[0] ^= b & (f[0] ^ g[0])
64
f[1] ^= b & (f[1] ^ g[1])
65
f[2] ^= b & (f[2] ^ g[2])
66
f[3] ^= b & (f[3] ^ g[3])
67
f[4] ^= b & (f[4] ^ g[4])
68
f[5] ^= b & (f[5] ^ g[5])
69
f[6] ^= b & (f[6] ^ g[6])
70
f[7] ^= b & (f[7] ^ g[7])
71
f[8] ^= b & (f[8] ^ g[8])
72
f[9] ^= b & (f[9] ^ g[9])
75
func load3(in []byte) int64 {
78
r |= int64(in[1]) << 8
79
r |= int64(in[2]) << 16
83
func load4(in []byte) int64 {
86
r |= int64(in[1]) << 8
87
r |= int64(in[2]) << 16
88
r |= int64(in[3]) << 24
92
func FeFromBytes(dst *FieldElement, src *[32]byte) {
94
h1 := load3(src[4:]) << 6
95
h2 := load3(src[7:]) << 5
96
h3 := load3(src[10:]) << 3
97
h4 := load3(src[13:]) << 2
99
h6 := load3(src[20:]) << 7
100
h7 := load3(src[23:]) << 5
101
h8 := load3(src[26:]) << 4
102
h9 := (load3(src[29:]) & 8388607) << 2
104
FeCombine(dst, h0, h1, h2, h3, h4, h5, h6, h7, h8, h9)
107
// FeToBytes marshals h to s.
109
// |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
111
// Write p=2^255-19; q=floor(h/p).
112
// Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))).
115
// Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4.
116
// Also have |h-2^230 h9|<2^230 so |19 2^(-255)(h-2^230 h9)|<1/4.
118
// Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9).
122
// Have 0<=r<=p-1=2^255-20.
123
// Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1.
125
// Write x=r+19(2^-255)r+y.
126
// Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q.
128
// Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1))
129
// so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q.
130
func FeToBytes(s *[32]byte, h *FieldElement) {
133
q := (19*h[9] + (1 << 24)) >> 25
145
// Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20.
147
// Goal: Output h-2^255 q, which is between 0 and 2^255-20.
149
carry[0] = h[0] >> 26
151
h[0] -= carry[0] << 26
152
carry[1] = h[1] >> 25
154
h[1] -= carry[1] << 25
155
carry[2] = h[2] >> 26
157
h[2] -= carry[2] << 26
158
carry[3] = h[3] >> 25
160
h[3] -= carry[3] << 25
161
carry[4] = h[4] >> 26
163
h[4] -= carry[4] << 26
164
carry[5] = h[5] >> 25
166
h[5] -= carry[5] << 25
167
carry[6] = h[6] >> 26
169
h[6] -= carry[6] << 26
170
carry[7] = h[7] >> 25
172
h[7] -= carry[7] << 25
173
carry[8] = h[8] >> 26
175
h[8] -= carry[8] << 26
176
carry[9] = h[9] >> 25
177
h[9] -= carry[9] << 25
180
// Goal: Output h[0]+...+2^255 h10-2^255 q, which is between 0 and 2^255-20.
181
// Have h[0]+...+2^230 h[9] between 0 and 2^255-1;
182
// evidently 2^255 h10-2^255 q = 0.
183
// Goal: Output h[0]+...+2^230 h[9].
185
s[0] = byte(h[0] >> 0)
186
s[1] = byte(h[0] >> 8)
187
s[2] = byte(h[0] >> 16)
188
s[3] = byte((h[0] >> 24) | (h[1] << 2))
189
s[4] = byte(h[1] >> 6)
190
s[5] = byte(h[1] >> 14)
191
s[6] = byte((h[1] >> 22) | (h[2] << 3))
192
s[7] = byte(h[2] >> 5)
193
s[8] = byte(h[2] >> 13)
194
s[9] = byte((h[2] >> 21) | (h[3] << 5))
195
s[10] = byte(h[3] >> 3)
196
s[11] = byte(h[3] >> 11)
197
s[12] = byte((h[3] >> 19) | (h[4] << 6))
198
s[13] = byte(h[4] >> 2)
199
s[14] = byte(h[4] >> 10)
200
s[15] = byte(h[4] >> 18)
201
s[16] = byte(h[5] >> 0)
202
s[17] = byte(h[5] >> 8)
203
s[18] = byte(h[5] >> 16)
204
s[19] = byte((h[5] >> 24) | (h[6] << 1))
205
s[20] = byte(h[6] >> 7)
206
s[21] = byte(h[6] >> 15)
207
s[22] = byte((h[6] >> 23) | (h[7] << 3))
208
s[23] = byte(h[7] >> 5)
209
s[24] = byte(h[7] >> 13)
210
s[25] = byte((h[7] >> 21) | (h[8] << 4))
211
s[26] = byte(h[8] >> 4)
212
s[27] = byte(h[8] >> 12)
213
s[28] = byte((h[8] >> 20) | (h[9] << 6))
214
s[29] = byte(h[9] >> 2)
215
s[30] = byte(h[9] >> 10)
216
s[31] = byte(h[9] >> 18)
219
func FeIsNegative(f *FieldElement) byte {
225
func FeIsNonZero(f *FieldElement) int32 {
229
for _, b := range s {
241
// |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
244
// |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
245
func FeNeg(h, f *FieldElement) {
258
func FeCombine(h *FieldElement, h0, h1, h2, h3, h4, h5, h6, h7, h8, h9 int64) {
259
var c0, c1, c2, c3, c4, c5, c6, c7, c8, c9 int64
262
|h0| <= (1.1*1.1*2^52*(1+19+19+19+19)+1.1*1.1*2^50*(38+38+38+38+38))
263
i.e. |h0| <= 1.2*2^59; narrower ranges for h2, h4, h6, h8
264
|h1| <= (1.1*1.1*2^51*(1+1+19+19+19+19+19+19+19+19))
265
i.e. |h1| <= 1.5*2^58; narrower ranges for h3, h5, h7, h9
268
c0 = (h0 + (1 << 25)) >> 26
271
c4 = (h4 + (1 << 25)) >> 26
276
/* |h1| <= 1.51*2^58 */
277
/* |h5| <= 1.51*2^58 */
279
c1 = (h1 + (1 << 24)) >> 25
282
c5 = (h5 + (1 << 24)) >> 25
285
/* |h1| <= 2^24; from now on fits into int32 */
286
/* |h5| <= 2^24; from now on fits into int32 */
287
/* |h2| <= 1.21*2^59 */
288
/* |h6| <= 1.21*2^59 */
290
c2 = (h2 + (1 << 25)) >> 26
293
c6 = (h6 + (1 << 25)) >> 26
296
/* |h2| <= 2^25; from now on fits into int32 unchanged */
297
/* |h6| <= 2^25; from now on fits into int32 unchanged */
298
/* |h3| <= 1.51*2^58 */
299
/* |h7| <= 1.51*2^58 */
301
c3 = (h3 + (1 << 24)) >> 25
304
c7 = (h7 + (1 << 24)) >> 25
307
/* |h3| <= 2^24; from now on fits into int32 unchanged */
308
/* |h7| <= 2^24; from now on fits into int32 unchanged */
309
/* |h4| <= 1.52*2^33 */
310
/* |h8| <= 1.52*2^33 */
312
c4 = (h4 + (1 << 25)) >> 26
315
c8 = (h8 + (1 << 25)) >> 26
318
/* |h4| <= 2^25; from now on fits into int32 unchanged */
319
/* |h8| <= 2^25; from now on fits into int32 unchanged */
320
/* |h5| <= 1.01*2^24 */
321
/* |h9| <= 1.51*2^58 */
323
c9 = (h9 + (1 << 24)) >> 25
326
/* |h9| <= 2^24; from now on fits into int32 unchanged */
327
/* |h0| <= 1.8*2^37 */
329
c0 = (h0 + (1 << 25)) >> 26
332
/* |h0| <= 2^25; from now on fits into int32 unchanged */
333
/* |h1| <= 1.01*2^24 */
347
// FeMul calculates h = f * g
348
// Can overlap h with f or g.
351
// |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
352
// |g| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
355
// |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
357
// Notes on implementation strategy:
359
// Using schoolbook multiplication.
360
// Karatsuba would save a little in some cost models.
362
// Most multiplications by 2 and 19 are 32-bit precomputations;
363
// cheaper than 64-bit postcomputations.
365
// There is one remaining multiplication by 19 in the carry chain;
366
// one *19 precomputation can be merged into this,
367
// but the resulting data flow is considerably less clean.
369
// There are 12 carries below.
370
// 10 of them are 2-way parallelizable and vectorizable.
371
// Can get away with 11 carries, but then data flow is much deeper.
373
// With tighter constraints on inputs, can squeeze carries into int32.
374
func FeMul(h, f, g *FieldElement) {
386
f1_2 := int64(2 * f[1])
387
f3_2 := int64(2 * f[3])
388
f5_2 := int64(2 * f[5])
389
f7_2 := int64(2 * f[7])
390
f9_2 := int64(2 * f[9])
403
g1_19 := int64(19 * g[1]) /* 1.4*2^29 */
404
g2_19 := int64(19 * g[2]) /* 1.4*2^30; still ok */
405
g3_19 := int64(19 * g[3])
406
g4_19 := int64(19 * g[4])
407
g5_19 := int64(19 * g[5])
408
g6_19 := int64(19 * g[6])
409
g7_19 := int64(19 * g[7])
410
g8_19 := int64(19 * g[8])
411
g9_19 := int64(19 * g[9])
413
h0 := f0*g0 + f1_2*g9_19 + f2*g8_19 + f3_2*g7_19 + f4*g6_19 + f5_2*g5_19 + f6*g4_19 + f7_2*g3_19 + f8*g2_19 + f9_2*g1_19
414
h1 := f0*g1 + f1*g0 + f2*g9_19 + f3*g8_19 + f4*g7_19 + f5*g6_19 + f6*g5_19 + f7*g4_19 + f8*g3_19 + f9*g2_19
415
h2 := f0*g2 + f1_2*g1 + f2*g0 + f3_2*g9_19 + f4*g8_19 + f5_2*g7_19 + f6*g6_19 + f7_2*g5_19 + f8*g4_19 + f9_2*g3_19
416
h3 := f0*g3 + f1*g2 + f2*g1 + f3*g0 + f4*g9_19 + f5*g8_19 + f6*g7_19 + f7*g6_19 + f8*g5_19 + f9*g4_19
417
h4 := f0*g4 + f1_2*g3 + f2*g2 + f3_2*g1 + f4*g0 + f5_2*g9_19 + f6*g8_19 + f7_2*g7_19 + f8*g6_19 + f9_2*g5_19
418
h5 := f0*g5 + f1*g4 + f2*g3 + f3*g2 + f4*g1 + f5*g0 + f6*g9_19 + f7*g8_19 + f8*g7_19 + f9*g6_19
419
h6 := f0*g6 + f1_2*g5 + f2*g4 + f3_2*g3 + f4*g2 + f5_2*g1 + f6*g0 + f7_2*g9_19 + f8*g8_19 + f9_2*g7_19
420
h7 := f0*g7 + f1*g6 + f2*g5 + f3*g4 + f4*g3 + f5*g2 + f6*g1 + f7*g0 + f8*g9_19 + f9*g8_19
421
h8 := f0*g8 + f1_2*g7 + f2*g6 + f3_2*g5 + f4*g4 + f5_2*g3 + f6*g2 + f7_2*g1 + f8*g0 + f9_2*g9_19
422
h9 := f0*g9 + f1*g8 + f2*g7 + f3*g6 + f4*g5 + f5*g4 + f6*g3 + f7*g2 + f8*g1 + f9*g0
424
FeCombine(h, h0, h1, h2, h3, h4, h5, h6, h7, h8, h9)
427
func feSquare(f *FieldElement) (h0, h1, h2, h3, h4, h5, h6, h7, h8, h9 int64) {
438
f0_2 := int64(2 * f[0])
439
f1_2 := int64(2 * f[1])
440
f2_2 := int64(2 * f[2])
441
f3_2 := int64(2 * f[3])
442
f4_2 := int64(2 * f[4])
443
f5_2 := int64(2 * f[5])
444
f6_2 := int64(2 * f[6])
445
f7_2 := int64(2 * f[7])
446
f5_38 := 38 * f5 // 1.31*2^30
447
f6_19 := 19 * f6 // 1.31*2^30
448
f7_38 := 38 * f7 // 1.31*2^30
449
f8_19 := 19 * f8 // 1.31*2^30
450
f9_38 := 38 * f9 // 1.31*2^30
452
h0 = f0*f0 + f1_2*f9_38 + f2_2*f8_19 + f3_2*f7_38 + f4_2*f6_19 + f5*f5_38
453
h1 = f0_2*f1 + f2*f9_38 + f3_2*f8_19 + f4*f7_38 + f5_2*f6_19
454
h2 = f0_2*f2 + f1_2*f1 + f3_2*f9_38 + f4_2*f8_19 + f5_2*f7_38 + f6*f6_19
455
h3 = f0_2*f3 + f1_2*f2 + f4*f9_38 + f5_2*f8_19 + f6*f7_38
456
h4 = f0_2*f4 + f1_2*f3_2 + f2*f2 + f5_2*f9_38 + f6_2*f8_19 + f7*f7_38
457
h5 = f0_2*f5 + f1_2*f4 + f2_2*f3 + f6*f9_38 + f7_2*f8_19
458
h6 = f0_2*f6 + f1_2*f5_2 + f2_2*f4 + f3_2*f3 + f7_2*f9_38 + f8*f8_19
459
h7 = f0_2*f7 + f1_2*f6 + f2_2*f5 + f3_2*f4 + f8*f9_38
460
h8 = f0_2*f8 + f1_2*f7_2 + f2_2*f6 + f3_2*f5_2 + f4*f4 + f9*f9_38
461
h9 = f0_2*f9 + f1_2*f8 + f2_2*f7 + f3_2*f6 + f4_2*f5
466
// FeSquare calculates h = f*f. Can overlap h with f.
469
// |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
472
// |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
473
func FeSquare(h, f *FieldElement) {
474
h0, h1, h2, h3, h4, h5, h6, h7, h8, h9 := feSquare(f)
475
FeCombine(h, h0, h1, h2, h3, h4, h5, h6, h7, h8, h9)
478
// FeSquare2 sets h = 2 * f * f
480
// Can overlap h with f.
483
// |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
486
// |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
487
// See the "Notes on implementation strategy" in the FeMul comment (fe_mul.c in ref10).
488
func FeSquare2(h, f *FieldElement) {
489
h0, h1, h2, h3, h4, h5, h6, h7, h8, h9 := feSquare(f)
502
FeCombine(h, h0, h1, h2, h3, h4, h5, h6, h7, h8, h9)
505
func FeInvert(out, z *FieldElement) {
506
var t0, t1, t2, t3 FieldElement
509
FeSquare(&t0, z) // 2^1
510
FeSquare(&t1, &t0) // 2^2
511
for i = 1; i < 2; i++ { // 2^3
514
FeMul(&t1, z, &t1) // 2^3 + 2^0
515
FeMul(&t0, &t0, &t1) // 2^3 + 2^1 + 2^0
516
FeSquare(&t2, &t0) // 2^4 + 2^2 + 2^1
517
FeMul(&t1, &t1, &t2) // 2^4 + 2^3 + 2^2 + 2^1 + 2^0
518
FeSquare(&t2, &t1) // 5,4,3,2,1
519
for i = 1; i < 5; i++ { // 9,8,7,6,5
522
FeMul(&t1, &t2, &t1) // 9,8,7,6,5,4,3,2,1,0
523
FeSquare(&t2, &t1) // 10..1
524
for i = 1; i < 10; i++ { // 19..10
527
FeMul(&t2, &t2, &t1) // 19..0
528
FeSquare(&t3, &t2) // 20..1
529
for i = 1; i < 20; i++ { // 39..20
532
FeMul(&t2, &t3, &t2) // 39..0
533
FeSquare(&t2, &t2) // 40..1
534
for i = 1; i < 10; i++ { // 49..10
537
FeMul(&t1, &t2, &t1) // 49..0
538
FeSquare(&t2, &t1) // 50..1
539
for i = 1; i < 50; i++ { // 99..50
542
FeMul(&t2, &t2, &t1) // 99..0
543
FeSquare(&t3, &t2) // 100..1
544
for i = 1; i < 100; i++ { // 199..100
547
FeMul(&t2, &t3, &t2) // 199..0
548
FeSquare(&t2, &t2) // 200..1
549
for i = 1; i < 50; i++ { // 249..50
552
FeMul(&t1, &t2, &t1) // 249..0
553
FeSquare(&t1, &t1) // 250..1
554
for i = 1; i < 5; i++ { // 254..5
557
FeMul(out, &t1, &t0) // 254..5,3,1,0
560
func fePow22523(out, z *FieldElement) {
561
var t0, t1, t2 FieldElement
565
for i = 1; i < 1; i++ {
569
for i = 1; i < 2; i++ {
575
for i = 1; i < 1; i++ {
580
for i = 1; i < 5; i++ {
585
for i = 1; i < 10; i++ {
590
for i = 1; i < 20; i++ {
595
for i = 1; i < 10; i++ {
600
for i = 1; i < 50; i++ {
605
for i = 1; i < 100; i++ {
610
for i = 1; i < 50; i++ {
615
for i = 1; i < 2; i++ {
621
// Group elements are members of the elliptic curve -x^2 + y^2 = 1 + d * x^2 *
622
// y^2 where d = -121665/121666.
624
// Several representations are used:
625
// ProjectiveGroupElement: (X:Y:Z) satisfying x=X/Z, y=Y/Z
626
// ExtendedGroupElement: (X:Y:Z:T) satisfying x=X/Z, y=Y/Z, XY=ZT
627
// CompletedGroupElement: ((X:Z),(Y:T)) satisfying x=X/Z, y=Y/T
628
// PreComputedGroupElement: (y+x,y-x,2dxy)
630
type ProjectiveGroupElement struct {
634
type ExtendedGroupElement struct {
635
X, Y, Z, T FieldElement
638
type CompletedGroupElement struct {
639
X, Y, Z, T FieldElement
642
type PreComputedGroupElement struct {
643
yPlusX, yMinusX, xy2d FieldElement
646
type CachedGroupElement struct {
647
yPlusX, yMinusX, Z, T2d FieldElement
650
func (p *ProjectiveGroupElement) Zero() {
656
func (p *ProjectiveGroupElement) Double(r *CompletedGroupElement) {
661
FeSquare2(&r.T, &p.Z)
662
FeAdd(&r.Y, &p.X, &p.Y)
664
FeAdd(&r.Y, &r.Z, &r.X)
665
FeSub(&r.Z, &r.Z, &r.X)
666
FeSub(&r.X, &t0, &r.Y)
667
FeSub(&r.T, &r.T, &r.Z)
670
func (p *ProjectiveGroupElement) ToBytes(s *[32]byte) {
671
var recip, x, y FieldElement
673
FeInvert(&recip, &p.Z)
674
FeMul(&x, &p.X, &recip)
675
FeMul(&y, &p.Y, &recip)
677
s[31] ^= FeIsNegative(&x) << 7
680
func (p *ExtendedGroupElement) Zero() {
687
func (p *ExtendedGroupElement) Double(r *CompletedGroupElement) {
688
var q ProjectiveGroupElement
693
func (p *ExtendedGroupElement) ToCached(r *CachedGroupElement) {
694
FeAdd(&r.yPlusX, &p.Y, &p.X)
695
FeSub(&r.yMinusX, &p.Y, &p.X)
697
FeMul(&r.T2d, &p.T, &d2)
700
func (p *ExtendedGroupElement) ToProjective(r *ProjectiveGroupElement) {
706
func (p *ExtendedGroupElement) ToBytes(s *[32]byte) {
707
var recip, x, y FieldElement
709
FeInvert(&recip, &p.Z)
710
FeMul(&x, &p.X, &recip)
711
FeMul(&y, &p.Y, &recip)
713
s[31] ^= FeIsNegative(&x) << 7
716
func (p *ExtendedGroupElement) FromBytes(s *[32]byte) bool {
717
var u, v, v3, vxx, check FieldElement
723
FeSub(&u, &u, &p.Z) // u = y^2-1
724
FeAdd(&v, &v, &p.Z) // v = dy^2+1
727
FeMul(&v3, &v3, &v) // v3 = v^3
729
FeMul(&p.X, &p.X, &v)
730
FeMul(&p.X, &p.X, &u) // x = uv^7
732
fePow22523(&p.X, &p.X) // x = (uv^7)^((q-5)/8)
733
FeMul(&p.X, &p.X, &v3)
734
FeMul(&p.X, &p.X, &u) // x = uv^3(uv^7)^((q-5)/8)
736
var tmpX, tmp2 [32]byte
739
FeMul(&vxx, &vxx, &v)
740
FeSub(&check, &vxx, &u) // vx^2-u
741
if FeIsNonZero(&check) == 1 {
742
FeAdd(&check, &vxx, &u) // vx^2+u
743
if FeIsNonZero(&check) == 1 {
746
FeMul(&p.X, &p.X, &SqrtM1)
748
FeToBytes(&tmpX, &p.X)
749
for i, v := range tmpX {
754
if FeIsNegative(&p.X) != (s[31] >> 7) {
758
FeMul(&p.T, &p.X, &p.Y)
762
func (p *CompletedGroupElement) ToProjective(r *ProjectiveGroupElement) {
763
FeMul(&r.X, &p.X, &p.T)
764
FeMul(&r.Y, &p.Y, &p.Z)
765
FeMul(&r.Z, &p.Z, &p.T)
768
func (p *CompletedGroupElement) ToExtended(r *ExtendedGroupElement) {
769
FeMul(&r.X, &p.X, &p.T)
770
FeMul(&r.Y, &p.Y, &p.Z)
771
FeMul(&r.Z, &p.Z, &p.T)
772
FeMul(&r.T, &p.X, &p.Y)
775
func (p *PreComputedGroupElement) Zero() {
781
func geAdd(r *CompletedGroupElement, p *ExtendedGroupElement, q *CachedGroupElement) {
784
FeAdd(&r.X, &p.Y, &p.X)
785
FeSub(&r.Y, &p.Y, &p.X)
786
FeMul(&r.Z, &r.X, &q.yPlusX)
787
FeMul(&r.Y, &r.Y, &q.yMinusX)
788
FeMul(&r.T, &q.T2d, &p.T)
789
FeMul(&r.X, &p.Z, &q.Z)
790
FeAdd(&t0, &r.X, &r.X)
791
FeSub(&r.X, &r.Z, &r.Y)
792
FeAdd(&r.Y, &r.Z, &r.Y)
793
FeAdd(&r.Z, &t0, &r.T)
794
FeSub(&r.T, &t0, &r.T)
797
func geSub(r *CompletedGroupElement, p *ExtendedGroupElement, q *CachedGroupElement) {
800
FeAdd(&r.X, &p.Y, &p.X)
801
FeSub(&r.Y, &p.Y, &p.X)
802
FeMul(&r.Z, &r.X, &q.yMinusX)
803
FeMul(&r.Y, &r.Y, &q.yPlusX)
804
FeMul(&r.T, &q.T2d, &p.T)
805
FeMul(&r.X, &p.Z, &q.Z)
806
FeAdd(&t0, &r.X, &r.X)
807
FeSub(&r.X, &r.Z, &r.Y)
808
FeAdd(&r.Y, &r.Z, &r.Y)
809
FeSub(&r.Z, &t0, &r.T)
810
FeAdd(&r.T, &t0, &r.T)
813
func geMixedAdd(r *CompletedGroupElement, p *ExtendedGroupElement, q *PreComputedGroupElement) {
816
FeAdd(&r.X, &p.Y, &p.X)
817
FeSub(&r.Y, &p.Y, &p.X)
818
FeMul(&r.Z, &r.X, &q.yPlusX)
819
FeMul(&r.Y, &r.Y, &q.yMinusX)
820
FeMul(&r.T, &q.xy2d, &p.T)
821
FeAdd(&t0, &p.Z, &p.Z)
822
FeSub(&r.X, &r.Z, &r.Y)
823
FeAdd(&r.Y, &r.Z, &r.Y)
824
FeAdd(&r.Z, &t0, &r.T)
825
FeSub(&r.T, &t0, &r.T)
828
func geMixedSub(r *CompletedGroupElement, p *ExtendedGroupElement, q *PreComputedGroupElement) {
831
FeAdd(&r.X, &p.Y, &p.X)
832
FeSub(&r.Y, &p.Y, &p.X)
833
FeMul(&r.Z, &r.X, &q.yMinusX)
834
FeMul(&r.Y, &r.Y, &q.yPlusX)
835
FeMul(&r.T, &q.xy2d, &p.T)
836
FeAdd(&t0, &p.Z, &p.Z)
837
FeSub(&r.X, &r.Z, &r.Y)
838
FeAdd(&r.Y, &r.Z, &r.Y)
839
FeSub(&r.Z, &t0, &r.T)
840
FeAdd(&r.T, &t0, &r.T)
843
func slide(r *[256]int8, a *[32]byte) {
845
r[i] = int8(1 & (a[i>>3] >> uint(i&7)))
850
for b := 1; b <= 6 && i+b < 256; b++ {
852
if r[i]+(r[i+b]<<uint(b)) <= 15 {
853
r[i] += r[i+b] << uint(b)
855
} else if r[i]-(r[i+b]<<uint(b)) >= -15 {
856
r[i] -= r[i+b] << uint(b)
857
for k := i + b; k < 256; k++ {
873
// GeDoubleScalarMultVartime sets r = a*A + b*B
874
// where a = a[0]+256*a[1]+...+256^31 a[31].
875
// and b = b[0]+256*b[1]+...+256^31 b[31].
876
// B is the Ed25519 base point (x,4/5) with x positive.
877
func GeDoubleScalarMultVartime(r *ProjectiveGroupElement, a *[32]byte, A *ExtendedGroupElement, b *[32]byte) {
878
var aSlide, bSlide [256]int8
879
var Ai [8]CachedGroupElement // A,3A,5A,7A,9A,11A,13A,15A
880
var t CompletedGroupElement
881
var u, A2 ExtendedGroupElement
891
for i := 0; i < 7; i++ {
892
geAdd(&t, &A2, &Ai[i])
899
for i = 255; i >= 0; i-- {
900
if aSlide[i] != 0 || bSlide[i] != 0 {
910
geAdd(&t, &u, &Ai[aSlide[i]/2])
911
} else if aSlide[i] < 0 {
913
geSub(&t, &u, &Ai[(-aSlide[i])/2])
918
geMixedAdd(&t, &u, &bi[bSlide[i]/2])
919
} else if bSlide[i] < 0 {
921
geMixedSub(&t, &u, &bi[(-bSlide[i])/2])
928
// equal returns 1 if b == c and 0 otherwise, assuming that b and c are non-negative.
930
func equal(b, c int32) int32 {
933
return int32(x >> 31)
936
// negative returns 1 if b < 0 and 0 otherwise.
937
func negative(b int32) int32 {
941
func PreComputedGroupElementCMove(t, u *PreComputedGroupElement, b int32) {
942
FeCMove(&t.yPlusX, &u.yPlusX, b)
943
FeCMove(&t.yMinusX, &u.yMinusX, b)
944
FeCMove(&t.xy2d, &u.xy2d, b)
947
func selectPoint(t *PreComputedGroupElement, pos int32, b int32) {
948
var minusT PreComputedGroupElement
949
bNegative := negative(b)
950
bAbs := b - (((-bNegative) & b) << 1)
953
for i := int32(0); i < 8; i++ {
954
PreComputedGroupElementCMove(t, &base[pos][i], equal(bAbs, i+1))
956
FeCopy(&minusT.yPlusX, &t.yMinusX)
957
FeCopy(&minusT.yMinusX, &t.yPlusX)
958
FeNeg(&minusT.xy2d, &t.xy2d)
959
PreComputedGroupElementCMove(t, &minusT, bNegative)
962
// GeScalarMultBase computes h = a*B, where
963
// a = a[0]+256*a[1]+...+256^31 a[31]
964
// B is the Ed25519 base point (x,4/5) with x positive.
968
func GeScalarMultBase(h *ExtendedGroupElement, a *[32]byte) {
971
for i, v := range a {
972
e[2*i] = int8(v & 15)
973
e[2*i+1] = int8((v >> 4) & 15)
976
// each e[i] is between 0 and 15 and e[63] is between 0 and 7.
979
for i := 0; i < 63; i++ {
981
carry = (e[i] + 8) >> 4
985
// each e[i] is between -8 and 8.
988
var t PreComputedGroupElement
989
var r CompletedGroupElement
990
for i := int32(1); i < 64; i += 2 {
991
selectPoint(&t, i/2, int32(e[i]))
992
geMixedAdd(&r, h, &t)
996
var s ProjectiveGroupElement
1007
for i := int32(0); i < 64; i += 2 {
1008
selectPoint(&t, i/2, int32(e[i]))
1009
geMixedAdd(&r, h, &t)
1014
// The scalars are GF(2^252 + 27742317777372353535851937790883648493).
1017
// a[0]+256*a[1]+...+256^31*a[31] = a
1018
// b[0]+256*b[1]+...+256^31*b[31] = b
1019
// c[0]+256*c[1]+...+256^31*c[31] = c
1022
// s[0]+256*s[1]+...+256^31*s[31] = (ab+c) mod l
1023
// where l = 2^252 + 27742317777372353535851937790883648493.
1024
func ScMulAdd(s, a, b, c *[32]byte) {
1025
a0 := 2097151 & load3(a[:])
1026
a1 := 2097151 & (load4(a[2:]) >> 5)
1027
a2 := 2097151 & (load3(a[5:]) >> 2)
1028
a3 := 2097151 & (load4(a[7:]) >> 7)
1029
a4 := 2097151 & (load4(a[10:]) >> 4)
1030
a5 := 2097151 & (load3(a[13:]) >> 1)
1031
a6 := 2097151 & (load4(a[15:]) >> 6)
1032
a7 := 2097151 & (load3(a[18:]) >> 3)
1033
a8 := 2097151 & load3(a[21:])
1034
a9 := 2097151 & (load4(a[23:]) >> 5)
1035
a10 := 2097151 & (load3(a[26:]) >> 2)
1036
a11 := (load4(a[28:]) >> 7)
1037
b0 := 2097151 & load3(b[:])
1038
b1 := 2097151 & (load4(b[2:]) >> 5)
1039
b2 := 2097151 & (load3(b[5:]) >> 2)
1040
b3 := 2097151 & (load4(b[7:]) >> 7)
1041
b4 := 2097151 & (load4(b[10:]) >> 4)
1042
b5 := 2097151 & (load3(b[13:]) >> 1)
1043
b6 := 2097151 & (load4(b[15:]) >> 6)
1044
b7 := 2097151 & (load3(b[18:]) >> 3)
1045
b8 := 2097151 & load3(b[21:])
1046
b9 := 2097151 & (load4(b[23:]) >> 5)
1047
b10 := 2097151 & (load3(b[26:]) >> 2)
1048
b11 := (load4(b[28:]) >> 7)
1049
c0 := 2097151 & load3(c[:])
1050
c1 := 2097151 & (load4(c[2:]) >> 5)
1051
c2 := 2097151 & (load3(c[5:]) >> 2)
1052
c3 := 2097151 & (load4(c[7:]) >> 7)
1053
c4 := 2097151 & (load4(c[10:]) >> 4)
1054
c5 := 2097151 & (load3(c[13:]) >> 1)
1055
c6 := 2097151 & (load4(c[15:]) >> 6)
1056
c7 := 2097151 & (load3(c[18:]) >> 3)
1057
c8 := 2097151 & load3(c[21:])
1058
c9 := 2097151 & (load4(c[23:]) >> 5)
1059
c10 := 2097151 & (load3(c[26:]) >> 2)
1060
c11 := (load4(c[28:]) >> 7)
1064
s1 := c1 + a0*b1 + a1*b0
1065
s2 := c2 + a0*b2 + a1*b1 + a2*b0
1066
s3 := c3 + a0*b3 + a1*b2 + a2*b1 + a3*b0
1067
s4 := c4 + a0*b4 + a1*b3 + a2*b2 + a3*b1 + a4*b0
1068
s5 := c5 + a0*b5 + a1*b4 + a2*b3 + a3*b2 + a4*b1 + a5*b0
1069
s6 := c6 + a0*b6 + a1*b5 + a2*b4 + a3*b3 + a4*b2 + a5*b1 + a6*b0
1070
s7 := c7 + a0*b7 + a1*b6 + a2*b5 + a3*b4 + a4*b3 + a5*b2 + a6*b1 + a7*b0
1071
s8 := c8 + a0*b8 + a1*b7 + a2*b6 + a3*b5 + a4*b4 + a5*b3 + a6*b2 + a7*b1 + a8*b0
1072
s9 := c9 + a0*b9 + a1*b8 + a2*b7 + a3*b6 + a4*b5 + a5*b4 + a6*b3 + a7*b2 + a8*b1 + a9*b0
1073
s10 := c10 + a0*b10 + a1*b9 + a2*b8 + a3*b7 + a4*b6 + a5*b5 + a6*b4 + a7*b3 + a8*b2 + a9*b1 + a10*b0
1074
s11 := c11 + a0*b11 + a1*b10 + a2*b9 + a3*b8 + a4*b7 + a5*b6 + a6*b5 + a7*b4 + a8*b3 + a9*b2 + a10*b1 + a11*b0
1075
s12 := a1*b11 + a2*b10 + a3*b9 + a4*b8 + a5*b7 + a6*b6 + a7*b5 + a8*b4 + a9*b3 + a10*b2 + a11*b1
1076
s13 := a2*b11 + a3*b10 + a4*b9 + a5*b8 + a6*b7 + a7*b6 + a8*b5 + a9*b4 + a10*b3 + a11*b2
1077
s14 := a3*b11 + a4*b10 + a5*b9 + a6*b8 + a7*b7 + a8*b6 + a9*b5 + a10*b4 + a11*b3
1078
s15 := a4*b11 + a5*b10 + a6*b9 + a7*b8 + a8*b7 + a9*b6 + a10*b5 + a11*b4
1079
s16 := a5*b11 + a6*b10 + a7*b9 + a8*b8 + a9*b7 + a10*b6 + a11*b5
1080
s17 := a6*b11 + a7*b10 + a8*b9 + a9*b8 + a10*b7 + a11*b6
1081
s18 := a7*b11 + a8*b10 + a9*b9 + a10*b8 + a11*b7
1082
s19 := a8*b11 + a9*b10 + a10*b9 + a11*b8
1083
s20 := a9*b11 + a10*b10 + a11*b9
1084
s21 := a10*b11 + a11*b10
1088
carry[0] = (s0 + (1 << 20)) >> 21
1090
s0 -= carry[0] << 21
1091
carry[2] = (s2 + (1 << 20)) >> 21
1093
s2 -= carry[2] << 21
1094
carry[4] = (s4 + (1 << 20)) >> 21
1096
s4 -= carry[4] << 21
1097
carry[6] = (s6 + (1 << 20)) >> 21
1099
s6 -= carry[6] << 21
1100
carry[8] = (s8 + (1 << 20)) >> 21
1102
s8 -= carry[8] << 21
1103
carry[10] = (s10 + (1 << 20)) >> 21
1105
s10 -= carry[10] << 21
1106
carry[12] = (s12 + (1 << 20)) >> 21
1108
s12 -= carry[12] << 21
1109
carry[14] = (s14 + (1 << 20)) >> 21
1111
s14 -= carry[14] << 21
1112
carry[16] = (s16 + (1 << 20)) >> 21
1114
s16 -= carry[16] << 21
1115
carry[18] = (s18 + (1 << 20)) >> 21
1117
s18 -= carry[18] << 21
1118
carry[20] = (s20 + (1 << 20)) >> 21
1120
s20 -= carry[20] << 21
1121
carry[22] = (s22 + (1 << 20)) >> 21
1123
s22 -= carry[22] << 21
1125
carry[1] = (s1 + (1 << 20)) >> 21
1127
s1 -= carry[1] << 21
1128
carry[3] = (s3 + (1 << 20)) >> 21
1130
s3 -= carry[3] << 21
1131
carry[5] = (s5 + (1 << 20)) >> 21
1133
s5 -= carry[5] << 21
1134
carry[7] = (s7 + (1 << 20)) >> 21
1136
s7 -= carry[7] << 21
1137
carry[9] = (s9 + (1 << 20)) >> 21
1139
s9 -= carry[9] << 21
1140
carry[11] = (s11 + (1 << 20)) >> 21
1142
s11 -= carry[11] << 21
1143
carry[13] = (s13 + (1 << 20)) >> 21
1145
s13 -= carry[13] << 21
1146
carry[15] = (s15 + (1 << 20)) >> 21
1148
s15 -= carry[15] << 21
1149
carry[17] = (s17 + (1 << 20)) >> 21
1151
s17 -= carry[17] << 21
1152
carry[19] = (s19 + (1 << 20)) >> 21
1154
s19 -= carry[19] << 21
1155
carry[21] = (s21 + (1 << 20)) >> 21
1157
s21 -= carry[21] << 21
1207
carry[6] = (s6 + (1 << 20)) >> 21
1209
s6 -= carry[6] << 21
1210
carry[8] = (s8 + (1 << 20)) >> 21
1212
s8 -= carry[8] << 21
1213
carry[10] = (s10 + (1 << 20)) >> 21
1215
s10 -= carry[10] << 21
1216
carry[12] = (s12 + (1 << 20)) >> 21
1218
s12 -= carry[12] << 21
1219
carry[14] = (s14 + (1 << 20)) >> 21
1221
s14 -= carry[14] << 21
1222
carry[16] = (s16 + (1 << 20)) >> 21
1224
s16 -= carry[16] << 21
1226
carry[7] = (s7 + (1 << 20)) >> 21
1228
s7 -= carry[7] << 21
1229
carry[9] = (s9 + (1 << 20)) >> 21
1231
s9 -= carry[9] << 21
1232
carry[11] = (s11 + (1 << 20)) >> 21
1234
s11 -= carry[11] << 21
1235
carry[13] = (s13 + (1 << 20)) >> 21
1237
s13 -= carry[13] << 21
1238
carry[15] = (s15 + (1 << 20)) >> 21
1240
s15 -= carry[15] << 21
1290
carry[0] = (s0 + (1 << 20)) >> 21
1292
s0 -= carry[0] << 21
1293
carry[2] = (s2 + (1 << 20)) >> 21
1295
s2 -= carry[2] << 21
1296
carry[4] = (s4 + (1 << 20)) >> 21
1298
s4 -= carry[4] << 21
1299
carry[6] = (s6 + (1 << 20)) >> 21
1301
s6 -= carry[6] << 21
1302
carry[8] = (s8 + (1 << 20)) >> 21
1304
s8 -= carry[8] << 21
1305
carry[10] = (s10 + (1 << 20)) >> 21
1307
s10 -= carry[10] << 21
1309
carry[1] = (s1 + (1 << 20)) >> 21
1311
s1 -= carry[1] << 21
1312
carry[3] = (s3 + (1 << 20)) >> 21
1314
s3 -= carry[3] << 21
1315
carry[5] = (s5 + (1 << 20)) >> 21
1317
s5 -= carry[5] << 21
1318
carry[7] = (s7 + (1 << 20)) >> 21
1320
s7 -= carry[7] << 21
1321
carry[9] = (s9 + (1 << 20)) >> 21
1323
s9 -= carry[9] << 21
1324
carry[11] = (s11 + (1 << 20)) >> 21
1326
s11 -= carry[11] << 21
1338
s0 -= carry[0] << 21
1341
s1 -= carry[1] << 21
1344
s2 -= carry[2] << 21
1347
s3 -= carry[3] << 21
1350
s4 -= carry[4] << 21
1353
s5 -= carry[5] << 21
1356
s6 -= carry[6] << 21
1359
s7 -= carry[7] << 21
1362
s8 -= carry[8] << 21
1365
s9 -= carry[9] << 21
1366
carry[10] = s10 >> 21
1368
s10 -= carry[10] << 21
1369
carry[11] = s11 >> 21
1371
s11 -= carry[11] << 21
1383
s0 -= carry[0] << 21
1386
s1 -= carry[1] << 21
1389
s2 -= carry[2] << 21
1392
s3 -= carry[3] << 21
1395
s4 -= carry[4] << 21
1398
s5 -= carry[5] << 21
1401
s6 -= carry[6] << 21
1404
s7 -= carry[7] << 21
1407
s8 -= carry[8] << 21
1410
s9 -= carry[9] << 21
1411
carry[10] = s10 >> 21
1413
s10 -= carry[10] << 21
1415
s[0] = byte(s0 >> 0)
1416
s[1] = byte(s0 >> 8)
1417
s[2] = byte((s0 >> 16) | (s1 << 5))
1418
s[3] = byte(s1 >> 3)
1419
s[4] = byte(s1 >> 11)
1420
s[5] = byte((s1 >> 19) | (s2 << 2))
1421
s[6] = byte(s2 >> 6)
1422
s[7] = byte((s2 >> 14) | (s3 << 7))
1423
s[8] = byte(s3 >> 1)
1424
s[9] = byte(s3 >> 9)
1425
s[10] = byte((s3 >> 17) | (s4 << 4))
1426
s[11] = byte(s4 >> 4)
1427
s[12] = byte(s4 >> 12)
1428
s[13] = byte((s4 >> 20) | (s5 << 1))
1429
s[14] = byte(s5 >> 7)
1430
s[15] = byte((s5 >> 15) | (s6 << 6))
1431
s[16] = byte(s6 >> 2)
1432
s[17] = byte(s6 >> 10)
1433
s[18] = byte((s6 >> 18) | (s7 << 3))
1434
s[19] = byte(s7 >> 5)
1435
s[20] = byte(s7 >> 13)
1436
s[21] = byte(s8 >> 0)
1437
s[22] = byte(s8 >> 8)
1438
s[23] = byte((s8 >> 16) | (s9 << 5))
1439
s[24] = byte(s9 >> 3)
1440
s[25] = byte(s9 >> 11)
1441
s[26] = byte((s9 >> 19) | (s10 << 2))
1442
s[27] = byte(s10 >> 6)
1443
s[28] = byte((s10 >> 14) | (s11 << 7))
1444
s[29] = byte(s11 >> 1)
1445
s[30] = byte(s11 >> 9)
1446
s[31] = byte(s11 >> 17)
1450
// s[0]+256*s[1]+...+256^63*s[63] = s
1453
// s[0]+256*s[1]+...+256^31*s[31] = s mod l
1454
// where l = 2^252 + 27742317777372353535851937790883648493.
1455
func ScReduce(out *[32]byte, s *[64]byte) {
1456
s0 := 2097151 & load3(s[:])
1457
s1 := 2097151 & (load4(s[2:]) >> 5)
1458
s2 := 2097151 & (load3(s[5:]) >> 2)
1459
s3 := 2097151 & (load4(s[7:]) >> 7)
1460
s4 := 2097151 & (load4(s[10:]) >> 4)
1461
s5 := 2097151 & (load3(s[13:]) >> 1)
1462
s6 := 2097151 & (load4(s[15:]) >> 6)
1463
s7 := 2097151 & (load3(s[18:]) >> 3)
1464
s8 := 2097151 & load3(s[21:])
1465
s9 := 2097151 & (load4(s[23:]) >> 5)
1466
s10 := 2097151 & (load3(s[26:]) >> 2)
1467
s11 := 2097151 & (load4(s[28:]) >> 7)
1468
s12 := 2097151 & (load4(s[31:]) >> 4)
1469
s13 := 2097151 & (load3(s[34:]) >> 1)
1470
s14 := 2097151 & (load4(s[36:]) >> 6)
1471
s15 := 2097151 & (load3(s[39:]) >> 3)
1472
s16 := 2097151 & load3(s[42:])
1473
s17 := 2097151 & (load4(s[44:]) >> 5)
1474
s18 := 2097151 & (load3(s[47:]) >> 2)
1475
s19 := 2097151 & (load4(s[49:]) >> 7)
1476
s20 := 2097151 & (load4(s[52:]) >> 4)
1477
s21 := 2097151 & (load3(s[55:]) >> 1)
1478
s22 := 2097151 & (load4(s[57:]) >> 6)
1479
s23 := (load4(s[60:]) >> 3)
1531
carry[6] = (s6 + (1 << 20)) >> 21
1533
s6 -= carry[6] << 21
1534
carry[8] = (s8 + (1 << 20)) >> 21
1536
s8 -= carry[8] << 21
1537
carry[10] = (s10 + (1 << 20)) >> 21
1539
s10 -= carry[10] << 21
1540
carry[12] = (s12 + (1 << 20)) >> 21
1542
s12 -= carry[12] << 21
1543
carry[14] = (s14 + (1 << 20)) >> 21
1545
s14 -= carry[14] << 21
1546
carry[16] = (s16 + (1 << 20)) >> 21
1548
s16 -= carry[16] << 21
1550
carry[7] = (s7 + (1 << 20)) >> 21
1552
s7 -= carry[7] << 21
1553
carry[9] = (s9 + (1 << 20)) >> 21
1555
s9 -= carry[9] << 21
1556
carry[11] = (s11 + (1 << 20)) >> 21
1558
s11 -= carry[11] << 21
1559
carry[13] = (s13 + (1 << 20)) >> 21
1561
s13 -= carry[13] << 21
1562
carry[15] = (s15 + (1 << 20)) >> 21
1564
s15 -= carry[15] << 21
1614
carry[0] = (s0 + (1 << 20)) >> 21
1616
s0 -= carry[0] << 21
1617
carry[2] = (s2 + (1 << 20)) >> 21
1619
s2 -= carry[2] << 21
1620
carry[4] = (s4 + (1 << 20)) >> 21
1622
s4 -= carry[4] << 21
1623
carry[6] = (s6 + (1 << 20)) >> 21
1625
s6 -= carry[6] << 21
1626
carry[8] = (s8 + (1 << 20)) >> 21
1628
s8 -= carry[8] << 21
1629
carry[10] = (s10 + (1 << 20)) >> 21
1631
s10 -= carry[10] << 21
1633
carry[1] = (s1 + (1 << 20)) >> 21
1635
s1 -= carry[1] << 21
1636
carry[3] = (s3 + (1 << 20)) >> 21
1638
s3 -= carry[3] << 21
1639
carry[5] = (s5 + (1 << 20)) >> 21
1641
s5 -= carry[5] << 21
1642
carry[7] = (s7 + (1 << 20)) >> 21
1644
s7 -= carry[7] << 21
1645
carry[9] = (s9 + (1 << 20)) >> 21
1647
s9 -= carry[9] << 21
1648
carry[11] = (s11 + (1 << 20)) >> 21
1650
s11 -= carry[11] << 21
1662
s0 -= carry[0] << 21
1665
s1 -= carry[1] << 21
1668
s2 -= carry[2] << 21
1671
s3 -= carry[3] << 21
1674
s4 -= carry[4] << 21
1677
s5 -= carry[5] << 21
1680
s6 -= carry[6] << 21
1683
s7 -= carry[7] << 21
1686
s8 -= carry[8] << 21
1689
s9 -= carry[9] << 21
1690
carry[10] = s10 >> 21
1692
s10 -= carry[10] << 21
1693
carry[11] = s11 >> 21
1695
s11 -= carry[11] << 21
1707
s0 -= carry[0] << 21
1710
s1 -= carry[1] << 21
1713
s2 -= carry[2] << 21
1716
s3 -= carry[3] << 21
1719
s4 -= carry[4] << 21
1722
s5 -= carry[5] << 21
1725
s6 -= carry[6] << 21
1728
s7 -= carry[7] << 21
1731
s8 -= carry[8] << 21
1734
s9 -= carry[9] << 21
1735
carry[10] = s10 >> 21
1737
s10 -= carry[10] << 21
1739
out[0] = byte(s0 >> 0)
1740
out[1] = byte(s0 >> 8)
1741
out[2] = byte((s0 >> 16) | (s1 << 5))
1742
out[3] = byte(s1 >> 3)
1743
out[4] = byte(s1 >> 11)
1744
out[5] = byte((s1 >> 19) | (s2 << 2))
1745
out[6] = byte(s2 >> 6)
1746
out[7] = byte((s2 >> 14) | (s3 << 7))
1747
out[8] = byte(s3 >> 1)
1748
out[9] = byte(s3 >> 9)
1749
out[10] = byte((s3 >> 17) | (s4 << 4))
1750
out[11] = byte(s4 >> 4)
1751
out[12] = byte(s4 >> 12)
1752
out[13] = byte((s4 >> 20) | (s5 << 1))
1753
out[14] = byte(s5 >> 7)
1754
out[15] = byte((s5 >> 15) | (s6 << 6))
1755
out[16] = byte(s6 >> 2)
1756
out[17] = byte(s6 >> 10)
1757
out[18] = byte((s6 >> 18) | (s7 << 3))
1758
out[19] = byte(s7 >> 5)
1759
out[20] = byte(s7 >> 13)
1760
out[21] = byte(s8 >> 0)
1761
out[22] = byte(s8 >> 8)
1762
out[23] = byte((s8 >> 16) | (s9 << 5))
1763
out[24] = byte(s9 >> 3)
1764
out[25] = byte(s9 >> 11)
1765
out[26] = byte((s9 >> 19) | (s10 << 2))
1766
out[27] = byte(s10 >> 6)
1767
out[28] = byte((s10 >> 14) | (s11 << 7))
1768
out[29] = byte(s11 >> 1)
1769
out[30] = byte(s11 >> 9)
1770
out[31] = byte(s11 >> 17)