1
// Copyright 2009 The Go Authors. All rights reserved.
2
// Use of this source code is governed by a BSD-style
3
// license that can be found in the LICENSE file.
5
// This file provides fast assembly versions for the elementary
6
// arithmetic operations on vectors implemented in arith.go.
8
// TODO(gri) - experiment with unrolled loops for faster execution
10
// func mulWW(x, y Word) (z1, z0 Word)
19
// func divWW(x1, x0, y Word) (q, r Word)
29
// func addVV(z, x, y []Word) (c Word)
39
L1: MOVQ (R8)(BX*8), AX
46
E1: CMPQ BX, R11 // i < n
53
// func subVV(z, x, y []Word) (c Word)
54
// (same as addVV except for SBBQ instead of ADCQ and label names)
64
L2: MOVQ (R8)(BX*8), AX
71
E2: CMPQ BX, R11 // i < n
78
// func addVW(z, x []Word, y Word) (c Word)
82
MOVQ y+32(FP), AX // c = y
87
L3: ADDQ (R8)(BX*8), AX
93
E3: CMPQ BX, R11 // i < n
100
// func subVW(z, x []Word, y Word) (c Word)
104
MOVQ y+32(FP), AX // c = y
109
L4: MOVQ (R8)(BX*8), DX // TODO(gri) is there a reverse SUBQ?
116
E4: CMPQ BX, R11 // i < n
123
// func shlVW(z, x []Word, s Word) (c Word)
125
MOVL n+8(FP), BX // i = n
127
JL X8b // i < 0 (n <= 0)
133
MOVQ (R8)(BX*8), AX // w1 = x[n-1]
135
SHLQ CX, DX:AX // w1>>ŝ
142
L8: MOVQ AX, DX // w = w1
143
MOVQ -8(R8)(BX*8), AX // w1 = x[i-1]
144
SHLQ CX, DX:AX // w<<s | w1>>ŝ
145
MOVQ DX, (R10)(BX*8) // z[i] = w<<s | w1>>ŝ
150
X8a: SHLQ CX, AX // w1<<s
151
MOVQ AX, (R10) // z[0] = w1<<s
154
X8b: MOVQ $0, c+40(FP)
158
// func shrVW(z, x []Word, s Word) (c Word)
162
JL X9b // n < 0 (n <= 0)
168
MOVQ (R8), AX // w1 = x[0]
170
SHRQ CX, DX:AX // w1<<ŝ
177
L9: MOVQ AX, DX // w = w1
178
MOVQ 8(R8)(BX*8), AX // w1 = x[i+1]
179
SHRQ CX, DX:AX // w>>s | w1<<ŝ
180
MOVQ DX, (R10)(BX*8) // z[i] = w>>s | w1<<ŝ
187
X9a: SHRQ CX, AX // w1>>s
188
MOVQ AX, (R10)(R11*8) // z[n-1] = w1>>s
191
X9b: MOVQ $0, c+40(FP)
195
// func mulAddVWW(z, x []Word, y, r Word) (c Word)
196
TEXT ·mulAddVWW(SB),7,$0
200
MOVQ r+40(FP), CX // c = r
205
L5: MOVQ (R8)(BX*8), AX
213
E5: CMPQ BX, R11 // i < n
220
// func addMulVVW(z, x []Word, y Word) (c Word)
221
TEXT ·addMulVVW(SB),7,$0
230
L6: MOVQ (R8)(BX*8), AX
239
E6: CMPQ BX, R11 // i < n
246
// func divWVW(z []Word, xn Word, x []Word, y Word) (r Word)
247
TEXT ·divWVW(SB),7,$0
249
MOVQ xn+16(FP), DX // r = xn
252
MOVL n+8(FP), BX // i = n
255
L7: MOVQ (R8)(BX*8), AX
259
E7: SUBL $1, BX // i--