1
/* ***** BEGIN LICENSE BLOCK *****
2
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
4
* The contents of this file are subject to the Mozilla Public License Version
5
* 1.1 (the "License"); you may not use this file except in compliance with
6
* the License. You may obtain a copy of the License at
7
* http://www.mozilla.org/MPL/
9
* Software distributed under the License is distributed on an "AS IS" basis,
10
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
11
* for the specific language governing rights and limitations under the
14
* The Original Code is the Netscape security libraries.
16
* The Initial Developer of the Original Code is
17
* Netscape Communications Corporation.
18
* Portions created by the Initial Developer are Copyright (C) 2000
19
* the Initial Developer. All Rights Reserved.
23
* Alternatively, the contents of this file may be used under the terms of
24
* either the GNU General Public License Version 2 or later (the "GPL"), or
25
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
26
* in which case the provisions of the GPL or the LGPL are applicable instead
27
* of those above. If you wish to allow use of your version of this file only
28
* under the terms of either the GPL or the LGPL, and not to allow others to
29
* use your version of this file under the terms of the MPL, indicate your
30
* decision by deleting the provisions above and replace them with the notice
31
* and other provisions required by the GPL or the LGPL. If you do not delete
32
* the provisions above, a recipient may use your version of this file under
33
* the terms of any one of the MPL, the GPL or the LGPL.
35
* ***** END LICENSE BLOCK ***** */
36
/* $Id: mpi_sparc.c,v 1.6.30.1 2006/01/23 00:39:33 nelsonb%netscape.com Exp $ */
38
/* Multiplication performance enhancements for sparc v8+vis CPUs. */
42
#include <sys/systeminfo.h>
45
/* In the functions below, */
46
/* vector y must be 8-byte aligned, and n must be even */
47
/* returns carry out of high order word of result */
48
/* maximum n is 256 */
50
/* vector x += vector y * scalar a; where y is of length n words. */
51
extern mp_digit mul_add_inp(mp_digit *x, const mp_digit *y, int n, mp_digit a);
53
/* vector z = vector x + vector y * scalar a; where y is of length n words. */
54
extern mp_digit mul_add(mp_digit *z, const mp_digit *x, const mp_digit *y,
57
/* v8 versions of these functions run on any Sparc v8 CPU. */
59
/* This trick works on Sparc V8 CPUs with the Workshop compilers. */
60
/*
 * MP_MUL_DxD(a, b, Phi, Plo)
 *
 * Multiplies digit a by digit b in unsigned long long arithmetic and splits
 * the result into two digits:
 *   Plo receives the product truncated to mp_digit,
 *   Phi receives the product shifted right by MP_DIGIT_BIT.
 *
 * a and b may be arbitrary expressions; each is evaluated exactly once and
 * is parenthesized so that operators inside an argument cannot re-associate
 * with the cast or the multiplication.  Phi and Plo must be assignable
 * lvalues.
 *
 * The expansion is a brace-enclosed block, so it is used as a statement:
 *     MP_MUL_DxD(a_i, b, a1b1, a0b0);
 *
 * NOTE(review): assumes two mp_digits fit in an unsigned long long, i.e.
 * 2 * MP_DIGIT_BIT does not exceed its width -- confirm against the
 * mp_digit definition.
 */
#define MP_MUL_DxD(a, b, Phi, Plo) \
  { unsigned long long product = (unsigned long long)(a) * (b); \
    (Plo) = (mp_digit)product; \
    (Phi) = (mp_digit)(product >> MP_DIGIT_BIT); }
67
v8_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
69
#if !defined(MP_NO_MP_WORD)
72
/* Inner product: Digits of a */
74
mp_word w = ((mp_word)b * *a++) + d;
85
MP_MUL_DxD(a_i, b, a1b1, a0b0);
99
v8_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
101
#if !defined(MP_NO_MP_WORD)
104
/* Inner product: Digits of a */
106
mp_word w = ((mp_word)b * *a++) + *c + d;
117
MP_MUL_DxD(a_i, b, a1b1, a0b0);
132
/* Presently, this is only used by the Montgomery arithmetic code. */
135
v8_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
137
#if !defined(MP_NO_MP_WORD)
140
/* Inner product: Digits of a */
142
mp_word w = ((mp_word)b * *a++) + *c + d;
148
mp_word w = (mp_word)*c + d;
158
MP_MUL_DxD(a_i, b, a1b1, a0b0);
180
/* These functions run only on v8plus+vis or v9+vis CPUs. */
184
s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
189
if (a == c || ((ptrdiff_t)a & 0x7) != 0 || (a_len & 1) != 0) {
191
px = (((ptrdiff_t)x & 0x7) != 0) ? x + 1 : x;
192
memcpy(px, a, a_len * sizeof(*a));
198
s_mp_setz(c, a_len + 1);
199
d = mul_add_inp(c, a, a_len, b);
202
v8_mpv_mul_d(a, a_len, b, c);
206
/* c += a * b, where a is a_len words long. */
208
s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
213
if (((ptrdiff_t)a & 0x7) != 0 || (a_len & 1) != 0) {
215
px = (((ptrdiff_t)x & 0x7) != 0) ? x + 1 : x;
216
memcpy(px, a, a_len * sizeof(*a));
222
d = mul_add_inp(c, a, a_len, b);
225
v8_mpv_mul_d_add(a, a_len, b, c);
229
/* c += a * b, where a is a_len words long. */
231
s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
236
if (((ptrdiff_t)a & 0x7) != 0 || (a_len & 1) != 0) {
238
px = (((ptrdiff_t)x & 0x7) != 0) ? x + 1 : x;
239
memcpy(px, a, a_len * sizeof(*a));
245
d = mul_add_inp(c, a, a_len, b);
249
mp_digit sum = d + *c;
255
v8_mpv_mul_d_add_prop(a, a_len, b, c);