2
unrolled operations, for better Pentium FPU scheduling
3
Copyright (C) 2001 Martin Vogt
5
This program is free software; you can redistribute it and/or modify
6
it under the terms of the GNU Library General Public License as published by
7
the Free Software Foundation.
9
For more information look at the file COPYRIGHT in this package
19
The Pentium has two pipelined FPUs which makes it possible
20
to do two operations in one cycle.
25
#define PTR_DIST (1024)
27
#define OS r1=vp1[0] * dp[0]; \
28
r2=vp1[PTR_DIST-0] * dp[0]; \
31
#define XX1 vp1+=15;dp++;
33
#define XX2 r1+=vp1[0] * dp[-1]; \
34
r2+=vp1[PTR_DIST-0] * dp[-1];
36
#define OP_END(val) vp1-=val;dp+=val;
38
#define OP_END_1(vVal,dVal) vp1+=(vVal-dVal),dp+=dVal
41
// this is OP_END(x);XX1; together:
42
#define OP_END_2(vVal) vp1+=(15-vVal),dp+=vVal+1
45
// check this to test pipelining
47
#define SCHEDULE1(op,r1,r2) r1;op;r2;
48
#define SCHEDULE2(op,r1,r2) op;r1;r2;
50
#define SCHEDULE(a,b,c) SCHEDULE2(a,b,c);
53
#define OP r1+=vp1[-1] * dp[0]; \
54
r2+=vp1[PTR_DIST-1] * dp[0];
58
#define OP2 SCHEDULE(OP ,r1+=vp1[-2] * dp[1] ,r2+=vp1[PTR_DIST-2] *dp[1]);
59
#define OP3 SCHEDULE(OP2 ,r1+=vp1[-3] * dp[2] ,r2+=vp1[PTR_DIST-3] *dp[2]);
60
#define OP4 SCHEDULE(OP3 ,r1+=vp1[-4] * dp[3] ,r2+=vp1[PTR_DIST-4] *dp[3]);
61
#define OP5 SCHEDULE(OP4 ,r1+=vp1[-5] * dp[4] ,r2+=vp1[PTR_DIST-5] *dp[4]);
62
#define OP6 SCHEDULE(OP5 ,r1+=vp1[-6] * dp[5] ,r2+=vp1[PTR_DIST-6] *dp[5]);
63
#define OP7 SCHEDULE(OP6 ,r1+=vp1[-7] * dp[6] ,r2+=vp1[PTR_DIST-7] *dp[6]);
64
#define OP8 SCHEDULE(OP7 ,r1+=vp1[-8] * dp[7] ,r2+=vp1[PTR_DIST-8] *dp[7]);
65
#define OP9 SCHEDULE(OP8 ,r1+=vp1[-9] * dp[8] ,r2+=vp1[PTR_DIST-9] *dp[8]);
66
#define OP10 SCHEDULE(OP9 ,r1+=vp1[-10] * dp[9] ,r2+=vp1[PTR_DIST-10] *dp[9]);
67
#define OP11 SCHEDULE(OP10,r1+=vp1[-11] * dp[10],r2+=vp1[PTR_DIST-11] *dp[10]);
68
#define OP12 SCHEDULE(OP11,r1+=vp1[-12] * dp[11],r2+=vp1[PTR_DIST-12] *dp[11]);
69
#define OP13 SCHEDULE(OP12,r1+=vp1[-13] * dp[12],r2+=vp1[PTR_DIST-13] *dp[12]);
70
#define OP14 SCHEDULE(OP13,r1+=vp1[-14] * dp[13],r2+=vp1[PTR_DIST-14] *dp[13]);
71
#define OP15 SCHEDULE(OP14,r1+=vp1[-15] * dp[14],r2+=vp1[PTR_DIST-15] *dp[14]);
75
#define OP r1+=vp1[-1] * dp[0]; \
80
#define OP2 SCHEDULE(OP ,r1+=vp1[-2] * dp[1] ,r2+=vp2[-2] * dp[1]);
81
#define OP3 SCHEDULE(OP2 ,r1+=vp1[-3] * dp[2] ,r2+=vp2[-3] * dp[2]);
82
#define OP4 SCHEDULE(OP3 ,r1+=vp1[-4] * dp[3] ,r2+=vp2[-4] * dp[3]);
83
#define OP5 SCHEDULE(OP4 ,r1+=vp1[-5] * dp[4] ,r2+=vp2[-5] * dp[4]);
84
#define OP6 SCHEDULE(OP5 ,r1+=vp1[-6] * dp[5] ,r2+=vp2[-6] * dp[5]);
85
#define OP7 SCHEDULE(OP6 ,r1+=vp1[-7] * dp[6] ,r2+=vp2[-7] * dp[6]);
86
#define OP8 SCHEDULE(OP7 ,r1+=vp1[-8] * dp[7] ,r2+=vp2[-8] * dp[7]);
87
#define OP9 SCHEDULE(OP8 ,r1+=vp1[-9] * dp[8] ,r2+=vp2[-9] * dp[8]);
88
#define OP10 SCHEDULE(OP9 ,r1+=vp1[-10] * dp[9] ,r2+=vp2[-10] * dp[9]);
89
#define OP11 SCHEDULE(OP10,r1+=vp1[-11] * dp[10],r2+=vp2[-11] * dp[10]);
90
#define OP12 SCHEDULE(OP11,r1+=vp1[-12] * dp[11],r2+=vp2[-12] * dp[11]);
91
#define OP13 SCHEDULE(OP12,r1+=vp1[-13] * dp[12],r2+=vp2[-13] * dp[12]);
92
#define OP14 SCHEDULE(OP13,r1+=vp1[-14] * dp[13],r2+=vp2[-14] * dp[13]);
93
#define OP15 SCHEDULE(OP14,r1+=vp1[-15] * dp[14],r2+=vp2[-15] * dp[14]);