5
/* $Id: ga_profile.c,v 1.5 2005-07-21 08:13:26 manoj Exp $ */
7
* Note #1: Right now, only process 0's profile is printed.
8
* Each and every process saves its profile in the corresponding data structure.
9
* However, the profiler prints only process 0's profile when ga_profile_terminate()
10
* is called. Do the corresponding changes in ga_profile_terminate() to
11
* print the profile of other processes.
13
* Note #2: By default the profiler prints 21 message ranges (GA_MAX_MSG_RANGE). Example: range 10
14
* corresponds to message ranges from 1024 bytes to 2047 bytes.
15
* Message ranges are powers of 2. For example:
16
* ------------------------------------
17
* MSG_RANGE (r) BYTES (2^r to 2^(r+1)-1)
18
* ------------------------------------
27
* -------------------------------------
29
* Note#3: If Stride information needs to be printed, set GA_PRINT_STRIDE.
30
* Stride information is printed in ga_profile_terminate() for a few
31
* selected message ranges and event types. Modify according to your needs.
51
#include "ga_profile.h"
57
# define MP_TIMER tcg_time
60
# define MP_TIMER MPI_Wtime
64
#define GA_PRINT_STRIDE 1
65
#define GA_MAX_MSG_RANGE 21
68
#define STRIDE_COUNT 1000
69
typedef struct ga_stride {
78
#define GA_EVENTS 6 /* get, put, acc, Non-Contiguous get, put, acc*/
79
enum events {GET, /* Contiguous get */
82
NC_GET, /* Non contiguous Get */
87
/* Printable name of each event type, indexed by event number when printing
 * (see event_name[event] in ga_print_stridedinfo()). The order presumably
 * mirrors enum events (GET, PUT, ACC, then the non-contiguous variants) --
 * keep the two in sync. */
char *event_name[GA_EVENTS] = {"GET", "PUT", "ACC", "NON CONTIGUOUS GET",
88
"NON CONTIGUOUS PUT", "NON CONTIGUOUS ACC"};
90
typedef struct ga_profile {
91
int count; /* number of times called */
92
double exectime; /* total execution time for "count" calls */
94
ga_stride_t stride[STRIDE_COUNT];
98
/* profile get/put/acc for various message ranges (i.e GA_MAX_MSG_RANGE) */
99
static ga_profile_t GA_PROF[GA_EVENTS][GA_MAX_MSG_RANGE];
109
/* Start profiling: process 0 (pnga_nodeid()==0) announces that profiling is
 * on, then the call count and accumulated execution time of every
 * GA_PROF[event][range] entry are reset to zero. */
void ga_profile_init() {
111
if(pnga_nodeid()==0) {printf("\nProfiling GA - ON\n");fflush(stdout);}
112
for(i=0; i<GA_EVENTS; i++)
113
for(j=0; j<GA_MAX_MSG_RANGE; j++) {
114
GA_PROF[i][j].count = 0; GA_PROF[i][j].exectime = 0.0;
118
/* Record (event_type, range) in gCURRENT_EVNT as the event currently being
 * timed, mark it as set, and stamp its start time with MP_TIMER(). The
 * timestamp is taken last, after the bookkeeping fields are filled in. */
static void ga_profile_set_event(int event_type, int range) {
119
gCURRENT_EVNT.event_type = event_type;
120
gCURRENT_EVNT.range = range;
121
gCURRENT_EVNT.is_set = 1;
122
gCURRENT_EVNT.start_time = MP_TIMER();
125
void ga_profile_start(int g_a, long bytes, int ndim, Integer *lo, Integer *hi,
127
int i, count=0, non_contig=0, event_type, range;
129
/* find the message range */
130
if(bytes<=0) range=0;
131
else range = (int) (log((double)bytes)/log(2.0));
132
if(range>=GA_MAX_MSG_RANGE) range = GA_MAX_MSG_RANGE;
134
/* check contiguous or non-contiguous */
135
for(i=0; i<ndim; i++) if(hi[0]-lo[0]) count++;
136
if(count>1) non_contig=1; /* i.e. non-contiguous */
139
case ENABLE_PROFILE_PUT:
140
if(non_contig) event_type = NC_PUT;
141
else event_type = PUT;
143
case ENABLE_PROFILE_GET:
144
if(non_contig) event_type = NC_GET;
145
else event_type = GET;
147
case ENABLE_PROFILE_ACC:
148
if(non_contig) event_type = NC_ACC;
149
else event_type = ACC;
151
default: pnga_error("ENABLE_PROFILE: Invalid communication type", 0L);
154
/* set the current event for timer */
155
ga_profile_set_event(event_type, range);
157
/* profile update: i.e. update event count */
158
GA_PROF[event_type][range].count++;
162
int idx = GA_PROF[event_type][range].count-1;
163
if(idx<STRIDE_COUNT) {
164
GA_PROF[event_type][range].stride[idx].ndim = ndim;
165
strcpy(GA_PROF[event_type][range].stride[idx].name, GA[g_a].name);
166
for(i=0;i<ndim;i++) {
167
GA_PROF[event_type][range].stride[idx].lo[i] = (int)lo[i];
168
GA_PROF[event_type][range].stride[idx].hi[i] = (int)hi[i];
175
/* Stop timing the event recorded in gCURRENT_EVNT. The time elapsed since
 * its start_time is added to the exectime of GA_PROF[event_type][range] and
 * the event is cleared. pnga_error() reports the case where no event is set
 * (presumably the else branch; that line is not visible here -- confirm).
 * The elapsed time is also stored in the stride record of the most recent
 * call, as long as that call's slot is below STRIDE_COUNT. */
void ga_profile_stop() {
176
int event_type = gCURRENT_EVNT.event_type;
177
int idx, range = gCURRENT_EVNT.range;
178
double time = MP_TIMER() - gCURRENT_EVNT.start_time;
180
if(gCURRENT_EVNT.is_set) { /* Yep, there is an event set */
181
GA_PROF[event_type][range].exectime += time;
182
gCURRENT_EVNT.is_set = 0; /* clear the event */
185
pnga_error("ENABLE_PROFILE: No event set. Probably ga_profile_stop() is called before ga_profile_start()", 0L);
188
{ /* measure the time of each strided data transfer */
189
idx = GA_PROF[event_type][range].count-1; /* slot of the latest call: count was already incremented in ga_profile_start() */
190
if(idx<STRIDE_COUNT) GA_PROF[event_type][range].stride[idx].time = time;
195
/* Banners (GA_HDR1, GA_HDR2) and column headers (GA_HDR3, GA_HDR4, GA_HDR5)
 * printed above the profile tables.
 * NOTE(review): every expansion already ends in ';', so a call written as
 * "GA_HDR1();" expands to the printf statement plus an empty statement.
 * That is harmless in a plain statement list but would break an unbraced
 * if/else. */
#define GA_HDR1() printf("\n\n************ CONTIGUOUS DATA TRANSFER ************\n\n");
196
#define GA_HDR2() printf("\n\n********** NON-CONTIGUOUS DATA TRANSFER **********\n\n");
197
#define GA_HDR3() printf("RANK\t #Gets\t #puts\t #accs\t RANGE\n\n");
198
#define GA_HDR4() printf("RANK\t get_time\t put_time\t acc_time\t RANGE\n\n");
199
#define GA_HDR5() printf("SL#\tndim time stride_info (array name)\n\n");
201
/* This prints the number of contiguous get/put/acc/ calls for every
203
void ga_print_numcalls1() {
205
GA_HDR1(); GA_HDR3();
206
for(i=0; i< GA_MAX_MSG_RANGE-1; i++)
207
printf("%d\t %d\t %d\t %d\t (%d-%d)\n", pnga_nodeid(),
208
GA_PROF[GET][i].count, GA_PROF[PUT][i].count,
209
GA_PROF[ACC][i].count, 1<<i, 1<<(i+1));
210
printf("%d\t %d\t %d\t %d\t (>%d)\n", pnga_nodeid(),
211
GA_PROF[GET][i].count, GA_PROF[PUT][i].count,
212
GA_PROF[ACC][i].count, 1<<GA_MAX_MSG_RANGE);
215
/* This prints the number of non-contiguous get/put/acc/ calls for every
217
void ga_print_numcalls2() {
219
GA_HDR2(); GA_HDR3();
220
for(i=0; i< GA_MAX_MSG_RANGE-1; i++)
221
printf("%d\t %d\t %d\t %d\t (%d-%d)\n", pnga_nodeid(),
222
GA_PROF[NC_GET][i].count, GA_PROF[NC_PUT][i].count,
223
GA_PROF[NC_ACC][i].count, 1<<i, 1<<(i+1));
224
printf("%d\t %d\t %d\t %d\t (>%d)\n",pnga_nodeid(),
225
GA_PROF[NC_GET][i].count, GA_PROF[NC_PUT][i].count,
226
GA_PROF[NC_ACC][i].count, 1<<GA_MAX_MSG_RANGE);
229
/* This prints timings of all contiguous get/put/acc/ calls for every
231
void ga_print_timings1() {
233
GA_HDR1(); GA_HDR4();
234
for(i=0; i< GA_MAX_MSG_RANGE-1; i++)
235
printf("%d\t %.2e\t %.2e\t %.2e\t (%d-%d)\n", pnga_nodeid(),
236
GA_PROF[GET][i].exectime, GA_PROF[PUT][i].exectime,
237
GA_PROF[ACC][i].exectime, 1<<i, 1<<(i+1));
238
printf("%d\t %.2e\t %.2e\t %.2e\t (>%d)\n", pnga_nodeid(),
239
GA_PROF[GET][i].exectime, GA_PROF[PUT][i].exectime,
240
GA_PROF[ACC][i].exectime, 1<<GA_MAX_MSG_RANGE);
243
/* This prints timings of all non-contiguous get/put/acc/ calls for every
245
void ga_print_timings2() {
247
GA_HDR2(); GA_HDR4();
248
for(i=0; i< GA_MAX_MSG_RANGE-1; i++)
249
printf("%d\t %.2e\t %.2e\t %.2e\t (%d-%d)\n", pnga_nodeid(),
250
GA_PROF[NC_GET][i].exectime, GA_PROF[NC_PUT][i].exectime,
251
GA_PROF[NC_ACC][i].exectime, 1<<i, 1<<(i+1));
252
printf("%d\t %.2e\t %.2e\t %.2e\t (>%d)\n", pnga_nodeid(),
253
GA_PROF[NC_GET][i].exectime, GA_PROF[NC_PUT][i].exectime,
254
GA_PROF[NC_ACC][i].exectime, 1<<GA_MAX_MSG_RANGE);
257
/* Print the per-call stride records stored in GA_PROF[event][range].
 * After a heading naming the message range and event, each recorded call
 * (at most STRIDE_COUNT of them) is printed with its index, ndim, time,
 * the per-dimension extents hi-lo+1 joined by "x", the [lo-hi] bounds and
 * the array name. The recorded times are accumulated in `time`, and a
 * closing line reports the call count and a total time.
 *   event - first index into GA_PROF and into event_name[]
 *   range - message range index (second index into GA_PROF) */
void ga_print_stridedinfo(int event, int range) {
260
printf("\n\nSTRIDE INFORMATION FOR MSG_RANGE %d-%d for EVENT: %s\n",
261
1<<range, 1<<(range+1), event_name[event]);
263
for(i=0; i< GA_PROF[event][range].count; i++) {
264
if(i>=STRIDE_COUNT) break; /* only the first STRIDE_COUNT calls have a stride record */
265
time += GA_PROF[event][range].stride[i].time;
266
ndim = GA_PROF[event][range].stride[i].ndim;
267
printf("%d\t%d %.2e (",i, ndim,
268
GA_PROF[event][range].stride[i].time);
269
for(j=0;j<ndim;j++) {
270
printf("%d", GA_PROF[event][range].stride[i].hi[j] -
271
GA_PROF[event][range].stride[i].lo[j] +1);
272
if(j!=ndim-1) printf("x");
276
printf("[%d-%d]", GA_PROF[event][range].stride[i].lo[j],
277
GA_PROF[event][range].stride[i].hi[j]);
278
printf(" \"%s\"\n", GA_PROF[event][range].stride[i].name);
280
/* This output is just for verification */
281
printf("**** STRIDE_COUNT = %d ; TOTAL TIME = %.2e\n",GA_PROF[event][range].count,
285
/* Print the collected profile at shutdown. Only the process for which
 * pnga_nodeid() == 0 prints anything. The visible calls print the
 * contiguous and non-contiguous call counts and then the stride records of
 * NC_GET for message range 6. To report other ranges or events, add further
 * ga_print_stridedinfo() calls like the commented-out example below. */
void ga_profile_terminate() {
287
if(pnga_nodeid() == 0) { /* process 0's profile only */
289
/* contiguous calls */
290
ga_print_numcalls1();
293
/* non-contiguous calls */
294
ga_print_numcalls2();
299
int msg_range, event_type;
301
* printing stride info for non-contiguous get (NC_GET) for message
302
* range #6. 2^6 - 2^(6+1) bytes. (i.e. 64-128 bytes)
304
msg_range = 6; /* message range 2^6-2^(6+1) */
306
ga_print_stridedinfo(NC_GET, msg_range);
307
/*ga_print_stridedinfo(GET,19);*/ /* 2^19-2^20 range (524288-1MB)*/
313
#endif /* end of ENABLE_PROFILE */