1
/* $Id: ga_profile.c,v 1.5 2005-07-21 08:13:26 manoj Exp $ */
3
* Note #1: Right now, only process 0's profile is printed.
4
* Each and every process saves its profile in the corresponding data structure.
5
* However, the profiler prints only process 0's profile when ga_profile_terminate()
6
* is called. Do the corresponding changes in ga_profile_terminate() to
7
* print the profile of other processes.
9
* Note #2: By default the profiler prints 21 message ranges. Example: range 10
10
* corresponds to message ranges from 1024 bytes to 2047 bytes.
11
* Message ranges are in the power of 2. for ex:
12
* ------------------------------------
13
* MSG_RANGE (r) BYTES (2^r to 2^(r+1)-1)
14
* ------------------------------------
23
* -------------------------------------
25
* Note #3: If stride information needs to be printed, set GA_PRINT_STRIDE.
26
* Stride information is printed in ga_profile_terminate() for various
27
* selective message ranges and event types. Modify according to your needs.
40
#include "ga_profile.h"
44
# define MP_TIMER TCGTIME_
47
# define MP_TIMER MPI_Wtime
51
/* Set to request stride information output (see Note#3 in the file header). */
#define GA_PRINT_STRIDE 1
52
/* Number of message-size ranges; also the second dimension of GA_PROF. */
#define GA_MAX_MSG_RANGE 21
55
/* Length of the stride[] array kept in each ga_profile record; calls beyond
 * this many are still counted, but their stride details are not stored. */
#define STRIDE_COUNT 1000
56
typedef struct ga_stride {
65
#define GA_EVENTS 6 /* get, put, acc, Non-Contiguous get, put, acc*/
66
enum events {GET, /* Contiguous get */
69
NC_GET, /* Non contiguous Get */
74
/* Human-readable label for each profiled event kind; looked up as
 * event_name[event] when stride information is printed. */
char *event_name[GA_EVENTS] = {
    "GET",
    "PUT",
    "ACC",
    "NON CONTIGUOUS GET",
    "NON CONTIGUOUS PUT",
    "NON CONTIGUOUS ACC"
};
77
/* Profile record for one (event, message range) cell of GA_PROF. */
typedef struct ga_profile {
78
int count; /* number of times called */
79
double exectime; /* total execution time for "count" calls */
81
/* per-call details (name, ndim, lo/hi bounds, time) for up to STRIDE_COUNT
 * calls; written by ga_profile_start() and ga_profile_stop() */
ga_stride_t stride[STRIDE_COUNT];
85
/* Profile table, indexed as GA_PROF[event][message range]: one ga_profile_t
 * record for each of the GA_EVENTS event kinds (get/put/acc and their
 * non-contiguous variants) in each of the GA_MAX_MSG_RANGE message ranges. */
86
static ga_profile_t GA_PROF[GA_EVENTS][GA_MAX_MSG_RANGE];
96
/* Announce that profiling is on and reset the call count and accumulated
 * execution time of every GA_PROF[event][range] entry. */
void ga_profile_init() {
98
/* only the process whose ga_nodeid_() is 0 prints the banner */
if(ga_nodeid_()==0) {printf("\nProfiling GA - ON\n");fflush(stdout);}
99
/* clear every (event, message range) cell */
for(i=0; i<GA_EVENTS; i++)
100
for(j=0; j<GA_MAX_MSG_RANGE; j++) {
101
GA_PROF[i][j].count = 0; GA_PROF[i][j].exectime = 0.0;
105
/* Record (event_type, range) as the event currently being timed: store both
 * in gCURRENT_EVNT, mark the event as set, and take the start timestamp.
 *   event_type - event kind to charge the elapsed time to
 *   range      - message-range index to charge the elapsed time to */
static void ga_profile_set_event(int event_type, int range) {
106
gCURRENT_EVNT.event_type = event_type;
107
gCURRENT_EVNT.range = range;
108
/* ga_profile_stop() checks this flag before accumulating time */
gCURRENT_EVNT.is_set = 1;
109
/* timestamp is taken last, after the bookkeeping stores above */
gCURRENT_EVNT.start_time = MP_TIMER();
112
/* Begin profiling one get/put/acc call: classify it by message-size range and
 * by contiguity, make it the current timed event, bump its call count and
 * record its stride details.
 *   g_a    - array handle; used here to index GA[] for the array name
 *   bytes  - size of the transfer in bytes (values <= 0 fall in range 0)
 *   ndim   - number of dimensions; lo[] and hi[] are read for 0..ndim-1
 *   lo, hi - lower/upper bounds of the accessed patch, one entry per dimension
 * The rest of the parameter list is on a line not visible in this view. */
void ga_profile_start(int g_a, long bytes, int ndim, Integer *lo, Integer *hi,
114
int i, count=0, non_contig=0, event_type, range;
116
/* find the message range: range = floor(log2(bytes)). Integer shifts are used
 * instead of log(bytes)/log(2.0) so floating-point rounding cannot put a size
 * into the neighbouring range. */
117
if(bytes<=0) range=0;
118
else { unsigned long rest = (unsigned long)bytes; range = 0; while(rest >>= 1) range++; }
119
/* clamp to the LAST VALID index: GA_PROF has GA_MAX_MSG_RANGE columns, so
 * GA_MAX_MSG_RANGE itself would be one past the end of the table */
if(range>=GA_MAX_MSG_RANGE) range = GA_MAX_MSG_RANGE-1;
121
/* check contiguous or non-contiguous: count the dimensions whose extent is
 * larger than one element (each dimension i must be tested, not only 0) */
122
for(i=0; i<ndim; i++) if(hi[i]-lo[i]) count++;
123
if(count>1) non_contig=1; /* i.e. non-contiguous */
127
if(non_contig) event_type = NC_PUT;
128
else event_type = PUT;
131
if(non_contig) event_type = NC_GET;
132
else event_type = GET;
135
if(non_contig) event_type = NC_ACC;
136
else event_type = ACC;
138
default: ga_error("GA_PROFILE: Invalid communication type", 0L);
141
/* set the current event for timer */
142
ga_profile_set_event(event_type, range);
144
/* profile update: i.e. update event count */
145
GA_PROF[event_type][range].count++;
149
/* slot of this call in the stride table (count was just incremented) */
int idx = GA_PROF[event_type][range].count-1;
150
/* only the first STRIDE_COUNT calls per (event, range) keep stride details */
if(idx<STRIDE_COUNT) {
151
GA_PROF[event_type][range].stride[idx].ndim = ndim;
152
/* NOTE(review): unbounded strcpy; the size of stride[].name is not visible
 * here -- confirm it can always hold GA[g_a].name */
strcpy(GA_PROF[event_type][range].stride[idx].name, GA[g_a].name);
153
for(i=0;i<ndim;i++) {
154
GA_PROF[event_type][range].stride[idx].lo[i] = (int)lo[i];
155
GA_PROF[event_type][range].stride[idx].hi[i] = (int)hi[i];
162
/* End profiling of the current event: add the time elapsed since
 * ga_profile_set_event() to the event's GA_PROF cell, clear the event, and
 * store the elapsed time in the stride record of the most recent call.
 * Reports an error through ga_error() when no event is set. */
void ga_profile_stop() {
163
int event_type = gCURRENT_EVNT.event_type;
164
int idx, range = gCURRENT_EVNT.range;
165
/* elapsed time is measured first so the bookkeeping below is not timed */
double time = MP_TIMER() - gCURRENT_EVNT.start_time;
167
if(gCURRENT_EVNT.is_set) { /* Yep, there is an event set */
168
GA_PROF[event_type][range].exectime += time;
169
gCURRENT_EVNT.is_set = 0; /* clear the event */
172
ga_error("GA_PROFILE: No event set. Probably ga_profile_stop() is called before ga_profile_start()", 0L);
175
{ /* measure the time of each strided data transfer */
176
idx = GA_PROF[event_type][range].count-1;
177
/* idx is -1 when no call has been counted for this cell; never use it to
 * index stride[] in that case */
if(idx>=0 && idx<STRIDE_COUNT) GA_PROF[event_type][range].stride[idx].time = time;
182
/* Report headers. Each macro expands to a complete printf statement
 * (including its trailing semicolon).
 *   GA_HDR1 / GA_HDR2 - section banners for contiguous / non-contiguous data
 *   GA_HDR3 / GA_HDR4 - column titles for the call-count / timing tables
 *   GA_HDR5           - column titles for the stride listing */
#define GA_HDR1() printf("\n\n************ CONTIGUOUS DATA TRANSFER ************\n\n");
183
#define GA_HDR2() printf("\n\n********** NON-CONTIGUOUS DATA TRANSFER **********\n\n");
184
#define GA_HDR3() printf("RANK\t #Gets\t #puts\t #accs\t RANGE\n\n");
185
#define GA_HDR4() printf("RANK\t get_time\t put_time\t acc_time\t RANGE\n\n");
186
#define GA_HDR5() printf("SL#\tndim time stride_info (array name)\n\n");
188
/* This prints the number of contiguous get/put/acc/ calls for every
190
/* Print the number of contiguous get/put/acc calls recorded in GA_PROF, one
 * row per message range, prefixed with this process's ga_nodeid_(). */
void ga_print_numcalls1() {
192
GA_HDR1(); GA_HDR3();
193
/* range i holds messages of 2^i .. 2^(i+1)-1 bytes (inclusive bounds) */
for(i=0; i< GA_MAX_MSG_RANGE-1; i++)
194
printf("%d\t %d\t %d\t %d\t (%d-%d)\n", ga_nodeid_(),
195
GA_PROF[GET][i].count, GA_PROF[PUT][i].count,
196
GA_PROF[ACC][i].count, 1<<i, (1<<(i+1))-1);
197
/* last row (i == GA_MAX_MSG_RANGE-1): open-ended bucket that starts at
 * 2^(GA_MAX_MSG_RANGE-1) bytes */
printf("%d\t %d\t %d\t %d\t (>=%d)\n", ga_nodeid_(),
198
GA_PROF[GET][i].count, GA_PROF[PUT][i].count,
199
GA_PROF[ACC][i].count, 1<<(GA_MAX_MSG_RANGE-1));
202
/* This prints the number of non-contiguous get/put/acc/ calls for every
204
/* Print the number of non-contiguous get/put/acc calls recorded in GA_PROF,
 * one row per message range, prefixed with this process's ga_nodeid_(). */
void ga_print_numcalls2() {
206
GA_HDR2(); GA_HDR3();
207
/* range i holds messages of 2^i .. 2^(i+1)-1 bytes (inclusive bounds) */
for(i=0; i< GA_MAX_MSG_RANGE-1; i++)
208
printf("%d\t %d\t %d\t %d\t (%d-%d)\n", ga_nodeid_(),
209
GA_PROF[NC_GET][i].count, GA_PROF[NC_PUT][i].count,
210
GA_PROF[NC_ACC][i].count, 1<<i, (1<<(i+1))-1);
211
/* last row (i == GA_MAX_MSG_RANGE-1): open-ended bucket that starts at
 * 2^(GA_MAX_MSG_RANGE-1) bytes */
printf("%d\t %d\t %d\t %d\t (>=%d)\n",ga_nodeid_(),
212
GA_PROF[NC_GET][i].count, GA_PROF[NC_PUT][i].count,
213
GA_PROF[NC_ACC][i].count, 1<<(GA_MAX_MSG_RANGE-1));
216
/* This prints timings of all contiguous get/put/acc/ calls for every
218
/* Print the accumulated execution time of contiguous get/put/acc calls
 * recorded in GA_PROF, one row per message range, prefixed with this
 * process's ga_nodeid_(). */
void ga_print_timings1() {
220
GA_HDR1(); GA_HDR4();
221
/* range i holds messages of 2^i .. 2^(i+1)-1 bytes (inclusive bounds) */
for(i=0; i< GA_MAX_MSG_RANGE-1; i++)
222
printf("%d\t %.2e\t %.2e\t %.2e\t (%d-%d)\n", ga_nodeid_(),
223
GA_PROF[GET][i].exectime, GA_PROF[PUT][i].exectime,
224
GA_PROF[ACC][i].exectime, 1<<i, (1<<(i+1))-1);
225
/* last row (i == GA_MAX_MSG_RANGE-1): open-ended bucket that starts at
 * 2^(GA_MAX_MSG_RANGE-1) bytes */
printf("%d\t %.2e\t %.2e\t %.2e\t (>=%d)\n", ga_nodeid_(),
226
GA_PROF[GET][i].exectime, GA_PROF[PUT][i].exectime,
227
GA_PROF[ACC][i].exectime, 1<<(GA_MAX_MSG_RANGE-1));
230
/* This prints timings of all non-contiguous get/put/acc/ calls for every
232
/* Print the accumulated execution time of non-contiguous get/put/acc calls
 * recorded in GA_PROF, one row per message range, prefixed with this
 * process's ga_nodeid_(). */
void ga_print_timings2() {
234
GA_HDR2(); GA_HDR4();
235
/* range i holds messages of 2^i .. 2^(i+1)-1 bytes (inclusive bounds) */
for(i=0; i< GA_MAX_MSG_RANGE-1; i++)
236
printf("%d\t %.2e\t %.2e\t %.2e\t (%d-%d)\n", ga_nodeid_(),
237
GA_PROF[NC_GET][i].exectime, GA_PROF[NC_PUT][i].exectime,
238
GA_PROF[NC_ACC][i].exectime, 1<<i, (1<<(i+1))-1);
239
/* last row (i == GA_MAX_MSG_RANGE-1): open-ended bucket that starts at
 * 2^(GA_MAX_MSG_RANGE-1) bytes */
printf("%d\t %.2e\t %.2e\t %.2e\t (>=%d)\n", ga_nodeid_(),
240
GA_PROF[NC_GET][i].exectime, GA_PROF[NC_PUT][i].exectime,
241
GA_PROF[NC_ACC][i].exectime, 1<<(GA_MAX_MSG_RANGE-1));
244
/* Print the stride records stored in GA_PROF[event][range]: for each recorded
 * call its serial number, ndim, elapsed time, per-dimension extents and
 * [lo-hi] bounds, and the array name; then a summary line.
 *   event - event kind (first index of GA_PROF, also indexes event_name[])
 *   range - message-range index (second index of GA_PROF) */
void ga_print_stridedinfo(int event, int range) {
247
/* range r covers 2^r .. 2^(r+1)-1 bytes, so print the inclusive upper bound */
printf("\n\nSTRIDE INFORMATION FOR MSG_RANGE %d-%d for EVENT: %s\n",
248
1<<range, (1<<(range+1))-1, event_name[event]);
250
/* walk the recorded calls; at most STRIDE_COUNT of them have stride data */
for(i=0; i< GA_PROF[event][range].count; i++) {
251
if(i>=STRIDE_COUNT) break;
252
/* running total, reported in the summary line */
time += GA_PROF[event][range].stride[i].time;
253
ndim = GA_PROF[event][range].stride[i].ndim;
254
printf("%d\t%d %.2e (",i, ndim,
255
GA_PROF[event][range].stride[i].time);
256
/* extents (hi-lo+1) of each dimension, separated by "x" */
for(j=0;j<ndim;j++) {
257
printf("%d", GA_PROF[event][range].stride[i].hi[j] -
258
GA_PROF[event][range].stride[i].lo[j] +1);
259
if(j!=ndim-1) printf("x");
263
/* [lo-hi] bounds of dimension j */
printf("[%d-%d]", GA_PROF[event][range].stride[i].lo[j],
264
GA_PROF[event][range].stride[i].hi[j]);
265
printf(" \"%s\"\n", GA_PROF[event][range].stride[i].name);
267
/*This o/p is just for verification*/
268
printf("**** STRIDE_COUNT = %d ; TOTAL TIME = %.2e\n",GA_PROF[event][range].count,
272
/* Print the collected profile. Only the process whose ga_nodeid_() is 0
 * prints; it emits the call-count tables for contiguous and non-contiguous
 * transfers and the stride listing for one selected event/message range. */
void ga_profile_terminate() {
274
if(ga_nodeid_() == 0) { /* process 0's profile only */
276
/* contiguous calls */
277
ga_print_numcalls1();
280
/* non-contiguous calls */
281
ga_print_numcalls2();
286
int msg_range, event_type;
288
* printing stride info for non-contiguous get (NC_GET) for message
289
* range #6. 2^6 - 2^(6+1) bytes. (i.e. 64-128 bytes)
291
msg_range = 6; /* message range 2^6-2^(6+1) */
293
/* example selection: change the event kind / range here to list others */
ga_print_stridedinfo(NC_GET, msg_range);
294
/*ga_print_stridedinfo(GET,19);*/ /* 2^19-2^20 range (524288-1MB)*/
300
#endif /* end of GA_PROFILE */