85
85
/* Where are API symbols?
86
Open MPI lib/libmpi.so, soname = libmpi.so.0
86
Open MPI lib/libmpi.so, soname = libmpi.so.0
87
Quadrics MPI lib/libmpi.so, soname = libmpi.so.0
88
MPICH libmpich.so.1.0, soname = libmpich.so.1.0
89
AIX: in /usr/lpp/ppe.poe/lib/libmpi_r.a(mpicore*_r.o)
91
For the non-AIX targets, a suitable soname to match with
89
/* ifdef OpenMPI ... */
90
#define I_WRAP_FNNAME_U(_name) I_WRAP_SONAME_FNNAME_ZU(libmpiZdsoZa,_name)
95
# define I_WRAP_FNNAME_U(_name) \
96
I_WRAP_SONAME_FNNAME_ZU(libmpiZurZdaZLmpicoreZaZurZdoZR,_name)
97
/* Don't change this without also changing all the names in
100
# define I_WRAP_FNNAME_U(_name) \
101
I_WRAP_SONAME_FNNAME_ZU(libmpiZaZdsoZa,_name)
93
106
/*------------------------------------------------------------*/
119
136
static const char* preamble = "valgrind MPI wrappers";
121
138
/* established at startup */
122
static pid_t my_pid = -1;
123
static char* options_str = NULL;
124
static int opt_verbosity = 1;
125
static Bool opt_missing = 0; /* 0:silent; 1:warn; 2:abort */
126
static Bool opt_help = False;
139
static pid_t my_pid = -1;
140
static char* options_str = NULL;
141
static int opt_verbosity = 1;
142
static Bool opt_missing = 0; /* 0:silent; 1:warn; 2:abort */
143
static Bool opt_help = False;
144
static Bool opt_initkludge = False;
128
146
static void before ( char* fnname )
160
180
fprintf(stderr, "Valid options for the MPIWRAP_DEBUG environment"
161
181
" variable are:\n");
162
182
fprintf(stderr, "\n");
163
fprintf(stderr, " quiet be silent except for errors\n");
164
fprintf(stderr, " verbose show wrapper entries/exits\n");
165
fprintf(stderr, " strict abort the program if a function"
166
" with no wrapper is used\n");
167
fprintf(stderr, " warn give a warning if a function"
168
" with no wrapper is used\n");
169
fprintf(stderr, " help display this message, then exit\n");
183
fprintf(stderr, " quiet be silent except for errors\n");
184
fprintf(stderr, " verbose show wrapper entries/exits\n");
185
fprintf(stderr, " strict abort the program if a function"
186
" with no wrapper is used\n");
187
fprintf(stderr, " warn give a warning if a function"
188
" with no wrapper is used\n");
189
fprintf(stderr, " help display this message, then exit\n");
190
fprintf(stderr, " initkludge debugging hack; do not use\n");
170
191
fprintf(stderr, "\n");
171
192
fprintf(stderr, "Multiple options are allowed, eg"
172
193
" MPIWRAP_DEBUG=strict,verbose\n");
235
256
# if defined(MPI_UNSIGNED_LONG_LONG)
236
257
else if (ty == MPI_UNSIGNED_LONG_LONG) fprintf(f,"UNSIGNED_LONG_LONG");
259
# if defined(MPI_REAL8)
260
else if (ty == MPI_REAL8) fprintf(f, "REAL8");
262
# if defined(MPI_REAL4)
263
else if (ty == MPI_REAL4) fprintf(f, "REAL4");
265
# if defined(MPI_REAL)
266
else if (ty == MPI_REAL) fprintf(f, "REAL");
268
# if defined(MPI_INTEGER8)
269
else if (ty == MPI_INTEGER8) fprintf(f, "INTEGER8");
271
# if defined(MPI_INTEGER4)
272
else if (ty == MPI_INTEGER4) fprintf(f, "INTEGER4");
274
# if defined(MPI_INTEGER)
275
else if (ty == MPI_INTEGER) fprintf(f, "INTEGER");
277
# if defined(MPI_DOUBLE_PRECISION)
278
else if (ty == MPI_DOUBLE_PRECISION) fprintf(f, "DOUBLE_PRECISION");
280
# if defined(MPI_COMPLEX)
281
else if (ty == MPI_COMPLEX) fprintf(f, "COMPLEX");
283
# if defined(MPI_DOUBLE_COMPLEX)
284
else if (ty == MPI_DOUBLE_COMPLEX) fprintf(f, "DOUBLE_COMPLEX");
286
# if defined(MPI_LOGICAL)
287
else if (ty == MPI_LOGICAL) fprintf(f, "LOGICAL");
289
# if defined(MPI_2INTEGER)
290
else if (ty == MPI_2INTEGER) fprintf(f, "2INTEGER");
292
# if defined(MPI_2COMPLEX)
293
else if (ty == MPI_2COMPLEX) fprintf(f, "2COMPLEX");
295
# if defined(MPI_2DOUBLE_COMPLEX)
296
else if (ty == MPI_2DOUBLE_COMPLEX) fprintf(f, "2DOUBLE_COMPLEX");
298
# if defined(MPI_2REAL)
299
else if (ty == MPI_2REAL) fprintf(f, "2REAL");
301
# if defined(MPI_2DOUBLE_PRECISION)
302
else if (ty == MPI_2DOUBLE_PRECISION) fprintf(f, "2DOUBLE_PRECISION");
304
# if defined(MPI_CHARACTER)
305
else if (ty == MPI_CHARACTER) fprintf(f, "CHARACTER");
238
307
else fprintf(f,"showTy:???");
291
360
/* Note, PMPI_Comm_rank/size are themselves wrapped. Should work
294
static inline int comm_rank ( MPI_Comm comm )
363
static __inline__ int comm_rank ( MPI_Comm comm )
297
366
err = PMPI_Comm_rank(comm, &r);
298
367
return err ? 0/*arbitrary*/ : r;
301
static inline int comm_size ( MPI_Comm comm )
370
static __inline__ int comm_size ( MPI_Comm comm )
304
373
err = PMPI_Comm_size(comm, &r);
305
374
return err ? 0/*arbitrary*/ : r;
308
static inline Bool count_from_Status( /*OUT*/int* recv_count,
377
static __inline__ Bool count_from_Status( /*OUT*/int* recv_count,
309
378
MPI_Datatype datatype,
310
379
MPI_Status* status)
380
449
There is a subtlety, which is that this is required to return the
381
450
exact size of one item of the type, NOT the size of it when padded
382
451
suitably to make an array of them. In particular that's why the
383
size of LONG_DOUBLE is 10 and not sizeof(long double), since the
384
latter is 12 at least on x86. Except if sizeof(long double) is
385
claimed to be 8 then we'd better respect that.
452
size of LONG_DOUBLE is computed by looking at the result of doing a
453
long double store, rather than just asking what is the sizeof(long
456
For LONG_DOUBLE on x86-linux and amd64-linux my impression is that
457
the right answer is 10 even though sizeof(long double) says 12 and
458
16 respectively. On ppc32-linux it appears to be 16.
387
460
Ref: MPI 1.1 doc p18 */
388
461
static long sizeofOneNamedTy ( MPI_Datatype ty )
398
471
if (ty == MPI_FLOAT) return sizeof(float);
399
472
if (ty == MPI_DOUBLE) return sizeof(double);
400
473
if (ty == MPI_BYTE) return 1;
401
if (ty == MPI_LONG_DOUBLE)
402
return sizeof(long double)==8
403
? 8 : 10; /* NOT: sizeof(long double); */
474
if (ty == MPI_LONG_DOUBLE) return sizeof_long_double_image();
475
if (ty == MPI_PACKED) return 1;
476
if (ty == MPI_LONG_LONG_INT) return sizeof(signed long long int);
478
# if defined(MPI_REAL8)
479
if (ty == MPI_REAL8) return 8; /* MPI2 spec */;
481
# if defined(MPI_REAL4)
482
if (ty == MPI_REAL4) return 4; /* MPI2 spec */;
484
# if defined(MPI_REAL)
485
if (ty == MPI_REAL) return 4; /* MPI2 spec */;
487
# if defined(MPI_INTEGER8)
488
if (ty == MPI_INTEGER8) return 8; /* MPI2 spec */;
490
# if defined(MPI_INTEGER4)
491
if (ty == MPI_INTEGER4) return 4; /* MPI2 spec */;
493
# if defined(MPI_INTEGER)
494
if (ty == MPI_INTEGER) return 4; /* MPI2 spec */;
496
# if defined(MPI_DOUBLE_PRECISION)
497
if (ty == MPI_DOUBLE_PRECISION) return 8; /* MPI2 spec */;
405
500
/* new in MPI2: */
406
501
# if defined(MPI_WCHAR)
407
if (ty == MPI_WCHAR) return sizeof(wchar_t);
502
if (ty == MPI_WCHAR) return 2; /* MPI2 spec */;
409
504
# if defined(MPI_SIGNED_CHAR)
410
if (ty == MPI_SIGNED_CHAR) return sizeof(signed char);
505
if (ty == MPI_SIGNED_CHAR) return 1; /* MPI2 spec */;
412
507
# if defined(MPI_UNSIGNED_LONG_LONG)
413
if (ty == MPI_UNSIGNED_LONG_LONG) return sizeof(unsigned long long int);
415
if (ty == MPI_LONG_LONG_INT) return sizeof(signed long long int);
508
if (ty == MPI_UNSIGNED_LONG_LONG) return 8; /* MPI2 spec */;
510
# if defined(MPI_COMPLEX)
511
if (ty == MPI_COMPLEX) return 2 * 4; /* MPI2 spec */
513
# if defined(MPI_DOUBLE_COMPLEX)
514
if (ty == MPI_DOUBLE_COMPLEX) return 2 * 8; /* MPI2 spec */
516
# if defined(MPI_LOGICAL)
517
if (ty == MPI_LOGICAL) return 4; /* MPI2 spec */
519
# if defined(MPI_2INTEGER)
520
if (ty == MPI_2INTEGER) return 2 * 4; /* undocumented in MPI2 */
522
# if defined(MPI_2COMPLEX)
523
if (ty == MPI_2COMPLEX) return 2 * 8; /* undocumented in MPI2 */
525
# if defined(MPI_2DOUBLE_COMPLEX)
526
/* 32: this is how openmpi-1.2.2 behaves on x86-linux, but I have
527
really no idea if this is right. */
528
if (ty == MPI_2DOUBLE_COMPLEX) return 32; /* undocumented in MPI2 */
530
# if defined(MPI_2REAL)
531
if (ty == MPI_2REAL) return 2 * 4; /* undocumented in MPI2 */
533
# if defined(MPI_2DOUBLE_PRECISION)
534
if (ty == MPI_2DOUBLE_PRECISION) return 2 * 8; /* undocumented in MPI2 */
536
# if defined(MPI_CHARACTER)
537
if (ty == MPI_CHARACTER) return 1; /* MPI2 spec */
416
540
/* Note: the following are named structs, not named basic types,
417
541
and so are not handled here:
418
542
FLOAT_INT DOUBLE_INT LONG_INT 2INT SHORT_INT LONG_DOUBLE_INT
551
/* Find the size of long double image (not 'sizeof(long double)').
552
See comments in sizeofOneNamedTy.
554
static long sizeof_long_double_image ( void )
558
static long cached_result = 0;
560
/* Hopefully we have it already. */
561
if (cached_result != 0) {
562
assert(cached_result == 10 || cached_result == 16 || cached_result == 8);
563
return cached_result;
566
/* No? Then we'll have to compute it. This isn't thread-safe but
567
it doesn't really matter since all races to compute it should
568
produce the same answer. */
571
for (i = 0; i < 64; i++)
574
/* Write a value which isn't known at compile time and therefore
575
must come out of a register. If we just store a constant here,
576
some compilers write more data than a store from a machine
577
register would. Therefore we have to force a store from a
578
machine register by storing a value which isn't known at compile
579
time. Since getpid() will return a value < 1 million, turn it
580
into a zero by dividing by 1e+30. */
581
*(long double*)(&p[16]) = (long double)(1.0e-30 * (double)getpid());
583
for (i = 0; i < 16; i++) {
584
assert(p[i] == 0x55);
585
assert(p[i+48] == 0x55);
587
for (i = 16; i <= 48; i++) {
595
cached_result = i - 16;
598
printf("sizeof_long_double_image: computed %d\n", (int)cached_result);
600
assert(cached_result == 10 || cached_result == 16 || cached_result == 8);
601
return cached_result;
427
605
/*------------------------------------------------------------*/
428
606
/*--- Unpicking datatypes ---*/
429
607
/*------------------------------------------------------------*/
432
610
void walk_type_array ( void(*f)(void*,long), char* base,
433
611
MPI_Datatype ty, long count );
482
670
f(base + offsetof(Ty,loc), sizeof(int));
673
if (ty == MPI_FLOAT_INT) {
674
typedef struct { float val; int loc; } Ty;
675
f(base + offsetof(Ty,val), sizeof(float));
676
f(base + offsetof(Ty,loc), sizeof(int));
679
if (ty == MPI_LONG_DOUBLE_INT) {
680
typedef struct { long double val; int loc; } Ty;
681
f(base + offsetof(Ty,val), sizeof_long_double_image());
682
f(base + offsetof(Ty,loc), sizeof(int));
485
685
if (ty == MPI_LB || ty == MPI_UB)
486
686
return; /* have zero size, so nothing needs to be done */
610
if (tycon == MPI_COMBINER_NAMED) {
611
fprintf(stderr, "%s %5d: walk_type: unhandled base type 0x%lx ",
612
preamble, my_pid, (long)ty);
614
fprintf(stderr, "\n");
616
fprintf(stderr, "%s %5d: walk_type: unhandled combiner 0x%lx\n",
617
preamble, my_pid, (long)tycon);
810
/* Complain, but limit the amount of complaining that can happen to
811
the first 3 different unhandled tycons that show up, so as to
812
avoid swamping users with thousands of duplicate messages. */
813
if (complaints > 0 && tycon != last_complained_about_tycon) {
815
last_complained_about_tycon = tycon;
816
if (tycon == MPI_COMBINER_NAMED) {
817
fprintf(stderr, "%s %5d: walk_type: unhandled base type 0x%lx ",
818
preamble, my_pid, (long)ty);
820
fprintf(stderr, "\n");
822
fprintf(stderr, "%s %5d: walk_type: unhandled combiner 0x%lx\n",
823
preamble, my_pid, (long)tycon);
619
826
if (ints) free(ints);
620
827
if (addrs) free(addrs);
1406
/* --- Waitany --- */
1407
int WRAPPER_FOR(PMPI_Waitany)( int count,
1408
MPI_Request* requests,
1410
MPI_Status* status )
1412
MPI_Request* requests_before = NULL;
1415
VALGRIND_GET_ORIG_FN(fn);
1417
if (0) fprintf(stderr, "Waitany: %d\n", count);
1418
check_mem_is_addressable_untyped(index, sizeof(int));
1419
check_mem_is_addressable_untyped(status, sizeof(MPI_Status));
1420
for (i = 0; i < count; i++) {
1421
check_mem_is_defined_untyped(&requests[i], sizeof(MPI_Request));
1423
requests_before = clone_Request_array( count, requests );
1424
CALL_FN_W_WWWW(err, fn, count,requests,index,status);
1425
if (err == MPI_SUCCESS && *index >= 0 && *index < count) {
1426
maybe_complete(False/*err in status?*/,
1427
requests_before[*index], requests[*index], status);
1428
make_mem_defined_if_addressable_untyped(status, sizeof(MPI_Status));
1430
if (requests_before)
1431
free(requests_before);
1432
after("Waitany", err);
1206
1436
/* --- Waitall --- */
1207
1437
int WRAPPER_FOR(PMPI_Waitall)( int count,
1208
1438
MPI_Request* requests,
1443
1673
/*------------------------------------------------------------*/
1675
/*--- Sec 3.13, Pack and unpack ---*/
1677
/*------------------------------------------------------------*/
1680
/* pre: must be readable: position
1681
must be readable: (inbuf,incount,datatype)
1682
must be writable: outbuf[0 .. outsize-1]
1683
must be writable: outbuf[*position ..
1685
+ however much space PMPI_Pack_size
1687
post: make readable: outbuf[old *position .. new *position]
1689
int WRAPPER_FOR(PMPI_Pack)( void* inbuf, int incount, MPI_Datatype datatype,
1690
void* outbuf, int outsize,
1691
int* position, MPI_Comm comm )
1695
int position_ORIG = *position;
1696
VALGRIND_GET_ORIG_FN(fn);
1699
check_mem_is_defined_untyped(position, sizeof(*position));
1701
check_mem_is_defined(inbuf, incount, datatype);
1702
/* check output area's stated bounds make sense */
1703
check_mem_is_addressable_untyped(outbuf, outsize);
1704
/* check output area's actual used size properly */
1705
err = PMPI_Pack_size( incount, datatype, comm, &szB );
1706
if (err == MPI_SUCCESS && szB > 0) {
1707
check_mem_is_addressable_untyped(
1708
((char*)outbuf) + position_ORIG, szB
1712
CALL_FN_W_7W(err, fn, inbuf,incount,datatype, outbuf,outsize,position, comm);
1714
if (err == MPI_SUCCESS && (*position) > position_ORIG) {
1716
make_mem_defined_if_addressable_untyped(
1717
((char*)outbuf) + position_ORIG, *position - position_ORIG
1724
/* --- Unpack --- */
1725
/* pre: must be readable: position
1726
must be writable: (outbuf,outcount,datatype)
1727
must be writable: outbuf[0 .. outsize-1]
1728
must be writable: outbuf[*position ..
1730
+ however much space PMPI_Pack_size
1732
post: make readable: (outbuf,outcount,datatype)
1733
and also do a readability check of
1734
inbuf[old *position .. new *position]
1736
int WRAPPER_FOR(PMPI_Unpack)( void* inbuf, int insize, int* position,
1737
void* outbuf, int outcount, MPI_Datatype datatype,
1742
int position_ORIG = *position;
1743
VALGRIND_GET_ORIG_FN(fn);
1746
check_mem_is_defined_untyped(position, sizeof(*position));
1747
/* check output area is accessible */
1748
check_mem_is_addressable(outbuf, outcount, datatype);
1749
/* check input area's stated bounds make sense */
1750
check_mem_is_addressable_untyped(inbuf, insize);
1751
/* check input area's actual used size properly */
1752
err = PMPI_Pack_size( outcount, datatype, comm, &szB );
1753
if (err == MPI_SUCCESS && szB > 0) {
1754
check_mem_is_addressable_untyped(
1755
((char*)inbuf) + position_ORIG, szB
1759
CALL_FN_W_7W(err, fn, inbuf,insize,position, outbuf,outcount,datatype, comm);
1761
if (err == MPI_SUCCESS && (*position) > position_ORIG) {
1762
/* recheck input more carefully */
1763
check_mem_is_defined_untyped(
1764
((char*)inbuf) + position_ORIG, *position - position_ORIG
1767
make_mem_defined_if_addressable( outbuf, outcount, datatype );
1769
after("Unpack", err);
1774
/*------------------------------------------------------------*/
1445
1776
/*--- Sec 4.4, Broadcast ---*/
1447
1778
/*------------------------------------------------------------*/