8
* This material was prepared as an account of work sponsored by an
9
* agency of the United States Government. Neither the United States
10
* Government nor the United States Department of Energy, nor Battelle,
11
* nor any of their employees, MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
12
* ASSUMES ANY LEGAL LIABILITY OR RESPONSIBILITY FOR THE ACCURACY,
13
* COMPLETENESS, OR USEFULNESS OF ANY INFORMATION, APPARATUS, PRODUCT,
14
* SOFTWARE, OR PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD NOT
15
* INFRINGE PRIVATELY OWNED RIGHTS.
20
* This software and its documentation were produced with United States
21
* Government support under Contract Number DE-AC06-76RLO-1830 awarded by
22
* the United States Department of Energy. The United States Government
23
* retains a paid-up non-exclusive, irrevocable worldwide license to
24
* reproduce, prepare derivative works, perform publicly and display
25
* publicly by or for the US Government, including the right to
26
* distribute to other US Government contractors.
37
#if HAVE_LINUX_LIMITS_H
38
# include <linux/limits.h>
45
# define PATH_MAX _MAX_PATH
52
# include <sys/types.h>
53
# include <sys/time.h>
62
# define EAF_MAX_FILES OPEN_MAX
64
# define EAF_MAX_FILES 1024
69
/* Per-file bookkeeping table, indexed by the EAF file descriptor and sized by
 * EAF_MAX_FILES. A slot counts as in use when its fname is non-null (that is
 * what valid_fd tests). Entries whose size is > 0 are treated by the I/O
 * routines as memory-backed ("MA hack") instead of ELIO-backed.
 * NOTE(review): the opening line of this declaration is not visible in this
 * view. */
char *fname; /**< Filename --- if non-null is active*/
70
Fd_t elio_fd; /**< ELIO file descriptor */
71
int type; /**< file type */
72
int nwait; /**< #waits */
73
int nwrite; /**< #synchronous writes */
74
int nread; /**< #synchronous reads */
75
int nawrite; /**< #asynchronous writes */
76
int naread; /**< #asynchronous reads */
77
double nb_write; /**< #synchronous bytes written */
78
double nb_read; /**< #synchronous bytes read */
79
double nb_awrite; /**< #asynchronous bytes written */
80
double nb_aread; /**< #asynchronous bytes read */
81
double t_write; /**< Wall seconds synchronous writing */
82
double t_read; /**< Wall seconds synchronous reading */
83
double t_awrite; /**< Wall seconds asynchronous writing */
84
double t_aread; /**< Wall seconds asynchronous reading */
85
double t_wait; /**< Wall seconds waiting */
86
long size; /**< size of the MA region (MA hack); > 0 selects the in-memory path */
87
long handle; /**< MA handle of the region, as returned by MA_alloc_get (MA hack) */
88
char *pointer; /**< base address used for memcpy to/from the MA region */
89
long openma; /**< open yes or no for MA to simulate file behavior; EAF_Length rejects the entry when 0 */
90
} file[EAF_MAX_FILES];
93
/* Forward declaration: eaf_flushbuf is defined later in this file and is
 * called from EAF_Write before its definition is seen. */
int eaf_flushbuf(int , eaf_off_t , const void *, size_t );
95
/* Return non-zero when fd names an in-use slot of the file table: fd must lie
 * in [0, EAF_MAX_FILES) and the slot's fname must be non-null. The range test
 * comes first so file[fd] is never indexed out of bounds. */
static int valid_fd(int fd)
97
return ( (fd >= 0) && (fd < EAF_MAX_FILES) && (file[fd].fname) );
102
* Return wall_time in seconds as cheaply and as accurately as possible
104
/*
 * Elapsed wall-clock seconds, measured with gettimeofday() relative to a
 * reference time kept in the static firstsec/firstusec pair.
 * The statements shown sit in one branch of a preprocessor conditional
 * annotated EAF_STATS; the body of the #else branch is not visible here.
 * NOTE(review): the guard around the first gettimeofday() call (presumably a
 * test of firstcall) and the declarations of tp, tzp, low and high are not
 * visible in this view.
 */
static double wall_time(void)
107
static int firstcall = 1;
108
static unsigned firstsec, firstusec;
114
/* Capture the reference timestamp. */
(void) gettimeofday(&tp,&tzp);
115
firstusec = tp.tv_usec;
116
firstsec = tp.tv_sec;
120
/* Current time; the result is its distance from the reference. */
(void) gettimeofday(&tp,&tzp);
122
/* The microsecond difference is taken on halved values and doubled again in
 * the return expression, so the lowest microsecond bit is discarded. */
low = (double) (tp.tv_usec>>1) - (double) (firstusec>>1);
123
high = (double) (tp.tv_sec - firstsec);
125
/* seconds + 1e-6 * microseconds */
return high + 1.0e-6*(low+low);
126
#else /* EAF_STATS */
128
#endif /* EAF_STATS */
133
* Open the named file returning the EAF file descriptor in fd.
134
* Return 0 on success, non-zero on failure
136
/*
 * Open fname and hand back an EAF descriptor. The descriptor is the index i
 * of the first free slot in the file table; the store into *fd is not
 * visible in this view.
 *
 * Two paths are visible below:
 *  - MA path: the entry is backed by an MA allocation labelled fname. Its
 *    size is either type-1000000 or 8/10 of MA_inquire_avail(MT_CHAR); lower
 *    slots are first searched for an entry with the same name and size > 0.
 *  - regular path: the file is opened with elio_open(..., ELIO_PRIVATE) and
 *    the open is attempted a second time if the first attempt fails.
 *
 * Returns EAF_ERR_MAX_OPEN when no slot is free, EAF_ERR_MEMORY when strdup
 * fails, and ELIO_PENDING_ERR when the second elio_open attempt fails too.
 *
 * NOTE(review): declarations, several branch conditions and the closing
 * braces are missing from this view, so what selects each path cannot be
 * confirmed from here.
 */
int EAF_Open(const char *fname, int type, int *fd)
138
int i=0, j=0, found=0;
145
while ((i<EAF_MAX_FILES) && file[i].fname) /* Find first empty slot */
147
if (i == EAF_MAX_FILES) return EAF_ERR_MAX_OPEN;
150
/* Only slots below the first free one are searched for an existing entry. */
for (j=0; j< i; j++){
151
if(strcmp(file[j].fname,fname) == 0 && file[j].size >0) {
158
/* check if this file aka MA region labeled by fname is already open with size > 0 */
160
printf(" JJJ %d III %d fname %s filejfname %s found %d \n", j, i, fname, file[j].fname, found);
163
/* if arg gt 1M then use remainder as size */
164
/* otherwise we grab 8/10 of the memory MA reports as available */
166
file[i].size=type-1000000;
168
file[i].size=MA_inquire_avail(MT_CHAR)*8/10;
171
if (!MA_alloc_get(MT_CHAR, file[i].size, fname, &handle, &index))
173
/* MA hack: we pass type = sizeof MA alloc in megabytes */
/* NOTE(review): type-1000000 is used above directly as an MT_CHAR element
 * count; confirm whether the unit really is megabytes. */
174
MA_get_pointer(handle, &ptr);
175
if (!(file[i].fname = strdup(fname)))
176
return EAF_ERR_MEMORY;
178
file[i].handle=handle;
182
printf(" found old fileMA %d size %ld \n", j, file[j].size);
184
/* need check if new size is <= old size*/
191
printf(" size %ld ptr %p \n", file[i].size, file[i].pointer);
194
if (!(file[i].fname = strdup(fname)))
195
return EAF_ERR_MEMORY;
197
/* Any positive type value is mapped to EAF_RW before the ELIO open. */
if(type > 0) type = EAF_RW;
199
printf(" opening regular %d eaf %s \n", i, fname);
202
if (!(file[i].elio_fd = elio_open(fname, type, ELIO_PRIVATE))) {
204
MPI_Comm_rank(MPI_COMM_WORLD,&myid);
205
/* printf(" %d sleeping for %d usec \n", myid, (myid+1)/4); */
207
/* Second attempt after the first elio_open failed. */
if (!(file[i].elio_fd = elio_open(fname, type, ELIO_PRIVATE))) {
211
/* NOTE(review): confirm the strdup'ed file[i].fname is released on this
 * failure path; no free() is visible here. */
return ELIO_PENDING_ERR;
218
/* Reset the per-file statistics counters. */
file[i].nwait = file[i].nread = file[i].nwrite =
219
file[i].naread = file[i].nawrite = 0;
220
file[i].nb_read = file[i].nb_write = file[i].nb_aread =
221
file[i].nb_awrite = file[i].t_read = file[i].t_write =
222
file[i].t_wait = 0.0;
/* NOTE(review): t_awrite and t_aread do not appear in the visible reset
 * chain; confirm they are initialized elsewhere. */
231
* Close the EAF file and return 0 on success, non-zero on failure
233
/*
 * Close the file behind fd.
 * Returns EAF_ERR_INVALID_FD when fd does not name an active slot. Entries
 * with size > 0 (MA-backed) enter the "maclosing" branch. In the visible
 * regular-file code the stored name is freed and the result of elio_close()
 * is returned.
 * NOTE(review): lines between the visible statements are missing from this
 * view. Confirm that fname is reset to NULL after free() so valid_fd()
 * rejects the slot afterwards, and check what the MA branch releases.
 */
int EAF_Close(int fd)
235
if (!valid_fd(fd)) return EAF_ERR_INVALID_FD;
237
if (file[fd].size > 0) {
239
printf(" maclosing %d %s \n", fd, file[fd].fname);
245
printf(" closing regular file %s fd %d \n", file[fd].fname, fd);
247
free(file[fd].fname);
250
return elio_close(file[fd].elio_fd);
256
* Write the buffer to the file at the specified offset.
257
* Return 0 on success, non-zero on failure
259
/*
 * Synchronously write bytes bytes from buf at offset.
 *
 * MA-backed entries (size > 0): when offset+bytes exceeds the region size the
 * visible code either reports a failure and returns EAF_ERR_WRITE, or hands
 * the request to eaf_flushbuf(). Which of the two applies is decided by lines
 * not visible here. Otherwise the data is memcpy'd into the region at offset.
 * Regular entries: the data goes through elio_write(). A result different
 * from bytes is an error: negative values are returned as-is (ELIO error
 * code), anything else becomes EAF_ERR_WRITE.
 *
 * On the visible success path nb_write and t_write are updated.
 * Returns EAF_ERR_INVALID_FD for an inactive fd.
 */
int EAF_Write(int fd, eaf_off_t offset, const void *buf, size_t bytes)
261
double start = wall_time();
264
if (!valid_fd(fd)) return EAF_ERR_INVALID_FD;
266
if (file[fd].size > 0) {
267
/* The whole request must fit inside the MA region. */
if((offset+bytes)>file[fd].size){
269
printf("eaf_write failure: increase MA stack memory \n ");
270
return EAF_ERR_WRITE;
273
printf("eaf_write: offset %ld larger than MA size %ld ptr %p \n", (long)(offset+bytes), file[fd].size, file[fd].pointer);
274
rc=eaf_flushbuf(fd, offset, buf, bytes);
275
/* NOTE(review): bytes is a size_t but is printed with %d here and in the
 * "rc ne bytes" message below; %zu would be the matching conversion. */
printf("eaf_write: from flushbug rc %d bytes %d\n ", rc, bytes);
278
memcpy(((char*)file[fd].pointer)+(long)offset, buf, bytes);
282
rc = elio_write(file[fd].elio_fd, (Off_t) offset, buf, (Size_t) bytes);
284
if (rc != ((Size_t)bytes)){
285
printf("eaf_write: rc ne bytes %d bytes %d\n ", rc, bytes);
286
if(rc < 0) return((int)rc); /* rc<0 means ELIO detected error */
287
else return EAF_ERR_WRITE;
290
file[fd].nb_write += bytes;
291
file[fd].t_write += wall_time() - start;
299
* Initiate an asynchronous write of the buffer to the file at the
300
* specified offset. Return in *req_id the ID of the request for
301
* subsequent use in EAF_Wait/probe. The buffer may not be reused until
302
* the operation has completed.
303
* Return 0 on success, non-zero on failure
306
/*
 * Parameter list and body of the asynchronous write routine. The line
 * carrying the function name is not visible in this view; the log text
 * "eaf_awrite" and the elio_awrite() call identify it.
 *
 * MA-backed entries (size > 0) are served with a plain memcpy into the
 * region at offset. Regular entries start an elio_awrite() request.
 * nb_awrite and t_awrite are updated afterwards.
 *
 * NOTE(review): the MA bounds check below tests only offset, not
 * offset+bytes as EAF_Write does, so a request starting inside the region
 * but extending past its end would not be rejected before the memcpy.
 * Confirm and align with EAF_Write.
 */
int fd, eaf_off_t offset, const void *buf, size_t bytes, int *req_id)
308
double start = wall_time();
312
if (!valid_fd(fd)) return EAF_ERR_INVALID_FD;
314
if (file[fd].size > 0) {
315
if(offset>file[fd].size){
317
printf("eaf_awrite: offset %f larger than MA size %ld \n", offset, file[fd].size);
318
return EAF_ERR_WRITE;
320
memcpy(((char*)file[fd].pointer)+(long)offset, buf, bytes);
324
rc = elio_awrite(file[fd].elio_fd, (Off_t)offset, buf, (Size_t)bytes, &req);
329
file[fd].nb_awrite += bytes;
331
file[fd].t_awrite += wall_time() - start;
337
* Read the buffer from the specified offset in the file.
338
* Return 0 on success, non-zero on failure
340
/*
 * Synchronously read bytes bytes at offset into buf.
 *
 * MA-backed entries (size > 0) are served with a memcpy out of the region at
 * offset. Regular entries go through elio_read(). A result different from
 * bytes is an error: negative values are returned as-is (ELIO error code),
 * anything else becomes EAF_ERR_READ.
 * On the visible success path nb_read and t_read are updated.
 * Returns EAF_ERR_INVALID_FD for an inactive fd.
 *
 * NOTE(review): the MA bounds check tests only offset, not offset+bytes, and
 * the statement that follows the diagnostic is not visible here. Confirm a
 * read cannot run past the end of the region.
 */
int EAF_Read(int fd, eaf_off_t offset, void *buf, size_t bytes)
342
double start = wall_time();
345
if (!valid_fd(fd)) return EAF_ERR_INVALID_FD;
347
if (file[fd].size > 0) {
348
if(offset>file[fd].size){
350
printf("eaf_read: offset %f larger than MA size %ld \n", offset, file[fd].size);
352
memcpy(buf, ((char*)file[fd].pointer)+(long)offset, bytes);
356
rc = elio_read(file[fd].elio_fd, (Off_t) offset, buf, (Size_t) bytes);
358
if (rc != ((Size_t)bytes)){
359
if(rc < 0) return((int)rc); /* rc<0 means ELIO detected error */
360
else return EAF_ERR_READ;
363
file[fd].nb_read += bytes;
364
file[fd].t_read += wall_time() - start;
371
* Initiate an asynchronous read of the buffer from the file at the
372
* specified offset. Return in *req_id the ID of the request for
373
* subsequent use in EAF_Wait/probe. The buffer may not be reused until
374
* the operation has completed.
375
* Return 0 on success, non-zero on failure
377
/*
 * Start an asynchronous read of bytes bytes at offset into buf.
 *
 * Regular entries start an elio_aread() request; nb_aread and t_aread are
 * updated afterwards. Returns EAF_ERR_INVALID_FD for an inactive fd.
 * The store into *req_id is not visible in this view.
 */
int EAF_Aread(int fd, eaf_off_t offset, void *buf, size_t bytes, int *req_id)
379
double start = wall_time();
383
if (!valid_fd(fd)) return EAF_ERR_INVALID_FD;
385
if (file[fd].size > 0) {
386
if(offset>file[fd].size){
388
printf("eaf_aread: offset %f larger than MA size %ld \n", offset, file[fd].size);
391
/* NOTE(review): this copies FROM buf INTO the start of the MA region. For a
 * read the expected direction is the opposite one, from pointer+offset into
 * buf, as EAF_Read does. As written the caller's buffer is never filled,
 * offset is ignored, and the region is overwritten. Confirm and fix. */
memcpy(file[fd].pointer, buf, bytes);
395
rc = elio_aread(file[fd].elio_fd, (Off_t) offset, buf, (Size_t)bytes, &req);
401
file[fd].nb_aread += bytes;
403
file[fd].t_aread += wall_time() - start;
409
* Wait for the I/O operation referred to by req_id to complete.
410
* Return 0 on success, non-zero on failure
412
/*
 * Wait for the asynchronous request req_id issued on fd.
 * MA-backed entries (size > 0) have nothing to wait for. Otherwise the
 * request is passed to elio_wait(). The elapsed time is added to t_wait.
 * NOTE(review): no valid_fd(fd) check is visible before file[fd] is indexed;
 * confirm one exists in the lines missing from this view.
 */
int EAF_Wait(int fd, int req_id)
414
double start = wall_time();
417
io_request_t req = req_id;
418
if (file[fd].size > 0) {
419
/* got nothin' to do */
421
code = elio_wait(&req);
423
file[fd].t_wait += wall_time() - start;
431
* status returns 0 if the I/O operation referred to by req_id
432
* is complete, 1 otherwise.
433
* Return 0 on success, non-zero on failure.
435
/*
 * Poll the asynchronous request req_id. After a successful elio_probe(),
 * *status is normalized to 0 when ELIO reports ELIO_DONE and to 1 otherwise.
 * NOTE(review): fd is used below but is not a parameter of this function and
 * no declaration of it is visible in this view. Confirm where it comes from,
 * since a request id alone does not identify a file-table slot.
 */
int EAF_Probe(int req_id, int *status)
437
io_request_t req = req_id;
440
if (file[fd].size > 0) {
441
/* got nothin' to do */
444
rc = elio_probe(&req, status);
449
/* Only touch *status when the probe itself succeeded. */
if(!rc) *status = !(*status == ELIO_DONE);
455
* Delete the named file. If the delete succeeds, or the file
456
* does not exist, return 0. Otherwise return non-zero.
458
/*
 * Delete the file, or the MA region, known as fname.
 *
 * Visible pieces: an access(fname, F_OK) existence test, a search of the
 * file table for an MA-backed entry (size > 0) with the same name whose MA
 * allocation is released with MA_free_heap(), and a call to elio_delete()
 * for files on disk. EAF_ERR_UNLINK is the error code on every visible
 * failure path.
 *
 * NOTE(review): the statements guarded by the access() and elio_delete()
 * tests are not all visible here, so the exact success and failure mapping
 * cannot be confirmed from this view.
 */
int EAF_Delete(const char *fname)
461
if (access(fname, F_OK) == 0)
463
return EAF_ERR_UNLINK;
469
/* get fd from fname */
/* NOTE(review): the scan stops at the first slot whose fname is null, so
 * entries stored after a freed slot are not examined. Confirm whether that
 * is intended. */
470
for (j=0; (j< EAF_MAX_FILES) && file[j].fname; j++){
471
if(strcmp(file[j].fname,fname) == 0 && file[j].size >0) {
477
printf("eaf_delete: fname %s found %d \n", fname, found);
478
if (found ==1) printf("eaf_delete: j %d filej.fname %s \n", j, file[j].fname);
481
/* On failure, dump the MA allocation table before returning the error. */
if(!MA_free_heap(file[j].handle)) {
482
MA_summarize_allocated_blocks();
483
return EAF_ERR_UNLINK;
489
/* Now that ELIO files can have extents must call its
490
routine to delete files */
492
if (elio_delete(fname) == ELIO_OK)
495
return EAF_ERR_UNLINK;
501
* Return in *avail_mb and *fstype the amount of free space (in Mb)
502
* and filesystem type (currently UFS, PFS, or PIOFS) of the filesystem
503
* associated with path. Path should be either a filename, or a directory
504
* name ending in a slash (/). fslen should specify the size of the
505
* buffer pointed to by fstype.
507
* Return 0 on success, non-zero on failure.
509
/*
 * Report free space and filesystem type for the filesystem holding path.
 *
 * path is reduced to its directory with elio_dirname() and queried with
 * elio_stat(); a non-zero result from either call is returned unchanged.
 * fstype receives one of "UFS", "PFS", "PIOFS" or "UNKNOWN".
 * Returns EAF_ERR_TOO_SHORT when fslen < 8.
 */
int EAF_Stat(const char *path, int *avail_mb, char *fstype, int fslen)
511
char dirname[PATH_MAX];
515
if ((rc = elio_dirname(path, dirname, sizeof(dirname)))) return rc;
516
if ((rc = elio_stat(dirname, &statinfo))) return rc;
517
/* 8 bytes are needed for the longest label written below, "UNKNOWN",
 * including its terminating NUL. */
if (fslen < 8) return EAF_ERR_TOO_SHORT;
519
/* Divides by 1024; presumably statinfo.avail is in KB, making this MB.
 * TODO confirm against elio_stat. */
*avail_mb = (int)(statinfo.avail>>10);
520
if (statinfo.fs == ELIO_UFS)
521
strcpy(fstype, "UFS");
522
else if (statinfo.fs == ELIO_PFS)
523
strcpy(fstype, "PFS");
524
else if (statinfo.fs == ELIO_PIOFS)
525
strcpy(fstype, "PIOFS");
527
strcpy(fstype, "UNKNOWN");
534
* Return 0 if code corresponds to EOF, or non-zero.
536
/* Test an EAF return code for end-of-file: yields 0 when code equals
 * EAF_ERR_EOF and 1 for every other value. Note the inverted sense: zero
 * means "is EOF". */
int EAF_Eof(int code)
538
return !(code == EAF_ERR_EOF);
543
* Return in msg (assumed to hold up to 80 characters)
544
* a description of the error code obtained from an EAF call,
545
* or an empty string if there is no such code
547
/*
 * Copy a human-readable description of an EAF return code into msg.
 *
 * Each EAF_ERR_* value listed below maps to a fixed string. The final
 * statement forwards the code to elio_errmsg(), apparently as the fallback
 * for codes that are not EAF's own; the line introducing it is not visible.
 * msg is written with strcpy and no length is passed, so the caller must
 * supply a buffer large enough for the longest message. The longest literal
 * in this function is 38 characters plus the terminating NUL.
 * NOTE(review): the condition guarding the first branch ("OK") is not
 * visible in this view.
 */
void EAF_Errmsg(int code, char *msg)
550
(void) strcpy(msg, "OK");
551
else if (code == EAF_ERR_EOF)
552
(void) strcpy(msg, "end of file");
553
else if (code == EAF_ERR_MAX_OPEN)
554
(void) strcpy(msg, "too many open files");
555
else if (code == EAF_ERR_MEMORY)
556
(void) strcpy(msg, "memory allocation failed");
557
else if (code == EAF_ERR_OPEN)
558
(void) strcpy(msg, "failed opening file");
559
else if (code == EAF_ERR_CLOSE)
560
(void) strcpy(msg, "failed closing file");
561
else if (code == EAF_ERR_INVALID_FD)
562
(void) strcpy(msg, "invalid file descriptor");
563
else if (code == EAF_ERR_WRITE)
564
(void) strcpy(msg, "write failed");
565
else if (code == EAF_ERR_AWRITE)
566
(void) strcpy(msg, "asynchronous write failed");
567
else if (code == EAF_ERR_READ)
568
(void) strcpy(msg, "read failed");
569
else if (code == EAF_ERR_AREAD)
570
(void) strcpy(msg, "asynchronous read failed");
571
else if (code == EAF_ERR_WAIT)
572
(void) strcpy(msg, "wait failed");
573
else if (code == EAF_ERR_PROBE)
574
(void) strcpy(msg, "probe failed");
575
else if (code == EAF_ERR_UNLINK)
576
(void) strcpy(msg, "unlink failed");
577
else if (code == EAF_ERR_UNIMPLEMENTED)
578
(void) strcpy(msg, "unimplemented operation");
579
else if (code == EAF_ERR_STAT)
580
(void) strcpy(msg, "stat failed");
581
else if (code == EAF_ERR_TOO_SHORT)
582
(void) strcpy(msg, "an argument string/buffer is too short");
583
else if (code == EAF_ERR_TOO_LONG)
584
(void) strcpy(msg, "an argument string/buffer is too long");
585
else if (code == EAF_ERR_NONINTEGER_OFFSET)
586
(void) strcpy(msg, "offset is not an integer");
587
else if (code == EAF_ERR_TRUNCATE)
588
(void) strcpy(msg, "truncate failed");
590
elio_errmsg(code, msg);
595
* Truncate the file to the specified length.
596
* Return 0 on success, non-zero otherwise.
598
/*
 * Truncate the file behind fd to length bytes.
 *
 * Two strategies are visible in the body. One closes the ELIO descriptor,
 * calls truncate() on the stored file name and reopens the file with
 * elio_open(); the in-body comment gives Cray FFIO as the reason. The other
 * calls elio_truncate() on the open descriptor. The lines that choose
 * between them (presumably preprocessor conditionals) are not visible here.
 *
 * Returns EAF_ERR_INVALID_FD for an inactive fd, EAF_ERR_TRUNCATE when either
 * truncate call fails, and ELIO_PENDING_ERR when the reopen fails.
 *
 * NOTE(review): the result of elio_close() is stored in rc but no check of it
 * is visible. On a failed reopen the stored name is freed; confirm it is also
 * set to NULL so the slot stops looking active to valid_fd().
 * NOTE(review): the block comment opened inside the body has no visible
 * terminator in this view.
 */
int EAF_Truncate(int fd, eaf_off_t length)
604
if (!valid_fd(fd)) return EAF_ERR_INVALID_FD;
607
/* ftruncate does not work with Cray FFIO, we need to implement it
608
* as a sequence of generic close, truncate, open calls
611
rc = elio_close(file[fd].elio_fd);
613
if(truncate(file[fd].fname, (off_t) length)) return EAF_ERR_TRUNCATE;
614
if (!(file[fd].elio_fd = elio_open(file[fd].fname, file[fd].type, ELIO_PRIVATE))) {
615
free(file[fd].fname);
617
return ELIO_PENDING_ERR;
620
if(elio_truncate(file[fd].elio_fd, (Off_t)length)) return EAF_ERR_TRUNCATE;
624
/* return elio_truncate(file[fd].elio_fd, (Off_t) length);*/
629
* Return in length the length of the file.
630
* Return 0 on success, nonzero on failure.
632
/*
 * Return the length of the file behind fd through *length.
 *
 * Regular entries are measured with elio_length(); *length is written only
 * when that call reports success (rc == 0).
 * MA-backed entries (size > 0) are rejected with EAF_ERR_INVALID_FD when
 * their openma flag is 0. The value reported for an open MA entry is set on
 * lines not visible in this view.
 * Returns EAF_ERR_INVALID_FD for an inactive fd.
 */
int EAF_Length(int fd, eaf_off_t *length)
637
if (!valid_fd(fd)) return EAF_ERR_INVALID_FD;
639
if (file[fd].size > 0) {
640
// should be in MB???
641
if(file[fd].openma == 0) return EAF_ERR_INVALID_FD;
645
rc = elio_length(file[fd].elio_fd, &len);
647
if(!rc) *length = (eaf_off_t) len;
653
* Print performance statistics for this file to standard output
655
/*
 * Print the performance counters kept for fd to standard output: the file
 * name and size, call counts, byte counts, accumulated times and the derived
 * transfer rates. Does nothing when fd is not an active descriptor.
 *
 * Rates are bytes / (1e6 * seconds). The asynchronous rates divide by
 * t_wait plus the matching asynchronous time, so the whole wait time is
 * charged to both reads and writes.
 * Two alternative "rate" lines appear at the end, one with and one without
 * the asynchronous columns; the conditional selecting between them is not
 * visible in this view.
 *
 * NOTE(review): mbr, mbw, mbra and mbwa are assigned only when the matching
 * time is > 0.0. Confirm they are initialized on the lines missing from this
 * view before being printed.
 */
void EAF_Print_stats(int fd)
658
double mbr, mbw, mbra, mbwa;
659
if (!valid_fd(fd)) return;
661
/* If the length cannot be obtained, -1 is stored and then printed through
 * the unsigned casts below. */
if (EAF_Length(fd, &len)) len = -1;
664
printf("------------------------------------------------------------\n");
665
#if HAVE_UNSIGNED_LONG_LONG_INT
666
printf("EAF file %d: \"%s\" size=%llu bytes\n",
667
fd, file[fd].fname, (unsigned long long) len);
669
printf("EAF file %d: \"%s\" size=%lu bytes\n",
670
fd, file[fd].fname, (unsigned long) len);
672
printf("------------------------------------------------------------\n");
673
printf(" write read awrite aread wait\n");
674
printf(" ----- ---- ------ ----- ----\n");
675
printf(" calls: %8d %8d %8d %8d %8d\n",
676
file[fd].nwrite, file[fd].nread, file[fd].nawrite,
677
file[fd].naread, file[fd].nwait);
678
printf(" data(b): %.2e %.2e %.2e %.2e\n",
679
file[fd].nb_write, file[fd].nb_read, file[fd].nb_awrite,
681
printf(" time(s): %.2e %.2e %.2e %.2e %.2e\n",
682
file[fd].t_write, file[fd].t_read,
683
file[fd].t_awrite, file[fd].t_aread,
689
if (file[fd].t_write > 0.0) mbw = file[fd].nb_write/(1e6*file[fd].t_write);
690
if (file[fd].t_read > 0.0) mbr = file[fd].nb_read/(1e6*file[fd].t_read);
691
if ((file[fd].t_wait + file[fd].t_aread) > 0.0)
692
mbra = 1e-6*file[fd].nb_aread /
693
(file[fd].t_wait + file[fd].t_aread);
694
if ((file[fd].t_wait + file[fd].t_awrite) > 0.0)
695
mbwa = 1e-6*file[fd].nb_awrite /
696
(file[fd].t_wait + file[fd].t_awrite);
698
/* Note that wait time does not distinguish between read/write completion
699
so that entire wait time is counted
700
in computing effective speed for async read & write */
702
printf("rate(mb/s): %.2e %.2e %.2e* %.2e*\n", mbw, mbr, mbwa, mbra);
703
printf("------------------------------------------------------------\n");
704
printf("* = Effective rate. Full wait time used for read and write.\n\n");
707
printf("rate(mb/s): %.2e %.2e\n", mbw, mbr);
708
printf("------------------------------------------------------------\n\n");
713
int eaf_flushbuf(int fd, eaf_off_t offset, const void *buf, size_t bytes)
714
/* once we run out of MA memory, let's open a real eaf file,
715
flush the whole MA allocation to the file, plus the last bytes
719
long masize, mahandle;
720
char *mapointer, *oldfname;
721
double start = wall_time();
722
/* invalidate old FD but do not deallocate MA */
723
masize=file[fd].size;
724
mahandle=file[fd].handle;
725
mapointer=file[fd].pointer;
726
oldfname = malloc((unsigned) (strlen(file[fd].fname)));
727
strcpy(oldfname, file[fd].fname);
728
file[fd].fname= NULL;
729
rc=EAF_Open(oldfname, EAF_RW, &fd_new);
730
(void) free(oldfname);
732
printf(" flushbuf: open failure \n");
736
rc = elio_write(file[fd_new].elio_fd, 0., (char*)mapointer, (Size_t) masize);
737
/* write last bytes */
738
rc = elio_write(file[fd_new].elio_fd, (Off_t) file[fd].size , buf, (Size_t) bytes);
740
printf(" flushbuf: write failure \n");
741
if(rc < 0) return((int)rc); /* rc<0 means ELIO detected error */
742
else return EAF_ERR_WRITE;
744
file[fd_new].nwrite++;
745
file[fd_new].nb_write += file[fd].size;
746
file[fd_new].nwrite++;
747
file[fd_new].nb_write += bytes;
748
file[fd_new].t_write += wall_time() - start;
750
if(!MA_free_heap(mahandle)) {
751
MA_summarize_allocated_blocks();
752
return EAF_ERR_UNLINK;
754
/* swap fd with fd_new, is this too little?? */