1
1
/*****************************************************************************
2
* RRDtool 1.3.8 Copyright by Tobi Oetiker, 1997-2009
2
* RRDtool 1.4.3 Copyright by Tobi Oetiker, 1997-2010
3
3
*****************************************************************************
4
4
* rrd_open.c Open an RRD File
5
5
*****************************************************************************
6
* $Id: rrd_open.c 1801 2009-05-19 13:45:05Z oetiker $
6
* $Id: rrd_open.c 2042 2010-03-22 16:05:55Z oetiker $
7
7
*****************************************************************************/
13
10
#include <stdlib.h>
15
12
#include <sys/stat.h>
15
#ifdef HAVE_BROKEN_MS_ASYNC
16
#include <sys/types.h>
19
22
#define MEMBLK 8192
25
#define _LK_UNLCK 0 /* Unlock */
26
#define _LK_LOCK 1 /* Lock */
27
#define _LK_NBLCK 2 /* Non-blocking lock */
28
#define _LK_RLCK 3 /* Lock for read only */
29
#define _LK_NBRLCK 4 /* Non-blocking lock for read only */
32
#define LK_UNLCK _LK_UNLCK
33
#define LK_LOCK _LK_LOCK
34
#define LK_NBLCK _LK_NBLCK
35
#define LK_RLCK _LK_RLCK
36
#define LK_NBRLCK _LK_NBRLCK
21
39
/* DEBUG 2 prints information obtained via mincore(2) */
23
41
/* do not calculate exact madvise hints but assume 1 page for headers and
66
84
* positioned to the first cdp in the first rra.
67
85
* In the error path of rrd_open, only rrd_free(&rrd) has to be called
68
86
* before returning an error. Do not call rrd_close upon failure of rrd_open.
87
* If creating a new file, the parameter rrd must be initialised with
88
* details of the file content.
89
* If opening an existing file, then rrd must be initialised by
90
* rrd_init(rrd) prior to invoking rrd_open
71
93
rrd_file_t *rrd_open(
78
/* Win32 can't use S_IRUSR flag */
80
mode_t mode = S_IRUSR;
87
103
ssize_t _page_size = sysconf(_SC_PAGESIZE);
88
int mm_prot = PROT_READ, mm_flags = 0;
89
104
char *data = MAP_FAILED;
92
107
struct stat statb;
93
108
rrd_file_t *rrd_file = NULL;
94
off_t newfile_size = 0;
96
if (rdwr & RRD_CREAT) {
97
/* yes bad inline signaling alert, we are using the
98
floatcookie to pass the size in ... only used in resize */
99
newfile_size = (off_t) rrd->stat_head->float_cookie;
100
free(rrd->stat_head);
109
rrd_simple_file_t *rrd_simple_file = NULL;
110
size_t newfile_size = 0;
111
size_t header_len, value_cnt, data_len;
113
/* Are we creating a new file? */
114
if((rdwr & RRD_CREAT) && (rrd->stat_head != NULL))
116
header_len = rrd_get_header_size(rrd);
119
for (ui = 0; ui < rrd->stat_head->rra_cnt; ui++)
120
value_cnt += rrd->stat_head->ds_cnt * rrd->rra_def[ui].row_cnt;
122
data_len = sizeof(rrd_value_t) * value_cnt;
124
newfile_size = header_len + data_len;
103
127
rrd_file = (rrd_file_t*)malloc(sizeof(rrd_file_t));
104
128
if (rrd_file == NULL) {
105
129
rrd_set_error("allocating rrd_file descriptor for '%s'", file_name);
108
132
memset(rrd_file, 0, sizeof(rrd_file_t));
134
rrd_file->pvt = malloc(sizeof(rrd_simple_file_t));
135
if(rrd_file->pvt == NULL) {
136
rrd_set_error("allocating rrd_simple_file for '%s'", file_name);
139
memset(rrd_file->pvt, 0, sizeof(rrd_simple_file_t));
140
rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
111
143
if ((rdwr & (RRD_READONLY | RRD_READWRITE)) ==
112
144
(RRD_READONLY | RRD_READWRITE)) {
152
rrd_simple_file->mm_prot = PROT_READ;
153
rrd_simple_file->mm_flags = 0;
118
156
if (rdwr & RRD_READONLY) {
119
157
flags |= O_RDONLY;
121
mm_flags = MAP_PRIVATE;
159
rrd_simple_file->mm_flags = MAP_PRIVATE;
122
160
# ifdef MAP_NORESERVE
123
mm_flags |= MAP_NORESERVE; /* readonly, so no swap backing needed */
161
rrd_simple_file->mm_flags |= MAP_NORESERVE; /* readonly, so no swap backing needed */
127
165
if (rdwr & RRD_READWRITE) {
128
#ifndef WIN32 // Win32 can't use this mode
133
mm_flags = MAP_SHARED;
134
mm_prot |= PROT_WRITE;
168
rrd_simple_file->mm_flags = MAP_SHARED;
169
rrd_simple_file->mm_prot |= PROT_WRITE;
137
172
if (rdwr & RRD_CREAT) {
138
173
flags |= (O_CREAT | O_TRUNC);
175
if (rdwr & RRD_EXCL) {
141
179
if (rdwr & RRD_READAHEAD) {
142
180
#ifdef MAP_POPULATE
143
mm_flags |= MAP_POPULATE; /* populate ptes and data */
181
rrd_simple_file->mm_flags |= MAP_POPULATE; /* populate ptes and data */
145
183
#if defined MAP_NONBLOCK
146
mm_flags |= MAP_NONBLOCK; /* just populate ptes */
184
rrd_simple_file->mm_flags |= MAP_NONBLOCK; /* just populate ptes */
149
187
#if defined(_WIN32) && !defined(__CYGWIN__) && !defined(__CYGWIN32__)
150
188
flags |= O_BINARY;
153
if ((rrd_file->fd = open(file_name, flags, mode)) < 0) {
191
if ((rrd_simple_file->fd = open(file_name, flags, 0666)) < 0) {
154
192
rrd_set_error("opening '%s': %s", file_name, rrd_strerror(errno));
159
197
#ifdef HAVE_BROKEN_MS_ASYNC
160
if (rdwr & RRD_READWRITE) {
161
/* on some unices, the file's mtime does not get updated
162
on msync MS_ASYNC, in order to help them,
163
we update the timestamp at this point.
164
The thing happens pretty 'close' to the open
165
call so the chances of a race should be minimal.
167
Maybe ask your vendor to fix your OS ... */
168
utime(file_name,NULL);
198
if (rdwr & RRD_READWRITE) {
199
/* on some unices, the file's mtime does not get updated
200
on msync MS_ASYNC, in order to help them,
201
we update the timestamp at this point.
202
The thing happens pretty 'close' to the open
203
call so the chances of a race should be minimal.
205
Maybe ask your vendor to fix your OS ... */
206
utime(file_name,NULL);
173
211
/* Better try to avoid seeks as much as possible. stat may be heavy but
174
212
* many concurrent seeks are even worse. */
175
if (newfile_size == 0 && ((fstat(rrd_file->fd, &statb)) < 0)) {
213
if (newfile_size == 0 && ((fstat(rrd_simple_file->fd, &statb)) < 0)) {
176
214
rrd_set_error("fstat '%s': %s", file_name, rrd_strerror(errno));
180
218
rrd_file->file_len = statb.st_size;
182
220
rrd_file->file_len = newfile_size;
183
lseek(rrd_file->fd, newfile_size - 1, SEEK_SET);
184
write(rrd_file->fd, "\0", 1); /* poke */
185
lseek(rrd_file->fd, 0, SEEK_SET);
221
lseek(rrd_simple_file->fd, newfile_size - 1, SEEK_SET);
222
if ( write(rrd_simple_file->fd, "\0", 1) == -1){ /* poke */
223
rrd_set_error("write '%s': %s", file_name, rrd_strerror(errno));
226
lseek(rrd_simple_file->fd, 0, SEEK_SET);
187
228
#ifdef HAVE_POSIX_FADVISE
188
229
/* In general we need no read-ahead when dealing with rrd_files.
189
230
When we stop reading, it is highly unlikely that we start up again.
190
231
In this manner we actually save time and diskaccess (and buffer cache).
191
232
Thanks to Dave Plonka for the Idea of using POSIX_FADV_RANDOM here. */
192
posix_fadvise(rrd_file->fd, 0, 0, POSIX_FADV_RANDOM);
233
posix_fadvise(rrd_simple_file->fd, 0, 0, POSIX_FADV_RANDOM);
196
237
if (rdwr & RRD_READWRITE)
198
if (setvbuf((rrd_file->fd),NULL,_IONBF,2)) {
239
if (setvbuf((rrd_simple_file->fd),NULL,_IONBF,2)) {
199
240
rrd_set_error("failed to disable the stream buffer\n");
205
data = mmap(0, rrd_file->file_len, mm_prot, mm_flags,
206
rrd_file->fd, offset);
247
data = mmap(0, rrd_file->file_len,
248
rrd_simple_file->mm_prot, rrd_simple_file->mm_flags,
249
rrd_simple_file->fd, offset);
208
251
/* let's see if the first read worked */
209
252
if (data == MAP_FAILED) {
222
265
#ifdef USE_MADVISE
223
266
if (rdwr & RRD_COPY) {
224
267
/* We will read everything in a moment (copying) */
225
madvise(data, rrd_file->file_len, MADV_WILLNEED | MADV_SEQUENTIAL);
268
madvise(data, rrd_file->file_len, MADV_WILLNEED );
269
madvise(data, rrd_file->file_len, MADV_SEQUENTIAL );
227
271
/* We do not need to read anything in for the moment */
228
272
madvise(data, rrd_file->file_len, MADV_RANDOM);
229
273
/* the stat_head will be needed soonish, so hint accordingly */
230
madvise(data, sizeof(stat_head_t), MADV_WILLNEED | MADV_RANDOM);
274
madvise(data, sizeof(stat_head_t), MADV_WILLNEED);
275
madvise(data, sizeof(stat_head_t), MADV_RANDOM);
307
352
unsigned long row_cnt = 0;
310
for (i=0; i<rrd->stat_head->rra_cnt; i++)
311
row_cnt += rrd->rra_def[i].row_cnt;
313
off_t correct_len = rrd_file->header_len +
354
for (ui=0; ui<rrd->stat_head->rra_cnt; ui++)
355
row_cnt += rrd->rra_def[ui].row_cnt;
357
size_t correct_len = rrd_file->header_len +
314
358
sizeof(rrd_value_t) * row_cnt * rrd->stat_head->ds_cnt;
316
360
if (correct_len > rrd_file->file_len)
383
431
#endif /* defined DEBUG && DEBUG > 1 */
434
* get exclusive lock to whole file.
435
* lock gets removed when we close the file
437
* returns 0 on success
440
rrd_file_t *rrd_file)
443
rrd_simple_file_t *rrd_simple_file;
444
rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
447
#if defined(_WIN32) && !defined(__CYGWIN__) && !defined(__CYGWIN32__)
450
if (_fstat(rrd_simple_file->fd, &st) == 0) {
451
rcstat = _locking(rrd_simple_file->fd, _LK_NBLCK, st.st_size);
458
lock.l_type = F_WRLCK; /* exclusive write lock */
459
lock.l_len = 0; /* whole file */
460
lock.l_start = 0; /* start of file */
461
lock.l_whence = SEEK_SET; /* end of file */
463
rcstat = fcntl(rrd_simple_file->fd, F_SETLK, &lock);
386
471
/* drop cache except for the header and the active pages */
387
472
void rrd_dontneed(
388
473
rrd_file_t *rrd_file,
476
rrd_simple_file_t *rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
391
477
#if defined USE_MADVISE || defined HAVE_POSIX_FADVISE
392
unsigned long dontneed_start;
393
unsigned long rra_start;
394
unsigned long active_block;
478
size_t dontneed_start;
396
482
ssize_t _page_size = sysconf(_SC_PAGESIZE);
398
484
if (rrd_file == NULL) {
416
502
* rrd->stat_head->ds_cnt * sizeof(rrd_value_t));
417
503
if (active_block > dontneed_start) {
418
504
#ifdef USE_MADVISE
419
madvise(rrd_file->file_start + dontneed_start,
505
madvise(rrd_simple_file->file_start + dontneed_start,
420
506
active_block - dontneed_start - 1, MADV_DONTNEED);
422
508
/* in linux at least only fadvise DONTNEED seems to purge pages from cache */
423
509
#ifdef HAVE_POSIX_FADVISE
424
posix_fadvise(rrd_file->fd, dontneed_start,
510
posix_fadvise(rrd_simple_file->fd, dontneed_start,
425
511
active_block - dontneed_start - 1,
426
512
POSIX_FADV_DONTNEED);
442
528
if (dontneed_start < rrd_file->file_len) {
443
529
#ifdef USE_MADVISE
444
madvise(rrd_file->file_start + dontneed_start,
530
madvise(rrd_simple_file->file_start + dontneed_start,
445
531
rrd_file->file_len - dontneed_start, MADV_DONTNEED);
447
533
#ifdef HAVE_POSIX_FADVISE
448
posix_fadvise(rrd_file->fd, dontneed_start,
534
posix_fadvise(rrd_simple_file->fd, dontneed_start,
449
535
rrd_file->file_len - dontneed_start,
450
536
POSIX_FADV_DONTNEED);
465
551
rrd_file_t *rrd_file)
553
rrd_simple_file_t *rrd_simple_file;
554
rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
470
ret = msync(rrd_file->file_start, rrd_file->file_len, MS_ASYNC);
558
ret = msync(rrd_simple_file->file_start, rrd_file->file_len, MS_ASYNC);
472
560
rrd_set_error("msync rrd_file: %s", rrd_strerror(errno));
473
ret = munmap(rrd_file->file_start, rrd_file->file_len);
561
ret = munmap(rrd_simple_file->file_start, rrd_file->file_len);
475
563
rrd_set_error("munmap rrd_file: %s", rrd_strerror(errno));
477
ret = close(rrd_file->fd);
565
ret = close(rrd_simple_file->fd);
479
567
rrd_set_error("closing file: %s", rrd_strerror(errno));
500
591
else if (whence == SEEK_END)
501
592
rrd_file->pos = rrd_file->file_len + off;
503
ret = lseek(rrd_file->fd, off, whence);
594
ret = lseek(rrd_simple_file->fd, off, whence);
505
596
rrd_set_error("lseek: %s", rrd_strerror(errno));
506
597
rrd_file->pos = ret;
543
635
return 0; /* EOF */
544
buf = memcpy(buf, rrd_file->file_start + rrd_file->pos, _cnt);
636
buf = memcpy(buf, rrd_simple_file->file_start + rrd_file->pos, _cnt);
546
638
rrd_file->pos += _cnt; /* mimmic read() semantics */
551
ret = read(rrd_file->fd, buf, count);
643
ret = read(rrd_simple_file->fd, buf, count);
553
645
rrd_file->pos += ret; /* mimmic read() semantics */
660
rrd_simple_file_t *rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
662
size_t old_size = rrd_file->file_len;
572
666
return -1; /* EINVAL */
573
memcpy(rrd_file->file_start + rrd_file->pos, buf, count);
668
if((rrd_file->pos + count) > old_size)
670
rrd_set_error("attempting to write beyond end of file");
673
memcpy(rrd_simple_file->file_start + rrd_file->pos, buf, count);
574
674
rrd_file->pos += count;
575
675
return count; /* mimmic write() semantics */
577
ssize_t _sz = write(rrd_file->fd, buf, count);
677
ssize_t _sz = write(rrd_simple_file->fd, buf, count);
580
680
rrd_file->pos += _sz;
586
/* flush all data pending to be written to FD. */
686
/* this is a leftover from the old days, it serves no purpose
687
and is therefore turned into a no-op */
589
rrd_file_t *rrd_file)
689
rrd_file_t *rrd_file __attribute__((unused)))
592
* Win32 can only flush files by FlushFileBuffers function,
593
* but it works with HANDLE hFile, not FILE. So skipping
596
if (fdatasync(rrd_file->fd) != 0) {
597
rrd_set_error("flushing fd %d: %s", rrd_file->fd,
598
rrd_strerror(errno));
604
693
/* Initialize RRD header. */
746
* rra_update informs us about the RRAs being updated
747
* The low level storage API may use this information for
748
* aligning RRAs within stripes, or other performance enhancements
751
rrd_file_t *rrd_file __attribute__((unused)),
752
int rra_idx __attribute__((unused)),
753
unsigned long rra_row __attribute__((unused)),
754
time_t rra_time __attribute__((unused)))
759
* This function is called when creating a new RRD
760
* The storage implementation can use this opportunity to select
761
* a sensible starting row within the file.
762
* The default implementation is random, to ensure that all RRAs
763
* don't change to a new disk block at the same time
765
unsigned long rrd_select_initial_row(
766
rrd_file_t *rrd_file __attribute__((unused)),
767
int rra_idx __attribute__((unused)),
771
return rrd_random() % rra->row_cnt;