1
/******************************************************
2
The interface to the operating system file io
6
Created 10/21/1995 Heikki Tuuri
7
*******************************************************/
20
typedef struct fil_node_struct fil_node_t;
23
extern ibool os_do_not_call_flush_at_each_write;
24
#endif /* UNIV_DO_FLUSH */
25
extern ibool os_has_said_disk_full;
26
extern ibool os_aio_print_debug;
28
extern ulint os_file_n_pending_preads;
29
extern ulint os_file_n_pending_pwrites;
31
extern ulint os_n_pending_reads;
32
extern ulint os_n_pending_writes;
36
/* We always define WIN_ASYNC_IO, and check at run-time whether
37
the OS actually supports it: Win 95 does not, NT does. */
40
#define UNIV_NON_BUFFERED_IO
45
#define os_file_t HANDLE
47
typedef int os_file_t;
50
extern ulint os_innodb_umask;
52
/* If this flag is TRUE, then we will use the native aio of the
53
OS (provided we compiled Innobase with it in), otherwise we will
54
use simulated aio we build below with threads */
56
extern ibool os_aio_use_native_aio;
58
#define OS_FILE_SECTOR_SIZE		512

/* The next value should be smaller than or equal to the smallest sector size
used on any disk. A log block is required to be a portion of disk which is
written so that if the start and the end of a block get written to disk, then
the whole block gets written. This should be true even in most cases of a
crash: if this fails for a log block, then it is equivalent to a media failure
(NOTE: the tail of this sentence is truncated in this copy of the file). */
#define OS_FILE_LOG_BLOCK_SIZE		512
69
/* Options for file_create: values of the create_mode argument */
#define OS_FILE_OPEN			51	/* open an existing file;
						error if it does not exist */
#define OS_FILE_CREATE			52	/* create a new file; error
						if it already exists */
#define OS_FILE_OVERWRITE		53	/* create a new file or
						overwrite an old one */
#define OS_FILE_OPEN_RAW		54	/* open a raw device or disk
						partition */
#define OS_FILE_CREATE_PATH		55	/* like OS_FILE_CREATE, but
						subdirectories along the path
						are created too if needed */
#define OS_FILE_OPEN_RETRY		56	/* for os_file_create() on
						the first ibdata file */

/* Values of the access_type argument */
#define OS_FILE_READ_ONLY		333
#define OS_FILE_READ_WRITE		444
#define OS_FILE_READ_ALLOW_DELETE	555	/* for ibbackup */
82
/* Options for file_create: values of the purpose argument */
#define OS_FILE_AIO			61	/* asynchronous, non-buffered
						i/o is desired */
#define OS_FILE_NORMAL			62	/* any normal file */

/* Types for file create */
#define OS_DATA_FILE			100
#define OS_LOG_FILE			101
90
/* Error codes from os_file_get_last_error */
#define OS_FILE_NOT_FOUND		71
#define OS_FILE_DISK_FULL		72
#define OS_FILE_ALREADY_EXISTS		73
#define OS_FILE_PATH_ERROR		74
#define OS_FILE_AIO_RESOURCES_RESERVED	75	/* wait for OS aio resources
						to become available again */
#define OS_FILE_SHARING_VIOLATION	76
#define OS_FILE_ERROR_NOT_SPECIFIED	77
/* 78 is used in the plugin */
#define OS_FILE_OPERATION_ABORTED	79
102
/* Types for aio operations */
#define OS_FILE_READ	10
#define OS_FILE_WRITE	11

#define OS_FILE_LOG	256	/* This can be ORed to type */
108
#define OS_AIO_N_PENDING_IOS_PER_THREAD 32	/* Win NT does not allow more
						(NOTE: the rest of this remark
						is truncated in this copy of
						the file) */

/* Modes for aio operations */
#define OS_AIO_NORMAL	21	/* Normal asynchronous i/o not for ibuf
				pages or ibuf bitmap pages */
#define OS_AIO_IBUF	22	/* Asynchronous i/o for ibuf pages or ibuf
				bitmap pages */
#define OS_AIO_LOG	23	/* Asynchronous i/o for the log */
#define OS_AIO_SYNC	24	/* Asynchronous i/o where the calling thread
				will itself wait for the i/o to complete,
				doing also the job of the i/o-handler thread;
				can be used for any pages, ibuf or non-ibuf.
				This is used to save CPU time, as we can do
				with fewer thread switches. Plain synchronous
				i/o is not as good, because it must serialize
				the file seek and read or write, causing a
				bottleneck for parallelism. */

#define OS_AIO_SIMULATED_WAKE_LATER	512 /* This can be ORed to mode
				in the call of os_aio(...),
				if the caller wants to post several i/o
				requests in a batch, and only after that
				wake the i/o-handler thread; this has
				effect only in simulated aio */
138
extern ulint os_n_file_reads;
139
extern ulint os_n_file_writes;
140
extern ulint os_n_fsyncs;
142
/* File types for directory entry data type */
144
/* Kind of object a directory entry refers to; OS_FILE_TYPE_UNKNOWN is the
zero value */
enum os_file_type_enum{
	OS_FILE_TYPE_UNKNOWN = 0,
	OS_FILE_TYPE_FILE,		/* regular file */
	OS_FILE_TYPE_DIR,		/* directory */
	OS_FILE_TYPE_LINK		/* symbolic link */
};
typedef enum os_file_type_enum	  os_file_type_t;
152
/* Maximum path string length in bytes when referring to tables in the
'./databasename/tablename.ibd' path format; we can allocate at least 2 buffers
of this size from the thread stack; that is why this should not be made much
bigger than 4000 bytes */
#define OS_FILE_MAX_PATH	4000
158
/* Struct used in fetching information of a file in a directory */
159
struct os_file_stat_struct{
160
char name[OS_FILE_MAX_PATH]; /* path to a file */
161
os_file_type_t type; /* file type */
162
ib_longlong size; /* file size */
163
time_t ctime; /* creation time */
164
time_t mtime; /* modification time */
165
time_t atime; /* access time */
167
typedef struct os_file_stat_struct os_file_stat_t;
170
typedef HANDLE os_file_dir_t; /* directory stream */
172
typedef DIR* os_file_dir_t; /* directory stream */
175
/***************************************************************************
176
Gets the operating system version. Currently works only on Windows. */
179
os_get_os_version(void);
180
/*===================*/
181
/* out: OS_WIN95, OS_WIN31, OS_WINNT, or OS_WIN2000 */
182
/********************************************************************
183
Creates the seek mutexes used in positioned reads and writes. */
186
os_io_init_simple(void);
187
/*===================*/
188
/***************************************************************************
189
Creates a temporary file. This function is like tmpfile(3), but
190
the temporary file is created in the MySQL temporary directory.
191
On Netware, this function is like tmpfile(3), because the C run-time
192
library of Netware does not expose the delete-on-close flag. */
195
os_file_create_tmpfile(void);
196
/*========================*/
197
/* out: temporary file handle, or NULL on error */
198
/***************************************************************************
199
The os_file_opendir() function opens a directory stream corresponding to the
200
directory named by the dirname argument. The directory stream is positioned
201
at the first entry. In both Unix and Windows we automatically skip the '.'
202
and '..' items at the start of the directory listing. */
207
/* out: directory stream, NULL if
209
const char* dirname, /* in: directory name; it must not
210
contain a trailing '\' or '/' */
211
ibool error_is_fatal);/* in: TRUE if we should treat an
212
error as a fatal error; if we try to
213
open symlinks then we do not wish a
214
fatal error if it happens not to be
216
/***************************************************************************
217
Closes a directory stream. */
222
/* out: 0 if success, -1 if failure */
223
os_file_dir_t dir); /* in: directory stream */
224
/***************************************************************************
225
This function returns information of the next file in the directory. We jump
226
over the '.' and '..' entries in the directory. */
229
os_file_readdir_next_file(
230
/*======================*/
231
/* out: 0 if ok, -1 if error, 1 if at the end
233
const char* dirname,/* in: directory name or path */
234
os_file_dir_t dir, /* in: directory stream */
235
os_file_stat_t* info); /* in/out: buffer where the info is returned */
236
/*********************************************************************
237
This function attempts to create a directory named pathname. The new directory
238
gets default permissions. On Unix, the permissions are (0770 & ~umask). If the
239
directory exists already, nothing is done and the call succeeds, unless the
240
fail_if_exists argument is true. */
243
os_file_create_directory(
244
/*=====================*/
245
/* out: TRUE if call succeeds,
247
const char* pathname, /* in: directory name as
248
null-terminated string */
249
ibool fail_if_exists);/* in: if TRUE, pre-existing directory
250
is treated as an error. */
251
/********************************************************************
252
A simple function to open or create a file. */
255
os_file_create_simple(
256
/*==================*/
257
/* out, own: handle to the file, not defined
258
if error, error number can be retrieved with
259
os_file_get_last_error */
260
const char* name, /* in: name of the file or path as a
261
null-terminated string */
262
ulint create_mode,/* in: OS_FILE_OPEN if an existing file is
263
opened (if does not exist, error), or
264
OS_FILE_CREATE if a new file is created
265
(if exists, error), or
266
OS_FILE_CREATE_PATH if new file
267
(if exists, error) and subdirectories along
268
its path are created (if needed)*/
269
ulint access_type,/* in: OS_FILE_READ_ONLY or
270
OS_FILE_READ_WRITE */
271
ibool* success);/* out: TRUE if succeed, FALSE if error */
272
/********************************************************************
273
A simple function to open or create a file. */
276
os_file_create_simple_no_error_handling(
277
/*====================================*/
278
/* out, own: handle to the file, not defined
279
if error, error number can be retrieved with
280
os_file_get_last_error */
281
const char* name, /* in: name of the file or path as a
282
null-terminated string */
283
ulint create_mode,/* in: OS_FILE_OPEN if an existing file
284
is opened (if does not exist, error), or
285
OS_FILE_CREATE if a new file is created
286
(if exists, error) */
287
ulint access_type,/* in: OS_FILE_READ_ONLY,
288
OS_FILE_READ_WRITE, or
289
OS_FILE_READ_ALLOW_DELETE; the last option is
290
used by a backup program reading the file */
291
ibool* success);/* out: TRUE if succeed, FALSE if error */
292
/********************************************************************
293
Opens an existing file or creates a new one. */
298
/* out, own: handle to the file, not defined
299
if error, error number can be retrieved with
300
os_file_get_last_error */
301
const char* name, /* in: name of the file or path as a
302
null-terminated string */
303
ulint create_mode,/* in: OS_FILE_OPEN if an existing file
304
is opened (if does not exist, error), or
305
OS_FILE_CREATE if a new file is created
307
OS_FILE_OVERWRITE if a new file is created
308
or an old overwritten;
309
OS_FILE_OPEN_RAW, if a raw device or disk
310
partition should be opened */
311
ulint purpose,/* in: OS_FILE_AIO, if asynchronous,
312
non-buffered i/o is desired,
313
OS_FILE_NORMAL, if any normal file;
314
NOTE that it also depends on type, os_aio_..
315
and srv_.. variables whether we really use
316
async i/o or unbuffered i/o: look in the
317
function source code for the exact rules */
318
ulint type, /* in: OS_DATA_FILE or OS_LOG_FILE */
319
ibool* success);/* out: TRUE if succeed, FALSE if error */
320
/***************************************************************************
321
Deletes a file. The file has to be closed before calling this. */
326
/* out: TRUE if success */
327
const char* name); /* in: file path as a null-terminated string */
329
/***************************************************************************
330
Deletes a file if it exists. The file has to be closed before calling this. */
333
os_file_delete_if_exists(
334
/*=====================*/
335
/* out: TRUE if success */
336
const char* name); /* in: file path as a null-terminated string */
337
/***************************************************************************
338
Renames a file (can also move it to another directory). It is safest that the
339
file is closed before calling this function. */
344
/* out: TRUE if success */
345
const char* oldpath, /* in: old file path as a
346
null-terminated string */
347
const char* newpath); /* in: new file path */
348
/***************************************************************************
349
Closes a file handle. In case of error, error number can be retrieved with
350
os_file_get_last_error. */
355
/* out: TRUE if success */
356
os_file_t file); /* in, own: handle to a file */
357
/***************************************************************************
358
Closes a file handle. */
361
os_file_close_no_error_handling(
362
/*============================*/
363
/* out: TRUE if success */
364
os_file_t file); /* in, own: handle to a file */
365
/***************************************************************************
371
/* out: TRUE if success */
372
os_file_t file, /* in: handle to a file */
373
ulint* size, /* out: least significant 32 bits of file
375
ulint* size_high);/* out: most significant 32 bits of size */
376
/***************************************************************************
377
Gets file size as a 64-bit integer ib_longlong. */
380
os_file_get_size_as_iblonglong(
381
/*===========================*/
382
/* out: size in bytes, -1 if error */
383
os_file_t file); /* in: handle to a file */
384
/***************************************************************************
385
Write the specified number of zeros to a newly created file. */
390
/* out: TRUE if success */
391
const char* name, /* in: name of the file or path as a
392
null-terminated string */
393
os_file_t file, /* in: handle to a file */
394
ulint size, /* in: least significant 32 bits of file
396
ulint size_high);/* in: most significant 32 bits of size */
397
/***************************************************************************
398
Truncates a file at its current position. */
403
/* out: TRUE if success */
404
FILE* file); /* in: file to be truncated */
405
/***************************************************************************
406
Flushes the write buffers of a given file to the disk. */
411
/* out: TRUE if success */
412
os_file_t file); /* in, own: handle to a file */
413
/***************************************************************************
414
Retrieves the last error number if an error occurs in a file io function.
415
The number should be retrieved before any other OS calls (because they may
416
overwrite the error number). If the number is not known to this program,
417
the OS error number + 100 is returned. */
420
os_file_get_last_error(
421
/*===================*/
422
/* out: error number, or OS error
424
ibool report_all_errors); /* in: TRUE if we want an error message
425
printed of all errors */
426
/***********************************************************************
427
Requests a synchronous read operation. */
432
/* out: TRUE if request was
433
successful, FALSE if fail */
434
os_file_t file, /* in: handle to a file */
435
void* buf, /* in: buffer where to read */
436
ulint offset, /* in: least significant 32 bits of file
437
offset where to read */
438
ulint offset_high,/* in: most significant 32 bits of
440
ulint n); /* in: number of bytes to read */
441
/***********************************************************************
442
Rewind file to its start, read at most size - 1 bytes from it to str, and
443
NUL-terminate str. All errors are silently ignored. This function is
444
mostly meant to be used with temporary files. */
449
FILE* file, /* in: file to read from */
450
char* str, /* in: buffer where to read */
451
ulint size); /* in: size of buffer */
452
/***********************************************************************
453
Requests a synchronous positioned read operation. This function does not do
454
any error handling. In case of error it returns FALSE. */
457
os_file_read_no_error_handling(
458
/*===========================*/
459
/* out: TRUE if request was
460
successful, FALSE if fail */
461
os_file_t file, /* in: handle to a file */
462
void* buf, /* in: buffer where to read */
463
ulint offset, /* in: least significant 32 bits of file
464
offset where to read */
465
ulint offset_high,/* in: most significant 32 bits of
467
ulint n); /* in: number of bytes to read */
469
/***********************************************************************
470
Requests a synchronous write operation. */
475
/* out: TRUE if request was
476
successful, FALSE if fail */
477
const char* name, /* in: name of the file or path as a
478
null-terminated string */
479
os_file_t file, /* in: handle to a file */
480
const void* buf, /* in: buffer from which to write */
481
ulint offset, /* in: least significant 32 bits of file
482
offset where to write */
483
ulint offset_high,/* in: most significant 32 bits of
485
ulint n); /* in: number of bytes to write */
486
/***********************************************************************
487
Check the existence and type of the given file. */
492
/* out: TRUE if call succeeded */
493
const char* path, /* in: pathname of the file */
494
ibool* exists, /* out: TRUE if file exists */
495
os_file_type_t* type); /* out: type of the file (if it exists) */
496
/********************************************************************
497
The function os_file_dirname returns a directory component of a
498
null-terminated pathname string. In the usual case, dirname returns
499
the string up to, but not including, the final '/', and basename
500
is the component following the final '/'. Trailing '/' characters
500
are not counted as part of the pathname.
503
If path does not contain a slash, dirname returns the string ".".
505
Concatenating the string returned by dirname, a "/", and the basename
506
yields a complete pathname.
508
The return value is a copy of the directory component of the pathname.
509
The copy is allocated from heap. It is the caller's responsibility
510
to free it after it is no longer needed.
512
The following list of examples (taken from SUSv2) shows the strings
513
returned by dirname and basename for different paths:
515
path dirname basename
516
"/usr/lib" "/usr" "lib"
527
/* out, own: directory component of the
529
const char* path); /* in: pathname */
530
/********************************************************************
531
Creates all missing subdirectories along the given path. */
534
os_file_create_subdirs_if_needed(
535
/*=============================*/
536
/* out: TRUE if call succeeded
538
const char* path); /* in: path name */
539
/****************************************************************************
540
Initializes the asynchronous io system. Creates separate aio array for
541
non-ibuf read and write, a third aio array for the ibuf i/o, with just one
542
segment, two aio arrays for log reads and writes with one segment, and a
543
synchronous aio array of the specified size. The combined number of segments
544
in the three first aio arrays is the parameter n_segments given to the
545
function. The caller must create an i/o handler thread for each segment in
546
the four first arrays, but not for the sync aio array. */
551
ulint n, /* in: maximum number of pending aio operations
552
allowed; n must be divisible by n_segments */
553
ulint n_segments, /* in: combined number of segments in the four
554
first aio arrays; must be >= 4 */
555
ulint n_slots_sync); /* in: number of slots in the sync aio array */
556
/***********************************************************************
557
Requests an asynchronous i/o operation. */
562
/* out: TRUE if request was queued
563
successfully, FALSE if fail */
564
ulint type, /* in: OS_FILE_READ or OS_FILE_WRITE */
565
ulint mode, /* in: OS_AIO_NORMAL, ..., possibly ORed
566
to OS_AIO_SIMULATED_WAKE_LATER: the
567
last flag advises this function not to wake
568
i/o-handler threads, but the caller will
569
do the waking explicitly later, in this
570
way the caller can post several requests in
571
a batch; NOTE that the batch must not be
572
so big that it exhausts the slots in aio
573
arrays! NOTE that a simulated batch
574
may introduce hidden chances of deadlocks,
575
because i/os are not actually handled until
576
all have been posted: use with great
578
const char* name, /* in: name of the file or path as a
579
null-terminated string */
580
os_file_t file, /* in: handle to a file */
581
void* buf, /* in: buffer where to read or from which
583
ulint offset, /* in: least significant 32 bits of file
584
offset where to read or write */
585
ulint offset_high, /* in: most significant 32 bits of
587
ulint n, /* in: number of bytes to read or write */
588
fil_node_t* message1,/* in: messages for the aio handler (these
589
can be used to identify a completed aio
590
operation); if mode is OS_AIO_SYNC, these
593
/****************************************************************************
594
Wakes up all async i/o threads so that they know to exit themselves in
598
os_aio_wake_all_threads_at_shutdown(void);
599
/*=====================================*/
600
/****************************************************************************
601
Waits until there are no pending writes in os_aio_write_array. There can
602
be other, synchronous, pending writes. */
605
os_aio_wait_until_no_pending_writes(void);
606
/*=====================================*/
607
/**************************************************************************
608
Wakes up simulated aio i/o-handler threads if they have something to do. */
611
os_aio_simulated_wake_handler_threads(void);
612
/*=======================================*/
613
/**************************************************************************
614
This function can be called if one wants to post a batch of reads and
615
prefers an i/o-handler thread to handle them all at once later. You must
616
call os_aio_simulated_wake_handler_threads later to ensure the threads
617
are not left sleeping! */
620
os_aio_simulated_put_read_threads_to_sleep(void);
621
/*============================================*/
624
/**************************************************************************
625
This function is only used in Windows asynchronous i/o.
626
Waits for an aio operation to complete. This function is used to wait
627
for completed requests. The aio array of pending requests is divided
628
into segments. The thread specifies which segment or slot it wants to wait
629
for. NOTE: this function will also take care of freeing the aio slot,
630
therefore no other thread is allowed to do the freeing! */
633
os_aio_windows_handle(
634
/*==================*/
635
/* out: TRUE if the aio operation succeeded */
636
ulint segment, /* in: the number of the segment in the aio
637
arrays to wait for; segment 0 is the ibuf
638
i/o thread, segment 1 the log i/o thread,
639
then follow the non-ibuf read threads, and as
640
the last are the non-ibuf write threads; if
641
this is ULINT_UNDEFINED, then it means that
642
sync aio is used, and this parameter is
644
ulint pos, /* this parameter is used only in sync aio:
645
wait for the aio slot at this position */
646
fil_node_t**message1, /* out: the messages passed with the aio
647
request; note that also in the case where
648
the aio operation failed, these output
649
parameters are valid and can be used to
650
restart the operation, for example */
652
ulint* type); /* out: OS_FILE_WRITE or ..._READ */
655
/* Currently we do not use Posix async i/o */
656
#ifdef POSIX_ASYNC_IO
657
/**************************************************************************
658
This function is only used in Posix asynchronous i/o. Waits for an aio
659
operation to complete. */
664
/* out: TRUE if the aio operation succeeded */
665
ulint array_no, /* in: array number 0 - 3 */
666
fil_node_t**message1, /* out: the messages passed with the aio
667
request; note that also in the case where
668
the aio operation failed, these output
669
parameters are valid and can be used to
670
restart the operation, for example */
673
/**************************************************************************
674
Does simulated aio. This function should be called by an i/o-handler
678
os_aio_simulated_handle(
679
/*====================*/
680
/* out: TRUE if the aio operation succeeded */
681
ulint segment, /* in: the number of the segment in the aio
682
arrays to wait for; segment 0 is the ibuf
683
i/o thread, segment 1 the log i/o thread,
684
then follow the non-ibuf read threads, and as
685
the last are the non-ibuf write threads */
686
fil_node_t**message1, /* out: the messages passed with the aio
687
request; note that also in the case where
688
the aio operation failed, these output
689
parameters are valid and can be used to
690
restart the operation, for example */
692
ulint* type); /* out: OS_FILE_WRITE or ..._READ */
693
/**************************************************************************
694
Validates the consistency of the aio system. */
697
os_aio_validate(void);
698
/*=================*/
699
/* out: TRUE if ok */
700
/**************************************************************************
701
Prints info of the aio arrays. */
706
FILE* file); /* in: file where to print */
707
/**************************************************************************
708
Refreshes the statistics used to print per-second averages. */
711
os_aio_refresh_stats(void);
712
/*======================*/
715
/**************************************************************************
716
Checks that all slots in the system have been freed, that is, there are
717
no pending io operations. */
720
os_aio_all_slots_free(void);
721
/*=======================*/
722
#endif /* UNIV_DEBUG */
724
/***********************************************************************
725
This function returns information about the specified file */
731
const char* path, /* in: pathname of the file */
732
os_file_stat_t* stat_info); /* information of a file in a