39
if (directio) flags |= O_DIRECT | O_SYNC;
44
flags |= O_DIRECT | O_SYNC;
48
fd = ::open(fn.c_str(), flags, 0644);
52
if (TEMP_FAILURE_RETRY(::close(fd))) {
54
derr << "FileJournal::_open: error closing old fd: "
55
<< cpp_strerror(err) << dendl;
58
fd = TEMP_FAILURE_RETRY(::open(fn.c_str(), flags, 0644));
51
dout(2) << "_open failed " << cpp_strerror(err) << dendl;
52
cerr << "unable to open journal " << fn << ": " << cpp_strerror(err) << std::endl;
61
derr << "FileJournal::_open : unable to open journal: open() "
62
<< "failed: " << cpp_strerror(err) << dendl;
152
static int get_kernel_version(int *a, int *b, int *c)
156
memset(buf, 0, sizeof(buf));
157
int fd = TEMP_FAILURE_RETRY(::open("/proc/version", O_RDONLY));
160
derr << "get_kernel_version: failed to open /proc/version: "
161
<< cpp_strerror(ret) << dendl;
164
ret = safe_read(fd, buf, sizeof(buf) - 1);
166
derr << "get_kernel_version: failed to read from /proc/version: "
167
<< cpp_strerror(ret) << dendl;
171
if (sscanf(buf, "Linux version %d.%d.%d", a, b, c) != 3) {
172
derr << "get_kernel_version: failed to parse string: '"
173
<< buf << "'" << dendl;
178
dout(0) << " kernel version is " << *a <<"." << *b << "." << *c << dendl;
182
TEMP_FAILURE_RETRY(::close(fd));
141
187
void FileJournal::_check_disk_write_cache() const
189
ostringstream hdparm_cmd;
143
193
if (geteuid() != 0) {
144
dout(10) << __func__ << ": not root, NOT checking disk write "
194
dout(10) << "_check_disk_write_cache: not root, NOT checking disk write "
145
195
<< "cache on raw block device " << fn << dendl;
150
snprintf(cmd, sizeof(cmd), "/sbin/hdparm -W %s > /tmp/out.%d",
151
fn.c_str(), getpid());
152
int r = ::system(cmd);
154
dout(10) << __func__ << ": failed to run '" << cmd
155
<< "', NOT checking disk write cache on " << fn << dendl;
159
snprintf(cmd, sizeof(cmd), "/tmp/out.%d", getpid());
160
FILE *f = ::fopen(cmd, "r");
162
dout(10) << "_open failed to read '" << cmd
163
<< "', NOT checking disk write cache on " << fn << dendl;
170
fgets(s, sizeof(s), f);
199
hdparm_cmd << "/sbin/hdparm -W " << fn;
200
fp = popen(hdparm_cmd.str().c_str(), "r");
202
dout(10) << "_check_disk_write_cache: failed to run /sbin/hdparm: NOT "
203
<< "checking disk write cache on raw block device " << fn << dendl;
209
memset(buf, 0, sizeof(buf));
210
char *line = fgets(buf, sizeof(buf) - 1, fp);
214
derr << "_check_disk_write_cache: fgets error: " << cpp_strerror(ret)
172
if (sscanf(s, " write-caching = %d", &on) == 1) {
175
// check kernel version
177
int fd = ::open("/proc/version", O_RDONLY);
183
int r = sscanf(buf, "Linux version 2.%d.%d", &b, &c);
184
dout(0) << " kernel version is 2." << b << "." << c << dendl;
190
dout(0) << "WARNING: disk write cache is ON; journaling will not be reliable" << dendl;
191
dout(0) << " on kernels prior to 2.6.33 (recent kernels are safe)" << dendl;
192
dout(0) << " disable with 'hdparm -W 0 " << fn << "'" << dendl;
194
<< " ** WARNING: disk write cache is ON on " << fn << ".\n"
195
<< " Journaling will not be reliable on kernels prior to 2.6.33\n"
196
<< " (recent kernels are safe). You can disable the write cache with\n"
197
<< " 'hdparm -W 0 " << fn << "'"
202
dout(10) << "_open disk write cache is off (good) on " << fn << dendl;
225
if (sscanf(line, " write-caching = %d", &on) != 1)
228
dout(10) << "_check_disk_write_cache: disk write cache is off (good) on "
233
// is our kernel new enough?
234
if (get_kernel_version(&a, &b, &c)) {
235
dout(10) << "_check_disk_write_cache: failed to get kernel version."
238
else if (a >= 2 && b >= 6 && c >= 33) {
239
dout(20) << "_check_disk_write_cache: disk write cache is on, but your "
240
<< "kernel is new enough to handle it correctly. (fn:"
241
<< fn << ")" << dendl;
245
<< " ** WARNING: disk write cache is ON on " << fn << ".\n"
246
<< " Journaling will not be reliable on kernels prior to 2.6.33\n"
247
<< " (recent kernels are safe). You can disable the write cache with\n"
248
<< " 'hdparm -W 0 " << fn << "'"
257
derr << "_check_disk_write_cache: fclose error: " << cpp_strerror(ret)
211
264
int FileJournal::_open_file(int64_t oldsize, blksize_t blksize,
267
324
header.start = get_top();
270
buffer::ptr bp = prepare_header();
271
int r = ::pwrite(fd, bp.c_str(), bp.length(), 0);
273
dout(0) << "create write header error " << errno << " " << strerror_r(errno, buf, sizeof(buf)) << dendl;
327
bp = prepare_header();
328
if (TEMP_FAILURE_RETRY(::pwrite(fd, bp.c_str(), bp.length(), 0)) < 0) {
330
derr << "FileJournal::create : create write header error "
331
<< cpp_strerror(ret) << dendl;
277
335
// zero first little bit, too.
279
memset(z, 0, block_size);
280
::pwrite(fd, z, block_size, get_top());
336
ret = posix_memalign(&buf, block_size, block_size);
338
derr << "FileJournal::create: failed to allocate " << block_size
339
<< " bytes of memory: " << cpp_strerror(ret) << dendl;
342
memset(buf, 0, block_size);
343
if (TEMP_FAILURE_RETRY(::pwrite(fd, buf, block_size, get_top())) < 0) {
345
derr << "FileJournal::create: error zeroing first " << block_size
346
<< " bytes " << cpp_strerror(ret) << dendl;
350
needed_space = g_conf.osd_max_write_size << 20;
351
needed_space += (2 * sizeof(entry_header_t)) + get_top();
352
if (header.max_size - header.start < needed_space) {
353
derr << "FileJournal::create: OSD journal is not large enough to hold "
354
<< "osd_max_write_size bytes!" << dendl;
284
359
dout(2) << "create done" << dendl;
366
if (TEMP_FAILURE_RETRY(::close(fd)) < 0) {
368
derr << "FileJournal::create: error closing fd: " << cpp_strerror(ret)
288
377
int FileJournal::open(uint64_t next_seq)
305
394
//<< " vs expected fsid = " << fsid
307
396
if (header.fsid != fsid) {
308
dout(2) << "open fsid doesn't match, invalid (someone else's?) journal" << dendl;
397
derr << "FileJournal::open: open fsid doesn't match, invalid "
398
<< "(someone else's?) journal" << dendl;
311
401
if (header.max_size > max_size) {
312
402
dout(2) << "open journal size " << header.max_size << " > current " << max_size << dendl;
315
405
if (header.block_size != block_size) {
316
406
dout(2) << "open journal block size " << header.block_size << " != current " << block_size << dendl;
319
409
if (header.max_size % header.block_size) {
320
410
dout(2) << "open journal max size " << header.max_size
321
411
<< " not a multiple of block size " << header.block_size << dendl;
324
414
if (header.alignment != block_size && directio) {
325
derr(0) << "open journal alignment " << header.alignment << " does not match block size "
415
dout(0) << "open journal alignment " << header.alignment << " does not match block size "
326
416
<< block_size << " (required for direct_io journal mode)" << dendl;
329
419
if ((header.alignment % PAGE_SIZE) && directio) {
330
derr(0) << "open journal alignment " << header.alignment << " is not multiple of page size " << PAGE_SIZE
420
dout(0) << "open journal alignment " << header.alignment << " is not multiple of page size " << PAGE_SIZE
331
421
<< " (required for direct_io journal mode)" << dendl;
337
425
// looks like a valid header.
338
426
write_pos = 0; // not writeable yet
702
793
pos = 0; // we included the header
704
795
pos = get_top(); // no header, start after that
705
write_bl(pos, second);
796
if (write_bl(pos, second)) {
797
derr << "FileJournal::do_write: write_bl(pos=" << pos
798
<< ") failed" << dendl;
709
::pwrite(fd, hbp.c_str(), hbp.length(), 0);
804
if (TEMP_FAILURE_RETRY(::pwrite(fd, hbp.c_str(), hbp.length(), 0)) < 0) {
806
derr << "FileJournal::do_write: pwrite(fd=" << fd
807
<< ", hbp.length=" << hbp.length() << ") failed :"
808
<< cpp_strerror(err) << dendl;
813
if (write_bl(pos, bl)) {
814
derr << "FileJournal::do_write: write_bl(pos=" << pos
815
<< ") failed" << dendl;
875
982
Mutex::Locker locker(write_lock);
877
984
if (seq < last_committed_seq) {
878
dout(10) << "committed_thru " << seq << " < last_committed_seq " << last_committed_seq << dendl;
985
dout(5) << "committed_thru " << seq << " < last_committed_seq " << last_committed_seq << dendl;
879
986
assert(seq >= last_committed_seq);
882
989
if (seq == last_committed_seq) {
883
dout(10) << "committed_thru " << seq << " == last_committed_seq " << last_committed_seq << dendl;
990
dout(5) << "committed_thru " << seq << " == last_committed_seq " << last_committed_seq << dendl;
887
dout(10) << "committed_thru " << seq << " (last_committed_seq " << last_committed_seq << ")" << dendl;
994
dout(5) << "committed_thru " << seq << " (last_committed_seq " << last_committed_seq << ")" << dendl;
888
995
last_committed_seq = seq;
890
997
// adjust start pointer
1032
void FileJournal::put_throttle(uint64_t ops, uint64_t bytes)
1034
uint64_t new_ops = throttle_ops.put(ops);
1035
uint64_t new_bytes = throttle_bytes.put(bytes);
1036
dout(5) << "put_throttle finished " << ops << " ops and "
1037
<< bytes << " bytes, now "
1038
<< new_ops << " ops and " << new_bytes << " bytes"
1042
logger->inc(l_os_j_ops, ops);
1043
logger->inc(l_os_j_bytes, bytes);
1044
logger->set(l_os_jq_ops, new_ops);
1045
logger->set(l_os_jq_bytes, new_bytes);
1046
logger->set(l_os_jq_max_ops, throttle_ops.get_max());
1047
logger->set(l_os_jq_max_bytes, throttle_bytes.get_max());
927
1051
void FileJournal::make_writeable()