276
276
struct drbd_conf *mdev = (struct drbd_conf *)data;
277
277
enum drbd_disk_state nps;
279
280
nps = drbd_try_outdate_peer(mdev);
280
drbd_request_state(mdev, NS(pdsk, nps));
283
drbd_request_state(mdev, NS(pdsk, nps));
284
here, because we might have been able to re-establish the connection
285
in the meantime. This can only partially be solved in the state's
286
engine is_valid_state() and is_valid_state_transition()
289
nps can be D_INCONSISTENT, D_OUTDATED or D_UNKNOWN.
290
pdsk == D_INCONSISTENT while conn >= C_CONNECTED is valid,
291
therefore we have to have the pre state change check here.
293
spin_lock_irq(&mdev->req_lock);
295
if (ns.conn < C_WF_REPORT_PARAMS) {
297
_drbd_set_state(mdev, ns, CS_VERBOSE, NULL);
299
spin_unlock_irq(&mdev->req_lock);
311
331
while (try++ < max_tries) {
312
332
DRBD_STATE_DEBUG_INIT_VAL(val);
313
r = _drbd_request_state(mdev, mask, val, CS_WAIT_COMPLETE);
333
rv = _drbd_request_state(mdev, mask, val, CS_WAIT_COMPLETE);
315
335
/* in case we first succeeded in outdating the peer,
316
336
* but now suddenly could establish a connection */
317
if (r == SS_CW_FAILED_BY_PEER && mask.pdsk != 0) {
337
if (rv == SS_CW_FAILED_BY_PEER && mask.pdsk != 0) {
323
if (r == SS_NO_UP_TO_DATE_DISK && force &&
343
if (rv == SS_NO_UP_TO_DATE_DISK && force &&
324
344
(mdev->state.disk < D_UP_TO_DATE &&
325
345
mdev->state.disk >= D_INCONSISTENT)) {
326
346
mask.disk = D_MASK;
553
/* input size is expected to be in KB */
536
554
char *ppsize(char *buf, unsigned long long size)
538
/* Needs 9 bytes at max. */
556
/* Needs 9 bytes at max including trailing NUL:
557
* -1ULL ==> "16384 EB" */
539
558
static char units[] = { 'K', 'M', 'G', 'T', 'P', 'E' };
541
while (size >= 10000) {
560
while (size >= 10000 && base < sizeof(units)-1) {
542
561
/* shift + round */
543
562
size = (size >> 10) + !!(size & (1<<9));
546
sprintf(buf, "%lu %cB", (long)size, units[base]);
565
sprintf(buf, "%u %cB", (unsigned)size, units[base]);
647
666
|| prev_size != mdev->ldev->md.md_size_sect;
649
668
if (la_size_changed || md_moved) {
650
671
drbd_al_shrink(mdev); /* All extents inactive. */
651
672
dev_info(DEV, "Writing the whole bitmap, %s\n",
652
673
la_size_changed && md_moved ? "size changed and md moved" :
653
674
la_size_changed ? "size changed" : "md moved");
654
rv = drbd_bitmap_io(mdev, &drbd_bm_write, "size changed"); /* does drbd_resume_io() ! */
675
/* next line implicitly does drbd_suspend_io()+drbd_resume_io() */
676
err = drbd_bitmap_io(mdev, &drbd_bm_write,
677
"size changed", BM_LOCKED_MASK);
655
682
drbd_md_mark_dirty(mdev);
773
void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_seg_s) __must_hold(local)
800
static void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_bio_size)
775
802
struct request_queue * const q = mdev->rq_queue;
776
struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue;
777
int max_segments = mdev->ldev->dc.max_bio_bvecs;
779
max_seg_s = min(queue_max_sectors(b) * queue_logical_block_size(b), max_seg_s);
781
blk_queue_max_hw_sectors(q, max_seg_s >> 9);
803
int max_hw_sectors = max_bio_size >> 9;
804
int max_segments = 0;
806
if (get_ldev_if_state(mdev, D_ATTACHING)) {
807
struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue;
809
max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9);
810
max_segments = mdev->ldev->dc.max_bio_bvecs;
814
blk_queue_logical_block_size(q, 512);
815
blk_queue_max_hw_sectors(q, max_hw_sectors);
816
/* This is the workaround for "bio would need to, but cannot, be split" */
782
817
blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS);
783
blk_queue_max_segment_size(q, max_seg_s);
784
blk_queue_logical_block_size(q, 512);
785
blk_queue_segment_boundary(q, PAGE_SIZE-1);
786
blk_queue_stack_limits(q, b);
788
/* KERNEL BUG in old ll_rw_blk.c
789
* t->max_segment_size = min(t->max_segment_size,b->max_segment_size);
791
* t->max_segment_size = min_not_zero(...,...)
792
* workaround here: */
793
if (queue_max_segment_size(q) == 0)
794
blk_queue_max_segment_size(q, max_seg_s);
796
dev_info(DEV, "max_segment_size ( = BIO size ) = %u\n", queue_max_segment_size(q));
798
if (q->backing_dev_info.ra_pages != b->backing_dev_info.ra_pages) {
799
dev_info(DEV, "Adjusting my ra_pages to backing device's (%lu -> %lu)\n",
800
q->backing_dev_info.ra_pages,
801
b->backing_dev_info.ra_pages);
802
q->backing_dev_info.ra_pages = b->backing_dev_info.ra_pages;
818
blk_queue_segment_boundary(q, PAGE_CACHE_SIZE-1);
820
if (get_ldev_if_state(mdev, D_ATTACHING)) {
821
struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue;
823
blk_queue_stack_limits(q, b);
825
if (q->backing_dev_info.ra_pages != b->backing_dev_info.ra_pages) {
826
dev_info(DEV, "Adjusting my ra_pages to backing device's (%lu -> %lu)\n",
827
q->backing_dev_info.ra_pages,
828
b->backing_dev_info.ra_pages);
829
q->backing_dev_info.ra_pages = b->backing_dev_info.ra_pages;
835
void drbd_reconsider_max_bio_size(struct drbd_conf *mdev)
837
int now, new, local, peer;
839
now = queue_max_hw_sectors(mdev->rq_queue) << 9;
840
local = mdev->local_max_bio_size; /* Eventually last known value, from volatile memory */
841
peer = mdev->peer_max_bio_size; /* Eventually last known value, from meta data */
843
if (get_ldev_if_state(mdev, D_ATTACHING)) {
844
local = queue_max_hw_sectors(mdev->ldev->backing_bdev->bd_disk->queue) << 9;
845
mdev->local_max_bio_size = local;
849
/* We may ignore peer limits if the peer is modern enough.
850
From 8.3.8 onwards the peer can use multiple
851
BIOs for a single peer_request */
852
if (mdev->state.conn >= C_CONNECTED) {
853
if (mdev->agreed_pro_version < 94)
854
peer = mdev->peer_max_bio_size;
855
else if (mdev->agreed_pro_version == 94)
856
peer = DRBD_MAX_SIZE_H80_PACKET;
857
else /* drbd 8.3.8 onwards */
858
peer = DRBD_MAX_BIO_SIZE;
861
new = min_t(int, local, peer);
863
if (mdev->state.role == R_PRIMARY && new < now)
864
dev_err(DEV, "ASSERT FAILED new < now; (%d < %d)\n", new, now);
867
dev_info(DEV, "max BIO size = %u\n", new);
869
drbd_setup_queue_param(mdev, new);
806
872
/* serialize deconfig (worker exiting, doing cleanup)
923
nbc->lo_file = filp_open(nbc->dc.backing_dev, O_RDWR, 0);
924
if (IS_ERR(nbc->lo_file)) {
988
bdev = blkdev_get_by_path(nbc->dc.backing_dev,
989
FMODE_READ | FMODE_WRITE | FMODE_EXCL, mdev);
925
991
dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.backing_dev,
926
PTR_ERR(nbc->lo_file));
928
993
retcode = ERR_OPEN_DISK;
932
inode = nbc->lo_file->f_dentry->d_inode;
934
if (!S_ISBLK(inode->i_mode)) {
935
retcode = ERR_DISK_NOT_BDEV;
939
nbc->md_file = filp_open(nbc->dc.meta_dev, O_RDWR, 0);
940
if (IS_ERR(nbc->md_file)) {
996
nbc->backing_bdev = bdev;
999
* meta_dev_idx >= 0: external fixed size, possibly multiple
1000
* drbd sharing one meta device. TODO in that case, paranoia
1001
* check that [md_bdev, meta_dev_idx] is not yet used by some
1002
* other drbd minor! (if you use drbd.conf + drbdadm, that
1003
* should check it for you already; but if you don't, or
1004
* someone fooled it, we need to double check here)
1006
bdev = blkdev_get_by_path(nbc->dc.meta_dev,
1007
FMODE_READ | FMODE_WRITE | FMODE_EXCL,
1008
(nbc->dc.meta_dev_idx < 0) ?
1009
(void *)mdev : (void *)drbd_m_holder);
941
1011
dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.meta_dev,
942
PTR_ERR(nbc->md_file));
944
1013
retcode = ERR_OPEN_MD_DISK;
948
inode2 = nbc->md_file->f_dentry->d_inode;
950
if (!S_ISBLK(inode2->i_mode)) {
951
retcode = ERR_MD_NOT_BDEV;
955
nbc->backing_bdev = inode->i_bdev;
956
if (bd_claim(nbc->backing_bdev, mdev)) {
957
printk(KERN_ERR "drbd: bd_claim(%p,%p); failed [%p;%p;%u]\n",
958
nbc->backing_bdev, mdev,
959
nbc->backing_bdev->bd_holder,
960
nbc->backing_bdev->bd_contains->bd_holder,
961
nbc->backing_bdev->bd_holders);
962
retcode = ERR_BDCLAIM_DISK;
1016
nbc->md_bdev = bdev;
1018
if ((nbc->backing_bdev == nbc->md_bdev) !=
1019
(nbc->dc.meta_dev_idx == DRBD_MD_INDEX_INTERNAL ||
1020
nbc->dc.meta_dev_idx == DRBD_MD_INDEX_FLEX_INT)) {
1021
retcode = ERR_MD_IDX_INVALID;
968
1027
offsetof(struct bm_extent, lce));
969
1028
if (!resync_lru) {
970
1029
retcode = ERR_NOMEM;
971
goto release_bdev_fail;
974
/* meta_dev_idx >= 0: external fixed size,
975
* possibly multiple drbd sharing one meta device.
976
* TODO in that case, paranoia check that [md_bdev, meta_dev_idx] is
977
* not yet used by some other drbd minor!
978
* (if you use drbd.conf + drbdadm,
979
* that should check it for you already; but if you don't, or someone
980
* fooled it, we need to double check here) */
981
nbc->md_bdev = inode2->i_bdev;
982
if (bd_claim(nbc->md_bdev, (nbc->dc.meta_dev_idx < 0) ? (void *)mdev
983
: (void *) drbd_m_holder)) {
984
retcode = ERR_BDCLAIM_MD_DISK;
985
goto release_bdev_fail;
988
if ((nbc->backing_bdev == nbc->md_bdev) !=
989
(nbc->dc.meta_dev_idx == DRBD_MD_INDEX_INTERNAL ||
990
nbc->dc.meta_dev_idx == DRBD_MD_INDEX_FLEX_INT)) {
991
retcode = ERR_MD_IDX_INVALID;
992
goto release_bdev2_fail;
995
1033
/* RT - for drbd_get_max_capacity() DRBD_MD_INDEX_FLEX_INT */
1015
1053
if (drbd_get_capacity(nbc->md_bdev) < min_md_device_sectors) {
1016
retcode = ERR_MD_DISK_TO_SMALL;
1054
retcode = ERR_MD_DISK_TOO_SMALL;
1017
1055
dev_warn(DEV, "refusing attach: md-device too small, "
1018
1056
"at least %llu sectors needed for this meta-disk type\n",
1019
1057
(unsigned long long) min_md_device_sectors);
1020
goto release_bdev2_fail;
1023
1061
/* Make sure the new disk is big enough
1024
1062
* (we may currently be R_PRIMARY with no local disk...) */
1025
1063
if (drbd_get_max_capacity(nbc) <
1026
1064
drbd_get_capacity(mdev->this_bdev)) {
1027
retcode = ERR_DISK_TO_SMALL;
1028
goto release_bdev2_fail;
1065
retcode = ERR_DISK_TOO_SMALL;
1031
1069
nbc->known_size = drbd_get_capacity(nbc->backing_bdev);
1149
1188
mdev->read_cnt = 0;
1150
1189
mdev->writ_cnt = 0;
1152
max_seg_s = DRBD_MAX_SEGMENT_SIZE;
1153
if (mdev->state.conn == C_CONNECTED) {
1154
/* We are Primary, Connected, and now attach a new local
1155
* backing store. We must not increase the user visible maximum
1156
* bio size on this device to something the peer may not be
1157
* able to handle. */
1158
if (mdev->agreed_pro_version < 94)
1159
max_seg_s = queue_max_segment_size(mdev->rq_queue);
1160
else if (mdev->agreed_pro_version == 94)
1161
max_seg_s = DRBD_MAX_SIZE_H80_PACKET;
1162
/* else: drbd 8.3.9 and later, stay with default */
1165
drbd_setup_queue_param(mdev, max_seg_s);
1191
drbd_reconsider_max_bio_size(mdev);
1167
1193
/* If I am currently not R_PRIMARY,
1168
1194
* but meta data primary indicator is set,
1194
1220
if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC)) {
1195
1221
dev_info(DEV, "Assuming that all blocks are out of sync "
1196
1222
"(aka FullSync)\n");
1197
if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from attaching")) {
1223
if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write,
1224
"set_n_write from attaching", BM_LOCKED_MASK)) {
1198
1225
retcode = ERR_IO_MD_DISK;
1199
1226
goto force_diskless_dec;
1202
if (drbd_bitmap_io(mdev, &drbd_bm_read, "read from attaching") < 0) {
1229
if (drbd_bitmap_io(mdev, &drbd_bm_read,
1230
"read from attaching", BM_LOCKED_MASK) < 0) {
1203
1231
retcode = ERR_IO_MD_DISK;
1204
1232
goto force_diskless_dec;
1312
1340
STATIC int drbd_nl_detach(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
1313
1341
struct drbd_nl_cfg_reply *reply)
1343
enum drbd_ret_code retcode;
1315
1345
drbd_suspend_io(mdev); /* so no-one is stuck in drbd_al_begin_io */
1316
reply->ret_code = drbd_request_state(mdev, NS(disk, D_DISKLESS));
1317
if (mdev->state.disk == D_DISKLESS)
1318
wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt));
1346
retcode = drbd_request_state(mdev, NS(disk, D_FAILED));
1347
/* D_FAILED will transition to DISKLESS. */
1348
ret = wait_event_interruptible(mdev->misc_wait,
1349
mdev->state.disk != D_FAILED);
1319
1350
drbd_resume_io(mdev);
1351
if ((int)retcode == (int)SS_IS_DISKLESS)
1352
retcode = SS_NOTHING_TO_DO;
1355
reply->ret_code = retcode;
1595
1638
struct drbd_nl_cfg_reply *reply)
1641
struct disconnect dc;
1643
memset(&dc, 0, sizeof(struct disconnect));
1644
if (!disconnect_from_tags(mdev, nlp->tag_list, &dc)) {
1645
retcode = ERR_MANDATORY_TAG;
1650
spin_lock_irq(&mdev->req_lock);
1651
if (mdev->state.conn >= C_WF_CONNECTION)
1652
_drbd_set_state(_NS(mdev, conn, C_DISCONNECTING), CS_HARD, NULL);
1653
spin_unlock_irq(&mdev->req_lock);
1599
1657
retcode = _drbd_request_state(mdev, NS(conn, C_DISCONNECTING), CS_ORDERED);