~ubuntu-branches/ubuntu/trusty/drbd8/trusty

Viewing changes to drbd/drbd_nl.c

  • Committer: Bazaar Package Importer
  • Author(s): Andres Rodriguez
  • Date: 2011-07-05 15:40:13 UTC
  • mfrom: (1.4.6 upstream)
  • Revision ID: james.westby@ubuntu.com-20110705154013-f9l32owj6mi9e1p0
Tags: 2:8.3.11-0ubuntu1
* New upstream release
* debian/patches/01_ubuntu_cn_idx.dpatch: Refreshed.

@@ -275,9 +275,28 @@
 {
         struct drbd_conf *mdev = (struct drbd_conf *)data;
         enum drbd_disk_state nps;
+        union drbd_state ns;
 
         nps = drbd_try_outdate_peer(mdev);
-        drbd_request_state(mdev, NS(pdsk, nps));
+
+        /* Not using
+           drbd_request_state(mdev, NS(pdsk, nps));
+           here, because we might were able to re-establish the connection
+           in the meantime. This can only partially be solved in the state's
+           engine is_valid_state() and is_valid_state_transition()
+           functions.
+
+           nps can be D_INCONSISTENT, D_OUTDATED or D_UNKNOWN.
+           pdsk == D_INCONSISTENT while conn >= C_CONNECTED is valid,
+           therefore we have to have the pre state change check here.
+        */
+        spin_lock_irq(&mdev->req_lock);
+        ns = mdev->state;
+        if (ns.conn < C_WF_REPORT_PARAMS) {
+                ns.pdsk = nps;
+                _drbd_set_state(mdev, ns, CS_VERBOSE, NULL);
+        }
+        spin_unlock_irq(&mdev->req_lock);
 
         return 0;
 }
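
The hunk above computes nps outside the lock and then re-validates the connection state under req_lock before applying it, so a reconnect that happened in the meantime is not clobbered by a stale fencing result. A rough userspace analogy of that check-under-lock pattern (hypothetical names, not DRBD code):

#include <pthread.h>
#include <stdio.h>

enum conn_state { STANDALONE, CONNECTING, CONNECTED };

struct dev {
        pthread_mutex_t lock;
        enum conn_state conn;
        int peer_disk;                  /* analogue of pdsk */
};

/* slow operation performed without holding the lock */
static int try_outdate_peer(struct dev *d)
{
        (void)d;
        return 1;                       /* pretend the peer is now outdated */
}

static void fence_peer(struct dev *d)
{
        int nps = try_outdate_peer(d);  /* may race with a reconnect */

        pthread_mutex_lock(&d->lock);
        if (d->conn < CONNECTED)        /* still disconnected: safe to apply */
                d->peer_disk = nps;
        /* else: the connection came back; keep the freshly negotiated state */
        pthread_mutex_unlock(&d->lock);
}

int main(void)
{
        struct dev d = { .lock = PTHREAD_MUTEX_INITIALIZER,
                         .conn = STANDALONE, .peer_disk = 0 };

        fence_peer(&d);
        printf("peer_disk = %d\n", d.peer_disk);
        return 0;
}
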
@@ -291,10 +310,11 @@
                 dev_err(DEV, "out of mem, failed to invoke fence-peer helper\n");
 }
 
-int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
+enum drbd_state_rv
+drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
 {
         const int max_tries = 4;
-        int r = 0;
+        enum drbd_state_rv rv = SS_UNKNOWN_ERROR;
         int try = 0;
         int forced = 0;
         union drbd_state mask, val;
@@ -310,17 +330,17 @@
 
         while (try++ < max_tries) {
                 DRBD_STATE_DEBUG_INIT_VAL(val);
-                r = _drbd_request_state(mdev, mask, val, CS_WAIT_COMPLETE);
+                rv = _drbd_request_state(mdev, mask, val, CS_WAIT_COMPLETE);
 
                 /* in case we first succeeded to outdate,
                  * but now suddenly could establish a connection */
-                if (r == SS_CW_FAILED_BY_PEER && mask.pdsk != 0) {
+                if (rv == SS_CW_FAILED_BY_PEER && mask.pdsk != 0) {
                         val.pdsk = 0;
                         mask.pdsk = 0;
                         continue;
                 }
 
-                if (r == SS_NO_UP_TO_DATE_DISK && force &&
+                if (rv == SS_NO_UP_TO_DATE_DISK && force &&
                     (mdev->state.disk < D_UP_TO_DATE &&
                      mdev->state.disk >= D_INCONSISTENT)) {
                         mask.disk = D_MASK;
@@ -329,7 +349,7 @@
                         continue;
                 }
 
-                if (r == SS_NO_UP_TO_DATE_DISK &&
+                if (rv == SS_NO_UP_TO_DATE_DISK &&
                     mdev->state.disk == D_CONSISTENT && mask.pdsk == 0) {
                         D_ASSERT(mdev->state.pdsk == D_UNKNOWN);
                         nps = drbd_try_outdate_peer(mdev);
@@ -345,9 +365,9 @@
                         continue;
                 }
 
-                if (r == SS_NOTHING_TO_DO)
+                if (rv == SS_NOTHING_TO_DO)
                         goto fail;
-                if (r == SS_PRIMARY_NOP && mask.pdsk == 0) {
+                if (rv == SS_PRIMARY_NOP && mask.pdsk == 0) {
                         nps = drbd_try_outdate_peer(mdev);
 
                         if (force && nps > D_OUTDATED) {
@@ -360,26 +380,25 @@
 
                         continue;
                 }
-                if (r == SS_TWO_PRIMARIES) {
+                if (rv == SS_TWO_PRIMARIES) {
                         /* Maybe the peer is detected as dead very soon...
                            retry at most once more in this case. */
-                        __set_current_state(TASK_INTERRUPTIBLE);
-                        schedule_timeout((mdev->net_conf->ping_timeo+1)*HZ/10);
+                        schedule_timeout_interruptible((mdev->net_conf->ping_timeo+1)*HZ/10);
                         if (try < max_tries)
                                 try = max_tries - 1;
                         continue;
                 }
-                if (r < SS_SUCCESS) {
+                if (rv < SS_SUCCESS) {
                         DRBD_STATE_DEBUG_INIT_VAL(val);
-                        r = _drbd_request_state(mdev, mask, val,
+                        rv = _drbd_request_state(mdev, mask, val,
                                                 CS_VERBOSE + CS_WAIT_COMPLETE);
-                        if (r < SS_SUCCESS)
+                        if (rv < SS_SUCCESS)
                                 goto fail;
                 }
                 break;
         }
 
-        if (r < SS_SUCCESS)
+        if (rv < SS_SUCCESS)
                 goto fail;
 
         if (forced)
@@ -389,7 +408,7 @@
         wait_event(mdev->misc_wait, atomic_read(&mdev->ap_pending_cnt) == 0);
 
         if (new_role == R_SECONDARY) {
-                set_disk_ro(mdev->vdisk, TRUE);
+                set_disk_ro(mdev->vdisk, true);
                 if (get_ldev(mdev)) {
                         mdev->ldev->md.uuid[UI_CURRENT] &= ~(u64)1;
                         put_ldev(mdev);
@@ -399,7 +418,7 @@
                         mdev->net_conf->want_lose = 0;
                         put_net_conf(mdev);
                 }
-                set_disk_ro(mdev->vdisk, FALSE);
+                set_disk_ro(mdev->vdisk, false);
                 if (get_ldev(mdev)) {
                         if (((mdev->state.conn < C_CONNECTED ||
                                mdev->state.pdsk <= D_FAILED)
@@ -411,10 +430,8 @@
                 }
         }
 
-        if ((new_role == R_SECONDARY) && get_ldev(mdev)) {
-                drbd_al_to_on_disk_bm(mdev);
-                put_ldev(mdev);
-        }
+        /* writeout of activity log covered areas of the bitmap
+         * to stable storage done in after state change already */
 
         if (mdev->state.conn >= C_WF_REPORT_PARAMS) {
                 /* if this was forced, we should consider sync */
@@ -428,7 +445,7 @@
         drbd_kobject_uevent(mdev);
  fail:
         mutex_unlock(&mdev->state_mutex);
-        return r;
+        return rv;
 }
 
 STATIC struct drbd_conf *ensure_mdev(int minor, int create)
@@ -533,17 +550,19 @@
         }
 }
 
+/* input size is expected to be in KB */
 char *ppsize(char *buf, unsigned long long size)
 {
-        /* Needs 9 bytes at max. */
+        /* Needs 9 bytes at max including trailing NUL:
+         * -1ULL ==> "16384 EB" */
         static char units[] = { 'K', 'M', 'G', 'T', 'P', 'E' };
         int base = 0;
-        while (size >= 10000) {
+        while (size >= 10000 && base < sizeof(units)-1) {
                 /* shift + round */
                 size = (size >> 10) + !!(size & (1<<9));
                 base++;
         }
-        sprintf(buf, "%lu %cB", (long)size, units[base]);
+        sprintf(buf, "%u %cB", (unsigned)size, units[base]);
 
         return buf;
 }
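
The tightened ppsize() above clamps the unit index and keeps at most four digits by shifting in 1024 steps with rounding. An illustrative userspace re-implementation of the same loop (not the kernel function itself) that can be compiled and run directly:

#include <stdio.h>

static char *ppsize_sketch(char *buf, unsigned long long size_kb)
{
        static const char units[] = { 'K', 'M', 'G', 'T', 'P', 'E' };
        int base = 0;

        /* keep at most four digits; round at the halfway point */
        while (size_kb >= 10000 && base < (int)sizeof(units) - 1) {
                size_kb = (size_kb >> 10) + !!(size_kb & (1 << 9));
                base++;
        }
        sprintf(buf, "%u %cB", (unsigned)size_kb, units[base]);
        return buf;
}

int main(void)
{
        char buf[16];

        printf("%s\n", ppsize_sketch(buf, 1048576ULL)); /* prints "1024 MB" */
        printf("%s\n", ppsize_sketch(buf, ~0ULL));      /* prints "16384 EB" */
        return 0;
}
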
@@ -582,7 +601,7 @@
  * Returns 0 on success, negative return values indicate errors.
  * You should call drbd_md_sync() after calling this function.
  */
-enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *mdev, enum dds_flags flags) __must_hold(local)
+enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds_flags flags) __must_hold(local)
 {
         sector_t prev_first_sect, prev_size; /* previous meta location */
         sector_t la_size;
@@ -647,11 +666,19 @@
                 || prev_size       != mdev->ldev->md.md_size_sect;
 
         if (la_size_changed || md_moved) {
+                int err;
+
                 drbd_al_shrink(mdev); /* All extents inactive. */
                 dev_info(DEV, "Writing the whole bitmap, %s\n",
                          la_size_changed && md_moved ? "size changed and md moved" :
                          la_size_changed ? "size changed" : "md moved");
-                rv = drbd_bitmap_io(mdev, &drbd_bm_write, "size changed"); /* does drbd_resume_io() ! */
+                /* next line implicitly does drbd_suspend_io()+drbd_resume_io() */
+                err = drbd_bitmap_io(mdev, &drbd_bm_write,
+                                "size changed", BM_LOCKED_MASK);
+                if (err) {
+                        rv = dev_size_error;
+                        goto out;
+                }
                 drbd_md_mark_dirty(mdev);
         }
 
@@ -770,37 +797,76 @@
         return 0;
 }
 
-void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_seg_s) __must_hold(local)
+static void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_bio_size)
 {
         struct request_queue * const q = mdev->rq_queue;
-        struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue;
-        int max_segments = mdev->ldev->dc.max_bio_bvecs;
-
-        max_seg_s = min(queue_max_sectors(b) * queue_logical_block_size(b), max_seg_s);
-
-        blk_queue_max_hw_sectors(q, max_seg_s >> 9);
+        int max_hw_sectors = max_bio_size >> 9;
+        int max_segments = 0;
+
+        if (get_ldev_if_state(mdev, D_ATTACHING)) {
+                struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue;
+
+                max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9);
+                max_segments = mdev->ldev->dc.max_bio_bvecs;
+                put_ldev(mdev);
+        }
+
+        blk_queue_logical_block_size(q, 512);
+        blk_queue_max_hw_sectors(q, max_hw_sectors);
+        /* This is the workaround for "bio would need to, but cannot, be split" */
         blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS);
-        blk_queue_max_segment_size(q, max_seg_s);
-        blk_queue_logical_block_size(q, 512);
-        blk_queue_segment_boundary(q, PAGE_SIZE-1);
-        blk_queue_stack_limits(q, b);
-
-        /* KERNEL BUG in old ll_rw_blk.c
-         * t->max_segment_size = min(t->max_segment_size,b->max_segment_size);
-         * should be
-         * t->max_segment_size = min_not_zero(...,...)
-         * workaround here: */
-        if (queue_max_segment_size(q) == 0)
-                blk_queue_max_segment_size(q, max_seg_s);
-
-        dev_info(DEV, "max_segment_size ( = BIO size ) = %u\n", queue_max_segment_size(q));
-
-        if (q->backing_dev_info.ra_pages != b->backing_dev_info.ra_pages) {
-                dev_info(DEV, "Adjusting my ra_pages to backing device's (%lu -> %lu)\n",
-                     q->backing_dev_info.ra_pages,
-                     b->backing_dev_info.ra_pages);
-                q->backing_dev_info.ra_pages = b->backing_dev_info.ra_pages;
-        }
+        blk_queue_segment_boundary(q, PAGE_CACHE_SIZE-1);
+
+        if (get_ldev_if_state(mdev, D_ATTACHING)) {
+                struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue;
+
+                blk_queue_stack_limits(q, b);
+
+                if (q->backing_dev_info.ra_pages != b->backing_dev_info.ra_pages) {
+                        dev_info(DEV, "Adjusting my ra_pages to backing device's (%lu -> %lu)\n",
+                                 q->backing_dev_info.ra_pages,
+                                 b->backing_dev_info.ra_pages);
+                        q->backing_dev_info.ra_pages = b->backing_dev_info.ra_pages;
+                }
+                put_ldev(mdev);
+        }
+}
+
+void drbd_reconsider_max_bio_size(struct drbd_conf *mdev)
+{
+        int now, new, local, peer;
+
+        now = queue_max_hw_sectors(mdev->rq_queue) << 9;
+        local = mdev->local_max_bio_size; /* Eventually last known value, from volatile memory */
+        peer = mdev->peer_max_bio_size; /* Eventually last known value, from meta data */
+
+        if (get_ldev_if_state(mdev, D_ATTACHING)) {
+                local = queue_max_hw_sectors(mdev->ldev->backing_bdev->bd_disk->queue) << 9;
+                mdev->local_max_bio_size = local;
+                put_ldev(mdev);
+        }
+
+        /* We may ignore peer limits if the peer is modern enough.
+           Because new from 8.3.8 onwards the peer can use multiple
+           BIOs for a single peer_request */
+        if (mdev->state.conn >= C_CONNECTED) {
+                if (mdev->agreed_pro_version < 94)
+                        peer = mdev->peer_max_bio_size;
+                else if (mdev->agreed_pro_version == 94)
+                        peer = DRBD_MAX_SIZE_H80_PACKET;
+                else /* drbd 8.3.8 onwards */
+                        peer = DRBD_MAX_BIO_SIZE;
+        }
+
+        new = min_t(int, local, peer);
+
+        if (mdev->state.role == R_PRIMARY && new < now)
+                dev_err(DEV, "ASSERT FAILED new < now; (%d < %d)\n", new, now);
+
+        if (new != now)
+                dev_info(DEV, "max BIO size = %u\n", new);
+
+        drbd_setup_queue_param(mdev, new);
 }
 
 /* serialize deconfig (worker exiting, doing cleanup)
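
The new drbd_reconsider_max_bio_size() above settles on the smaller of what the local backing device accepts and what the peer is known to handle, with protocol-94 peers capped at the old header-80 packet limit and newer peers allowed the full BIO size because they can split requests themselves. An illustrative userspace sketch of that negotiation (not DRBD code; the two constants are placeholders, the real values live in drbd_int.h):

#include <stdio.h>

#define SKETCH_H80_PACKET_LIMIT  (32u * 1024)    /* placeholder value */
#define SKETCH_MAX_BIO_SIZE      (1024u * 1024)  /* placeholder value */

static unsigned int reconsider_max_bio_size(unsigned int local,
                                            unsigned int last_known_peer,
                                            int connected,
                                            int agreed_pro_version)
{
        unsigned int peer = last_known_peer;

        if (connected) {
                if (agreed_pro_version < 94)
                        peer = last_known_peer;         /* trust the stored value */
                else if (agreed_pro_version == 94)
                        peer = SKETCH_H80_PACKET_LIMIT; /* old wire-format limit */
                else
                        peer = SKETCH_MAX_BIO_SIZE;     /* peer splits BIOs itself */
        }
        return local < peer ? local : peer;
}

int main(void)
{
        /* 1 MiB capable local disk, peer on protocol 96 vs. protocol 94 */
        printf("%u\n", reconsider_max_bio_size(1024u * 1024, 4096, 1, 96));
        printf("%u\n", reconsider_max_bio_size(1024u * 1024, 4096, 1, 94));
        return 0;
}
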
@@ -863,16 +929,15 @@
 STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
                              struct drbd_nl_cfg_reply *reply)
 {
-        enum drbd_ret_codes retcode;
+        enum drbd_ret_code retcode;
         enum determine_dev_size dd;
         sector_t max_possible_sectors;
         sector_t min_md_device_sectors;
         struct drbd_backing_dev *nbc = NULL; /* new_backing_conf */
-        struct inode *inode, *inode2;
+        struct block_device *bdev;
         struct lru_cache *resync_lru = NULL;
         union drbd_state ns, os;
-        unsigned int max_seg_s;
-        int rv;
+        enum drbd_state_rv rv;
         int cp_discovered = 0;
         int logical_block_size;
 
@@ -920,46 +985,40 @@
                 }
         }
 
-        nbc->lo_file = filp_open(nbc->dc.backing_dev, O_RDWR, 0);
-        if (IS_ERR(nbc->lo_file)) {
+        bdev = blkdev_get_by_path(nbc->dc.backing_dev,
+                                  FMODE_READ | FMODE_WRITE | FMODE_EXCL, mdev);
+        if (IS_ERR(bdev)) {
                 dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.backing_dev,
-                    PTR_ERR(nbc->lo_file));
-                nbc->lo_file = NULL;
+                        PTR_ERR(bdev));
                 retcode = ERR_OPEN_DISK;
                 goto fail;
         }
-
-        inode = nbc->lo_file->f_dentry->d_inode;
-
-        if (!S_ISBLK(inode->i_mode)) {
-                retcode = ERR_DISK_NOT_BDEV;
-                goto fail;
-        }
-
-        nbc->md_file = filp_open(nbc->dc.meta_dev, O_RDWR, 0);
-        if (IS_ERR(nbc->md_file)) {
+        nbc->backing_bdev = bdev;
+
+        /*
+         * meta_dev_idx >= 0: external fixed size, possibly multiple
+         * drbd sharing one meta device.  TODO in that case, paranoia
+         * check that [md_bdev, meta_dev_idx] is not yet used by some
+         * other drbd minor!  (if you use drbd.conf + drbdadm, that
+         * should check it for you already; but if you don't, or
+         * someone fooled it, we need to double check here)
+         */
+        bdev = blkdev_get_by_path(nbc->dc.meta_dev,
+                                  FMODE_READ | FMODE_WRITE | FMODE_EXCL,
+                                  (nbc->dc.meta_dev_idx < 0) ?
+                                  (void *)mdev : (void *)drbd_m_holder);
+        if (IS_ERR(bdev)) {
                 dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.meta_dev,
-                    PTR_ERR(nbc->md_file));
-                nbc->md_file = NULL;
+                        PTR_ERR(bdev));
                 retcode = ERR_OPEN_MD_DISK;
                 goto fail;
         }
-
-        inode2 = nbc->md_file->f_dentry->d_inode;
-
-        if (!S_ISBLK(inode2->i_mode)) {
-                retcode = ERR_MD_NOT_BDEV;
-                goto fail;
-        }
-
-        nbc->backing_bdev = inode->i_bdev;
-        if (bd_claim(nbc->backing_bdev, mdev)) {
-                printk(KERN_ERR "drbd: bd_claim(%p,%p); failed [%p;%p;%u]\n",
-                       nbc->backing_bdev, mdev,
-                       nbc->backing_bdev->bd_holder,
-                       nbc->backing_bdev->bd_contains->bd_holder,
-                       nbc->backing_bdev->bd_holders);
-                retcode = ERR_BDCLAIM_DISK;
+        nbc->md_bdev = bdev;
+
+        if ((nbc->backing_bdev == nbc->md_bdev) !=
+            (nbc->dc.meta_dev_idx == DRBD_MD_INDEX_INTERNAL ||
+             nbc->dc.meta_dev_idx == DRBD_MD_INDEX_FLEX_INT)) {
+                retcode = ERR_MD_IDX_INVALID;
                 goto fail;
         }
 
@@ -968,28 +1027,7 @@
                         offsetof(struct bm_extent, lce));
         if (!resync_lru) {
                 retcode = ERR_NOMEM;
-                goto release_bdev_fail;
-        }
-
-        /* meta_dev_idx >= 0: external fixed size,
-         * possibly multiple drbd sharing one meta device.
-         * TODO in that case, paranoia check that [md_bdev, meta_dev_idx] is
-         * not yet used by some other drbd minor!
-         * (if you use drbd.conf + drbdadm,
-         * that should check it for you already; but if you don't, or someone
-         * fooled it, we need to double check here) */
-        nbc->md_bdev = inode2->i_bdev;
-        if (bd_claim(nbc->md_bdev, (nbc->dc.meta_dev_idx < 0) ? (void *)mdev
-                                : (void *) drbd_m_holder)) {
-                retcode = ERR_BDCLAIM_MD_DISK;
-                goto release_bdev_fail;
-        }
-
-        if ((nbc->backing_bdev == nbc->md_bdev) !=
-            (nbc->dc.meta_dev_idx == DRBD_MD_INDEX_INTERNAL ||
-             nbc->dc.meta_dev_idx == DRBD_MD_INDEX_FLEX_INT)) {
-                retcode = ERR_MD_IDX_INVALID;
-                goto release_bdev2_fail;
+                goto fail;
         }
 
         /* RT - for drbd_get_max_capacity() DRBD_MD_INDEX_FLEX_INT */
@@ -999,8 +1037,8 @@
                 dev_err(DEV, "max capacity %llu smaller than disk size %llu\n",
                         (unsigned long long) drbd_get_max_capacity(nbc),
                         (unsigned long long) nbc->dc.disk_size);
-                retcode = ERR_DISK_TO_SMALL;
-                goto release_bdev2_fail;
+                retcode = ERR_DISK_TOO_SMALL;
+                goto fail;
         }
 
         if (nbc->dc.meta_dev_idx < 0) {
@@ -1013,19 +1051,19 @@
         }
 
         if (drbd_get_capacity(nbc->md_bdev) < min_md_device_sectors) {
-                retcode = ERR_MD_DISK_TO_SMALL;
+                retcode = ERR_MD_DISK_TOO_SMALL;
                 dev_warn(DEV, "refusing attach: md-device too small, "
                      "at least %llu sectors needed for this meta-disk type\n",
                      (unsigned long long) min_md_device_sectors);
-                goto release_bdev2_fail;
+                goto fail;
         }
 
         /* Make sure the new disk is big enough
          * (we may currently be R_PRIMARY with no local disk...) */
         if (drbd_get_max_capacity(nbc) <
             drbd_get_capacity(mdev->this_bdev)) {
-                retcode = ERR_DISK_TO_SMALL;
-                goto release_bdev2_fail;
+                retcode = ERR_DISK_TOO_SMALL;
+                goto fail;
         }
 
         nbc->known_size = drbd_get_capacity(nbc->backing_bdev);
@@ -1045,10 +1083,11 @@
         /* and for any other previously queued work */
         drbd_flush_workqueue(mdev);
 
-        retcode = _drbd_request_state(mdev, NS(disk, D_ATTACHING), CS_VERBOSE);
+        rv = _drbd_request_state(mdev, NS(disk, D_ATTACHING), CS_VERBOSE);
+        retcode = rv;  /* FIXME: Type mismatch. */
         drbd_resume_io(mdev);
-        if (retcode < SS_SUCCESS)
-                goto release_bdev2_fail;
+        if (rv < SS_SUCCESS)
+                goto fail;
 
         if (!get_ldev_if_state(mdev, D_ATTACHING))
                 goto force_diskless;
@@ -1104,7 +1143,7 @@
         if (drbd_md_test_flag(nbc, MDF_CONSISTENT) &&
             drbd_new_dev_size(mdev, nbc, 0) < nbc->md.la_size_sect) {
                 dev_warn(DEV, "refusing to truncate a consistent device\n");
-                retcode = ERR_DISK_TO_SMALL;
+                retcode = ERR_DISK_TOO_SMALL;
                 goto force_diskless_dec;
         }
 
@@ -1149,20 +1188,7 @@
         mdev->read_cnt = 0;
         mdev->writ_cnt = 0;
 
-        max_seg_s = DRBD_MAX_SEGMENT_SIZE;
-        if (mdev->state.conn == C_CONNECTED) {
-                /* We are Primary, Connected, and now attach a new local
-                 * backing store. We must not increase the user visible maximum
-                 * bio size on this device to something the peer may not be
-                 * able to handle. */
-                if (mdev->agreed_pro_version < 94)
-                        max_seg_s = queue_max_segment_size(mdev->rq_queue);
-                else if (mdev->agreed_pro_version == 94)
-                        max_seg_s = DRBD_MAX_SIZE_H80_PACKET;
-                /* else: drbd 8.3.9 and later, stay with default */
-        }
-
-        drbd_setup_queue_param(mdev, max_seg_s);
+        drbd_reconsider_max_bio_size(mdev);
 
         /* If I am currently not R_PRIMARY,
          * but meta data primary indicator is set,
@@ -1184,7 +1210,7 @@
             !drbd_md_test_flag(mdev->ldev, MDF_CONNECTED_IND))
                 set_bit(USE_DEGR_WFC_T, &mdev->flags);
 
-        dd = drbd_determin_dev_size(mdev, 0);
+        dd = drbd_determine_dev_size(mdev, 0);
         if (dd == dev_size_error) {
                 retcode = ERR_NOMEM_BITMAP;
                 goto force_diskless_dec;
@@ -1194,12 +1220,14 @@
         if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC)) {
                 dev_info(DEV, "Assuming that all blocks are out of sync "
                      "(aka FullSync)\n");
-                if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from attaching")) {
+                if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write,
+                        "set_n_write from attaching", BM_LOCKED_MASK)) {
                         retcode = ERR_IO_MD_DISK;
                         goto force_diskless_dec;
                 }
         } else {
-                if (drbd_bitmap_io(mdev, &drbd_bm_read, "read from attaching") < 0) {
+                if (drbd_bitmap_io(mdev, &drbd_bm_read,
+                        "read from attaching", BM_LOCKED_MASK) < 0) {
                         retcode = ERR_IO_MD_DISK;
                         goto force_diskless_dec;
                 }
@@ -1207,7 +1235,11 @@
 
         if (cp_discovered) {
                 drbd_al_apply_to_bm(mdev);
-                drbd_al_to_on_disk_bm(mdev);
+                if (drbd_bitmap_io(mdev, &drbd_bm_write,
+                        "crashed primary apply AL", BM_LOCKED_MASK)) {
+                        retcode = ERR_IO_MD_DISK;
+                        goto force_diskless_dec;
+                }
         }
 
         if (_drbd_bm_total_weight(mdev) == drbd_bm_bits(mdev))
@@ -1283,18 +1315,14 @@
  force_diskless:
         drbd_force_state(mdev, NS(disk, D_FAILED));
         drbd_md_sync(mdev);
- release_bdev2_fail:
-        if (nbc)
-                bd_release(nbc->md_bdev);
- release_bdev_fail:
-        if (nbc)
-                bd_release(nbc->backing_bdev);
  fail:
         if (nbc) {
-                if (nbc->lo_file)
-                        fput(nbc->lo_file);
-                if (nbc->md_file)
-                        fput(nbc->md_file);
+                if (nbc->backing_bdev)
+                        blkdev_put(nbc->backing_bdev,
+                                   FMODE_READ | FMODE_WRITE | FMODE_EXCL);
+                if (nbc->md_bdev)
+                        blkdev_put(nbc->md_bdev,
+                                   FMODE_READ | FMODE_WRITE | FMODE_EXCL);
                 kfree(nbc);
         }
         lc_destroy(resync_lru);
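
The attach path above now claims both block devices with blkdev_get_by_path(..., FMODE_READ | FMODE_WRITE | FMODE_EXCL, holder) and undoes the claim in the fail path with blkdev_put() using the same mode flags, replacing the old filp_open()/bd_claim() sequence. A minimal out-of-tree sketch of that pairing (hypothetical example module, placeholder device path; not part of DRBD):

#include <linux/module.h>
#include <linux/blkdev.h>
#include <linux/err.h>

static struct block_device *example_bdev;

static int __init claim_example_init(void)
{
        /* the path is a placeholder; FMODE_EXCL requires a non-NULL holder,
         * and the same mode flags must be passed to blkdev_put() later */
        example_bdev = blkdev_get_by_path("/dev/sdz1",
                                          FMODE_READ | FMODE_WRITE | FMODE_EXCL,
                                          THIS_MODULE);
        if (IS_ERR(example_bdev))
                return PTR_ERR(example_bdev);
        return 0;
}

static void __exit claim_example_exit(void)
{
        /* releases the exclusive claim taken at init time */
        blkdev_put(example_bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
}

module_init(claim_example_init);
module_exit(claim_example_exit);
MODULE_LICENSE("GPL");
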
@@ -1312,11 +1340,19 @@
 STATIC int drbd_nl_detach(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
                           struct drbd_nl_cfg_reply *reply)
 {
+        enum drbd_ret_code retcode;
+        int ret;
         drbd_suspend_io(mdev); /* so no-one is stuck in drbd_al_begin_io */
-        reply->ret_code = drbd_request_state(mdev, NS(disk, D_DISKLESS));
-        if (mdev->state.disk == D_DISKLESS)
-                wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt));
+        retcode = drbd_request_state(mdev, NS(disk, D_FAILED));
+        /* D_FAILED will transition to DISKLESS. */
+        ret = wait_event_interruptible(mdev->misc_wait,
+                        mdev->state.disk != D_FAILED);
         drbd_resume_io(mdev);
+        if ((int)retcode == (int)SS_IS_DISKLESS)
+                retcode = SS_NOTHING_TO_DO;
+        if (ret)
+                retcode = ERR_INTR;
+        reply->ret_code = retcode;
         return 0;
 }
 
@@ -1324,7 +1360,7 @@
                             struct drbd_nl_cfg_reply *reply)
 {
         int i, ns;
-        enum drbd_ret_codes retcode;
+        enum drbd_ret_code retcode;
         struct net_conf *new_conf = NULL;
         struct crypto_hash *tfm = NULL;
         struct crypto_hash *integrity_w_tfm = NULL;
@@ -1369,6 +1405,8 @@
         new_conf->wire_protocol    = DRBD_PROT_C;
         new_conf->ping_timeo       = DRBD_PING_TIMEO_DEF;
         new_conf->rr_conflict      = DRBD_RR_CONFLICT_DEF;
+        new_conf->on_congestion    = DRBD_ON_CONGESTION_DEF;
+        new_conf->cong_extents     = DRBD_CONG_EXTENTS_DEF;
 
         if (!net_conf_from_tags(mdev, nlp->tag_list, new_conf)) {
                 retcode = ERR_MANDATORY_TAG;
@@ -1390,6 +1428,11 @@
                 }
         }
 
+        if (new_conf->on_congestion != OC_BLOCK && new_conf->wire_protocol != DRBD_PROT_A) {
+                retcode = ERR_CONG_NOT_PROTO_A;
+                goto fail;
+        }
+
         if (mdev->state.role == R_PRIMARY && new_conf->want_lose) {
                 retcode = ERR_DISCARD;
                 goto fail;
@@ -1595,6 +1638,21 @@
                               struct drbd_nl_cfg_reply *reply)
 {
         int retcode;
+        struct disconnect dc;
+
+        memset(&dc, 0, sizeof(struct disconnect));
+        if (!disconnect_from_tags(mdev, nlp->tag_list, &dc)) {
+                retcode = ERR_MANDATORY_TAG;
+                goto fail;
+        }
+
+        if (dc.force) {
+                spin_lock_irq(&mdev->req_lock);
+                if (mdev->state.conn >= C_WF_CONNECTION)
+                        _drbd_set_state(_NS(mdev, conn, C_DISCONNECTING), CS_HARD, NULL);
+                spin_unlock_irq(&mdev->req_lock);
+                goto done;
+        }
 
         retcode = _drbd_request_state(mdev, NS(conn, C_DISCONNECTING), CS_ORDERED);
 
@@ -1692,7 +1750,7 @@
 
         mdev->ldev->dc.disk_size = (sector_t)rs.resize_size;
         ddsf = (rs.resize_force ? DDSF_FORCED : 0) | (rs.no_resync ? DDSF_NO_RESYNC : 0);
-        dd = drbd_determin_dev_size(mdev, ddsf);
+        dd = drbd_determine_dev_size(mdev, ddsf);
         drbd_md_sync(mdev);
         put_ldev(mdev);
         if (dd == dev_size_error) {
@@ -1912,6 +1970,10 @@
 {
         int retcode;
 
+        /* If there is still bitmap IO pending, probably because of a previous
+         * resync just being finished, wait for it before requesting a new resync. */
+        wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
+
         retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T), CS_ORDERED);
 
         if (retcode < SS_SUCCESS && retcode != SS_NEED_CONNECTION)
@@ -1947,6 +2009,10 @@
 {
         int retcode;
 
+        /* If there is still bitmap IO pending, probably because of a previous
+         * resync just being finished, wait for it before requesting a new resync. */
+        wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
+
         retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S), CS_ORDERED);
 
         if (retcode < SS_SUCCESS) {
@@ -1955,9 +2021,9 @@
                            into a full resync. */
                         retcode = drbd_request_state(mdev, NS(pdsk, D_INCONSISTENT));
                         if (retcode >= SS_SUCCESS) {
-                                /* open coded drbd_bitmap_io() */
                                 if (drbd_bitmap_io(mdev, &drbd_bmio_set_susp_al,
-                                                   "set_n_write from invalidate_peer"))
+                                        "set_n_write from invalidate_peer",
+                                        BM_LOCKED_SET_ALLOWED))
                                         retcode = ERR_IO_MD_DISK;
                         }
                 } else
@@ -1984,9 +2050,17 @@
                                struct drbd_nl_cfg_reply *reply)
 {
         int retcode = NO_ERROR;
+        union drbd_state s;
 
-        if (drbd_request_state(mdev, NS(user_isp, 0)) == SS_NOTHING_TO_DO)
-                retcode = ERR_PAUSE_IS_CLEAR;
+        if (drbd_request_state(mdev, NS(user_isp, 0)) == SS_NOTHING_TO_DO) {
+                s = mdev->state;
+                if (s.conn == C_PAUSED_SYNC_S || s.conn == C_PAUSED_SYNC_T) {
+                        retcode = s.aftr_isp ? ERR_PIC_AFTER_DEP :
+                                  s.peer_isp ? ERR_PIC_PEER_DEP : ERR_PAUSE_IS_CLEAR;
+                } else {
+                        retcode = ERR_PAUSE_IS_CLEAR;
+                }
+        }
 
         reply->ret_code = retcode;
         return 0;
@@ -2124,6 +2198,11 @@
                 reply->ret_code = ERR_MANDATORY_TAG;
                 return 0;
         }
+
+        /* If there is still bitmap IO pending, e.g. previous resync or verify
+         * just being finished, wait for it before requesting a new resync. */
+        wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
+
         /* w_make_ov_request expects position to be aligned */
         mdev->ov_start_sector = args.start_sector & ~BM_SECT_PER_BIT;
         reply->ret_code = drbd_request_state(mdev,NS(conn,C_VERIFY_S));
@@ -2167,7 +2246,8 @@
         drbd_uuid_new_current(mdev); /* New current, previous to UI_BITMAP */
 
         if (args.clear_bm) {
-                err = drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write, "clear_n_write from new_c_uuid");
+                err = drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write,
+                        "clear_n_write from new_c_uuid", BM_LOCKED_MASK);
                 if (err) {
                         dev_err(DEV, "Writing bitmap failed with %d\n",err);
                         retcode = ERR_IO_MD_DISK;
@@ -2175,6 +2255,7 @@
                 if (skip_initial_sync) {
                         drbd_send_uuids_skip_initial_sync(mdev);
                         _drbd_uuid_set(mdev, UI_BITMAP, 0);
+                        drbd_print_uuids(mdev, "cleared bitmap UUID");
                         spin_lock_irq(&mdev->req_lock);
                         _drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
                                         CS_VERBOSE, NULL);
@@ -2254,10 +2335,17 @@
         }
 
 #ifdef KERNEL_HAS_CN_SKB_PARMS
+# ifdef HAVE_NL_SKB_EFF_CAP
         if (!cap_raised(nsp->eff_cap, CAP_SYS_ADMIN)) {
                 retcode = ERR_PERM;
                 goto fail;
         }
+# else
+        if (!cap_raised(current_cap(), CAP_SYS_ADMIN)) {
+                retcode = ERR_PERM;
+                goto fail;
+        }
+# endif
 #endif
 
         mdev = ensure_mdev(nlp->drbd_minor,
@@ -2269,7 +2357,8 @@
 
         trace_drbd_netlink(req, 1);
 
-        if (nlp->packet_type >= P_nl_after_last_packet) {
+        if (nlp->packet_type >= P_nl_after_last_packet ||
+            nlp->packet_type == P_return_code_only) {
                 retcode = ERR_PACKET_NR;
                 goto fail;
         }
@@ -2285,7 +2374,7 @@
         reply_size += cm->reply_body_size;
 
         /* allocation not in the IO path, cqueue thread context */
-        cn_reply = kmalloc(reply_size, GFP_KERNEL);
+        cn_reply = kzalloc(reply_size, GFP_KERNEL);
         if (!cn_reply) {
                 retcode = ERR_NOMEM;
                 goto fail;
@@ -2293,7 +2382,7 @@
         reply = (struct drbd_nl_cfg_reply *) cn_reply->data;
 
         reply->packet_type =
-                cm->reply_body_size ? nlp->packet_type : P_nl_after_last_packet;
+                cm->reply_body_size ? nlp->packet_type : P_return_code_only;
         reply->minor = nlp->drbd_minor;
         reply->ret_code = NO_ERROR; /* Might by modified by cm->function. */
         /* reply->tag_list; might be modified by cm->function. */
@@ -2459,7 +2548,7 @@
         /* receiver thread context, which is not in the writeout path (of this node),
          * but may be in the writeout path of the _other_ node.
         * GFP_NOIO to avoid potential "distributed deadlock". */
-        cn_reply = kmalloc(
+        cn_reply = kzalloc(
                 sizeof(struct cn_msg)+
                 sizeof(struct drbd_nl_cfg_reply)+
                 sizeof(struct dump_ee_tag_len_struct)+
@@ -2481,10 +2570,11 @@
         tl = tl_add_int(tl, T_ee_sector, &e->sector);
         tl = tl_add_int(tl, T_ee_block_id, &e->block_id);
 
+        /* dump the first 32k */
+        len = min_t(unsigned, e->size, 32 << 10);
         put_unaligned(T_ee_data, tl++);
-        put_unaligned(e->size, tl++);
+        put_unaligned(len, tl++);
 
-        len = e->size;
         page = e->pages;
         page_chain_for_each(page) {
                 void *d = kmap_atomic(page, KM_USER0);
@@ -2493,6 +2583,8 @@
                 kunmap_atomic(d, KM_USER0);
                 tl = (unsigned short*)((char*)tl + l);
                 len -= l;
+                if (len == 0)
+                        break;
         }
         put_unaligned(TT_END, tl++); /* Close the tag list */
 
@@ -2560,6 +2652,9 @@
 
 typedef int (*cn_add_callback_req_nsp_fn)(struct cb_id *, char *,
         void (*cb)(struct cn_msg *req, struct netlink_skb_parms *nsp));
+typedef int (*cn_add_callback_const_name_req_nsp_fn)(
+                struct cb_id *id, const char *name,
+        void (*callback)(struct cn_msg *, struct netlink_skb_parms *));
 typedef int (*cn_add_callback_req_fn)(struct cb_id *, char *,
         void (*cb)(struct cn_msg *req));
 typedef int (*cn_add_callback_void_fn)(struct cb_id *, char *,
@@ -2586,7 +2681,9 @@
                  * otherwise it will just be a compiler _warning_,
                  * but then BUG at runtime. */
 #ifdef KERNEL_HAS_CN_SKB_PARMS
-                BUILD_BUG_ON(!__same_type(&cn_add_callback, cn_add_callback_req_nsp_fn));
+                BUILD_BUG_ON(!(
+                        __same_type(&cn_add_callback, cn_add_callback_req_nsp_fn) ||
+                        __same_type(&cn_add_callback, cn_add_callback_const_name_req_nsp_fn)));
 #else
                 BUILD_BUG_ON(!(
                         __same_type(&cn_add_callback, cn_add_callback_req_fn) ||
@@ -2629,6 +2726,7 @@
                 (struct drbd_nl_cfg_reply *)cn_reply->data;
         int rr;
 
+        memset(buffer, 0, sizeof(buffer));
         cn_reply->id = req->id;
 
         cn_reply->seq = req->seq;
@@ -2636,5 +2734,6 @@
         cn_reply->len = sizeof(struct drbd_nl_cfg_reply);
         cn_reply->flags = 0;
 
+        reply->packet_type = P_return_code_only;
         reply->minor = ((struct drbd_nl_cfg_req *)req->data)->drbd_minor;
         reply->ret_code = ret_code;