~ubuntu-branches/ubuntu/quantal/linux-lowlatency/quantal-proposed


Viewing changes to drivers/block/drbd/drbd_req.c

  • Committer: Package Import Robot
  • Author(s): Andy Whitcroft
  • Date: 2012-06-21 09:16:38 UTC
  • Revision ID: package-import@ubuntu.com-20120621091638-gubhv4nox8xez1ct
Tags: 3.5.0-1.1
[ Andy Whitcroft ]

* Rebuild lowlatency against Ubuntu-3.5.0-1.1
* All new configuration system to allow configuration deltas to be
  exposed via debian.lowlatency/config-delta

--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -37,6 +37,7 @@
         const int rw = bio_data_dir(bio);
         int cpu;
         cpu = part_stat_lock();
+        part_round_stats(cpu, &mdev->vdisk->part0);
         part_stat_inc(cpu, &mdev->vdisk->part0, ios[rw]);
         part_stat_add(cpu, &mdev->vdisk->part0, sectors[rw], bio_sectors(bio));
         part_inc_in_flight(&mdev->vdisk->part0, rw);
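
The new part_round_stats() call folds the time the partition spent at its previous in-flight count into the accumulated statistics before ios[], sectors[] and the in-flight counter are bumped for this request. A minimal userspace sketch of that accounting idea, assuming a simplified disk_stats stand-in rather than the kernel's real per-partition counters:

#include <stdio.h>

/* Hypothetical stand-in for the kernel's per-partition counters. */
struct disk_stats {
        unsigned long in_flight;    /* requests currently in flight */
        unsigned long io_ticks;     /* time with at least one request in flight */
        unsigned long last_update;  /* timestamp of the last rounding */
};

/* Fold elapsed time at the *old* in_flight level into io_ticks,
 * analogous to rounding the stats before the counters change. */
static void round_stats(struct disk_stats *s, unsigned long now)
{
        if (s->in_flight)
                s->io_ticks += now - s->last_update;
        s->last_update = now;
}

static void start_request(struct disk_stats *s, unsigned long now)
{
        round_stats(s, now);   /* account time before changing in_flight */
        s->in_flight++;
}

static void end_request(struct disk_stats *s, unsigned long now)
{
        round_stats(s, now);
        s->in_flight--;
}

int main(void)
{
        struct disk_stats s = { 0, 0, 0 };

        start_request(&s, 10);  /* first request at t=10 */
        start_request(&s, 15);  /* second request at t=15 */
        end_request(&s, 30);    /* one completes at t=30 */
        end_request(&s, 40);    /* the other at t=40 */

        printf("io_ticks = %lu\n", s.io_ticks);  /* 30: busy from t=10 to t=40 */
        return 0;
}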
@@ -214,8 +215,7 @@
 {
         const unsigned long s = req->rq_state;
         struct drbd_conf *mdev = req->mdev;
-        /* only WRITES may end up here without a master bio (on barrier ack) */
-        int rw = req->master_bio ? bio_data_dir(req->master_bio) : WRITE;
+        int rw = req->rq_state & RQ_WRITE ? WRITE : READ;
 
         /* we must not complete the master bio, while it is
          *      still being processed by _drbd_send_zc_bio (drbd_send_dblock)
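
The request direction is now taken from the persistent RQ_WRITE bit in rq_state instead of from req->master_bio, which the dropped comment notes may already be gone when a write completes on a barrier ack. A small sketch of the difference, assuming a hypothetical req_sketch struct and flag value in place of the real drbd_request:

#include <stdio.h>

#define RQ_WRITE (1UL << 0)         /* hypothetical flag value, for the sketch only */
enum { READ = 0, WRITE = 1 };

struct req_sketch {
        void *master_bio;           /* may already be NULL on a barrier ack */
        unsigned long rq_state;
};

int main(void)
{
        /* A write whose master bio has already been handed back: */
        struct req_sketch req = { .master_bio = NULL, .rq_state = RQ_WRITE };

        /* Old: int rw = req->master_bio ? bio_data_dir(req->master_bio) : WRITE;
         * New: the direction is read from the flag set at request creation,
         * so a missing bio no longer needs a special case. */
        int rw = (req.rq_state & RQ_WRITE) ? WRITE : READ;

        printf("direction: %s\n", rw == WRITE ? "WRITE" : "READ");
        return 0;
}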
@@ -230,7 +230,7 @@
                 return;
         if (s & RQ_NET_PENDING)
                 return;
-        if (s & RQ_LOCAL_PENDING)
+        if (s & RQ_LOCAL_PENDING && !(s & RQ_LOCAL_ABORTED))
                 return;
 
         if (req->master_bio) {
@@ -277,6 +277,9 @@
                 req->master_bio = NULL;
         }
 
+        if (s & RQ_LOCAL_PENDING)
+                return;
+
         if ((s & RQ_NET_MASK) == 0 || (s & RQ_NET_DONE)) {
                 /* this is disconnected (local only) operation,
                  * or protocol C P_WRITE_ACK,
@@ -429,7 +432,7 @@
                 break;
 
         case completed_ok:
-                if (bio_data_dir(req->master_bio) == WRITE)
+                if (req->rq_state & RQ_WRITE)
                         mdev->writ_cnt += req->size>>9;
                 else
                         mdev->read_cnt += req->size>>9;
@@ -438,7 +441,14 @@
                 req->rq_state &= ~RQ_LOCAL_PENDING;
 
                 _req_may_be_done_not_susp(req, m);
-                put_ldev(mdev);
+                break;
+
+        case abort_disk_io:
+                req->rq_state |= RQ_LOCAL_ABORTED;
+                if (req->rq_state & RQ_WRITE)
+                        _req_may_be_done_not_susp(req, m);
+                else
+                        goto goto_queue_for_net_read;
                 break;
 
         case write_completed_with_error:
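
Together with the relaxed check earlier in the file, where the early return now reads "if (s & RQ_LOCAL_PENDING && !(s & RQ_LOCAL_ABORTED))", the new abort_disk_io case lets a request be completed toward upper layers even though the local disk never answered. A userspace model of just that gating logic, with made-up flag values:

#include <stdio.h>
#include <stdbool.h>

/* Hypothetical flag values, for illustration only. */
#define RQ_LOCAL_PENDING  (1UL << 0)
#define RQ_LOCAL_ABORTED  (1UL << 1)
#define RQ_NET_PENDING    (1UL << 2)

/* Mirrors the reworked early return: a request still pending on the local
 * disk normally blocks completion, unless it has been marked aborted
 * (we gave up waiting for the disk). */
static bool may_complete(unsigned long s)
{
        if (s & RQ_NET_PENDING)
                return false;
        if (s & RQ_LOCAL_PENDING && !(s & RQ_LOCAL_ABORTED))
                return false;
        return true;
}

int main(void)
{
        printf("pending:         %d\n", may_complete(RQ_LOCAL_PENDING));                     /* 0 */
        printf("pending+aborted: %d\n", may_complete(RQ_LOCAL_PENDING | RQ_LOCAL_ABORTED));  /* 1 */
        printf("done:            %d\n", may_complete(0));                                    /* 1 */
        return 0;
}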
@@ -447,7 +457,6 @@
 
                 __drbd_chk_io_error(mdev, false);
                 _req_may_be_done_not_susp(req, m);
-                put_ldev(mdev);
                 break;
 
         case read_ahead_completed_with_error:
@@ -455,7 +464,6 @@
                 req->rq_state |= RQ_LOCAL_COMPLETED;
                 req->rq_state &= ~RQ_LOCAL_PENDING;
                 _req_may_be_done_not_susp(req, m);
-                put_ldev(mdev);
                 break;
 
         case read_completed_with_error:
@@ -467,7 +475,8 @@
                 D_ASSERT(!(req->rq_state & RQ_NET_MASK));
 
                 __drbd_chk_io_error(mdev, false);
-                put_ldev(mdev);
+
+        goto_queue_for_net_read:
 
                 /* no point in retrying if there is no good remote data,
                  * or we have no connection. */
@@ -556,10 +565,8 @@
                 drbd_queue_work(&mdev->data.work, &req->w);
                 break;
 
-        case oos_handed_to_network:
-                /* actually the same */
+        case read_retry_remote_canceled:
         case send_canceled:
-                /* treat it the same */
         case send_failed:
                 /* real cleanup will be done from tl_clear.  just update flags
                  * so it is no longer marked as on the worker queue */
@@ -589,17 +596,17 @@
                 }
                 req->rq_state &= ~RQ_NET_QUEUED;
                 req->rq_state |= RQ_NET_SENT;
-                /* because _drbd_send_zc_bio could sleep, and may want to
-                 * dereference the bio even after the "write_acked_by_peer" and
-                 * "completed_ok" events came in, once we return from
-                 * _drbd_send_zc_bio (drbd_send_dblock), we have to check
-                 * whether it is done already, and end it.  */
                 _req_may_be_done_not_susp(req, m);
                 break;
 
-        case read_retry_remote_canceled:
+        case oos_handed_to_network:
+                /* Was not set PENDING, no longer QUEUED, so is now DONE
+                 * as far as this connection is concerned. */
                 req->rq_state &= ~RQ_NET_QUEUED;
-                /* fall through, in case we raced with drbd_disconnect */
+                req->rq_state |= RQ_NET_DONE;
+                _req_may_be_done_not_susp(req, m);
+                break;
+
         case connection_lost_while_pending:
                 /* transfer log cleanup after connection loss */
                 /* assert something? */
@@ -616,8 +623,6 @@
                         _req_may_be_done(req, m); /* Allowed while state.susp */
                 break;
 
-        case write_acked_by_peer_and_sis:
-                req->rq_state |= RQ_NET_SIS;
         case conflict_discarded_by_peer:
                 /* for discarded conflicting writes of multiple primaries,
                  * there is no need to keep anything in the tl, potential
@@ -628,18 +633,15 @@
                               (unsigned long long)req->sector, req->size);
                 req->rq_state |= RQ_NET_DONE;
                 /* fall through */
+        case write_acked_by_peer_and_sis:
         case write_acked_by_peer:
+                if (what == write_acked_by_peer_and_sis)
+                        req->rq_state |= RQ_NET_SIS;
                 /* protocol C; successfully written on peer.
-                 * Nothing to do here.
+                 * Nothing more to do here.
                  * We want to keep the tl in place for all protocols, to cater
-                 * for volatile write-back caches on lower level devices.
-                 *
-                 * A barrier request is expected to have forced all prior
-                 * requests onto stable storage, so completion of a barrier
-                 * request could set NET_DONE right here, and not wait for the
-                 * P_BARRIER_ACK, but that is an unnecessary optimization. */
+                 * for volatile write-back caches on lower level devices. */
 
-                /* this makes it effectively the same as for: */
         case recv_acked_by_peer:
                 /* protocol B; pretends to be successfully written on peer.
                  * see also notes above in handed_over_to_network about
@@ -773,6 +775,7 @@
         int local, remote, send_oos = 0;
         int err = -EIO;
         int ret = 0;
+        union drbd_state s;
 
         /* allocate outside of all locks; */
         req = drbd_req_new(mdev, bio);
@@ -834,8 +837,9 @@
                 drbd_al_begin_io(mdev, sector);
         }
 
-        remote = remote && drbd_should_do_remote(mdev->state);
-        send_oos = rw == WRITE && drbd_should_send_oos(mdev->state);
+        s = mdev->state;
+        remote = remote && drbd_should_do_remote(s);
+        send_oos = rw == WRITE && drbd_should_send_oos(s);
         D_ASSERT(!(remote && send_oos));
 
         if (!(local || remote) && !is_susp(mdev->state)) {
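
Copying mdev->state once into the new local variable s and passing that copy to both drbd_should_do_remote() and drbd_should_send_oos() means both decisions are made against the same snapshot, so the following D_ASSERT cannot trip merely because the state changed between two separate reads. A hedged sketch of the snapshot pattern, with a simplified stand-in for the state type and predicates:

#include <stdio.h>

/* Simplified stand-in for the kernel's union drbd_state. */
struct state_sketch {
        int conn;   /* connection state, illustrative only */
};

static int should_do_remote(struct state_sketch s) { return s.conn >= 10; }
static int should_send_oos(struct state_sketch s)  { return s.conn >= 5 && s.conn < 10; }

static volatile struct state_sketch shared_state;  /* may change concurrently */

int main(void)
{
        /* Take one snapshot and base every decision on it, so the two
         * predicates can never disagree about which state they saw. */
        struct state_sketch s = shared_state;

        int remote   = should_do_remote(s);
        int send_oos = should_send_oos(s);

        /* With a single snapshot this invariant holds by construction;
         * with two independent reads it could be violated. */
        printf("remote=%d send_oos=%d (never both)\n", remote, send_oos);
        return 0;
}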
@@ -867,7 +871,7 @@
 
         if (is_susp(mdev->state)) {
                 /* If we got suspended, use the retry mechanism of
-                   generic_make_request() to restart processing of this
+                   drbd_make_request() to restart processing of this
                    bio. In the next call to drbd_make_request
                    we sleep in inc_ap_bio() */
                 ret = 1;
@@ -1091,7 +1095,6 @@
          */
         D_ASSERT(bio->bi_size > 0);
         D_ASSERT((bio->bi_size & 0x1ff) == 0);
-        D_ASSERT(bio->bi_idx == 0);
 
         /* to make some things easier, force alignment of requests within the
          * granularity of our hash tables */
@@ -1099,8 +1102,9 @@
         e_enr = (bio->bi_sector+(bio->bi_size>>9)-1) >> HT_SHIFT;
 
         if (likely(s_enr == e_enr)) {
-                inc_ap_bio(mdev, 1);
-                drbd_make_request_common(mdev, bio, start_time);
+                do {
+                        inc_ap_bio(mdev, 1);
+                } while (drbd_make_request_common(mdev, bio, start_time));
                 return;
         }
 
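The do/while form re-submits the bio whenever drbd_make_request_common() returns nonzero, which is what the suspended path further up in this diff now arranges by setting ret = 1, and it takes a fresh application-bio reference for each attempt. A toy model of that retry shape; the reference bookkeeping inside make_request_common() is an assumption of the sketch, not the kernel code:

#include <stdio.h>

static int refs;            /* models the ap_bio reference count */
static int suspended = 2;   /* pretend the device is "suspended" for two attempts */
static int attempts;

static void inc_ap_bio(int n) { refs += n; }
static void dec_ap_bio(void)  { refs -= 1; }

/* Returns nonzero when the caller should retry with a fresh reference
 * (the suspended case). */
static int make_request_common(void)
{
        attempts++;
        if (suspended > 0) {
                suspended--;
                dec_ap_bio();   /* give the failed attempt's reference back */
                return 1;       /* ask the caller to retry */
        }
        return 0;               /* submitted; the reference stays held */
}

int main(void)
{
        do {
                inc_ap_bio(1);
        } while (make_request_common());

        printf("attempts=%d refs_held=%d\n", attempts, refs);  /* attempts=3 refs_held=1 */
        return 0;
}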
@@ -1196,36 +1200,66 @@
         struct drbd_conf *mdev = (struct drbd_conf *) data;
         struct drbd_request *req; /* oldest request */
         struct list_head *le;
-        unsigned long et = 0; /* effective timeout = ko_count * timeout */
+        unsigned long ent = 0, dt = 0, et, nt; /* effective timeout = ko_count * timeout */
+        unsigned long now;
 
         if (get_net_conf(mdev)) {
-                et = mdev->net_conf->timeout*HZ/10 * mdev->net_conf->ko_count;
+                if (mdev->state.conn >= C_WF_REPORT_PARAMS)
+                        ent = mdev->net_conf->timeout*HZ/10
+                                * mdev->net_conf->ko_count;
                 put_net_conf(mdev);
         }
-        if (!et || mdev->state.conn < C_WF_REPORT_PARAMS)
+        if (get_ldev(mdev)) { /* implicit state.disk >= D_INCONSISTENT */
+                dt = mdev->ldev->dc.disk_timeout * HZ / 10;
+                put_ldev(mdev);
+        }
+        et = min_not_zero(dt, ent);
+
+        if (!et)
                 return; /* Recurring timer stopped */
 
+        now = jiffies;
+
         spin_lock_irq(&mdev->req_lock);
         le = &mdev->oldest_tle->requests;
         if (list_empty(le)) {
                 spin_unlock_irq(&mdev->req_lock);
-                mod_timer(&mdev->request_timer, jiffies + et);
+                mod_timer(&mdev->request_timer, now + et);
                 return;
         }
 
         le = le->prev;
         req = list_entry(le, struct drbd_request, tl_requests);
-        if (time_is_before_eq_jiffies(req->start_time + et)) {
-                if (req->rq_state & RQ_NET_PENDING) {
-                        dev_warn(DEV, "Remote failed to finish a request within ko-count * timeout\n");
-                        _drbd_set_state(_NS(mdev, conn, C_TIMEOUT), CS_VERBOSE, NULL);
-                } else {
-                        dev_warn(DEV, "Local backing block device frozen?\n");
-                        mod_timer(&mdev->request_timer, jiffies + et);
-                }
-        } else {
-                mod_timer(&mdev->request_timer, req->start_time + et);
-        }
 
+        /* The request is considered timed out, if
+         * - we have some effective timeout from the configuration,
+         *   with above state restrictions applied,
+         * - the oldest request is waiting for a response from the network
+         *   resp. the local disk,
+         * - the oldest request is in fact older than the effective timeout,
+         * - the connection was established (resp. disk was attached)
+         *   for longer than the timeout already.
+         * Note that for 32bit jiffies and very stable connections/disks,
+         * we may have a wrap around, which is catched by
+         *   !time_in_range(now, last_..._jif, last_..._jif + timeout).
+         *
+         * Side effect: once per 32bit wrap-around interval, which means every
+         * ~198 days with 250 HZ, we have a window where the timeout would need
+         * to expire twice (worst case) to become effective. Good enough.
+         */
+        if (ent && req->rq_state & RQ_NET_PENDING &&
+                 time_after(now, req->start_time + ent) &&
+                !time_in_range(now, mdev->last_reconnect_jif, mdev->last_reconnect_jif + ent)) {
+                dev_warn(DEV, "Remote failed to finish a request within ko-count * timeout\n");
+                _drbd_set_state(_NS(mdev, conn, C_TIMEOUT), CS_VERBOSE | CS_HARD, NULL);
+        }
+        if (dt && req->rq_state & RQ_LOCAL_PENDING &&
+                 time_after(now, req->start_time + dt) &&
+                !time_in_range(now, mdev->last_reattach_jif, mdev->last_reattach_jif + dt)) {
+                dev_warn(DEV, "Local backing device failed to meet the disk-timeout\n");
+                __drbd_chk_io_error(mdev, 1);
+        }
+        nt = (time_after(now, req->start_time + et) ? now : req->start_time) + et;
         spin_unlock_irq(&mdev->req_lock);
+        mod_timer(&mdev->request_timer, nt);
 }
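
The rewritten timer handler keeps separate network (ent) and disk (dt) timeouts, picks the smaller nonzero one with min_not_zero(), and only declares a timeout when time_after() says the oldest request is old enough and time_in_range() rules out a 32-bit jiffies wrap-around, as the new comment block spells out. A self-contained sketch of that check using plain unsigned arithmetic; the helper definitions follow the usual wrap-safe idiom and are written out here rather than taken from the kernel headers:

#include <stdio.h>

/* Wrap-safe time comparisons in the usual unsigned-arithmetic idiom. */
static int time_after(unsigned long a, unsigned long b)
{
        return (long)(b - a) < 0;   /* true if a is later than b, even across wrap */
}

static int time_in_range(unsigned long t, unsigned long lo, unsigned long hi)
{
        return !time_after(lo, t) && !time_after(t, hi);
}

static unsigned long min_not_zero(unsigned long a, unsigned long b)
{
        if (!a)
                return b;
        if (!b)
                return a;
        return a < b ? a : b;
}

int main(void)
{
        unsigned long dt = 0, ent = 3000;           /* only the network timeout is configured */
        unsigned long et = min_not_zero(dt, ent);   /* -> 3000 */

        unsigned long start = 1000;                 /* when the oldest request was issued */
        unsigned long reconnect = 500;              /* when the connection was established */
        unsigned long now = 4500;

        /* Timed out only if the request is older than et AND the connection
         * has existed for longer than et (the wrap-around guard). */
        int timed_out = et &&
                time_after(now, start + et) &&
                !time_in_range(now, reconnect, reconnect + et);

        printf("et=%lu timed_out=%d\n", et, timed_out);  /* et=3000 timed_out=1 */
        return 0;
}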