 		req->rq_state &= ~RQ_NET_QUEUED;
 		req->rq_state |= RQ_NET_SENT;
-		/* because _drbd_send_zc_bio could sleep, and may want to
-		 * dereference the bio even after the "write_acked_by_peer" and
-		 * "completed_ok" events came in, once we return from
-		 * _drbd_send_zc_bio (drbd_send_dblock), we have to check
-		 * whether it is done already, and end it. */
 		_req_may_be_done_not_susp(req, m);
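Even with the comment dropped, the hazard it described is still the reason _req_may_be_done_not_susp() runs right after sending: the send path can sleep, so by the time it returns the peer's ACK may already have been processed, and whoever sees the request fully acknowledged must be the one to complete it. A minimal standalone sketch of that pattern, using two threads and hypothetical flag names (not DRBD's own):

    #include <pthread.h>
    #include <stdio.h>

    #define RQ_SENT  (1 << 0)   /* hypothetical: send path finished */
    #define RQ_ACKED (1 << 1)   /* hypothetical: ACK processed */

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static unsigned rq_state;

    /* Whoever observes both bits set completes the request; compare
     * _req_may_be_done_not_susp() being called right after the send. */
    static void req_may_be_done(const char *who)
    {
        if ((rq_state & (RQ_SENT | RQ_ACKED)) == (RQ_SENT | RQ_ACKED))
            printf("%s completes the request\n", who);
    }

    static void *acker(void *arg)
    {
        (void)arg;
        pthread_mutex_lock(&lock);
        rq_state |= RQ_ACKED;        /* ACK may arrive while send still runs */
        req_may_be_done("acker");
        pthread_mutex_unlock(&lock);
        return NULL;
    }

    int main(void)
    {
        pthread_t t;
        pthread_create(&t, NULL, acker, NULL);
        /* ... the send itself may sleep here ... */
        pthread_mutex_lock(&lock);
        rq_state |= RQ_SENT;
        req_may_be_done("sender");   /* must re-check after send returns */
        pthread_mutex_unlock(&lock);
        pthread_join(t, NULL);
        return 0;
    }

Whichever thread runs second observes both bits and completes the request; the other saw an incomplete mask and did nothing. That "last one to arrive finishes it" property is what the DRBD code relies on.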
 	case read_retry_remote_canceled:
+	case oos_handed_to_network:
+		/* Was not set PENDING, no longer QUEUED, so is now DONE
+		 * as far as this connection is concerned. */
 		req->rq_state &= ~RQ_NET_QUEUED;
-		/* fall through, in case we raced with drbd_disconnect */
+		req->rq_state |= RQ_NET_DONE;
+		_req_may_be_done_not_susp(req, m);
+		break;
+
 	case connection_lost_while_pending:
 		/* transfer log cleanup after connection loss */
 		/* assert something? */
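All of these handlers follow one scheme: req->rq_state is a small bitmask state machine, each event clears and sets a few lifecycle bits, and the code then asks "can this request be completed now?". A toy illustration of that scheme (the flag values here are illustrative; the real definitions live in drbd_req.h):

    #include <stdio.h>

    /* Illustrative lifecycle bits, mirroring the names used above. */
    #define RQ_NET_QUEUED  (1 << 0)  /* on the worker queue, not sent yet */
    #define RQ_NET_SENT    (1 << 1)  /* handed over to the network */
    #define RQ_NET_DONE    (1 << 2)  /* nothing more expected from the peer */

    static void show(unsigned rq_state)
    {
        printf("queued=%d sent=%d done=%d\n",
               !!(rq_state & RQ_NET_QUEUED),
               !!(rq_state & RQ_NET_SENT),
               !!(rq_state & RQ_NET_DONE));
    }

    int main(void)
    {
        unsigned rq_state = RQ_NET_QUEUED;   /* queued for sending */

        /* handed_over_to_network: no longer queued, now sent */
        rq_state &= ~RQ_NET_QUEUED;
        rq_state |= RQ_NET_SENT;
        show(rq_state);

        /* oos_handed_to_network: never set PENDING, so straight to DONE */
        rq_state |= RQ_NET_DONE;
        show(rq_state);
        return 0;
    }

oos_handed_to_network can jump straight to DONE because, as the comment says, it never set RQ_NET_PENDING; there is no ACK to wait for.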
 			      (unsigned long long)req->sector, req->size);
 		req->rq_state |= RQ_NET_DONE;
 		/* fall through */
+	case write_acked_by_peer_and_sis:
 	case write_acked_by_peer:
+		if (what == write_acked_by_peer_and_sis)
+			req->rq_state |= RQ_NET_SIS;
 		/* protocol C; successfully written on peer.
-		 * Nothing to do here.
+		 * Nothing more to do here.
 		 * We want to keep the tl in place for all protocols, to cater
-		 * for volatile write-back caches on lower level devices.
-		 *
-		 * A barrier request is expected to have forced all prior
-		 * requests onto stable storage, so completion of a barrier
-		 * request could set NET_DONE right here, and not wait for the
-		 * P_BARRIER_ACK, but that is an unnecessary optimization. */
-
+		 * for volatile write-back caches on lower level devices. */
 		/* this makes it effectively the same as for: */
 	case recv_acked_by_peer:
 		/* protocol B; pretends to be successfully written on peer.
 		 * see also notes above in handed_over_to_network about
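These case labels map directly onto DRBD's replication protocols: protocol A considers a write complete once it is handed to the network, protocol B once the peer acknowledges receipt (recv_acked_by_peer), and protocol C once the peer acknowledges the write itself (write_acked_by_peer). A compact summary in code; the enum values are illustrative, while the DRBD_PROT_A/B/C names are the real constants from the DRBD headers:

    #include <stdio.h>

    /* Wire protocols; in the DRBD headers these are DRBD_PROT_A/B/C
     * (numeric values here are illustrative). */
    enum drbd_protocol { DRBD_PROT_A = 1, DRBD_PROT_B, DRBD_PROT_C };

    /* Which event lets a write forget about the network?
     * This paraphrases the case labels and comments above. */
    static const char *write_considered_ok_after(enum drbd_protocol p)
    {
        switch (p) {
        case DRBD_PROT_A: return "handed_over_to_network (sent, no ack)";
        case DRBD_PROT_B: return "recv_acked_by_peer (peer received it)";
        case DRBD_PROT_C: return "write_acked_by_peer (peer wrote it)";
        }
        return "?";
    }

    int main(void)
    {
        for (int p = DRBD_PROT_A; p <= DRBD_PROT_C; p++)
            printf("protocol %c: %s\n", 'A' + p - 1,
                   write_considered_ok_after(p));
        return 0;
    }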
 void request_timer_fn(unsigned long data)
 {
 	struct drbd_conf *mdev = (struct drbd_conf *) data;
 	struct drbd_request *req; /* oldest request */
 	struct list_head *le;
-	unsigned long et = 0; /* effective timeout = ko_count * timeout */
+	unsigned long ent = 0, dt = 0, et, nt; /* effective timeout = ko_count * timeout */
+	unsigned long now;
 
 	if (get_net_conf(mdev)) {
-		et = mdev->net_conf->timeout*HZ/10 * mdev->net_conf->ko_count;
+		if (mdev->state.conn >= C_WF_REPORT_PARAMS)
+			ent = mdev->net_conf->timeout*HZ/10
+				* mdev->net_conf->ko_count;
 		put_net_conf(mdev);
 	}
-	if (!et || mdev->state.conn < C_WF_REPORT_PARAMS)
+	if (get_ldev(mdev)) { /* implicit state.disk >= D_INCONSISTENT */
+		dt = mdev->ldev->dc.disk_timeout * HZ / 10;
+		put_ldev(mdev);
+	}
+	et = min_not_zero(dt, ent);
+
+	if (!et)
 		return; /* Recurring timer stopped */
 
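The unit juggling here is easy to trip over: both timeout and disk_timeout are configured in tenths of a second, hence the * HZ / 10 conversion to jiffies, and the network timeout only takes effect after being multiplied by ko_count. min_not_zero() (from <linux/kernel.h>) picks the smaller of the two limits while treating 0 as "disabled". A standalone sketch of the same arithmetic, assuming HZ = 250 and example configuration values:

    #include <stdio.h>

    #define HZ 250  /* assumed; kernels are commonly built with 100..1000 */

    /* Same behavior as the kernel's min_not_zero(): 0 means "no limit". */
    static unsigned long min_not_zero(unsigned long x, unsigned long y)
    {
        if (x == 0)
            return y;
        if (y == 0)
            return x;
        return x < y ? x : y;
    }

    int main(void)
    {
        unsigned long timeout = 60;      /* net timeout: 6.0 s, in 0.1 s units */
        unsigned long ko_count = 7;      /* allow the peer 7 missed timeouts */
        unsigned long disk_timeout = 0;  /* 0 = disk-timeout disabled */

        unsigned long ent = timeout * HZ / 10 * ko_count; /* 10500 jiffies */
        unsigned long dt = disk_timeout * HZ / 10;        /* 0 jiffies */
        unsigned long et = min_not_zero(dt, ent);

        printf("ent=%lu dt=%lu -> effective timeout et=%lu jiffies (%.1f s)\n",
               ent, dt, et, (double)et / HZ);
        return 0;
    }

With these values the effective timeout comes out to 10500 jiffies, i.e. 42 seconds; if disk_timeout were set, the smaller of the two limits would win.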
+	now = jiffies;
+
 	spin_lock_irq(&mdev->req_lock);
 	le = &mdev->oldest_tle->requests;
 	if (list_empty(le)) {
 		spin_unlock_irq(&mdev->req_lock);
-		mod_timer(&mdev->request_timer, jiffies + et);
+		mod_timer(&mdev->request_timer, now + et);
 		return;
 	}
 
 	le = le->prev;
 	req = list_entry(le, struct drbd_request, tl_requests);
-	if (time_is_before_eq_jiffies(req->start_time + et)) {
-		if (req->rq_state & RQ_NET_PENDING) {
-			dev_warn(DEV, "Remote failed to finish a request within ko-count * timeout\n");
-			_drbd_set_state(_NS(mdev, conn, C_TIMEOUT), CS_VERBOSE, NULL);
-		} else {
-			dev_warn(DEV, "Local backing block device frozen?\n");
-			mod_timer(&mdev->request_timer, jiffies + et);
-		}
-	} else {
-		mod_timer(&mdev->request_timer, req->start_time + et);
-	}
+
+	/* The request is considered timed out, if
+	 * - we have some effective timeout from the configuration,
+	 *   with above state restrictions applied,
+	 * - the oldest request is waiting for a response from the network
+	 *   resp. the local disk,
+	 * - the oldest request is in fact older than the effective timeout,
+	 * - the connection was established (resp. disk was attached)
+	 *   for longer than the timeout already.
+	 * Note that for 32bit jiffies and very stable connections/disks,
+	 * we may have a wrap around, which is caught by
+	 *   !time_in_range(now, last_..._jif, last_..._jif + timeout).
+	 *
+	 * Side effect: once per 32bit wrap-around interval, which means every
+	 * ~198 days with 250 HZ, we have a window where the timeout would need
+	 * to expire twice (worst case) to become effective. Good enough.
+	 */
+	if (ent && req->rq_state & RQ_NET_PENDING &&
+	    time_after(now, req->start_time + ent) &&
+	    !time_in_range(now, mdev->last_reconnect_jif, mdev->last_reconnect_jif + ent)) {
+		dev_warn(DEV, "Remote failed to finish a request within ko-count * timeout\n");
+		_drbd_set_state(_NS(mdev, conn, C_TIMEOUT), CS_VERBOSE | CS_HARD, NULL);
+	}
+	if (dt && req->rq_state & RQ_LOCAL_PENDING &&
+	    time_after(now, req->start_time + dt) &&
+	    !time_in_range(now, mdev->last_reattach_jif, mdev->last_reattach_jif + dt)) {
+		dev_warn(DEV, "Local backing device failed to meet the disk-timeout\n");
+		__drbd_chk_io_error(mdev, 1);
+	}
+	nt = (time_after(now, req->start_time + et) ? now : req->start_time) + et;
 	spin_unlock_irq(&mdev->req_lock);
+	mod_timer(&mdev->request_timer, nt);
 }
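The wrap-around guard deserves a closer look. jiffies comparisons must go through the time_after()/time_in_range() macros, which do the subtraction in signed arithmetic so that a wrapped counter still compares correctly; a plain < would misfire. And 2^32 jiffies at 250 HZ is 4294967296 / 250 / 86400, which is approximately 198.8 days, where the comment's "~198 days" comes from. A userspace sketch using definitions equivalent to the kernel's (<linux/jiffies.h>), specialized to 32-bit wrapping counters:

    #include <stdio.h>

    typedef unsigned int u32jiffies;
    #define time_after(a, b)      ((int)((b) - (a)) < 0)
    #define time_after_eq(a, b)   ((int)((a) - (b)) >= 0)
    #define time_before_eq(a, b)  time_after_eq(b, a)
    #define time_in_range(a, b, c) (time_after_eq(a, b) && time_before_eq(a, c))

    int main(void)
    {
        u32jiffies ent = 10500;          /* 42 s at HZ=250 */
        u32jiffies start = 0xfffff000u;  /* shortly before the wrap */
        u32jiffies now = start + 20000;  /* wraps past 2^32 */

        /* Despite the wrap, the request still looks properly timed out: */
        printf("timed out: %d\n", time_after(now, start + ent));

        /* The last_reconnect_jif guard: if we reconnected more recently
         * than one timeout ago, do not declare a timeout yet. */
        u32jiffies last_reconnect = now - 100;
        printf("freshly reconnected, suppress: %d\n",
               time_in_range(now, last_reconnect, last_reconnect + ent));
        return 0;
    }

The final nt computation then re-arms the timer relative to whichever is later, now or the oldest request's start time, so the timer always fires again within one effective timeout.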