/* earlier variant: the queue owns a single flush_rq whose completion only
 * records the error */
static void req_bio_endio(struct request *rq, struct bio *bio,
			  unsigned int nbytes, int error)
{
	struct request_queue *q = rq->q;

	if (&q->flush_rq != rq) {
		if (error)
			clear_bit(BIO_UPTODATE, &bio->bi_flags);
		else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
			error = -EIO;

		if (unlikely(nbytes > bio->bi_size)) {
			printk(KERN_ERR "%s: want %u bytes done, %u left\n",
			       __func__, nbytes, bio->bi_size);
			nbytes = bio->bi_size;
		}

		if (unlikely(rq->cmd_flags & REQ_QUIET))
			set_bit(BIO_QUIET, &bio->bi_flags);

		bio->bi_size -= nbytes;
		bio->bi_sector += (nbytes >> 9);

		if (bio_integrity(bio))
			bio_integrity_advance(bio, nbytes);

		if (bio->bi_size == 0)
			bio_endio(bio, error);
	} else {
		/*
		 * Okay, this is the sequenced flush request in
		 * progress, just record the error;
		 */
		if (error && !q->flush_err)
			q->flush_err = error;
	}
}
/* reworked variant: there is no special flush_rq anymore; requests that
 * are part of a flush sequence carry REQ_FLUSH_SEQ instead */
static void req_bio_endio(struct request *rq, struct bio *bio,
			  unsigned int nbytes, int error)
{
	if (error)
		clear_bit(BIO_UPTODATE, &bio->bi_flags);
	else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
		error = -EIO;

	if (unlikely(nbytes > bio->bi_size)) {
		printk(KERN_ERR "%s: want %u bytes done, %u left\n",
		       __func__, nbytes, bio->bi_size);
		nbytes = bio->bi_size;
	}

	if (unlikely(rq->cmd_flags & REQ_QUIET))
		set_bit(BIO_QUIET, &bio->bi_flags);

	bio->bi_size -= nbytes;
	bio->bi_sector += (nbytes >> 9);

	if (bio_integrity(bio))
		bio_integrity_advance(bio, nbytes);

	/* don't actually finish bio if it's part of flush sequence */
	if (bio->bi_size == 0 && !(rq->cmd_flags & REQ_FLUSH_SEQ))
		bio_endio(bio, error);
}
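/*
 * A minimal sketch (not from this file; my_dev is hypothetical) of how the
 * arithmetic above is reached: a driver that completes a request in pieces
 * via blk_end_request() lands in req_bio_endio() once per piece. Each call
 * trims bi_size and advances bi_sector by nbytes >> 9 (512-byte sectors),
 * and bio_endio() only fires once bi_size reaches zero.
 */
static irqreturn_t my_dev_irq(int irq, void *data)
{
	struct my_dev *dev = data;

	/* complete 1024 bytes: each bio covered advances by two sectors */
	if (!blk_end_request(dev->current_rq, 0, 1024))
		dev->current_rq = NULL;		/* request fully done */

	return IRQ_HANDLED;
}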
void blk_dump_rq_flags(struct request *rq, char *msg)
{
	/* ... body elided ... */
}
EXPORT_SYMBOL(blk_dump_rq_flags);
/*
 * "plug" the device if there are no outstanding requests: this will
 * force the transfer to start only after we have put all the requests
 * on the list.
 *
 * This is called with interrupts off and no requests on the queue and
 * with the queue lock held.
 */
void blk_plug_device(struct request_queue *q)
{
	WARN_ON(!irqs_disabled());

	/*
	 * don't plug a stopped queue, it must be paired with blk_start_queue()
	 * which will restart the queueing
	 */
	if (blk_queue_stopped(q))
		return;

	if (!queue_flag_test_and_set(QUEUE_FLAG_PLUGGED, q)) {
		mod_timer(&q->unplug_timer, jiffies + q->unplug_delay);
		trace_block_plug(q);
	}
}
EXPORT_SYMBOL(blk_plug_device);
/**
 * blk_plug_device_unlocked - plug a device without queue lock held
 * @q:    The &struct request_queue to plug
 *
 * Description:
 *   Like @blk_plug_device(), but grabs the queue lock and disables
 *   interrupts.
 **/
void blk_plug_device_unlocked(struct request_queue *q)
{
	unsigned long flags;

	spin_lock_irqsave(q->queue_lock, flags);
	blk_plug_device(q);
	spin_unlock_irqrestore(q->queue_lock, flags);
}
EXPORT_SYMBOL(blk_plug_device_unlocked);
/*
 * remove the queue from the plugged list, if present. called with
 * queue lock held and interrupts disabled.
 */
int blk_remove_plug(struct request_queue *q)
{
	WARN_ON(!irqs_disabled());

	if (!queue_flag_test_and_clear(QUEUE_FLAG_PLUGGED, q))
		return 0;

	del_timer(&q->unplug_timer);
	return 1;
}
EXPORT_SYMBOL(blk_remove_plug);
/*
 * remove the plug and let it rip..
 */
void __generic_unplug_device(struct request_queue *q)
{
	if (unlikely(blk_queue_stopped(q)))
		return;
	if (!blk_remove_plug(q) && !blk_queue_nonrot(q))
		return;

	q->request_fn(q);
}
/**
 * generic_unplug_device - fire a request queue
 * @q:    The &struct request_queue in question
 *
 * Description:
 *   Linux uses plugging to build bigger requests queues before letting
 *   the device have at them. If a queue is plugged, the I/O scheduler
 *   is still adding and merging requests on the queue. Once the queue
 *   gets unplugged, the request_fn defined for the queue is invoked and
 *   transfers started.
 **/
void generic_unplug_device(struct request_queue *q)
{
	if (blk_queue_plugged(q)) {
		spin_lock_irq(q->queue_lock);
		__generic_unplug_device(q);
		spin_unlock_irq(q->queue_lock);
	}
}
EXPORT_SYMBOL(generic_unplug_device);
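/*
 * A minimal usage sketch (hypothetical caller): a submitter that cannot
 * wait for the unplug timer queues its bio and then kicks the device
 * explicitly.
 */
static void submit_and_kick(struct block_device *bdev, struct bio *bio)
{
	struct request_queue *q = bdev_get_queue(bdev);

	submit_bio(READ, bio);		/* may land on a plugged queue */
	generic_unplug_device(q);	/* start the transfer right away */
}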
static void blk_backing_dev_unplug(struct backing_dev_info *bdi,
				   struct page *page)
{
	struct request_queue *q = bdi->unplug_io_data;

	blk_unplug(q);
}

void blk_unplug_work(struct work_struct *work)
{
	struct request_queue *q =
		container_of(work, struct request_queue, unplug_work);

	trace_block_unplug_io(q);
	q->unplug_fn(q);
}

void blk_unplug_timeout(unsigned long data)
{
	struct request_queue *q = (struct request_queue *)data;

	trace_block_unplug_timer(q);
	kblockd_schedule_work(q, &q->unplug_work);
}

void blk_unplug(struct request_queue *q)
{
	/*
	 * devices don't necessarily have an ->unplug_fn defined
	 */
	if (q->unplug_fn) {
		trace_block_unplug_io(q);
		q->unplug_fn(q);
	}
}
EXPORT_SYMBOL(blk_unplug);
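/*
 * Note: this unplug path is also reachable through the backing_dev_info;
 * queue allocation points bdi->unplug_io_fn at blk_backing_dev_unplug()
 * above, so a task blocking on a page ends up unplugging the device that
 * holds it.
 */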
static void blk_delay_work(struct work_struct *work)
{
	struct request_queue *q;

	q = container_of(work, struct request_queue, delay_work.work);
	spin_lock_irq(q->queue_lock);
	__blk_run_queue(q);
	spin_unlock_irq(q->queue_lock);
}

/**
 * blk_delay_queue - restart queueing after defined interval
 * @q:		The &struct request_queue in question
 * @msecs:	Delay in msecs
 *
 * Description:
 *   Sometimes queueing needs to be postponed for a little while, to allow
 *   resources to come back. This function will make sure that queueing is
 *   restarted around the specified time.
 */
void blk_delay_queue(struct request_queue *q, unsigned long msecs)
{
	queue_delayed_work(kblockd_workqueue, &q->delay_work,
			   msecs_to_jiffies(msecs));
}
EXPORT_SYMBOL(blk_delay_queue);
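/*
 * A minimal usage sketch (hypothetical my_dev helpers): a request_fn that
 * runs out of device resources requeues the request and asks for the
 * queue to be run again a few milliseconds later.
 */
static void my_request_fn(struct request_queue *q)
{
	struct request *rq;

	while ((rq = blk_fetch_request(q)) != NULL) {
		if (!my_dev_can_issue(q->queuedata)) {	/* hypothetical */
			blk_requeue_request(q, rq);
			blk_delay_queue(q, 3);		/* retry in ~3 ms */
			return;
		}
		my_dev_issue(q->queuedata, rq);		/* hypothetical */
	}
}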
/**
 * blk_start_queue - restart a previously stopped queue
 * ...
 */

/**
 * blk_sync_queue - cancel any pending callbacks on a queue
 * @q: the queue
 *
 * Description:
 *     The block layer may perform asynchronous callback activity
 *     on a queue, such as calling the unplug function after a timeout.
 *     A block device may call blk_sync_queue to ensure that any
 *     such activity is cancelled, thus allowing it to release resources
 *     that the callbacks might use. The caller must already have made sure
 *     that its ->make_request_fn will not re-add plugging prior to calling
 *     this function.
 *
 *     This function does not cancel any asynchronous activity arising
 *     out of elevator or throttling code. That would require elevator_exit()
 *     and blk_throtl_exit() to be called with queue lock initialized.
 */
/* earlier variant */
void blk_sync_queue(struct request_queue *q)
{
	del_timer_sync(&q->unplug_timer);
	del_timer_sync(&q->timeout);
	cancel_work_sync(&q->unplug_work);
	throtl_shutdown_timer_wq(q);
}

/* reworked variant: the unplug timer/work are gone; the delayed-work
 * queue runner is cancelled instead */
void blk_sync_queue(struct request_queue *q)
{
	del_timer_sync(&q->timeout);
	cancel_delayed_work_sync(&q->delay_work);
}
EXPORT_SYMBOL(blk_sync_queue);
/**
 * __blk_run_queue - run a single device queue
 * @q:	The queue to run
 * @force_kblockd: Don't run @q->request_fn directly.  Use kblockd.
 *
 * Description:
 *    See @blk_run_queue. This variant must be called with the queue lock
 *    held and interrupts disabled.
 */
/* earlier variant */
void __blk_run_queue(struct request_queue *q, bool force_kblockd)
{
	if (unlikely(blk_queue_stopped(q)))
		return;

	if (elv_queue_empty(q))
		return;

	/*
	 * Only recurse once to avoid overrunning the stack, let the unplug
	 * handling reinvoke the handler shortly if we already got there.
	 */
	if (!force_kblockd && !queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
		q->request_fn(q);
		queue_flag_clear(QUEUE_FLAG_REENTER, q);
	} else {
		queue_flag_set(QUEUE_FLAG_PLUGGED, q);
		kblockd_schedule_work(q, &q->unplug_work);
	}
}

/* reworked variant: no force_kblockd and no reentrancy dance; async runs
 * go through blk_run_queue_async() below instead */
void __blk_run_queue(struct request_queue *q)
{
	if (unlikely(blk_queue_stopped(q)))
		return;

	q->request_fn(q);
}
EXPORT_SYMBOL(__blk_run_queue);
/**
 * blk_run_queue_async - run a single device queue in workqueue context
 * @q:	The queue to run
 *
 * Description:
 *    Tells kblockd to perform the equivalent of @blk_run_queue on behalf
 *    of us.
 */
void blk_run_queue_async(struct request_queue *q)
{
	if (likely(!blk_queue_stopped(q))) {
		__cancel_delayed_work(&q->delay_work);
		queue_delayed_work(kblockd_workqueue, &q->delay_work, 0);
	}
}
EXPORT_SYMBOL(blk_run_queue_async);
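/*
 * A minimal sketch (my_dev is hypothetical): a completion path that must
 * not recurse into ->request_fn directly defers the queue run to kblockd,
 * which picks it up via blk_delay_work() with zero delay.
 */
static void my_dev_complete(struct my_dev *dev)
{
	blk_run_queue_async(dev->queue);
}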
/**
 * blk_run_queue - run a single device queue
 * @q: The queue to run
 * ...
 */
	setup_timer(&q->backing_dev_info.laptop_mode_wb_timer,
		    laptop_mode_timer_fn, (unsigned long) q);
	init_timer(&q->unplug_timer);			/* earlier variant */
	setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q);
	INIT_LIST_HEAD(&q->timeout_list);
	INIT_LIST_HEAD(&q->pending_flushes);		/* earlier variant */
	INIT_WORK(&q->unplug_work, blk_unplug_work);	/* earlier variant */
	/* the reworked variant initializes the two-stage flush machinery and
	 * the delayed-work queue runner instead of the unplug timer/work: */
	INIT_LIST_HEAD(&q->flush_queue[0]);
	INIT_LIST_HEAD(&q->flush_queue[1]);
	INIT_LIST_HEAD(&q->flush_data_in_flight);
	INIT_DELAYED_WORK(&q->delay_work, blk_delay_work);

	kobject_init(&q->kobj, &blk_queue_ktype);

	mutex_init(&q->sysfs_lock);
	spin_lock_init(&q->__queue_lock);

	/*
	 * By default initialize queue_lock to the internal lock; the driver
	 * can override it later if need be.
	 */
	q->queue_lock = &q->__queue_lock;

	/* ... */
}
EXPORT_SYMBOL(blk_alloc_queue_node);
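/*
 * A minimal sketch of the queue_lock override mentioned above: a driver
 * that wants one lock protecting both its own state and the queue hands
 * that lock to blk_init_queue() instead of keeping the internal
 * __queue_lock. mydev_lock and my_request_fn are hypothetical.
 */
static DEFINE_SPINLOCK(mydev_lock);

static struct request_queue *my_create_queue(void)
{
	return blk_init_queue(my_request_fn, &mydev_lock);
}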
/* ... blk_add_request_payload() body elided ... */
EXPORT_SYMBOL_GPL(blk_add_request_payload);
static bool bio_attempt_back_merge(struct request_queue *q, struct request *req,
				   struct bio *bio)
{
	const int ff = bio->bi_rw & REQ_FAILFAST_MASK;

	if (!ll_back_merge_fn(q, req, bio))
		return false;

	trace_block_bio_backmerge(q, bio);

	if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
		blk_rq_set_mixed_merge(req);

	req->biotail->bi_next = bio;
	req->biotail = bio;
	req->__data_len += bio->bi_size;
	req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));

	drive_stat_acct(req, 0);
	elv_bio_merged(q, req, bio);
	return true;
}

static bool bio_attempt_front_merge(struct request_queue *q,
				    struct request *req, struct bio *bio)
{
	const int ff = bio->bi_rw & REQ_FAILFAST_MASK;

	if (!ll_front_merge_fn(q, req, bio))
		return false;

	trace_block_bio_frontmerge(q, bio);

	if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
		blk_rq_set_mixed_merge(req);

	bio->bi_next = req->bio;
	req->bio = bio;

	/*
	 * req->buffer may not be valid: if the low level driver said
	 * it didn't need a bounce buffer then it had better not touch
	 * req->buffer either...
	 */
	req->buffer = bio_data(bio);
	req->__sector = bio->bi_sector;
	req->__data_len += bio->bi_size;
	req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));

	drive_stat_acct(req, 0);
	elv_bio_merged(q, req, bio);
	return true;
}

/*
 * Attempts to merge with the plugged list in the current process. Returns
 * true if the merge was successful, otherwise false.
 */
static bool attempt_plug_merge(struct task_struct *tsk, struct request_queue *q,
			       struct bio *bio)
{
	struct blk_plug *plug;
	struct request *rq;
	bool ret = false;

	plug = tsk->plug;
	if (!plug)
		goto out;

	list_for_each_entry_reverse(rq, &plug->list, queuelist) {
		int el_ret;

		if (rq->q != q)
			continue;

		el_ret = elv_try_merge(rq, bio);
		if (el_ret == ELEVATOR_BACK_MERGE) {
			ret = bio_attempt_back_merge(q, rq, bio);
			if (ret)
				break;
		} else if (el_ret == ELEVATOR_FRONT_MERGE) {
			ret = bio_attempt_front_merge(q, rq, bio);
			if (ret)
				break;
		}
	}
out:
	return ret;
}
void init_request_from_bio(struct request *req, struct bio *bio)
{
	req->cpu = bio->bi_comp_cpu;
	/* ... cmd flags, errors, sector and priority copied from the bio ... */
	blk_rq_bio_prep(req->q, req, bio);
}

/*
 * Only disable plugging for non-rotational devices if they also do
 * tagging; otherwise we still need the proper merging.
 */
static inline bool queue_should_plug(struct request_queue *q)
{
	return !(blk_queue_nonrot(q) && blk_queue_tagged(q));
}
static int __make_request(struct request_queue *q, struct bio *bio)
{
	const bool sync = !!(bio->bi_rw & REQ_SYNC);
	struct request *req;
	int el_ret, rw_flags;
	int where = ELEVATOR_INSERT_SORT;
	struct blk_plug *plug;				/* reworked variant */
	/* earlier-variant locals: */
	unsigned int bytes = bio->bi_size;
	const unsigned short prio = bio_prio(bio);
	const bool unplug = !!(bio->bi_rw & REQ_UNPLUG);
	const unsigned long ff = bio->bi_rw & REQ_FAILFAST_MASK;
	/*
	 * low level driver can indicate that it wants pages above a
	 * certain limit bounced to low memory (ie for highmem, or even
	 * ISA dma in theory)
	 */
	blk_queue_bounce(q, &bio);

	spin_lock_irq(q->queue_lock);		/* earlier variant locks here */

	if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) {
		where = ELEVATOR_INSERT_FRONT;	/* earlier variant */
		/* reworked variant takes the lock here and uses the
		 * dedicated flush insertion point: */
		spin_lock_irq(q->queue_lock);
		where = ELEVATOR_INSERT_FLUSH;
		goto get_rq;
	}

	if (elv_queue_empty(q))			/* earlier variant */
		blk_plug_device(q);

	/*
	 * Check if we can merge with the plugged list before grabbing
	 * any locks (reworked variant).
	 */
	if (attempt_plug_merge(current, q, bio))
		goto out;

	spin_lock_irq(q->queue_lock);
1242
el_ret = elv_merge(q, &req, bio);
1232
case ELEVATOR_BACK_MERGE:
1233
BUG_ON(!rq_mergeable(req));
1235
if (!ll_back_merge_fn(q, req, bio))
1238
trace_block_bio_backmerge(q, bio);
1240
if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
1241
blk_rq_set_mixed_merge(req);
1243
req->biotail->bi_next = bio;
1245
req->__data_len += bytes;
1246
req->ioprio = ioprio_best(req->ioprio, prio);
1247
if (!blk_rq_cpu_valid(req))
1248
req->cpu = bio->bi_comp_cpu;
1249
drive_stat_acct(req, 0);
1250
elv_bio_merged(q, req, bio);
1251
if (!attempt_back_merge(q, req))
1252
elv_merged_request(q, req, el_ret);
1255
case ELEVATOR_FRONT_MERGE:
1256
BUG_ON(!rq_mergeable(req));
1258
if (!ll_front_merge_fn(q, req, bio))
1261
trace_block_bio_frontmerge(q, bio);
1263
if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) {
1264
blk_rq_set_mixed_merge(req);
1265
req->cmd_flags &= ~REQ_FAILFAST_MASK;
1266
req->cmd_flags |= ff;
1269
bio->bi_next = req->bio;
1273
* may not be valid. if the low level driver said
1274
* it didn't need a bounce buffer then it better
1275
* not touch req->buffer either...
1277
req->buffer = bio_data(bio);
1278
req->__sector = bio->bi_sector;
1279
req->__data_len += bytes;
1280
req->ioprio = ioprio_best(req->ioprio, prio);
1281
if (!blk_rq_cpu_valid(req))
1282
req->cpu = bio->bi_comp_cpu;
1283
drive_stat_acct(req, 0);
1284
elv_bio_merged(q, req, bio);
1285
if (!attempt_front_merge(q, req))
1286
elv_merged_request(q, req, el_ret);
1289
/* ELV_NO_MERGE: elevator says don't/can't merge. */
	/* reworked variant: the switch collapses onto the merge helpers */
	if (el_ret == ELEVATOR_BACK_MERGE) {
		if (bio_attempt_back_merge(q, req, bio)) {
			if (!attempt_back_merge(q, req))
				elv_merged_request(q, req, el_ret);
			goto out_unlock;
		}
	} else if (el_ret == ELEVATOR_FRONT_MERGE) {
		if (bio_attempt_front_merge(q, req, bio)) {
			if (!attempt_front_merge(q, req))
				elv_merged_request(q, req, el_ret);
			goto out_unlock;
		}
	}
get_rq:
	/* ... rw_flags set up and the request allocated with
	 * get_request_wait(q, rw_flags, bio) ... */

	init_request_from_bio(req, bio);

	/* --- earlier variant tail: queue-level plugging --- */
	spin_lock_irq(q->queue_lock);
	if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) ||
	    bio_flagged(bio, BIO_CPU_AFFINE))
		req->cpu = blk_cpu_to_group(smp_processor_id());
	if (queue_should_plug(q) && elv_queue_empty(q))
		blk_plug_device(q);

	/* insert the request into the elevator */
	drive_stat_acct(req, 1);
	__elv_add_request(q, req, where, 0);
out:
	if (unplug || !queue_should_plug(q))
		__generic_unplug_device(q);
	spin_unlock_irq(q->queue_lock);
	return 0;

	/* --- reworked variant tail: on-stack plugging --- */
	if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) ||
	    bio_flagged(bio, BIO_CPU_AFFINE)) {
		req->cpu = blk_cpu_to_group(get_cpu());
		put_cpu();
	}

	plug = current->plug;
	if (plug) {
		/*
		 * If this is the first request added after a plug, fire
		 * off a plug trace. If others have been added before, check
		 * if we have multiple devices in this plug. If so, make a
		 * note to sort the list before dispatch.
		 */
		if (list_empty(&plug->list))
			trace_block_plug(q);
		else if (!plug->should_sort) {
			struct request *__rq;

			__rq = list_entry_rq(plug->list.prev);
			if (__rq->q != q)
				plug->should_sort = 1;
		}
		list_add_tail(&req->queuelist, &plug->list);
		drive_stat_acct(req, 1);
	} else {
		spin_lock_irq(q->queue_lock);
		add_acct_request(q, req, where);
		__blk_run_queue(q);
out_unlock:
		spin_unlock_irq(q->queue_lock);
	}
out:
	return 0;
}
	if (error && req->cmd_type == REQ_TYPE_FS &&
	    !(req->cmd_flags & REQ_QUIET)) {
		char *error_type;

		/* the earlier variant printed a fixed message:
		 *
		 *	printk(KERN_ERR "end_request: I/O error, dev %s, sector %llu\n",
		 *			req->rq_disk ? req->rq_disk->disk_name : "?",
		 *			(unsigned long long)blk_rq_pos(req));
		 *
		 * the reworked variant names the error class instead:
		 */
		switch (error) {
		case -ENOLINK:
			error_type = "recoverable transport";
			break;
		case -EREMOTEIO:
			error_type = "critical target";
			break;
		case -EBADE:
			error_type = "critical nexus";
			break;
		case -EIO:
		default:
			error_type = "I/O";
			break;
		}
		printk(KERN_ERR "end_request: %s error, dev %s, sector %llu\n",
		       error_type, req->rq_disk ? req->rq_disk->disk_name : "?",
		       (unsigned long long)blk_rq_pos(req));
	}

	blk_account_io_completion(req, nr_bytes);
EXPORT_SYMBOL(kblockd_schedule_work);

int kblockd_schedule_delayed_work(struct request_queue *q,
				  struct delayed_work *dwork,
				  unsigned long delay)
{
	return queue_delayed_work(kblockd_workqueue, dwork, delay);
}
EXPORT_SYMBOL(kblockd_schedule_delayed_work);
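/*
 * A minimal usage sketch (hypothetical poll_dwork member): defer some
 * queue-related work to kblockd instead of tying up the caller's context.
 */
static void my_dev_arm_poll(struct request_queue *q, struct my_dev *dev)
{
	kblockd_schedule_delayed_work(q, &dev->poll_dwork,
				      msecs_to_jiffies(10));
}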
#define PLUG_MAGIC	0x91827364

void blk_start_plug(struct blk_plug *plug)
{
	struct task_struct *tsk = current;

	plug->magic = PLUG_MAGIC;
	INIT_LIST_HEAD(&plug->list);
	INIT_LIST_HEAD(&plug->cb_list);
	plug->should_sort = 0;

	/*
	 * If this is a nested plug, don't actually assign it. It will be
	 * flushed on its own.
	 */
	if (!tsk->plug) {
		/*
		 * Store ordering should not be needed here, since a potential
		 * preempt will imply a full memory barrier
		 */
		tsk->plug = plug;
	}
}
EXPORT_SYMBOL(blk_start_plug);
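/*
 * A minimal sketch of on-stack plugging from the submitter's side (the
 * bios and their setup are assumed): requests gather on plug->list and
 * are dispatched in one batch when the plug is finished.
 */
static void submit_batch(struct bio **bios, int nr)
{
	struct blk_plug plug;
	int i;

	blk_start_plug(&plug);			/* current->plug = &plug */
	for (i = 0; i < nr; i++)
		submit_bio(WRITE, bios[i]);	/* queued on the plug list */
	blk_finish_plug(&plug);			/* sort if needed, dispatch */
}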
static int plug_rq_cmp(void *priv, struct list_head *a, struct list_head *b)
{
	struct request *rqa = container_of(a, struct request, queuelist);
	struct request *rqb = container_of(b, struct request, queuelist);

	return !(rqa->q <= rqb->q);
}
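/*
 * Note: list_sort() only cares about the sign of the return value, so the
 * 0-or-1 result of !(rqa->q <= rqb->q) is enough to order requests by
 * queue pointer. Same-queue requests then sit adjacent in the list, so
 * blk_flush_plug_list() below takes each queue_lock once per run of
 * requests rather than once per request.
 */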
/*
 * If 'from_schedule' is true, then postpone the dispatch of requests
 * until a safe kblockd context. We do this to avoid large, unintended
 * additional stack usage in driver dispatch, in places where the original
 * plugger did not intend it.
 */
static void queue_unplugged(struct request_queue *q, unsigned int depth,
			    bool from_schedule)
	__releases(q->queue_lock)
{
	trace_block_unplug(q, depth, !from_schedule);

	/*
	 * If we are punting this to kblockd, then we can safely drop
	 * the queue_lock before waking kblockd (which needs to take
	 * this lock).
	 */
	if (from_schedule) {
		spin_unlock(q->queue_lock);
		blk_run_queue_async(q);
	} else {
		__blk_run_queue(q);
		spin_unlock(q->queue_lock);
	}
}
static void flush_plug_callbacks(struct blk_plug *plug)
{
	LIST_HEAD(callbacks);

	if (list_empty(&plug->cb_list))
		return;

	list_splice_init(&plug->cb_list, &callbacks);

	while (!list_empty(&callbacks)) {
		struct blk_plug_cb *cb = list_first_entry(&callbacks,
							  struct blk_plug_cb,
							  list);
		list_del(&cb->list);
		cb->callback(cb);
	}
}
void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
{
	struct request_queue *q;
	unsigned long flags;
	struct request *rq;
	LIST_HEAD(list);
	unsigned int depth;

	BUG_ON(plug->magic != PLUG_MAGIC);

	flush_plug_callbacks(plug);
	if (list_empty(&plug->list))
		return;

	list_splice_init(&plug->list, &list);

	if (plug->should_sort) {
		list_sort(NULL, &list, plug_rq_cmp);
		plug->should_sort = 0;
	}

	q = NULL;
	depth = 0;

	/*
	 * Save and disable interrupts here, to avoid doing it for every
	 * queue lock we have to take.
	 */
	local_irq_save(flags);
	while (!list_empty(&list)) {
		rq = list_entry_rq(list.next);
		list_del_init(&rq->queuelist);
		BUG_ON(!rq->q);
		if (rq->q != q) {
			/*
			 * This drops the queue lock
			 */
			if (q)
				queue_unplugged(q, depth, from_schedule);
			q = rq->q;
			depth = 0;
			spin_lock(q->queue_lock);
		}

		/*
		 * rq is already accounted, so use raw insert
		 */
		if (rq->cmd_flags & (REQ_FLUSH | REQ_FUA))
			__elv_add_request(q, rq, ELEVATOR_INSERT_FLUSH);
		else
			__elv_add_request(q, rq, ELEVATOR_INSERT_SORT_MERGE);

		depth++;
	}

	/*
	 * This drops the queue lock
	 */
	if (q)
		queue_unplugged(q, depth, from_schedule);

	local_irq_restore(flags);
}
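/*
 * The from_schedule == true path is taken when a plugged task blocks: the
 * scheduler flushes the task's plug via blk_schedule_flush_plug(), so the
 * list above is dispatched from kblockd (see queue_unplugged()) rather
 * than from the sleeping task's stack.
 */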
void blk_finish_plug(struct blk_plug *plug)
{
	blk_flush_plug_list(plug, false);

	if (plug == current->plug)
		current->plug = NULL;
}
EXPORT_SYMBOL(blk_finish_plug);
int __init blk_dev_init(void)
{
	BUILD_BUG_ON(__REQ_NR_BITS > 8 *
			sizeof(((struct request *)0)->cmd_flags));
	/* ... */