/*
 * Copyright (C) 2016 Red Hat, Inc. All rights reserved.
 *
 * This file is released under the GPL.
 */

#include "dm-core.h"
#include "dm-rq.h"

#include <linux/elevator.h> /* for rq_end_sector() */
#include <linux/blk-mq.h>

#define DM_MSG_PREFIX "core-rq"

#define DM_MQ_NR_HW_QUEUES 1
#define DM_MQ_QUEUE_DEPTH 2048
static unsigned dm_mq_nr_hw_queues = DM_MQ_NR_HW_QUEUES;
static unsigned dm_mq_queue_depth = DM_MQ_QUEUE_DEPTH;

/*
 * Request-based DM's mempools' reserved IOs set by the user.
 */
#define RESERVED_REQUEST_BASED_IOS      256
static unsigned reserved_rq_based_ios = RESERVED_REQUEST_BASED_IOS;

#ifdef CONFIG_DM_MQ_DEFAULT
static bool use_blk_mq = true;
#else
static bool use_blk_mq = false;
#endif

bool dm_use_blk_mq_default(void)
{
        return use_blk_mq;
}

bool dm_use_blk_mq(struct mapped_device *md)
{
        return md->use_blk_mq;
}
EXPORT_SYMBOL_GPL(dm_use_blk_mq);

unsigned dm_get_reserved_rq_based_ios(void)
{
        return __dm_get_module_param(&reserved_rq_based_ios,
                                     RESERVED_REQUEST_BASED_IOS, DM_RESERVED_MAX_IOS);
}
EXPORT_SYMBOL_GPL(dm_get_reserved_rq_based_ios);

static unsigned dm_get_blk_mq_nr_hw_queues(void)
{
        return __dm_get_module_param(&dm_mq_nr_hw_queues, 1, 32);
}

static unsigned dm_get_blk_mq_queue_depth(void)
{
        return __dm_get_module_param(&dm_mq_queue_depth,
                                     DM_MQ_QUEUE_DEPTH, BLK_MQ_MAX_DEPTH);
}

int dm_request_based(struct mapped_device *md)
{
        return blk_queue_stackable(md->queue);
}

static void dm_old_start_queue(struct request_queue *q)
{
        unsigned long flags;

        spin_lock_irqsave(q->queue_lock, flags);
        if (blk_queue_stopped(q))
                blk_start_queue(q);
        spin_unlock_irqrestore(q->queue_lock, flags);
}

static void dm_mq_start_queue(struct request_queue *q)
{
        blk_mq_start_stopped_hw_queues(q, true);
        blk_mq_kick_requeue_list(q);
}

void dm_start_queue(struct request_queue *q)
{
        if (!q->mq_ops)
                dm_old_start_queue(q);
        else
                dm_mq_start_queue(q);
}

static void dm_old_stop_queue(struct request_queue *q)
{
        unsigned long flags;

        spin_lock_irqsave(q->queue_lock, flags);
        if (!blk_queue_stopped(q))
                blk_stop_queue(q);
        spin_unlock_irqrestore(q->queue_lock, flags);
}

static void dm_mq_stop_queue(struct request_queue *q)
{
        if (blk_mq_queue_stopped(q))
                return;

        blk_mq_quiesce_queue(q);
}

void dm_stop_queue(struct request_queue *q)
{
        if (!q->mq_ops)
                dm_old_stop_queue(q);
        else
                dm_mq_stop_queue(q);
}

static struct dm_rq_target_io *alloc_old_rq_tio(struct mapped_device *md,
                                                gfp_t gfp_mask)
{
        return mempool_alloc(md->io_pool, gfp_mask);
}

static void free_old_rq_tio(struct dm_rq_target_io *tio)
{
        mempool_free(tio, tio->md->io_pool);
}

static struct request *alloc_old_clone_request(struct mapped_device *md,
                                               gfp_t gfp_mask)
{
        return mempool_alloc(md->rq_pool, gfp_mask);
}

static void free_old_clone_request(struct mapped_device *md, struct request *rq)
{
        mempool_free(rq, md->rq_pool);
}

/*
 * Partial completion handling for request-based dm
 */
static void end_clone_bio(struct bio *clone)
{
        struct dm_rq_clone_bio_info *info =
                container_of(clone, struct dm_rq_clone_bio_info, clone);
        struct dm_rq_target_io *tio = info->tio;
        struct bio *bio = info->orig;
        unsigned int nr_bytes = info->orig->bi_iter.bi_size;
        int error = clone->bi_error;

        bio_put(clone);

        if (tio->error)
                /*
                 * An error has already been detected on the request.
                 * Once an error has occurred, just let clone->end_io()
                 * handle the remainder.
                 */
                return;
        else if (error) {
                /*
                 * Don't report the error to the upper layer yet.
                 * The error handling decision is made by the target driver
                 * when the request is completed.
                 */
                tio->error = error;
                return;
        }

        /*
         * I/O for the bio successfully completed.
         * Report the data completion to the upper layer.
         */

        /*
         * bios are processed from the head of the list.
         * So the completing bio should always be rq->bio.
         * If it's not, something is wrong.
         */
        if (tio->orig->bio != bio)
                DMERR("bio completion is going in the middle of the request");

        /*
         * Update the original request.
         * Do not use blk_end_request() here, because it may complete
         * the original request before the clone, and break the ordering.
         */
        blk_update_request(tio->orig, 0, nr_bytes);
}

static struct dm_rq_target_io *tio_from_request(struct request *rq)
{
        return (rq->q->mq_ops ? blk_mq_rq_to_pdu(rq) : rq->special);
}

static void rq_end_stats(struct mapped_device *md, struct request *orig)
{
        if (unlikely(dm_stats_used(&md->stats))) {
                struct dm_rq_target_io *tio = tio_from_request(orig);
                tio->duration_jiffies = jiffies - tio->duration_jiffies;
                dm_stats_account_io(&md->stats, rq_data_dir(orig),
                                    blk_rq_pos(orig), tio->n_sectors, true,
                                    tio->duration_jiffies, &tio->stats_aux);
        }
}

/*
 * Don't touch any member of the md after calling this function because
 * the md may be freed in dm_put() at the end of this function.
 * Or do dm_get() before calling this function and dm_put() later.
 */
static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
{
        atomic_dec(&md->pending[rw]);

        /* nudge anyone waiting on suspend queue */
        if (!md_in_flight(md))
                wake_up(&md->wait);

        /*
         * Run this off this callpath, as drivers could invoke end_io while
         * inside their request_fn (and holding the queue lock). Calling
         * back into ->request_fn() could deadlock attempting to grab the
         * queue lock again.
         */
        if (!md->queue->mq_ops && run_queue)
                blk_run_queue_async(md->queue);

        /*
         * dm_put() must be at the end of this function. See the comment above
         */
        dm_put(md);
}

static void free_rq_clone(struct request *clone)
{
        struct dm_rq_target_io *tio = clone->end_io_data;
        struct mapped_device *md = tio->md;

        blk_rq_unprep_clone(clone);

        /*
         * It is possible for a clone_old_rq() allocated clone to
         * get passed in -- it may not yet have a request_queue.
         * This is known to occur if the error target replaces
         * a multipath target that has a request_fn queue stacked
         * on blk-mq queue(s).
         */
        if (clone->q && clone->q->mq_ops)
                /* stacked on blk-mq queue(s) */
                tio->ti->type->release_clone_rq(clone);
        else if (!md->queue->mq_ops)
                /* request_fn queue stacked on request_fn queue(s) */
                free_old_clone_request(md, clone);

        if (!md->queue->mq_ops)
                free_old_rq_tio(tio);
}

/*
 * Complete the clone and the original request.
 * Must be called without clone's queue lock held,
 * see end_clone_request() for more details.
 */
static void dm_end_request(struct request *clone, int error)
{
        int rw = rq_data_dir(clone);
        struct dm_rq_target_io *tio = clone->end_io_data;
        struct mapped_device *md = tio->md;
        struct request *rq = tio->orig;

        if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
                rq->errors = clone->errors;
                rq->resid_len = clone->resid_len;

                if (rq->sense)
                        /*
                         * We are using the sense buffer of the original
                         * request.
                         * So setting the length of the sense data is enough.
                         */
                        rq->sense_len = clone->sense_len;
        }

        free_rq_clone(clone);
        rq_end_stats(md, rq);
        if (!rq->q->mq_ops)
                blk_end_request_all(rq, error);
        else
                blk_mq_end_request(rq, error);
        rq_completed(md, rw, true);
}

static void dm_unprep_request(struct request *rq)
{
        struct dm_rq_target_io *tio = tio_from_request(rq);
        struct request *clone = tio->clone;

        if (!rq->q->mq_ops) {
                rq->special = NULL;
                rq->rq_flags &= ~RQF_DONTPREP;
        }

        if (clone)
                free_rq_clone(clone);
        else if (!tio->md->queue->mq_ops)
                free_old_rq_tio(tio);
}

/*
 * Requeue the original request of a clone.
 */
static void dm_old_requeue_request(struct request *rq)
{
        struct request_queue *q = rq->q;
        unsigned long flags;

        spin_lock_irqsave(q->queue_lock, flags);
        blk_requeue_request(q, rq);
        blk_run_queue_async(q);
        spin_unlock_irqrestore(q->queue_lock, flags);
}

static void __dm_mq_kick_requeue_list(struct request_queue *q, unsigned long msecs)
{
        blk_mq_delay_kick_requeue_list(q, msecs);
}

void dm_mq_kick_requeue_list(struct mapped_device *md)
{
        __dm_mq_kick_requeue_list(dm_get_md_queue(md), 0);
}
EXPORT_SYMBOL(dm_mq_kick_requeue_list);

static void dm_mq_delay_requeue_request(struct request *rq, unsigned long msecs)
{
        blk_mq_requeue_request(rq, false);
        __dm_mq_kick_requeue_list(rq->q, msecs);
}

static void dm_requeue_original_request(struct dm_rq_target_io *tio, bool delay_requeue)
{
        struct mapped_device *md = tio->md;
        struct request *rq = tio->orig;
        int rw = rq_data_dir(rq);

        rq_end_stats(md, rq);
        dm_unprep_request(rq);

        if (!rq->q->mq_ops)
                dm_old_requeue_request(rq);
        else
                dm_mq_delay_requeue_request(rq, delay_requeue ? 5000 : 0);

        rq_completed(md, rw, false);
}

static void dm_done(struct request *clone, int error, bool mapped)
{
        int r = error;
        struct dm_rq_target_io *tio = clone->end_io_data;
        dm_request_endio_fn rq_end_io = NULL;

        if (tio->ti) {
                rq_end_io = tio->ti->type->rq_end_io;

                if (mapped && rq_end_io)
                        r = rq_end_io(tio->ti, clone, error, &tio->info);
        }

        if (unlikely(r == -EREMOTEIO && (req_op(clone) == REQ_OP_WRITE_SAME) &&
                     !clone->q->limits.max_write_same_sectors))
                disable_write_same(tio->md);

        if (r <= 0)
                /* The target wants to complete the I/O */
                dm_end_request(clone, r);
        else if (r == DM_ENDIO_INCOMPLETE)
                /* The target will handle the I/O */
                return;
        else if (r == DM_ENDIO_REQUEUE)
                /* The target wants to requeue the I/O */
                dm_requeue_original_request(tio, false);
        else {
                DMWARN("unimplemented target endio return value: %d", r);
                BUG();
        }
}

/*
 * Request completion handler for request-based dm
 */
static void dm_softirq_done(struct request *rq)
{
        bool mapped = true;
        struct dm_rq_target_io *tio = tio_from_request(rq);
        struct request *clone = tio->clone;
        int rw;

        if (!clone) {
                rq_end_stats(tio->md, rq);
                rw = rq_data_dir(rq);
                if (!rq->q->mq_ops) {
                        blk_end_request_all(rq, tio->error);
                        rq_completed(tio->md, rw, false);
                        free_old_rq_tio(tio);
                } else {
                        blk_mq_end_request(rq, tio->error);
                        rq_completed(tio->md, rw, false);
                }
                return;
        }

        if (rq->rq_flags & RQF_FAILED)
                mapped = false;

        dm_done(clone, tio->error, mapped);
}

/*
 * Complete the clone and the original request with the error status
 * through softirq context.
 */
static void dm_complete_request(struct request *rq, int error)
{
        struct dm_rq_target_io *tio = tio_from_request(rq);

        tio->error = error;
        if (!rq->q->mq_ops)
                blk_complete_request(rq);
        else
                blk_mq_complete_request(rq, error);
}

/*
 * Complete the unmapped clone and the original request with the error status
 * through softirq context.
 * The target's rq_end_io() function isn't called.
 * This may be used when the target's map_rq() or clone_and_map_rq() functions fail.
 */
static void dm_kill_unmapped_request(struct request *rq, int error)
{
        rq->rq_flags |= RQF_FAILED;
        dm_complete_request(rq, error);
}

/*
 * Called with the clone's queue lock held (in the case of .request_fn)
 */
static void end_clone_request(struct request *clone, int error)
{
        struct dm_rq_target_io *tio = clone->end_io_data;

        if (!clone->q->mq_ops) {
                /*
                 * This is just for cleaning up the information of the
                 * queue in which the clone was dispatched.
                 * The clone is *NOT* actually freed here because it is
                 * allocated from dm's own mempool (RQF_ALLOCED isn't set).
                 */
                __blk_put_request(clone->q, clone);
        }

        /*
         * Actual request completion is done in a softirq context which doesn't
         * hold the clone's queue lock.  Otherwise, deadlock could occur because:
         *     - another request may be submitted by the upper-level driver
         *       of the stack during the completion
         *     - the submission, which requires the queue lock, may be done
         *       against this clone's queue
         */
        dm_complete_request(tio->orig, error);
}

static void dm_dispatch_clone_request(struct request *clone, struct request *rq)
{
        int r;

        if (blk_queue_io_stat(clone->q))
                clone->rq_flags |= RQF_IO_STAT;

        clone->start_time = jiffies;
        r = blk_insert_cloned_request(clone->q, clone);
        if (r)
                /* must complete clone in terms of original request */
                dm_complete_request(rq, r);
}

static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig,
                                 void *data)
{
        struct dm_rq_target_io *tio = data;
        struct dm_rq_clone_bio_info *info =
                container_of(bio, struct dm_rq_clone_bio_info, clone);

        info->orig = bio_orig;
        info->tio = tio;
        bio->bi_end_io = end_clone_bio;

        return 0;
}

static int setup_clone(struct request *clone, struct request *rq,
                       struct dm_rq_target_io *tio, gfp_t gfp_mask)
{
        int r;

        r = blk_rq_prep_clone(clone, rq, tio->md->bs, gfp_mask,
                              dm_rq_bio_constructor, tio);
        if (r)
                return r;

        clone->cmd = rq->cmd;
        clone->cmd_len = rq->cmd_len;
        clone->sense = rq->sense;
        clone->end_io = end_clone_request;
        clone->end_io_data = tio;

        tio->clone = clone;

        return 0;
}

static struct request *clone_old_rq(struct request *rq, struct mapped_device *md,
                                    struct dm_rq_target_io *tio, gfp_t gfp_mask)
{
        /*
         * Create clone for use with .request_fn request_queue
         */
        struct request *clone;

        clone = alloc_old_clone_request(md, gfp_mask);
        if (!clone)
                return NULL;

        blk_rq_init(NULL, clone);
        if (setup_clone(clone, rq, tio, gfp_mask)) {
                /* -ENOMEM */
                free_old_clone_request(md, clone);
                return NULL;
        }

        return clone;
}

static void map_tio_request(struct kthread_work *work);

static void init_tio(struct dm_rq_target_io *tio, struct request *rq,
                     struct mapped_device *md)
{
        tio->md = md;
        tio->ti = NULL;
        tio->clone = NULL;
        tio->orig = rq;
        tio->error = 0;
        /*
         * Avoid initializing info for blk-mq; it passes
         * target-specific data through info.ptr
         * (see: dm_mq_init_request)
         */
        if (!md->init_tio_pdu)
                memset(&tio->info, 0, sizeof(tio->info));
        if (md->kworker_task)
                kthread_init_work(&tio->work, map_tio_request);
}

static struct dm_rq_target_io *dm_old_prep_tio(struct request *rq,
                                               struct mapped_device *md,
                                               gfp_t gfp_mask)
{
        struct dm_rq_target_io *tio;
        int srcu_idx;
        struct dm_table *table;

        tio = alloc_old_rq_tio(md, gfp_mask);
        if (!tio)
                return NULL;

        init_tio(tio, rq, md);

        table = dm_get_live_table(md, &srcu_idx);
        /*
         * Must clone a request if this .request_fn DM device
         * is stacked on .request_fn device(s).
         */
        if (!dm_table_all_blk_mq_devices(table)) {
                if (!clone_old_rq(rq, md, tio, gfp_mask)) {
                        dm_put_live_table(md, srcu_idx);
                        free_old_rq_tio(tio);
                        return NULL;
                }
        }
        dm_put_live_table(md, srcu_idx);

        return tio;
}

/*
 * Called with the queue lock held.
 */
static int dm_old_prep_fn(struct request_queue *q, struct request *rq)
{
        struct mapped_device *md = q->queuedata;
        struct dm_rq_target_io *tio;

        if (unlikely(rq->special)) {
                DMWARN("Already has something in rq->special.");
                return BLKPREP_KILL;
        }

        tio = dm_old_prep_tio(rq, md, GFP_ATOMIC);
        if (!tio)
                return BLKPREP_DEFER;

        rq->special = tio;
        rq->rq_flags |= RQF_DONTPREP;

        return BLKPREP_OK;
}

/*
 * Returns:
 * DM_MAPIO_*       : the request has been processed as indicated
 * DM_MAPIO_REQUEUE : the original request needs to be immediately requeued
 * < 0              : the request was completed due to failure
 */
static int map_request(struct dm_rq_target_io *tio)
{
        int r;
        struct dm_target *ti = tio->ti;
        struct mapped_device *md = tio->md;
        struct request *rq = tio->orig;
        struct request *clone = NULL;

        if (tio->clone) {
                clone = tio->clone;
                r = ti->type->map_rq(ti, clone, &tio->info);
                if (r == DM_MAPIO_DELAY_REQUEUE)
                        return DM_MAPIO_REQUEUE; /* .request_fn requeue is always immediate */
        } else {
                r = ti->type->clone_and_map_rq(ti, rq, &tio->info, &clone);
                if (r < 0) {
                        /* The target wants to complete the I/O */
                        dm_kill_unmapped_request(rq, r);
                        return r;
                }
                if (r == DM_MAPIO_REMAPPED &&
                    setup_clone(clone, rq, tio, GFP_ATOMIC)) {
                        /* -ENOMEM */
                        ti->type->release_clone_rq(clone);
                        return DM_MAPIO_REQUEUE;
                }
        }

        switch (r) {
        case DM_MAPIO_SUBMITTED:
                /* The target has taken the I/O to submit by itself later */
                break;
        case DM_MAPIO_REMAPPED:
                /* The target has remapped the I/O so dispatch it */
                trace_block_rq_remap(clone->q, clone, disk_devt(dm_disk(md)),
                                     blk_rq_pos(rq));
                dm_dispatch_clone_request(clone, rq);
                break;
        case DM_MAPIO_REQUEUE:
                /* The target wants to requeue the I/O */
                break;
        case DM_MAPIO_DELAY_REQUEUE:
                /* The target wants to requeue the I/O after a delay */
                dm_requeue_original_request(tio, true);
                break;
        default:
                if (r > 0) {
                        DMWARN("unimplemented target map return value: %d", r);
                        BUG();
                }

                /* The target wants to complete the I/O */
                dm_kill_unmapped_request(rq, r);
        }

        return r;
}

static void dm_start_request(struct mapped_device *md, struct request *orig)
{
        if (!orig->q->mq_ops)
                blk_start_request(orig);
        else
                blk_mq_start_request(orig);
        atomic_inc(&md->pending[rq_data_dir(orig)]);

        if (md->seq_rq_merge_deadline_usecs) {
                md->last_rq_pos = rq_end_sector(orig);
                md->last_rq_rw = rq_data_dir(orig);
                md->last_rq_start_time = ktime_get();
        }

        if (unlikely(dm_stats_used(&md->stats))) {
                struct dm_rq_target_io *tio = tio_from_request(orig);
                tio->duration_jiffies = jiffies;
                tio->n_sectors = blk_rq_sectors(orig);
                dm_stats_account_io(&md->stats, rq_data_dir(orig),
                                    blk_rq_pos(orig), tio->n_sectors, false, 0,
                                    &tio->stats_aux);
        }

        /*
         * Hold the md reference here for the in-flight I/O.
         * We can't rely on the reference count held by the device opener,
         * because the device may be closed during the request completion
         * when all bios are completed.
         * See the comment in rq_completed() too.
         */
        dm_get(md);
}

static void map_tio_request(struct kthread_work *work)
{
        struct dm_rq_target_io *tio = container_of(work, struct dm_rq_target_io, work);

        if (map_request(tio) == DM_MAPIO_REQUEUE)
                dm_requeue_original_request(tio, false);
}

ssize_t dm_attr_rq_based_seq_io_merge_deadline_show(struct mapped_device *md, char *buf)
{
        return sprintf(buf, "%u\n", md->seq_rq_merge_deadline_usecs);
}

#define MAX_SEQ_RQ_MERGE_DEADLINE_USECS 100000

ssize_t dm_attr_rq_based_seq_io_merge_deadline_store(struct mapped_device *md,
                                                     const char *buf, size_t count)
{
        unsigned deadline;

        if (dm_get_md_type(md) != DM_TYPE_REQUEST_BASED)
                return count;

        if (kstrtouint(buf, 10, &deadline))
                return -EINVAL;

        if (deadline > MAX_SEQ_RQ_MERGE_DEADLINE_USECS)
                deadline = MAX_SEQ_RQ_MERGE_DEADLINE_USECS;

        md->seq_rq_merge_deadline_usecs = deadline;

        return count;
}
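
/*
 * Usage sketch for the show/store handlers above, assuming dm-sysfs.c exposes
 * them as the per-device attribute rq_based_seq_io_merge_deadline_usecs
 * (the sysfs path below is illustrative):
 *
 *     echo 100 > /sys/block/dm-0/dm/rq_based_seq_io_merge_deadline_usecs
 *
 * would enable the sequential-I/O merge heuristic used by dm_old_request_fn()
 * with a 100us deadline; writing 0 disables it again. Values above
 * MAX_SEQ_RQ_MERGE_DEADLINE_USECS are clamped rather than rejected.
 */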

static bool dm_old_request_peeked_before_merge_deadline(struct mapped_device *md)
{
        ktime_t kt_deadline;

        if (!md->seq_rq_merge_deadline_usecs)
                return false;

        kt_deadline = ns_to_ktime((u64)md->seq_rq_merge_deadline_usecs * NSEC_PER_USEC);
        kt_deadline = ktime_add_safe(md->last_rq_start_time, kt_deadline);

        return !ktime_after(ktime_get(), kt_deadline);
}

/*
 * q->request_fn for old request-based dm.
 * Called with the queue lock held.
 */
static void dm_old_request_fn(struct request_queue *q)
{
        struct mapped_device *md = q->queuedata;
        struct dm_target *ti = md->immutable_target;
        struct request *rq;
        struct dm_rq_target_io *tio;
        sector_t pos = 0;

        if (unlikely(!ti)) {
                int srcu_idx;
                struct dm_table *map = dm_get_live_table(md, &srcu_idx);

                ti = dm_table_find_target(map, pos);
                dm_put_live_table(md, srcu_idx);
        }

        /*
         * For suspend, check blk_queue_stopped() and increment
         * ->pending within a single queue_lock so as not to increment the
         * number of in-flight I/Os after the queue is stopped in
         * dm_suspend().
         */
        while (!blk_queue_stopped(q)) {
                rq = blk_peek_request(q);
                if (!rq)
                        return;

                /* always use block 0 to find the target for flushes for now */
                pos = 0;
                if (req_op(rq) != REQ_OP_FLUSH)
                        pos = blk_rq_pos(rq);

                if ((dm_old_request_peeked_before_merge_deadline(md) &&
                     md_in_flight(md) && rq->bio && rq->bio->bi_vcnt == 1 &&
                     md->last_rq_pos == pos && md->last_rq_rw == rq_data_dir(rq)) ||
                    (ti->type->busy && ti->type->busy(ti))) {
                        blk_delay_queue(q, 10);
                        return;
                }

                dm_start_request(md, rq);

                tio = tio_from_request(rq);
                /* Establish tio->ti before queuing work (map_tio_request) */
                tio->ti = ti;
                kthread_queue_work(&md->kworker, &tio->work);
                BUG_ON(!irqs_disabled());
        }
}

/*
 * Fully initialize a .request_fn request-based queue.
 */
int dm_old_init_request_queue(struct mapped_device *md)
{
        /* Fully initialize the queue */
        if (!blk_init_allocated_queue(md->queue, dm_old_request_fn, NULL))
                return -EINVAL;

        /* disable dm_old_request_fn's merge heuristic by default */
        md->seq_rq_merge_deadline_usecs = 0;

        dm_init_normal_md_queue(md);
        blk_queue_softirq_done(md->queue, dm_softirq_done);
        blk_queue_prep_rq(md->queue, dm_old_prep_fn);

        /* Initialize the request-based DM worker thread */
        kthread_init_worker(&md->kworker);
        md->kworker_task = kthread_run(kthread_worker_fn, &md->kworker,
                                       "kdmwork-%s", dm_device_name(md));
        if (IS_ERR(md->kworker_task)) {
                int error = PTR_ERR(md->kworker_task);
                md->kworker_task = NULL;
                return error;
        }

        elv_register_queue(md->queue);

        return 0;
}

static int dm_mq_init_request(void *data, struct request *rq,
                       unsigned int hctx_idx, unsigned int request_idx,
                       unsigned int numa_node)
{
        struct mapped_device *md = data;
        struct dm_rq_target_io *tio = blk_mq_rq_to_pdu(rq);

        /*
         * Must initialize md member of tio, otherwise it won't
         * be available in dm_mq_queue_rq.
         */
        tio->md = md;

        if (md->init_tio_pdu) {
                /* target-specific per-io data is immediately after the tio */
                tio->info.ptr = tio + 1;
        }

        return 0;
}

static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
                          const struct blk_mq_queue_data *bd)
{
        struct request *rq = bd->rq;
        struct dm_rq_target_io *tio = blk_mq_rq_to_pdu(rq);
        struct mapped_device *md = tio->md;
        struct dm_target *ti = md->immutable_target;

        if (unlikely(!ti)) {
                int srcu_idx;
                struct dm_table *map = dm_get_live_table(md, &srcu_idx);

                ti = dm_table_find_target(map, 0);
                dm_put_live_table(md, srcu_idx);
        }

        if (ti->type->busy && ti->type->busy(ti))
                return BLK_MQ_RQ_QUEUE_BUSY;

        dm_start_request(md, rq);

        /* Init tio using md established in .init_request */
        init_tio(tio, rq, md);

        /*
         * Establish tio->ti before calling map_request().
         */
        tio->ti = ti;

        /* Direct call is fine since .queue_rq allows allocations */
        if (map_request(tio) == DM_MAPIO_REQUEUE) {
                /* Undo dm_start_request() before requeuing */
                rq_end_stats(md, rq);
                rq_completed(md, rq_data_dir(rq), false);
                return BLK_MQ_RQ_QUEUE_BUSY;
        }

        return BLK_MQ_RQ_QUEUE_OK;
}

static struct blk_mq_ops dm_mq_ops = {
        .queue_rq = dm_mq_queue_rq,
        .complete = dm_softirq_done,
        .init_request = dm_mq_init_request,
};

int dm_mq_init_request_queue(struct mapped_device *md, struct dm_table *t)
{
        struct request_queue *q;
        struct dm_target *immutable_tgt;
        int err;

        if (!dm_table_all_blk_mq_devices(t)) {
                DMERR("request-based dm-mq may only be stacked on blk-mq device(s)");
                return -EINVAL;
        }

        md->tag_set = kzalloc_node(sizeof(struct blk_mq_tag_set), GFP_KERNEL, md->numa_node_id);
        if (!md->tag_set)
                return -ENOMEM;

        md->tag_set->ops = &dm_mq_ops;
        md->tag_set->queue_depth = dm_get_blk_mq_queue_depth();
        md->tag_set->numa_node = md->numa_node_id;
        md->tag_set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
        md->tag_set->nr_hw_queues = dm_get_blk_mq_nr_hw_queues();
        md->tag_set->driver_data = md;

        md->tag_set->cmd_size = sizeof(struct dm_rq_target_io);
        immutable_tgt = dm_table_get_immutable_target(t);
        if (immutable_tgt && immutable_tgt->per_io_data_size) {
                /* any target-specific per-io data is immediately after the tio */
                md->tag_set->cmd_size += immutable_tgt->per_io_data_size;
                md->init_tio_pdu = true;
        }

        err = blk_mq_alloc_tag_set(md->tag_set);
        if (err)
                goto out_kfree_tag_set;

        q = blk_mq_init_allocated_queue(md->tag_set, md->queue);
        if (IS_ERR(q)) {
                err = PTR_ERR(q);
                goto out_tag_set;
        }
        dm_init_md_queue(md);

        /* backfill 'mq' sysfs registration normally done in blk_register_queue */
        blk_mq_register_dev(disk_to_dev(md->disk), q);

        return 0;

out_tag_set:
        blk_mq_free_tag_set(md->tag_set);
out_kfree_tag_set:
        kfree(md->tag_set);

        return err;
}

void dm_mq_cleanup_mapped_device(struct mapped_device *md)
{
        if (md->tag_set) {
                blk_mq_free_tag_set(md->tag_set);
                kfree(md->tag_set);
        }
}

module_param(reserved_rq_based_ios, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(reserved_rq_based_ios, "Reserved IOs in request-based mempools");

module_param(use_blk_mq, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(use_blk_mq, "Use block multiqueue for request-based DM devices");

module_param(dm_mq_nr_hw_queues, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(dm_mq_nr_hw_queues, "Number of hardware queues for request-based dm-mq devices");

module_param(dm_mq_queue_depth, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(dm_mq_queue_depth, "Queue depth for request-based dm-mq devices");
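
/*
 * Example usage of the module parameters above (the values are hypothetical,
 * and this assumes dm-rq.c is built into the dm_mod module as usual):
 *
 *     modprobe dm_mod use_blk_mq=y dm_mq_nr_hw_queues=4 dm_mq_queue_depth=2048
 *
 * or, with device-mapper built in, dm_mod.use_blk_mq=y etc. on the kernel
 * command line. Since the parameters are declared S_IWUSR, they can also be
 * changed at runtime under /sys/module/dm_mod/parameters/; the accessors
 * near the top of this file clamp out-of-range values when they are read.
 */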