diff --git a/block/blk-core.c b/block/blk-core.c
index 10c08ac..96e5fcd 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -116,8 +116,8 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
        rq->__sector = (sector_t) -1;
        INIT_HLIST_NODE(&rq->hash);
        RB_CLEAR_NODE(&rq->rb_node);
-       rq->tag = -1;
-       rq->internal_tag = -1;
+       rq->tag = BLK_MQ_NO_TAG;
+       rq->internal_tag = BLK_MQ_NO_TAG;
        rq->start_time_ns = ktime_get_ns();
        rq->part = NULL;
        refcount_set(&rq->ref, 1);
@@ -186,6 +186,10 @@ static const struct {
        /* device mapper special case, should not leak out: */
        [BLK_STS_DM_REQUEUE]    = { -EREMCHG, "dm internal retry" },
 
+       /* zone device specific errors */
+       [BLK_STS_ZONE_OPEN_RESOURCE]    = { -ETOOMANYREFS, "open zones exceeded" },
+       [BLK_STS_ZONE_ACTIVE_RESOURCE]  = { -EOVERFLOW, "active zones exceeded" },
+
        /* everything else not covered above: */
        [BLK_STS_IOERR]         = { -EIO,       "I/O" },
 };
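
(For context: this table is consumed by blk_status_to_errno() when a blk_status_t has to be reported as a classic errno. That helper already sits next to the table in this file and looks essentially like the sketch below; it is reproduced here only to show how the two new zone entries surface as -ETOOMANYREFS and -EOVERFLOW instead of a generic -EIO.)

int blk_status_to_errno(blk_status_t status)
{
        int idx = (__force int)status;

        /* Unknown status values fall back to a plain I/O error. */
        if (WARN_ON_ONCE(idx >= ARRAY_SIZE(blk_errors)))
                return -EIO;
        return blk_errors[idx].errno;
}
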
@@ -538,11 +542,10 @@ struct request_queue *blk_alloc_queue(int node_id)
        if (!q->stats)
                goto fail_stats;
 
-       q->backing_dev_info->ra_pages = VM_READAHEAD_PAGES;
-       q->backing_dev_info->io_pages = VM_READAHEAD_PAGES;
-       q->backing_dev_info->capabilities = BDI_CAP_CGROUP_WRITEBACK;
        q->node = node_id;
 
+       atomic_set(&q->nr_active_requests_shared_sbitmap, 0);
+
        timer_setup(&q->backing_dev_info->laptop_mode_wb_timer,
                    laptop_mode_timer_fn, 0);
        timer_setup(&q->timeout, blk_rq_timed_out_timer, 0);
@@ -643,171 +646,14 @@ void blk_put_request(struct request *req)
 }
 EXPORT_SYMBOL(blk_put_request);
 
-static void blk_account_io_merge_bio(struct request *req)
-{
-       if (!blk_do_io_stat(req))
-               return;
-
-       part_stat_lock();
-       part_stat_inc(req->part, merges[op_stat_group(req_op(req))]);
-       part_stat_unlock();
-}
-
-bool bio_attempt_back_merge(struct request *req, struct bio *bio,
-               unsigned int nr_segs)
-{
-       const int ff = bio->bi_opf & REQ_FAILFAST_MASK;
-
-       if (!ll_back_merge_fn(req, bio, nr_segs))
-               return false;
-
-       trace_block_bio_backmerge(req->q, req, bio);
-       rq_qos_merge(req->q, req, bio);
-
-       if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
-               blk_rq_set_mixed_merge(req);
-
-       req->biotail->bi_next = bio;
-       req->biotail = bio;
-       req->__data_len += bio->bi_iter.bi_size;
-
-       bio_crypt_free_ctx(bio);
-
-       blk_account_io_merge_bio(req);
-       return true;
-}
-
-bool bio_attempt_front_merge(struct request *req, struct bio *bio,
-               unsigned int nr_segs)
-{
-       const int ff = bio->bi_opf & REQ_FAILFAST_MASK;
-
-       if (!ll_front_merge_fn(req, bio, nr_segs))
-               return false;
-
-       trace_block_bio_frontmerge(req->q, req, bio);
-       rq_qos_merge(req->q, req, bio);
-
-       if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
-               blk_rq_set_mixed_merge(req);
-
-       bio->bi_next = req->bio;
-       req->bio = bio;
-
-       req->__sector = bio->bi_iter.bi_sector;
-       req->__data_len += bio->bi_iter.bi_size;
-
-       bio_crypt_do_front_merge(req, bio);
-
-       blk_account_io_merge_bio(req);
-       return true;
-}
-
-bool bio_attempt_discard_merge(struct request_queue *q, struct request *req,
-               struct bio *bio)
-{
-       unsigned short segments = blk_rq_nr_discard_segments(req);
-
-       if (segments >= queue_max_discard_segments(q))
-               goto no_merge;
-       if (blk_rq_sectors(req) + bio_sectors(bio) >
-           blk_rq_get_max_sectors(req, blk_rq_pos(req)))
-               goto no_merge;
-
-       rq_qos_merge(q, req, bio);
-
-       req->biotail->bi_next = bio;
-       req->biotail = bio;
-       req->__data_len += bio->bi_iter.bi_size;
-       req->nr_phys_segments = segments + 1;
-
-       blk_account_io_merge_bio(req);
-       return true;
-no_merge:
-       req_set_nomerge(q, req);
-       return false;
-}
-
-/**
- * blk_attempt_plug_merge - try to merge with %current's plugged list
- * @q: request_queue new bio is being queued at
- * @bio: new bio being queued
- * @nr_segs: number of segments in @bio
- * @same_queue_rq: pointer to &struct request that gets filled in when
- * another request associated with @q is found on the plug list
- * (optional, may be %NULL)
- *
- * Determine whether @bio being queued on @q can be merged with a request
- * on %current's plugged list.  Returns %true if merge was successful,
- * otherwise %false.
- *
- * Plugging coalesces IOs from the same issuer for the same purpose without
- * going through @q->queue_lock.  As such it's more of an issuing mechanism
- * than scheduling, and the request, while may have elvpriv data, is not
- * added on the elevator at this point.  In addition, we don't have
- * reliable access to the elevator outside queue lock.  Only check basic
- * merging parameters without querying the elevator.
- *
- * Caller must ensure !blk_queue_nomerges(q) beforehand.
- */
-bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
-               unsigned int nr_segs, struct request **same_queue_rq)
-{
-       struct blk_plug *plug;
-       struct request *rq;
-       struct list_head *plug_list;
-
-       plug = blk_mq_plug(q, bio);
-       if (!plug)
-               return false;
-
-       plug_list = &plug->mq_list;
-
-       list_for_each_entry_reverse(rq, plug_list, queuelist) {
-               bool merged = false;
-
-               if (rq->q == q && same_queue_rq) {
-                       /*
-                        * Only blk-mq multiple hardware queues case checks the
-                        * rq in the same queue, there should be only one such
-                        * rq in a queue
-                        **/
-                       *same_queue_rq = rq;
-               }
-
-               if (rq->q != q || !blk_rq_merge_ok(rq, bio))
-                       continue;
-
-               switch (blk_try_merge(rq, bio)) {
-               case ELEVATOR_BACK_MERGE:
-                       merged = bio_attempt_back_merge(rq, bio, nr_segs);
-                       break;
-               case ELEVATOR_FRONT_MERGE:
-                       merged = bio_attempt_front_merge(rq, bio, nr_segs);
-                       break;
-               case ELEVATOR_DISCARD_MERGE:
-                       merged = bio_attempt_discard_merge(q, rq, bio);
-                       break;
-               default:
-                       break;
-               }
-
-               if (merged)
-                       return true;
-       }
-
-       return false;
-}
-
 static void handle_bad_sector(struct bio *bio, sector_t maxsector)
 {
        char b[BDEVNAME_SIZE];
 
-       printk(KERN_INFO "attempt to access beyond end of device\n");
-       printk(KERN_INFO "%s: rw=%d, want=%Lu, limit=%Lu\n",
-                       bio_devname(bio, b), bio->bi_opf,
-                       (unsigned long long)bio_end_sector(bio),
-                       (long long)maxsector);
+       pr_info_ratelimited("attempt to access beyond end of device\n"
+                           "%s: rw=%d, want=%llu, limit=%llu\n",
+                           bio_devname(bio, b), bio->bi_opf,
+                           bio_end_sector(bio), maxsector);
 }
 
 #ifdef CONFIG_FAIL_MAKE_REQUEST
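
(The kernel-doc deleted above is the main in-tree description of what plugging means to a submitter, so a minimal usage sketch may help: blk_start_plug()/blk_finish_plug() and submit_bio() are the real APIs, while submit_batch() is a hypothetical caller. The bio-merge helpers removed in this hunk are not dropped from the kernel; over this diff range they move into block/blk-merge.c.)

#include <linux/bio.h>
#include <linux/blkdev.h>

/* Hypothetical submitter: batch several bios under one plug so that
 * adjacent requests can be merged before they reach the elevator. */
static void submit_batch(struct bio **bios, int nr)
{
        struct blk_plug plug;
        int i;

        blk_start_plug(&plug);          /* start collecting on current's plug list */
        for (i = 0; i < nr; i++)
                submit_bio(bios[i]);    /* may back/front/discard-merge into a plugged rq */
        blk_finish_plug(&plug);         /* flush the plugged requests to the queue(s) */
}
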
@@ -820,9 +666,9 @@ static int __init setup_fail_make_request(char *str)
 }
 __setup("fail_make_request=", setup_fail_make_request);
 
-static bool should_fail_request(struct hd_struct *part, unsigned int bytes)
+static bool should_fail_request(struct block_device *part, unsigned int bytes)
 {
-       return part->make_it_fail && should_fail(&fail_make_request, bytes);
+       return part->bd_make_it_fail && should_fail(&fail_make_request, bytes);
 }
 
 static int __init fail_make_request_debugfs(void)
@@ -837,7 +683,7 @@ late_initcall(fail_make_request_debugfs);
 
 #else /* CONFIG_FAIL_MAKE_REQUEST */
 
-static inline bool should_fail_request(struct hd_struct *part,
+static inline bool should_fail_request(struct block_device *part,
                                        unsigned int bytes)
 {
        return false;
@@ -845,11 +691,11 @@ static inline bool should_fail_request(struct hd_struct *part,
 
 #endif /* CONFIG_FAIL_MAKE_REQUEST */
 
-static inline bool bio_check_ro(struct bio *bio, struct hd_struct *part)
+static inline bool bio_check_ro(struct bio *bio, struct block_device *part)
 {
        const int op = bio_op(bio);
 
-       if (part->policy && op_is_write(op)) {
+       if (part->bd_read_only && op_is_write(op)) {
                char b[BDEVNAME_SIZE];
 
                if (op_is_flush(bio->bi_opf) && !bio_sectors(bio))
@@ -857,7 +703,7 @@ static inline bool bio_check_ro(struct bio *bio, struct hd_struct *part)
 
                WARN_ONCE(1,
                       "Trying to write to read-only block-device %s (partno %d)\n",
-                       bio_devname(bio, b), part->partno);
+                       bio_devname(bio, b), part->bd_partno);
                /* Older lvm-tools actually trigger this */
                return false;
        }
@@ -867,7 +713,7 @@ static inline bool bio_check_ro(struct bio *bio, struct hd_struct *part)
 
 static noinline int should_fail_bio(struct bio *bio)
 {
-       if (should_fail_request(&bio->bi_disk->part0, bio->bi_iter.bi_size))
+       if (should_fail_request(bio->bi_disk->part0, bio->bi_iter.bi_size))
                return -EIO;
        return 0;
 }
@@ -896,7 +742,7 @@ static inline int bio_check_eod(struct bio *bio, sector_t maxsector)
  */
 static inline int blk_partition_remap(struct bio *bio)
 {
-       struct hd_struct *p;
+       struct block_device *p;
        int ret = -EIO;
 
        rcu_read_lock();
@@ -909,11 +755,12 @@ static inline int blk_partition_remap(struct bio *bio)
                goto out;
 
        if (bio_sectors(bio)) {
-               if (bio_check_eod(bio, part_nr_sects_read(p)))
+               if (bio_check_eod(bio, bdev_nr_sectors(p)))
                        goto out;
-               bio->bi_iter.bi_sector += p->start_sect;
-               trace_block_bio_remap(bio->bi_disk->queue, bio, part_devt(p),
-                                     bio->bi_iter.bi_sector - p->start_sect);
+               bio->bi_iter.bi_sector += p->bd_start_sect;
+               trace_block_bio_remap(bio, p->bd_dev,
+                                     bio->bi_iter.bi_sector -
+                                     p->bd_start_sect);
        }
        bio->bi_partno = 0;
        ret = 0;
@@ -971,9 +818,9 @@ static noinline_for_stack bool submit_bio_checks(struct bio *bio)
 
        /*
         * For a REQ_NOWAIT based request, return -EOPNOTSUPP
-        * if queue is not a request based queue.
+        * if queue does not support NOWAIT.
         */
-       if ((bio->bi_opf & REQ_NOWAIT) && !queue_is_mq(q))
+       if ((bio->bi_opf & REQ_NOWAIT) && !blk_queue_nowait(q))
                goto not_supported;
 
        if (should_fail_bio(bio))
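
(Note on the hunk above: a REQ_NOWAIT bio is now refused only when the queue has not set QUEUE_FLAG_NOWAIT, rather than whenever the queue is not blk-mq. A bio-based driver that can honour non-blocking submission opts in roughly as sketched below; mydrv_init_queue() is hypothetical, while blk_queue_flag_set() and QUEUE_FLAG_NOWAIT are the real interfaces.)

#include <linux/blkdev.h>

/* Hypothetical bio-based driver setup: advertise REQ_NOWAIT support so the
 * blk_queue_nowait() check above passes for this queue. */
static void mydrv_init_queue(struct request_queue *q)
{
        blk_queue_flag_set(QUEUE_FLAG_NOWAIT, q);
}
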
@@ -983,7 +830,7 @@ static noinline_for_stack bool submit_bio_checks(struct bio *bio)
                if (unlikely(blk_partition_remap(bio)))
                        goto end_io;
        } else {
-               if (unlikely(bio_check_ro(bio, &bio->bi_disk->part0)))
+               if (unlikely(bio_check_ro(bio, bio->bi_disk->part0)))
                        goto end_io;
                if (unlikely(bio_check_eod(bio, get_capacity(bio->bi_disk))))
                        goto end_io;
@@ -1060,7 +907,7 @@ static noinline_for_stack bool submit_bio_checks(struct bio *bio)
        blkcg_bio_issue_init(bio);
 
        if (!bio_flagged(bio, BIO_TRACE_COMPLETION)) {
-               trace_block_bio_queue(q, bio);
+               trace_block_bio_queue(bio);
                /* Now that enqueuing has been traced, we need to trace
                 * completion as well.
                 */
@@ -1301,14 +1148,28 @@ EXPORT_SYMBOL(submit_bio);
  *    limits when retrying requests on other queues. Those requests need
  *    to be checked against the new queue limits again during dispatch.
  */
-static int blk_cloned_rq_check_limits(struct request_queue *q,
+static blk_status_t blk_cloned_rq_check_limits(struct request_queue *q,
                                      struct request *rq)
 {
-       if (blk_rq_sectors(rq) > blk_queue_get_max_sectors(q, req_op(rq))) {
+       unsigned int max_sectors = blk_queue_get_max_sectors(q, req_op(rq));
+
+       if (blk_rq_sectors(rq) > max_sectors) {
+               /*
+                * SCSI device does not have a good way to return if
+                * Write Same/Zero is actually supported. If a device rejects
+                * a non-read/write command (discard, write same,etc.) the
+                * low-level device driver will set the relevant queue limit to
+                * 0 to prevent blk-lib from issuing more of the offending
+                * operations. Commands queued prior to the queue limit being
+                * reset need to be completed with BLK_STS_NOTSUPP to avoid I/O
+                * errors being propagated to upper layers.
+                */
+               if (max_sectors == 0)
+                       return BLK_STS_NOTSUPP;
+
                printk(KERN_ERR "%s: over max size limit. (%u > %u)\n",
-                       __func__, blk_rq_sectors(rq),
-                       blk_queue_get_max_sectors(q, req_op(rq)));
-               return -EIO;
+                       __func__, blk_rq_sectors(rq), max_sectors);
+               return BLK_STS_IOERR;
        }
 
        /*
@@ -1321,10 +1182,10 @@ static int blk_cloned_rq_check_limits(struct request_queue *q,
        if (rq->nr_phys_segments > queue_max_segments(q)) {
                printk(KERN_ERR "%s: over max segments limit. (%hu > %hu)\n",
                        __func__, rq->nr_phys_segments, queue_max_segments(q));
-               return -EIO;
+               return BLK_STS_IOERR;
        }
 
-       return 0;
+       return BLK_STS_OK;
 }
 
 /**
@@ -1334,11 +1195,14 @@ static int blk_cloned_rq_check_limits(struct request_queue *q,
  */
 blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *rq)
 {
-       if (blk_cloned_rq_check_limits(q, rq))
-               return BLK_STS_IOERR;
+       blk_status_t ret;
+
+       ret = blk_cloned_rq_check_limits(q, rq);
+       if (ret != BLK_STS_OK)
+               return ret;
 
        if (rq->rq_disk &&
-           should_fail_request(&rq->rq_disk->part0, blk_rq_bytes(rq)))
+           should_fail_request(rq->rq_disk->part0, blk_rq_bytes(rq)))
                return BLK_STS_IOERR;
 
        if (blk_crypto_insert_cloned_request(rq))
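
(blk_insert_cloned_request() is called by request-based stacking drivers such as dm-multipath, and the point of the BLK_STS_NOTSUPP path added above is that those callers can now tell "the target no longer supports this operation" apart from a hard I/O error. A hypothetical caller, sketched rather than copied from dm-rq, might react like this; dispatch_clone() and its requeue/complete comments are assumptions.)

#include <linux/blkdev.h>

/* Hypothetical stacking-driver dispatch path (sketch only). */
static blk_status_t dispatch_clone(struct request_queue *q, struct request *clone)
{
        blk_status_t ret = blk_insert_cloned_request(q, clone);

        switch (ret) {
        case BLK_STS_OK:
                break;
        case BLK_STS_RESOURCE:
        case BLK_STS_DEV_RESOURCE:
                /* Transient: requeue the original request and try again later. */
                break;
        default:
                /*
                 * BLK_STS_NOTSUPP, BLK_STS_IOERR, ...: complete the original
                 * request with this status so the submitter sees the error.
                 */
                break;
        }
        return ret;
}
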
@@ -1397,17 +1261,18 @@ unsigned int blk_rq_err_bytes(const struct request *rq)
 }
 EXPORT_SYMBOL_GPL(blk_rq_err_bytes);
 
-static void update_io_ticks(struct hd_struct *part, unsigned long now, bool end)
+static void update_io_ticks(struct block_device *part, unsigned long now,
+               bool end)
 {
        unsigned long stamp;
 again:
-       stamp = READ_ONCE(part->stamp);
+       stamp = READ_ONCE(part->bd_stamp);
        if (unlikely(stamp != now)) {
-               if (likely(cmpxchg(&part->stamp, stamp, now) == stamp))
+               if (likely(cmpxchg(&part->bd_stamp, stamp, now) == stamp))
                        __part_stat_add(part, io_ticks, end ? now - stamp : 1);
        }
-       if (part->partno) {
-               part = &part_to_disk(part)->part0;
+       if (part->bd_partno) {
+               part = bdev_whole(part);
                goto again;
        }
 }
@@ -1416,11 +1281,9 @@ static void blk_account_io_completion(struct request *req, unsigned int bytes)
 {
        if (req->part && blk_do_io_stat(req)) {
                const int sgrp = op_stat_group(req_op(req));
-               struct hd_struct *part;
 
                part_stat_lock();
-               part = req->part;
-               part_stat_add(part, sectors[sgrp], bytes >> 9);
+               part_stat_add(req->part, sectors[sgrp], bytes >> 9);
                part_stat_unlock();
        }
 }
@@ -1435,17 +1298,12 @@ void blk_account_io_done(struct request *req, u64 now)
        if (req->part && blk_do_io_stat(req) &&
            !(req->rq_flags & RQF_FLUSH_SEQ)) {
                const int sgrp = op_stat_group(req_op(req));
-               struct hd_struct *part;
 
                part_stat_lock();
-               part = req->part;
-
-               update_io_ticks(part, jiffies, true);
-               part_stat_inc(part, ios[sgrp]);
-               part_stat_add(part, nsecs[sgrp], now - req->start_time_ns);
+               update_io_ticks(req->part, jiffies, true);
+               part_stat_inc(req->part, ios[sgrp]);
+               part_stat_add(req->part, nsecs[sgrp], now - req->start_time_ns);
                part_stat_unlock();
-
-               hd_struct_put(part);
        }
 }
 
@@ -1461,10 +1319,9 @@ void blk_account_io_start(struct request *rq)
        part_stat_unlock();
 }
 
-unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors,
-               unsigned int op)
+static unsigned long __part_start_io_acct(struct block_device *part,
+                                         unsigned int sectors, unsigned int op)
 {
-       struct hd_struct *part = &disk->part0;
        const int sgrp = op_stat_group(op);
        unsigned long now = READ_ONCE(jiffies);
 
@@ -1477,12 +1334,26 @@ unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors,
 
        return now;
 }
+
+unsigned long part_start_io_acct(struct gendisk *disk, struct block_device **part,
+                                struct bio *bio)
+{
+       *part = disk_map_sector_rcu(disk, bio->bi_iter.bi_sector);
+
+       return __part_start_io_acct(*part, bio_sectors(bio), bio_op(bio));
+}
+EXPORT_SYMBOL_GPL(part_start_io_acct);
+
+unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors,
+                                unsigned int op)
+{
+       return __part_start_io_acct(disk->part0, sectors, op);
+}
 EXPORT_SYMBOL(disk_start_io_acct);
 
-void disk_end_io_acct(struct gendisk *disk, unsigned int op,
-               unsigned long start_time)
+static void __part_end_io_acct(struct block_device *part, unsigned int op,
+                              unsigned long start_time)
 {
-       struct hd_struct *part = &disk->part0;
        const int sgrp = op_stat_group(op);
        unsigned long now = READ_ONCE(jiffies);
        unsigned long duration = now - start_time;
@@ -1493,6 +1364,19 @@ void disk_end_io_acct(struct gendisk *disk, unsigned int op,
        part_stat_local_dec(part, in_flight[op_is_write(op)]);
        part_stat_unlock();
 }
+
+void part_end_io_acct(struct block_device *part, struct bio *bio,
+                     unsigned long start_time)
+{
+       __part_end_io_acct(part, bio_op(bio), start_time);
+}
+EXPORT_SYMBOL_GPL(part_end_io_acct);
+
+void disk_end_io_acct(struct gendisk *disk, unsigned int op,
+                     unsigned long start_time)
+{
+       __part_end_io_acct(disk->part0, op, start_time);
+}
 EXPORT_SYMBOL(disk_end_io_acct);
 
 /*
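
(These are the accounting entry points for bio-based drivers: the disk_* pair accounts against the whole disk, while the newly exported part_* pair lets callers such as device-mapper charge the I/O to the partition the bio actually targets. Typical use looks roughly like the sketch below; my_submit_bio() is hypothetical, the accounting calls are the API declared above.)

#include <linux/bio.h>
#include <linux/blkdev.h>

/* Hypothetical bio-based driver path: account a bio against the whole disk. */
static void my_submit_bio(struct bio *bio)
{
        struct gendisk *disk = bio->bi_disk;
        unsigned long start;

        start = disk_start_io_acct(disk, bio_sectors(bio), bio_op(bio));

        /* ... carry out the I/O (synchronously here, for simplicity) ... */

        disk_end_io_acct(disk, bio_op(bio), start);
        bio_endio(bio);
}

A partition-aware caller would instead use the pair added above: part_start_io_acct(disk, &part, bio) to look up and charge the partition, and part_end_io_acct(part, bio, start) on completion.
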
@@ -1730,8 +1614,10 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
                if (rq->bio) {
                        rq->biotail->bi_next = bio;
                        rq->biotail = bio;
-               } else
+               } else {
                        rq->bio = rq->biotail = bio;
+               }
+               bio = NULL;
        }
 
        /* Copy attributes of the original request to the clone request. */
@@ -1744,8 +1630,8 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
        rq->nr_phys_segments = rq_src->nr_phys_segments;
        rq->ioprio = rq_src->ioprio;
 
-       if (rq->bio)
-               blk_crypto_rq_bio_prep(rq, rq->bio, gfp_mask);
+       if (rq->bio && blk_crypto_rq_bio_prep(rq, rq->bio, gfp_mask) < 0)
+               goto free_and_out;
 
        return 0;