#include <linux/blk-mq.h>
#include <linux/highmem.h>
#include <linux/mm.h>
+#include <linux/pagemap.h>
#include <linux/kernel_stat.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/debugfs.h>
#include <linux/bpf.h>
#include <linux/psi.h>
+#include <linux/sched/sysctl.h>
+#include <linux/blk-crypto.h>
#define CREATE_TRACE_POINTS
#include <trace/events/block.h>
#include "blk-pm.h"
#include "blk-rq-qos.h"
-#ifdef CONFIG_DEBUG_FS
struct dentry *blk_debugfs_root;
-#endif
EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
rq->start_time_ns = ktime_get_ns();
rq->part = NULL;
refcount_set(&rq->ref, 1);
+ blk_crypto_rq_set_defaults(rq);
}
EXPORT_SYMBOL(blk_rq_init);
REQ_OP_NAME(ZONE_OPEN),
REQ_OP_NAME(ZONE_CLOSE),
REQ_OP_NAME(ZONE_FINISH),
+ REQ_OP_NAME(ZONE_APPEND),
REQ_OP_NAME(WRITE_SAME),
REQ_OP_NAME(WRITE_ZEROES),
REQ_OP_NAME(SCSI_IN),
bio_advance(bio, nbytes);
+ if (req_op(rq) == REQ_OP_ZONE_APPEND && error == BLK_STS_OK) {
+ /*
+ * Partial zone append completions cannot be supported as the
+ * BIO fragments may end up not being written sequentially.
+ */
+ if (bio->bi_iter.bi_size)
+ bio->bi_status = BLK_STS_IOERR;
+ else
+ bio->bi_iter.bi_sector = rq->__sector;
+ }
+
/* don't actually finish bio if it's part of flush sequence */
if (bio->bi_iter.bi_size == 0 && !(rq->rq_flags & RQF_FLUSH_SEQ))
bio_endio(bio);
* A block device may call blk_sync_queue to ensure that any
* such activity is cancelled, thus allowing it to release resources
* that the callbacks might use. The caller must already have made sure
- * that its ->make_request_fn will not re-add plugging prior to calling
+ * that its ->submit_bio will not re-add plugging prior to calling
* this function.
*
* This function does not cancel any asynchronous activity arising
}
EXPORT_SYMBOL_GPL(blk_clear_pm_only);
+/**
+ * blk_put_queue - decrement the request_queue refcount
+ * @q: the request_queue structure to decrement the refcount for
+ *
+ * Decrements the refcount of the request_queue kobject. When this reaches 0
+ * we'll have blk_release_queue() called.
+ *
+ * Context: Any context, but the last reference must not be dropped from
+ * atomic context.
+ */
void blk_put_queue(struct request_queue *q)
{
kobject_put(&q->kobj);
*
* Mark @q DYING, drain all pending requests, mark @q DEAD, destroy and
* put it. All future requests will be failed immediately with -ENODEV.
+ *
+ * Context: can sleep
*/
void blk_cleanup_queue(struct request_queue *q)
{
+ /* cannot be called from atomic context */
+ might_sleep();
+
WARN_ON_ONCE(blk_queue_registered(q));
/* mark @q DYING, no new request or merges will be allowed afterwards */
}
}
+static inline int bio_queue_enter(struct bio *bio)
+{
+ struct request_queue *q = bio->bi_disk->queue;
+ bool nowait = bio->bi_opf & REQ_NOWAIT;
+ int ret;
+
+ ret = blk_queue_enter(q, nowait ? BLK_MQ_REQ_NOWAIT : 0);
+ if (unlikely(ret)) {
+ if (nowait && !blk_queue_dying(q))
+ bio_wouldblock_error(bio);
+ else
+ bio_io_error(bio);
+ }
+
+ return ret;
+}
+
void blk_queue_exit(struct request_queue *q)
{
percpu_ref_put(&q->q_usage_counter);
{
}
-struct request_queue *__blk_alloc_queue(int node_id)
+struct request_queue *blk_alloc_queue(int node_id)
{
struct request_queue *q;
int ret;
if (ret)
goto fail_id;
- q->backing_dev_info = bdi_alloc_node(GFP_KERNEL, node_id);
+ q->backing_dev_info = bdi_alloc(node_id);
if (!q->backing_dev_info)
goto fail_split;
q->backing_dev_info->ra_pages = VM_READAHEAD_PAGES;
q->backing_dev_info->capabilities = BDI_CAP_CGROUP_WRITEBACK;
- q->backing_dev_info->name = "block";
q->node = node_id;
timer_setup(&q->backing_dev_info->laptop_mode_wb_timer,
kobject_init(&q->kobj, &blk_queue_ktype);
-#ifdef CONFIG_BLK_DEV_IO_TRACE
- mutex_init(&q->blk_trace_mutex);
-#endif
+ mutex_init(&q->debugfs_mutex);
mutex_init(&q->sysfs_lock);
mutex_init(&q->sysfs_dir_lock);
spin_lock_init(&q->queue_lock);
blk_queue_dma_alignment(q, 511);
blk_set_default_limits(&q->limits);
+ q->nr_requests = BLKDEV_MAX_RQ;
return q;
kmem_cache_free(blk_requestq_cachep, q);
return NULL;
}
-
-struct request_queue *blk_alloc_queue(make_request_fn make_request, int node_id)
-{
- struct request_queue *q;
-
- if (WARN_ON_ONCE(!make_request))
- return NULL;
-
- q = __blk_alloc_queue(node_id);
- if (!q)
- return NULL;
- q->make_request_fn = make_request;
- q->nr_requests = BLKDEV_MAX_RQ;
- return q;
-}
EXPORT_SYMBOL(blk_alloc_queue);
+/**
+ * blk_get_queue - increment the request_queue refcount
+ * @q: the request_queue structure to increment the refcount for
+ *
+ * Increment the refcount of the request_queue kobject.
+ *
+ * Context: Any context.
+ */
bool blk_get_queue(struct request_queue *q)
{
if (likely(!blk_queue_dying(q))) {
}
EXPORT_SYMBOL(blk_put_request);
+static void blk_account_io_merge_bio(struct request *req)
+{
+ if (!blk_do_io_stat(req))
+ return;
+
+ part_stat_lock();
+ part_stat_inc(req->part, merges[op_stat_group(req_op(req))]);
+ part_stat_unlock();
+}
+
bool bio_attempt_back_merge(struct request *req, struct bio *bio,
unsigned int nr_segs)
{
req->biotail = bio;
req->__data_len += bio->bi_iter.bi_size;
- blk_account_io_start(req, false);
+ bio_crypt_free_ctx(bio);
+
+ blk_account_io_merge_bio(req);
return true;
}
req->__sector = bio->bi_iter.bi_sector;
req->__data_len += bio->bi_iter.bi_size;
- blk_account_io_start(req, false);
+ bio_crypt_do_front_merge(req, bio);
+
+ blk_account_io_merge_bio(req);
return true;
}
req->__data_len += bio->bi_iter.bi_size;
req->nr_phys_segments = segments + 1;
- blk_account_io_start(req, false);
+ blk_account_io_merge_bio(req);
return true;
no_merge:
req_set_nomerge(q, req);
return false;
WARN_ONCE(1,
- "generic_make_request: Trying to write "
- "to read-only block-device %s (partno %d)\n",
+ "Trying to write to read-only block-device %s (partno %d)\n",
bio_devname(bio, b), part->partno);
/* Older lvm-tools actually trigger this */
return false;
return ret;
}
-static noinline_for_stack bool
-generic_make_request_checks(struct bio *bio)
+/*
+ * Check write append to a zoned block device.
+ */
+static inline blk_status_t blk_check_zone_append(struct request_queue *q,
+ struct bio *bio)
{
- struct request_queue *q;
+ sector_t pos = bio->bi_iter.bi_sector;
int nr_sectors = bio_sectors(bio);
+
+ /* Only applicable to zoned block devices */
+ if (!blk_queue_is_zoned(q))
+ return BLK_STS_NOTSUPP;
+
+ /* The bio sector must point to the start of a sequential zone */
+ if (pos & (blk_queue_zone_sectors(q) - 1) ||
+ !blk_queue_zone_is_seq(q, pos))
+ return BLK_STS_IOERR;
+
+ /*
+ * Not allowed to cross zone boundaries. Otherwise, the BIO will be
+ * split and could result in non-contiguous sectors being written in
+ * different zones.
+ */
+ if (nr_sectors > q->limits.chunk_sectors)
+ return BLK_STS_IOERR;
+
+ /* Make sure the BIO is small enough and will not get split */
+ if (nr_sectors > q->limits.max_zone_append_sectors)
+ return BLK_STS_IOERR;
+
+ bio->bi_opf |= REQ_NOMERGE;
+
+ return BLK_STS_OK;
+}
+
+static noinline_for_stack bool submit_bio_checks(struct bio *bio)
+{
+ struct request_queue *q = bio->bi_disk->queue;
blk_status_t status = BLK_STS_IOERR;
- char b[BDEVNAME_SIZE];
+ struct blk_plug *plug;
might_sleep();
- q = bio->bi_disk->queue;
- if (unlikely(!q)) {
- printk(KERN_ERR
- "generic_make_request: Trying to access "
- "nonexistent block-device %s (%Lu)\n",
- bio_devname(bio, b), (long long)bio->bi_iter.bi_sector);
- goto end_io;
- }
+ plug = blk_mq_plug(q, bio);
+ if (plug && plug->nowait)
+ bio->bi_opf |= REQ_NOWAIT;
/*
* For a REQ_NOWAIT based request, return -EOPNOTSUPP
}
/*
- * Filter flush bio's early so that make_request based
- * drivers without flush support don't have to worry
- * about them.
+ * Filter flush bio's early so that bio based drivers without flush
+ * support don't have to worry about them.
*/
if (op_is_flush(bio->bi_opf) &&
!test_bit(QUEUE_FLAG_WC, &q->queue_flags)) {
bio->bi_opf &= ~(REQ_PREFLUSH | REQ_FUA);
- if (!nr_sectors) {
+ if (!bio_sectors(bio)) {
status = BLK_STS_OK;
goto end_io;
}
if (!q->limits.max_write_same_sectors)
goto not_supported;
break;
+ case REQ_OP_ZONE_APPEND:
+ status = blk_check_zone_append(q, bio);
+ if (status != BLK_STS_OK)
+ goto end_io;
+ break;
case REQ_OP_ZONE_RESET:
case REQ_OP_ZONE_OPEN:
case REQ_OP_ZONE_CLOSE:
}
/*
- * Various block parts want %current->io_context and lazy ioc
- * allocation ends up trading a lot of pain for a small amount of
- * memory. Just allocate it upfront. This may fail and block
- * layer knows how to live with it.
+ * Various block parts want %current->io_context, so allocate it up
+ * front rather than dealing with lots of pain to allocate it only
+ * where needed. This may fail and the block layer knows how to live
+ * with it.
*/
- create_io_context(GFP_ATOMIC, q->node);
+ if (unlikely(!current->io_context))
+ create_task_io_context(current, GFP_ATOMIC, q->node);
- if (!blkcg_bio_issue_check(q, bio))
+ if (blk_throtl_bio(bio)) {
+ blkcg_bio_issue_init(bio);
return false;
+ }
+
+ blk_cgroup_bio_start(bio);
+ blkcg_bio_issue_init(bio);
if (!bio_flagged(bio, BIO_TRACE_COMPLETION)) {
trace_block_bio_queue(q, bio);
return false;
}
-/**
- * generic_make_request - hand a buffer to its device driver for I/O
- * @bio: The bio describing the location in memory and on the device.
- *
- * generic_make_request() is used to make I/O requests of block
- * devices. It is passed a &struct bio, which describes the I/O that needs
- * to be done.
- *
- * generic_make_request() does not return any status. The
- * success/failure status of the request, along with notification of
- * completion, is delivered asynchronously through the bio->bi_end_io
- * function described (one day) else where.
+static blk_qc_t __submit_bio(struct bio *bio)
+{
+ struct gendisk *disk = bio->bi_disk;
+ blk_qc_t ret = BLK_QC_T_NONE;
+
+ if (blk_crypto_bio_prep(&bio)) {
+ if (!disk->fops->submit_bio)
+ return blk_mq_submit_bio(bio);
+ ret = disk->fops->submit_bio(bio);
+ }
+ blk_queue_exit(disk->queue);
+ return ret;
+}
+
+/*
+ * The loop in this function may be a bit non-obvious, and so deserves some
+ * explanation:
*
- * The caller of generic_make_request must make sure that bi_io_vec
- * are set to describe the memory buffer, and that bi_dev and bi_sector are
- * set to describe the device address, and the
- * bi_end_io and optionally bi_private are set to describe how
- * completion notification should be signaled.
+ * - Before entering the loop, bio->bi_next is NULL (as all callers ensure
+ * that), so we have a list with a single bio.
+ * - We pretend that we have just taken it off a longer list, so we assign
+ * bio_list to a pointer to the bio_list_on_stack, thus initialising the
+ * bio_list of new bios to be added. ->submit_bio() may indeed add some more
+ * bios through a recursive call to submit_bio_noacct. If it did, we find a
+ * non-NULL value in bio_list and re-enter the loop from the top.
+ * - In this case we really did just take the bio of the top of the list (no
+ * pretending) and so remove it from bio_list, and call into ->submit_bio()
+ * again.
*
- * generic_make_request and the drivers it calls may use bi_next if this
- * bio happens to be merged with someone else, and may resubmit the bio to
- * a lower device by calling into generic_make_request recursively, which
- * means the bio should NOT be touched after the call to ->make_request_fn.
+ * bio_list_on_stack[0] contains bios submitted by the current ->submit_bio.
+ * bio_list_on_stack[1] contains bios that were submitted before the current
+ * ->submit_bio_bio, but that haven't been processed yet.
*/
-blk_qc_t generic_make_request(struct bio *bio)
+static blk_qc_t __submit_bio_noacct(struct bio *bio)
{
- /*
- * bio_list_on_stack[0] contains bios submitted by the current
- * make_request_fn.
- * bio_list_on_stack[1] contains bios that were submitted before
- * the current make_request_fn, but that haven't been processed
- * yet.
- */
struct bio_list bio_list_on_stack[2];
blk_qc_t ret = BLK_QC_T_NONE;
- if (!generic_make_request_checks(bio))
- goto out;
-
- /*
- * We only want one ->make_request_fn to be active at a time, else
- * stack usage with stacked devices could be a problem. So use
- * current->bio_list to keep a list of requests submited by a
- * make_request_fn function. current->bio_list is also used as a
- * flag to say if generic_make_request is currently active in this
- * task or not. If it is NULL, then no make_request is active. If
- * it is non-NULL, then a make_request is active, and new requests
- * should be added at the tail
- */
- if (current->bio_list) {
- bio_list_add(¤t->bio_list[0], bio);
- goto out;
- }
-
- /* following loop may be a bit non-obvious, and so deserves some
- * explanation.
- * Before entering the loop, bio->bi_next is NULL (as all callers
- * ensure that) so we have a list with a single bio.
- * We pretend that we have just taken it off a longer list, so
- * we assign bio_list to a pointer to the bio_list_on_stack,
- * thus initialising the bio_list of new bios to be
- * added. ->make_request() may indeed add some more bios
- * through a recursive call to generic_make_request. If it
- * did, we find a non-NULL value in bio_list and re-enter the loop
- * from the top. In this case we really did just take the bio
- * of the top of the list (no pretending) and so remove it from
- * bio_list, and call into ->make_request() again.
- */
BUG_ON(bio->bi_next);
+
bio_list_init(&bio_list_on_stack[0]);
current->bio_list = bio_list_on_stack;
+
do {
struct request_queue *q = bio->bi_disk->queue;
- blk_mq_req_flags_t flags = bio->bi_opf & REQ_NOWAIT ?
- BLK_MQ_REQ_NOWAIT : 0;
+ struct bio_list lower, same;
- if (likely(blk_queue_enter(q, flags) == 0)) {
- struct bio_list lower, same;
+ if (unlikely(bio_queue_enter(bio) != 0))
+ continue;
- /* Create a fresh bio_list for all subordinate requests */
- bio_list_on_stack[1] = bio_list_on_stack[0];
- bio_list_init(&bio_list_on_stack[0]);
- ret = q->make_request_fn(q, bio);
+ /*
+ * Create a fresh bio_list for all subordinate requests.
+ */
+ bio_list_on_stack[1] = bio_list_on_stack[0];
+ bio_list_init(&bio_list_on_stack[0]);
- blk_queue_exit(q);
+ ret = __submit_bio(bio);
- /* sort new bios into those for a lower level
- * and those for the same level
- */
- bio_list_init(&lower);
- bio_list_init(&same);
- while ((bio = bio_list_pop(&bio_list_on_stack[0])) != NULL)
- if (q == bio->bi_disk->queue)
- bio_list_add(&same, bio);
- else
- bio_list_add(&lower, bio);
- /* now assemble so we handle the lowest level first */
- bio_list_merge(&bio_list_on_stack[0], &lower);
- bio_list_merge(&bio_list_on_stack[0], &same);
- bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]);
- } else {
- if (unlikely(!blk_queue_dying(q) &&
- (bio->bi_opf & REQ_NOWAIT)))
- bio_wouldblock_error(bio);
+ /*
+ * Sort new bios into those for a lower level and those for the
+ * same level.
+ */
+ bio_list_init(&lower);
+ bio_list_init(&same);
+ while ((bio = bio_list_pop(&bio_list_on_stack[0])) != NULL)
+ if (q == bio->bi_disk->queue)
+ bio_list_add(&same, bio);
else
- bio_io_error(bio);
+ bio_list_add(&lower, bio);
+
+ /*
+ * Now assemble so we handle the lowest level first.
+ */
+ bio_list_merge(&bio_list_on_stack[0], &lower);
+ bio_list_merge(&bio_list_on_stack[0], &same);
+ bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]);
+ } while ((bio = bio_list_pop(&bio_list_on_stack[0])));
+
+ current->bio_list = NULL;
+ return ret;
+}
+
+static blk_qc_t __submit_bio_noacct_mq(struct bio *bio)
+{
+ struct bio_list bio_list[2] = { };
+ blk_qc_t ret = BLK_QC_T_NONE;
+
+ current->bio_list = bio_list;
+
+ do {
+ struct gendisk *disk = bio->bi_disk;
+
+ if (unlikely(bio_queue_enter(bio) != 0))
+ continue;
+
+ if (!blk_crypto_bio_prep(&bio)) {
+ blk_queue_exit(disk->queue);
+ ret = BLK_QC_T_NONE;
+ continue;
}
- bio = bio_list_pop(&bio_list_on_stack[0]);
- } while (bio);
- current->bio_list = NULL; /* deactivate */
-out:
+ ret = blk_mq_submit_bio(bio);
+ } while ((bio = bio_list_pop(&bio_list[0])));
+
+ current->bio_list = NULL;
return ret;
}
-EXPORT_SYMBOL(generic_make_request);
/**
- * direct_make_request - hand a buffer directly to its device driver for I/O
+ * submit_bio_noacct - re-submit a bio to the block device layer for I/O
* @bio: The bio describing the location in memory and on the device.
*
- * This function behaves like generic_make_request(), but does not protect
- * against recursion. Must only be used if the called driver is known
- * to not call generic_make_request (or direct_make_request) again from
- * its make_request function. (Calling direct_make_request again from
- * a workqueue is perfectly fine as that doesn't recurse).
+ * This is a version of submit_bio() that shall only be used for I/O that is
+ * resubmitted to lower level drivers by stacking block drivers. All file
+ * systems and other upper level users of the block layer should use
+ * submit_bio() instead.
*/
-blk_qc_t direct_make_request(struct bio *bio)
+blk_qc_t submit_bio_noacct(struct bio *bio)
{
- struct request_queue *q = bio->bi_disk->queue;
- bool nowait = bio->bi_opf & REQ_NOWAIT;
- blk_qc_t ret;
-
- if (!generic_make_request_checks(bio))
+ if (!submit_bio_checks(bio))
return BLK_QC_T_NONE;
- if (unlikely(blk_queue_enter(q, nowait ? BLK_MQ_REQ_NOWAIT : 0))) {
- if (nowait && !blk_queue_dying(q))
- bio_wouldblock_error(bio);
- else
- bio_io_error(bio);
+ /*
+ * We only want one ->submit_bio to be active at a time, else stack
+ * usage with stacked devices could be a problem. Use current->bio_list
+ * to collect a list of requests submited by a ->submit_bio method while
+ * it is active, and then process them after it returned.
+ */
+ if (current->bio_list) {
+ bio_list_add(¤t->bio_list[0], bio);
return BLK_QC_T_NONE;
}
- ret = q->make_request_fn(q, bio);
- blk_queue_exit(q);
- return ret;
+ if (!bio->bi_disk->fops->submit_bio)
+ return __submit_bio_noacct_mq(bio);
+ return __submit_bio_noacct(bio);
}
-EXPORT_SYMBOL_GPL(direct_make_request);
+EXPORT_SYMBOL(submit_bio_noacct);
/**
* submit_bio - submit a bio to the block device layer for I/O
* @bio: The &struct bio which describes the I/O
*
- * submit_bio() is very similar in purpose to generic_make_request(), and
- * uses that function to do most of the work. Both are fairly rough
- * interfaces; @bio must be presetup and ready for I/O.
+ * submit_bio() is used to submit I/O requests to block devices. It is passed a
+ * fully set up &struct bio that describes the I/O that needs to be done. The
+ * bio will be send to the device described by the bi_disk and bi_partno fields.
*
+ * The success/failure status of the request, along with notification of
+ * completion, is delivered asynchronously through the ->bi_end_io() callback
+ * in @bio. The bio must NOT be touched by thecaller until ->bi_end_io() has
+ * been called.
*/
blk_qc_t submit_bio(struct bio *bio)
{
- bool workingset_read = false;
- unsigned long pflags;
- blk_qc_t ret;
-
if (blkcg_punt_bio_submit(bio))
return BLK_QC_T_NONE;
if (op_is_write(bio_op(bio))) {
count_vm_events(PGPGOUT, count);
} else {
- if (bio_flagged(bio, BIO_WORKINGSET))
- workingset_read = true;
task_io_account_read(bio->bi_iter.bi_size);
count_vm_events(PGPGIN, count);
}
}
/*
- * If we're reading data that is part of the userspace
- * workingset, count submission time as memory stall. When the
- * device is congested, or the submitting cgroup IO-throttled,
- * submission can be a significant part of overall IO time.
+ * If we're reading data that is part of the userspace workingset, count
+ * submission time as memory stall. When the device is congested, or
+ * the submitting cgroup IO-throttled, submission can be a significant
+ * part of overall IO time.
*/
- if (workingset_read)
- psi_memstall_enter(&pflags);
-
- ret = generic_make_request(bio);
+ if (unlikely(bio_op(bio) == REQ_OP_READ &&
+ bio_flagged(bio, BIO_WORKINGSET))) {
+ unsigned long pflags;
+ blk_qc_t ret;
- if (workingset_read)
+ psi_memstall_enter(&pflags);
+ ret = submit_bio_noacct(bio);
psi_memstall_leave(&pflags);
- return ret;
+ return ret;
+ }
+
+ return submit_bio_noacct(bio);
}
EXPORT_SYMBOL(submit_bio);
should_fail_request(&rq->rq_disk->part0, blk_rq_bytes(rq)))
return BLK_STS_IOERR;
+ if (blk_crypto_insert_cloned_request(rq))
+ return BLK_STS_IOERR;
+
if (blk_queue_io_stat(q))
- blk_account_io_start(rq, true);
+ blk_account_io_start(rq);
/*
* Since we have a scheduler attached on the top device,
}
EXPORT_SYMBOL_GPL(blk_rq_err_bytes);
-void blk_account_io_completion(struct request *req, unsigned int bytes)
+static void update_io_ticks(struct hd_struct *part, unsigned long now, bool end)
+{
+ unsigned long stamp;
+again:
+ stamp = READ_ONCE(part->stamp);
+ if (unlikely(stamp != now)) {
+ if (likely(cmpxchg(&part->stamp, stamp, now) == stamp))
+ __part_stat_add(part, io_ticks, end ? now - stamp : 1);
+ }
+ if (part->partno) {
+ part = &part_to_disk(part)->part0;
+ goto again;
+ }
+}
+
+static void blk_account_io_completion(struct request *req, unsigned int bytes)
{
if (req->part && blk_do_io_stat(req)) {
const int sgrp = op_stat_group(req_op(req));
update_io_ticks(part, jiffies, true);
part_stat_inc(part, ios[sgrp]);
part_stat_add(part, nsecs[sgrp], now - req->start_time_ns);
- part_dec_in_flight(req->q, part, rq_data_dir(req));
+ part_stat_unlock();
hd_struct_put(part);
- part_stat_unlock();
}
}
-void blk_account_io_start(struct request *rq, bool new_io)
+void blk_account_io_start(struct request *rq)
{
- struct hd_struct *part;
- int rw = rq_data_dir(rq);
-
if (!blk_do_io_stat(rq))
return;
+ rq->part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq));
+
part_stat_lock();
+ update_io_ticks(rq->part, jiffies, false);
+ part_stat_unlock();
+}
- if (!new_io) {
- part = rq->part;
- part_stat_inc(part, merges[rw]);
- } else {
- part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq));
- if (!hd_struct_try_get(part)) {
- /*
- * The partition is already being removed,
- * the request will be accounted on the disk only
- *
- * We take a reference on disk->part0 although that
- * partition will never be deleted, so we can treat
- * it as any other partition.
- */
- part = &rq->rq_disk->part0;
- hd_struct_get(part);
- }
- part_inc_in_flight(rq->q, part, rw);
- rq->part = part;
- }
+unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors,
+ unsigned int op)
+{
+ struct hd_struct *part = &disk->part0;
+ const int sgrp = op_stat_group(op);
+ unsigned long now = READ_ONCE(jiffies);
+
+ part_stat_lock();
+ update_io_ticks(part, now, false);
+ part_stat_inc(part, ios[sgrp]);
+ part_stat_add(part, sectors[sgrp], sectors);
+ part_stat_local_inc(part, in_flight[op_is_write(op)]);
+ part_stat_unlock();
+
+ return now;
+}
+EXPORT_SYMBOL(disk_start_io_acct);
- update_io_ticks(part, jiffies, false);
+void disk_end_io_acct(struct gendisk *disk, unsigned int op,
+ unsigned long start_time)
+{
+ struct hd_struct *part = &disk->part0;
+ const int sgrp = op_stat_group(op);
+ unsigned long now = READ_ONCE(jiffies);
+ unsigned long duration = now - start_time;
+ part_stat_lock();
+ update_io_ticks(part, now, true);
+ part_stat_add(part, nsecs[sgrp], jiffies_to_nsecs(duration));
+ part_stat_local_dec(part, in_flight[op_is_write(op)]);
part_stat_unlock();
}
+EXPORT_SYMBOL(disk_end_io_acct);
/*
* Steal bios from a request and add them to a bio list.
}
rq->nr_phys_segments = rq_src->nr_phys_segments;
rq->ioprio = rq_src->ioprio;
- rq->extra_len = rq_src->extra_len;
+
+ if (rq->bio)
+ blk_crypto_rq_bio_prep(rq, rq->bio, gfp_mask);
return 0;
INIT_LIST_HEAD(&plug->cb_list);
plug->rq_count = 0;
plug->multiple_queues = false;
+ plug->nowait = false;
/*
* Store ordering should not be needed here, since a potential
}
EXPORT_SYMBOL(blk_finish_plug);
+void blk_io_schedule(void)
+{
+ /* Prevent hang_check timer from firing at us during very long I/O */
+ unsigned long timeout = sysctl_hung_task_timeout_secs * HZ / 2;
+
+ if (timeout)
+ io_schedule_timeout(timeout);
+ else
+ io_schedule();
+}
+EXPORT_SYMBOL_GPL(blk_io_schedule);
+
int __init blk_dev_init(void)
{
BUILD_BUG_ON(REQ_OP_LAST >= (1 << REQ_OP_BITS));
blk_requestq_cachep = kmem_cache_create("request_queue",
sizeof(struct request_queue), 0, SLAB_PANIC, NULL);
-#ifdef CONFIG_DEBUG_FS
blk_debugfs_root = debugfs_create_dir("block", NULL);
-#endif
return 0;
}