thp_fault_alloc
Number of transparent hugepages which were allocated to satisfy
- a page fault, including COW faults. This counter is not present
- when CONFIG_TRANSPARENT_HUGEPAGE is not set.
+ a page fault. This counter is not present when CONFIG_TRANSPARENT_HUGEPAGE
+ is not set.
thp_collapse_alloc
Number of transparent hugepages which were allocated to allow
~~~~~~~~~~~~~~~~~~
io.stat
- A read-only nested-keyed file which exists on non-root
- cgroups.
+ A read-only nested-keyed file.
Lines are keyed by $MAJ:$MIN device numbers and not ordered.
The following nested keys are defined.
QUEUE_FLAG_NAME(REGISTERED),
QUEUE_FLAG_NAME(SCSI_PASSTHROUGH),
QUEUE_FLAG_NAME(QUIESCED),
+ QUEUE_FLAG_NAME(PCI_P2PDMA),
+ QUEUE_FLAG_NAME(ZONE_RESETALL),
+ QUEUE_FLAG_NAME(RQ_ALLOC_TIME),
};
#undef QUEUE_FLAG_NAME
const struct show_busy_params *params = data;
if (rq->mq_hctx == params->hctx)
- __blk_mq_debugfs_rq_show(params->m,
- list_entry_rq(&rq->queuelist));
+ __blk_mq_debugfs_rq_show(params->m, rq);
return true;
}
struct blk_mq_hw_ctx *hctx;
int i;
- q->debugfs_dir = debugfs_create_dir(kobject_name(q->kobj.parent),
- blk_debugfs_root);
-
debugfs_create_files(q->debugfs_dir, q, blk_mq_debugfs_queue_attrs);
/*
void blk_mq_debugfs_unregister(struct request_queue *q)
{
- debugfs_remove_recursive(q->debugfs_dir);
q->sched_debugfs_dir = NULL;
- q->debugfs_dir = NULL;
}
static void blk_mq_debugfs_register_ctx(struct blk_mq_hw_ctx *hctx,
#include "blk-mq-sched.h"
#include "blk-rq-qos.h"
+ static DEFINE_PER_CPU(struct list_head, blk_cpu_done);
+
static void blk_mq_poll_stats_start(struct request_queue *q);
static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb);
{
struct blk_mq_tags *tags = blk_mq_tags_from_data(data);
struct request *rq = tags->static_rqs[tag];
- req_flags_t rq_flags = 0;
- if (data->flags & BLK_MQ_REQ_INTERNAL) {
+ if (data->q->elevator) {
rq->tag = BLK_MQ_NO_TAG;
rq->internal_tag = tag;
} else {
- if (data->hctx->flags & BLK_MQ_F_TAG_SHARED) {
- rq_flags = RQF_MQ_INFLIGHT;
- atomic_inc(&data->hctx->nr_active);
- }
rq->tag = tag;
rq->internal_tag = BLK_MQ_NO_TAG;
- data->hctx->tags->rqs[rq->tag] = rq;
}
/* csd/requeue_work/fifo_time is initialized before use */
rq->q = data->q;
rq->mq_ctx = data->ctx;
rq->mq_hctx = data->hctx;
- rq->rq_flags = rq_flags;
+ rq->rq_flags = 0;
rq->cmd_flags = data->cmd_flags;
if (data->flags & BLK_MQ_REQ_PREEMPT)
rq->rq_flags |= RQF_PREEMPT;
data->flags |= BLK_MQ_REQ_NOWAIT;
if (e) {
- data->flags |= BLK_MQ_REQ_INTERNAL;
-
/*
* Flush requests are special and go directly to the
* dispatch list. Don't include reserved tags in the
retry:
data->ctx = blk_mq_get_ctx(q);
data->hctx = blk_mq_map_queue(q, data->cmd_flags, data->ctx);
- if (!(data->flags & BLK_MQ_REQ_INTERNAL))
+ if (!e)
blk_mq_tag_busy(data->hctx);
/*
cpu = cpumask_first_and(data.hctx->cpumask, cpu_online_mask);
data.ctx = __blk_mq_get_ctx(q, cpu);
- if (q->elevator)
- data.flags |= BLK_MQ_REQ_INTERNAL;
- else
+ if (!q->elevator)
blk_mq_tag_busy(data.hctx);
ret = -EWOULDBLOCK;
blk_stat_add(rq, now);
}
- if (rq->internal_tag != BLK_MQ_NO_TAG)
- blk_mq_sched_completed_request(rq, now);
+ blk_mq_sched_completed_request(rq, now);
blk_account_io_done(rq, now);
}
EXPORT_SYMBOL(blk_mq_end_request);
- static void __blk_mq_complete_request_remote(void *data)
+ /*
+ * Softirq action handler - move entries to local list and loop over them
+ * while passing them to the queue registered handler.
+ */
+ static __latent_entropy void blk_done_softirq(struct softirq_action *h)
{
- struct request *rq = data;
- struct request_queue *q = rq->q;
+ struct list_head *cpu_list, local_list;
- q->mq_ops->complete(rq);
+ local_irq_disable();
+ cpu_list = this_cpu_ptr(&blk_cpu_done);
+ list_replace_init(cpu_list, &local_list);
+ local_irq_enable();
+
+ while (!list_empty(&local_list)) {
+ struct request *rq;
+
+ rq = list_entry(local_list.next, struct request, ipi_list);
+ list_del_init(&rq->ipi_list);
+ rq->q->mq_ops->complete(rq);
+ }
}
- /**
- * blk_mq_force_complete_rq() - Force complete the request, bypassing any error
- * injection that could drop the completion.
- * @rq: Request to be force completed
- *
- * Drivers should use blk_mq_complete_request() to complete requests in their
- * normal IO path. For timeout error recovery, drivers may call this forced
- * completion routine after they've reclaimed timed out requests to bypass
- * potentially subsequent fake timeouts.
- */
- void blk_mq_force_complete_rq(struct request *rq)
+ static void blk_mq_trigger_softirq(struct request *rq)
{
- struct blk_mq_ctx *ctx = rq->mq_ctx;
- struct request_queue *q = rq->q;
- bool shared = false;
- int cpu;
+ struct list_head *list;
+ unsigned long flags;
+
+ local_irq_save(flags);
+ list = this_cpu_ptr(&blk_cpu_done);
+ list_add_tail(&rq->ipi_list, list);
+
+ /*
+ * If the list only contains our just added request, signal a raise of
+ * the softirq. If there are already entries there, someone already
+ * raised the irq but it hasn't run yet.
+ */
+ if (list->next == &rq->ipi_list)
+ raise_softirq_irqoff(BLOCK_SOFTIRQ);
+ local_irq_restore(flags);
+ }
+
+ static int blk_softirq_cpu_dead(unsigned int cpu)
+ {
+ /*
+ * If a CPU goes away, splice its entries to the current CPU
+ * and trigger a run of the softirq
+ */
+ local_irq_disable();
+ list_splice_init(&per_cpu(blk_cpu_done, cpu),
+ this_cpu_ptr(&blk_cpu_done));
+ raise_softirq_irqoff(BLOCK_SOFTIRQ);
+ local_irq_enable();
+
+ return 0;
+ }
+
+
+ static void __blk_mq_complete_request_remote(void *data)
+ {
+ struct request *rq = data;
- WRITE_ONCE(rq->state, MQ_RQ_COMPLETE);
/*
- * Most of single queue controllers, there is only one irq vector
- * for handling IO completion, and the only irq's affinity is set
- * as all possible CPUs. On most of ARCHs, this affinity means the
- * irq is handled on one specific CPU.
+ * For most single queue controllers, there is only one irq vector
+ * for handling I/O completion, and that irq's affinity is set
+ * to all possible CPUs. On most architectures, this affinity means the
+ * irq is handled on one specific CPU.
*
- * So complete IO reqeust in softirq context in case of single queue
- * for not degrading IO performance by irqsoff latency.
+ * So complete I/O requests in softirq context in case of single queue
+ * devices to avoid degrading I/O performance due to irqsoff latency.
*/
- if (q->nr_hw_queues == 1) {
- __blk_complete_request(rq);
- return;
- }
+ if (rq->q->nr_hw_queues == 1)
+ blk_mq_trigger_softirq(rq);
+ else
+ rq->q->mq_ops->complete(rq);
+ }
+
+ static inline bool blk_mq_complete_need_ipi(struct request *rq)
+ {
+ int cpu = raw_smp_processor_id();
+
+ if (!IS_ENABLED(CONFIG_SMP) ||
+ !test_bit(QUEUE_FLAG_SAME_COMP, &rq->q->queue_flags))
+ return false;
+
+ /* same CPU or cache domain? Complete locally */
+ if (cpu == rq->mq_ctx->cpu ||
+ (!test_bit(QUEUE_FLAG_SAME_FORCE, &rq->q->queue_flags) &&
+ cpus_share_cache(cpu, rq->mq_ctx->cpu)))
+ return false;
+
+ /* don't try to IPI to an offline CPU */
+ return cpu_online(rq->mq_ctx->cpu);
+ }
+
+ bool blk_mq_complete_request_remote(struct request *rq)
+ {
+ WRITE_ONCE(rq->state, MQ_RQ_COMPLETE);
/*
* For a polled request, always complete locally, it's pointless
* to redirect the completion.
*/
- if ((rq->cmd_flags & REQ_HIPRI) ||
- !test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags)) {
- q->mq_ops->complete(rq);
- return;
- }
-
- cpu = get_cpu();
- if (!test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags))
- shared = cpus_share_cache(cpu, ctx->cpu);
+ if (rq->cmd_flags & REQ_HIPRI)
+ return false;
- if (cpu != ctx->cpu && !shared && cpu_online(ctx->cpu)) {
+ if (blk_mq_complete_need_ipi(rq)) {
rq->csd.func = __blk_mq_complete_request_remote;
rq->csd.info = rq;
rq->csd.flags = 0;
- smp_call_function_single_async(ctx->cpu, &rq->csd);
+ smp_call_function_single_async(rq->mq_ctx->cpu, &rq->csd);
} else {
- q->mq_ops->complete(rq);
+ if (rq->q->nr_hw_queues > 1)
+ return false;
+ blk_mq_trigger_softirq(rq);
}
- put_cpu();
+
+ return true;
+ }
+ EXPORT_SYMBOL_GPL(blk_mq_complete_request_remote);
+
+ /**
+ * blk_mq_complete_request - end I/O on a request
+ * @rq: the request being processed
+ *
+ * Description:
+ * Complete a request by invoking the ->complete operation, either
+ * directly or deferred through an IPI or softirq.
+ **/
+ void blk_mq_complete_request(struct request *rq)
+ {
+ if (!blk_mq_complete_request_remote(rq))
+ rq->q->mq_ops->complete(rq);
}
- EXPORT_SYMBOL_GPL(blk_mq_force_complete_rq);
+ EXPORT_SYMBOL(blk_mq_complete_request);
static void hctx_unlock(struct blk_mq_hw_ctx *hctx, int srcu_idx)
__releases(hctx->srcu)
*srcu_idx = srcu_read_lock(hctx->srcu);
}
- /**
- * blk_mq_complete_request - end I/O on a request
- * @rq: the request being processed
- *
- * Description:
- * Ends all I/O on a request. It does not handle partial completions.
- * The actual completion happens out-of-order, through a IPI handler.
- **/
- bool blk_mq_complete_request(struct request *rq)
- {
- if (unlikely(blk_should_fake_timeout(rq->q)))
- return false;
- blk_mq_force_complete_rq(rq);
- return true;
- }
- EXPORT_SYMBOL(blk_mq_complete_request);
-
/**
* blk_mq_start_request - Start processing a request
* @rq: Pointer to request to be started
void *priv, bool reserved)
{
/*
- * If we find a request that is inflight and the queue matches,
+ * If we find a request that isn't idle and the queue matches,
* we know the queue is busy. Return false to stop the iteration.
*/
- if (rq->state == MQ_RQ_IN_FLIGHT && rq->q == hctx->queue) {
+ if (blk_mq_request_started(rq) && rq->q == hctx->queue) {
bool *busy = priv;
*busy = true;
return min(BLK_MQ_MAX_DISPATCH_ORDER - 1, ilog2(queued) + 1);
}
+ static bool __blk_mq_get_driver_tag(struct request *rq)
+ {
+ struct sbitmap_queue *bt = &rq->mq_hctx->tags->bitmap_tags;
+ unsigned int tag_offset = rq->mq_hctx->tags->nr_reserved_tags;
+ int tag;
+
+ blk_mq_tag_busy(rq->mq_hctx);
+
+ if (blk_mq_tag_is_reserved(rq->mq_hctx->sched_tags, rq->internal_tag)) {
+ bt = &rq->mq_hctx->tags->breserved_tags;
+ tag_offset = 0;
+ }
+
+ if (!hctx_may_queue(rq->mq_hctx, bt))
+ return false;
+ tag = __sbitmap_queue_get(bt);
+ if (tag == BLK_MQ_NO_TAG)
+ return false;
+
+ rq->tag = tag + tag_offset;
+ return true;
+ }
+
+ static bool blk_mq_get_driver_tag(struct request *rq)
+ {
+ struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
+
+ if (rq->tag == BLK_MQ_NO_TAG && !__blk_mq_get_driver_tag(rq))
+ return false;
+
+ if ((hctx->flags & BLK_MQ_F_TAG_SHARED) &&
+ !(rq->rq_flags & RQF_MQ_INFLIGHT)) {
+ rq->rq_flags |= RQF_MQ_INFLIGHT;
+ atomic_inc(&hctx->nr_active);
+ }
+ hctx->tags->rqs[rq->tag] = rq;
+ return true;
+ }
+
static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode,
int flags, void *key)
{
__blk_mq_requeue_request(rq);
}
+ enum prep_dispatch {
+ PREP_DISPATCH_OK,
+ PREP_DISPATCH_NO_TAG,
+ PREP_DISPATCH_NO_BUDGET,
+ };
+
+ static enum prep_dispatch blk_mq_prep_dispatch_rq(struct request *rq,
+ bool need_budget)
+ {
+ struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
+
+ if (need_budget && !blk_mq_get_dispatch_budget(rq->q)) {
+ blk_mq_put_driver_tag(rq);
+ return PREP_DISPATCH_NO_BUDGET;
+ }
+
+ if (!blk_mq_get_driver_tag(rq)) {
+ /*
+ * The initial allocation attempt failed, so we need to
+ * rerun the hardware queue when a tag is freed. The
+ * waitqueue takes care of that. If the queue is run
+ * before we add this entry back on the dispatch list,
+ * we'll re-run it below.
+ */
+ if (!blk_mq_mark_tag_wait(hctx, rq)) {
+ /*
+ * Budgets that were not acquired by this function are
+ * released together when the partial dispatch is handled
+ */
+ if (need_budget)
+ blk_mq_put_dispatch_budget(rq->q);
+ return PREP_DISPATCH_NO_TAG;
+ }
+ }
+
+ return PREP_DISPATCH_OK;
+ }
+
+ /* release all budgets that were allocated before calling blk_mq_dispatch_rq_list */
+ static void blk_mq_release_budgets(struct request_queue *q,
+ unsigned int nr_budgets)
+ {
+ int i;
+
+ for (i = 0; i < nr_budgets; i++)
+ blk_mq_put_dispatch_budget(q);
+ }
+
/*
* Returns true if we did some work AND can potentially do more.
*/
- bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
- bool got_budget)
+ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list,
+ unsigned int nr_budgets)
{
- struct blk_mq_hw_ctx *hctx;
+ enum prep_dispatch prep;
+ struct request_queue *q = hctx->queue;
struct request *rq, *nxt;
- bool no_tag = false;
int errors, queued;
blk_status_t ret = BLK_STS_OK;
- bool no_budget_avail = false;
LIST_HEAD(zone_list);
if (list_empty(list))
return false;
- WARN_ON(!list_is_singular(list) && got_budget);
-
/*
* Now process all the entries, sending them to the driver.
*/
rq = list_first_entry(list, struct request, queuelist);
- hctx = rq->mq_hctx;
- if (!got_budget && !blk_mq_get_dispatch_budget(hctx)) {
- blk_mq_put_driver_tag(rq);
- no_budget_avail = true;
+ WARN_ON_ONCE(hctx != rq->mq_hctx);
+ prep = blk_mq_prep_dispatch_rq(rq, !nr_budgets);
+ if (prep != PREP_DISPATCH_OK)
break;
- }
-
- if (!blk_mq_get_driver_tag(rq)) {
- /*
- * The initial allocation attempt failed, so we need to
- * rerun the hardware queue when a tag is freed. The
- * waitqueue takes care of that. If the queue is run
- * before we add this entry back on the dispatch list,
- * we'll re-run it below.
- */
- if (!blk_mq_mark_tag_wait(hctx, rq)) {
- blk_mq_put_dispatch_budget(hctx);
- /*
- * For non-shared tags, the RESTART check
- * will suffice.
- */
- if (hctx->flags & BLK_MQ_F_TAG_SHARED)
- no_tag = true;
- break;
- }
- }
list_del_init(&rq->queuelist);
bd.last = !blk_mq_get_driver_tag(nxt);
}
+ /*
+ * once the request is queued to lld, no need to cover the
+ * budget any more
+ */
+ if (nr_budgets)
+ nr_budgets--;
ret = q->mq_ops->queue_rq(hctx, &bd);
- if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) {
- blk_mq_handle_dev_resource(rq, list);
+ switch (ret) {
+ case BLK_STS_OK:
+ queued++;
break;
- } else if (ret == BLK_STS_ZONE_RESOURCE) {
+ case BLK_STS_RESOURCE:
+ case BLK_STS_DEV_RESOURCE:
+ blk_mq_handle_dev_resource(rq, list);
+ goto out;
+ case BLK_STS_ZONE_RESOURCE:
/*
* Move the request to zone_list and keep going through
* the dispatch list to find more requests the drive can
* accept.
*/
blk_mq_handle_zone_resource(rq, &zone_list);
- if (list_empty(list))
- break;
- continue;
- }
-
- if (unlikely(ret != BLK_STS_OK)) {
+ break;
+ default:
errors++;
blk_mq_end_request(rq, BLK_STS_IOERR);
- continue;
}
-
- queued++;
} while (!list_empty(list));
-
+ out:
if (!list_empty(&zone_list))
list_splice_tail_init(&zone_list, list);
*/
if (!list_empty(list)) {
bool needs_restart;
+ /* For non-shared tags, the RESTART check will suffice */
+ bool no_tag = prep == PREP_DISPATCH_NO_TAG &&
+ (hctx->flags & BLK_MQ_F_TAG_SHARED);
+ bool no_budget_avail = prep == PREP_DISPATCH_NO_BUDGET;
+
+ blk_mq_release_budgets(q, nr_budgets);
/*
* If we didn't flush the entire list, we could have told
} else
blk_mq_update_dispatch_busy(hctx, false);
- /*
- * If the host/device is unable to accept more work, inform the
- * caller of that.
- */
- if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE)
- return false;
-
return (queued + errors) != 0;
}
if (q->elevator && !bypass_insert)
goto insert;
- if (!blk_mq_get_dispatch_budget(hctx))
+ if (!blk_mq_get_dispatch_budget(q))
goto insert;
if (!blk_mq_get_driver_tag(rq)) {
- blk_mq_put_dispatch_budget(hctx);
+ blk_mq_put_dispatch_budget(q);
goto insert;
}
}
/**
- * blk_mq_make_request - Create and send a request to block device.
- * @q: Request queue pointer.
+ * blk_mq_submit_bio - Create and send a request to block device.
* @bio: Bio pointer.
*
* Builds up a request structure from @q and @bio and send to the device. The
*
* Returns: Request queue cookie.
*/
- blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
+ blk_qc_t blk_mq_submit_bio(struct bio *bio)
{
+ struct request_queue *q = bio->bi_disk->queue;
const int is_sync = op_is_sync(bio->bi_opf);
const int is_flush_fua = op_is_flush(bio->bi_opf);
struct blk_mq_alloc_data data = {
blk_status_t ret;
blk_queue_bounce(q, &bio);
- __blk_queue_split(q, &bio, &nr_segs);
+ __blk_queue_split(&bio, &nr_segs);
if (!bio_integrity_prep(bio))
goto queue_exit;
blk_queue_exit(q);
return BLK_QC_T_NONE;
}
- EXPORT_SYMBOL_GPL(blk_mq_make_request); /* only for request based dm */
+ EXPORT_SYMBOL_GPL(blk_mq_submit_bio); /* only for request based dm */
void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
unsigned int hctx_idx)
{
struct request_queue *uninit_q, *q;
- uninit_q = __blk_alloc_queue(set->numa_node);
+ uninit_q = blk_alloc_queue(set->numa_node);
if (!uninit_q)
return ERR_PTR(-ENOMEM);
uninit_q->queuedata = queuedata;
static int __init blk_mq_init(void)
{
+ int i;
+
+ for_each_possible_cpu(i)
+ INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i));
+ open_softirq(BLOCK_SOFTIRQ, blk_done_softirq);
+
+ cpuhp_setup_state_nocalls(CPUHP_BLOCK_SOFTIRQ_DEAD,
+ "block/softirq:dead", NULL,
+ blk_softirq_cpu_dead);
cpuhp_setup_state_multi(CPUHP_BLK_MQ_DEAD, "block/mq:dead", NULL,
blk_mq_hctx_notify_dead);
cpuhp_setup_state_multi(CPUHP_AP_BLK_MQ_ONLINE, "block/mq:online",
struct nbd_device *nbd = args->nbd;
struct nbd_config *config = nbd->config;
struct nbd_cmd *cmd;
+ struct request *rq;
while (1) {
cmd = nbd_read_stat(nbd, args->index);
break;
}
- blk_mq_complete_request(blk_mq_rq_from_pdu(cmd));
+ rq = blk_mq_rq_from_pdu(cmd);
+ if (likely(!blk_should_fake_timeout(rq->q)))
+ blk_mq_complete_request(rq);
}
atomic_dec(&config->recv_threads);
wake_up(&config->recv_wq);
test_bit(NBD_RT_BOUND, &config->runtime_flags))) {
dev_err(disk_to_dev(nbd->disk),
"Device being setup by another task");
- sockfd_put(sock);
- return -EBUSY;
+ err = -EBUSY;
+ goto put_socket;
+ }
+
+ nsock = kzalloc(sizeof(*nsock), GFP_KERNEL);
+ if (!nsock) {
+ err = -ENOMEM;
+ goto put_socket;
}
socks = krealloc(config->socks, (config->num_connections + 1) *
sizeof(struct nbd_sock *), GFP_KERNEL);
if (!socks) {
- sockfd_put(sock);
- return -ENOMEM;
+ kfree(nsock);
+ err = -ENOMEM;
+ goto put_socket;
}
config->socks = socks;
- nsock = kzalloc(sizeof(struct nbd_sock), GFP_KERNEL);
- if (!nsock) {
- sockfd_put(sock);
- return -ENOMEM;
- }
-
nsock->fallback_index = -1;
nsock->dead = false;
mutex_init(&nsock->tx_lock);
atomic_inc(&config->live_connections);
return 0;
+
+put_socket:
+ sockfd_put(sock);
+ return err;
}
static int nbd_reconnect_socket(struct nbd_device *nbd, unsigned long arg)
while ((vbr = virtqueue_get_buf(vblk->vqs[qid].vq, &len)) != NULL) {
struct request *req = blk_mq_rq_from_pdu(vbr);
- blk_mq_complete_request(req);
+ if (likely(!blk_should_fake_timeout(req->q)))
+ blk_mq_complete_request(req);
req_done = true;
}
if (unlikely(virtqueue_is_broken(vq)))
put_disk(vblk->disk);
out_free_vq:
vdev->config->del_vqs(vdev);
+ kfree(vblk->vqs);
out_free_vblk:
kfree(vblk);
out_free_index:
}
/*
- * Block layer want one ->make_request_fn to be active at a time
- * so if we use chained IO with parent IO in same context,
- * it's a deadlock. To avoid, it, it uses worker thread context.
+ * The block layer wants only one ->submit_bio to be active at a time, so
+ * using chained IO with the parent IO in the same context would deadlock.
+ * To avoid that, use a worker thread context.
*/
static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
unsigned long entry, struct bio *bio)
/*
* Handler function for all zram I/O requests.
*/
- static blk_qc_t zram_make_request(struct request_queue *queue, struct bio *bio)
+ static blk_qc_t zram_submit_bio(struct bio *bio)
{
- struct zram *zram = queue->queuedata;
+ struct zram *zram = bio->bi_disk->private_data;
if (!valid_io_request(zram, bio->bi_iter.bi_sector,
bio->bi_iter.bi_size)) {
static const struct block_device_operations zram_devops = {
.open = zram_open,
+ .submit_bio = zram_submit_bio,
.swap_slot_free_notify = zram_slot_free_notify,
.rw_page = zram_rw_page,
.owner = THIS_MODULE
#ifdef CONFIG_ZRAM_WRITEBACK
spin_lock_init(&zram->wb_limit_lock);
#endif
- queue = blk_alloc_queue(zram_make_request, NUMA_NO_NODE);
+ queue = blk_alloc_queue(NUMA_NO_NODE);
if (!queue) {
pr_err("Error allocating disk queue for device %d\n",
device_id);
zram->disk->first_minor = device_id;
zram->disk->fops = &zram_devops;
zram->disk->queue = queue;
- zram->disk->queue->queuedata = zram;
zram->disk->private_data = zram;
snprintf(zram->disk->disk_name, 16, "zram%d", device_id);
return ret;
return scnprintf(buf, PAGE_SIZE, "%d\n", ret);
}
-static CLASS_ATTR_RO(hot_add);
+static struct class_attribute class_attr_hot_add =
+ __ATTR(hot_add, 0400, hot_add_show, NULL);
static ssize_t hot_remove_store(struct class *class,
struct class_attribute *attr,
*/
static void rq_completed(struct mapped_device *md)
{
- /* nudge anyone waiting on suspend queue */
- if (unlikely(wq_has_sleeper(&md->wait)))
- wake_up(&md->wait);
-
/*
* dm_put() must be at the end of this function. See the comment above
*/
struct dm_rq_target_io *tio = tio_from_request(rq);
tio->error = error;
- blk_mq_complete_request(rq);
+ if (likely(!blk_should_fake_timeout(rq->q)))
+ blk_mq_complete_request(rq);
}
/*
while (daa-- && i < p) {
pages[i++] = pfn_t_to_page(pfn);
pfn.val++;
+ if (!(i & 15))
+ cond_resched();
}
} while (i < p);
wc->memory_map = vmap(pages, p, VM_MAP, PAGE_KERNEL);
if (likely(!e->write_in_progress)) {
if (!discarded_something) {
- writecache_wait_for_ios(wc, READ);
- writecache_wait_for_ios(wc, WRITE);
+ if (!WC_MODE_PMEM(wc)) {
+ writecache_wait_for_ios(wc, READ);
+ writecache_wait_for_ios(wc, WRITE);
+ }
discarded_something = true;
}
+ if (!writecache_entry_is_committed(wc, e))
+ wc->uncommitted_blocks--;
writecache_free_entry(wc, e);
}
bio_end_sector(bio));
wc_unlock(wc);
bio_set_dev(bio, wc->dev->bdev);
- generic_make_request(bio);
+ submit_bio_noacct(bio);
} else {
writecache_flush(wc);
wc_unlock(wc);
}
if (WC_MODE_PMEM(wc)) {
+ if (!dax_synchronous(wc->ssd_dev->dax_dev)) {
+ r = -EOPNOTSUPP;
+ ti->error = "Asynchronous persistent memory not supported as pmem cache";
+ goto bad;
+ }
+
r = persistent_memory_claim(wc);
if (r) {
ti->error = "Unable to map persistent memory for cache";
bio_advance(bio, clone->bi_iter.bi_size);
refcount_inc(&bioctx->ref);
- generic_make_request(clone);
+ submit_bio_noacct(clone);
if (bio_op(bio) == REQ_OP_WRITE && dmz_is_seq(zone))
zone->wp_block += nr_blocks;
dm_per_bio_data(bio, sizeof(struct dmz_bioctx));
struct dmz_metadata *zmd = dmz->metadata;
struct dm_zone *zone;
- int i, ret;
-
- /*
- * Write may trigger a zone allocation. So make sure the
- * allocation can succeed.
- */
- if (bio_op(bio) == REQ_OP_WRITE)
- for (i = 0; i < dmz->nr_ddevs; i++)
- dmz_schedule_reclaim(dmz->dev[i].reclaim);
+ int ret;
dmz_lock_metadata(zmd);
}
/* Set target (no write same support) */
- ti->max_io_len = dmz_zone_nr_sectors(dmz->metadata) << 9;
+ ti->max_io_len = dmz_zone_nr_sectors(dmz->metadata);
ti->num_flush_bios = 1;
ti->num_discard_bios = 1;
ti->num_write_zeroes_bios = 1;
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mutex.h>
+#include <linux/sched/mm.h>
#include <linux/sched/signal.h>
#include <linux/blkpg.h>
#include <linux/bio.h>
bio_put(&tio->clone);
}
-static bool md_in_flight_bios(struct mapped_device *md)
-{
- int cpu;
- struct hd_struct *part = &dm_disk(md)->part0;
- long sum = 0;
-
- for_each_possible_cpu(cpu) {
- sum += part_stat_local_read_cpu(part, in_flight[0], cpu);
- sum += part_stat_local_read_cpu(part, in_flight[1], cpu);
- }
-
- return sum != 0;
-}
-
-static bool md_in_flight(struct mapped_device *md)
-{
- if (queue_is_mq(md->queue))
- return blk_mq_queue_inflight(md->queue);
- else
- return md_in_flight_bios(md);
-}
-
u64 dm_start_time_ns_from_clone(struct bio *bio)
{
struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
struct dm_io *io = tio->io;
struct mapped_device *md = tio->io->md;
dm_endio_fn endio = tio->ti->type->end_io;
+ struct bio *orig_bio = io->orig_bio;
if (unlikely(error == BLK_STS_TARGET) && md->type != DM_TYPE_NVME_BIO_BASED) {
if (bio_op(bio) == REQ_OP_DISCARD &&
disable_write_zeroes(md);
}
+ /*
+ * For zone-append bios get offset in zone of the written
+ * sector and add that to the original bio sector pos.
+ */
+ if (bio_op(orig_bio) == REQ_OP_ZONE_APPEND) {
+ sector_t written_sector = bio->bi_iter.bi_sector;
+ struct request_queue *q = orig_bio->bi_disk->queue;
+ u64 mask = (u64)blk_queue_zone_sectors(q) - 1;
+
+ orig_bio->bi_iter.bi_sector += written_sector & mask;
+ }
+
if (endio) {
int r = endio(tio->ti, bio, &error);
switch (r) {
sector_t sector;
struct bio *clone = &tio->clone;
struct dm_io *io = tio->io;
- struct mapped_device *md = io->md;
struct dm_target *ti = tio->ti;
blk_qc_t ret = BLK_QC_T_NONE;
/* the bio has been remapped so dispatch it */
trace_block_bio_remap(clone->bi_disk->queue, clone,
bio_dev(io->orig_bio), sector);
- if (md->type == DM_TYPE_NVME_BIO_BASED)
- ret = direct_make_request(clone);
- else
- ret = generic_make_request(clone);
+ ret = submit_bio_noacct(clone);
break;
case DM_MAPIO_KILL:
free_tio(tio);
error = __split_and_process_non_flush(&ci);
if (current->bio_list && ci.sector_count && !error) {
/*
- * Remainder must be passed to generic_make_request()
+ * Remainder must be passed to submit_bio_noacct()
* so that it gets handled *after* bios already submitted
* have been completely processed.
* We take a clone of the original to store in
bio_chain(b, bio);
trace_block_split(md->queue, b, bio->bi_iter.bi_sector);
- ret = generic_make_request(bio);
+ ret = submit_bio_noacct(bio);
break;
}
}
bio_chain(split, *bio);
trace_block_split(md->queue, split, (*bio)->bi_iter.bi_sector);
- generic_make_request(*bio);
+ submit_bio_noacct(*bio);
*bio = split;
}
}
}
/*
- * If in ->make_request_fn we need to use blk_queue_split(), otherwise
+ * If in ->submit_bio we need to use blk_queue_split(), otherwise
* queue_limits for abnormal requests (e.g. discard, writesame, etc)
* won't be imposed.
*/
if (current->bio_list) {
if (is_abnormal_io(bio))
- blk_queue_split(md->queue, &bio);
+ blk_queue_split(&bio);
else
dm_queue_split(md, ti, &bio);
}
return __split_and_process_bio(md, map, bio);
}
- static blk_qc_t dm_make_request(struct request_queue *q, struct bio *bio)
+ static blk_qc_t dm_submit_bio(struct bio *bio)
{
- struct mapped_device *md = q->queuedata;
+ struct mapped_device *md = bio->bi_disk->private_data;
blk_qc_t ret = BLK_QC_T_NONE;
int srcu_idx;
struct dm_table *map;
/*
* We are called with a live reference on q_usage_counter, but
* that one will be released as soon as we return. Grab an
- * extra one as blk_mq_make_request expects to be able to
- * consume a reference (which lives until the request is freed
- * in case a request is allocated).
+ * extra one as blk_mq_submit_bio expects to be able to consume
+ * a reference (which lives until the request is freed in case a
+ * request is allocated).
*/
- percpu_ref_get(&q->q_usage_counter);
- return blk_mq_make_request(q, bio);
+ percpu_ref_get(&bio->bi_disk->queue->q_usage_counter);
+ return blk_mq_submit_bio(bio);
}
map = dm_get_live_table(md, &srcu_idx);
return ret;
}
- static int dm_any_congested(void *congested_data, int bdi_bits)
- {
- int r = bdi_bits;
- struct mapped_device *md = congested_data;
- struct dm_table *map;
-
- if (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) {
- if (dm_request_based(md)) {
- /*
- * With request-based DM we only need to check the
- * top-level queue for congestion.
- */
- struct backing_dev_info *bdi = md->queue->backing_dev_info;
- r = bdi->wb.congested->state & bdi_bits;
- } else {
- map = dm_get_live_table_fast(md);
- if (map)
- r = dm_table_any_congested(map, bdi_bits);
- dm_put_live_table_fast(md);
- }
- }
-
- return r;
- }
-
/*-----------------------------------------------------------------
* An IDR is used to keep track of allocated minor numbers.
*---------------------------------------------------------------*/
spin_lock_init(&md->uevent_lock);
/*
- * default to bio-based required ->make_request_fn until DM
- * table is loaded and md->type established. If request-based
- * table is loaded: blk-mq will override accordingly.
+ * default to bio-based until DM table is loaded and md->type
+ * established. If request-based table is loaded: blk-mq will
+ * override accordingly.
*/
- md->queue = blk_alloc_queue(dm_make_request, numa_node_id);
+ md->queue = blk_alloc_queue(numa_node_id);
if (!md->queue)
goto bad;
- md->queue->queuedata = md;
md->disk = alloc_disk_node(1, md->numa_node_id);
if (!md->disk)
}
EXPORT_SYMBOL_GPL(dm_get_queue_limits);
- static void dm_init_congested_fn(struct mapped_device *md)
- {
- md->queue->backing_dev_info->congested_data = md;
- md->queue->backing_dev_info->congested_fn = dm_any_congested;
- }
-
/*
* Setup the DM device's queue based on md's type
*/
DMERR("Cannot initialize queue for request-based dm-mq mapped device");
return r;
}
- dm_init_congested_fn(md);
break;
case DM_TYPE_BIO_BASED:
case DM_TYPE_DAX_BIO_BASED:
case DM_TYPE_NVME_BIO_BASED:
- dm_init_congested_fn(md);
break;
case DM_TYPE_NONE:
WARN_ON_ONCE(true);
}
EXPORT_SYMBOL_GPL(dm_put);
-static int dm_wait_for_completion(struct mapped_device *md, long task_state)
+static bool md_in_flight_bios(struct mapped_device *md)
+{
+ int cpu;
+ struct hd_struct *part = &dm_disk(md)->part0;
+ long sum = 0;
+
+ for_each_possible_cpu(cpu) {
+ sum += part_stat_local_read_cpu(part, in_flight[0], cpu);
+ sum += part_stat_local_read_cpu(part, in_flight[1], cpu);
+ }
+
+ return sum != 0;
+}
+
+static int dm_wait_for_bios_completion(struct mapped_device *md, long task_state)
{
int r = 0;
DEFINE_WAIT(wait);
- while (1) {
+ while (true) {
prepare_to_wait(&md->wait, &wait, task_state);
- if (!md_in_flight(md))
+ if (!md_in_flight_bios(md))
break;
if (signal_pending_state(task_state, current)) {
return r;
}
+static int dm_wait_for_completion(struct mapped_device *md, long task_state)
+{
+ int r = 0;
+
+ if (!queue_is_mq(md->queue))
+ return dm_wait_for_bios_completion(md, task_state);
+
+ while (true) {
+ if (!blk_mq_queue_inflight(md->queue))
+ break;
+
+ if (signal_pending_state(task_state, current)) {
+ r = -EINTR;
+ break;
+ }
+
+ msleep(5);
+ }
+
+ return r;
+}
+
/*
* Process the deferred bios
*/
break;
if (dm_request_based(md))
- (void) generic_make_request(c);
+ (void) submit_bio_noacct(c);
else
(void) dm_process_bio(md, map, c);
}
int dm_kobject_uevent(struct mapped_device *md, enum kobject_action action,
unsigned cookie)
{
+ int r;
+ unsigned noio_flag;
char udev_cookie[DM_COOKIE_LENGTH];
char *envp[] = { udev_cookie, NULL };
+ noio_flag = memalloc_noio_save();
+
if (!cookie)
- return kobject_uevent(&disk_to_dev(md->disk)->kobj, action);
+ r = kobject_uevent(&disk_to_dev(md->disk)->kobj, action);
else {
snprintf(udev_cookie, DM_COOKIE_LENGTH, "%s=%u",
DM_COOKIE_ENV_VAR_NAME, cookie);
- return kobject_uevent_env(&disk_to_dev(md->disk)->kobj,
- action, envp);
+ r = kobject_uevent_env(&disk_to_dev(md->disk)->kobj,
+ action, envp);
}
+
+ memalloc_noio_restore(noio_flag);
+
+ return r;
}
uint32_t dm_next_uevent_seq(struct mapped_device *md)
};
static const struct block_device_operations dm_blk_dops = {
+ .submit_bio = dm_submit_bio,
.open = dm_blk_open,
.release = dm_blk_close,
.ioctl = dm_blk_ioctl,
return true;
nvme_req(req)->status = NVME_SC_HOST_ABORTED_CMD;
- blk_mq_force_complete_rq(req);
+ blk_mq_complete_request(req);
return true;
}
EXPORT_SYMBOL_GPL(nvme_cancel_request);
dev_warn(ctrl->device,
"Identify Descriptors failed (%d)\n", status);
/*
- * Don't treat an error as fatal, as we potentially already
- * have a NGUID or EUI-64.
+ * Don't treat non-retryable errors as fatal, as we potentially
+ * already have a NGUID or EUI-64. If we failed with DNR set,
+ * we want to silently ignore the error as we can still
+ * identify the device, but if the status does not have DNR set,
+ * we want to propagate the error back specifically for the disk
+ * revalidation flow to make sure we don't abandon the
+ * device just because of a temporary retryable error (such
+ * as path or transport errors).
*/
- if (status > 0 && !(status & NVME_SC_DNR))
+ if (status > 0 && (status & NVME_SC_DNR))
status = 0;
goto free_data;
}
if (ns->head->disk) {
nvme_update_disk_info(ns->head->disk, ns, id);
blk_queue_stack_limits(ns->head->disk->queue, ns->queue);
- revalidate_disk(ns->head->disk);
+ nvme_mpath_update_disk_size(ns->head->disk);
}
#endif
return 0;
const struct block_device_operations nvme_ns_head_ops = {
.owner = THIS_MODULE,
+ .submit_bio = nvme_ns_head_submit_bio,
.open = nvme_ns_head_open,
.release = nvme_ns_head_release,
.ioctl = nvme_ioctl,
ctrl->dev = dev;
ctrl->ops = ops;
ctrl->quirks = quirks;
+ ctrl->numa_node = NUMA_NO_NODE;
INIT_WORK(&ctrl->scan_work, nvme_scan_work);
INIT_WORK(&ctrl->async_event_work, nvme_async_event_work);
INIT_WORK(&ctrl->fw_act_work, nvme_fw_act_work);
return false;
}
- static blk_qc_t nvme_ns_head_make_request(struct request_queue *q,
- struct bio *bio)
+ blk_qc_t nvme_ns_head_submit_bio(struct bio *bio)
{
struct nvme_ns_head *head = bio->bi_disk->private_data;
struct device *dev = disk_to_dev(head->disk);
int srcu_idx;
/*
- * The namespace might be going away and the bio might
- * be moved to a different queue via blk_steal_bios(),
- * so we need to use the bio_split pool from the original
- * queue to allocate the bvecs from.
+ * The namespace might be going away and the bio might be moved to a
+ * different queue via blk_steal_bios(), so we need to use the bio_split
+ * pool from the original queue to allocate the bvecs from.
*/
- blk_queue_split(q, &bio);
+ blk_queue_split(&bio);
srcu_idx = srcu_read_lock(&head->srcu);
ns = nvme_find_path(head);
trace_block_bio_remap(bio->bi_disk->queue, bio,
disk_devt(ns->head->disk),
bio->bi_iter.bi_sector);
- ret = direct_make_request(bio);
+ ret = submit_bio_noacct(bio);
} else if (nvme_available_path(head)) {
dev_warn_ratelimited(dev, "no usable path - requeuing I/O\n");
* path.
*/
bio->bi_disk = head->disk;
- generic_make_request(bio);
+ submit_bio_noacct(bio);
}
}
if (!(ctrl->subsys->cmic & NVME_CTRL_CMIC_MULTI_CTRL) || !multipath)
return 0;
- q = blk_alloc_queue(nvme_ns_head_make_request, ctrl->numa_node);
+ q = blk_alloc_queue(ctrl->numa_node);
if (!q)
goto out;
blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
{
struct nvme_ns_head *head = ns->head;
- lockdep_assert_held(&ns->head->lock);
-
if (!head->disk)
return;
- if (!(head->disk->flags & GENHD_FL_UP))
+ if (!test_and_set_bit(NVME_NSHEAD_DISK_LIVE, &head->flags))
device_add_disk(&head->subsys->dev, head->disk,
nvme_ns_id_attr_groups);
+ mutex_lock(&head->lock);
if (nvme_path_is_optimized(ns)) {
int node, srcu_idx;
__nvme_find_path(head, node);
srcu_read_unlock(&head->srcu, srcu_idx);
}
+ mutex_unlock(&head->lock);
- synchronize_srcu(&ns->head->srcu);
- kblockd_schedule_work(&ns->head->requeue_work);
+ synchronize_srcu(&head->srcu);
+ kblockd_schedule_work(&head->requeue_work);
}
static int nvme_parse_ana_log(struct nvme_ctrl *ctrl, void *data,
static void nvme_update_ns_ana_state(struct nvme_ana_group_desc *desc,
struct nvme_ns *ns)
{
- mutex_lock(&ns->head->lock);
ns->ana_grpid = le32_to_cpu(desc->grpid);
ns->ana_state = desc->state;
clear_bit(NVME_NS_ANA_PENDING, &ns->flags);
if (nvme_state_is_live(ns->ana_state))
nvme_mpath_set_live(ns);
- mutex_unlock(&ns->head->lock);
}
static int nvme_update_ana_state(struct nvme_ctrl *ctrl,
}
DEVICE_ATTR_RO(ana_state);
-static int nvme_set_ns_ana_state(struct nvme_ctrl *ctrl,
+static int nvme_lookup_ana_group_desc(struct nvme_ctrl *ctrl,
struct nvme_ana_group_desc *desc, void *data)
{
- struct nvme_ns *ns = data;
+ struct nvme_ana_group_desc *dst = data;
- if (ns->ana_grpid == le32_to_cpu(desc->grpid)) {
- nvme_update_ns_ana_state(desc, ns);
- return -ENXIO; /* just break out of the loop */
- }
+ if (desc->grpid != dst->grpid)
+ return 0;
- return 0;
+ *dst = *desc;
+ return -ENXIO; /* just break out of the loop */
}
void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id)
{
if (nvme_ctrl_use_ana(ns->ctrl)) {
+ struct nvme_ana_group_desc desc = {
+ .grpid = id->anagrpid,
+ .state = 0,
+ };
+
mutex_lock(&ns->ctrl->ana_lock);
ns->ana_grpid = le32_to_cpu(id->anagrpid);
- nvme_parse_ana_log(ns->ctrl, ns, nvme_set_ns_ana_state);
+ nvme_parse_ana_log(ns->ctrl, &desc, nvme_lookup_ana_group_desc);
mutex_unlock(&ns->ctrl->ana_lock);
+ if (desc.state) {
+ /* found the group desc: update */
+ nvme_update_ns_ana_state(&desc, ns);
+ }
} else {
- mutex_lock(&ns->head->lock);
ns->ana_state = NVME_ANA_OPTIMIZED;
nvme_mpath_set_live(ns);
- mutex_unlock(&ns->head->lock);
}
if (bdi_cap_stable_pages_required(ns->queue->backing_dev_info)) {
- struct backing_dev_info *info =
- ns->head->disk->queue->backing_dev_info;
+ struct gendisk *disk = ns->head->disk;
- info->capabilities |= BDI_CAP_STABLE_WRITES;
+ if (disk)
+ disk->queue->backing_dev_info->capabilities |=
+ BDI_CAP_STABLE_WRITES;
}
}
kblockd_schedule_work(&head->requeue_work);
flush_work(&head->requeue_work);
blk_cleanup_queue(head->disk->queue);
+ if (!test_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) {
+ /*
+ * If device_add_disk() wasn't called, prevent the
+ * disk release from putting a bogus reference on the
+ * request queue.
+ */
+ head->disk->queue = NULL;
+ }
put_disk(head->disk);
}
spinlock_t requeue_lock;
struct work_struct requeue_work;
struct mutex lock;
+ unsigned long flags;
+#define NVME_NSHEAD_DISK_LIVE 0
struct nvme_ns __rcu *current_path[];
#endif
};
return (len >> 2) - 1;
}
- static inline void nvme_end_request(struct request *req, __le16 status,
+ static inline bool nvme_end_request(struct request *req, __le16 status,
union nvme_result result)
{
struct nvme_request *rq = nvme_req(req);
rq->result = result;
/* inject error when permitted by fault injection framework */
nvme_should_fail(req);
- blk_mq_complete_request(req);
+ if (unlikely(blk_should_fake_timeout(req->q)))
+ return true;
+ return blk_mq_complete_request_remote(req);
}
static inline void nvme_get_ctrl(struct nvme_ctrl *ctrl)
bool nvme_mpath_clear_current_path(struct nvme_ns *ns);
void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl);
struct nvme_ns *nvme_find_path(struct nvme_ns_head *head);
+ blk_qc_t nvme_ns_head_submit_bio(struct bio *bio);
static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
{
trace_block_bio_complete(ns->head->disk->queue, req->bio);
}
+static inline void nvme_mpath_update_disk_size(struct gendisk *disk)
+{
+ struct block_device *bdev = bdget_disk(disk, 0);
+
+ if (bdev) {
+ bd_set_size(bdev, get_capacity(disk) << SECTOR_SHIFT);
+ bdput(bdev);
+ }
+}
+
extern struct device_attribute dev_attr_ana_grpid;
extern struct device_attribute dev_attr_ana_state;
extern struct device_attribute subsys_attr_iopolicy;
static inline void nvme_mpath_start_freeze(struct nvme_subsystem *subsys)
{
}
+static inline void nvme_mpath_update_disk_size(struct gendisk *disk)
+{
+}
#endif /* CONFIG_NVME_MULTIPATH */
#ifdef CONFIG_NVM
req = blk_mq_tag_to_rq(nvme_queue_tagset(nvmeq), cqe->command_id);
trace_nvme_sq(req, cqe->sq_head, nvmeq->sq_tail);
- nvme_end_request(req, cqe->status, cqe->result);
+ if (!nvme_end_request(req, cqe->status, cqe->result))
+ nvme_pci_complete_rq(req);
}
static inline void nvme_update_cq_head(struct nvme_queue *nvmeq)
dev->admin_tagset.queue_depth = NVME_AQ_MQ_TAG_DEPTH;
dev->admin_tagset.timeout = ADMIN_TIMEOUT;
- dev->admin_tagset.numa_node = dev_to_node(dev->dev);
+ dev->admin_tagset.numa_node = dev->ctrl.numa_node;
dev->admin_tagset.cmd_size = sizeof(struct nvme_iod);
dev->admin_tagset.flags = BLK_MQ_F_NO_SCHED;
dev->admin_tagset.driver_data = dev;
if (result)
return result;
+ dev->ctrl.numa_node = dev_to_node(dev->dev);
+
nvmeq = &dev->queues[0];
aqa = nvmeq->q_depth - 1;
aqa |= aqa << 16;
if (dev->io_queues[HCTX_TYPE_POLL])
dev->tagset.nr_maps++;
dev->tagset.timeout = NVME_IO_TIMEOUT;
- dev->tagset.numa_node = dev_to_node(dev->dev);
+ dev->tagset.numa_node = dev->ctrl.numa_node;
dev->tagset.queue_depth =
min_t(int, dev->q_depth, BLK_MQ_MAX_DEPTH) - 1;
dev->tagset.cmd_size = sizeof(struct nvme_iod);
static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id,
struct rdma_cm_event *event);
static void nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc);
+ static void nvme_rdma_complete_rq(struct request *rq);
static const struct blk_mq_ops nvme_rdma_mq_ops;
static const struct blk_mq_ops nvme_rdma_admin_mq_ops;
* Spread I/O queues completion vectors according their queue index.
* Admin queues can always go on completion vector 0.
*/
- comp_vector = idx == 0 ? idx : idx - 1;
+ comp_vector = (idx == 0 ? idx : idx - 1) % ibdev->num_comp_vectors;
/* Polling queues need direct cq polling context */
if (nvme_rdma_poll_queue(queue))
queue_work(nvme_reset_wq, &ctrl->err_work);
}
+ static void nvme_rdma_end_request(struct nvme_rdma_request *req)
+ {
+ struct request *rq = blk_mq_rq_from_pdu(req);
+
+ if (!refcount_dec_and_test(&req->ref))
+ return;
+ if (!nvme_end_request(rq, req->status, req->result))
+ nvme_rdma_complete_rq(rq);
+ }
+
static void nvme_rdma_wr_error(struct ib_cq *cq, struct ib_wc *wc,
const char *op)
{
{
struct nvme_rdma_request *req =
container_of(wc->wr_cqe, struct nvme_rdma_request, reg_cqe);
- struct request *rq = blk_mq_rq_from_pdu(req);
- if (unlikely(wc->status != IB_WC_SUCCESS)) {
+ if (unlikely(wc->status != IB_WC_SUCCESS))
nvme_rdma_wr_error(cq, wc, "LOCAL_INV");
- return;
- }
-
- if (refcount_dec_and_test(&req->ref))
- nvme_end_request(rq, req->status, req->result);
-
+ else
+ nvme_rdma_end_request(req);
}
static int nvme_rdma_inv_rkey(struct nvme_rdma_queue *queue,
container_of(wc->wr_cqe, struct nvme_rdma_qe, cqe);
struct nvme_rdma_request *req =
container_of(qe, struct nvme_rdma_request, sqe);
- struct request *rq = blk_mq_rq_from_pdu(req);
- if (unlikely(wc->status != IB_WC_SUCCESS)) {
+ if (unlikely(wc->status != IB_WC_SUCCESS))
nvme_rdma_wr_error(cq, wc, "SEND");
- return;
- }
-
- if (refcount_dec_and_test(&req->ref))
- nvme_end_request(rq, req->status, req->result);
+ else
+ nvme_rdma_end_request(req);
}
static int nvme_rdma_post_send(struct nvme_rdma_queue *queue,
return;
}
- if (refcount_dec_and_test(&req->ref))
- nvme_end_request(rq, req->status, req->result);
+ nvme_rdma_end_request(req);
}
static void nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc)
return -EINVAL;
}
- nvme_end_request(rq, cqe->status, cqe->result);
+ if (!nvme_end_request(rq, cqe->status, cqe->result))
+ nvme_complete_rq(rq);
queue->nr_cqe++;
return 0;
{
union nvme_result res = {};
- nvme_end_request(rq, cpu_to_le16(status << 1), res);
+ if (!nvme_end_request(rq, cpu_to_le16(status << 1), res))
+ nvme_complete_rq(rq);
}
static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
set->ops = &nvme_tcp_admin_mq_ops;
set->queue_depth = NVME_AQ_MQ_TAG_DEPTH;
set->reserved_tags = 2; /* connect + keep-alive */
- set->numa_node = NUMA_NO_NODE;
+ set->numa_node = nctrl->numa_node;
set->flags = BLK_MQ_F_BLOCKING;
set->cmd_size = sizeof(struct nvme_tcp_request);
set->driver_data = ctrl;
set->ops = &nvme_tcp_mq_ops;
set->queue_depth = nctrl->sqsize + 1;
set->reserved_tags = 1; /* fabric connect */
- set->numa_node = NUMA_NO_NODE;
+ set->numa_node = nctrl->numa_node;
set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING;
set->cmd_size = sizeof(struct nvme_tcp_request);
set->driver_data = ctrl;
return;
}
- nvme_end_request(rq, cqe->status, cqe->result);
+ if (!nvme_end_request(rq, cqe->status, cqe->result))
+ nvme_loop_complete_rq(rq);
}
}
ctrl->admin_tag_set.ops = &nvme_loop_admin_mq_ops;
ctrl->admin_tag_set.queue_depth = NVME_AQ_MQ_TAG_DEPTH;
ctrl->admin_tag_set.reserved_tags = 2; /* connect + keep-alive */
- ctrl->admin_tag_set.numa_node = NUMA_NO_NODE;
+ ctrl->admin_tag_set.numa_node = ctrl->ctrl.numa_node;
ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_loop_iod) +
NVME_INLINE_SG_CNT * sizeof(struct scatterlist);
ctrl->admin_tag_set.driver_data = ctrl;
ctrl->tag_set.ops = &nvme_loop_mq_ops;
ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size;
ctrl->tag_set.reserved_tags = 1; /* fabric connect */
- ctrl->tag_set.numa_node = NUMA_NO_NODE;
+ ctrl->tag_set.numa_node = ctrl->ctrl.numa_node;
ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
ctrl->tag_set.cmd_size = sizeof(struct nvme_loop_iod) +
NVME_INLINE_SG_CNT * sizeof(struct scatterlist);
return ERR_PTR(ret);
}
- static int btrfs_congested_fn(void *congested_data, int bdi_bits)
- {
- struct btrfs_fs_info *info = (struct btrfs_fs_info *)congested_data;
- int ret = 0;
- struct btrfs_device *device;
- struct backing_dev_info *bdi;
-
- rcu_read_lock();
- list_for_each_entry_rcu(device, &info->fs_devices->devices, dev_list) {
- if (!device->bdev)
- continue;
- bdi = device->bdev->bd_bdi;
- if (bdi_congested(bdi, bdi_bits)) {
- ret = 1;
- break;
- }
- }
- rcu_read_unlock();
- return ret;
- }
-
/*
* called by the kthread helper functions to finally call the bio end_io
* functions. This is where read checksum verification actually happens
!extent_buffer_uptodate(tree_root->node)) {
handle_error = true;
- if (IS_ERR(tree_root->node))
+ if (IS_ERR(tree_root->node)) {
ret = PTR_ERR(tree_root->node);
- else if (!extent_buffer_uptodate(tree_root->node))
+ tree_root->node = NULL;
+ } else if (!extent_buffer_uptodate(tree_root->node)) {
ret = -EUCLEAN;
+ }
btrfs_warn(fs_info, "failed to read tree root");
continue;
goto fail_sb_buffer;
}
- sb->s_bdi->congested_fn = btrfs_congested_fn;
- sb->s_bdi->congested_data = fs_info;
sb->s_bdi->capabilities |= BDI_CAP_CGROUP_WRITEBACK;
sb->s_bdi->ra_pages = VM_READAHEAD_PAGES;
sb->s_bdi->ra_pages *= btrfs_super_num_devices(disk_super);
#include <linux/sched.h>
#include <linux/sched/clock.h>
-
- #ifdef CONFIG_BLOCK
-
#include <linux/major.h>
#include <linux/genhd.h>
#include <linux/list.h>
struct blk_queue_ctx;
- typedef blk_qc_t (make_request_fn) (struct request_queue *q, struct bio *bio);
-
struct bio_vec;
enum blk_eh_timer_return {
struct blk_queue_stats *stats;
struct rq_qos *rq_qos;
- make_request_fn *make_request_fn;
-
const struct blk_mq_ops *mq_ops;
/* sw queues */
unsigned int sg_timeout;
unsigned int sg_reserved_size;
int node;
+ struct mutex debugfs_mutex;
#ifdef CONFIG_BLK_DEV_IO_TRACE
struct blk_trace __rcu *blk_trace;
- struct mutex blk_trace_mutex;
#endif
/*
* for flush operations
struct list_head tag_set_list;
struct bio_set bio_split;
- #ifdef CONFIG_BLK_DEBUG_FS
struct dentry *debugfs_dir;
+
+ #ifdef CONFIG_BLK_DEBUG_FS
struct dentry *sched_debugfs_dir;
struct dentry *rqos_debugfs_dir;
#endif
size_t cmd_size;
- struct work_struct release_work;
-
#define BLK_MAX_WRITE_HINTS 5
u64 write_hints[BLK_MAX_WRITE_HINTS];
};
+/* Keep blk_queue_flag_name[] in sync with the definitions below */
#define QUEUE_FLAG_STOPPED 0 /* queue is stopped */
#define QUEUE_FLAG_DYING 1 /* queue being torn down */
#define QUEUE_FLAG_NOMERGES 3 /* disable merge attempts */
extern int blk_register_queue(struct gendisk *disk);
extern void blk_unregister_queue(struct gendisk *disk);
- extern blk_qc_t generic_make_request(struct bio *bio);
- extern blk_qc_t direct_make_request(struct bio *bio);
+ blk_qc_t submit_bio_noacct(struct bio *bio);
extern void blk_rq_init(struct request_queue *q, struct request *rq);
extern void blk_put_request(struct request *);
extern struct request *blk_get_request(struct request_queue *, unsigned int op,
extern blk_status_t blk_insert_cloned_request(struct request_queue *q,
struct request *rq);
extern int blk_rq_append_bio(struct request *rq, struct bio **bio);
- extern void blk_queue_split(struct request_queue *, struct bio **);
+ extern void blk_queue_split(struct bio **);
extern int scsi_verify_blk_ioctl(struct block_device *, unsigned int);
extern int scsi_cmd_blk_ioctl(struct block_device *, fmode_t,
unsigned int, void __user *);
extern bool blk_update_request(struct request *rq, blk_status_t error,
unsigned int nr_bytes);
- extern void __blk_complete_request(struct request *);
extern void blk_abort_request(struct request *);
/*
return __blk_rq_map_sg(q, rq, sglist, &last_sg);
}
extern void blk_dump_rq_flags(struct request *, char *);
- extern long nr_blockdev_pages(void);
bool __must_check blk_get_queue(struct request_queue *);
- struct request_queue *blk_alloc_queue(make_request_fn make_request, int node_id);
+ struct request_queue *blk_alloc_queue(int node_id);
extern void blk_put_queue(struct request_queue *);
extern void blk_set_queue_dying(struct request_queue *);
+ #ifdef CONFIG_BLOCK
/*
* blk_plug permits building a queue of related requests by holding the I/O
* fragments for a short period. This allows merging of sequential requests
!list_empty(&plug->cb_list));
}
+ int blkdev_issue_flush(struct block_device *, gfp_t);
+ long nr_blockdev_pages(void);
+ #else /* CONFIG_BLOCK */
+ struct blk_plug {
+ };
+
+ static inline void blk_start_plug(struct blk_plug *plug)
+ {
+ }
+
+ static inline void blk_finish_plug(struct blk_plug *plug)
+ {
+ }
+
+ static inline void blk_flush_plug(struct task_struct *task)
+ {
+ }
+
+ static inline void blk_schedule_flush_plug(struct task_struct *task)
+ {
+ }
+
+
+ static inline bool blk_needs_flush_plug(struct task_struct *tsk)
+ {
+ return false;
+ }
+
+ static inline int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask)
+ {
+ return 0;
+ }
+
+ static inline long nr_blockdev_pages(void)
+ {
+ return 0;
+ }
+ #endif /* CONFIG_BLOCK */
+
extern void blk_io_schedule(void);
- int blkdev_issue_flush(struct block_device *, gfp_t);
extern int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask, struct page *page);
static inline unsigned int block_size(struct block_device *bdev)
{
- return bdev->bd_block_size;
+ return 1 << bdev->bd_inode->i_blkbits;
}
int kblockd_schedule_work(struct work_struct *work);
struct block_device_operations {
+ blk_qc_t (*submit_bio) (struct bio *bio);
int (*open) (struct block_device *, fmode_t);
void (*release) (struct gendisk *, fmode_t);
int (*rw_page)(struct block_device *, sector_t, struct page *, unsigned int);
int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
unsigned int (*check_events) (struct gendisk *disk,
unsigned int clearing);
- /* ->media_changed() is DEPRECATED, use ->check_events() instead */
- int (*media_changed) (struct gendisk *);
void (*unlock_native_capacity) (struct gendisk *);
int (*revalidate_disk) (struct gendisk *);
int (*getgeo)(struct block_device *, struct hd_geometry *);
}
#endif /* CONFIG_BLK_DEV_ZONED */
- #else /* CONFIG_BLOCK */
-
- struct block_device;
-
- /*
- * stubs for when the block layer is configured out
- */
- #define buffer_heads_over_limit 0
-
- static inline long nr_blockdev_pages(void)
- {
- return 0;
- }
-
- struct blk_plug {
- };
-
- static inline void blk_start_plug(struct blk_plug *plug)
- {
- }
-
- static inline void blk_finish_plug(struct blk_plug *plug)
- {
- }
-
- static inline void blk_flush_plug(struct task_struct *task)
- {
- }
-
- static inline void blk_schedule_flush_plug(struct task_struct *task)
- {
- }
-
-
- static inline bool blk_needs_flush_plug(struct task_struct *tsk)
- {
- return false;
- }
-
- static inline int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask)
- {
- return 0;
- }
-
- #endif /* CONFIG_BLOCK */
-
static inline void blk_wake_io_task(struct task_struct *waiter)
{
/*
wake_up_process(waiter);
}
- #ifdef CONFIG_BLOCK
unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors,
unsigned int op);
void disk_end_io_acct(struct gendisk *disk, unsigned int op,
{
return disk_end_io_acct(bio->bi_disk, bio_op(bio), start_time);
}
- #endif /* CONFIG_BLOCK */
+ int bdev_read_only(struct block_device *bdev);
+ int set_blocksize(struct block_device *bdev, int size);
+
+ const char *bdevname(struct block_device *bdev, char *buffer);
+ struct block_device *lookup_bdev(const char *);
+
+ void blkdev_show(struct seq_file *seqf, off_t offset);
+
+ #define BDEVNAME_SIZE 32 /* Largest string for a blockdev identifier */
+ #define BDEVT_SIZE 10 /* Largest string for MAJ:MIN for blkdev */
+ #ifdef CONFIG_BLOCK
+ #define BLKDEV_MAJOR_MAX 512
+ #else
+ #define BLKDEV_MAJOR_MAX 0
+ #endif
+
+ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder);
+ struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
+ void *holder);
+ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder);
+ int bd_prepare_to_claim(struct block_device *bdev, struct block_device *whole,
+ void *holder);
+ void bd_abort_claiming(struct block_device *bdev, struct block_device *whole,
+ void *holder);
+ void blkdev_put(struct block_device *bdev, fmode_t mode);
+
+ struct block_device *I_BDEV(struct inode *inode);
+ struct block_device *bdget(dev_t);
+ struct block_device *bdgrab(struct block_device *bdev);
+ void bdput(struct block_device *);
+
+ #ifdef CONFIG_BLOCK
+ void invalidate_bdev(struct block_device *bdev);
+ int sync_blockdev(struct block_device *bdev);
+ #else
+ static inline void invalidate_bdev(struct block_device *bdev)
+ {
+ }
+ static inline int sync_blockdev(struct block_device *bdev)
+ {
+ return 0;
+ }
#endif
+ int fsync_bdev(struct block_device *bdev);
+
+ struct super_block *freeze_bdev(struct block_device *bdev);
+ int thaw_bdev(struct block_device *bdev, struct super_block *sb);
+
+ #endif /* _LINUX_BLKDEV_H */
#define IOCB_SYNC (1 << 5)
#define IOCB_WRITE (1 << 6)
#define IOCB_NOWAIT (1 << 7)
+#define IOCB_NOIO (1 << 9)
struct kiocb {
struct file *ki_filp;
* must be enforced here for CRIS, to let the least significant bit
* of struct page's "mapping" pointer be used for PAGE_MAPPING_ANON.
*/
- struct request_queue;
-
- struct block_device {
- dev_t bd_dev; /* not a kdev_t - it's a search key */
- int bd_openers;
- struct inode * bd_inode; /* will die */
- struct super_block * bd_super;
- struct mutex bd_mutex; /* open/close mutex */
- void * bd_claiming;
- void * bd_holder;
- int bd_holders;
- bool bd_write_holder;
- #ifdef CONFIG_SYSFS
- struct list_head bd_holder_disks;
- #endif
- struct block_device * bd_contains;
- unsigned bd_block_size;
- u8 bd_partno;
- struct hd_struct * bd_part;
- /* number of times partitions within this device have been opened. */
- unsigned bd_part_count;
- int bd_invalidated;
- struct gendisk * bd_disk;
- struct request_queue * bd_queue;
- struct backing_dev_info *bd_bdi;
- struct list_head bd_list;
- /*
- * Private data. You must have bd_claim'ed the block_device
- * to use this. NOTE: bd_claim allows an owner to claim
- * the same device multiple times, the owner must take special
- * care to not mess up bd_private for that case.
- */
- unsigned long bd_private;
-
- /* The counter of freeze processes */
- int bd_fsfreeze_count;
- /* Mutex for freeze */
- struct mutex bd_fsfreeze_mutex;
- } __randomize_layout;
/* XArray tags, for tagging dirty and writeback pages in the pagecache. */
#define PAGECACHE_TAG_DIRTY XA_MARK_0
return MAJOR(inode->i_rdev);
}
- extern struct block_device *I_BDEV(struct inode *inode);
-
struct fown_struct {
rwlock_t lock; /* protects pid, uid, euid fields */
struct pid *pid; /* pid or -pgrp where SIGIO should be sent */
loff_t pos;
};
- struct block_device_operations;
-
- /* These macros are for out of kernel modules to test that
- * the kernel supports the unlocked_ioctl and compat_ioctl
- * fields in struct file_operations. */
- #define HAVE_COMPAT_IOCTL 1
- #define HAVE_UNLOCKED_IOCTL 1
-
/*
* These flags let !MMU mmap() govern direct device mapping vs immediate
* copying more easily for MAP_PRIVATE, especially for ROM filesystems.
struct iovec *fast_pointer,
struct iovec **ret_pointer);
-extern ssize_t __vfs_read(struct file *, char __user *, size_t, loff_t *);
extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *);
extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *);
extern ssize_t vfs_readv(struct file *, const struct iovec __user *,
#define MODULE_ALIAS_FS(NAME) MODULE_ALIAS("fs-" NAME)
- #ifdef CONFIG_BLOCK
extern struct dentry *mount_bdev(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data,
int (*fill_super)(struct super_block *, void *, int));
- #else
- static inline struct dentry *mount_bdev(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data,
- int (*fill_super)(struct super_block *, void *, int))
- {
- return ERR_PTR(-ENODEV);
- }
- #endif
extern struct dentry *mount_single(struct file_system_type *fs_type,
int flags, void *data,
int (*fill_super)(struct super_block *, void *, int));
int (*fill_super)(struct super_block *, void *, int));
extern struct dentry *mount_subtree(struct vfsmount *mnt, const char *path);
void generic_shutdown_super(struct super_block *sb);
- #ifdef CONFIG_BLOCK
void kill_block_super(struct super_block *sb);
- #else
- static inline void kill_block_super(struct super_block *sb)
- {
- BUG();
- }
- #endif
void kill_anon_super(struct super_block *sb);
void kill_litter_super(struct super_block *sb);
void deactivate_super(struct super_block *sb);
#define __getname() kmem_cache_alloc(names_cachep, GFP_KERNEL)
#define __putname(name) kmem_cache_free(names_cachep, (void *)(name))
- #ifdef CONFIG_BLOCK
- extern int register_blkdev(unsigned int, const char *);
- extern void unregister_blkdev(unsigned int, const char *);
- extern struct block_device *bdget(dev_t);
- extern struct block_device *bdgrab(struct block_device *bdev);
- extern void bd_set_size(struct block_device *, loff_t size);
- extern void bd_forget(struct inode *inode);
- extern void bdput(struct block_device *);
- extern void invalidate_bdev(struct block_device *);
- extern void iterate_bdevs(void (*)(struct block_device *, void *), void *);
- extern int sync_blockdev(struct block_device *bdev);
- extern struct super_block *freeze_bdev(struct block_device *);
- extern void emergency_thaw_all(void);
- extern void emergency_thaw_bdev(struct super_block *sb);
- extern int thaw_bdev(struct block_device *bdev, struct super_block *sb);
- extern int fsync_bdev(struct block_device *);
-
extern struct super_block *blockdev_superblock;
-
static inline bool sb_is_blkdev_sb(struct super_block *sb)
{
- return sb == blockdev_superblock;
- }
- #else
- static inline void bd_forget(struct inode *inode) {}
- static inline int sync_blockdev(struct block_device *bdev) { return 0; }
- static inline void invalidate_bdev(struct block_device *bdev) {}
-
- static inline struct super_block *freeze_bdev(struct block_device *sb)
- {
- return NULL;
- }
-
- static inline int thaw_bdev(struct block_device *bdev, struct super_block *sb)
- {
- return 0;
+ return IS_ENABLED(CONFIG_BLOCK) && sb == blockdev_superblock;
}
- static inline int emergency_thaw_bdev(struct super_block *sb)
- {
- return 0;
- }
-
- static inline void iterate_bdevs(void (*f)(struct block_device *, void *), void *arg)
- {
- }
-
- static inline bool sb_is_blkdev_sb(struct super_block *sb)
- {
- return false;
- }
- #endif
+ void emergency_thaw_all(void);
extern int sync_filesystem(struct super_block *);
extern const struct file_operations def_blk_fops;
extern const struct file_operations def_chr_fops;
- #ifdef CONFIG_BLOCK
- extern int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long);
- extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long);
- extern int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder);
- extern struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
- void *holder);
- extern struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode,
- void *holder);
- extern struct block_device *bd_start_claiming(struct block_device *bdev,
- void *holder);
- extern void bd_finish_claiming(struct block_device *bdev,
- struct block_device *whole, void *holder);
- extern void bd_abort_claiming(struct block_device *bdev,
- struct block_device *whole, void *holder);
- extern void blkdev_put(struct block_device *bdev, fmode_t mode);
-
- #ifdef CONFIG_SYSFS
- extern int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk);
- extern void bd_unlink_disk_holder(struct block_device *bdev,
- struct gendisk *disk);
- #else
- static inline int bd_link_disk_holder(struct block_device *bdev,
- struct gendisk *disk)
- {
- return 0;
- }
- static inline void bd_unlink_disk_holder(struct block_device *bdev,
- struct gendisk *disk)
- {
- }
- #endif
- #endif
/* fs/char_dev.c */
#define CHRDEV_MAJOR_MAX 512
__unregister_chrdev(major, 0, 256, name);
}
- /* fs/block_dev.c */
- #define BDEVNAME_SIZE 32 /* Largest string for a blockdev identifier */
- #define BDEVT_SIZE 10 /* Largest string for MAJ:MIN for blkdev */
-
- #ifdef CONFIG_BLOCK
- #define BLKDEV_MAJOR_MAX 512
- extern const char *bdevname(struct block_device *bdev, char *buffer);
- extern struct block_device *lookup_bdev(const char *);
- extern void blkdev_show(struct seq_file *,off_t);
-
- #else
- #define BLKDEV_MAJOR_MAX 0
- #endif
-
extern void init_special_inode(struct inode *, umode_t, dev_t);
/* Invalid inode operations -- fs/bad_inode.c */
extern void make_bad_inode(struct inode *);
extern bool is_bad_inode(struct inode *);
- #ifdef CONFIG_BLOCK
- extern int revalidate_disk(struct gendisk *);
- extern int check_disk_change(struct block_device *);
- extern int __invalidate_device(struct block_device *, bool);
- #endif
unsigned long invalidate_mapping_pages(struct address_space *mapping,
pgoff_t start, pgoff_t end);
extern int kernel_read_file_from_fd(int, void **, loff_t *, loff_t,
enum kernel_read_file_id);
extern ssize_t kernel_read(struct file *, void *, size_t, loff_t *);
+ssize_t __kernel_read(struct file *file, void *buf, size_t count, loff_t *pos);
extern ssize_t kernel_write(struct file *, const void *, size_t, loff_t *);
extern ssize_t __kernel_write(struct file *, const void *, size_t, loff_t *);
extern struct file * open_exec(const char *);
extern void inode_sb_list_add(struct inode *inode);
- #ifdef CONFIG_BLOCK
- extern int bdev_read_only(struct block_device *);
- #endif
- extern int set_blocksize(struct block_device *, int);
extern int sb_set_blocksize(struct super_block *, int);
extern int sb_min_blocksize(struct super_block *, int);