For more details refer Documentation/admin-guide/iostats.rst
+What: /sys/block/<disk>/diskseq
+Date: February 2021
+Contact: Matteo Croce <mcroce@microsoft.com>
+Description:
+ The /sys/block/<disk>/diskseq files reports the disk
+ sequence number, which is a monotonically increasing
+ number assigned to every drive.
+ Some devices, like the loop device, refresh such number
+ every time the backing file is changed.
+ The value type is 64 bit unsigned.
+
+
What: /sys/block/<disk>/<part>/stat
Date: February 2008
Contact: Jerome Marchand <jmarchan@redhat.com>
CONFIG_VECTORBASE=0x40000000
CONFIG_KERNELBASE=0x40001000
# CONFIG_BLK_DEV_BSG is not set
-CONFIG_BLK_CMDLINE_PARSER=y
CONFIG_BINFMT_FLAT=y
CONFIG_BINFMT_ZFLAT=y
CONFIG_BINFMT_MISC=y
#ifndef __ASM_RC32434_RB_H
#define __ASM_RC32434_RB_H
-#include <linux/genhd.h>
-
#define REGBASE 0x18000000
#define IDT434_REG_BASE ((volatile void *) KSEG1ADDR(REGBASE))
#define UART0BASE 0x58000
CONFIG_DEBUG_SG=y
# CONFIG_RCU_TRACE is not set
CONFIG_RCU_EQS_DEBUG=y
-CONFIG_DEBUG_BLOCK_EXT_DEVT=y
# CONFIG_FTRACE is not set
# CONFIG_RUNTIME_TESTING_MENU is not set
CONFIG_MEMTEST=y
CONFIG_DEBUG_SG=y
# CONFIG_RCU_TRACE is not set
CONFIG_RCU_EQS_DEBUG=y
-CONFIG_DEBUG_BLOCK_EXT_DEVT=y
# CONFIG_FTRACE is not set
# CONFIG_RUNTIME_TESTING_MENU is not set
CONFIG_MEMTEST=y
rq_for_each_segment(bvec, req, iter) {
BUG_ON(i >= io_req->desc_cnt);
- io_req->io_desc[i].buffer =
- page_address(bvec.bv_page) + bvec.bv_offset;
+ io_req->io_desc[i].buffer = bvec_virt(&bvec);
io_req->io_desc[i].length = bvec.bv_len;
i++;
}
Note, this is an experimental interface and could be changed someday.
-config BLK_CMDLINE_PARSER
- bool "Block device command line partition parser"
- help
- Enabling this option allows you to specify the partition layout from
- the kernel boot args. This is typically of use for embedded devices
- which don't otherwise have any standardized method for listing the
- partitions on a block device.
-
- See Documentation/block/cmdline-partition.rst for more information.
-
config BLK_WBT
bool "Enable support for block device writeback throttling"
help
config BLK_PM
def_bool BLOCK && PM
+# do not use in new code
+config BLOCK_HOLDER_DEPRECATED
+ bool
+
source "block/Kconfig.iosched"
bfq-y := bfq-iosched.o bfq-wf2q.o bfq-cgroup.o
obj-$(CONFIG_IOSCHED_BFQ) += bfq.o
-obj-$(CONFIG_BLK_CMDLINE_PARSER) += cmdline-parser.o
obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o blk-integrity.o
obj-$(CONFIG_BLK_DEV_INTEGRITY_T10) += t10-pi.o
obj-$(CONFIG_BLK_MQ_PCI) += blk-mq-pci.o
obj-$(CONFIG_BLK_PM) += blk-pm.o
obj-$(CONFIG_BLK_INLINE_ENCRYPTION) += keyslot-manager.o blk-crypto.o
obj-$(CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK) += blk-crypto-fallback.o
+obj-$(CONFIG_BLOCK_HOLDER_DEPRECATED) += holder.o
__rq = bfq_find_rq_fmerge(bfqd, bio, q);
if (__rq && elv_bio_merge_ok(__rq, bio)) {
*req = __rq;
+
+ if (blk_discard_mergable(__rq))
+ return ELEVATOR_DISCARD_MERGE;
return ELEVATOR_FRONT_MERGE;
}
int i, j;
for (i = 0; i < 2; i++)
- for (j = 0; j < IOPRIO_BE_NR; j++)
+ for (j = 0; j < IOPRIO_NR_LEVELS; j++)
if (bfqg->async_bfqq[i][j])
bfq_bfqq_end_wr(bfqg->async_bfqq[i][j]);
if (bfqg->async_idle_bfqq)
switch (ioprio_class) {
default:
pr_err("bdi %s: bfq: bad prio class %d\n",
- bdi_dev_name(bfqq->bfqd->queue->backing_dev_info),
- ioprio_class);
+ bdi_dev_name(bfqq->bfqd->queue->disk->bdi),
+ ioprio_class);
fallthrough;
case IOPRIO_CLASS_NONE:
/*
break;
}
- if (bfqq->new_ioprio >= IOPRIO_BE_NR) {
+ if (bfqq->new_ioprio >= IOPRIO_NR_LEVELS) {
pr_crit("bfq_set_next_ioprio_data: new_ioprio %d\n",
bfqq->new_ioprio);
- bfqq->new_ioprio = IOPRIO_BE_NR;
+ bfqq->new_ioprio = IOPRIO_NR_LEVELS - 1;
}
bfqq->entity.new_weight = bfq_ioprio_to_weight(bfqq->new_ioprio);
case IOPRIO_CLASS_RT:
return &bfqg->async_bfqq[0][ioprio];
case IOPRIO_CLASS_NONE:
- ioprio = IOPRIO_NORM;
+ ioprio = IOPRIO_BE_NORM;
fallthrough;
case IOPRIO_CLASS_BE:
return &bfqg->async_bfqq[1][ioprio];
int i, j;
for (i = 0; i < 2; i++)
- for (j = 0; j < IOPRIO_BE_NR; j++)
+ for (j = 0; j < IOPRIO_NR_LEVELS; j++)
__bfq_put_async_bfqq(bfqd, &bfqg->async_bfqq[i][j]);
__bfq_put_async_bfqq(bfqd, &bfqg->async_idle_bfqq);
void *bfqd;
- struct bfq_queue *async_bfqq[2][IOPRIO_BE_NR];
+ struct bfq_queue *async_bfqq[2][IOPRIO_NR_LEVELS];
struct bfq_queue *async_idle_bfqq;
struct bfq_entity *my_entity;
struct bfq_entity entity;
struct bfq_sched_data sched_data;
- struct bfq_queue *async_bfqq[2][IOPRIO_BE_NR];
+ struct bfq_queue *async_bfqq[2][IOPRIO_NR_LEVELS];
struct bfq_queue *async_idle_bfqq;
struct rb_root rq_pos_tree;
};
#endif
-struct bfq_queue *bfq_entity_to_bfqq(struct bfq_entity *entity);
-
/* --------------- main algorithm interface ----------------- */
#define BFQ_SERVICE_TREE_INIT ((struct bfq_service_tree) \
*/
unsigned short bfq_ioprio_to_weight(int ioprio)
{
- return (IOPRIO_BE_NR - ioprio) * BFQ_WEIGHT_CONVERSION_COEFF;
+ return (IOPRIO_NR_LEVELS - ioprio) * BFQ_WEIGHT_CONVERSION_COEFF;
}
/**
*
* To preserve as much as possible the old only-ioprio user interface,
* 0 is used as an escape ioprio value for weights (numerically) equal or
- * larger than IOPRIO_BE_NR * BFQ_WEIGHT_CONVERSION_COEFF.
+ * larger than IOPRIO_NR_LEVELS * BFQ_WEIGHT_CONVERSION_COEFF.
*/
static unsigned short bfq_weight_to_ioprio(int weight)
{
return max_t(int, 0,
- IOPRIO_BE_NR * BFQ_WEIGHT_CONVERSION_COEFF - weight);
+ IOPRIO_NR_LEVELS * BFQ_WEIGHT_CONVERSION_COEFF - weight);
}
static void bfq_get_entity(struct bfq_entity *entity)
struct bio_set *bs = bio->bi_pool;
if (bip->bip_flags & BIP_BLOCK_INTEGRITY)
- kfree(page_address(bip->bip_vec->bv_page) +
- bip->bip_vec->bv_offset);
+ kfree(bvec_virt(bip->bip_vec));
__bio_integrity_free(bs, bip);
bio->bi_integrity = NULL;
struct bio_vec bv;
struct bio_integrity_payload *bip = bio_integrity(bio);
blk_status_t ret = BLK_STS_OK;
- void *prot_buf = page_address(bip->bip_vec->bv_page) +
- bip->bip_vec->bv_offset;
iter.disk_name = bio->bi_bdev->bd_disk->disk_name;
iter.interval = 1 << bi->interval_exp;
iter.seed = proc_iter->bi_sector;
- iter.prot_buf = prot_buf;
+ iter.prot_buf = bvec_virt(bip->bip_vec);
__bio_for_each_segment(bv, bio, bviter, *proc_iter) {
- void *kaddr = kmap_atomic(bv.bv_page);
+ void *kaddr = bvec_kmap_local(&bv);
- iter.data_buf = kaddr + bv.bv_offset;
+ iter.data_buf = kaddr;
iter.data_size = bv.bv_len;
-
ret = proc_fn(&iter);
- if (ret) {
- kunmap_atomic(kaddr);
- return ret;
- }
+ kunmap_local(kaddr);
+
+ if (ret)
+ break;
- kunmap_atomic(kaddr);
}
return ret;
}
void zero_fill_bio(struct bio *bio)
{
- unsigned long flags;
struct bio_vec bv;
struct bvec_iter iter;
- bio_for_each_segment(bv, bio, iter) {
- char *data = bvec_kmap_irq(&bv, &flags);
- memset(data, 0, bv.bv_len);
- flush_dcache_page(bv.bv_page);
- bvec_kunmap_irq(data, &flags);
- }
+ bio_for_each_segment(bv, bio, iter)
+ memzero_bvec(&bv);
}
EXPORT_SYMBOL(zero_fill_bio);
return 0;
}
+static void bio_put_pages(struct page **pages, size_t size, size_t off)
+{
+ size_t i, nr = DIV_ROUND_UP(size + (off & ~PAGE_MASK), PAGE_SIZE);
+
+ for (i = 0; i < nr; i++)
+ put_page(pages[i]);
+}
+
#define PAGE_PTRS_PER_BVEC (sizeof(struct bio_vec) / sizeof(struct page *))
/**
if (same_page)
put_page(page);
} else {
- if (WARN_ON_ONCE(bio_full(bio, len)))
- return -EINVAL;
+ if (WARN_ON_ONCE(bio_full(bio, len))) {
+ bio_put_pages(pages + i, left, offset);
+ return -EINVAL;
+ }
__bio_add_page(bio, page, len, offset);
}
offset = 0;
len = min_t(size_t, PAGE_SIZE - offset, left);
if (bio_add_hw_page(q, bio, page, len, offset,
max_append_sectors, &same_page) != len) {
+ bio_put_pages(pages + i, left, offset);
ret = -EINVAL;
break;
}
void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter,
struct bio *src, struct bvec_iter *src_iter)
{
- struct bio_vec src_bv, dst_bv;
- void *src_p, *dst_p;
- unsigned bytes;
-
while (src_iter->bi_size && dst_iter->bi_size) {
- src_bv = bio_iter_iovec(src, *src_iter);
- dst_bv = bio_iter_iovec(dst, *dst_iter);
-
- bytes = min(src_bv.bv_len, dst_bv.bv_len);
-
- src_p = kmap_atomic(src_bv.bv_page);
- dst_p = kmap_atomic(dst_bv.bv_page);
-
- memcpy(dst_p + dst_bv.bv_offset,
- src_p + src_bv.bv_offset,
- bytes);
-
- kunmap_atomic(dst_p);
- kunmap_atomic(src_p);
-
- flush_dcache_page(dst_bv.bv_page);
+ struct bio_vec src_bv = bio_iter_iovec(src, *src_iter);
+ struct bio_vec dst_bv = bio_iter_iovec(dst, *dst_iter);
+ unsigned int bytes = min(src_bv.bv_len, dst_bv.bv_len);
+ void *src_buf;
+
+ src_buf = bvec_kmap_local(&src_bv);
+ memcpy_to_bvec(&dst_bv, src_buf);
+ kunmap_local(src_buf);
bio_advance_iter_single(src, src_iter, bytes);
bio_advance_iter_single(dst, dst_iter, bytes);
const char *blkg_dev_name(struct blkcg_gq *blkg)
{
- /* some drivers (floppy) instantiate a queue w/o disk registered */
- if (blkg->q->backing_dev_info->dev)
- return bdi_dev_name(blkg->q->backing_dev_info);
- return NULL;
+ if (!blkg->q->disk || !blkg->q->disk->bdi->dev)
+ return NULL;
+ return bdi_dev_name(blkg->q->disk->bdi);
}
/**
}
}
-static int blkcg_print_stat(struct seq_file *sf, void *v)
+static void blkcg_print_one_stat(struct blkcg_gq *blkg, struct seq_file *s)
{
- struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
- struct blkcg_gq *blkg;
-
- if (!seq_css(sf)->parent)
- blkcg_fill_root_iostats();
- else
- cgroup_rstat_flush(blkcg->css.cgroup);
-
- rcu_read_lock();
-
- hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) {
- struct blkg_iostat_set *bis = &blkg->iostat;
- const char *dname;
- char *buf;
- u64 rbytes, wbytes, rios, wios, dbytes, dios;
- size_t size = seq_get_buf(sf, &buf), off = 0;
- int i;
- bool has_stats = false;
- unsigned seq;
+ struct blkg_iostat_set *bis = &blkg->iostat;
+ u64 rbytes, wbytes, rios, wios, dbytes, dios;
+ bool has_stats = false;
+ const char *dname;
+ unsigned seq;
+ int i;
- spin_lock_irq(&blkg->q->queue_lock);
+ if (!blkg->online)
+ return;
- if (!blkg->online)
- goto skip;
+ dname = blkg_dev_name(blkg);
+ if (!dname)
+ return;
- dname = blkg_dev_name(blkg);
- if (!dname)
- goto skip;
+ seq_printf(s, "%s ", dname);
- /*
- * Hooray string manipulation, count is the size written NOT
- * INCLUDING THE \0, so size is now count+1 less than what we
- * had before, but we want to start writing the next bit from
- * the \0 so we only add count to buf.
- */
- off += scnprintf(buf+off, size-off, "%s ", dname);
+ do {
+ seq = u64_stats_fetch_begin(&bis->sync);
+
+ rbytes = bis->cur.bytes[BLKG_IOSTAT_READ];
+ wbytes = bis->cur.bytes[BLKG_IOSTAT_WRITE];
+ dbytes = bis->cur.bytes[BLKG_IOSTAT_DISCARD];
+ rios = bis->cur.ios[BLKG_IOSTAT_READ];
+ wios = bis->cur.ios[BLKG_IOSTAT_WRITE];
+ dios = bis->cur.ios[BLKG_IOSTAT_DISCARD];
+ } while (u64_stats_fetch_retry(&bis->sync, seq));
+
+ if (rbytes || wbytes || rios || wios) {
+ has_stats = true;
+ seq_printf(s, "rbytes=%llu wbytes=%llu rios=%llu wios=%llu dbytes=%llu dios=%llu",
+ rbytes, wbytes, rios, wios,
+ dbytes, dios);
+ }
- do {
- seq = u64_stats_fetch_begin(&bis->sync);
+ if (blkcg_debug_stats && atomic_read(&blkg->use_delay)) {
+ has_stats = true;
+ seq_printf(s, " use_delay=%d delay_nsec=%llu",
+ atomic_read(&blkg->use_delay),
+ atomic64_read(&blkg->delay_nsec));
+ }
- rbytes = bis->cur.bytes[BLKG_IOSTAT_READ];
- wbytes = bis->cur.bytes[BLKG_IOSTAT_WRITE];
- dbytes = bis->cur.bytes[BLKG_IOSTAT_DISCARD];
- rios = bis->cur.ios[BLKG_IOSTAT_READ];
- wios = bis->cur.ios[BLKG_IOSTAT_WRITE];
- dios = bis->cur.ios[BLKG_IOSTAT_DISCARD];
- } while (u64_stats_fetch_retry(&bis->sync, seq));
+ for (i = 0; i < BLKCG_MAX_POLS; i++) {
+ struct blkcg_policy *pol = blkcg_policy[i];
- if (rbytes || wbytes || rios || wios) {
- has_stats = true;
- off += scnprintf(buf+off, size-off,
- "rbytes=%llu wbytes=%llu rios=%llu wios=%llu dbytes=%llu dios=%llu",
- rbytes, wbytes, rios, wios,
- dbytes, dios);
- }
+ if (!blkg->pd[i] || !pol->pd_stat_fn)
+ continue;
- if (blkcg_debug_stats && atomic_read(&blkg->use_delay)) {
+ if (pol->pd_stat_fn(blkg->pd[i], s))
has_stats = true;
- off += scnprintf(buf+off, size-off,
- " use_delay=%d delay_nsec=%llu",
- atomic_read(&blkg->use_delay),
- (unsigned long long)atomic64_read(&blkg->delay_nsec));
- }
+ }
- for (i = 0; i < BLKCG_MAX_POLS; i++) {
- struct blkcg_policy *pol = blkcg_policy[i];
- size_t written;
+ if (has_stats)
+ seq_printf(s, "\n");
+}
- if (!blkg->pd[i] || !pol->pd_stat_fn)
- continue;
+static int blkcg_print_stat(struct seq_file *sf, void *v)
+{
+ struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
+ struct blkcg_gq *blkg;
- written = pol->pd_stat_fn(blkg->pd[i], buf+off, size-off);
- if (written)
- has_stats = true;
- off += written;
- }
+ if (!seq_css(sf)->parent)
+ blkcg_fill_root_iostats();
+ else
+ cgroup_rstat_flush(blkcg->css.cgroup);
- if (has_stats) {
- if (off < size - 1) {
- off += scnprintf(buf+off, size-off, "\n");
- seq_commit(sf, off);
- } else {
- seq_commit(sf, -1);
- }
- }
- skip:
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) {
+ spin_lock_irq(&blkg->q->queue_lock);
+ blkcg_print_one_stat(blkg, sf);
spin_unlock_irq(&blkg->q->queue_lock);
}
-
rcu_read_unlock();
return 0;
}
*/
#include <linux/kernel.h>
#include <linux/module.h>
-#include <linux/backing-dev.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
/* for synchronous bio-based driver finish in-flight integrity i/o */
blk_flush_integrity();
- /* @q won't process any more request, flush async actions */
- del_timer_sync(&q->backing_dev_info->laptop_mode_wb_timer);
blk_sync_queue(q);
-
if (queue_is_mq(q))
blk_mq_exit_queue(q);
if (ret)
goto fail_id;
- q->backing_dev_info = bdi_alloc(node_id);
- if (!q->backing_dev_info)
- goto fail_split;
-
q->stats = blk_alloc_queue_stats();
if (!q->stats)
- goto fail_stats;
+ goto fail_split;
q->node = node_id;
atomic_set(&q->nr_active_requests_shared_sbitmap, 0);
- timer_setup(&q->backing_dev_info->laptop_mode_wb_timer,
- laptop_mode_timer_fn, 0);
timer_setup(&q->timeout, blk_rq_timed_out_timer, 0);
INIT_WORK(&q->timeout_work, blk_timeout_work);
INIT_LIST_HEAD(&q->icq_list);
if (percpu_ref_init(&q->q_usage_counter,
blk_queue_usage_counter_release,
PERCPU_REF_INIT_ATOMIC, GFP_KERNEL))
- goto fail_bdi;
+ goto fail_stats;
if (blkcg_init_queue(q))
goto fail_ref;
fail_ref:
percpu_ref_exit(&q->q_usage_counter);
-fail_bdi:
- blk_free_queue_stats(q->stats);
fail_stats:
- bdi_put(q->backing_dev_info);
+ blk_free_queue_stats(q->stats);
fail_split:
bioset_exit(&q->bio_split);
fail_id:
if (mode->keysize == 0)
return -EINVAL;
- if (dun_bytes == 0 || dun_bytes > BLK_CRYPTO_MAX_IV_SIZE)
+ if (dun_bytes == 0 || dun_bytes > mode->ivsize)
return -EINVAL;
if (!is_power_of_2(data_unit_size))
}
EXPORT_SYMBOL(blk_integrity_unregister);
-void blk_integrity_add(struct gendisk *disk)
+int blk_integrity_add(struct gendisk *disk)
{
- if (kobject_init_and_add(&disk->integrity_kobj, &integrity_ktype,
- &disk_to_dev(disk)->kobj, "%s", "integrity"))
- return;
+ int ret;
- kobject_uevent(&disk->integrity_kobj, KOBJ_ADD);
+ ret = kobject_init_and_add(&disk->integrity_kobj, &integrity_ktype,
+ &disk_to_dev(disk)->kobj, "%s", "integrity");
+ if (!ret)
+ kobject_uevent(&disk->integrity_kobj, KOBJ_ADD);
+ return ret;
}
void blk_integrity_del(struct gendisk *disk)
kfree(iocg);
}
-static size_t ioc_pd_stat(struct blkg_policy_data *pd, char *buf, size_t size)
+static bool ioc_pd_stat(struct blkg_policy_data *pd, struct seq_file *s)
{
struct ioc_gq *iocg = pd_to_iocg(pd);
struct ioc *ioc = iocg->ioc;
- size_t pos = 0;
if (!ioc->enabled)
- return 0;
+ return false;
if (iocg->level == 0) {
unsigned vp10k = DIV64_U64_ROUND_CLOSEST(
ioc->vtime_base_rate * 10000,
VTIME_PER_USEC);
- pos += scnprintf(buf + pos, size - pos, " cost.vrate=%u.%02u",
- vp10k / 100, vp10k % 100);
+ seq_printf(s, " cost.vrate=%u.%02u", vp10k / 100, vp10k % 100);
}
- pos += scnprintf(buf + pos, size - pos, " cost.usage=%llu",
- iocg->last_stat.usage_us);
+ seq_printf(s, " cost.usage=%llu", iocg->last_stat.usage_us);
if (blkcg_debug_stats)
- pos += scnprintf(buf + pos, size - pos,
- " cost.wait=%llu cost.indebt=%llu cost.indelay=%llu",
- iocg->last_stat.wait_us,
- iocg->last_stat.indebt_us,
- iocg->last_stat.indelay_us);
-
- return pos;
+ seq_printf(s, " cost.wait=%llu cost.indebt=%llu cost.indelay=%llu",
+ iocg->last_stat.wait_us,
+ iocg->last_stat.indebt_us,
+ iocg->last_stat.indelay_us);
+ return true;
}
static u64 ioc_weight_prfill(struct seq_file *sf, struct blkg_policy_data *pd,
return 0;
}
-static size_t iolatency_ssd_stat(struct iolatency_grp *iolat, char *buf,
- size_t size)
+static bool iolatency_ssd_stat(struct iolatency_grp *iolat, struct seq_file *s)
{
struct latency_stat stat;
int cpu;
preempt_enable();
if (iolat->rq_depth.max_depth == UINT_MAX)
- return scnprintf(buf, size, " missed=%llu total=%llu depth=max",
- (unsigned long long)stat.ps.missed,
- (unsigned long long)stat.ps.total);
- return scnprintf(buf, size, " missed=%llu total=%llu depth=%u",
- (unsigned long long)stat.ps.missed,
- (unsigned long long)stat.ps.total,
- iolat->rq_depth.max_depth);
+ seq_printf(s, " missed=%llu total=%llu depth=max",
+ (unsigned long long)stat.ps.missed,
+ (unsigned long long)stat.ps.total);
+ else
+ seq_printf(s, " missed=%llu total=%llu depth=%u",
+ (unsigned long long)stat.ps.missed,
+ (unsigned long long)stat.ps.total,
+ iolat->rq_depth.max_depth);
+ return true;
}
-static size_t iolatency_pd_stat(struct blkg_policy_data *pd, char *buf,
- size_t size)
+static bool iolatency_pd_stat(struct blkg_policy_data *pd, struct seq_file *s)
{
struct iolatency_grp *iolat = pd_to_lat(pd);
unsigned long long avg_lat;
unsigned long long cur_win;
if (!blkcg_debug_stats)
- return 0;
+ return false;
if (iolat->ssd)
- return iolatency_ssd_stat(iolat, buf, size);
+ return iolatency_ssd_stat(iolat, s);
avg_lat = div64_u64(iolat->lat_avg, NSEC_PER_USEC);
cur_win = div64_u64(iolat->cur_win_nsec, NSEC_PER_MSEC);
if (iolat->rq_depth.max_depth == UINT_MAX)
- return scnprintf(buf, size, " depth=max avg_lat=%llu win=%llu",
- avg_lat, cur_win);
-
- return scnprintf(buf, size, " depth=%u avg_lat=%llu win=%llu",
- iolat->rq_depth.max_depth, avg_lat, cur_win);
+ seq_printf(s, " depth=max avg_lat=%llu win=%llu",
+ avg_lat, cur_win);
+ else
+ seq_printf(s, " depth=%u avg_lat=%llu win=%llu",
+ iolat->rq_depth.max_depth, avg_lat, cur_win);
+ return true;
}
-
static struct blkg_policy_data *iolatency_pd_alloc(gfp_t gfp,
struct request_queue *q,
struct blkcg *blkcg)
struct bvec_iter_all iter_all;
bio_for_each_segment_all(bvec, bio, iter_all) {
- memcpy(p, page_address(bvec->bv_page), bvec->bv_len);
+ memcpy_from_bvec(p, bvec);
p += bvec->bv_len;
}
trace_block_split(split, (*bio)->bi_iter.bi_sector);
submit_bio_noacct(*bio);
*bio = split;
+
+ blk_throtl_charge_bio_split(*bio);
}
}
}
}
-/*
- * Two cases of handling DISCARD merge:
- * If max_discard_segments > 1, the driver takes every bio
- * as a range and send them to controller together. The ranges
- * needn't to be contiguous.
- * Otherwise, the bios/requests will be handled as same as
- * others which should be contiguous.
- */
-static inline bool blk_discard_mergable(struct request *req)
-{
- if (req_op(req) == REQ_OP_DISCARD &&
- queue_max_discard_segments(req->q) > 1)
- return true;
- return false;
-}
-
static enum elv_merge blk_try_req_merge(struct request *req,
struct request *next)
{
kfree(hctx);
}
-struct blk_mq_ctx_sysfs_entry {
- struct attribute attr;
- ssize_t (*show)(struct blk_mq_ctx *, char *);
- ssize_t (*store)(struct blk_mq_ctx *, const char *, size_t);
-};
-
struct blk_mq_hw_ctx_sysfs_entry {
struct attribute attr;
ssize_t (*show)(struct blk_mq_hw_ctx *, char *);
ssize_t (*store)(struct blk_mq_hw_ctx *, const char *, size_t);
};
-static ssize_t blk_mq_sysfs_show(struct kobject *kobj, struct attribute *attr,
- char *page)
-{
- struct blk_mq_ctx_sysfs_entry *entry;
- struct blk_mq_ctx *ctx;
- struct request_queue *q;
- ssize_t res;
-
- entry = container_of(attr, struct blk_mq_ctx_sysfs_entry, attr);
- ctx = container_of(kobj, struct blk_mq_ctx, kobj);
- q = ctx->queue;
-
- if (!entry->show)
- return -EIO;
-
- mutex_lock(&q->sysfs_lock);
- res = entry->show(ctx, page);
- mutex_unlock(&q->sysfs_lock);
- return res;
-}
-
-static ssize_t blk_mq_sysfs_store(struct kobject *kobj, struct attribute *attr,
- const char *page, size_t length)
-{
- struct blk_mq_ctx_sysfs_entry *entry;
- struct blk_mq_ctx *ctx;
- struct request_queue *q;
- ssize_t res;
-
- entry = container_of(attr, struct blk_mq_ctx_sysfs_entry, attr);
- ctx = container_of(kobj, struct blk_mq_ctx, kobj);
- q = ctx->queue;
-
- if (!entry->store)
- return -EIO;
-
- mutex_lock(&q->sysfs_lock);
- res = entry->store(ctx, page, length);
- mutex_unlock(&q->sysfs_lock);
- return res;
-}
-
static ssize_t blk_mq_hw_sysfs_show(struct kobject *kobj,
struct attribute *attr, char *page)
{
};
ATTRIBUTE_GROUPS(default_hw_ctx);
-static const struct sysfs_ops blk_mq_sysfs_ops = {
- .show = blk_mq_sysfs_show,
- .store = blk_mq_sysfs_store,
-};
-
static const struct sysfs_ops blk_mq_hw_sysfs_ops = {
.show = blk_mq_hw_sysfs_show,
.store = blk_mq_hw_sysfs_store,
};
static struct kobj_type blk_mq_ktype = {
- .sysfs_ops = &blk_mq_sysfs_ops,
.release = blk_mq_sysfs_release,
};
static struct kobj_type blk_mq_ctx_ktype = {
- .sysfs_ops = &blk_mq_sysfs_ops,
.release = blk_mq_ctx_sysfs_release,
};
__blk_mq_dec_active_requests(hctx);
if (unlikely(laptop_mode && !blk_rq_is_passthrough(rq)))
- laptop_io_completion(q->backing_dev_info);
+ laptop_io_completion(q->disk->bdi);
rq_qos_done(q, rq);
}
EXPORT_SYMBOL(blk_mq_init_queue);
-struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set, void *queuedata)
+struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set, void *queuedata,
+ struct lock_class_key *lkclass)
{
struct request_queue *q;
struct gendisk *disk;
if (IS_ERR(q))
return ERR_CAST(q);
- disk = __alloc_disk_node(0, set->numa_node);
+ disk = __alloc_disk_node(q, set->numa_node, lkclass);
if (!disk) {
blk_cleanup_queue(q);
return ERR_PTR(-ENOMEM);
}
- disk->queue = q;
return disk;
}
EXPORT_SYMBOL(__blk_mq_alloc_disk);
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/pagemap.h>
+#include <linux/backing-dev-defs.h>
#include <linux/gcd.h>
#include <linux/lcm.h>
#include <linux/jiffies.h>
limits->logical_block_size >> SECTOR_SHIFT);
limits->max_sectors = max_sectors;
- q->backing_dev_info->io_pages = max_sectors >> (PAGE_SHIFT - 9);
+ if (!q->disk)
+ return;
+ q->disk->bdi->io_pages = max_sectors >> (PAGE_SHIFT - 9);
}
EXPORT_SYMBOL(blk_queue_max_hw_sectors);
}
EXPORT_SYMBOL(blk_queue_alignment_offset);
-void blk_queue_update_readahead(struct request_queue *q)
+void disk_update_readahead(struct gendisk *disk)
{
+ struct request_queue *q = disk->queue;
+
/*
* For read-ahead of large files to be effective, we need to read ahead
* at least twice the optimal I/O size.
*/
- q->backing_dev_info->ra_pages =
+ disk->bdi->ra_pages =
max(queue_io_opt(q) * 2 / PAGE_SIZE, VM_READAHEAD_PAGES);
- q->backing_dev_info->io_pages =
- queue_max_sectors(q) >> (PAGE_SHIFT - 9);
+ disk->bdi->io_pages = queue_max_sectors(q) >> (PAGE_SHIFT - 9);
}
-EXPORT_SYMBOL_GPL(blk_queue_update_readahead);
+EXPORT_SYMBOL_GPL(disk_update_readahead);
/**
* blk_limits_io_min - set minimum request size for a device
void blk_queue_io_opt(struct request_queue *q, unsigned int opt)
{
blk_limits_io_opt(&q->limits, opt);
- q->backing_dev_info->ra_pages =
+ if (!q->disk)
+ return;
+ q->disk->bdi->ra_pages =
max(queue_io_opt(q) * 2 / PAGE_SIZE, VM_READAHEAD_PAGES);
}
EXPORT_SYMBOL(blk_queue_io_opt);
struct request_queue *t = disk->queue;
if (blk_stack_limits(&t->limits, &bdev_get_queue(bdev)->limits,
- get_start_sect(bdev) + (offset >> 9)) < 0) {
- char top[BDEVNAME_SIZE], bottom[BDEVNAME_SIZE];
-
- disk_name(disk, 0, top);
- bdevname(bdev, bottom);
-
- printk(KERN_NOTICE "%s: Warning: Device %s is misaligned\n",
- top, bottom);
- }
+ get_start_sect(bdev) + (offset >> 9)) < 0)
+ pr_notice("%s: Warning: Device %pg is misaligned\n",
+ disk->disk_name, bdev);
- blk_queue_update_readahead(disk->queue);
+ disk_update_readahead(disk);
}
EXPORT_SYMBOL(disk_stack_limits);
static ssize_t queue_ra_show(struct request_queue *q, char *page)
{
- unsigned long ra_kb = q->backing_dev_info->ra_pages <<
- (PAGE_SHIFT - 10);
+ unsigned long ra_kb;
+ if (!q->disk)
+ return -EINVAL;
+ ra_kb = q->disk->bdi->ra_pages << (PAGE_SHIFT - 10);
return queue_var_show(ra_kb, page);
}
queue_ra_store(struct request_queue *q, const char *page, size_t count)
{
unsigned long ra_kb;
- ssize_t ret = queue_var_store(&ra_kb, page, count);
+ ssize_t ret;
+ if (!q->disk)
+ return -EINVAL;
+ ret = queue_var_store(&ra_kb, page, count);
if (ret < 0)
return ret;
-
- q->backing_dev_info->ra_pages = ra_kb >> (PAGE_SHIFT - 10);
-
+ q->disk->bdi->ra_pages = ra_kb >> (PAGE_SHIFT - 10);
return ret;
}
spin_lock_irq(&q->queue_lock);
q->limits.max_sectors = max_sectors_kb << 1;
- q->backing_dev_info->io_pages = max_sectors_kb >> (PAGE_SHIFT - 10);
+ if (q->disk)
+ q->disk->bdi->io_pages = max_sectors_kb >> (PAGE_SHIFT - 10);
spin_unlock_irq(&q->queue_lock);
return ret;
* e.g. blkcg_print_blkgs() to crash.
*/
blkcg_exit_queue(q);
-
- /*
- * Since the cgroup code may dereference the @q->backing_dev_info
- * pointer, only decrease its reference count after having removed the
- * association with the block cgroup controller.
- */
- bdi_put(q->backing_dev_info);
}
/**
struct device *dev = disk_to_dev(disk);
struct request_queue *q = disk->queue;
- if (WARN_ON(!q))
- return -ENXIO;
-
- WARN_ONCE(blk_queue_registered(q),
- "%s is registering an already registered queue\n",
- kobject_name(&dev->kobj));
-
- blk_queue_update_readahead(q);
-
ret = blk_trace_init_sysfs(dev);
if (ret)
return ret;
return ret;
}
-EXPORT_SYMBOL_GPL(blk_register_queue);
/**
* blk_unregister_queue - counterpart of blk_register_queue()
unsigned int bad_bio_cnt; /* bios exceeding latency threshold */
unsigned long bio_cnt_reset_time;
+ atomic_t io_split_cnt[2];
+ atomic_t last_io_split_cnt[2];
+
struct blkg_rwstat stat_bytes;
struct blkg_rwstat stat_ios;
};
tg->bytes_disp[rw] = 0;
tg->io_disp[rw] = 0;
+ atomic_set(&tg->io_split_cnt[rw], 0);
+
/*
* Previous slice has expired. We must have trimmed it after last
* bio dispatch. That means since start of last slice, we never used
tg->io_disp[rw] = 0;
tg->slice_start[rw] = jiffies;
tg->slice_end[rw] = jiffies + tg->td->throtl_slice;
+
+ atomic_set(&tg->io_split_cnt[rw], 0);
+
throtl_log(&tg->service_queue,
"[%c] new slice start=%lu end=%lu jiffies=%lu",
rw == READ ? 'R' : 'W', tg->slice_start[rw],
jiffies + tg->td->throtl_slice);
}
+ if (iops_limit != UINT_MAX)
+ tg->io_disp[rw] += atomic_xchg(&tg->io_split_cnt[rw], 0);
+
if (tg_with_in_bps_limit(tg, bio, bps_limit, &bps_wait) &&
tg_with_in_iops_limit(tg, bio, iops_limit, &iops_wait)) {
if (wait)
}
if (tg->iops[READ][LIMIT_LOW]) {
+ tg->last_io_disp[READ] += atomic_xchg(&tg->last_io_split_cnt[READ], 0);
iops = tg->last_io_disp[READ] * HZ / elapsed_time;
if (iops >= tg->iops[READ][LIMIT_LOW])
tg->last_low_overflow_time[READ] = now;
}
if (tg->iops[WRITE][LIMIT_LOW]) {
+ tg->last_io_disp[WRITE] += atomic_xchg(&tg->last_io_split_cnt[WRITE], 0);
iops = tg->last_io_disp[WRITE] * HZ / elapsed_time;
if (iops >= tg->iops[WRITE][LIMIT_LOW])
tg->last_low_overflow_time[WRITE] = now;
}
#endif
+void blk_throtl_charge_bio_split(struct bio *bio)
+{
+ struct blkcg_gq *blkg = bio->bi_blkg;
+ struct throtl_grp *parent = blkg_to_tg(blkg);
+ struct throtl_service_queue *parent_sq;
+ bool rw = bio_data_dir(bio);
+
+ do {
+ if (!parent->has_rules[rw])
+ break;
+
+ atomic_inc(&parent->io_split_cnt[rw]);
+ atomic_inc(&parent->last_io_split_cnt[rw]);
+
+ parent_sq = parent->service_queue.parent_sq;
+ parent = sq_to_tg(parent_sq);
+ } while (parent);
+}
+
bool blk_throtl_bio(struct bio *bio)
{
struct request_queue *q = bio->bi_bdev->bd_disk->queue;
*/
static bool wb_recent_wait(struct rq_wb *rwb)
{
- struct bdi_writeback *wb = &rwb->rqos.q->backing_dev_info->wb;
+ struct bdi_writeback *wb = &rwb->rqos.q->disk->bdi->wb;
return time_before(jiffies, wb->dirty_sleep + HZ);
}
static int latency_exceeded(struct rq_wb *rwb, struct blk_rq_stat *stat)
{
- struct backing_dev_info *bdi = rwb->rqos.q->backing_dev_info;
+ struct backing_dev_info *bdi = rwb->rqos.q->disk->bdi;
struct rq_depth *rqd = &rwb->rq_depth;
u64 thislat;
static void rwb_trace_step(struct rq_wb *rwb, const char *msg)
{
- struct backing_dev_info *bdi = rwb->rqos.q->backing_dev_info;
+ struct backing_dev_info *bdi = rwb->rqos.q->disk->bdi;
struct rq_depth *rqd = &rwb->rq_depth;
trace_wbt_step(bdi, msg, rqd->scale_step, rwb->cur_win_nsec,
status = latency_exceeded(rwb, cb->stat);
- trace_wbt_timer(rwb->rqos.q->backing_dev_info, status, rqd->scale_step,
+ trace_wbt_timer(rwb->rqos.q->disk->bdi, status, rqd->scale_step,
inflight);
/*
if (!blk_queue_is_zoned(q))
return -ENOTTY;
- if (!capable(CAP_SYS_ADMIN))
- return -EACCES;
-
if (copy_from_user(&rep, argp, sizeof(struct blk_zone_report)))
return -EFAULT;
if (!blk_queue_is_zoned(q))
return -ENOTTY;
- if (!capable(CAP_SYS_ADMIN))
- return -EACCES;
-
if (!(mode & FMODE_WRITE))
return -EBADF;
bip_next->bip_vec[0].bv_offset);
}
-void blk_integrity_add(struct gendisk *);
+int blk_integrity_add(struct gendisk *disk);
void blk_integrity_del(struct gendisk *);
#else /* CONFIG_BLK_DEV_INTEGRITY */
static inline bool blk_integrity_merge_rq(struct request_queue *rq,
static inline void bio_integrity_free(struct bio *bio)
{
}
-static inline void blk_integrity_add(struct gendisk *disk)
+static inline int blk_integrity_add(struct gendisk *disk)
{
+ return 0;
}
static inline void blk_integrity_del(struct gendisk *disk)
{
extern int blk_throtl_init(struct request_queue *q);
extern void blk_throtl_exit(struct request_queue *q);
extern void blk_throtl_register_queue(struct request_queue *q);
+extern void blk_throtl_charge_bio_split(struct bio *bio);
bool blk_throtl_bio(struct bio *bio);
#else /* CONFIG_BLK_DEV_THROTTLING */
static inline int blk_throtl_init(struct request_queue *q) { return 0; }
static inline void blk_throtl_exit(struct request_queue *q) { }
static inline void blk_throtl_register_queue(struct request_queue *q) { }
+static inline void blk_throtl_charge_bio_split(struct bio *bio) { }
static inline bool blk_throtl_bio(struct bio *bio) { return false; }
#endif /* CONFIG_BLK_DEV_THROTTLING */
#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
int blk_alloc_ext_minor(void);
void blk_free_ext_minor(unsigned int minor);
-char *disk_name(struct gendisk *hd, int partno, char *buf);
#define ADDPART_FLAG_NONE 0
#define ADDPART_FLAG_RAID 1
#define ADDPART_FLAG_WHOLEDISK 2
-int bdev_add_partition(struct block_device *bdev, int partno,
- sector_t start, sector_t length);
-int bdev_del_partition(struct block_device *bdev, int partno);
-int bdev_resize_partition(struct block_device *bdev, int partno,
- sector_t start, sector_t length);
+int bdev_add_partition(struct gendisk *disk, int partno, sector_t start,
+ sector_t length);
+int bdev_del_partition(struct gendisk *disk, int partno);
+int bdev_resize_partition(struct gendisk *disk, int partno, sector_t start,
+ sector_t length);
int bio_add_hw_page(struct request_queue *q, struct bio *bio,
struct page *page, unsigned int len, unsigned int offset,
struct request_queue *blk_alloc_queue(int node_id);
-void disk_alloc_events(struct gendisk *disk);
+int disk_alloc_events(struct gendisk *disk);
void disk_add_events(struct gendisk *disk);
void disk_del_events(struct gendisk *disk);
void disk_release_events(struct gendisk *disk);
__initcall(init_emergency_pool);
-/*
- * highmem version, map in to vec
- */
-static void bounce_copy_vec(struct bio_vec *to, unsigned char *vfrom)
-{
- unsigned char *vto;
-
- vto = kmap_atomic(to->bv_page);
- memcpy(vto + to->bv_offset, vfrom, to->bv_len);
- kunmap_atomic(vto);
-}
-
/*
* Simple bounce buffer support for highmem pages. Depending on the
* queue gfp mask set, *to may or may not be a highmem page. kmap it
*/
static void copy_to_high_bio_irq(struct bio *to, struct bio *from)
{
- unsigned char *vfrom;
struct bio_vec tovec, fromvec;
struct bvec_iter iter;
/*
* been modified by the block layer, so use the original
* copy, bounce_copy_vec already uses tovec->bv_len
*/
- vfrom = page_address(fromvec.bv_page) +
- tovec.bv_offset;
-
- bounce_copy_vec(&tovec, vfrom);
- flush_dcache_page(tovec.bv_page);
+ memcpy_to_bvec(&tovec, page_address(fromvec.bv_page) +
+ tovec.bv_offset);
}
bio_advance_iter(from, &from_iter, tovec.bv_len);
}
* because the 'bio' is single-page bvec.
*/
for (i = 0, to = bio->bi_io_vec; i < bio->bi_vcnt; to++, i++) {
- struct page *page = to->bv_page;
+ struct page *bounce_page;
- if (!PageHighMem(page))
+ if (!PageHighMem(to->bv_page))
continue;
- to->bv_page = mempool_alloc(&page_pool, GFP_NOIO);
- inc_zone_page_state(to->bv_page, NR_BOUNCE);
+ bounce_page = mempool_alloc(&page_pool, GFP_NOIO);
+ inc_zone_page_state(bounce_page, NR_BOUNCE);
if (rw == WRITE) {
- char *vto, *vfrom;
-
- flush_dcache_page(page);
-
- vto = page_address(to->bv_page) + to->bv_offset;
- vfrom = kmap_atomic(page) + to->bv_offset;
- memcpy(vto, vfrom, to->bv_len);
- kunmap_atomic(vfrom);
+ flush_dcache_page(to->bv_page);
+ memcpy_from_bvec(page_address(bounce_page), to);
}
+ to->bv_page = bounce_page;
}
trace_block_bio_bounce(*bio_orig);
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Parse command line, get partition information
- *
- * Written by Cai Zhiyong <caizhiyong@huawei.com>
- *
- */
-#include <linux/export.h>
-#include <linux/cmdline-parser.h>
-
-static int parse_subpart(struct cmdline_subpart **subpart, char *partdef)
-{
- int ret = 0;
- struct cmdline_subpart *new_subpart;
-
- *subpart = NULL;
-
- new_subpart = kzalloc(sizeof(struct cmdline_subpart), GFP_KERNEL);
- if (!new_subpart)
- return -ENOMEM;
-
- if (*partdef == '-') {
- new_subpart->size = (sector_t)(~0ULL);
- partdef++;
- } else {
- new_subpart->size = (sector_t)memparse(partdef, &partdef);
- if (new_subpart->size < (sector_t)PAGE_SIZE) {
- pr_warn("cmdline partition size is invalid.");
- ret = -EINVAL;
- goto fail;
- }
- }
-
- if (*partdef == '@') {
- partdef++;
- new_subpart->from = (sector_t)memparse(partdef, &partdef);
- } else {
- new_subpart->from = (sector_t)(~0ULL);
- }
-
- if (*partdef == '(') {
- int length;
- char *next = strchr(++partdef, ')');
-
- if (!next) {
- pr_warn("cmdline partition format is invalid.");
- ret = -EINVAL;
- goto fail;
- }
-
- length = min_t(int, next - partdef,
- sizeof(new_subpart->name) - 1);
- strncpy(new_subpart->name, partdef, length);
- new_subpart->name[length] = '\0';
-
- partdef = ++next;
- } else
- new_subpart->name[0] = '\0';
-
- new_subpart->flags = 0;
-
- if (!strncmp(partdef, "ro", 2)) {
- new_subpart->flags |= PF_RDONLY;
- partdef += 2;
- }
-
- if (!strncmp(partdef, "lk", 2)) {
- new_subpart->flags |= PF_POWERUP_LOCK;
- partdef += 2;
- }
-
- *subpart = new_subpart;
- return 0;
-fail:
- kfree(new_subpart);
- return ret;
-}
-
-static void free_subpart(struct cmdline_parts *parts)
-{
- struct cmdline_subpart *subpart;
-
- while (parts->subpart) {
- subpart = parts->subpart;
- parts->subpart = subpart->next_subpart;
- kfree(subpart);
- }
-}
-
-static int parse_parts(struct cmdline_parts **parts, const char *bdevdef)
-{
- int ret = -EINVAL;
- char *next;
- int length;
- struct cmdline_subpart **next_subpart;
- struct cmdline_parts *newparts;
- char buf[BDEVNAME_SIZE + 32 + 4];
-
- *parts = NULL;
-
- newparts = kzalloc(sizeof(struct cmdline_parts), GFP_KERNEL);
- if (!newparts)
- return -ENOMEM;
-
- next = strchr(bdevdef, ':');
- if (!next) {
- pr_warn("cmdline partition has no block device.");
- goto fail;
- }
-
- length = min_t(int, next - bdevdef, sizeof(newparts->name) - 1);
- strncpy(newparts->name, bdevdef, length);
- newparts->name[length] = '\0';
- newparts->nr_subparts = 0;
-
- next_subpart = &newparts->subpart;
-
- while (next && *(++next)) {
- bdevdef = next;
- next = strchr(bdevdef, ',');
-
- length = (!next) ? (sizeof(buf) - 1) :
- min_t(int, next - bdevdef, sizeof(buf) - 1);
-
- strncpy(buf, bdevdef, length);
- buf[length] = '\0';
-
- ret = parse_subpart(next_subpart, buf);
- if (ret)
- goto fail;
-
- newparts->nr_subparts++;
- next_subpart = &(*next_subpart)->next_subpart;
- }
-
- if (!newparts->subpart) {
- pr_warn("cmdline partition has no valid partition.");
- ret = -EINVAL;
- goto fail;
- }
-
- *parts = newparts;
-
- return 0;
-fail:
- free_subpart(newparts);
- kfree(newparts);
- return ret;
-}
-
-void cmdline_parts_free(struct cmdline_parts **parts)
-{
- struct cmdline_parts *next_parts;
-
- while (*parts) {
- next_parts = (*parts)->next_parts;
- free_subpart(*parts);
- kfree(*parts);
- *parts = next_parts;
- }
-}
-EXPORT_SYMBOL(cmdline_parts_free);
-
-int cmdline_parts_parse(struct cmdline_parts **parts, const char *cmdline)
-{
- int ret;
- char *buf;
- char *pbuf;
- char *next;
- struct cmdline_parts **next_parts;
-
- *parts = NULL;
-
- next = pbuf = buf = kstrdup(cmdline, GFP_KERNEL);
- if (!buf)
- return -ENOMEM;
-
- next_parts = parts;
-
- while (next && *pbuf) {
- next = strchr(pbuf, ';');
- if (next)
- *next = '\0';
-
- ret = parse_parts(next_parts, pbuf);
- if (ret)
- goto fail;
-
- if (next)
- pbuf = ++next;
-
- next_parts = &(*next_parts)->next_parts;
- }
-
- if (!*parts) {
- pr_warn("cmdline partition has no valid partition.");
- ret = -EINVAL;
- goto fail;
- }
-
- ret = 0;
-done:
- kfree(buf);
- return ret;
-
-fail:
- cmdline_parts_free(parts);
- goto done;
-}
-EXPORT_SYMBOL(cmdline_parts_parse);
-
-struct cmdline_parts *cmdline_parts_find(struct cmdline_parts *parts,
- const char *bdev)
-{
- while (parts && strncmp(bdev, parts->name, sizeof(parts->name)))
- parts = parts->next_parts;
- return parts;
-}
-EXPORT_SYMBOL(cmdline_parts_find);
-
-/*
- * add_part()
- * 0 success.
- * 1 can not add so many partitions.
- */
-int cmdline_parts_set(struct cmdline_parts *parts, sector_t disk_size,
- int slot,
- int (*add_part)(int, struct cmdline_subpart *, void *),
- void *param)
-{
- sector_t from = 0;
- struct cmdline_subpart *subpart;
-
- for (subpart = parts->subpart; subpart;
- subpart = subpart->next_subpart, slot++) {
- if (subpart->from == (sector_t)(~0ULL))
- subpart->from = from;
- else
- from = subpart->from;
-
- if (from >= disk_size)
- break;
-
- if (subpart->size > (disk_size - from))
- subpart->size = disk_size - from;
-
- from += subpart->size;
-
- if (add_part(slot, subpart, param))
- break;
- }
-
- return slot;
-}
-EXPORT_SYMBOL(cmdline_parts_set);
spin_unlock_irq(&ev->lock);
}
+/*
+ * Tell userland about new events. Only the events listed in @disk->events are
+ * reported, and only if DISK_EVENT_FLAG_UEVENT is set. Otherwise, events are
+ * processed internally but never get reported to userland.
+ */
+static void disk_event_uevent(struct gendisk *disk, unsigned int events)
+{
+ char *envp[ARRAY_SIZE(disk_uevents) + 1] = { };
+ int nr_events = 0, i;
+
+ for (i = 0; i < ARRAY_SIZE(disk_uevents); i++)
+ if (events & disk->events & (1 << i))
+ envp[nr_events++] = disk_uevents[i];
+
+ if (nr_events)
+ kobject_uevent_env(&disk_to_dev(disk)->kobj, KOBJ_CHANGE, envp);
+}
+
static void disk_check_events(struct disk_events *ev,
unsigned int *clearing_ptr)
{
struct gendisk *disk = ev->disk;
- char *envp[ARRAY_SIZE(disk_uevents) + 1] = { };
unsigned int clearing = *clearing_ptr;
unsigned int events;
unsigned long intv;
- int nr_events = 0, i;
/* check events */
events = disk->fops->check_events(disk, clearing);
spin_unlock_irq(&ev->lock);
- /*
- * Tell userland about new events. Only the events listed in
- * @disk->events are reported, and only if DISK_EVENT_FLAG_UEVENT
- * is set. Otherwise, events are processed internally but never
- * get reported to userland.
- */
- for (i = 0; i < ARRAY_SIZE(disk_uevents); i++)
- if ((events & disk->events & (1 << i)) &&
- (disk->event_flags & DISK_EVENT_FLAG_UEVENT))
- envp[nr_events++] = disk_uevents[i];
+ if (events & DISK_EVENT_MEDIA_CHANGE)
+ inc_diskseq(disk);
- if (nr_events)
- kobject_uevent_env(&disk_to_dev(disk)->kobj, KOBJ_CHANGE, envp);
+ if (disk->event_flags & DISK_EVENT_FLAG_UEVENT)
+ disk_event_uevent(disk, events);
}
/**
}
EXPORT_SYMBOL(bdev_check_media_change);
+/**
+ * disk_force_media_change - force a media change event
+ * @disk: the disk which will raise the event
+ * @events: the events to raise
+ *
+ * Generate uevents for the disk. If DISK_EVENT_MEDIA_CHANGE is present,
+ * attempt to free all dentries and inodes and invalidates all block
+ * device page cache entries in that case.
+ *
+ * Returns %true if DISK_EVENT_MEDIA_CHANGE was raised, or %false if not.
+ */
+bool disk_force_media_change(struct gendisk *disk, unsigned int events)
+{
+ disk_event_uevent(disk, events);
+
+ if (!(events & DISK_EVENT_MEDIA_CHANGE))
+ return false;
+
+ if (__invalidate_device(disk->part0, true))
+ pr_warn("VFS: busy inodes on changed media %s\n",
+ disk->disk_name);
+ set_bit(GD_NEED_PART_SCAN, &disk->state);
+ return true;
+}
+EXPORT_SYMBOL_GPL(disk_force_media_change);
+
/*
* Separate this part out so that a different pointer for clearing_ptr can be
* passed in for disk_clear_events.
/*
* disk_{alloc|add|del|release}_events - initialize and destroy disk_events.
*/
-void disk_alloc_events(struct gendisk *disk)
+int disk_alloc_events(struct gendisk *disk)
{
struct disk_events *ev;
if (!disk->fops->check_events || !disk->events)
- return;
+ return 0;
ev = kzalloc(sizeof(*ev), GFP_KERNEL);
if (!ev) {
pr_warn("%s: failed to initialize events\n", disk->disk_name);
- return;
+ return -ENOMEM;
}
INIT_LIST_HEAD(&ev->node);
INIT_DELAYED_WORK(&ev->dwork, disk_events_workfn);
disk->ev = ev;
+ return 0;
}
void disk_add_events(struct gendisk *disk)
__rq = elv_rqhash_find(q, bio->bi_iter.bi_sector);
if (__rq && elv_bio_merge_ok(__rq, bio)) {
*req = __rq;
+
+ if (blk_discard_mergable(__rq))
+ return ELEVATOR_DISCARD_MERGE;
return ELEVATOR_BACK_MERGE;
}
*/
static struct elevator_type *elevator_get_default(struct request_queue *q)
{
+ if (q->tag_set && q->tag_set->flags & BLK_MQ_F_NO_SCHED_BY_DEFAULT)
+ return NULL;
+
if (q->nr_hw_queues != 1 &&
!blk_mq_is_sbitmap_shared(q->tag_set->flags))
return NULL;
elevator_put(e);
}
}
-EXPORT_SYMBOL_GPL(elevator_init_mq); /* only for dm-rq */
/*
* switch to new_e io scheduler. be careful not to introduce deadlocks -
static struct kobject *block_depr;
+/*
+ * Unique, monotonically increasing sequential number associated with block
+ * devices instances (i.e. incremented each time a device is attached).
+ * Associating uevents with block devices in userspace is difficult and racy:
+ * the uevent netlink socket is lossy, and on slow and overloaded systems has
+ * a very high latency.
+ * Block devices do not have exclusive owners in userspace, any process can set
+ * one up (e.g. loop devices). Moreover, device names can be reused (e.g. loop0
+ * can be reused again and again).
+ * A userspace process setting up a block device and watching for its events
+ * cannot thus reliably tell whether an event relates to the device it just set
+ * up or another earlier instance with the same name.
+ * This sequential number allows userspace processes to solve this problem, and
+ * uniquely associate an uevent to the lifetime to a device.
+ */
+static atomic64_t diskseq;
+
/* for extended dynamic devt allocation, currently only one major is used */
#define NR_EXT_DEVT (1 << MINORBITS)
static DEFINE_IDA(ext_devt_ida);
* initial capacity during probing.
*/
if (size == capacity ||
- (disk->flags & (GENHD_FL_UP | GENHD_FL_HIDDEN)) != GENHD_FL_UP)
+ !disk_live(disk) ||
+ (disk->flags & GENHD_FL_HIDDEN))
return false;
pr_info("%s: detected capacity change from %lld to %lld\n",
EXPORT_SYMBOL_GPL(set_capacity_and_notify);
/*
- * Format the device name of the indicated disk into the supplied buffer and
- * return a pointer to that same buffer for convenience.
+ * Format the device name of the indicated block device into the supplied buffer
+ * and return a pointer to that same buffer for convenience.
+ *
+ * Note: do not use this in new code, use the %pg specifier to sprintf and
+ * printk insted.
*/
-char *disk_name(struct gendisk *hd, int partno, char *buf)
+const char *bdevname(struct block_device *bdev, char *buf)
{
+ struct gendisk *hd = bdev->bd_disk;
+ int partno = bdev->bd_partno;
+
if (!partno)
snprintf(buf, BDEVNAME_SIZE, "%s", hd->disk_name);
else if (isdigit(hd->disk_name[strlen(hd->disk_name)-1]))
return buf;
}
-
-const char *bdevname(struct block_device *bdev, char *buf)
-{
- return disk_name(bdev->bd_disk, bdev->bd_partno, buf);
-}
EXPORT_SYMBOL(bdevname);
static void part_stat_read_all(struct block_device *part,
EXPORT_SYMBOL(unregister_blkdev);
-/**
- * blk_mangle_minor - scatter minor numbers apart
- * @minor: minor number to mangle
- *
- * Scatter consecutively allocated @minor number apart if MANGLE_DEVT
- * is enabled. Mangling twice gives the original value.
- *
- * RETURNS:
- * Mangled value.
- *
- * CONTEXT:
- * Don't care.
- */
-static int blk_mangle_minor(int minor)
-{
-#ifdef CONFIG_DEBUG_BLOCK_EXT_DEVT
- int i;
-
- for (i = 0; i < MINORBITS / 2; i++) {
- int low = minor & (1 << i);
- int high = minor & (1 << (MINORBITS - 1 - i));
- int distance = MINORBITS - 1 - 2 * i;
-
- minor ^= low | high; /* clear both bits */
- low <<= distance; /* swap the positions */
- high >>= distance;
- minor |= low | high; /* and set */
- }
-#endif
- return minor;
-}
-
int blk_alloc_ext_minor(void)
{
int idx;
idx = ida_alloc_range(&ext_devt_ida, 0, NR_EXT_DEVT, GFP_KERNEL);
- if (idx < 0) {
- if (idx == -ENOSPC)
- return -EBUSY;
- return idx;
- }
- return blk_mangle_minor(idx);
+ if (idx == -ENOSPC)
+ return -EBUSY;
+ return idx;
}
void blk_free_ext_minor(unsigned int minor)
{
- ida_free(&ext_devt_ida, blk_mangle_minor(minor));
+ ida_free(&ext_devt_ida, minor);
}
static char *bdevt_str(dev_t devt, char *buf)
blkdev_put(bdev, FMODE_READ);
}
-static void register_disk(struct device *parent, struct gendisk *disk,
- const struct attribute_group **groups)
-{
- struct device *ddev = disk_to_dev(disk);
- int err;
-
- ddev->parent = parent;
-
- dev_set_name(ddev, "%s", disk->disk_name);
-
- /* delay uevents, until we scanned partition table */
- dev_set_uevent_suppress(ddev, 1);
-
- if (groups) {
- WARN_ON(ddev->groups);
- ddev->groups = groups;
- }
- if (device_add(ddev))
- return;
- if (!sysfs_deprecated) {
- err = sysfs_create_link(block_depr, &ddev->kobj,
- kobject_name(&ddev->kobj));
- if (err) {
- device_del(ddev);
- return;
- }
- }
-
- /*
- * avoid probable deadlock caused by allocating memory with
- * GFP_KERNEL in runtime_resume callback of its all ancestor
- * devices
- */
- pm_runtime_set_memalloc_noio(ddev, true);
-
- disk->part0->bd_holder_dir =
- kobject_create_and_add("holders", &ddev->kobj);
- disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj);
-
- if (disk->flags & GENHD_FL_HIDDEN)
- return;
-
- disk_scan_partitions(disk);
-
- /* announce the disk and partitions after all partitions are created */
- dev_set_uevent_suppress(ddev, 0);
- disk_uevent(disk, KOBJ_ADD);
-
- if (disk->queue->backing_dev_info->dev) {
- err = sysfs_create_link(&ddev->kobj,
- &disk->queue->backing_dev_info->dev->kobj,
- "bdi");
- WARN_ON(err);
- }
-}
-
/**
- * __device_add_disk - add disk information to kernel list
+ * device_add_disk - add disk information to kernel list
* @parent: parent device for the disk
* @disk: per-device partitioning information
* @groups: Additional per-device sysfs groups
- * @register_queue: register the queue if set to true
*
* This function registers the partitioning information in @disk
* with the kernel.
- *
- * FIXME: error handling
*/
-static void __device_add_disk(struct device *parent, struct gendisk *disk,
- const struct attribute_group **groups,
- bool register_queue)
+int device_add_disk(struct device *parent, struct gendisk *disk,
+ const struct attribute_group **groups)
+
{
+ struct device *ddev = disk_to_dev(disk);
int ret;
/*
* elevator if one is needed, that is, for devices requesting queue
* registration.
*/
- if (register_queue)
- elevator_init_mq(disk->queue);
+ elevator_init_mq(disk->queue);
/*
* If the driver provides an explicit major number it also must provide
* and all partitions from the extended dev_t space.
*/
if (disk->major) {
- WARN_ON(!disk->minors);
+ if (WARN_ON(!disk->minors))
+ return -EINVAL;
if (disk->minors > DISK_MAX_PARTS) {
pr_err("block: can't allocate more than %d partitions\n",
disk->minors = DISK_MAX_PARTS;
}
} else {
- WARN_ON(disk->minors);
+ if (WARN_ON(disk->minors))
+ return -EINVAL;
ret = blk_alloc_ext_minor();
- if (ret < 0) {
- WARN_ON(1);
- return;
- }
+ if (ret < 0)
+ return ret;
disk->major = BLOCK_EXT_MAJOR;
- disk->first_minor = MINOR(ret);
+ disk->first_minor = ret;
disk->flags |= GENHD_FL_EXT_DEVT;
}
- disk->flags |= GENHD_FL_UP;
+ ret = disk_alloc_events(disk);
+ if (ret)
+ goto out_free_ext_minor;
- disk_alloc_events(disk);
+ /* delay uevents, until we scanned partition table */
+ dev_set_uevent_suppress(ddev, 1);
+
+ ddev->parent = parent;
+ ddev->groups = groups;
+ dev_set_name(ddev, "%s", disk->disk_name);
+ if (!(disk->flags & GENHD_FL_HIDDEN))
+ ddev->devt = MKDEV(disk->major, disk->first_minor);
+ ret = device_add(ddev);
+ if (ret)
+ goto out_disk_release_events;
+ if (!sysfs_deprecated) {
+ ret = sysfs_create_link(block_depr, &ddev->kobj,
+ kobject_name(&ddev->kobj));
+ if (ret)
+ goto out_device_del;
+ }
+
+ /*
+ * avoid probable deadlock caused by allocating memory with
+ * GFP_KERNEL in runtime_resume callback of its all ancestor
+ * devices
+ */
+ pm_runtime_set_memalloc_noio(ddev, true);
+
+ ret = blk_integrity_add(disk);
+ if (ret)
+ goto out_del_block_link;
+
+ disk->part0->bd_holder_dir =
+ kobject_create_and_add("holders", &ddev->kobj);
+ if (!disk->part0->bd_holder_dir)
+ goto out_del_integrity;
+ disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj);
+ if (!disk->slave_dir)
+ goto out_put_holder_dir;
+
+ ret = bd_register_pending_holders(disk);
+ if (ret < 0)
+ goto out_put_slave_dir;
+
+ ret = blk_register_queue(disk);
+ if (ret)
+ goto out_put_slave_dir;
if (disk->flags & GENHD_FL_HIDDEN) {
/*
disk->flags |= GENHD_FL_SUPPRESS_PARTITION_INFO;
disk->flags |= GENHD_FL_NO_PART_SCAN;
} else {
- struct backing_dev_info *bdi = disk->queue->backing_dev_info;
- struct device *dev = disk_to_dev(disk);
-
- /* Register BDI before referencing it from bdev */
- dev->devt = MKDEV(disk->major, disk->first_minor);
- ret = bdi_register(bdi, "%u:%u",
+ ret = bdi_register(disk->bdi, "%u:%u",
disk->major, disk->first_minor);
- WARN_ON(ret);
- bdi_set_owner(bdi, dev);
- bdev_add(disk->part0, dev->devt);
- }
- register_disk(parent, disk, groups);
- if (register_queue)
- blk_register_queue(disk);
+ if (ret)
+ goto out_unregister_queue;
+ bdi_set_owner(disk->bdi, ddev);
+ ret = sysfs_create_link(&ddev->kobj,
+ &disk->bdi->dev->kobj, "bdi");
+ if (ret)
+ goto out_unregister_bdi;
- /*
- * Take an extra ref on queue which will be put on disk_release()
- * so that it sticks around as long as @disk is there.
- */
- if (blk_get_queue(disk->queue))
- set_bit(GD_QUEUE_REF, &disk->state);
- else
- WARN_ON_ONCE(1);
+ bdev_add(disk->part0, ddev->devt);
+ disk_scan_partitions(disk);
- disk_add_events(disk);
- blk_integrity_add(disk);
-}
+ /*
+ * Announce the disk and partitions after all partitions are
+ * created. (for hidden disks uevents remain suppressed forever)
+ */
+ dev_set_uevent_suppress(ddev, 0);
+ disk_uevent(disk, KOBJ_ADD);
+ }
-void device_add_disk(struct device *parent, struct gendisk *disk,
- const struct attribute_group **groups)
+ disk_update_readahead(disk);
+ disk_add_events(disk);
+ return 0;
-{
- __device_add_disk(parent, disk, groups, true);
+out_unregister_bdi:
+ if (!(disk->flags & GENHD_FL_HIDDEN))
+ bdi_unregister(disk->bdi);
+out_unregister_queue:
+ blk_unregister_queue(disk);
+out_put_slave_dir:
+ kobject_put(disk->slave_dir);
+out_put_holder_dir:
+ kobject_put(disk->part0->bd_holder_dir);
+out_del_integrity:
+ blk_integrity_del(disk);
+out_del_block_link:
+ if (!sysfs_deprecated)
+ sysfs_remove_link(block_depr, dev_name(ddev));
+out_device_del:
+ device_del(ddev);
+out_disk_release_events:
+ disk_release_events(disk);
+out_free_ext_minor:
+ if (disk->major == BLOCK_EXT_MAJOR)
+ blk_free_ext_minor(disk->first_minor);
+ return WARN_ON_ONCE(ret); /* keep until all callers handle errors */
}
EXPORT_SYMBOL(device_add_disk);
-void device_add_disk_no_queue_reg(struct device *parent, struct gendisk *disk)
-{
- __device_add_disk(parent, disk, NULL, false);
-}
-EXPORT_SYMBOL(device_add_disk_no_queue_reg);
-
/**
* del_gendisk - remove the gendisk
* @disk: the struct gendisk to remove
{
might_sleep();
- if (WARN_ON_ONCE(!disk->queue))
+ if (WARN_ON_ONCE(!disk_live(disk) && !(disk->flags & GENHD_FL_HIDDEN)))
return;
blk_integrity_del(disk);
disk_del_events(disk);
mutex_lock(&disk->open_mutex);
- disk->flags &= ~GENHD_FL_UP;
+ remove_inode_hash(disk->part0->bd_inode);
blk_drop_partitions(disk);
mutex_unlock(&disk->open_mutex);
fsync_bdev(disk->part0);
__invalidate_device(disk->part0, true);
- /*
- * Unhash the bdev inode for this device so that it can't be looked
- * up any more even if openers still hold references to it.
- */
- remove_inode_hash(disk->part0->bd_inode);
-
set_capacity(disk, 0);
if (!(disk->flags & GENHD_FL_HIDDEN)) {
* Unregister bdi before releasing device numbers (as they can
* get reused and we'd get clashes in sysfs).
*/
- bdi_unregister(disk->queue->backing_dev_info);
+ bdi_unregister(disk->bdi);
}
blk_unregister_queue(disk);
while ((dev = class_dev_iter_next(&iter))) {
struct gendisk *disk = dev_to_disk(dev);
struct block_device *part;
- char name_buf[BDEVNAME_SIZE];
char devt_buf[BDEVT_SIZE];
unsigned long idx;
xa_for_each(&disk->part_tbl, idx, part) {
if (!bdev_nr_sectors(part))
continue;
- printk("%s%s %10llu %s %s",
+ printk("%s%s %10llu %pg %s",
bdev_is_partition(part) ? " " : "",
bdevt_str(part->bd_dev, devt_buf),
- bdev_nr_sectors(part) >> 1,
- disk_name(disk, part->bd_partno, name_buf),
+ bdev_nr_sectors(part) >> 1, part,
part->bd_meta_info ?
part->bd_meta_info->uuid : "");
if (bdev_is_partition(part))
struct gendisk *sgp = v;
struct block_device *part;
unsigned long idx;
- char buf[BDEVNAME_SIZE];
/* Don't show non-partitionable removeable devices or empty devices */
if (!get_capacity(sgp) || (!disk_max_parts(sgp) &&
xa_for_each(&sgp->part_tbl, idx, part) {
if (!bdev_nr_sectors(part))
continue;
- seq_printf(seqf, "%4d %7d %10llu %s\n",
+ seq_printf(seqf, "%4d %7d %10llu %pg\n",
MAJOR(part->bd_dev), MINOR(part->bd_dev),
- bdev_nr_sectors(part) >> 1,
- disk_name(sgp, part->bd_partno, buf));
+ bdev_nr_sectors(part) >> 1, part);
}
rcu_read_unlock();
return 0;
return sprintf(buf, "%d\n", queue_discard_alignment(disk->queue));
}
+static ssize_t diskseq_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct gendisk *disk = dev_to_disk(dev);
+
+ return sprintf(buf, "%llu\n", disk->diskseq);
+}
+
static DEVICE_ATTR(range, 0444, disk_range_show, NULL);
static DEVICE_ATTR(ext_range, 0444, disk_ext_range_show, NULL);
static DEVICE_ATTR(removable, 0444, disk_removable_show, NULL);
static DEVICE_ATTR(stat, 0444, part_stat_show, NULL);
static DEVICE_ATTR(inflight, 0444, part_inflight_show, NULL);
static DEVICE_ATTR(badblocks, 0644, disk_badblocks_show, disk_badblocks_store);
+static DEVICE_ATTR(diskseq, 0444, diskseq_show, NULL);
#ifdef CONFIG_FAIL_MAKE_REQUEST
ssize_t part_fail_show(struct device *dev,
&dev_attr_events.attr,
&dev_attr_events_async.attr,
&dev_attr_events_poll_msecs.attr,
+ &dev_attr_diskseq.attr,
#ifdef CONFIG_FAIL_MAKE_REQUEST
&dev_attr_fail.attr,
#endif
might_sleep();
- if (MAJOR(dev->devt) == BLOCK_EXT_MAJOR)
- blk_free_ext_minor(MINOR(dev->devt));
disk_release_events(disk);
kfree(disk->random);
xa_destroy(&disk->part_tbl);
- if (test_bit(GD_QUEUE_REF, &disk->state) && disk->queue)
- blk_put_queue(disk->queue);
- bdput(disk->part0); /* frees the disk */
+ disk->queue->disk = NULL;
+ blk_put_queue(disk->queue);
+ iput(disk->part0->bd_inode); /* frees the disk */
+}
+
+static int block_uevent(struct device *dev, struct kobj_uevent_env *env)
+{
+ struct gendisk *disk = dev_to_disk(dev);
+
+ return add_uevent_var(env, "DISKSEQ=%llu", disk->diskseq);
}
+
struct class block_class = {
.name = "block",
+ .dev_uevent = block_uevent,
};
static char *block_devnode(struct device *dev, umode_t *mode,
{
struct gendisk *gp = v;
struct block_device *hd;
- char buf[BDEVNAME_SIZE];
unsigned int inflight;
struct disk_stats stat;
unsigned long idx;
else
inflight = part_in_flight(hd);
- seq_printf(seqf, "%4d %7d %s "
+ seq_printf(seqf, "%4d %7d %pg "
"%lu %lu %lu %u "
"%lu %lu %lu %u "
"%u %u %u "
"%lu %lu %lu %u "
"%lu %u"
"\n",
- MAJOR(hd->bd_dev), MINOR(hd->bd_dev),
- disk_name(gp, hd->bd_partno, buf),
+ MAJOR(hd->bd_dev), MINOR(hd->bd_dev), hd,
stat.ios[STAT_READ],
stat.merges[STAT_READ],
stat.sectors[STAT_READ],
return devt;
}
-struct gendisk *__alloc_disk_node(int minors, int node_id)
+struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id,
+ struct lock_class_key *lkclass)
{
struct gendisk *disk;
+ if (!blk_get_queue(q))
+ return NULL;
+
disk = kzalloc_node(sizeof(struct gendisk), GFP_KERNEL, node_id);
if (!disk)
- return NULL;
+ goto out_put_queue;
+
+ disk->bdi = bdi_alloc(node_id);
+ if (!disk->bdi)
+ goto out_free_disk;
disk->part0 = bdev_alloc(disk, 0);
if (!disk->part0)
- goto out_free_disk;
+ goto out_free_bdi;
disk->node_id = node_id;
mutex_init(&disk->open_mutex);
if (xa_insert(&disk->part_tbl, 0, disk->part0, GFP_KERNEL))
goto out_destroy_part_tbl;
- disk->minors = minors;
rand_initialize_disk(disk);
disk_to_dev(disk)->class = &block_class;
disk_to_dev(disk)->type = &disk_type;
device_initialize(disk_to_dev(disk));
+ inc_diskseq(disk);
+ disk->queue = q;
+ q->disk = disk;
+ lockdep_init_map(&disk->lockdep_map, "(bio completion)", lkclass, 0);
+#ifdef CONFIG_BLOCK_HOLDER_DEPRECATED
+ INIT_LIST_HEAD(&disk->slave_bdevs);
+#endif
return disk;
out_destroy_part_tbl:
xa_destroy(&disk->part_tbl);
- bdput(disk->part0);
+ iput(disk->part0->bd_inode);
+out_free_bdi:
+ bdi_put(disk->bdi);
out_free_disk:
kfree(disk);
+out_put_queue:
+ blk_put_queue(q);
return NULL;
}
EXPORT_SYMBOL(__alloc_disk_node);
-struct gendisk *__blk_alloc_disk(int node)
+struct gendisk *__blk_alloc_disk(int node, struct lock_class_key *lkclass)
{
struct request_queue *q;
struct gendisk *disk;
if (!q)
return NULL;
- disk = __alloc_disk_node(0, node);
+ disk = __alloc_disk_node(q, node, lkclass);
if (!disk) {
blk_cleanup_queue(q);
return NULL;
}
- disk->queue = q;
return disk;
}
EXPORT_SYMBOL(__blk_alloc_disk);
return bdev->bd_read_only || get_disk_ro(bdev->bd_disk);
}
EXPORT_SYMBOL(bdev_read_only);
+
+void inc_diskseq(struct gendisk *disk)
+{
+ disk->diskseq = atomic64_inc_return(&diskseq);
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/genhd.h>
+
+struct bd_holder_disk {
+ struct list_head list;
+ struct block_device *bdev;
+ int refcnt;
+};
+
+static struct bd_holder_disk *bd_find_holder_disk(struct block_device *bdev,
+ struct gendisk *disk)
+{
+ struct bd_holder_disk *holder;
+
+ list_for_each_entry(holder, &disk->slave_bdevs, list)
+ if (holder->bdev == bdev)
+ return holder;
+ return NULL;
+}
+
+static int add_symlink(struct kobject *from, struct kobject *to)
+{
+ return sysfs_create_link(from, to, kobject_name(to));
+}
+
+static void del_symlink(struct kobject *from, struct kobject *to)
+{
+ sysfs_remove_link(from, kobject_name(to));
+}
+
+static int __link_disk_holder(struct block_device *bdev, struct gendisk *disk)
+{
+ int ret;
+
+ ret = add_symlink(disk->slave_dir, bdev_kobj(bdev));
+ if (ret)
+ return ret;
+ ret = add_symlink(bdev->bd_holder_dir, &disk_to_dev(disk)->kobj);
+ if (ret)
+ del_symlink(disk->slave_dir, bdev_kobj(bdev));
+ return ret;
+}
+
+/**
+ * bd_link_disk_holder - create symlinks between holding disk and slave bdev
+ * @bdev: the claimed slave bdev
+ * @disk: the holding disk
+ *
+ * DON'T USE THIS UNLESS YOU'RE ALREADY USING IT.
+ *
+ * This functions creates the following sysfs symlinks.
+ *
+ * - from "slaves" directory of the holder @disk to the claimed @bdev
+ * - from "holders" directory of the @bdev to the holder @disk
+ *
+ * For example, if /dev/dm-0 maps to /dev/sda and disk for dm-0 is
+ * passed to bd_link_disk_holder(), then:
+ *
+ * /sys/block/dm-0/slaves/sda --> /sys/block/sda
+ * /sys/block/sda/holders/dm-0 --> /sys/block/dm-0
+ *
+ * The caller must have claimed @bdev before calling this function and
+ * ensure that both @bdev and @disk are valid during the creation and
+ * lifetime of these symlinks.
+ *
+ * CONTEXT:
+ * Might sleep.
+ *
+ * RETURNS:
+ * 0 on success, -errno on failure.
+ */
+int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
+{
+ struct bd_holder_disk *holder;
+ int ret = 0;
+
+ mutex_lock(&disk->open_mutex);
+
+ WARN_ON_ONCE(!bdev->bd_holder);
+
+ /* FIXME: remove the following once add_disk() handles errors */
+ if (WARN_ON(!bdev->bd_holder_dir))
+ goto out_unlock;
+
+ holder = bd_find_holder_disk(bdev, disk);
+ if (holder) {
+ holder->refcnt++;
+ goto out_unlock;
+ }
+
+ holder = kzalloc(sizeof(*holder), GFP_KERNEL);
+ if (!holder) {
+ ret = -ENOMEM;
+ goto out_unlock;
+ }
+
+ INIT_LIST_HEAD(&holder->list);
+ holder->bdev = bdev;
+ holder->refcnt = 1;
+ if (disk->slave_dir) {
+ ret = __link_disk_holder(bdev, disk);
+ if (ret) {
+ kfree(holder);
+ goto out_unlock;
+ }
+ }
+
+ list_add(&holder->list, &disk->slave_bdevs);
+ /*
+ * del_gendisk drops the initial reference to bd_holder_dir, so we need
+ * to keep our own here to allow for cleanup past that point.
+ */
+ kobject_get(bdev->bd_holder_dir);
+
+out_unlock:
+ mutex_unlock(&disk->open_mutex);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(bd_link_disk_holder);
+
+static void __unlink_disk_holder(struct block_device *bdev,
+ struct gendisk *disk)
+{
+ del_symlink(disk->slave_dir, bdev_kobj(bdev));
+ del_symlink(bdev->bd_holder_dir, &disk_to_dev(disk)->kobj);
+}
+
+/**
+ * bd_unlink_disk_holder - destroy symlinks created by bd_link_disk_holder()
+ * @bdev: the calimed slave bdev
+ * @disk: the holding disk
+ *
+ * DON'T USE THIS UNLESS YOU'RE ALREADY USING IT.
+ *
+ * CONTEXT:
+ * Might sleep.
+ */
+void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
+{
+ struct bd_holder_disk *holder;
+
+ mutex_lock(&disk->open_mutex);
+ holder = bd_find_holder_disk(bdev, disk);
+ if (!WARN_ON_ONCE(holder == NULL) && !--holder->refcnt) {
+ if (disk->slave_dir)
+ __unlink_disk_holder(bdev, disk);
+ kobject_put(bdev->bd_holder_dir);
+ list_del_init(&holder->list);
+ kfree(holder);
+ }
+ mutex_unlock(&disk->open_mutex);
+}
+EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
+
+int bd_register_pending_holders(struct gendisk *disk)
+{
+ struct bd_holder_disk *holder;
+ int ret;
+
+ mutex_lock(&disk->open_mutex);
+ list_for_each_entry(holder, &disk->slave_bdevs, list) {
+ ret = __link_disk_holder(holder->bdev, disk);
+ if (ret)
+ goto out_undo;
+ }
+ mutex_unlock(&disk->open_mutex);
+ return 0;
+
+out_undo:
+ list_for_each_entry_continue_reverse(holder, &disk->slave_bdevs, list)
+ __unlink_disk_holder(holder->bdev, disk);
+ mutex_unlock(&disk->open_mutex);
+ return ret;
+}
static int blkpg_do_ioctl(struct block_device *bdev,
struct blkpg_partition __user *upart, int op)
{
+ struct gendisk *disk = bdev->bd_disk;
struct blkpg_partition p;
long long start, length;
return -EINVAL;
if (op == BLKPG_DEL_PARTITION)
- return bdev_del_partition(bdev, p.pno);
+ return bdev_del_partition(disk, p.pno);
start = p.start >> SECTOR_SHIFT;
length = p.length >> SECTOR_SHIFT;
/* check if partition is aligned to blocksize */
if (p.start & (bdev_logical_block_size(bdev) - 1))
return -EINVAL;
- return bdev_add_partition(bdev, p.pno, start, length);
+ return bdev_add_partition(disk, p.pno, start, length);
case BLKPG_RESIZE_PARTITION:
- return bdev_resize_partition(bdev, p.pno, start, length);
+ return bdev_resize_partition(disk, p.pno, start, length);
default:
return -EINVAL;
}
BLKDEV_DISCARD_SECURE);
case BLKZEROOUT:
return blk_ioctl_zeroout(bdev, mode, arg);
+ case BLKGETDISKSEQ:
+ return put_u64(argp, bdev->bd_disk->diskseq);
case BLKREPORTZONE:
return blkdev_report_zones_ioctl(bdev, mode, cmd, arg);
case BLKRESETZONE:
case BLKFRASET:
if(!capable(CAP_SYS_ADMIN))
return -EACCES;
- bdev->bd_bdi->ra_pages = (arg * 512) / PAGE_SIZE;
+ bdev->bd_disk->bdi->ra_pages = (arg * 512) / PAGE_SIZE;
return 0;
case BLKRRPART:
return blkdev_reread_part(bdev, mode);
case BLKFRAGET:
if (!argp)
return -EINVAL;
- return put_long(argp, (bdev->bd_bdi->ra_pages*PAGE_SIZE) / 512);
+ return put_long(argp,
+ (bdev->bd_disk->bdi->ra_pages * PAGE_SIZE) / 512);
case BLKGETSIZE:
size = i_size_read(bdev->bd_inode);
if ((size >> 9) > ~0UL)
if (!argp)
return -EINVAL;
return compat_put_long(argp,
- (bdev->bd_bdi->ra_pages * PAGE_SIZE) / 512);
+ (bdev->bd_disk->bdi->ra_pages * PAGE_SIZE) / 512);
case BLKGETSIZE:
size = i_size_read(bdev->bd_inode);
if ((size >> 9) > ~0UL)
fallthrough;
/* rt has prio field too */
case IOPRIO_CLASS_BE:
- if (data >= IOPRIO_BE_NR || data < 0)
+ if (data >= IOPRIO_NR_LEVELS || data < 0)
return -EINVAL;
-
break;
case IOPRIO_CLASS_IDLE:
break;
ret = security_task_getioprio(p);
if (ret)
goto out;
- ret = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, IOPRIO_NORM);
+ ret = IOPRIO_DEFAULT;
task_lock(p);
if (p->io_context)
ret = p->io_context->ioprio;
int ioprio_best(unsigned short aprio, unsigned short bprio)
{
if (!ioprio_valid(aprio))
- aprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_NORM);
+ aprio = IOPRIO_DEFAULT;
if (!ioprio_valid(bprio))
- bprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_NORM);
+ bprio = IOPRIO_DEFAULT;
return min(aprio, bprio);
}
if (elv_bio_merge_ok(__rq, bio)) {
*rq = __rq;
+ if (blk_discard_mergable(__rq))
+ return ELEVATOR_DISCARD_MERGE;
return ELEVATOR_FRONT_MERGE;
}
}
config CMDLINE_PARTITION
bool "Command line partition support" if PARTITION_ADVANCED
- select BLK_CMDLINE_PARSER
help
Say Y here if you want to read the partition table from bootargs.
The format for the command line is just like mtdparts.
/*
* Work out start of non-adfs partition.
*/
- nr_sects = (state->bdev->bd_inode->i_size >> 9) - start_sect;
+ nr_sects = get_capacity(state->disk) - start_sect;
if (start_sect) {
switch (id) {
if (i != 0) {
sector_t size;
- size = get_capacity(state->bdev->bd_disk);
+ size = get_capacity(state->disk);
put_partition(state, slot++, start, size - start);
strlcat(state->pp_buf, "\n", PAGE_SIZE);
}
#define LVM_MAXLVS 256
-/**
- * last_lba(): return number of last logical block of device
- * @bdev: block device
- *
- * Description: Returns last LBA value on success, 0 on error.
- * This is stored (by sd and ide-geometry) in
- * the part[0] entry for this disk, and is the number of
- * physical sectors available on the disk.
- */
-static u64 last_lba(struct block_device *bdev)
-{
- if (!bdev || !bdev->bd_inode)
- return 0;
- return (bdev->bd_inode->i_size >> 9) - 1ULL;
-}
-
/**
* read_lba(): Read bytes from disk, starting at given LBA
* @state
* @buffer
* @count
*
- * Description: Reads @count bytes from @state->bdev into @buffer.
+ * Description: Reads @count bytes from @state->disk into @buffer.
* Returns number of bytes read on success, 0 on error.
*/
static size_t read_lba(struct parsed_partitions *state, u64 lba, u8 *buffer,
{
size_t totalreadcount = 0;
- if (!buffer || lba + count / 512 > last_lba(state->bdev))
+ if (!buffer || lba + count / 512 > get_capacity(state->disk) - 1ULL)
return 0;
while (count) {
int start_sect, nr_sects, blk, part, res = 0;
int blksize = 1; /* Multiplier for disk block size */
int slot = 1;
- char b[BDEVNAME_SIZE];
for (blk = 0; ; blk++, put_dev_sector(sect)) {
if (blk == RDB_ALLOCATION_LIMIT)
data = read_part_sector(state, blk, §);
if (!data) {
pr_err("Dev %s: unable to read RDB block %d\n",
- bdevname(state->bdev, b), blk);
+ state->disk->disk_name, blk);
res = -1;
goto rdb_done;
}
}
pr_err("Dev %s: RDB in block %d has bad checksum\n",
- bdevname(state->bdev, b), blk);
+ state->disk->disk_name, blk);
}
/* blksize is blocks per 512 byte standard block */
data = read_part_sector(state, blk, §);
if (!data) {
pr_err("Dev %s: unable to read partition block %d\n",
- bdevname(state->bdev, b), blk);
+ state->disk->disk_name, blk);
res = -1;
goto rdb_done;
}
* ATARI partition scheme supports 512 lba only. If this is not
* the case, bail early to avoid miscalculating hd_size.
*/
- if (bdev_logical_block_size(state->bdev) != 512)
+ if (queue_logical_block_size(state->disk->queue) != 512)
return 0;
rs = read_part_sector(state, 0, §);
return -1;
/* Verify this is an Atari rootsector: */
- hd_size = state->bdev->bd_inode->i_size >> 9;
+ hd_size = get_capacity(state->disk);
if (!VALID_PARTITION(&rs->part[0], hd_size) &&
!VALID_PARTITION(&rs->part[1], hd_size) &&
!VALID_PARTITION(&rs->part[2], hd_size) &&
* description.
*/
struct parsed_partitions {
- struct block_device *bdev;
+ struct gendisk *disk;
char name[BDEVNAME_SIZE];
struct {
sector_t from;
* For further information, see "Documentation/block/cmdline-partition.rst"
*
*/
+#include <linux/blkdev.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include "check.h"
-#include <linux/cmdline-parser.h>
-#include "check.h"
+/* partition flags */
+#define PF_RDONLY 0x01 /* Device is read only */
+#define PF_POWERUP_LOCK 0x02 /* Always locked after reset */
+
+struct cmdline_subpart {
+ char name[BDEVNAME_SIZE]; /* partition name, such as 'rootfs' */
+ sector_t from;
+ sector_t size;
+ int flags;
+ struct cmdline_subpart *next_subpart;
+};
+
+struct cmdline_parts {
+ char name[BDEVNAME_SIZE]; /* block device, such as 'mmcblk0' */
+ unsigned int nr_subparts;
+ struct cmdline_subpart *subpart;
+ struct cmdline_parts *next_parts;
+};
+
+static int parse_subpart(struct cmdline_subpart **subpart, char *partdef)
+{
+ int ret = 0;
+ struct cmdline_subpart *new_subpart;
+
+ *subpart = NULL;
+
+ new_subpart = kzalloc(sizeof(struct cmdline_subpart), GFP_KERNEL);
+ if (!new_subpart)
+ return -ENOMEM;
+
+ if (*partdef == '-') {
+ new_subpart->size = (sector_t)(~0ULL);
+ partdef++;
+ } else {
+ new_subpart->size = (sector_t)memparse(partdef, &partdef);
+ if (new_subpart->size < (sector_t)PAGE_SIZE) {
+ pr_warn("cmdline partition size is invalid.");
+ ret = -EINVAL;
+ goto fail;
+ }
+ }
+
+ if (*partdef == '@') {
+ partdef++;
+ new_subpart->from = (sector_t)memparse(partdef, &partdef);
+ } else {
+ new_subpart->from = (sector_t)(~0ULL);
+ }
+
+ if (*partdef == '(') {
+ int length;
+ char *next = strchr(++partdef, ')');
+
+ if (!next) {
+ pr_warn("cmdline partition format is invalid.");
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ length = min_t(int, next - partdef,
+ sizeof(new_subpart->name) - 1);
+ strncpy(new_subpart->name, partdef, length);
+ new_subpart->name[length] = '\0';
+
+ partdef = ++next;
+ } else
+ new_subpart->name[0] = '\0';
+
+ new_subpart->flags = 0;
+
+ if (!strncmp(partdef, "ro", 2)) {
+ new_subpart->flags |= PF_RDONLY;
+ partdef += 2;
+ }
+
+ if (!strncmp(partdef, "lk", 2)) {
+ new_subpart->flags |= PF_POWERUP_LOCK;
+ partdef += 2;
+ }
+
+ *subpart = new_subpart;
+ return 0;
+fail:
+ kfree(new_subpart);
+ return ret;
+}
+
+static void free_subpart(struct cmdline_parts *parts)
+{
+ struct cmdline_subpart *subpart;
+
+ while (parts->subpart) {
+ subpart = parts->subpart;
+ parts->subpart = subpart->next_subpart;
+ kfree(subpart);
+ }
+}
+
+static int parse_parts(struct cmdline_parts **parts, const char *bdevdef)
+{
+ int ret = -EINVAL;
+ char *next;
+ int length;
+ struct cmdline_subpart **next_subpart;
+ struct cmdline_parts *newparts;
+ char buf[BDEVNAME_SIZE + 32 + 4];
+
+ *parts = NULL;
+
+ newparts = kzalloc(sizeof(struct cmdline_parts), GFP_KERNEL);
+ if (!newparts)
+ return -ENOMEM;
+
+ next = strchr(bdevdef, ':');
+ if (!next) {
+ pr_warn("cmdline partition has no block device.");
+ goto fail;
+ }
+
+ length = min_t(int, next - bdevdef, sizeof(newparts->name) - 1);
+ strncpy(newparts->name, bdevdef, length);
+ newparts->name[length] = '\0';
+ newparts->nr_subparts = 0;
+
+ next_subpart = &newparts->subpart;
+
+ while (next && *(++next)) {
+ bdevdef = next;
+ next = strchr(bdevdef, ',');
+
+ length = (!next) ? (sizeof(buf) - 1) :
+ min_t(int, next - bdevdef, sizeof(buf) - 1);
+
+ strncpy(buf, bdevdef, length);
+ buf[length] = '\0';
+
+ ret = parse_subpart(next_subpart, buf);
+ if (ret)
+ goto fail;
+
+ newparts->nr_subparts++;
+ next_subpart = &(*next_subpart)->next_subpart;
+ }
+
+ if (!newparts->subpart) {
+ pr_warn("cmdline partition has no valid partition.");
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ *parts = newparts;
+
+ return 0;
+fail:
+ free_subpart(newparts);
+ kfree(newparts);
+ return ret;
+}
+
+static void cmdline_parts_free(struct cmdline_parts **parts)
+{
+ struct cmdline_parts *next_parts;
+
+ while (*parts) {
+ next_parts = (*parts)->next_parts;
+ free_subpart(*parts);
+ kfree(*parts);
+ *parts = next_parts;
+ }
+}
+
+static int cmdline_parts_parse(struct cmdline_parts **parts,
+ const char *cmdline)
+{
+ int ret;
+ char *buf;
+ char *pbuf;
+ char *next;
+ struct cmdline_parts **next_parts;
+
+ *parts = NULL;
+
+ next = pbuf = buf = kstrdup(cmdline, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ next_parts = parts;
+
+ while (next && *pbuf) {
+ next = strchr(pbuf, ';');
+ if (next)
+ *next = '\0';
+
+ ret = parse_parts(next_parts, pbuf);
+ if (ret)
+ goto fail;
+
+ if (next)
+ pbuf = ++next;
+
+ next_parts = &(*next_parts)->next_parts;
+ }
+
+ if (!*parts) {
+ pr_warn("cmdline partition has no valid partition.");
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ ret = 0;
+done:
+ kfree(buf);
+ return ret;
+
+fail:
+ cmdline_parts_free(parts);
+ goto done;
+}
+
+static struct cmdline_parts *cmdline_parts_find(struct cmdline_parts *parts,
+ const char *bdev)
+{
+ while (parts && strncmp(bdev, parts->name, sizeof(parts->name)))
+ parts = parts->next_parts;
+ return parts;
+}
static char *cmdline;
static struct cmdline_parts *bdev_parts;
-static int add_part(int slot, struct cmdline_subpart *subpart, void *param)
+static int add_part(int slot, struct cmdline_subpart *subpart,
+ struct parsed_partitions *state)
{
int label_min;
struct partition_meta_info *info;
char tmp[sizeof(info->volname) + 4];
- struct parsed_partitions *state = (struct parsed_partitions *)param;
if (slot >= state->limit)
return 1;
return 0;
}
+static int cmdline_parts_set(struct cmdline_parts *parts, sector_t disk_size,
+ struct parsed_partitions *state)
+{
+ sector_t from = 0;
+ struct cmdline_subpart *subpart;
+ int slot = 1;
+
+ for (subpart = parts->subpart; subpart;
+ subpart = subpart->next_subpart, slot++) {
+ if (subpart->from == (sector_t)(~0ULL))
+ subpart->from = from;
+ else
+ from = subpart->from;
+
+ if (from >= disk_size)
+ break;
+
+ if (subpart->size > (disk_size - from))
+ subpart->size = disk_size - from;
+
+ from += subpart->size;
+
+ if (add_part(slot, subpart, state))
+ break;
+ }
+
+ return slot;
+}
+
static int __init cmdline_parts_setup(char *s)
{
cmdline = s;
int cmdline_partition(struct parsed_partitions *state)
{
sector_t disk_size;
- char bdev[BDEVNAME_SIZE];
struct cmdline_parts *parts;
if (cmdline) {
if (!bdev_parts)
return 0;
- bdevname(state->bdev, bdev);
- parts = cmdline_parts_find(bdev_parts, bdev);
+ parts = cmdline_parts_find(bdev_parts, state->disk->disk_name);
if (!parts)
return 0;
- disk_size = get_capacity(state->bdev->bd_disk) << 9;
+ disk_size = get_capacity(state->disk) << 9;
- cmdline_parts_set(parts, disk_size, 1, add_part, (void *)state);
+ cmdline_parts_set(parts, disk_size, state);
cmdline_parts_verifier(1, state);
strlcat(state->pp_buf, "\n", PAGE_SIZE);
}
state->pp_buf[0] = '\0';
- state->bdev = hd->part0;
- disk_name(hd, 0, state->name);
+ state->disk = hd;
+ snprintf(state->name, BDEVNAME_SIZE, "%s", hd->disk_name);
snprintf(state->pp_buf, PAGE_SIZE, " %s:", state->name);
if (isdigit(state->name[strlen(state->name)-1]))
sprintf(state->name, "p");
static void part_release(struct device *dev)
{
- if (MAJOR(dev->devt) == BLOCK_EXT_MAJOR)
- blk_free_ext_minor(MINOR(dev->devt));
- bdput(dev_to_bdev(dev));
+ put_disk(dev_to_bdev(dev)->bd_disk);
+ iput(dev_to_bdev(dev)->bd_inode);
}
static int part_uevent(struct device *dev, struct kobj_uevent_env *env)
.uevent = part_uevent,
};
-/*
- * Must be called either with open_mutex held, before a disk can be opened or
- * after all disk users are gone.
- */
static void delete_partition(struct block_device *part)
{
+ lockdep_assert_held(&part->bd_disk->open_mutex);
+
fsync_bdev(part);
__invalidate_device(part, true);
if (xa_load(&disk->part_tbl, partno))
return ERR_PTR(-EBUSY);
+ /* ensure we always have a reference to the whole disk */
+ get_device(disk_to_dev(disk));
+
+ err = -ENOMEM;
bdev = bdev_alloc(disk, partno);
if (!bdev)
- return ERR_PTR(-ENOMEM);
+ goto out_put_disk;
bdev->bd_start_sect = start;
bdev_set_nr_sectors(bdev, len);
- if (info) {
- err = -ENOMEM;
- bdev->bd_meta_info = kmemdup(info, sizeof(*info), GFP_KERNEL);
- if (!bdev->bd_meta_info)
- goto out_bdput;
- }
-
pdev = &bdev->bd_device;
dname = dev_name(ddev);
if (isdigit(dname[strlen(dname) - 1]))
}
pdev->devt = devt;
+ if (info) {
+ err = -ENOMEM;
+ bdev->bd_meta_info = kmemdup(info, sizeof(*info), GFP_KERNEL);
+ if (!bdev->bd_meta_info)
+ goto out_put;
+ }
+
/* delay uevent until 'holders' subdir is created */
dev_set_uevent_suppress(pdev, 1);
err = device_add(pdev);
kobject_uevent(&pdev->kobj, KOBJ_ADD);
return bdev;
-out_bdput:
- bdput(bdev);
- return ERR_PTR(err);
out_del:
kobject_put(bdev->bd_holder_dir);
device_del(pdev);
out_put:
put_device(pdev);
+out_put_disk:
+ put_disk(disk);
return ERR_PTR(err);
}
return overlap;
}
-int bdev_add_partition(struct block_device *bdev, int partno,
- sector_t start, sector_t length)
+int bdev_add_partition(struct gendisk *disk, int partno, sector_t start,
+ sector_t length)
{
struct block_device *part;
- struct gendisk *disk = bdev->bd_disk;
int ret;
mutex_lock(&disk->open_mutex);
- if (!(disk->flags & GENHD_FL_UP)) {
+ if (!disk_live(disk)) {
ret = -ENXIO;
goto out;
}
return ret;
}
-int bdev_del_partition(struct block_device *bdev, int partno)
+int bdev_del_partition(struct gendisk *disk, int partno)
{
struct block_device *part = NULL;
int ret = -ENXIO;
- mutex_lock(&bdev->bd_disk->open_mutex);
- part = xa_load(&bdev->bd_disk->part_tbl, partno);
+ mutex_lock(&disk->open_mutex);
+ part = xa_load(&disk->part_tbl, partno);
if (!part)
goto out_unlock;
delete_partition(part);
ret = 0;
out_unlock:
- mutex_unlock(&bdev->bd_disk->open_mutex);
+ mutex_unlock(&disk->open_mutex);
return ret;
}
-int bdev_resize_partition(struct block_device *bdev, int partno,
- sector_t start, sector_t length)
+int bdev_resize_partition(struct gendisk *disk, int partno, sector_t start,
+ sector_t length)
{
struct block_device *part = NULL;
int ret = -ENXIO;
- mutex_lock(&bdev->bd_disk->open_mutex);
- part = xa_load(&bdev->bd_disk->part_tbl, partno);
+ mutex_lock(&disk->open_mutex);
+ part = xa_load(&disk->part_tbl, partno);
if (!part)
goto out_unlock;
goto out_unlock;
ret = -EBUSY;
- if (partition_overlaps(bdev->bd_disk, start, length, partno))
+ if (partition_overlaps(disk, start, length, partno))
goto out_unlock;
bdev_set_nr_sectors(part, length);
ret = 0;
out_unlock:
- mutex_unlock(&bdev->bd_disk->open_mutex);
+ mutex_unlock(&disk->open_mutex);
return ret;
}
lockdep_assert_held(&disk->open_mutex);
- if (!(disk->flags & GENHD_FL_UP))
+ if (!disk_live(disk))
return -ENXIO;
rescan:
void *read_part_sector(struct parsed_partitions *state, sector_t n, Sector *p)
{
- struct address_space *mapping = state->bdev->bd_inode->i_mapping;
+ struct address_space *mapping = state->disk->part0->bd_inode->i_mapping;
struct page *page;
- if (n >= get_capacity(state->bdev->bd_disk)) {
+ if (n >= get_capacity(state->disk)) {
state->access_beyond_eod = true;
return NULL;
}
/**
* last_lba(): return number of last logical block of device
- * @bdev: block device
+ * @disk: block device
*
* Description: Returns last LBA value on success, 0 on error.
* This is stored (by sd and ide-geometry) in
* the part[0] entry for this disk, and is the number of
* physical sectors available on the disk.
*/
-static u64 last_lba(struct block_device *bdev)
+static u64 last_lba(struct gendisk *disk)
{
- if (!bdev || !bdev->bd_inode)
- return 0;
- return div_u64(bdev->bd_inode->i_size,
- bdev_logical_block_size(bdev)) - 1ULL;
+ return div_u64(disk->part0->bd_inode->i_size,
+ queue_logical_block_size(disk->queue)) - 1ULL;
}
static inline int pmbr_part_valid(gpt_mbr_record *part)
* @buffer: destination buffer
* @count: bytes to read
*
- * Description: Reads @count bytes from @state->bdev into @buffer.
+ * Description: Reads @count bytes from @state->disk into @buffer.
* Returns number of bytes read on success, 0 on error.
*/
static size_t read_lba(struct parsed_partitions *state,
u64 lba, u8 *buffer, size_t count)
{
size_t totalreadcount = 0;
- struct block_device *bdev = state->bdev;
- sector_t n = lba * (bdev_logical_block_size(bdev) / 512);
+ sector_t n = lba *
+ (queue_logical_block_size(state->disk->queue) / 512);
- if (!buffer || lba > last_lba(bdev))
+ if (!buffer || lba > last_lba(state->disk))
return 0;
while (count) {
* @lba: the Logical Block Address of the partition table
*
* Description: returns GPT header on success, NULL on error. Allocates
- * and fills a GPT header starting at @ from @state->bdev.
+ * and fills a GPT header starting at @ from @state->disk.
* Note: remember to free gpt when finished with it.
*/
static gpt_header *alloc_read_gpt_header(struct parsed_partitions *state,
u64 lba)
{
gpt_header *gpt;
- unsigned ssz = bdev_logical_block_size(state->bdev);
+ unsigned ssz = queue_logical_block_size(state->disk->queue);
gpt = kmalloc(ssz, GFP_KERNEL);
if (!gpt)
/* Check the GUID Partition Table header size is too big */
if (le32_to_cpu((*gpt)->header_size) >
- bdev_logical_block_size(state->bdev)) {
+ queue_logical_block_size(state->disk->queue)) {
pr_debug("GUID Partition Table Header size is too large: %u > %u\n",
le32_to_cpu((*gpt)->header_size),
- bdev_logical_block_size(state->bdev));
+ queue_logical_block_size(state->disk->queue));
goto fail;
}
/* Check the first_usable_lba and last_usable_lba are
* within the disk.
*/
- lastlba = last_lba(state->bdev);
+ lastlba = last_lba(state->disk);
if (le64_to_cpu((*gpt)->first_usable_lba) > lastlba) {
pr_debug("GPT: first_usable_lba incorrect: %lld > %lld\n",
(unsigned long long)le64_to_cpu((*gpt)->first_usable_lba),
gpt_header *pgpt = NULL, *agpt = NULL;
gpt_entry *pptes = NULL, *aptes = NULL;
legacy_mbr *legacymbr;
- sector_t total_sectors = i_size_read(state->bdev->bd_inode) >> 9;
+ struct gendisk *disk = state->disk;
+ const struct block_device_operations *fops = disk->fops;
+ sector_t total_sectors = get_capacity(state->disk);
u64 lastlba;
if (!ptes)
return 0;
- lastlba = last_lba(state->bdev);
+ lastlba = last_lba(state->disk);
if (!force_gpt) {
/* This will be added to the EFI Spec. per Intel after v1.02. */
legacymbr = kzalloc(sizeof(*legacymbr), GFP_KERNEL);
if (!good_agpt && force_gpt)
good_agpt = is_gpt_valid(state, lastlba, &agpt, &aptes);
+ if (!good_agpt && force_gpt && fops->alternative_gpt_sector) {
+ sector_t agpt_sector;
+ int err;
+
+ err = fops->alternative_gpt_sector(disk, &agpt_sector);
+ if (!err)
+ good_agpt = is_gpt_valid(state, agpt_sector,
+ &agpt, &aptes);
+ }
+
/* The obviously unsuccessful case */
if (!good_pgpt && !good_agpt)
goto fail;
gpt_header *gpt = NULL;
gpt_entry *ptes = NULL;
u32 i;
- unsigned ssz = bdev_logical_block_size(state->bdev) / 512;
+ unsigned ssz = queue_logical_block_size(state->disk->queue) / 512;
if (!find_valid_gpt(state, &gpt, &ptes) || !gpt || !ptes) {
kfree(gpt);
u64 size = le64_to_cpu(ptes[i].ending_lba) -
le64_to_cpu(ptes[i].starting_lba) + 1ULL;
- if (!is_pte_valid(&ptes[i], last_lba(state->bdev)))
+ if (!is_pte_valid(&ptes[i], last_lba(state->disk)))
continue;
put_partition(state, i+1, start * ssz, size * ssz);
int ibm_partition(struct parsed_partitions *state)
{
int (*fn)(struct gendisk *disk, dasd_information2_t *info);
- struct block_device *bdev = state->bdev;
- struct gendisk *disk = bdev->bd_disk;
+ struct gendisk *disk = state->disk;
+ struct block_device *bdev = disk->part0;
int blocksize, res;
loff_t i_size, offset, size;
dasd_information2_t *info;
}
}
- num_sects = state->bdev->bd_inode->i_size >> 9;
+ num_sects = get_capacity(state->disk);
if ((ph[0]->config_start > num_sects) ||
((ph[0]->config_start + ph[0]->config_size) > num_sects)) {
/**
* ldm_validate_tocblocks - Validate the table of contents and its backups
* @state: Partition check state including device holding the LDM Database
- * @base: Offset, into @state->bdev, of the database
+ * @base: Offset, into @state->disk, of the database
* @ldb: Cache of the database structures
*
* Find and compare the four tables of contents of the LDM Database stored on
- * @state->bdev and return the parsed information into @toc1.
+ * @state->disk and return the parsed information into @toc1.
*
* The offsets and sizes of the configs are range-checked against a privhead.
*
* only likely to happen if the underlying device is strange. If that IS
* the case we should return zero to let someone else try.
*
- * Return: 'true' @state->bdev is a dynamic disk
- * 'false' @state->bdev is not a dynamic disk, or an error occurred
+ * Return: 'true' @state->disk is a dynamic disk
+ * 'false' @state->disk is not a dynamic disk, or an error occurred
*/
static bool ldm_validate_partition_table(struct parsed_partitions *state)
{
/**
* ldm_get_vblks - Read the on-disk database of VBLKs into memory
* @state: Partition check state including device holding the LDM Database
- * @base: Offset, into @state->bdev, of the database
+ * @base: Offset, into @state->disk, of the database
* @ldb: Cache of the database structures
*
* To use the information from the VBLKs, they need to be read from the disk,
* example, if the device is hda, we would have: hda1: LDM database, hda2, hda3,
* and so on: the actual data containing partitions.
*
- * Return: 1 Success, @state->bdev is a dynamic disk and we handled it
- * 0 Success, @state->bdev is not a dynamic disk
+ * Return: 1 Success, @state->disk is a dynamic disk and we handled it
+ * 0 Success, @state->disk is not a dynamic disk
* -1 An error occurred before enough information had been read
- * Or @state->bdev is a dynamic disk, but it may be corrupted
+ * Or @state->disk is a dynamic disk, but it may be corrupted
*/
int ldm_partition(struct parsed_partitions *state)
{
}
#ifdef CONFIG_PPC_PMAC
if (found_root_goodness)
- note_bootable_part(state->bdev->bd_dev, found_root,
+ note_bootable_part(state->disk->part0->bd_dev, found_root,
found_root_goodness);
#endif
Sector sect;
unsigned char *data;
sector_t this_sector, this_size;
- sector_t sector_size = bdev_logical_block_size(state->bdev) / 512;
+ sector_t sector_size;
int loopct = 0; /* number of links followed
without finding a data partition */
int i;
+ sector_size = queue_logical_block_size(state->disk->queue) / 512;
this_sector = first_sector;
this_size = first_size;
int msdos_partition(struct parsed_partitions *state)
{
- sector_t sector_size = bdev_logical_block_size(state->bdev) / 512;
+ sector_t sector_size;
Sector sect;
unsigned char *data;
struct msdos_partition *p;
int slot;
u32 disksig;
+ sector_size = queue_logical_block_size(state->disk->queue) / 512;
data = read_part_sector(state, 0, §);
if (!data)
return -1;
Sector sect;
struct sgi_disklabel *label;
struct sgi_partition *p;
- char b[BDEVNAME_SIZE];
label = read_part_sector(state, 0, §);
if (!label)
magic = label->magic_mushroom;
if(be32_to_cpu(magic) != SGI_LABEL_MAGIC) {
/*printk("Dev %s SGI disklabel: bad magic %08x\n",
- bdevname(bdev, b), be32_to_cpu(magic));*/
+ state->disk->disk_name, be32_to_cpu(magic));*/
put_dev_sector(sect);
return 0;
}
}
if(csum) {
printk(KERN_WARNING "Dev %s SGI disklabel: csum bad, label corrupted\n",
- bdevname(state->bdev, b));
+ state->disk->disk_name);
put_dev_sector(sect);
return 0;
}
} * label;
struct sun_partition *p;
unsigned long spc;
- char b[BDEVNAME_SIZE];
int use_vtoc;
int nparts;
p = label->partitions;
if (be16_to_cpu(label->magic) != SUN_LABEL_MAGIC) {
/* printk(KERN_INFO "Dev %s Sun disklabel: bad magic %04x\n",
- bdevname(bdev, b), be16_to_cpu(label->magic)); */
+ state->disk->disk_name, be16_to_cpu(label->magic)); */
put_dev_sector(sect);
return 0;
}
csum ^= *ush--;
if (csum) {
printk("Dev %s Sun disklabel: Csum bad, label corrupted\n",
- bdevname(state->bdev, b));
+ state->disk->disk_name);
put_dev_sector(sect);
return 0;
}
break;
bip_for_each_vec(iv, bip, iter) {
- void *p, *pmap;
unsigned int j;
+ void *p;
- pmap = kmap_atomic(iv.bv_page);
- p = pmap + iv.bv_offset;
+ p = bvec_kmap_local(&iv);
for (j = 0; j < iv.bv_len; j += tuple_sz) {
struct t10_pi_tuple *pi = p;
ref_tag++;
p += tuple_sz;
}
-
- kunmap_atomic(pmap);
+ kunmap_local(p);
}
bip->bip_flags |= BIP_MAPPED_INTEGRITY;
struct bvec_iter iter;
bip_for_each_vec(iv, bip, iter) {
- void *p, *pmap;
unsigned int j;
+ void *p;
- pmap = kmap_atomic(iv.bv_page);
- p = pmap + iv.bv_offset;
+ p = bvec_kmap_local(&iv);
for (j = 0; j < iv.bv_len && intervals; j += tuple_sz) {
struct t10_pi_tuple *pi = p;
intervals--;
p += tuple_sz;
}
-
- kunmap_atomic(pmap);
+ kunmap_local(p);
}
}
}
#include <linux/uaccess.h>
-#define PAGE_SECTORS_SHIFT (PAGE_SHIFT - SECTOR_SHIFT)
-#define PAGE_SECTORS (1 << PAGE_SECTORS_SHIFT)
-
/*
* Each block ramdisk device has a radix_tree brd_pages of pages that stores
* the pages containing the block device's contents. A brd page's ->index is
if (b) {
blk_stack_limits(&q->limits, &b->limits, 0);
- blk_queue_update_readahead(q);
+ disk_update_readahead(device->vdisk);
}
fixup_discard_if_not_supported(q);
fixup_write_zeroes(device, q);
static bool remote_due_to_read_balancing(struct drbd_device *device, sector_t sector,
enum drbd_read_balancing rbm)
{
- struct backing_dev_info *bdi;
int stripe_shift;
switch (rbm) {
case RB_CONGESTED_REMOTE:
- bdi = device->ldev->backing_bdev->bd_disk->queue->backing_dev_info;
- return bdi_read_congested(bdi);
+ return bdi_read_congested(
+ device->ldev->backing_bdev->bd_disk->bdi);
case RB_LEAST_PENDING:
return atomic_read(&device->local_cnt) >
atomic_read(&device->ap_pending_cnt) + atomic_read(&device->rs_pending_cnt);
goto out_err;
/* and ... switch */
+ disk_force_media_change(lo->lo_disk, DISK_EVENT_MEDIA_CHANGE);
blk_mq_freeze_queue(lo->lo_queue);
mapping_set_gfp_mask(old_file->f_mapping, lo->old_gfp_mask);
lo->lo_backing_file = file;
goto out_unlock;
}
+ disk_force_media_change(lo->lo_disk, DISK_EVENT_MEDIA_CHANGE);
set_disk_ro(lo->lo_disk, (lo->lo_flags & LO_FLAGS_READ_ONLY) != 0);
INIT_WORK(&lo->rootcg_work, loop_rootcg_workfn);
if (partscan)
lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN;
- /* Grab the block_device to prevent its destruction after we
- * put /dev/loopXX inode. Later in __loop_clr_fd() we bdput(bdev).
- */
- bdgrab(bdev);
loop_global_unlock(lo, is_loop);
if (partscan)
loop_reread_partitions(lo);
blk_queue_physical_block_size(lo->lo_queue, 512);
blk_queue_io_min(lo->lo_queue, 512);
if (bdev) {
- bdput(bdev);
invalidate_bdev(bdev);
bdev->bd_inode->i_mapping->wb_err = 0;
}
partscan = lo->lo_flags & LO_FLAGS_PARTSCAN && bdev;
lo_number = lo->lo_number;
+ disk_force_media_change(lo->lo_disk, DISK_EVENT_MEDIA_CHANGE);
out_unlock:
mutex_unlock(&lo->lo_mutex);
if (partscan) {
lo->tag_set.queue_depth = 128;
lo->tag_set.numa_node = NUMA_NO_NODE;
lo->tag_set.cmd_size = sizeof(struct loop_cmd);
- lo->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_STACKING;
+ lo->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_STACKING |
+ BLK_MQ_F_NO_SCHED_BY_DEFAULT;
lo->tag_set.driver_data = lo;
err = blk_mq_alloc_tag_set(&lo->tag_set);
disk->fops = &lo_fops;
disk->private_data = lo;
disk->queue = lo->lo_queue;
+ disk->events = DISK_EVENT_MEDIA_CHANGE;
+ disk->event_flags = DISK_EVENT_FLAG_UEVENT;
sprintf(disk->disk_name, "loop%d", i);
add_disk(disk);
mutex_unlock(&loop_ctl_mutex);
#include <linux/init.h>
#include "null_blk.h"
-#define PAGE_SECTORS_SHIFT (PAGE_SHIFT - SECTOR_SHIFT)
-#define PAGE_SECTORS (1 << PAGE_SECTORS_SHIFT)
-#define SECTOR_MASK (PAGE_SECTORS - 1)
-
#define FREE_BATCH 16
#define TICKS_PER_SEC 50ULL
return ret;
}
- add_disk(disk);
- return 0;
+ return add_disk(disk);
}
static int null_init_tag_set(struct nullb *nullb, struct blk_mq_tag_set *set)
wakeup = (pd->write_congestion_on > 0
&& pd->bio_queue_size <= pd->write_congestion_off);
spin_unlock(&pd->lock);
- if (wakeup) {
- clear_bdi_congested(pd->disk->queue->backing_dev_info,
- BLK_RW_ASYNC);
- }
+ if (wakeup)
+ clear_bdi_congested(pd->disk->bdi, BLK_RW_ASYNC);
pkt->sleep_time = max(PACKET_WAIT_TIME, 1);
pkt_set_state(pkt, PACKET_WAITING_STATE);
spin_lock(&pd->lock);
if (pd->write_congestion_on > 0
&& pd->bio_queue_size >= pd->write_congestion_on) {
- set_bdi_congested(q->backing_dev_info, BLK_RW_ASYNC);
+ set_bdi_congested(bio->bi_bdev->bd_disk->bdi, BLK_RW_ASYNC);
do {
spin_unlock(&pd->lock);
congestion_wait(BLK_RW_ASYNC, HZ);
unsigned int offset = 0;
struct req_iterator iter;
struct bio_vec bvec;
- unsigned int i = 0;
- size_t size;
- void *buf;
rq_for_each_segment(bvec, req, iter) {
- unsigned long flags;
- dev_dbg(&dev->sbd.core, "%s:%u: bio %u: %u sectors from %llu\n",
- __func__, __LINE__, i, bio_sectors(iter.bio),
- iter.bio->bi_iter.bi_sector);
-
- size = bvec.bv_len;
- buf = bvec_kmap_irq(&bvec, &flags);
if (gather)
- memcpy(dev->bounce_buf+offset, buf, size);
+ memcpy_from_bvec(dev->bounce_buf + offset, &bvec);
else
- memcpy(buf, dev->bounce_buf+offset, size);
- offset += size;
- flush_kernel_dcache_page(bvec.bv_page);
- bvec_kunmap_irq(buf, &flags);
- i++;
+ memcpy_to_bvec(&bvec, dev->bounce_buf + offset);
}
}
bio_for_each_segment(bvec, bio, iter) {
/* PS3 is ppc64, so we don't handle highmem */
- char *ptr = page_address(bvec.bv_page) + bvec.bv_offset;
+ char *ptr = bvec_virt(&bvec);
size_t len = bvec.bv_len, retlen;
dev_dbg(&dev->core, " %s %zu bytes at offset %llu\n", op,
rbd_dev->mapping.size = 0;
}
-static void zero_bvec(struct bio_vec *bv)
-{
- void *buf;
- unsigned long flags;
-
- buf = bvec_kmap_irq(bv, &flags);
- memset(buf, 0, bv->bv_len);
- flush_dcache_page(bv->bv_page);
- bvec_kunmap_irq(buf, &flags);
-}
-
static void zero_bios(struct ceph_bio_iter *bio_pos, u32 off, u32 bytes)
{
struct ceph_bio_iter it = *bio_pos;
ceph_bio_iter_advance(&it, off);
ceph_bio_iter_advance_step(&it, bytes, ({
- zero_bvec(&bv);
+ memzero_bvec(&bv);
}));
}
ceph_bvec_iter_advance(&it, off);
ceph_bvec_iter_advance_step(&it, bytes, ({
- zero_bvec(&bv);
+ memzero_bvec(&bv);
}));
}
};
ceph_bvec_iter_advance_step(&it, bytes, ({
- if (memchr_inv(page_address(bv.bv_page) + bv.bv_offset, 0,
- bv.bv_len))
+ if (memchr_inv(bvec_virt(&bv), 0, bv.bv_len))
return false;
}));
return true;
if (!disk)
return;
- if (disk->flags & GENHD_FL_UP)
+ if (host->state > HST_DEV_ACTIVATE)
del_gendisk(disk);
blk_cleanup_disk(disk);
}
{
struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
- if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
- kfree(page_address(req->special_vec.bv_page) +
- req->special_vec.bv_offset);
- }
-
+ if (req->rq_flags & RQF_SPECIAL_PAYLOAD)
+ kfree(bvec_virt(&req->special_vec));
blk_mq_end_request(req, virtblk_result(vbr));
}
"block size is changed unexpectedly, now is %u\n",
blk_size);
err = -EINVAL;
- goto err_cleanup_disk;
+ goto out_cleanup_disk;
}
/* Use topology information if available */
virtblk_update_capacity(vblk, false);
virtio_device_ready(vdev);
- device_add_disk(&vdev->dev, vblk->disk, virtblk_attr_groups);
+ err = device_add_disk(&vdev->dev, vblk->disk, virtblk_attr_groups);
+ if (err)
+ goto out_cleanup_disk;
+
return 0;
-err_cleanup_disk:
+out_cleanup_disk:
blk_cleanup_disk(vblk->disk);
out_free_tags:
blk_mq_free_tag_set(&vblk->tag_set);
config BLK_DEV_MD
tristate "RAID support"
+ select BLOCK_HOLDER_DEPRECATED if SYSFS
help
This driver lets you combine several hard disk partitions into one
logical block device. This can be used to simply append one
config BLK_DEV_DM
tristate "Device mapper support"
+ select BLOCK_HOLDER_DEPRECATED if SYSFS
select BLK_DEV_DM_BUILTIN
depends on DAX || DAX=n
help
config DM_EBS
tristate "Emulated block size target (EXPERIMENTAL)"
- depends on BLK_DEV_DM
+ depends on BLK_DEV_DM && !HIGHMEM
select DM_BUFIO
help
dm-ebs emulates smaller logical block size on backing devices
config BCACHE
tristate "Block device as cache"
+ select BLOCK_HOLDER_DEPRECATED if SYSFS
select CRC64
help
Allows a block device to be used as cache for other devices; uses
struct bvec_iter_all iter_all;
bio_for_each_segment_all(bv, b->bio, iter_all) {
- memcpy(page_address(bv->bv_page), addr, PAGE_SIZE);
+ memcpy(bvec_virt(bv), addr, PAGE_SIZE);
addr += PAGE_SIZE;
}
bcache_device_detach(d);
if (disk) {
- bool disk_added = (disk->flags & GENHD_FL_UP) != 0;
-
- if (disk_added)
- del_gendisk(disk);
-
blk_cleanup_disk(disk);
ida_simple_remove(&bcache_device_idx,
first_minor_to_idx(disk->first_minor));
n = BITS_TO_LONGS(d->nr_stripes) * sizeof(unsigned long);
d->full_dirty_stripes = kvzalloc(n, GFP_KERNEL);
if (!d->full_dirty_stripes)
- return -ENOMEM;
+ goto out_free_stripe_sectors_dirty;
idx = ida_simple_get(&bcache_device_idx, 0,
BCACHE_DEVICE_IDX_MAX, GFP_KERNEL);
if (idx < 0)
- return idx;
+ goto out_free_full_dirty_stripes;
if (bioset_init(&d->bio_split, 4, offsetof(struct bbio, bio),
BIOSET_NEED_BVECS|BIOSET_NEED_RESCUER))
- goto err;
+ goto out_ida_remove;
d->disk = blk_alloc_disk(NUMA_NO_NODE);
if (!d->disk)
- goto err;
+ goto out_bioset_exit;
set_capacity(d->disk, sectors);
snprintf(d->disk->disk_name, DISK_NAME_LEN, "bcache%i", idx);
return 0;
-err:
+out_bioset_exit:
+ bioset_exit(&d->bio_split);
+out_ida_remove:
ida_simple_remove(&bcache_device_idx, idx);
+out_free_full_dirty_stripes:
+ kvfree(d->full_dirty_stripes);
+out_free_stripe_sectors_dirty:
+ kvfree(d->stripe_sectors_dirty);
return -ENOMEM;
}
mutex_lock(&bch_register_lock);
- if (atomic_read(&dc->running))
+ if (atomic_read(&dc->running)) {
bd_unlink_disk_holder(dc->bdev, dc->disk.disk);
+ del_gendisk(dc->disk.disk);
+ }
bcache_device_free(&dc->disk);
list_del(&dc->list);
mutex_lock(&bch_register_lock);
atomic_long_sub(bcache_dev_sectors_dirty(d),
&d->c->flash_dev_dirty_sectors);
+ del_gendisk(d->disk);
bcache_device_free(d);
mutex_unlock(&bch_register_lock);
kobject_put(&d->kobj);
#include "closure.h"
-#define PAGE_SECTORS (PAGE_SIZE / 512)
-
struct closure;
#ifdef CONFIG_BCACHE_DEBUG
if (unlikely(!bv->bv_page || !bv_len))
return -EIO;
- pa = page_address(bv->bv_page) + bv->bv_offset;
+ pa = bvec_virt(bv);
/* Handle overlapping page <-> blocks */
while (bv_len) {
unsigned this_len;
BUG_ON(PageHighMem(biv.bv_page));
- tag = lowmem_page_address(biv.bv_page) + biv.bv_offset;
+ tag = bvec_virt(&biv);
this_len = min(biv.bv_len, data_to_process);
r = dm_integrity_rw_tag(ic, tag, &dio->metadata_block, &dio->metadata_offset,
this_len, dio->op == REQ_OP_READ ? TAG_READ : TAG_WRITE);
unsigned tag_now = min(biv.bv_len, tag_todo);
char *tag_addr;
BUG_ON(PageHighMem(biv.bv_page));
- tag_addr = lowmem_page_address(biv.bv_page) + biv.bv_offset;
+ tag_addr = bvec_virt(&biv);
if (likely(dio->op == REQ_OP_WRITE))
memcpy(tag_ptr, tag_addr, tag_now);
else
}
if (dm_get_md_type(md) == DM_TYPE_NONE) {
- /* Initial table load: acquire type of table. */
- dm_set_md_type(md, dm_table_get_type(t));
-
/* setup md->queue to reflect md's type (may block) */
r = dm_setup_md_queue(md, t);
if (r) {
if (r)
goto err_destroy_table;
- md->type = dm_table_get_type(t);
/* setup md->queue to reflect md's type (may block) */
r = dm_setup_md_queue(md, t);
if (r) {
err = blk_mq_init_allocated_queue(md->tag_set, md->queue);
if (err)
goto out_tag_set;
- elevator_init_mq(md->queue);
return 0;
out_tag_set:
}
dm_update_keyslot_manager(q, t);
- blk_queue_update_readahead(q);
+ disk_update_readahead(t->md->disk);
return 0;
}
static void bio_copy_block(struct dm_writecache *wc, struct bio *bio, void *data)
{
void *buf;
- unsigned long flags;
unsigned size;
int rw = bio_data_dir(bio);
unsigned remaining_size = wc->block_size;
do {
struct bio_vec bv = bio_iter_iovec(bio, bio->bi_iter);
- buf = bvec_kmap_irq(&bv, &flags);
+ buf = bvec_kmap_local(&bv);
size = bv.bv_len;
if (unlikely(size > remaining_size))
size = remaining_size;
memcpy_flushcache_optimized(data, buf, size);
}
- bvec_kunmap_irq(buf, &flags);
+ kunmap_local(buf);
data = (char *)data + size;
remaining_size -= size;
spin_lock(&_minor_lock);
md->disk->private_data = NULL;
spin_unlock(&_minor_lock);
- del_gendisk(md->disk);
- }
-
- if (md->queue)
+ if (dm_get_md_type(md) != DM_TYPE_NONE) {
+ dm_sysfs_exit(md);
+ del_gendisk(md->disk);
+ }
dm_queue_destroy_keyslot_manager(md->queue);
-
- if (md->disk)
blk_cleanup_disk(md->disk);
+ }
cleanup_srcu_struct(&md->io_barrier);
goto bad;
}
- add_disk_no_queue_reg(md->disk);
format_dev_t(md->name, MKDEV(_major, minor));
md->wq = alloc_workqueue("kdmflush", WQ_MEM_RECLAIM, 0);
*/
int dm_create(int minor, struct mapped_device **result)
{
- int r;
struct mapped_device *md;
md = alloc_dev(minor);
if (!md)
return -ENXIO;
- r = dm_sysfs_init(md);
- if (r) {
- free_dev(md);
- return r;
- }
-
*result = md;
return 0;
}
*/
int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t)
{
- int r;
+ enum dm_queue_mode type = dm_table_get_type(t);
struct queue_limits limits;
- enum dm_queue_mode type = dm_get_md_type(md);
+ int r;
switch (type) {
case DM_TYPE_REQUEST_BASED:
if (r)
return r;
- blk_register_queue(md->disk);
+ add_disk(md->disk);
+ r = dm_sysfs_init(md);
+ if (r) {
+ del_gendisk(md->disk);
+ return r;
+ }
+ md->type = type;
return 0;
}
DMWARN("%s: Forcibly removing mapped_device still in use! (%d users)",
dm_device_name(md), atomic_read(&md->holders));
- dm_sysfs_exit(md);
dm_table_destroy(__unbind(md));
free_dev(md);
}
static inline bool is_mddev_broken(struct md_rdev *rdev, const char *md_type)
{
- int flags = rdev->bdev->bd_disk->flags;
-
- if (!(flags & GENHD_FL_UP)) {
+ if (!disk_live(rdev->bdev->bd_disk)) {
if (!test_and_set_bit(MD_BROKEN, &rdev->mddev->flags))
pr_warn("md: %s: %s array has a missing/failed member\n",
mdname(rdev->mddev), md_type);
* track of the current selected device partition.
*/
unsigned int part_curr;
- struct device_attribute force_ro;
- struct device_attribute power_ro_lock;
int area_type;
/* debugfs files (only in main mmc_blk_data) */
return count;
}
+static DEVICE_ATTR(ro_lock_until_next_power_on, 0,
+ power_ro_lock_show, power_ro_lock_store);
+
static ssize_t force_ro_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
return ret;
}
+static DEVICE_ATTR(force_ro, 0644, force_ro_show, force_ro_store);
+
+static struct attribute *mmc_disk_attrs[] = {
+ &dev_attr_force_ro.attr,
+ &dev_attr_ro_lock_until_next_power_on.attr,
+ NULL,
+};
+
+static umode_t mmc_disk_attrs_is_visible(struct kobject *kobj,
+ struct attribute *a, int n)
+{
+ struct device *dev = container_of(kobj, struct device, kobj);
+ struct mmc_blk_data *md = mmc_blk_get(dev_to_disk(dev));
+ umode_t mode = a->mode;
+
+ if (a == &dev_attr_ro_lock_until_next_power_on.attr &&
+ (md->area_type & MMC_BLK_DATA_AREA_BOOT) &&
+ md->queue.card->ext_csd.boot_ro_lockable) {
+ mode = S_IRUGO;
+ if (!(md->queue.card->ext_csd.boot_ro_lock &
+ EXT_CSD_BOOT_WP_B_PWR_WP_DIS))
+ mode |= S_IWUSR;
+ }
+
+ mmc_blk_put(md);
+ return mode;
+}
+
+static const struct attribute_group mmc_disk_attr_group = {
+ .is_visible = mmc_disk_attrs_is_visible,
+ .attrs = mmc_disk_attrs,
+};
+
+static const struct attribute_group *mmc_disk_attr_groups[] = {
+ &mmc_disk_attr_group,
+ NULL,
+};
+
static int mmc_blk_open(struct block_device *bdev, fmode_t mode)
{
struct mmc_blk_data *md = mmc_blk_get(bdev->bd_disk);
}
#endif
+static int mmc_blk_alternative_gpt_sector(struct gendisk *disk,
+ sector_t *sector)
+{
+ struct mmc_blk_data *md;
+ int ret;
+
+ md = mmc_blk_get(disk);
+ if (!md)
+ return -EINVAL;
+
+ if (md->queue.card)
+ ret = mmc_card_alternative_gpt_sector(md->queue.card, sector);
+ else
+ ret = -ENODEV;
+
+ mmc_blk_put(md);
+
+ return ret;
+}
+
static const struct block_device_operations mmc_bdops = {
.open = mmc_blk_open,
.release = mmc_blk_release,
#ifdef CONFIG_COMPAT
.compat_ioctl = mmc_blk_compat_ioctl,
#endif
+ .alternative_gpt_sector = mmc_blk_alternative_gpt_sector,
};
static int mmc_blk_part_switch_pre(struct mmc_card *card,
sector_t size,
bool default_ro,
const char *subname,
- int area_type)
+ int area_type,
+ unsigned int part_type)
{
struct mmc_blk_data *md;
int devidx, ret;
kref_init(&md->kref);
md->queue.blkdata = md;
+ md->part_type = part_type;
md->disk->major = MMC_BLOCK_MAJOR;
md->disk->minors = perdev_minors;
md->disk->disk_name, mmc_card_id(card), mmc_card_name(card),
cap_str, md->read_only ? "(ro)" : "");
+ /* used in ->open, must be set before add_disk: */
+ if (area_type == MMC_BLK_DATA_AREA_MAIN)
+ dev_set_drvdata(&card->dev, md);
+ device_add_disk(md->parent, md->disk, mmc_disk_attr_groups);
return md;
err_kfree:
}
return mmc_blk_alloc_req(card, &card->dev, size, false, NULL,
- MMC_BLK_DATA_AREA_MAIN);
+ MMC_BLK_DATA_AREA_MAIN, 0);
}
static int mmc_blk_alloc_part(struct mmc_card *card,
struct mmc_blk_data *part_md;
part_md = mmc_blk_alloc_req(card, disk_to_dev(md->disk), size, default_ro,
- subname, area_type);
+ subname, area_type, part_type);
if (IS_ERR(part_md))
return PTR_ERR(part_md);
- part_md->part_type = part_type;
list_add(&part_md->part, &md->part);
return 0;
static void mmc_blk_remove_req(struct mmc_blk_data *md)
{
- struct mmc_card *card;
-
- if (md) {
- /*
- * Flush remaining requests and free queues. It
- * is freeing the queue that stops new requests
- * from being accepted.
- */
- card = md->queue.card;
- if (md->disk->flags & GENHD_FL_UP) {
- device_remove_file(disk_to_dev(md->disk), &md->force_ro);
- if ((md->area_type & MMC_BLK_DATA_AREA_BOOT) &&
- card->ext_csd.boot_ro_lockable)
- device_remove_file(disk_to_dev(md->disk),
- &md->power_ro_lock);
-
- del_gendisk(md->disk);
- }
- mmc_cleanup_queue(&md->queue);
- mmc_blk_put(md);
- }
+ /*
+ * Flush remaining requests and free queues. It is freeing the queue
+ * that stops new requests from being accepted.
+ */
+ del_gendisk(md->disk);
+ mmc_cleanup_queue(&md->queue);
+ mmc_blk_put(md);
}
static void mmc_blk_remove_parts(struct mmc_card *card,
}
}
-static int mmc_add_disk(struct mmc_blk_data *md)
-{
- int ret;
- struct mmc_card *card = md->queue.card;
-
- device_add_disk(md->parent, md->disk, NULL);
- md->force_ro.show = force_ro_show;
- md->force_ro.store = force_ro_store;
- sysfs_attr_init(&md->force_ro.attr);
- md->force_ro.attr.name = "force_ro";
- md->force_ro.attr.mode = S_IRUGO | S_IWUSR;
- ret = device_create_file(disk_to_dev(md->disk), &md->force_ro);
- if (ret)
- goto force_ro_fail;
-
- if ((md->area_type & MMC_BLK_DATA_AREA_BOOT) &&
- card->ext_csd.boot_ro_lockable) {
- umode_t mode;
-
- if (card->ext_csd.boot_ro_lock & EXT_CSD_BOOT_WP_B_PWR_WP_DIS)
- mode = S_IRUGO;
- else
- mode = S_IRUGO | S_IWUSR;
-
- md->power_ro_lock.show = power_ro_lock_show;
- md->power_ro_lock.store = power_ro_lock_store;
- sysfs_attr_init(&md->power_ro_lock.attr);
- md->power_ro_lock.attr.mode = mode;
- md->power_ro_lock.attr.name =
- "ro_lock_until_next_power_on";
- ret = device_create_file(disk_to_dev(md->disk),
- &md->power_ro_lock);
- if (ret)
- goto power_ro_lock_fail;
- }
- return ret;
-
-power_ro_lock_fail:
- device_remove_file(disk_to_dev(md->disk), &md->force_ro);
-force_ro_fail:
- del_gendisk(md->disk);
-
- return ret;
-}
-
#ifdef CONFIG_DEBUG_FS
static int mmc_dbg_card_status_get(void *data, u64 *val)
static int mmc_blk_probe(struct mmc_card *card)
{
- struct mmc_blk_data *md, *part_md;
+ struct mmc_blk_data *md;
int ret = 0;
/*
if (ret)
goto out;
- dev_set_drvdata(&card->dev, md);
-
- ret = mmc_add_disk(md);
- if (ret)
- goto out;
-
- list_for_each_entry(part_md, &md->part, part) {
- ret = mmc_add_disk(part_md);
- if (ret)
- goto out;
- }
-
/* Add two debugfs entries */
mmc_blk_add_debugfs(card, md);
}
EXPORT_SYMBOL(mmc_detect_card_removed);
+int mmc_card_alternative_gpt_sector(struct mmc_card *card, sector_t *gpt_sector)
+{
+ unsigned int boot_sectors_num;
+
+ if ((!(card->host->caps2 & MMC_CAP2_ALT_GPT_TEGRA)))
+ return -EOPNOTSUPP;
+
+ /* filter out unrelated cards */
+ if (card->ext_csd.rev < 3 ||
+ !mmc_card_mmc(card) ||
+ !mmc_card_is_blockaddr(card) ||
+ mmc_card_is_removable(card->host))
+ return -ENOENT;
+
+ /*
+ * eMMC storage has two special boot partitions in addition to the
+ * main one. NVIDIA's bootloader linearizes eMMC boot0->boot1->main
+ * accesses, this means that the partition table addresses are shifted
+ * by the size of boot partitions. In accordance with the eMMC
+ * specification, the boot partition size is calculated as follows:
+ *
+ * boot partition size = 128K byte x BOOT_SIZE_MULT
+ *
+ * Calculate number of sectors occupied by the both boot partitions.
+ */
+ boot_sectors_num = card->ext_csd.raw_boot_mult * SZ_128K /
+ SZ_512 * MMC_NUM_BOOT_PARTITION;
+
+ /* Defined by NVIDIA and used by Android devices. */
+ *gpt_sector = card->ext_csd.sectors - boot_sectors_num - 1;
+
+ return 0;
+}
+EXPORT_SYMBOL(mmc_card_alternative_gpt_sector);
+
void mmc_rescan(struct work_struct *work)
{
struct mmc_host *host =
void mmc_get_card(struct mmc_card *card, struct mmc_ctx *ctx);
void mmc_put_card(struct mmc_card *card, struct mmc_ctx *ctx);
+int mmc_card_alternative_gpt_sector(struct mmc_card *card, sector_t *sector);
+
/**
* mmc_claim_host - exclusively claim a host
* @host: mmc host to claim
ext_csd[EXT_CSD_ERASE_TIMEOUT_MULT];
card->ext_csd.raw_hc_erase_grp_size =
ext_csd[EXT_CSD_HC_ERASE_GRP_SIZE];
+ card->ext_csd.raw_boot_mult =
+ ext_csd[EXT_CSD_BOOT_MULT];
if (card->ext_csd.rev >= 3) {
u8 sa_shift = ext_csd[EXT_CSD_S_A_TIMEOUT];
card->ext_csd.part_config = ext_csd[EXT_CSD_PART_CONFIG];
*/
#define NVQUIRK_HAS_TMCLK BIT(10)
+#define NVQUIRK_HAS_ANDROID_GPT_SECTOR BIT(11)
+
/* SDMMC CQE Base Address for Tegra Host Ver 4.1 and Higher */
#define SDHCI_TEGRA_CQE_BASE_ADDR 0xF000
.pdata = &sdhci_tegra20_pdata,
.dma_mask = DMA_BIT_MASK(32),
.nvquirks = NVQUIRK_FORCE_SDHCI_SPEC_200 |
+ NVQUIRK_HAS_ANDROID_GPT_SECTOR |
NVQUIRK_ENABLE_BLOCK_GAP_DET,
};
.nvquirks = NVQUIRK_ENABLE_SDHCI_SPEC_300 |
NVQUIRK_ENABLE_SDR50 |
NVQUIRK_ENABLE_SDR104 |
+ NVQUIRK_HAS_ANDROID_GPT_SECTOR |
NVQUIRK_HAS_PADCALIB,
};
static const struct sdhci_tegra_soc_data soc_data_tegra114 = {
.pdata = &sdhci_tegra114_pdata,
.dma_mask = DMA_BIT_MASK(32),
+ .nvquirks = NVQUIRK_HAS_ANDROID_GPT_SECTOR,
};
static const struct sdhci_pltfm_data sdhci_tegra124_pdata = {
static const struct sdhci_tegra_soc_data soc_data_tegra124 = {
.pdata = &sdhci_tegra124_pdata,
.dma_mask = DMA_BIT_MASK(34),
+ .nvquirks = NVQUIRK_HAS_ANDROID_GPT_SECTOR,
};
static const struct sdhci_ops tegra210_sdhci_ops = {
tegra_host->pad_control_available = false;
tegra_host->soc_data = soc_data;
+ if (soc_data->nvquirks & NVQUIRK_HAS_ANDROID_GPT_SECTOR)
+ host->mmc->caps2 |= MMC_CAP2_ALT_GPT_TEGRA;
+
if (soc_data->nvquirks & NVQUIRK_NEEDS_PAD_CONTROL) {
rc = tegra_sdhci_init_pinctrl_info(&pdev->dev, tegra_host);
if (rc == 0)
{
if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
struct nvme_ctrl *ctrl = nvme_req(req)->ctrl;
- struct page *page = req->special_vec.bv_page;
- if (page == ctrl->discard_page)
+ if (req->special_vec.bv_page == ctrl->discard_page)
clear_bit_unlock(0, &ctrl->discard_page_busy);
else
- kfree(page_address(page) + req->special_vec.bv_offset);
+ kfree(bvec_virt(&req->special_vec));
}
}
EXPORT_SYMBOL_GPL(nvme_cleanup_cmd);
static inline bool nvme_first_scan(struct gendisk *disk)
{
/* nvme_alloc_ns() scans the disk prior to adding it */
- return !(disk->flags & GENHD_FL_UP);
+ return !disk_live(disk);
}
static void nvme_set_chunk_sectors(struct nvme_ns *ns, struct nvme_id_ns *id)
nvme_update_disk_info(ns->head->disk, ns, id);
blk_stack_limits(&ns->head->disk->queue->limits,
&ns->queue->limits, 0);
- blk_queue_update_readahead(ns->head->disk->queue);
+ disk_update_readahead(ns->head->disk);
blk_mq_unfreeze_queue(ns->head->disk->queue);
}
return 0;
if (!ns)
goto out_free_id;
- ns->queue = blk_mq_init_queue(ctrl->tagset);
- if (IS_ERR(ns->queue))
+ disk = blk_mq_alloc_disk(ctrl->tagset, ns);
+ if (IS_ERR(disk))
goto out_free_ns;
+ disk->fops = &nvme_bdev_ops;
+ disk->private_data = ns;
+
+ ns->disk = disk;
+ ns->queue = disk->queue;
if (ctrl->opts && ctrl->opts->data_digest)
blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, ns->queue);
if (ctrl->ops->flags & NVME_F_PCI_P2PDMA)
blk_queue_flag_set(QUEUE_FLAG_PCI_P2PDMA, ns->queue);
- ns->queue->queuedata = ns;
ns->ctrl = ctrl;
kref_init(&ns->kref);
if (nvme_init_ns_head(ns, nsid, ids, id->nmic & NVME_NS_NMIC_SHARED))
- goto out_free_queue;
+ goto out_cleanup_disk;
- disk = alloc_disk_node(0, node);
- if (!disk)
- goto out_unlink_ns;
-
- disk->fops = &nvme_bdev_ops;
- disk->private_data = ns;
- disk->queue = ns->queue;
/*
* Without the multipath code enabled, multiple controller per
* subsystems are visible as devices and thus we cannot use the
if (!nvme_mpath_set_disk_name(ns, disk->disk_name, &disk->flags))
sprintf(disk->disk_name, "nvme%dn%d", ctrl->instance,
ns->head->instance);
- ns->disk = disk;
if (nvme_update_ns_info(ns, id))
- goto out_put_disk;
+ goto out_unlink_ns;
if ((ctrl->quirks & NVME_QUIRK_LIGHTNVM) && id->vs[0] == 0x1) {
if (nvme_nvm_register(ns, disk->disk_name, node)) {
dev_warn(ctrl->device, "LightNVM init failure\n");
- goto out_put_disk;
+ goto out_unlink_ns;
}
}
kfree(id);
return;
- out_put_disk:
- /* prevent double queue cleanup */
- ns->disk->queue = NULL;
- put_disk(ns->disk);
+
out_unlink_ns:
mutex_lock(&ctrl->subsys->lock);
list_del_rcu(&ns->siblings);
list_del_init(&ns->head->entry);
mutex_unlock(&ctrl->subsys->lock);
nvme_put_ns_head(ns->head);
- out_free_queue:
- blk_cleanup_queue(ns->queue);
+ out_cleanup_disk:
+ blk_cleanup_disk(disk);
out_free_ns:
kfree(ns);
out_free_id:
nvme_mpath_clear_current_path(ns);
synchronize_srcu(&ns->head->srcu); /* wait for concurrent submissions */
- if (ns->disk->flags & GENHD_FL_UP) {
- if (!nvme_ns_head_multipath(ns->head))
- nvme_cdev_del(&ns->cdev, &ns->cdev_device);
- del_gendisk(ns->disk);
- blk_cleanup_queue(ns->queue);
- if (blk_get_integrity(ns->disk))
- blk_integrity_unregister(ns->disk);
- }
+ if (!nvme_ns_head_multipath(ns->head))
+ nvme_cdev_del(&ns->cdev, &ns->cdev_device);
+ del_gendisk(ns->disk);
+ blk_cleanup_queue(ns->queue);
+ if (blk_get_integrity(ns->disk))
+ blk_integrity_unregister(ns->disk);
down_write(&ns->ctrl->namespaces_rwsem);
list_del_init(&ns->list);
if (!head->disk)
return;
kblockd_schedule_work(&head->requeue_work);
- if (head->disk->flags & GENHD_FL_UP) {
+ if (test_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) {
nvme_cdev_del(&head->cdev, &head->cdev_device);
del_gendisk(head->disk);
}
dbio = dreq->bio;
recid = first_rec;
rq_for_each_segment(bv, req, iter) {
- dst = page_address(bv.bv_page) + bv.bv_offset;
+ dst = bvec_virt(&bv);
for (off = 0; off < bv.bv_len; off += blksize) {
memset(dbio, 0, sizeof (struct dasd_diag_bio));
dbio->type = rw_cmd;
end_blk = (curr_trk + 1) * recs_per_trk;
rq_for_each_segment(bv, req, iter) {
- dst = page_address(bv.bv_page) + bv.bv_offset;
+ dst = bvec_virt(&bv);
for (off = 0; off < bv.bv_len; off += blksize) {
if (first_blk + blk_count >= end_blk) {
cqr->proc_bytes = blk_count * blksize;
last_rec - recid + 1, cmd, basedev, blksize);
}
rq_for_each_segment(bv, req, iter) {
- dst = page_address(bv.bv_page) + bv.bv_offset;
+ dst = bvec_virt(&bv);
if (dasd_page_cache) {
char *copy = kmem_cache_alloc(dasd_page_cache,
GFP_DMA | __GFP_NOWARN);
idaw_dst = NULL;
idaw_len = 0;
rq_for_each_segment(bv, req, iter) {
- dst = page_address(bv.bv_page) + bv.bv_offset;
+ dst = bvec_virt(&bv);
seg_len = bv.bv_len;
while (seg_len) {
if (new_track) {
new_track = 1;
recid = first_rec;
rq_for_each_segment(bv, req, iter) {
- dst = page_address(bv.bv_page) + bv.bv_offset;
+ dst = bvec_virt(&bv);
seg_len = bv.bv_len;
while (seg_len) {
if (new_track) {
}
} else {
rq_for_each_segment(bv, req, iter) {
- dst = page_address(bv.bv_page) + bv.bv_offset;
+ dst = bvec_virt(&bv);
last_tidaw = itcw_add_tidaw(itcw, 0x00,
dst, bv.bv_len);
if (IS_ERR(last_tidaw)) {
idaws = idal_create_words(idaws, rawpadpage, PAGE_SIZE);
}
rq_for_each_segment(bv, req, iter) {
- dst = page_address(bv.bv_page) + bv.bv_offset;
+ dst = bvec_virt(&bv);
seg_len = bv.bv_len;
if (cmd == DASD_ECKD_CCW_READ_TRACK)
memset(dst, 0, seg_len);
if (private->uses_cdl == 0 || recid > 2*blk_per_trk)
ccw++;
rq_for_each_segment(bv, req, iter) {
- dst = page_address(bv.bv_page) + bv.bv_offset;
+ dst = bvec_virt(&bv);
for (off = 0; off < bv.bv_len; off += blksize) {
/* Skip locate record. */
if (private->uses_cdl && recid <= 2*blk_per_trk)
}
recid = first_rec;
rq_for_each_segment(bv, req, iter) {
- dst = page_address(bv.bv_page) + bv.bv_offset;
+ dst = bvec_virt(&bv);
if (dasd_page_cache) {
char *copy = kmem_cache_alloc(dasd_page_cache,
GFP_DMA | __GFP_NOWARN);
if (private->rdc_data.mode.bits.data_chain != 0)
ccw++;
rq_for_each_segment(bv, req, iter) {
- dst = page_address(bv.bv_page) + bv.bv_offset;
+ dst = bvec_virt(&bv);
for (off = 0; off < bv.bv_len; off += blksize) {
/* Skip locate record. */
if (private->rdc_data.mode.bits.data_chain == 0)
#include "dasd_int.h"
+static struct lock_class_key dasd_bio_compl_lkclass;
+
/*
* Allocate and register gendisk structure for device.
*/
if (base->devindex >= DASD_PER_MAJOR)
return -EBUSY;
- gdp = alloc_disk(1 << DASD_PARTN_BITS);
+ gdp = __alloc_disk_node(block->request_queue, NUMA_NO_NODE,
+ &dasd_bio_compl_lkclass);
if (!gdp)
return -ENOMEM;
/* Initialize gendisk structure. */
gdp->major = DASD_MAJOR;
gdp->first_minor = base->devindex << DASD_PARTN_BITS;
+ gdp->minors = 1 << DASD_PARTN_BITS;
gdp->fops = &dasd_device_operations;
/*
test_bit(DASD_FLAG_DEVICE_RO, &base->flags))
set_disk_ro(gdp, 1);
dasd_add_link_to_gendisk(gdp, base);
- gdp->queue = block->request_queue;
block->gdp = gdp;
set_capacity(block->gdp, 0);
device_add_disk(&base->cdev->dev, block->gdp, NULL);
index = (bio->bi_iter.bi_sector >> 3);
bio_for_each_segment(bvec, bio, iter) {
- page_addr = (unsigned long)
- page_address(bvec.bv_page) + bvec.bv_offset;
+ page_addr = (unsigned long)bvec_virt(&bvec);
source_addr = dev_info->start + (index<<12) + bytes_done;
if (unlikely((page_addr & 4095) != 0) || (bvec.bv_len & 4095) != 0)
// More paranoia.
static struct kmem_cache *sd_cdb_cache;
static mempool_t *sd_cdb_pool;
static mempool_t *sd_page_pool;
+static struct lock_class_key sd_bio_compl_lkclass;
static const char *sd_cache_types[] = {
"write through", "none", "write back",
cmd->cmnd[0] = UNMAP;
cmd->cmnd[8] = 24;
- buf = page_address(rq->special_vec.bv_page);
+ buf = bvec_virt(&rq->special_vec);
put_unaligned_be16(6 + 16, &buf[0]);
put_unaligned_be16(16, &buf[2]);
put_unaligned_be64(lba, &buf[8]);
if (!sdkp)
goto out;
- gd = alloc_disk(SD_MINORS);
+ gd = __alloc_disk_node(sdp->request_queue, NUMA_NO_NODE,
+ &sd_bio_compl_lkclass);
if (!gd)
goto out_free;
gd->major = sd_major((index & 0xf0) >> 4);
gd->first_minor = ((index & 0xf) << 4) | (index & 0xfff00);
+ gd->minors = SD_MINORS;
gd->fops = &sd_fops;
gd->private_data = &sdkp->driver;
- gd->queue = sdkp->device->request_queue;
/* defaults, until the device tells us otherwise */
sdp->sector_size = 512;
bool exclude; /* 1->open(O_EXCL) succeeded and is active */
int open_cnt; /* count of opens (perhaps < num(sfds) ) */
char sgdebug; /* 0->off, 1->sense, 9->dump dev, 10-> all devs */
- struct gendisk *disk;
+ char name[DISK_NAME_LEN];
struct cdev * cdev; /* char_dev [sysfs: /sys/cdev/major/sg<n>] */
struct kref d_ref;
} Sg_device;
#define SZ_SG_REQ_INFO sizeof(sg_req_info_t)
#define sg_printk(prefix, sdp, fmt, a...) \
- sdev_prefix_printk(prefix, (sdp)->device, \
- (sdp)->disk->disk_name, fmt, ##a)
+ sdev_prefix_printk(prefix, (sdp)->device, (sdp)->name, fmt, ##a)
/*
* The SCSI interfaces that use read() and write() as an asynchronous variant of
srp->rq->timeout = timeout;
kref_get(&sfp->f_ref); /* sg_rq_end_io() does kref_put(). */
- blk_execute_rq_nowait(sdp->disk, srp->rq, at_head, sg_rq_end_io);
+ blk_execute_rq_nowait(NULL, srp->rq, at_head, sg_rq_end_io);
return 0;
}
return put_user(max_sectors_bytes(sdp->device->request_queue),
ip);
case BLKTRACESETUP:
- return blk_trace_setup(sdp->device->request_queue,
- sdp->disk->disk_name,
+ return blk_trace_setup(sdp->device->request_queue, sdp->name,
MKDEV(SCSI_GENERIC_MAJOR, sdp->index),
NULL, p);
case BLKTRACESTART:
static int sg_sysfs_valid = 0;
static Sg_device *
-sg_alloc(struct gendisk *disk, struct scsi_device *scsidp)
+sg_alloc(struct scsi_device *scsidp)
{
struct request_queue *q = scsidp->request_queue;
Sg_device *sdp;
SCSI_LOG_TIMEOUT(3, sdev_printk(KERN_INFO, scsidp,
"sg_alloc: dev=%d \n", k));
- sprintf(disk->disk_name, "sg%d", k);
- disk->first_minor = k;
- sdp->disk = disk;
+ sprintf(sdp->name, "sg%d", k);
sdp->device = scsidp;
mutex_init(&sdp->open_rel_lock);
INIT_LIST_HEAD(&sdp->sfds);
sg_add_device(struct device *cl_dev, struct class_interface *cl_intf)
{
struct scsi_device *scsidp = to_scsi_device(cl_dev->parent);
- struct gendisk *disk;
Sg_device *sdp = NULL;
struct cdev * cdev = NULL;
int error;
unsigned long iflags;
- disk = alloc_disk(1);
- if (!disk) {
- pr_warn("%s: alloc_disk failed\n", __func__);
- return -ENOMEM;
- }
- disk->major = SCSI_GENERIC_MAJOR;
-
error = -ENOMEM;
cdev = cdev_alloc();
if (!cdev) {
cdev->owner = THIS_MODULE;
cdev->ops = &sg_fops;
- sdp = sg_alloc(disk, scsidp);
+ sdp = sg_alloc(scsidp);
if (IS_ERR(sdp)) {
pr_warn("%s: sg_alloc failed\n", __func__);
error = PTR_ERR(sdp);
sg_class_member = device_create(sg_sysfs_class, cl_dev->parent,
MKDEV(SCSI_GENERIC_MAJOR,
sdp->index),
- sdp, "%s", disk->disk_name);
+ sdp, "%s", sdp->name);
if (IS_ERR(sg_class_member)) {
pr_err("%s: device_create failed\n", __func__);
error = PTR_ERR(sg_class_member);
kfree(sdp);
out:
- put_disk(disk);
if (cdev)
cdev_del(cdev);
return error;
SCSI_LOG_TIMEOUT(3,
sg_printk(KERN_INFO, sdp, "sg_device_destroy\n"));
- put_disk(sdp->disk);
kfree(sdp);
}
goto skip;
read_lock(&sdp->sfd_lock);
if (!list_empty(&sdp->sfds)) {
- seq_printf(s, " >>> device=%s ", sdp->disk->disk_name);
+ seq_printf(s, " >>> device=%s ", sdp->name);
if (atomic_read(&sdp->detaching))
seq_puts(s, "detaching pending close ");
else if (sdp->device) {
static unsigned long sr_index_bits[SR_DISKS / BITS_PER_LONG];
static DEFINE_SPINLOCK(sr_index_lock);
+static struct lock_class_key sr_bio_compl_lkclass;
+
/* This semaphore is used to mediate the 0->1 reference get in the
* face of object destruction (i.e. we can't allow a get on an
* object after last put) */
kref_init(&cd->kref);
- disk = alloc_disk(1);
+ disk = __alloc_disk_node(sdev->request_queue, NUMA_NO_NODE,
+ &sr_bio_compl_lkclass);
if (!disk)
goto fail_free;
mutex_init(&cd->lock);
disk->major = SCSI_CDROM_MAJOR;
disk->first_minor = minor;
+ disk->minors = 1;
sprintf(disk->disk_name, "sr%d", minor);
disk->fops = &sr_bdops;
disk->flags = GENHD_FL_CD | GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE;
set_capacity(disk, cd->capacity);
disk->private_data = &cd->driver;
- disk->queue = sdev->request_queue;
if (register_cdrom(disk, &cd->cdi))
goto fail_minor;
}
\f
-static inline char *tape_name(struct scsi_tape *tape)
-{
- return tape->disk->disk_name;
-}
-
#define st_printk(prefix, t, fmt, a...) \
- sdev_prefix_printk(prefix, (t)->device, tape_name(t), fmt, ##a)
+ sdev_prefix_printk(prefix, (t)->device, (t)->name, fmt, ##a)
#ifdef DEBUG
#define DEBC_printk(t, fmt, a...) \
if (debugging) { st_printk(ST_DEB_MSG, t, fmt, ##a ); }
int result = SRpnt->result;
u8 scode;
DEB(const char *stp;)
- char *name = tape_name(STp);
+ char *name = STp->name;
struct st_cmdstatus *cmdstatp;
if (!result)
!capable(CAP_SYS_RAWIO))
i = -EPERM;
else
- i = scsi_cmd_ioctl(STp->disk->queue, STp->disk,
- file->f_mode, cmd_in, p);
+ i = scsi_cmd_ioctl(STp->device->request_queue,
+ NULL, file->f_mode, cmd_in,
+ p);
if (i != -ENOTTY)
return i;
break;
i = mode << (4 - ST_NBR_MODE_BITS);
snprintf(name, 10, "%s%s%s", rew ? "n" : "",
- tape->disk->disk_name, st_formats[i]);
+ tape->name, st_formats[i]);
dev = device_create(&st_sysfs_class, &tape->device->sdev_gendev,
cdev_devno, &tape->modes[mode], "%s", name);
static int st_probe(struct device *dev)
{
struct scsi_device *SDp = to_scsi_device(dev);
- struct gendisk *disk = NULL;
struct scsi_tape *tpnt = NULL;
struct st_modedef *STm;
struct st_partstat *STps;
goto out;
}
- disk = alloc_disk(1);
- if (!disk) {
- sdev_printk(KERN_ERR, SDp,
- "st: out of memory. Device not attached.\n");
- goto out_buffer_free;
- }
-
tpnt = kzalloc(sizeof(struct scsi_tape), GFP_KERNEL);
if (tpnt == NULL) {
sdev_printk(KERN_ERR, SDp,
"st: Can't allocate device descriptor.\n");
- goto out_put_disk;
+ goto out_buffer_free;
}
kref_init(&tpnt->kref);
- tpnt->disk = disk;
- disk->private_data = &tpnt->driver;
- /* SCSI tape doesn't register this gendisk via add_disk(). Manually
- * take queue reference that release_disk() expects. */
- if (!blk_get_queue(SDp->request_queue))
- goto out_put_disk;
- disk->queue = SDp->request_queue;
tpnt->driver = &st_template;
tpnt->device = SDp;
idr_preload_end();
if (error < 0) {
pr_warn("st: idr allocation failed: %d\n", error);
- goto out_put_queue;
+ goto out_free_tape;
}
tpnt->index = error;
- sprintf(disk->disk_name, "st%d", tpnt->index);
+ sprintf(tpnt->name, "st%d", tpnt->index);
tpnt->stats = kzalloc(sizeof(struct scsi_tape_stats), GFP_KERNEL);
if (tpnt->stats == NULL) {
sdev_printk(KERN_ERR, SDp,
scsi_autopm_put_device(SDp);
sdev_printk(KERN_NOTICE, SDp,
- "Attached scsi tape %s\n", tape_name(tpnt));
+ "Attached scsi tape %s\n", tpnt->name);
sdev_printk(KERN_INFO, SDp, "%s: try direct i/o: %s (alignment %d B)\n",
- tape_name(tpnt), tpnt->try_dio ? "yes" : "no",
+ tpnt->name, tpnt->try_dio ? "yes" : "no",
queue_dma_alignment(SDp->request_queue) + 1);
return 0;
spin_lock(&st_index_lock);
idr_remove(&st_index_idr, tpnt->index);
spin_unlock(&st_index_lock);
-out_put_queue:
- blk_put_queue(disk->queue);
-out_put_disk:
- put_disk(disk);
+out_free_tape:
kfree(tpnt);
out_buffer_free:
kfree(buffer);
static void scsi_tape_release(struct kref *kref)
{
struct scsi_tape *tpnt = to_scsi_tape(kref);
- struct gendisk *disk = tpnt->disk;
tpnt->device = NULL;
kfree(tpnt->buffer);
}
- disk->private_data = NULL;
- put_disk(disk);
kfree(tpnt->stats);
kfree(tpnt);
return;
unsigned char last_cmnd[6];
unsigned char last_sense[16];
#endif
- struct gendisk *disk;
+ char name[DISK_NAME_LEN];
struct kref kref;
struct scsi_tape_stats *stats;
};
#include <linux/uaccess.h>
#include <linux/suspend.h>
#include "internal.h"
+#include "../block/blk.h"
struct bdev_inode {
struct block_device bdev;
return retval;
}
-int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
+static int blkdev_fsync(struct file *filp, loff_t start, loff_t end,
+ int datasync)
{
struct inode *bd_inode = bdev_file_inode(filp);
struct block_device *bdev = I_BDEV(bd_inode);
return error;
}
-EXPORT_SYMBOL(blkdev_fsync);
/**
* bdev_read_page() - Start reading a page from a block device
if (!ei)
return NULL;
memset(&ei->bdev, 0, sizeof(ei->bdev));
- ei->bdev.bd_bdi = &noop_backing_dev_info;
return &ei->vfs_inode;
}
free_percpu(bdev->bd_stats);
kfree(bdev->bd_meta_info);
- if (!bdev_is_partition(bdev))
+ if (!bdev_is_partition(bdev)) {
+ if (bdev->bd_disk && bdev->bd_disk->bdi)
+ bdi_put(bdev->bd_disk->bdi);
kfree(bdev->bd_disk);
+ }
+
+ if (MAJOR(bdev->bd_dev) == BLOCK_EXT_MAJOR)
+ blk_free_ext_minor(MINOR(bdev->bd_dev));
+
kmem_cache_free(bdev_cachep, BDEV_I(inode));
}
static void bdev_evict_inode(struct inode *inode)
{
- struct block_device *bdev = &BDEV_I(inode)->bdev;
truncate_inode_pages_final(&inode->i_data);
invalidate_inode_buffers(inode); /* is it needed here? */
clear_inode(inode);
- /* Detach inode from wb early as bdi_put() may free bdi->wb */
- inode_detach_wb(inode);
- if (bdev->bd_bdi != &noop_backing_dev_info) {
- bdi_put(bdev->bd_bdi);
- bdev->bd_bdi = &noop_backing_dev_info;
- }
}
static const struct super_operations bdev_sops = {
bdev->bd_disk = disk;
bdev->bd_partno = partno;
bdev->bd_inode = inode;
-#ifdef CONFIG_SYSFS
- INIT_LIST_HEAD(&bdev->bd_holder_disks);
-#endif
bdev->bd_stats = alloc_percpu(struct disk_stats);
if (!bdev->bd_stats) {
iput(inode);
insert_inode_hash(bdev->bd_inode);
}
-static struct block_device *bdget(dev_t dev)
-{
- struct inode *inode;
-
- inode = ilookup(blockdev_superblock, dev);
- if (!inode)
- return NULL;
- return &BDEV_I(inode)->bdev;
-}
-
-/**
- * bdgrab -- Grab a reference to an already referenced block device
- * @bdev: Block device to grab a reference to.
- *
- * Returns the block_device with an additional reference when successful,
- * or NULL if the inode is already beeing freed.
- */
-struct block_device *bdgrab(struct block_device *bdev)
-{
- if (!igrab(bdev->bd_inode))
- return NULL;
- return bdev;
-}
-EXPORT_SYMBOL(bdgrab);
-
long nr_blockdev_pages(void)
{
struct inode *inode;
return ret;
}
-void bdput(struct block_device *bdev)
-{
- iput(bdev->bd_inode);
-}
-EXPORT_SYMBOL(bdput);
-
/**
* bd_may_claim - test whether a block device can be claimed
* @bdev: block device of interest
}
EXPORT_SYMBOL(bd_abort_claiming);
-#ifdef CONFIG_SYSFS
-struct bd_holder_disk {
- struct list_head list;
- struct gendisk *disk;
- int refcnt;
-};
-
-static struct bd_holder_disk *bd_find_holder_disk(struct block_device *bdev,
- struct gendisk *disk)
-{
- struct bd_holder_disk *holder;
-
- list_for_each_entry(holder, &bdev->bd_holder_disks, list)
- if (holder->disk == disk)
- return holder;
- return NULL;
-}
-
-static int add_symlink(struct kobject *from, struct kobject *to)
-{
- return sysfs_create_link(from, to, kobject_name(to));
-}
-
-static void del_symlink(struct kobject *from, struct kobject *to)
-{
- sysfs_remove_link(from, kobject_name(to));
-}
-
-/**
- * bd_link_disk_holder - create symlinks between holding disk and slave bdev
- * @bdev: the claimed slave bdev
- * @disk: the holding disk
- *
- * DON'T USE THIS UNLESS YOU'RE ALREADY USING IT.
- *
- * This functions creates the following sysfs symlinks.
- *
- * - from "slaves" directory of the holder @disk to the claimed @bdev
- * - from "holders" directory of the @bdev to the holder @disk
- *
- * For example, if /dev/dm-0 maps to /dev/sda and disk for dm-0 is
- * passed to bd_link_disk_holder(), then:
- *
- * /sys/block/dm-0/slaves/sda --> /sys/block/sda
- * /sys/block/sda/holders/dm-0 --> /sys/block/dm-0
- *
- * The caller must have claimed @bdev before calling this function and
- * ensure that both @bdev and @disk are valid during the creation and
- * lifetime of these symlinks.
- *
- * CONTEXT:
- * Might sleep.
- *
- * RETURNS:
- * 0 on success, -errno on failure.
- */
-int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
-{
- struct bd_holder_disk *holder;
- int ret = 0;
-
- mutex_lock(&bdev->bd_disk->open_mutex);
-
- WARN_ON_ONCE(!bdev->bd_holder);
-
- /* FIXME: remove the following once add_disk() handles errors */
- if (WARN_ON(!disk->slave_dir || !bdev->bd_holder_dir))
- goto out_unlock;
-
- holder = bd_find_holder_disk(bdev, disk);
- if (holder) {
- holder->refcnt++;
- goto out_unlock;
- }
-
- holder = kzalloc(sizeof(*holder), GFP_KERNEL);
- if (!holder) {
- ret = -ENOMEM;
- goto out_unlock;
- }
-
- INIT_LIST_HEAD(&holder->list);
- holder->disk = disk;
- holder->refcnt = 1;
-
- ret = add_symlink(disk->slave_dir, bdev_kobj(bdev));
- if (ret)
- goto out_free;
-
- ret = add_symlink(bdev->bd_holder_dir, &disk_to_dev(disk)->kobj);
- if (ret)
- goto out_del;
- /*
- * bdev could be deleted beneath us which would implicitly destroy
- * the holder directory. Hold on to it.
- */
- kobject_get(bdev->bd_holder_dir);
-
- list_add(&holder->list, &bdev->bd_holder_disks);
- goto out_unlock;
-
-out_del:
- del_symlink(disk->slave_dir, bdev_kobj(bdev));
-out_free:
- kfree(holder);
-out_unlock:
- mutex_unlock(&bdev->bd_disk->open_mutex);
- return ret;
-}
-EXPORT_SYMBOL_GPL(bd_link_disk_holder);
-
-/**
- * bd_unlink_disk_holder - destroy symlinks created by bd_link_disk_holder()
- * @bdev: the calimed slave bdev
- * @disk: the holding disk
- *
- * DON'T USE THIS UNLESS YOU'RE ALREADY USING IT.
- *
- * CONTEXT:
- * Might sleep.
- */
-void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
-{
- struct bd_holder_disk *holder;
-
- mutex_lock(&bdev->bd_disk->open_mutex);
-
- holder = bd_find_holder_disk(bdev, disk);
-
- if (!WARN_ON_ONCE(holder == NULL) && !--holder->refcnt) {
- del_symlink(disk->slave_dir, bdev_kobj(bdev));
- del_symlink(bdev->bd_holder_dir, &disk_to_dev(disk)->kobj);
- kobject_put(bdev->bd_holder_dir);
- list_del_init(&holder->list);
- kfree(holder);
- }
-
- mutex_unlock(&bdev->bd_disk->open_mutex);
-}
-EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
-#endif
-
static void blkdev_flush_mapping(struct block_device *bdev)
{
WARN_ON_ONCE(bdev->bd_holders);
}
}
- if (!bdev->bd_openers) {
+ if (!bdev->bd_openers)
set_init_blocksize(bdev);
- if (bdev->bd_bdi == &noop_backing_dev_info)
- bdev->bd_bdi = bdi_get(disk->queue->backing_dev_info);
- }
if (test_bit(GD_NEED_PART_SCAN, &disk->state))
bdev_disk_changed(disk, false);
bdev->bd_openers++;
static int blkdev_get_part(struct block_device *part, fmode_t mode)
{
struct gendisk *disk = part->bd_disk;
- struct block_device *whole;
int ret;
if (part->bd_openers)
goto done;
- whole = bdgrab(disk->part0);
- ret = blkdev_get_whole(whole, mode);
+ ret = blkdev_get_whole(bdev_whole(part), mode);
if (ret)
- goto out_put_whole;
+ return ret;
ret = -ENXIO;
if (!bdev_nr_sectors(part))
disk->open_partitions++;
set_init_blocksize(part);
- if (part->bd_bdi == &noop_backing_dev_info)
- part->bd_bdi = bdi_get(disk->queue->backing_dev_info);
done:
part->bd_openers++;
return 0;
out_blkdev_put:
- blkdev_put_whole(whole, mode);
-out_put_whole:
- bdput(whole);
+ blkdev_put_whole(bdev_whole(part), mode);
return ret;
}
blkdev_flush_mapping(part);
whole->bd_disk->open_partitions--;
blkdev_put_whole(whole, mode);
- bdput(whole);
}
struct block_device *blkdev_get_no_open(dev_t dev)
{
struct block_device *bdev;
- struct gendisk *disk;
+ struct inode *inode;
- bdev = bdget(dev);
- if (!bdev) {
+ inode = ilookup(blockdev_superblock, dev);
+ if (!inode) {
blk_request_module(dev);
- bdev = bdget(dev);
- if (!bdev)
+ inode = ilookup(blockdev_superblock, dev);
+ if (!inode)
return NULL;
}
- disk = bdev->bd_disk;
- if (!kobject_get_unless_zero(&disk_to_dev(disk)->kobj))
- goto bdput;
- if ((disk->flags & (GENHD_FL_UP | GENHD_FL_HIDDEN)) != GENHD_FL_UP)
- goto put_disk;
- if (!try_module_get(bdev->bd_disk->fops->owner))
- goto put_disk;
+ /* switch from the inode reference to a device mode one: */
+ bdev = &BDEV_I(inode)->bdev;
+ if (!kobject_get_unless_zero(&bdev->bd_device.kobj))
+ bdev = NULL;
+ iput(inode);
+
+ if (!bdev)
+ return NULL;
+ if ((bdev->bd_disk->flags & GENHD_FL_HIDDEN) ||
+ !try_module_get(bdev->bd_disk->fops->owner)) {
+ put_device(&bdev->bd_device);
+ return NULL;
+ }
+
return bdev;
-put_disk:
- put_disk(disk);
-bdput:
- bdput(bdev);
- return NULL;
}
void blkdev_put_no_open(struct block_device *bdev)
{
module_put(bdev->bd_disk->fops->owner);
- put_disk(bdev->bd_disk);
- bdput(bdev);
+ put_device(&bdev->bd_device);
}
/**
mutex_lock(&disk->open_mutex);
ret = -ENXIO;
- if (!(disk->flags & GENHD_FL_UP))
+ if (!disk_live(disk))
goto abort_claiming;
if (bdev_is_partition(bdev))
ret = blkdev_get_part(bdev, mode);
ret = kstrtol(name, 10, &data);
if (ret)
return ret;
- if (data >= IOPRIO_BE_NR || data < 0)
+ if (data >= IOPRIO_NR_LEVELS || data < 0)
return -EINVAL;
cprc->ckpt_thread_ioprio = IOPRIO_PRIO_VALUE(class, data);
#include <linux/blkdev.h>
#include <linux/sched/signal.h>
+#include <linux/backing-dev-defs.h>
#include "fat.h"
struct fatent_operations {
sb->s_time_gran = 1;
sb->s_max_links = NILFS_LINK_MAX;
- sb->s_bdi = bdi_get(sb->s_bdev->bd_bdi);
+ sb->s_bdi = bdi_get(sb->s_bdev->bd_disk->bdi);
err = load_nilfs(nilfs, sb);
if (err)
bytes_to_copy = min_t(int, bytes_to_copy,
req_length - copied_bytes);
- memcpy(actor_addr + actor_offset,
- page_address(bvec->bv_page) + bvec->bv_offset + offset,
+ memcpy(actor_addr + actor_offset, bvec_virt(bvec) + offset,
bytes_to_copy);
actor_offset += bytes_to_copy;
goto out_free_bio;
}
/* Extract the length of the metadata block */
- data = page_address(bvec->bv_page) + bvec->bv_offset;
+ data = bvec_virt(bvec);
length = data[offset];
if (offset < bvec->bv_len - 1) {
length |= data[offset + 1] << 8;
res = -EIO;
goto out_free_bio;
}
- data = page_address(bvec->bv_page) + bvec->bv_offset;
+ data = bvec_virt(bvec);
length |= data[0] << 8;
}
bio_free_pages(bio);
while (bio_next_segment(bio, &iter_all)) {
int avail = min(bytes, ((int)bvec->bv_len) - offset);
- data = page_address(bvec->bv_page) + bvec->bv_offset;
+ data = bvec_virt(bvec);
memcpy(buff, data + offset, avail);
buff += avail;
bytes -= avail;
while (bio_next_segment(bio, &iter_all)) {
int avail = min(bytes, ((int)bvec->bv_len) - offset);
- data = page_address(bvec->bv_page) + bvec->bv_offset;
+ data = bvec_virt(bvec);
memcpy(buff, data + offset, avail);
buff += avail;
bytes -= avail;
}
avail = min(length, ((int)bvec->bv_len) - offset);
- data = page_address(bvec->bv_page) + bvec->bv_offset;
+ data = bvec_virt(bvec);
length -= avail;
stream->buf.in = data + offset;
stream->buf.in_size = avail;
}
avail = min(length, ((int)bvec->bv_len) - offset);
- data = page_address(bvec->bv_page) + bvec->bv_offset;
+ data = bvec_virt(bvec);
length -= avail;
stream->next_in = data + offset;
stream->avail_in = avail;
}
avail = min(length, ((int)bvec->bv_len) - offset);
- data = page_address(bvec->bv_page) + bvec->bv_offset;
+ data = bvec_virt(bvec);
length -= avail;
in_buf.src = data + offset;
in_buf.size = avail;
{
s->s_bdev = data;
s->s_dev = s->s_bdev->bd_dev;
- s->s_bdi = bdi_get(s->s_bdev->bd_bdi);
+ s->s_bdi = bdi_get(s->s_bdev->bd_disk->bdi);
if (blk_queue_stable_writes(s->s_bdev->bd_disk->queue))
s->s_iflags |= SB_I_STABLE_WRITES;
{
struct xfs_buf *bp;
- if (bdi_read_congested(target->bt_bdev->bd_bdi))
+ if (bdi_read_congested(target->bt_bdev->bd_disk->bdi))
return;
xfs_buf_read_map(target, map, nmaps,
sb = inode->i_sb;
#ifdef CONFIG_BLOCK
if (sb_is_blkdev_sb(sb))
- return I_BDEV(inode)->bd_bdi;
+ return I_BDEV(inode)->bd_disk->bdi;
#endif
return sb->s_bdi;
}
#ifndef __LINUX_BIO_H
#define __LINUX_BIO_H
-#include <linux/highmem.h>
#include <linux/mempool.h>
#include <linux/ioprio.h>
/* struct bio, bio_vec and BIO_* flags are defined in blk_types.h */
struct bio *src) { }
#endif /* CONFIG_BLK_CGROUP */
-#ifdef CONFIG_HIGHMEM
-/*
- * remember never ever reenable interrupts between a bvec_kmap_irq and
- * bvec_kunmap_irq!
- */
-static inline char *bvec_kmap_irq(struct bio_vec *bvec, unsigned long *flags)
-{
- unsigned long addr;
-
- /*
- * might not be a highmem page, but the preempt/irq count
- * balancing is a lot nicer this way
- */
- local_irq_save(*flags);
- addr = (unsigned long) kmap_atomic(bvec->bv_page);
-
- BUG_ON(addr & ~PAGE_MASK);
-
- return (char *) addr + bvec->bv_offset;
-}
-
-static inline void bvec_kunmap_irq(char *buffer, unsigned long *flags)
-{
- unsigned long ptr = (unsigned long) buffer & PAGE_MASK;
-
- kunmap_atomic((void *) ptr);
- local_irq_restore(*flags);
-}
-
-#else
-static inline char *bvec_kmap_irq(struct bio_vec *bvec, unsigned long *flags)
-{
- return page_address(bvec->bv_page) + bvec->bv_offset;
-}
-
-static inline void bvec_kunmap_irq(char *buffer, unsigned long *flags)
-{
- *flags = 0;
-}
-#endif
-
/*
* BIO list management for use by remapping drivers (e.g. DM or MD) and loop.
*
typedef void (blkcg_pol_offline_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_free_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkg_policy_data *pd);
-typedef size_t (blkcg_pol_stat_pd_fn)(struct blkg_policy_data *pd, char *buf,
- size_t size);
+typedef bool (blkcg_pol_stat_pd_fn)(struct blkg_policy_data *pd,
+ struct seq_file *s);
struct blkcg_policy {
int plid;
BLK_MQ_F_STACKING = 1 << 2,
BLK_MQ_F_TAG_HCTX_SHARED = 1 << 3,
BLK_MQ_F_BLOCKING = 1 << 5,
+ /* Do not allow an I/O scheduler to be configured. */
BLK_MQ_F_NO_SCHED = 1 << 6,
+ /*
+ * Select 'none' during queue registration in case of a single hwq
+ * or shared hwqs instead of 'mq-deadline'.
+ */
+ BLK_MQ_F_NO_SCHED_BY_DEFAULT = 1 << 7,
BLK_MQ_F_ALLOC_POLICY_START_BIT = 8,
BLK_MQ_F_ALLOC_POLICY_BITS = 1,
((policy & ((1 << BLK_MQ_F_ALLOC_POLICY_BITS) - 1)) \
<< BLK_MQ_F_ALLOC_POLICY_START_BIT)
+struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set, void *queuedata,
+ struct lock_class_key *lkclass);
#define blk_mq_alloc_disk(set, queuedata) \
({ \
static struct lock_class_key __key; \
- struct gendisk *__disk = __blk_mq_alloc_disk(set, queuedata); \
\
- if (!IS_ERR(__disk)) \
- lockdep_init_map(&__disk->lockdep_map, \
- "(bio completion)", &__key, 0); \
- __disk; \
+ __blk_mq_alloc_disk(set, queuedata, &__key); \
})
-struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set,
- void *queuedata);
struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *);
int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
struct request_queue *q);
void * bd_holder;
int bd_holders;
bool bd_write_holder;
-#ifdef CONFIG_SYSFS
- struct list_head bd_holder_disks;
-#endif
struct kobject *bd_holder_dir;
u8 bd_partno;
spinlock_t bd_size_lock; /* for bd_inode->i_size updates */
struct gendisk * bd_disk;
- struct backing_dev_info *bd_bdi;
/* The counter of freeze processes */
int bd_fsfreeze_count;
#include <linux/minmax.h>
#include <linux/timer.h>
#include <linux/workqueue.h>
-#include <linux/backing-dev-defs.h>
#include <linux/wait.h>
#include <linux/mempool.h>
#include <linux/pfn.h>
struct blk_mq_hw_ctx **queue_hw_ctx;
unsigned int nr_hw_queues;
- struct backing_dev_info *backing_dev_info;
-
/*
* The queue owner gets to use this for whatever they like.
* ll_rw_blk doesn't touch it.
spinlock_t queue_lock;
+ struct gendisk *disk;
+
/*
* queue kobject
*/
dma_map_page_attrs(dev, (bv)->bv_page, (bv)->bv_offset, (bv)->bv_len, \
(dir), (attrs))
-#define queue_to_disk(q) (dev_to_disk(kobj_to_dev((q)->kobj.parent)))
-
static inline bool queue_is_mq(struct request_queue *q)
{
return q->mq_ops;
#define SECTOR_SIZE (1 << SECTOR_SHIFT)
#endif
+#define PAGE_SECTORS_SHIFT (PAGE_SHIFT - SECTOR_SHIFT)
+#define PAGE_SECTORS (1 << PAGE_SECTORS_SHIFT)
+#define SECTOR_MASK (PAGE_SECTORS - 1)
+
/*
* blk_rq_pos() : the current sector
* blk_rq_bytes() : bytes left in the entire request
unsigned int size);
extern void blk_queue_alignment_offset(struct request_queue *q,
unsigned int alignment);
-void blk_queue_update_readahead(struct request_queue *q);
+void disk_update_readahead(struct gendisk *disk);
extern void blk_limits_io_min(struct queue_limits *limits, unsigned int min);
extern void blk_queue_io_min(struct request_queue *q, unsigned int min);
extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt);
return offset << SECTOR_SHIFT;
}
+/*
+ * Two cases of handling DISCARD merge:
+ * If max_discard_segments > 1, the driver takes every bio
+ * as a range and send them to controller together. The ranges
+ * needn't to be contiguous.
+ * Otherwise, the bios/requests will be handled as same as
+ * others which should be contiguous.
+ */
+static inline bool blk_discard_mergable(struct request *req)
+{
+ if (req_op(req) == REQ_OP_DISCARD &&
+ queue_max_discard_segments(req->q) > 1)
+ return true;
+ return false;
+}
+
static inline int bdev_discard_alignment(struct block_device *bdev)
{
struct request_queue *q = bdev_get_queue(bdev);
char *(*devnode)(struct gendisk *disk, umode_t *mode);
struct module *owner;
const struct pr_ops *pr_ops;
+
+ /*
+ * Special callback for probing GPT entry at a given sector.
+ * Needed by Android devices, used by GPT scanner and MMC blk
+ * driver.
+ */
+ int (*alternative_gpt_sector)(struct gendisk *disk, sector_t *sector);
};
#ifdef CONFIG_COMPAT
struct block_device *bdev_alloc(struct gendisk *disk, u8 partno);
void bdev_add(struct block_device *bdev, dev_t dev);
struct block_device *I_BDEV(struct inode *inode);
-struct block_device *bdgrab(struct block_device *bdev);
-void bdput(struct block_device *);
int truncate_bdev_range(struct block_device *bdev, fmode_t mode, loff_t lstart,
loff_t lend);
*
* Copyright (C) 2001 Ming Lei <ming.lei@canonical.com>
*/
-#ifndef __LINUX_BVEC_ITER_H
-#define __LINUX_BVEC_ITER_H
+#ifndef __LINUX_BVEC_H
+#define __LINUX_BVEC_H
+#include <linux/highmem.h>
#include <linux/bug.h>
#include <linux/errno.h>
#include <linux/limits.h>
}
}
-#endif /* __LINUX_BVEC_ITER_H */
+/**
+ * bvec_kmap_local - map a bvec into the kernel virtual address space
+ * @bvec: bvec to map
+ *
+ * Must be called on single-page bvecs only. Call kunmap_local on the returned
+ * address to unmap.
+ */
+static inline void *bvec_kmap_local(struct bio_vec *bvec)
+{
+ return kmap_local_page(bvec->bv_page) + bvec->bv_offset;
+}
+
+/**
+ * memcpy_from_bvec - copy data from a bvec
+ * @bvec: bvec to copy from
+ *
+ * Must be called on single-page bvecs only.
+ */
+static inline void memcpy_from_bvec(char *to, struct bio_vec *bvec)
+{
+ memcpy_from_page(to, bvec->bv_page, bvec->bv_offset, bvec->bv_len);
+}
+
+/**
+ * memcpy_to_bvec - copy data to a bvec
+ * @bvec: bvec to copy to
+ *
+ * Must be called on single-page bvecs only.
+ */
+static inline void memcpy_to_bvec(struct bio_vec *bvec, const char *from)
+{
+ memcpy_to_page(bvec->bv_page, bvec->bv_offset, from, bvec->bv_len);
+}
+
+/**
+ * memzero_bvec - zero all data in a bvec
+ * @bvec: bvec to zero
+ *
+ * Must be called on single-page bvecs only.
+ */
+static inline void memzero_bvec(struct bio_vec *bvec)
+{
+ memzero_page(bvec->bv_page, bvec->bv_offset, bvec->bv_len);
+}
+
+/**
+ * bvec_virt - return the virtual address for a bvec
+ * @bvec: bvec to return the virtual address for
+ *
+ * Note: the caller must ensure that @bvec->bv_page is not a highmem page.
+ */
+static inline void *bvec_virt(struct bio_vec *bvec)
+{
+ WARN_ON_ONCE(PageHighMem(bvec->bv_page));
+ return page_address(bvec->bv_page) + bvec->bv_offset;
+}
+
+#endif /* __LINUX_BVEC_H */
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Parsing command line, get the partitions information.
- *
- * Written by Cai Zhiyong <caizhiyong@huawei.com>
- *
- */
-#ifndef CMDLINEPARSEH
-#define CMDLINEPARSEH
-
-#include <linux/blkdev.h>
-#include <linux/fs.h>
-#include <linux/slab.h>
-
-/* partition flags */
-#define PF_RDONLY 0x01 /* Device is read only */
-#define PF_POWERUP_LOCK 0x02 /* Always locked after reset */
-
-struct cmdline_subpart {
- char name[BDEVNAME_SIZE]; /* partition name, such as 'rootfs' */
- sector_t from;
- sector_t size;
- int flags;
- struct cmdline_subpart *next_subpart;
-};
-
-struct cmdline_parts {
- char name[BDEVNAME_SIZE]; /* block device, such as 'mmcblk0' */
- unsigned int nr_subparts;
- struct cmdline_subpart *subpart;
- struct cmdline_parts *next_parts;
-};
-
-void cmdline_parts_free(struct cmdline_parts **parts);
-
-int cmdline_parts_parse(struct cmdline_parts **parts, const char *cmdline);
-
-struct cmdline_parts *cmdline_parts_find(struct cmdline_parts *parts,
- const char *bdev);
-
-int cmdline_parts_set(struct cmdline_parts *parts, sector_t disk_size,
- int slot,
- int (*add_part)(int, struct cmdline_subpart *, void *),
- void *param);
-
-#endif /* CMDLINEPARSEH */
void *addr, size_t bytes, struct iov_iter *i);
typedef int (*dm_dax_zero_page_range_fn)(struct dm_target *ti, pgoff_t pgoff,
size_t nr_pages);
-#define PAGE_SECTORS (PAGE_SIZE / 512)
void dm_error(const char *message);
ssize_t vfs_iocb_iter_write(struct file *file, struct kiocb *iocb,
struct iov_iter *iter);
-/* fs/block_dev.c */
-extern int blkdev_fsync(struct file *filp, loff_t start, loff_t end,
- int datasync);
-
/* fs/splice.c */
extern ssize_t generic_file_splice_read(struct file *, loff_t *,
struct pipe_inode_info *, size_t, unsigned int);
* device.
* Affects responses to the ``CDROM_GET_CAPABILITY`` ioctl.
*
- * ``GENHD_FL_UP`` (0x0010): indicates that the block device is "up",
- * with a similar meaning to network interfaces.
- *
* ``GENHD_FL_SUPPRESS_PARTITION_INFO`` (0x0020): don't include
* partition information in ``/proc/partitions`` or in the output of
* printk_all_partitions().
/* 2 is unused (used to be GENHD_FL_DRIVERFS) */
/* 4 is unused (used to be GENHD_FL_MEDIA_CHANGE_NOTIFY) */
#define GENHD_FL_CD 0x0008
-#define GENHD_FL_UP 0x0010
#define GENHD_FL_SUPPRESS_PARTITION_INFO 0x0020
#define GENHD_FL_EXT_DEVT 0x0040
#define GENHD_FL_NATIVE_CAPACITY 0x0080
unsigned long state;
#define GD_NEED_PART_SCAN 0
#define GD_READ_ONLY 1
-#define GD_QUEUE_REF 2
struct mutex open_mutex; /* open/close mutex */
unsigned open_partitions; /* number of open partitions */
+ struct backing_dev_info *bdi;
struct kobject *slave_dir;
-
+#ifdef CONFIG_BLOCK_HOLDER_DEPRECATED
+ struct list_head slave_bdevs;
+#endif
struct timer_rand_state *random;
atomic_t sync_io; /* RAID */
struct disk_events *ev;
int node_id;
struct badblocks *bb;
struct lockdep_map lockdep_map;
+ u64 diskseq;
};
+static inline bool disk_live(struct gendisk *disk)
+{
+ return !inode_unhashed(disk->part0->bd_inode);
+}
+
/*
* The gendisk is refcounted by the part0 block_device, and the bd_device
* therein is also used for device model presentation in sysfs.
void disk_uevent(struct gendisk *disk, enum kobject_action action);
/* block/genhd.c */
-extern void device_add_disk(struct device *parent, struct gendisk *disk,
- const struct attribute_group **groups);
-static inline void add_disk(struct gendisk *disk)
+int device_add_disk(struct device *parent, struct gendisk *disk,
+ const struct attribute_group **groups);
+static inline int add_disk(struct gendisk *disk)
{
- device_add_disk(NULL, disk, NULL);
+ return device_add_disk(NULL, disk, NULL);
}
-extern void device_add_disk_no_queue_reg(struct device *parent, struct gendisk *disk);
-static inline void add_disk_no_queue_reg(struct gendisk *disk)
-{
- device_add_disk_no_queue_reg(NULL, disk);
-}
-
extern void del_gendisk(struct gendisk *gp);
void set_disk_ro(struct gendisk *disk, bool read_only);
extern void disk_unblock_events(struct gendisk *disk);
extern void disk_flush_events(struct gendisk *disk, unsigned int mask);
bool set_capacity_and_notify(struct gendisk *disk, sector_t size);
+bool disk_force_media_change(struct gendisk *disk, unsigned int events);
/* drivers/char/random.c */
extern void add_disk_randomness(struct gendisk *disk) __latent_entropy;
int bdev_disk_changed(struct gendisk *disk, bool invalidate);
void blk_drop_partitions(struct gendisk *disk);
-extern struct gendisk *__alloc_disk_node(int minors, int node_id);
+struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id,
+ struct lock_class_key *lkclass);
extern void put_disk(struct gendisk *disk);
-
-#define alloc_disk_node(minors, node_id) \
-({ \
- static struct lock_class_key __key; \
- const char *__name; \
- struct gendisk *__disk; \
- \
- __name = "(gendisk_completion)"#minors"("#node_id")"; \
- \
- __disk = __alloc_disk_node(minors, node_id); \
- \
- if (__disk) \
- lockdep_init_map(&__disk->lockdep_map, __name, &__key, 0); \
- \
- __disk; \
-})
-
-#define alloc_disk(minors) alloc_disk_node(minors, NUMA_NO_NODE)
+struct gendisk *__blk_alloc_disk(int node, struct lock_class_key *lkclass);
/**
* blk_alloc_disk - allocate a gendisk structure
*/
#define blk_alloc_disk(node_id) \
({ \
- struct gendisk *__disk = __blk_alloc_disk(node_id); \
static struct lock_class_key __key; \
\
- if (__disk) \
- lockdep_init_map(&__disk->lockdep_map, \
- "(bio completion)", &__key, 0); \
- __disk; \
+ __blk_alloc_disk(node_id, &__key); \
})
-struct gendisk *__blk_alloc_disk(int node);
void blk_cleanup_disk(struct gendisk *disk);
int __register_blkdev(unsigned int major, const char *name,
int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long);
long compat_blkdev_ioctl(struct file *, unsigned, unsigned long);
-#ifdef CONFIG_SYSFS
+#ifdef CONFIG_BLOCK_HOLDER_DEPRECATED
int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk);
void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk);
+int bd_register_pending_holders(struct gendisk *disk);
#else
static inline int bd_link_disk_holder(struct block_device *bdev,
struct gendisk *disk)
struct gendisk *disk)
{
}
-#endif /* CONFIG_SYSFS */
+static inline int bd_register_pending_holders(struct gendisk *disk)
+{
+ return 0;
+}
+#endif /* CONFIG_BLOCK_HOLDER_DEPRECATED */
dev_t part_devt(struct gendisk *disk, u8 partno);
+void inc_diskseq(struct gendisk *disk);
dev_t blk_lookup_devt(const char *name, int partno);
void blk_request_module(dev_t devt);
#ifdef CONFIG_BLOCK
#include <linux/sched/rt.h>
#include <linux/iocontext.h>
-/*
- * Gives us 8 prio classes with 13-bits of data for each class
- */
-#define IOPRIO_CLASS_SHIFT (13)
-#define IOPRIO_PRIO_MASK ((1UL << IOPRIO_CLASS_SHIFT) - 1)
-
-#define IOPRIO_PRIO_CLASS(mask) ((mask) >> IOPRIO_CLASS_SHIFT)
-#define IOPRIO_PRIO_DATA(mask) ((mask) & IOPRIO_PRIO_MASK)
-#define IOPRIO_PRIO_VALUE(class, data) (((class) << IOPRIO_CLASS_SHIFT) | data)
-
-#define ioprio_valid(mask) (IOPRIO_PRIO_CLASS((mask)) != IOPRIO_CLASS_NONE)
+#include <uapi/linux/ioprio.h>
/*
- * These are the io priority groups as implemented by CFQ. RT is the realtime
- * class, it always gets premium service. BE is the best-effort scheduling
- * class, the default for any process. IDLE is the idle scheduling class, it
- * is only served when no one else is using the disk.
+ * Default IO priority.
*/
-enum {
- IOPRIO_CLASS_NONE,
- IOPRIO_CLASS_RT,
- IOPRIO_CLASS_BE,
- IOPRIO_CLASS_IDLE,
-};
+#define IOPRIO_DEFAULT IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_BE_NORM)
/*
- * 8 best effort priority levels are supported
+ * Check that a priority value has a valid class.
*/
-#define IOPRIO_BE_NR (8)
-
-enum {
- IOPRIO_WHO_PROCESS = 1,
- IOPRIO_WHO_PGRP,
- IOPRIO_WHO_USER,
-};
+static inline bool ioprio_valid(unsigned short ioprio)
+{
+ unsigned short class = IOPRIO_PRIO_CLASS(ioprio);
-/*
- * Fallback BE priority
- */
-#define IOPRIO_NORM (4)
+ return class > IOPRIO_CLASS_NONE && class <= IOPRIO_CLASS_IDLE;
+}
/*
* if process has set io priority explicitly, use that. if not, convert
if (ioc)
return ioc->ioprio;
- return IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0);
+ return IOPRIO_DEFAULT;
}
/*
u8 raw_hc_erase_gap_size; /* 221 */
u8 raw_erase_timeout_mult; /* 223 */
u8 raw_hc_erase_grp_size; /* 224 */
+ u8 raw_boot_mult; /* 226 */
u8 raw_sec_trim_mult; /* 229 */
u8 raw_sec_erase_mult; /* 230 */
u8 raw_sec_feature_support;/* 231 */
#else
#define MMC_CAP2_CRYPTO 0
#endif
+#define MMC_CAP2_ALT_GPT_TEGRA (1 << 28) /* Host with eMMC that has GPT entry at a non-standard location */
int fixed_drv_type; /* fixed driver type for non-removable media */
/*
* mm/page-writeback.c
*/
-#ifdef CONFIG_BLOCK
void laptop_io_completion(struct backing_dev_info *info);
void laptop_sync_completion(void);
-void laptop_mode_sync(struct work_struct *work);
void laptop_mode_timer_fn(struct timer_list *t);
-#else
-static inline void laptop_sync_completion(void) { }
-#endif
bool node_dirty_ok(struct pglist_data *pgdat);
int wb_domain_init(struct wb_domain *dom, gfp_t gfp);
#ifdef CONFIG_CGROUP_WRITEBACK
),
TP_fast_assign(
- __entry->dev = disk_devt(queue_to_disk(q));
+ __entry->dev = disk_devt(q->disk);
strlcpy(__entry->domain, domain, sizeof(__entry->domain));
strlcpy(__entry->type, type, sizeof(__entry->type));
__entry->percentile = percentile;
),
TP_fast_assign(
- __entry->dev = disk_devt(queue_to_disk(q));
+ __entry->dev = disk_devt(q->disk);
strlcpy(__entry->domain, domain, sizeof(__entry->domain));
__entry->depth = depth;
),
),
TP_fast_assign(
- __entry->dev = disk_devt(queue_to_disk(q));
+ __entry->dev = disk_devt(q->disk);
strlcpy(__entry->domain, domain, sizeof(__entry->domain));
),
#define BLKSECDISCARD _IO(0x12,125)
#define BLKROTATIONAL _IO(0x12,126)
#define BLKZEROOUT _IO(0x12,127)
+#define BLKGETDISKSEQ _IOR(0x12,128,__u64)
/*
* A jump here: 130-136 are reserved for zoned block devices
* (see uapi/linux/blkzoned.h)
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_LINUX_IOPRIO_H
+#define _UAPI_LINUX_IOPRIO_H
+
+/*
+ * Gives us 8 prio classes with 13-bits of data for each class
+ */
+#define IOPRIO_CLASS_SHIFT 13
+#define IOPRIO_CLASS_MASK 0x07
+#define IOPRIO_PRIO_MASK ((1UL << IOPRIO_CLASS_SHIFT) - 1)
+
+#define IOPRIO_PRIO_CLASS(ioprio) \
+ (((ioprio) >> IOPRIO_CLASS_SHIFT) & IOPRIO_CLASS_MASK)
+#define IOPRIO_PRIO_DATA(ioprio) ((ioprio) & IOPRIO_PRIO_MASK)
+#define IOPRIO_PRIO_VALUE(class, data) \
+ ((((class) & IOPRIO_CLASS_MASK) << IOPRIO_CLASS_SHIFT) | \
+ ((data) & IOPRIO_PRIO_MASK))
+
+/*
+ * These are the io priority groups as implemented by the BFQ and mq-deadline
+ * schedulers. RT is the realtime class, it always gets premium service. For
+ * ATA disks supporting NCQ IO priority, RT class IOs will be processed using
+ * high priority NCQ commands. BE is the best-effort scheduling class, the
+ * default for any process. IDLE is the idle scheduling class, it is only
+ * served when no one else is using the disk.
+ */
+enum {
+ IOPRIO_CLASS_NONE,
+ IOPRIO_CLASS_RT,
+ IOPRIO_CLASS_BE,
+ IOPRIO_CLASS_IDLE,
+};
+
+/*
+ * The RT and BE priority classes both support up to 8 priority levels.
+ */
+#define IOPRIO_NR_LEVELS 8
+#define IOPRIO_BE_NR IOPRIO_NR_LEVELS
+
+enum {
+ IOPRIO_WHO_PROCESS = 1,
+ IOPRIO_WHO_PGRP,
+ IOPRIO_WHO_USER,
+};
+
+/*
+ * Fallback BE priority level.
+ */
+#define IOPRIO_NORM 4
+#define IOPRIO_BE_NORM IOPRIO_NORM
+
+#endif /* _UAPI_LINUX_IOPRIO_H */
printk("Please append a correct \"root=\" boot option; here are the available partitions:\n");
printk_all_partitions();
-#ifdef CONFIG_DEBUG_BLOCK_EXT_DEVT
- printk("DEBUG_BLOCK_EXT_DEVT is enabled, you need to specify "
- "explicit textual name for \"root=\" boot option.\n");
-#endif
panic("VFS: Unable to mount root fs on %s", b);
}
if (!(flags & SB_RDONLY)) {
feature by default. When enabled, memory and cache locality will
be impacted.
-config DEBUG_BLOCK_EXT_DEVT
- bool "Force extended block device numbers and spread them"
- depends on DEBUG_KERNEL
- depends on BLOCK
- default n
- help
- BIG FAT WARNING: ENABLING THIS OPTION MIGHT BREAK BOOTING ON
- SOME DISTRIBUTIONS. DO NOT ENABLE THIS UNLESS YOU KNOW WHAT
- YOU ARE DOING. Distros, please enable this and fix whatever
- is broken.
-
- Conventionally, block device numbers are allocated from
- predetermined contiguous area. However, extended block area
- may introduce non-contiguous block device numbers. This
- option forces most block device numbers to be allocated from
- the extended space and spreads them to discover kernel or
- userland code paths which assume predetermined contiguous
- device number allocation.
-
- Note that turning on this debug option shuffles all the
- device numbers for all IDE and SCSI devices including libata
- ones, so root partition specified using device number
- directly (via rdev or root=MAJ:MIN) won't work anymore.
- Textual device names (root=/dev/sdXn) will continue to work.
-
- Say N if you are unsure.
-
config CPU_HOTPLUG_STATE_CONTROL
bool "Enable CPU hotplug state control"
depends on DEBUG_KERNEL
bdi->capabilities = BDI_CAP_WRITEBACK | BDI_CAP_WRITEBACK_ACCT;
bdi->ra_pages = VM_READAHEAD_PAGES;
bdi->io_pages = VM_READAHEAD_PAGES;
+ timer_setup(&bdi->laptop_mode_wb_timer, laptop_mode_timer_fn, 0);
return bdi;
}
EXPORT_SYMBOL(bdi_alloc);
void bdi_unregister(struct backing_dev_info *bdi)
{
+ del_timer_sync(&bdi->laptop_mode_wb_timer);
+
/* make sure nobody finds us on the bdi_list anymore */
bdi_remove_from_list(bdi);
wb_shutdown(&bdi->wb);
return ret;
}
-#ifdef CONFIG_BLOCK
void laptop_mode_timer_fn(struct timer_list *t)
{
struct backing_dev_info *backing_dev_info =
rcu_read_unlock();
}
-#endif
/*
* If ratelimit_pages is too high then we can get into dirty-data overload