* count down in thaw_bdev(). When it becomes 0, thaw_bdev() will actually
* unfreeze the filesystem.
*/
-struct super_block *freeze_bdev(struct block_device *bdev)
+int freeze_bdev(struct block_device *bdev)
{
struct super_block *sb;
int error = 0;
mutex_lock(&bdev->bd_fsfreeze_mutex);
- if (++bdev->bd_fsfreeze_count > 1) {
- /*
- * We don't even need to grab a reference - the first call
- * to freeze_bdev grab an active reference and only the last
- * thaw_bdev drops it.
- */
- sb = get_super(bdev);
- if (sb)
- drop_super(sb);
- mutex_unlock(&bdev->bd_fsfreeze_mutex);
- return sb;
- }
+ if (++bdev->bd_fsfreeze_count > 1)
+ goto done;
sb = get_active_super(bdev);
if (!sb)
- goto out;
+ goto sync;
if (sb->s_op->freeze_super)
error = sb->s_op->freeze_super(sb);
else
error = freeze_super(sb);
+ deactivate_super(sb);
+
if (error) {
- deactivate_super(sb);
bdev->bd_fsfreeze_count--;
- mutex_unlock(&bdev->bd_fsfreeze_mutex);
- return ERR_PTR(error);
+ goto done;
}
- deactivate_super(sb);
- out:
+ bdev->bd_fsfreeze_sb = sb;
+
+sync:
sync_blockdev(bdev);
+done:
mutex_unlock(&bdev->bd_fsfreeze_mutex);
- return sb; /* thaw_bdev releases s->s_umount */
+ return error;
}
EXPORT_SYMBOL(freeze_bdev);
/**
* thaw_bdev -- unlock filesystem
* @bdev: blockdevice to unlock
- * @sb: associated superblock
*
* Unlocks the filesystem and marks it writeable again after freeze_bdev().
*/
-int thaw_bdev(struct block_device *bdev, struct super_block *sb)
+int thaw_bdev(struct block_device *bdev)
{
+ struct super_block *sb;
int error = -EINVAL;
mutex_lock(&bdev->bd_fsfreeze_mutex);
if (--bdev->bd_fsfreeze_count > 0)
goto out;
+ sb = bdev->bd_fsfreeze_sb;
if (!sb)
goto out;
blockdev_superblock = bd_mnt->mnt_sb; /* For writeback */
}
-/*
- * Most likely _very_ bad one - but then it's hardly critical for small
- * /dev and can be fixed when somebody will need really large one.
- * Keep in mind that it will be fed through icache hash function too.
- */
-static inline unsigned long hash(dev_t dev)
-{
- return MAJOR(dev)+MINOR(dev);
-}
-
-static int bdev_test(struct inode *inode, void *data)
-{
- return BDEV_I(inode)->bdev.bd_dev == *(dev_t *)data;
-}
-
-static int bdev_set(struct inode *inode, void *data)
-{
- BDEV_I(inode)->bdev.bd_dev = *(dev_t *)data;
- return 0;
-}
-
static struct block_device *bdget(dev_t dev)
{
struct block_device *bdev;
struct inode *inode;
- inode = iget5_locked(blockdev_superblock, hash(dev),
- bdev_test, bdev_set, &dev);
-
+ inode = iget_locked(blockdev_superblock, dev);
if (!inode)
return NULL;
bdev->bd_super = NULL;
bdev->bd_inode = inode;
bdev->bd_part_count = 0;
+ bdev->bd_dev = dev;
inode->i_mode = S_IFBLK;
inode->i_rdev = dev;
- inode->i_bdev = bdev;
inode->i_data.a_ops = &def_blk_aops;
mapping_set_gfp_mask(&inode->i_data, GFP_USER);
unlock_new_inode(inode);
/**
* bdgrab -- Grab a reference to an already referenced block device
* @bdev: Block device to grab a reference to.
+ *
+ * Returns the block_device with an additional reference when successful,
+ * or NULL if the inode is already being freed.
*/
struct block_device *bdgrab(struct block_device *bdev)
{
- ihold(bdev->bd_inode);
+ if (!igrab(bdev->bd_inode))
+ return NULL;
return bdev;
}
EXPORT_SYMBOL(bdgrab);
{
iput(bdev->bd_inode);
}
-
EXPORT_SYMBOL(bdput);
-static struct block_device *bd_acquire(struct inode *inode)
-{
- struct block_device *bdev;
-
- spin_lock(&bdev_lock);
- bdev = inode->i_bdev;
- if (bdev && !inode_unhashed(bdev->bd_inode)) {
- bdgrab(bdev);
- spin_unlock(&bdev_lock);
- return bdev;
- }
- spin_unlock(&bdev_lock);
-
- /*
- * i_bdev references block device inode that was already shut down
- * (corresponding device got removed). Remove the reference and look
- * up block device inode again just in case new device got
- * reestablished under the same device number.
- */
- if (bdev)
- bd_forget(inode);
-
- bdev = bdget(inode->i_rdev);
- if (bdev) {
- spin_lock(&bdev_lock);
- if (!inode->i_bdev) {
- /*
- * We take an additional reference to bd_inode,
- * and it's released in clear_inode() of inode.
- * So, we can access it via ->i_mapping always
- * without igrab().
- */
- bdgrab(bdev);
- inode->i_bdev = bdev;
- inode->i_mapping = bdev->bd_inode->i_mapping;
- }
- spin_unlock(&bdev_lock);
- }
- return bdev;
-}
-
-/* Call when you free inode */
-
-void bd_forget(struct inode *inode)
-{
- struct block_device *bdev = NULL;
-
- spin_lock(&bdev_lock);
- if (!sb_is_blkdev_sb(inode->i_sb))
- bdev = inode->i_bdev;
- inode->i_bdev = NULL;
- inode->i_mapping = &inode->i_data;
- spin_unlock(&bdev_lock);
-
- if (bdev)
- bdput(bdev);
-}
-
/**
* bd_may_claim - test whether a block device can be claimed
* @bdev: block device of interest
holder->disk = disk;
holder->refcnt = 1;
- ret = add_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
+ ret = add_symlink(disk->slave_dir, bdev_kobj(bdev));
if (ret)
goto out_free;
goto out_unlock;
out_del:
- del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
+ del_symlink(disk->slave_dir, bdev_kobj(bdev));
out_free:
kfree(holder);
out_unlock:
holder = bd_find_holder_disk(bdev, disk);
if (!WARN_ON_ONCE(holder == NULL) && !--holder->refcnt) {
- del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
+ del_symlink(disk->slave_dir, bdev_kobj(bdev));
del_symlink(bdev->bd_part->holder_dir,
&disk_to_dev(disk)->kobj);
kobject_put(bdev->bd_part->holder_dir);
i_size_write(bdev->bd_inode, disk_size);
}
spin_unlock(&bdev->bd_size_lock);
-
- if (bdev_size > disk_size) {
- if (__invalidate_device(bdev, false))
- pr_warn("VFS: busy inodes on resized disk %s\n",
- disk->disk_name);
- }
}
/**
bdput(bdev);
}
}
-EXPORT_SYMBOL(revalidate_disk_size);
void bd_set_nr_sectors(struct block_device *bdev, sector_t sectors)
{
* mutex_lock(part->bd_mutex)
* mutex_lock_nested(whole->bd_mutex, 1)
*/
-
-static int __blkdev_get(struct block_device *bdev, fmode_t mode, void *holder,
- int for_part)
+static int __blkdev_get(struct block_device *bdev, struct gendisk *disk,
+ int partno, fmode_t mode)
{
- struct block_device *whole = NULL, *claiming = NULL;
- struct gendisk *disk;
int ret;
- int partno;
- bool first_open = false, unblock_events = true, need_restart;
-
- restart:
- need_restart = false;
- ret = -ENXIO;
- disk = bdev_get_gendisk(bdev, &partno);
- if (!disk)
- goto out;
-
- if (partno) {
- whole = bdget_disk(disk, 0);
- if (!whole) {
- ret = -ENOMEM;
- goto out_put_disk;
- }
- }
- if (!for_part && (mode & FMODE_EXCL)) {
- WARN_ON_ONCE(!holder);
- if (whole)
- claiming = whole;
- else
- claiming = bdev;
- ret = bd_prepare_to_claim(bdev, claiming, holder);
- if (ret)
- goto out_put_whole;
- }
-
- disk_block_events(disk);
- mutex_lock_nested(&bdev->bd_mutex, for_part);
if (!bdev->bd_openers) {
- first_open = true;
bdev->bd_disk = disk;
bdev->bd_contains = bdev;
bdev->bd_partno = partno;
goto out_clear;
ret = 0;
- if (disk->fops->open) {
+ if (disk->fops->open)
ret = disk->fops->open(bdev, mode);
- /*
- * If we lost a race with 'disk' being deleted,
- * try again. See md.c
- */
- if (ret == -ERESTARTSYS)
- need_restart = true;
- }
if (!ret) {
bd_set_nr_sectors(bdev, get_capacity(disk));
if (ret)
goto out_clear;
} else {
- BUG_ON(for_part);
- ret = __blkdev_get(whole, mode, NULL, 1);
- if (ret)
+ struct block_device *whole = bdget_disk(disk, 0);
+
+ mutex_lock_nested(&whole->bd_mutex, 1);
+ ret = __blkdev_get(whole, disk, 0, mode);
+ if (ret) {
+ mutex_unlock(&whole->bd_mutex);
+ bdput(whole);
goto out_clear;
- bdev->bd_contains = bdgrab(whole);
+ }
+ whole->bd_part_count++;
+ mutex_unlock(&whole->bd_mutex);
+
+ bdev->bd_contains = whole;
bdev->bd_part = disk_get_part(disk, partno);
if (!(disk->flags & GENHD_FL_UP) ||
!bdev->bd_part || !bdev->bd_part->nr_sects) {
+ __blkdev_put(whole, mode, 1);
+ bdput(whole);
ret = -ENXIO;
goto out_clear;
}
(!ret || ret == -ENOMEDIUM))
bdev_disk_changed(bdev, ret == -ENOMEDIUM);
if (ret)
- goto out_unlock_bdev;
+ return ret;
}
}
bdev->bd_openers++;
- if (for_part)
- bdev->bd_part_count++;
- if (claiming)
- bd_finish_claiming(bdev, claiming, holder);
-
- /*
- * Block event polling for write claims if requested. Any write holder
- * makes the write_holder state stick until all are released. This is
- * good enough and tracking individual writeable reference is too
- * fragile given the way @mode is used in blkdev_get/put().
- */
- if (claiming && (mode & FMODE_WRITE) && !bdev->bd_write_holder &&
- (disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE)) {
- bdev->bd_write_holder = true;
- unblock_events = false;
- }
- mutex_unlock(&bdev->bd_mutex);
-
- if (unblock_events)
- disk_unblock_events(disk);
-
- /* only one opener holds refs to the module and disk */
- if (!first_open)
- put_disk_and_module(disk);
- if (whole)
- bdput(whole);
return 0;
out_clear:
disk_put_part(bdev->bd_part);
bdev->bd_disk = NULL;
bdev->bd_part = NULL;
- if (bdev != bdev->bd_contains)
- __blkdev_put(bdev->bd_contains, mode, 1);
bdev->bd_contains = NULL;
- out_unlock_bdev:
- if (claiming)
- bd_abort_claiming(bdev, claiming, holder);
- mutex_unlock(&bdev->bd_mutex);
- disk_unblock_events(disk);
- out_put_whole:
- if (whole)
- bdput(whole);
- out_put_disk:
- put_disk_and_module(disk);
- if (need_restart)
- goto restart;
- out:
return ret;
}
/**
- * blkdev_get - open a block device
- * @bdev: block_device to open
+ * blkdev_get_by_dev - open a block device by device number
+ * @dev: device number of block device to open
* @mode: FMODE_* mask
* @holder: exclusive holder identifier
*
- * Open @bdev with @mode. If @mode includes %FMODE_EXCL, @bdev is
- * open with exclusive access. Specifying %FMODE_EXCL with %NULL
- * @holder is invalid. Exclusive opens may nest for the same @holder.
+ * Open the block device described by device number @dev. If @mode includes
+ * %FMODE_EXCL, the block device is opened with exclusive access. Specifying
+ * %FMODE_EXCL with a %NULL @holder is invalid. Exclusive opens may nest for
+ * the same @holder.
*
- * On success, the reference count of @bdev is unchanged. On failure,
- * @bdev is put.
+ * Use this interface ONLY if you really do not have anything better - i.e. when
+ * you are behind a truly sucky interface and all you are given is a device
+ * number. Everything else should use blkdev_get_by_path().
*
* CONTEXT:
* Might sleep.
*
* RETURNS:
- * 0 on success, -errno on failure.
+ * Reference to the block_device on success, ERR_PTR(-errno) on failure.
*/
-static int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
+struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
{
- int ret, perm = 0;
+ struct block_device *claiming;
+ bool unblock_events = true;
+ struct block_device *bdev;
+ struct gendisk *disk;
+ int partno;
+ int ret;
- if (mode & FMODE_READ)
- perm |= MAY_READ;
- if (mode & FMODE_WRITE)
- perm |= MAY_WRITE;
- ret = devcgroup_inode_permission(bdev->bd_inode, perm);
+ ret = devcgroup_check_permission(DEVCG_DEV_BLOCK,
+ MAJOR(dev), MINOR(dev),
+ ((mode & FMODE_READ) ? DEVCG_ACC_READ : 0) |
+ ((mode & FMODE_WRITE) ? DEVCG_ACC_WRITE : 0));
if (ret)
- goto bdput;
+ return ERR_PTR(ret);
- ret =__blkdev_get(bdev, mode, holder, 0);
- if (ret)
+ bdev = bdget(dev);
+ if (!bdev)
+ return ERR_PTR(-ENOMEM);
+
+ /*
+ * If we lost a race with 'disk' being deleted, try again. See md.c.
+ */
+retry:
+ ret = -ENXIO;
+ disk = bdev_get_gendisk(bdev, &partno);
+ if (!disk)
goto bdput;
- return 0;
+ if (mode & FMODE_EXCL) {
+ WARN_ON_ONCE(!holder);
+
+ ret = -ENOMEM;
+ claiming = bdget_disk(disk, 0);
+ if (!claiming)
+ goto put_disk;
+ ret = bd_prepare_to_claim(bdev, claiming, holder);
+ if (ret)
+ goto put_claiming;
+ }
+
+ disk_block_events(disk);
+
+ mutex_lock(&bdev->bd_mutex);
+ ret =__blkdev_get(bdev, disk, partno, mode);
+ if (!(mode & FMODE_EXCL)) {
+ ; /* nothing to do here */
+ } else if (ret) {
+ bd_abort_claiming(bdev, claiming, holder);
+ } else {
+ bd_finish_claiming(bdev, claiming, holder);
+
+ /*
+ * Block event polling for write claims if requested. Any write
+ * holder makes the write_holder state stick until all are
+ * released. This is good enough and tracking individual
+ * writeable reference is too fragile given the way @mode is
+ * used in blkdev_get_by_*()/blkdev_put().
+ */
+ if ((mode & FMODE_WRITE) && !bdev->bd_write_holder &&
+ (disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE)) {
+ bdev->bd_write_holder = true;
+ unblock_events = false;
+ }
+ }
+ mutex_unlock(&bdev->bd_mutex);
+
+ if (unblock_events)
+ disk_unblock_events(disk);
+
+put_claiming:
+ if (mode & FMODE_EXCL)
+ bdput(claiming);
+put_disk:
+ if (ret)
+ put_disk_and_module(disk);
+ if (ret == -ERESTARTSYS)
+ goto retry;
bdput:
- bdput(bdev);
- return ret;
+ if (ret) {
+ bdput(bdev);
+ return ERR_PTR(ret);
+ }
+ return bdev;
}
+EXPORT_SYMBOL(blkdev_get_by_dev);
/**
* blkdev_get_by_path - open a block device by name
* @mode: FMODE_* mask
* @holder: exclusive holder identifier
*
- * Open the blockdevice described by the device file at @path. @mode
- * and @holder are identical to blkdev_get().
- *
- * On success, the returned block_device has reference count of one.
+ * Open the block device described by the device file at @path. If @mode
+ * includes %FMODE_EXCL, the block device is opened with exclusive access.
+ * Specifying %FMODE_EXCL with a %NULL @holder is invalid. Exclusive opens may
+ * nest for the same @holder.
*
* CONTEXT:
* Might sleep.
*
* RETURNS:
- * Pointer to block_device on success, ERR_PTR(-errno) on failure.
+ * Reference to the block_device on success, ERR_PTR(-errno) on failure.
*/
struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
void *holder)
{
struct block_device *bdev;
- int err;
-
- bdev = lookup_bdev(path);
- if (IS_ERR(bdev))
- return bdev;
+ dev_t dev;
+ int error;
- err = blkdev_get(bdev, mode, holder);
- if (err)
- return ERR_PTR(err);
+ error = lookup_bdev(path, &dev);
+ if (error)
+ return ERR_PTR(error);
- if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) {
+ bdev = blkdev_get_by_dev(dev, mode, holder);
+ if (!IS_ERR(bdev) && (mode & FMODE_WRITE) && bdev_read_only(bdev)) {
blkdev_put(bdev, mode);
return ERR_PTR(-EACCES);
}
}
EXPORT_SYMBOL(blkdev_get_by_path);
-/**
- * blkdev_get_by_dev - open a block device by device number
- * @dev: device number of block device to open
- * @mode: FMODE_* mask
- * @holder: exclusive holder identifier
- *
- * Open the blockdevice described by device number @dev. @mode and
- * @holder are identical to blkdev_get().
- *
- * Use it ONLY if you really do not have anything better - i.e. when
- * you are behind a truly sucky interface and all you are given is a
- * device number. _Never_ to be used for internal purposes. If you
- * ever need it - reconsider your API.
- *
- * On success, the returned block_device has reference count of one.
- *
- * CONTEXT:
- * Might sleep.
- *
- * RETURNS:
- * Pointer to block_device on success, ERR_PTR(-errno) on failure.
- */
-struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
-{
- struct block_device *bdev;
- int err;
-
- bdev = bdget(dev);
- if (!bdev)
- return ERR_PTR(-ENOMEM);
-
- err = blkdev_get(bdev, mode, holder);
- if (err)
- return ERR_PTR(err);
-
- return bdev;
-}
-EXPORT_SYMBOL(blkdev_get_by_dev);
-
static int blkdev_open(struct inode * inode, struct file * filp)
{
struct block_device *bdev;
if ((filp->f_flags & O_ACCMODE) == 3)
filp->f_mode |= FMODE_WRITE_IOCTL;
- bdev = bd_acquire(inode);
- if (bdev == NULL)
- return -ENOMEM;
-
+ bdev = blkdev_get_by_dev(inode->i_rdev, filp->f_mode, filp);
+ if (IS_ERR(bdev))
+ return PTR_ERR(bdev);
filp->f_mapping = bdev->bd_inode->i_mapping;
filp->f_wb_err = filemap_sample_wb_err(filp->f_mapping);
-
- return blkdev_get(bdev, filp->f_mode, filp);
+ return 0;
}
static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
WARN_ON_ONCE(bdev->bd_holders);
sync_blockdev(bdev);
kill_bdev(bdev);
-
bdev_write_inode(bdev);
- }
- if (bdev->bd_contains == bdev) {
- if (disk->fops->release)
+
+ if (!bdev_is_partition(bdev) && disk->fops->release)
disk->fops->release(disk, mode);
- }
- if (!bdev->bd_openers) {
+
disk_put_part(bdev->bd_part);
bdev->bd_part = NULL;
bdev->bd_disk = NULL;
- if (bdev != bdev->bd_contains)
+ if (bdev_is_partition(bdev))
victim = bdev->bd_contains;
bdev->bd_contains = NULL;
-
- put_disk_and_module(disk);
+ } else {
+ if (!bdev_is_partition(bdev) && disk->fops->release)
+ disk->fops->release(disk, mode);
}
mutex_unlock(&bdev->bd_mutex);
- bdput(bdev);
- if (victim)
+ if (victim) {
__blkdev_put(victim, mode, 1);
+ bdput(victim);
+ }
}
void blkdev_put(struct block_device *bdev, fmode_t mode)
{
+ struct gendisk *disk = bdev->bd_disk;
+
mutex_lock(&bdev->bd_mutex);
if (mode & FMODE_EXCL) {
* unblock evpoll if it was a write holder.
*/
if (bdev_free && bdev->bd_write_holder) {
- disk_unblock_events(bdev->bd_disk);
+ disk_unblock_events(disk);
bdev->bd_write_holder = false;
}
}
* event. This is to ensure detection of media removal commanded
* from userland - e.g. eject(1).
*/
- disk_flush_events(bdev->bd_disk, DISK_EVENT_MEDIA_CHANGE);
+ disk_flush_events(disk, DISK_EVENT_MEDIA_CHANGE);
mutex_unlock(&bdev->bd_mutex);
__blkdev_put(bdev, mode, 0);
+ bdput(bdev);
+ put_disk_and_module(disk);
}
EXPORT_SYMBOL(blkdev_put);
* namespace if possible and return its device number in @dev. Return 0 on
* success or a negative errno otherwise.
*/
-struct block_device *lookup_bdev(const char *pathname)
+int lookup_bdev(const char *pathname, dev_t *dev)
{
- struct block_device *bdev;
struct inode *inode;
struct path path;
int error;
if (!pathname || !*pathname)
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
error = kern_path(pathname, LOOKUP_FOLLOW, &path);
if (error)
- return ERR_PTR(error);
+ return error;
inode = d_backing_inode(path.dentry);
error = -ENOTBLK;
if (!S_ISBLK(inode->i_mode))
- goto fail;
+ goto out_path_put;
error = -EACCES;
if (!may_open_dev(&path))
- goto fail;
- error = -ENOMEM;
- bdev = bd_acquire(inode);
- if (!bdev)
- goto fail;
-out:
+ goto out_path_put;
+
+ *dev = inode->i_rdev;
+ error = 0;
+out_path_put:
path_put(&path);
- return bdev;
-fail:
- bdev = ERR_PTR(error);
- goto out;
+ return error;
}
EXPORT_SYMBOL(lookup_bdev);