X-Git-Url: http://git.monstr.eu/?p=linux-2.6-microblaze.git;a=blobdiff_plain;f=fs%2Fblock_dev.c;h=6d6e4d50834cd00b9d3da1db583ffae10079c15c;hp=9e84b1928b9401d0eee5057e5c09e72473e649d0;hb=4e7b5671c6a8;hpb=699116c45e155925638677a74625ac9e9e046dc1 diff --git a/fs/block_dev.c b/fs/block_dev.c index 9e84b1928b94..6d6e4d50834c 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -548,55 +548,47 @@ EXPORT_SYMBOL(fsync_bdev); * count down in thaw_bdev(). When it becomes 0, thaw_bdev() will unfreeze * actually. */ -struct super_block *freeze_bdev(struct block_device *bdev) +int freeze_bdev(struct block_device *bdev) { struct super_block *sb; int error = 0; mutex_lock(&bdev->bd_fsfreeze_mutex); - if (++bdev->bd_fsfreeze_count > 1) { - /* - * We don't even need to grab a reference - the first call - * to freeze_bdev grab an active reference and only the last - * thaw_bdev drops it. - */ - sb = get_super(bdev); - if (sb) - drop_super(sb); - mutex_unlock(&bdev->bd_fsfreeze_mutex); - return sb; - } + if (++bdev->bd_fsfreeze_count > 1) + goto done; sb = get_active_super(bdev); if (!sb) - goto out; + goto sync; if (sb->s_op->freeze_super) error = sb->s_op->freeze_super(sb); else error = freeze_super(sb); + deactivate_super(sb); + if (error) { - deactivate_super(sb); bdev->bd_fsfreeze_count--; - mutex_unlock(&bdev->bd_fsfreeze_mutex); - return ERR_PTR(error); + goto done; } - deactivate_super(sb); - out: + bdev->bd_fsfreeze_sb = sb; + +sync: sync_blockdev(bdev); +done: mutex_unlock(&bdev->bd_fsfreeze_mutex); - return sb; /* thaw_bdev releases s->s_umount */ + return error; } EXPORT_SYMBOL(freeze_bdev); /** * thaw_bdev -- unlock filesystem * @bdev: blockdevice to unlock - * @sb: associated superblock * * Unlocks the filesystem and marks it writeable again after freeze_bdev(). */ -int thaw_bdev(struct block_device *bdev, struct super_block *sb) +int thaw_bdev(struct block_device *bdev) { + struct super_block *sb; int error = -EINVAL; mutex_lock(&bdev->bd_fsfreeze_mutex); @@ -607,6 +599,7 @@ int thaw_bdev(struct block_device *bdev, struct super_block *sb) if (--bdev->bd_fsfreeze_count > 0) goto out; + sb = bdev->bd_fsfreeze_sb; if (!sb) goto out; @@ -870,35 +863,12 @@ void __init bdev_cache_init(void) blockdev_superblock = bd_mnt->mnt_sb; /* For writeback */ } -/* - * Most likely _very_ bad one - but then it's hardly critical for small - * /dev and can be fixed when somebody will need really large one. - * Keep in mind that it will be fed through icache hash function too. - */ -static inline unsigned long hash(dev_t dev) -{ - return MAJOR(dev)+MINOR(dev); -} - -static int bdev_test(struct inode *inode, void *data) -{ - return BDEV_I(inode)->bdev.bd_dev == *(dev_t *)data; -} - -static int bdev_set(struct inode *inode, void *data) -{ - BDEV_I(inode)->bdev.bd_dev = *(dev_t *)data; - return 0; -} - static struct block_device *bdget(dev_t dev) { struct block_device *bdev; struct inode *inode; - inode = iget5_locked(blockdev_superblock, hash(dev), - bdev_test, bdev_set, &dev); - + inode = iget_locked(blockdev_superblock, dev); if (!inode) return NULL; @@ -910,9 +880,9 @@ static struct block_device *bdget(dev_t dev) bdev->bd_super = NULL; bdev->bd_inode = inode; bdev->bd_part_count = 0; + bdev->bd_dev = dev; inode->i_mode = S_IFBLK; inode->i_rdev = dev; - inode->i_bdev = bdev; inode->i_data.a_ops = &def_blk_aops; mapping_set_gfp_mask(&inode->i_data, GFP_USER); unlock_new_inode(inode); @@ -923,10 +893,14 @@ static struct block_device *bdget(dev_t dev) /** * bdgrab -- Grab a reference to an already referenced block device * @bdev: Block device to grab a reference to. + * + * Returns the block_device with an additional reference when successful, + * or NULL if the inode is already beeing freed. */ struct block_device *bdgrab(struct block_device *bdev) { - ihold(bdev->bd_inode); + if (!igrab(bdev->bd_inode)) + return NULL; return bdev; } EXPORT_SYMBOL(bdgrab); @@ -953,67 +927,8 @@ void bdput(struct block_device *bdev) { iput(bdev->bd_inode); } - EXPORT_SYMBOL(bdput); -static struct block_device *bd_acquire(struct inode *inode) -{ - struct block_device *bdev; - - spin_lock(&bdev_lock); - bdev = inode->i_bdev; - if (bdev && !inode_unhashed(bdev->bd_inode)) { - bdgrab(bdev); - spin_unlock(&bdev_lock); - return bdev; - } - spin_unlock(&bdev_lock); - - /* - * i_bdev references block device inode that was already shut down - * (corresponding device got removed). Remove the reference and look - * up block device inode again just in case new device got - * reestablished under the same device number. - */ - if (bdev) - bd_forget(inode); - - bdev = bdget(inode->i_rdev); - if (bdev) { - spin_lock(&bdev_lock); - if (!inode->i_bdev) { - /* - * We take an additional reference to bd_inode, - * and it's released in clear_inode() of inode. - * So, we can access it via ->i_mapping always - * without igrab(). - */ - bdgrab(bdev); - inode->i_bdev = bdev; - inode->i_mapping = bdev->bd_inode->i_mapping; - } - spin_unlock(&bdev_lock); - } - return bdev; -} - -/* Call when you free inode */ - -void bd_forget(struct inode *inode) -{ - struct block_device *bdev = NULL; - - spin_lock(&bdev_lock); - if (!sb_is_blkdev_sb(inode->i_sb)) - bdev = inode->i_bdev; - inode->i_bdev = NULL; - inode->i_mapping = &inode->i_data; - spin_unlock(&bdev_lock); - - if (bdev) - bdput(bdev); -} - /** * bd_may_claim - test whether a block device can be claimed * @bdev: block device of interest @@ -1249,7 +1164,7 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk) holder->disk = disk; holder->refcnt = 1; - ret = add_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj); + ret = add_symlink(disk->slave_dir, bdev_kobj(bdev)); if (ret) goto out_free; @@ -1266,7 +1181,7 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk) goto out_unlock; out_del: - del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj); + del_symlink(disk->slave_dir, bdev_kobj(bdev)); out_free: kfree(holder); out_unlock: @@ -1294,7 +1209,7 @@ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk) holder = bd_find_holder_disk(bdev, disk); if (!WARN_ON_ONCE(holder == NULL) && !--holder->refcnt) { - del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj); + del_symlink(disk->slave_dir, bdev_kobj(bdev)); del_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj); kobject_put(bdev->bd_part->holder_dir); @@ -1334,12 +1249,6 @@ static void check_disk_size_change(struct gendisk *disk, i_size_write(bdev->bd_inode, disk_size); } spin_unlock(&bdev->bd_size_lock); - - if (bdev_size > disk_size) { - if (__invalidate_device(bdev, false)) - pr_warn("VFS: busy inodes on resized disk %s\n", - disk->disk_name); - } } /** @@ -1368,7 +1277,6 @@ void revalidate_disk_size(struct gendisk *disk, bool verbose) bdput(bdev); } } -EXPORT_SYMBOL(revalidate_disk_size); void bd_set_nr_sectors(struct block_device *bdev, sector_t sectors) { @@ -1439,46 +1347,12 @@ EXPORT_SYMBOL_GPL(bdev_disk_changed); * mutex_lock(part->bd_mutex) * mutex_lock_nested(whole->bd_mutex, 1) */ - -static int __blkdev_get(struct block_device *bdev, fmode_t mode, void *holder, - int for_part) +static int __blkdev_get(struct block_device *bdev, struct gendisk *disk, + int partno, fmode_t mode) { - struct block_device *whole = NULL, *claiming = NULL; - struct gendisk *disk; int ret; - int partno; - bool first_open = false, unblock_events = true, need_restart; - - restart: - need_restart = false; - ret = -ENXIO; - disk = bdev_get_gendisk(bdev, &partno); - if (!disk) - goto out; - - if (partno) { - whole = bdget_disk(disk, 0); - if (!whole) { - ret = -ENOMEM; - goto out_put_disk; - } - } - if (!for_part && (mode & FMODE_EXCL)) { - WARN_ON_ONCE(!holder); - if (whole) - claiming = whole; - else - claiming = bdev; - ret = bd_prepare_to_claim(bdev, claiming, holder); - if (ret) - goto out_put_whole; - } - - disk_block_events(disk); - mutex_lock_nested(&bdev->bd_mutex, for_part); if (!bdev->bd_openers) { - first_open = true; bdev->bd_disk = disk; bdev->bd_contains = bdev; bdev->bd_partno = partno; @@ -1490,15 +1364,8 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, void *holder, goto out_clear; ret = 0; - if (disk->fops->open) { + if (disk->fops->open) ret = disk->fops->open(bdev, mode); - /* - * If we lost a race with 'disk' being deleted, - * try again. See md.c - */ - if (ret == -ERESTARTSYS) - need_restart = true; - } if (!ret) { bd_set_nr_sectors(bdev, get_capacity(disk)); @@ -1518,14 +1385,24 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, void *holder, if (ret) goto out_clear; } else { - BUG_ON(for_part); - ret = __blkdev_get(whole, mode, NULL, 1); - if (ret) + struct block_device *whole = bdget_disk(disk, 0); + + mutex_lock_nested(&whole->bd_mutex, 1); + ret = __blkdev_get(whole, disk, 0, mode); + if (ret) { + mutex_unlock(&whole->bd_mutex); + bdput(whole); goto out_clear; - bdev->bd_contains = bdgrab(whole); + } + whole->bd_part_count++; + mutex_unlock(&whole->bd_mutex); + + bdev->bd_contains = whole; bdev->bd_part = disk_get_part(disk, partno); if (!(disk->flags & GENHD_FL_UP) || !bdev->bd_part || !bdev->bd_part->nr_sects) { + __blkdev_put(whole, mode, 1); + bdput(whole); ret = -ENXIO; goto out_clear; } @@ -1545,101 +1422,127 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, void *holder, (!ret || ret == -ENOMEDIUM)) bdev_disk_changed(bdev, ret == -ENOMEDIUM); if (ret) - goto out_unlock_bdev; + return ret; } } bdev->bd_openers++; - if (for_part) - bdev->bd_part_count++; - if (claiming) - bd_finish_claiming(bdev, claiming, holder); - - /* - * Block event polling for write claims if requested. Any write holder - * makes the write_holder state stick until all are released. This is - * good enough and tracking individual writeable reference is too - * fragile given the way @mode is used in blkdev_get/put(). - */ - if (claiming && (mode & FMODE_WRITE) && !bdev->bd_write_holder && - (disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE)) { - bdev->bd_write_holder = true; - unblock_events = false; - } - mutex_unlock(&bdev->bd_mutex); - - if (unblock_events) - disk_unblock_events(disk); - - /* only one opener holds refs to the module and disk */ - if (!first_open) - put_disk_and_module(disk); - if (whole) - bdput(whole); return 0; out_clear: disk_put_part(bdev->bd_part); bdev->bd_disk = NULL; bdev->bd_part = NULL; - if (bdev != bdev->bd_contains) - __blkdev_put(bdev->bd_contains, mode, 1); bdev->bd_contains = NULL; - out_unlock_bdev: - if (claiming) - bd_abort_claiming(bdev, claiming, holder); - mutex_unlock(&bdev->bd_mutex); - disk_unblock_events(disk); - out_put_whole: - if (whole) - bdput(whole); - out_put_disk: - put_disk_and_module(disk); - if (need_restart) - goto restart; - out: return ret; } /** - * blkdev_get - open a block device - * @bdev: block_device to open + * blkdev_get_by_dev - open a block device by device number + * @dev: device number of block device to open * @mode: FMODE_* mask * @holder: exclusive holder identifier * - * Open @bdev with @mode. If @mode includes %FMODE_EXCL, @bdev is - * open with exclusive access. Specifying %FMODE_EXCL with %NULL - * @holder is invalid. Exclusive opens may nest for the same @holder. + * Open the block device described by device number @dev. If @mode includes + * %FMODE_EXCL, the block device is opened with exclusive access. Specifying + * %FMODE_EXCL with a %NULL @holder is invalid. Exclusive opens may nest for + * the same @holder. * - * On success, the reference count of @bdev is unchanged. On failure, - * @bdev is put. + * Use this interface ONLY if you really do not have anything better - i.e. when + * you are behind a truly sucky interface and all you are given is a device + * number. Everything else should use blkdev_get_by_path(). * * CONTEXT: * Might sleep. * * RETURNS: - * 0 on success, -errno on failure. + * Reference to the block_device on success, ERR_PTR(-errno) on failure. */ -static int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder) +struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder) { - int ret, perm = 0; + struct block_device *claiming; + bool unblock_events = true; + struct block_device *bdev; + struct gendisk *disk; + int partno; + int ret; - if (mode & FMODE_READ) - perm |= MAY_READ; - if (mode & FMODE_WRITE) - perm |= MAY_WRITE; - ret = devcgroup_inode_permission(bdev->bd_inode, perm); + ret = devcgroup_check_permission(DEVCG_DEV_BLOCK, + MAJOR(dev), MINOR(dev), + ((mode & FMODE_READ) ? DEVCG_ACC_READ : 0) | + ((mode & FMODE_WRITE) ? DEVCG_ACC_WRITE : 0)); if (ret) - goto bdput; + return ERR_PTR(ret); - ret =__blkdev_get(bdev, mode, holder, 0); - if (ret) + bdev = bdget(dev); + if (!bdev) + return ERR_PTR(-ENOMEM); + + /* + * If we lost a race with 'disk' being deleted, try again. See md.c. + */ +retry: + ret = -ENXIO; + disk = bdev_get_gendisk(bdev, &partno); + if (!disk) goto bdput; - return 0; + if (mode & FMODE_EXCL) { + WARN_ON_ONCE(!holder); + + ret = -ENOMEM; + claiming = bdget_disk(disk, 0); + if (!claiming) + goto put_disk; + ret = bd_prepare_to_claim(bdev, claiming, holder); + if (ret) + goto put_claiming; + } + + disk_block_events(disk); + + mutex_lock(&bdev->bd_mutex); + ret =__blkdev_get(bdev, disk, partno, mode); + if (!(mode & FMODE_EXCL)) { + ; /* nothing to do here */ + } else if (ret) { + bd_abort_claiming(bdev, claiming, holder); + } else { + bd_finish_claiming(bdev, claiming, holder); + + /* + * Block event polling for write claims if requested. Any write + * holder makes the write_holder state stick until all are + * released. This is good enough and tracking individual + * writeable reference is too fragile given the way @mode is + * used in blkdev_get/put(). + */ + if ((mode & FMODE_WRITE) && !bdev->bd_write_holder && + (disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE)) { + bdev->bd_write_holder = true; + unblock_events = false; + } + } + mutex_unlock(&bdev->bd_mutex); + + if (unblock_events) + disk_unblock_events(disk); + +put_claiming: + if (mode & FMODE_EXCL) + bdput(claiming); +put_disk: + if (ret) + put_disk_and_module(disk); + if (ret == -ERESTARTSYS) + goto retry; bdput: - bdput(bdev); - return ret; + if (ret) { + bdput(bdev); + return ERR_PTR(ret); + } + return bdev; } +EXPORT_SYMBOL(blkdev_get_by_dev); /** * blkdev_get_by_path - open a block device by name @@ -1647,32 +1550,30 @@ bdput: * @mode: FMODE_* mask * @holder: exclusive holder identifier * - * Open the blockdevice described by the device file at @path. @mode - * and @holder are identical to blkdev_get(). - * - * On success, the returned block_device has reference count of one. + * Open the block device described by the device file at @path. If @mode + * includes %FMODE_EXCL, the block device is opened with exclusive access. + * Specifying %FMODE_EXCL with a %NULL @holder is invalid. Exclusive opens may + * nest for the same @holder. * * CONTEXT: * Might sleep. * * RETURNS: - * Pointer to block_device on success, ERR_PTR(-errno) on failure. + * Reference to the block_device on success, ERR_PTR(-errno) on failure. */ struct block_device *blkdev_get_by_path(const char *path, fmode_t mode, void *holder) { struct block_device *bdev; - int err; - - bdev = lookup_bdev(path); - if (IS_ERR(bdev)) - return bdev; + dev_t dev; + int error; - err = blkdev_get(bdev, mode, holder); - if (err) - return ERR_PTR(err); + error = lookup_bdev(path, &dev); + if (error) + return ERR_PTR(error); - if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) { + bdev = blkdev_get_by_dev(dev, mode, holder); + if (!IS_ERR(bdev) && (mode & FMODE_WRITE) && bdev_read_only(bdev)) { blkdev_put(bdev, mode); return ERR_PTR(-EACCES); } @@ -1681,45 +1582,6 @@ struct block_device *blkdev_get_by_path(const char *path, fmode_t mode, } EXPORT_SYMBOL(blkdev_get_by_path); -/** - * blkdev_get_by_dev - open a block device by device number - * @dev: device number of block device to open - * @mode: FMODE_* mask - * @holder: exclusive holder identifier - * - * Open the blockdevice described by device number @dev. @mode and - * @holder are identical to blkdev_get(). - * - * Use it ONLY if you really do not have anything better - i.e. when - * you are behind a truly sucky interface and all you are given is a - * device number. _Never_ to be used for internal purposes. If you - * ever need it - reconsider your API. - * - * On success, the returned block_device has reference count of one. - * - * CONTEXT: - * Might sleep. - * - * RETURNS: - * Pointer to block_device on success, ERR_PTR(-errno) on failure. - */ -struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder) -{ - struct block_device *bdev; - int err; - - bdev = bdget(dev); - if (!bdev) - return ERR_PTR(-ENOMEM); - - err = blkdev_get(bdev, mode, holder); - if (err) - return ERR_PTR(err); - - return bdev; -} -EXPORT_SYMBOL(blkdev_get_by_dev); - static int blkdev_open(struct inode * inode, struct file * filp) { struct block_device *bdev; @@ -1741,14 +1603,12 @@ static int blkdev_open(struct inode * inode, struct file * filp) if ((filp->f_flags & O_ACCMODE) == 3) filp->f_mode |= FMODE_WRITE_IOCTL; - bdev = bd_acquire(inode); - if (bdev == NULL) - return -ENOMEM; - + bdev = blkdev_get_by_dev(inode->i_rdev, filp->f_mode, filp); + if (IS_ERR(bdev)) + return PTR_ERR(bdev); filp->f_mapping = bdev->bd_inode->i_mapping; filp->f_wb_err = filemap_sample_wb_err(filp->f_mapping); - - return blkdev_get(bdev, filp->f_mode, filp); + return 0; } static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part) @@ -1774,31 +1634,32 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part) WARN_ON_ONCE(bdev->bd_holders); sync_blockdev(bdev); kill_bdev(bdev); - bdev_write_inode(bdev); - } - if (bdev->bd_contains == bdev) { - if (disk->fops->release) + + if (!bdev_is_partition(bdev) && disk->fops->release) disk->fops->release(disk, mode); - } - if (!bdev->bd_openers) { + disk_put_part(bdev->bd_part); bdev->bd_part = NULL; bdev->bd_disk = NULL; - if (bdev != bdev->bd_contains) + if (bdev_is_partition(bdev)) victim = bdev->bd_contains; bdev->bd_contains = NULL; - - put_disk_and_module(disk); + } else { + if (!bdev_is_partition(bdev) && disk->fops->release) + disk->fops->release(disk, mode); } mutex_unlock(&bdev->bd_mutex); - bdput(bdev); - if (victim) + if (victim) { __blkdev_put(victim, mode, 1); + bdput(victim); + } } void blkdev_put(struct block_device *bdev, fmode_t mode) { + struct gendisk *disk = bdev->bd_disk; + mutex_lock(&bdev->bd_mutex); if (mode & FMODE_EXCL) { @@ -1827,7 +1688,7 @@ void blkdev_put(struct block_device *bdev, fmode_t mode) * unblock evpoll if it was a write holder. */ if (bdev_free && bdev->bd_write_holder) { - disk_unblock_events(bdev->bd_disk); + disk_unblock_events(disk); bdev->bd_write_holder = false; } } @@ -1837,11 +1698,13 @@ void blkdev_put(struct block_device *bdev, fmode_t mode) * event. This is to ensure detection of media removal commanded * from userland - e.g. eject(1). */ - disk_flush_events(bdev->bd_disk, DISK_EVENT_MEDIA_CHANGE); + disk_flush_events(disk, DISK_EVENT_MEDIA_CHANGE); mutex_unlock(&bdev->bd_mutex); __blkdev_put(bdev, mode, 0); + bdput(bdev); + put_disk_and_module(disk); } EXPORT_SYMBOL(blkdev_put); @@ -2054,37 +1917,32 @@ const struct file_operations def_blk_fops = { * namespace if possible and return it. Return ERR_PTR(error) * otherwise. */ -struct block_device *lookup_bdev(const char *pathname) +int lookup_bdev(const char *pathname, dev_t *dev) { - struct block_device *bdev; struct inode *inode; struct path path; int error; if (!pathname || !*pathname) - return ERR_PTR(-EINVAL); + return -EINVAL; error = kern_path(pathname, LOOKUP_FOLLOW, &path); if (error) - return ERR_PTR(error); + return error; inode = d_backing_inode(path.dentry); error = -ENOTBLK; if (!S_ISBLK(inode->i_mode)) - goto fail; + goto out_path_put; error = -EACCES; if (!may_open_dev(&path)) - goto fail; - error = -ENOMEM; - bdev = bd_acquire(inode); - if (!bdev) - goto fail; -out: + goto out_path_put; + + *dev = inode->i_rdev; + error = 0; +out_path_put: path_put(&path); - return bdev; -fail: - bdev = ERR_PTR(error); - goto out; + return error; } EXPORT_SYMBOL(lookup_bdev);