block: opencode devcgroup_inode_permission
[linux-2.6-microblaze.git] / fs / block_dev.c
index 9e84b19..2b8c058 100644 (file)
@@ -548,55 +548,47 @@ EXPORT_SYMBOL(fsync_bdev);
  * count down in thaw_bdev(). When it becomes 0, thaw_bdev() will unfreeze
  * actually.
  */
-struct super_block *freeze_bdev(struct block_device *bdev)
+int freeze_bdev(struct block_device *bdev)
 {
        struct super_block *sb;
        int error = 0;
 
        mutex_lock(&bdev->bd_fsfreeze_mutex);
-       if (++bdev->bd_fsfreeze_count > 1) {
-               /*
-                * We don't even need to grab a reference - the first call
-                * to freeze_bdev grab an active reference and only the last
-                * thaw_bdev drops it.
-                */
-               sb = get_super(bdev);
-               if (sb)
-                       drop_super(sb);
-               mutex_unlock(&bdev->bd_fsfreeze_mutex);
-               return sb;
-       }
+       if (++bdev->bd_fsfreeze_count > 1)
+               goto done;
 
        sb = get_active_super(bdev);
        if (!sb)
-               goto out;
+               goto sync;
        if (sb->s_op->freeze_super)
                error = sb->s_op->freeze_super(sb);
        else
                error = freeze_super(sb);
+       deactivate_super(sb);
+
        if (error) {
-               deactivate_super(sb);
                bdev->bd_fsfreeze_count--;
-               mutex_unlock(&bdev->bd_fsfreeze_mutex);
-               return ERR_PTR(error);
+               goto done;
        }
-       deactivate_super(sb);
- out:
+       bdev->bd_fsfreeze_sb = sb;
+
+sync:
        sync_blockdev(bdev);
+done:
        mutex_unlock(&bdev->bd_fsfreeze_mutex);
-       return sb;      /* thaw_bdev releases s->s_umount */
+       return error;
 }
 EXPORT_SYMBOL(freeze_bdev);
 
 /**
  * thaw_bdev  -- unlock filesystem
  * @bdev:      blockdevice to unlock
- * @sb:                associated superblock
  *
  * Unlocks the filesystem and marks it writeable again after freeze_bdev().
  */
-int thaw_bdev(struct block_device *bdev, struct super_block *sb)
+int thaw_bdev(struct block_device *bdev)
 {
+       struct super_block *sb;
        int error = -EINVAL;
 
        mutex_lock(&bdev->bd_fsfreeze_mutex);
@@ -607,6 +599,7 @@ int thaw_bdev(struct block_device *bdev, struct super_block *sb)
        if (--bdev->bd_fsfreeze_count > 0)
                goto out;
 
+       sb = bdev->bd_fsfreeze_sb;
        if (!sb)
                goto out;
 
@@ -870,35 +863,12 @@ void __init bdev_cache_init(void)
        blockdev_superblock = bd_mnt->mnt_sb;   /* For writeback */
 }
 
-/*
- * Most likely _very_ bad one - but then it's hardly critical for small
- * /dev and can be fixed when somebody will need really large one.
- * Keep in mind that it will be fed through icache hash function too.
- */
-static inline unsigned long hash(dev_t dev)
-{
-       return MAJOR(dev)+MINOR(dev);
-}
-
-static int bdev_test(struct inode *inode, void *data)
-{
-       return BDEV_I(inode)->bdev.bd_dev == *(dev_t *)data;
-}
-
-static int bdev_set(struct inode *inode, void *data)
-{
-       BDEV_I(inode)->bdev.bd_dev = *(dev_t *)data;
-       return 0;
-}
-
 static struct block_device *bdget(dev_t dev)
 {
        struct block_device *bdev;
        struct inode *inode;
 
-       inode = iget5_locked(blockdev_superblock, hash(dev),
-                       bdev_test, bdev_set, &dev);
-
+       inode = iget_locked(blockdev_superblock, dev);
        if (!inode)
                return NULL;
 
@@ -910,6 +880,7 @@ static struct block_device *bdget(dev_t dev)
                bdev->bd_super = NULL;
                bdev->bd_inode = inode;
                bdev->bd_part_count = 0;
+               bdev->bd_dev = dev;
                inode->i_mode = S_IFBLK;
                inode->i_rdev = dev;
                inode->i_bdev = bdev;
@@ -923,10 +894,14 @@ static struct block_device *bdget(dev_t dev)
 /**
  * bdgrab -- Grab a reference to an already referenced block device
  * @bdev:      Block device to grab a reference to.
+ *
+ * Returns the block_device with an additional reference when successful,
+ * or NULL if the inode is already being freed.
  */
 struct block_device *bdgrab(struct block_device *bdev)
 {
-       ihold(bdev->bd_inode);
+       if (!igrab(bdev->bd_inode))
+               return NULL;
        return bdev;
 }
 EXPORT_SYMBOL(bdgrab);
@@ -1249,7 +1224,7 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
        holder->disk = disk;
        holder->refcnt = 1;
 
-       ret = add_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
+       ret = add_symlink(disk->slave_dir, bdev_kobj(bdev));
        if (ret)
                goto out_free;
 
@@ -1266,7 +1241,7 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
        goto out_unlock;
 
 out_del:
-       del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
+       del_symlink(disk->slave_dir, bdev_kobj(bdev));
 out_free:
        kfree(holder);
 out_unlock:
@@ -1294,7 +1269,7 @@ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
        holder = bd_find_holder_disk(bdev, disk);
 
        if (!WARN_ON_ONCE(holder == NULL) && !--holder->refcnt) {
-               del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
+               del_symlink(disk->slave_dir, bdev_kobj(bdev));
                del_symlink(bdev->bd_part->holder_dir,
                            &disk_to_dev(disk)->kobj);
                kobject_put(bdev->bd_part->holder_dir);
@@ -1334,12 +1309,6 @@ static void check_disk_size_change(struct gendisk *disk,
                i_size_write(bdev->bd_inode, disk_size);
        }
        spin_unlock(&bdev->bd_size_lock);
-
-       if (bdev_size > disk_size) {
-               if (__invalidate_device(bdev, false))
-                       pr_warn("VFS: busy inodes on resized disk %s\n",
-                               disk->disk_name);
-       }
 }
 
 /**
@@ -1368,7 +1337,6 @@ void revalidate_disk_size(struct gendisk *disk, bool verbose)
                bdput(bdev);
        }
 }
-EXPORT_SYMBOL(revalidate_disk_size);
 
 void bd_set_nr_sectors(struct block_device *bdev, sector_t sectors)
 {
@@ -1439,46 +1407,12 @@ EXPORT_SYMBOL_GPL(bdev_disk_changed);
  *  mutex_lock(part->bd_mutex)
  *    mutex_lock_nested(whole->bd_mutex, 1)
  */
-
-static int __blkdev_get(struct block_device *bdev, fmode_t mode, void *holder,
-               int for_part)
+static int __blkdev_get(struct block_device *bdev, struct gendisk *disk,
+               int partno, fmode_t mode)
 {
-       struct block_device *whole = NULL, *claiming = NULL;
-       struct gendisk *disk;
        int ret;
-       int partno;
-       bool first_open = false, unblock_events = true, need_restart;
 
- restart:
-       need_restart = false;
-       ret = -ENXIO;
-       disk = bdev_get_gendisk(bdev, &partno);
-       if (!disk)
-               goto out;
-
-       if (partno) {
-               whole = bdget_disk(disk, 0);
-               if (!whole) {
-                       ret = -ENOMEM;
-                       goto out_put_disk;
-               }
-       }
-
-       if (!for_part && (mode & FMODE_EXCL)) {
-               WARN_ON_ONCE(!holder);
-               if (whole)
-                       claiming = whole;
-               else
-                       claiming = bdev;
-               ret = bd_prepare_to_claim(bdev, claiming, holder);
-               if (ret)
-                       goto out_put_whole;
-       }
-
-       disk_block_events(disk);
-       mutex_lock_nested(&bdev->bd_mutex, for_part);
        if (!bdev->bd_openers) {
-               first_open = true;
                bdev->bd_disk = disk;
                bdev->bd_contains = bdev;
                bdev->bd_partno = partno;
@@ -1490,15 +1424,8 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, void *holder,
                                goto out_clear;
 
                        ret = 0;
-                       if (disk->fops->open) {
+                       if (disk->fops->open)
                                ret = disk->fops->open(bdev, mode);
-                               /*
-                                * If we lost a race with 'disk' being deleted,
-                                * try again.  See md.c
-                                */
-                               if (ret == -ERESTARTSYS)
-                                       need_restart = true;
-                       }
 
                        if (!ret) {
                                bd_set_nr_sectors(bdev, get_capacity(disk));
@@ -1518,14 +1445,24 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, void *holder,
                        if (ret)
                                goto out_clear;
                } else {
-                       BUG_ON(for_part);
-                       ret = __blkdev_get(whole, mode, NULL, 1);
-                       if (ret)
+                       struct block_device *whole = bdget_disk(disk, 0);
+
+                       mutex_lock_nested(&whole->bd_mutex, 1);
+                       ret = __blkdev_get(whole, disk, 0, mode);
+                       if (ret) {
+                               mutex_unlock(&whole->bd_mutex);
+                               bdput(whole);
                                goto out_clear;
-                       bdev->bd_contains = bdgrab(whole);
+                       }
+                       whole->bd_part_count++;
+                       mutex_unlock(&whole->bd_mutex);
+
+                       bdev->bd_contains = whole;
                        bdev->bd_part = disk_get_part(disk, partno);
                        if (!(disk->flags & GENHD_FL_UP) ||
                            !bdev->bd_part || !bdev->bd_part->nr_sects) {
+                               __blkdev_put(whole, mode, 1);
+                               bdput(whole);
                                ret = -ENXIO;
                                goto out_clear;
                        }
@@ -1545,58 +1482,17 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, void *holder,
                            (!ret || ret == -ENOMEDIUM))
                                bdev_disk_changed(bdev, ret == -ENOMEDIUM);
                        if (ret)
-                               goto out_unlock_bdev;
+                               return ret;
                }
        }
        bdev->bd_openers++;
-       if (for_part)
-               bdev->bd_part_count++;
-       if (claiming)
-               bd_finish_claiming(bdev, claiming, holder);
-
-       /*
-        * Block event polling for write claims if requested.  Any write holder
-        * makes the write_holder state stick until all are released.  This is
-        * good enough and tracking individual writeable reference is too
-        * fragile given the way @mode is used in blkdev_get/put().
-        */
-       if (claiming && (mode & FMODE_WRITE) && !bdev->bd_write_holder &&
-           (disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE)) {
-               bdev->bd_write_holder = true;
-               unblock_events = false;
-       }
-       mutex_unlock(&bdev->bd_mutex);
-
-       if (unblock_events)
-               disk_unblock_events(disk);
-
-       /* only one opener holds refs to the module and disk */
-       if (!first_open)
-               put_disk_and_module(disk);
-       if (whole)
-               bdput(whole);
        return 0;
 
  out_clear:
        disk_put_part(bdev->bd_part);
        bdev->bd_disk = NULL;
        bdev->bd_part = NULL;
-       if (bdev != bdev->bd_contains)
-               __blkdev_put(bdev->bd_contains, mode, 1);
        bdev->bd_contains = NULL;
- out_unlock_bdev:
-       if (claiming)
-               bd_abort_claiming(bdev, claiming, holder);
-       mutex_unlock(&bdev->bd_mutex);
-       disk_unblock_events(disk);
- out_put_whole:
-       if (whole)
-               bdput(whole);
- out_put_disk:
-       put_disk_and_module(disk);
-       if (need_restart)
-               goto restart;
- out:
        return ret;
 }
 
@@ -1621,23 +1517,80 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, void *holder,
  */
 static int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
 {
-       int ret, perm = 0;
+       struct block_device *claiming;
+       bool unblock_events = true;
+       struct gendisk *disk;
+       int partno;
+       int ret;
 
-       if (mode & FMODE_READ)
-               perm |= MAY_READ;
-       if (mode & FMODE_WRITE)
-               perm |= MAY_WRITE;
-       ret = devcgroup_inode_permission(bdev->bd_inode, perm);
+       ret = devcgroup_check_permission(DEVCG_DEV_BLOCK,
+                       imajor(bdev->bd_inode), iminor(bdev->bd_inode),
+                       ((mode & FMODE_READ) ? DEVCG_ACC_READ : 0) |
+                       ((mode & FMODE_WRITE) ? DEVCG_ACC_WRITE : 0));
        if (ret)
                goto bdput;
 
-       ret =__blkdev_get(bdev, mode, holder, 0);
-       if (ret)
+       /*
+        * If we lost a race with 'disk' being deleted, try again.  See md.c.
+        */
+retry:
+       ret = -ENXIO;
+       disk = bdev_get_gendisk(bdev, &partno);
+       if (!disk)
                goto bdput;
-       return 0;
 
+       if (mode & FMODE_EXCL) {
+               WARN_ON_ONCE(!holder);
+       
+               ret = -ENOMEM;
+               claiming = bdget_disk(disk, 0);
+               if (!claiming)
+                       goto put_disk;
+               ret = bd_prepare_to_claim(bdev, claiming, holder);
+               if (ret)
+                       goto put_claiming;
+       }
+
+       disk_block_events(disk);
+
+       mutex_lock(&bdev->bd_mutex);
+       ret =__blkdev_get(bdev, disk, partno, mode);
+       if (!(mode & FMODE_EXCL)) {
+               ; /* nothing to do here */
+       } else if (ret) {
+               bd_abort_claiming(bdev, claiming, holder);
+       } else {
+               bd_finish_claiming(bdev, claiming, holder);
+
+               /*
+                * Block event polling for write claims if requested.  Any write
+                * holder makes the write_holder state stick until all are
+                * released.  This is good enough and tracking individual
+                * writeable reference is too fragile given the way @mode is
+                * used in blkdev_get/put().
+                */
+               if ((mode & FMODE_WRITE) && !bdev->bd_write_holder &&
+                   (disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE)) {
+                       bdev->bd_write_holder = true;
+                       unblock_events = false;
+               }
+       }
+       mutex_unlock(&bdev->bd_mutex);
+
+       if (unblock_events)
+               disk_unblock_events(disk);
+
+put_claiming:
+       if (mode & FMODE_EXCL)
+               bdput(claiming);
+put_disk:
+       if (ret)
+               put_disk_and_module(disk);
+       if (ret == -ERESTARTSYS)
+               goto retry;
 bdput:
-       bdput(bdev);
+       if (ret)
+               bdput(bdev);
        return ret;
 }
 
@@ -1774,31 +1727,32 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
                WARN_ON_ONCE(bdev->bd_holders);
                sync_blockdev(bdev);
                kill_bdev(bdev);
-
                bdev_write_inode(bdev);
-       }
-       if (bdev->bd_contains == bdev) {
-               if (disk->fops->release)
+
+               if (!bdev_is_partition(bdev) && disk->fops->release)
                        disk->fops->release(disk, mode);
-       }
-       if (!bdev->bd_openers) {
+
                disk_put_part(bdev->bd_part);
                bdev->bd_part = NULL;
                bdev->bd_disk = NULL;
-               if (bdev != bdev->bd_contains)
+               if (bdev_is_partition(bdev))
                        victim = bdev->bd_contains;
                bdev->bd_contains = NULL;
-
-               put_disk_and_module(disk);
+       } else {
+               if (!bdev_is_partition(bdev) && disk->fops->release)
+                       disk->fops->release(disk, mode);
        }
        mutex_unlock(&bdev->bd_mutex);
-       bdput(bdev);
-       if (victim)
+       if (victim) {
                __blkdev_put(victim, mode, 1);
+               bdput(victim);
+       }
 }
 
 void blkdev_put(struct block_device *bdev, fmode_t mode)
 {
+       struct gendisk *disk = bdev->bd_disk;
+
        mutex_lock(&bdev->bd_mutex);
 
        if (mode & FMODE_EXCL) {
@@ -1827,7 +1781,7 @@ void blkdev_put(struct block_device *bdev, fmode_t mode)
                 * unblock evpoll if it was a write holder.
                 */
                if (bdev_free && bdev->bd_write_holder) {
-                       disk_unblock_events(bdev->bd_disk);
+                       disk_unblock_events(disk);
                        bdev->bd_write_holder = false;
                }
        }
@@ -1837,11 +1791,13 @@ void blkdev_put(struct block_device *bdev, fmode_t mode)
         * event.  This is to ensure detection of media removal commanded
         * from userland - e.g. eject(1).
         */
-       disk_flush_events(bdev->bd_disk, DISK_EVENT_MEDIA_CHANGE);
+       disk_flush_events(disk, DISK_EVENT_MEDIA_CHANGE);
 
        mutex_unlock(&bdev->bd_mutex);
 
        __blkdev_put(bdev, mode, 0);
+       bdput(bdev);
+       put_disk_and_module(disk);
 }
 EXPORT_SYMBOL(blkdev_put);