Merge branch 'misc.namei' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
[linux-2.6-microblaze.git] / fs / btrfs / volumes.c
index 70f94b7..464485a 100644 (file)
@@ -38,7 +38,7 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
                .sub_stripes    = 2,
                .dev_stripes    = 1,
                .devs_max       = 0,    /* 0 == as many as possible */
-               .devs_min       = 4,
+               .devs_min       = 2,
                .tolerated_failures = 1,
                .devs_increment = 2,
                .ncopies        = 2,
@@ -103,7 +103,7 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
                .sub_stripes    = 1,
                .dev_stripes    = 1,
                .devs_max       = 0,
-               .devs_min       = 2,
+               .devs_min       = 1,
                .tolerated_failures = 0,
                .devs_increment = 1,
                .ncopies        = 1,
@@ -153,6 +153,32 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
        },
 };
 
+/*
+ * Convert block group flags (BTRFS_BLOCK_GROUP_*) to the btrfs_raid_types index
+ * of btrfs_raid_array[]. First matching profile bit wins; none set means SINGLE.
+ */
+enum btrfs_raid_types __attribute_const__ btrfs_bg_flags_to_raid_index(u64 flags)
+{
+       if (flags & BTRFS_BLOCK_GROUP_RAID10)
+               return BTRFS_RAID_RAID10;
+       else if (flags & BTRFS_BLOCK_GROUP_RAID1)
+               return BTRFS_RAID_RAID1;
+       else if (flags & BTRFS_BLOCK_GROUP_RAID1C3)
+               return BTRFS_RAID_RAID1C3;
+       else if (flags & BTRFS_BLOCK_GROUP_RAID1C4)
+               return BTRFS_RAID_RAID1C4;
+       else if (flags & BTRFS_BLOCK_GROUP_DUP)
+               return BTRFS_RAID_DUP;
+       else if (flags & BTRFS_BLOCK_GROUP_RAID0)
+               return BTRFS_RAID_RAID0;
+       else if (flags & BTRFS_BLOCK_GROUP_RAID5)
+               return BTRFS_RAID_RAID5;
+       else if (flags & BTRFS_BLOCK_GROUP_RAID6)
+               return BTRFS_RAID_RAID6;
+
+       return BTRFS_RAID_SINGLE; /* BTRFS_BLOCK_GROUP_SINGLE */
+}
+
 const char *btrfs_bg_type_to_raid_name(u64 flags)
 {
        const int index = btrfs_bg_flags_to_raid_index(flags);
@@ -404,44 +430,6 @@ void __exit btrfs_cleanup_fs_uuids(void)
        }
 }
 
-/*
- * Returns a pointer to a new btrfs_device on success; ERR_PTR() on error.
- * Returned struct is not linked onto any lists and must be destroyed using
- * btrfs_free_device.
- */
-static struct btrfs_device *__alloc_device(struct btrfs_fs_info *fs_info)
-{
-       struct btrfs_device *dev;
-
-       dev = kzalloc(sizeof(*dev), GFP_KERNEL);
-       if (!dev)
-               return ERR_PTR(-ENOMEM);
-
-       /*
-        * Preallocate a bio that's always going to be used for flushing device
-        * barriers and matches the device lifespan
-        */
-       dev->flush_bio = bio_kmalloc(GFP_KERNEL, 0);
-       if (!dev->flush_bio) {
-               kfree(dev);
-               return ERR_PTR(-ENOMEM);
-       }
-
-       INIT_LIST_HEAD(&dev->dev_list);
-       INIT_LIST_HEAD(&dev->dev_alloc_list);
-       INIT_LIST_HEAD(&dev->post_commit_list);
-
-       atomic_set(&dev->reada_in_flight, 0);
-       atomic_set(&dev->dev_stats_ccnt, 0);
-       btrfs_device_data_ordered_init(dev);
-       INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
-       INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
-       extent_io_tree_init(fs_info, &dev->alloc_state,
-                           IO_TREE_DEVICE_ALLOC_STATE, NULL);
-
-       return dev;
-}
-
 static noinline struct btrfs_fs_devices *find_fsid(
                const u8 *fsid, const u8 *metadata_fsid)
 {
@@ -570,6 +558,8 @@ static int btrfs_free_stale_devices(const char *path,
        struct btrfs_device *device, *tmp_device;
        int ret = 0;
 
+       lockdep_assert_held(&uuid_mutex);
+
        if (path)
                ret = -ENOENT;
 
@@ -1000,11 +990,12 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
        struct btrfs_device *orig_dev;
        int ret = 0;
 
+       lockdep_assert_held(&uuid_mutex);
+
        fs_devices = alloc_fs_devices(orig->fsid, NULL);
        if (IS_ERR(fs_devices))
                return fs_devices;
 
-       mutex_lock(&orig->device_list_mutex);
        fs_devices->total_devices = orig->total_devices;
 
        list_for_each_entry(orig_dev, &orig->devices, dev_list) {
@@ -1036,10 +1027,8 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
                device->fs_devices = fs_devices;
                fs_devices->num_devices++;
        }
-       mutex_unlock(&orig->device_list_mutex);
        return fs_devices;
 error:
-       mutex_unlock(&orig->device_list_mutex);
        free_fs_devices(fs_devices);
        return ERR_PTR(ret);
 }
@@ -1130,6 +1119,9 @@ static void btrfs_close_one_device(struct btrfs_device *device)
                fs_devices->rw_devices--;
        }
 
+       if (device->devid == BTRFS_DEV_REPLACE_DEVID)
+               clear_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state);
+
        if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
                fs_devices->missing_devices--;
 
@@ -1228,7 +1220,7 @@ static int open_fs_devices(struct btrfs_fs_devices *fs_devices,
 static int devid_cmp(void *priv, const struct list_head *a,
                     const struct list_head *b)
 {
-       struct btrfs_device *dev1, *dev2;
+       const struct btrfs_device *dev1, *dev2;
 
        dev1 = list_entry(a, struct btrfs_device, dev_list);
        dev2 = list_entry(b, struct btrfs_device, dev_list);
@@ -1598,14 +1590,9 @@ again:
        key.offset = search_start;
        key.type = BTRFS_DEV_EXTENT_KEY;
 
-       ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+       ret = btrfs_search_backwards(root, &key, path);
        if (ret < 0)
                goto out;
-       if (ret > 0) {
-               ret = btrfs_previous_item(root, path, key.objectid, key.type);
-               if (ret < 0)
-                       goto out;
-       }
 
        while (1) {
                l = path->nodes[0];
@@ -1759,48 +1746,6 @@ out:
        return ret;
 }
 
-static int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
-                                 struct btrfs_device *device,
-                                 u64 chunk_offset, u64 start, u64 num_bytes)
-{
-       int ret;
-       struct btrfs_path *path;
-       struct btrfs_fs_info *fs_info = device->fs_info;
-       struct btrfs_root *root = fs_info->dev_root;
-       struct btrfs_dev_extent *extent;
-       struct extent_buffer *leaf;
-       struct btrfs_key key;
-
-       WARN_ON(!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state));
-       WARN_ON(test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state));
-       path = btrfs_alloc_path();
-       if (!path)
-               return -ENOMEM;
-
-       key.objectid = device->devid;
-       key.offset = start;
-       key.type = BTRFS_DEV_EXTENT_KEY;
-       ret = btrfs_insert_empty_item(trans, root, path, &key,
-                                     sizeof(*extent));
-       if (ret)
-               goto out;
-
-       leaf = path->nodes[0];
-       extent = btrfs_item_ptr(leaf, path->slots[0],
-                               struct btrfs_dev_extent);
-       btrfs_set_dev_extent_chunk_tree(leaf, extent,
-                                       BTRFS_CHUNK_TREE_OBJECTID);
-       btrfs_set_dev_extent_chunk_objectid(leaf, extent,
-                                           BTRFS_FIRST_CHUNK_TREE_OBJECTID);
-       btrfs_set_dev_extent_chunk_offset(leaf, extent, chunk_offset);
-
-       btrfs_set_dev_extent_length(leaf, extent, num_bytes);
-       btrfs_mark_buffer_dirty(leaf);
-out:
-       btrfs_free_path(path);
-       return ret;
-}
-
 static u64 find_next_chunk(struct btrfs_fs_info *fs_info)
 {
        struct extent_map_tree *em_tree;
@@ -1925,15 +1870,17 @@ out:
  * Function to update ctime/mtime for a given device path.
  * Mainly used for ctime/mtime based probe like libblkid.
  */
-static void update_dev_time(const char *path_name)
+static void update_dev_time(struct block_device *bdev)
 {
-       struct file *filp;
+       struct inode *inode = bdev->bd_inode;
+       struct timespec64 now;
 
-       filp = filp_open(path_name, O_RDWR, 0);
-       if (IS_ERR(filp))
+       /* Shouldn't happen but just in case. */
+       if (!inode)
                return;
-       file_update_time(filp);
-       filp_close(filp, NULL);
+
+       now = current_time(inode);
+       generic_update_time(inode, &now, S_MTIME | S_CTIME);
 }
 
 static int btrfs_rm_dev_item(struct btrfs_device *device)
@@ -2003,12 +1950,8 @@ static int btrfs_check_raid_min_devices(struct btrfs_fs_info *fs_info,
                if (!(all_avail & btrfs_raid_array[i].bg_flag))
                        continue;
 
-               if (num_devices < btrfs_raid_array[i].devs_min) {
-                       int ret = btrfs_raid_array[i].mindev_error;
-
-                       if (ret)
-                               return ret;
-               }
+               if (num_devices < btrfs_raid_array[i].devs_min)
+                       return btrfs_raid_array[i].mindev_error;
        }
 
        return 0;
@@ -2113,11 +2056,11 @@ void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info,
        btrfs_kobject_uevent(bdev, KOBJ_CHANGE);
 
        /* Update ctime/mtime for device path for libblkid */
-       update_dev_time(device_path);
+       update_dev_time(bdev);
 }
 
 int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
-                   u64 devid)
+                   u64 devid, struct block_device **bdev, fmode_t *mode)
 {
        struct btrfs_device *device;
        struct btrfs_fs_devices *cur_devices;
@@ -2137,7 +2080,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
 
        if (IS_ERR(device)) {
                if (PTR_ERR(device) == -ENOENT &&
-                   strcmp(device_path, "missing") == 0)
+                   device_path && strcmp(device_path, "missing") == 0)
                        ret = BTRFS_ERROR_DEV_MISSING_NOT_FOUND;
                else
                        ret = PTR_ERR(device);
@@ -2231,15 +2174,26 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
        mutex_unlock(&fs_devices->device_list_mutex);
 
        /*
-        * at this point, the device is zero sized and detached from
-        * the devices list.  All that's left is to zero out the old
-        * supers and free the device.
+        * At this point, the device is zero sized and detached from the
+        * devices list.  All that's left is to zero out the old supers and
+        * free the device.
+        *
+        * We cannot call btrfs_close_bdev() here because we're holding the sb
+        * write lock, and blkdev_put() will pull in the ->open_mutex on the
+        * block device and its dependencies.  Instead just flush the device
+        * and let the caller do the final blkdev_put.
         */
-       if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
+       if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
                btrfs_scratch_superblocks(fs_info, device->bdev,
                                          device->name->str);
+               if (device->bdev) {
+                       sync_blockdev(device->bdev);
+                       invalidate_bdev(device->bdev);
+               }
+       }
 
-       btrfs_close_bdev(device);
+       *bdev = device->bdev;
+       *mode = device->mode;
        synchronize_rcu();
        btrfs_free_device(device);
 
@@ -2766,7 +2720,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
        btrfs_forget_devices(device_path);
 
        /* Update ctime/mtime for blkid or udev */
-       update_dev_time(device_path);
+       update_dev_time(bdev);
 
        return ret;
 
@@ -3622,10 +3576,7 @@ static u64 calc_data_stripes(u64 type, int num_stripes)
        const int ncopies = btrfs_raid_array[index].ncopies;
        const int nparity = btrfs_raid_array[index].nparity;
 
-       if (nparity)
-               return num_stripes - nparity;
-       else
-               return num_stripes / ncopies;
+       return (num_stripes - nparity) / ncopies;
 }
 
 /* [pstart, pend) */
@@ -4025,6 +3976,13 @@ static inline int validate_convert_profile(struct btrfs_fs_info *fs_info,
        if (!(bargs->flags & BTRFS_BALANCE_ARGS_CONVERT))
                return true;
 
+       if (fs_info->sectorsize < PAGE_SIZE &&
+               bargs->target & BTRFS_BLOCK_GROUP_RAID56_MASK) {
+               btrfs_err(fs_info,
+               "RAID56 is not yet supported for sectorsize %u with page size %lu",
+                         fs_info->sectorsize, PAGE_SIZE);
+               return false;
+       }
        /* Profile is valid and does not have bits outside of the allowed set */
        if (alloc_profile_is_valid(bargs->target, 1) &&
            (bargs->target & ~allowed) == 0)
@@ -5463,56 +5421,6 @@ out:
        return block_group;
 }
 
-/*
- * This function, btrfs_finish_chunk_alloc(), belongs to phase 2.
- *
- * See the comment at btrfs_chunk_alloc() for details about the chunk allocation
- * phases.
- */
-int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
-                            u64 chunk_offset, u64 chunk_size)
-{
-       struct btrfs_fs_info *fs_info = trans->fs_info;
-       struct btrfs_device *device;
-       struct extent_map *em;
-       struct map_lookup *map;
-       u64 dev_offset;
-       u64 stripe_size;
-       int i;
-       int ret = 0;
-
-       em = btrfs_get_chunk_map(fs_info, chunk_offset, chunk_size);
-       if (IS_ERR(em))
-               return PTR_ERR(em);
-
-       map = em->map_lookup;
-       stripe_size = em->orig_block_len;
-
-       /*
-        * Take the device list mutex to prevent races with the final phase of
-        * a device replace operation that replaces the device object associated
-        * with the map's stripes, because the device object's id can change
-        * at any time during that final phase of the device replace operation
-        * (dev-replace.c:btrfs_dev_replace_finishing()), so we could grab the
-        * replaced device and then see it with an ID of BTRFS_DEV_REPLACE_DEVID,
-        * resulting in persisting a device extent item with such ID.
-        */
-       mutex_lock(&fs_info->fs_devices->device_list_mutex);
-       for (i = 0; i < map->num_stripes; i++) {
-               device = map->stripes[i].dev;
-               dev_offset = map->stripes[i].physical;
-
-               ret = btrfs_alloc_dev_extent(trans, device, chunk_offset,
-                                            dev_offset, stripe_size);
-               if (ret)
-                       break;
-       }
-       mutex_unlock(&fs_info->fs_devices->device_list_mutex);
-
-       free_extent_map(em);
-       return ret;
-}
-
 /*
  * This function, btrfs_chunk_alloc_add_chunk_item(), typically belongs to the
  * phase 1 of chunk allocation. It belongs to phase 2 only when allocating system
@@ -6923,9 +6831,31 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
        if (WARN_ON(!devid && !fs_info))
                return ERR_PTR(-EINVAL);
 
-       dev = __alloc_device(fs_info);
-       if (IS_ERR(dev))
-               return dev;
+       dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+       if (!dev)
+               return ERR_PTR(-ENOMEM);
+
+       /*
+        * Preallocate a bio that's always going to be used for flushing device
+        * barriers and matches the device lifespan
+        */
+       dev->flush_bio = bio_kmalloc(GFP_KERNEL, 0);
+       if (!dev->flush_bio) {
+               kfree(dev);
+               return ERR_PTR(-ENOMEM);
+       }
+
+       INIT_LIST_HEAD(&dev->dev_list);
+       INIT_LIST_HEAD(&dev->dev_alloc_list);
+       INIT_LIST_HEAD(&dev->post_commit_list);
+
+       atomic_set(&dev->reada_in_flight, 0);
+       atomic_set(&dev->dev_stats_ccnt, 0);
+       btrfs_device_data_ordered_init(dev);
+       INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
+       INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
+       extent_io_tree_init(fs_info, &dev->alloc_state,
+                           IO_TREE_DEVICE_ALLOC_STATE, NULL);
 
        if (devid)
                tmp = *devid;
@@ -6961,15 +6891,7 @@ static void btrfs_report_missing_device(struct btrfs_fs_info *fs_info,
 
 static u64 calc_stripe_length(u64 type, u64 chunk_len, int num_stripes)
 {
-       int index = btrfs_bg_flags_to_raid_index(type);
-       int ncopies = btrfs_raid_array[index].ncopies;
-       const int nparity = btrfs_raid_array[index].nparity;
-       int data_stripes;
-
-       if (nparity)
-               data_stripes = num_stripes - nparity;
-       else
-               data_stripes = num_stripes / ncopies;
+       const int data_stripes = calc_data_stripes(type, num_stripes);
 
        return div_u64(chunk_len, data_stripes);
 }
@@ -8144,7 +8066,7 @@ int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info)
                goto out;
 
        if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
-               ret = btrfs_next_item(root, path);
+               ret = btrfs_next_leaf(root, path);
                if (ret < 0)
                        goto out;
                /* No dev extents at all? Not good */