.sub_stripes = 2,
.dev_stripes = 1,
.devs_max = 0, /* 0 == as many as possible */
- .devs_min = 4,
+ .devs_min = 2,
.tolerated_failures = 1,
.devs_increment = 2,
.ncopies = 2,
.sub_stripes = 1,
.dev_stripes = 1,
.devs_max = 0,
- .devs_min = 2,
+ .devs_min = 1,
.tolerated_failures = 0,
.devs_increment = 1,
.ncopies = 1,
},
};
+/*
+ * Convert block group flags (BTRFS_BLOCK_GROUP_*) to btrfs_raid_types, which
+ * can be used as index to access btrfs_raid_array[].
+ */
+enum btrfs_raid_types __attribute_const__ btrfs_bg_flags_to_raid_index(u64 flags)
+{
+ if (flags & BTRFS_BLOCK_GROUP_RAID10)
+ return BTRFS_RAID_RAID10;
+ else if (flags & BTRFS_BLOCK_GROUP_RAID1)
+ return BTRFS_RAID_RAID1;
+ else if (flags & BTRFS_BLOCK_GROUP_RAID1C3)
+ return BTRFS_RAID_RAID1C3;
+ else if (flags & BTRFS_BLOCK_GROUP_RAID1C4)
+ return BTRFS_RAID_RAID1C4;
+ else if (flags & BTRFS_BLOCK_GROUP_DUP)
+ return BTRFS_RAID_DUP;
+ else if (flags & BTRFS_BLOCK_GROUP_RAID0)
+ return BTRFS_RAID_RAID0;
+ else if (flags & BTRFS_BLOCK_GROUP_RAID5)
+ return BTRFS_RAID_RAID5;
+ else if (flags & BTRFS_BLOCK_GROUP_RAID6)
+ return BTRFS_RAID_RAID6;
+
+ return BTRFS_RAID_SINGLE; /* BTRFS_BLOCK_GROUP_SINGLE */
+}
+
const char *btrfs_bg_type_to_raid_name(u64 flags)
{
const int index = btrfs_bg_flags_to_raid_index(flags);
}
}
-/*
- * Returns a pointer to a new btrfs_device on success; ERR_PTR() on error.
- * Returned struct is not linked onto any lists and must be destroyed using
- * btrfs_free_device.
- */
-static struct btrfs_device *__alloc_device(struct btrfs_fs_info *fs_info)
-{
- struct btrfs_device *dev;
-
- dev = kzalloc(sizeof(*dev), GFP_KERNEL);
- if (!dev)
- return ERR_PTR(-ENOMEM);
-
- /*
- * Preallocate a bio that's always going to be used for flushing device
- * barriers and matches the device lifespan
- */
- dev->flush_bio = bio_kmalloc(GFP_KERNEL, 0);
- if (!dev->flush_bio) {
- kfree(dev);
- return ERR_PTR(-ENOMEM);
- }
-
- INIT_LIST_HEAD(&dev->dev_list);
- INIT_LIST_HEAD(&dev->dev_alloc_list);
- INIT_LIST_HEAD(&dev->post_commit_list);
-
- atomic_set(&dev->reada_in_flight, 0);
- atomic_set(&dev->dev_stats_ccnt, 0);
- btrfs_device_data_ordered_init(dev);
- INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
- INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
- extent_io_tree_init(fs_info, &dev->alloc_state,
- IO_TREE_DEVICE_ALLOC_STATE, NULL);
-
- return dev;
-}
-
static noinline struct btrfs_fs_devices *find_fsid(
const u8 *fsid, const u8 *metadata_fsid)
{
struct btrfs_device *device, *tmp_device;
int ret = 0;
+ lockdep_assert_held(&uuid_mutex);
+
if (path)
ret = -ENOENT;
struct btrfs_device *orig_dev;
int ret = 0;
+ lockdep_assert_held(&uuid_mutex);
+
fs_devices = alloc_fs_devices(orig->fsid, NULL);
if (IS_ERR(fs_devices))
return fs_devices;
- mutex_lock(&orig->device_list_mutex);
fs_devices->total_devices = orig->total_devices;
list_for_each_entry(orig_dev, &orig->devices, dev_list) {
device->fs_devices = fs_devices;
fs_devices->num_devices++;
}
- mutex_unlock(&orig->device_list_mutex);
return fs_devices;
error:
- mutex_unlock(&orig->device_list_mutex);
free_fs_devices(fs_devices);
return ERR_PTR(ret);
}
fs_devices->rw_devices--;
}
+ if (device->devid == BTRFS_DEV_REPLACE_DEVID)
+ clear_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state);
+
if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
fs_devices->missing_devices--;
static int devid_cmp(void *priv, const struct list_head *a,
const struct list_head *b)
{
- struct btrfs_device *dev1, *dev2;
+ const struct btrfs_device *dev1, *dev2;
dev1 = list_entry(a, struct btrfs_device, dev_list);
dev2 = list_entry(b, struct btrfs_device, dev_list);
key.offset = search_start;
key.type = BTRFS_DEV_EXTENT_KEY;
- ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ ret = btrfs_search_backwards(root, &key, path);
if (ret < 0)
goto out;
- if (ret > 0) {
- ret = btrfs_previous_item(root, path, key.objectid, key.type);
- if (ret < 0)
- goto out;
- }
while (1) {
l = path->nodes[0];
return ret;
}
-static int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
- struct btrfs_device *device,
- u64 chunk_offset, u64 start, u64 num_bytes)
-{
- int ret;
- struct btrfs_path *path;
- struct btrfs_fs_info *fs_info = device->fs_info;
- struct btrfs_root *root = fs_info->dev_root;
- struct btrfs_dev_extent *extent;
- struct extent_buffer *leaf;
- struct btrfs_key key;
-
- WARN_ON(!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state));
- WARN_ON(test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state));
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- key.objectid = device->devid;
- key.offset = start;
- key.type = BTRFS_DEV_EXTENT_KEY;
- ret = btrfs_insert_empty_item(trans, root, path, &key,
- sizeof(*extent));
- if (ret)
- goto out;
-
- leaf = path->nodes[0];
- extent = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_dev_extent);
- btrfs_set_dev_extent_chunk_tree(leaf, extent,
- BTRFS_CHUNK_TREE_OBJECTID);
- btrfs_set_dev_extent_chunk_objectid(leaf, extent,
- BTRFS_FIRST_CHUNK_TREE_OBJECTID);
- btrfs_set_dev_extent_chunk_offset(leaf, extent, chunk_offset);
-
- btrfs_set_dev_extent_length(leaf, extent, num_bytes);
- btrfs_mark_buffer_dirty(leaf);
-out:
- btrfs_free_path(path);
- return ret;
-}
-
static u64 find_next_chunk(struct btrfs_fs_info *fs_info)
{
struct extent_map_tree *em_tree;
* Function to update ctime/mtime for a given device path.
* Mainly used for ctime/mtime based probe like libblkid.
*/
-static void update_dev_time(const char *path_name)
+static void update_dev_time(struct block_device *bdev)
{
- struct file *filp;
+ struct inode *inode = bdev->bd_inode;
+ struct timespec64 now;
- filp = filp_open(path_name, O_RDWR, 0);
- if (IS_ERR(filp))
+ /* Shouldn't happen but just in case. */
+ if (!inode)
return;
- file_update_time(filp);
- filp_close(filp, NULL);
+
+ now = current_time(inode);
+ generic_update_time(inode, &now, S_MTIME | S_CTIME);
}
static int btrfs_rm_dev_item(struct btrfs_device *device)
if (!(all_avail & btrfs_raid_array[i].bg_flag))
continue;
- if (num_devices < btrfs_raid_array[i].devs_min) {
- int ret = btrfs_raid_array[i].mindev_error;
-
- if (ret)
- return ret;
- }
+ if (num_devices < btrfs_raid_array[i].devs_min)
+ return btrfs_raid_array[i].mindev_error;
}
return 0;
btrfs_kobject_uevent(bdev, KOBJ_CHANGE);
/* Update ctime/mtime for device path for libblkid */
- update_dev_time(device_path);
+ update_dev_time(bdev);
}
int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
- u64 devid)
+ u64 devid, struct block_device **bdev, fmode_t *mode)
{
struct btrfs_device *device;
struct btrfs_fs_devices *cur_devices;
if (IS_ERR(device)) {
if (PTR_ERR(device) == -ENOENT &&
- strcmp(device_path, "missing") == 0)
+ device_path && strcmp(device_path, "missing") == 0)
ret = BTRFS_ERROR_DEV_MISSING_NOT_FOUND;
else
ret = PTR_ERR(device);
mutex_unlock(&fs_devices->device_list_mutex);
/*
- * at this point, the device is zero sized and detached from
- * the devices list. All that's left is to zero out the old
- * supers and free the device.
+ * At this point, the device is zero sized and detached from the
+ * devices list. All that's left is to zero out the old supers and
+ * free the device.
+ *
+ * We cannot call btrfs_close_bdev() here because we're holding the sb
+ * write lock, and blkdev_put() will pull in the ->open_mutex on the
+ * block device and it's dependencies. Instead just flush the device
+ * and let the caller do the final blkdev_put.
*/
- if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
+ if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
btrfs_scratch_superblocks(fs_info, device->bdev,
device->name->str);
+ if (device->bdev) {
+ sync_blockdev(device->bdev);
+ invalidate_bdev(device->bdev);
+ }
+ }
- btrfs_close_bdev(device);
+ *bdev = device->bdev;
+ *mode = device->mode;
synchronize_rcu();
btrfs_free_device(device);
btrfs_forget_devices(device_path);
/* Update ctime/mtime for blkid or udev */
- update_dev_time(device_path);
+ update_dev_time(bdev);
return ret;
const int ncopies = btrfs_raid_array[index].ncopies;
const int nparity = btrfs_raid_array[index].nparity;
- if (nparity)
- return num_stripes - nparity;
- else
- return num_stripes / ncopies;
+ return (num_stripes - nparity) / ncopies;
}
/* [pstart, pend) */
if (!(bargs->flags & BTRFS_BALANCE_ARGS_CONVERT))
return true;
+ if (fs_info->sectorsize < PAGE_SIZE &&
+ bargs->target & BTRFS_BLOCK_GROUP_RAID56_MASK) {
+ btrfs_err(fs_info,
+ "RAID56 is not yet supported for sectorsize %u with page size %lu",
+ fs_info->sectorsize, PAGE_SIZE);
+ return false;
+ }
/* Profile is valid and does not have bits outside of the allowed set */
if (alloc_profile_is_valid(bargs->target, 1) &&
(bargs->target & ~allowed) == 0)
return block_group;
}
-/*
- * This function, btrfs_finish_chunk_alloc(), belongs to phase 2.
- *
- * See the comment at btrfs_chunk_alloc() for details about the chunk allocation
- * phases.
- */
-int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
- u64 chunk_offset, u64 chunk_size)
-{
- struct btrfs_fs_info *fs_info = trans->fs_info;
- struct btrfs_device *device;
- struct extent_map *em;
- struct map_lookup *map;
- u64 dev_offset;
- u64 stripe_size;
- int i;
- int ret = 0;
-
- em = btrfs_get_chunk_map(fs_info, chunk_offset, chunk_size);
- if (IS_ERR(em))
- return PTR_ERR(em);
-
- map = em->map_lookup;
- stripe_size = em->orig_block_len;
-
- /*
- * Take the device list mutex to prevent races with the final phase of
- * a device replace operation that replaces the device object associated
- * with the map's stripes, because the device object's id can change
- * at any time during that final phase of the device replace operation
- * (dev-replace.c:btrfs_dev_replace_finishing()), so we could grab the
- * replaced device and then see it with an ID of BTRFS_DEV_REPLACE_DEVID,
- * resulting in persisting a device extent item with such ID.
- */
- mutex_lock(&fs_info->fs_devices->device_list_mutex);
- for (i = 0; i < map->num_stripes; i++) {
- device = map->stripes[i].dev;
- dev_offset = map->stripes[i].physical;
-
- ret = btrfs_alloc_dev_extent(trans, device, chunk_offset,
- dev_offset, stripe_size);
- if (ret)
- break;
- }
- mutex_unlock(&fs_info->fs_devices->device_list_mutex);
-
- free_extent_map(em);
- return ret;
-}
-
/*
* This function, btrfs_chunk_alloc_add_chunk_item(), typically belongs to the
* phase 1 of chunk allocation. It belongs to phase 2 only when allocating system
if (WARN_ON(!devid && !fs_info))
return ERR_PTR(-EINVAL);
- dev = __alloc_device(fs_info);
- if (IS_ERR(dev))
- return dev;
+ dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+ if (!dev)
+ return ERR_PTR(-ENOMEM);
+
+ /*
+ * Preallocate a bio that's always going to be used for flushing device
+ * barriers and matches the device lifespan
+ */
+ dev->flush_bio = bio_kmalloc(GFP_KERNEL, 0);
+ if (!dev->flush_bio) {
+ kfree(dev);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ INIT_LIST_HEAD(&dev->dev_list);
+ INIT_LIST_HEAD(&dev->dev_alloc_list);
+ INIT_LIST_HEAD(&dev->post_commit_list);
+
+ atomic_set(&dev->reada_in_flight, 0);
+ atomic_set(&dev->dev_stats_ccnt, 0);
+ btrfs_device_data_ordered_init(dev);
+ INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
+ INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
+ extent_io_tree_init(fs_info, &dev->alloc_state,
+ IO_TREE_DEVICE_ALLOC_STATE, NULL);
if (devid)
tmp = *devid;
static u64 calc_stripe_length(u64 type, u64 chunk_len, int num_stripes)
{
- int index = btrfs_bg_flags_to_raid_index(type);
- int ncopies = btrfs_raid_array[index].ncopies;
- const int nparity = btrfs_raid_array[index].nparity;
- int data_stripes;
-
- if (nparity)
- data_stripes = num_stripes - nparity;
- else
- data_stripes = num_stripes / ncopies;
+ const int data_stripes = calc_data_stripes(type, num_stripes);
return div_u64(chunk_len, data_stripes);
}
goto out;
if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
- ret = btrfs_next_item(root, path);
+ ret = btrfs_next_leaf(root, path);
if (ret < 0)
goto out;
/* No dev extents at all? Not good */