Merge tag '5.11-rc-smb3-part2' of git://git.samba.org/sfrench/cifs-2.6
[linux-2.6-microblaze.git] / fs / btrfs / volumes.c
index 7863766..ee086fc 100644 (file)
@@ -31,6 +31,7 @@
 #include "space-info.h"
 #include "block-group.h"
 #include "discard.h"
+#include "zoned.h"
 
 const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
        [BTRFS_RAID_RAID10] = {
@@ -374,6 +375,7 @@ void btrfs_free_device(struct btrfs_device *device)
        rcu_string_free(device->name);
        extent_io_tree_release(&device->alloc_state);
        bio_put(device->flush_bio);
+       btrfs_destroy_dev_zone_info(device);
        kfree(device);
 }
 
@@ -667,6 +669,10 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
        clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
        device->mode = flags;
 
+       ret = btrfs_get_dev_zone_info(device);
+       if (ret != 0)
+               goto error_free_page;
+
        fs_devices->open_devices++;
        if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
            device->devid != BTRFS_DEV_REPLACE_DEVID) {
@@ -822,7 +828,7 @@ static noinline struct btrfs_device *device_list_add(const char *path,
        } else {
                mutex_lock(&fs_devices->device_list_mutex);
                device = btrfs_find_device(fs_devices, devid,
-                               disk_super->dev_item.uuid, NULL, false);
+                               disk_super->dev_item.uuid, NULL);
 
                /*
                 * If this disk has been pulled into an fs devices created by
@@ -929,16 +935,16 @@ static noinline struct btrfs_device *device_list_add(const char *path,
                 * make sure it's the same device if the device is mounted
                 */
                if (device->bdev) {
-                       struct block_device *path_bdev;
+                       int error;
+                       dev_t path_dev;
 
-                       path_bdev = lookup_bdev(path);
-                       if (IS_ERR(path_bdev)) {
+                       error = lookup_bdev(path, &path_dev);
+                       if (error) {
                                mutex_unlock(&fs_devices->device_list_mutex);
-                               return ERR_CAST(path_bdev);
+                               return ERR_PTR(error);
                        }
 
-                       if (device->bdev != path_bdev) {
-                               bdput(path_bdev);
+                       if (device->bdev->bd_dev != path_dev) {
                                mutex_unlock(&fs_devices->device_list_mutex);
                                /*
                                 * device->fs_info may not be reliable here, so
@@ -953,7 +959,6 @@ static noinline struct btrfs_device *device_list_add(const char *path,
                                                  task_pid_nr(current));
                                return ERR_PTR(-EEXIST);
                        }
-                       bdput(path_bdev);
                        btrfs_info_in_rcu(device->fs_info,
        "devid %llu device path %s changed to %s scanned by %s (%d)",
                                          devid, rcu_str_deref(device->name),
@@ -1044,7 +1049,7 @@ error:
 }
 
 static void __btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices,
-                                     int step, struct btrfs_device **latest_dev)
+                                     struct btrfs_device **latest_dev)
 {
        struct btrfs_device *device, *next;
 
@@ -1089,16 +1094,16 @@ static void __btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices,
  * After we have read the system tree and know devids belonging to this
  * filesystem, remove the device which does not belong there.
  */
-void btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices, int step)
+void btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices)
 {
        struct btrfs_device *latest_dev = NULL;
        struct btrfs_fs_devices *seed_dev;
 
        mutex_lock(&uuid_mutex);
-       __btrfs_free_extra_devids(fs_devices, step, &latest_dev);
+       __btrfs_free_extra_devids(fs_devices, &latest_dev);
 
        list_for_each_entry(seed_dev, &fs_devices->seed_list, seed_list)
-               __btrfs_free_extra_devids(seed_dev, step, &latest_dev);
+               __btrfs_free_extra_devids(seed_dev, &latest_dev);
 
        fs_devices->latest_bdev = latest_dev->bdev;
 
@@ -1137,6 +1142,7 @@ static void btrfs_close_one_device(struct btrfs_device *device)
                device->bdev = NULL;
        }
        clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
+       btrfs_destroy_dev_zone_info(device);
 
        device->fs_info = NULL;
        atomic_set(&device->dev_stats_ccnt, 0);
@@ -1217,6 +1223,7 @@ static int open_fs_devices(struct btrfs_fs_devices *fs_devices,
        fs_devices->latest_bdev = latest_dev->bdev;
        fs_devices->total_rw_bytes = 0;
        fs_devices->chunk_alloc_policy = BTRFS_CHUNK_ALLOC_REGULAR;
+       fs_devices->read_policy = BTRFS_READ_POLICY_PID;
 
        return 0;
 }
@@ -1268,7 +1275,7 @@ void btrfs_release_disk_super(struct btrfs_super_block *super)
 }
 
 static struct btrfs_super_block *btrfs_read_disk_super(struct block_device *bdev,
-                                                      u64 bytenr)
+                                                      u64 bytenr, u64 bytenr_orig)
 {
        struct btrfs_super_block *disk_super;
        struct page *page;
@@ -1299,7 +1306,7 @@ static struct btrfs_super_block *btrfs_read_disk_super(struct block_device *bdev
        /* align our pointer to the offset of the super block */
        disk_super = p + offset_in_page(bytenr);
 
-       if (btrfs_super_bytenr(disk_super) != bytenr ||
+       if (btrfs_super_bytenr(disk_super) != bytenr_orig ||
            btrfs_super_magic(disk_super) != BTRFS_MAGIC) {
                btrfs_release_disk_super(p);
                return ERR_PTR(-EINVAL);
@@ -1334,7 +1341,8 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, fmode_t flags,
        bool new_device_added = false;
        struct btrfs_device *device = NULL;
        struct block_device *bdev;
-       u64 bytenr;
+       u64 bytenr, bytenr_orig;
+       int ret;
 
        lockdep_assert_held(&uuid_mutex);
 
@@ -1344,14 +1352,21 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, fmode_t flags,
         * So, we need to add a special mount option to scan for
         * later supers, using BTRFS_SUPER_MIRROR_MAX instead
         */
-       bytenr = btrfs_sb_offset(0);
        flags |= FMODE_EXCL;
 
        bdev = blkdev_get_by_path(path, flags, holder);
        if (IS_ERR(bdev))
                return ERR_CAST(bdev);
 
-       disk_super = btrfs_read_disk_super(bdev, bytenr);
+       bytenr_orig = btrfs_sb_offset(0);
+       ret = btrfs_sb_log_location_bdev(bdev, 0, READ, &bytenr);
+       if (ret) {
+               /* bdev is open by now, so unwind via error_bdev_put */
+               device = ERR_PTR(ret);
+               goto error_bdev_put;
+       }
+
+       disk_super = btrfs_read_disk_super(bdev, bytenr, bytenr_orig);
        if (IS_ERR(disk_super)) {
                device = ERR_CAST(disk_super);
                goto error_bdev_put;
@@ -2015,6 +2027,13 @@ void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info,
                if (IS_ERR(disk_super))
                        continue;
 
+               if (bdev_is_zoned(bdev)) {
+                       /* Drop the super block read above before skipping */
+                       btrfs_release_disk_super(disk_super);
+                       btrfs_reset_sb_log_zones(bdev, copy_num);
+                       continue;
+               }
+
                memset(&disk_super->magic, 0, sizeof(disk_super->magic));
 
                page = virt_to_page(disk_super);
@@ -2293,10 +2310,10 @@ static struct btrfs_device *btrfs_find_device_by_path(
        dev_uuid = disk_super->dev_item.uuid;
        if (btrfs_fs_incompat(fs_info, METADATA_UUID))
                device = btrfs_find_device(fs_info->fs_devices, devid, dev_uuid,
-                                          disk_super->metadata_uuid, true);
+                                          disk_super->metadata_uuid);
        else
                device = btrfs_find_device(fs_info->fs_devices, devid, dev_uuid,
-                                          disk_super->fsid, true);
+                                          disk_super->fsid);
 
        btrfs_release_disk_super(disk_super);
        if (!device)
@@ -2316,7 +2333,7 @@ struct btrfs_device *btrfs_find_device_by_devspec(
 
        if (devid) {
                device = btrfs_find_device(fs_info->fs_devices, devid, NULL,
-                                          NULL, true);
+                                          NULL);
                if (!device)
                        return ERR_PTR(-ENOENT);
                return device;
@@ -2465,7 +2482,7 @@ next_slot:
                read_extent_buffer(leaf, fs_uuid, btrfs_device_fsid(dev_item),
                                   BTRFS_FSID_SIZE);
                device = btrfs_find_device(fs_info->fs_devices, devid, dev_uuid,
-                                          fs_uuid, true);
+                                          fs_uuid);
                BUG_ON(!device); /* Logic error */
 
                if (device->fs_devices->seeding) {
@@ -2507,6 +2524,11 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
        if (IS_ERR(bdev))
                return PTR_ERR(bdev);
 
+       if (!btrfs_check_device_zone_type(fs_info, bdev)) {
+               ret = -EINVAL;
+               goto error;
+       }
+
        if (fs_devices->seeding) {
                seeding_dev = 1;
                down_write(&sb->s_umount);
@@ -2540,10 +2562,17 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
        }
        rcu_assign_pointer(device->name, name);
 
+       device->fs_info = fs_info;
+       device->bdev = bdev;
+
+       ret = btrfs_get_dev_zone_info(device);
+       if (ret)
+               goto error_free_device;
+
        trans = btrfs_start_transaction(root, 0);
        if (IS_ERR(trans)) {
                ret = PTR_ERR(trans);
-               goto error_free_device;
+               goto error_free_zone;
        }
 
        q = bdev_get_queue(bdev);
@@ -2556,8 +2585,6 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
                                         fs_info->sectorsize);
        device->disk_total_bytes = device->total_bytes;
        device->commit_total_bytes = device->total_bytes;
-       device->fs_info = fs_info;
-       device->bdev = bdev;
        set_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
        clear_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state);
        device->mode = FMODE_EXCL;
@@ -2704,6 +2731,8 @@ error_trans:
                sb->s_flags |= SB_RDONLY;
        if (trans)
                btrfs_end_transaction(trans);
+error_free_zone:
+       btrfs_destroy_dev_zone_info(device);
 error_free_device:
        btrfs_free_device(device);
 error:
@@ -5479,7 +5508,18 @@ static int find_live_mirror(struct btrfs_fs_info *fs_info,
        else
                num_stripes = map->num_stripes;
 
-       preferred_mirror = first + current->pid % num_stripes;
+       switch (fs_info->fs_devices->read_policy) {
+       default:
+               /* Shouldn't happen, just warn and use pid instead of failing */
+               btrfs_warn_rl(fs_info,
+                             "unknown read_policy type %u, reset to pid",
+                             fs_info->fs_devices->read_policy);
+               fs_info->fs_devices->read_policy = BTRFS_READ_POLICY_PID;
+               fallthrough;
+       case BTRFS_READ_POLICY_PID:
+               preferred_mirror = first + (current->pid % num_stripes);
+               break;
+       }
 
        if (dev_replace_is_ongoing &&
            fs_info->dev_replace.cont_reading_from_srcdev_mode ==
@@ -6335,7 +6375,7 @@ static void submit_stripe_bio(struct btrfs_bio *bbio, struct bio *bio,
        bio->bi_iter.bi_sector = physical >> 9;
        btrfs_debug_in_rcu(fs_info,
        "btrfs_map_bio: rw %d 0x%x, sector=%llu, dev=%lu (%s id %llu), size=%u",
-               bio_op(bio), bio->bi_opf, (u64)bio->bi_iter.bi_sector,
+               bio_op(bio), bio->bi_opf, bio->bi_iter.bi_sector,
                (unsigned long)dev->bdev->bd_dev, rcu_str_deref(dev->name),
                dev->devid, bio->bi_iter.bi_size);
        bio_set_dev(bio, dev->bdev);
@@ -6367,7 +6407,7 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
 {
        struct btrfs_device *dev;
        struct bio *first_bio = bio;
-       u64 logical = (u64)bio->bi_iter.bi_sector << 9;
+       u64 logical = bio->bi_iter.bi_sector << 9;
        u64 length = 0;
        u64 map_length;
        int ret;
@@ -6447,8 +6487,7 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
- * If @seed is true, traverse through the seed devices.
+ * The seed devices are always traversed as well.
  */
 struct btrfs_device *btrfs_find_device(struct btrfs_fs_devices *fs_devices,
-                                      u64 devid, u8 *uuid, u8 *fsid,
-                                      bool seed)
+                                      u64 devid, u8 *uuid, u8 *fsid)
 {
        struct btrfs_device *device;
        struct btrfs_fs_devices *seed_devs;
@@ -6655,7 +6694,7 @@ static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf,
                                   btrfs_stripe_dev_uuid_nr(chunk, i),
                                   BTRFS_UUID_SIZE);
                map->stripes[i].dev = btrfs_find_device(fs_info->fs_devices,
-                                                       devid, uuid, NULL, true);
+                                                       devid, uuid, NULL);
                if (!map->stripes[i].dev &&
                    !btrfs_test_opt(fs_info, DEGRADED)) {
                        free_extent_map(em);
@@ -6794,7 +6833,7 @@ static int read_one_dev(struct extent_buffer *leaf,
        }
 
        device = btrfs_find_device(fs_info->fs_devices, devid, dev_uuid,
-                                  fs_uuid, true);
+                                  fs_uuid);
        if (!device) {
                if (!btrfs_test_opt(fs_info, DEGRADED)) {
                        btrfs_report_missing_device(fs_info, devid,
@@ -6857,6 +6896,16 @@ static int read_one_dev(struct extent_buffer *leaf,
        }
 
        fill_device_from_item(leaf, dev_item, device);
+       if (device->bdev) {
+               u64 max_total_bytes = i_size_read(device->bdev->bd_inode);
+
+               if (device->total_bytes > max_total_bytes) {
+                       btrfs_err(fs_info,
+                       "device total_bytes should be at most %llu but found %llu",
+                                 max_total_bytes, device->total_bytes);
+                       return -EINVAL;
+               }
+       }
        set_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
        if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
           !test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
@@ -6891,11 +6940,11 @@ int btrfs_read_sys_array(struct btrfs_fs_info *fs_info)
         * fixed to BTRFS_SUPER_INFO_SIZE. If nodesize > sb size, this will
         * overallocate but we can keep it as-is, only the first page is used.
         */
-       sb = btrfs_find_create_tree_block(fs_info, BTRFS_SUPER_INFO_OFFSET);
+       sb = btrfs_find_create_tree_block(fs_info, BTRFS_SUPER_INFO_OFFSET,
+                                         root->root_key.objectid, 0);
        if (IS_ERR(sb))
                return PTR_ERR(sb);
        set_extent_buffer_uptodate(sb);
-       btrfs_set_buffer_lockdep_class(root->root_key.objectid, sb, 0);
        /*
         * The sb extent buffer is artificial and just used to read the system array.
         * set_extent_buffer_uptodate() call does not properly mark all it's
@@ -7059,12 +7108,8 @@ static void readahead_tree_node_children(struct extent_buffer *node)
        int i;
        const int nr_items = btrfs_header_nritems(node);
 
-       for (i = 0; i < nr_items; i++) {
-               u64 start;
-
-               start = btrfs_node_blockptr(node, i);
-               readahead_tree_block(node->fs_info, start);
-       }
+       for (i = 0; i < nr_items; i++)
+               btrfs_readahead_node_child(node, i);
 }
 
 int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
@@ -7451,8 +7496,7 @@ int btrfs_get_dev_stats(struct btrfs_fs_info *fs_info,
        int i;
 
        mutex_lock(&fs_devices->device_list_mutex);
-       dev = btrfs_find_device(fs_info->fs_devices, stats->devid, NULL, NULL,
-                               true);
+       dev = btrfs_find_device(fs_info->fs_devices, stats->devid, NULL, NULL);
        mutex_unlock(&fs_devices->device_list_mutex);
 
        if (!dev) {
@@ -7583,28 +7627,13 @@ static int verify_one_dev_extent(struct btrfs_fs_info *fs_info,
        }
 
        /* Make sure no dev extent is beyond device bondary */
-       dev = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL, true);
+       dev = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL);
        if (!dev) {
                btrfs_err(fs_info, "failed to find devid %llu", devid);
                ret = -EUCLEAN;
                goto out;
        }
 
-       /* It's possible this device is a dummy for seed device */
-       if (dev->disk_total_bytes == 0) {
-               struct btrfs_fs_devices *devs;
-
-               devs = list_first_entry(&fs_info->fs_devices->seed_list,
-                                       struct btrfs_fs_devices, seed_list);
-               dev = btrfs_find_device(devs, devid, NULL, NULL, false);
-               if (!dev) {
-                       btrfs_err(fs_info, "failed to find seed devid %llu",
-                                 devid);
-                       ret = -EUCLEAN;
-                       goto out;
-               }
-       }
-
        if (physical_offset + physical_len > dev->disk_total_bytes) {
                btrfs_err(fs_info,
 "dev extent devid %llu physical offset %llu len %llu is beyond device boundary %llu",
@@ -7659,6 +7688,19 @@ int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info)
        u64 prev_dev_ext_end = 0;
        int ret = 0;
 
+       /*
+        * We don't have a dev_root because we mounted with ignorebadroots and
+        * failed to load the root, so we want to skip the verification in this
+        * case for sure.
+        *
+        * However if the dev root is fine, but the tree itself is corrupted
+        * we'd still fail to mount.  This verification is only to make sure
+        * writes can happen safely, so instead just bypass this check
+        * completely in the case of IGNOREBADROOTS.
+        */
+       if (btrfs_test_opt(fs_info, IGNOREBADROOTS))
+               return 0;
+
        key.objectid = 1;
        key.type = BTRFS_DEV_EXTENT_KEY;
        key.offset = 0;