Merge tag 'f2fs-for-5.15-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeu...
[linux-2.6-microblaze.git] / fs / f2fs / super.c
index ce2ab1b..78ebc30 100644 (file)
@@ -33,6 +33,7 @@
 #include "segment.h"
 #include "xattr.h"
 #include "gc.h"
+#include "iostat.h"
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/f2fs.h>
@@ -56,6 +57,7 @@ const char *f2fs_fault_name[FAULT_MAX] = {
        [FAULT_CHECKPOINT]      = "checkpoint error",
        [FAULT_DISCARD]         = "discard error",
        [FAULT_WRITE_IO]        = "write IO error",
+       [FAULT_SLAB_ALLOC]      = "slab alloc",
 };
 
 void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate,
@@ -155,6 +157,7 @@ enum {
        Opt_atgc,
        Opt_gc_merge,
        Opt_nogc_merge,
+       Opt_discard_unit,
        Opt_err,
 };
 
@@ -231,6 +234,7 @@ static match_table_t f2fs_tokens = {
        {Opt_atgc, "atgc"},
        {Opt_gc_merge, "gc_merge"},
        {Opt_nogc_merge, "nogc_merge"},
+       {Opt_discard_unit, "discard_unit=%s"},
        {Opt_err, NULL},
 };
 
@@ -657,10 +661,14 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
                                return -EINVAL;
                        break;
                case Opt_discard:
+                       if (!f2fs_hw_support_discard(sbi)) {
+                               f2fs_warn(sbi, "device does not support discard");
+                               break;
+                       }
                        set_opt(sbi, DISCARD);
                        break;
                case Opt_nodiscard:
-                       if (f2fs_sb_has_blkzoned(sbi)) {
+                       if (f2fs_hw_should_discard(sbi)) {
                                f2fs_warn(sbi, "discard is required for zoned block devices");
                                return -EINVAL;
                        }
@@ -1173,6 +1181,25 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
                case Opt_nogc_merge:
                        clear_opt(sbi, GC_MERGE);
                        break;
+               case Opt_discard_unit:
+                       name = match_strdup(&args[0]);
+                       if (!name)
+                               return -ENOMEM;
+                       if (!strcmp(name, "block")) {
+                               F2FS_OPTION(sbi).discard_unit =
+                                               DISCARD_UNIT_BLOCK;
+                       } else if (!strcmp(name, "segment")) {
+                               F2FS_OPTION(sbi).discard_unit =
+                                               DISCARD_UNIT_SEGMENT;
+                       } else if (!strcmp(name, "section")) {
+                               F2FS_OPTION(sbi).discard_unit =
+                                               DISCARD_UNIT_SECTION;
+                       } else {
+                               kfree(name);
+                               return -EINVAL;
+                       }
+                       kfree(name);
+                       break;
                default:
                        f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value",
                                 p);
@@ -1211,6 +1238,14 @@ default_check:
                return -EINVAL;
        }
 #endif
+       if (f2fs_sb_has_blkzoned(sbi)) {
+               if (F2FS_OPTION(sbi).discard_unit !=
+                                               DISCARD_UNIT_SECTION) {
+                       f2fs_info(sbi, "Zoned block device doesn't need small discard, set discard_unit=section by default");
+                       F2FS_OPTION(sbi).discard_unit =
+                                       DISCARD_UNIT_SECTION;
+               }
+       }
 
 #ifdef CONFIG_F2FS_FS_COMPRESSION
        if (f2fs_test_compress_extension(sbi)) {
@@ -1271,7 +1306,8 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
 {
        struct f2fs_inode_info *fi;
 
-       fi = kmem_cache_alloc(f2fs_inode_cachep, GFP_F2FS_ZERO);
+       fi = f2fs_kmem_cache_alloc(f2fs_inode_cachep,
+                               GFP_F2FS_ZERO, false, F2FS_SB(sb));
        if (!fi)
                return NULL;
 
@@ -1541,6 +1577,7 @@ static void f2fs_put_super(struct super_block *sb)
 #endif
        fscrypt_free_dummy_policy(&F2FS_OPTION(sbi).dummy_enc_policy);
        destroy_percpu_info(sbi);
+       f2fs_destroy_iostat(sbi);
        for (i = 0; i < NR_PAGE_TYPE; i++)
                kvfree(sbi->write_io[i]);
 #ifdef CONFIG_UNICODE
@@ -1924,6 +1961,14 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
 
        if (test_opt(sbi, ATGC))
                seq_puts(seq, ",atgc");
+
+       if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_BLOCK)
+               seq_printf(seq, ",discard_unit=%s", "block");
+       else if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SEGMENT)
+               seq_printf(seq, ",discard_unit=%s", "segment");
+       else if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SECTION)
+               seq_printf(seq, ",discard_unit=%s", "section");
+
        return 0;
 }
 
@@ -1959,11 +2004,15 @@ static void default_options(struct f2fs_sb_info *sbi)
        F2FS_OPTION(sbi).unusable_cap = 0;
        sbi->sb->s_flags |= SB_LAZYTIME;
        set_opt(sbi, FLUSH_MERGE);
-       set_opt(sbi, DISCARD);
-       if (f2fs_sb_has_blkzoned(sbi))
+       if (f2fs_hw_support_discard(sbi) || f2fs_hw_should_discard(sbi))
+               set_opt(sbi, DISCARD);
+       if (f2fs_sb_has_blkzoned(sbi)) {
                F2FS_OPTION(sbi).fs_mode = FS_MODE_LFS;
-       else
+               F2FS_OPTION(sbi).discard_unit = DISCARD_UNIT_SECTION;
+       } else {
                F2FS_OPTION(sbi).fs_mode = FS_MODE_ADAPTIVE;
+               F2FS_OPTION(sbi).discard_unit = DISCARD_UNIT_BLOCK;
+       }
 
 #ifdef CONFIG_F2FS_FS_XATTR
        set_opt(sbi, XATTR_USER);
@@ -2038,8 +2087,17 @@ restore_flag:
 
 static void f2fs_enable_checkpoint(struct f2fs_sb_info *sbi)
 {
+       int retry = DEFAULT_RETRY_IO_COUNT;
+
        /* we should flush all the data to keep data consistency */
-       sync_inodes_sb(sbi->sb);
+       do {
+               sync_inodes_sb(sbi->sb);
+               cond_resched();
+               congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT);
+       } while (get_pages(sbi, F2FS_DIRTY_DATA) && retry--);
+
+       if (unlikely(retry < 0))
+               f2fs_warn(sbi, "checkpoint=enable has some unwritten data.");
 
        down_write(&sbi->gc_lock);
        f2fs_dirty_to_prefree(sbi);
@@ -2060,12 +2118,15 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
        bool need_restart_gc = false, need_stop_gc = false;
        bool need_restart_ckpt = false, need_stop_ckpt = false;
        bool need_restart_flush = false, need_stop_flush = false;
+       bool need_restart_discard = false, need_stop_discard = false;
        bool no_extent_cache = !test_opt(sbi, EXTENT_CACHE);
-       bool disable_checkpoint = test_opt(sbi, DISABLE_CHECKPOINT);
+       bool enable_checkpoint = !test_opt(sbi, DISABLE_CHECKPOINT);
        bool no_io_align = !F2FS_IO_ALIGNED(sbi);
        bool no_atgc = !test_opt(sbi, ATGC);
+       bool no_discard = !test_opt(sbi, DISCARD);
        bool no_compress_cache = !test_opt(sbi, COMPRESS_CACHE);
-       bool checkpoint_changed;
+       bool block_unit_discard = f2fs_block_unit_discard(sbi);
+       struct discard_cmd_control *dcc;
 #ifdef CONFIG_QUOTA
        int i, j;
 #endif
@@ -2110,8 +2171,6 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
        err = parse_options(sb, data, true);
        if (err)
                goto restore_opts;
-       checkpoint_changed =
-                       disable_checkpoint != test_opt(sbi, DISABLE_CHECKPOINT);
 
        /*
         * Previous and new state of filesystem is RO,
@@ -2168,6 +2227,12 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
                goto restore_opts;
        }
 
+       if (block_unit_discard != f2fs_block_unit_discard(sbi)) {
+               err = -EINVAL;
+               f2fs_warn(sbi, "switch discard_unit option is not allowed");
+               goto restore_opts;
+       }
+
        if ((*flags & SB_RDONLY) && test_opt(sbi, DISABLE_CHECKPOINT)) {
                err = -EINVAL;
                f2fs_warn(sbi, "disabling checkpoint not compatible with read-only");
@@ -2233,11 +2298,26 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
                need_stop_flush = true;
        }
 
-       if (checkpoint_changed) {
+       if (no_discard == !!test_opt(sbi, DISCARD)) {
+               if (test_opt(sbi, DISCARD)) {
+                       err = f2fs_start_discard_thread(sbi);
+                       if (err)
+                               goto restore_flush;
+                       need_stop_discard = true;
+               } else {
+                       dcc = SM_I(sbi)->dcc_info;
+                       f2fs_stop_discard_thread(sbi);
+                       if (atomic_read(&dcc->discard_cmd_cnt))
+                               f2fs_issue_discard_timeout(sbi);
+                       need_restart_discard = true;
+               }
+       }
+
+       if (enable_checkpoint == !!test_opt(sbi, DISABLE_CHECKPOINT)) {
                if (test_opt(sbi, DISABLE_CHECKPOINT)) {
                        err = f2fs_disable_checkpoint(sbi);
                        if (err)
-                               goto restore_flush;
+                               goto restore_discard;
                } else {
                        f2fs_enable_checkpoint(sbi);
                }
@@ -2257,6 +2337,13 @@ skip:
        adjust_unusable_cap_perc(sbi);
        *flags = (*flags & ~SB_LAZYTIME) | (sb->s_flags & SB_LAZYTIME);
        return 0;
+restore_discard:
+       if (need_restart_discard) {
+               if (f2fs_start_discard_thread(sbi))
+                       f2fs_warn(sbi, "discard has been stopped");
+       } else if (need_stop_discard) {
+               f2fs_stop_discard_thread(sbi);
+       }
 restore_flush:
        if (need_restart_flush) {
                if (f2fs_create_flush_cmd_control(sbi))
@@ -2517,6 +2604,33 @@ static int f2fs_enable_quotas(struct super_block *sb)
        return 0;
 }
 
+/*
+ * Call dquot_writeback_dquots() for @type, then filemap_fdatawrite() on the
+ * quota file.  Unless quota is journalled, also call filemap_fdatawait() and
+ * truncate the quota file's page cache.  Errors set SBI_QUOTA_NEED_REPAIR.
+ */
+static int f2fs_quota_sync_file(struct f2fs_sb_info *sbi, int type)
+{
+       struct quota_info *dqopt = sb_dqopt(sbi->sb);
+       struct address_space *mapping = dqopt->files[type]->i_mapping;
+       int err;
+
+       err = dquot_writeback_dquots(sbi->sb, type);
+       if (!err)
+               err = filemap_fdatawrite(mapping);
+
+       /* journalled quota: skip the wait and the truncate */
+       if (!err && !is_journalled_quota(sbi)) {
+               err = filemap_fdatawait(mapping);
+               truncate_inode_pages(&dqopt->files[type]->i_data, 0);
+       }
+
+       if (err)
+               set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
+
+       return err;
+}
+
 int f2fs_quota_sync(struct super_block *sb, int type)
 {
        struct f2fs_sb_info *sbi = F2FS_SB(sb);
@@ -2524,57 +2638,42 @@ int f2fs_quota_sync(struct super_block *sb, int type)
        int cnt;
        int ret;
 
-       /*
-        * do_quotactl
-        *  f2fs_quota_sync
-        *  down_read(quota_sem)
-        *  dquot_writeback_dquots()
-        *  f2fs_dquot_commit
-        *                            block_operation
-        *                            down_read(quota_sem)
-        */
-       f2fs_lock_op(sbi);
-
-       down_read(&sbi->quota_sem);
-       ret = dquot_writeback_dquots(sb, type);
-       if (ret)
-               goto out;
-
        /*
         * Now when everything is written we can discard the pagecache so
         * that userspace sees the changes.
         */
        for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-               struct address_space *mapping;
 
                if (type != -1 && cnt != type)
                        continue;
-               if (!sb_has_quota_active(sb, cnt))
-                       continue;
 
-               mapping = dqopt->files[cnt]->i_mapping;
+               if (!sb_has_quota_active(sb, type))
+                       return 0;
 
-               ret = filemap_fdatawrite(mapping);
-               if (ret)
-                       goto out;
+               inode_lock(dqopt->files[cnt]);
 
-               /* if we are using journalled quota */
-               if (is_journalled_quota(sbi))
-                       continue;
+               /*
+                * do_quotactl
+                *  f2fs_quota_sync
+                *  down_read(quota_sem)
+                *  dquot_writeback_dquots()
+                *  f2fs_dquot_commit
+                *                            block_operation
+                *                            down_read(quota_sem)
+                */
+               f2fs_lock_op(sbi);
+               down_read(&sbi->quota_sem);
 
-               ret = filemap_fdatawait(mapping);
-               if (ret)
-                       set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
+               ret = f2fs_quota_sync_file(sbi, cnt);
+
+               up_read(&sbi->quota_sem);
+               f2fs_unlock_op(sbi);
 
-               inode_lock(dqopt->files[cnt]);
-               truncate_inode_pages(&dqopt->files[cnt]->i_data, 0);
                inode_unlock(dqopt->files[cnt]);
+
+               if (ret)
+                       break;
        }
-out:
-       if (ret)
-               set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
-       up_read(&sbi->quota_sem);
-       f2fs_unlock_op(sbi);
        return ret;
 }
 
@@ -3207,11 +3306,13 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
                return -EFSCORRUPTED;
        }
 
-       if (le32_to_cpu(raw_super->cp_payload) >
-                               (blocks_per_seg - F2FS_CP_PACKS)) {
-               f2fs_info(sbi, "Insane cp_payload (%u > %u)",
+       if (le32_to_cpu(raw_super->cp_payload) >=
+                               (blocks_per_seg - F2FS_CP_PACKS -
+                               NR_CURSEG_PERSIST_TYPE)) {
+               f2fs_info(sbi, "Insane cp_payload (%u >= %u)",
                          le32_to_cpu(raw_super->cp_payload),
-                         blocks_per_seg - F2FS_CP_PACKS);
+                         blocks_per_seg - F2FS_CP_PACKS -
+                         NR_CURSEG_PERSIST_TYPE);
                return -EFSCORRUPTED;
        }
 
@@ -3247,6 +3348,7 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi)
        unsigned int cp_pack_start_sum, cp_payload;
        block_t user_block_count, valid_user_blocks;
        block_t avail_node_count, valid_node_count;
+       unsigned int nat_blocks, nat_bits_bytes, nat_bits_blocks;
        int i, j;
 
        total = le32_to_cpu(raw_super->segment_count);
@@ -3377,6 +3479,17 @@ skip_cross:
                return 1;
        }
 
+       nat_blocks = nat_segs << log_blocks_per_seg;
+       nat_bits_bytes = nat_blocks / BITS_PER_BYTE;
+       nat_bits_blocks = F2FS_BLK_ALIGN((nat_bits_bytes << 1) + 8);
+       if (__is_set_ckpt_flags(ckpt, CP_NAT_BITS_FLAG) &&
+               (cp_payload + F2FS_CP_PACKS +
+               NR_CURSEG_PERSIST_TYPE + nat_bits_blocks >= blocks_per_seg)) {
+               f2fs_warn(sbi, "Insane cp_payload: %u, nat_bits_blocks: %u)",
+                         cp_payload, nat_bits_blocks);
+               return -EFSCORRUPTED;
+       }
+
        if (unlikely(f2fs_cp_error(sbi))) {
                f2fs_err(sbi, "A bug case: need to run fsck");
                return 1;
@@ -3409,6 +3522,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
        sbi->next_victim_seg[FG_GC] = NULL_SEGNO;
        sbi->max_victim_search = DEF_MAX_VICTIM_SEARCH;
        sbi->migration_granularity = sbi->segs_per_sec;
+       sbi->seq_file_ra_mul = MIN_RA_MUL;
 
        sbi->dir_level = DEF_DIR_LEVEL;
        sbi->interval_time[CP_TIME] = DEF_CP_INTERVAL;
@@ -3768,7 +3882,8 @@ static void f2fs_tuning_parameters(struct f2fs_sb_info *sbi)
        /* adjust parameters according to the volume size */
        if (sm_i->main_segments <= SMALL_VOLUME_SEGMENTS) {
                F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_REUSE;
-               sm_i->dcc_info->discard_granularity = 1;
+               if (f2fs_block_unit_discard(sbi))
+                       sm_i->dcc_info->discard_granularity = 1;
                sm_i->ipu_policy = 1 << F2FS_IPU_FORCE;
        }
 
@@ -3889,11 +4004,6 @@ try_onemore:
        set_sbi_flag(sbi, SBI_POR_DOING);
        spin_lock_init(&sbi->stat_lock);
 
-       /* init iostat info */
-       spin_lock_init(&sbi->iostat_lock);
-       sbi->iostat_enable = false;
-       sbi->iostat_period_ms = DEFAULT_IOSTAT_PERIOD_MS;
-
        for (i = 0; i < NR_PAGE_TYPE; i++) {
                int n = (i == META) ? 1 : NR_TEMP_TYPE;
                int j;
@@ -3924,10 +4034,14 @@ try_onemore:
        init_waitqueue_head(&sbi->cp_wait);
        init_sb_info(sbi);
 
-       err = init_percpu_info(sbi);
+       err = f2fs_init_iostat(sbi);
        if (err)
                goto free_bio_info;
 
+       err = init_percpu_info(sbi);
+       if (err)
+               goto free_iostat;
+
        if (F2FS_IO_ALIGNED(sbi)) {
                sbi->write_io_dummy =
                        mempool_create_page_pool(2 * (F2FS_IO_SIZE(sbi) - 1), 0);
@@ -4259,6 +4373,8 @@ free_io_dummy:
        mempool_destroy(sbi->write_io_dummy);
 free_percpu:
        destroy_percpu_info(sbi);
+free_iostat:
+       f2fs_destroy_iostat(sbi);
 free_bio_info:
        for (i = 0; i < NR_PAGE_TYPE; i++)
                kvfree(sbi->write_io[i]);
@@ -4401,9 +4517,12 @@ static int __init init_f2fs_fs(void)
        err = f2fs_init_post_read_processing();
        if (err)
                goto free_root_stats;
-       err = f2fs_init_bio_entry_cache();
+       err = f2fs_init_iostat_processing();
        if (err)
                goto free_post_read;
+       err = f2fs_init_bio_entry_cache();
+       if (err)
+               goto free_iostat;
        err = f2fs_init_bioset();
        if (err)
                goto free_bio_enrty_cache;
@@ -4425,6 +4544,8 @@ free_bioset:
        f2fs_destroy_bioset();
 free_bio_enrty_cache:
        f2fs_destroy_bio_entry_cache();
+free_iostat:
+       f2fs_destroy_iostat_processing();
 free_post_read:
        f2fs_destroy_post_read_processing();
 free_root_stats:
@@ -4459,6 +4580,7 @@ static void __exit exit_f2fs_fs(void)
        f2fs_destroy_compress_mempool();
        f2fs_destroy_bioset();
        f2fs_destroy_bio_entry_cache();
+       f2fs_destroy_iostat_processing();
        f2fs_destroy_post_read_processing();
        f2fs_destroy_root_stats();
        unregister_filesystem(&f2fs_fs_type);