Merge tag 'f2fs-for-5.17-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs
author Linus Torvalds <torvalds@linux-foundation.org>
Wed, 19 Jan 2022 09:50:20 +0000 (11:50 +0200)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 19 Jan 2022 09:50:20 +0000 (11:50 +0200)
Pull f2fs updates from Jaegeuk Kim:
 "In this round, we've tried to address some performance issues in
  f2fs_checkpoint and direct IO flows. Also, there was a work to enhance
  the page cache management used for compression. Other than them, we've
  done typical work including sysfs, code clean-ups, tracepoint, sanity
  check, in addition to bug fixes on corner cases.

  Enhancements:
   - use iomap for direct IO
   - try to avoid lock contention to improve f2fs_ckpt speed
   - avoid unnecessary memory allocation in compression flow
   - POSIX_FADV_DONTNEED drops the page cache containing compression
     pages
   - add some sysfs entries (gc_urgent_high_remaining, pending_discard)

  Bug fixes:
   - try not to expose unwritten blocks to user by DIO (this was added
     to avoid merge conflict; another patch is coming to address other
     missing case)
   - relax minor error condition for file pinning feature used in
     Android OTA
   - fix potential deadlock case in compression flow
   - should not truncate any block on pinned file

  In addition, we've done some code clean-ups and tracepoint/sanity
  check improvement"

* tag 'f2fs-for-5.17-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (29 commits)
  f2fs: do not allow partial truncation on pinned file
  f2fs: remove redunant invalidate compress pages
  f2fs: Simplify bool conversion
  f2fs: don't drop compressed page cache in .{invalidate,release}page
  f2fs: fix to reserve space for IO align feature
  f2fs: fix to check available space of CP area correctly in update_ckpt_flags()
  f2fs: support fault injection to f2fs_trylock_op()
  f2fs: clean up __find_inline_xattr() with __find_xattr()
  f2fs: fix to do sanity check on last xattr entry in __f2fs_setxattr()
  f2fs: do not bother checkpoint by f2fs_get_node_info
  f2fs: avoid down_write on nat_tree_lock during checkpoint
  f2fs: compress: fix potential deadlock of compress file
  f2fs: avoid EINVAL by SBI_NEED_FSCK when pinning a file
  f2fs: add gc_urgent_high_remaining sysfs node
  f2fs: fix to do sanity check in is_alive()
  f2fs: fix to avoid panic in is_alive() if metadata is inconsistent
  f2fs: fix to do sanity check on inode type during garbage collection
  f2fs: avoid duplicate call of mark_inode_dirty
  f2fs: show number of pending discard commands
  f2fs: support POSIX_FADV_DONTNEED drop compressed page cache
  ...

1  2 
fs/f2fs/data.c
fs/f2fs/f2fs.h
fs/f2fs/gc.c
fs/f2fs/inode.c
fs/f2fs/node.c
fs/f2fs/recovery.c
fs/f2fs/segment.c
fs/f2fs/super.c
fs/f2fs/sysfs.c

diff --combined fs/f2fs/data.c
@@@ -8,9 -8,9 +8,9 @@@
  #include <linux/fs.h>
  #include <linux/f2fs_fs.h>
  #include <linux/buffer_head.h>
 +#include <linux/sched/mm.h>
  #include <linux/mpage.h>
  #include <linux/writeback.h>
 -#include <linux/backing-dev.h>
  #include <linux/pagevec.h>
  #include <linux/blkdev.h>
  #include <linux/bio.h>
@@@ -21,6 -21,7 +21,7 @@@
  #include <linux/cleancache.h>
  #include <linux/sched/signal.h>
  #include <linux/fiemap.h>
+ #include <linux/iomap.h>
  
  #include "f2fs.h"
  #include "node.h"
@@@ -1354,7 -1355,7 +1355,7 @@@ static int __allocate_data_block(struc
        if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
                return -EPERM;
  
-       err = f2fs_get_node_info(sbi, dn->nid, &ni);
+       err = f2fs_get_node_info(sbi, dn->nid, &ni, false);
        if (err)
                return err;
  
@@@ -1376,61 -1377,9 +1377,9 @@@ alloc
                f2fs_invalidate_compress_page(sbi, old_blkaddr);
        }
        f2fs_update_data_blkaddr(dn, dn->data_blkaddr);
-       /*
-        * i_size will be updated by direct_IO. Otherwise, we'll get stale
-        * data from unwritten block via dio_read.
-        */
        return 0;
  }
  
- int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
- {
-       struct inode *inode = file_inode(iocb->ki_filp);
-       struct f2fs_map_blocks map;
-       int flag;
-       int err = 0;
-       bool direct_io = iocb->ki_flags & IOCB_DIRECT;
-       map.m_lblk = F2FS_BLK_ALIGN(iocb->ki_pos);
-       map.m_len = F2FS_BYTES_TO_BLK(iocb->ki_pos + iov_iter_count(from));
-       if (map.m_len > map.m_lblk)
-               map.m_len -= map.m_lblk;
-       else
-               map.m_len = 0;
-       map.m_next_pgofs = NULL;
-       map.m_next_extent = NULL;
-       map.m_seg_type = NO_CHECK_TYPE;
-       map.m_may_create = true;
-       if (direct_io) {
-               map.m_seg_type = f2fs_rw_hint_to_seg_type(iocb->ki_hint);
-               flag = f2fs_force_buffered_io(inode, iocb, from) ?
-                                       F2FS_GET_BLOCK_PRE_AIO :
-                                       F2FS_GET_BLOCK_PRE_DIO;
-               goto map_blocks;
-       }
-       if (iocb->ki_pos + iov_iter_count(from) > MAX_INLINE_DATA(inode)) {
-               err = f2fs_convert_inline_inode(inode);
-               if (err)
-                       return err;
-       }
-       if (f2fs_has_inline_data(inode))
-               return err;
-       flag = F2FS_GET_BLOCK_PRE_AIO;
- map_blocks:
-       err = f2fs_map_blocks(inode, &map, 1, flag);
-       if (map.m_len > 0 && err == -ENOSPC) {
-               if (!direct_io)
-                       set_inode_flag(inode, FI_NO_PREALLOC);
-               err = 0;
-       }
-       return err;
- }
  void f2fs_do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
  {
        if (flag == F2FS_GET_BLOCK_PRE_AIO) {
@@@ -1590,8 -1539,11 +1539,11 @@@ next_block
                                        flag != F2FS_GET_BLOCK_DIO);
                                err = __allocate_data_block(&dn,
                                                        map->m_seg_type);
-                               if (!err)
+                               if (!err) {
+                                       if (flag == F2FS_GET_BLOCK_PRE_DIO)
+                                               file_need_truncate(inode);
                                        set_inode_flag(inode, FI_APPEND_WRITE);
+                               }
                        }
                        if (err)
                                goto sync_out;
@@@ -1786,50 -1738,6 +1738,6 @@@ static inline u64 blks_to_bytes(struct 
        return (blks << inode->i_blkbits);
  }
  
- static int __get_data_block(struct inode *inode, sector_t iblock,
-                       struct buffer_head *bh, int create, int flag,
-                       pgoff_t *next_pgofs, int seg_type, bool may_write)
- {
-       struct f2fs_map_blocks map;
-       int err;
-       map.m_lblk = iblock;
-       map.m_len = bytes_to_blks(inode, bh->b_size);
-       map.m_next_pgofs = next_pgofs;
-       map.m_next_extent = NULL;
-       map.m_seg_type = seg_type;
-       map.m_may_create = may_write;
-       err = f2fs_map_blocks(inode, &map, create, flag);
-       if (!err) {
-               map_bh(bh, inode->i_sb, map.m_pblk);
-               bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags;
-               bh->b_size = blks_to_bytes(inode, map.m_len);
-               if (map.m_multidev_dio)
-                       bh->b_bdev = map.m_bdev;
-       }
-       return err;
- }
- static int get_data_block_dio_write(struct inode *inode, sector_t iblock,
-                       struct buffer_head *bh_result, int create)
- {
-       return __get_data_block(inode, iblock, bh_result, create,
-                               F2FS_GET_BLOCK_DIO, NULL,
-                               f2fs_rw_hint_to_seg_type(inode->i_write_hint),
-                               true);
- }
- static int get_data_block_dio(struct inode *inode, sector_t iblock,
-                       struct buffer_head *bh_result, int create)
- {
-       return __get_data_block(inode, iblock, bh_result, create,
-                               F2FS_GET_BLOCK_DIO, NULL,
-                               f2fs_rw_hint_to_seg_type(inode->i_write_hint),
-                               false);
- }
  static int f2fs_xattr_fiemap(struct inode *inode,
                                struct fiemap_extent_info *fieinfo)
  {
                if (!page)
                        return -ENOMEM;
  
-               err = f2fs_get_node_info(sbi, inode->i_ino, &ni);
+               err = f2fs_get_node_info(sbi, inode->i_ino, &ni, false);
                if (err) {
                        f2fs_put_page(page, 1);
                        return err;
                if (!page)
                        return -ENOMEM;
  
-               err = f2fs_get_node_info(sbi, xnid, &ni);
+               err = f2fs_get_node_info(sbi, xnid, &ni, false);
                if (err) {
                        f2fs_put_page(page, 1);
                        return err;
@@@ -2542,7 -2450,7 +2450,7 @@@ retry_encrypt
                /* flush pending IOs and wait for a while in the ENOMEM case */
                if (PTR_ERR(fio->encrypted_page) == -ENOMEM) {
                        f2fs_flush_merged_writes(fio->sbi);
 -                      congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT);
 +                      memalloc_retry_wait(GFP_NOFS);
                        gfp_flags |= __GFP_NOFAIL;
                        goto retry_encrypt;
                }
@@@ -2617,6 -2525,11 +2525,11 @@@ bool f2fs_should_update_outplace(struc
  {
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
  
+       /* The below cases were checked when setting it. */
+       if (f2fs_is_pinned_file(inode))
+               return false;
+       if (fio && is_sbi_flag_set(sbi, SBI_NEED_FSCK))
+               return true;
        if (f2fs_lfs_mode(sbi))
                return true;
        if (S_ISDIR(inode->i_mode))
                return true;
        if (f2fs_is_atomic_file(inode))
                return true;
-       if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
-               return true;
  
        /* swap file is migrating in aligned write mode */
        if (is_inode_flag_set(inode, FI_ALIGNED_WRITE))
@@@ -2738,7 -2649,7 +2649,7 @@@ got_it
                fio->need_lock = LOCK_REQ;
        }
  
-       err = f2fs_get_node_info(fio->sbi, dn.nid, &ni);
+       err = f2fs_get_node_info(fio->sbi, dn.nid, &ni, false);
        if (err)
                goto out_writepage;
  
@@@ -2987,6 -2898,7 +2898,7 @@@ static int f2fs_write_cache_pages(struc
                .rpages = NULL,
                .nr_rpages = 0,
                .cpages = NULL,
+               .valid_nr_cpages = 0,
                .rbuf = NULL,
                .cbuf = NULL,
                .rlen = PAGE_SIZE * F2FS_I(inode)->i_cluster_size,
@@@ -3305,7 -3217,7 +3217,7 @@@ static int f2fs_write_data_pages(struc
                        FS_CP_DATA_IO : FS_DATA_IO);
  }
  
- static void f2fs_write_failed(struct inode *inode, loff_t to)
+ void f2fs_write_failed(struct inode *inode, loff_t to)
  {
        loff_t i_size = i_size_read(inode);
  
@@@ -3339,12 -3251,10 +3251,10 @@@ static int prepare_write_begin(struct f
        int flag;
  
        /*
-        * we already allocated all the blocks, so we don't need to get
-        * the block addresses when there is no need to fill the page.
+        * If a whole page is being written and we already preallocated all the
+        * blocks, then there is no need to get a block address now.
         */
-       if (!f2fs_has_inline_data(inode) && len == PAGE_SIZE &&
-           !is_inode_flag_set(inode, FI_NO_PREALLOC) &&
-           !f2fs_verity_in_progress(inode))
+       if (len == PAGE_SIZE && is_inode_flag_set(inode, FI_PREALLOCATED_ALL))
                return 0;
  
        /* f2fs_lock_op avoids race between write CP and convert_inline_page */
@@@ -3595,158 -3505,6 +3505,6 @@@ unlock_out
        return copied;
  }
  
- static int check_direct_IO(struct inode *inode, struct iov_iter *iter,
-                          loff_t offset)
- {
-       unsigned i_blkbits = READ_ONCE(inode->i_blkbits);
-       unsigned blkbits = i_blkbits;
-       unsigned blocksize_mask = (1 << blkbits) - 1;
-       unsigned long align = offset | iov_iter_alignment(iter);
-       struct block_device *bdev = inode->i_sb->s_bdev;
-       if (iov_iter_rw(iter) == READ && offset >= i_size_read(inode))
-               return 1;
-       if (align & blocksize_mask) {
-               if (bdev)
-                       blkbits = blksize_bits(bdev_logical_block_size(bdev));
-               blocksize_mask = (1 << blkbits) - 1;
-               if (align & blocksize_mask)
-                       return -EINVAL;
-               return 1;
-       }
-       return 0;
- }
- static void f2fs_dio_end_io(struct bio *bio)
- {
-       struct f2fs_private_dio *dio = bio->bi_private;
-       dec_page_count(F2FS_I_SB(dio->inode),
-                       dio->write ? F2FS_DIO_WRITE : F2FS_DIO_READ);
-       bio->bi_private = dio->orig_private;
-       bio->bi_end_io = dio->orig_end_io;
-       kfree(dio);
-       bio_endio(bio);
- }
- static void f2fs_dio_submit_bio(struct bio *bio, struct inode *inode,
-                                                       loff_t file_offset)
- {
-       struct f2fs_private_dio *dio;
-       bool write = (bio_op(bio) == REQ_OP_WRITE);
-       dio = f2fs_kzalloc(F2FS_I_SB(inode),
-                       sizeof(struct f2fs_private_dio), GFP_NOFS);
-       if (!dio)
-               goto out;
-       dio->inode = inode;
-       dio->orig_end_io = bio->bi_end_io;
-       dio->orig_private = bio->bi_private;
-       dio->write = write;
-       bio->bi_end_io = f2fs_dio_end_io;
-       bio->bi_private = dio;
-       inc_page_count(F2FS_I_SB(inode),
-                       write ? F2FS_DIO_WRITE : F2FS_DIO_READ);
-       submit_bio(bio);
-       return;
- out:
-       bio->bi_status = BLK_STS_IOERR;
-       bio_endio(bio);
- }
- static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
- {
-       struct address_space *mapping = iocb->ki_filp->f_mapping;
-       struct inode *inode = mapping->host;
-       struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
-       struct f2fs_inode_info *fi = F2FS_I(inode);
-       size_t count = iov_iter_count(iter);
-       loff_t offset = iocb->ki_pos;
-       int rw = iov_iter_rw(iter);
-       int err;
-       enum rw_hint hint = iocb->ki_hint;
-       int whint_mode = F2FS_OPTION(sbi).whint_mode;
-       bool do_opu;
-       err = check_direct_IO(inode, iter, offset);
-       if (err)
-               return err < 0 ? err : 0;
-       if (f2fs_force_buffered_io(inode, iocb, iter))
-               return 0;
-       do_opu = rw == WRITE && f2fs_lfs_mode(sbi);
-       trace_f2fs_direct_IO_enter(inode, offset, count, rw);
-       if (rw == WRITE && whint_mode == WHINT_MODE_OFF)
-               iocb->ki_hint = WRITE_LIFE_NOT_SET;
-       if (iocb->ki_flags & IOCB_NOWAIT) {
-               if (!down_read_trylock(&fi->i_gc_rwsem[rw])) {
-                       iocb->ki_hint = hint;
-                       err = -EAGAIN;
-                       goto out;
-               }
-               if (do_opu && !down_read_trylock(&fi->i_gc_rwsem[READ])) {
-                       up_read(&fi->i_gc_rwsem[rw]);
-                       iocb->ki_hint = hint;
-                       err = -EAGAIN;
-                       goto out;
-               }
-       } else {
-               down_read(&fi->i_gc_rwsem[rw]);
-               if (do_opu)
-                       down_read(&fi->i_gc_rwsem[READ]);
-       }
-       err = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev,
-                       iter, rw == WRITE ? get_data_block_dio_write :
-                       get_data_block_dio, NULL, f2fs_dio_submit_bio,
-                       rw == WRITE ? DIO_LOCKING | DIO_SKIP_HOLES :
-                       DIO_SKIP_HOLES);
-       if (do_opu)
-               up_read(&fi->i_gc_rwsem[READ]);
-       up_read(&fi->i_gc_rwsem[rw]);
-       if (rw == WRITE) {
-               if (whint_mode == WHINT_MODE_OFF)
-                       iocb->ki_hint = hint;
-               if (err > 0) {
-                       f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_IO,
-                                                                       err);
-                       if (!do_opu)
-                               set_inode_flag(inode, FI_UPDATE_WRITE);
-               } else if (err == -EIOCBQUEUED) {
-                       f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_IO,
-                                               count - iov_iter_count(iter));
-               } else if (err < 0) {
-                       f2fs_write_failed(inode, offset + count);
-               }
-       } else {
-               if (err > 0)
-                       f2fs_update_iostat(sbi, APP_DIRECT_READ_IO, err);
-               else if (err == -EIOCBQUEUED)
-                       f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_READ_IO,
-                                               count - iov_iter_count(iter));
-       }
- out:
-       trace_f2fs_direct_IO_exit(inode, offset, count, rw, err);
-       return err;
- }
  void f2fs_invalidate_page(struct page *page, unsigned int offset,
                                                        unsigned int length)
  {
  
        clear_page_private_gcing(page);
  
-       if (test_opt(sbi, COMPRESS_CACHE)) {
-               if (f2fs_compressed_file(inode))
-                       f2fs_invalidate_compress_pages(sbi, inode->i_ino);
-               if (inode->i_ino == F2FS_COMPRESS_INO(sbi))
-                       clear_page_private_data(page);
-       }
+       if (test_opt(sbi, COMPRESS_CACHE) &&
+                       inode->i_ino == F2FS_COMPRESS_INO(sbi))
+               clear_page_private_data(page);
  
        if (page_private_atomic(page))
                return f2fs_drop_inmem_page(inode, page);
@@@ -3795,12 -3550,9 +3550,9 @@@ int f2fs_release_page(struct page *page
                return 0;
  
        if (test_opt(F2FS_P_SB(page), COMPRESS_CACHE)) {
-               struct f2fs_sb_info *sbi = F2FS_P_SB(page);
                struct inode *inode = page->mapping->host;
  
-               if (f2fs_compressed_file(inode))
-                       f2fs_invalidate_compress_pages(sbi, inode->i_ino);
-               if (inode->i_ino == F2FS_COMPRESS_INO(sbi))
+               if (inode->i_ino == F2FS_COMPRESS_INO(F2FS_I_SB(inode)))
                        clear_page_private_data(page);
        }
  
@@@ -4202,7 -3954,7 +3954,7 @@@ const struct address_space_operations f
        .set_page_dirty = f2fs_set_data_page_dirty,
        .invalidatepage = f2fs_invalidate_page,
        .releasepage    = f2fs_release_page,
-       .direct_IO      = f2fs_direct_IO,
+       .direct_IO      = noop_direct_IO,
        .bmap           = f2fs_bmap,
        .swap_activate  = f2fs_swap_activate,
        .swap_deactivate = f2fs_swap_deactivate,
@@@ -4282,3 -4034,58 +4034,58 @@@ void f2fs_destroy_bio_entry_cache(void
  {
        kmem_cache_destroy(bio_entry_slab);
  }
+ static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
+                           unsigned int flags, struct iomap *iomap,
+                           struct iomap *srcmap)
+ {
+       struct f2fs_map_blocks map = {};
+       pgoff_t next_pgofs = 0;
+       int err;
+       map.m_lblk = bytes_to_blks(inode, offset);
+       map.m_len = bytes_to_blks(inode, offset + length - 1) - map.m_lblk + 1;
+       map.m_next_pgofs = &next_pgofs;
+       map.m_seg_type = f2fs_rw_hint_to_seg_type(inode->i_write_hint);
+       if (flags & IOMAP_WRITE)
+               map.m_may_create = true;
+       err = f2fs_map_blocks(inode, &map, flags & IOMAP_WRITE,
+                             F2FS_GET_BLOCK_DIO);
+       if (err)
+               return err;
+       iomap->offset = blks_to_bytes(inode, map.m_lblk);
+       if (map.m_flags & (F2FS_MAP_MAPPED | F2FS_MAP_UNWRITTEN)) {
+               iomap->length = blks_to_bytes(inode, map.m_len);
+               if (map.m_flags & F2FS_MAP_MAPPED) {
+                       iomap->type = IOMAP_MAPPED;
+                       iomap->flags |= IOMAP_F_MERGED;
+               } else {
+                       iomap->type = IOMAP_UNWRITTEN;
+               }
+               if (WARN_ON_ONCE(!__is_valid_data_blkaddr(map.m_pblk)))
+                       return -EINVAL;
+               iomap->bdev = map.m_bdev;
+               iomap->addr = blks_to_bytes(inode, map.m_pblk);
+       } else {
+               iomap->length = blks_to_bytes(inode, next_pgofs) -
+                               iomap->offset;
+               iomap->type = IOMAP_HOLE;
+               iomap->addr = IOMAP_NULL_ADDR;
+       }
+       if (map.m_flags & F2FS_MAP_NEW)
+               iomap->flags |= IOMAP_F_NEW;
+       if ((inode->i_state & I_DIRTY_DATASYNC) ||
+           offset + length > i_size_read(inode))
+               iomap->flags |= IOMAP_F_DIRTY;
+       return 0;
+ }
+ const struct iomap_ops f2fs_iomap_ops = {
+       .iomap_begin    = f2fs_iomap_begin,
+ };
diff --combined fs/f2fs/f2fs.h
@@@ -28,8 -28,6 +28,8 @@@
  #include <linux/fscrypt.h>
  #include <linux/fsverity.h>
  
 +struct pagevec;
 +
  #ifdef CONFIG_F2FS_CHECK_FS
  #define f2fs_bug_on(sbi, condition)   BUG_ON(condition)
  #else
@@@ -58,6 -56,7 +58,7 @@@ enum 
        FAULT_WRITE_IO,
        FAULT_SLAB_ALLOC,
        FAULT_DQUOT_INIT,
+       FAULT_LOCK_OP,
        FAULT_MAX,
  };
  
@@@ -656,6 -655,7 +657,7 @@@ enum 
  #define FADVISE_KEEP_SIZE_BIT 0x10
  #define FADVISE_HOT_BIT               0x20
  #define FADVISE_VERITY_BIT    0x40
+ #define FADVISE_TRUNC_BIT     0x80
  
  #define FADVISE_MODIFIABLE_BITS       (FADVISE_COLD_BIT | FADVISE_HOT_BIT)
  
  #define file_is_verity(inode) is_file(inode, FADVISE_VERITY_BIT)
  #define file_set_verity(inode)        set_file(inode, FADVISE_VERITY_BIT)
  
+ #define file_should_truncate(inode)   is_file(inode, FADVISE_TRUNC_BIT)
+ #define file_need_truncate(inode)     set_file(inode, FADVISE_TRUNC_BIT)
+ #define file_dont_truncate(inode)     clear_file(inode, FADVISE_TRUNC_BIT)
  #define DEF_DIR_LEVEL         0
  
  enum {
@@@ -717,7 -721,7 +723,7 @@@ enum 
        FI_INLINE_DOTS,         /* indicate inline dot dentries */
        FI_DO_DEFRAG,           /* indicate defragment is running */
        FI_DIRTY_FILE,          /* indicate regular/symlink has dirty pages */
-       FI_NO_PREALLOC,         /* indicate skipped preallocated blocks */
+       FI_PREALLOCATED_ALL,    /* all blocks for write were preallocated */
        FI_HOT_DATA,            /* indicate file is hot */
        FI_EXTRA_ATTR,          /* indicate file has extra attribute */
        FI_PROJ_INHERIT,        /* indicate file inherits projectid */
@@@ -1020,6 -1024,7 +1026,7 @@@ struct f2fs_sm_info 
        unsigned int segment_count;     /* total # of segments */
        unsigned int main_segments;     /* # of segments in main area */
        unsigned int reserved_segments; /* # of reserved segments */
+       unsigned int additional_reserved_segments;/* reserved segs for IO align feature */
        unsigned int ovp_segments;      /* # of overprovision segments */
  
        /* a threshold to reclaim prefree segments */
@@@ -1488,6 -1493,7 +1495,7 @@@ struct compress_ctx 
        unsigned int nr_rpages;         /* total page number in rpages */
        struct page **cpages;           /* pages store compressed data in cluster */
        unsigned int nr_cpages;         /* total page number in cpages */
+       unsigned int valid_nr_cpages;   /* valid page number in cpages */
        void *rbuf;                     /* virtual mapped address on rpages */
        struct compress_data *cbuf;     /* virtual mapped address on cpages */
        size_t rlen;                    /* valid data length in rbuf */
@@@ -1679,6 -1685,9 +1687,9 @@@ struct f2fs_sb_info 
        unsigned int cur_victim_sec;            /* current victim section num */
        unsigned int gc_mode;                   /* current GC state */
        unsigned int next_victim_seg[2];        /* next segment in victim section */
+       spinlock_t gc_urgent_high_lock;
+       bool gc_urgent_high_limited;            /* indicates having limited trial count */
+       unsigned int gc_urgent_high_remaining;  /* remaining trial count for GC_URGENT_HIGH */
  
        /* for skip statistic */
        unsigned int atomic_files;              /* # of opened atomic file */
  #endif
  };
  
- struct f2fs_private_dio {
-       struct inode *inode;
-       void *orig_private;
-       bio_end_io_t *orig_end_io;
-       bool write;
- };
  #ifdef CONFIG_F2FS_FAULT_INJECTION
  #define f2fs_show_injection_info(sbi, type)                                   \
        printk_ratelimited("%sF2FS-fs (%s) : inject %s in %s of %pS\n", \
@@@ -2095,6 -2097,10 +2099,10 @@@ static inline void f2fs_lock_op(struct 
  
  static inline int f2fs_trylock_op(struct f2fs_sb_info *sbi)
  {
+       if (time_to_inject(sbi, FAULT_LOCK_OP)) {
+               f2fs_show_injection_info(sbi, FAULT_LOCK_OP);
+               return 0;
+       }
        return down_read_trylock(&sbi->cp_rwsem);
  }
  
@@@ -2200,6 -2206,11 +2208,11 @@@ static inline int inc_valid_block_count
  
        if (!__allow_reserved_blocks(sbi, inode, true))
                avail_user_block_count -= F2FS_OPTION(sbi).root_reserved_blocks;
+       if (F2FS_IO_ALIGNED(sbi))
+               avail_user_block_count -= sbi->blocks_per_seg *
+                               SM_I(sbi)->additional_reserved_segments;
        if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
                if (avail_user_block_count > sbi->unusable_block_count)
                        avail_user_block_count -= sbi->unusable_block_count;
@@@ -2446,6 -2457,11 +2459,11 @@@ static inline int inc_valid_node_count(
  
        if (!__allow_reserved_blocks(sbi, inode, false))
                valid_block_count += F2FS_OPTION(sbi).root_reserved_blocks;
+       if (F2FS_IO_ALIGNED(sbi))
+               valid_block_count += sbi->blocks_per_seg *
+                               SM_I(sbi)->additional_reserved_segments;
        user_block_count = sbi->user_block_count;
        if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
                user_block_count -= sbi->unusable_block_count;
@@@ -3118,12 -3134,16 +3136,16 @@@ static inline int is_file(struct inode 
  
  static inline void set_file(struct inode *inode, int type)
  {
+       if (is_file(inode, type))
+               return;
        F2FS_I(inode)->i_advise |= type;
        f2fs_mark_inode_dirty_sync(inode, true);
  }
  
  static inline void clear_file(struct inode *inode, int type)
  {
+       if (!is_file(inode, type))
+               return;
        F2FS_I(inode)->i_advise &= ~type;
        f2fs_mark_inode_dirty_sync(inode, true);
  }
@@@ -3408,7 -3428,7 +3430,7 @@@ int f2fs_need_dentry_mark(struct f2fs_s
  bool f2fs_is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid);
  bool f2fs_need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino);
  int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
-                                               struct node_info *ni);
+                               struct node_info *ni, bool checkpoint_context);
  pgoff_t f2fs_get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs);
  int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode);
  int f2fs_truncate_inode_blocks(struct inode *inode, pgoff_t from);
@@@ -3616,7 -3636,6 +3638,6 @@@ void f2fs_update_data_blkaddr(struct dn
  int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count);
  int f2fs_reserve_new_block(struct dnode_of_data *dn);
  int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index);
- int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from);
  int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index);
  struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index,
                        int op_flags, bool for_write);
@@@ -3639,6 -3658,7 +3660,7 @@@ int f2fs_write_single_data_page(struct 
                                struct writeback_control *wbc,
                                enum iostat_type io_type,
                                int compr_blocks, bool allow_balance);
+ void f2fs_write_failed(struct inode *inode, loff_t to);
  void f2fs_invalidate_page(struct page *page, unsigned int offset,
                        unsigned int length);
  int f2fs_release_page(struct page *page, gfp_t wait);
@@@ -3652,6 -3672,7 +3674,7 @@@ int f2fs_init_post_read_processing(void
  void f2fs_destroy_post_read_processing(void);
  int f2fs_init_post_read_wq(struct f2fs_sb_info *sbi);
  void f2fs_destroy_post_read_wq(struct f2fs_sb_info *sbi);
+ extern const struct iomap_ops f2fs_iomap_ops;
  
  /*
   * gc.c
diff --combined fs/f2fs/gc.c
@@@ -7,6 -7,7 +7,6 @@@
   */
  #include <linux/fs.h>
  #include <linux/module.h>
 -#include <linux/backing-dev.h>
  #include <linux/init.h>
  #include <linux/f2fs_fs.h>
  #include <linux/kthread.h>
@@@ -14,7 -15,6 +14,7 @@@
  #include <linux/freezer.h>
  #include <linux/sched/signal.h>
  #include <linux/random.h>
 +#include <linux/sched/mm.h>
  
  #include "f2fs.h"
  #include "node.h"
@@@ -92,6 -92,18 +92,18 @@@ static int gc_thread_func(void *data
                 * So, I'd like to wait some time to collect dirty segments.
                 */
                if (sbi->gc_mode == GC_URGENT_HIGH) {
+                       spin_lock(&sbi->gc_urgent_high_lock);
+                       if (sbi->gc_urgent_high_limited) {
+                               if (!sbi->gc_urgent_high_remaining) {
+                                       sbi->gc_urgent_high_limited = false;
+                                       spin_unlock(&sbi->gc_urgent_high_lock);
+                                       sbi->gc_mode = GC_NORMAL;
+                                       continue;
+                               }
+                               sbi->gc_urgent_high_remaining--;
+                       }
+                       spin_unlock(&sbi->gc_urgent_high_lock);
                        wait_ms = gc_th->urgent_sleep_time;
                        down_write(&sbi->gc_lock);
                        goto do_gc;
@@@ -947,7 -959,7 +959,7 @@@ next_step
                        continue;
                }
  
-               if (f2fs_get_node_info(sbi, nid, &ni)) {
+               if (f2fs_get_node_info(sbi, nid, &ni, false)) {
                        f2fs_put_page(node_page, 1);
                        continue;
                }
@@@ -1015,7 -1027,7 +1027,7 @@@ static bool is_alive(struct f2fs_sb_inf
        if (IS_ERR(node_page))
                return false;
  
-       if (f2fs_get_node_info(sbi, nid, dni)) {
+       if (f2fs_get_node_info(sbi, nid, dni, false)) {
                f2fs_put_page(node_page, 1);
                return false;
        }
                set_sbi_flag(sbi, SBI_NEED_FSCK);
        }
  
+       if (f2fs_check_nid_range(sbi, dni->ino))
+               return false;
        *nofs = ofs_of_node(node_page);
        source_blkaddr = data_blkaddr(NULL, node_page, ofs_in_node);
        f2fs_put_page(node_page, 1);
                        if (!test_and_set_bit(segno, SIT_I(sbi)->invalid_segmap)) {
                                f2fs_err(sbi, "mismatched blkaddr %u (source_blkaddr %u) in seg %u",
                                         blkaddr, source_blkaddr, segno);
-                               f2fs_bug_on(sbi, 1);
+                               set_sbi_flag(sbi, SBI_NEED_FSCK);
                        }
                }
  #endif
@@@ -1206,7 -1221,7 +1221,7 @@@ static int move_data_block(struct inod
  
        f2fs_wait_on_block_writeback(inode, dn.data_blkaddr);
  
-       err = f2fs_get_node_info(fio.sbi, dn.nid, &ni);
+       err = f2fs_get_node_info(fio.sbi, dn.nid, &ni, false);
        if (err)
                goto put_out;
  
@@@ -1375,7 -1390,8 +1390,7 @@@ retry
                if (err) {
                        clear_page_private_gcing(page);
                        if (err == -ENOMEM) {
 -                              congestion_wait(BLK_RW_ASYNC,
 -                                              DEFAULT_IO_TIMEOUT);
 +                              memalloc_retry_wait(GFP_NOFS);
                                goto retry;
                        }
                        if (is_dirty)
@@@ -1456,7 -1472,8 +1471,8 @@@ next_step
  
                if (phase == 3) {
                        inode = f2fs_iget(sb, dni.ino);
-                       if (IS_ERR(inode) || is_bad_inode(inode))
+                       if (IS_ERR(inode) || is_bad_inode(inode) ||
+                                       special_file(inode->i_mode))
                                continue;
  
                        if (!down_write_trylock(
diff --combined fs/f2fs/inode.c
@@@ -8,8 -8,8 +8,8 @@@
  #include <linux/fs.h>
  #include <linux/f2fs_fs.h>
  #include <linux/buffer_head.h>
 -#include <linux/backing-dev.h>
  #include <linux/writeback.h>
 +#include <linux/sched/mm.h>
  
  #include "f2fs.h"
  #include "node.h"
@@@ -516,6 -516,11 +516,11 @@@ make_now
        } else if (ino == F2FS_COMPRESS_INO(sbi)) {
  #ifdef CONFIG_F2FS_FS_COMPRESSION
                inode->i_mapping->a_ops = &f2fs_compress_aops;
+               /*
+                * generic_error_remove_page only truncates pages of regular
+                * inode
+                */
+               inode->i_mode |= S_IFREG;
  #endif
                mapping_set_gfp_mask(inode->i_mapping,
                        GFP_NOFS | __GFP_HIGHMEM | __GFP_MOVABLE);
                goto bad_inode;
        }
        f2fs_set_inode_flags(inode);
+       if (file_should_truncate(inode)) {
+               ret = f2fs_truncate(inode);
+               if (ret)
+                       goto bad_inode;
+               file_dont_truncate(inode);
+       }
        unlock_new_inode(inode);
        trace_f2fs_iget(inode);
        return inode;
@@@ -562,7 -575,7 +575,7 @@@ retry
        inode = f2fs_iget(sb, ino);
        if (IS_ERR(inode)) {
                if (PTR_ERR(inode) == -ENOMEM) {
 -                      congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT);
 +                      memalloc_retry_wait(GFP_NOFS);
                        goto retry;
                }
        }
@@@ -738,7 -751,8 +751,8 @@@ void f2fs_evict_inode(struct inode *ino
        trace_f2fs_evict_inode(inode);
        truncate_inode_pages_final(&inode->i_data);
  
-       if (test_opt(sbi, COMPRESS_CACHE) && f2fs_compressed_file(inode))
+       if ((inode->i_nlink || is_bad_inode(inode)) &&
+               test_opt(sbi, COMPRESS_CACHE) && f2fs_compressed_file(inode))
                f2fs_invalidate_compress_pages(sbi, inode->i_ino);
  
        if (inode->i_ino == F2FS_NODE_INO(sbi) ||
@@@ -868,7 -882,7 +882,7 @@@ void f2fs_handle_failed_inode(struct in
         * so we can prevent losing this orphan when encoutering checkpoint
         * and following suddenly power-off.
         */
-       err = f2fs_get_node_info(sbi, inode->i_ino, &ni);
+       err = f2fs_get_node_info(sbi, inode->i_ino, &ni, false);
        if (err) {
                set_sbi_flag(sbi, SBI_NEED_FSCK);
                f2fs_warn(sbi, "May loss orphan inode, run fsck to fix.");
diff --combined fs/f2fs/node.c
@@@ -8,7 -8,7 +8,7 @@@
  #include <linux/fs.h>
  #include <linux/f2fs_fs.h>
  #include <linux/mpage.h>
 -#include <linux/backing-dev.h>
 +#include <linux/sched/mm.h>
  #include <linux/blkdev.h>
  #include <linux/pagevec.h>
  #include <linux/swap.h>
@@@ -430,6 -430,10 +430,10 @@@ static void cache_nat_entry(struct f2fs
        struct f2fs_nm_info *nm_i = NM_I(sbi);
        struct nat_entry *new, *e;
  
+       /* Let's mitigate lock contention of nat_tree_lock during checkpoint */
+       if (rwsem_is_locked(&sbi->cp_global_sem))
+               return;
        new = __alloc_nat_entry(sbi, nid, false);
        if (!new)
                return;
@@@ -539,7 -543,7 +543,7 @@@ int f2fs_try_to_free_nats(struct f2fs_s
  }
  
  int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
-                                               struct node_info *ni)
+                               struct node_info *ni, bool checkpoint_context)
  {
        struct f2fs_nm_info *nm_i = NM_I(sbi);
        struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
@@@ -572,9 -576,10 +576,10 @@@ retry
         * nat_tree_lock. Therefore, we should retry, if we failed to grab here
         * while not bothering checkpoint.
         */
-       if (!rwsem_is_locked(&sbi->cp_global_sem)) {
+       if (!rwsem_is_locked(&sbi->cp_global_sem) || checkpoint_context) {
                down_read(&curseg->journal_rwsem);
-       } else if (!down_read_trylock(&curseg->journal_rwsem)) {
+       } else if (rwsem_is_contended(&nm_i->nat_tree_lock) ||
+                               !down_read_trylock(&curseg->journal_rwsem)) {
                up_read(&nm_i->nat_tree_lock);
                goto retry;
        }
@@@ -887,7 -892,7 +892,7 @@@ static int truncate_node(struct dnode_o
        int err;
        pgoff_t index;
  
-       err = f2fs_get_node_info(sbi, dn->nid, &ni);
+       err = f2fs_get_node_info(sbi, dn->nid, &ni, false);
        if (err)
                return err;
  
@@@ -1286,7 -1291,7 +1291,7 @@@ struct page *f2fs_new_node_page(struct 
                goto fail;
  
  #ifdef CONFIG_F2FS_CHECK_FS
-       err = f2fs_get_node_info(sbi, dn->nid, &new_ni);
+       err = f2fs_get_node_info(sbi, dn->nid, &new_ni, false);
        if (err) {
                dec_valid_node_count(sbi, dn->inode, !ofs);
                goto fail;
@@@ -1348,7 -1353,7 +1353,7 @@@ static int read_node_page(struct page *
                return LOCKED_PAGE;
        }
  
-       err = f2fs_get_node_info(sbi, page->index, &ni);
+       err = f2fs_get_node_info(sbi, page->index, &ni, false);
        if (err)
                return err;
  
@@@ -1600,7 -1605,7 +1605,7 @@@ static int __write_node_page(struct pag
        nid = nid_of_node(page);
        f2fs_bug_on(sbi, page->index != nid);
  
-       if (f2fs_get_node_info(sbi, nid, &ni))
+       if (f2fs_get_node_info(sbi, nid, &ni, !do_balance))
                goto redirty_out;
  
        if (wbc->for_reclaim) {
@@@ -2701,7 -2706,7 +2706,7 @@@ int f2fs_recover_xattr_data(struct inod
                goto recover_xnid;
  
        /* 1: invalidate the previous xattr nid */
-       err = f2fs_get_node_info(sbi, prev_xnid, &ni);
+       err = f2fs_get_node_info(sbi, prev_xnid, &ni, false);
        if (err)
                return err;
  
@@@ -2741,7 -2746,7 +2746,7 @@@ int f2fs_recover_inode_page(struct f2fs
        struct page *ipage;
        int err;
  
-       err = f2fs_get_node_info(sbi, ino, &old_ni);
+       err = f2fs_get_node_info(sbi, ino, &old_ni, false);
        if (err)
                return err;
  
  retry:
        ipage = f2fs_grab_cache_page(NODE_MAPPING(sbi), ino, false);
        if (!ipage) {
 -              congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT);
 +              memalloc_retry_wait(GFP_NOFS);
                goto retry;
        }
  
diff --combined fs/f2fs/recovery.c
@@@ -8,7 -8,6 +8,7 @@@
  #include <asm/unaligned.h>
  #include <linux/fs.h>
  #include <linux/f2fs_fs.h>
 +#include <linux/sched/mm.h>
  #include "f2fs.h"
  #include "node.h"
  #include "segment.h"
@@@ -588,7 -587,7 +588,7 @@@ retry_dn
        err = f2fs_get_dnode_of_data(&dn, start, ALLOC_NODE);
        if (err) {
                if (err == -ENOMEM) {
 -                      congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT);
 +                      memalloc_retry_wait(GFP_NOFS);
                        goto retry_dn;
                }
                goto out;
  
        f2fs_wait_on_page_writeback(dn.node_page, NODE, true, true);
  
-       err = f2fs_get_node_info(sbi, dn.nid, &ni);
+       err = f2fs_get_node_info(sbi, dn.nid, &ni, false);
        if (err)
                goto err;
  
@@@ -671,7 -670,8 +671,7 @@@ retry_prev
                        err = check_index_in_prev_nodes(sbi, dest, &dn);
                        if (err) {
                                if (err == -ENOMEM) {
 -                                      congestion_wait(BLK_RW_ASYNC,
 -                                                      DEFAULT_IO_TIMEOUT);
 +                                      memalloc_retry_wait(GFP_NOFS);
                                        goto retry_prev;
                                }
                                goto err;
diff --combined fs/f2fs/segment.c
@@@ -9,7 -9,6 +9,7 @@@
  #include <linux/f2fs_fs.h>
  #include <linux/bio.h>
  #include <linux/blkdev.h>
 +#include <linux/sched/mm.h>
  #include <linux/prefetch.h>
  #include <linux/kthread.h>
  #include <linux/swap.h>
@@@ -246,14 -245,16 +246,14 @@@ retry
                                                                LOOKUP_NODE);
                        if (err) {
                                if (err == -ENOMEM) {
 -                                      congestion_wait(BLK_RW_ASYNC,
 -                                                      DEFAULT_IO_TIMEOUT);
 -                                      cond_resched();
 +                                      memalloc_retry_wait(GFP_NOFS);
                                        goto retry;
                                }
                                err = -EAGAIN;
                                goto next;
                        }
  
-                       err = f2fs_get_node_info(sbi, dn.nid, &ni);
+                       err = f2fs_get_node_info(sbi, dn.nid, &ni, false);
                        if (err) {
                                f2fs_put_dnode(&dn);
                                return err;
@@@ -423,7 -424,9 +423,7 @@@ retry
                        err = f2fs_do_write_data_page(&fio);
                        if (err) {
                                if (err == -ENOMEM) {
 -                                      congestion_wait(BLK_RW_ASYNC,
 -                                                      DEFAULT_IO_TIMEOUT);
 -                                      cond_resched();
 +                                      memalloc_retry_wait(GFP_NOFS);
                                        goto retry;
                                }
                                unlock_page(page);
diff --combined fs/f2fs/super.c
@@@ -8,9 -8,9 +8,9 @@@
  #include <linux/module.h>
  #include <linux/init.h>
  #include <linux/fs.h>
 +#include <linux/sched/mm.h>
  #include <linux/statfs.h>
  #include <linux/buffer_head.h>
 -#include <linux/backing-dev.h>
  #include <linux/kthread.h>
  #include <linux/parser.h>
  #include <linux/mount.h>
@@@ -59,6 -59,7 +59,7 @@@ const char *f2fs_fault_name[FAULT_MAX] 
        [FAULT_WRITE_IO]        = "write IO error",
        [FAULT_SLAB_ALLOC]      = "slab alloc",
        [FAULT_DQUOT_INIT]      = "dquot initialize",
+       [FAULT_LOCK_OP]         = "lock_op",
  };
  
  void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate,
@@@ -260,22 -261,29 +261,22 @@@ void f2fs_printk(struct f2fs_sb_info *s
  static const struct f2fs_sb_encodings {
        __u16 magic;
        char *name;
 -      char *version;
 +      unsigned int version;
  } f2fs_sb_encoding_map[] = {
 -      {F2FS_ENC_UTF8_12_1, "utf8", "12.1.0"},
 +      {F2FS_ENC_UTF8_12_1, "utf8", UNICODE_AGE(12, 1, 0)},
  };
  
 -static int f2fs_sb_read_encoding(const struct f2fs_super_block *sb,
 -                               const struct f2fs_sb_encodings **encoding,
 -                               __u16 *flags)
 +static const struct f2fs_sb_encodings *
 +f2fs_sb_read_encoding(const struct f2fs_super_block *sb)
  {
        __u16 magic = le16_to_cpu(sb->s_encoding);
        int i;
  
        for (i = 0; i < ARRAY_SIZE(f2fs_sb_encoding_map); i++)
                if (magic == f2fs_sb_encoding_map[i].magic)
 -                      break;
 -
 -      if (i >= ARRAY_SIZE(f2fs_sb_encoding_map))
 -              return -EINVAL;
 -
 -      *encoding = &f2fs_sb_encoding_map[i];
 -      *flags = le16_to_cpu(sb->s_encoding_flags);
 +                      return &f2fs_sb_encoding_map[i];
  
 -      return 0;
 +      return NULL;
  }
  
  struct kmem_cache *f2fs_cf_name_slab;
@@@ -321,6 -329,46 +322,46 @@@ static inline void limit_reserve_root(s
                                           F2FS_OPTION(sbi).s_resgid));
  }
  
+ static inline int adjust_reserved_segment(struct f2fs_sb_info *sbi)
+ {
+       unsigned int sec_blks = sbi->blocks_per_seg * sbi->segs_per_sec;
+       unsigned int avg_vblocks;
+       unsigned int wanted_reserved_segments;
+       block_t avail_user_block_count;
+       if (!F2FS_IO_ALIGNED(sbi))
+               return 0;
+       /* average valid block count in section in worst case */
+       avg_vblocks = sec_blks / F2FS_IO_SIZE(sbi);
+       /*
+        * we need enough free space when migrating one section in worst case
+        */
+       wanted_reserved_segments = (F2FS_IO_SIZE(sbi) / avg_vblocks) *
+                                               reserved_segments(sbi);
+       wanted_reserved_segments -= reserved_segments(sbi);
+       avail_user_block_count = sbi->user_block_count -
+                               sbi->current_reserved_blocks -
+                               F2FS_OPTION(sbi).root_reserved_blocks;
+       if (wanted_reserved_segments * sbi->blocks_per_seg >
+                                       avail_user_block_count) {
+               f2fs_err(sbi, "IO align feature can't grab additional reserved segment: %u, available segments: %u",
+                       wanted_reserved_segments,
+                       avail_user_block_count >> sbi->log_blocks_per_seg);
+               return -ENOSPC;
+       }
+       SM_I(sbi)->additional_reserved_segments = wanted_reserved_segments;
+       f2fs_info(sbi, "IO align feature needs additional reserved segment: %u",
+                        wanted_reserved_segments);
+       return 0;
+ }
  static inline void adjust_unusable_cap_perc(struct f2fs_sb_info *sbi)
  {
        if (!F2FS_OPTION(sbi).unusable_cap_perc)
@@@ -2408,7 -2456,8 +2449,7 @@@ repeat
                page = read_cache_page_gfp(mapping, blkidx, GFP_NOFS);
                if (IS_ERR(page)) {
                        if (PTR_ERR(page) == -ENOMEM) {
 -                              congestion_wait(BLK_RW_ASYNC,
 -                                              DEFAULT_IO_TIMEOUT);
 +                              memalloc_retry_wait(GFP_NOFS);
                                goto repeat;
                        }
                        set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
@@@ -3540,6 -3589,7 +3581,7 @@@ static void init_sb_info(struct f2fs_sb
        sbi->seq_file_ra_mul = MIN_RA_MUL;
        sbi->max_fragment_chunk = DEF_FRAGMENT_SIZE;
        sbi->max_fragment_hole = DEF_FRAGMENT_SIZE;
+       spin_lock_init(&sbi->gc_urgent_high_lock);
  
        sbi->dir_level = DEF_DIR_LEVEL;
        sbi->interval_time[CP_TIME] = DEF_CP_INTERVAL;
@@@ -3867,32 -3917,25 +3909,32 @@@ static int f2fs_setup_casefold(struct f
                struct unicode_map *encoding;
                __u16 encoding_flags;
  
 -              if (f2fs_sb_read_encoding(sbi->raw_super, &encoding_info,
 -                                        &encoding_flags)) {
 +              encoding_info = f2fs_sb_read_encoding(sbi->raw_super);
 +              if (!encoding_info) {
                        f2fs_err(sbi,
                                 "Encoding requested by superblock is unknown");
                        return -EINVAL;
                }
  
 +              encoding_flags = le16_to_cpu(sbi->raw_super->s_encoding_flags);
                encoding = utf8_load(encoding_info->version);
                if (IS_ERR(encoding)) {
                        f2fs_err(sbi,
 -                               "can't mount with superblock charset: %s-%s "
 +                               "can't mount with superblock charset: %s-%u.%u.%u "
                                 "not supported by the kernel. flags: 0x%x.",
 -                               encoding_info->name, encoding_info->version,
 +                               encoding_info->name,
 +                               unicode_major(encoding_info->version),
 +                               unicode_minor(encoding_info->version),
 +                               unicode_rev(encoding_info->version),
                                 encoding_flags);
                        return PTR_ERR(encoding);
                }
                f2fs_info(sbi, "Using encoding defined by superblock: "
 -                       "%s-%s with flags 0x%hx", encoding_info->name,
 -                       encoding_info->version?:"\b", encoding_flags);
 +                       "%s-%u.%u.%u with flags 0x%hx", encoding_info->name,
 +                       unicode_major(encoding_info->version),
 +                       unicode_minor(encoding_info->version),
 +                       unicode_rev(encoding_info->version),
 +                       encoding_flags);
  
                sbi->sb->s_encoding = encoding;
                sbi->sb->s_encoding_flags = encoding_flags;
@@@ -4179,6 -4222,10 +4221,10 @@@ try_onemore
                goto free_nm;
        }
  
+       err = adjust_reserved_segment(sbi);
+       if (err)
+               goto free_nm;
        /* For write statistics */
        sbi->sectors_written_start = f2fs_get_sectors_written(sbi);
  
diff --combined fs/f2fs/sysfs.c
@@@ -118,6 -118,15 +118,15 @@@ static ssize_t sb_status_show(struct f2
        return sprintf(buf, "%lx\n", sbi->s_flag);
  }
  
+ static ssize_t pending_discard_show(struct f2fs_attr *a,
+               struct f2fs_sb_info *sbi, char *buf)
+ {
+       if (!SM_I(sbi)->dcc_info)
+               return -EINVAL;
+       return sprintf(buf, "%llu\n", (unsigned long long)atomic_read(
+                               &SM_I(sbi)->dcc_info->discard_cmd_cnt));
+ }
  static ssize_t features_show(struct f2fs_attr *a,
                struct f2fs_sb_info *sbi, char *buf)
  {
@@@ -196,7 -205,8 +205,7 @@@ static ssize_t encoding_show(struct f2f
        struct super_block *sb = sbi->sb;
  
        if (f2fs_sb_has_casefold(sbi))
 -              return sysfs_emit(buf, "%s (%d.%d.%d)\n",
 -                      sb->s_encoding->charset,
 +              return sysfs_emit(buf, "UTF-8 (%d.%d.%d)\n",
                        (sb->s_encoding->version >> 16) & 0xff,
                        (sb->s_encoding->version >> 8) & 0xff,
                        sb->s_encoding->version & 0xff);
@@@ -414,7 -424,9 +423,9 @@@ out
        if (a->struct_type == RESERVED_BLOCKS) {
                spin_lock(&sbi->stat_lock);
                if (t > (unsigned long)(sbi->user_block_count -
-                               F2FS_OPTION(sbi).root_reserved_blocks)) {
+                               F2FS_OPTION(sbi).root_reserved_blocks -
+                               sbi->blocks_per_seg *
+                               SM_I(sbi)->additional_reserved_segments)) {
                        spin_unlock(&sbi->stat_lock);
                        return -EINVAL;
                }
                return count;
        }
  
+       if (!strcmp(a->attr.name, "gc_urgent_high_remaining")) {
+               spin_lock(&sbi->gc_urgent_high_lock);
+               sbi->gc_urgent_high_limited = t != 0;
+               sbi->gc_urgent_high_remaining = t;
+               spin_unlock(&sbi->gc_urgent_high_lock);
+               return count;
+       }
  #ifdef CONFIG_F2FS_IOSTAT
        if (!strcmp(a->attr.name, "iostat_enable")) {
                sbi->iostat_enable = !!t;
@@@ -732,6 -753,7 +752,7 @@@ F2FS_RW_ATTR(FAULT_INFO_TYPE, f2fs_faul
  #endif
  F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, data_io_flag, data_io_flag);
  F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, node_io_flag, node_io_flag);
+ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_urgent_high_remaining, gc_urgent_high_remaining);
  F2FS_RW_ATTR(CPRC_INFO, ckpt_req_control, ckpt_thread_ioprio, ckpt_thread_ioprio);
  F2FS_GENERAL_RO_ATTR(dirty_segments);
  F2FS_GENERAL_RO_ATTR(free_segments);
@@@ -743,6 -765,7 +764,7 @@@ F2FS_GENERAL_RO_ATTR(unusable)
  F2FS_GENERAL_RO_ATTR(encoding);
  F2FS_GENERAL_RO_ATTR(mounted_time_sec);
  F2FS_GENERAL_RO_ATTR(main_blkaddr);
+ F2FS_GENERAL_RO_ATTR(pending_discard);
  #ifdef CONFIG_F2FS_STAT_FS
  F2FS_STAT_ATTR(STAT_INFO, f2fs_stat_info, cp_foreground_calls, cp_count);
  F2FS_STAT_ATTR(STAT_INFO, f2fs_stat_info, cp_background_calls, bg_cp_count);
@@@ -811,6 -834,7 +833,7 @@@ static struct attribute *f2fs_attrs[] 
        ATTR_LIST(main_blkaddr),
        ATTR_LIST(max_small_discards),
        ATTR_LIST(discard_granularity),
+       ATTR_LIST(pending_discard),
        ATTR_LIST(batched_trim_sections),
        ATTR_LIST(ipu_policy),
        ATTR_LIST(min_ipu_util),
  #endif
        ATTR_LIST(data_io_flag),
        ATTR_LIST(node_io_flag),
+       ATTR_LIST(gc_urgent_high_remaining),
        ATTR_LIST(ckpt_thread_ioprio),
        ATTR_LIST(dirty_segments),
        ATTR_LIST(free_segments),