Merge tag 'for-f2fs-4.5' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk...

author Linus Torvalds <torvalds@linux-foundation.org>

Thu, 14 Jan 2016 05:01:44 +0000 (21:01 -0800)

committer Linus Torvalds <torvalds@linux-foundation.org>

Thu, 14 Jan 2016 05:01:44 +0000 (21:01 -0800)
author Linus Torvalds <torvalds@linux-foundation.org>
Thu, 14 Jan 2016 05:01:44 +0000 (21:01 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 14 Jan 2016 05:01:44 +0000 (21:01 -0800)
diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs

index 0345f2d..e5200f3 100644 (file)
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -87,6 +87,12 @@ Contact:     "Jaegeuk Kim" <jaegeuk@kernel.org>
  Description:
                  Controls the checkpoint timing.
  
+What:          /sys/fs/f2fs/<disk>/idle_interval
+Date:          January 2016
+Contact:       "Jaegeuk Kim" <jaegeuk@kernel.org>
+Description:
+                Controls the idle timing.
+
  What:          /sys/fs/f2fs/<disk>/ra_nid_pages
  Date:          October 2015
  Contact:       "Chao Yu" <chao2.yu@samsung.com>
diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt

index b102b43..e1c9f08 100644 (file)
--- a/Documentation/filesystems/f2fs.txt
+++ b/Documentation/filesystems/f2fs.txt
@@ -102,7 +102,7 @@ background_gc=%s       Turn on/off cleaning operations, namely garbage
                         collection, triggered in background when I/O subsystem is
                         idle. If background_gc=on, it will turn on the garbage
                         collection and if background_gc=off, garbage collection
-                       will be truned off. If background_gc=sync, it will turn
+                       will be turned off. If background_gc=sync, it will turn
                         on synchronous garbage collection running in background.
                         Default value for this option is on. So garbage
                         collection is on by default.
@@ -145,10 +145,12 @@ extent_cache           Enable an extent cache based on rb-tree, it can cache
                         as many as extent which map between contiguous logical
                         address and physical address per inode, resulting in
                         increasing the cache hit ratio. Set by default.
-noextent_cache         Diable an extent cache based on rb-tree explicitly, see
+noextent_cache         Disable an extent cache based on rb-tree explicitly, see
                         the above extent_cache mount option.
  noinline_data          Disable the inline data feature, inline data feature is
                         enabled by default.
+data_flush             Enable data flushing before checkpoint in order to
+                       persist data of regular files and symlinks.
  
  ================================================================================
  DEBUGFS ENTRIES
@@ -192,7 +194,7 @@ Files in /sys/fs/f2fs/<devname>
                                policy for garbage collection. Setting gc_idle = 0
                                (default) will disable this option. Setting
                                gc_idle = 1 will select the Cost Benefit approach
-                              & setting gc_idle = 2 will select the greedy aproach.
+                              & setting gc_idle = 2 will select the greedy approach.
  
   reclaim_segments             This parameter controls the number of prefree
                                segments to be reclaimed. If the number of prefree
@@ -298,7 +300,7 @@ The dump.f2fs shows the information of specific inode and dumps SSA and SIT to
  file. Each file is dump_ssa and dump_sit.
  
  The dump.f2fs is used to debug on-disk data structures of the f2fs filesystem.
-It shows on-disk inode information reconized by a given inode number, and is
+It shows on-disk inode information recognized by a given inode number, and is
  able to dump all the SSA and SIT entries into predefined files, ./dump_ssa and
  ./dump_sit respectively.
  
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c

index f661d80..3842af9 100644 (file)
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -237,7 +237,7 @@ static int f2fs_write_meta_page(struct page *page,
         dec_page_count(sbi, F2FS_DIRTY_META);
         unlock_page(page);
  
-       if (wbc->for_reclaim)
+       if (wbc->for_reclaim || unlikely(f2fs_cp_error(sbi)))
                 f2fs_submit_merged_bio(sbi, META, WRITE);
         return 0;
  
@@ -410,13 +410,13 @@ static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
         spin_unlock(&im->ino_lock);
  }
  
-void add_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type)
+void add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
  {
         /* add new dirty ino entry into list */
         __add_ino_entry(sbi, ino, type);
  }
  
-void remove_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type)
+void remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
  {
         /* remove dirty ino entry from list */
         __remove_ino_entry(sbi, ino, type);
@@ -434,7 +434,7 @@ bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode)
         return e ? true : false;
  }
  
-void release_dirty_inode(struct f2fs_sb_info *sbi)
+void release_ino_entry(struct f2fs_sb_info *sbi)
  {
         struct ino_entry *e, *tmp;
         int i;
@@ -722,47 +722,48 @@ fail_no_cp:
         return -EINVAL;
  }
  
-static int __add_dirty_inode(struct inode *inode, struct inode_entry *new)
+static void __add_dirty_inode(struct inode *inode, enum inode_type type)
  {
         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+       struct f2fs_inode_info *fi = F2FS_I(inode);
+       int flag = (type == DIR_INODE) ? FI_DIRTY_DIR : FI_DIRTY_FILE;
  
-       if (is_inode_flag_set(F2FS_I(inode), FI_DIRTY_DIR))
-               return -EEXIST;
+       if (is_inode_flag_set(fi, flag))
+               return;
  
-       set_inode_flag(F2FS_I(inode), FI_DIRTY_DIR);
-       F2FS_I(inode)->dirty_dir = new;
-       list_add_tail(&new->list, &sbi->dir_inode_list);
-       stat_inc_dirty_dir(sbi);
-       return 0;
+       set_inode_flag(fi, flag);
+       list_add_tail(&fi->dirty_list, &sbi->inode_list[type]);
+       stat_inc_dirty_inode(sbi, type);
+}
+
+static void __remove_dirty_inode(struct inode *inode, enum inode_type type)
+{
+       struct f2fs_inode_info *fi = F2FS_I(inode);
+       int flag = (type == DIR_INODE) ? FI_DIRTY_DIR : FI_DIRTY_FILE;
+
+       if (get_dirty_pages(inode) ||
+                       !is_inode_flag_set(F2FS_I(inode), flag))
+               return;
+
+       list_del_init(&fi->dirty_list);
+       clear_inode_flag(fi, flag);
+       stat_dec_dirty_inode(F2FS_I_SB(inode), type);
  }
  
  void update_dirty_page(struct inode *inode, struct page *page)
  {
         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
-       struct inode_entry *new;
-       int ret = 0;
+       enum inode_type type = S_ISDIR(inode->i_mode) ? DIR_INODE : FILE_INODE;
  
         if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode) &&
                         !S_ISLNK(inode->i_mode))
                 return;
  
-       if (!S_ISDIR(inode->i_mode)) {
-               inode_inc_dirty_pages(inode);
-               goto out;
-       }
-
-       new = f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
-       new->inode = inode;
-       INIT_LIST_HEAD(&new->list);
-
-       spin_lock(&sbi->dir_inode_lock);
-       ret = __add_dirty_inode(inode, new);
+       spin_lock(&sbi->inode_lock[type]);
+       __add_dirty_inode(inode, type);
         inode_inc_dirty_pages(inode);
-       spin_unlock(&sbi->dir_inode_lock);
+       spin_unlock(&sbi->inode_lock[type]);
  
-       if (ret)
-               kmem_cache_free(inode_entry_slab, new);
-out:
         SetPagePrivate(page);
         f2fs_trace_pid(page);
  }
@@ -770,70 +771,60 @@ out:
  void add_dirty_dir_inode(struct inode *inode)
  {
         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
-       struct inode_entry *new =
-                       f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
-       int ret = 0;
-
-       new->inode = inode;
-       INIT_LIST_HEAD(&new->list);
  
-       spin_lock(&sbi->dir_inode_lock);
-       ret = __add_dirty_inode(inode, new);
-       spin_unlock(&sbi->dir_inode_lock);
-
-       if (ret)
-               kmem_cache_free(inode_entry_slab, new);
+       spin_lock(&sbi->inode_lock[DIR_INODE]);
+       __add_dirty_inode(inode, DIR_INODE);
+       spin_unlock(&sbi->inode_lock[DIR_INODE]);
  }
  
-void remove_dirty_dir_inode(struct inode *inode)
+void remove_dirty_inode(struct inode *inode)
  {
         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
-       struct inode_entry *entry;
-
-       if (!S_ISDIR(inode->i_mode))
-               return;
+       struct f2fs_inode_info *fi = F2FS_I(inode);
+       enum inode_type type = S_ISDIR(inode->i_mode) ? DIR_INODE : FILE_INODE;
  
-       spin_lock(&sbi->dir_inode_lock);
-       if (get_dirty_pages(inode) ||
-                       !is_inode_flag_set(F2FS_I(inode), FI_DIRTY_DIR)) {
-               spin_unlock(&sbi->dir_inode_lock);
+       if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode) &&
+                       !S_ISLNK(inode->i_mode))
                 return;
-       }
  
-       entry = F2FS_I(inode)->dirty_dir;
-       list_del(&entry->list);
-       F2FS_I(inode)->dirty_dir = NULL;
-       clear_inode_flag(F2FS_I(inode), FI_DIRTY_DIR);
-       stat_dec_dirty_dir(sbi);
-       spin_unlock(&sbi->dir_inode_lock);
-       kmem_cache_free(inode_entry_slab, entry);
+       spin_lock(&sbi->inode_lock[type]);
+       __remove_dirty_inode(inode, type);
+       spin_unlock(&sbi->inode_lock[type]);
  
         /* Only from the recovery routine */
-       if (is_inode_flag_set(F2FS_I(inode), FI_DELAY_IPUT)) {
-               clear_inode_flag(F2FS_I(inode), FI_DELAY_IPUT);
+       if (is_inode_flag_set(fi, FI_DELAY_IPUT)) {
+               clear_inode_flag(fi, FI_DELAY_IPUT);
                 iput(inode);
         }
  }
  
-void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi)
+int sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type)
  {
         struct list_head *head;
-       struct inode_entry *entry;
         struct inode *inode;
+       struct f2fs_inode_info *fi;
+       bool is_dir = (type == DIR_INODE);
+
+       trace_f2fs_sync_dirty_inodes_enter(sbi->sb, is_dir,
+                               get_pages(sbi, is_dir ?
+                               F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA));
  retry:
         if (unlikely(f2fs_cp_error(sbi)))
-               return;
+               return -EIO;
  
-       spin_lock(&sbi->dir_inode_lock);
+       spin_lock(&sbi->inode_lock[type]);
  
-       head = &sbi->dir_inode_list;
+       head = &sbi->inode_list[type];
         if (list_empty(head)) {
-               spin_unlock(&sbi->dir_inode_lock);
-               return;
+               spin_unlock(&sbi->inode_lock[type]);
+               trace_f2fs_sync_dirty_inodes_exit(sbi->sb, is_dir,
+                               get_pages(sbi, is_dir ?
+                               F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA));
+               return 0;
         }
-       entry = list_entry(head->next, struct inode_entry, list);
-       inode = igrab(entry->inode);
-       spin_unlock(&sbi->dir_inode_lock);
+       fi = list_entry(head->next, struct f2fs_inode_info, dirty_list);
+       inode = igrab(&fi->vfs_inode);
+       spin_unlock(&sbi->inode_lock[type]);
         if (inode) {
                 filemap_fdatawrite(inode->i_mapping);
                 iput(inode);
@@ -868,11 +859,9 @@ retry_flush_dents:
         /* write all the dirty dentry pages */
         if (get_pages(sbi, F2FS_DIRTY_DENTS)) {
                 f2fs_unlock_all(sbi);
-               sync_dirty_dir_inodes(sbi);
-               if (unlikely(f2fs_cp_error(sbi))) {
-                       err = -EIO;
+               err = sync_dirty_inodes(sbi, DIR_INODE);
+               if (err)
                         goto out;
-               }
                 goto retry_flush_dents;
         }
  
@@ -885,10 +874,9 @@ retry_flush_nodes:
  
         if (get_pages(sbi, F2FS_DIRTY_NODES)) {
                 up_write(&sbi->node_write);
-               sync_node_pages(sbi, 0, &wbc);
-               if (unlikely(f2fs_cp_error(sbi))) {
+               err = sync_node_pages(sbi, 0, &wbc);
+               if (err) {
                         f2fs_unlock_all(sbi);
-                       err = -EIO;
                         goto out;
                 }
                 goto retry_flush_nodes;
@@ -919,7 +907,7 @@ static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
         finish_wait(&sbi->cp_wait, &wait);
  }
  
-static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
+static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
  {
         struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
         struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
@@ -945,7 +933,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
         while (get_pages(sbi, F2FS_DIRTY_META)) {
                 sync_meta_pages(sbi, META, LONG_MAX);
                 if (unlikely(f2fs_cp_error(sbi)))
-                       return;
+                       return -EIO;
         }
  
         next_free_nid(sbi, &last_nid);
@@ -1030,7 +1018,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
         /* need to wait for end_io results */
         wait_on_all_pages_writeback(sbi);
         if (unlikely(f2fs_cp_error(sbi)))
-               return;
+               return -EIO;
  
         /* write out checkpoint buffer at block 0 */
         update_meta_page(sbi, ckpt, start_blk++);
@@ -1058,7 +1046,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
         wait_on_all_pages_writeback(sbi);
  
         if (unlikely(f2fs_cp_error(sbi)))
-               return;
+               return -EIO;
  
         filemap_fdatawait_range(NODE_MAPPING(sbi), 0, LONG_MAX);
         filemap_fdatawait_range(META_MAPPING(sbi), 0, LONG_MAX);
@@ -1081,22 +1069,25 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
                 invalidate_mapping_pages(META_MAPPING(sbi), discard_blk,
                                                                 discard_blk);
  
-       release_dirty_inode(sbi);
+       release_ino_entry(sbi);
  
         if (unlikely(f2fs_cp_error(sbi)))
-               return;
+               return -EIO;
  
         clear_prefree_segments(sbi, cpc);
         clear_sbi_flag(sbi, SBI_IS_DIRTY);
+
+       return 0;
  }
  
  /*
   * We guarantee that this checkpoint procedure will not fail.
   */
-void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
+int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
  {
         struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
         unsigned long long ckpt_ver;
+       int err = 0;
  
         mutex_lock(&sbi->cp_mutex);
  
@@ -1104,14 +1095,19 @@ void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
                 (cpc->reason == CP_FASTBOOT || cpc->reason == CP_SYNC ||
                 (cpc->reason == CP_DISCARD && !sbi->discard_blks)))
                 goto out;
-       if (unlikely(f2fs_cp_error(sbi)))
+       if (unlikely(f2fs_cp_error(sbi))) {
+               err = -EIO;
                 goto out;
-       if (f2fs_readonly(sbi->sb))
+       }
+       if (f2fs_readonly(sbi->sb)) {
+               err = -EROFS;
                 goto out;
+       }
  
         trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "start block_ops");
  
-       if (block_operations(sbi))
+       err = block_operations(sbi);
+       if (err)
                 goto out;
  
         trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish block_ops");
@@ -1133,7 +1129,7 @@ void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
         flush_sit_entries(sbi, cpc);
  
         /* unlock all the fs_lock[] in do_checkpoint() */
-       do_checkpoint(sbi, cpc);
+       err = do_checkpoint(sbi, cpc);
  
         unblock_operations(sbi);
         stat_inc_cp_count(sbi->stat_info);
@@ -1143,10 +1139,11 @@ void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
                         "checkpoint: version = %llx", ckpt_ver);
  
         /* do checkpoint periodically */
-       sbi->cp_expires = round_jiffies_up(jiffies + HZ * sbi->cp_interval);
+       f2fs_update_time(sbi, CP_TIME);
+       trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint");
  out:
         mutex_unlock(&sbi->cp_mutex);
-       trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint");
+       return err;
  }
  
  void init_ino_entry_info(struct f2fs_sb_info *sbi)
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c

index 972eab7..ac9e7c6 100644 (file)
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -225,7 +225,8 @@ void set_data_blkaddr(struct dnode_of_data *dn)
         /* Get physical address of data block */
         addr_array = blkaddr_in_node(rn);
         addr_array[ofs_in_node] = cpu_to_le32(dn->data_blkaddr);
-       set_page_dirty(node_page);
+       if (set_page_dirty(node_page))
+               dn->node_changed = true;
  }
  
  int reserve_new_block(struct dnode_of_data *dn)
@@ -412,7 +413,7 @@ struct page *get_new_data_page(struct inode *inode,
         struct page *page;
         struct dnode_of_data dn;
         int err;
-repeat:
+
         page = f2fs_grab_cache_page(mapping, index, true);
         if (!page) {
                 /*
@@ -441,12 +442,11 @@ repeat:
         } else {
                 f2fs_put_page(page, 1);
  
-               page = get_read_data_page(inode, index, READ_SYNC, true);
+               /* if ipage exists, blkaddr should be NEW_ADDR */
+               f2fs_bug_on(F2FS_I_SB(inode), ipage);
+               page = get_lock_data_page(inode, index, true);
                 if (IS_ERR(page))
-                       goto repeat;
-
-               /* wait for read completion */
-               lock_page(page);
+                       return page;
         }
  got_it:
         if (new_i_size && i_size_read(inode) <
@@ -494,14 +494,10 @@ alloc:
         if (i_size_read(dn->inode) < ((loff_t)(fofs + 1) << PAGE_CACHE_SHIFT))
                 i_size_write(dn->inode,
                                 ((loff_t)(fofs + 1) << PAGE_CACHE_SHIFT));
-
-       /* direct IO doesn't use extent cache to maximize the performance */
-       f2fs_drop_largest_extent(dn->inode, fofs);
-
         return 0;
  }
  
-static void __allocate_data_blocks(struct inode *inode, loff_t offset,
+static int __allocate_data_blocks(struct inode *inode, loff_t offset,
                                                         size_t count)
  {
         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
@@ -510,14 +506,15 @@ static void __allocate_data_blocks(struct inode *inode, loff_t offset,
         u64 len = F2FS_BYTES_TO_BLK(count);
         bool allocated;
         u64 end_offset;
+       int err = 0;
  
         while (len) {
-               f2fs_balance_fs(sbi);
                 f2fs_lock_op(sbi);
  
                 /* When reading holes, we need its node page */
                 set_new_dnode(&dn, inode, NULL, NULL, 0);
-               if (get_dnode_of_data(&dn, start, ALLOC_NODE))
+               err = get_dnode_of_data(&dn, start, ALLOC_NODE);
+               if (err)
                         goto out;
  
                 allocated = false;
@@ -526,12 +523,15 @@ static void __allocate_data_blocks(struct inode *inode, loff_t offset,
                 while (dn.ofs_in_node < end_offset && len) {
                         block_t blkaddr;
  
-                       if (unlikely(f2fs_cp_error(sbi)))
+                       if (unlikely(f2fs_cp_error(sbi))) {
+                               err = -EIO;
                                 goto sync_out;
+                       }
  
                         blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
                         if (blkaddr == NULL_ADDR || blkaddr == NEW_ADDR) {
-                               if (__allocate_data_block(&dn))
+                               err = __allocate_data_block(&dn);
+                               if (err)
                                         goto sync_out;
                                 allocated = true;
                         }
@@ -545,8 +545,10 @@ static void __allocate_data_blocks(struct inode *inode, loff_t offset,
  
                 f2fs_put_dnode(&dn);
                 f2fs_unlock_op(sbi);
+
+               f2fs_balance_fs(sbi, dn.node_changed);
         }
-       return;
+       return err;
  
  sync_out:
         if (allocated)
@@ -554,7 +556,8 @@ sync_out:
         f2fs_put_dnode(&dn);
  out:
         f2fs_unlock_op(sbi);
-       return;
+       f2fs_balance_fs(sbi, dn.node_changed);
+       return err;
  }
  
  /*
@@ -566,7 +569,7 @@ out:
   *     b. do not use extent cache for better performance
   *     c. give the block addresses to blockdev
   */
-static int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
+int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
                                                 int create, int flag)
  {
         unsigned int maxblocks = map->m_len;
@@ -577,6 +580,7 @@ static int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
         int err = 0, ofs = 1;
         struct extent_info ei;
         bool allocated = false;
+       block_t blkaddr;
  
         map->m_len = 0;
         map->m_flags = 0;
@@ -592,7 +596,7 @@ static int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
         }
  
         if (create)
-               f2fs_lock_op(F2FS_I_SB(inode));
+               f2fs_lock_op(sbi);
  
         /* When reading holes, we need its node page */
         set_new_dnode(&dn, inode, NULL, NULL, 0);
@@ -640,12 +644,21 @@ static int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
         pgofs++;
  
  get_next:
+       if (map->m_len >= maxblocks)
+               goto sync_out;
+
         if (dn.ofs_in_node >= end_offset) {
                 if (allocated)
                         sync_inode_page(&dn);
                 allocated = false;
                 f2fs_put_dnode(&dn);
  
+               if (create) {
+                       f2fs_unlock_op(sbi);
+                       f2fs_balance_fs(sbi, dn.node_changed);
+                       f2fs_lock_op(sbi);
+               }
+
                 set_new_dnode(&dn, inode, NULL, NULL, 0);
                 err = get_dnode_of_data(&dn, pgofs, mode);
                 if (err) {
@@ -657,52 +670,53 @@ get_next:
                 end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
         }
  
-       if (maxblocks > map->m_len) {
-               block_t blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
+       blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
  
-               if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) {
-                       if (create) {
-                               if (unlikely(f2fs_cp_error(sbi))) {
-                                       err = -EIO;
-                                       goto sync_out;
-                               }
-                               err = __allocate_data_block(&dn);
-                               if (err)
-                                       goto sync_out;
-                               allocated = true;
-                               map->m_flags |= F2FS_MAP_NEW;
-                               blkaddr = dn.data_blkaddr;
-                       } else {
-                               /*
-                                * we only merge preallocated unwritten blocks
-                                * for fiemap.
-                                */
-                               if (flag != F2FS_GET_BLOCK_FIEMAP ||
-                                               blkaddr != NEW_ADDR)
-                                       goto sync_out;
+       if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) {
+               if (create) {
+                       if (unlikely(f2fs_cp_error(sbi))) {
+                               err = -EIO;
+                               goto sync_out;
                         }
+                       err = __allocate_data_block(&dn);
+                       if (err)
+                               goto sync_out;
+                       allocated = true;
+                       map->m_flags |= F2FS_MAP_NEW;
+                       blkaddr = dn.data_blkaddr;
+               } else {
+                       /*
+                        * we only merge preallocated unwritten blocks
+                        * for fiemap.
+                        */
+                       if (flag != F2FS_GET_BLOCK_FIEMAP ||
+                                       blkaddr != NEW_ADDR)
+                               goto sync_out;
                 }
+       }
  
-               /* Give more consecutive addresses for the readahead */
-               if ((map->m_pblk != NEW_ADDR &&
-                               blkaddr == (map->m_pblk + ofs)) ||
-                               (map->m_pblk == NEW_ADDR &&
-                               blkaddr == NEW_ADDR)) {
-                       ofs++;
-                       dn.ofs_in_node++;
-                       pgofs++;
-                       map->m_len++;
-                       goto get_next;
-               }
+       /* Give more consecutive addresses for the readahead */
+       if ((map->m_pblk != NEW_ADDR &&
+                       blkaddr == (map->m_pblk + ofs)) ||
+                       (map->m_pblk == NEW_ADDR &&
+                       blkaddr == NEW_ADDR)) {
+               ofs++;
+               dn.ofs_in_node++;
+               pgofs++;
+               map->m_len++;
+               goto get_next;
         }
+
  sync_out:
         if (allocated)
                 sync_inode_page(&dn);
  put_out:
         f2fs_put_dnode(&dn);
  unlock_out:
-       if (create)
-               f2fs_unlock_op(F2FS_I_SB(inode));
+       if (create) {
+               f2fs_unlock_op(sbi);
+               f2fs_balance_fs(sbi, dn.node_changed);
+       }
  out:
         trace_f2fs_map_blocks(inode, map, err);
         return err;
@@ -742,6 +756,10 @@ static int get_data_block_dio(struct inode *inode, sector_t iblock,
  static int get_data_block_bmap(struct inode *inode, sector_t iblock,
                         struct buffer_head *bh_result, int create)
  {
+       /* The block number must be less than the F2FS max file blocks */
+       if (unlikely(iblock >= F2FS_I_SB(inode)->max_file_blocks))
+               return -EFBIG;
+
         return __get_data_block(inode, iblock, bh_result, create,
                                                 F2FS_GET_BLOCK_BMAP);
  }
@@ -761,10 +779,9 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
  {
         struct buffer_head map_bh;
         sector_t start_blk, last_blk;
-       loff_t isize = i_size_read(inode);
+       loff_t isize;
         u64 logical = 0, phys = 0, size = 0;
         u32 flags = 0;
-       bool past_eof = false, whole_file = false;
         int ret = 0;
  
         ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
@@ -779,16 +796,19 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
  
         mutex_lock(&inode->i_mutex);
  
-       if (len >= isize) {
-               whole_file = true;
-               len = isize;
-       }
+       isize = i_size_read(inode);
+       if (start >= isize)
+               goto out;
+
+       if (start + len > isize)
+               len = isize - start;
  
         if (logical_to_blk(inode, len) == 0)
                 len = blk_to_logical(inode, 1);
  
         start_blk = logical_to_blk(inode, start);
         last_blk = logical_to_blk(inode, start + len - 1);
+
  next:
         memset(&map_bh, 0, sizeof(struct buffer_head));
         map_bh.b_size = len;
@@ -800,59 +820,37 @@ next:
  
         /* HOLE */
         if (!buffer_mapped(&map_bh)) {
-               start_blk++;
-
-               if (!past_eof && blk_to_logical(inode, start_blk) >= isize)
-                       past_eof = 1;
-
-               if (past_eof && size) {
-                       flags |= FIEMAP_EXTENT_LAST;
-                       ret = fiemap_fill_next_extent(fieinfo, logical,
-                                       phys, size, flags);
-               } else if (size) {
-                       ret = fiemap_fill_next_extent(fieinfo, logical,
-                                       phys, size, flags);
-                       size = 0;
-               }
+               /* Go through holes until we pass the EOF */
+               if (blk_to_logical(inode, start_blk++) < isize)
+                       goto prep_next;
+               /* Finding a hole beyond isize means no more extents exist.
+                * This assumes that filesystems don't punch holes beyond
+                * isize while keeping the size unchanged.
+                */
+               flags |= FIEMAP_EXTENT_LAST;
+       }
  
-               /* if we have holes up to/past EOF then we're done */
-               if (start_blk > last_blk || past_eof || ret)
-                       goto out;
-       } else {
-               if (start_blk > last_blk && !whole_file) {
-                       ret = fiemap_fill_next_extent(fieinfo, logical,
-                                       phys, size, flags);
-                       goto out;
-               }
+       if (size) {
+               if (f2fs_encrypted_inode(inode))
+                       flags |= FIEMAP_EXTENT_DATA_ENCRYPTED;
  
-               /*
-                * if size != 0 then we know we already have an extent
-                * to add, so add it.
-                */
-               if (size) {
-                       ret = fiemap_fill_next_extent(fieinfo, logical,
-                                       phys, size, flags);
-                       if (ret)
-                               goto out;
-               }
+               ret = fiemap_fill_next_extent(fieinfo, logical,
+                               phys, size, flags);
+       }
  
-               logical = blk_to_logical(inode, start_blk);
-               phys = blk_to_logical(inode, map_bh.b_blocknr);
-               size = map_bh.b_size;
-               flags = 0;
-               if (buffer_unwritten(&map_bh))
-                       flags = FIEMAP_EXTENT_UNWRITTEN;
+       if (start_blk > last_blk || ret)
+               goto out;
  
-               start_blk += logical_to_blk(inode, size);
+       logical = blk_to_logical(inode, start_blk);
+       phys = blk_to_logical(inode, map_bh.b_blocknr);
+       size = map_bh.b_size;
+       flags = 0;
+       if (buffer_unwritten(&map_bh))
+               flags = FIEMAP_EXTENT_UNWRITTEN;
  
-               /*
-                * If we are past the EOF, then we need to make sure as
-                * soon as we find a hole that the last extent we found
-                * is marked with FIEMAP_EXTENT_LAST
-                */
-               if (!past_eof && logical + size >= isize)
-                       past_eof = true;
-       }
+       start_blk += logical_to_blk(inode, size);
+
+prep_next:
         cond_resched();
         if (fatal_signal_pending(current))
                 ret = -EINTR;
@@ -1083,6 +1081,7 @@ int do_write_data_page(struct f2fs_io_info *fio)
          */
         if (unlikely(fio->blk_addr != NEW_ADDR &&
                         !is_cold_data(page) &&
+                       !IS_ATOMIC_WRITTEN_PAGE(page) &&
                         need_inplace_update(inode))) {
                 rewrite_data_page(fio);
                 set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE);
@@ -1179,10 +1178,11 @@ out:
         if (err)
                 ClearPageUptodate(page);
         unlock_page(page);
-       if (need_balance_fs)
-               f2fs_balance_fs(sbi);
-       if (wbc->for_reclaim)
+       f2fs_balance_fs(sbi, need_balance_fs);
+       if (wbc->for_reclaim || unlikely(f2fs_cp_error(sbi))) {
                 f2fs_submit_merged_bio(sbi, DATA, WRITE);
+               remove_dirty_inode(inode);
+       }
         return 0;
  
  redirty_out:
@@ -1354,6 +1354,10 @@ static int f2fs_write_data_pages(struct address_space *mapping,
                         available_free_memory(sbi, DIRTY_DENTS))
                 goto skip_write;
  
+       /* skip writing during file defragment */
+       if (is_inode_flag_set(F2FS_I(inode), FI_DO_DEFRAG))
+               goto skip_write;
+
         /* during POR, we don't need to trigger writepage at all. */
         if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
                 goto skip_write;
@@ -1369,7 +1373,7 @@ static int f2fs_write_data_pages(struct address_space *mapping,
         if (locked)
                 mutex_unlock(&sbi->writepages);
  
-       remove_dirty_dir_inode(inode);
+       remove_dirty_inode(inode);
  
         wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff);
         return ret;
@@ -1382,13 +1386,85 @@ skip_write:
  static void f2fs_write_failed(struct address_space *mapping, loff_t to)
  {
         struct inode *inode = mapping->host;
+       loff_t i_size = i_size_read(inode);
  
-       if (to > inode->i_size) {
-               truncate_pagecache(inode, inode->i_size);
-               truncate_blocks(inode, inode->i_size, true);
+       if (to > i_size) {
+               truncate_pagecache(inode, i_size);
+               truncate_blocks(inode, i_size, true);
         }
  }
  
+static int prepare_write_begin(struct f2fs_sb_info *sbi,
+                       struct page *page, loff_t pos, unsigned len,
+                       block_t *blk_addr, bool *node_changed)
+{
+       struct inode *inode = page->mapping->host;
+       pgoff_t index = page->index;
+       struct dnode_of_data dn;
+       struct page *ipage;
+       bool locked = false;
+       struct extent_info ei;
+       int err = 0;
+
+       if (f2fs_has_inline_data(inode) ||
+                       (pos & PAGE_CACHE_MASK) >= i_size_read(inode)) {
+               f2fs_lock_op(sbi);
+               locked = true;
+       }
+restart:
+       /* check inline_data */
+       ipage = get_node_page(sbi, inode->i_ino);
+       if (IS_ERR(ipage)) {
+               err = PTR_ERR(ipage);
+               goto unlock_out;
+       }
+
+       set_new_dnode(&dn, inode, ipage, ipage, 0);
+
+       if (f2fs_has_inline_data(inode)) {
+               if (pos + len <= MAX_INLINE_DATA) {
+                       read_inline_data(page, ipage);
+                       set_inode_flag(F2FS_I(inode), FI_DATA_EXIST);
+                       sync_inode_page(&dn);
+               } else {
+                       err = f2fs_convert_inline_page(&dn, page);
+                       if (err)
+                               goto out;
+                       if (dn.data_blkaddr == NULL_ADDR)
+                               err = f2fs_get_block(&dn, index);
+               }
+       } else if (locked) {
+               err = f2fs_get_block(&dn, index);
+       } else {
+               if (f2fs_lookup_extent_cache(inode, index, &ei)) {
+                       dn.data_blkaddr = ei.blk + index - ei.fofs;
+               } else {
+                       bool restart = false;
+
+                       /* hole case */
+                       err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
+                       if (err || (!err && dn.data_blkaddr == NULL_ADDR))
+                               restart = true;
+                       if (restart) {
+                               f2fs_put_dnode(&dn);
+                               f2fs_lock_op(sbi);
+                               locked = true;
+                               goto restart;
+                       }
+               }
+       }
+
+       /* convert_inline_page can make node_changed */
+       *blk_addr = dn.data_blkaddr;
+       *node_changed = dn.node_changed;
+out:
+       f2fs_put_dnode(&dn);
+unlock_out:
+       if (locked)
+               f2fs_unlock_op(sbi);
+       return err;
+}
+
  static int f2fs_write_begin(struct file *file, struct address_space *mapping,
                 loff_t pos, unsigned len, unsigned flags,
                 struct page **pagep, void **fsdata)
@@ -1396,15 +1472,13 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
         struct inode *inode = mapping->host;
         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
         struct page *page = NULL;
-       struct page *ipage;
         pgoff_t index = ((unsigned long long) pos) >> PAGE_CACHE_SHIFT;
-       struct dnode_of_data dn;
+       bool need_balance = false;
+       block_t blkaddr = NULL_ADDR;
         int err = 0;
  
         trace_f2fs_write_begin(inode, pos, len, flags);
  
-       f2fs_balance_fs(sbi);
-
         /*
          * We should check this at this moment to avoid deadlock on inode page
          * and #0 page. The locking rule for inline_data conversion should be:
@@ -1424,41 +1498,27 @@ repeat:
  
         *pagep = page;
  
-       f2fs_lock_op(sbi);
-
-       /* check inline_data */
-       ipage = get_node_page(sbi, inode->i_ino);
-       if (IS_ERR(ipage)) {
-               err = PTR_ERR(ipage);
-               goto unlock_fail;
-       }
-
-       set_new_dnode(&dn, inode, ipage, ipage, 0);
+       err = prepare_write_begin(sbi, page, pos, len,
+                                       &blkaddr, &need_balance);
+       if (err)
+               goto fail;
  
-       if (f2fs_has_inline_data(inode)) {
-               if (pos + len <= MAX_INLINE_DATA) {
-                       read_inline_data(page, ipage);
-                       set_inode_flag(F2FS_I(inode), FI_DATA_EXIST);
-                       sync_inode_page(&dn);
-                       goto put_next;
+       if (need_balance && has_not_enough_free_secs(sbi, 0)) {
+               unlock_page(page);
+               f2fs_balance_fs(sbi, true);
+               lock_page(page);
+               if (page->mapping != mapping) {
+                       /* The page got truncated from under us */
+                       f2fs_put_page(page, 1);
+                       goto repeat;
                 }
-               err = f2fs_convert_inline_page(&dn, page);
-               if (err)
-                       goto put_fail;
         }
  
-       err = f2fs_get_block(&dn, index);
-       if (err)
-               goto put_fail;
-put_next:
-       f2fs_put_dnode(&dn);
-       f2fs_unlock_op(sbi);
-
         f2fs_wait_on_page_writeback(page, DATA);
  
         /* wait for GCed encrypted page writeback */
         if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
-               f2fs_wait_on_encrypted_page_writeback(sbi, dn.data_blkaddr);
+               f2fs_wait_on_encrypted_page_writeback(sbi, blkaddr);
  
         if (len == PAGE_CACHE_SIZE)
                 goto out_update;
@@ -1474,14 +1534,14 @@ put_next:
                 goto out_update;
         }
  
-       if (dn.data_blkaddr == NEW_ADDR) {
+       if (blkaddr == NEW_ADDR) {
                 zero_user_segment(page, 0, PAGE_CACHE_SIZE);
         } else {
                 struct f2fs_io_info fio = {
                         .sbi = sbi,
                         .type = DATA,
                         .rw = READ_SYNC,
-                       .blk_addr = dn.data_blkaddr,
+                       .blk_addr = blkaddr,
                         .page = page,
                         .encrypted_page = NULL,
                 };
@@ -1512,10 +1572,6 @@ out_clear:
         clear_cold_data(page);
         return 0;
  
-put_fail:
-       f2fs_put_dnode(&dn);
-unlock_fail:
-       f2fs_unlock_op(sbi);
  fail:
         f2fs_put_page(page, 1);
         f2fs_write_failed(mapping, pos + len);
@@ -1540,6 +1596,7 @@ static int f2fs_write_end(struct file *file,
         }
  
         f2fs_put_page(page, 1);
+       f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
         return copied;
  }
  
@@ -1567,11 +1624,9 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
         int err;
  
         /* we don't need to use inline_data strictly */
-       if (f2fs_has_inline_data(inode)) {
-               err = f2fs_convert_inline_inode(inode);
-               if (err)
-                       return err;
-       }
+       err = f2fs_convert_inline_inode(inode);
+       if (err)
+               return err;
  
         if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
                 return 0;
@@ -1583,11 +1638,9 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
         trace_f2fs_direct_IO_enter(inode, offset, count, iov_iter_rw(iter));
  
         if (iov_iter_rw(iter) == WRITE) {
-               __allocate_data_blocks(inode, offset, count);
-               if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) {
-                       err = -EIO;
+               err = __allocate_data_blocks(inode, offset, count);
+               if (err)
                         goto out;
-               }
         }
  
         err = blockdev_direct_IO(iocb, inode, iter, offset, get_data_block_dio);
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c

index ad1b18a..4fb6ef8 100644 (file)
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -38,12 +38,15 @@ static void update_general_status(struct f2fs_sb_info *sbi)
         si->hit_rbtree = atomic64_read(&sbi->read_hit_rbtree);
         si->hit_total = si->hit_largest + si->hit_cached + si->hit_rbtree;
         si->total_ext = atomic64_read(&sbi->total_hit_ext);
-       si->ext_tree = sbi->total_ext_tree;
+       si->ext_tree = atomic_read(&sbi->total_ext_tree);
+       si->zombie_tree = atomic_read(&sbi->total_zombie_tree);
         si->ext_node = atomic_read(&sbi->total_ext_node);
         si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES);
         si->ndirty_dent = get_pages(sbi, F2FS_DIRTY_DENTS);
-       si->ndirty_dirs = sbi->n_dirty_dirs;
         si->ndirty_meta = get_pages(sbi, F2FS_DIRTY_META);
+       si->ndirty_data = get_pages(sbi, F2FS_DIRTY_DATA);
+       si->ndirty_dirs = sbi->ndirty_inode[DIR_INODE];
+       si->ndirty_files = sbi->ndirty_inode[FILE_INODE];
         si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES);
         si->wb_pages = get_pages(sbi, F2FS_WRITEBACK);
         si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg;
@@ -105,7 +108,7 @@ static void update_sit_info(struct f2fs_sb_info *sbi)
  
         bimodal = 0;
         total_vblocks = 0;
-       blks_per_sec = sbi->segs_per_sec * (1 << sbi->log_blocks_per_seg);
+       blks_per_sec = sbi->segs_per_sec * sbi->blocks_per_seg;
         hblks_per_sec = blks_per_sec / 2;
         for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
                 vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec);
@@ -189,10 +192,10 @@ get_cache:
         si->cache_mem += NM_I(sbi)->dirty_nat_cnt *
                                         sizeof(struct nat_entry_set);
         si->cache_mem += si->inmem_pages * sizeof(struct inmem_pages);
-       si->cache_mem += sbi->n_dirty_dirs * sizeof(struct inode_entry);
         for (i = 0; i <= UPDATE_INO; i++)
                 si->cache_mem += sbi->im[i].ino_num * sizeof(struct ino_entry);
-       si->cache_mem += sbi->total_ext_tree * sizeof(struct extent_tree);
+       si->cache_mem += atomic_read(&sbi->total_ext_tree) *
+                                               sizeof(struct extent_tree);
         si->cache_mem += atomic_read(&sbi->total_ext_node) *
                                                 sizeof(struct extent_node);
  
@@ -267,7 +270,8 @@ static int stat_show(struct seq_file *s, void *v)
                            si->dirty_count);
                 seq_printf(s, "  - Prefree: %d\n  - Free: %d (%d)\n\n",
                            si->prefree_count, si->free_segs, si->free_secs);
-               seq_printf(s, "CP calls: %d\n", si->cp_count);
+               seq_printf(s, "CP calls: %d (BG: %d)\n",
+                               si->cp_count, si->bg_cp_count);
                 seq_printf(s, "GC calls: %d (BG: %d)\n",
                            si->call_count, si->bg_gc);
                 seq_printf(s, "  - data segments : %d (%d)\n",
@@ -288,8 +292,8 @@ static int stat_show(struct seq_file *s, void *v)
                                 !si->total_ext ? 0 :
                                 div64_u64(si->hit_total * 100, si->total_ext),
                                 si->hit_total, si->total_ext);
-               seq_printf(s, "  - Inner Struct Count: tree: %d, node: %d\n",
-                               si->ext_tree, si->ext_node);
+               seq_printf(s, "  - Inner Struct Count: tree: %d(%d), node: %d\n",
+                               si->ext_tree, si->zombie_tree, si->ext_node);
                 seq_puts(s, "\nBalancing F2FS Async:\n");
                 seq_printf(s, "  - inmem: %4d, wb: %4d\n",
                            si->inmem_pages, si->wb_pages);
@@ -297,6 +301,8 @@ static int stat_show(struct seq_file *s, void *v)
                            si->ndirty_node, si->node_pages);
                 seq_printf(s, "  - dents: %4d in dirs:%4d\n",
                            si->ndirty_dent, si->ndirty_dirs);
+               seq_printf(s, "  - datas: %4d in files:%4d\n",
+                          si->ndirty_data, si->ndirty_files);
                 seq_printf(s, "  - meta: %4d in %4d\n",
                            si->ndirty_meta, si->meta_pages);
                 seq_printf(s, "  - NATs: %9d/%9d\n  - SITs: %9d/%9d\n",
@@ -404,20 +410,23 @@ void f2fs_destroy_stats(struct f2fs_sb_info *sbi)
         kfree(si);
  }
  
-void __init f2fs_create_root_stats(void)
+int __init f2fs_create_root_stats(void)
  {
         struct dentry *file;
  
         f2fs_debugfs_root = debugfs_create_dir("f2fs", NULL);
         if (!f2fs_debugfs_root)
-               return;
+               return -ENOMEM;
  
         file = debugfs_create_file("status", S_IRUGO, f2fs_debugfs_root,
                         NULL, &stat_fops);
         if (!file) {
                 debugfs_remove(f2fs_debugfs_root);
                 f2fs_debugfs_root = NULL;
+               return -ENOMEM;
         }
+
+       return 0;
  }
  
  void f2fs_destroy_root_stats(void)
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c

index 7c1678b..faa7495 100644 (file)
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -172,8 +172,6 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
  
         namehash = f2fs_dentry_hash(&name);
  
-       f2fs_bug_on(F2FS_I_SB(dir), level > MAX_DIR_HASH_DEPTH);
-
         nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level);
         nblock = bucket_blocks(level);
  
@@ -238,6 +236,14 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,
                 goto out;
  
         max_depth = F2FS_I(dir)->i_current_depth;
+       if (unlikely(max_depth > MAX_DIR_HASH_DEPTH)) {
+               f2fs_msg(F2FS_I_SB(dir)->sb, KERN_WARNING,
+                               "Corrupted max_depth of %lu: %u",
+                               dir->i_ino, max_depth);
+               max_depth = MAX_DIR_HASH_DEPTH;
+               F2FS_I(dir)->i_current_depth = max_depth;
+               mark_inode_dirty(dir);
+       }
  
         for (level = 0; level < max_depth; level++) {
                 de = find_in_level(dir, level, &fname, res_page);
@@ -444,7 +450,7 @@ error:
         /* once the failed inode becomes a bad inode, i_mode is S_IFREG */
         truncate_inode_pages(&inode->i_data, 0);
         truncate_blocks(inode, 0, false);
-       remove_dirty_dir_inode(inode);
+       remove_dirty_inode(inode);
         remove_inode_page(inode);
         return ERR_PTR(err);
  }
@@ -630,6 +636,7 @@ fail:
         f2fs_put_page(dentry_page, 1);
  out:
         f2fs_fname_free_filename(&fname);
+       f2fs_update_time(F2FS_I_SB(dir), REQ_TIME);
         return err;
  }
  
@@ -651,6 +658,7 @@ int f2fs_do_tmpfile(struct inode *inode, struct inode *dir)
         clear_inode_flag(F2FS_I(inode), FI_NEW_INODE);
  fail:
         up_write(&F2FS_I(inode)->i_sem);
+       f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
         return err;
  }
  
@@ -695,6 +703,8 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
         int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len));
         int i;
  
+       f2fs_update_time(F2FS_I_SB(dir), REQ_TIME);
+
         if (f2fs_has_inline_dentry(dir))
                 return f2fs_delete_inline_entry(dentry, page, dir, inode);
  
@@ -855,25 +865,27 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx)
  
         for (; n < npages; n++) {
                 dentry_page = get_lock_data_page(inode, n, false);
-               if (IS_ERR(dentry_page))
-                       continue;
+               if (IS_ERR(dentry_page)) {
+                       err = PTR_ERR(dentry_page);
+                       if (err == -ENOENT)
+                               continue;
+                       else
+                               goto out;
+               }
  
                 dentry_blk = kmap(dentry_page);
  
                 make_dentry_ptr(inode, &d, (void *)dentry_blk, 1);
  
-               if (f2fs_fill_dentries(ctx, &d, n * NR_DENTRY_IN_BLOCK, &fstr))
-                       goto stop;
+               if (f2fs_fill_dentries(ctx, &d, n * NR_DENTRY_IN_BLOCK, &fstr)) {
+                       kunmap(dentry_page);
+                       f2fs_put_page(dentry_page, 1);
+                       break;
+               }
  
                 ctx->pos = (n + 1) * NR_DENTRY_IN_BLOCK;
                 kunmap(dentry_page);
                 f2fs_put_page(dentry_page, 1);
-               dentry_page = NULL;
-       }
-stop:
-       if (dentry_page && !IS_ERR(dentry_page)) {
-               kunmap(dentry_page);
-               f2fs_put_page(dentry_page, 1);
         }
  out:
         f2fs_fname_crypto_free_buffer(&fstr);
diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c

index 7ddba81..ccd5c63 100644 (file)
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -36,7 +36,7 @@ static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi,
  
         rb_link_node(&en->rb_node, parent, p);
         rb_insert_color(&en->rb_node, &et->root);
-       et->count++;
+       atomic_inc(&et->node_cnt);
         atomic_inc(&sbi->total_ext_node);
         return en;
  }
@@ -45,7 +45,7 @@ static void __detach_extent_node(struct f2fs_sb_info *sbi,
                                 struct extent_tree *et, struct extent_node *en)
  {
         rb_erase(&en->rb_node, &et->root);
-       et->count--;
+       atomic_dec(&et->node_cnt);
         atomic_dec(&sbi->total_ext_node);
  
         if (et->cached_en == en)
@@ -68,11 +68,13 @@ static struct extent_tree *__grab_extent_tree(struct inode *inode)
                 et->root = RB_ROOT;
                 et->cached_en = NULL;
                 rwlock_init(&et->lock);
-               atomic_set(&et->refcount, 0);
-               et->count = 0;
-               sbi->total_ext_tree++;
+               INIT_LIST_HEAD(&et->list);
+               atomic_set(&et->node_cnt, 0);
+               atomic_inc(&sbi->total_ext_tree);
+       } else {
+               atomic_dec(&sbi->total_zombie_tree);
+               list_del_init(&et->list);
         }
-       atomic_inc(&et->refcount);
         up_write(&sbi->extent_tree_lock);
  
         /* never died until evict_inode */
@@ -131,7 +133,7 @@ static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
  {
         struct rb_node *node, *next;
         struct extent_node *en;
-       unsigned int count = et->count;
+       unsigned int count = atomic_read(&et->node_cnt);
  
         node = rb_first(&et->root);
         while (node) {
@@ -152,7 +154,7 @@ static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
                 node = next;
         }
  
-       return count - et->count;
+       return count - atomic_read(&et->node_cnt);
  }
  
  static void __drop_largest_extent(struct inode *inode,
@@ -164,34 +166,33 @@ static void __drop_largest_extent(struct inode *inode,
                 largest->len = 0;
  }
  
-void f2fs_drop_largest_extent(struct inode *inode, pgoff_t fofs)
-{
-       if (!f2fs_may_extent_tree(inode))
-               return;
-
-       __drop_largest_extent(inode, fofs, 1);
-}
-
-void f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
+/* return true, if inode page is changed */
+bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
  {
         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
         struct extent_tree *et;
         struct extent_node *en;
         struct extent_info ei;
  
-       if (!f2fs_may_extent_tree(inode))
-               return;
+       if (!f2fs_may_extent_tree(inode)) {
+               /* drop largest extent */
+               if (i_ext && i_ext->len) {
+                       i_ext->len = 0;
+                       return true;
+               }
+               return false;
+       }
  
         et = __grab_extent_tree(inode);
  
-       if (!i_ext || le32_to_cpu(i_ext->len) < F2FS_MIN_EXTENT_LEN)
-               return;
+       if (!i_ext || !i_ext->len)
+               return false;
  
         set_extent_info(&ei, le32_to_cpu(i_ext->fofs),
                 le32_to_cpu(i_ext->blk), le32_to_cpu(i_ext->len));
  
         write_lock(&et->lock);
-       if (et->count)
+       if (atomic_read(&et->node_cnt))
                 goto out;
  
         en = __init_extent_tree(sbi, et, &ei);
@@ -202,6 +203,7 @@ void f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
         }
  out:
         write_unlock(&et->lock);
+       return false;
  }
  
  static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
@@ -549,45 +551,44 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode,
  unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
  {
         struct extent_tree *treevec[EXT_TREE_VEC_SIZE];
+       struct extent_tree *et, *next;
         struct extent_node *en, *tmp;
         unsigned long ino = F2FS_ROOT_INO(sbi);
-       struct radix_tree_root *root = &sbi->extent_tree_root;
         unsigned int found;
         unsigned int node_cnt = 0, tree_cnt = 0;
         int remained;
+       bool do_free = false;
  
         if (!test_opt(sbi, EXTENT_CACHE))
                 return 0;
  
+       if (!atomic_read(&sbi->total_zombie_tree))
+               goto free_node;
+
         if (!down_write_trylock(&sbi->extent_tree_lock))
                 goto out;
  
         /* 1. remove unreferenced extent tree */
-       while ((found = radix_tree_gang_lookup(root,
-                               (void **)treevec, ino, EXT_TREE_VEC_SIZE))) {
-               unsigned i;
-
-               ino = treevec[found - 1]->ino + 1;
-               for (i = 0; i < found; i++) {
-                       struct extent_tree *et = treevec[i];
-
-                       if (!atomic_read(&et->refcount)) {
-                               write_lock(&et->lock);
-                               node_cnt += __free_extent_tree(sbi, et, true);
-                               write_unlock(&et->lock);
+       list_for_each_entry_safe(et, next, &sbi->zombie_list, list) {
+               if (atomic_read(&et->node_cnt)) {
+                       write_lock(&et->lock);
+                       node_cnt += __free_extent_tree(sbi, et, true);
+                       write_unlock(&et->lock);
+               }
  
-                               radix_tree_delete(root, et->ino);
-                               kmem_cache_free(extent_tree_slab, et);
-                               sbi->total_ext_tree--;
-                               tree_cnt++;
+               list_del_init(&et->list);
+               radix_tree_delete(&sbi->extent_tree_root, et->ino);
+               kmem_cache_free(extent_tree_slab, et);
+               atomic_dec(&sbi->total_ext_tree);
+               atomic_dec(&sbi->total_zombie_tree);
+               tree_cnt++;
  
-                               if (node_cnt + tree_cnt >= nr_shrink)
-                                       goto unlock_out;
-                       }
-               }
+               if (node_cnt + tree_cnt >= nr_shrink)
+                       goto unlock_out;
         }
         up_write(&sbi->extent_tree_lock);
  
+free_node:
         /* 2. remove LRU extent entries */
         if (!down_write_trylock(&sbi->extent_tree_lock))
                 goto out;
@@ -599,15 +600,19 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
                 if (!remained--)
                         break;
                 list_del_init(&en->list);
+               do_free = true;
         }
         spin_unlock(&sbi->extent_lock);
  
+       if (do_free == false)
+               goto unlock_out;
+
         /*
          * reset ino for searching victims from beginning of global extent tree.
          */
         ino = F2FS_ROOT_INO(sbi);
  
-       while ((found = radix_tree_gang_lookup(root,
+       while ((found = radix_tree_gang_lookup(&sbi->extent_tree_root,
                                 (void **)treevec, ino, EXT_TREE_VEC_SIZE))) {
                 unsigned i;
  
@@ -615,9 +620,13 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
                 for (i = 0; i < found; i++) {
                         struct extent_tree *et = treevec[i];
  
-                       write_lock(&et->lock);
-                       node_cnt += __free_extent_tree(sbi, et, false);
-                       write_unlock(&et->lock);
+                       if (!atomic_read(&et->node_cnt))
+                               continue;
+
+                       if (write_trylock(&et->lock)) {
+                               node_cnt += __free_extent_tree(sbi, et, false);
+                               write_unlock(&et->lock);
+                       }
  
                         if (node_cnt + tree_cnt >= nr_shrink)
                                 goto unlock_out;
@@ -637,7 +646,7 @@ unsigned int f2fs_destroy_extent_node(struct inode *inode)
         struct extent_tree *et = F2FS_I(inode)->extent_tree;
         unsigned int node_cnt = 0;
  
-       if (!et)
+       if (!et || !atomic_read(&et->node_cnt))
                 return 0;
  
         write_lock(&et->lock);
@@ -656,8 +665,12 @@ void f2fs_destroy_extent_tree(struct inode *inode)
         if (!et)
                 return;
  
-       if (inode->i_nlink && !is_bad_inode(inode) && et->count) {
-               atomic_dec(&et->refcount);
+       if (inode->i_nlink && !is_bad_inode(inode) &&
+                                       atomic_read(&et->node_cnt)) {
+               down_write(&sbi->extent_tree_lock);
+               list_add_tail(&et->list, &sbi->zombie_list);
+               atomic_inc(&sbi->total_zombie_tree);
+               up_write(&sbi->extent_tree_lock);
                 return;
         }
  
@@ -666,11 +679,10 @@ void f2fs_destroy_extent_tree(struct inode *inode)
  
         /* delete extent tree entry in radix tree */
         down_write(&sbi->extent_tree_lock);
-       atomic_dec(&et->refcount);
-       f2fs_bug_on(sbi, atomic_read(&et->refcount) || et->count);
+       f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
         radix_tree_delete(&sbi->extent_tree_root, inode->i_ino);
         kmem_cache_free(extent_tree_slab, et);
-       sbi->total_ext_tree--;
+       atomic_dec(&sbi->total_ext_tree);
         up_write(&sbi->extent_tree_lock);
  
         F2FS_I(inode)->extent_tree = NULL;
@@ -722,7 +734,9 @@ void init_extent_cache_info(struct f2fs_sb_info *sbi)
         init_rwsem(&sbi->extent_tree_lock);
         INIT_LIST_HEAD(&sbi->extent_list);
         spin_lock_init(&sbi->extent_lock);
-       sbi->total_ext_tree = 0;
+       atomic_set(&sbi->total_ext_tree, 0);
+       INIT_LIST_HEAD(&sbi->zombie_list);
+       atomic_set(&sbi->total_zombie_tree, 0);
         atomic_set(&sbi->total_ext_node, 0);
  }
  
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h

index ec6067c..ff79054 100644 (file)
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -21,6 +21,7 @@
  #include <linux/sched.h>
  #include <linux/vmalloc.h>
  #include <linux/bio.h>
+#include <linux/blkdev.h>
  
  #ifdef CONFIG_F2FS_CHECK_FS
  #define f2fs_bug_on(sbi, condition)    BUG_ON(condition)
@@ -54,6 +55,7 @@
  #define F2FS_MOUNT_FASTBOOT            0x00001000
  #define F2FS_MOUNT_EXTENT_CACHE                0x00002000
  #define F2FS_MOUNT_FORCE_FG_GC         0x00004000
+#define F2FS_MOUNT_DATA_FLUSH          0x00008000
  
  #define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option)
  #define set_opt(sbi, option)   (sbi->mount_opt.opt |= F2FS_MOUNT_##option)
@@ -125,6 +127,7 @@ enum {
  #define BATCHED_TRIM_BLOCKS(sbi)       \
                 (BATCHED_TRIM_SEGMENTS(sbi) << (sbi)->log_blocks_per_seg)
  #define DEF_CP_INTERVAL                        60      /* 60 secs */
+#define DEF_IDLE_INTERVAL              120     /* 2 mins */
  
  struct cp_control {
         int reason;
@@ -158,13 +161,7 @@ struct ino_entry {
         nid_t ino;              /* inode number */
  };
  
-/*
- * for the list of directory inodes or gc inodes.
- * NOTE: there are two slab users for this structure, if we add/modify/delete
- * fields in structure for one of slab users, it may affect fields or size of
- * other one, in this condition, it's better to split both of slab and related
- * data structure.
- */
+/* for the list of inodes to be GCed */
  struct inode_entry {
         struct list_head list;  /* list head */
         struct inode *inode;    /* vfs inode pointer */
@@ -234,6 +231,7 @@ static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int size,
  #define F2FS_IOC_ABORT_VOLATILE_WRITE  _IO(F2FS_IOCTL_MAGIC, 5)
  #define F2FS_IOC_GARBAGE_COLLECT       _IO(F2FS_IOCTL_MAGIC, 6)
  #define F2FS_IOC_WRITE_CHECKPOINT      _IO(F2FS_IOCTL_MAGIC, 7)
+#define F2FS_IOC_DEFRAGMENT            _IO(F2FS_IOCTL_MAGIC, 8)
  
  #define F2FS_IOC_SET_ENCRYPTION_POLICY                                 \
                 _IOR('f', 19, struct f2fs_encryption_policy)
@@ -256,10 +254,16 @@ static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int size,
  /*
   * ioctl commands in 32 bit emulation
   */
-#define F2FS_IOC32_GETFLAGS             FS_IOC32_GETFLAGS
-#define F2FS_IOC32_SETFLAGS             FS_IOC32_SETFLAGS
+#define F2FS_IOC32_GETFLAGS            FS_IOC32_GETFLAGS
+#define F2FS_IOC32_SETFLAGS            FS_IOC32_SETFLAGS
+#define F2FS_IOC32_GETVERSION          FS_IOC32_GETVERSION
  #endif
  
+struct f2fs_defragment {
+       u64 start;
+       u64 len;
+};
+
  /*
   * For INODE and NODE manager
   */
@@ -357,9 +361,9 @@ struct extent_tree {
         struct rb_root root;            /* root of extent info rb-tree */
         struct extent_node *cached_en;  /* recently accessed extent node */
         struct extent_info largest;     /* largested extent info */
+       struct list_head list;          /* to be used by sbi->zombie_list */
         rwlock_t lock;                  /* protect extent info rb-tree */
-       atomic_t refcount;              /* reference count of rb-tree */
-       unsigned int count;             /* # of extent node in rb-tree*/
+       atomic_t node_cnt;              /* # of extent node in rb-tree*/
  };
  
  /*
@@ -434,8 +438,8 @@ struct f2fs_inode_info {
         unsigned int clevel;            /* maximum level of given file name */
         nid_t i_xattr_nid;              /* node id that contains xattrs */
         unsigned long long xattr_ver;   /* cp version of xattr modification */
-       struct inode_entry *dirty_dir;  /* the pointer of dirty dir */
  
+       struct list_head dirty_list;    /* linked in global dirty list */
         struct list_head inmem_pages;   /* inmemory pages managed by f2fs */
         struct mutex inmem_lock;        /* lock for inmemory pages */
  
@@ -544,6 +548,7 @@ struct dnode_of_data {
         nid_t nid;                      /* node id of the direct node block */
         unsigned int ofs_in_node;       /* data offset in the node page */
         bool inode_page_locked;         /* inode page is locked or not */
+       bool node_changed;              /* is node block changed */
         block_t data_blkaddr;           /* block address of the node block */
  };
  
@@ -647,6 +652,7 @@ struct f2fs_sm_info {
  enum count_type {
         F2FS_WRITEBACK,
         F2FS_DIRTY_DENTS,
+       F2FS_DIRTY_DATA,
         F2FS_DIRTY_NODES,
         F2FS_DIRTY_META,
         F2FS_INMEM_PAGES,
@@ -695,6 +701,12 @@ struct f2fs_bio_info {
         struct rw_semaphore io_rwsem;   /* blocking op for bio */
  };
  
+enum inode_type {
+       DIR_INODE,                      /* for dirty dir inode */
+       FILE_INODE,                     /* for dirty regular/symlink inode */
+       NR_INODE_TYPE,                  /* # of types; sizes inode_list[] etc. */
+};
+
  /* for inner inode cache management */
  struct inode_management {
         struct radix_tree_root ino_root;        /* ino entry array */
@@ -711,11 +723,17 @@ enum {
         SBI_POR_DOING,                          /* recovery is doing or not */
  };
  
+enum {
+       CP_TIME,        /* presumably the checkpoint timer; TODO confirm */
+       REQ_TIME,       /* stamped by f2fs ops; read by is_idle() */
+       MAX_TIME,       /* sizes last_time[] and interval_time[] */
+};
+
  struct f2fs_sb_info {
         struct super_block *sb;                 /* pointer to VFS super block */
         struct proc_dir_entry *s_proc;          /* proc entry */
-       struct buffer_head *raw_super_buf;      /* buffer head of raw sb */
         struct f2fs_super_block *raw_super;     /* raw super block pointer */
+       int valid_super_block;                  /* valid super block no */
         int s_flag;                             /* flags for sbi */
  
         /* for node-related operations */
@@ -737,23 +755,26 @@ struct f2fs_sb_info {
         struct rw_semaphore node_write;         /* locking node writes */
         struct mutex writepages;                /* mutex for writepages() */
         wait_queue_head_t cp_wait;
-       long cp_expires, cp_interval;           /* next expected periodic cp */
+       unsigned long last_time[MAX_TIME];      /* to store time in jiffies */
+       long interval_time[MAX_TIME];           /* to store thresholds */
  
         struct inode_management im[MAX_INO_ENTRY];      /* manage inode cache */
  
         /* for orphan inode, use 0'th array */
         unsigned int max_orphans;               /* max orphan inodes */
  
-       /* for directory inode management */
-       struct list_head dir_inode_list;        /* dir inode list */
-       spinlock_t dir_inode_lock;              /* for dir inode list lock */
+       /* for inode management */
+       struct list_head inode_list[NR_INODE_TYPE];     /* dirty inode list */
+       spinlock_t inode_lock[NR_INODE_TYPE];   /* for dirty inode list lock */
  
         /* for extent tree cache */
         struct radix_tree_root extent_tree_root;/* cache extent cache entries */
         struct rw_semaphore extent_tree_lock;   /* locking extent radix tree */
         struct list_head extent_list;           /* lru list for shrinker */
         spinlock_t extent_lock;                 /* locking extent lru list */
-       int total_ext_tree;                     /* extent tree count */
+       atomic_t total_ext_tree;                /* extent tree count */
+       struct list_head zombie_list;           /* extent zombie tree list */
+       atomic_t total_zombie_tree;             /* extent zombie tree count */
         atomic_t total_ext_node;                /* extent info count */
  
         /* basic filesystem units */
@@ -771,6 +792,7 @@ struct f2fs_sb_info {
         unsigned int total_node_count;          /* total node block count */
         unsigned int total_valid_node_count;    /* valid node block count */
         unsigned int total_valid_inode_count;   /* valid inode count */
+       loff_t max_file_blocks;                 /* max block index of file */
         int active_logs;                        /* # of active logs */
         int dir_level;                          /* directory level */
  
@@ -809,7 +831,7 @@ struct f2fs_sb_info {
         atomic_t inline_inode;                  /* # of inline_data inodes */
         atomic_t inline_dir;                    /* # of inline_dentry inodes */
         int bg_gc;                              /* background gc calls */
-       unsigned int n_dirty_dirs;              /* # of dir inodes */
+       unsigned int ndirty_inode[NR_INODE_TYPE];       /* # of dirty inodes */
  #endif
         unsigned int last_victim[2];            /* last victim segment # */
         spinlock_t stat_lock;                   /* lock for stat operations */
@@ -824,6 +846,31 @@ struct f2fs_sb_info {
         unsigned int shrinker_run_no;
  };
  
+static inline void f2fs_update_time(struct f2fs_sb_info *sbi, int type)
+{
+       sbi->last_time[type] = jiffies; /* record "now" for timer 'type' */
+}
+
+static inline bool f2fs_time_over(struct f2fs_sb_info *sbi, int type)
+{       /* has interval_time[type] elapsed since last_time[type]? */
+       struct timespec ts = {sbi->interval_time[type], 0}; /* sec, nsec */
+       unsigned long interval = timespec_to_jiffies(&ts); /* in jiffies */
+
+       return time_after(jiffies, sbi->last_time[type] + interval);
+}
+
+static inline bool is_idle(struct f2fs_sb_info *sbi)
+{       /* idle: both root_rl counts are zero and REQ_TIME has expired */
+       struct block_device *bdev = sbi->sb->s_bdev;
+       struct request_queue *q = bdev_get_queue(bdev);
+       struct request_list *rl = &q->root_rl;
+
+       if (rl->count[BLK_RW_SYNC] || rl->count[BLK_RW_ASYNC])
+               return 0;       /* a sync/async count is non-zero: busy */
+
+       return f2fs_time_over(sbi, REQ_TIME);
+}
+
  /*
   * Inline functions
   */
@@ -1059,8 +1106,8 @@ static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type)
  static inline void inode_inc_dirty_pages(struct inode *inode)
  {
         atomic_inc(&F2FS_I(inode)->dirty_pages);
-       if (S_ISDIR(inode->i_mode))
-               inc_page_count(F2FS_I_SB(inode), F2FS_DIRTY_DENTS);
+       inc_page_count(F2FS_I_SB(inode), S_ISDIR(inode->i_mode) ?
+                               F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA);
  }
  
  static inline void dec_page_count(struct f2fs_sb_info *sbi, int count_type)
@@ -1075,9 +1122,8 @@ static inline void inode_dec_dirty_pages(struct inode *inode)
                 return;
  
         atomic_dec(&F2FS_I(inode)->dirty_pages);
-
-       if (S_ISDIR(inode->i_mode))
-               dec_page_count(F2FS_I_SB(inode), F2FS_DIRTY_DENTS);
+       dec_page_count(F2FS_I_SB(inode), S_ISDIR(inode->i_mode) ?
+                               F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA);
  }
  
  static inline int get_pages(struct f2fs_sb_info *sbi, int count_type)
@@ -1092,8 +1138,7 @@ static inline int get_dirty_pages(struct inode *inode)
  
  static inline int get_blocktype_secs(struct f2fs_sb_info *sbi, int block_type)
  {
-       unsigned int pages_per_sec = sbi->segs_per_sec *
-                                       (1 << sbi->log_blocks_per_seg);
+       unsigned int pages_per_sec = sbi->segs_per_sec * sbi->blocks_per_seg;
         return ((get_pages(sbi, block_type) + pages_per_sec - 1)
                         >> sbi->log_blocks_per_seg) / sbi->segs_per_sec;
  }
@@ -1416,6 +1461,8 @@ enum {
         FI_DROP_CACHE,          /* drop dirty page cache */
         FI_DATA_EXIST,          /* indicate data exists */
         FI_INLINE_DOTS,         /* indicate inline dot dentries */
+       FI_DO_DEFRAG,           /* indicate defragment is running */
+       FI_DIRTY_FILE,          /* indicate regular/symlink has dirty pages */
  };
  
  static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag)
@@ -1659,8 +1706,8 @@ long f2fs_compat_ioctl(struct file *, unsigned int, unsigned long);
  void f2fs_set_inode_flags(struct inode *);
  struct inode *f2fs_iget(struct super_block *, unsigned long);
  int try_to_free_nats(struct f2fs_sb_info *, int);
-void update_inode(struct inode *, struct page *);
-void update_inode_page(struct inode *);
+int update_inode(struct inode *, struct page *);
+int update_inode_page(struct inode *);
  int f2fs_write_inode(struct inode *, struct writeback_control *);
  void f2fs_evict_inode(struct inode *);
  void handle_failed_inode(struct inode *);
@@ -1765,7 +1812,7 @@ void destroy_node_manager_caches(void);
   */
  void register_inmem_page(struct inode *, struct page *);
  int commit_inmem_pages(struct inode *, bool);
-void f2fs_balance_fs(struct f2fs_sb_info *);
+void f2fs_balance_fs(struct f2fs_sb_info *, bool);
  void f2fs_balance_fs_bg(struct f2fs_sb_info *);
  int f2fs_issue_flush(struct f2fs_sb_info *);
  int create_flush_cmd_control(struct f2fs_sb_info *);
@@ -1811,9 +1858,9 @@ bool is_valid_blkaddr(struct f2fs_sb_info *, block_t, int);
  int ra_meta_pages(struct f2fs_sb_info *, block_t, int, int, bool);
  void ra_meta_pages_cond(struct f2fs_sb_info *, pgoff_t);
  long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long);
-void add_dirty_inode(struct f2fs_sb_info *, nid_t, int type);
-void remove_dirty_inode(struct f2fs_sb_info *, nid_t, int type);
-void release_dirty_inode(struct f2fs_sb_info *);
+void add_ino_entry(struct f2fs_sb_info *, nid_t, int type);
+void remove_ino_entry(struct f2fs_sb_info *, nid_t, int type);
+void release_ino_entry(struct f2fs_sb_info *);
  bool exist_written_data(struct f2fs_sb_info *, nid_t, int);
  int acquire_orphan_inode(struct f2fs_sb_info *);
  void release_orphan_inode(struct f2fs_sb_info *);
@@ -1823,9 +1870,9 @@ int recover_orphan_inodes(struct f2fs_sb_info *);
  int get_valid_checkpoint(struct f2fs_sb_info *);
  void update_dirty_page(struct inode *, struct page *);
  void add_dirty_dir_inode(struct inode *);
-void remove_dirty_dir_inode(struct inode *);
-void sync_dirty_dir_inodes(struct f2fs_sb_info *);
-void write_checkpoint(struct f2fs_sb_info *, struct cp_control *);
+void remove_dirty_inode(struct inode *);
+int sync_dirty_inodes(struct f2fs_sb_info *, enum inode_type);
+int write_checkpoint(struct f2fs_sb_info *, struct cp_control *);
  void init_ino_entry_info(struct f2fs_sb_info *);
  int __init create_checkpoint_caches(void);
  void destroy_checkpoint_caches(void);
@@ -1845,6 +1892,7 @@ struct page *find_data_page(struct inode *, pgoff_t);
  struct page *get_lock_data_page(struct inode *, pgoff_t, bool);
  struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool);
  int do_write_data_page(struct f2fs_io_info *);
+int f2fs_map_blocks(struct inode *, struct f2fs_map_blocks *, int, int);
  int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *, u64, u64);
  void f2fs_invalidate_page(struct page *, unsigned int, unsigned int);
  int f2fs_release_page(struct page *, gfp_t);
@@ -1875,8 +1923,9 @@ struct f2fs_stat_info {
         int main_area_segs, main_area_sections, main_area_zones;
         unsigned long long hit_largest, hit_cached, hit_rbtree;
         unsigned long long hit_total, total_ext;
-       int ext_tree, ext_node;
-       int ndirty_node, ndirty_dent, ndirty_dirs, ndirty_meta;
+       int ext_tree, zombie_tree, ext_node;
+       int ndirty_node, ndirty_meta;
+       int ndirty_dent, ndirty_dirs, ndirty_data, ndirty_files;
         int nats, dirty_nats, sits, dirty_sits, fnids;
         int total_count, utilization;
         int bg_gc, inmem_pages, wb_pages;
@@ -1886,7 +1935,7 @@ struct f2fs_stat_info {
         int util_free, util_valid, util_invalid;
         int rsvd_segs, overp_segs;
         int dirty_count, node_pages, meta_pages;
-       int prefree_count, call_count, cp_count;
+       int prefree_count, call_count, cp_count, bg_cp_count;
         int tot_segs, node_segs, data_segs, free_segs, free_secs;
         int bg_node_segs, bg_data_segs;
         int tot_blks, data_blks, node_blks;
@@ -1907,10 +1956,11 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
  }
  
  #define stat_inc_cp_count(si)          ((si)->cp_count++)
+#define stat_inc_bg_cp_count(si)       ((si)->bg_cp_count++)
  #define stat_inc_call_count(si)                ((si)->call_count++)
  #define stat_inc_bggc_count(sbi)       ((sbi)->bg_gc++)
-#define stat_inc_dirty_dir(sbi)                ((sbi)->n_dirty_dirs++)
-#define stat_dec_dirty_dir(sbi)                ((sbi)->n_dirty_dirs--)
+#define stat_inc_dirty_inode(sbi, type)        ((sbi)->ndirty_inode[type]++)
+#define stat_dec_dirty_inode(sbi, type)        ((sbi)->ndirty_inode[type]--)
  #define stat_inc_total_hit(sbi)                (atomic64_inc(&(sbi)->total_hit_ext))
  #define stat_inc_rbtree_node_hit(sbi)  (atomic64_inc(&(sbi)->read_hit_rbtree))
  #define stat_inc_largest_node_hit(sbi) (atomic64_inc(&(sbi)->read_hit_largest))
@@ -1985,14 +2035,15 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
  
  int f2fs_build_stats(struct f2fs_sb_info *);
  void f2fs_destroy_stats(struct f2fs_sb_info *);
-void __init f2fs_create_root_stats(void);
+int __init f2fs_create_root_stats(void);
  void f2fs_destroy_root_stats(void);
  #else
  #define stat_inc_cp_count(si)
+#define stat_inc_bg_cp_count(si)
  #define stat_inc_call_count(si)
  #define stat_inc_bggc_count(si)
-#define stat_inc_dirty_dir(sbi)
-#define stat_dec_dirty_dir(sbi)
+#define stat_inc_dirty_inode(sbi, type)
+#define stat_dec_dirty_inode(sbi, type)
  #define stat_inc_total_hit(sb)
  #define stat_inc_rbtree_node_hit(sb)
  #define stat_inc_largest_node_hit(sbi)
@@ -2013,7 +2064,7 @@ void f2fs_destroy_root_stats(void);
  
  static inline int f2fs_build_stats(struct f2fs_sb_info *sbi) { return 0; }
  static inline void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { }
-static inline void __init f2fs_create_root_stats(void) { }
+static inline int __init f2fs_create_root_stats(void) { return 0; }
  static inline void f2fs_destroy_root_stats(void) { }
  #endif
  
@@ -2067,8 +2118,7 @@ void f2fs_leave_shrinker(struct f2fs_sb_info *);
   * extent_cache.c
   */
  unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *, int);
-void f2fs_drop_largest_extent(struct inode *, pgoff_t);
-void f2fs_init_extent_tree(struct inode *, struct f2fs_extent *);
+bool f2fs_init_extent_tree(struct inode *, struct f2fs_extent *);
  unsigned int f2fs_destroy_extent_node(struct inode *);
  void f2fs_destroy_extent_tree(struct inode *);
  bool f2fs_lookup_extent_cache(struct inode *, pgoff_t, struct extent_info *);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c

index a197215..18ddb1e 100644 (file)
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -40,8 +40,6 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
         struct dnode_of_data dn;
         int err;
  
-       f2fs_balance_fs(sbi);
-
         sb_start_pagefault(inode->i_sb);
  
         f2fs_bug_on(sbi, f2fs_has_inline_data(inode));
@@ -57,6 +55,8 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
         f2fs_put_dnode(&dn);
         f2fs_unlock_op(sbi);
  
+       f2fs_balance_fs(sbi, dn.node_changed);
+
         file_update_time(vma->vm_file);
         lock_page(page);
         if (unlikely(page->mapping != inode->i_mapping ||
@@ -96,6 +96,7 @@ mapped:
         clear_cold_data(page);
  out:
         sb_end_pagefault(inode->i_sb);
+       f2fs_update_time(sbi, REQ_TIME);
         return block_page_mkwrite_return(err);
  }
  
@@ -201,7 +202,7 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
         trace_f2fs_sync_file_enter(inode);
  
         /* if fdatasync is triggered, let's do in-place-update */
-       if (get_dirty_pages(inode) <= SM_I(sbi)->min_fsync_blocks)
+       if (datasync || get_dirty_pages(inode) <= SM_I(sbi)->min_fsync_blocks)
                 set_inode_flag(fi, FI_NEED_IPU);
         ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
         clear_inode_flag(fi, FI_NEED_IPU);
@@ -233,9 +234,6 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
                 goto out;
         }
  go_write:
-       /* guarantee free sections for fsync */
-       f2fs_balance_fs(sbi);
-
         /*
          * Both of fdatasync() and fsync() are able to be recovered from
          * sudden-power-off.
@@ -261,8 +259,10 @@ sync_nodes:
         sync_node_pages(sbi, ino, &wbc);
  
         /* if cp_error was enabled, we should avoid infinite loop */
-       if (unlikely(f2fs_cp_error(sbi)))
+       if (unlikely(f2fs_cp_error(sbi))) {
+               ret = -EIO;
                 goto out;
+       }
  
         if (need_inode_block_update(sbi, ino)) {
                 mark_inode_dirty_sync(inode);
@@ -275,12 +275,13 @@ sync_nodes:
                 goto out;
  
         /* once recovery info is written, don't need to tack this */
-       remove_dirty_inode(sbi, ino, APPEND_INO);
+       remove_ino_entry(sbi, ino, APPEND_INO);
         clear_inode_flag(fi, FI_APPEND_WRITE);
  flush_out:
-       remove_dirty_inode(sbi, ino, UPDATE_INO);
+       remove_ino_entry(sbi, ino, UPDATE_INO);
         clear_inode_flag(fi, FI_UPDATE_WRITE);
         ret = f2fs_issue_flush(sbi);
+       f2fs_update_time(sbi, REQ_TIME);
  out:
         trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret);
         f2fs_trace_ios(NULL, 1);
@@ -418,19 +419,18 @@ static loff_t f2fs_llseek(struct file *file, loff_t offset, int whence)
  static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma)
  {
         struct inode *inode = file_inode(file);
+       int err;
  
         if (f2fs_encrypted_inode(inode)) {
-               int err = f2fs_get_encryption_info(inode);
+               err = f2fs_get_encryption_info(inode);
                 if (err)
                         return 0;
         }
  
         /* we don't need to use inline_data strictly */
-       if (f2fs_has_inline_data(inode)) {
-               int err = f2fs_convert_inline_inode(inode);
-               if (err)
-                       return err;
-       }
+       err = f2fs_convert_inline_inode(inode);
+       if (err)
+               return err;
  
         file_accessed(file);
         vma->vm_ops = &f2fs_file_vm_ops;
@@ -483,11 +483,11 @@ int truncate_data_blocks_range(struct dnode_of_data *dn, int count)
                                                 F2FS_I(dn->inode)) + ofs;
                 f2fs_update_extent_cache_range(dn, fofs, 0, len);
                 dec_valid_block_count(sbi, dn->inode, nr_free);
-               set_page_dirty(dn->node_page);
                 sync_inode_page(dn);
         }
         dn->ofs_in_node = ofs;
  
+       f2fs_update_time(sbi, REQ_TIME);
         trace_f2fs_truncate_data_blocks_range(dn->inode, dn->nid,
                                          dn->ofs_in_node, nr_free);
         return nr_free;
@@ -604,7 +604,7 @@ int f2fs_truncate(struct inode *inode, bool lock)
         trace_f2fs_truncate(inode);
  
         /* we should check inline_data size */
-       if (f2fs_has_inline_data(inode) && !f2fs_may_inline_data(inode)) {
+       if (!f2fs_may_inline_data(inode)) {
                 err = f2fs_convert_inline_inode(inode);
                 if (err)
                         return err;
@@ -679,13 +679,20 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
                         err = f2fs_truncate(inode, true);
                         if (err)
                                 return err;
-                       f2fs_balance_fs(F2FS_I_SB(inode));
+                       f2fs_balance_fs(F2FS_I_SB(inode), true);
                 } else {
                         /*
                          * do not trim all blocks after i_size if target size is
                          * larger than i_size.
                          */
                         truncate_setsize(inode, attr->ia_size);
+
+                       /* should convert inline inode here */
+                       if (!f2fs_may_inline_data(inode)) {
+                               err = f2fs_convert_inline_inode(inode);
+                               if (err)
+                                       return err;
+                       }
                         inode->i_mtime = inode->i_ctime = CURRENT_TIME;
                 }
         }
@@ -727,7 +734,7 @@ static int fill_zero(struct inode *inode, pgoff_t index,
         if (!len)
                 return 0;
  
-       f2fs_balance_fs(sbi);
+       f2fs_balance_fs(sbi, true);
  
         f2fs_lock_op(sbi);
         page = get_new_data_page(inode, NULL, index, false);
@@ -778,13 +785,11 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
  {
         pgoff_t pg_start, pg_end;
         loff_t off_start, off_end;
-       int ret = 0;
+       int ret;
  
-       if (f2fs_has_inline_data(inode)) {
-               ret = f2fs_convert_inline_inode(inode);
-               if (ret)
-                       return ret;
-       }
+       ret = f2fs_convert_inline_inode(inode);
+       if (ret)
+               return ret;
  
         pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT;
         pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT;
@@ -815,7 +820,7 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
                         loff_t blk_start, blk_end;
                         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
  
-                       f2fs_balance_fs(sbi);
+                       f2fs_balance_fs(sbi, true);
  
                         blk_start = (loff_t)pg_start << PAGE_CACHE_SHIFT;
                         blk_end = (loff_t)pg_end << PAGE_CACHE_SHIFT;
@@ -918,7 +923,7 @@ static int f2fs_do_collapse(struct inode *inode, pgoff_t start, pgoff_t end)
         int ret = 0;
  
         for (; end < nrpages; start++, end++) {
-               f2fs_balance_fs(sbi);
+               f2fs_balance_fs(sbi, true);
                 f2fs_lock_op(sbi);
                 ret = __exchange_data_block(inode, end, start, true);
                 f2fs_unlock_op(sbi);
@@ -941,13 +946,9 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
         if (offset & (F2FS_BLKSIZE - 1) || len & (F2FS_BLKSIZE - 1))
                 return -EINVAL;
  
-       f2fs_balance_fs(F2FS_I_SB(inode));
-
-       if (f2fs_has_inline_data(inode)) {
-               ret = f2fs_convert_inline_inode(inode);
-               if (ret)
-                       return ret;
-       }
+       ret = f2fs_convert_inline_inode(inode);
+       if (ret)
+               return ret;
  
         pg_start = offset >> PAGE_CACHE_SHIFT;
         pg_end = (offset + len) >> PAGE_CACHE_SHIFT;
@@ -991,13 +992,9 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
         if (ret)
                 return ret;
  
-       f2fs_balance_fs(sbi);
-
-       if (f2fs_has_inline_data(inode)) {
-               ret = f2fs_convert_inline_inode(inode);
-               if (ret)
-                       return ret;
-       }
+       ret = f2fs_convert_inline_inode(inode);
+       if (ret)
+               return ret;
  
         ret = filemap_write_and_wait_range(mapping, offset, offset + len - 1);
         if (ret)
@@ -1104,13 +1101,11 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
         if (offset & (F2FS_BLKSIZE - 1) || len & (F2FS_BLKSIZE - 1))
                 return -EINVAL;
  
-       f2fs_balance_fs(sbi);
+       ret = f2fs_convert_inline_inode(inode);
+       if (ret)
+               return ret;
  
-       if (f2fs_has_inline_data(inode)) {
-               ret = f2fs_convert_inline_inode(inode);
-               if (ret)
-                       return ret;
-       }
+       f2fs_balance_fs(sbi, true);
  
         ret = truncate_blocks(inode, i_size_read(inode), true);
         if (ret)
@@ -1154,17 +1149,15 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
         loff_t off_start, off_end;
         int ret = 0;
  
-       f2fs_balance_fs(sbi);
-
         ret = inode_newsize_ok(inode, (len + offset));
         if (ret)
                 return ret;
  
-       if (f2fs_has_inline_data(inode)) {
-               ret = f2fs_convert_inline_inode(inode);
-               if (ret)
-                       return ret;
-       }
+       ret = f2fs_convert_inline_inode(inode);
+       if (ret)
+               return ret;
+
+       f2fs_balance_fs(sbi, true);
  
         pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT;
         pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT;
@@ -1246,6 +1239,7 @@ static long f2fs_fallocate(struct file *file, int mode,
         if (!ret) {
                 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
                 mark_inode_dirty(inode);
+               f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
         }
  
  out:
@@ -1353,8 +1347,6 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
         if (!inode_owner_or_capable(inode))
                 return -EACCES;
  
-       f2fs_balance_fs(F2FS_I_SB(inode));
-
         if (f2fs_is_atomic_file(inode))
                 return 0;
  
@@ -1363,6 +1355,8 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
                 return ret;
  
         set_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE);
+       f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
+
         return 0;
  }
  
@@ -1384,8 +1378,10 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
         if (f2fs_is_atomic_file(inode)) {
                 clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE);
                 ret = commit_inmem_pages(inode, false);
-               if (ret)
+               if (ret) {
+                       set_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE);
                         goto err_out;
+               }
         }
  
         ret = f2fs_sync_file(filp, 0, LLONG_MAX, 0);
@@ -1410,6 +1406,7 @@ static int f2fs_ioc_start_volatile_write(struct file *filp)
                 return ret;
  
         set_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE);
+       f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
         return 0;
  }
  
@@ -1441,13 +1438,17 @@ static int f2fs_ioc_abort_volatile_write(struct file *filp)
         if (ret)
                 return ret;
  
-       f2fs_balance_fs(F2FS_I_SB(inode));
-
-       clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE);
-       clear_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE);
-       commit_inmem_pages(inode, true);
+       if (f2fs_is_atomic_file(inode)) {
+               clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE);
+               commit_inmem_pages(inode, true);
+       }
+       if (f2fs_is_volatile_file(inode)) {
+               clear_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE);
+               ret = f2fs_sync_file(filp, 0, LLONG_MAX, 0);
+       }
  
         mnt_drop_write_file(filp);
+       f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
         return ret;
  }
  
@@ -1487,6 +1488,7 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
         default:
                 return -EINVAL;
         }
+       f2fs_update_time(sbi, REQ_TIME);
         return 0;
  }
  
@@ -1517,6 +1519,7 @@ static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg)
         if (copy_to_user((struct fstrim_range __user *)arg, &range,
                                 sizeof(range)))
                 return -EFAULT;
+       f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
         return 0;
  }
  
@@ -1540,6 +1543,7 @@ static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg)
                                 sizeof(policy)))
                 return -EFAULT;
  
+       f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
         return f2fs_process_policy(&policy, inode);
  #else
         return -EOPNOTSUPP;
@@ -1586,13 +1590,13 @@ static int f2fs_ioc_get_encryption_pwsalt(struct file *filp, unsigned long arg)
         generate_random_uuid(sbi->raw_super->encrypt_pw_salt);
  
         err = f2fs_commit_super(sbi, false);
-
-       mnt_drop_write_file(filp);
         if (err) {
                 /* undo new data */
                 memset(sbi->raw_super->encrypt_pw_salt, 0, 16);
+               mnt_drop_write_file(filp);
                 return err;
         }
+       mnt_drop_write_file(filp);
  got_it:
         if (copy_to_user((__u8 __user *)arg, sbi->raw_super->encrypt_pw_salt,
                                                                         16))
@@ -1629,7 +1633,6 @@ static int f2fs_ioc_write_checkpoint(struct file *filp, unsigned long arg)
  {
         struct inode *inode = file_inode(filp);
         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
-       struct cp_control cpc;
  
         if (!capable(CAP_SYS_ADMIN))
                 return -EPERM;
@@ -1637,13 +1640,196 @@ static int f2fs_ioc_write_checkpoint(struct file *filp, unsigned long arg)
         if (f2fs_readonly(sbi->sb))
                 return -EROFS;
  
-       cpc.reason = __get_cp_reason(sbi);
+       return f2fs_sync_fs(sbi->sb, 1);
+}
  
-       mutex_lock(&sbi->gc_mutex);
-       write_checkpoint(sbi, &cpc);
-       mutex_unlock(&sbi->gc_mutex);
+static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
+                                       struct file *filp,
+                                       struct f2fs_defragment *range)
+{
+       struct inode *inode = file_inode(filp);
+       struct f2fs_map_blocks map;
+       struct extent_info ei;
+       pgoff_t pg_start, pg_end;
+       unsigned int blk_per_seg = sbi->blocks_per_seg;
+       unsigned int total = 0, sec_num;
+       unsigned int pages_per_sec = sbi->segs_per_sec * blk_per_seg;
+       block_t blk_end = 0;
+       bool fragmented = false;
+       int err;
  
-       return 0;
+       /* if in-place-update policy is enabled, don't waste time here */
+       if (need_inplace_update(inode))
+               return -EINVAL;
+
+       pg_start = range->start >> PAGE_CACHE_SHIFT;
+       pg_end = (range->start + range->len) >> PAGE_CACHE_SHIFT;
+
+       f2fs_balance_fs(sbi, true);
+
+       mutex_lock(&inode->i_mutex);
+
+       /* writeback all dirty pages in the range */
+       err = filemap_write_and_wait_range(inode->i_mapping, range->start,
+                                               range->start + range->len - 1);
+       if (err)
+               goto out;
+
+       /*
+        * lookup mapping info in extent cache, skip defragmenting if physical
+        * block addresses are contiguous.
+        */
+       if (f2fs_lookup_extent_cache(inode, pg_start, &ei)) {
+               if (ei.fofs + ei.len >= pg_end)
+                       goto out;
+       }
+
+       map.m_lblk = pg_start;
+
+       /*
+        * lookup mapping info in dnode page cache, skip defragmenting if all
+        * physical block addresses are contiguous even if there are hole(s)
+        * in logical blocks.
+        */
+       while (map.m_lblk < pg_end) {
+               map.m_len = pg_end - map.m_lblk;
+               err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_READ);
+               if (err)
+                       goto out;
+
+               if (!(map.m_flags & F2FS_MAP_FLAGS)) {
+                       map.m_lblk++;
+                       continue;
+               }
+
+               if (blk_end && blk_end != map.m_pblk) {
+                       fragmented = true;
+                       break;
+               }
+               blk_end = map.m_pblk + map.m_len;
+
+               map.m_lblk += map.m_len;
+       }
+
+       if (!fragmented)
+               goto out;
+
+       map.m_lblk = pg_start;
+       map.m_len = pg_end - pg_start;
+
+       sec_num = (map.m_len + pages_per_sec - 1) / pages_per_sec;
+
+       /*
+        * make sure there are enough free sections for LFS allocation; this
+        * keeps defragment from running in SSR mode when free sections are
+        * being allocated intensively
+        */
+       if (has_not_enough_free_secs(sbi, sec_num)) {
+               err = -EAGAIN;
+               goto out;
+       }
+
+       while (map.m_lblk < pg_end) {
+               pgoff_t idx;
+               int cnt = 0;
+
+do_map:
+               map.m_len = pg_end - map.m_lblk;
+               err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_READ);
+               if (err)
+                       goto clear_out;
+
+               if (!(map.m_flags & F2FS_MAP_FLAGS)) {
+                       map.m_lblk++;
+                       continue;
+               }
+
+               set_inode_flag(F2FS_I(inode), FI_DO_DEFRAG);
+
+               idx = map.m_lblk;
+               while (idx < map.m_lblk + map.m_len && cnt < blk_per_seg) {
+                       struct page *page;
+
+                       page = get_lock_data_page(inode, idx, true);
+                       if (IS_ERR(page)) {
+                               err = PTR_ERR(page);
+                               goto clear_out;
+                       }
+
+                       set_page_dirty(page);
+                       f2fs_put_page(page, 1);
+
+                       idx++;
+                       cnt++;
+                       total++;
+               }
+
+               map.m_lblk = idx;
+
+               if (idx < pg_end && cnt < blk_per_seg)
+                       goto do_map;
+
+               clear_inode_flag(F2FS_I(inode), FI_DO_DEFRAG);
+
+               err = filemap_fdatawrite(inode->i_mapping);
+               if (err)
+                       goto out;
+       }
+clear_out:
+       clear_inode_flag(F2FS_I(inode), FI_DO_DEFRAG);
+out:
+       mutex_unlock(&inode->i_mutex);
+       if (!err)
+               range->len = (u64)total << PAGE_CACHE_SHIFT;
+       return err;
+}
+
+static int f2fs_ioc_defragment(struct file *filp, unsigned long arg)
+{
+       struct inode *inode = file_inode(filp);
+       struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+       struct f2fs_defragment range;
+       int err;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
+       if (!S_ISREG(inode->i_mode))
+               return -EINVAL;
+
+       err = mnt_want_write_file(filp);
+       if (err)
+               return err;
+
+       if (f2fs_readonly(sbi->sb)) {
+               err = -EROFS;
+               goto out;
+       }
+
+       if (copy_from_user(&range, (struct f2fs_defragment __user *)arg,
+                                                       sizeof(range))) {
+               err = -EFAULT;
+               goto out;
+       }
+
+       /* verify alignment of offset & size */
+       if (range.start & (F2FS_BLKSIZE - 1) ||
+               range.len & (F2FS_BLKSIZE - 1)) {
+               err = -EINVAL;
+               goto out;
+       }
+
+       err = f2fs_defragment_range(sbi, filp, &range);
+       f2fs_update_time(sbi, REQ_TIME);
+       if (err < 0)
+               goto out;
+
+       if (copy_to_user((struct f2fs_defragment __user *)arg, &range,
+                                                       sizeof(range)))
+               err = -EFAULT;
+out:
+       mnt_drop_write_file(filp);
+       return err;
  }
  
  long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
@@ -1679,6 +1865,8 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                 return f2fs_ioc_gc(filp, arg);
         case F2FS_IOC_WRITE_CHECKPOINT:
                 return f2fs_ioc_write_checkpoint(filp, arg);
+       case F2FS_IOC_DEFRAGMENT:
+               return f2fs_ioc_defragment(filp, arg);
         default:
                 return -ENOTTY;
         }
@@ -1706,6 +1894,22 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
         case F2FS_IOC32_SETFLAGS:
                 cmd = F2FS_IOC_SETFLAGS;
                 break;
+       case F2FS_IOC32_GETVERSION:
+               cmd = F2FS_IOC_GETVERSION;
+               break;
+       case F2FS_IOC_START_ATOMIC_WRITE:
+       case F2FS_IOC_COMMIT_ATOMIC_WRITE:
+       case F2FS_IOC_START_VOLATILE_WRITE:
+       case F2FS_IOC_RELEASE_VOLATILE_WRITE:
+       case F2FS_IOC_ABORT_VOLATILE_WRITE:
+       case F2FS_IOC_SHUTDOWN:
+       case F2FS_IOC_SET_ENCRYPTION_POLICY:
+       case F2FS_IOC_GET_ENCRYPTION_PWSALT:
+       case F2FS_IOC_GET_ENCRYPTION_POLICY:
+       case F2FS_IOC_GARBAGE_COLLECT:
+       case F2FS_IOC_WRITE_CHECKPOINT:
+       case F2FS_IOC_DEFRAGMENT:
+               break;
         default:
                 return -ENOIOCTLCMD;
         }
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c

index fedbf67..f610c2a 100644 (file)
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -16,7 +16,6 @@
  #include <linux/kthread.h>
  #include <linux/delay.h>
  #include <linux/freezer.h>
-#include <linux/blkdev.h>
  
  #include "f2fs.h"
  #include "node.h"
@@ -173,9 +172,9 @@ static unsigned int get_max_cost(struct f2fs_sb_info *sbi,
  {
         /* SSR allocates in a segment unit */
         if (p->alloc_mode == SSR)
-               return 1 << sbi->log_blocks_per_seg;
+               return sbi->blocks_per_seg;
         if (p->gc_mode == GC_GREEDY)
-               return (1 << sbi->log_blocks_per_seg) * p->ofs_unit;
+               return sbi->blocks_per_seg * p->ofs_unit;
         else if (p->gc_mode == GC_CB)
                 return UINT_MAX;
         else /* No other gc_mode */
@@ -832,8 +831,10 @@ gc_more:
  
         if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE)))
                 goto stop;
-       if (unlikely(f2fs_cp_error(sbi)))
+       if (unlikely(f2fs_cp_error(sbi))) {
+               ret = -EIO;
                 goto stop;
+       }
  
         if (gc_type == BG_GC && has_not_enough_free_secs(sbi, sec_freed)) {
                 gc_type = FG_GC;
diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h

index b4a65be..a993967 100644 (file)
--- a/fs/f2fs/gc.h
+++ b/fs/f2fs/gc.h
@@ -100,11 +100,3 @@ static inline bool has_enough_invalid_blocks(struct f2fs_sb_info *sbi)
                 return true;
         return false;
  }
-
-static inline int is_idle(struct f2fs_sb_info *sbi)
-{
-       struct block_device *bdev = sbi->sb->s_bdev;
-       struct request_queue *q = bdev_get_queue(bdev);
-       struct request_list *rl = &q->root_rl;
-       return !(rl->count[BLK_RW_SYNC]) && !(rl->count[BLK_RW_ASYNC]);
-}
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c

index bda7126..c3f0b7d 100644 (file)
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -16,9 +16,6 @@
  
  bool f2fs_may_inline_data(struct inode *inode)
  {
-       if (!test_opt(F2FS_I_SB(inode), INLINE_DATA))
-               return false;
-
         if (f2fs_is_atomic_file(inode))
                 return false;
  
@@ -177,6 +174,9 @@ int f2fs_convert_inline_inode(struct inode *inode)
         struct page *ipage, *page;
         int err = 0;
  
+       if (!f2fs_has_inline_data(inode))
+               return 0;
+
         page = grab_cache_page(inode->i_mapping, 0);
         if (!page)
                 return -ENOMEM;
@@ -199,6 +199,9 @@ out:
         f2fs_unlock_op(sbi);
  
         f2fs_put_page(page, 1);
+
+       f2fs_balance_fs(sbi, dn.node_changed);
+
         return err;
  }
  
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c

index 5528801..2adeff2 100644 (file)
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -138,7 +138,8 @@ static int do_read_inode(struct inode *inode)
         fi->i_pino = le32_to_cpu(ri->i_pino);
         fi->i_dir_level = ri->i_dir_level;
  
-       f2fs_init_extent_tree(inode, &ri->i_ext);
+       if (f2fs_init_extent_tree(inode, &ri->i_ext))
+               set_page_dirty(node_page);
  
         get_inline_info(fi, ri);
  
@@ -222,7 +223,7 @@ bad_inode:
         return ERR_PTR(ret);
  }
  
-void update_inode(struct inode *inode, struct page *node_page)
+int update_inode(struct inode *inode, struct page *node_page)
  {
         struct f2fs_inode *ri;
  
@@ -260,15 +261,16 @@ void update_inode(struct inode *inode, struct page *node_page)
  
         __set_inode_rdev(inode, ri);
         set_cold_node(inode, node_page);
-       set_page_dirty(node_page);
-
         clear_inode_flag(F2FS_I(inode), FI_DIRTY_INODE);
+
+       return set_page_dirty(node_page);
  }
  
-void update_inode_page(struct inode *inode)
+int update_inode_page(struct inode *inode)
  {
         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
         struct page *node_page;
+       int ret = 0;
  retry:
         node_page = get_node_page(sbi, inode->i_ino);
         if (IS_ERR(node_page)) {
@@ -279,10 +281,11 @@ retry:
                 } else if (err != -ENOENT) {
                         f2fs_stop_checkpoint(sbi);
                 }
-               return;
+               return 0;
         }
-       update_inode(inode, node_page);
+       ret = update_inode(inode, node_page);
         f2fs_put_page(node_page, 1);
+       return ret;
  }
  
  int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
@@ -300,9 +303,8 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
          * We need to balance fs here to prevent from producing dirty node pages
          * during the urgent cleaning time when runing out of free sections.
          */
-       update_inode_page(inode);
-
-       f2fs_balance_fs(sbi);
+       if (update_inode_page(inode))
+               f2fs_balance_fs(sbi, true);
         return 0;
  }
  
@@ -328,7 +330,7 @@ void f2fs_evict_inode(struct inode *inode)
                 goto out_clear;
  
         f2fs_bug_on(sbi, get_dirty_pages(inode));
-       remove_dirty_dir_inode(inode);
+       remove_dirty_inode(inode);
  
         f2fs_destroy_extent_tree(inode);
  
@@ -358,9 +360,9 @@ no_delete:
         if (xnid)
                 invalidate_mapping_pages(NODE_MAPPING(sbi), xnid, xnid);
         if (is_inode_flag_set(fi, FI_APPEND_WRITE))
-               add_dirty_inode(sbi, inode->i_ino, APPEND_INO);
+               add_ino_entry(sbi, inode->i_ino, APPEND_INO);
         if (is_inode_flag_set(fi, FI_UPDATE_WRITE))
-               add_dirty_inode(sbi, inode->i_ino, UPDATE_INO);
+               add_ino_entry(sbi, inode->i_ino, UPDATE_INO);
         if (is_inode_flag_set(fi, FI_FREE_NID)) {
                 if (err && err != -ENOENT)
                         alloc_nid_done(sbi, inode->i_ino);
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c

index e7587fc..6f944e5 100644 (file)
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -60,7 +60,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
         if (f2fs_encrypted_inode(dir) && f2fs_may_encrypt(inode))
                 f2fs_set_encrypted_inode(inode);
  
-       if (f2fs_may_inline_data(inode))
+       if (test_opt(sbi, INLINE_DATA) && f2fs_may_inline_data(inode))
                 set_inode_flag(F2FS_I(inode), FI_INLINE_DATA);
         if (f2fs_may_inline_dentry(inode))
                 set_inode_flag(F2FS_I(inode), FI_INLINE_DENTRY);
@@ -128,8 +128,6 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
         nid_t ino = 0;
         int err;
  
-       f2fs_balance_fs(sbi);
-
         inode = f2fs_new_inode(dir, mode);
         if (IS_ERR(inode))
                 return PTR_ERR(inode);
@@ -142,6 +140,8 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
         inode->i_mapping->a_ops = &f2fs_dblock_aops;
         ino = inode->i_ino;
  
+       f2fs_balance_fs(sbi, true);
+
         f2fs_lock_op(sbi);
         err = f2fs_add_link(dentry, inode);
         if (err)
@@ -172,7 +172,7 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
                 !f2fs_is_child_context_consistent_with_parent(dir, inode))
                 return -EPERM;
  
-       f2fs_balance_fs(sbi);
+       f2fs_balance_fs(sbi, true);
  
         inode->i_ctime = CURRENT_TIME;
         ihold(inode);
@@ -214,6 +214,15 @@ static int __recover_dot_dentries(struct inode *dir, nid_t pino)
         struct page *page;
         int err = 0;
  
+       if (f2fs_readonly(sbi->sb)) {
+               f2fs_msg(sbi->sb, KERN_INFO,
+                       "skip recovering inline_dots inode (ino:%lu, pino:%u) "
+                       "in readonly mountpoint", dir->i_ino, pino);
+               return 0;
+       }
+
+       f2fs_balance_fs(sbi, true);
+
         f2fs_lock_op(sbi);
  
         de = f2fs_find_entry(dir, &dot, &page);
@@ -288,12 +297,13 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
         int err = -ENOENT;
  
         trace_f2fs_unlink_enter(dir, dentry);
-       f2fs_balance_fs(sbi);
  
         de = f2fs_find_entry(dir, &dentry->d_name, &page);
         if (!de)
                 goto fail;
  
+       f2fs_balance_fs(sbi, true);
+
         f2fs_lock_op(sbi);
         err = acquire_orphan_inode(sbi);
         if (err) {
@@ -344,8 +354,6 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
         if (len > dir->i_sb->s_blocksize)
                 return -ENAMETOOLONG;
  
-       f2fs_balance_fs(sbi);
-
         inode = f2fs_new_inode(dir, S_IFLNK | S_IRWXUGO);
         if (IS_ERR(inode))
                 return PTR_ERR(inode);
@@ -357,6 +365,8 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
         inode_nohighmem(inode);
         inode->i_mapping->a_ops = &f2fs_dblock_aops;
  
+       f2fs_balance_fs(sbi, true);
+
         f2fs_lock_op(sbi);
         err = f2fs_add_link(dentry, inode);
         if (err)
@@ -437,8 +447,6 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
         struct inode *inode;
         int err;
  
-       f2fs_balance_fs(sbi);
-
         inode = f2fs_new_inode(dir, S_IFDIR | mode);
         if (IS_ERR(inode))
                 return PTR_ERR(inode);
@@ -448,6 +456,8 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
         inode->i_mapping->a_ops = &f2fs_dblock_aops;
         mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_HIGH_ZERO);
  
+       f2fs_balance_fs(sbi, true);
+
         set_inode_flag(F2FS_I(inode), FI_INC_LINK);
         f2fs_lock_op(sbi);
         err = f2fs_add_link(dentry, inode);
@@ -485,8 +495,6 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
         struct inode *inode;
         int err = 0;
  
-       f2fs_balance_fs(sbi);
-
         inode = f2fs_new_inode(dir, mode);
         if (IS_ERR(inode))
                 return PTR_ERR(inode);
@@ -494,6 +502,8 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
         init_special_inode(inode, inode->i_mode, rdev);
         inode->i_op = &f2fs_special_inode_operations;
  
+       f2fs_balance_fs(sbi, true);
+
         f2fs_lock_op(sbi);
         err = f2fs_add_link(dentry, inode);
         if (err)
@@ -520,9 +530,6 @@ static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry,
         struct inode *inode;
         int err;
  
-       if (!whiteout)
-               f2fs_balance_fs(sbi);
-
         inode = f2fs_new_inode(dir, mode);
         if (IS_ERR(inode))
                 return PTR_ERR(inode);
@@ -536,6 +543,8 @@ static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry,
                 inode->i_mapping->a_ops = &f2fs_dblock_aops;
         }
  
+       f2fs_balance_fs(sbi, true);
+
         f2fs_lock_op(sbi);
         err = acquire_orphan_inode(sbi);
         if (err)
@@ -608,8 +617,6 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
                 goto out;
         }
  
-       f2fs_balance_fs(sbi);
-
         old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page);
         if (!old_entry)
                 goto out;
@@ -639,6 +646,8 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
                 if (!new_entry)
                         goto out_whiteout;
  
+               f2fs_balance_fs(sbi, true);
+
                 f2fs_lock_op(sbi);
  
                 err = acquire_orphan_inode(sbi);
@@ -670,6 +679,8 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
                 update_inode_page(old_inode);
                 update_inode_page(new_inode);
         } else {
+               f2fs_balance_fs(sbi, true);
+
                 f2fs_lock_op(sbi);
  
                 err = f2fs_add_link(new_dentry, old_inode);
@@ -767,8 +778,6 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
                                                                 new_inode)))
                 return -EPERM;
  
-       f2fs_balance_fs(sbi);
-
         old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page);
         if (!old_entry)
                 goto out;
@@ -811,6 +820,8 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
                         goto out_new_dir;
         }
  
+       f2fs_balance_fs(sbi, true);
+
         f2fs_lock_op(sbi);
  
         err = update_dent_inode(old_inode, new_inode, &new_dentry->d_name);
@@ -933,7 +944,7 @@ static const char *f2fs_encrypted_get_link(struct dentry *dentry,
  {
         struct page *cpage = NULL;
         char *caddr, *paddr = NULL;
-       struct f2fs_str cstr;
+       struct f2fs_str cstr = FSTR_INIT(NULL, 0);
         struct f2fs_str pstr = FSTR_INIT(NULL, 0);
         struct f2fs_encrypted_symlink_data *sd;
         loff_t size = min_t(loff_t, i_size_read(inode), PAGE_SIZE - 1);
@@ -956,6 +967,12 @@ static const char *f2fs_encrypted_get_link(struct dentry *dentry,
         /* Symlink is encrypted */
         sd = (struct f2fs_encrypted_symlink_data *)caddr;
         cstr.len = le16_to_cpu(sd->len);
+
+       /* this is broken symlink case */
+       if (unlikely(cstr.len == 0)) {
+               res = -ENOENT;
+               goto errout;
+       }
         cstr.name = kmalloc(cstr.len, GFP_NOFS);
         if (!cstr.name) {
                 res = -ENOMEM;
@@ -964,7 +981,7 @@ static const char *f2fs_encrypted_get_link(struct dentry *dentry,
         memcpy(cstr.name, sd->encrypted_path, cstr.len);
  
         /* this is broken symlink case */
-       if (cstr.name[0] == 0 && cstr.len == 0) {
+       if (unlikely(cstr.name[0] == 0)) {
                 res = -ENOENT;
                 goto errout;
         }
@@ -1005,10 +1022,12 @@ const struct inode_operations f2fs_encrypted_symlink_inode_operations = {
         .get_link       = f2fs_encrypted_get_link,
         .getattr        = f2fs_getattr,
         .setattr        = f2fs_setattr,
+#ifdef CONFIG_F2FS_FS_XATTR
         .setxattr       = generic_setxattr,
         .getxattr       = generic_getxattr,
         .listxattr      = f2fs_listxattr,
         .removexattr    = generic_removexattr,
+#endif
  };
  #endif
  
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c

index 7bcbc6e..342597a 100644 (file)
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -65,13 +65,14 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type)
                                 sizeof(struct ino_entry)) >> PAGE_CACHE_SHIFT;
                 res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
         } else if (type == EXTENT_CACHE) {
-               mem_size = (sbi->total_ext_tree * sizeof(struct extent_tree) +
+               mem_size = (atomic_read(&sbi->total_ext_tree) *
+                               sizeof(struct extent_tree) +
                                 atomic_read(&sbi->total_ext_node) *
                                 sizeof(struct extent_node)) >> PAGE_CACHE_SHIFT;
                 res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
         } else {
-               if (sbi->sb->s_bdi->wb.dirty_exceeded)
-                       return false;
+               if (!sbi->sb->s_bdi->wb.dirty_exceeded)
+                       return true;
         }
         return res;
  }
@@ -261,13 +262,11 @@ static void cache_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid,
  {
         struct nat_entry *e;
  
-       down_write(&nm_i->nat_tree_lock);
         e = __lookup_nat_cache(nm_i, nid);
         if (!e) {
                 e = grab_nat_entry(nm_i, nid);
                 node_info_from_raw_nat(&e->ni, ne);
         }
-       up_write(&nm_i->nat_tree_lock);
  }
  
  static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
@@ -379,6 +378,8 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
  
         memset(&ne, 0, sizeof(struct f2fs_nat_entry));
  
+       down_write(&nm_i->nat_tree_lock);
+
         /* Check current segment summary */
         mutex_lock(&curseg->curseg_mutex);
         i = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 0);
@@ -399,6 +400,7 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
  cache:
         /* cache nat entry */
         cache_nat_entry(NM_I(sbi), nid, &ne);
+       up_write(&nm_i->nat_tree_lock);
  }
  
  /*
@@ -676,7 +678,8 @@ static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs,
                         ret = truncate_dnode(&rdn);
                         if (ret < 0)
                                 goto out_err;
-                       set_nid(page, i, 0, false);
+                       if (set_nid(page, i, 0, false))
+                               dn->node_changed = true;
                 }
         } else {
                 child_nofs = nofs + ofs * (NIDS_PER_BLOCK + 1) + 1;
@@ -689,7 +692,8 @@ static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs,
                         rdn.nid = child_nid;
                         ret = truncate_nodes(&rdn, child_nofs, 0, depth - 1);
                         if (ret == (NIDS_PER_BLOCK + 1)) {
-                               set_nid(page, i, 0, false);
+                               if (set_nid(page, i, 0, false))
+                                       dn->node_changed = true;
                                 child_nofs += ret;
                         } else if (ret < 0 && ret != -ENOENT) {
                                 goto out_err;
@@ -750,7 +754,8 @@ static int truncate_partial_nodes(struct dnode_of_data *dn,
                 err = truncate_dnode(dn);
                 if (err < 0)
                         goto fail;
-               set_nid(pages[idx], i, 0, false);
+               if (set_nid(pages[idx], i, 0, false))
+                       dn->node_changed = true;
         }
  
         if (offset[idx + 1] == 0) {
@@ -975,7 +980,8 @@ struct page *new_node_page(struct dnode_of_data *dn,
         fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true);
         set_cold_node(dn->inode, page);
         SetPageUptodate(page);
-       set_page_dirty(page);
+       if (set_page_dirty(page))
+               dn->node_changed = true;
  
         if (f2fs_has_xattr_block(ofs))
                 F2FS_I(dn->inode)->i_xattr_nid = dn->nid;
@@ -1035,6 +1041,10 @@ void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid)
         struct page *apage;
         int err;
  
+       if (!nid)
+               return;
+       f2fs_bug_on(sbi, check_nid_range(sbi, nid));
+
         apage = find_get_page(NODE_MAPPING(sbi), nid);
         if (apage && PageUptodate(apage)) {
                 f2fs_put_page(apage, 0);
@@ -1050,51 +1060,38 @@ void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid)
         f2fs_put_page(apage, err ? 1 : 0);
  }
  
-struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid)
+/*
+ * readahead up to MAX_RA_NODE node pages listed in @parent from @start.
+ */
+void ra_node_pages(struct page *parent, int start)
  {
-       struct page *page;
-       int err;
-repeat:
-       page = grab_cache_page(NODE_MAPPING(sbi), nid);
-       if (!page)
-               return ERR_PTR(-ENOMEM);
+       struct f2fs_sb_info *sbi = F2FS_P_SB(parent);
+       struct blk_plug plug;
+       int i, end;
+       nid_t nid;
  
-       err = read_node_page(page, READ_SYNC);
-       if (err < 0) {
-               f2fs_put_page(page, 1);
-               return ERR_PTR(err);
-       } else if (err != LOCKED_PAGE) {
-               lock_page(page);
-       }
+       blk_start_plug(&plug);
  
-       if (unlikely(!PageUptodate(page) || nid != nid_of_node(page))) {
-               ClearPageUptodate(page);
-               f2fs_put_page(page, 1);
-               return ERR_PTR(-EIO);
-       }
-       if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
-               f2fs_put_page(page, 1);
-               goto repeat;
+       /* Then, try readahead for siblings of the desired node */
+       end = start + MAX_RA_NODE;
+       end = min(end, NIDS_PER_BLOCK);
+       for (i = start; i < end; i++) {
+               nid = get_nid(parent, i, false);
+               ra_node_page(sbi, nid);
         }
-       return page;
+
+       blk_finish_plug(&plug);
  }
  
-/*
- * Return a locked page for the desired node page.
- * And, readahead MAX_RA_NODE number of node pages.
- */
-struct page *get_node_page_ra(struct page *parent, int start)
+struct page *__get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid,
+                                       struct page *parent, int start)
  {
-       struct f2fs_sb_info *sbi = F2FS_P_SB(parent);
-       struct blk_plug plug;
         struct page *page;
-       int err, i, end;
-       nid_t nid;
+       int err;
  
-       /* First, try getting the desired direct node. */
-       nid = get_nid(parent, start, false);
         if (!nid)
                 return ERR_PTR(-ENOENT);
+       f2fs_bug_on(sbi, check_nid_range(sbi, nid));
  repeat:
         page = grab_cache_page(NODE_MAPPING(sbi), nid);
         if (!page)
@@ -1108,46 +1105,53 @@ repeat:
                 goto page_hit;
         }
  
-       blk_start_plug(&plug);
-
-       /* Then, try readahead for siblings of the desired node */
-       end = start + MAX_RA_NODE;
-       end = min(end, NIDS_PER_BLOCK);
-       for (i = start + 1; i < end; i++) {
-               nid = get_nid(parent, i, false);
-               if (!nid)
-                       continue;
-               ra_node_page(sbi, nid);
-       }
-
-       blk_finish_plug(&plug);
+       if (parent)
+               ra_node_pages(parent, start + 1);
  
         lock_page(page);
+
+       if (unlikely(!PageUptodate(page))) {
+               f2fs_put_page(page, 1);
+               return ERR_PTR(-EIO);
+       }
         if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
                 f2fs_put_page(page, 1);
                 goto repeat;
         }
  page_hit:
-       if (unlikely(!PageUptodate(page))) {
-               f2fs_put_page(page, 1);
-               return ERR_PTR(-EIO);
-       }
+       f2fs_bug_on(sbi, nid != nid_of_node(page));
         return page;
  }
  
+struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid)
+{
+       return __get_node_page(sbi, nid, NULL, 0);
+}
+
+struct page *get_node_page_ra(struct page *parent, int start)
+{
+       struct f2fs_sb_info *sbi = F2FS_P_SB(parent);
+       nid_t nid = get_nid(parent, start, false);
+
+       return __get_node_page(sbi, nid, parent, start);
+}
+
  void sync_inode_page(struct dnode_of_data *dn)
  {
+       int ret = 0;
+
         if (IS_INODE(dn->node_page) || dn->inode_page == dn->node_page) {
-               update_inode(dn->inode, dn->node_page);
+               ret = update_inode(dn->inode, dn->node_page);
         } else if (dn->inode_page) {
                 if (!dn->inode_page_locked)
                         lock_page(dn->inode_page);
-               update_inode(dn->inode, dn->inode_page);
+               ret = update_inode(dn->inode, dn->inode_page);
                 if (!dn->inode_page_locked)
                         unlock_page(dn->inode_page);
         } else {
-               update_inode_page(dn->inode);
+               ret = update_inode_page(dn->inode);
         }
+       dn->node_changed = ret ? true: false;
  }
  
  int sync_node_pages(struct f2fs_sb_info *sbi, nid_t ino,
@@ -1175,6 +1179,11 @@ next_step:
                 for (i = 0; i < nr_pages; i++) {
                         struct page *page = pvec.pages[i];
  
+                       if (unlikely(f2fs_cp_error(sbi))) {
+                               pagevec_release(&pvec);
+                               return -EIO;
+                       }
+
                         /*
                          * flushing sequence with step:
                          * 0. indirect nodes
@@ -1349,7 +1358,7 @@ static int f2fs_write_node_page(struct page *page,
         up_read(&sbi->node_write);
         unlock_page(page);
  
-       if (wbc->for_reclaim)
+       if (wbc->for_reclaim || unlikely(f2fs_cp_error(sbi)))
                 f2fs_submit_merged_bio(sbi, NODE, WRITE);
  
         return 0;
@@ -1440,13 +1449,10 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
  
         if (build) {
                 /* do not add allocated nids */
-               down_read(&nm_i->nat_tree_lock);
                 ne = __lookup_nat_cache(nm_i, nid);
-               if (ne &&
-                       (!get_nat_flag(ne, IS_CHECKPOINTED) ||
+               if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) ||
                                 nat_get_blkaddr(ne) != NULL_ADDR))
                         allocated = true;
-               up_read(&nm_i->nat_tree_lock);
                 if (allocated)
                         return 0;
         }
@@ -1532,6 +1538,8 @@ static void build_free_nids(struct f2fs_sb_info *sbi)
         ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES,
                                                         META_NAT, true);
  
+       down_read(&nm_i->nat_tree_lock);
+
         while (1) {
                 struct page *page = get_current_nat_page(sbi, nid);
  
@@ -1560,6 +1568,7 @@ static void build_free_nids(struct f2fs_sb_info *sbi)
                         remove_free_nid(nm_i, nid);
         }
         mutex_unlock(&curseg->curseg_mutex);
+       up_read(&nm_i->nat_tree_lock);
  
         ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid),
                                         nm_i->ra_nid_pages, META_NAT, false);
@@ -1582,8 +1591,6 @@ retry:
  
         /* We should not use stale free nids created by build_free_nids */
         if (nm_i->fcnt && !on_build_free_nids(nm_i)) {
-               struct node_info ni;
-
                 f2fs_bug_on(sbi, list_empty(&nm_i->free_nid_list));
                 list_for_each_entry(i, &nm_i->free_nid_list, list)
                         if (i->state == NID_NEW)
@@ -1594,13 +1601,6 @@ retry:
                 i->state = NID_ALLOC;
                 nm_i->fcnt--;
                 spin_unlock(&nm_i->free_nid_list_lock);
-
-               /* check nid is allocated already */
-               get_node_info(sbi, *nid, &ni);
-               if (ni.blk_addr != NULL_ADDR) {
-                       alloc_nid_done(sbi, *nid);
-                       goto retry;
-               }
                 return true;
         }
         spin_unlock(&nm_i->free_nid_list_lock);
@@ -1842,14 +1842,12 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
  
                 raw_ne = nat_in_journal(sum, i);
  
-               down_write(&nm_i->nat_tree_lock);
                 ne = __lookup_nat_cache(nm_i, nid);
                 if (!ne) {
                         ne = grab_nat_entry(nm_i, nid);
                         node_info_from_raw_nat(&ne->ni, &raw_ne);
                 }
                 __set_nat_cache_dirty(nm_i, ne);
-               up_write(&nm_i->nat_tree_lock);
         }
         update_nats_in_cursum(sum, -i);
         mutex_unlock(&curseg->curseg_mutex);
@@ -1883,7 +1881,6 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
         struct f2fs_nat_block *nat_blk;
         struct nat_entry *ne, *cur;
         struct page *page = NULL;
-       struct f2fs_nm_info *nm_i = NM_I(sbi);
  
         /*
          * there are two steps to flush nat entries:
@@ -1920,12 +1917,8 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
                         raw_ne = &nat_blk->entries[nid - start_nid];
                 }
                 raw_nat_from_node_info(raw_ne, &ne->ni);
-
-               down_write(&NM_I(sbi)->nat_tree_lock);
                 nat_reset_flag(ne);
                 __clear_nat_cache_dirty(NM_I(sbi), ne);
-               up_write(&NM_I(sbi)->nat_tree_lock);
-
                 if (nat_get_blkaddr(ne) == NULL_ADDR)
                         add_free_nid(sbi, nid, false);
         }
@@ -1937,9 +1930,7 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
  
         f2fs_bug_on(sbi, set->entry_cnt);
  
-       down_write(&nm_i->nat_tree_lock);
         radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set);
-       up_write(&nm_i->nat_tree_lock);
         kmem_cache_free(nat_entry_set_slab, set);
  }
  
@@ -1959,6 +1950,9 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
  
         if (!nm_i->dirty_nat_cnt)
                 return;
+
+       down_write(&nm_i->nat_tree_lock);
+
         /*
          * if there are no enough space in journal to store dirty nat
          * entries, remove all entries from journal and merge them
@@ -1967,7 +1961,6 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
         if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt, NAT_JOURNAL))
                 remove_nats_in_journal(sbi);
  
-       down_write(&nm_i->nat_tree_lock);
         while ((found = __gang_lookup_nat_set(nm_i,
                                         set_idx, SETVEC_SIZE, setvec))) {
                 unsigned idx;
@@ -1976,12 +1969,13 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
                         __adjust_nat_entry_set(setvec[idx], &sets,
                                                         MAX_NAT_JENTRIES(sum));
         }
-       up_write(&nm_i->nat_tree_lock);
  
         /* flush dirty nats in nat entry set */
         list_for_each_entry_safe(set, tmp, &sets, set_list)
                 __flush_nat_entry_set(sbi, set);
  
+       up_write(&nm_i->nat_tree_lock);
+
         f2fs_bug_on(sbi, nm_i->dirty_nat_cnt);
  }
  
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h

index e4fffd2..d4d1f63 100644 (file)
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -183,7 +183,7 @@ static inline pgoff_t current_nat_addr(struct f2fs_sb_info *sbi, nid_t start)
  
         block_addr = (pgoff_t)(nm_i->nat_blkaddr +
                 (seg_off << sbi->log_blocks_per_seg << 1) +
-               (block_off & ((1 << sbi->log_blocks_per_seg) - 1)));
+               (block_off & (sbi->blocks_per_seg - 1)));
  
         if (f2fs_test_bit(block_off, nm_i->nat_bitmap))
                 block_addr += sbi->blocks_per_seg;
@@ -317,7 +317,7 @@ static inline bool IS_DNODE(struct page *node_page)
         return true;
  }
  
-static inline void set_nid(struct page *p, int off, nid_t nid, bool i)
+static inline int set_nid(struct page *p, int off, nid_t nid, bool i)
  {
         struct f2fs_node *rn = F2FS_NODE(p);
  
@@ -327,7 +327,7 @@ static inline void set_nid(struct page *p, int off, nid_t nid, bool i)
                 rn->i.i_nid[off - NODE_DIR1_BLOCK] = cpu_to_le32(nid);
         else
                 rn->in.nid[off] = cpu_to_le32(nid);
-       set_page_dirty(p);
+       return set_page_dirty(p);
  }
  
  static inline nid_t get_nid(struct page *p, int off, bool i)
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c

index cbf74f4..589b20b 100644 (file)
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -168,6 +168,32 @@ static void recover_inode(struct inode *inode, struct page *page)
                         ino_of_node(page), name);
  }
  
+static bool is_same_inode(struct inode *inode, struct page *ipage)
+{  /* Returns false if any in-memory timestamp of @inode is later than the one stored in @ipage, true otherwise. */
+       struct f2fs_inode *ri = F2FS_INODE(ipage);
+       struct timespec disk;  /* scratch: one on-page timestamp converted to CPU byte order */
+
+       if (!IS_INODE(ipage))  /* not an inode page: nothing to compare, report "same" */
+               return true;
+
+       disk.tv_sec = le64_to_cpu(ri->i_ctime);
+       disk.tv_nsec = le32_to_cpu(ri->i_ctime_nsec);
+       if (timespec_compare(&inode->i_ctime, &disk) > 0)  /* in-memory ctime is later than the page's */
+               return false;
+
+       disk.tv_sec = le64_to_cpu(ri->i_atime);
+       disk.tv_nsec = le32_to_cpu(ri->i_atime_nsec);
+       if (timespec_compare(&inode->i_atime, &disk) > 0)  /* same test for atime */
+               return false;
+
+       disk.tv_sec = le64_to_cpu(ri->i_mtime);
+       disk.tv_nsec = le32_to_cpu(ri->i_mtime_nsec);
+       if (timespec_compare(&inode->i_mtime, &disk) > 0)  /* same test for mtime */
+               return false;
+
+       return true;  /* no in-memory timestamp is later than the page's */
+}
+
  static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
  {
         unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi));
@@ -197,7 +223,10 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
                         goto next;
  
                 entry = get_fsync_inode(head, ino_of_node(page));
-               if (!entry) {
+               if (entry) {
+                       if (!is_same_inode(entry->inode, page))
+                               goto next;
+               } else {
                         if (IS_INODE(page) && is_dent_dnode(page)) {
                                 err = recover_inode_page(sbi, page);
                                 if (err)
@@ -459,8 +488,7 @@ out:
         return err;
  }
  
-static int recover_data(struct f2fs_sb_info *sbi,
-                               struct list_head *head, int type)
+static int recover_data(struct f2fs_sb_info *sbi, struct list_head *head)
  {
         unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi));
         struct curseg_info *curseg;
@@ -469,7 +497,7 @@ static int recover_data(struct f2fs_sb_info *sbi,
         block_t blkaddr;
  
         /* get node pages in the current segment */
-       curseg = CURSEG_I(sbi, type);
+       curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
         blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
  
         while (1) {
@@ -556,7 +584,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)
         need_writecp = true;
  
         /* step #2: recover data */
-       err = recover_data(sbi, &inode_list, CURSEG_WARM_NODE);
+       err = recover_data(sbi, &inode_list);
         if (!err)
                 f2fs_bug_on(sbi, !list_empty(&inode_list));
  out:
@@ -595,7 +623,7 @@ out:
                         .reason = CP_RECOVERY,
                 };
                 mutex_unlock(&sbi->cp_mutex);
-               write_checkpoint(sbi, &cpc);
+               err = write_checkpoint(sbi, &cpc);
         } else {
                 mutex_unlock(&sbi->cp_mutex);
         }
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c

index f77b325..5904a41 100644 (file)
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -86,6 +86,7 @@ static inline unsigned long __reverse_ffs(unsigned long word)
  /*
   * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c because
   * f2fs_set_bit makes MSB and LSB reversed in a byte.
+ * @size must be an integral multiple of BITS_PER_LONG (a whole number of unsigned longs).
   * Example:
   *                             MSB <--> LSB
   *   f2fs_set_bit(0, bitmap) => 1000 0000
@@ -95,94 +96,73 @@ static unsigned long __find_rev_next_bit(const unsigned long *addr,
                         unsigned long size, unsigned long offset)
  {
         const unsigned long *p = addr + BIT_WORD(offset);
-       unsigned long result = offset & ~(BITS_PER_LONG - 1);
+       unsigned long result = size;
         unsigned long tmp;
  
         if (offset >= size)
                 return size;
  
-       size -= result;
+       size -= (offset & ~(BITS_PER_LONG - 1));
         offset %= BITS_PER_LONG;
-       if (!offset)
-               goto aligned;
-
-       tmp = __reverse_ulong((unsigned char *)p);
-       tmp &= ~0UL >> offset;
-
-       if (size < BITS_PER_LONG)
-               goto found_first;
-       if (tmp)
-               goto found_middle;
-
-       size -= BITS_PER_LONG;
-       result += BITS_PER_LONG;
-       p++;
-aligned:
-       while (size & ~(BITS_PER_LONG-1)) {
+
+       while (1) {
+               if (*p == 0)
+                       goto pass;
+
                 tmp = __reverse_ulong((unsigned char *)p);
+
+               tmp &= ~0UL >> offset;
+               if (size < BITS_PER_LONG)
+                       tmp &= (~0UL << (BITS_PER_LONG - size));
                 if (tmp)
-                       goto found_middle;
-               result += BITS_PER_LONG;
+                       goto found;
+pass:
+               if (size <= BITS_PER_LONG)
+                       break;
                 size -= BITS_PER_LONG;
+               offset = 0;
                 p++;
         }
-       if (!size)
-               return result;
-
-       tmp = __reverse_ulong((unsigned char *)p);
-found_first:
-       tmp &= (~0UL << (BITS_PER_LONG - size));
-       if (!tmp)               /* Are any bits set? */
-               return result + size;   /* Nope. */
-found_middle:
-       return result + __reverse_ffs(tmp);
+       return result;
+found:
+       return result - size + __reverse_ffs(tmp);
  }
  
  static unsigned long __find_rev_next_zero_bit(const unsigned long *addr,
                         unsigned long size, unsigned long offset)
  {
         const unsigned long *p = addr + BIT_WORD(offset);
-       unsigned long result = offset & ~(BITS_PER_LONG - 1);
+       unsigned long result = size;  /* keeps the original @size; returned as-is when no zero bit is found */
         unsigned long tmp;
  
         if (offset >= size)
                 return size;
  
-       size -= result;
+       size -= (offset & ~(BITS_PER_LONG - 1));  /* size now counts bits from the start of the word p points at */
         offset %= BITS_PER_LONG;
-       if (!offset)
-               goto aligned;
-
-       tmp = __reverse_ulong((unsigned char *)p);
-       tmp |= ~((~0UL << offset) >> offset);
-
-       if (size < BITS_PER_LONG)
-               goto found_first;
-       if (tmp != ~0UL)
-               goto found_middle;
-
-       size -= BITS_PER_LONG;
-       result += BITS_PER_LONG;
-       p++;
-aligned:
-       while (size & ~(BITS_PER_LONG - 1)) {
+
+       while (1) {  /* one iteration per word */
+               if (*p == ~0UL)  /* word is all ones: no zero bit here, skip the reversal */
+                       goto pass;
+
                 tmp = __reverse_ulong((unsigned char *)p);
+
+               if (offset)  /* guard: with offset == 0 the shift below would be by BITS_PER_LONG */
+                       tmp |= ~0UL << (BITS_PER_LONG - offset);  /* force the top @offset bits to 1 so positions before the start are ignored */
+               if (size < BITS_PER_LONG)  /* final, partial word */
+                       tmp |= ~0UL >> size;  /* force the bits at positions >= size to 1 so they are ignored */
                 if (tmp != ~0UL)
-                       goto found_middle;
-               result += BITS_PER_LONG;
+                       goto found;
+pass:
+               if (size <= BITS_PER_LONG)  /* that was the last word */
+                       break;
                 size -= BITS_PER_LONG;
+               offset = 0;  /* only the first word has a starting offset */
                 p++;
         }
-       if (!size)
-               return result;
-
-       tmp = __reverse_ulong((unsigned char *)p);
-found_first:
-       tmp |= ~(~0UL << (BITS_PER_LONG - size));
-       if (tmp == ~0UL)        /* Are any bits zero? */
-               return result + size;   /* Nope. */
-found_middle:
-       return result + __reverse_ffz(tmp);
+       return result;  /* nothing found: the original @size */
+found:
+       return result - size + __reverse_ffz(tmp);  /* (result - size) is the bit index at which the current word starts */
  }
  
  void register_inmem_page(struct inode *inode, struct page *page)
@@ -233,7 +213,7 @@ int commit_inmem_pages(struct inode *inode, bool abort)
          * inode becomes free by iget_locked in f2fs_iget.
          */
         if (!abort) {
-               f2fs_balance_fs(sbi);
+               f2fs_balance_fs(sbi, true);
                 f2fs_lock_op(sbi);
         }
  
@@ -257,6 +237,7 @@ int commit_inmem_pages(struct inode *inode, bool abort)
                                 submit_bio = true;
                         }
                 } else {
+                       ClearPageUptodate(cur->page);
                         trace_f2fs_commit_inmem_page(cur->page, INMEM_DROP);
                 }
                 set_page_private(cur->page, 0);
@@ -281,8 +262,10 @@ int commit_inmem_pages(struct inode *inode, bool abort)
   * This function balances dirty node and dentry pages.
   * In addition, it controls garbage collection.
   */
-void f2fs_balance_fs(struct f2fs_sb_info *sbi)
+void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
  {
+       if (!need)
+               return;
         /*
          * We should do GC or end up with checkpoint, if there are so many dirty
          * dir/node pages without enough free segments.
@@ -310,8 +293,12 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
         if (!available_free_memory(sbi, NAT_ENTRIES) ||
                         excess_prefree_segs(sbi) ||
                         !available_free_memory(sbi, INO_ENTRIES) ||
-                       jiffies > sbi->cp_expires)
+                       (is_idle(sbi) && f2fs_time_over(sbi, CP_TIME))) {
+               if (test_opt(sbi, DATA_FLUSH))
+                       sync_dirty_inodes(sbi, FILE_INODE);
                 f2fs_sync_fs(sbi->sb, true);
+               stat_inc_bg_cp_count(sbi->stat_info);
+       }
  }
  
  static int issue_flush_thread(void *data)
@@ -1134,6 +1121,7 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
         __u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1;
         unsigned int start_segno, end_segno;
         struct cp_control cpc;
+       int err = 0;
  
         if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize)
                 return -EINVAL;
@@ -1164,12 +1152,12 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
                                 sbi->segs_per_sec) - 1, end_segno);
  
                 mutex_lock(&sbi->gc_mutex);
-               write_checkpoint(sbi, &cpc);
+               err = write_checkpoint(sbi, &cpc);
                 mutex_unlock(&sbi->gc_mutex);
         }
  out:
         range->len = F2FS_BLK_TO_BYTES(cpc.trimmed);
-       return 0;
+       return err;
  }
  
  static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type)
@@ -1749,13 +1737,13 @@ int lookup_journal_in_cursum(struct f2fs_summary_block *sum, int type,
                         if (le32_to_cpu(nid_in_journal(sum, i)) == val)
                                 return i;
                 }
-               if (alloc && nats_in_cursum(sum) < NAT_JOURNAL_ENTRIES)
+               if (alloc && __has_cursum_space(sum, 1, NAT_JOURNAL))
                         return update_nats_in_cursum(sum, 1);
         } else if (type == SIT_JOURNAL) {
                 for (i = 0; i < sits_in_cursum(sum); i++)
                         if (le32_to_cpu(segno_in_journal(sum, i)) == val)
                                 return i;
-               if (alloc && sits_in_cursum(sum) < SIT_JOURNAL_ENTRIES)
+               if (alloc && __has_cursum_space(sum, 1, SIT_JOURNAL))
                         return update_sits_in_cursum(sum, 1);
         }
         return -1;
diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c

index da0d8e0..93606f2 100644 (file)
--- a/fs/f2fs/shrinker.c
+++ b/fs/f2fs/shrinker.c
@@ -32,7 +32,8 @@ static unsigned long __count_free_nids(struct f2fs_sb_info *sbi)
  
  static unsigned long __count_extent_cache(struct f2fs_sb_info *sbi)
  {
-       return sbi->total_ext_tree + atomic_read(&sbi->total_ext_node);
+       return atomic_read(&sbi->total_zombie_tree) +  /* sum of two atomic counters: zombie trees ... */
+                               atomic_read(&sbi->total_ext_node);  /* ... plus extent nodes */
  }
  
  unsigned long f2fs_shrink_count(struct shrinker *shrink,
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c

index 3a65e01..3bf990b 100644 (file)
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -67,6 +67,7 @@ enum {
         Opt_extent_cache,
         Opt_noextent_cache,
         Opt_noinline_data,
+       Opt_data_flush,
         Opt_err,
  };
  
@@ -91,6 +92,7 @@ static match_table_t f2fs_tokens = {
         {Opt_extent_cache, "extent_cache"},
         {Opt_noextent_cache, "noextent_cache"},
         {Opt_noinline_data, "noinline_data"},
+       {Opt_data_flush, "data_flush"},
         {Opt_err, NULL},
  };
  
@@ -216,7 +218,8 @@ F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
  F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ra_nid_pages, ra_nid_pages);
  F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search);
  F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level);
-F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, cp_interval, cp_interval);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, cp_interval, interval_time[CP_TIME]);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, idle_interval, interval_time[REQ_TIME]);
  
  #define ATTR_LIST(name) (&f2fs_attr_##name.attr)
  static struct attribute *f2fs_attrs[] = {
@@ -235,6 +238,7 @@ static struct attribute *f2fs_attrs[] = {
         ATTR_LIST(ram_thresh),
         ATTR_LIST(ra_nid_pages),
         ATTR_LIST(cp_interval),
+       ATTR_LIST(idle_interval),
         NULL,
  };
  
@@ -406,6 +410,9 @@ static int parse_options(struct super_block *sb, char *options)
                 case Opt_noinline_data:
                         clear_opt(sbi, INLINE_DATA);
                         break;
+               case Opt_data_flush:
+                       set_opt(sbi, DATA_FLUSH);
+                       break;
                 default:
                         f2fs_msg(sb, KERN_ERR,
                                 "Unrecognized mount option \"%s\" or missing value",
@@ -432,6 +439,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
         fi->i_current_depth = 1;
         fi->i_advise = 0;
         init_rwsem(&fi->i_sem);
+       INIT_LIST_HEAD(&fi->dirty_list);
         INIT_LIST_HEAD(&fi->inmem_pages);
         mutex_init(&fi->inmem_lock);
  
@@ -548,7 +556,7 @@ static void f2fs_put_super(struct super_block *sb)
          * normally superblock is clean, so we need to release this.
          * In addition, EIO will skip do checkpoint, we need this as well.
          */
-       release_dirty_inode(sbi);
+       release_ino_entry(sbi);
         release_discard_addrs(sbi);
  
         f2fs_leave_shrinker(sbi);
@@ -566,13 +574,14 @@ static void f2fs_put_super(struct super_block *sb)
         wait_for_completion(&sbi->s_kobj_unregister);
  
         sb->s_fs_info = NULL;
-       brelse(sbi->raw_super_buf);
+       kfree(sbi->raw_super);
         kfree(sbi);
  }
  
  int f2fs_sync_fs(struct super_block *sb, int sync)
  {
         struct f2fs_sb_info *sbi = F2FS_SB(sb);
+       int err = 0;
  
         trace_f2fs_sync_fs(sb, sync);
  
@@ -582,14 +591,12 @@ int f2fs_sync_fs(struct super_block *sb, int sync)
                 cpc.reason = __get_cp_reason(sbi);
  
                 mutex_lock(&sbi->gc_mutex);
-               write_checkpoint(sbi, &cpc);
+               err = write_checkpoint(sbi, &cpc);
                 mutex_unlock(&sbi->gc_mutex);
-       } else {
-               f2fs_balance_fs(sbi);
         }
         f2fs_trace_ios(NULL, 1);
  
-       return 0;
+       return err;
  }
  
  static int f2fs_freeze(struct super_block *sb)
@@ -686,6 +693,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
                 seq_puts(seq, ",extent_cache");
         else
                 seq_puts(seq, ",noextent_cache");
+       if (test_opt(sbi, DATA_FLUSH))
+               seq_puts(seq, ",data_flush");
         seq_printf(seq, ",active_logs=%u", sbi->active_logs);
  
         return 0;
@@ -898,7 +907,7 @@ static const struct export_operations f2fs_export_ops = {
         .get_parent = f2fs_get_parent,
  };
  
-static loff_t max_file_size(unsigned bits)
+static loff_t max_file_blocks(void)
  {
         loff_t result = (DEF_ADDRS_PER_INODE - F2FS_INLINE_XATTR_ADDRS);
         loff_t leaf_count = ADDRS_PER_BLOCK;
@@ -914,10 +923,82 @@ static loff_t max_file_size(unsigned bits)
         leaf_count *= NIDS_PER_BLOCK;
         result += leaf_count;
  
-       result <<= bits;
         return result;
  }
  
+static inline bool sanity_check_area_boundary(struct super_block *sb,
+                                       struct f2fs_super_block *raw_super)
+{  /* Returns true (and logs why) if the area layout in @raw_super is inconsistent; false if every boundary lines up. */
+       u32 segment0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr);
+       u32 cp_blkaddr = le32_to_cpu(raw_super->cp_blkaddr);
+       u32 sit_blkaddr = le32_to_cpu(raw_super->sit_blkaddr);
+       u32 nat_blkaddr = le32_to_cpu(raw_super->nat_blkaddr);
+       u32 ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
+       u32 main_blkaddr = le32_to_cpu(raw_super->main_blkaddr);
+       u32 segment_count_ckpt = le32_to_cpu(raw_super->segment_count_ckpt);
+       u32 segment_count_sit = le32_to_cpu(raw_super->segment_count_sit);
+       u32 segment_count_nat = le32_to_cpu(raw_super->segment_count_nat);
+       u32 segment_count_ssa = le32_to_cpu(raw_super->segment_count_ssa);
+       u32 segment_count_main = le32_to_cpu(raw_super->segment_count_main);
+       u32 segment_count = le32_to_cpu(raw_super->segment_count);
+       u32 log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg);  /* used below as the shift turning a segment count into a block count */
+
+       if (segment0_blkaddr != cp_blkaddr) {  /* the CP area must start at segment 0's block address */
+               f2fs_msg(sb, KERN_INFO,
+                       "Mismatch start address, segment0(%u) cp_blkaddr(%u)",
+                       segment0_blkaddr, cp_blkaddr);
+               return true;
+       }
+
+       if (cp_blkaddr + (segment_count_ckpt << log_blocks_per_seg) !=  /* CP area must end exactly where SIT starts */
+                                                       sit_blkaddr) {
+               f2fs_msg(sb, KERN_INFO,
+                       "Wrong CP boundary, start(%u) end(%u) blocks(%u)",
+                       cp_blkaddr, sit_blkaddr,
+                       segment_count_ckpt << log_blocks_per_seg);
+               return true;
+       }
+
+       if (sit_blkaddr + (segment_count_sit << log_blocks_per_seg) !=  /* SIT area must end exactly where NAT starts */
+                                                       nat_blkaddr) {
+               f2fs_msg(sb, KERN_INFO,
+                       "Wrong SIT boundary, start(%u) end(%u) blocks(%u)",
+                       sit_blkaddr, nat_blkaddr,
+                       segment_count_sit << log_blocks_per_seg);
+               return true;
+       }
+
+       if (nat_blkaddr + (segment_count_nat << log_blocks_per_seg) !=  /* NAT area must end exactly where SSA starts */
+                                                       ssa_blkaddr) {
+               f2fs_msg(sb, KERN_INFO,
+                       "Wrong NAT boundary, start(%u) end(%u) blocks(%u)",
+                       nat_blkaddr, ssa_blkaddr,
+                       segment_count_nat << log_blocks_per_seg);
+               return true;
+       }
+
+       if (ssa_blkaddr + (segment_count_ssa << log_blocks_per_seg) !=  /* SSA area must end exactly where MAIN starts */
+                                                       main_blkaddr) {
+               f2fs_msg(sb, KERN_INFO,
+                       "Wrong SSA boundary, start(%u) end(%u) blocks(%u)",
+                       ssa_blkaddr, main_blkaddr,
+                       segment_count_ssa << log_blocks_per_seg);
+               return true;
+       }
+
+       if (main_blkaddr + (segment_count_main << log_blocks_per_seg) !=  /* MAIN must end where the total segment range, measured from segment 0, ends */
+               segment0_blkaddr + (segment_count << log_blocks_per_seg)) {
+               f2fs_msg(sb, KERN_INFO,
+                       "Wrong MAIN_AREA boundary, start(%u) end(%u) blocks(%u)",
+                       main_blkaddr,
+                       segment0_blkaddr + (segment_count << log_blocks_per_seg),
+                       segment_count_main << log_blocks_per_seg);
+               return true;
+       }
+
+       return false;  /* every boundary is consistent */
+}
+
  static int sanity_check_raw_super(struct super_block *sb,
                         struct f2fs_super_block *raw_super)
  {
@@ -947,6 +1028,14 @@ static int sanity_check_raw_super(struct super_block *sb,
                 return 1;
         }
  
+       /* check log blocks per segment */
+       if (le32_to_cpu(raw_super->log_blocks_per_seg) != 9) {
+               f2fs_msg(sb, KERN_INFO,
+                       "Invalid log blocks per segment (%u)\n",
+                       le32_to_cpu(raw_super->log_blocks_per_seg));
+               return 1;
+       }
+
         /* Currently, support 512/1024/2048/4096 bytes sector size */
         if (le32_to_cpu(raw_super->log_sectorsize) >
                                 F2FS_MAX_LOG_SECTOR_SIZE ||
@@ -965,6 +1054,23 @@ static int sanity_check_raw_super(struct super_block *sb,
                         le32_to_cpu(raw_super->log_sectorsize));
                 return 1;
         }
+
+       /* check reserved ino info */
+       if (le32_to_cpu(raw_super->node_ino) != 1 ||
+               le32_to_cpu(raw_super->meta_ino) != 2 ||
+               le32_to_cpu(raw_super->root_ino) != 3) {
+               f2fs_msg(sb, KERN_INFO,
+                       "Invalid Fs Meta Ino: node(%u) meta(%u) root(%u)",
+                       le32_to_cpu(raw_super->node_ino),
+                       le32_to_cpu(raw_super->meta_ino),
+                       le32_to_cpu(raw_super->root_ino));
+               return 1;
+       }
+
+       /* check CP/SIT/NAT/SSA/MAIN_AREA area boundary */
+       if (sanity_check_area_boundary(sb, raw_super))
+               return 1;
+
         return 0;
  }
  
@@ -1018,7 +1124,8 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
                 atomic_set(&sbi->nr_pages[i], 0);
  
         sbi->dir_level = DEF_DIR_LEVEL;
-       sbi->cp_interval = DEF_CP_INTERVAL;
+       sbi->interval_time[CP_TIME] = DEF_CP_INTERVAL;
+       sbi->interval_time[REQ_TIME] = DEF_IDLE_INTERVAL;
         clear_sbi_flag(sbi, SBI_NEED_FSCK);
  
         INIT_LIST_HEAD(&sbi->s_list);
@@ -1032,111 +1139,114 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
   */
  static int read_raw_super_block(struct super_block *sb,
                         struct f2fs_super_block **raw_super,
-                       struct buffer_head **raw_super_buf,
-                       int *recovery)
+                       int *valid_super_block, int *recovery)  /* valid_super_block: out, index (0 or 1) of the block copied into *raw_super */
  {
         int block = 0;
-       struct buffer_head *buffer;
-       struct f2fs_super_block *super;
+       struct buffer_head *bh;
+       struct f2fs_super_block *super, *buf;  /* super: heap copy handed back via *raw_super; buf: view into bh's data */
         int err = 0;
  
+       super = kzalloc(sizeof(struct f2fs_super_block), GFP_KERNEL);  /* allocated once, before the retry loop */
+       if (!super)
+               return -ENOMEM;
  retry:
-       buffer = sb_bread(sb, block);
-       if (!buffer) {
+       bh = sb_bread(sb, block);
+       if (!bh) {  /* read failure: flag recovery and move on to the other block */
                 *recovery = 1;
                 f2fs_msg(sb, KERN_ERR, "Unable to read %dth superblock",
                                 block + 1);
-               if (block == 0) {
-                       block++;
-                       goto retry;
-               } else {
-                       err = -EIO;
-                       goto out;
-               }
+               err = -EIO;  /* only returned if no block ends up valid */
+               goto next;
         }
  
-       super = (struct f2fs_super_block *)
-               ((char *)(buffer)->b_data + F2FS_SUPER_OFFSET);
+       buf = (struct f2fs_super_block *)(bh->b_data + F2FS_SUPER_OFFSET);
  
         /* sanity checking of raw super */
-       if (sanity_check_raw_super(sb, super)) {
-               brelse(buffer);
+       if (sanity_check_raw_super(sb, buf)) {  /* non-zero means the block failed validation */
+               brelse(bh);
                 *recovery = 1;
                 f2fs_msg(sb, KERN_ERR,
                         "Can't find valid F2FS filesystem in %dth superblock",
                                                                 block + 1);
-               if (block == 0) {
-                       block++;
-                       goto retry;
-               } else {
-                       err = -EINVAL;
-                       goto out;
-               }
+               err = -EINVAL;  /* only returned if no block ends up valid */
+               goto next;
         }
  
         if (!*raw_super) {
-               *raw_super_buf = buffer;
+               memcpy(super, buf, sizeof(*super));  /* first block that passes the checks wins */
+               *valid_super_block = block;
                 *raw_super = super;
-       } else {
-               /* already have a valid superblock */
-               brelse(buffer);
         }
+       brelse(bh);  /* contents were copied (or not needed), so the buffer is dropped either way */
  
+next:
         /* check the validity of the second superblock */
         if (block == 0) {
                 block++;
                 goto retry;
         }
  
-out:
         /* No valid superblock */
-       if (!*raw_super)
+       if (!*raw_super) {
+               kfree(super);  /* neither block was usable: release the unused copy */
                 return err;
+       }
  
         return 0;
  }
  
+static int __f2fs_commit_super(struct f2fs_sb_info *sbi, int block)
+{  /* Copy the in-memory raw superblock into device block @block and write it out synchronously; returns 0 or a negative errno. */
+       struct f2fs_super_block *super = F2FS_RAW_SUPER(sbi);
+       struct buffer_head *bh;
+       int err;
+
+       bh = sb_getblk(sbi->sb, block);
+       if (!bh)  /* no buffer obtained for the block */
+               return -EIO;
+
+       lock_buffer(bh);  /* buffer lock held while its contents and state bits are changed */
+       memcpy(bh->b_data + F2FS_SUPER_OFFSET, super, sizeof(*super));
+       set_buffer_uptodate(bh);
+       set_buffer_dirty(bh);
+       unlock_buffer(bh);
+
+       /* this is a rare path, so always issuing the write with FLUSH+FUA is acceptable */
+       err = __sync_dirty_buffer(bh, WRITE_FLUSH_FUA);
+       brelse(bh);  /* drop the reference taken by sb_getblk() */
+
+       return err;
+}
+
  int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover)
  {
-       struct buffer_head *sbh = sbi->raw_super_buf;
-       sector_t block = sbh->b_blocknr;
         int err;
  
         /* write back-up superblock first */
-       sbh->b_blocknr = block ? 0 : 1;
-       mark_buffer_dirty(sbh);
-       err = sync_dirty_buffer(sbh);
-
-       sbh->b_blocknr = block;
+       err = __f2fs_commit_super(sbi, sbi->valid_super_block ? 0 : 1);  /* the other slot: block 1 if the valid copy is block 0, else block 0 */
  
         /* if we are in recovery path, skip writing valid superblock */
         if (recover || err)
-               goto out;
+               return err;  /* err is 0 here when only @recover caused the early exit */
  
         /* write current valid superblock */
-       mark_buffer_dirty(sbh);
-       err = sync_dirty_buffer(sbh);
-out:
-       clear_buffer_write_io_error(sbh);
-       set_buffer_uptodate(sbh);
-       return err;
+       return __f2fs_commit_super(sbi, sbi->valid_super_block);  /* result of the second write is the function's result */
  }
  
  static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
  {
         struct f2fs_sb_info *sbi;
         struct f2fs_super_block *raw_super;
-       struct buffer_head *raw_super_buf;
         struct inode *root;
         long err;
         bool retry = true, need_fsck = false;
         char *options = NULL;
-       int recovery, i;
+       int recovery, i, valid_super_block;
  
  try_onemore:
         err = -EINVAL;
         raw_super = NULL;
-       raw_super_buf = NULL;
+       valid_super_block = -1;
         recovery = 0;
  
         /* allocate memory for f2fs-specific super block info */
@@ -1150,7 +1260,8 @@ try_onemore:
                 goto free_sbi;
         }
  
-       err = read_raw_super_block(sb, &raw_super, &raw_super_buf, &recovery);
+       err = read_raw_super_block(sb, &raw_super, &valid_super_block,
+                                                               &recovery);
         if (err)
                 goto free_sbi;
  
@@ -1167,7 +1278,9 @@ try_onemore:
         if (err)
                 goto free_options;
  
-       sb->s_maxbytes = max_file_size(le32_to_cpu(raw_super->log_blocksize));
+       sbi->max_file_blocks = max_file_blocks();
+       sb->s_maxbytes = sbi->max_file_blocks <<
+                               le32_to_cpu(raw_super->log_blocksize);
         sb->s_max_links = F2FS_LINK_MAX;
         get_random_bytes(&sbi->s_next_generation, sizeof(u32));
  
@@ -1183,7 +1296,7 @@ try_onemore:
         /* init f2fs-specific super block info */
         sbi->sb = sb;
         sbi->raw_super = raw_super;
-       sbi->raw_super_buf = raw_super_buf;
+       sbi->valid_super_block = valid_super_block;
         mutex_init(&sbi->gc_mutex);
         mutex_init(&sbi->writepages);
         mutex_init(&sbi->cp_mutex);
@@ -1236,8 +1349,10 @@ try_onemore:
                                 le64_to_cpu(sbi->ckpt->valid_block_count);
         sbi->last_valid_block_count = sbi->total_valid_block_count;
         sbi->alloc_valid_block_count = 0;
-       INIT_LIST_HEAD(&sbi->dir_inode_list);
-       spin_lock_init(&sbi->dir_inode_lock);
+       for (i = 0; i < NR_INODE_TYPE; i++) {
+               INIT_LIST_HEAD(&sbi->inode_list[i]);
+               spin_lock_init(&sbi->inode_lock[i]);
+       }
  
         init_extent_cache_info(sbi);
  
@@ -1355,12 +1470,14 @@ try_onemore:
                 f2fs_commit_super(sbi, true);
         }
  
-       sbi->cp_expires = round_jiffies_up(jiffies);
-
+       f2fs_update_time(sbi, CP_TIME);
+       f2fs_update_time(sbi, REQ_TIME);
         return 0;
  
  free_kobj:
         kobject_del(&sbi->s_kobj);
+       kobject_put(&sbi->s_kobj);
+       wait_for_completion(&sbi->s_kobj_unregister);
  free_proc:
         if (sbi->s_proc) {
                 remove_proc_entry("segment_info", sbi->s_proc);
@@ -1387,7 +1504,7 @@ free_meta_inode:
  free_options:
         kfree(options);
  free_sb_buf:
-       brelse(raw_super_buf);
+       kfree(raw_super);
  free_sbi:
         kfree(sbi);
  
@@ -1478,10 +1595,14 @@ static int __init init_f2fs_fs(void)
         err = register_filesystem(&f2fs_fs_type);
         if (err)
                 goto free_shrinker;
-       f2fs_create_root_stats();
+       err = f2fs_create_root_stats();
+       if (err)
+               goto free_filesystem;
         f2fs_proc_root = proc_mkdir("fs/f2fs", NULL);
         return 0;
  
+free_filesystem:
+       unregister_filesystem(&f2fs_fs_type);
  free_shrinker:
         unregister_shrinker(&f2fs_shrinker_info);
  free_crypto:
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c

index 036952a..10f1e78 100644 (file)
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -571,7 +571,7 @@ int f2fs_setxattr(struct inode *inode, int index, const char *name,
         if (ipage)
                 return __f2fs_setxattr(inode, index, name, value,
                                                 size, ipage, flags);
-       f2fs_balance_fs(sbi);
+       f2fs_balance_fs(sbi, true);
  
         f2fs_lock_op(sbi);
         /* protect xattr_ver */
@@ -580,5 +580,6 @@ int f2fs_setxattr(struct inode *inode, int index, const char *name,
         up_write(&F2FS_I(inode)->i_sem);
         f2fs_unlock_op(sbi);
  
+       f2fs_update_time(sbi, REQ_TIME);
         return err;
  }
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h

index 25c6324..e59c3be 100644 (file)
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -51,6 +51,7 @@
  #define MAX_ACTIVE_DATA_LOGS   8
  
  #define VERSION_LEN    256
+#define MAX_VOLUME_NAME                512
  
  /*
   * For superblock
@@ -84,7 +85,7 @@ struct f2fs_super_block {
         __le32 node_ino;                /* node inode number */
         __le32 meta_ino;                /* meta inode number */
         __u8 uuid[16];                  /* 128-bit uuid for volume */
-       __le16 volume_name[512];        /* volume name */
+       __le16 volume_name[MAX_VOLUME_NAME];    /* volume name */
         __le32 extension_count;         /* # of extensions below */
         __u8 extension_list[F2FS_MAX_EXTENSION][8];     /* extension array */
         __le32 cp_payload;
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h

index 00b4a63..a1b4888 100644 (file)
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -1265,6 +1265,44 @@ TRACE_EVENT(f2fs_destroy_extent_tree,
                 __entry->node_cnt)
  );
  
+DECLARE_EVENT_CLASS(f2fs_sync_dirty_inodes,
+
+       TP_PROTO(struct super_block *sb, int type, int count),
+
+       TP_ARGS(sb, type, count),
+
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(int, type)
+               __field(int, count)
+       ),
+
+       TP_fast_assign(
+               __entry->dev    = sb->s_dev;
+               __entry->type   = type;
+               __entry->count  = count;
+       ),
+
+       TP_printk("dev = (%d,%d), %s, dirty count = %d",
+               show_dev(__entry),
+               show_file_type(__entry->type),
+               __entry->count)
+);
+
+DEFINE_EVENT(f2fs_sync_dirty_inodes, f2fs_sync_dirty_inodes_enter,
+
+       TP_PROTO(struct super_block *sb, int type, int count),
+
+       TP_ARGS(sb, type, count)
+);
+
+DEFINE_EVENT(f2fs_sync_dirty_inodes, f2fs_sync_dirty_inodes_exit,
+
+       TP_PROTO(struct super_block *sb, int type, int count),
+
+       TP_ARGS(sb, type, count)
+);
+
  #endif /* _TRACE_F2FS_H */
  
   /* This part must be outside protection */
author	Linus Torvalds <torvalds@linux-foundation.org>
	Thu, 14 Jan 2016 05:01:44 +0000 (21:01 -0800)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Thu, 14 Jan 2016 05:01:44 +0000 (21:01 -0800)
Documentation/ABI/testing/sysfs-fs-f2fs		patch \| blob \| history
Documentation/filesystems/f2fs.txt		patch \| blob \| history
fs/f2fs/checkpoint.c		patch \| blob \| history
fs/f2fs/data.c		patch \| blob \| history
fs/f2fs/debug.c		patch \| blob \| history
fs/f2fs/dir.c		patch \| blob \| history
fs/f2fs/extent_cache.c		patch \| blob \| history
fs/f2fs/f2fs.h		patch \| blob \| history
fs/f2fs/file.c		patch \| blob \| history
fs/f2fs/gc.c		patch \| blob \| history
fs/f2fs/gc.h		patch \| blob \| history
fs/f2fs/inline.c		patch \| blob \| history
fs/f2fs/inode.c		patch \| blob \| history
fs/f2fs/namei.c		patch \| blob \| history
fs/f2fs/node.c		patch \| blob \| history
fs/f2fs/node.h		patch \| blob \| history
fs/f2fs/recovery.c		patch \| blob \| history
fs/f2fs/segment.c		patch \| blob \| history
fs/f2fs/shrinker.c		patch \| blob \| history
fs/f2fs/super.c		patch \| blob \| history
fs/f2fs/xattr.c		patch \| blob \| history
include/linux/f2fs_fs.h		patch \| blob \| history
include/trace/events/f2fs.h		patch \| blob \| history