Description:
Controls the checkpoint timing.
+What: /sys/fs/f2fs/<disk>/idle_interval
+Date: January 2016
+Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
+Description:
+ Controls the idle timing.
+
What: /sys/fs/f2fs/<disk>/ra_nid_pages
Date: October 2015
Contact: "Chao Yu" <chao2.yu@samsung.com>
collection, triggered in background when I/O subsystem is
idle. If background_gc=on, it will turn on the garbage
collection and if background_gc=off, garbage collection
- will be truned off. If background_gc=sync, it will turn
+ will be turned off. If background_gc=sync, it will turn
on synchronous garbage collection running in background.
Default value for this option is on. So garbage
collection is on by default.
as many as extent which map between contiguous logical
address and physical address per inode, resulting in
increasing the cache hit ratio. Set by default.
-noextent_cache Diable an extent cache based on rb-tree explicitly, see
+noextent_cache Disable an extent cache based on rb-tree explicitly, see
the above extent_cache mount option.
noinline_data Disable the inline data feature, inline data feature is
enabled by default.
+data_flush Enable data flushing before checkpoint in order to
+ persist data of regular and symlink.
================================================================================
DEBUGFS ENTRIES
policy for garbage collection. Setting gc_idle = 0
(default) will disable this option. Setting
gc_idle = 1 will select the Cost Benefit approach
- & setting gc_idle = 2 will select the greedy aproach.
+ & setting gc_idle = 2 will select the greedy approach.
reclaim_segments This parameter controls the number of prefree
segments to be reclaimed. If the number of prefree
file. Each file is dump_ssa and dump_sit.
The dump.f2fs is used to debug on-disk data structures of the f2fs filesystem.
-It shows on-disk inode information reconized by a given inode number, and is
+It shows on-disk inode information recognized by a given inode number, and is
able to dump all the SSA and SIT entries into predefined files, ./dump_ssa and
./dump_sit respectively.
dec_page_count(sbi, F2FS_DIRTY_META);
unlock_page(page);
- if (wbc->for_reclaim)
+ if (wbc->for_reclaim || unlikely(f2fs_cp_error(sbi)))
f2fs_submit_merged_bio(sbi, META, WRITE);
return 0;
spin_unlock(&im->ino_lock);
}
-void add_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type)
+void add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
{
/* add new dirty ino entry into list */
__add_ino_entry(sbi, ino, type);
}
-void remove_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type)
+void remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
{
/* remove dirty ino entry from list */
__remove_ino_entry(sbi, ino, type);
return e ? true : false;
}
-void release_dirty_inode(struct f2fs_sb_info *sbi)
+void release_ino_entry(struct f2fs_sb_info *sbi)
{
struct ino_entry *e, *tmp;
int i;
return -EINVAL;
}
-static int __add_dirty_inode(struct inode *inode, struct inode_entry *new)
+static void __add_dirty_inode(struct inode *inode, enum inode_type type)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct f2fs_inode_info *fi = F2FS_I(inode);
+ int flag = (type == DIR_INODE) ? FI_DIRTY_DIR : FI_DIRTY_FILE;
- if (is_inode_flag_set(F2FS_I(inode), FI_DIRTY_DIR))
- return -EEXIST;
+ if (is_inode_flag_set(fi, flag))
+ return;
- set_inode_flag(F2FS_I(inode), FI_DIRTY_DIR);
- F2FS_I(inode)->dirty_dir = new;
- list_add_tail(&new->list, &sbi->dir_inode_list);
- stat_inc_dirty_dir(sbi);
- return 0;
+ set_inode_flag(fi, flag);
+ list_add_tail(&fi->dirty_list, &sbi->inode_list[type]);
+ stat_inc_dirty_inode(sbi, type);
+}
+
+static void __remove_dirty_inode(struct inode *inode, enum inode_type type)
+{
+ struct f2fs_inode_info *fi = F2FS_I(inode);
+ int flag = (type == DIR_INODE) ? FI_DIRTY_DIR : FI_DIRTY_FILE;
+
+ if (get_dirty_pages(inode) ||
+ !is_inode_flag_set(F2FS_I(inode), flag))
+ return;
+
+ list_del_init(&fi->dirty_list);
+ clear_inode_flag(fi, flag);
+ stat_dec_dirty_inode(F2FS_I_SB(inode), type);
}
void update_dirty_page(struct inode *inode, struct page *page)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
- struct inode_entry *new;
- int ret = 0;
+ enum inode_type type = S_ISDIR(inode->i_mode) ? DIR_INODE : FILE_INODE;
if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode) &&
!S_ISLNK(inode->i_mode))
return;
- if (!S_ISDIR(inode->i_mode)) {
- inode_inc_dirty_pages(inode);
- goto out;
- }
-
- new = f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
- new->inode = inode;
- INIT_LIST_HEAD(&new->list);
-
- spin_lock(&sbi->dir_inode_lock);
- ret = __add_dirty_inode(inode, new);
+ spin_lock(&sbi->inode_lock[type]);
+ __add_dirty_inode(inode, type);
inode_inc_dirty_pages(inode);
- spin_unlock(&sbi->dir_inode_lock);
+ spin_unlock(&sbi->inode_lock[type]);
- if (ret)
- kmem_cache_free(inode_entry_slab, new);
-out:
SetPagePrivate(page);
f2fs_trace_pid(page);
}
void add_dirty_dir_inode(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
- struct inode_entry *new =
- f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
- int ret = 0;
-
- new->inode = inode;
- INIT_LIST_HEAD(&new->list);
- spin_lock(&sbi->dir_inode_lock);
- ret = __add_dirty_inode(inode, new);
- spin_unlock(&sbi->dir_inode_lock);
-
- if (ret)
- kmem_cache_free(inode_entry_slab, new);
+ spin_lock(&sbi->inode_lock[DIR_INODE]);
+ __add_dirty_inode(inode, DIR_INODE);
+ spin_unlock(&sbi->inode_lock[DIR_INODE]);
}
-void remove_dirty_dir_inode(struct inode *inode)
+void remove_dirty_inode(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
- struct inode_entry *entry;
-
- if (!S_ISDIR(inode->i_mode))
- return;
+ struct f2fs_inode_info *fi = F2FS_I(inode);
+ enum inode_type type = S_ISDIR(inode->i_mode) ? DIR_INODE : FILE_INODE;
- spin_lock(&sbi->dir_inode_lock);
- if (get_dirty_pages(inode) ||
- !is_inode_flag_set(F2FS_I(inode), FI_DIRTY_DIR)) {
- spin_unlock(&sbi->dir_inode_lock);
+ if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode) &&
+ !S_ISLNK(inode->i_mode))
return;
- }
- entry = F2FS_I(inode)->dirty_dir;
- list_del(&entry->list);
- F2FS_I(inode)->dirty_dir = NULL;
- clear_inode_flag(F2FS_I(inode), FI_DIRTY_DIR);
- stat_dec_dirty_dir(sbi);
- spin_unlock(&sbi->dir_inode_lock);
- kmem_cache_free(inode_entry_slab, entry);
+ spin_lock(&sbi->inode_lock[type]);
+ __remove_dirty_inode(inode, type);
+ spin_unlock(&sbi->inode_lock[type]);
/* Only from the recovery routine */
- if (is_inode_flag_set(F2FS_I(inode), FI_DELAY_IPUT)) {
- clear_inode_flag(F2FS_I(inode), FI_DELAY_IPUT);
+ if (is_inode_flag_set(fi, FI_DELAY_IPUT)) {
+ clear_inode_flag(fi, FI_DELAY_IPUT);
iput(inode);
}
}
-void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi)
+int sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type)
{
struct list_head *head;
- struct inode_entry *entry;
struct inode *inode;
+ struct f2fs_inode_info *fi;
+ bool is_dir = (type == DIR_INODE);
+
+ trace_f2fs_sync_dirty_inodes_enter(sbi->sb, is_dir,
+ get_pages(sbi, is_dir ?
+ F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA));
retry:
if (unlikely(f2fs_cp_error(sbi)))
- return;
+ return -EIO;
- spin_lock(&sbi->dir_inode_lock);
+ spin_lock(&sbi->inode_lock[type]);
- head = &sbi->dir_inode_list;
+ head = &sbi->inode_list[type];
if (list_empty(head)) {
- spin_unlock(&sbi->dir_inode_lock);
- return;
+ spin_unlock(&sbi->inode_lock[type]);
+ trace_f2fs_sync_dirty_inodes_exit(sbi->sb, is_dir,
+ get_pages(sbi, is_dir ?
+ F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA));
+ return 0;
}
- entry = list_entry(head->next, struct inode_entry, list);
- inode = igrab(entry->inode);
- spin_unlock(&sbi->dir_inode_lock);
+ fi = list_entry(head->next, struct f2fs_inode_info, dirty_list);
+ inode = igrab(&fi->vfs_inode);
+ spin_unlock(&sbi->inode_lock[type]);
if (inode) {
filemap_fdatawrite(inode->i_mapping);
iput(inode);
/* write all the dirty dentry pages */
if (get_pages(sbi, F2FS_DIRTY_DENTS)) {
f2fs_unlock_all(sbi);
- sync_dirty_dir_inodes(sbi);
- if (unlikely(f2fs_cp_error(sbi))) {
- err = -EIO;
+ err = sync_dirty_inodes(sbi, DIR_INODE);
+ if (err)
goto out;
- }
goto retry_flush_dents;
}
if (get_pages(sbi, F2FS_DIRTY_NODES)) {
up_write(&sbi->node_write);
- sync_node_pages(sbi, 0, &wbc);
- if (unlikely(f2fs_cp_error(sbi))) {
+ err = sync_node_pages(sbi, 0, &wbc);
+ if (err) {
f2fs_unlock_all(sbi);
- err = -EIO;
goto out;
}
goto retry_flush_nodes;
finish_wait(&sbi->cp_wait, &wait);
}
-static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
+static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
while (get_pages(sbi, F2FS_DIRTY_META)) {
sync_meta_pages(sbi, META, LONG_MAX);
if (unlikely(f2fs_cp_error(sbi)))
- return;
+ return -EIO;
}
next_free_nid(sbi, &last_nid);
/* need to wait for end_io results */
wait_on_all_pages_writeback(sbi);
if (unlikely(f2fs_cp_error(sbi)))
- return;
+ return -EIO;
/* write out checkpoint buffer at block 0 */
update_meta_page(sbi, ckpt, start_blk++);
wait_on_all_pages_writeback(sbi);
if (unlikely(f2fs_cp_error(sbi)))
- return;
+ return -EIO;
filemap_fdatawait_range(NODE_MAPPING(sbi), 0, LONG_MAX);
filemap_fdatawait_range(META_MAPPING(sbi), 0, LONG_MAX);
invalidate_mapping_pages(META_MAPPING(sbi), discard_blk,
discard_blk);
- release_dirty_inode(sbi);
+ release_ino_entry(sbi);
if (unlikely(f2fs_cp_error(sbi)))
- return;
+ return -EIO;
clear_prefree_segments(sbi, cpc);
clear_sbi_flag(sbi, SBI_IS_DIRTY);
+
+ return 0;
}
/*
* We guarantee that this checkpoint procedure will not fail.
*/
-void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
+int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
unsigned long long ckpt_ver;
+ int err = 0;
mutex_lock(&sbi->cp_mutex);
(cpc->reason == CP_FASTBOOT || cpc->reason == CP_SYNC ||
(cpc->reason == CP_DISCARD && !sbi->discard_blks)))
goto out;
- if (unlikely(f2fs_cp_error(sbi)))
+ if (unlikely(f2fs_cp_error(sbi))) {
+ err = -EIO;
goto out;
- if (f2fs_readonly(sbi->sb))
+ }
+ if (f2fs_readonly(sbi->sb)) {
+ err = -EROFS;
goto out;
+ }
trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "start block_ops");
- if (block_operations(sbi))
+ err = block_operations(sbi);
+ if (err)
goto out;
trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish block_ops");
flush_sit_entries(sbi, cpc);
/* unlock all the fs_lock[] in do_checkpoint() */
- do_checkpoint(sbi, cpc);
+ err = do_checkpoint(sbi, cpc);
unblock_operations(sbi);
stat_inc_cp_count(sbi->stat_info);
"checkpoint: version = %llx", ckpt_ver);
/* do checkpoint periodically */
- sbi->cp_expires = round_jiffies_up(jiffies + HZ * sbi->cp_interval);
+ f2fs_update_time(sbi, CP_TIME);
+ trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint");
out:
mutex_unlock(&sbi->cp_mutex);
- trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint");
+ return err;
}
void init_ino_entry_info(struct f2fs_sb_info *sbi)
/* Get physical address of data block */
addr_array = blkaddr_in_node(rn);
addr_array[ofs_in_node] = cpu_to_le32(dn->data_blkaddr);
- set_page_dirty(node_page);
+ if (set_page_dirty(node_page))
+ dn->node_changed = true;
}
int reserve_new_block(struct dnode_of_data *dn)
struct page *page;
struct dnode_of_data dn;
int err;
-repeat:
+
page = f2fs_grab_cache_page(mapping, index, true);
if (!page) {
/*
} else {
f2fs_put_page(page, 1);
- page = get_read_data_page(inode, index, READ_SYNC, true);
+ /* if ipage exists, blkaddr should be NEW_ADDR */
+ f2fs_bug_on(F2FS_I_SB(inode), ipage);
+ page = get_lock_data_page(inode, index, true);
if (IS_ERR(page))
- goto repeat;
-
- /* wait for read completion */
- lock_page(page);
+ return page;
}
got_it:
if (new_i_size && i_size_read(inode) <
if (i_size_read(dn->inode) < ((loff_t)(fofs + 1) << PAGE_CACHE_SHIFT))
i_size_write(dn->inode,
((loff_t)(fofs + 1) << PAGE_CACHE_SHIFT));
-
- /* direct IO doesn't use extent cache to maximize the performance */
- f2fs_drop_largest_extent(dn->inode, fofs);
-
return 0;
}
-static void __allocate_data_blocks(struct inode *inode, loff_t offset,
+static int __allocate_data_blocks(struct inode *inode, loff_t offset,
size_t count)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
u64 len = F2FS_BYTES_TO_BLK(count);
bool allocated;
u64 end_offset;
+ int err = 0;
while (len) {
- f2fs_balance_fs(sbi);
f2fs_lock_op(sbi);
/* When reading holes, we need its node page */
set_new_dnode(&dn, inode, NULL, NULL, 0);
- if (get_dnode_of_data(&dn, start, ALLOC_NODE))
+ err = get_dnode_of_data(&dn, start, ALLOC_NODE);
+ if (err)
goto out;
allocated = false;
while (dn.ofs_in_node < end_offset && len) {
block_t blkaddr;
- if (unlikely(f2fs_cp_error(sbi)))
+ if (unlikely(f2fs_cp_error(sbi))) {
+ err = -EIO;
goto sync_out;
+ }
blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
if (blkaddr == NULL_ADDR || blkaddr == NEW_ADDR) {
- if (__allocate_data_block(&dn))
+ err = __allocate_data_block(&dn);
+ if (err)
goto sync_out;
allocated = true;
}
f2fs_put_dnode(&dn);
f2fs_unlock_op(sbi);
+
+ f2fs_balance_fs(sbi, dn.node_changed);
}
- return;
+ return err;
sync_out:
if (allocated)
f2fs_put_dnode(&dn);
out:
f2fs_unlock_op(sbi);
- return;
+ f2fs_balance_fs(sbi, dn.node_changed);
+ return err;
}
/*
* b. do not use extent cache for better performance
* c. give the block addresses to blockdev
*/
-static int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
+int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
int create, int flag)
{
unsigned int maxblocks = map->m_len;
int err = 0, ofs = 1;
struct extent_info ei;
bool allocated = false;
+ block_t blkaddr;
map->m_len = 0;
map->m_flags = 0;
}
if (create)
- f2fs_lock_op(F2FS_I_SB(inode));
+ f2fs_lock_op(sbi);
/* When reading holes, we need its node page */
set_new_dnode(&dn, inode, NULL, NULL, 0);
pgofs++;
get_next:
+ if (map->m_len >= maxblocks)
+ goto sync_out;
+
if (dn.ofs_in_node >= end_offset) {
if (allocated)
sync_inode_page(&dn);
allocated = false;
f2fs_put_dnode(&dn);
+ if (create) {
+ f2fs_unlock_op(sbi);
+ f2fs_balance_fs(sbi, dn.node_changed);
+ f2fs_lock_op(sbi);
+ }
+
set_new_dnode(&dn, inode, NULL, NULL, 0);
err = get_dnode_of_data(&dn, pgofs, mode);
if (err) {
end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
}
- if (maxblocks > map->m_len) {
- block_t blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
+ blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
- if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) {
- if (create) {
- if (unlikely(f2fs_cp_error(sbi))) {
- err = -EIO;
- goto sync_out;
- }
- err = __allocate_data_block(&dn);
- if (err)
- goto sync_out;
- allocated = true;
- map->m_flags |= F2FS_MAP_NEW;
- blkaddr = dn.data_blkaddr;
- } else {
- /*
- * we only merge preallocated unwritten blocks
- * for fiemap.
- */
- if (flag != F2FS_GET_BLOCK_FIEMAP ||
- blkaddr != NEW_ADDR)
- goto sync_out;
+ if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) {
+ if (create) {
+ if (unlikely(f2fs_cp_error(sbi))) {
+ err = -EIO;
+ goto sync_out;
}
+ err = __allocate_data_block(&dn);
+ if (err)
+ goto sync_out;
+ allocated = true;
+ map->m_flags |= F2FS_MAP_NEW;
+ blkaddr = dn.data_blkaddr;
+ } else {
+ /*
+ * we only merge preallocated unwritten blocks
+ * for fiemap.
+ */
+ if (flag != F2FS_GET_BLOCK_FIEMAP ||
+ blkaddr != NEW_ADDR)
+ goto sync_out;
}
+ }
- /* Give more consecutive addresses for the readahead */
- if ((map->m_pblk != NEW_ADDR &&
- blkaddr == (map->m_pblk + ofs)) ||
- (map->m_pblk == NEW_ADDR &&
- blkaddr == NEW_ADDR)) {
- ofs++;
- dn.ofs_in_node++;
- pgofs++;
- map->m_len++;
- goto get_next;
- }
+ /* Give more consecutive addresses for the readahead */
+ if ((map->m_pblk != NEW_ADDR &&
+ blkaddr == (map->m_pblk + ofs)) ||
+ (map->m_pblk == NEW_ADDR &&
+ blkaddr == NEW_ADDR)) {
+ ofs++;
+ dn.ofs_in_node++;
+ pgofs++;
+ map->m_len++;
+ goto get_next;
}
+
sync_out:
if (allocated)
sync_inode_page(&dn);
put_out:
f2fs_put_dnode(&dn);
unlock_out:
- if (create)
- f2fs_unlock_op(F2FS_I_SB(inode));
+ if (create) {
+ f2fs_unlock_op(sbi);
+ f2fs_balance_fs(sbi, dn.node_changed);
+ }
out:
trace_f2fs_map_blocks(inode, map, err);
return err;
static int get_data_block_bmap(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create)
{
+ /* Block number less than F2FS MAX BLOCKS */
+ if (unlikely(iblock >= F2FS_I_SB(inode)->max_file_blocks))
+ return -EFBIG;
+
return __get_data_block(inode, iblock, bh_result, create,
F2FS_GET_BLOCK_BMAP);
}
{
struct buffer_head map_bh;
sector_t start_blk, last_blk;
- loff_t isize = i_size_read(inode);
+ loff_t isize;
u64 logical = 0, phys = 0, size = 0;
u32 flags = 0;
- bool past_eof = false, whole_file = false;
int ret = 0;
ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
mutex_lock(&inode->i_mutex);
- if (len >= isize) {
- whole_file = true;
- len = isize;
- }
+ isize = i_size_read(inode);
+ if (start >= isize)
+ goto out;
+
+ if (start + len > isize)
+ len = isize - start;
if (logical_to_blk(inode, len) == 0)
len = blk_to_logical(inode, 1);
start_blk = logical_to_blk(inode, start);
last_blk = logical_to_blk(inode, start + len - 1);
+
next:
memset(&map_bh, 0, sizeof(struct buffer_head));
map_bh.b_size = len;
/* HOLE */
if (!buffer_mapped(&map_bh)) {
- start_blk++;
-
- if (!past_eof && blk_to_logical(inode, start_blk) >= isize)
- past_eof = 1;
-
- if (past_eof && size) {
- flags |= FIEMAP_EXTENT_LAST;
- ret = fiemap_fill_next_extent(fieinfo, logical,
- phys, size, flags);
- } else if (size) {
- ret = fiemap_fill_next_extent(fieinfo, logical,
- phys, size, flags);
- size = 0;
- }
+ /* Go through holes util pass the EOF */
+ if (blk_to_logical(inode, start_blk++) < isize)
+ goto prep_next;
+ /* Found a hole beyond isize means no more extents.
+ * Note that the premise is that filesystems don't
+ * punch holes beyond isize and keep size unchanged.
+ */
+ flags |= FIEMAP_EXTENT_LAST;
+ }
- /* if we have holes up to/past EOF then we're done */
- if (start_blk > last_blk || past_eof || ret)
- goto out;
- } else {
- if (start_blk > last_blk && !whole_file) {
- ret = fiemap_fill_next_extent(fieinfo, logical,
- phys, size, flags);
- goto out;
- }
+ if (size) {
+ if (f2fs_encrypted_inode(inode))
+ flags |= FIEMAP_EXTENT_DATA_ENCRYPTED;
- /*
- * if size != 0 then we know we already have an extent
- * to add, so add it.
- */
- if (size) {
- ret = fiemap_fill_next_extent(fieinfo, logical,
- phys, size, flags);
- if (ret)
- goto out;
- }
+ ret = fiemap_fill_next_extent(fieinfo, logical,
+ phys, size, flags);
+ }
- logical = blk_to_logical(inode, start_blk);
- phys = blk_to_logical(inode, map_bh.b_blocknr);
- size = map_bh.b_size;
- flags = 0;
- if (buffer_unwritten(&map_bh))
- flags = FIEMAP_EXTENT_UNWRITTEN;
+ if (start_blk > last_blk || ret)
+ goto out;
- start_blk += logical_to_blk(inode, size);
+ logical = blk_to_logical(inode, start_blk);
+ phys = blk_to_logical(inode, map_bh.b_blocknr);
+ size = map_bh.b_size;
+ flags = 0;
+ if (buffer_unwritten(&map_bh))
+ flags = FIEMAP_EXTENT_UNWRITTEN;
- /*
- * If we are past the EOF, then we need to make sure as
- * soon as we find a hole that the last extent we found
- * is marked with FIEMAP_EXTENT_LAST
- */
- if (!past_eof && logical + size >= isize)
- past_eof = true;
- }
+ start_blk += logical_to_blk(inode, size);
+
+prep_next:
cond_resched();
if (fatal_signal_pending(current))
ret = -EINTR;
*/
if (unlikely(fio->blk_addr != NEW_ADDR &&
!is_cold_data(page) &&
+ !IS_ATOMIC_WRITTEN_PAGE(page) &&
need_inplace_update(inode))) {
rewrite_data_page(fio);
set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE);
if (err)
ClearPageUptodate(page);
unlock_page(page);
- if (need_balance_fs)
- f2fs_balance_fs(sbi);
- if (wbc->for_reclaim)
+ f2fs_balance_fs(sbi, need_balance_fs);
+ if (wbc->for_reclaim || unlikely(f2fs_cp_error(sbi))) {
f2fs_submit_merged_bio(sbi, DATA, WRITE);
+ remove_dirty_inode(inode);
+ }
return 0;
redirty_out:
available_free_memory(sbi, DIRTY_DENTS))
goto skip_write;
+ /* skip writing during file defragment */
+ if (is_inode_flag_set(F2FS_I(inode), FI_DO_DEFRAG))
+ goto skip_write;
+
/* during POR, we don't need to trigger writepage at all. */
if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
goto skip_write;
if (locked)
mutex_unlock(&sbi->writepages);
- remove_dirty_dir_inode(inode);
+ remove_dirty_inode(inode);
wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff);
return ret;
static void f2fs_write_failed(struct address_space *mapping, loff_t to)
{
struct inode *inode = mapping->host;
+ loff_t i_size = i_size_read(inode);
- if (to > inode->i_size) {
- truncate_pagecache(inode, inode->i_size);
- truncate_blocks(inode, inode->i_size, true);
+ if (to > i_size) {
+ truncate_pagecache(inode, i_size);
+ truncate_blocks(inode, i_size, true);
}
}
+static int prepare_write_begin(struct f2fs_sb_info *sbi,
+ struct page *page, loff_t pos, unsigned len,
+ block_t *blk_addr, bool *node_changed)
+{
+ struct inode *inode = page->mapping->host;
+ pgoff_t index = page->index;
+ struct dnode_of_data dn;
+ struct page *ipage;
+ bool locked = false;
+ struct extent_info ei;
+ int err = 0;
+
+ if (f2fs_has_inline_data(inode) ||
+ (pos & PAGE_CACHE_MASK) >= i_size_read(inode)) {
+ f2fs_lock_op(sbi);
+ locked = true;
+ }
+restart:
+ /* check inline_data */
+ ipage = get_node_page(sbi, inode->i_ino);
+ if (IS_ERR(ipage)) {
+ err = PTR_ERR(ipage);
+ goto unlock_out;
+ }
+
+ set_new_dnode(&dn, inode, ipage, ipage, 0);
+
+ if (f2fs_has_inline_data(inode)) {
+ if (pos + len <= MAX_INLINE_DATA) {
+ read_inline_data(page, ipage);
+ set_inode_flag(F2FS_I(inode), FI_DATA_EXIST);
+ sync_inode_page(&dn);
+ } else {
+ err = f2fs_convert_inline_page(&dn, page);
+ if (err)
+ goto out;
+ if (dn.data_blkaddr == NULL_ADDR)
+ err = f2fs_get_block(&dn, index);
+ }
+ } else if (locked) {
+ err = f2fs_get_block(&dn, index);
+ } else {
+ if (f2fs_lookup_extent_cache(inode, index, &ei)) {
+ dn.data_blkaddr = ei.blk + index - ei.fofs;
+ } else {
+ bool restart = false;
+
+ /* hole case */
+ err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
+ if (err || (!err && dn.data_blkaddr == NULL_ADDR))
+ restart = true;
+ if (restart) {
+ f2fs_put_dnode(&dn);
+ f2fs_lock_op(sbi);
+ locked = true;
+ goto restart;
+ }
+ }
+ }
+
+ /* convert_inline_page can make node_changed */
+ *blk_addr = dn.data_blkaddr;
+ *node_changed = dn.node_changed;
+out:
+ f2fs_put_dnode(&dn);
+unlock_out:
+ if (locked)
+ f2fs_unlock_op(sbi);
+ return err;
+}
+
static int f2fs_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata)
struct inode *inode = mapping->host;
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct page *page = NULL;
- struct page *ipage;
pgoff_t index = ((unsigned long long) pos) >> PAGE_CACHE_SHIFT;
- struct dnode_of_data dn;
+ bool need_balance = false;
+ block_t blkaddr = NULL_ADDR;
int err = 0;
trace_f2fs_write_begin(inode, pos, len, flags);
- f2fs_balance_fs(sbi);
-
/*
* We should check this at this moment to avoid deadlock on inode page
* and #0 page. The locking rule for inline_data conversion should be:
*pagep = page;
- f2fs_lock_op(sbi);
-
- /* check inline_data */
- ipage = get_node_page(sbi, inode->i_ino);
- if (IS_ERR(ipage)) {
- err = PTR_ERR(ipage);
- goto unlock_fail;
- }
-
- set_new_dnode(&dn, inode, ipage, ipage, 0);
+ err = prepare_write_begin(sbi, page, pos, len,
+ &blkaddr, &need_balance);
+ if (err)
+ goto fail;
- if (f2fs_has_inline_data(inode)) {
- if (pos + len <= MAX_INLINE_DATA) {
- read_inline_data(page, ipage);
- set_inode_flag(F2FS_I(inode), FI_DATA_EXIST);
- sync_inode_page(&dn);
- goto put_next;
+ if (need_balance && has_not_enough_free_secs(sbi, 0)) {
+ unlock_page(page);
+ f2fs_balance_fs(sbi, true);
+ lock_page(page);
+ if (page->mapping != mapping) {
+ /* The page got truncated from under us */
+ f2fs_put_page(page, 1);
+ goto repeat;
}
- err = f2fs_convert_inline_page(&dn, page);
- if (err)
- goto put_fail;
}
- err = f2fs_get_block(&dn, index);
- if (err)
- goto put_fail;
-put_next:
- f2fs_put_dnode(&dn);
- f2fs_unlock_op(sbi);
-
f2fs_wait_on_page_writeback(page, DATA);
/* wait for GCed encrypted page writeback */
if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
- f2fs_wait_on_encrypted_page_writeback(sbi, dn.data_blkaddr);
+ f2fs_wait_on_encrypted_page_writeback(sbi, blkaddr);
if (len == PAGE_CACHE_SIZE)
goto out_update;
goto out_update;
}
- if (dn.data_blkaddr == NEW_ADDR) {
+ if (blkaddr == NEW_ADDR) {
zero_user_segment(page, 0, PAGE_CACHE_SIZE);
} else {
struct f2fs_io_info fio = {
.sbi = sbi,
.type = DATA,
.rw = READ_SYNC,
- .blk_addr = dn.data_blkaddr,
+ .blk_addr = blkaddr,
.page = page,
.encrypted_page = NULL,
};
clear_cold_data(page);
return 0;
-put_fail:
- f2fs_put_dnode(&dn);
-unlock_fail:
- f2fs_unlock_op(sbi);
fail:
f2fs_put_page(page, 1);
f2fs_write_failed(mapping, pos + len);
}
f2fs_put_page(page, 1);
+ f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
return copied;
}
int err;
/* we don't need to use inline_data strictly */
- if (f2fs_has_inline_data(inode)) {
- err = f2fs_convert_inline_inode(inode);
- if (err)
- return err;
- }
+ err = f2fs_convert_inline_inode(inode);
+ if (err)
+ return err;
if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
return 0;
trace_f2fs_direct_IO_enter(inode, offset, count, iov_iter_rw(iter));
if (iov_iter_rw(iter) == WRITE) {
- __allocate_data_blocks(inode, offset, count);
- if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) {
- err = -EIO;
+ err = __allocate_data_blocks(inode, offset, count);
+ if (err)
goto out;
- }
}
err = blockdev_direct_IO(iocb, inode, iter, offset, get_data_block_dio);
si->hit_rbtree = atomic64_read(&sbi->read_hit_rbtree);
si->hit_total = si->hit_largest + si->hit_cached + si->hit_rbtree;
si->total_ext = atomic64_read(&sbi->total_hit_ext);
- si->ext_tree = sbi->total_ext_tree;
+ si->ext_tree = atomic_read(&sbi->total_ext_tree);
+ si->zombie_tree = atomic_read(&sbi->total_zombie_tree);
si->ext_node = atomic_read(&sbi->total_ext_node);
si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES);
si->ndirty_dent = get_pages(sbi, F2FS_DIRTY_DENTS);
- si->ndirty_dirs = sbi->n_dirty_dirs;
si->ndirty_meta = get_pages(sbi, F2FS_DIRTY_META);
+ si->ndirty_data = get_pages(sbi, F2FS_DIRTY_DATA);
+ si->ndirty_dirs = sbi->ndirty_inode[DIR_INODE];
+ si->ndirty_files = sbi->ndirty_inode[FILE_INODE];
si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES);
si->wb_pages = get_pages(sbi, F2FS_WRITEBACK);
si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg;
bimodal = 0;
total_vblocks = 0;
- blks_per_sec = sbi->segs_per_sec * (1 << sbi->log_blocks_per_seg);
+ blks_per_sec = sbi->segs_per_sec * sbi->blocks_per_seg;
hblks_per_sec = blks_per_sec / 2;
for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec);
si->cache_mem += NM_I(sbi)->dirty_nat_cnt *
sizeof(struct nat_entry_set);
si->cache_mem += si->inmem_pages * sizeof(struct inmem_pages);
- si->cache_mem += sbi->n_dirty_dirs * sizeof(struct inode_entry);
for (i = 0; i <= UPDATE_INO; i++)
si->cache_mem += sbi->im[i].ino_num * sizeof(struct ino_entry);
- si->cache_mem += sbi->total_ext_tree * sizeof(struct extent_tree);
+ si->cache_mem += atomic_read(&sbi->total_ext_tree) *
+ sizeof(struct extent_tree);
si->cache_mem += atomic_read(&sbi->total_ext_node) *
sizeof(struct extent_node);
si->dirty_count);
seq_printf(s, " - Prefree: %d\n - Free: %d (%d)\n\n",
si->prefree_count, si->free_segs, si->free_secs);
- seq_printf(s, "CP calls: %d\n", si->cp_count);
+ seq_printf(s, "CP calls: %d (BG: %d)\n",
+ si->cp_count, si->bg_cp_count);
seq_printf(s, "GC calls: %d (BG: %d)\n",
si->call_count, si->bg_gc);
seq_printf(s, " - data segments : %d (%d)\n",
!si->total_ext ? 0 :
div64_u64(si->hit_total * 100, si->total_ext),
si->hit_total, si->total_ext);
- seq_printf(s, " - Inner Struct Count: tree: %d, node: %d\n",
- si->ext_tree, si->ext_node);
+ seq_printf(s, " - Inner Struct Count: tree: %d(%d), node: %d\n",
+ si->ext_tree, si->zombie_tree, si->ext_node);
seq_puts(s, "\nBalancing F2FS Async:\n");
seq_printf(s, " - inmem: %4d, wb: %4d\n",
si->inmem_pages, si->wb_pages);
si->ndirty_node, si->node_pages);
seq_printf(s, " - dents: %4d in dirs:%4d\n",
si->ndirty_dent, si->ndirty_dirs);
+ seq_printf(s, " - datas: %4d in files:%4d\n",
+ si->ndirty_data, si->ndirty_files);
seq_printf(s, " - meta: %4d in %4d\n",
si->ndirty_meta, si->meta_pages);
seq_printf(s, " - NATs: %9d/%9d\n - SITs: %9d/%9d\n",
kfree(si);
}
-void __init f2fs_create_root_stats(void)
+int __init f2fs_create_root_stats(void)
{
struct dentry *file;
f2fs_debugfs_root = debugfs_create_dir("f2fs", NULL);
if (!f2fs_debugfs_root)
- return;
+ return -ENOMEM;
file = debugfs_create_file("status", S_IRUGO, f2fs_debugfs_root,
NULL, &stat_fops);
if (!file) {
debugfs_remove(f2fs_debugfs_root);
f2fs_debugfs_root = NULL;
+ return -ENOMEM;
}
+
+ return 0;
}
void f2fs_destroy_root_stats(void)
namehash = f2fs_dentry_hash(&name);
- f2fs_bug_on(F2FS_I_SB(dir), level > MAX_DIR_HASH_DEPTH);
-
nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level);
nblock = bucket_blocks(level);
goto out;
max_depth = F2FS_I(dir)->i_current_depth;
+ if (unlikely(max_depth > MAX_DIR_HASH_DEPTH)) {
+ f2fs_msg(F2FS_I_SB(dir)->sb, KERN_WARNING,
+ "Corrupted max_depth of %lu: %u",
+ dir->i_ino, max_depth);
+ max_depth = MAX_DIR_HASH_DEPTH;
+ F2FS_I(dir)->i_current_depth = max_depth;
+ mark_inode_dirty(dir);
+ }
for (level = 0; level < max_depth; level++) {
de = find_in_level(dir, level, &fname, res_page);
/* once the failed inode becomes a bad inode, i_mode is S_IFREG */
truncate_inode_pages(&inode->i_data, 0);
truncate_blocks(inode, 0, false);
- remove_dirty_dir_inode(inode);
+ remove_dirty_inode(inode);
remove_inode_page(inode);
return ERR_PTR(err);
}
f2fs_put_page(dentry_page, 1);
out:
f2fs_fname_free_filename(&fname);
+ f2fs_update_time(F2FS_I_SB(dir), REQ_TIME);
return err;
}
clear_inode_flag(F2FS_I(inode), FI_NEW_INODE);
fail:
up_write(&F2FS_I(inode)->i_sem);
+ f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
return err;
}
int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len));
int i;
+ f2fs_update_time(F2FS_I_SB(dir), REQ_TIME);
+
if (f2fs_has_inline_dentry(dir))
return f2fs_delete_inline_entry(dentry, page, dir, inode);
for (; n < npages; n++) {
dentry_page = get_lock_data_page(inode, n, false);
- if (IS_ERR(dentry_page))
- continue;
+ if (IS_ERR(dentry_page)) {
+ err = PTR_ERR(dentry_page);
+ if (err == -ENOENT)
+ continue;
+ else
+ goto out;
+ }
dentry_blk = kmap(dentry_page);
make_dentry_ptr(inode, &d, (void *)dentry_blk, 1);
- if (f2fs_fill_dentries(ctx, &d, n * NR_DENTRY_IN_BLOCK, &fstr))
- goto stop;
+ if (f2fs_fill_dentries(ctx, &d, n * NR_DENTRY_IN_BLOCK, &fstr)) {
+ kunmap(dentry_page);
+ f2fs_put_page(dentry_page, 1);
+ break;
+ }
ctx->pos = (n + 1) * NR_DENTRY_IN_BLOCK;
kunmap(dentry_page);
f2fs_put_page(dentry_page, 1);
- dentry_page = NULL;
- }
-stop:
- if (dentry_page && !IS_ERR(dentry_page)) {
- kunmap(dentry_page);
- f2fs_put_page(dentry_page, 1);
}
out:
f2fs_fname_crypto_free_buffer(&fstr);
rb_link_node(&en->rb_node, parent, p);
rb_insert_color(&en->rb_node, &et->root);
- et->count++;
+ atomic_inc(&et->node_cnt);
atomic_inc(&sbi->total_ext_node);
return en;
}
struct extent_tree *et, struct extent_node *en)
{
rb_erase(&en->rb_node, &et->root);
- et->count--;
+ atomic_dec(&et->node_cnt);
atomic_dec(&sbi->total_ext_node);
if (et->cached_en == en)
et->root = RB_ROOT;
et->cached_en = NULL;
rwlock_init(&et->lock);
- atomic_set(&et->refcount, 0);
- et->count = 0;
- sbi->total_ext_tree++;
+ INIT_LIST_HEAD(&et->list);
+ atomic_set(&et->node_cnt, 0);
+ atomic_inc(&sbi->total_ext_tree);
+ } else {
+ atomic_dec(&sbi->total_zombie_tree);
+ list_del_init(&et->list);
}
- atomic_inc(&et->refcount);
up_write(&sbi->extent_tree_lock);
/* never died until evict_inode */
{
struct rb_node *node, *next;
struct extent_node *en;
- unsigned int count = et->count;
+ unsigned int count = atomic_read(&et->node_cnt);
node = rb_first(&et->root);
while (node) {
node = next;
}
- return count - et->count;
+ return count - atomic_read(&et->node_cnt);
}
static void __drop_largest_extent(struct inode *inode,
largest->len = 0;
}
-void f2fs_drop_largest_extent(struct inode *inode, pgoff_t fofs)
-{
- if (!f2fs_may_extent_tree(inode))
- return;
-
- __drop_largest_extent(inode, fofs, 1);
-}
-
-void f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
+/* return true, if inode page is changed */
+bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct extent_tree *et;
struct extent_node *en;
struct extent_info ei;
- if (!f2fs_may_extent_tree(inode))
- return;
+ if (!f2fs_may_extent_tree(inode)) {
+ /* drop largest extent */
+ if (i_ext && i_ext->len) {
+ i_ext->len = 0;
+ return true;
+ }
+ return false;
+ }
et = __grab_extent_tree(inode);
- if (!i_ext || le32_to_cpu(i_ext->len) < F2FS_MIN_EXTENT_LEN)
- return;
+ if (!i_ext || !i_ext->len)
+ return false;
set_extent_info(&ei, le32_to_cpu(i_ext->fofs),
le32_to_cpu(i_ext->blk), le32_to_cpu(i_ext->len));
write_lock(&et->lock);
- if (et->count)
+ if (atomic_read(&et->node_cnt))
goto out;
en = __init_extent_tree(sbi, et, &ei);
}
out:
write_unlock(&et->lock);
+ return false;
}
static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
{
struct extent_tree *treevec[EXT_TREE_VEC_SIZE];
+ struct extent_tree *et, *next;
struct extent_node *en, *tmp;
unsigned long ino = F2FS_ROOT_INO(sbi);
- struct radix_tree_root *root = &sbi->extent_tree_root;
unsigned int found;
unsigned int node_cnt = 0, tree_cnt = 0;
int remained;
+ bool do_free = false;
if (!test_opt(sbi, EXTENT_CACHE))
return 0;
+ if (!atomic_read(&sbi->total_zombie_tree))
+ goto free_node;
+
if (!down_write_trylock(&sbi->extent_tree_lock))
goto out;
/* 1. remove unreferenced extent tree */
- while ((found = radix_tree_gang_lookup(root,
- (void **)treevec, ino, EXT_TREE_VEC_SIZE))) {
- unsigned i;
-
- ino = treevec[found - 1]->ino + 1;
- for (i = 0; i < found; i++) {
- struct extent_tree *et = treevec[i];
-
- if (!atomic_read(&et->refcount)) {
- write_lock(&et->lock);
- node_cnt += __free_extent_tree(sbi, et, true);
- write_unlock(&et->lock);
+ list_for_each_entry_safe(et, next, &sbi->zombie_list, list) {
+ if (atomic_read(&et->node_cnt)) {
+ write_lock(&et->lock);
+ node_cnt += __free_extent_tree(sbi, et, true);
+ write_unlock(&et->lock);
+ }
- radix_tree_delete(root, et->ino);
- kmem_cache_free(extent_tree_slab, et);
- sbi->total_ext_tree--;
- tree_cnt++;
+ list_del_init(&et->list);
+ radix_tree_delete(&sbi->extent_tree_root, et->ino);
+ kmem_cache_free(extent_tree_slab, et);
+ atomic_dec(&sbi->total_ext_tree);
+ atomic_dec(&sbi->total_zombie_tree);
+ tree_cnt++;
- if (node_cnt + tree_cnt >= nr_shrink)
- goto unlock_out;
- }
- }
+ if (node_cnt + tree_cnt >= nr_shrink)
+ goto unlock_out;
}
up_write(&sbi->extent_tree_lock);
+free_node:
/* 2. remove LRU extent entries */
if (!down_write_trylock(&sbi->extent_tree_lock))
goto out;
if (!remained--)
break;
list_del_init(&en->list);
+ do_free = true;
}
spin_unlock(&sbi->extent_lock);
+ if (do_free == false)
+ goto unlock_out;
+
/*
* reset ino for searching victims from beginning of global extent tree.
*/
ino = F2FS_ROOT_INO(sbi);
- while ((found = radix_tree_gang_lookup(root,
+ while ((found = radix_tree_gang_lookup(&sbi->extent_tree_root,
(void **)treevec, ino, EXT_TREE_VEC_SIZE))) {
unsigned i;
for (i = 0; i < found; i++) {
struct extent_tree *et = treevec[i];
- write_lock(&et->lock);
- node_cnt += __free_extent_tree(sbi, et, false);
- write_unlock(&et->lock);
+ if (!atomic_read(&et->node_cnt))
+ continue;
+
+ if (write_trylock(&et->lock)) {
+ node_cnt += __free_extent_tree(sbi, et, false);
+ write_unlock(&et->lock);
+ }
if (node_cnt + tree_cnt >= nr_shrink)
goto unlock_out;
struct extent_tree *et = F2FS_I(inode)->extent_tree;
unsigned int node_cnt = 0;
- if (!et)
+ if (!et || !atomic_read(&et->node_cnt))
return 0;
write_lock(&et->lock);
if (!et)
return;
- if (inode->i_nlink && !is_bad_inode(inode) && et->count) {
- atomic_dec(&et->refcount);
+ if (inode->i_nlink && !is_bad_inode(inode) &&
+ atomic_read(&et->node_cnt)) {
+ down_write(&sbi->extent_tree_lock);
+ list_add_tail(&et->list, &sbi->zombie_list);
+ atomic_inc(&sbi->total_zombie_tree);
+ up_write(&sbi->extent_tree_lock);
return;
}
/* delete extent tree entry in radix tree */
down_write(&sbi->extent_tree_lock);
- atomic_dec(&et->refcount);
- f2fs_bug_on(sbi, atomic_read(&et->refcount) || et->count);
+ f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
radix_tree_delete(&sbi->extent_tree_root, inode->i_ino);
kmem_cache_free(extent_tree_slab, et);
- sbi->total_ext_tree--;
+ atomic_dec(&sbi->total_ext_tree);
up_write(&sbi->extent_tree_lock);
F2FS_I(inode)->extent_tree = NULL;
init_rwsem(&sbi->extent_tree_lock);
INIT_LIST_HEAD(&sbi->extent_list);
spin_lock_init(&sbi->extent_lock);
- sbi->total_ext_tree = 0;
+ atomic_set(&sbi->total_ext_tree, 0);
+ INIT_LIST_HEAD(&sbi->zombie_list);
+ atomic_set(&sbi->total_zombie_tree, 0);
atomic_set(&sbi->total_ext_node, 0);
}
#include <linux/sched.h>
#include <linux/vmalloc.h>
#include <linux/bio.h>
+#include <linux/blkdev.h>
#ifdef CONFIG_F2FS_CHECK_FS
#define f2fs_bug_on(sbi, condition) BUG_ON(condition)
#define F2FS_MOUNT_FASTBOOT 0x00001000
#define F2FS_MOUNT_EXTENT_CACHE 0x00002000
#define F2FS_MOUNT_FORCE_FG_GC 0x00004000
+#define F2FS_MOUNT_DATA_FLUSH 0x00008000
#define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option)
#define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option)
#define BATCHED_TRIM_BLOCKS(sbi) \
(BATCHED_TRIM_SEGMENTS(sbi) << (sbi)->log_blocks_per_seg)
#define DEF_CP_INTERVAL 60 /* 60 secs */
+#define DEF_IDLE_INTERVAL 120 /* 2 mins */
struct cp_control {
int reason;
nid_t ino; /* inode number */
};
-/*
- * for the list of directory inodes or gc inodes.
- * NOTE: there are two slab users for this structure, if we add/modify/delete
- * fields in structure for one of slab users, it may affect fields or size of
- * other one, in this condition, it's better to split both of slab and related
- * data structure.
- */
+/* for the list of inodes to be GCed */
struct inode_entry {
struct list_head list; /* list head */
struct inode *inode; /* vfs inode pointer */
#define F2FS_IOC_ABORT_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 5)
#define F2FS_IOC_GARBAGE_COLLECT _IO(F2FS_IOCTL_MAGIC, 6)
#define F2FS_IOC_WRITE_CHECKPOINT _IO(F2FS_IOCTL_MAGIC, 7)
+#define F2FS_IOC_DEFRAGMENT _IO(F2FS_IOCTL_MAGIC, 8)
#define F2FS_IOC_SET_ENCRYPTION_POLICY \
_IOR('f', 19, struct f2fs_encryption_policy)
/*
* ioctl commands in 32 bit emulation
*/
-#define F2FS_IOC32_GETFLAGS FS_IOC32_GETFLAGS
-#define F2FS_IOC32_SETFLAGS FS_IOC32_SETFLAGS
+#define F2FS_IOC32_GETFLAGS FS_IOC32_GETFLAGS
+#define F2FS_IOC32_SETFLAGS FS_IOC32_SETFLAGS
+#define F2FS_IOC32_GETVERSION FS_IOC32_GETVERSION
#endif
+struct f2fs_defragment {
+ u64 start;
+ u64 len;
+};
+
/*
* For INODE and NODE manager
*/
struct rb_root root; /* root of extent info rb-tree */
struct extent_node *cached_en; /* recently accessed extent node */
struct extent_info largest; /* largested extent info */
+ struct list_head list; /* to be used by sbi->zombie_list */
rwlock_t lock; /* protect extent info rb-tree */
- atomic_t refcount; /* reference count of rb-tree */
- unsigned int count; /* # of extent node in rb-tree*/
+ atomic_t node_cnt; /* # of extent node in rb-tree*/
};
/*
unsigned int clevel; /* maximum level of given file name */
nid_t i_xattr_nid; /* node id that contains xattrs */
unsigned long long xattr_ver; /* cp version of xattr modification */
- struct inode_entry *dirty_dir; /* the pointer of dirty dir */
+ struct list_head dirty_list; /* linked in global dirty list */
struct list_head inmem_pages; /* inmemory pages managed by f2fs */
struct mutex inmem_lock; /* lock for inmemory pages */
nid_t nid; /* node id of the direct node block */
unsigned int ofs_in_node; /* data offset in the node page */
bool inode_page_locked; /* inode page is locked or not */
+ bool node_changed; /* is node block changed */
block_t data_blkaddr; /* block address of the node block */
};
enum count_type {
F2FS_WRITEBACK,
F2FS_DIRTY_DENTS,
+ F2FS_DIRTY_DATA,
F2FS_DIRTY_NODES,
F2FS_DIRTY_META,
F2FS_INMEM_PAGES,
struct rw_semaphore io_rwsem; /* blocking op for bio */
};
+enum inode_type {
+ DIR_INODE, /* for dirty dir inode */
+ FILE_INODE, /* for dirty regular/symlink inode */
+ NR_INODE_TYPE,
+};
+
/* for inner inode cache management */
struct inode_management {
struct radix_tree_root ino_root; /* ino entry array */
SBI_POR_DOING, /* recovery is doing or not */
};
+enum {
+ CP_TIME,
+ REQ_TIME,
+ MAX_TIME,
+};
+
struct f2fs_sb_info {
struct super_block *sb; /* pointer to VFS super block */
struct proc_dir_entry *s_proc; /* proc entry */
- struct buffer_head *raw_super_buf; /* buffer head of raw sb */
struct f2fs_super_block *raw_super; /* raw super block pointer */
+ int valid_super_block; /* valid super block no */
int s_flag; /* flags for sbi */
/* for node-related operations */
struct rw_semaphore node_write; /* locking node writes */
struct mutex writepages; /* mutex for writepages() */
wait_queue_head_t cp_wait;
- long cp_expires, cp_interval; /* next expected periodic cp */
+ unsigned long last_time[MAX_TIME]; /* to store time in jiffies */
+ long interval_time[MAX_TIME]; /* to store thresholds */
struct inode_management im[MAX_INO_ENTRY]; /* manage inode cache */
/* for orphan inode, use 0'th array */
unsigned int max_orphans; /* max orphan inodes */
- /* for directory inode management */
- struct list_head dir_inode_list; /* dir inode list */
- spinlock_t dir_inode_lock; /* for dir inode list lock */
+ /* for inode management */
+ struct list_head inode_list[NR_INODE_TYPE]; /* dirty inode list */
+ spinlock_t inode_lock[NR_INODE_TYPE]; /* for dirty inode list lock */
/* for extent tree cache */
struct radix_tree_root extent_tree_root;/* cache extent cache entries */
struct rw_semaphore extent_tree_lock; /* locking extent radix tree */
struct list_head extent_list; /* lru list for shrinker */
spinlock_t extent_lock; /* locking extent lru list */
- int total_ext_tree; /* extent tree count */
+ atomic_t total_ext_tree; /* extent tree count */
+ struct list_head zombie_list; /* extent zombie tree list */
+ atomic_t total_zombie_tree; /* extent zombie tree count */
atomic_t total_ext_node; /* extent info count */
/* basic filesystem units */
unsigned int total_node_count; /* total node block count */
unsigned int total_valid_node_count; /* valid node block count */
unsigned int total_valid_inode_count; /* valid inode count */
+ loff_t max_file_blocks; /* max block index of file */
int active_logs; /* # of active logs */
int dir_level; /* directory level */
atomic_t inline_inode; /* # of inline_data inodes */
atomic_t inline_dir; /* # of inline_dentry inodes */
int bg_gc; /* background gc calls */
- unsigned int n_dirty_dirs; /* # of dir inodes */
+ unsigned int ndirty_inode[NR_INODE_TYPE]; /* # of dirty inodes */
#endif
unsigned int last_victim[2]; /* last victim segment # */
spinlock_t stat_lock; /* lock for stat operations */
unsigned int shrinker_run_no;
};
+static inline void f2fs_update_time(struct f2fs_sb_info *sbi, int type)
+{
+ sbi->last_time[type] = jiffies;
+}
+
+static inline bool f2fs_time_over(struct f2fs_sb_info *sbi, int type)
+{
+ struct timespec ts = {sbi->interval_time[type], 0};
+ unsigned long interval = timespec_to_jiffies(&ts);
+
+ return time_after(jiffies, sbi->last_time[type] + interval);
+}
+
+static inline bool is_idle(struct f2fs_sb_info *sbi)
+{
+ struct block_device *bdev = sbi->sb->s_bdev;
+ struct request_queue *q = bdev_get_queue(bdev);
+ struct request_list *rl = &q->root_rl;
+
+ if (rl->count[BLK_RW_SYNC] || rl->count[BLK_RW_ASYNC])
+ return 0;
+
+ return f2fs_time_over(sbi, REQ_TIME);
+}
+
/*
* Inline functions
*/
static inline void inode_inc_dirty_pages(struct inode *inode)
{
atomic_inc(&F2FS_I(inode)->dirty_pages);
- if (S_ISDIR(inode->i_mode))
- inc_page_count(F2FS_I_SB(inode), F2FS_DIRTY_DENTS);
+ inc_page_count(F2FS_I_SB(inode), S_ISDIR(inode->i_mode) ?
+ F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA);
}
static inline void dec_page_count(struct f2fs_sb_info *sbi, int count_type)
return;
atomic_dec(&F2FS_I(inode)->dirty_pages);
-
- if (S_ISDIR(inode->i_mode))
- dec_page_count(F2FS_I_SB(inode), F2FS_DIRTY_DENTS);
+ dec_page_count(F2FS_I_SB(inode), S_ISDIR(inode->i_mode) ?
+ F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA);
}
static inline int get_pages(struct f2fs_sb_info *sbi, int count_type)
static inline int get_blocktype_secs(struct f2fs_sb_info *sbi, int block_type)
{
- unsigned int pages_per_sec = sbi->segs_per_sec *
- (1 << sbi->log_blocks_per_seg);
+ unsigned int pages_per_sec = sbi->segs_per_sec * sbi->blocks_per_seg;
return ((get_pages(sbi, block_type) + pages_per_sec - 1)
>> sbi->log_blocks_per_seg) / sbi->segs_per_sec;
}
FI_DROP_CACHE, /* drop dirty page cache */
FI_DATA_EXIST, /* indicate data exists */
FI_INLINE_DOTS, /* indicate inline dot dentries */
+ FI_DO_DEFRAG, /* indicate defragment is running */
+ FI_DIRTY_FILE, /* indicate regular/symlink has dirty pages */
};
static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag)
void f2fs_set_inode_flags(struct inode *);
struct inode *f2fs_iget(struct super_block *, unsigned long);
int try_to_free_nats(struct f2fs_sb_info *, int);
-void update_inode(struct inode *, struct page *);
-void update_inode_page(struct inode *);
+int update_inode(struct inode *, struct page *);
+int update_inode_page(struct inode *);
int f2fs_write_inode(struct inode *, struct writeback_control *);
void f2fs_evict_inode(struct inode *);
void handle_failed_inode(struct inode *);
*/
void register_inmem_page(struct inode *, struct page *);
int commit_inmem_pages(struct inode *, bool);
-void f2fs_balance_fs(struct f2fs_sb_info *);
+void f2fs_balance_fs(struct f2fs_sb_info *, bool);
void f2fs_balance_fs_bg(struct f2fs_sb_info *);
int f2fs_issue_flush(struct f2fs_sb_info *);
int create_flush_cmd_control(struct f2fs_sb_info *);
int ra_meta_pages(struct f2fs_sb_info *, block_t, int, int, bool);
void ra_meta_pages_cond(struct f2fs_sb_info *, pgoff_t);
long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long);
-void add_dirty_inode(struct f2fs_sb_info *, nid_t, int type);
-void remove_dirty_inode(struct f2fs_sb_info *, nid_t, int type);
-void release_dirty_inode(struct f2fs_sb_info *);
+void add_ino_entry(struct f2fs_sb_info *, nid_t, int type);
+void remove_ino_entry(struct f2fs_sb_info *, nid_t, int type);
+void release_ino_entry(struct f2fs_sb_info *);
bool exist_written_data(struct f2fs_sb_info *, nid_t, int);
int acquire_orphan_inode(struct f2fs_sb_info *);
void release_orphan_inode(struct f2fs_sb_info *);
int get_valid_checkpoint(struct f2fs_sb_info *);
void update_dirty_page(struct inode *, struct page *);
void add_dirty_dir_inode(struct inode *);
-void remove_dirty_dir_inode(struct inode *);
-void sync_dirty_dir_inodes(struct f2fs_sb_info *);
-void write_checkpoint(struct f2fs_sb_info *, struct cp_control *);
+void remove_dirty_inode(struct inode *);
+int sync_dirty_inodes(struct f2fs_sb_info *, enum inode_type);
+int write_checkpoint(struct f2fs_sb_info *, struct cp_control *);
void init_ino_entry_info(struct f2fs_sb_info *);
int __init create_checkpoint_caches(void);
void destroy_checkpoint_caches(void);
struct page *get_lock_data_page(struct inode *, pgoff_t, bool);
struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool);
int do_write_data_page(struct f2fs_io_info *);
+int f2fs_map_blocks(struct inode *, struct f2fs_map_blocks *, int, int);
int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *, u64, u64);
void f2fs_invalidate_page(struct page *, unsigned int, unsigned int);
int f2fs_release_page(struct page *, gfp_t);
int main_area_segs, main_area_sections, main_area_zones;
unsigned long long hit_largest, hit_cached, hit_rbtree;
unsigned long long hit_total, total_ext;
- int ext_tree, ext_node;
- int ndirty_node, ndirty_dent, ndirty_dirs, ndirty_meta;
+ int ext_tree, zombie_tree, ext_node;
+ int ndirty_node, ndirty_meta;
+ int ndirty_dent, ndirty_dirs, ndirty_data, ndirty_files;
int nats, dirty_nats, sits, dirty_sits, fnids;
int total_count, utilization;
int bg_gc, inmem_pages, wb_pages;
int util_free, util_valid, util_invalid;
int rsvd_segs, overp_segs;
int dirty_count, node_pages, meta_pages;
- int prefree_count, call_count, cp_count;
+ int prefree_count, call_count, cp_count, bg_cp_count;
int tot_segs, node_segs, data_segs, free_segs, free_secs;
int bg_node_segs, bg_data_segs;
int tot_blks, data_blks, node_blks;
}
#define stat_inc_cp_count(si) ((si)->cp_count++)
+#define stat_inc_bg_cp_count(si) ((si)->bg_cp_count++)
#define stat_inc_call_count(si) ((si)->call_count++)
#define stat_inc_bggc_count(sbi) ((sbi)->bg_gc++)
-#define stat_inc_dirty_dir(sbi) ((sbi)->n_dirty_dirs++)
-#define stat_dec_dirty_dir(sbi) ((sbi)->n_dirty_dirs--)
+#define stat_inc_dirty_inode(sbi, type) ((sbi)->ndirty_inode[type]++)
+#define stat_dec_dirty_inode(sbi, type) ((sbi)->ndirty_inode[type]--)
#define stat_inc_total_hit(sbi) (atomic64_inc(&(sbi)->total_hit_ext))
#define stat_inc_rbtree_node_hit(sbi) (atomic64_inc(&(sbi)->read_hit_rbtree))
#define stat_inc_largest_node_hit(sbi) (atomic64_inc(&(sbi)->read_hit_largest))
int f2fs_build_stats(struct f2fs_sb_info *);
void f2fs_destroy_stats(struct f2fs_sb_info *);
-void __init f2fs_create_root_stats(void);
+int __init f2fs_create_root_stats(void);
void f2fs_destroy_root_stats(void);
#else
#define stat_inc_cp_count(si)
+#define stat_inc_bg_cp_count(si)
#define stat_inc_call_count(si)
#define stat_inc_bggc_count(si)
-#define stat_inc_dirty_dir(sbi)
-#define stat_dec_dirty_dir(sbi)
+#define stat_inc_dirty_inode(sbi, type)
+#define stat_dec_dirty_inode(sbi, type)
#define stat_inc_total_hit(sb)
#define stat_inc_rbtree_node_hit(sb)
#define stat_inc_largest_node_hit(sbi)
static inline int f2fs_build_stats(struct f2fs_sb_info *sbi) { return 0; }
static inline void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { }
-static inline void __init f2fs_create_root_stats(void) { }
+static inline int __init f2fs_create_root_stats(void) { return 0; }
static inline void f2fs_destroy_root_stats(void) { }
#endif
* extent_cache.c
*/
unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *, int);
-void f2fs_drop_largest_extent(struct inode *, pgoff_t);
-void f2fs_init_extent_tree(struct inode *, struct f2fs_extent *);
+bool f2fs_init_extent_tree(struct inode *, struct f2fs_extent *);
unsigned int f2fs_destroy_extent_node(struct inode *);
void f2fs_destroy_extent_tree(struct inode *);
bool f2fs_lookup_extent_cache(struct inode *, pgoff_t, struct extent_info *);
struct dnode_of_data dn;
int err;
- f2fs_balance_fs(sbi);
-
sb_start_pagefault(inode->i_sb);
f2fs_bug_on(sbi, f2fs_has_inline_data(inode));
f2fs_put_dnode(&dn);
f2fs_unlock_op(sbi);
+ f2fs_balance_fs(sbi, dn.node_changed);
+
file_update_time(vma->vm_file);
lock_page(page);
if (unlikely(page->mapping != inode->i_mapping ||
clear_cold_data(page);
out:
sb_end_pagefault(inode->i_sb);
+ f2fs_update_time(sbi, REQ_TIME);
return block_page_mkwrite_return(err);
}
trace_f2fs_sync_file_enter(inode);
/* if fdatasync is triggered, let's do in-place-update */
- if (get_dirty_pages(inode) <= SM_I(sbi)->min_fsync_blocks)
+ if (datasync || get_dirty_pages(inode) <= SM_I(sbi)->min_fsync_blocks)
set_inode_flag(fi, FI_NEED_IPU);
ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
clear_inode_flag(fi, FI_NEED_IPU);
goto out;
}
go_write:
- /* guarantee free sections for fsync */
- f2fs_balance_fs(sbi);
-
/*
* Both of fdatasync() and fsync() are able to be recovered from
* sudden-power-off.
sync_node_pages(sbi, ino, &wbc);
/* if cp_error was enabled, we should avoid infinite loop */
- if (unlikely(f2fs_cp_error(sbi)))
+ if (unlikely(f2fs_cp_error(sbi))) {
+ ret = -EIO;
goto out;
+ }
if (need_inode_block_update(sbi, ino)) {
mark_inode_dirty_sync(inode);
goto out;
/* once recovery info is written, don't need to tack this */
- remove_dirty_inode(sbi, ino, APPEND_INO);
+ remove_ino_entry(sbi, ino, APPEND_INO);
clear_inode_flag(fi, FI_APPEND_WRITE);
flush_out:
- remove_dirty_inode(sbi, ino, UPDATE_INO);
+ remove_ino_entry(sbi, ino, UPDATE_INO);
clear_inode_flag(fi, FI_UPDATE_WRITE);
ret = f2fs_issue_flush(sbi);
+ f2fs_update_time(sbi, REQ_TIME);
out:
trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret);
f2fs_trace_ios(NULL, 1);
static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
struct inode *inode = file_inode(file);
+ int err;
if (f2fs_encrypted_inode(inode)) {
- int err = f2fs_get_encryption_info(inode);
+ err = f2fs_get_encryption_info(inode);
if (err)
return 0;
}
/* we don't need to use inline_data strictly */
- if (f2fs_has_inline_data(inode)) {
- int err = f2fs_convert_inline_inode(inode);
- if (err)
- return err;
- }
+ err = f2fs_convert_inline_inode(inode);
+ if (err)
+ return err;
file_accessed(file);
vma->vm_ops = &f2fs_file_vm_ops;
F2FS_I(dn->inode)) + ofs;
f2fs_update_extent_cache_range(dn, fofs, 0, len);
dec_valid_block_count(sbi, dn->inode, nr_free);
- set_page_dirty(dn->node_page);
sync_inode_page(dn);
}
dn->ofs_in_node = ofs;
+ f2fs_update_time(sbi, REQ_TIME);
trace_f2fs_truncate_data_blocks_range(dn->inode, dn->nid,
dn->ofs_in_node, nr_free);
return nr_free;
trace_f2fs_truncate(inode);
/* we should check inline_data size */
- if (f2fs_has_inline_data(inode) && !f2fs_may_inline_data(inode)) {
+ if (!f2fs_may_inline_data(inode)) {
err = f2fs_convert_inline_inode(inode);
if (err)
return err;
err = f2fs_truncate(inode, true);
if (err)
return err;
- f2fs_balance_fs(F2FS_I_SB(inode));
+ f2fs_balance_fs(F2FS_I_SB(inode), true);
} else {
/*
* do not trim all blocks after i_size if target size is
* larger than i_size.
*/
truncate_setsize(inode, attr->ia_size);
+
+ /* should convert inline inode here */
+ if (!f2fs_may_inline_data(inode)) {
+ err = f2fs_convert_inline_inode(inode);
+ if (err)
+ return err;
+ }
inode->i_mtime = inode->i_ctime = CURRENT_TIME;
}
}
if (!len)
return 0;
- f2fs_balance_fs(sbi);
+ f2fs_balance_fs(sbi, true);
f2fs_lock_op(sbi);
page = get_new_data_page(inode, NULL, index, false);
{
pgoff_t pg_start, pg_end;
loff_t off_start, off_end;
- int ret = 0;
+ int ret;
- if (f2fs_has_inline_data(inode)) {
- ret = f2fs_convert_inline_inode(inode);
- if (ret)
- return ret;
- }
+ ret = f2fs_convert_inline_inode(inode);
+ if (ret)
+ return ret;
pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT;
pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT;
loff_t blk_start, blk_end;
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
- f2fs_balance_fs(sbi);
+ f2fs_balance_fs(sbi, true);
blk_start = (loff_t)pg_start << PAGE_CACHE_SHIFT;
blk_end = (loff_t)pg_end << PAGE_CACHE_SHIFT;
int ret = 0;
for (; end < nrpages; start++, end++) {
- f2fs_balance_fs(sbi);
+ f2fs_balance_fs(sbi, true);
f2fs_lock_op(sbi);
ret = __exchange_data_block(inode, end, start, true);
f2fs_unlock_op(sbi);
if (offset & (F2FS_BLKSIZE - 1) || len & (F2FS_BLKSIZE - 1))
return -EINVAL;
- f2fs_balance_fs(F2FS_I_SB(inode));
-
- if (f2fs_has_inline_data(inode)) {
- ret = f2fs_convert_inline_inode(inode);
- if (ret)
- return ret;
- }
+ ret = f2fs_convert_inline_inode(inode);
+ if (ret)
+ return ret;
pg_start = offset >> PAGE_CACHE_SHIFT;
pg_end = (offset + len) >> PAGE_CACHE_SHIFT;
if (ret)
return ret;
- f2fs_balance_fs(sbi);
-
- if (f2fs_has_inline_data(inode)) {
- ret = f2fs_convert_inline_inode(inode);
- if (ret)
- return ret;
- }
+ ret = f2fs_convert_inline_inode(inode);
+ if (ret)
+ return ret;
ret = filemap_write_and_wait_range(mapping, offset, offset + len - 1);
if (ret)
if (offset & (F2FS_BLKSIZE - 1) || len & (F2FS_BLKSIZE - 1))
return -EINVAL;
- f2fs_balance_fs(sbi);
+ ret = f2fs_convert_inline_inode(inode);
+ if (ret)
+ return ret;
- if (f2fs_has_inline_data(inode)) {
- ret = f2fs_convert_inline_inode(inode);
- if (ret)
- return ret;
- }
+ f2fs_balance_fs(sbi, true);
ret = truncate_blocks(inode, i_size_read(inode), true);
if (ret)
loff_t off_start, off_end;
int ret = 0;
- f2fs_balance_fs(sbi);
-
ret = inode_newsize_ok(inode, (len + offset));
if (ret)
return ret;
- if (f2fs_has_inline_data(inode)) {
- ret = f2fs_convert_inline_inode(inode);
- if (ret)
- return ret;
- }
+ ret = f2fs_convert_inline_inode(inode);
+ if (ret)
+ return ret;
+
+ f2fs_balance_fs(sbi, true);
pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT;
pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT;
if (!ret) {
inode->i_mtime = inode->i_ctime = CURRENT_TIME;
mark_inode_dirty(inode);
+ f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
}
out:
if (!inode_owner_or_capable(inode))
return -EACCES;
- f2fs_balance_fs(F2FS_I_SB(inode));
-
if (f2fs_is_atomic_file(inode))
return 0;
return ret;
set_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE);
+ f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
+
return 0;
}
if (f2fs_is_atomic_file(inode)) {
clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE);
ret = commit_inmem_pages(inode, false);
- if (ret)
+ if (ret) {
+ set_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE);
goto err_out;
+ }
}
ret = f2fs_sync_file(filp, 0, LLONG_MAX, 0);
return ret;
set_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE);
+ f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
return 0;
}
if (ret)
return ret;
- f2fs_balance_fs(F2FS_I_SB(inode));
-
- clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE);
- clear_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE);
- commit_inmem_pages(inode, true);
+ if (f2fs_is_atomic_file(inode)) {
+ clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE);
+ commit_inmem_pages(inode, true);
+ }
+ if (f2fs_is_volatile_file(inode)) {
+ clear_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE);
+ ret = f2fs_sync_file(filp, 0, LLONG_MAX, 0);
+ }
mnt_drop_write_file(filp);
+ f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
return ret;
}
default:
return -EINVAL;
}
+ f2fs_update_time(sbi, REQ_TIME);
return 0;
}
if (copy_to_user((struct fstrim_range __user *)arg, &range,
sizeof(range)))
return -EFAULT;
+ f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
return 0;
}
sizeof(policy)))
return -EFAULT;
+ f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
return f2fs_process_policy(&policy, inode);
#else
return -EOPNOTSUPP;
generate_random_uuid(sbi->raw_super->encrypt_pw_salt);
err = f2fs_commit_super(sbi, false);
-
- mnt_drop_write_file(filp);
if (err) {
/* undo new data */
memset(sbi->raw_super->encrypt_pw_salt, 0, 16);
+ mnt_drop_write_file(filp);
return err;
}
+ mnt_drop_write_file(filp);
got_it:
if (copy_to_user((__u8 __user *)arg, sbi->raw_super->encrypt_pw_salt,
16))
{
struct inode *inode = file_inode(filp);
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
- struct cp_control cpc;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (f2fs_readonly(sbi->sb))
return -EROFS;
- cpc.reason = __get_cp_reason(sbi);
+ return f2fs_sync_fs(sbi->sb, 1);
+}
- mutex_lock(&sbi->gc_mutex);
- write_checkpoint(sbi, &cpc);
- mutex_unlock(&sbi->gc_mutex);
+static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
+ struct file *filp,
+ struct f2fs_defragment *range)
+{
+ struct inode *inode = file_inode(filp);
+ struct f2fs_map_blocks map;
+ struct extent_info ei;
+ pgoff_t pg_start, pg_end;
+ unsigned int blk_per_seg = sbi->blocks_per_seg;
+ unsigned int total = 0, sec_num;
+ unsigned int pages_per_sec = sbi->segs_per_sec * blk_per_seg;
+ block_t blk_end = 0;
+ bool fragmented = false;
+ int err;
- return 0;
+ /* if in-place-update policy is enabled, don't waste time here */
+ if (need_inplace_update(inode))
+ return -EINVAL;
+
+ pg_start = range->start >> PAGE_CACHE_SHIFT;
+ pg_end = (range->start + range->len) >> PAGE_CACHE_SHIFT;
+
+ f2fs_balance_fs(sbi, true);
+
+ mutex_lock(&inode->i_mutex);
+
+ /* writeback all dirty pages in the range */
+ err = filemap_write_and_wait_range(inode->i_mapping, range->start,
+ range->start + range->len - 1);
+ if (err)
+ goto out;
+
+ /*
+ * lookup mapping info in extent cache, skip defragmenting if physical
+ * block addresses are continuous.
+ */
+ if (f2fs_lookup_extent_cache(inode, pg_start, &ei)) {
+ if (ei.fofs + ei.len >= pg_end)
+ goto out;
+ }
+
+ map.m_lblk = pg_start;
+
+ /*
+ * lookup mapping info in dnode page cache, skip defragmenting if all
+ * physical block addresses are continuous even if there are hole(s)
+ * in logical blocks.
+ */
+ while (map.m_lblk < pg_end) {
+ map.m_len = pg_end - map.m_lblk;
+ err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_READ);
+ if (err)
+ goto out;
+
+ if (!(map.m_flags & F2FS_MAP_FLAGS)) {
+ map.m_lblk++;
+ continue;
+ }
+
+ if (blk_end && blk_end != map.m_pblk) {
+ fragmented = true;
+ break;
+ }
+ blk_end = map.m_pblk + map.m_len;
+
+ map.m_lblk += map.m_len;
+ }
+
+ if (!fragmented)
+ goto out;
+
+ map.m_lblk = pg_start;
+ map.m_len = pg_end - pg_start;
+
+ sec_num = (map.m_len + pages_per_sec - 1) / pages_per_sec;
+
+ /*
+ * make sure there are enough free section for LFS allocation, this can
+ * avoid defragment running in SSR mode when free section are allocated
+ * intensively
+ */
+ if (has_not_enough_free_secs(sbi, sec_num)) {
+ err = -EAGAIN;
+ goto out;
+ }
+
+ while (map.m_lblk < pg_end) {
+ pgoff_t idx;
+ int cnt = 0;
+
+do_map:
+ map.m_len = pg_end - map.m_lblk;
+ err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_READ);
+ if (err)
+ goto clear_out;
+
+ if (!(map.m_flags & F2FS_MAP_FLAGS)) {
+ map.m_lblk++;
+ continue;
+ }
+
+ set_inode_flag(F2FS_I(inode), FI_DO_DEFRAG);
+
+ idx = map.m_lblk;
+ while (idx < map.m_lblk + map.m_len && cnt < blk_per_seg) {
+ struct page *page;
+
+ page = get_lock_data_page(inode, idx, true);
+ if (IS_ERR(page)) {
+ err = PTR_ERR(page);
+ goto clear_out;
+ }
+
+ set_page_dirty(page);
+ f2fs_put_page(page, 1);
+
+ idx++;
+ cnt++;
+ total++;
+ }
+
+ map.m_lblk = idx;
+
+ if (idx < pg_end && cnt < blk_per_seg)
+ goto do_map;
+
+ clear_inode_flag(F2FS_I(inode), FI_DO_DEFRAG);
+
+ err = filemap_fdatawrite(inode->i_mapping);
+ if (err)
+ goto out;
+ }
+clear_out:
+ clear_inode_flag(F2FS_I(inode), FI_DO_DEFRAG);
+out:
+ mutex_unlock(&inode->i_mutex);
+ if (!err)
+ range->len = (u64)total << PAGE_CACHE_SHIFT;
+ return err;
+}
+
+static int f2fs_ioc_defragment(struct file *filp, unsigned long arg)
+{
+ struct inode *inode = file_inode(filp);
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct f2fs_defragment range;
+ int err;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (!S_ISREG(inode->i_mode))
+ return -EINVAL;
+
+ err = mnt_want_write_file(filp);
+ if (err)
+ return err;
+
+ if (f2fs_readonly(sbi->sb)) {
+ err = -EROFS;
+ goto out;
+ }
+
+ if (copy_from_user(&range, (struct f2fs_defragment __user *)arg,
+ sizeof(range))) {
+ err = -EFAULT;
+ goto out;
+ }
+
+ /* verify alignment of offset & size */
+ if (range.start & (F2FS_BLKSIZE - 1) ||
+ range.len & (F2FS_BLKSIZE - 1)) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ err = f2fs_defragment_range(sbi, filp, &range);
+ f2fs_update_time(sbi, REQ_TIME);
+ if (err < 0)
+ goto out;
+
+ if (copy_to_user((struct f2fs_defragment __user *)arg, &range,
+ sizeof(range)))
+ err = -EFAULT;
+out:
+ mnt_drop_write_file(filp);
+ return err;
}
long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
return f2fs_ioc_gc(filp, arg);
case F2FS_IOC_WRITE_CHECKPOINT:
return f2fs_ioc_write_checkpoint(filp, arg);
+ case F2FS_IOC_DEFRAGMENT:
+ return f2fs_ioc_defragment(filp, arg);
default:
return -ENOTTY;
}
case F2FS_IOC32_SETFLAGS:
cmd = F2FS_IOC_SETFLAGS;
break;
+ case F2FS_IOC32_GETVERSION:
+ cmd = F2FS_IOC_GETVERSION;
+ break;
+ case F2FS_IOC_START_ATOMIC_WRITE:
+ case F2FS_IOC_COMMIT_ATOMIC_WRITE:
+ case F2FS_IOC_START_VOLATILE_WRITE:
+ case F2FS_IOC_RELEASE_VOLATILE_WRITE:
+ case F2FS_IOC_ABORT_VOLATILE_WRITE:
+ case F2FS_IOC_SHUTDOWN:
+ case F2FS_IOC_SET_ENCRYPTION_POLICY:
+ case F2FS_IOC_GET_ENCRYPTION_PWSALT:
+ case F2FS_IOC_GET_ENCRYPTION_POLICY:
+ case F2FS_IOC_GARBAGE_COLLECT:
+ case F2FS_IOC_WRITE_CHECKPOINT:
+ case F2FS_IOC_DEFRAGMENT:
+ break;
default:
return -ENOIOCTLCMD;
}
#include <linux/kthread.h>
#include <linux/delay.h>
#include <linux/freezer.h>
-#include <linux/blkdev.h>
#include "f2fs.h"
#include "node.h"
{
/* SSR allocates in a segment unit */
if (p->alloc_mode == SSR)
- return 1 << sbi->log_blocks_per_seg;
+ return sbi->blocks_per_seg;
if (p->gc_mode == GC_GREEDY)
- return (1 << sbi->log_blocks_per_seg) * p->ofs_unit;
+ return sbi->blocks_per_seg * p->ofs_unit;
else if (p->gc_mode == GC_CB)
return UINT_MAX;
else /* No other gc_mode */
if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE)))
goto stop;
- if (unlikely(f2fs_cp_error(sbi)))
+ if (unlikely(f2fs_cp_error(sbi))) {
+ ret = -EIO;
goto stop;
+ }
if (gc_type == BG_GC && has_not_enough_free_secs(sbi, sec_freed)) {
gc_type = FG_GC;
return true;
return false;
}
-
-static inline int is_idle(struct f2fs_sb_info *sbi)
-{
- struct block_device *bdev = sbi->sb->s_bdev;
- struct request_queue *q = bdev_get_queue(bdev);
- struct request_list *rl = &q->root_rl;
- return !(rl->count[BLK_RW_SYNC]) && !(rl->count[BLK_RW_ASYNC]);
-}
bool f2fs_may_inline_data(struct inode *inode)
{
- if (!test_opt(F2FS_I_SB(inode), INLINE_DATA))
- return false;
-
if (f2fs_is_atomic_file(inode))
return false;
struct page *ipage, *page;
int err = 0;
+ if (!f2fs_has_inline_data(inode))
+ return 0;
+
page = grab_cache_page(inode->i_mapping, 0);
if (!page)
return -ENOMEM;
f2fs_unlock_op(sbi);
f2fs_put_page(page, 1);
+
+ f2fs_balance_fs(sbi, dn.node_changed);
+
return err;
}
fi->i_pino = le32_to_cpu(ri->i_pino);
fi->i_dir_level = ri->i_dir_level;
- f2fs_init_extent_tree(inode, &ri->i_ext);
+ if (f2fs_init_extent_tree(inode, &ri->i_ext))
+ set_page_dirty(node_page);
get_inline_info(fi, ri);
return ERR_PTR(ret);
}
-void update_inode(struct inode *inode, struct page *node_page)
+int update_inode(struct inode *inode, struct page *node_page)
{
struct f2fs_inode *ri;
__set_inode_rdev(inode, ri);
set_cold_node(inode, node_page);
- set_page_dirty(node_page);
-
clear_inode_flag(F2FS_I(inode), FI_DIRTY_INODE);
+
+ return set_page_dirty(node_page);
}
-void update_inode_page(struct inode *inode)
+int update_inode_page(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct page *node_page;
+ int ret = 0;
retry:
node_page = get_node_page(sbi, inode->i_ino);
if (IS_ERR(node_page)) {
} else if (err != -ENOENT) {
f2fs_stop_checkpoint(sbi);
}
- return;
+ return 0;
}
- update_inode(inode, node_page);
+ ret = update_inode(inode, node_page);
f2fs_put_page(node_page, 1);
+ return ret;
}
int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
* We need to balance fs here to prevent from producing dirty node pages
* during the urgent cleaning time when runing out of free sections.
*/
- update_inode_page(inode);
-
- f2fs_balance_fs(sbi);
+ if (update_inode_page(inode))
+ f2fs_balance_fs(sbi, true);
return 0;
}
goto out_clear;
f2fs_bug_on(sbi, get_dirty_pages(inode));
- remove_dirty_dir_inode(inode);
+ remove_dirty_inode(inode);
f2fs_destroy_extent_tree(inode);
if (xnid)
invalidate_mapping_pages(NODE_MAPPING(sbi), xnid, xnid);
if (is_inode_flag_set(fi, FI_APPEND_WRITE))
- add_dirty_inode(sbi, inode->i_ino, APPEND_INO);
+ add_ino_entry(sbi, inode->i_ino, APPEND_INO);
if (is_inode_flag_set(fi, FI_UPDATE_WRITE))
- add_dirty_inode(sbi, inode->i_ino, UPDATE_INO);
+ add_ino_entry(sbi, inode->i_ino, UPDATE_INO);
if (is_inode_flag_set(fi, FI_FREE_NID)) {
if (err && err != -ENOENT)
alloc_nid_done(sbi, inode->i_ino);
if (f2fs_encrypted_inode(dir) && f2fs_may_encrypt(inode))
f2fs_set_encrypted_inode(inode);
- if (f2fs_may_inline_data(inode))
+ if (test_opt(sbi, INLINE_DATA) && f2fs_may_inline_data(inode))
set_inode_flag(F2FS_I(inode), FI_INLINE_DATA);
if (f2fs_may_inline_dentry(inode))
set_inode_flag(F2FS_I(inode), FI_INLINE_DENTRY);
nid_t ino = 0;
int err;
- f2fs_balance_fs(sbi);
-
inode = f2fs_new_inode(dir, mode);
if (IS_ERR(inode))
return PTR_ERR(inode);
inode->i_mapping->a_ops = &f2fs_dblock_aops;
ino = inode->i_ino;
+ f2fs_balance_fs(sbi, true);
+
f2fs_lock_op(sbi);
err = f2fs_add_link(dentry, inode);
if (err)
!f2fs_is_child_context_consistent_with_parent(dir, inode))
return -EPERM;
- f2fs_balance_fs(sbi);
+ f2fs_balance_fs(sbi, true);
inode->i_ctime = CURRENT_TIME;
ihold(inode);
struct page *page;
int err = 0;
+ if (f2fs_readonly(sbi->sb)) {
+ f2fs_msg(sbi->sb, KERN_INFO,
+ "skip recovering inline_dots inode (ino:%lu, pino:%u) "
+ "in readonly mountpoint", dir->i_ino, pino);
+ return 0;
+ }
+
+ f2fs_balance_fs(sbi, true);
+
f2fs_lock_op(sbi);
de = f2fs_find_entry(dir, &dot, &page);
int err = -ENOENT;
trace_f2fs_unlink_enter(dir, dentry);
- f2fs_balance_fs(sbi);
de = f2fs_find_entry(dir, &dentry->d_name, &page);
if (!de)
goto fail;
+ f2fs_balance_fs(sbi, true);
+
f2fs_lock_op(sbi);
err = acquire_orphan_inode(sbi);
if (err) {
if (len > dir->i_sb->s_blocksize)
return -ENAMETOOLONG;
- f2fs_balance_fs(sbi);
-
inode = f2fs_new_inode(dir, S_IFLNK | S_IRWXUGO);
if (IS_ERR(inode))
return PTR_ERR(inode);
inode_nohighmem(inode);
inode->i_mapping->a_ops = &f2fs_dblock_aops;
+ f2fs_balance_fs(sbi, true);
+
f2fs_lock_op(sbi);
err = f2fs_add_link(dentry, inode);
if (err)
struct inode *inode;
int err;
- f2fs_balance_fs(sbi);
-
inode = f2fs_new_inode(dir, S_IFDIR | mode);
if (IS_ERR(inode))
return PTR_ERR(inode);
inode->i_mapping->a_ops = &f2fs_dblock_aops;
mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_HIGH_ZERO);
+ f2fs_balance_fs(sbi, true);
+
set_inode_flag(F2FS_I(inode), FI_INC_LINK);
f2fs_lock_op(sbi);
err = f2fs_add_link(dentry, inode);
struct inode *inode;
int err = 0;
- f2fs_balance_fs(sbi);
-
inode = f2fs_new_inode(dir, mode);
if (IS_ERR(inode))
return PTR_ERR(inode);
init_special_inode(inode, inode->i_mode, rdev);
inode->i_op = &f2fs_special_inode_operations;
+ f2fs_balance_fs(sbi, true);
+
f2fs_lock_op(sbi);
err = f2fs_add_link(dentry, inode);
if (err)
struct inode *inode;
int err;
- if (!whiteout)
- f2fs_balance_fs(sbi);
-
inode = f2fs_new_inode(dir, mode);
if (IS_ERR(inode))
return PTR_ERR(inode);
inode->i_mapping->a_ops = &f2fs_dblock_aops;
}
+ f2fs_balance_fs(sbi, true);
+
f2fs_lock_op(sbi);
err = acquire_orphan_inode(sbi);
if (err)
goto out;
}
- f2fs_balance_fs(sbi);
-
old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page);
if (!old_entry)
goto out;
if (!new_entry)
goto out_whiteout;
+ f2fs_balance_fs(sbi, true);
+
f2fs_lock_op(sbi);
err = acquire_orphan_inode(sbi);
update_inode_page(old_inode);
update_inode_page(new_inode);
} else {
+ f2fs_balance_fs(sbi, true);
+
f2fs_lock_op(sbi);
err = f2fs_add_link(new_dentry, old_inode);
new_inode)))
return -EPERM;
- f2fs_balance_fs(sbi);
-
old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page);
if (!old_entry)
goto out;
goto out_new_dir;
}
+ f2fs_balance_fs(sbi, true);
+
f2fs_lock_op(sbi);
err = update_dent_inode(old_inode, new_inode, &new_dentry->d_name);
{
struct page *cpage = NULL;
char *caddr, *paddr = NULL;
- struct f2fs_str cstr;
+ struct f2fs_str cstr = FSTR_INIT(NULL, 0);
struct f2fs_str pstr = FSTR_INIT(NULL, 0);
struct f2fs_encrypted_symlink_data *sd;
loff_t size = min_t(loff_t, i_size_read(inode), PAGE_SIZE - 1);
/* Symlink is encrypted */
sd = (struct f2fs_encrypted_symlink_data *)caddr;
cstr.len = le16_to_cpu(sd->len);
+
+ /* this is broken symlink case */
+ if (unlikely(cstr.len == 0)) {
+ res = -ENOENT;
+ goto errout;
+ }
cstr.name = kmalloc(cstr.len, GFP_NOFS);
if (!cstr.name) {
res = -ENOMEM;
memcpy(cstr.name, sd->encrypted_path, cstr.len);
/* this is broken symlink case */
- if (cstr.name[0] == 0 && cstr.len == 0) {
+ if (unlikely(cstr.name[0] == 0)) {
res = -ENOENT;
goto errout;
}
.get_link = f2fs_encrypted_get_link,
.getattr = f2fs_getattr,
.setattr = f2fs_setattr,
+#ifdef CONFIG_F2FS_FS_XATTR
.setxattr = generic_setxattr,
.getxattr = generic_getxattr,
.listxattr = f2fs_listxattr,
.removexattr = generic_removexattr,
+#endif
};
#endif
sizeof(struct ino_entry)) >> PAGE_CACHE_SHIFT;
res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
} else if (type == EXTENT_CACHE) {
- mem_size = (sbi->total_ext_tree * sizeof(struct extent_tree) +
+ mem_size = (atomic_read(&sbi->total_ext_tree) *
+ sizeof(struct extent_tree) +
atomic_read(&sbi->total_ext_node) *
sizeof(struct extent_node)) >> PAGE_CACHE_SHIFT;
res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
} else {
- if (sbi->sb->s_bdi->wb.dirty_exceeded)
- return false;
+ if (!sbi->sb->s_bdi->wb.dirty_exceeded)
+ return true;
}
return res;
}
{
struct nat_entry *e;
- down_write(&nm_i->nat_tree_lock);
e = __lookup_nat_cache(nm_i, nid);
if (!e) {
e = grab_nat_entry(nm_i, nid);
node_info_from_raw_nat(&e->ni, ne);
}
- up_write(&nm_i->nat_tree_lock);
}
static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
memset(&ne, 0, sizeof(struct f2fs_nat_entry));
+ down_write(&nm_i->nat_tree_lock);
+
/* Check current segment summary */
mutex_lock(&curseg->curseg_mutex);
i = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 0);
cache:
/* cache nat entry */
cache_nat_entry(NM_I(sbi), nid, &ne);
+ up_write(&nm_i->nat_tree_lock);
}
/*
ret = truncate_dnode(&rdn);
if (ret < 0)
goto out_err;
- set_nid(page, i, 0, false);
+ if (set_nid(page, i, 0, false))
+ dn->node_changed = true;
}
} else {
child_nofs = nofs + ofs * (NIDS_PER_BLOCK + 1) + 1;
rdn.nid = child_nid;
ret = truncate_nodes(&rdn, child_nofs, 0, depth - 1);
if (ret == (NIDS_PER_BLOCK + 1)) {
- set_nid(page, i, 0, false);
+ if (set_nid(page, i, 0, false))
+ dn->node_changed = true;
child_nofs += ret;
} else if (ret < 0 && ret != -ENOENT) {
goto out_err;
err = truncate_dnode(dn);
if (err < 0)
goto fail;
- set_nid(pages[idx], i, 0, false);
+ if (set_nid(pages[idx], i, 0, false))
+ dn->node_changed = true;
}
if (offset[idx + 1] == 0) {
fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true);
set_cold_node(dn->inode, page);
SetPageUptodate(page);
- set_page_dirty(page);
+ if (set_page_dirty(page))
+ dn->node_changed = true;
if (f2fs_has_xattr_block(ofs))
F2FS_I(dn->inode)->i_xattr_nid = dn->nid;
struct page *apage;
int err;
+ if (!nid)
+ return;
+ f2fs_bug_on(sbi, check_nid_range(sbi, nid));
+
apage = find_get_page(NODE_MAPPING(sbi), nid);
if (apage && PageUptodate(apage)) {
f2fs_put_page(apage, 0);
f2fs_put_page(apage, err ? 1 : 0);
}
-struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid)
+/*
+ * readahead MAX_RA_NODE number of node pages.
+ */
+void ra_node_pages(struct page *parent, int start)
{
- struct page *page;
- int err;
-repeat:
- page = grab_cache_page(NODE_MAPPING(sbi), nid);
- if (!page)
- return ERR_PTR(-ENOMEM);
+ struct f2fs_sb_info *sbi = F2FS_P_SB(parent);
+ struct blk_plug plug;
+ int i, end;
+ nid_t nid;
- err = read_node_page(page, READ_SYNC);
- if (err < 0) {
- f2fs_put_page(page, 1);
- return ERR_PTR(err);
- } else if (err != LOCKED_PAGE) {
- lock_page(page);
- }
+ blk_start_plug(&plug);
- if (unlikely(!PageUptodate(page) || nid != nid_of_node(page))) {
- ClearPageUptodate(page);
- f2fs_put_page(page, 1);
- return ERR_PTR(-EIO);
- }
- if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
- f2fs_put_page(page, 1);
- goto repeat;
+ /* Then, try readahead for siblings of the desired node */
+ end = start + MAX_RA_NODE;
+ end = min(end, NIDS_PER_BLOCK);
+ for (i = start; i < end; i++) {
+ nid = get_nid(parent, i, false);
+ ra_node_page(sbi, nid);
}
- return page;
+
+ blk_finish_plug(&plug);
}
-/*
- * Return a locked page for the desired node page.
- * And, readahead MAX_RA_NODE number of node pages.
- */
-struct page *get_node_page_ra(struct page *parent, int start)
+struct page *__get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid,
+ struct page *parent, int start)
{
- struct f2fs_sb_info *sbi = F2FS_P_SB(parent);
- struct blk_plug plug;
struct page *page;
- int err, i, end;
- nid_t nid;
+ int err;
- /* First, try getting the desired direct node. */
- nid = get_nid(parent, start, false);
if (!nid)
return ERR_PTR(-ENOENT);
+ f2fs_bug_on(sbi, check_nid_range(sbi, nid));
repeat:
page = grab_cache_page(NODE_MAPPING(sbi), nid);
if (!page)
goto page_hit;
}
- blk_start_plug(&plug);
-
- /* Then, try readahead for siblings of the desired node */
- end = start + MAX_RA_NODE;
- end = min(end, NIDS_PER_BLOCK);
- for (i = start + 1; i < end; i++) {
- nid = get_nid(parent, i, false);
- if (!nid)
- continue;
- ra_node_page(sbi, nid);
- }
-
- blk_finish_plug(&plug);
+ if (parent)
+ ra_node_pages(parent, start + 1);
lock_page(page);
+
+ if (unlikely(!PageUptodate(page))) {
+ f2fs_put_page(page, 1);
+ return ERR_PTR(-EIO);
+ }
if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
f2fs_put_page(page, 1);
goto repeat;
}
page_hit:
- if (unlikely(!PageUptodate(page))) {
- f2fs_put_page(page, 1);
- return ERR_PTR(-EIO);
- }
+ f2fs_bug_on(sbi, nid != nid_of_node(page));
return page;
}
+struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid)
+{
+ return __get_node_page(sbi, nid, NULL, 0);
+}
+
+struct page *get_node_page_ra(struct page *parent, int start)
+{
+ struct f2fs_sb_info *sbi = F2FS_P_SB(parent);
+ nid_t nid = get_nid(parent, start, false);
+
+ return __get_node_page(sbi, nid, parent, start);
+}
+
void sync_inode_page(struct dnode_of_data *dn)
{
+ int ret = 0;
+
if (IS_INODE(dn->node_page) || dn->inode_page == dn->node_page) {
- update_inode(dn->inode, dn->node_page);
+ ret = update_inode(dn->inode, dn->node_page);
} else if (dn->inode_page) {
if (!dn->inode_page_locked)
lock_page(dn->inode_page);
- update_inode(dn->inode, dn->inode_page);
+ ret = update_inode(dn->inode, dn->inode_page);
if (!dn->inode_page_locked)
unlock_page(dn->inode_page);
} else {
- update_inode_page(dn->inode);
+ ret = update_inode_page(dn->inode);
}
+ dn->node_changed = ret ? true: false;
}
int sync_node_pages(struct f2fs_sb_info *sbi, nid_t ino,
for (i = 0; i < nr_pages; i++) {
struct page *page = pvec.pages[i];
+ if (unlikely(f2fs_cp_error(sbi))) {
+ pagevec_release(&pvec);
+ return -EIO;
+ }
+
/*
* flushing sequence with step:
* 0. indirect nodes
up_read(&sbi->node_write);
unlock_page(page);
- if (wbc->for_reclaim)
+ if (wbc->for_reclaim || unlikely(f2fs_cp_error(sbi)))
f2fs_submit_merged_bio(sbi, NODE, WRITE);
return 0;
if (build) {
/* do not add allocated nids */
- down_read(&nm_i->nat_tree_lock);
ne = __lookup_nat_cache(nm_i, nid);
- if (ne &&
- (!get_nat_flag(ne, IS_CHECKPOINTED) ||
+ if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) ||
nat_get_blkaddr(ne) != NULL_ADDR))
allocated = true;
- up_read(&nm_i->nat_tree_lock);
if (allocated)
return 0;
}
ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES,
META_NAT, true);
+ down_read(&nm_i->nat_tree_lock);
+
while (1) {
struct page *page = get_current_nat_page(sbi, nid);
remove_free_nid(nm_i, nid);
}
mutex_unlock(&curseg->curseg_mutex);
+ up_read(&nm_i->nat_tree_lock);
ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid),
nm_i->ra_nid_pages, META_NAT, false);
/* We should not use stale free nids created by build_free_nids */
if (nm_i->fcnt && !on_build_free_nids(nm_i)) {
- struct node_info ni;
-
f2fs_bug_on(sbi, list_empty(&nm_i->free_nid_list));
list_for_each_entry(i, &nm_i->free_nid_list, list)
if (i->state == NID_NEW)
i->state = NID_ALLOC;
nm_i->fcnt--;
spin_unlock(&nm_i->free_nid_list_lock);
-
- /* check nid is allocated already */
- get_node_info(sbi, *nid, &ni);
- if (ni.blk_addr != NULL_ADDR) {
- alloc_nid_done(sbi, *nid);
- goto retry;
- }
return true;
}
spin_unlock(&nm_i->free_nid_list_lock);
raw_ne = nat_in_journal(sum, i);
- down_write(&nm_i->nat_tree_lock);
ne = __lookup_nat_cache(nm_i, nid);
if (!ne) {
ne = grab_nat_entry(nm_i, nid);
node_info_from_raw_nat(&ne->ni, &raw_ne);
}
__set_nat_cache_dirty(nm_i, ne);
- up_write(&nm_i->nat_tree_lock);
}
update_nats_in_cursum(sum, -i);
mutex_unlock(&curseg->curseg_mutex);
struct f2fs_nat_block *nat_blk;
struct nat_entry *ne, *cur;
struct page *page = NULL;
- struct f2fs_nm_info *nm_i = NM_I(sbi);
/*
* there are two steps to flush nat entries:
raw_ne = &nat_blk->entries[nid - start_nid];
}
raw_nat_from_node_info(raw_ne, &ne->ni);
-
- down_write(&NM_I(sbi)->nat_tree_lock);
nat_reset_flag(ne);
__clear_nat_cache_dirty(NM_I(sbi), ne);
- up_write(&NM_I(sbi)->nat_tree_lock);
-
if (nat_get_blkaddr(ne) == NULL_ADDR)
add_free_nid(sbi, nid, false);
}
f2fs_bug_on(sbi, set->entry_cnt);
- down_write(&nm_i->nat_tree_lock);
radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set);
- up_write(&nm_i->nat_tree_lock);
kmem_cache_free(nat_entry_set_slab, set);
}
if (!nm_i->dirty_nat_cnt)
return;
+
+ down_write(&nm_i->nat_tree_lock);
+
/*
* if there are no enough space in journal to store dirty nat
* entries, remove all entries from journal and merge them
if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt, NAT_JOURNAL))
remove_nats_in_journal(sbi);
- down_write(&nm_i->nat_tree_lock);
while ((found = __gang_lookup_nat_set(nm_i,
set_idx, SETVEC_SIZE, setvec))) {
unsigned idx;
__adjust_nat_entry_set(setvec[idx], &sets,
MAX_NAT_JENTRIES(sum));
}
- up_write(&nm_i->nat_tree_lock);
/* flush dirty nats in nat entry set */
list_for_each_entry_safe(set, tmp, &sets, set_list)
__flush_nat_entry_set(sbi, set);
+ up_write(&nm_i->nat_tree_lock);
+
f2fs_bug_on(sbi, nm_i->dirty_nat_cnt);
}
block_addr = (pgoff_t)(nm_i->nat_blkaddr +
(seg_off << sbi->log_blocks_per_seg << 1) +
- (block_off & ((1 << sbi->log_blocks_per_seg) - 1)));
+ (block_off & (sbi->blocks_per_seg - 1)));
if (f2fs_test_bit(block_off, nm_i->nat_bitmap))
block_addr += sbi->blocks_per_seg;
return true;
}
-static inline void set_nid(struct page *p, int off, nid_t nid, bool i)
+static inline int set_nid(struct page *p, int off, nid_t nid, bool i)
{
struct f2fs_node *rn = F2FS_NODE(p);
rn->i.i_nid[off - NODE_DIR1_BLOCK] = cpu_to_le32(nid);
else
rn->in.nid[off] = cpu_to_le32(nid);
- set_page_dirty(p);
+ return set_page_dirty(p);
}
static inline nid_t get_nid(struct page *p, int off, bool i)
ino_of_node(page), name);
}
+static bool is_same_inode(struct inode *inode, struct page *ipage)
+{
+ struct f2fs_inode *ri = F2FS_INODE(ipage);
+ struct timespec disk;
+
+ if (!IS_INODE(ipage))
+ return true;
+
+ disk.tv_sec = le64_to_cpu(ri->i_ctime);
+ disk.tv_nsec = le32_to_cpu(ri->i_ctime_nsec);
+ if (timespec_compare(&inode->i_ctime, &disk) > 0)
+ return false;
+
+ disk.tv_sec = le64_to_cpu(ri->i_atime);
+ disk.tv_nsec = le32_to_cpu(ri->i_atime_nsec);
+ if (timespec_compare(&inode->i_atime, &disk) > 0)
+ return false;
+
+ disk.tv_sec = le64_to_cpu(ri->i_mtime);
+ disk.tv_nsec = le32_to_cpu(ri->i_mtime_nsec);
+ if (timespec_compare(&inode->i_mtime, &disk) > 0)
+ return false;
+
+ return true;
+}
+
static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
{
unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi));
goto next;
entry = get_fsync_inode(head, ino_of_node(page));
- if (!entry) {
+ if (entry) {
+ if (!is_same_inode(entry->inode, page))
+ goto next;
+ } else {
if (IS_INODE(page) && is_dent_dnode(page)) {
err = recover_inode_page(sbi, page);
if (err)
return err;
}
-static int recover_data(struct f2fs_sb_info *sbi,
- struct list_head *head, int type)
+static int recover_data(struct f2fs_sb_info *sbi, struct list_head *head)
{
unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi));
struct curseg_info *curseg;
block_t blkaddr;
/* get node pages in the current segment */
- curseg = CURSEG_I(sbi, type);
+ curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
while (1) {
need_writecp = true;
/* step #2: recover data */
- err = recover_data(sbi, &inode_list, CURSEG_WARM_NODE);
+ err = recover_data(sbi, &inode_list);
if (!err)
f2fs_bug_on(sbi, !list_empty(&inode_list));
out:
.reason = CP_RECOVERY,
};
mutex_unlock(&sbi->cp_mutex);
- write_checkpoint(sbi, &cpc);
+ err = write_checkpoint(sbi, &cpc);
} else {
mutex_unlock(&sbi->cp_mutex);
}
/*
* __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c because
* f2fs_set_bit makes MSB and LSB reversed in a byte.
+ * @size must be integral times of unsigned long.
* Example:
* MSB <--> LSB
* f2fs_set_bit(0, bitmap) => 1000 0000
unsigned long size, unsigned long offset)
{
const unsigned long *p = addr + BIT_WORD(offset);
- unsigned long result = offset & ~(BITS_PER_LONG - 1);
+ unsigned long result = size;
unsigned long tmp;
if (offset >= size)
return size;
- size -= result;
+ size -= (offset & ~(BITS_PER_LONG - 1));
offset %= BITS_PER_LONG;
- if (!offset)
- goto aligned;
-
- tmp = __reverse_ulong((unsigned char *)p);
- tmp &= ~0UL >> offset;
-
- if (size < BITS_PER_LONG)
- goto found_first;
- if (tmp)
- goto found_middle;
-
- size -= BITS_PER_LONG;
- result += BITS_PER_LONG;
- p++;
-aligned:
- while (size & ~(BITS_PER_LONG-1)) {
+
+ while (1) {
+ if (*p == 0)
+ goto pass;
+
tmp = __reverse_ulong((unsigned char *)p);
+
+ tmp &= ~0UL >> offset;
+ if (size < BITS_PER_LONG)
+ tmp &= (~0UL << (BITS_PER_LONG - size));
if (tmp)
- goto found_middle;
- result += BITS_PER_LONG;
+ goto found;
+pass:
+ if (size <= BITS_PER_LONG)
+ break;
size -= BITS_PER_LONG;
+ offset = 0;
p++;
}
- if (!size)
- return result;
-
- tmp = __reverse_ulong((unsigned char *)p);
-found_first:
- tmp &= (~0UL << (BITS_PER_LONG - size));
- if (!tmp) /* Are any bits set? */
- return result + size; /* Nope. */
-found_middle:
- return result + __reverse_ffs(tmp);
+ return result;
+found:
+ return result - size + __reverse_ffs(tmp);
}
static unsigned long __find_rev_next_zero_bit(const unsigned long *addr,
unsigned long size, unsigned long offset)
{
const unsigned long *p = addr + BIT_WORD(offset);
- unsigned long result = offset & ~(BITS_PER_LONG - 1);
+ unsigned long result = size;
unsigned long tmp;
if (offset >= size)
return size;
- size -= result;
+ size -= (offset & ~(BITS_PER_LONG - 1));
offset %= BITS_PER_LONG;
- if (!offset)
- goto aligned;
-
- tmp = __reverse_ulong((unsigned char *)p);
- tmp |= ~((~0UL << offset) >> offset);
-
- if (size < BITS_PER_LONG)
- goto found_first;
- if (tmp != ~0UL)
- goto found_middle;
-
- size -= BITS_PER_LONG;
- result += BITS_PER_LONG;
- p++;
-aligned:
- while (size & ~(BITS_PER_LONG - 1)) {
+
+ while (1) {
+ if (*p == ~0UL)
+ goto pass;
+
tmp = __reverse_ulong((unsigned char *)p);
+
+ if (offset)
+ tmp |= ~0UL << (BITS_PER_LONG - offset);
+ if (size < BITS_PER_LONG)
+ tmp |= ~0UL >> size;
if (tmp != ~0UL)
- goto found_middle;
- result += BITS_PER_LONG;
+ goto found;
+pass:
+ if (size <= BITS_PER_LONG)
+ break;
size -= BITS_PER_LONG;
+ offset = 0;
p++;
}
- if (!size)
- return result;
-
- tmp = __reverse_ulong((unsigned char *)p);
-found_first:
- tmp |= ~(~0UL << (BITS_PER_LONG - size));
- if (tmp == ~0UL) /* Are any bits zero? */
- return result + size; /* Nope. */
-found_middle:
- return result + __reverse_ffz(tmp);
+ return result;
+found:
+ return result - size + __reverse_ffz(tmp);
}
void register_inmem_page(struct inode *inode, struct page *page)
* inode becomes free by iget_locked in f2fs_iget.
*/
if (!abort) {
- f2fs_balance_fs(sbi);
+ f2fs_balance_fs(sbi, true);
f2fs_lock_op(sbi);
}
submit_bio = true;
}
} else {
+ ClearPageUptodate(cur->page);
trace_f2fs_commit_inmem_page(cur->page, INMEM_DROP);
}
set_page_private(cur->page, 0);
* This function balances dirty node and dentry pages.
* In addition, it controls garbage collection.
*/
-void f2fs_balance_fs(struct f2fs_sb_info *sbi)
+void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
{
+ if (!need)
+ return;
/*
* We should do GC or end up with checkpoint, if there are so many dirty
* dir/node pages without enough free segments.
if (!available_free_memory(sbi, NAT_ENTRIES) ||
excess_prefree_segs(sbi) ||
!available_free_memory(sbi, INO_ENTRIES) ||
- jiffies > sbi->cp_expires)
+ (is_idle(sbi) && f2fs_time_over(sbi, CP_TIME))) {
+ if (test_opt(sbi, DATA_FLUSH))
+ sync_dirty_inodes(sbi, FILE_INODE);
f2fs_sync_fs(sbi->sb, true);
+ stat_inc_bg_cp_count(sbi->stat_info);
+ }
}
static int issue_flush_thread(void *data)
__u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1;
unsigned int start_segno, end_segno;
struct cp_control cpc;
+ int err = 0;
if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize)
return -EINVAL;
sbi->segs_per_sec) - 1, end_segno);
mutex_lock(&sbi->gc_mutex);
- write_checkpoint(sbi, &cpc);
+ err = write_checkpoint(sbi, &cpc);
mutex_unlock(&sbi->gc_mutex);
}
out:
range->len = F2FS_BLK_TO_BYTES(cpc.trimmed);
- return 0;
+ return err;
}
static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type)
if (le32_to_cpu(nid_in_journal(sum, i)) == val)
return i;
}
- if (alloc && nats_in_cursum(sum) < NAT_JOURNAL_ENTRIES)
+ if (alloc && __has_cursum_space(sum, 1, NAT_JOURNAL))
return update_nats_in_cursum(sum, 1);
} else if (type == SIT_JOURNAL) {
for (i = 0; i < sits_in_cursum(sum); i++)
if (le32_to_cpu(segno_in_journal(sum, i)) == val)
return i;
- if (alloc && sits_in_cursum(sum) < SIT_JOURNAL_ENTRIES)
+ if (alloc && __has_cursum_space(sum, 1, SIT_JOURNAL))
return update_sits_in_cursum(sum, 1);
}
return -1;
static unsigned long __count_extent_cache(struct f2fs_sb_info *sbi)
{
- return sbi->total_ext_tree + atomic_read(&sbi->total_ext_node);
+ return atomic_read(&sbi->total_zombie_tree) +
+ atomic_read(&sbi->total_ext_node);
}
unsigned long f2fs_shrink_count(struct shrinker *shrink,
Opt_extent_cache,
Opt_noextent_cache,
Opt_noinline_data,
+ Opt_data_flush,
Opt_err,
};
{Opt_extent_cache, "extent_cache"},
{Opt_noextent_cache, "noextent_cache"},
{Opt_noinline_data, "noinline_data"},
+ {Opt_data_flush, "data_flush"},
{Opt_err, NULL},
};
F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ra_nid_pages, ra_nid_pages);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level);
-F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, cp_interval, cp_interval);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, cp_interval, interval_time[CP_TIME]);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, idle_interval, interval_time[REQ_TIME]);
#define ATTR_LIST(name) (&f2fs_attr_##name.attr)
static struct attribute *f2fs_attrs[] = {
ATTR_LIST(ram_thresh),
ATTR_LIST(ra_nid_pages),
ATTR_LIST(cp_interval),
+ ATTR_LIST(idle_interval),
NULL,
};
case Opt_noinline_data:
clear_opt(sbi, INLINE_DATA);
break;
+ case Opt_data_flush:
+ set_opt(sbi, DATA_FLUSH);
+ break;
default:
f2fs_msg(sb, KERN_ERR,
"Unrecognized mount option \"%s\" or missing value",
fi->i_current_depth = 1;
fi->i_advise = 0;
init_rwsem(&fi->i_sem);
+ INIT_LIST_HEAD(&fi->dirty_list);
INIT_LIST_HEAD(&fi->inmem_pages);
mutex_init(&fi->inmem_lock);
* normally superblock is clean, so we need to release this.
* In addition, EIO will skip do checkpoint, we need this as well.
*/
- release_dirty_inode(sbi);
+ release_ino_entry(sbi);
release_discard_addrs(sbi);
f2fs_leave_shrinker(sbi);
wait_for_completion(&sbi->s_kobj_unregister);
sb->s_fs_info = NULL;
- brelse(sbi->raw_super_buf);
+ kfree(sbi->raw_super);
kfree(sbi);
}
int f2fs_sync_fs(struct super_block *sb, int sync)
{
struct f2fs_sb_info *sbi = F2FS_SB(sb);
+ int err = 0;
trace_f2fs_sync_fs(sb, sync);
cpc.reason = __get_cp_reason(sbi);
mutex_lock(&sbi->gc_mutex);
- write_checkpoint(sbi, &cpc);
+ err = write_checkpoint(sbi, &cpc);
mutex_unlock(&sbi->gc_mutex);
- } else {
- f2fs_balance_fs(sbi);
}
f2fs_trace_ios(NULL, 1);
- return 0;
+ return err;
}
static int f2fs_freeze(struct super_block *sb)
seq_puts(seq, ",extent_cache");
else
seq_puts(seq, ",noextent_cache");
+ if (test_opt(sbi, DATA_FLUSH))
+ seq_puts(seq, ",data_flush");
seq_printf(seq, ",active_logs=%u", sbi->active_logs);
return 0;
.get_parent = f2fs_get_parent,
};
-static loff_t max_file_size(unsigned bits)
+static loff_t max_file_blocks(void)
{
loff_t result = (DEF_ADDRS_PER_INODE - F2FS_INLINE_XATTR_ADDRS);
loff_t leaf_count = ADDRS_PER_BLOCK;
leaf_count *= NIDS_PER_BLOCK;
result += leaf_count;
- result <<= bits;
return result;
}
+static inline bool sanity_check_area_boundary(struct super_block *sb,
+ struct f2fs_super_block *raw_super)
+{
+ u32 segment0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr);
+ u32 cp_blkaddr = le32_to_cpu(raw_super->cp_blkaddr);
+ u32 sit_blkaddr = le32_to_cpu(raw_super->sit_blkaddr);
+ u32 nat_blkaddr = le32_to_cpu(raw_super->nat_blkaddr);
+ u32 ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
+ u32 main_blkaddr = le32_to_cpu(raw_super->main_blkaddr);
+ u32 segment_count_ckpt = le32_to_cpu(raw_super->segment_count_ckpt);
+ u32 segment_count_sit = le32_to_cpu(raw_super->segment_count_sit);
+ u32 segment_count_nat = le32_to_cpu(raw_super->segment_count_nat);
+ u32 segment_count_ssa = le32_to_cpu(raw_super->segment_count_ssa);
+ u32 segment_count_main = le32_to_cpu(raw_super->segment_count_main);
+ u32 segment_count = le32_to_cpu(raw_super->segment_count);
+ u32 log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg);
+
+ if (segment0_blkaddr != cp_blkaddr) {
+ f2fs_msg(sb, KERN_INFO,
+ "Mismatch start address, segment0(%u) cp_blkaddr(%u)",
+ segment0_blkaddr, cp_blkaddr);
+ return true;
+ }
+
+ if (cp_blkaddr + (segment_count_ckpt << log_blocks_per_seg) !=
+ sit_blkaddr) {
+ f2fs_msg(sb, KERN_INFO,
+ "Wrong CP boundary, start(%u) end(%u) blocks(%u)",
+ cp_blkaddr, sit_blkaddr,
+ segment_count_ckpt << log_blocks_per_seg);
+ return true;
+ }
+
+ if (sit_blkaddr + (segment_count_sit << log_blocks_per_seg) !=
+ nat_blkaddr) {
+ f2fs_msg(sb, KERN_INFO,
+ "Wrong SIT boundary, start(%u) end(%u) blocks(%u)",
+ sit_blkaddr, nat_blkaddr,
+ segment_count_sit << log_blocks_per_seg);
+ return true;
+ }
+
+ if (nat_blkaddr + (segment_count_nat << log_blocks_per_seg) !=
+ ssa_blkaddr) {
+ f2fs_msg(sb, KERN_INFO,
+ "Wrong NAT boundary, start(%u) end(%u) blocks(%u)",
+ nat_blkaddr, ssa_blkaddr,
+ segment_count_nat << log_blocks_per_seg);
+ return true;
+ }
+
+ if (ssa_blkaddr + (segment_count_ssa << log_blocks_per_seg) !=
+ main_blkaddr) {
+ f2fs_msg(sb, KERN_INFO,
+ "Wrong SSA boundary, start(%u) end(%u) blocks(%u)",
+ ssa_blkaddr, main_blkaddr,
+ segment_count_ssa << log_blocks_per_seg);
+ return true;
+ }
+
+ if (main_blkaddr + (segment_count_main << log_blocks_per_seg) !=
+ segment0_blkaddr + (segment_count << log_blocks_per_seg)) {
+ f2fs_msg(sb, KERN_INFO,
+ "Wrong MAIN_AREA boundary, start(%u) end(%u) blocks(%u)",
+ main_blkaddr,
+ segment0_blkaddr + (segment_count << log_blocks_per_seg),
+ segment_count_main << log_blocks_per_seg);
+ return true;
+ }
+
+ return false;
+}
+
static int sanity_check_raw_super(struct super_block *sb,
struct f2fs_super_block *raw_super)
{
return 1;
}
+ /* check log blocks per segment */
+ if (le32_to_cpu(raw_super->log_blocks_per_seg) != 9) {
+ f2fs_msg(sb, KERN_INFO,
+ "Invalid log blocks per segment (%u)\n",
+ le32_to_cpu(raw_super->log_blocks_per_seg));
+ return 1;
+ }
+
/* Currently, support 512/1024/2048/4096 bytes sector size */
if (le32_to_cpu(raw_super->log_sectorsize) >
F2FS_MAX_LOG_SECTOR_SIZE ||
le32_to_cpu(raw_super->log_sectorsize));
return 1;
}
+
+ /* check reserved ino info */
+ if (le32_to_cpu(raw_super->node_ino) != 1 ||
+ le32_to_cpu(raw_super->meta_ino) != 2 ||
+ le32_to_cpu(raw_super->root_ino) != 3) {
+ f2fs_msg(sb, KERN_INFO,
+ "Invalid Fs Meta Ino: node(%u) meta(%u) root(%u)",
+ le32_to_cpu(raw_super->node_ino),
+ le32_to_cpu(raw_super->meta_ino),
+ le32_to_cpu(raw_super->root_ino));
+ return 1;
+ }
+
+ /* check CP/SIT/NAT/SSA/MAIN_AREA area boundary */
+ if (sanity_check_area_boundary(sb, raw_super))
+ return 1;
+
return 0;
}
atomic_set(&sbi->nr_pages[i], 0);
sbi->dir_level = DEF_DIR_LEVEL;
- sbi->cp_interval = DEF_CP_INTERVAL;
+ sbi->interval_time[CP_TIME] = DEF_CP_INTERVAL;
+ sbi->interval_time[REQ_TIME] = DEF_IDLE_INTERVAL;
clear_sbi_flag(sbi, SBI_NEED_FSCK);
INIT_LIST_HEAD(&sbi->s_list);
*/
static int read_raw_super_block(struct super_block *sb,
struct f2fs_super_block **raw_super,
- struct buffer_head **raw_super_buf,
- int *recovery)
+ int *valid_super_block, int *recovery)
{
int block = 0;
- struct buffer_head *buffer;
- struct f2fs_super_block *super;
+ struct buffer_head *bh;
+ struct f2fs_super_block *super, *buf;
int err = 0;
+ super = kzalloc(sizeof(struct f2fs_super_block), GFP_KERNEL);
+ if (!super)
+ return -ENOMEM;
retry:
- buffer = sb_bread(sb, block);
- if (!buffer) {
+ bh = sb_bread(sb, block);
+ if (!bh) {
*recovery = 1;
f2fs_msg(sb, KERN_ERR, "Unable to read %dth superblock",
block + 1);
- if (block == 0) {
- block++;
- goto retry;
- } else {
- err = -EIO;
- goto out;
- }
+ err = -EIO;
+ goto next;
}
- super = (struct f2fs_super_block *)
- ((char *)(buffer)->b_data + F2FS_SUPER_OFFSET);
+ buf = (struct f2fs_super_block *)(bh->b_data + F2FS_SUPER_OFFSET);
/* sanity checking of raw super */
- if (sanity_check_raw_super(sb, super)) {
- brelse(buffer);
+ if (sanity_check_raw_super(sb, buf)) {
+ brelse(bh);
*recovery = 1;
f2fs_msg(sb, KERN_ERR,
"Can't find valid F2FS filesystem in %dth superblock",
block + 1);
- if (block == 0) {
- block++;
- goto retry;
- } else {
- err = -EINVAL;
- goto out;
- }
+ err = -EINVAL;
+ goto next;
}
if (!*raw_super) {
- *raw_super_buf = buffer;
+ memcpy(super, buf, sizeof(*super));
+ *valid_super_block = block;
*raw_super = super;
- } else {
- /* already have a valid superblock */
- brelse(buffer);
}
+ brelse(bh);
+next:
/* check the validity of the second superblock */
if (block == 0) {
block++;
goto retry;
}
-out:
/* No valid superblock */
- if (!*raw_super)
+ if (!*raw_super) {
+ kfree(super);
return err;
+ }
return 0;
}
+static int __f2fs_commit_super(struct f2fs_sb_info *sbi, int block)
+{
+ struct f2fs_super_block *super = F2FS_RAW_SUPER(sbi);
+ struct buffer_head *bh;
+ int err;
+
+ bh = sb_getblk(sbi->sb, block);
+ if (!bh)
+ return -EIO;
+
+ lock_buffer(bh);
+ memcpy(bh->b_data + F2FS_SUPER_OFFSET, super, sizeof(*super));
+ set_buffer_uptodate(bh);
+ set_buffer_dirty(bh);
+ unlock_buffer(bh);
+
+ /* it's rare case, we can do fua all the time */
+ err = __sync_dirty_buffer(bh, WRITE_FLUSH_FUA);
+ brelse(bh);
+
+ return err;
+}
+
int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover)
{
- struct buffer_head *sbh = sbi->raw_super_buf;
- sector_t block = sbh->b_blocknr;
int err;
/* write back-up superblock first */
- sbh->b_blocknr = block ? 0 : 1;
- mark_buffer_dirty(sbh);
- err = sync_dirty_buffer(sbh);
-
- sbh->b_blocknr = block;
+ err = __f2fs_commit_super(sbi, sbi->valid_super_block ? 0 : 1);
/* if we are in recovery path, skip writing valid superblock */
if (recover || err)
- goto out;
+ return err;
/* write current valid superblock */
- mark_buffer_dirty(sbh);
- err = sync_dirty_buffer(sbh);
-out:
- clear_buffer_write_io_error(sbh);
- set_buffer_uptodate(sbh);
- return err;
+ return __f2fs_commit_super(sbi, sbi->valid_super_block);
}
static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
{
struct f2fs_sb_info *sbi;
struct f2fs_super_block *raw_super;
- struct buffer_head *raw_super_buf;
struct inode *root;
long err;
bool retry = true, need_fsck = false;
char *options = NULL;
- int recovery, i;
+ int recovery, i, valid_super_block;
try_onemore:
err = -EINVAL;
raw_super = NULL;
- raw_super_buf = NULL;
+ valid_super_block = -1;
recovery = 0;
/* allocate memory for f2fs-specific super block info */
goto free_sbi;
}
- err = read_raw_super_block(sb, &raw_super, &raw_super_buf, &recovery);
+ err = read_raw_super_block(sb, &raw_super, &valid_super_block,
+ &recovery);
if (err)
goto free_sbi;
if (err)
goto free_options;
- sb->s_maxbytes = max_file_size(le32_to_cpu(raw_super->log_blocksize));
+ sbi->max_file_blocks = max_file_blocks();
+ sb->s_maxbytes = sbi->max_file_blocks <<
+ le32_to_cpu(raw_super->log_blocksize);
sb->s_max_links = F2FS_LINK_MAX;
get_random_bytes(&sbi->s_next_generation, sizeof(u32));
/* init f2fs-specific super block info */
sbi->sb = sb;
sbi->raw_super = raw_super;
- sbi->raw_super_buf = raw_super_buf;
+ sbi->valid_super_block = valid_super_block;
mutex_init(&sbi->gc_mutex);
mutex_init(&sbi->writepages);
mutex_init(&sbi->cp_mutex);
le64_to_cpu(sbi->ckpt->valid_block_count);
sbi->last_valid_block_count = sbi->total_valid_block_count;
sbi->alloc_valid_block_count = 0;
- INIT_LIST_HEAD(&sbi->dir_inode_list);
- spin_lock_init(&sbi->dir_inode_lock);
+ for (i = 0; i < NR_INODE_TYPE; i++) {
+ INIT_LIST_HEAD(&sbi->inode_list[i]);
+ spin_lock_init(&sbi->inode_lock[i]);
+ }
init_extent_cache_info(sbi);
f2fs_commit_super(sbi, true);
}
- sbi->cp_expires = round_jiffies_up(jiffies);
-
+ f2fs_update_time(sbi, CP_TIME);
+ f2fs_update_time(sbi, REQ_TIME);
return 0;
free_kobj:
kobject_del(&sbi->s_kobj);
+ kobject_put(&sbi->s_kobj);
+ wait_for_completion(&sbi->s_kobj_unregister);
free_proc:
if (sbi->s_proc) {
remove_proc_entry("segment_info", sbi->s_proc);
free_options:
kfree(options);
free_sb_buf:
- brelse(raw_super_buf);
+ kfree(raw_super);
free_sbi:
kfree(sbi);
err = register_filesystem(&f2fs_fs_type);
if (err)
goto free_shrinker;
- f2fs_create_root_stats();
+ err = f2fs_create_root_stats();
+ if (err)
+ goto free_filesystem;
f2fs_proc_root = proc_mkdir("fs/f2fs", NULL);
return 0;
+free_filesystem:
+ unregister_filesystem(&f2fs_fs_type);
free_shrinker:
unregister_shrinker(&f2fs_shrinker_info);
free_crypto:
if (ipage)
return __f2fs_setxattr(inode, index, name, value,
size, ipage, flags);
- f2fs_balance_fs(sbi);
+ f2fs_balance_fs(sbi, true);
f2fs_lock_op(sbi);
/* protect xattr_ver */
up_write(&F2FS_I(inode)->i_sem);
f2fs_unlock_op(sbi);
+ f2fs_update_time(sbi, REQ_TIME);
return err;
}
#define MAX_ACTIVE_DATA_LOGS 8
#define VERSION_LEN 256
+#define MAX_VOLUME_NAME 512
/*
* For superblock
__le32 node_ino; /* node inode number */
__le32 meta_ino; /* meta inode number */
__u8 uuid[16]; /* 128-bit uuid for volume */
- __le16 volume_name[512]; /* volume name */
+ __le16 volume_name[MAX_VOLUME_NAME]; /* volume name */
__le32 extension_count; /* # of extensions below */
__u8 extension_list[F2FS_MAX_EXTENSION][8]; /* extension array */
__le32 cp_payload;
__entry->node_cnt)
);
+DECLARE_EVENT_CLASS(f2fs_sync_dirty_inodes,
+
+ TP_PROTO(struct super_block *sb, int type, int count),
+
+ TP_ARGS(sb, type, count),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(int, type)
+ __field(int, count)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = sb->s_dev;
+ __entry->type = type;
+ __entry->count = count;
+ ),
+
+ TP_printk("dev = (%d,%d), %s, dirty count = %d",
+ show_dev(__entry),
+ show_file_type(__entry->type),
+ __entry->count)
+);
+
+DEFINE_EVENT(f2fs_sync_dirty_inodes, f2fs_sync_dirty_inodes_enter,
+
+ TP_PROTO(struct super_block *sb, int type, int count),
+
+ TP_ARGS(sb, type, count)
+);
+
+DEFINE_EVENT(f2fs_sync_dirty_inodes, f2fs_sync_dirty_inodes_exit,
+
+ TP_PROTO(struct super_block *sb, int type, int count),
+
+ TP_ARGS(sb, type, count)
+);
+
#endif /* _TRACE_F2FS_H */
/* This part must be outside protection */