f2fs: support plain user/group quota
[linux-2.6-microblaze.git] / fs / f2fs / node.c
index 481aa8d..3ed2f94 100644 (file)
@@ -22,7 +22,7 @@
 #include "trace.h"
 #include <trace/events/f2fs.h>
 
-#define on_build_free_nids(nmi) mutex_is_locked(&nm_i->build_lock)
+#define on_build_free_nids(nmi) mutex_is_locked(&(nm_i)->build_lock)
 
 static struct kmem_cache *nat_entry_slab;
 static struct kmem_cache *free_nid_slab;
@@ -63,8 +63,9 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type)
                int i;
 
                for (i = 0; i <= UPDATE_INO; i++)
-                       mem_size += (sbi->im[i].ino_num *
-                               sizeof(struct ino_entry)) >> PAGE_SHIFT;
+                       mem_size += sbi->im[i].ino_num *
+                                               sizeof(struct ino_entry);
+               mem_size >>= PAGE_SHIFT;
                res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
        } else if (type == EXTENT_CACHE) {
                mem_size = (atomic_read(&sbi->total_ext_tree) *
@@ -157,9 +158,6 @@ static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i,
        nid_t set = NAT_BLOCK_OFFSET(ne->ni.nid);
        struct nat_entry_set *head;
 
-       if (get_nat_flag(ne, IS_DIRTY))
-               return;
-
        head = radix_tree_lookup(&nm_i->nat_set_root, set);
        if (!head) {
                head = f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_NOFS);
@@ -170,25 +168,27 @@ static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i,
                head->entry_cnt = 0;
                f2fs_radix_tree_insert(&nm_i->nat_set_root, set, head);
        }
-       list_move_tail(&ne->list, &head->entry_list);
+
+       if (get_nat_flag(ne, IS_DIRTY))
+               goto refresh_list;
+
        nm_i->dirty_nat_cnt++;
        head->entry_cnt++;
        set_nat_flag(ne, IS_DIRTY, true);
+refresh_list:
+       if (nat_get_blkaddr(ne) == NEW_ADDR)
+               list_del_init(&ne->list);
+       else
+               list_move_tail(&ne->list, &head->entry_list);
 }
 
 static void __clear_nat_cache_dirty(struct f2fs_nm_info *nm_i,
-                                               struct nat_entry *ne)
+               struct nat_entry_set *set, struct nat_entry *ne)
 {
-       nid_t set = NAT_BLOCK_OFFSET(ne->ni.nid);
-       struct nat_entry_set *head;
-
-       head = radix_tree_lookup(&nm_i->nat_set_root, set);
-       if (head) {
-               list_move_tail(&ne->list, &nm_i->nat_entries);
-               set_nat_flag(ne, IS_DIRTY, false);
-               head->entry_cnt--;
-               nm_i->dirty_nat_cnt--;
-       }
+       list_move_tail(&ne->list, &nm_i->nat_entries);
+       set_nat_flag(ne, IS_DIRTY, false);
+       set->entry_cnt--;
+       nm_i->dirty_nat_cnt--;
 }
 
 static unsigned int __gang_lookup_nat_set(struct f2fs_nm_info *nm_i,
@@ -381,6 +381,7 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
        struct page *page = NULL;
        struct f2fs_nat_entry ne;
        struct nat_entry *e;
+       pgoff_t index;
        int i;
 
        ni->nid = nid;
@@ -406,17 +407,21 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
                node_info_from_raw_nat(ni, &ne);
        }
        up_read(&curseg->journal_rwsem);
-       if (i >= 0)
+       if (i >= 0) {
+               up_read(&nm_i->nat_tree_lock);
                goto cache;
+       }
 
        /* Fill node_info from nat page */
-       page = get_current_nat_page(sbi, start_nid);
+       index = current_nat_addr(sbi, nid);
+       up_read(&nm_i->nat_tree_lock);
+
+       page = get_meta_page(sbi, index);
        nat_blk = (struct f2fs_nat_block *)page_address(page);
        ne = nat_blk->entries[nid - start_nid];
        node_info_from_raw_nat(ni, &ne);
        f2fs_put_page(page, 1);
 cache:
-       up_read(&nm_i->nat_tree_lock);
        /* cache nat entry */
        down_write(&nm_i->nat_tree_lock);
        cache_nat_entry(sbi, nid, &ne);
@@ -673,15 +678,11 @@ static void truncate_node(struct dnode_of_data *dn)
        struct node_info ni;
 
        get_node_info(sbi, dn->nid, &ni);
-       if (dn->inode->i_blocks == 0) {
-               f2fs_bug_on(sbi, ni.blk_addr != NULL_ADDR);
-               goto invalidate;
-       }
        f2fs_bug_on(sbi, ni.blk_addr == NULL_ADDR);
 
        /* Deallocate node address */
        invalidate_blocks(sbi, ni.blk_addr);
-       dec_valid_node_count(sbi, dn->inode);
+       dec_valid_node_count(sbi, dn->inode, dn->nid == dn->inode->i_ino);
        set_node_addr(sbi, &ni, NULL_ADDR, false);
 
        if (dn->nid == dn->inode->i_ino) {
@@ -689,7 +690,7 @@ static void truncate_node(struct dnode_of_data *dn)
                dec_valid_inode_count(sbi);
                f2fs_inode_synced(dn->inode);
        }
-invalidate:
+
        clear_node_page_dirty(dn->node_page);
        set_sbi_flag(sbi, SBI_IS_DIRTY);
 
@@ -1006,7 +1007,7 @@ int remove_inode_page(struct inode *inode)
 
        /* 0 is possible, after f2fs_new_inode() has failed */
        f2fs_bug_on(F2FS_I_SB(inode),
-                       inode->i_blocks != 0 && inode->i_blocks != 1);
+                       inode->i_blocks != 0 && inode->i_blocks != 8);
 
        /* will put inode & node pages */
        truncate_node(&dn);
@@ -1039,10 +1040,9 @@ struct page *new_node_page(struct dnode_of_data *dn,
        if (!page)
                return ERR_PTR(-ENOMEM);
 
-       if (unlikely(!inc_valid_node_count(sbi, dn->inode))) {
-               err = -ENOSPC;
+       if (unlikely((err = inc_valid_node_count(sbi, dn->inode, !ofs))))
                goto fail;
-       }
+
 #ifdef CONFIG_F2FS_CHECK_FS
        get_node_info(sbi, dn->nid, &new_ni);
        f2fs_bug_on(sbi, new_ni.blk_addr != NULL_ADDR);
@@ -1152,6 +1152,7 @@ repeat:
                f2fs_put_page(page, 1);
                return ERR_PTR(err);
        } else if (err == LOCKED_PAGE) {
+               err = 0;
                goto page_hit;
        }
 
@@ -1165,15 +1166,22 @@ repeat:
                goto repeat;
        }
 
-       if (unlikely(!PageUptodate(page)))
+       if (unlikely(!PageUptodate(page))) {
+               err = -EIO;
                goto out_err;
+       }
 page_hit:
        if(unlikely(nid != nid_of_node(page))) {
-               f2fs_bug_on(sbi, 1);
+               f2fs_msg(sbi->sb, KERN_WARNING, "inconsistent node block, "
+                       "nid:%lu, node_footer[nid:%u,ino:%u,ofs:%u,cpver:%llu,blkaddr:%u]",
+                       nid, nid_of_node(page), ino_of_node(page),
+                       ofs_of_node(page), cpver_of_node(page),
+                       next_blkaddr_of_node(page));
                ClearPageUptodate(page);
+               err = -EINVAL;
 out_err:
                f2fs_put_page(page, 1);
-               return ERR_PTR(-EIO);
+               return ERR_PTR(err);
        }
        return page;
 }
@@ -1373,15 +1381,15 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted,
        up_read(&sbi->node_write);
 
        if (wbc->for_reclaim) {
-               f2fs_submit_merged_bio_cond(sbi, page->mapping->host, 0,
-                                               page->index, NODE, WRITE);
+               f2fs_submit_merged_write_cond(sbi, page->mapping->host, 0,
+                                               page->index, NODE);
                submitted = NULL;
        }
 
        unlock_page(page);
 
        if (unlikely(f2fs_cp_error(sbi))) {
-               f2fs_submit_merged_bio(sbi, NODE, WRITE);
+               f2fs_submit_merged_write(sbi, NODE);
                submitted = NULL;
        }
        if (submitted)
@@ -1463,6 +1471,9 @@ continue_unlock:
                        f2fs_wait_on_page_writeback(page, NODE, true);
                        BUG_ON(PageWriteback(page));
 
+                       set_fsync_mark(page, 0);
+                       set_dentry_mark(page, 0);
+
                        if (!atomic || page == last_page) {
                                set_fsync_mark(page, 1);
                                if (IS_INODE(page)) {
@@ -1515,8 +1526,7 @@ continue_unlock:
        }
 out:
        if (last_idx != ULONG_MAX)
-               f2fs_submit_merged_bio_cond(sbi, NULL, ino, last_idx,
-                                                       NODE, WRITE);
+               f2fs_submit_merged_write_cond(sbi, NULL, ino, last_idx, NODE);
        return ret ? -EIO: 0;
 }
 
@@ -1622,7 +1632,7 @@ continue_unlock:
        }
 out:
        if (nwritten)
-               f2fs_submit_merged_bio(sbi, NODE, WRITE);
+               f2fs_submit_merged_write(sbi, NODE);
        return ret;
 }
 
@@ -1672,6 +1682,9 @@ static int f2fs_write_node_pages(struct address_space *mapping,
        struct blk_plug plug;
        long diff;
 
+       if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
+               goto skip_write;
+
        /* balancing f2fs's metadata in background */
        f2fs_balance_fs_bg(sbi);
 
@@ -1766,40 +1779,67 @@ static void __remove_nid_from_list(struct f2fs_sb_info *sbi,
 static bool add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
 {
        struct f2fs_nm_info *nm_i = NM_I(sbi);
-       struct free_nid *i;
+       struct free_nid *i, *e;
        struct nat_entry *ne;
-       int err;
+       int err = -EINVAL;
+       bool ret = false;
 
        /* 0 nid should not be used */
        if (unlikely(nid == 0))
                return false;
 
-       if (build) {
-               /* do not add allocated nids */
-               ne = __lookup_nat_cache(nm_i, nid);
-               if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) ||
-                               nat_get_blkaddr(ne) != NULL_ADDR))
-                       return false;
-       }
-
        i = f2fs_kmem_cache_alloc(free_nid_slab, GFP_NOFS);
        i->nid = nid;
        i->state = NID_NEW;
 
-       if (radix_tree_preload(GFP_NOFS)) {
-               kmem_cache_free(free_nid_slab, i);
-               return true;
-       }
+       if (radix_tree_preload(GFP_NOFS))
+               goto err;
 
        spin_lock(&nm_i->nid_list_lock);
+
+       if (build) {
+               /*
+                *   Thread A             Thread B
+                *  - f2fs_create
+                *   - f2fs_new_inode
+                *    - alloc_nid
+                *     - __insert_nid_to_list(ALLOC_NID_LIST)
+                *                     - f2fs_balance_fs_bg
+                *                      - build_free_nids
+                *                       - __build_free_nids
+                *                        - scan_nat_page
+                *                         - add_free_nid
+                *                          - __lookup_nat_cache
+                *  - f2fs_add_link
+                *   - init_inode_metadata
+                *    - new_inode_page
+                *     - new_node_page
+                *      - set_node_addr
+                *  - alloc_nid_done
+                *   - __remove_nid_from_list(ALLOC_NID_LIST)
+                *                         - __insert_nid_to_list(FREE_NID_LIST)
+                */
+               ne = __lookup_nat_cache(nm_i, nid);
+               if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) ||
+                               nat_get_blkaddr(ne) != NULL_ADDR))
+                       goto err_out;
+
+               e = __lookup_free_nid_list(nm_i, nid);
+               if (e) {
+                       if (e->state == NID_NEW)
+                               ret = true;
+                       goto err_out;
+               }
+       }
+       ret = true;
        err = __insert_nid_to_list(sbi, i, FREE_NID_LIST, true);
+err_out:
        spin_unlock(&nm_i->nid_list_lock);
        radix_tree_preload_end();
-       if (err) {
+err:
+       if (err)
                kmem_cache_free(free_nid_slab, i);
-               return true;
-       }
-       return true;
+       return ret;
 }
 
 static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid)
@@ -1821,7 +1861,7 @@ static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid)
 }
 
 static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid,
-                       bool set, bool build, bool locked)
+                                                       bool set, bool build)
 {
        struct f2fs_nm_info *nm_i = NM_I(sbi);
        unsigned int nat_ofs = NAT_BLOCK_OFFSET(nid);
@@ -1835,14 +1875,10 @@ static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid,
        else
                __clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
 
-       if (!locked)
-               spin_lock(&nm_i->free_nid_lock);
        if (set)
                nm_i->free_nid_count[nat_ofs]++;
        else if (!build)
                nm_i->free_nid_count[nat_ofs]--;
-       if (!locked)
-               spin_unlock(&nm_i->free_nid_lock);
 }
 
 static void scan_nat_page(struct f2fs_sb_info *sbi,
@@ -1871,7 +1907,9 @@ static void scan_nat_page(struct f2fs_sb_info *sbi,
                f2fs_bug_on(sbi, blk_addr == NEW_ADDR);
                if (blk_addr == NULL_ADDR)
                        freed = add_free_nid(sbi, start_nid, true);
-               update_free_nid_bitmap(sbi, start_nid, freed, true, false);
+               spin_lock(&NM_I(sbi)->nid_list_lock);
+               update_free_nid_bitmap(sbi, start_nid, freed, true);
+               spin_unlock(&NM_I(sbi)->nid_list_lock);
        }
 }
 
@@ -1927,6 +1965,9 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
        int i = 0;
        nid_t nid = nm_i->next_scan_nid;
 
+       if (unlikely(nid >= nm_i->max_nid))
+               nid = 0;
+
        /* Enough entries */
        if (nm_i->nid_cnt[FREE_NID_LIST] >= NAT_ENTRY_PER_BLOCK)
                return;
@@ -2026,7 +2067,7 @@ retry:
                __insert_nid_to_list(sbi, i, ALLOC_NID_LIST, false);
                nm_i->available_nids--;
 
-               update_free_nid_bitmap(sbi, *nid, false, false, false);
+               update_free_nid_bitmap(sbi, *nid, false, false);
 
                spin_unlock(&nm_i->nid_list_lock);
                return true;
@@ -2082,7 +2123,7 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
 
        nm_i->available_nids++;
 
-       update_free_nid_bitmap(sbi, nid, true, false, false);
+       update_free_nid_bitmap(sbi, nid, true, false);
 
        spin_unlock(&nm_i->nid_list_lock);
 
@@ -2161,14 +2202,14 @@ int recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr)
        get_node_info(sbi, prev_xnid, &ni);
        f2fs_bug_on(sbi, ni.blk_addr == NULL_ADDR);
        invalidate_blocks(sbi, ni.blk_addr);
-       dec_valid_node_count(sbi, inode);
+       dec_valid_node_count(sbi, inode, false);
        set_node_addr(sbi, &ni, NULL_ADDR, false);
 
 recover_xnid:
        /* 2: update xattr nid in inode */
        remove_free_nid(sbi, new_xnid);
        f2fs_i_xnid_write(inode, new_xnid);
-       if (unlikely(!inc_valid_node_count(sbi, inode)))
+       if (unlikely(inc_valid_node_count(sbi, inode, false)))
                f2fs_bug_on(sbi, 1);
        update_inode_page(inode);
 
@@ -2226,7 +2267,7 @@ retry:
        new_ni = old_ni;
        new_ni.ino = ino;
 
-       if (unlikely(!inc_valid_node_count(sbi, NULL)))
+       if (unlikely(inc_valid_node_count(sbi, NULL, true)))
                WARN_ON(1);
        set_node_addr(sbi, &new_ni, NEW_ADDR, false);
        inc_valid_inode_count(sbi);
@@ -2393,8 +2434,7 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
                nid_t nid = nat_get_nid(ne);
                int offset;
 
-               if (nat_get_blkaddr(ne) == NEW_ADDR)
-                       continue;
+               f2fs_bug_on(sbi, nat_get_blkaddr(ne) == NEW_ADDR);
 
                if (to_journal) {
                        offset = lookup_journal_in_cursum(journal,
@@ -2407,16 +2447,16 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
                }
                raw_nat_from_node_info(raw_ne, &ne->ni);
                nat_reset_flag(ne);
-               __clear_nat_cache_dirty(NM_I(sbi), ne);
+               __clear_nat_cache_dirty(NM_I(sbi), set, ne);
                if (nat_get_blkaddr(ne) == NULL_ADDR) {
                        add_free_nid(sbi, nid, false);
                        spin_lock(&NM_I(sbi)->nid_list_lock);
                        NM_I(sbi)->available_nids++;
-                       update_free_nid_bitmap(sbi, nid, true, false, false);
+                       update_free_nid_bitmap(sbi, nid, true, false);
                        spin_unlock(&NM_I(sbi)->nid_list_lock);
                } else {
                        spin_lock(&NM_I(sbi)->nid_list_lock);
-                       update_free_nid_bitmap(sbi, nid, false, false, false);
+                       update_free_nid_bitmap(sbi, nid, false, false);
                        spin_unlock(&NM_I(sbi)->nid_list_lock);
                }
        }
@@ -2428,10 +2468,11 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
                f2fs_put_page(page, 1);
        }
 
-       f2fs_bug_on(sbi, set->entry_cnt);
-
-       radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set);
-       kmem_cache_free(nat_entry_set_slab, set);
+       /* Allow dirty nats by node block allocation in write_begin */
+       if (!set->entry_cnt) {
+               radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set);
+               kmem_cache_free(nat_entry_set_slab, set);
+       }
 }
 
 /*
@@ -2476,8 +2517,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
                __flush_nat_entry_set(sbi, set, cpc);
 
        up_write(&nm_i->nat_tree_lock);
-
-       f2fs_bug_on(sbi, nm_i->dirty_nat_cnt);
+       /* Allow dirty nats by node block allocation in write_begin */
 }
 
 static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
@@ -2522,7 +2562,7 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
        return 0;
 }
 
-inline void load_free_nid_bitmap(struct f2fs_sb_info *sbi)
+static inline void load_free_nid_bitmap(struct f2fs_sb_info *sbi)
 {
        struct f2fs_nm_info *nm_i = NM_I(sbi);
        unsigned int i = 0;
@@ -2541,10 +2581,10 @@ inline void load_free_nid_bitmap(struct f2fs_sb_info *sbi)
                nid = i * NAT_ENTRY_PER_BLOCK;
                last_nid = (i + 1) * NAT_ENTRY_PER_BLOCK;
 
-               spin_lock(&nm_i->free_nid_lock);
+               spin_lock(&NM_I(sbi)->nid_list_lock);
                for (; nid < last_nid; nid++)
-                       update_free_nid_bitmap(sbi, nid, true, true, true);
-               spin_unlock(&nm_i->free_nid_lock);
+                       update_free_nid_bitmap(sbi, nid, true, true);
+               spin_unlock(&NM_I(sbi)->nid_list_lock);
        }
 
        for (i = 0; i < nm_i->nat_blocks; i++) {
@@ -2635,9 +2675,6 @@ static int init_free_nid_cache(struct f2fs_sb_info *sbi)
                                        sizeof(unsigned short), GFP_KERNEL);
        if (!nm_i->free_nid_count)
                return -ENOMEM;
-
-       spin_lock_init(&nm_i->free_nid_lock);
-
        return 0;
 }