diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index c695ff4..c117e09 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -181,11 +181,12 @@ bool need_SSR(struct f2fs_sb_info *sbi)
                return true;
 
        return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs +
-                                               2 * reserved_sections(sbi));
+                       SM_I(sbi)->min_ssr_sections + reserved_sections(sbi));
 }
 
 void register_inmem_page(struct inode *inode, struct page *page)
 {
+       struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        struct f2fs_inode_info *fi = F2FS_I(inode);
        struct inmem_pages *new;
 
@@ -204,6 +205,10 @@ void register_inmem_page(struct inode *inode, struct page *page)
        mutex_lock(&fi->inmem_lock);
        get_page(page);
        list_add_tail(&new->list, &fi->inmem_pages);
+       spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
+       if (list_empty(&fi->inmem_ilist))
+               list_add_tail(&fi->inmem_ilist, &sbi->inode_list[ATOMIC_FILE]);
+       spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
        inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
        mutex_unlock(&fi->inmem_lock);
 
@@ -262,12 +267,41 @@ next:
        return err;
 }
 
+void drop_inmem_pages_all(struct f2fs_sb_info *sbi)
+{
+       struct list_head *head = &sbi->inode_list[ATOMIC_FILE];
+       struct inode *inode;
+       struct f2fs_inode_info *fi;
+next:
+       spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
+       if (list_empty(head)) {
+               spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
+               return;
+       }
+       fi = list_first_entry(head, struct f2fs_inode_info, inmem_ilist);
+       inode = igrab(&fi->vfs_inode);
+       spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
+
+       if (inode) {
+               drop_inmem_pages(inode);
+               iput(inode);
+       }
+       congestion_wait(BLK_RW_ASYNC, HZ/50);
+       cond_resched();
+       goto next;
+}
+
 void drop_inmem_pages(struct inode *inode)
 {
+       struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        struct f2fs_inode_info *fi = F2FS_I(inode);
 
        mutex_lock(&fi->inmem_lock);
        __revoke_inmem_pages(inode, &fi->inmem_pages, true, false);
+       spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
+       if (!list_empty(&fi->inmem_ilist))
+               list_del_init(&fi->inmem_ilist);
+       spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
        mutex_unlock(&fi->inmem_lock);
 
        clear_inode_flag(inode, FI_ATOMIC_FILE);
@@ -313,6 +347,7 @@ static int __commit_inmem_pages(struct inode *inode,
        struct inmem_pages *cur, *tmp;
        struct f2fs_io_info fio = {
                .sbi = sbi,
+               .ino = inode->i_ino,
                .type = DATA,
                .op = REQ_OP_WRITE,
                .op_flags = REQ_SYNC | REQ_PRIO,
@@ -398,6 +433,10 @@ int commit_inmem_pages(struct inode *inode)
                /* drop all uncommitted pages */
                __revoke_inmem_pages(inode, &fi->inmem_pages, true, false);
        }
+       spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
+       if (!list_empty(&fi->inmem_ilist))
+               list_del_init(&fi->inmem_ilist);
+       spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
        mutex_unlock(&fi->inmem_lock);
 
        clear_inode_flag(inode, FI_ATOMIC_COMMIT);
@@ -472,7 +511,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
 static int __submit_flush_wait(struct f2fs_sb_info *sbi,
                                struct block_device *bdev)
 {
-       struct bio *bio = f2fs_bio_alloc(0);
+       struct bio *bio = f2fs_bio_alloc(sbi, 0, true);
        int ret;
 
        bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH;
@@ -485,15 +524,17 @@ static int __submit_flush_wait(struct f2fs_sb_info *sbi,
        return ret;
 }
 
-static int submit_flush_wait(struct f2fs_sb_info *sbi)
+static int submit_flush_wait(struct f2fs_sb_info *sbi, nid_t ino)
 {
-       int ret = __submit_flush_wait(sbi, sbi->sb->s_bdev);
+       int ret = 0;
        int i;
 
-       if (!sbi->s_ndevs || ret)
-               return ret;
+       if (!sbi->s_ndevs)
+               return __submit_flush_wait(sbi, sbi->sb->s_bdev);
 
-       for (i = 1; i < sbi->s_ndevs; i++) {
+       for (i = 0; i < sbi->s_ndevs; i++) {
+               if (!is_dirty_device(sbi, ino, i, FLUSH_INO))
+                       continue;
                ret = __submit_flush_wait(sbi, FDEV(i).bdev);
                if (ret)
                        break;
@@ -519,7 +560,9 @@ repeat:
                fcc->dispatch_list = llist_del_all(&fcc->issue_list);
                fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list);
 
-               ret = submit_flush_wait(sbi);
+               cmd = llist_entry(fcc->dispatch_list, struct flush_cmd, llnode);
+
+               ret = submit_flush_wait(sbi, cmd->ino);
                atomic_inc(&fcc->issued_flush);
 
                llist_for_each_entry_safe(cmd, next,
@@ -537,7 +580,7 @@ repeat:
        goto repeat;
 }
 
-int f2fs_issue_flush(struct f2fs_sb_info *sbi)
+int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino)
 {
        struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
        struct flush_cmd cmd;
@@ -547,19 +590,20 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi)
                return 0;
 
        if (!test_opt(sbi, FLUSH_MERGE)) {
-               ret = submit_flush_wait(sbi);
+               ret = submit_flush_wait(sbi, ino);
                atomic_inc(&fcc->issued_flush);
                return ret;
        }
 
-       if (atomic_inc_return(&fcc->issing_flush) == 1) {
-               ret = submit_flush_wait(sbi);
+       if (atomic_inc_return(&fcc->issing_flush) == 1 || sbi->s_ndevs > 1) {
+               ret = submit_flush_wait(sbi, ino);
                atomic_dec(&fcc->issing_flush);
 
                atomic_inc(&fcc->issued_flush);
                return ret;
        }
 
+       cmd.ino = ino;
        init_completion(&cmd.wait);
 
        llist_add(&cmd.llnode, &fcc->issue_list);
@@ -583,7 +627,7 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi)
                } else {
                        struct flush_cmd *tmp, *next;
 
-                       ret = submit_flush_wait(sbi);
+                       ret = submit_flush_wait(sbi, ino);
 
                        llist_for_each_entry_safe(tmp, next, list, llnode) {
                                if (tmp == &cmd) {
@@ -653,6 +697,28 @@ void destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free)
        }
 }
 
+int f2fs_flush_device_cache(struct f2fs_sb_info *sbi)
+{
+       int ret = 0, i;
+
+       if (!sbi->s_ndevs)
+               return 0;
+
+       for (i = 1; i < sbi->s_ndevs; i++) {
+               if (!f2fs_test_bit(i, (char *)&sbi->dirty_device))
+                       continue;
+               ret = __submit_flush_wait(sbi, FDEV(i).bdev);
+               if (ret)
+                       break;
+
+               spin_lock(&sbi->dev_lock);
+               f2fs_clear_bit(i, (char *)&sbi->dirty_device);
+               spin_unlock(&sbi->dev_lock);
+       }
+
+       return ret;
+}
+
 static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
                enum dirty_type dirty_type)
 {
@@ -794,6 +860,8 @@ static void __remove_discard_cmd(struct f2fs_sb_info *sbi,
 {
        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
 
+       trace_f2fs_remove_discard(dc->bdev, dc->start, dc->len);
+
        f2fs_bug_on(sbi, dc->ref);
 
        if (dc->error == -EOPNOTSUPP)
@@ -845,10 +913,14 @@ void __check_sit_bitmap(struct f2fs_sb_info *sbi,
 
 /* this function is copied from blkdev_issue_discard from block/blk-lib.c */
 static void __submit_discard_cmd(struct f2fs_sb_info *sbi,
-                               struct discard_cmd *dc)
+                                               struct discard_policy *dpolicy,
+                                               struct discard_cmd *dc)
 {
        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
+       struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
+                                       &(dcc->fstrim_list) : &(dcc->wait_list);
        struct bio *bio = NULL;
+       int flag = dpolicy->sync ? REQ_SYNC : 0;
 
        if (dc->state != D_PREP)
                return;
@@ -867,9 +939,9 @@ static void __submit_discard_cmd(struct f2fs_sb_info *sbi,
                if (bio) {
                        bio->bi_private = dc;
                        bio->bi_end_io = f2fs_submit_discard_endio;
-                       bio->bi_opf |= REQ_SYNC;
+                       bio->bi_opf |= flag;
                        submit_bio(bio);
-                       list_move_tail(&dc->list, &dcc->wait_list);
+                       list_move_tail(&dc->list, wait_list);
                        __check_sit_bitmap(sbi, dc->start, dc->start + dc->len);
 
                        f2fs_update_iostat(sbi, FS_DISCARD, 1);
@@ -886,7 +958,7 @@ static struct discard_cmd *__insert_discard_tree(struct f2fs_sb_info *sbi,
                                struct rb_node *insert_parent)
 {
        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
-       struct rb_node **p = &dcc->root.rb_node;
+       struct rb_node **p;
        struct rb_node *parent = NULL;
        struct discard_cmd *dc = NULL;
 
@@ -1054,58 +1126,107 @@ static int __queue_discard_cmd(struct f2fs_sb_info *sbi,
        return 0;
 }
 
-static int __issue_discard_cmd(struct f2fs_sb_info *sbi, bool issue_cond)
+static void __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
+                                       struct discard_policy *dpolicy,
+                                       unsigned int start, unsigned int end)
+{
+       struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
+       struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
+       struct rb_node **insert_p = NULL, *insert_parent = NULL;
+       struct discard_cmd *dc;
+       struct blk_plug plug;
+       int issued;
+
+next:
+       issued = 0;
+
+       mutex_lock(&dcc->cmd_lock);
+       f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, &dcc->root));
+
+       dc = (struct discard_cmd *)__lookup_rb_tree_ret(&dcc->root,
+                                       NULL, start,
+                                       (struct rb_entry **)&prev_dc,
+                                       (struct rb_entry **)&next_dc,
+                                       &insert_p, &insert_parent, true);
+       if (!dc)
+               dc = next_dc;
+
+       blk_start_plug(&plug);
+
+       while (dc && dc->lstart <= end) {
+               struct rb_node *node;
+
+               if (dc->len < dpolicy->granularity)
+                       goto skip;
+
+               if (dc->state != D_PREP) {
+                       list_move_tail(&dc->list, &dcc->fstrim_list);
+                       goto skip;
+               }
+
+               __submit_discard_cmd(sbi, dpolicy, dc);
+
+               if (++issued >= dpolicy->max_requests) {
+                       start = dc->lstart + dc->len;
+
+                       blk_finish_plug(&plug);
+                       mutex_unlock(&dcc->cmd_lock);
+
+                       schedule();
+
+                       goto next;
+               }
+skip:
+               node = rb_next(&dc->rb_node);
+               dc = rb_entry_safe(node, struct discard_cmd, rb_node);
+
+               if (fatal_signal_pending(current))
+                       break;
+       }
+
+       blk_finish_plug(&plug);
+       mutex_unlock(&dcc->cmd_lock);
+}
+
+static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
+                                       struct discard_policy *dpolicy)
 {
        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
        struct list_head *pend_list;
        struct discard_cmd *dc, *tmp;
        struct blk_plug plug;
-       int iter = 0, issued = 0;
-       int i;
+       int i, iter = 0, issued = 0;
        bool io_interrupted = false;
 
-       mutex_lock(&dcc->cmd_lock);
-       f2fs_bug_on(sbi,
-               !__check_rb_tree_consistence(sbi, &dcc->root));
-       blk_start_plug(&plug);
-       for (i = MAX_PLIST_NUM - 1;
-                       i >= 0 && plist_issue(dcc->pend_list_tag[i]); i--) {
+       for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
+               if (i + 1 < dpolicy->granularity)
+                       break;
                pend_list = &dcc->pend_list[i];
+
+               mutex_lock(&dcc->cmd_lock);
+               f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, &dcc->root));
+               blk_start_plug(&plug);
                list_for_each_entry_safe(dc, tmp, pend_list, list) {
                        f2fs_bug_on(sbi, dc->state != D_PREP);
 
-                       /* Hurry up to finish fstrim */
-                       if (dcc->pend_list_tag[i] & P_TRIM) {
-                               __submit_discard_cmd(sbi, dc);
-                               issued++;
-
-                               if (fatal_signal_pending(current))
-                                       break;
-                               continue;
-                       }
-
-                       if (!issue_cond) {
-                               __submit_discard_cmd(sbi, dc);
-                               issued++;
-                               continue;
-                       }
-
-                       if (is_idle(sbi)) {
-                               __submit_discard_cmd(sbi, dc);
-                               issued++;
-                       } else {
+                       if (dpolicy->io_aware && i < dpolicy->io_aware_gran &&
+                                                               !is_idle(sbi)) {
                                io_interrupted = true;
+                               goto skip;
                        }
 
-                       if (++iter >= DISCARD_ISSUE_RATE)
-                               goto out;
+                       __submit_discard_cmd(sbi, dpolicy, dc);
+                       issued++;
+skip:
+                       if (++iter >= dpolicy->max_requests)
+                               break;
                }
-               if (list_empty(pend_list) && dcc->pend_list_tag[i] & P_TRIM)
-                       dcc->pend_list_tag[i] &= (~P_TRIM);
+               blk_finish_plug(&plug);
+               mutex_unlock(&dcc->cmd_lock);
+
+               if (iter >= dpolicy->max_requests)
+                       break;
        }
-out:
-       blk_finish_plug(&plug);
-       mutex_unlock(&dcc->cmd_lock);
 
        if (!issued && io_interrupted)
                issued = -1;
@@ -1113,12 +1234,13 @@ out:
        return issued;
 }
 
-static void __drop_discard_cmd(struct f2fs_sb_info *sbi)
+static bool __drop_discard_cmd(struct f2fs_sb_info *sbi)
 {
        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
        struct list_head *pend_list;
        struct discard_cmd *dc, *tmp;
        int i;
+       bool dropped = false;
 
        mutex_lock(&dcc->cmd_lock);
        for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
@@ -1126,39 +1248,58 @@ static void __drop_discard_cmd(struct f2fs_sb_info *sbi)
                list_for_each_entry_safe(dc, tmp, pend_list, list) {
                        f2fs_bug_on(sbi, dc->state != D_PREP);
                        __remove_discard_cmd(sbi, dc);
+                       dropped = true;
                }
        }
        mutex_unlock(&dcc->cmd_lock);
+
+       return dropped;
 }
 
-static void __wait_one_discard_bio(struct f2fs_sb_info *sbi,
+static unsigned int __wait_one_discard_bio(struct f2fs_sb_info *sbi,
                                                        struct discard_cmd *dc)
 {
        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
+       unsigned int len = 0;
 
        wait_for_completion_io(&dc->wait);
        mutex_lock(&dcc->cmd_lock);
        f2fs_bug_on(sbi, dc->state != D_DONE);
        dc->ref--;
-       if (!dc->ref)
+       if (!dc->ref) {
+               if (!dc->error)
+                       len = dc->len;
                __remove_discard_cmd(sbi, dc);
+       }
        mutex_unlock(&dcc->cmd_lock);
+
+       return len;
 }
 
-static void __wait_discard_cmd(struct f2fs_sb_info *sbi, bool wait_cond)
+static unsigned int __wait_discard_cmd_range(struct f2fs_sb_info *sbi,
+                                               struct discard_policy *dpolicy,
+                                               block_t start, block_t end)
 {
        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
-       struct list_head *wait_list = &(dcc->wait_list);
+       struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
+                                       &(dcc->fstrim_list) : &(dcc->wait_list);
        struct discard_cmd *dc, *tmp;
        bool need_wait;
+       unsigned int trimmed = 0;
 
 next:
        need_wait = false;
 
        mutex_lock(&dcc->cmd_lock);
        list_for_each_entry_safe(dc, tmp, wait_list, list) {
-               if (!wait_cond || (dc->state == D_DONE && !dc->ref)) {
+               if (dc->lstart + dc->len <= start || end <= dc->lstart)
+                       continue;
+               if (dc->len < dpolicy->granularity)
+                       continue;
+               if (dc->state == D_DONE && !dc->ref) {
                        wait_for_completion_io(&dc->wait);
+                       if (!dc->error)
+                               trimmed += dc->len;
                        __remove_discard_cmd(sbi, dc);
                } else {
                        dc->ref++;
@@ -1169,9 +1310,17 @@ next:
        mutex_unlock(&dcc->cmd_lock);
 
        if (need_wait) {
-               __wait_one_discard_bio(sbi, dc);
+               trimmed += __wait_one_discard_bio(sbi, dc);
                goto next;
        }
+
+       return trimmed;
+}
+
+static void __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
+                                               struct discard_policy *dpolicy)
+{
+       __wait_discard_cmd_range(sbi, dpolicy, 0, UINT_MAX);
 }
 
 /* This should be covered by global mutex, &sit_i->sentry_lock */
@@ -1209,23 +1358,19 @@ void stop_discard_thread(struct f2fs_sb_info *sbi)
        }
 }
 
-/* This comes from f2fs_put_super and f2fs_trim_fs */
-void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi, bool umount)
-{
-       __issue_discard_cmd(sbi, false);
-       __drop_discard_cmd(sbi);
-       __wait_discard_cmd(sbi, !umount);
-}
-
-static void mark_discard_range_all(struct f2fs_sb_info *sbi)
+/* This comes from f2fs_put_super */
+bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi)
 {
        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
-       int i;
+       struct discard_policy dpolicy;
+       bool dropped;
 
-       mutex_lock(&dcc->cmd_lock);
-       for (i = 0; i < MAX_PLIST_NUM; i++)
-               dcc->pend_list_tag[i] |= P_TRIM;
-       mutex_unlock(&dcc->cmd_lock);
+       init_discard_policy(&dpolicy, DPOLICY_UMOUNT, dcc->discard_granularity);
+       __issue_discard_cmd(sbi, &dpolicy);
+       dropped = __drop_discard_cmd(sbi);
+       __wait_all_discard_cmd(sbi, &dpolicy);
+
+       return dropped;
 }
 
 static int issue_discard_thread(void *data)
@@ -1233,12 +1378,16 @@ static int issue_discard_thread(void *data)
        struct f2fs_sb_info *sbi = data;
        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
        wait_queue_head_t *q = &dcc->discard_wait_queue;
+       struct discard_policy dpolicy;
        unsigned int wait_ms = DEF_MIN_DISCARD_ISSUE_TIME;
        int issued;
 
        set_freezable();
 
        do {
+               init_discard_policy(&dpolicy, DPOLICY_BG,
+                                       dcc->discard_granularity);
+
                wait_event_interruptible_timeout(*q,
                                kthread_should_stop() || freezing(current) ||
                                dcc->discard_wake,
@@ -1251,17 +1400,18 @@ static int issue_discard_thread(void *data)
                if (dcc->discard_wake) {
                        dcc->discard_wake = 0;
                        if (sbi->gc_thread && sbi->gc_thread->gc_urgent)
-                               mark_discard_range_all(sbi);
+                               init_discard_policy(&dpolicy,
+                                                       DPOLICY_FORCE, 1);
                }
 
                sb_start_intwrite(sbi->sb);
 
-               issued = __issue_discard_cmd(sbi, true);
+               issued = __issue_discard_cmd(sbi, &dpolicy);
                if (issued) {
-                       __wait_discard_cmd(sbi, true);
-                       wait_ms = DEF_MIN_DISCARD_ISSUE_TIME;
+                       __wait_all_discard_cmd(sbi, &dpolicy);
+                       wait_ms = dpolicy.min_interval;
                } else {
-                       wait_ms = DEF_MAX_DISCARD_ISSUE_TIME;
+                       wait_ms = dpolicy.max_interval;
                }
 
                sb_end_intwrite(sbi->sb);
@@ -1525,7 +1675,6 @@ find_next:
 
                        f2fs_issue_discard(sbi, entry->start_blkaddr + cur_pos,
                                                                        len);
-                       cpc->trimmed += len;
                        total_len += len;
                } else {
                        next_pos = find_next_bit_le(entry->discard_map,
@@ -1546,6 +1695,37 @@ skip:
        wake_up_discard_thread(sbi, false);
 }
 
+void init_discard_policy(struct discard_policy *dpolicy,
+                               int discard_type, unsigned int granularity)
+{
+       /* common policy */
+       dpolicy->type = discard_type;
+       dpolicy->sync = true;
+       dpolicy->granularity = granularity;
+
+       if (discard_type == DPOLICY_BG) {
+               dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
+               dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
+               dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST;
+               dpolicy->io_aware_gran = MAX_PLIST_NUM;
+               dpolicy->io_aware = true;
+       } else if (discard_type == DPOLICY_FORCE) {
+               dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
+               dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
+               dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST;
+               dpolicy->io_aware_gran = MAX_PLIST_NUM;
+               dpolicy->io_aware = true;
+       } else if (discard_type == DPOLICY_FSTRIM) {
+               dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST;
+               dpolicy->io_aware_gran = MAX_PLIST_NUM;
+               dpolicy->io_aware = false;
+       } else if (discard_type == DPOLICY_UMOUNT) {
+               dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST;
+               dpolicy->io_aware_gran = MAX_PLIST_NUM;
+               dpolicy->io_aware = false;
+       }
+}
+
 static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
 {
        dev_t dev = sbi->sb->s_bdev->bd_dev;
@@ -1563,12 +1743,10 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
 
        dcc->discard_granularity = DEFAULT_DISCARD_GRANULARITY;
        INIT_LIST_HEAD(&dcc->entry_list);
-       for (i = 0; i < MAX_PLIST_NUM; i++) {
+       for (i = 0; i < MAX_PLIST_NUM; i++)
                INIT_LIST_HEAD(&dcc->pend_list[i]);
-               if (i >= dcc->discard_granularity - 1)
-                       dcc->pend_list_tag[i] |= P_ACTIVE;
-       }
        INIT_LIST_HEAD(&dcc->wait_list);
+       INIT_LIST_HEAD(&dcc->fstrim_list);
        mutex_init(&dcc->cmd_lock);
        atomic_set(&dcc->issued_discard, 0);
        atomic_set(&dcc->issing_discard, 0);
@@ -1716,16 +1894,6 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
                get_sec_entry(sbi, segno)->valid_blocks += del;
 }
 
-void refresh_sit_entry(struct f2fs_sb_info *sbi, block_t old, block_t new)
-{
-       update_sit_entry(sbi, new, 1);
-       if (GET_SEGNO(sbi, old) != NULL_SEGNO)
-               update_sit_entry(sbi, old, -1);
-
-       locate_dirty_segment(sbi, GET_SEGNO(sbi, old));
-       locate_dirty_segment(sbi, GET_SEGNO(sbi, new));
-}
-
 void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
 {
        unsigned int segno = GET_SEGNO(sbi, addr);
@@ -1736,14 +1904,14 @@ void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
                return;
 
        /* add it into sit main buffer */
-       mutex_lock(&sit_i->sentry_lock);
+       down_write(&sit_i->sentry_lock);
 
        update_sit_entry(sbi, addr, -1);
 
        /* add it into dirty seglist */
        locate_dirty_segment(sbi, segno);
 
-       mutex_unlock(&sit_i->sentry_lock);
+       up_write(&sit_i->sentry_lock);
 }
 
 bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr)
@@ -1756,7 +1924,7 @@ bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr)
        if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR)
                return true;
 
-       mutex_lock(&sit_i->sentry_lock);
+       down_read(&sit_i->sentry_lock);
 
        segno = GET_SEGNO(sbi, blkaddr);
        se = get_seg_entry(sbi, segno);
@@ -1765,7 +1933,7 @@ bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr)
        if (f2fs_test_bit(offset, se->ckpt_valid_map))
                is_cp = true;
 
-       mutex_unlock(&sit_i->sentry_lock);
+       up_read(&sit_i->sentry_lock);
 
        return is_cp;
 }
@@ -1823,12 +1991,8 @@ struct page *get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno)
 void update_meta_page(struct f2fs_sb_info *sbi, void *src, block_t blk_addr)
 {
        struct page *page = grab_meta_page(sbi, blk_addr);
-       void *dst = page_address(page);
 
-       if (src)
-               memcpy(dst, src, PAGE_SIZE);
-       else
-               memset(dst, 0, PAGE_SIZE);
+       memcpy(page_address(page), src, PAGE_SIZE);
        set_page_dirty(page);
        f2fs_put_page(page, 1);
 }
@@ -1927,7 +2091,6 @@ find_other_zone:
        }
        secno = left_start;
 skip_left:
-       hint = secno;
        segno = GET_SEG_FROM_SEC(sbi, secno);
        zoneno = GET_ZONE_FROM_SEC(sbi, secno);
 
@@ -2162,12 +2325,16 @@ void allocate_new_segments(struct f2fs_sb_info *sbi)
        unsigned int old_segno;
        int i;
 
+       down_write(&SIT_I(sbi)->sentry_lock);
+
        for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
                curseg = CURSEG_I(sbi, i);
                old_segno = curseg->segno;
                SIT_I(sbi)->s_ops->allocate_segment(sbi, i, true);
                locate_dirty_segment(sbi, old_segno);
        }
+
+       up_write(&SIT_I(sbi)->sentry_lock);
 }
 
 static const struct segment_allocation default_salloc_ops = {
@@ -2179,14 +2346,14 @@ bool exist_trim_candidates(struct f2fs_sb_info *sbi, struct cp_control *cpc)
        __u64 trim_start = cpc->trim_start;
        bool has_candidate = false;
 
-       mutex_lock(&SIT_I(sbi)->sentry_lock);
+       down_write(&SIT_I(sbi)->sentry_lock);
        for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++) {
                if (add_discard_addrs(sbi, cpc, true)) {
                        has_candidate = true;
                        break;
                }
        }
-       mutex_unlock(&SIT_I(sbi)->sentry_lock);
+       up_write(&SIT_I(sbi)->sentry_lock);
 
        cpc->trim_start = trim_start;
        return has_candidate;
@@ -2196,14 +2363,16 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
 {
        __u64 start = F2FS_BYTES_TO_BLK(range->start);
        __u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1;
-       unsigned int start_segno, end_segno;
+       unsigned int start_segno, end_segno, cur_segno;
+       block_t start_block, end_block;
        struct cp_control cpc;
+       struct discard_policy dpolicy;
+       unsigned long long trimmed = 0;
        int err = 0;
 
        if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize)
                return -EINVAL;
 
-       cpc.trimmed = 0;
        if (end <= MAIN_BLKADDR(sbi))
                goto out;
 
@@ -2217,12 +2386,14 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
        start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start);
        end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 :
                                                GET_SEGNO(sbi, end);
+
        cpc.reason = CP_DISCARD;
        cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen));
 
        /* do checkpoint to issue discard commands safely */
-       for (; start_segno <= end_segno; start_segno = cpc.trim_end + 1) {
-               cpc.trim_start = start_segno;
+       for (cur_segno = start_segno; cur_segno <= end_segno;
+                                       cur_segno = cpc.trim_end + 1) {
+               cpc.trim_start = cur_segno;
 
                if (sbi->discard_blks == 0)
                        break;
@@ -2230,7 +2401,7 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
                        cpc.trim_end = end_segno;
                else
                        cpc.trim_end = min_t(unsigned int,
-                               rounddown(start_segno +
+                               rounddown(cur_segno +
                                BATCHED_TRIM_SEGMENTS(sbi),
                                sbi->segs_per_sec) - 1, end_segno);
 
@@ -2242,11 +2413,16 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
 
                schedule();
        }
-       /* It's time to issue all the filed discards */
-       mark_discard_range_all(sbi);
-       f2fs_wait_discard_bios(sbi, false);
+
+       start_block = START_BLOCK(sbi, start_segno);
+       end_block = START_BLOCK(sbi, min(cur_segno, end_segno) + 1);
+
+       init_discard_policy(&dpolicy, DPOLICY_FSTRIM, cpc.trim_minlen);
+       __issue_discard_cmd_range(sbi, &dpolicy, start_block, end_block);
+       trimmed = __wait_discard_cmd_range(sbi, &dpolicy,
+                                       start_block, end_block);
 out:
-       range->len = F2FS_BLK_TO_BYTES(cpc.trimmed);
+       range->len = F2FS_BLK_TO_BYTES(trimmed);
        return err;
 }
 
@@ -2258,6 +2434,18 @@ static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type)
        return false;
 }
 
+int rw_hint_to_seg_type(enum rw_hint hint)
+{
+       switch (hint) {
+       case WRITE_LIFE_SHORT:
+               return CURSEG_HOT_DATA;
+       case WRITE_LIFE_EXTREME:
+               return CURSEG_COLD_DATA;
+       default:
+               return CURSEG_WARM_DATA;
+       }
+}
+
 static int __get_segment_type_2(struct f2fs_io_info *fio)
 {
        if (fio->type == DATA)
@@ -2292,7 +2480,7 @@ static int __get_segment_type_6(struct f2fs_io_info *fio)
                        return CURSEG_COLD_DATA;
                if (is_inode_flag_set(inode, FI_HOT_DATA))
                        return CURSEG_HOT_DATA;
-               return CURSEG_WARM_DATA;
+               return rw_hint_to_seg_type(inode->i_write_hint);
        } else {
                if (IS_DNODE(fio->page))
                        return is_cold_node(fio->page) ? CURSEG_WARM_NODE :
@@ -2336,8 +2524,10 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
        struct sit_info *sit_i = SIT_I(sbi);
        struct curseg_info *curseg = CURSEG_I(sbi, type);
 
+       down_read(&SM_I(sbi)->curseg_lock);
+
        mutex_lock(&curseg->curseg_mutex);
-       mutex_lock(&sit_i->sentry_lock);
+       down_write(&sit_i->sentry_lock);
 
        *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
 
@@ -2354,15 +2544,26 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
 
        stat_inc_block_count(sbi, curseg);
 
+       /*
+        * SIT information should be updated before segment allocation,
+        * since SSR needs latest valid block information.
+        */
+       update_sit_entry(sbi, *new_blkaddr, 1);
+       if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
+               update_sit_entry(sbi, old_blkaddr, -1);
+
        if (!__has_curseg_space(sbi, type))
                sit_i->s_ops->allocate_segment(sbi, type, false);
+
        /*
-        * SIT information should be updated after segment allocation,
-        * since we need to keep dirty segments precisely under SSR.
+        * segment dirty status should be updated after segment allocation,
+        * so we just need to update status only one time after previous
+        * segment being closed.
         */
-       refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr);
+       locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
+       locate_dirty_segment(sbi, GET_SEGNO(sbi, *new_blkaddr));
 
-       mutex_unlock(&sit_i->sentry_lock);
+       up_write(&sit_i->sentry_lock);
 
        if (page && IS_NODESEG(type)) {
                fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg));
@@ -2382,6 +2583,29 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
        }
 
        mutex_unlock(&curseg->curseg_mutex);
+
+       up_read(&SM_I(sbi)->curseg_lock);
+}
+
+static void update_device_state(struct f2fs_io_info *fio)
+{
+       struct f2fs_sb_info *sbi = fio->sbi;
+       unsigned int devidx;
+
+       if (!sbi->s_ndevs)
+               return;
+
+       devidx = f2fs_target_device_index(sbi, fio->new_blkaddr);
+
+       /* update device state for fsync */
+       set_dirty_device(sbi, fio->ino, devidx, FLUSH_INO);
+
+       /* update device state for checkpoint */
+       if (!f2fs_test_bit(devidx, (char *)&sbi->dirty_device)) {
+               spin_lock(&sbi->dev_lock);
+               f2fs_set_bit(devidx, (char *)&sbi->dirty_device);
+               spin_unlock(&sbi->dev_lock);
+       }
 }
 
 static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
@@ -2398,6 +2622,8 @@ reallocate:
        if (err == -EAGAIN) {
                fio->old_blkaddr = fio->new_blkaddr;
                goto reallocate;
+       } else if (!err) {
+               update_device_state(fio);
        }
 }
 
@@ -2458,12 +2684,26 @@ int rewrite_data_page(struct f2fs_io_info *fio)
        stat_inc_inplace_blocks(fio->sbi);
 
        err = f2fs_submit_page_bio(fio);
+       if (!err)
+               update_device_state(fio);
 
        f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE);
 
        return err;
 }
 
+static inline int __f2fs_get_curseg(struct f2fs_sb_info *sbi,
+                                               unsigned int segno)
+{
+       int i;
+
+       for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) {
+               if (CURSEG_I(sbi, i)->segno == segno)
+                       break;
+       }
+       return i;
+}
+
 void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
                                block_t old_blkaddr, block_t new_blkaddr,
                                bool recover_curseg, bool recover_newaddr)
@@ -2479,6 +2719,8 @@ void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
        se = get_seg_entry(sbi, segno);
        type = se->type;
 
+       down_write(&SM_I(sbi)->curseg_lock);
+
        if (!recover_curseg) {
                /* for recovery flow */
                if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) {
@@ -2488,14 +2730,19 @@ void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
                                type = CURSEG_WARM_DATA;
                }
        } else {
-               if (!IS_CURSEG(sbi, segno))
+               if (IS_CURSEG(sbi, segno)) {
+                       /* se->type is volatile as SSR allocation */
+                       type = __f2fs_get_curseg(sbi, segno);
+                       f2fs_bug_on(sbi, type == NO_CHECK_TYPE);
+               } else {
                        type = CURSEG_WARM_DATA;
+               }
        }
 
        curseg = CURSEG_I(sbi, type);
 
        mutex_lock(&curseg->curseg_mutex);
-       mutex_lock(&sit_i->sentry_lock);
+       down_write(&sit_i->sentry_lock);
 
        old_cursegno = curseg->segno;
        old_blkoff = curseg->next_blkoff;
@@ -2527,8 +2774,9 @@ void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
                curseg->next_blkoff = old_blkoff;
        }
 
-       mutex_unlock(&sit_i->sentry_lock);
+       up_write(&sit_i->sentry_lock);
        mutex_unlock(&curseg->curseg_mutex);
+       up_write(&SM_I(sbi)->curseg_lock);
 }
 
 void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
@@ -2982,7 +3230,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
        bool to_journal = true;
        struct seg_entry *se;
 
-       mutex_lock(&sit_i->sentry_lock);
+       down_write(&sit_i->sentry_lock);
 
        if (!sit_i->dirty_sentries)
                goto out;
@@ -3076,7 +3324,7 @@ out:
 
                cpc->trim_start = trim_start;
        }
-       mutex_unlock(&sit_i->sentry_lock);
+       up_write(&sit_i->sentry_lock);
 
        set_prefree_as_free_segments(sbi);
 }
@@ -3169,7 +3417,7 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
        sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK;
        sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time);
        sit_i->mounted_time = ktime_get_real_seconds();
-       mutex_init(&sit_i->sentry_lock);
+       init_rwsem(&sit_i->sentry_lock);
        return 0;
 }
 
@@ -3410,7 +3658,7 @@ static void init_min_max_mtime(struct f2fs_sb_info *sbi)
        struct sit_info *sit_i = SIT_I(sbi);
        unsigned int segno;
 
-       mutex_lock(&sit_i->sentry_lock);
+       down_write(&sit_i->sentry_lock);
 
        sit_i->min_mtime = LLONG_MAX;
 
@@ -3427,7 +3675,7 @@ static void init_min_max_mtime(struct f2fs_sb_info *sbi)
                        sit_i->min_mtime = mtime;
        }
        sit_i->max_mtime = get_mtime(sbi);
-       mutex_unlock(&sit_i->sentry_lock);
+       up_write(&sit_i->sentry_lock);
 }
 
 int build_segment_manager(struct f2fs_sb_info *sbi)
@@ -3460,11 +3708,14 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
        sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
        sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
        sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS;
+       sm_info->min_ssr_sections = reserved_sections(sbi);
 
        sm_info->trim_sections = DEF_BATCHED_TRIM_SECTIONS;
 
        INIT_LIST_HEAD(&sm_info->sit_entry_set);
 
+       init_rwsem(&sm_info->curseg_lock);
+
        if (!f2fs_readonly(sbi->sb)) {
                err = create_flush_cmd_control(sbi);
                if (err)