/*
 * fs/f2fs/gc.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/fs.h>
#include <linux/module.h>
#include <linux/backing-dev.h>
#include <linux/init.h>
#include <linux/f2fs_fs.h>
#include <linux/kthread.h>
#include <linux/delay.h>
#include <linux/freezer.h>

#include "f2fs.h"
#include "node.h"
#include "segment.h"
#include "gc.h"
#include <trace/events/f2fs.h>

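/*
 * In short: gc_thread_func() below is a per-filesystem kthread that wakes up
 * periodically and runs one background GC pass only when the gc_mutex is
 * free and the filesystem is not frozen.  The sleep interval adapts between
 * min_sleep_time and max_sleep_time depending on how many invalid blocks
 * have accumulated, and falls back to no_gc_sleep_time when no victim could
 * be selected.
 */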
static int gc_thread_func(void *data)
{
	struct f2fs_sb_info *sbi = data;
	struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
	wait_queue_head_t *wq = &sbi->gc_thread->gc_wait_queue_head;
	unsigned int wait_ms;

	wait_ms = gc_th->min_sleep_time;

	set_freezable();
	do {
		wait_event_interruptible_timeout(*wq,
				kthread_should_stop(),
				msecs_to_jiffies(wait_ms));

		if (kthread_should_stop())
			break;

		if (sbi->sb->s_writers.frozen >= SB_FREEZE_WRITE) {
			increase_sleep_time(gc_th, &wait_ms);
			continue;
		}

#ifdef CONFIG_F2FS_FAULT_INJECTION
		if (time_to_inject(sbi, FAULT_CHECKPOINT)) {
			f2fs_show_injection_info(FAULT_CHECKPOINT);
			f2fs_stop_checkpoint(sbi, false);
		}
#endif

		/*
		 * [GC triggering condition]
		 * 0. GC is not conducted currently.
		 * 1. There are enough dirty segments.
		 * 2. IO subsystem is idle by checking the # of writeback pages.
		 * 3. IO subsystem is idle by checking the # of requests in
		 *    bdev's request list.
		 *
		 * Note) We have to avoid triggering GCs frequently, because
		 * some segments can be invalidated soon after by user update
		 * or deletion, so wait some time to collect dirty segments.
		 */
		if (!mutex_trylock(&sbi->gc_mutex))
			continue;

		if (!is_idle(sbi)) {
			increase_sleep_time(gc_th, &wait_ms);
			mutex_unlock(&sbi->gc_mutex);
			continue;
		}

		if (has_enough_invalid_blocks(sbi))
			decrease_sleep_time(gc_th, &wait_ms);
		else
			increase_sleep_time(gc_th, &wait_ms);

		stat_inc_bggc_count(sbi);

		/* if return value is not zero, no victim was selected */
		if (f2fs_gc(sbi, test_opt(sbi, FORCE_FG_GC), true, NULL_SEGNO))
			wait_ms = gc_th->no_gc_sleep_time;

		trace_f2fs_background_gc(sbi->sb, wait_ms,
				prefree_segments(sbi), free_segments(sbi));

		/* balancing f2fs's metadata periodically */
		f2fs_balance_fs_bg(sbi);

	} while (!kthread_should_stop());
	return 0;
}

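/*
 * The sleep intervals used by gc_thread_func() are initialized below from the
 * DEF_GC_THREAD_*_SLEEP_TIME defaults, and the kthread is named
 * "f2fs_gc-%u:%u" after the major/minor numbers of the backing device, so
 * each mounted f2fs instance gets its own background GC thread.
 */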
int start_gc_thread(struct f2fs_sb_info *sbi)
{
	struct f2fs_gc_kthread *gc_th;
	dev_t dev = sbi->sb->s_bdev->bd_dev;
	int err = 0;

	gc_th = f2fs_kmalloc(sbi, sizeof(struct f2fs_gc_kthread), GFP_KERNEL);
	if (!gc_th) {
		err = -ENOMEM;
		goto out;
	}

	gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME;
	gc_th->max_sleep_time = DEF_GC_THREAD_MAX_SLEEP_TIME;
	gc_th->no_gc_sleep_time = DEF_GC_THREAD_NOGC_SLEEP_TIME;

	gc_th->gc_idle = 0;

	sbi->gc_thread = gc_th;
	init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head);
	sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi,
			"f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev));
	if (IS_ERR(gc_th->f2fs_gc_task)) {
		err = PTR_ERR(gc_th->f2fs_gc_task);
		kfree(gc_th);
		sbi->gc_thread = NULL;
	}
out:
	return err;
}

void stop_gc_thread(struct f2fs_sb_info *sbi)
{
	struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
	if (!gc_th)
		return;
	kthread_stop(gc_th->f2fs_gc_task);
	kfree(gc_th);
	sbi->gc_thread = NULL;
}

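/*
 * Pick the cost function for victim selection.  Background GC defaults to the
 * cost-benefit policy (GC_CB) and foreground GC to greedy (GC_GREEDY); the
 * gc_idle knob, when set, overrides this (1 selects cost-benefit, 2 selects
 * greedy), matching the assignments below.
 */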
static int select_gc_type(struct f2fs_gc_kthread *gc_th, int gc_type)
{
	int gc_mode = (gc_type == BG_GC) ? GC_CB : GC_GREEDY;

	if (gc_th && gc_th->gc_idle) {
		if (gc_th->gc_idle == 1)
			gc_mode = GC_CB;
		else if (gc_th->gc_idle == 2)
			gc_mode = GC_GREEDY;
	}
	return gc_mode;
}

static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
			int type, struct victim_sel_policy *p)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);

	if (p->alloc_mode == SSR) {
		p->gc_mode = GC_GREEDY;
		p->dirty_segmap = dirty_i->dirty_segmap[type];
		p->max_search = dirty_i->nr_dirty[type];
		p->ofs_unit = 1;
	} else {
		p->gc_mode = select_gc_type(sbi->gc_thread, gc_type);
		p->dirty_segmap = dirty_i->dirty_segmap[DIRTY];
		p->max_search = dirty_i->nr_dirty[DIRTY];
		p->ofs_unit = sbi->segs_per_sec;
	}

	/* we need to check every dirty segment in the FG_GC case */
	if (gc_type != FG_GC && p->max_search > sbi->max_victim_search)
		p->max_search = sbi->max_victim_search;

	/* let's select beginning hot/small space first */
	if (type == CURSEG_HOT_DATA || IS_NODESEG(type))
		p->offset = 0;
	else
		p->offset = SIT_I(sbi)->last_victim[p->gc_mode];
}

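/*
 * Upper bound used to initialize p->min_cost before scanning.  For greedy
 * mode this is 2 * blocks_per_seg * ofs_unit because get_greedy_cost()
 * counts data blocks twice; for cost-benefit mode any cost below UINT_MAX
 * is acceptable, and SSR costs are bounded by a single segment's blocks.
 */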
static unsigned int get_max_cost(struct f2fs_sb_info *sbi,
				struct victim_sel_policy *p)
{
	/* SSR allocates in a segment unit */
	if (p->alloc_mode == SSR)
		return sbi->blocks_per_seg;
	if (p->gc_mode == GC_GREEDY)
		return 2 * sbi->blocks_per_seg * p->ofs_unit;
	else if (p->gc_mode == GC_CB)
		return UINT_MAX;
	else /* No other gc_mode */
		return 0;
}

static unsigned int check_bg_victims(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	unsigned int secno;

	/*
	 * If the gc_type is FG_GC, we can select the victim sections
	 * already chosen by background GC, since those sections are
	 * guaranteed to have few valid blocks.
	 */
	for_each_set_bit(secno, dirty_i->victim_secmap, MAIN_SECS(sbi)) {
		if (sec_usage_check(sbi, secno))
			continue;
		if (no_fggc_candidate(sbi, secno))
			continue;
		clear_bit(secno, dirty_i->victim_secmap);
		return GET_SEG_FROM_SEC(sbi, secno);
	}
	return NULL_SEGNO;
}

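/*
 * Cost-benefit victim cost.  u is the section utilization in percent and age
 * reflects how long the section has been unmodified (0..100).  The benefit
 * term is proportional to age * (100 - u) / (100 + u), and the cost returned
 * is UINT_MAX minus that, so older and emptier sections get lower cost.  For
 * example (illustrative numbers only), u = 20 with age = 100 yields a much
 * lower cost than u = 80 with age = 10.
 */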
static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno)
{
	struct sit_info *sit_i = SIT_I(sbi);
	unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
	unsigned int start = GET_SEG_FROM_SEC(sbi, secno);
	unsigned long long mtime = 0;
	unsigned int vblocks;
	unsigned char age = 0;
	unsigned char u;
	unsigned int i;

	for (i = 0; i < sbi->segs_per_sec; i++)
		mtime += get_seg_entry(sbi, start + i)->mtime;
	vblocks = get_valid_blocks(sbi, segno, true);

	mtime = div_u64(mtime, sbi->segs_per_sec);
	vblocks = div_u64(vblocks, sbi->segs_per_sec);

	u = (vblocks * 100) >> sbi->log_blocks_per_seg;

	/* Handle the case where the system time was changed by the user */
	if (mtime < sit_i->min_mtime)
		sit_i->min_mtime = mtime;
	if (mtime > sit_i->max_mtime)
		sit_i->max_mtime = mtime;
	if (sit_i->max_mtime != sit_i->min_mtime)
		age = 100 - div64_u64(100 * (mtime - sit_i->min_mtime),
				sit_i->max_mtime - sit_i->min_mtime);

	return UINT_MAX - ((100 * (100 - u) * age) / (100 + u));
}

static unsigned int get_greedy_cost(struct f2fs_sb_info *sbi,
						unsigned int segno)
{
	unsigned int valid_blocks =
			get_valid_blocks(sbi, segno, true);

	return IS_DATASEG(get_seg_entry(sbi, segno)->type) ?
				valid_blocks * 2 : valid_blocks;
}

static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi,
			unsigned int segno, struct victim_sel_policy *p)
{
	if (p->alloc_mode == SSR)
		return get_seg_entry(sbi, segno)->ckpt_valid_blocks;

	/* alloc_mode == LFS */
	if (p->gc_mode == GC_GREEDY)
		return get_greedy_cost(sbi, segno);
	else
		return get_cb_cost(sbi, segno);
}

static unsigned int count_bits(const unsigned long *addr,
				unsigned int offset, unsigned int len)
{
	unsigned int end = offset + len, sum = 0;

	while (offset < end) {
		if (test_bit(offset++, addr))
			++sum;
	}
	return sum;
}

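/*
 * The scan below resumes from last_victim[] (a per-policy cursor into the
 * dirty segmap), wraps around at most once, and gives up after p.max_search
 * candidates so a single call stays bounded even on large devices.
 */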
/*
 * This function is called from two paths.
 * One is garbage collection and the other is SSR segment selection.
 * When it is called during GC, it just gets a victim segment
 * and it does not remove it from the dirty seglist.
 * When it is called from SSR segment selection, it finds a segment
 * which has minimum valid blocks and removes it from the dirty seglist.
 */
static int get_victim_by_default(struct f2fs_sb_info *sbi,
		unsigned int *result, int gc_type, int type, char alloc_mode)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	struct sit_info *sm = SIT_I(sbi);
	struct victim_sel_policy p;
	unsigned int secno, last_victim;
	unsigned int last_segment = MAIN_SEGS(sbi);
	unsigned int nsearched = 0;

	mutex_lock(&dirty_i->seglist_lock);

	p.alloc_mode = alloc_mode;
	select_policy(sbi, gc_type, type, &p);

	p.min_segno = NULL_SEGNO;
	p.min_cost = get_max_cost(sbi, &p);

	if (*result != NULL_SEGNO) {
		if (IS_DATASEG(get_seg_entry(sbi, *result)->type) &&
			get_valid_blocks(sbi, *result, false) &&
			!sec_usage_check(sbi, GET_SEC_FROM_SEG(sbi, *result)))
			p.min_segno = *result;
		goto out;
	}

	if (p.max_search == 0)
		goto out;

	last_victim = sm->last_victim[p.gc_mode];
	if (p.alloc_mode == LFS && gc_type == FG_GC) {
		p.min_segno = check_bg_victims(sbi);
		if (p.min_segno != NULL_SEGNO)
			goto got_it;
	}

	while (1) {
		unsigned long cost;
		unsigned int segno;

		segno = find_next_bit(p.dirty_segmap, last_segment, p.offset);
		if (segno >= last_segment) {
			if (sm->last_victim[p.gc_mode]) {
				last_segment =
					sm->last_victim[p.gc_mode];
				sm->last_victim[p.gc_mode] = 0;
				p.offset = 0;
				continue;
			}
			break;
		}

		p.offset = segno + p.ofs_unit;
		if (p.ofs_unit > 1) {
			p.offset -= segno % p.ofs_unit;
			nsearched += count_bits(p.dirty_segmap,
						p.offset - p.ofs_unit,
						p.ofs_unit);
		} else {
			nsearched++;
		}

		secno = GET_SEC_FROM_SEG(sbi, segno);

		if (sec_usage_check(sbi, secno))
			goto next;
		if (gc_type == BG_GC && test_bit(secno, dirty_i->victim_secmap))
			goto next;
		if (gc_type == FG_GC && p.alloc_mode == LFS &&
					no_fggc_candidate(sbi, secno))
			goto next;

		cost = get_gc_cost(sbi, segno, &p);

		if (p.min_cost > cost) {
			p.min_segno = segno;
			p.min_cost = cost;
		}
next:
		if (nsearched >= p.max_search) {
			if (!sm->last_victim[p.gc_mode] && segno <= last_victim)
				sm->last_victim[p.gc_mode] = last_victim + 1;
			else
				sm->last_victim[p.gc_mode] = segno + 1;
			sm->last_victim[p.gc_mode] %= MAIN_SEGS(sbi);
			break;
		}
	}
	if (p.min_segno != NULL_SEGNO) {
got_it:
		if (p.alloc_mode == LFS) {
			secno = GET_SEC_FROM_SEG(sbi, p.min_segno);
			if (gc_type == FG_GC)
				sbi->cur_victim_sec = secno;
			else
				set_bit(secno, dirty_i->victim_secmap);
		}
		*result = (p.min_segno / p.ofs_unit) * p.ofs_unit;

		trace_f2fs_get_victim(sbi->sb, type, gc_type, &p,
				sbi->cur_victim_sec,
				prefree_segments(sbi), free_segments(sbi));
	}
out:
	mutex_unlock(&dirty_i->seglist_lock);

	return (p.min_segno == NULL_SEGNO) ? 0 : 1;
}

static const struct victim_selection default_v_ops = {
	.get_victim = get_victim_by_default,
};

static struct inode *find_gc_inode(struct gc_inode_list *gc_list, nid_t ino)
{
	struct inode_entry *ie;

	ie = radix_tree_lookup(&gc_list->iroot, ino);
	if (ie)
		return ie->inode;
	return NULL;
}

static void add_gc_inode(struct gc_inode_list *gc_list, struct inode *inode)
{
	struct inode_entry *new_ie;

	if (inode == find_gc_inode(gc_list, inode->i_ino)) {
		iput(inode);
		return;
	}
	new_ie = f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
	new_ie->inode = inode;

	f2fs_radix_tree_insert(&gc_list->iroot, inode->i_ino, new_ie);
	list_add_tail(&new_ie->list, &gc_list->ilist);
}

static void put_gc_inode(struct gc_inode_list *gc_list)
{
	struct inode_entry *ie, *next_ie;
	list_for_each_entry_safe(ie, next_ie, &gc_list->ilist, list) {
		radix_tree_delete(&gc_list->iroot, ie->inode->i_ino);
		iput(ie->inode);
		list_del(&ie->list);
		kmem_cache_free(inode_entry_slab, ie);
	}
}

static int check_valid_map(struct f2fs_sb_info *sbi,
				unsigned int segno, int offset)
{
	struct sit_info *sit_i = SIT_I(sbi);
	struct seg_entry *sentry;
	int ret;

	mutex_lock(&sit_i->sentry_lock);
	sentry = get_seg_entry(sbi, segno);
	ret = f2fs_test_bit(offset, sentry->cur_valid_map);
	mutex_unlock(&sit_i->sentry_lock);
	return ret;
}

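/*
 * gc_node_segment() below walks the victim segment's summary entries in
 * several passes: the early passes only issue readahead for the NAT blocks
 * and node pages involved, and the last pass actually migrates the still
 * valid node blocks, so most of the I/O is batched before any copying.
 */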
/*
 * This function compares the node address recorded in the summary with the
 * one in the NAT. If they match, the node is copied with cold status;
 * otherwise (an invalid node) it is ignored.
 */
static void gc_node_segment(struct f2fs_sb_info *sbi,
		struct f2fs_summary *sum, unsigned int segno, int gc_type)
{
	struct f2fs_summary *entry;
	block_t start_addr;
	int off;
	int phase = 0;

	start_addr = START_BLOCK(sbi, segno);

next_step:
	entry = sum;

	for (off = 0; off < sbi->blocks_per_seg; off++, entry++) {
		nid_t nid = le32_to_cpu(entry->nid);
		struct page *node_page;
		struct node_info ni;

		/* stop BG_GC if there are not enough free sections. */
		if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0))
			return;

		if (check_valid_map(sbi, segno, off) == 0)
			continue;

		if (phase == 0) {
			ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), 1,
							META_NAT, true);
			continue;
		}

		if (phase == 1) {
			ra_node_page(sbi, nid);
			continue;
		}

		/* phase == 2 */
		node_page = get_node_page(sbi, nid);
		if (IS_ERR(node_page))
			continue;

		/* block may become invalid during get_node_page */
		if (check_valid_map(sbi, segno, off) == 0) {
			f2fs_put_page(node_page, 1);
			continue;
		}

		get_node_info(sbi, nid, &ni);
		if (ni.blk_addr != start_addr + off) {
			f2fs_put_page(node_page, 1);
			continue;
		}

		move_node_page(node_page, gc_type);
		stat_inc_node_blk_count(sbi, 1, gc_type);
	}

	if (++phase < 3)
		goto next_step;
}

/*
 * Calculate the start block index for the given node offset.
 * Be careful: the caller should pass a node offset that refers only to direct
 * node blocks. If a node offset pointing to another type of node block, such
 * as an indirect or double indirect node block, is given, it is the caller's
 * mistake or a BUG.
 */
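/*
 * Worked example (illustrative, assuming 4KB blocks where NIDS_PER_BLOCK and
 * ADDRS_PER_BLOCK are 1018 and a plain inode holds ADDRS_PER_INODE(inode)
 * addresses): node_ofs 1 and 2 are the two direct node blocks, so node_ofs 2
 * gives bidx = 1 and a start index of ADDRS_PER_INODE + ADDRS_PER_BLOCK;
 * node_ofs 4, the first direct node under the first indirect node, gives
 * dec = 0 and bidx = 2.
 */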
block_t start_bidx_of_node(unsigned int node_ofs, struct inode *inode)
{
	unsigned int indirect_blks = 2 * NIDS_PER_BLOCK + 4;
	unsigned int bidx;

	if (node_ofs == 0)
		return 0;

	if (node_ofs <= 2) {
		bidx = node_ofs - 1;
	} else if (node_ofs <= indirect_blks) {
		int dec = (node_ofs - 4) / (NIDS_PER_BLOCK + 1);
		bidx = node_ofs - 2 - dec;
	} else {
		int dec = (node_ofs - indirect_blks - 3) / (NIDS_PER_BLOCK + 1);
		bidx = node_ofs - 5 - dec;
	}
	return bidx * ADDRS_PER_BLOCK + ADDRS_PER_INODE(inode);
}

static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
		struct node_info *dni, block_t blkaddr, unsigned int *nofs)
{
	struct page *node_page;
	nid_t nid;
	unsigned int ofs_in_node;
	block_t source_blkaddr;

	nid = le32_to_cpu(sum->nid);
	ofs_in_node = le16_to_cpu(sum->ofs_in_node);

	node_page = get_node_page(sbi, nid);
	if (IS_ERR(node_page))
		return false;

	get_node_info(sbi, nid, dni);

	if (sum->version != dni->version) {
		f2fs_msg(sbi->sb, KERN_WARNING,
				"%s: valid data with mismatched node version.",
				__func__);
		set_sbi_flag(sbi, SBI_NEED_FSCK);
	}

	*nofs = ofs_of_node(node_page);
	source_blkaddr = datablock_addr(node_page, ofs_in_node);
	f2fs_put_page(node_page, 1);

	if (source_blkaddr != blkaddr)
		return false;
	return true;
}

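/*
 * Move a block of an encrypted regular file without decrypting it: the
 * ciphertext is read into a page of the meta inode's mapping, a new block
 * address is allocated, and the page is written back to that address, so GC
 * never needs the file's encryption key.
 */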
static void move_encrypted_block(struct inode *inode, block_t bidx,
					unsigned int segno, int off)
{
	struct f2fs_io_info fio = {
		.sbi = F2FS_I_SB(inode),
		.type = DATA,
		.op = REQ_OP_READ,
		.op_flags = 0,
		.encrypted_page = NULL,
	};
	struct dnode_of_data dn;
	struct f2fs_summary sum;
	struct node_info ni;
	struct page *page;
	block_t newaddr;
	int err;

	/* do not read out */
	page = f2fs_grab_cache_page(inode->i_mapping, bidx, false);
	if (!page)
		return;

	if (!check_valid_map(F2FS_I_SB(inode), segno, off))
		goto out;

	if (f2fs_is_atomic_file(inode))
		goto out;

	set_new_dnode(&dn, inode, NULL, NULL, 0);
	err = get_dnode_of_data(&dn, bidx, LOOKUP_NODE);
	if (err)
		goto out;

	if (unlikely(dn.data_blkaddr == NULL_ADDR)) {
		ClearPageUptodate(page);
		goto put_out;
	}

	/*
	 * don't cache encrypted data into the meta inode until previously
	 * dirty data has been written back, to avoid racing between GC and
	 * flush.
	 */
	f2fs_wait_on_page_writeback(page, DATA, true);

	get_node_info(fio.sbi, dn.nid, &ni);
	set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version);

	/* read page */
	fio.page = page;
	fio.new_blkaddr = fio.old_blkaddr = dn.data_blkaddr;

	allocate_data_block(fio.sbi, NULL, fio.old_blkaddr, &newaddr,
					&sum, CURSEG_COLD_DATA);

	fio.encrypted_page = pagecache_get_page(META_MAPPING(fio.sbi), newaddr,
					FGP_LOCK | FGP_CREAT, GFP_NOFS);
	if (!fio.encrypted_page) {
		err = -ENOMEM;
		goto recover_block;
	}

	err = f2fs_submit_page_bio(&fio);
	if (err)
		goto put_page_out;

	/* write page */
	lock_page(fio.encrypted_page);

	if (unlikely(fio.encrypted_page->mapping != META_MAPPING(fio.sbi))) {
		err = -EIO;
		goto put_page_out;
	}
	if (unlikely(!PageUptodate(fio.encrypted_page))) {
		err = -EIO;
		goto put_page_out;
	}

	set_page_dirty(fio.encrypted_page);
	f2fs_wait_on_page_writeback(fio.encrypted_page, DATA, true);
	if (clear_page_dirty_for_io(fio.encrypted_page))
		dec_page_count(fio.sbi, F2FS_DIRTY_META);

	set_page_writeback(fio.encrypted_page);

	/* allocate block address */
	f2fs_wait_on_page_writeback(dn.node_page, NODE, true);

	fio.op = REQ_OP_WRITE;
	fio.op_flags = REQ_SYNC;
	fio.new_blkaddr = newaddr;
	f2fs_submit_page_mbio(&fio);

	f2fs_update_data_blkaddr(&dn, newaddr);
	set_inode_flag(inode, FI_APPEND_WRITE);
	if (page->index == 0)
		set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN);
put_page_out:
	f2fs_put_page(fio.encrypted_page, 1);
recover_block:
	if (err)
		__f2fs_replace_block(fio.sbi, &sum, newaddr, fio.old_blkaddr,
								true, true);
put_out:
	f2fs_put_dnode(&dn);
out:
	f2fs_put_page(page, 1);
}

static void move_data_page(struct inode *inode, block_t bidx, int gc_type,
					unsigned int segno, int off)
{
	struct page *page;

	page = get_lock_data_page(inode, bidx, true);
	if (IS_ERR(page))
		return;

	if (!check_valid_map(F2FS_I_SB(inode), segno, off))
		goto out;

	if (f2fs_is_atomic_file(inode))
		goto out;

	if (gc_type == BG_GC) {
		if (PageWriteback(page))
			goto out;
		set_page_dirty(page);
		set_cold_data(page);
	} else {
		struct f2fs_io_info fio = {
			.sbi = F2FS_I_SB(inode),
			.type = DATA,
			.op = REQ_OP_WRITE,
			.op_flags = REQ_SYNC,
			.old_blkaddr = NULL_ADDR,
			.page = page,
			.encrypted_page = NULL,
		};
		bool is_dirty = PageDirty(page);
		int err;

retry:
		set_page_dirty(page);
		f2fs_wait_on_page_writeback(page, DATA, true);
		if (clear_page_dirty_for_io(page)) {
			inode_dec_dirty_pages(inode);
			remove_dirty_inode(inode);
		}

		set_cold_data(page);

		err = do_write_data_page(&fio);
		if (err == -ENOMEM && is_dirty) {
			congestion_wait(BLK_RW_ASYNC, HZ/50);
			goto retry;
		}
	}
out:
	f2fs_put_page(page, 1);
}

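/*
 * gc_data_segment() below also works in multiple passes over the victim's
 * summary entries: readahead of NAT blocks and node pages first, then a
 * liveness check via is_alive() plus inode lookup and data readahead, and
 * finally the actual block moves under the inode's dio_rwsem so GC does not
 * race with direct I/O.
 */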
/*
 * This function tries to get the parent node of a victim data block, and
 * checks the data block's validity. If the block is valid, it is copied with
 * cold status and the parent node is modified.
 * If the parent node is not valid or the data block address is different,
 * the victim data block is ignored.
 */
static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
		struct gc_inode_list *gc_list, unsigned int segno, int gc_type)
{
	struct super_block *sb = sbi->sb;
	struct f2fs_summary *entry;
	block_t start_addr;
	int off;
	int phase = 0;

	start_addr = START_BLOCK(sbi, segno);

next_step:
	entry = sum;

	for (off = 0; off < sbi->blocks_per_seg; off++, entry++) {
		struct page *data_page;
		struct inode *inode;
		struct node_info dni; /* dnode info for the data */
		unsigned int ofs_in_node, nofs;
		block_t start_bidx;
		nid_t nid = le32_to_cpu(entry->nid);

		/* stop BG_GC if there are not enough free sections. */
		if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0))
			return;

		if (check_valid_map(sbi, segno, off) == 0)
			continue;

		if (phase == 0) {
			ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), 1,
							META_NAT, true);
			continue;
		}

		if (phase == 1) {
			ra_node_page(sbi, nid);
			continue;
		}

		/* Get an inode by ino with checking validity */
		if (!is_alive(sbi, entry, &dni, start_addr + off, &nofs))
			continue;

		if (phase == 2) {
			ra_node_page(sbi, dni.ino);
			continue;
		}

		ofs_in_node = le16_to_cpu(entry->ofs_in_node);

		if (phase == 3) {
			inode = f2fs_iget(sb, dni.ino);
			if (IS_ERR(inode) || is_bad_inode(inode))
				continue;

			/* if encrypted inode, defer the move to the last phase */
			if (f2fs_encrypted_inode(inode) &&
					S_ISREG(inode->i_mode)) {
				add_gc_inode(gc_list, inode);
				continue;
			}

			start_bidx = start_bidx_of_node(nofs, inode);
			data_page = get_read_data_page(inode,
					start_bidx + ofs_in_node, REQ_RAHEAD,
					true);
			if (IS_ERR(data_page)) {
				iput(inode);
				continue;
			}

			f2fs_put_page(data_page, 0);
			add_gc_inode(gc_list, inode);
			continue;
		}

		/* phase 4 */
		inode = find_gc_inode(gc_list, dni.ino);
		if (inode) {
			struct f2fs_inode_info *fi = F2FS_I(inode);
			bool locked = false;

			if (S_ISREG(inode->i_mode)) {
				if (!down_write_trylock(&fi->dio_rwsem[READ]))
					continue;
				if (!down_write_trylock(
						&fi->dio_rwsem[WRITE])) {
					up_write(&fi->dio_rwsem[READ]);
					continue;
				}
				locked = true;
			}

			start_bidx = start_bidx_of_node(nofs, inode)
								+ ofs_in_node;
			if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
				move_encrypted_block(inode, start_bidx, segno, off);
			else
				move_data_page(inode, start_bidx, gc_type, segno, off);

			if (locked) {
				up_write(&fi->dio_rwsem[WRITE]);
				up_write(&fi->dio_rwsem[READ]);
			}

			stat_inc_data_blk_count(sbi, 1, gc_type);
		}
	}

	if (++phase < 5)
		goto next_step;
}

static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim,
					int gc_type)
{
	struct sit_info *sit_i = SIT_I(sbi);
	int ret;

	mutex_lock(&sit_i->sentry_lock);
	ret = DIRTY_I(sbi)->v_ops->get_victim(sbi, victim, gc_type,
					      NO_CHECK_TYPE, LFS);
	mutex_unlock(&sit_i->sentry_lock);
	return ret;
}

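/*
 * Collect one victim section: every segment in the section is scanned using
 * its summary block (referenced up front so the pages stay cached), and node
 * and data segments are handed to gc_node_segment()/gc_data_segment()
 * respectively.  The GC unit is a section, i.e. segs_per_sec segments.
 */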
static int do_garbage_collect(struct f2fs_sb_info *sbi,
				unsigned int start_segno,
				struct gc_inode_list *gc_list, int gc_type)
{
	struct page *sum_page;
	struct f2fs_summary_block *sum;
	struct blk_plug plug;
	unsigned int segno = start_segno;
	unsigned int end_segno = start_segno + sbi->segs_per_sec;
	int sec_freed = 0;
	unsigned char type = IS_DATASEG(get_seg_entry(sbi, segno)->type) ?
						SUM_TYPE_DATA : SUM_TYPE_NODE;

	/* readahead multiple SSA blocks that have contiguous addresses */
	if (sbi->segs_per_sec > 1)
		ra_meta_pages(sbi, GET_SUM_BLOCK(sbi, segno),
					sbi->segs_per_sec, META_SSA, true);

	/* reference all summary pages */
	while (segno < end_segno) {
		sum_page = get_sum_page(sbi, segno++);
		unlock_page(sum_page);
	}

	blk_start_plug(&plug);

	for (segno = start_segno; segno < end_segno; segno++) {

		/* find segment summary of victim */
		sum_page = find_get_page(META_MAPPING(sbi),
					GET_SUM_BLOCK(sbi, segno));
		f2fs_put_page(sum_page, 0);

		if (get_valid_blocks(sbi, segno, false) == 0 ||
				!PageUptodate(sum_page) ||
				unlikely(f2fs_cp_error(sbi)))
			goto next;

		sum = page_address(sum_page);
		f2fs_bug_on(sbi, type != GET_SUM_TYPE((&sum->footer)));

		/*
		 * this is to avoid deadlock:
		 * - lock_page(sum_page)         - f2fs_replace_block
		 *  - check_valid_map()            - mutex_lock(sentry_lock)
		 *   - mutex_lock(sentry_lock)     - change_curseg()
		 *                                  - lock_page(sum_page)
		 */
		if (type == SUM_TYPE_NODE)
			gc_node_segment(sbi, sum->entries, segno, gc_type);
		else
			gc_data_segment(sbi, sum->entries, gc_list, segno,
								gc_type);

		stat_inc_seg_count(sbi, type, gc_type);
next:
		f2fs_put_page(sum_page, 0);
	}

	if (gc_type == FG_GC)
		f2fs_submit_merged_bio(sbi,
				(type == SUM_TYPE_NODE) ? NODE : DATA, WRITE);

	blk_finish_plug(&plug);

	if (gc_type == FG_GC &&
		get_valid_blocks(sbi, start_segno, true) == 0)
		sec_freed = 1;

	stat_inc_call_count(sbi->stat_info);

	return sec_freed;
}

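/*
 * Entry point for both background and foreground GC.  Roughly: optionally
 * turn a background request into foreground GC when free sections run low
 * (after trying a checkpoint to reclaim prefree segments), pick a victim,
 * collect it, and loop until enough sections are freed; non-sync foreground
 * GC typically finishes with a checkpoint.  Called with sbi->gc_mutex held,
 * which is released before returning.
 */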
int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
			bool background, unsigned int segno)
{
	int gc_type = sync ? FG_GC : BG_GC;
	int sec_freed = 0;
	int ret = -EINVAL;
	struct cp_control cpc;
	unsigned int init_segno = segno;
	struct gc_inode_list gc_list = {
		.ilist = LIST_HEAD_INIT(gc_list.ilist),
		.iroot = RADIX_TREE_INIT(GFP_NOFS),
	};

	cpc.reason = __get_cp_reason(sbi);
gc_more:
	if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE)))
		goto stop;
	if (unlikely(f2fs_cp_error(sbi))) {
		ret = -EIO;
		goto stop;
	}

	if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0)) {
		/*
		 * For example, if there are many prefree_segments below the
		 * given threshold, we can make them free by checkpoint. Then,
		 * we secure free segments which don't need fggc any more.
		 */
		if (prefree_segments(sbi)) {
			ret = write_checkpoint(sbi, &cpc);
			if (ret)
				goto stop;
		}
		if (has_not_enough_free_secs(sbi, 0, 0))
			gc_type = FG_GC;
	}

	/* f2fs_balance_fs doesn't need to do BG_GC in critical path. */
	if (gc_type == BG_GC && !background)
		goto stop;
	if (!__get_victim(sbi, &segno, gc_type))
		goto stop;
	ret = 0;

	if (do_garbage_collect(sbi, segno, &gc_list, gc_type) &&
			gc_type == FG_GC)
		sec_freed++;

	if (gc_type == FG_GC)
		sbi->cur_victim_sec = NULL_SEGNO;

	if (!sync) {
		if (has_not_enough_free_secs(sbi, sec_freed, 0)) {
			segno = NULL_SEGNO;
			goto gc_more;
		}

		if (gc_type == FG_GC)
			ret = write_checkpoint(sbi, &cpc);
	}
stop:
	SIT_I(sbi)->last_victim[ALLOC_NEXT] = 0;
	SIT_I(sbi)->last_victim[FLUSH_DEVICE] = init_segno;
	mutex_unlock(&sbi->gc_mutex);

	put_gc_inode(&gc_list);

	if (sync)
		ret = sec_freed ? 0 : -EAGAIN;
	return ret;
}

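/*
 * The fggc_threshold computed below appears to be the expected number of
 * valid blocks per section when the main area is full apart from the
 * overprovisioned space, i.e. (main - ovp) * BLKS_PER_SEC / (main - resv);
 * sections with more valid blocks than this are skipped as FG_GC candidates
 * (see no_fggc_candidate()).
 */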
void build_gc_manager(struct f2fs_sb_info *sbi)
{
	u64 main_count, resv_count, ovp_count;

	DIRTY_I(sbi)->v_ops = &default_v_ops;

	/* threshold of # of valid blocks in a section for victims of FG_GC */
	main_count = SM_I(sbi)->main_segments << sbi->log_blocks_per_seg;
	resv_count = SM_I(sbi)->reserved_segments << sbi->log_blocks_per_seg;
	ovp_count = SM_I(sbi)->ovp_segments << sbi->log_blocks_per_seg;

	sbi->fggc_threshold = div64_u64((main_count - ovp_count) *
				BLKS_PER_SEC(sbi), (main_count - resv_count));

	/* give warm/cold data area from the slower device */
	if (sbi->s_ndevs && sbi->segs_per_sec == 1)
		SIT_I(sbi)->last_victim[ALLOC_NEXT] =
				GET_SEGNO(sbi, FDEV(0).end_blk) + 1;
}