f2fs: avoid deadlock caused by lock order of page and lock_op
/*
 * fs/f2fs/super.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/module.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/statfs.h>
#include <linux/buffer_head.h>
#include <linux/backing-dev.h>
#include <linux/kthread.h>
#include <linux/parser.h>
#include <linux/mount.h>
#include <linux/seq_file.h>
#include <linux/proc_fs.h>
#include <linux/random.h>
#include <linux/exportfs.h>
#include <linux/blkdev.h>
#include <linux/f2fs_fs.h>
#include <linux/sysfs.h>

#include "f2fs.h"
#include "node.h"
#include "segment.h"
#include "xattr.h"
#include "gc.h"
#include "trace.h"

#define CREATE_TRACE_POINTS
#include <trace/events/f2fs.h>

static struct kmem_cache *f2fs_inode_cachep;

#ifdef CONFIG_F2FS_FAULT_INJECTION

char *fault_name[FAULT_MAX] = {
	[FAULT_KMALLOC]		= "kmalloc",
	[FAULT_PAGE_ALLOC]	= "page alloc",
	[FAULT_ALLOC_NID]	= "alloc nid",
	[FAULT_ORPHAN]		= "orphan",
	[FAULT_BLOCK]		= "no more block",
	[FAULT_DIR_DEPTH]	= "too big dir depth",
	[FAULT_EVICT_INODE]	= "evict_inode fail",
	[FAULT_TRUNCATE]	= "truncate fail",
	[FAULT_IO]		= "IO error",
	[FAULT_CHECKPOINT]	= "checkpoint error",
};

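/*
 * Arm fault injection at the given rate: a non-zero rate enables every
 * fault type at once ((1 << FAULT_MAX) - 1 is the full bitmap), while a
 * rate of zero clears the whole fault_info and disables injection.
 */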
static void f2fs_build_fault_attr(struct f2fs_sb_info *sbi,
						unsigned int rate)
{
	struct f2fs_fault_info *ffi = &sbi->fault_info;

	if (rate) {
		atomic_set(&ffi->inject_ops, 0);
		ffi->inject_rate = rate;
		ffi->inject_type = (1 << FAULT_MAX) - 1;
	} else {
		memset(ffi, 0, sizeof(struct f2fs_fault_info));
	}
}
#endif

/* f2fs-wide shrinker description */
static struct shrinker f2fs_shrinker_info = {
	.scan_objects = f2fs_shrink_scan,
	.count_objects = f2fs_shrink_count,
	.seeks = DEFAULT_SEEKS,
};

enum {
	Opt_gc_background,
	Opt_disable_roll_forward,
	Opt_norecovery,
	Opt_discard,
	Opt_nodiscard,
	Opt_noheap,
	Opt_heap,
	Opt_user_xattr,
	Opt_nouser_xattr,
	Opt_acl,
	Opt_noacl,
	Opt_active_logs,
	Opt_disable_ext_identify,
	Opt_inline_xattr,
	Opt_noinline_xattr,
	Opt_inline_data,
	Opt_inline_dentry,
	Opt_noinline_dentry,
	Opt_flush_merge,
	Opt_noflush_merge,
	Opt_nobarrier,
	Opt_fastboot,
	Opt_extent_cache,
	Opt_noextent_cache,
	Opt_noinline_data,
	Opt_data_flush,
	Opt_mode,
	Opt_io_size_bits,
	Opt_fault_injection,
	Opt_lazytime,
	Opt_nolazytime,
	Opt_err,
};

static match_table_t f2fs_tokens = {
	{Opt_gc_background, "background_gc=%s"},
	{Opt_disable_roll_forward, "disable_roll_forward"},
	{Opt_norecovery, "norecovery"},
	{Opt_discard, "discard"},
	{Opt_nodiscard, "nodiscard"},
	{Opt_noheap, "no_heap"},
	{Opt_heap, "heap"},
	{Opt_user_xattr, "user_xattr"},
	{Opt_nouser_xattr, "nouser_xattr"},
	{Opt_acl, "acl"},
	{Opt_noacl, "noacl"},
	{Opt_active_logs, "active_logs=%u"},
	{Opt_disable_ext_identify, "disable_ext_identify"},
	{Opt_inline_xattr, "inline_xattr"},
	{Opt_noinline_xattr, "noinline_xattr"},
	{Opt_inline_data, "inline_data"},
	{Opt_inline_dentry, "inline_dentry"},
	{Opt_noinline_dentry, "noinline_dentry"},
	{Opt_flush_merge, "flush_merge"},
	{Opt_noflush_merge, "noflush_merge"},
	{Opt_nobarrier, "nobarrier"},
	{Opt_fastboot, "fastboot"},
	{Opt_extent_cache, "extent_cache"},
	{Opt_noextent_cache, "noextent_cache"},
	{Opt_noinline_data, "noinline_data"},
	{Opt_data_flush, "data_flush"},
	{Opt_mode, "mode=%s"},
	{Opt_io_size_bits, "io_bits=%u"},
	{Opt_fault_injection, "fault_injection=%u"},
	{Opt_lazytime, "lazytime"},
	{Opt_nolazytime, "nolazytime"},
	{Opt_err, NULL},
};
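
/*
 * A representative option string accepted by parse_options() below,
 * shown purely as an illustration:
 *
 *	background_gc=sync,discard,active_logs=6,mode=lfs
 */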

void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...)
{
	struct va_format vaf;
	va_list args;

	va_start(args, fmt);
	vaf.fmt = fmt;
	vaf.va = &args;
	printk("%sF2FS-fs (%s): %pV\n", level, sb->s_id, &vaf);
	va_end(args);
}

static void init_once(void *foo)
{
	struct f2fs_inode_info *fi = (struct f2fs_inode_info *) foo;

	inode_init_once(&fi->vfs_inode);
}

static int parse_options(struct super_block *sb, char *options)
{
	struct f2fs_sb_info *sbi = F2FS_SB(sb);
	struct request_queue *q;
	substring_t args[MAX_OPT_ARGS];
	char *p, *name;
	int arg = 0;

	if (!options)
		return 0;

	while ((p = strsep(&options, ",")) != NULL) {
		int token;
		if (!*p)
			continue;
		/*
		 * Initialize args struct so we know whether arg was
		 * found; some options take optional arguments.
		 */
		args[0].to = args[0].from = NULL;
		token = match_token(p, f2fs_tokens, args);

		switch (token) {
		case Opt_gc_background:
			name = match_strdup(&args[0]);

			if (!name)
				return -ENOMEM;
			if (strlen(name) == 2 && !strncmp(name, "on", 2)) {
				set_opt(sbi, BG_GC);
				clear_opt(sbi, FORCE_FG_GC);
			} else if (strlen(name) == 3 && !strncmp(name, "off", 3)) {
				clear_opt(sbi, BG_GC);
				clear_opt(sbi, FORCE_FG_GC);
			} else if (strlen(name) == 4 && !strncmp(name, "sync", 4)) {
				set_opt(sbi, BG_GC);
				set_opt(sbi, FORCE_FG_GC);
			} else {
				kfree(name);
				return -EINVAL;
			}
			kfree(name);
			break;
		case Opt_disable_roll_forward:
			set_opt(sbi, DISABLE_ROLL_FORWARD);
			break;
		case Opt_norecovery:
			/* this option requires a read-only mount */
			set_opt(sbi, DISABLE_ROLL_FORWARD);
			if (!f2fs_readonly(sb))
				return -EINVAL;
			break;
		case Opt_discard:
			q = bdev_get_queue(sb->s_bdev);
			if (blk_queue_discard(q)) {
				set_opt(sbi, DISCARD);
			} else if (!f2fs_sb_mounted_blkzoned(sb)) {
				f2fs_msg(sb, KERN_WARNING,
					"mounting with \"discard\" option, but "
					"the device does not support discard");
			}
			break;
		case Opt_nodiscard:
			if (f2fs_sb_mounted_blkzoned(sb)) {
				f2fs_msg(sb, KERN_WARNING,
					"discard is required for zoned block devices");
				return -EINVAL;
			}
			clear_opt(sbi, DISCARD);
			break;
		case Opt_noheap:
			set_opt(sbi, NOHEAP);
			break;
		case Opt_heap:
			clear_opt(sbi, NOHEAP);
			break;
#ifdef CONFIG_F2FS_FS_XATTR
		case Opt_user_xattr:
			set_opt(sbi, XATTR_USER);
			break;
		case Opt_nouser_xattr:
			clear_opt(sbi, XATTR_USER);
			break;
		case Opt_inline_xattr:
			set_opt(sbi, INLINE_XATTR);
			break;
		case Opt_noinline_xattr:
			clear_opt(sbi, INLINE_XATTR);
			break;
#else
		case Opt_user_xattr:
			f2fs_msg(sb, KERN_INFO,
				"user_xattr options not supported");
			break;
		case Opt_nouser_xattr:
			f2fs_msg(sb, KERN_INFO,
				"nouser_xattr options not supported");
			break;
		case Opt_inline_xattr:
			f2fs_msg(sb, KERN_INFO,
				"inline_xattr options not supported");
			break;
		case Opt_noinline_xattr:
			f2fs_msg(sb, KERN_INFO,
				"noinline_xattr options not supported");
			break;
#endif
#ifdef CONFIG_F2FS_FS_POSIX_ACL
		case Opt_acl:
			set_opt(sbi, POSIX_ACL);
			break;
		case Opt_noacl:
			clear_opt(sbi, POSIX_ACL);
			break;
#else
		case Opt_acl:
			f2fs_msg(sb, KERN_INFO, "acl options not supported");
			break;
		case Opt_noacl:
			f2fs_msg(sb, KERN_INFO, "noacl options not supported");
			break;
#endif
		case Opt_active_logs:
			if (args->from && match_int(args, &arg))
				return -EINVAL;
			if (arg != 2 && arg != 4 && arg != NR_CURSEG_TYPE)
				return -EINVAL;
			sbi->active_logs = arg;
			break;
		case Opt_disable_ext_identify:
			set_opt(sbi, DISABLE_EXT_IDENTIFY);
			break;
		case Opt_inline_data:
			set_opt(sbi, INLINE_DATA);
			break;
		case Opt_inline_dentry:
			set_opt(sbi, INLINE_DENTRY);
			break;
		case Opt_noinline_dentry:
			clear_opt(sbi, INLINE_DENTRY);
			break;
		case Opt_flush_merge:
			set_opt(sbi, FLUSH_MERGE);
			break;
		case Opt_noflush_merge:
			clear_opt(sbi, FLUSH_MERGE);
			break;
		case Opt_nobarrier:
			set_opt(sbi, NOBARRIER);
			break;
		case Opt_fastboot:
			set_opt(sbi, FASTBOOT);
			break;
		case Opt_extent_cache:
			set_opt(sbi, EXTENT_CACHE);
			break;
		case Opt_noextent_cache:
			clear_opt(sbi, EXTENT_CACHE);
			break;
		case Opt_noinline_data:
			clear_opt(sbi, INLINE_DATA);
			break;
		case Opt_data_flush:
			set_opt(sbi, DATA_FLUSH);
			break;
		case Opt_mode:
			name = match_strdup(&args[0]);

			if (!name)
				return -ENOMEM;
			if (strlen(name) == 8 &&
					!strncmp(name, "adaptive", 8)) {
				if (f2fs_sb_mounted_blkzoned(sb)) {
					f2fs_msg(sb, KERN_WARNING,
						 "adaptive mode is not allowed with "
						 "zoned block device feature");
					kfree(name);
					return -EINVAL;
				}
				set_opt_mode(sbi, F2FS_MOUNT_ADAPTIVE);
			} else if (strlen(name) == 3 &&
					!strncmp(name, "lfs", 3)) {
				set_opt_mode(sbi, F2FS_MOUNT_LFS);
			} else {
				kfree(name);
				return -EINVAL;
			}
			kfree(name);
			break;
		case Opt_io_size_bits:
			if (args->from && match_int(args, &arg))
				return -EINVAL;
			if (arg > __ilog2_u32(BIO_MAX_PAGES)) {
				f2fs_msg(sb, KERN_WARNING,
					"Unsupported I/O size %d, larger than %d",
					1 << arg, BIO_MAX_PAGES);
				return -EINVAL;
			}
			sbi->write_io_size_bits = arg;
			break;
		case Opt_fault_injection:
			if (args->from && match_int(args, &arg))
				return -EINVAL;
#ifdef CONFIG_F2FS_FAULT_INJECTION
			f2fs_build_fault_attr(sbi, arg);
			set_opt(sbi, FAULT_INJECTION);
#else
			f2fs_msg(sb, KERN_INFO,
				"FAULT_INJECTION was not selected");
#endif
			break;
		case Opt_lazytime:
			sb->s_flags |= MS_LAZYTIME;
			break;
		case Opt_nolazytime:
			sb->s_flags &= ~MS_LAZYTIME;
			break;
		default:
			f2fs_msg(sb, KERN_ERR,
				"Unrecognized mount option \"%s\" or missing value",
				p);
			return -EINVAL;
		}
	}

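	/*
	 * Aligned write IO of io_bits size relies on the sequential
	 * allocation that only LFS mode provides, so the check below
	 * rejects io_bits without mode=lfs.
	 */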
	if (F2FS_IO_SIZE_BITS(sbi) && !test_opt(sbi, LFS)) {
		f2fs_msg(sb, KERN_ERR,
				"Should set mode=lfs with %uKB-sized IO",
				F2FS_IO_SIZE_KB(sbi));
		return -EINVAL;
	}
	return 0;
}

static struct inode *f2fs_alloc_inode(struct super_block *sb)
{
	struct f2fs_inode_info *fi;

	fi = kmem_cache_alloc(f2fs_inode_cachep, GFP_F2FS_ZERO);
	if (!fi)
		return NULL;

	init_once((void *) fi);

	/* Initialize f2fs-specific inode info */
	fi->vfs_inode.i_version = 1;
	atomic_set(&fi->dirty_pages, 0);
	fi->i_current_depth = 1;
	fi->i_advise = 0;
	init_rwsem(&fi->i_sem);
	INIT_LIST_HEAD(&fi->dirty_list);
	INIT_LIST_HEAD(&fi->gdirty_list);
	INIT_LIST_HEAD(&fi->inmem_pages);
	mutex_init(&fi->inmem_lock);
	init_rwsem(&fi->dio_rwsem[READ]);
	init_rwsem(&fi->dio_rwsem[WRITE]);
	init_rwsem(&fi->i_mmap_sem);

	/* Will be used by directory only */
	fi->i_dir_level = F2FS_SB(sb)->dir_level;
	return &fi->vfs_inode;
}

static int f2fs_drop_inode(struct inode *inode)
{
	int ret;
	/*
	 * This is to avoid a deadlock condition like below.
	 * writeback_single_inode(inode)
	 *  - f2fs_write_data_page
	 *    - f2fs_gc -> iput -> evict
	 *       - inode_wait_for_writeback(inode)
	 */
	if ((!inode_unhashed(inode) && inode->i_state & I_SYNC)) {
		if (!inode->i_nlink && !is_bad_inode(inode)) {
			/* to avoid a simultaneous evict_inode call */
			atomic_inc(&inode->i_count);
			spin_unlock(&inode->i_lock);

			/* any remaining atomic pages should be discarded */
			if (f2fs_is_atomic_file(inode))
				drop_inmem_pages(inode);

			/* fi->extent_tree should be kept for writepage */
			f2fs_destroy_extent_node(inode);

			sb_start_intwrite(inode->i_sb);
			f2fs_i_size_write(inode, 0);

			if (F2FS_HAS_BLOCKS(inode))
				f2fs_truncate(inode);

			sb_end_intwrite(inode->i_sb);

			fscrypt_put_encryption_info(inode, NULL);
			spin_lock(&inode->i_lock);
			atomic_dec(&inode->i_count);
		}
		trace_f2fs_drop_inode(inode, 0);
		return 0;
	}
	ret = generic_drop_inode(inode);
	trace_f2fs_drop_inode(inode, ret);
	return ret;
}

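/*
 * Mark the inode dirty for write-back at checkpoint time.  Returns 1 if
 * the inode was already dirty and 0 otherwise.  With @sync set, the
 * inode is also queued on the global DIRTY_META list so that a
 * checkpoint can flush it.
 */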
int f2fs_inode_dirtied(struct inode *inode, bool sync)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	int ret = 0;

	spin_lock(&sbi->inode_lock[DIRTY_META]);
	if (is_inode_flag_set(inode, FI_DIRTY_INODE)) {
		ret = 1;
	} else {
		set_inode_flag(inode, FI_DIRTY_INODE);
		stat_inc_dirty_inode(sbi, DIRTY_META);
	}
	if (sync && list_empty(&F2FS_I(inode)->gdirty_list)) {
		list_add_tail(&F2FS_I(inode)->gdirty_list,
				&sbi->inode_list[DIRTY_META]);
		inc_page_count(sbi, F2FS_DIRTY_IMETA);
	}
	spin_unlock(&sbi->inode_lock[DIRTY_META]);
	return ret;
}

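/*
 * Counterpart of f2fs_inode_dirtied(): drop the inode from the dirty
 * metadata list and clear its dirty/auto-recover flags under the same
 * DIRTY_META lock.
 */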
void f2fs_inode_synced(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);

	spin_lock(&sbi->inode_lock[DIRTY_META]);
	if (!is_inode_flag_set(inode, FI_DIRTY_INODE)) {
		spin_unlock(&sbi->inode_lock[DIRTY_META]);
		return;
	}
	if (!list_empty(&F2FS_I(inode)->gdirty_list)) {
		list_del_init(&F2FS_I(inode)->gdirty_list);
		dec_page_count(sbi, F2FS_DIRTY_IMETA);
	}
	clear_inode_flag(inode, FI_DIRTY_INODE);
	clear_inode_flag(inode, FI_AUTO_RECOVER);
	stat_dec_dirty_inode(F2FS_I_SB(inode), DIRTY_META);
	spin_unlock(&sbi->inode_lock[DIRTY_META]);
}

/*
 * f2fs_dirty_inode() is called from __mark_inode_dirty()
 *
 * We should call set_dirty_inode to write the dirty inode through write_inode.
 */
static void f2fs_dirty_inode(struct inode *inode, int flags)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);

	if (inode->i_ino == F2FS_NODE_INO(sbi) ||
			inode->i_ino == F2FS_META_INO(sbi))
		return;

	if (flags == I_DIRTY_TIME)
		return;

	if (is_inode_flag_set(inode, FI_AUTO_RECOVER))
		clear_inode_flag(inode, FI_AUTO_RECOVER);

	f2fs_inode_dirtied(inode, false);
}

static void f2fs_i_callback(struct rcu_head *head)
{
	struct inode *inode = container_of(head, struct inode, i_rcu);
	kmem_cache_free(f2fs_inode_cachep, F2FS_I(inode));
}

static void f2fs_destroy_inode(struct inode *inode)
{
	call_rcu(&inode->i_rcu, f2fs_i_callback);
}

static void destroy_percpu_info(struct f2fs_sb_info *sbi)
{
	percpu_counter_destroy(&sbi->alloc_valid_block_count);
	percpu_counter_destroy(&sbi->total_valid_inode_count);
}

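/*
 * Release each backing block device that was opened exclusively
 * (FMODE_EXCL) at mount time, along with any per-device zone-type map.
 */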
static void destroy_device_list(struct f2fs_sb_info *sbi)
{
	int i;

	for (i = 0; i < sbi->s_ndevs; i++) {
		blkdev_put(FDEV(i).bdev, FMODE_EXCL);
#ifdef CONFIG_BLK_DEV_ZONED
		kfree(FDEV(i).blkz_type);
#endif
	}
	kfree(sbi->devs);
}

static void f2fs_put_super(struct super_block *sb)
{
	struct f2fs_sb_info *sbi = F2FS_SB(sb);
	int i;

	/* prevent remaining shrinker jobs */
	mutex_lock(&sbi->umount_mutex);

	/*
	 * We don't need to do a checkpoint when the superblock is clean.
	 * But if the previous checkpoint was not done by umount, we need
	 * to do a clean checkpoint again.
	 */
	if (is_sbi_flag_set(sbi, SBI_IS_DIRTY) ||
			!is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) {
		struct cp_control cpc = {
			.reason = CP_UMOUNT,
		};
		write_checkpoint(sbi, &cpc);
	}

	/* be sure to wait for any ongoing discard commands */
	f2fs_wait_discard_bios(sbi);

	if (f2fs_discard_en(sbi) && !sbi->discard_blks) {
		struct cp_control cpc = {
			.reason = CP_UMOUNT | CP_TRIMMED,
		};
		write_checkpoint(sbi, &cpc);
	}

	/* write_checkpoint can update stat information */
	f2fs_destroy_stats(sbi);

	/*
	 * Normally the superblock is clean, so we need to release this here.
	 * In addition, EIO will skip the checkpoint, so we need this as well.
	 */
	release_ino_entry(sbi, true);

	f2fs_leave_shrinker(sbi);
	mutex_unlock(&sbi->umount_mutex);

	/* even in the cp_error case, we can wait for any writeback page */
	f2fs_flush_merged_writes(sbi);

	iput(sbi->node_inode);
	iput(sbi->meta_inode);

	/* destroy f2fs internal modules */
	destroy_node_manager(sbi);
	destroy_segment_manager(sbi);

	kfree(sbi->ckpt);

	f2fs_exit_sysfs(sbi);

	sb->s_fs_info = NULL;
	if (sbi->s_chksum_driver)
		crypto_free_shash(sbi->s_chksum_driver);
	kfree(sbi->raw_super);

	destroy_device_list(sbi);
	mempool_destroy(sbi->write_io_dummy);
	destroy_percpu_info(sbi);
	for (i = 0; i < NR_PAGE_TYPE; i++)
		kfree(sbi->write_io[i]);
	kfree(sbi);
}

int f2fs_sync_fs(struct super_block *sb, int sync)
{
	struct f2fs_sb_info *sbi = F2FS_SB(sb);
	int err = 0;

	trace_f2fs_sync_fs(sb, sync);

	if (sync) {
		struct cp_control cpc;

		cpc.reason = __get_cp_reason(sbi);

		mutex_lock(&sbi->gc_mutex);
		err = write_checkpoint(sbi, &cpc);
		mutex_unlock(&sbi->gc_mutex);
	}
	f2fs_trace_ios(NULL, 1);

	return err;
}

static int f2fs_freeze(struct super_block *sb)
{
	if (f2fs_readonly(sb))
		return 0;

	/* IO error happened before */
	if (unlikely(f2fs_cp_error(F2FS_SB(sb))))
		return -EIO;

	/* must be clean, since sync_filesystem() was already called */
	if (is_sbi_flag_set(F2FS_SB(sb), SBI_IS_DIRTY))
		return -EINVAL;
	return 0;
}

static int f2fs_unfreeze(struct super_block *sb)
{
	return 0;
}

static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
	struct super_block *sb = dentry->d_sb;
	struct f2fs_sb_info *sbi = F2FS_SB(sb);
	u64 id = huge_encode_dev(sb->s_bdev->bd_dev);
	block_t total_count, user_block_count, start_count, ovp_count;
	u64 avail_node_count;

	total_count = le64_to_cpu(sbi->raw_super->block_count);
	user_block_count = sbi->user_block_count;
	start_count = le32_to_cpu(sbi->raw_super->segment0_blkaddr);
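	/* overprovisioned segments, expressed in blocks */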
	ovp_count = SM_I(sbi)->ovp_segments << sbi->log_blocks_per_seg;
	buf->f_type = F2FS_SUPER_MAGIC;
	buf->f_bsize = sbi->blocksize;

	buf->f_blocks = total_count - start_count;
	buf->f_bfree = user_block_count - valid_user_blocks(sbi) + ovp_count;
	buf->f_bavail = user_block_count - valid_user_blocks(sbi) -
						sbi->reserved_blocks;

	avail_node_count = sbi->total_node_count - F2FS_RESERVED_NODE_NUM;

	if (avail_node_count > user_block_count) {
		buf->f_files = user_block_count;
		buf->f_ffree = buf->f_bavail;
	} else {
		buf->f_files = avail_node_count;
		buf->f_ffree = min(avail_node_count - valid_node_count(sbi),
					buf->f_bavail);
	}

	buf->f_namelen = F2FS_NAME_LEN;
	buf->f_fsid.val[0] = (u32)id;
	buf->f_fsid.val[1] = (u32)(id >> 32);

	return 0;
}

static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
{
	struct f2fs_sb_info *sbi = F2FS_SB(root->d_sb);

	if (!f2fs_readonly(sbi->sb) && test_opt(sbi, BG_GC)) {
		if (test_opt(sbi, FORCE_FG_GC))
			seq_printf(seq, ",background_gc=%s", "sync");
		else
			seq_printf(seq, ",background_gc=%s", "on");
	} else {
		seq_printf(seq, ",background_gc=%s", "off");
	}
	if (test_opt(sbi, DISABLE_ROLL_FORWARD))
		seq_puts(seq, ",disable_roll_forward");
	if (test_opt(sbi, DISCARD))
		seq_puts(seq, ",discard");
	if (test_opt(sbi, NOHEAP))
		seq_puts(seq, ",no_heap");
	else
		seq_puts(seq, ",heap");
#ifdef CONFIG_F2FS_FS_XATTR
	if (test_opt(sbi, XATTR_USER))
		seq_puts(seq, ",user_xattr");
	else
		seq_puts(seq, ",nouser_xattr");
	if (test_opt(sbi, INLINE_XATTR))
		seq_puts(seq, ",inline_xattr");
	else
		seq_puts(seq, ",noinline_xattr");
#endif
#ifdef CONFIG_F2FS_FS_POSIX_ACL
	if (test_opt(sbi, POSIX_ACL))
		seq_puts(seq, ",acl");
	else
		seq_puts(seq, ",noacl");
#endif
	if (test_opt(sbi, DISABLE_EXT_IDENTIFY))
		seq_puts(seq, ",disable_ext_identify");
	if (test_opt(sbi, INLINE_DATA))
		seq_puts(seq, ",inline_data");
	else
		seq_puts(seq, ",noinline_data");
	if (test_opt(sbi, INLINE_DENTRY))
		seq_puts(seq, ",inline_dentry");
	else
		seq_puts(seq, ",noinline_dentry");
	if (!f2fs_readonly(sbi->sb) && test_opt(sbi, FLUSH_MERGE))
		seq_puts(seq, ",flush_merge");
	if (test_opt(sbi, NOBARRIER))
		seq_puts(seq, ",nobarrier");
	if (test_opt(sbi, FASTBOOT))
		seq_puts(seq, ",fastboot");
	if (test_opt(sbi, EXTENT_CACHE))
		seq_puts(seq, ",extent_cache");
	else
		seq_puts(seq, ",noextent_cache");
	if (test_opt(sbi, DATA_FLUSH))
		seq_puts(seq, ",data_flush");

	seq_puts(seq, ",mode=");
	if (test_opt(sbi, ADAPTIVE))
		seq_puts(seq, "adaptive");
	else if (test_opt(sbi, LFS))
		seq_puts(seq, "lfs");
	seq_printf(seq, ",active_logs=%u", sbi->active_logs);
	if (F2FS_IO_SIZE_BITS(sbi))
		seq_printf(seq, ",io_size=%uKB", F2FS_IO_SIZE_KB(sbi));
#ifdef CONFIG_F2FS_FAULT_INJECTION
	if (test_opt(sbi, FAULT_INJECTION))
		seq_printf(seq, ",fault_injection=%u",
				sbi->fault_info.inject_rate);
#endif

	return 0;
}

static void default_options(struct f2fs_sb_info *sbi)
{
	/* init some FS parameters */
	sbi->active_logs = NR_CURSEG_TYPE;

	set_opt(sbi, BG_GC);
	set_opt(sbi, INLINE_XATTR);
	set_opt(sbi, INLINE_DATA);
	set_opt(sbi, INLINE_DENTRY);
	set_opt(sbi, EXTENT_CACHE);
	set_opt(sbi, NOHEAP);
	sbi->sb->s_flags |= MS_LAZYTIME;
	set_opt(sbi, FLUSH_MERGE);
	if (f2fs_sb_mounted_blkzoned(sbi->sb)) {
		set_opt_mode(sbi, F2FS_MOUNT_LFS);
		set_opt(sbi, DISCARD);
	} else {
		set_opt_mode(sbi, F2FS_MOUNT_ADAPTIVE);
	}

#ifdef CONFIG_F2FS_FS_XATTR
	set_opt(sbi, XATTR_USER);
#endif
#ifdef CONFIG_F2FS_FS_POSIX_ACL
	set_opt(sbi, POSIX_ACL);
#endif

#ifdef CONFIG_F2FS_FAULT_INJECTION
	f2fs_build_fault_attr(sbi, 0);
#endif
}

static int f2fs_remount(struct super_block *sb, int *flags, char *data)
{
	struct f2fs_sb_info *sbi = F2FS_SB(sb);
	struct f2fs_mount_info org_mount_opt;
	int err, active_logs;
	bool need_restart_gc = false;
	bool need_stop_gc = false;
	bool no_extent_cache = !test_opt(sbi, EXTENT_CACHE);
#ifdef CONFIG_F2FS_FAULT_INJECTION
	struct f2fs_fault_info ffi = sbi->fault_info;
#endif

	/*
	 * Save the old mount options in case we
	 * need to restore them.
	 */
	org_mount_opt = sbi->mount_opt;
	active_logs = sbi->active_logs;

	/* recover superblocks we couldn't write due to previous RO mount */
	if (!(*flags & MS_RDONLY) && is_sbi_flag_set(sbi, SBI_NEED_SB_WRITE)) {
		err = f2fs_commit_super(sbi, false);
		f2fs_msg(sb, KERN_INFO,
			"Try to recover all the superblocks, ret: %d", err);
		if (!err)
			clear_sbi_flag(sbi, SBI_NEED_SB_WRITE);
	}

	default_options(sbi);

	/* parse mount options */
	err = parse_options(sb, data);
	if (err)
		goto restore_opts;

	/*
	 * Both the previous and new states of the filesystem are RO,
	 * so skip checking GC and FLUSH_MERGE conditions.
	 */
	if (f2fs_readonly(sb) && (*flags & MS_RDONLY))
		goto skip;

	/* disallow enabling/disabling extent_cache dynamically */
	if (no_extent_cache == !!test_opt(sbi, EXTENT_CACHE)) {
		err = -EINVAL;
		f2fs_msg(sbi->sb, KERN_WARNING,
				"switching the extent_cache option is not allowed");
		goto restore_opts;
	}

	/*
	 * We stop the GC thread if the FS is mounted as RO
	 * or if background_gc=off is passed as a mount
	 * option. Also sync the filesystem.
	 */
	if ((*flags & MS_RDONLY) || !test_opt(sbi, BG_GC)) {
		if (sbi->gc_thread) {
			stop_gc_thread(sbi);
			need_restart_gc = true;
		}
	} else if (!sbi->gc_thread) {
		err = start_gc_thread(sbi);
		if (err)
			goto restore_opts;
		need_stop_gc = true;
	}

	if (*flags & MS_RDONLY) {
		writeback_inodes_sb(sb, WB_REASON_SYNC);
		sync_inodes_sb(sb);

		set_sbi_flag(sbi, SBI_IS_DIRTY);
		set_sbi_flag(sbi, SBI_IS_CLOSE);
		f2fs_sync_fs(sb, 1);
		clear_sbi_flag(sbi, SBI_IS_CLOSE);
	}

	/*
	 * We stop the issue_flush thread if the FS is mounted as RO
	 * or if flush_merge is not passed as a mount option.
	 */
	if ((*flags & MS_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) {
		clear_opt(sbi, FLUSH_MERGE);
		destroy_flush_cmd_control(sbi, false);
	} else {
		err = create_flush_cmd_control(sbi);
		if (err)
			goto restore_gc;
	}
skip:
	/* update the POSIXACL flag */
	sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
		(test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0);

	return 0;
restore_gc:
	if (need_restart_gc) {
		if (start_gc_thread(sbi))
			f2fs_msg(sbi->sb, KERN_WARNING,
				"background gc thread has stopped");
	} else if (need_stop_gc) {
		stop_gc_thread(sbi);
	}
restore_opts:
	sbi->mount_opt = org_mount_opt;
	sbi->active_logs = active_logs;
#ifdef CONFIG_F2FS_FAULT_INJECTION
	sbi->fault_info = ffi;
#endif
	return err;
}

static struct super_operations f2fs_sops = {
	.alloc_inode	= f2fs_alloc_inode,
	.drop_inode	= f2fs_drop_inode,
	.destroy_inode	= f2fs_destroy_inode,
	.write_inode	= f2fs_write_inode,
	.dirty_inode	= f2fs_dirty_inode,
	.show_options	= f2fs_show_options,
	.evict_inode	= f2fs_evict_inode,
	.put_super	= f2fs_put_super,
	.sync_fs	= f2fs_sync_fs,
	.freeze_fs	= f2fs_freeze,
	.unfreeze_fs	= f2fs_unfreeze,
	.statfs		= f2fs_statfs,
	.remount_fs	= f2fs_remount,
};

#ifdef CONFIG_F2FS_FS_ENCRYPTION
static int f2fs_get_context(struct inode *inode, void *ctx, size_t len)
{
	return f2fs_getxattr(inode, F2FS_XATTR_INDEX_ENCRYPTION,
				F2FS_XATTR_NAME_ENCRYPTION_CONTEXT,
				ctx, len, NULL);
}

static int f2fs_set_context(struct inode *inode, const void *ctx, size_t len,
							void *fs_data)
{
	return f2fs_setxattr(inode, F2FS_XATTR_INDEX_ENCRYPTION,
				F2FS_XATTR_NAME_ENCRYPTION_CONTEXT,
				ctx, len, fs_data, XATTR_CREATE);
}

static unsigned f2fs_max_namelen(struct inode *inode)
{
	return S_ISLNK(inode->i_mode) ?
			inode->i_sb->s_blocksize : F2FS_NAME_LEN;
}

static const struct fscrypt_operations f2fs_cryptops = {
	.key_prefix	= "f2fs:",
	.get_context	= f2fs_get_context,
	.set_context	= f2fs_set_context,
	.is_encrypted	= f2fs_encrypted_inode,
	.empty_dir	= f2fs_empty_dir,
	.max_namelen	= f2fs_max_namelen,
};
#else
static const struct fscrypt_operations f2fs_cryptops = {
	.is_encrypted	= f2fs_encrypted_inode,
};
#endif

static struct inode *f2fs_nfs_get_inode(struct super_block *sb,
		u64 ino, u32 generation)
{
	struct f2fs_sb_info *sbi = F2FS_SB(sb);
	struct inode *inode;

	if (check_nid_range(sbi, ino))
		return ERR_PTR(-ESTALE);

	/*
	 * f2fs_iget isn't quite right if the inode is currently unallocated!
	 * However f2fs_iget currently does appropriate checks to handle stale
	 * inodes so everything is OK.
	 */
	inode = f2fs_iget(sb, ino);
	if (IS_ERR(inode))
		return ERR_CAST(inode);
	if (unlikely(generation && inode->i_generation != generation)) {
		/* we didn't find the right inode.. */
		iput(inode);
		return ERR_PTR(-ESTALE);
	}
	return inode;
}

static struct dentry *f2fs_fh_to_dentry(struct super_block *sb, struct fid *fid,
		int fh_len, int fh_type)
{
	return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
				    f2fs_nfs_get_inode);
}

static struct dentry *f2fs_fh_to_parent(struct super_block *sb, struct fid *fid,
		int fh_len, int fh_type)
{
	return generic_fh_to_parent(sb, fid, fh_len, fh_type,
				    f2fs_nfs_get_inode);
}

static const struct export_operations f2fs_export_ops = {
	.fh_to_dentry = f2fs_fh_to_dentry,
	.fh_to_parent = f2fs_fh_to_parent,
	.get_parent = f2fs_get_parent,
};

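/*
 * A worked example for the function below, assuming the usual on-disk
 * constants (DEF_ADDRS_PER_INODE = 923, F2FS_INLINE_XATTR_ADDRS = 50 and
 * ADDRS_PER_BLOCK = NIDS_PER_BLOCK = 1018):
 *
 *	873 + 2 * 1018 + 2 * 1018^2 + 1018^3 ~= 1.06e9 blocks,
 *
 * i.e. a maximum file size of about 3.94TiB with 4KB blocks.
 */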
static loff_t max_file_blocks(void)
{
	loff_t result = (DEF_ADDRS_PER_INODE - F2FS_INLINE_XATTR_ADDRS);
	loff_t leaf_count = ADDRS_PER_BLOCK;

	/* two direct node blocks */
	result += (leaf_count * 2);

	/* two indirect node blocks */
	leaf_count *= NIDS_PER_BLOCK;
	result += (leaf_count * 2);

	/* one double indirect node block */
	leaf_count *= NIDS_PER_BLOCK;
	result += leaf_count;

	return result;
}

static int __f2fs_commit_super(struct buffer_head *bh,
			struct f2fs_super_block *super)
{
	lock_buffer(bh);
	if (super)
		memcpy(bh->b_data + F2FS_SUPER_OFFSET, super, sizeof(*super));
	set_buffer_uptodate(bh);
	set_buffer_dirty(bh);
	unlock_buffer(bh);

	/* it's a rare case, so we can do FUA all the time */
	return __sync_dirty_buffer(bh, REQ_SYNC | REQ_PREFLUSH | REQ_FUA);
}

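/*
 * The on-disk layout verified below, in block-address order:
 *
 *	SB | CP | SIT | NAT | SSA | MAIN
 *
 * Each area must start exactly where the previous one ends, and MAIN
 * must not run past the last segment.
 */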
static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi,
					struct buffer_head *bh)
{
	struct f2fs_super_block *raw_super = (struct f2fs_super_block *)
					(bh->b_data + F2FS_SUPER_OFFSET);
	struct super_block *sb = sbi->sb;
	u32 segment0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr);
	u32 cp_blkaddr = le32_to_cpu(raw_super->cp_blkaddr);
	u32 sit_blkaddr = le32_to_cpu(raw_super->sit_blkaddr);
	u32 nat_blkaddr = le32_to_cpu(raw_super->nat_blkaddr);
	u32 ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
	u32 main_blkaddr = le32_to_cpu(raw_super->main_blkaddr);
	u32 segment_count_ckpt = le32_to_cpu(raw_super->segment_count_ckpt);
	u32 segment_count_sit = le32_to_cpu(raw_super->segment_count_sit);
	u32 segment_count_nat = le32_to_cpu(raw_super->segment_count_nat);
	u32 segment_count_ssa = le32_to_cpu(raw_super->segment_count_ssa);
	u32 segment_count_main = le32_to_cpu(raw_super->segment_count_main);
	u32 segment_count = le32_to_cpu(raw_super->segment_count);
	u32 log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg);
	u64 main_end_blkaddr = main_blkaddr +
				(segment_count_main << log_blocks_per_seg);
	u64 seg_end_blkaddr = segment0_blkaddr +
				(segment_count << log_blocks_per_seg);

	if (segment0_blkaddr != cp_blkaddr) {
		f2fs_msg(sb, KERN_INFO,
			"Mismatch start address, segment0(%u) cp_blkaddr(%u)",
			segment0_blkaddr, cp_blkaddr);
		return true;
	}

	if (cp_blkaddr + (segment_count_ckpt << log_blocks_per_seg) !=
							sit_blkaddr) {
		f2fs_msg(sb, KERN_INFO,
			"Wrong CP boundary, start(%u) end(%u) blocks(%u)",
			cp_blkaddr, sit_blkaddr,
			segment_count_ckpt << log_blocks_per_seg);
		return true;
	}

	if (sit_blkaddr + (segment_count_sit << log_blocks_per_seg) !=
							nat_blkaddr) {
		f2fs_msg(sb, KERN_INFO,
			"Wrong SIT boundary, start(%u) end(%u) blocks(%u)",
			sit_blkaddr, nat_blkaddr,
			segment_count_sit << log_blocks_per_seg);
		return true;
	}

	if (nat_blkaddr + (segment_count_nat << log_blocks_per_seg) !=
							ssa_blkaddr) {
		f2fs_msg(sb, KERN_INFO,
			"Wrong NAT boundary, start(%u) end(%u) blocks(%u)",
			nat_blkaddr, ssa_blkaddr,
			segment_count_nat << log_blocks_per_seg);
		return true;
	}

	if (ssa_blkaddr + (segment_count_ssa << log_blocks_per_seg) !=
							main_blkaddr) {
		f2fs_msg(sb, KERN_INFO,
			"Wrong SSA boundary, start(%u) end(%u) blocks(%u)",
			ssa_blkaddr, main_blkaddr,
			segment_count_ssa << log_blocks_per_seg);
		return true;
	}

	if (main_end_blkaddr > seg_end_blkaddr) {
		f2fs_msg(sb, KERN_INFO,
			"Wrong MAIN_AREA boundary, start(%u) end(%u) block(%u)",
			main_blkaddr,
			segment0_blkaddr +
				(segment_count << log_blocks_per_seg),
			segment_count_main << log_blocks_per_seg);
		return true;
	} else if (main_end_blkaddr < seg_end_blkaddr) {
		int err = 0;
		char *res;

		/* fix in-memory information all the time */
		raw_super->segment_count = cpu_to_le32((main_end_blkaddr -
				segment0_blkaddr) >> log_blocks_per_seg);

		if (f2fs_readonly(sb) || bdev_read_only(sb->s_bdev)) {
			set_sbi_flag(sbi, SBI_NEED_SB_WRITE);
			res = "internally";
		} else {
			err = __f2fs_commit_super(bh, NULL);
			res = err ? "failed" : "done";
		}
		f2fs_msg(sb, KERN_INFO,
			"Fix alignment : %s, start(%u) end(%u) block(%u)",
			res, main_blkaddr,
			segment0_blkaddr +
				(segment_count << log_blocks_per_seg),
			segment_count_main << log_blocks_per_seg);
		if (err)
			return true;
	}
	return false;
}

static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
				struct buffer_head *bh)
{
	struct f2fs_super_block *raw_super = (struct f2fs_super_block *)
					(bh->b_data + F2FS_SUPER_OFFSET);
	struct super_block *sb = sbi->sb;
	unsigned int blocksize;

	if (F2FS_SUPER_MAGIC != le32_to_cpu(raw_super->magic)) {
		f2fs_msg(sb, KERN_INFO,
			"Magic Mismatch, valid(0x%x) - read(0x%x)",
			F2FS_SUPER_MAGIC, le32_to_cpu(raw_super->magic));
		return 1;
	}

	/* Currently, support only 4KB page cache size */
	if (F2FS_BLKSIZE != PAGE_SIZE) {
		f2fs_msg(sb, KERN_INFO,
			"Invalid page_cache_size (%lu), supports only 4KB",
			PAGE_SIZE);
		return 1;
	}

	/* Currently, support only 4KB block size */
	blocksize = 1 << le32_to_cpu(raw_super->log_blocksize);
	if (blocksize != F2FS_BLKSIZE) {
		f2fs_msg(sb, KERN_INFO,
			"Invalid blocksize (%u), supports only 4KB",
			blocksize);
		return 1;
	}

	/* check log blocks per segment */
	if (le32_to_cpu(raw_super->log_blocks_per_seg) != 9) {
		f2fs_msg(sb, KERN_INFO,
			"Invalid log blocks per segment (%u)",
			le32_to_cpu(raw_super->log_blocks_per_seg));
		return 1;
	}

	/* Currently, support 512/1024/2048/4096 bytes sector size */
	if (le32_to_cpu(raw_super->log_sectorsize) >
				F2FS_MAX_LOG_SECTOR_SIZE ||
		le32_to_cpu(raw_super->log_sectorsize) <
				F2FS_MIN_LOG_SECTOR_SIZE) {
		f2fs_msg(sb, KERN_INFO, "Invalid log sectorsize (%u)",
			le32_to_cpu(raw_super->log_sectorsize));
		return 1;
	}
	if (le32_to_cpu(raw_super->log_sectors_per_block) +
		le32_to_cpu(raw_super->log_sectorsize) !=
			F2FS_MAX_LOG_SECTOR_SIZE) {
		f2fs_msg(sb, KERN_INFO,
			"Invalid log sectors per block(%u) log sectorsize(%u)",
			le32_to_cpu(raw_super->log_sectors_per_block),
			le32_to_cpu(raw_super->log_sectorsize));
		return 1;
	}

	/* check reserved ino info */
	if (le32_to_cpu(raw_super->node_ino) != 1 ||
		le32_to_cpu(raw_super->meta_ino) != 2 ||
		le32_to_cpu(raw_super->root_ino) != 3) {
		f2fs_msg(sb, KERN_INFO,
			"Invalid Fs Meta Ino: node(%u) meta(%u) root(%u)",
			le32_to_cpu(raw_super->node_ino),
			le32_to_cpu(raw_super->meta_ino),
			le32_to_cpu(raw_super->root_ino));
		return 1;
	}

	if (le32_to_cpu(raw_super->segment_count) > F2FS_MAX_SEGMENT) {
		f2fs_msg(sb, KERN_INFO,
			"Invalid segment count (%u)",
			le32_to_cpu(raw_super->segment_count));
		return 1;
	}

	/* check CP/SIT/NAT/SSA/MAIN_AREA area boundary */
	if (sanity_check_area_boundary(sbi, bh))
		return 1;

	return 0;
}

int sanity_check_ckpt(struct f2fs_sb_info *sbi)
{
	unsigned int total, fsmeta;
	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
	unsigned int ovp_segments, reserved_segments;
	unsigned int main_segs, blocks_per_seg;
	int i;

	total = le32_to_cpu(raw_super->segment_count);
	fsmeta = le32_to_cpu(raw_super->segment_count_ckpt);
	fsmeta += le32_to_cpu(raw_super->segment_count_sit);
	fsmeta += le32_to_cpu(raw_super->segment_count_nat);
	fsmeta += le32_to_cpu(ckpt->rsvd_segment_count);
	fsmeta += le32_to_cpu(raw_super->segment_count_ssa);

	if (unlikely(fsmeta >= total))
		return 1;

	ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
	reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count);

	if (unlikely(fsmeta < F2FS_MIN_SEGMENTS ||
			ovp_segments == 0 || reserved_segments == 0)) {
		f2fs_msg(sbi->sb, KERN_ERR,
			"Wrong layout: check mkfs.f2fs version");
		return 1;
	}

	main_segs = le32_to_cpu(raw_super->segment_count_main);
	blocks_per_seg = sbi->blocks_per_seg;

	for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) {
		if (le32_to_cpu(ckpt->cur_node_segno[i]) >= main_segs ||
			le16_to_cpu(ckpt->cur_node_blkoff[i]) >= blocks_per_seg)
			return 1;
	}
	for (i = 0; i < NR_CURSEG_DATA_TYPE; i++) {
		if (le32_to_cpu(ckpt->cur_data_segno[i]) >= main_segs ||
			le16_to_cpu(ckpt->cur_data_blkoff[i]) >= blocks_per_seg)
			return 1;
	}

	if (unlikely(f2fs_cp_error(sbi))) {
		f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck");
		return 1;
	}
	return 0;
}

static void init_sb_info(struct f2fs_sb_info *sbi)
{
	struct f2fs_super_block *raw_super = sbi->raw_super;
	int i, j;

	sbi->log_sectors_per_block =
		le32_to_cpu(raw_super->log_sectors_per_block);
	sbi->log_blocksize = le32_to_cpu(raw_super->log_blocksize);
	sbi->blocksize = 1 << sbi->log_blocksize;
	sbi->log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg);
	sbi->blocks_per_seg = 1 << sbi->log_blocks_per_seg;
	sbi->segs_per_sec = le32_to_cpu(raw_super->segs_per_sec);
	sbi->secs_per_zone = le32_to_cpu(raw_super->secs_per_zone);
	sbi->total_sections = le32_to_cpu(raw_super->section_count);
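	/*
	 * The NAT area stores two copies of each NAT block (our reading
	 * of the on-disk layout), hence the halving below.
	 */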
	sbi->total_node_count =
		(le32_to_cpu(raw_super->segment_count_nat) / 2)
			* sbi->blocks_per_seg * NAT_ENTRY_PER_BLOCK;
	sbi->root_ino_num = le32_to_cpu(raw_super->root_ino);
	sbi->node_ino_num = le32_to_cpu(raw_super->node_ino);
	sbi->meta_ino_num = le32_to_cpu(raw_super->meta_ino);
	sbi->cur_victim_sec = NULL_SECNO;
	sbi->max_victim_search = DEF_MAX_VICTIM_SEARCH;

	sbi->dir_level = DEF_DIR_LEVEL;
	sbi->interval_time[CP_TIME] = DEF_CP_INTERVAL;
	sbi->interval_time[REQ_TIME] = DEF_IDLE_INTERVAL;
	clear_sbi_flag(sbi, SBI_NEED_FSCK);

	for (i = 0; i < NR_COUNT_TYPE; i++)
		atomic_set(&sbi->nr_pages[i], 0);

	atomic_set(&sbi->wb_sync_req, 0);

	INIT_LIST_HEAD(&sbi->s_list);
	mutex_init(&sbi->umount_mutex);
	for (i = 0; i < NR_PAGE_TYPE - 1; i++)
		for (j = HOT; j < NR_TEMP_TYPE; j++)
			mutex_init(&sbi->wio_mutex[i][j]);
	spin_lock_init(&sbi->cp_lock);
}

static int init_percpu_info(struct f2fs_sb_info *sbi)
{
	int err;

	err = percpu_counter_init(&sbi->alloc_valid_block_count, 0, GFP_KERNEL);
	if (err)
		return err;

	return percpu_counter_init(&sbi->total_valid_inode_count, 0,
								GFP_KERNEL);
}

#ifdef CONFIG_BLK_DEV_ZONED
static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
{
	struct block_device *bdev = FDEV(devi).bdev;
	sector_t nr_sectors = bdev->bd_part->nr_sects;
	sector_t sector = 0;
	struct blk_zone *zones;
	unsigned int i, nr_zones;
	unsigned int n = 0;
	int err = -EIO;

	if (!f2fs_sb_mounted_blkzoned(sbi->sb))
		return 0;

	if (sbi->blocks_per_blkz && sbi->blocks_per_blkz !=
				SECTOR_TO_BLOCK(bdev_zone_sectors(bdev)))
		return -EINVAL;
	sbi->blocks_per_blkz = SECTOR_TO_BLOCK(bdev_zone_sectors(bdev));
	if (sbi->log_blocks_per_blkz && sbi->log_blocks_per_blkz !=
				__ilog2_u32(sbi->blocks_per_blkz))
		return -EINVAL;
	sbi->log_blocks_per_blkz = __ilog2_u32(sbi->blocks_per_blkz);
	FDEV(devi).nr_blkz = SECTOR_TO_BLOCK(nr_sectors) >>
					sbi->log_blocks_per_blkz;
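	/* count a possibly smaller trailing zone */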
	if (nr_sectors & (bdev_zone_sectors(bdev) - 1))
		FDEV(devi).nr_blkz++;

	FDEV(devi).blkz_type = kmalloc(FDEV(devi).nr_blkz, GFP_KERNEL);
	if (!FDEV(devi).blkz_type)
		return -ENOMEM;

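/* query the device for zone information in bounded batches */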
1384 #define F2FS_REPORT_NR_ZONES   4096
1385
1386         zones = kcalloc(F2FS_REPORT_NR_ZONES, sizeof(struct blk_zone),
1387                         GFP_KERNEL);
1388         if (!zones)
1389                 return -ENOMEM;
1390
1391         /* Get block zones type */
1392         while (zones && sector < nr_sectors) {
1393
1394                 nr_zones = F2FS_REPORT_NR_ZONES;
1395                 err = blkdev_report_zones(bdev, sector,
1396                                           zones, &nr_zones,
1397                                           GFP_KERNEL);
1398                 if (err)
1399                         break;
1400                 if (!nr_zones) {
1401                         err = -EIO;
1402                         break;
1403                 }
1404
1405                 for (i = 0; i < nr_zones; i++) {
1406                         FDEV(devi).blkz_type[n] = zones[i].type;
1407                         sector += zones[i].len;
1408                         n++;
1409                 }
1410         }
1411
1412         kfree(zones);
1413
1414         return err;
1415 }
1416 #endif
1417
1418 /*
1419  * Read the f2fs raw super block.
1420  * Since there are two copies of the super block, read both and keep
1421  * the first valid one. If either copy is broken, a recovery flag is
1422  * passed back to the caller.
1423  */
1424 static int read_raw_super_block(struct f2fs_sb_info *sbi,
1425                         struct f2fs_super_block **raw_super,
1426                         int *valid_super_block, int *recovery)
1427 {
1428         struct super_block *sb = sbi->sb;
1429         int block;
1430         struct buffer_head *bh;
1431         struct f2fs_super_block *super;
1432         int err = 0;
1433
1434         super = kzalloc(sizeof(struct f2fs_super_block), GFP_KERNEL);
1435         if (!super)
1436                 return -ENOMEM;
1437
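             /*
              * The two superblock copies live in blocks 0 and 1, each at
              * F2FS_SUPER_OFFSET within its block.
              */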
1438         for (block = 0; block < 2; block++) {
1439                 bh = sb_bread(sb, block);
1440                 if (!bh) {
1441                         f2fs_msg(sb, KERN_ERR, "Unable to read %dth superblock",
1442                                 block + 1);
1443                         err = -EIO;
1444                         continue;
1445                 }
1446
1447                 /* sanity checking of raw super */
1448                 if (sanity_check_raw_super(sbi, bh)) {
1449                         f2fs_msg(sb, KERN_ERR,
1450                                 "Can't find valid F2FS filesystem in %dth superblock",
1451                                 block + 1);
1452                         err = -EINVAL;
1453                         brelse(bh);
1454                         continue;
1455                 }
1456
1457                 if (!*raw_super) {
1458                         memcpy(super, bh->b_data + F2FS_SUPER_OFFSET,
1459                                                         sizeof(*super));
1460                         *valid_super_block = block;
1461                         *raw_super = super;
1462                 }
1463                 brelse(bh);
1464         }
1465
1466         /* at least one of the superblocks could not be read */
1467         if (err < 0)
1468                 *recovery = 1;
1469
1470         /* No valid superblock */
1471         if (!*raw_super)
1472                 kfree(super);
1473         else
1474                 err = 0;
1475
1476         return err;
1477 }
1478
1479 int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover)
1480 {
1481         struct buffer_head *bh;
1482         int err;
1483
1484         if ((recover && f2fs_readonly(sbi->sb)) ||
1485                                 bdev_read_only(sbi->sb->s_bdev)) {
1486                 set_sbi_flag(sbi, SBI_NEED_SB_WRITE);
1487                 return -EROFS;
1488         }
1489
1490         /* write the back-up superblock first so the valid copy survives a failure */
1491         bh = sb_getblk(sbi->sb, sbi->valid_super_block ? 0 : 1);
1492         if (!bh)
1493                 return -EIO;
1494         err = __f2fs_commit_super(bh, F2FS_RAW_SUPER(sbi));
1495         brelse(bh);
1496
1497         /* if we are in the recovery path, skip writing the valid superblock */
1498         if (recover || err)
1499                 return err;
1500
1501         /* write current valid superblock */
1502         bh = sb_getblk(sbi->sb, sbi->valid_super_block);
1503         if (!bh)
1504                 return -EIO;
1505         err = __f2fs_commit_super(bh, F2FS_RAW_SUPER(sbi));
1506         brelse(bh);
1507         return err;
1508 }
1509
1510 static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
1511 {
1512         struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
1513         unsigned int max_devices = MAX_DEVICES;
1514         int i;
1515
1516         /* Initialize single device information */
1517         if (!RDEV(0).path[0]) {
1518                 if (!bdev_is_zoned(sbi->sb->s_bdev))
1519                         return 0;
1520                 max_devices = 1;
1521         }
1522
1523         /*
1524          * Initialize information for multiple devices, or for a
1525          * single zoned block device.
1526          */
1527         sbi->devs = kcalloc(max_devices, sizeof(struct f2fs_dev_info),
1528                                 GFP_KERNEL);
1529         if (!sbi->devs)
1530                 return -ENOMEM;
1531
1532         for (i = 0; i < max_devices; i++) {
1533
1534                 if (i > 0 && !RDEV(i).path[0])
1535                         break;
1536
1537                 if (max_devices == 1) {
1538                         /* Single zoned block device mount */
1539                         FDEV(0).bdev =
1540                                 blkdev_get_by_dev(sbi->sb->s_bdev->bd_dev,
1541                                         sbi->sb->s_mode, sbi->sb->s_type);
1542                 } else {
1543                         /* Multi-device mount */
1544                         memcpy(FDEV(i).path, RDEV(i).path, MAX_PATH_LEN);
1545                         FDEV(i).total_segments =
1546                                 le32_to_cpu(RDEV(i).total_segments);
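                             /*
                              * Devices are concatenated into one contiguous
                              * block address space; only the first device
                              * carries the segment0_blkaddr offset from the
                              * superblock.
                              */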
1547                         if (i == 0) {
1548                                 FDEV(i).start_blk = 0;
1549                                 FDEV(i).end_blk = FDEV(i).start_blk +
1550                                     (FDEV(i).total_segments <<
1551                                     sbi->log_blocks_per_seg) - 1 +
1552                                     le32_to_cpu(raw_super->segment0_blkaddr);
1553                         } else {
1554                                 FDEV(i).start_blk = FDEV(i - 1).end_blk + 1;
1555                                 FDEV(i).end_blk = FDEV(i).start_blk +
1556                                         (FDEV(i).total_segments <<
1557                                         sbi->log_blocks_per_seg) - 1;
1558                         }
1559                         FDEV(i).bdev = blkdev_get_by_path(FDEV(i).path,
1560                                         sbi->sb->s_mode, sbi->sb->s_type);
1561                 }
1562                 if (IS_ERR(FDEV(i).bdev))
1563                         return PTR_ERR(FDEV(i).bdev);
1564
1565                 /* record how many devices to release on error */
1566                 sbi->s_ndevs = i + 1;
1567
1568 #ifdef CONFIG_BLK_DEV_ZONED
1569                 if (bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HM &&
1570                                 !f2fs_sb_mounted_blkzoned(sbi->sb)) {
1571                         f2fs_msg(sbi->sb, KERN_ERR,
1572                                 "Zoned block device feature not enabled");
1573                         return -EINVAL;
1574                 }
1575                 if (bdev_zoned_model(FDEV(i).bdev) != BLK_ZONED_NONE) {
1576                         if (init_blkz_info(sbi, i)) {
1577                                 f2fs_msg(sbi->sb, KERN_ERR,
1578                                         "Failed to initialize F2FS blkzone information");
1579                                 return -EINVAL;
1580                         }
1581                         if (max_devices == 1)
1582                                 break;
1583                         f2fs_msg(sbi->sb, KERN_INFO,
1584                                 "Mount Device [%2d]: %20s, %8u, %8x - %8x (zone: %s)",
1585                                 i, FDEV(i).path,
1586                                 FDEV(i).total_segments,
1587                                 FDEV(i).start_blk, FDEV(i).end_blk,
1588                                 bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HA ?
1589                                 "Host-aware" : "Host-managed");
1590                         continue;
1591                 }
1592 #endif
1593                 f2fs_msg(sbi->sb, KERN_INFO,
1594                         "Mount Device [%2d]: %20s, %8u, %8x - %8x",
1595                                 i, FDEV(i).path,
1596                                 FDEV(i).total_segments,
1597                                 FDEV(i).start_blk, FDEV(i).end_blk);
1598         }
1599         f2fs_msg(sbi->sb, KERN_INFO,
1600                         "IO Block Size: %8d KB", F2FS_IO_SIZE_KB(sbi));
1601         return 0;
1602 }
1603
1604 static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
1605 {
1606         struct f2fs_sb_info *sbi;
1607         struct f2fs_super_block *raw_super;
1608         struct inode *root;
1609         int err;
1610         bool retry = true, need_fsck = false;
1611         char *options = NULL;
1612         int recovery, i, valid_super_block;
1613         struct curseg_info *seg_i;
1614
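     /*
      * The mount is retried once from here after a full teardown; if
      * roll-forward recovery was the reason for the failure, SBI_NEED_FSCK
      * is set for the second attempt.
      */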
1615 try_onemore:
1616         err = -EINVAL;
1617         raw_super = NULL;
1618         valid_super_block = -1;
1619         recovery = 0;
1620
1621         /* allocate memory for f2fs-specific super block info */
1622         sbi = kzalloc(sizeof(struct f2fs_sb_info), GFP_KERNEL);
1623         if (!sbi)
1624                 return -ENOMEM;
1625
1626         sbi->sb = sb;
1627
1628         /* Load the checksum driver */
1629         sbi->s_chksum_driver = crypto_alloc_shash("crc32", 0, 0);
1630         if (IS_ERR(sbi->s_chksum_driver)) {
1631                 f2fs_msg(sb, KERN_ERR, "Cannot load crc32 driver.");
1632                 err = PTR_ERR(sbi->s_chksum_driver);
1633                 sbi->s_chksum_driver = NULL;
1634                 goto free_sbi;
1635         }
1636
1637         /* set a block size */
1638         if (unlikely(!sb_set_blocksize(sb, F2FS_BLKSIZE))) {
1639                 f2fs_msg(sb, KERN_ERR, "unable to set blocksize");
1640                 goto free_sbi;
1641         }
1642
1643         err = read_raw_super_block(sbi, &raw_super, &valid_super_block,
1644                                                                 &recovery);
1645         if (err)
1646                 goto free_sbi;
1647
1648         sb->s_fs_info = sbi;
1649         sbi->raw_super = raw_super;
1650
1651         /*
1652          * The BLKZONED feature indicates that the drive was formatted with
1653          * zone alignment optimization. This is optional for host-aware
1654          * devices, but mandatory for host-managed zoned block devices.
1655          */
1656 #ifndef CONFIG_BLK_DEV_ZONED
1657         if (f2fs_sb_mounted_blkzoned(sb)) {
1658                 f2fs_msg(sb, KERN_ERR,
1659                          "Zoned block device support is not enabled");
1660                 err = -EOPNOTSUPP;
1661                 goto free_sb_buf;
1662         }
1663 #endif
1664         default_options(sbi);
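             /*
              * Illustrative only: "data" is the option string from mount(2),
              * e.g. "background_gc=on,discard,inline_data" as produced by
              * mount -t f2fs -o background_gc=on,discard,inline_data /dev/sdX /mnt.
              */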
1665         /* parse mount options */
1666         options = kstrdup((const char *)data, GFP_KERNEL);
1667         if (data && !options) {
1668                 err = -ENOMEM;
1669                 goto free_sb_buf;
1670         }
1671
1672         err = parse_options(sb, options);
1673         if (err)
1674                 goto free_options;
1675
1676         sbi->max_file_blocks = max_file_blocks();
1677         sb->s_maxbytes = sbi->max_file_blocks <<
1678                                 le32_to_cpu(raw_super->log_blocksize);
1679         sb->s_max_links = F2FS_LINK_MAX;
1680         get_random_bytes(&sbi->s_next_generation, sizeof(u32));
1681
1682         sb->s_op = &f2fs_sops;
1683         sb->s_cop = &f2fs_cryptops;
1684         sb->s_xattr = f2fs_xattr_handlers;
1685         sb->s_export_op = &f2fs_export_ops;
1686         sb->s_magic = F2FS_SUPER_MAGIC;
1687         sb->s_time_gran = 1;
1688         sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
1689                 (test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0);
1690         memcpy(sb->s_uuid, raw_super->uuid, sizeof(raw_super->uuid));
1691
1692         /* init f2fs-specific super block info */
1693         sbi->valid_super_block = valid_super_block;
1694         mutex_init(&sbi->gc_mutex);
1695         mutex_init(&sbi->cp_mutex);
1696         init_rwsem(&sbi->node_write);
1697         init_rwsem(&sbi->node_change);
1698
1699         /* disallow all the data/node/meta page writes */
1700         set_sbi_flag(sbi, SBI_POR_DOING);
1701         spin_lock_init(&sbi->stat_lock);
1702
1703         for (i = 0; i < NR_PAGE_TYPE; i++) {
1704                 int n = (i == META) ? 1 : NR_TEMP_TYPE;
1705                 int j;
1706
1707                 sbi->write_io[i] = kmalloc(n * sizeof(struct f2fs_bio_info),
1708                                                                 GFP_KERNEL);
1709                 if (!sbi->write_io[i]) {
1710                         err = -ENOMEM;
1711                         goto free_options;
1712                 }
1713
1714                 for (j = HOT; j < n; j++) {
1715                         init_rwsem(&sbi->write_io[i][j].io_rwsem);
1716                         sbi->write_io[i][j].sbi = sbi;
1717                         sbi->write_io[i][j].bio = NULL;
1718                         spin_lock_init(&sbi->write_io[i][j].io_lock);
1719                         INIT_LIST_HEAD(&sbi->write_io[i][j].io_list);
1720                 }
1721         }
1722
1723         init_rwsem(&sbi->cp_rwsem);
1724         init_waitqueue_head(&sbi->cp_wait);
1725         init_sb_info(sbi);
1726
1727         err = init_percpu_info(sbi);
1728         if (err)
1729                 goto free_options;
1730
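             /*
              * If a larger write unit was requested via io_size_bits, keep a
              * mempool of dummy pages around for padding bios up to that unit.
              */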
1731         if (F2FS_IO_SIZE(sbi) > 1) {
1732                 sbi->write_io_dummy =
1733                         mempool_create_page_pool(2 * (F2FS_IO_SIZE(sbi) - 1), 0);
1734                 if (!sbi->write_io_dummy) {
1735                         err = -ENOMEM;
1736                         goto free_options;
1737                 }
1738         }
1739
1740         /* get an inode for meta space */
1741         sbi->meta_inode = f2fs_iget(sb, F2FS_META_INO(sbi));
1742         if (IS_ERR(sbi->meta_inode)) {
1743                 f2fs_msg(sb, KERN_ERR, "Failed to read F2FS meta data inode");
1744                 err = PTR_ERR(sbi->meta_inode);
1745                 goto free_io_dummy;
1746         }
1747
1748         err = get_valid_checkpoint(sbi);
1749         if (err) {
1750                 f2fs_msg(sb, KERN_ERR, "Failed to get valid F2FS checkpoint");
1751                 goto free_meta_inode;
1752         }
1753
1754         /* Initialize device list */
1755         err = f2fs_scan_devices(sbi);
1756         if (err) {
1757                 f2fs_msg(sb, KERN_ERR, "Failed to find devices");
1758                 goto free_devices;
1759         }
1760
1761         sbi->total_valid_node_count =
1762                                 le32_to_cpu(sbi->ckpt->valid_node_count);
1763         percpu_counter_set(&sbi->total_valid_inode_count,
1764                                 le32_to_cpu(sbi->ckpt->valid_inode_count));
1765         sbi->user_block_count = le64_to_cpu(sbi->ckpt->user_block_count);
1766         sbi->total_valid_block_count =
1767                                 le64_to_cpu(sbi->ckpt->valid_block_count);
1768         sbi->last_valid_block_count = sbi->total_valid_block_count;
1769         sbi->reserved_blocks = 0;
1770
1771         for (i = 0; i < NR_INODE_TYPE; i++) {
1772                 INIT_LIST_HEAD(&sbi->inode_list[i]);
1773                 spin_lock_init(&sbi->inode_lock[i]);
1774         }
1775
1776         init_extent_cache_info(sbi);
1777
1778         init_ino_entry_info(sbi);
1779
1780         /* setup f2fs internal modules */
1781         err = build_segment_manager(sbi);
1782         if (err) {
1783                 f2fs_msg(sb, KERN_ERR,
1784                         "Failed to initialize F2FS segment manager");
1785                 goto free_sm;
1786         }
1787         err = build_node_manager(sbi);
1788         if (err) {
1789                 f2fs_msg(sb, KERN_ERR,
1790                         "Failed to initialize F2FS node manager");
1791                 goto free_nm;
1792         }
1793
1794         /* For write statistics */
1795         if (sb->s_bdev->bd_part)
1796                 sbi->sectors_written_start =
1797                         (u64)part_stat_read(sb->s_bdev->bd_part, sectors[1]);
1798
1799         /* Read accumulated write IO statistics if they exist */
1800         seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE);
1801         if (__exist_node_summaries(sbi))
1802                 sbi->kbytes_written =
1803                         le64_to_cpu(seg_i->journal->info.kbytes_written);
1804
1805         build_gc_manager(sbi);
1806
1807         err = f2fs_build_stats(sbi);
1808         if (err)
1809                 goto free_nm;
1810
1811         /* get an inode for node space */
1812         sbi->node_inode = f2fs_iget(sb, F2FS_NODE_INO(sbi));
1813         if (IS_ERR(sbi->node_inode)) {
1814                 f2fs_msg(sb, KERN_ERR, "Failed to read node inode");
1815                 err = PTR_ERR(sbi->node_inode);
1816                 goto free_stats;
1817         }
1818
1819         f2fs_join_shrinker(sbi);
1820
1821         /* if there are any orphan nodes, free them */
1822         err = recover_orphan_inodes(sbi);
1823         if (err)
1824                 goto free_node_inode;
1825
1826         /* read root inode and dentry */
1827         root = f2fs_iget(sb, F2FS_ROOT_INO(sbi));
1828         if (IS_ERR(root)) {
1829                 f2fs_msg(sb, KERN_ERR, "Failed to read root inode");
1830                 err = PTR_ERR(root);
1831                 goto free_node_inode;
1832         }
1833         if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
1834                 iput(root);
1835                 err = -EINVAL;
1836                 goto free_node_inode;
1837         }
1838
1839         sb->s_root = d_make_root(root); /* allocate root dentry */
1840         if (!sb->s_root) {
1841                 err = -ENOMEM;
1842                 goto free_root_inode;
1843         }
1844
1845         err = f2fs_init_sysfs(sbi);
1846         if (err)
1847                 goto free_root_inode;
1848
1849         /* recover fsynced data */
1850         if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) {
1851                 /*
1852                  * The mount must fail when the device is read-only and the
1853                  * previous checkpoint was not made by a clean shutdown.
1854                  */
1855                 if (bdev_read_only(sb->s_bdev) &&
1856                                 !is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) {
1857                         err = -EROFS;
1858                         goto free_sysfs;
1859                 }
1860
1861                 if (need_fsck)
1862                         set_sbi_flag(sbi, SBI_NEED_FSCK);
1863
1864                 if (!retry)
1865                         goto skip_recovery;
1866
1867                 err = recover_fsync_data(sbi, false);
1868                 if (err < 0) {
1869                         need_fsck = true;
1870                         f2fs_msg(sb, KERN_ERR,
1871                                 "Cannot recover all fsync data errno=%d", err);
1872                         goto free_sysfs;
1873                 }
1874         } else {
1875                 err = recover_fsync_data(sbi, true);
1876
1877                 if (!f2fs_readonly(sb) && err > 0) {
1878                         err = -EINVAL;
1879                         f2fs_msg(sb, KERN_ERR,
1880                                 "Need to recover fsync data");
1881                         goto free_sysfs;
1882                 }
1883         }
1884 skip_recovery:
1885         /* recover_fsync_data() cleared this already */
1886         clear_sbi_flag(sbi, SBI_POR_DOING);
1887
1888         /*
1889          * If the filesystem is not mounted read-only, then
1890          * start the gc_thread.
1891          */
1892         if (test_opt(sbi, BG_GC) && !f2fs_readonly(sb)) {
1893                 /* After POR, we can run the background GC thread. */
1894                 err = start_gc_thread(sbi);
1895                 if (err)
1896                         goto free_sysfs;
1897         }
1898         kfree(options);
1899
1900         /* recover the broken superblock */
1901         if (recovery) {
1902                 err = f2fs_commit_super(sbi, true);
1903                 f2fs_msg(sb, KERN_INFO,
1904                         "Try to recover %dth superblock, ret: %d",
1905                         sbi->valid_super_block ? 1 : 2, err);
1906         }
1907
1908         f2fs_msg(sbi->sb, KERN_NOTICE, "Mounted with checkpoint version = %llx",
1909                                 cur_cp_version(F2FS_CKPT(sbi)));
1910         f2fs_update_time(sbi, CP_TIME);
1911         f2fs_update_time(sbi, REQ_TIME);
1912         return 0;
1913
1914 free_sysfs:
1915         f2fs_sync_inode_meta(sbi);
1916         f2fs_exit_sysfs(sbi);
1917 free_root_inode:
1918         dput(sb->s_root);
1919         sb->s_root = NULL;
1920 free_node_inode:
1921         truncate_inode_pages_final(NODE_MAPPING(sbi));
1922         mutex_lock(&sbi->umount_mutex);
1923         release_ino_entry(sbi, true);
1924         f2fs_leave_shrinker(sbi);
1925         /*
1926          * Some dirty meta pages can be left behind when recover_orphan_inodes()
1927          * fails with EIO. iput(node_inode) can then trigger balance_fs_bg(),
1928          * followed by write_checkpoint() through f2fs_write_node_pages(), which
1929          * falls into an infinite loop in sync_meta_pages().
1930          */
1931         truncate_inode_pages_final(META_MAPPING(sbi));
1932         iput(sbi->node_inode);
1933         mutex_unlock(&sbi->umount_mutex);
     free_stats:
1934         f2fs_destroy_stats(sbi);
1935 free_nm:
1936         destroy_node_manager(sbi);
1937 free_sm:
1938         destroy_segment_manager(sbi);
1939 free_devices:
1940         destroy_device_list(sbi);
1941         kfree(sbi->ckpt);
1942 free_meta_inode:
1943         make_bad_inode(sbi->meta_inode);
1944         iput(sbi->meta_inode);
1945 free_io_dummy:
1946         mempool_destroy(sbi->write_io_dummy);
1947 free_options:
1948         for (i = 0; i < NR_PAGE_TYPE; i++)
1949                 kfree(sbi->write_io[i]);
1950         destroy_percpu_info(sbi);
1951         kfree(options);
1952 free_sb_buf:
1953         kfree(raw_super);
1954 free_sbi:
1955         if (sbi->s_chksum_driver)
1956                 crypto_free_shash(sbi->s_chksum_driver);
1957         kfree(sbi);
1958
1959         /* give the mount only one more chance */
1960         if (retry) {
1961                 retry = false;
1962                 shrink_dcache_sb(sb);
1963                 goto try_onemore;
1964         }
1965         return err;
1966 }
1967
1968 static struct dentry *f2fs_mount(struct file_system_type *fs_type, int flags,
1969                         const char *dev_name, void *data)
1970 {
1971         return mount_bdev(fs_type, flags, dev_name, data, f2fs_fill_super);
1972 }
1973
1974 static void kill_f2fs_super(struct super_block *sb)
1975 {
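             /*
              * Stop the background GC and discard threads before the generic
              * teardown so no background work races with the unmount.
              */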
1976         if (sb->s_root) {
1977                 set_sbi_flag(F2FS_SB(sb), SBI_IS_CLOSE);
1978                 stop_gc_thread(F2FS_SB(sb));
1979                 stop_discard_thread(F2FS_SB(sb));
1980         }
1981         kill_block_super(sb);
1982 }
1983
1984 static struct file_system_type f2fs_fs_type = {
1985         .owner          = THIS_MODULE,
1986         .name           = "f2fs",
1987         .mount          = f2fs_mount,
1988         .kill_sb        = kill_f2fs_super,
1989         .fs_flags       = FS_REQUIRES_DEV,
1990 };
1991 MODULE_ALIAS_FS("f2fs");
1992
1993 static int __init init_inodecache(void)
1994 {
1995         f2fs_inode_cachep = kmem_cache_create("f2fs_inode_cache",
1996                         sizeof(struct f2fs_inode_info), 0,
1997                         SLAB_RECLAIM_ACCOUNT|SLAB_ACCOUNT, NULL);
1998         if (!f2fs_inode_cachep)
1999                 return -ENOMEM;
2000         return 0;
2001 }
2002
2003 static void destroy_inodecache(void)
2004 {
2005         /*
2006          * Make sure all delayed rcu free inodes are flushed before we
2007          * destroy cache.
2008          */
2009         rcu_barrier();
2010         kmem_cache_destroy(f2fs_inode_cachep);
2011 }
2012
2013 static int __init init_f2fs_fs(void)
2014 {
2015         int err;
2016
2017         f2fs_build_trace_ios();
2018
2019         err = init_inodecache();
2020         if (err)
2021                 goto fail;
2022         err = create_node_manager_caches();
2023         if (err)
2024                 goto free_inodecache;
2025         err = create_segment_manager_caches();
2026         if (err)
2027                 goto free_node_manager_caches;
2028         err = create_checkpoint_caches();
2029         if (err)
2030                 goto free_segment_manager_caches;
2031         err = create_extent_cache();
2032         if (err)
2033                 goto free_checkpoint_caches;
2034         err = f2fs_register_sysfs();
2035         if (err)
2036                 goto free_extent_cache;
2037         err = register_shrinker(&f2fs_shrinker_info);
2038         if (err)
2039                 goto free_sysfs;
2040         err = register_filesystem(&f2fs_fs_type);
2041         if (err)
2042                 goto free_shrinker;
2043         err = f2fs_create_root_stats();
2044         if (err)
2045                 goto free_filesystem;
2046         return 0;
2047
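     /* unwind in the reverse order of initialization */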
2048 free_filesystem:
2049         unregister_filesystem(&f2fs_fs_type);
2050 free_shrinker:
2051         unregister_shrinker(&f2fs_shrinker_info);
2052 free_sysfs:
2053         f2fs_unregister_sysfs();
2054 free_extent_cache:
2055         destroy_extent_cache();
2056 free_checkpoint_caches:
2057         destroy_checkpoint_caches();
2058 free_segment_manager_caches:
2059         destroy_segment_manager_caches();
2060 free_node_manager_caches:
2061         destroy_node_manager_caches();
2062 free_inodecache:
2063         destroy_inodecache();
2064 fail:
2065         return err;
2066 }
2067
2068 static void __exit exit_f2fs_fs(void)
2069 {
2070         f2fs_destroy_root_stats();
2071         unregister_filesystem(&f2fs_fs_type);
2072         unregister_shrinker(&f2fs_shrinker_info);
2073         f2fs_unregister_sysfs();
2074         destroy_extent_cache();
2075         destroy_checkpoint_caches();
2076         destroy_segment_manager_caches();
2077         destroy_node_manager_caches();
2078         destroy_inodecache();
2079         f2fs_destroy_trace_ios();
2080 }
2081
2082 module_init(init_f2fs_fs)
2083 module_exit(exit_f2fs_fs)
2084
2085 MODULE_AUTHOR("Samsung Electronics's Praesto Team");
2086 MODULE_DESCRIPTION("Flash Friendly File System");
2087 MODULE_LICENSE("GPL");
2088