#include <linux/quota.h>
#include <linux/unicode.h>
#include <linux/part_stat.h>
+#include <linux/zstd.h>
+#include <linux/lz4.h>
#include "f2fs.h"
#include "node.h"
#include "segment.h"
#include "xattr.h"
#include "gc.h"
-#include "trace.h"
#define CREATE_TRACE_POINTS
#include <trace/events/f2fs.h>
[FAULT_KVMALLOC] = "kvmalloc",
[FAULT_PAGE_ALLOC] = "page alloc",
[FAULT_PAGE_GET] = "page get",
- [FAULT_ALLOC_BIO] = "alloc bio",
[FAULT_ALLOC_NID] = "alloc nid",
[FAULT_ORPHAN] = "orphan",
[FAULT_BLOCK] = "no more block",
Opt_checkpoint_disable_cap,
Opt_checkpoint_disable_cap_perc,
Opt_checkpoint_enable,
+ Opt_checkpoint_merge,
+ Opt_nocheckpoint_merge,
Opt_compress_algorithm,
Opt_compress_log_size,
Opt_compress_extension,
{Opt_checkpoint_disable_cap, "checkpoint=disable:%u"},
{Opt_checkpoint_disable_cap_perc, "checkpoint=disable:%u%%"},
{Opt_checkpoint_enable, "checkpoint=enable"},
+ {Opt_checkpoint_merge, "checkpoint_merge"},
+ {Opt_nocheckpoint_merge, "nocheckpoint_merge"},
{Opt_compress_algorithm, "compress_algorithm=%s"},
{Opt_compress_log_size, "compress_log_size=%u"},
{Opt_compress_extension, "compress_extension=%s"},
return 0;
}
+#ifdef CONFIG_F2FS_FS_COMPRESSION
+#ifdef CONFIG_F2FS_FS_LZ4
+static int f2fs_set_lz4hc_level(struct f2fs_sb_info *sbi, const char *str)
+{
+#ifdef CONFIG_F2FS_FS_LZ4HC
+ unsigned int level;
+#endif
+
+ if (strlen(str) == 3) {
+ F2FS_OPTION(sbi).compress_level = 0;
+ return 0;
+ }
+
+#ifdef CONFIG_F2FS_FS_LZ4HC
+ str += 3;
+
+ if (str[0] != ':') {
+ f2fs_info(sbi, "wrong format, e.g. <alg_name>:<compr_level>");
+ return -EINVAL;
+ }
+ if (kstrtouint(str + 1, 10, &level))
+ return -EINVAL;
+
+ if (level < LZ4HC_MIN_CLEVEL || level > LZ4HC_MAX_CLEVEL) {
+ f2fs_info(sbi, "invalid lz4hc compress level: %d", level);
+ return -EINVAL;
+ }
+
+ F2FS_OPTION(sbi).compress_level = level;
+ return 0;
+#else
+ f2fs_info(sbi, "kernel doesn't support lz4hc compression");
+ return -EINVAL;
+#endif
+}
+#endif
+
+#ifdef CONFIG_F2FS_FS_ZSTD
+static int f2fs_set_zstd_level(struct f2fs_sb_info *sbi, const char *str)
+{
+ unsigned int level;
+ int len = 4;
+
+ if (strlen(str) == len) {
+ F2FS_OPTION(sbi).compress_level = 0;
+ return 0;
+ }
+
+ str += len;
+
+ if (str[0] != ':') {
+ f2fs_info(sbi, "wrong format, e.g. <alg_name>:<compr_level>");
+ return -EINVAL;
+ }
+ if (kstrtouint(str + 1, 10, &level))
+ return -EINVAL;
+
+ if (!level || level > ZSTD_maxCLevel()) {
+ f2fs_info(sbi, "invalid zstd compress level: %d", level);
+ return -EINVAL;
+ }
+
+ F2FS_OPTION(sbi).compress_level = level;
+ return 0;
+}
+#endif
+#endif
+
static int parse_options(struct super_block *sb, char *options, bool is_remount)
{
struct f2fs_sb_info *sbi = F2FS_SB(sb);
case Opt_checkpoint_enable:
clear_opt(sbi, DISABLE_CHECKPOINT);
break;
+ case Opt_checkpoint_merge:
+ set_opt(sbi, MERGE_CHECKPOINT);
+ break;
+ case Opt_nocheckpoint_merge:
+ clear_opt(sbi, MERGE_CHECKPOINT);
+ break;
#ifdef CONFIG_F2FS_FS_COMPRESSION
case Opt_compress_algorithm:
if (!f2fs_sb_has_compression(sbi)) {
if (!name)
return -ENOMEM;
if (!strcmp(name, "lzo")) {
+#ifdef CONFIG_F2FS_FS_LZO
+ F2FS_OPTION(sbi).compress_level = 0;
F2FS_OPTION(sbi).compress_algorithm =
COMPRESS_LZO;
- } else if (!strcmp(name, "lz4")) {
+#else
+ f2fs_info(sbi, "kernel doesn't support lzo compression");
+#endif
+ } else if (!strncmp(name, "lz4", 3)) {
+#ifdef CONFIG_F2FS_FS_LZ4
+ ret = f2fs_set_lz4hc_level(sbi, name);
+ if (ret) {
+ kfree(name);
+ return -EINVAL;
+ }
F2FS_OPTION(sbi).compress_algorithm =
COMPRESS_LZ4;
- } else if (!strcmp(name, "zstd")) {
+#else
+ f2fs_info(sbi, "kernel doesn't support lz4 compression");
+#endif
+ } else if (!strncmp(name, "zstd", 4)) {
+#ifdef CONFIG_F2FS_FS_ZSTD
+ ret = f2fs_set_zstd_level(sbi, name);
+ if (ret) {
+ kfree(name);
+ return -EINVAL;
+ }
F2FS_OPTION(sbi).compress_algorithm =
COMPRESS_ZSTD;
+#else
+ f2fs_info(sbi, "kernel doesn't support zstd compression");
+#endif
} else if (!strcmp(name, "lzo-rle")) {
+#ifdef CONFIG_F2FS_FS_LZORLE
+ F2FS_OPTION(sbi).compress_level = 0;
F2FS_OPTION(sbi).compress_algorithm =
COMPRESS_LZORLE;
+#else
+ f2fs_info(sbi, "kernel doesn't support lzorle compression");
+#endif
} else {
kfree(name);
return -EINVAL;
/* Will be used by directory only */
fi->i_dir_level = F2FS_SB(sb)->dir_level;
- fi->ra_offset = -1;
-
return &fi->vfs_inode;
}
inode->i_ino == F2FS_META_INO(sbi))
return;
- if (flags == I_DIRTY_TIME)
- return;
-
if (is_inode_flag_set(inode, FI_AUTO_RECOVER))
clear_inode_flag(inode, FI_AUTO_RECOVER);
/* prevent remaining shrinker jobs */
mutex_lock(&sbi->umount_mutex);
+ /*
+ * flush all issued checkpoints and stop checkpoint issue thread.
+ * after then, all checkpoints should be done by each process context.
+ */
+ f2fs_stop_ckpt_thread(sbi);
+
/*
* We don't need to do checkpoint when superblock is clean.
* But, the previous checkpoint was not done by umount, it needs to do
if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
return -EAGAIN;
- if (sync) {
- struct cp_control cpc;
-
- cpc.reason = __get_cp_reason(sbi);
-
- down_write(&sbi->gc_lock);
- err = f2fs_write_checkpoint(sbi, &cpc);
- up_write(&sbi->gc_lock);
- }
- f2fs_trace_ios(NULL, 1);
+ if (sync)
+ err = f2fs_issue_checkpoint(sbi);
return err;
}
/* must be clean, since sync_filesystem() was already called */
if (is_sbi_flag_set(F2FS_SB(sb), SBI_IS_DIRTY))
return -EINVAL;
+
+ /* ensure no checkpoint required */
+ if (!llist_empty(&F2FS_SB(sb)->cprc_info.issue_list))
+ return -EINVAL;
return 0;
}
}
seq_printf(seq, ",compress_algorithm=%s", algtype);
+ if (F2FS_OPTION(sbi).compress_level)
+ seq_printf(seq, ":%d", F2FS_OPTION(sbi).compress_level);
+
seq_printf(seq, ",compress_log_size=%u",
F2FS_OPTION(sbi).compress_log_size);
if (test_opt(sbi, DISABLE_CHECKPOINT))
seq_printf(seq, ",checkpoint=disable:%u",
F2FS_OPTION(sbi).unusable_cap);
+ if (test_opt(sbi, MERGE_CHECKPOINT))
+ seq_puts(seq, ",checkpoint_merge");
+ else
+ seq_puts(seq, ",nocheckpoint_merge");
if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_POSIX)
seq_printf(seq, ",fsync_mode=%s", "posix");
else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT)
static void f2fs_enable_checkpoint(struct f2fs_sb_info *sbi)
{
+ /* we should flush all the data to keep data consistency */
+ sync_inodes_sb(sbi->sb);
+
down_write(&sbi->gc_lock);
f2fs_dirty_to_prefree(sbi);
}
}
+ if (!test_opt(sbi, DISABLE_CHECKPOINT) &&
+ test_opt(sbi, MERGE_CHECKPOINT)) {
+ err = f2fs_start_ckpt_thread(sbi);
+ if (err) {
+ f2fs_err(sbi,
+ "Failed to start F2FS issue_checkpoint_thread (%d)",
+ err);
+ goto restore_gc;
+ }
+ } else {
+ f2fs_stop_ckpt_thread(sbi);
+ }
+
/*
* We stop issue flush thread if FS is mounted as RO
* or if flush_merge is not passed in mount option.
.get_parent = f2fs_get_parent,
};
-static loff_t max_file_blocks(void)
+loff_t max_file_blocks(struct inode *inode)
{
loff_t result = 0;
- loff_t leaf_count = DEF_ADDRS_PER_BLOCK;
+ loff_t leaf_count;
/*
* note: previously, result is equal to (DEF_ADDRS_PER_INODE -
* result as zero.
*/
+ if (inode && f2fs_compressed_file(inode))
+ leaf_count = ADDRS_PER_BLOCK(inode);
+ else
+ leaf_count = DEF_ADDRS_PER_BLOCK;
+
/* two direct node blocks */
result += (leaf_count * 2);
if (err)
goto free_options;
- sbi->max_file_blocks = max_file_blocks();
- sb->s_maxbytes = sbi->max_file_blocks <<
+ sb->s_maxbytes = max_file_blocks(NULL) <<
le32_to_cpu(raw_super->log_blocksize);
sb->s_max_links = F2FS_LINK_MAX;
f2fs_init_fsync_node_info(sbi);
+ /* setup checkpoint request control and start checkpoint issue thread */
+ f2fs_init_ckpt_req_control(sbi);
+ if (!test_opt(sbi, DISABLE_CHECKPOINT) &&
+ test_opt(sbi, MERGE_CHECKPOINT)) {
+ err = f2fs_start_ckpt_thread(sbi);
+ if (err) {
+ f2fs_err(sbi,
+ "Failed to start F2FS issue_checkpoint_thread (%d)",
+ err);
+ goto stop_ckpt_thread;
+ }
+ }
+
/* setup f2fs internal modules */
err = f2fs_build_segment_manager(sbi);
if (err) {
* previous checkpoint was not done by clean system shutdown.
*/
if (f2fs_hw_is_readonly(sbi)) {
- if (!is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) {
- err = -EROFS;
+ if (!is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG))
f2fs_err(sbi, "Need to recover fsync data, but write access unavailable");
- goto free_meta;
- }
- f2fs_info(sbi, "write access unavailable, skipping recovery");
+ else
+ f2fs_info(sbi, "write access unavailable, skipping recovery");
goto reset_checkpoint;
}
free_sm:
f2fs_destroy_segment_manager(sbi);
f2fs_destroy_post_read_wq(sbi);
+stop_ckpt_thread:
+ f2fs_stop_ckpt_thread(sbi);
free_devices:
destroy_device_list(sbi);
kvfree(sbi->ckpt);
return -EINVAL;
}
- f2fs_build_trace_ios();
-
err = init_inodecache();
if (err)
goto fail;
f2fs_destroy_segment_manager_caches();
f2fs_destroy_node_manager_caches();
destroy_inodecache();
- f2fs_destroy_trace_ios();
}
module_init(init_f2fs_fs)
/*
* Inode state bits. Protected by inode->i_lock
*
- * Three bits determine the dirty state of the inode, I_DIRTY_SYNC,
- * I_DIRTY_DATASYNC and I_DIRTY_PAGES.
+ * Four bits determine the dirty state of the inode: I_DIRTY_SYNC,
+ * I_DIRTY_DATASYNC, I_DIRTY_PAGES, and I_DIRTY_TIME.
*
* Four bits define the lifetime of an inode. Initially, inodes are I_NEW,
* until that flag is cleared. I_WILL_FREE, I_FREEING and I_CLEAR are set at
* Two bits are used for locking and completion notification, I_NEW and I_SYNC.
*
* I_DIRTY_SYNC Inode is dirty, but doesn't have to be written on
- * fdatasync(). i_atime is the usual cause.
- * I_DIRTY_DATASYNC Data-related inode changes pending. We keep track of
+ * fdatasync() (unless I_DIRTY_DATASYNC is also set).
+ * Timestamp updates are the usual cause.
+ * I_DIRTY_DATASYNC Data-related inode changes pending. We keep track of
* these changes separately from I_DIRTY_SYNC so that we
* don't have to write inode on fdatasync() when only
- * mtime has changed in it.
+ * e.g. the timestamps have changed.
* I_DIRTY_PAGES Inode has dirty pages. Inode itself may be clean.
+ * I_DIRTY_TIME The inode itself only has dirty timestamps, and the
+ * lazytime mount option is enabled. We keep track of this
+ * separately from I_DIRTY_SYNC in order to implement
+ * lazytime. This gets cleared if I_DIRTY_INODE
+ * (I_DIRTY_SYNC and/or I_DIRTY_DATASYNC) gets set. I.e.
+ * either I_DIRTY_TIME *or* I_DIRTY_INODE can be set in
+ * i_state, but not both. I_DIRTY_PAGES may still be set.
* I_NEW Serves as both a mutex and completion notification.
* New inodes set I_NEW. If two processes both create
* the same inode, one of them will release its inode and
__mark_inode_dirty(inode, I_DIRTY_SYNC);
}
+ /*
+ * Returns true if the given inode itself only has dirty timestamps (its pages
+ * may still be dirty) and isn't currently being allocated or freed.
+ * Filesystems should call this if when writing an inode when lazytime is
+ * enabled, they want to opportunistically write the timestamps of other inodes
+ * located very nearby on-disk, e.g. in the same inode block. This returns true
+ * if the given inode is in need of such an opportunistic update. Requires
+ * i_lock, or at least later re-checking under i_lock.
+ */
+ static inline bool inode_is_dirtytime_only(struct inode *inode)
+ {
+ return (inode->i_state & (I_DIRTY_TIME | I_NEW |
+ I_FREEING | I_WILL_FREE)) == I_DIRTY_TIME;
+ }
+
extern void inc_nlink(struct inode *inode);
extern void drop_nlink(struct inode *inode);
extern void clear_nlink(struct inode *inode);
extern int generic_check_addressable(unsigned, u64);
-#ifdef CONFIG_UNICODE
-extern int generic_ci_d_hash(const struct dentry *dentry, struct qstr *str);
-extern int generic_ci_d_compare(const struct dentry *dentry, unsigned int len,
- const char *str, const struct qstr *name);
-#endif
extern void generic_set_encrypted_ci_d_ops(struct dentry *dentry);
#ifdef CONFIG_MIGRATION