#include "f2fs.h"
#include "node.h"
#include "segment.h"
+ #include "iostat.h"
#include <trace/events/f2fs.h>
#define NUM_PREALLOC_POST_READ_CTXS 128
struct f2fs_sb_info *sbi;
struct work_struct work;
unsigned int enabled_steps;
+ block_t fs_blkaddr;
};
static void f2fs_finish_read_bio(struct bio *bio)
struct bio_vec *bv;
struct bvec_iter_all iter_all;
bool all_compressed = true;
- block_t blkaddr = SECTOR_TO_BLOCK(ctx->bio->bi_iter.bi_sector);
+ block_t blkaddr = ctx->fs_blkaddr;
bio_for_each_segment_all(bv, ctx->bio, iter_all) {
struct page *page = bv->bv_page;
static void f2fs_read_end_io(struct bio *bio)
{
struct f2fs_sb_info *sbi = F2FS_P_SB(bio_first_page_all(bio));
- struct bio_post_read_ctx *ctx = bio->bi_private;
+ struct bio_post_read_ctx *ctx;
+
+ iostat_update_and_unbind_ctx(bio, 0);
+ ctx = bio->bi_private;
if (time_to_inject(sbi, FAULT_READ_IO)) {
f2fs_show_injection_info(sbi, FAULT_READ_IO);
static void f2fs_write_end_io(struct bio *bio)
{
- struct f2fs_sb_info *sbi = bio->bi_private;
+ struct f2fs_sb_info *sbi;
struct bio_vec *bvec;
struct bvec_iter_all iter_all;
+ iostat_update_and_unbind_ctx(bio, 1);
+ sbi = bio->bi_private;
+
if (time_to_inject(sbi, FAULT_WRITE_IO)) {
f2fs_show_injection_info(sbi, FAULT_WRITE_IO);
bio->bi_status = BLK_STS_IOERR;
bio->bi_write_hint = f2fs_io_type_to_rw_hint(sbi,
fio->type, fio->temp);
}
+ iostat_alloc_and_bind_ctx(sbi, bio, NULL);
+
if (fio->io_wbc)
wbc_init_bio(fio->io_wbc, bio);
trace_f2fs_submit_read_bio(sbi->sb, type, bio);
else
trace_f2fs_submit_write_bio(sbi->sb, type, bio);
+
+ iostat_update_submit_ctx(bio, type);
submit_bio(bio);
}
struct f2fs_bio_info *io = sbi->write_io[DATA] + temp;
struct bio_entry *be;
- be = f2fs_kmem_cache_alloc(bio_entry_slab, GFP_NOFS);
+ be = f2fs_kmem_cache_alloc(bio_entry_slab, GFP_NOFS, true, NULL);
be->bio = bio;
bio_get(bio);
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct bio *bio;
- struct bio_post_read_ctx *ctx;
+ struct bio_post_read_ctx *ctx = NULL;
unsigned int post_read_steps = 0;
bio = bio_alloc_bioset(for_write ? GFP_NOIO : GFP_KERNEL,
ctx->bio = bio;
ctx->sbi = sbi;
ctx->enabled_steps = post_read_steps;
+ ctx->fs_blkaddr = blkaddr;
bio->bi_private = ctx;
}
+ iostat_alloc_and_bind_ctx(sbi, bio, ctx);
return bio;
}
int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index)
{
- struct extent_info ei = {0, 0, 0};
+ struct extent_info ei = {0, };
struct inode *inode = dn->inode;
if (f2fs_lookup_extent_cache(inode, index, &ei)) {
struct address_space *mapping = inode->i_mapping;
struct dnode_of_data dn;
struct page *page;
- struct extent_info ei = {0,0,0};
+ struct extent_info ei = {0, };
int err;
page = f2fs_grab_cache_page(mapping, index, for_write);
int err = 0, ofs = 1;
unsigned int ofs_in_node, last_ofs_in_node;
blkcnt_t prealloc;
- struct extent_info ei = {0,0,0};
+ struct extent_info ei = {0, };
block_t blkaddr;
unsigned int start_pgofs;
if (err) {
if (flag == F2FS_GET_BLOCK_BMAP)
map->m_pblk = 0;
+
if (err == -ENOENT) {
+ /*
+ * There is one exceptional case that read_node_page()
+ * may return -ENOENT due to filesystem has been
+ * shutdown or cp_error, so force to convert error
+ * number to EIO for such case.
+ */
+ if (map->m_may_create &&
+ (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) ||
+ f2fs_cp_error(sbi))) {
+ err = -EIO;
+ goto unlock_out;
+ }
+
err = 0;
if (map->m_next_pgofs)
*map->m_next_pgofs =
map->m_flags |= F2FS_MAP_NEW;
blkaddr = dn.data_blkaddr;
} else {
+ if (f2fs_compressed_file(inode) &&
+ f2fs_sanity_check_cluster(&dn) &&
+ (flag != F2FS_GET_BLOCK_FIEMAP ||
+ IS_ENABLED(CONFIG_F2FS_CHECK_FS))) {
+ err = -EFSCORRUPTED;
+ goto sync_out;
+ }
if (flag == F2FS_GET_BLOCK_BMAP) {
map->m_pblk = 0;
goto sync_out;
u64 logical = 0, phys = 0, size = 0;
u32 flags = 0;
int ret = 0;
- bool compr_cluster = false;
+ bool compr_cluster = false, compr_appended;
unsigned int cluster_size = F2FS_I(inode)->i_cluster_size;
+ unsigned int count_in_cluster = 0;
loff_t maxbytes;
if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) {
map.m_next_pgofs = &next_pgofs;
map.m_seg_type = NO_CHECK_TYPE;
- if (compr_cluster)
- map.m_len = cluster_size - 1;
+ if (compr_cluster) {
+ map.m_lblk += 1;
+ map.m_len = cluster_size - count_in_cluster;
+ }
ret = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_FIEMAP);
if (ret)
goto out;
/* HOLE */
- if (!(map.m_flags & F2FS_MAP_FLAGS)) {
+ if (!compr_cluster && !(map.m_flags & F2FS_MAP_FLAGS)) {
start_blk = next_pgofs;
if (blks_to_bytes(inode, start_blk) < blks_to_bytes(inode,
flags |= FIEMAP_EXTENT_LAST;
}
+ compr_appended = false;
+ /* In a case of compressed cluster, append this to the last extent */
+ if (compr_cluster && ((map.m_flags & F2FS_MAP_UNWRITTEN) ||
+ !(map.m_flags & F2FS_MAP_FLAGS))) {
+ compr_appended = true;
+ goto skip_fill;
+ }
+
if (size) {
flags |= FIEMAP_EXTENT_MERGED;
if (IS_ENCRYPTED(inode))
if (start_blk > last_blk)
goto out;
- if (compr_cluster) {
- compr_cluster = false;
-
-
- logical = blks_to_bytes(inode, start_blk - 1);
- phys = blks_to_bytes(inode, map.m_pblk);
- size = blks_to_bytes(inode, cluster_size);
-
- flags |= FIEMAP_EXTENT_ENCODED;
-
- start_blk += cluster_size - 1;
-
- if (start_blk > last_blk)
- goto out;
-
- goto prep_next;
- }
-
+ skip_fill:
if (map.m_pblk == COMPRESS_ADDR) {
compr_cluster = true;
- start_blk++;
- goto prep_next;
- }
-
- logical = blks_to_bytes(inode, start_blk);
- phys = blks_to_bytes(inode, map.m_pblk);
- size = blks_to_bytes(inode, map.m_len);
- flags = 0;
- if (map.m_flags & F2FS_MAP_UNWRITTEN)
- flags = FIEMAP_EXTENT_UNWRITTEN;
+ count_in_cluster = 1;
+ } else if (compr_appended) {
+ unsigned int appended_blks = cluster_size -
+ count_in_cluster + 1;
+ size += blks_to_bytes(inode, appended_blks);
+ start_blk += appended_blks;
+ compr_cluster = false;
+ } else {
+ logical = blks_to_bytes(inode, start_blk);
+ phys = __is_valid_data_blkaddr(map.m_pblk) ?
+ blks_to_bytes(inode, map.m_pblk) : 0;
+ size = blks_to_bytes(inode, map.m_len);
+ flags = 0;
+
+ if (compr_cluster) {
+ flags = FIEMAP_EXTENT_ENCODED;
+ count_in_cluster += map.m_len;
+ if (count_in_cluster == cluster_size) {
+ compr_cluster = false;
+ size += blks_to_bytes(inode, 1);
+ }
+ } else if (map.m_flags & F2FS_MAP_UNWRITTEN) {
+ flags = FIEMAP_EXTENT_UNWRITTEN;
+ }
- start_blk += bytes_to_blks(inode, size);
+ start_blk += bytes_to_blks(inode, size);
+ }
prep_next:
cond_resched();
sector_t last_block_in_file;
const unsigned blocksize = blks_to_bytes(inode, 1);
struct decompress_io_ctx *dic = NULL;
+ struct extent_info ei = {0, };
+ bool from_dnode = true;
int i;
int ret = 0;
continue;
}
unlock_page(page);
+ if (for_write)
+ put_page(page);
cc->rpages[i] = NULL;
cc->nr_rpages--;
}
if (f2fs_cluster_is_empty(cc))
goto out;
+ if (f2fs_lookup_extent_cache(inode, start_idx, &ei))
+ from_dnode = false;
+
+ if (!from_dnode)
+ goto skip_reading_dnode;
+
set_new_dnode(&dn, inode, NULL, NULL, 0);
ret = f2fs_get_dnode_of_data(&dn, start_idx, LOOKUP_NODE);
if (ret)
f2fs_bug_on(sbi, dn.data_blkaddr != COMPRESS_ADDR);
+ skip_reading_dnode:
for (i = 1; i < cc->cluster_size; i++) {
block_t blkaddr;
- blkaddr = data_blkaddr(dn.inode, dn.node_page,
- dn.ofs_in_node + i);
+ blkaddr = from_dnode ? data_blkaddr(dn.inode, dn.node_page,
+ dn.ofs_in_node + i) :
+ ei.blk + i - 1;
if (!__is_valid_data_blkaddr(blkaddr))
break;
goto out_put_dnode;
}
cc->nr_cpages++;
+
+ if (!from_dnode && i >= ei.c_len)
+ break;
}
/* nothing to decompress */
block_t blkaddr;
struct bio_post_read_ctx *ctx;
- blkaddr = data_blkaddr(dn.inode, dn.node_page,
- dn.ofs_in_node + i + 1);
+ blkaddr = from_dnode ? data_blkaddr(dn.inode, dn.node_page,
+ dn.ofs_in_node + i + 1) :
+ ei.blk + i;
f2fs_wait_on_block_writeback(inode, blkaddr);
if (bio_add_page(bio, page, blocksize, 0) < blocksize)
goto submit_and_realloc;
- ctx = bio->bi_private;
+ ctx = get_post_read_ctx(bio);
ctx->enabled_steps |= STEP_DECOMPRESS;
refcount_inc(&dic->refcnt);
*last_block_in_bio = blkaddr;
}
- f2fs_put_dnode(&dn);
+ if (from_dnode)
+ f2fs_put_dnode(&dn);
*bio_ret = bio;
return 0;
out_put_dnode:
- f2fs_put_dnode(&dn);
+ if (from_dnode)
+ f2fs_put_dnode(&dn);
out:
for (i = 0; i < cc->cluster_size; i++) {
if (cc->rpages[i]) {
.nr_rpages = 0,
.nr_cpages = 0,
};
+ pgoff_t nc_cluster_idx = NULL_CLUSTER;
#endif
unsigned nr_pages = rac ? readahead_count(rac) : 1;
unsigned max_nr_pages = nr_pages;
if (ret)
goto set_error_page;
}
- ret = f2fs_is_compressed_cluster(inode, page->index);
- if (ret < 0)
- goto set_error_page;
- else if (!ret)
- goto read_single_page;
+ if (cc.cluster_idx == NULL_CLUSTER) {
+ if (nc_cluster_idx ==
+ page->index >> cc.log_cluster_size) {
+ goto read_single_page;
+ }
+ ret = f2fs_is_compressed_cluster(inode, page->index);
+ if (ret < 0)
+ goto set_error_page;
+ else if (!ret) {
+ nc_cluster_idx =
+ page->index >> cc.log_cluster_size;
+ goto read_single_page;
+ }
+
+ nc_cluster_idx = NULL_CLUSTER;
+ }
ret = f2fs_init_compress_ctx(&cc);
if (ret)
goto set_error_page;
return true;
if (f2fs_is_atomic_file(inode))
return true;
+ if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
+ return true;
/* swap file is migrating in aligned write mode */
if (is_inode_flag_set(inode, FI_ALIGNED_WRITE))
struct page *page = fio->page;
struct inode *inode = page->mapping->host;
struct dnode_of_data dn;
- struct extent_info ei = {0,0,0};
+ struct extent_info ei = {0, };
struct node_info ni;
bool ipu_force = false;
int err = 0;
FS_CP_DATA_IO : FS_DATA_IO);
}
- static void f2fs_write_failed(struct address_space *mapping, loff_t to)
+ static void f2fs_write_failed(struct inode *inode, loff_t to)
{
- struct inode *inode = mapping->host;
loff_t i_size = i_size_read(inode);
if (IS_NOQUOTA(inode))
/* In the fs-verity case, f2fs_end_enable_verity() does the truncate */
if (to > i_size && !f2fs_verity_in_progress(inode)) {
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
- filemap_invalidate_lock(mapping);
- down_write(&F2FS_I(inode)->i_mmap_sem);
++ filemap_invalidate_lock(inode->i_mapping);
truncate_pagecache(inode, i_size);
f2fs_truncate_blocks(inode, i_size, true);
- filemap_invalidate_unlock(mapping);
- up_write(&F2FS_I(inode)->i_mmap_sem);
++ filemap_invalidate_unlock(inode->i_mapping);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
}
}
struct dnode_of_data dn;
struct page *ipage;
bool locked = false;
- struct extent_info ei = {0,0,0};
+ struct extent_info ei = {0, };
int err = 0;
int flag;
*fsdata = NULL;
+ if (len == PAGE_SIZE)
+ goto repeat;
+
ret = f2fs_prepare_compress_overwrite(inode, pagep,
index, fsdata);
if (ret < 0) {
fail:
f2fs_put_page(page, 1);
- f2fs_write_failed(mapping, pos + len);
+ f2fs_write_failed(inode, pos + len);
if (drop_atomic)
f2fs_drop_inmem_pages_all(sbi, false);
return err;
if (f2fs_force_buffered_io(inode, iocb, iter))
return 0;
- do_opu = allow_outplace_dio(inode, iocb, iter);
+ do_opu = rw == WRITE && f2fs_lfs_mode(sbi);
trace_f2fs_direct_IO_enter(inode, offset, count, rw);
f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_IO,
count - iov_iter_count(iter));
} else if (err < 0) {
- f2fs_write_failed(mapping, offset + count);
+ f2fs_write_failed(inode, offset + count);
}
} else {
if (err > 0)
int ret = 0;
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
- down_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_lock(inode->i_mapping);
set_inode_flag(inode, FI_ALIGNED_WRITE);
clear_inode_flag(inode, FI_DO_DEFRAG);
clear_inode_flag(inode, FI_ALIGNED_WRITE);
- up_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock(inode->i_mapping);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
return ret;
FAULT_KVMALLOC,
FAULT_PAGE_ALLOC,
FAULT_PAGE_GET,
+ FAULT_ALLOC_BIO, /* it's obsolete due to bio_alloc() will never fail */
FAULT_ALLOC_NID,
FAULT_ORPHAN,
FAULT_BLOCK,
FAULT_CHECKPOINT,
FAULT_DISCARD,
FAULT_WRITE_IO,
+ FAULT_SLAB_ALLOC,
FAULT_MAX,
};
int fsync_mode; /* fsync policy */
int fs_mode; /* fs mode: LFS or ADAPTIVE */
int bggc_mode; /* bggc mode: off, on or sync */
+ int discard_unit; /*
+ * discard command's offset/size should
+ * be aligned to this unit: block,
+ * segment or section
+ */
struct fscrypt_dummy_policy dummy_enc_policy; /* test dummy encryption */
block_t unusable_cap_perc; /* percentage for cap */
block_t unusable_cap; /* Amount of space allowed to be
*/
};
- #define DEFAULT_RETRY_IO_COUNT 8 /* maximum retry read IO count */
+ #define DEFAULT_RETRY_IO_COUNT 8 /* maximum retry read IO or flush count */
/* congestion wait timeout value, default: 20ms */
#define DEFAULT_IO_TIMEOUT (msecs_to_jiffies(20))
unsigned int fofs; /* start offset in a file */
unsigned int len; /* length of the extent */
u32 blk; /* start block address of the extent */
+ #ifdef CONFIG_F2FS_FS_COMPRESSION
+ unsigned int c_len; /* physical extent length of compressed blocks */
+ #endif
};
struct extent_node {
/* avoid racing between foreground op and gc */
struct rw_semaphore i_gc_rwsem[2];
- struct rw_semaphore i_mmap_sem;
struct rw_semaphore i_xattr_sem; /* avoid racing between reading and changing EAs */
int i_extra_isize; /* size of extra space located in i_addr */
ei->fofs = fofs;
ei->blk = blk;
ei->len = len;
+ #ifdef CONFIG_F2FS_FS_COMPRESSION
+ ei->c_len = 0;
+ #endif
}
static inline bool __is_discard_mergeable(struct discard_info *back,
static inline bool __is_extent_mergeable(struct extent_info *back,
struct extent_info *front)
{
+ #ifdef CONFIG_F2FS_FS_COMPRESSION
+ if (back->c_len && back->len != back->c_len)
+ return false;
+ if (front->c_len && front->len != front->c_len)
+ return false;
+ #endif
return (back->fofs + back->len == front->fofs &&
back->blk + back->len == front->blk);
}
GC_IDLE_AT,
GC_URGENT_HIGH,
GC_URGENT_LOW,
+ MAX_GC_MODE,
};
enum {
*/
};
+ enum {
+ DISCARD_UNIT_BLOCK, /* basic discard unit is block */
+ DISCARD_UNIT_SEGMENT, /* basic discard unit is segment */
+ DISCARD_UNIT_SECTION, /* basic discard unit is section */
+ };
+
static inline int f2fs_test_bit(unsigned int nr, char *addr);
static inline void f2fs_set_bit(unsigned int nr, char *addr);
static inline void f2fs_clear_bit(unsigned int nr, char *addr);
#endif
spinlock_t stat_lock; /* lock for stat operations */
- /* For app/fs IO statistics */
- spinlock_t iostat_lock;
- unsigned long long rw_iostat[NR_IO_TYPE];
- unsigned long long prev_rw_iostat[NR_IO_TYPE];
- bool iostat_enable;
- unsigned long iostat_next_period;
- unsigned int iostat_period_ms;
-
/* to attach REQ_META|REQ_FUA flags */
unsigned int data_io_flag;
unsigned int node_io_flag;
struct kmem_cache *inline_xattr_slab; /* inline xattr entry */
unsigned int inline_xattr_slab_size; /* default inline xattr slab size */
+ /* For reclaimed segs statistics per each GC mode */
+ unsigned int gc_segment_mode; /* GC state for reclaimed segments */
+ unsigned int gc_reclaimed_segs[MAX_GC_MODE]; /* Reclaimed segs for each mode */
+
+ unsigned long seq_file_ra_mul; /* multiplier for ra_pages of seq. files in fadvise */
+
#ifdef CONFIG_F2FS_FS_COMPRESSION
struct kmem_cache *page_array_slab; /* page array entry */
unsigned int page_array_slab_size; /* default page array slab size */
unsigned int compress_watermark; /* cache page watermark */
atomic_t compress_page_hit; /* cache hit count */
#endif
+
+ #ifdef CONFIG_F2FS_IOSTAT
+ /* For app/fs IO statistics */
+ spinlock_t iostat_lock;
+ unsigned long long rw_iostat[NR_IO_TYPE];
+ unsigned long long prev_rw_iostat[NR_IO_TYPE];
+ bool iostat_enable;
+ unsigned long iostat_next_period;
+ unsigned int iostat_period_ms;
+
+ /* For io latency related statistics info in one iostat period */
+ spinlock_t iostat_lat_lock;
+ struct iostat_lat_info *iostat_io_lat;
+ #endif
};
struct f2fs_private_dio {
spin_unlock_irqrestore(&sbi->cp_lock, flags);
}
- static inline void disable_nat_bits(struct f2fs_sb_info *sbi, bool lock)
- {
- unsigned long flags;
- unsigned char *nat_bits;
-
- /*
- * In order to re-enable nat_bits we need to call fsck.f2fs by
- * set_sbi_flag(sbi, SBI_NEED_FSCK). But it may give huge cost,
- * so let's rely on regular fsck or unclean shutdown.
- */
-
- if (lock)
- spin_lock_irqsave(&sbi->cp_lock, flags);
- __clear_ckpt_flags(F2FS_CKPT(sbi), CP_NAT_BITS_FLAG);
- nat_bits = NM_I(sbi)->nat_bits;
- NM_I(sbi)->nat_bits = NULL;
- if (lock)
- spin_unlock_irqrestore(&sbi->cp_lock, flags);
-
- kvfree(nat_bits);
- }
-
- static inline bool enabled_nat_bits(struct f2fs_sb_info *sbi,
- struct cp_control *cpc)
- {
- bool set = is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG);
-
- return (cpc) ? (cpc->reason & CP_UMOUNT) && set : set;
- }
-
static inline void f2fs_lock_op(struct f2fs_sb_info *sbi)
{
down_read(&sbi->cp_rwsem);
return kmem_cache_create(name, size, 0, SLAB_RECLAIM_ACCOUNT, NULL);
}
- static inline void *f2fs_kmem_cache_alloc(struct kmem_cache *cachep,
+ static inline void *f2fs_kmem_cache_alloc_nofail(struct kmem_cache *cachep,
gfp_t flags)
{
void *entry;
return entry;
}
+ static inline void *f2fs_kmem_cache_alloc(struct kmem_cache *cachep,
+ gfp_t flags, bool nofail, struct f2fs_sb_info *sbi)
+ {
+ if (nofail)
+ return f2fs_kmem_cache_alloc_nofail(cachep, flags);
+
+ if (time_to_inject(sbi, FAULT_SLAB_ALLOC)) {
+ f2fs_show_injection_info(sbi, FAULT_SLAB_ALLOC);
+ return NULL;
+ }
+
+ return kmem_cache_alloc(cachep, flags);
+ }
+
static inline bool is_inflight_io(struct f2fs_sb_info *sbi, int type)
{
if (get_pages(sbi, F2FS_RD_DATA) || get_pages(sbi, F2FS_RD_NODE) ||
sizeof((f2fs_inode)->field)) \
<= (F2FS_OLD_ATTRIBUTE_SIZE + (extra_isize))) \
- #define DEFAULT_IOSTAT_PERIOD_MS 3000
- #define MIN_IOSTAT_PERIOD_MS 100
- /* maximum period of iostat tracing is 1 day */
- #define MAX_IOSTAT_PERIOD_MS 8640000
-
- static inline void f2fs_reset_iostat(struct f2fs_sb_info *sbi)
- {
- int i;
-
- spin_lock(&sbi->iostat_lock);
- for (i = 0; i < NR_IO_TYPE; i++) {
- sbi->rw_iostat[i] = 0;
- sbi->prev_rw_iostat[i] = 0;
- }
- spin_unlock(&sbi->iostat_lock);
- }
-
- extern void f2fs_record_iostat(struct f2fs_sb_info *sbi);
-
- static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi,
- enum iostat_type type, unsigned long long io_bytes)
- {
- if (!sbi->iostat_enable)
- return;
- spin_lock(&sbi->iostat_lock);
- sbi->rw_iostat[type] += io_bytes;
-
- if (type == APP_WRITE_IO || type == APP_DIRECT_IO)
- sbi->rw_iostat[APP_BUFFERED_IO] =
- sbi->rw_iostat[APP_WRITE_IO] -
- sbi->rw_iostat[APP_DIRECT_IO];
-
- if (type == APP_READ_IO || type == APP_DIRECT_READ_IO)
- sbi->rw_iostat[APP_BUFFERED_READ_IO] =
- sbi->rw_iostat[APP_READ_IO] -
- sbi->rw_iostat[APP_DIRECT_READ_IO];
- spin_unlock(&sbi->iostat_lock);
-
- f2fs_record_iostat(sbi);
- }
-
#define __is_large_section(sbi) ((sbi)->segs_per_sec > 1)
#define __is_meta_io(fio) (PAGE_TYPE_OF_BIO((fio)->type) == META)
int f2fs_truncate_xattr_node(struct inode *inode);
int f2fs_wait_on_node_pages_writeback(struct f2fs_sb_info *sbi,
unsigned int seq_id);
+ bool f2fs_nat_bitmap_enabled(struct f2fs_sb_info *sbi);
int f2fs_remove_inode_page(struct inode *inode);
struct page *f2fs_new_inode_page(struct inode *inode);
struct page *f2fs_new_node_page(struct dnode_of_data *dn, unsigned int ofs);
int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page);
int f2fs_restore_node_summary(struct f2fs_sb_info *sbi,
unsigned int segno, struct f2fs_summary_block *sum);
+ void f2fs_enable_nat_bits(struct f2fs_sb_info *sbi);
int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc);
int f2fs_build_node_manager(struct f2fs_sb_info *sbi);
void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi);
void f2fs_destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free);
void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr);
bool f2fs_is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr);
+ int f2fs_start_discard_thread(struct f2fs_sb_info *sbi);
void f2fs_drop_discard_cmd(struct f2fs_sb_info *sbi);
void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi);
bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi);
/*
* sysfs.c
*/
+ #define MIN_RA_MUL 2
+ #define MAX_RA_MUL 256
+
int __init f2fs_init_sysfs(void);
void f2fs_exit_sysfs(void);
int f2fs_register_sysfs(struct f2fs_sb_info *sbi);
block_t blkaddr);
bool f2fs_cluster_is_empty(struct compress_ctx *cc);
bool f2fs_cluster_can_merge_page(struct compress_ctx *cc, pgoff_t index);
+ bool f2fs_sanity_check_cluster(struct dnode_of_data *dn);
void f2fs_compress_ctx_add_page(struct compress_ctx *cc, struct page *page);
int f2fs_write_multi_pages(struct compress_ctx *cc,
int *submitted,
struct writeback_control *wbc,
enum iostat_type io_type);
int f2fs_is_compressed_cluster(struct inode *inode, pgoff_t index);
+ void f2fs_update_extent_tree_range_compressed(struct inode *inode,
+ pgoff_t fofs, block_t blkaddr, unsigned int llen,
+ unsigned int c_len);
int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
unsigned nr_pages, sector_t *last_block_in_bio,
bool is_readahead, bool for_write);
struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc);
void f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed);
void f2fs_put_page_dic(struct page *page);
+ unsigned int f2fs_cluster_blocks_are_contiguous(struct dnode_of_data *dn);
int f2fs_init_compress_ctx(struct compress_ctx *cc);
void f2fs_destroy_compress_ctx(struct compress_ctx *cc, bool reuse);
void f2fs_init_compress_info(struct f2fs_sb_info *sbi);
{
WARN_ON_ONCE(1);
}
+ static inline unsigned int f2fs_cluster_blocks_are_contiguous(struct dnode_of_data *dn) { return 0; }
+ static inline bool f2fs_sanity_check_cluster(struct dnode_of_data *dn) { return false; }
static inline int f2fs_init_compress_inode(struct f2fs_sb_info *sbi) { return 0; }
static inline void f2fs_destroy_compress_inode(struct f2fs_sb_info *sbi) { }
static inline int f2fs_init_page_array_cache(struct f2fs_sb_info *sbi) { return 0; }
static inline void f2fs_invalidate_compress_pages(struct f2fs_sb_info *sbi,
nid_t ino) { }
#define inc_compr_inode_stat(inode) do { } while (0)
+ static inline void f2fs_update_extent_tree_range_compressed(struct inode *inode,
+ pgoff_t fofs, block_t blkaddr, unsigned int llen,
+ unsigned int c_len) { }
#endif
static inline void set_compress_context(struct inode *inode)
1 << COMPRESS_CHKSUM : 0;
F2FS_I(inode)->i_cluster_size =
1 << F2FS_I(inode)->i_log_cluster_size;
- if (F2FS_I(inode)->i_compress_algorithm == COMPRESS_LZ4 &&
+ if ((F2FS_I(inode)->i_compress_algorithm == COMPRESS_LZ4 ||
+ F2FS_I(inode)->i_compress_algorithm == COMPRESS_ZSTD) &&
F2FS_OPTION(sbi).compress_level)
F2FS_I(inode)->i_compress_flag |=
F2FS_OPTION(sbi).compress_level <<
return align & blocksize_mask;
}
- static inline int allow_outplace_dio(struct inode *inode,
- struct kiocb *iocb, struct iov_iter *iter)
- {
- struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
- int rw = iov_iter_rw(iter);
-
- return (f2fs_lfs_mode(sbi) && (rw == WRITE) &&
- !block_unaligned_IO(inode, iocb, iter));
- }
-
static inline bool f2fs_force_buffered_io(struct inode *inode,
struct kiocb *iocb, struct iov_iter *iter)
{
return false;
}
+ static inline bool f2fs_block_unit_discard(struct f2fs_sb_info *sbi)
+ {
+ return F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_BLOCK;
+ }
+
#define EFSBADCRC EBADMSG /* Bad CRC detected */
#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */
#include <linux/nls.h>
#include <linux/sched/signal.h>
#include <linux/fileattr.h>
+ #include <linux/fadvise.h>
#include "f2fs.h"
#include "node.h"
#include "xattr.h"
#include "acl.h"
#include "gc.h"
+ #include "iostat.h"
#include <trace/events/f2fs.h>
#include <uapi/linux/f2fs.h>
struct inode *inode = file_inode(vmf->vma->vm_file);
vm_fault_t ret;
- down_read(&F2FS_I(inode)->i_mmap_sem);
ret = filemap_fault(vmf);
- up_read(&F2FS_I(inode)->i_mmap_sem);
-
if (!ret)
f2fs_update_iostat(F2FS_I_SB(inode), APP_MAPPED_READ_IO,
F2FS_BLKSIZE);
f2fs_bug_on(sbi, f2fs_has_inline_data(inode));
file_update_time(vmf->vma->vm_file);
- down_read(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_lock_shared(inode->i_mapping);
lock_page(page);
if (unlikely(page->mapping != inode->i_mapping ||
page_offset(page) > i_size_read(inode) ||
trace_f2fs_vm_page_mkwrite(page, DATA);
out_sem:
- up_read(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock_shared(inode->i_mapping);
sb_end_pagefault(inode->i_sb);
err:
};
unsigned int seq_id = 0;
- if (unlikely(f2fs_readonly(inode->i_sb) ||
- is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
+ if (unlikely(f2fs_readonly(inode->i_sb)))
return 0;
trace_f2fs_sync_file_enter(inode);
ret = file_write_and_wait_range(file, start, end);
clear_inode_flag(inode, FI_NEED_IPU);
- if (ret) {
+ if (ret || is_sbi_flag_set(sbi, SBI_CP_DISABLED)) {
trace_f2fs_sync_file_exit(inode, cp_reason, datasync, ret);
return ret;
}
f2fs_exist_written_data(sbi, ino, UPDATE_INO))
goto flush_out;
goto out;
+ } else {
+ /*
+ * for OPU case, during fsync(), node can be persisted before
+ * data when lower device doesn't support write barrier, result
+ * in data corruption after SPO.
+ * So for strict fsync mode, force to use atomic write sematics
+ * to keep write order in between data/node and last node to
+ * avoid potential data corruption.
+ */
+ if (F2FS_OPTION(sbi).fsync_mode ==
+ FSYNC_MODE_STRICT && !atomic)
+ atomic = true;
}
go_write:
/*
return err;
#ifdef CONFIG_F2FS_FS_COMPRESSION
+ /*
+ * For compressed file, after release compress blocks, don't allow write
+ * direct, but we should allow write direct after truncate to zero.
+ */
+ if (f2fs_compressed_file(inode) && !free_from
+ && is_inode_flag_set(inode, FI_COMPRESS_RELEASED))
+ clear_inode_flag(inode, FI_COMPRESS_RELEASED);
+
if (from != free_from) {
err = f2fs_truncate_partial_cluster(inode, from, lock);
if (err)
}
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
- down_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_lock(inode->i_mapping);
truncate_setsize(inode, attr->ia_size);
* do not trim all blocks after i_size if target size is
* larger than i_size.
*/
- up_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock(inode->i_mapping);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
if (err)
return err;
}
if (pg_start < pg_end) {
- struct address_space *mapping = inode->i_mapping;
loff_t blk_start, blk_end;
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
blk_end = (loff_t)pg_end << PAGE_SHIFT;
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
- filemap_invalidate_lock(mapping);
- down_write(&F2FS_I(inode)->i_mmap_sem);
++ filemap_invalidate_lock(inode->i_mapping);
- truncate_inode_pages_range(mapping, blk_start,
- blk_end - 1);
+ truncate_pagecache_range(inode, blk_start, blk_end - 1);
f2fs_lock_op(sbi);
ret = f2fs_truncate_hole(inode, pg_start, pg_end);
f2fs_unlock_op(sbi);
- filemap_invalidate_unlock(mapping);
- up_write(&F2FS_I(inode)->i_mmap_sem);
++ filemap_invalidate_unlock(inode->i_mapping);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
}
}
/* avoid gc operation during block exchange */
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
- down_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_lock(inode->i_mapping);
f2fs_lock_op(sbi);
f2fs_drop_extent_tree(inode);
ret = __exchange_data_block(inode, inode, end, start, nrpages - end, true);
f2fs_unlock_op(sbi);
- up_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock(inode->i_mapping);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
return ret;
}
return ret;
/* write out all moved pages, if possible */
- down_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_lock(inode->i_mapping);
filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
truncate_pagecache(inode, offset);
new_size = i_size_read(inode) - len;
ret = f2fs_truncate_blocks(inode, new_size, true);
- up_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock(inode->i_mapping);
if (!ret)
f2fs_i_size_write(inode, new_size);
return ret;
pgoff_t end;
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
- down_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_lock(mapping);
truncate_pagecache_range(inode,
(loff_t)index << PAGE_SHIFT,
ret = f2fs_get_dnode_of_data(&dn, index, ALLOC_NODE);
if (ret) {
f2fs_unlock_op(sbi);
- up_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock(mapping);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
goto out;
}
f2fs_put_dnode(&dn);
f2fs_unlock_op(sbi);
- up_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock(mapping);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
f2fs_balance_fs(sbi, dn.node_changed);
static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct address_space *mapping = inode->i_mapping;
pgoff_t nr, pg_start, pg_end, delta, idx;
loff_t new_size;
int ret = 0;
f2fs_balance_fs(sbi, true);
- down_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_lock(mapping);
ret = f2fs_truncate_blocks(inode, i_size_read(inode), true);
- up_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock(mapping);
if (ret)
return ret;
/* write out all dirty pages from offset */
- ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
+ ret = filemap_write_and_wait_range(mapping, offset, LLONG_MAX);
if (ret)
return ret;
/* avoid gc operation during block exchange */
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
- down_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_lock(mapping);
truncate_pagecache(inode, offset);
while (!ret && idx > pg_start) {
idx + delta, nr, false);
f2fs_unlock_op(sbi);
}
- up_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock(mapping);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
/* write out all moved pages, if possible */
- down_write(&F2FS_I(inode)->i_mmap_sem);
- filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
+ filemap_invalidate_lock(mapping);
+ filemap_write_and_wait_range(mapping, offset, LLONG_MAX);
truncate_pagecache(inode, offset);
- up_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock(mapping);
if (!ret)
f2fs_i_size_write(inode, new_size);
goto out;
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
- down_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_lock(inode->i_mapping);
last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
released_blocks += ret;
}
- up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
- up_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock(inode->i_mapping);
+ up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
out:
inode_unlock(inode);
}
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
- down_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_lock(inode->i_mapping);
last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
reserved_blocks += ret;
}
- up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
- up_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock(inode->i_mapping);
+ up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
if (ret >= 0) {
clear_inode_flag(inode, FI_COMPRESS_RELEASED);
goto err;
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
- down_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_lock(mapping);
ret = filemap_write_and_wait_range(mapping, range.start,
to_end ? LLONG_MAX : end_addr - 1);
ret = f2fs_secure_erase(prev_bdev, inode, prev_index,
prev_block, len, range.flags);
out:
- up_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock(mapping);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
err:
inode_unlock(inode);
* back to buffered IO.
*/
if (!f2fs_force_buffered_io(inode, iocb, from) &&
- allow_outplace_dio(inode, iocb, from))
+ f2fs_lfs_mode(F2FS_I_SB(inode)))
goto write;
}
preallocated = true;
/* if we couldn't write data, we should deallocate blocks. */
if (preallocated && i_size_read(inode) < target_size) {
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
- down_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_lock(inode->i_mapping);
f2fs_truncate(inode);
- up_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock(inode->i_mapping);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
}
return ret;
}
+ static int f2fs_file_fadvise(struct file *filp, loff_t offset, loff_t len,
+ int advice)
+ {
+ struct inode *inode;
+ struct address_space *mapping;
+ struct backing_dev_info *bdi;
+
+ if (advice == POSIX_FADV_SEQUENTIAL) {
+ inode = file_inode(filp);
+ if (S_ISFIFO(inode->i_mode))
+ return -ESPIPE;
+
+ mapping = filp->f_mapping;
+ if (!mapping || len < 0)
+ return -EINVAL;
+
+ bdi = inode_to_bdi(mapping->host);
+ filp->f_ra.ra_pages = bdi->ra_pages *
+ F2FS_I_SB(inode)->seq_file_ra_mul;
+ spin_lock(&filp->f_lock);
+ filp->f_mode &= ~FMODE_RANDOM;
+ spin_unlock(&filp->f_lock);
+ return 0;
+ }
+
+ return generic_fadvise(filp, offset, len, advice);
+ }
+
#ifdef CONFIG_COMPAT
struct compat_f2fs_gc_range {
u32 sync;
#endif
.splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
+ .fadvise = f2fs_file_fadvise,
};
#include "segment.h"
#include "xattr.h"
#include "gc.h"
+ #include "iostat.h"
#define CREATE_TRACE_POINTS
#include <trace/events/f2fs.h>
[FAULT_CHECKPOINT] = "checkpoint error",
[FAULT_DISCARD] = "discard error",
[FAULT_WRITE_IO] = "write IO error",
+ [FAULT_SLAB_ALLOC] = "slab alloc",
};
void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate,
Opt_atgc,
Opt_gc_merge,
Opt_nogc_merge,
+ Opt_discard_unit,
Opt_err,
};
{Opt_atgc, "atgc"},
{Opt_gc_merge, "gc_merge"},
{Opt_nogc_merge, "nogc_merge"},
+ {Opt_discard_unit, "discard_unit=%s"},
{Opt_err, NULL},
};
return -EINVAL;
break;
case Opt_discard:
+ if (!f2fs_hw_support_discard(sbi)) {
+ f2fs_warn(sbi, "device does not support discard");
+ break;
+ }
set_opt(sbi, DISCARD);
break;
case Opt_nodiscard:
- if (f2fs_sb_has_blkzoned(sbi)) {
+ if (f2fs_hw_should_discard(sbi)) {
f2fs_warn(sbi, "discard is required for zoned block devices");
return -EINVAL;
}
case Opt_nogc_merge:
clear_opt(sbi, GC_MERGE);
break;
+ case Opt_discard_unit:
+ name = match_strdup(&args[0]);
+ if (!name)
+ return -ENOMEM;
+ if (!strcmp(name, "block")) {
+ F2FS_OPTION(sbi).discard_unit =
+ DISCARD_UNIT_BLOCK;
+ } else if (!strcmp(name, "segment")) {
+ F2FS_OPTION(sbi).discard_unit =
+ DISCARD_UNIT_SEGMENT;
+ } else if (!strcmp(name, "section")) {
+ F2FS_OPTION(sbi).discard_unit =
+ DISCARD_UNIT_SECTION;
+ } else {
+ kfree(name);
+ return -EINVAL;
+ }
+ kfree(name);
+ break;
default:
f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value",
p);
return -EINVAL;
}
#endif
+ if (f2fs_sb_has_blkzoned(sbi)) {
+ if (F2FS_OPTION(sbi).discard_unit !=
+ DISCARD_UNIT_SECTION) {
+ f2fs_info(sbi, "Zoned block device doesn't need small discard, set discard_unit=section by default");
+ F2FS_OPTION(sbi).discard_unit =
+ DISCARD_UNIT_SECTION;
+ }
+ }
#ifdef CONFIG_F2FS_FS_COMPRESSION
if (f2fs_test_compress_extension(sbi)) {
{
struct f2fs_inode_info *fi;
- fi = kmem_cache_alloc(f2fs_inode_cachep, GFP_F2FS_ZERO);
+ fi = f2fs_kmem_cache_alloc(f2fs_inode_cachep,
+ GFP_F2FS_ZERO, false, F2FS_SB(sb));
if (!fi)
return NULL;
mutex_init(&fi->inmem_lock);
init_rwsem(&fi->i_gc_rwsem[READ]);
init_rwsem(&fi->i_gc_rwsem[WRITE]);
- init_rwsem(&fi->i_mmap_sem);
init_rwsem(&fi->i_xattr_sem);
/* Will be used by directory only */
#endif
fscrypt_free_dummy_policy(&F2FS_OPTION(sbi).dummy_enc_policy);
destroy_percpu_info(sbi);
+ f2fs_destroy_iostat(sbi);
for (i = 0; i < NR_PAGE_TYPE; i++)
kvfree(sbi->write_io[i]);
#ifdef CONFIG_UNICODE
if (test_opt(sbi, ATGC))
seq_puts(seq, ",atgc");
+
+ if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_BLOCK)
+ seq_printf(seq, ",discard_unit=%s", "block");
+ else if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SEGMENT)
+ seq_printf(seq, ",discard_unit=%s", "segment");
+ else if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SECTION)
+ seq_printf(seq, ",discard_unit=%s", "section");
+
return 0;
}
F2FS_OPTION(sbi).unusable_cap = 0;
sbi->sb->s_flags |= SB_LAZYTIME;
set_opt(sbi, FLUSH_MERGE);
- set_opt(sbi, DISCARD);
- if (f2fs_sb_has_blkzoned(sbi))
+ if (f2fs_hw_support_discard(sbi) || f2fs_hw_should_discard(sbi))
+ set_opt(sbi, DISCARD);
+ if (f2fs_sb_has_blkzoned(sbi)) {
F2FS_OPTION(sbi).fs_mode = FS_MODE_LFS;
- else
+ F2FS_OPTION(sbi).discard_unit = DISCARD_UNIT_SECTION;
+ } else {
F2FS_OPTION(sbi).fs_mode = FS_MODE_ADAPTIVE;
+ F2FS_OPTION(sbi).discard_unit = DISCARD_UNIT_BLOCK;
+ }
#ifdef CONFIG_F2FS_FS_XATTR
set_opt(sbi, XATTR_USER);
static void f2fs_enable_checkpoint(struct f2fs_sb_info *sbi)
{
+ int retry = DEFAULT_RETRY_IO_COUNT;
+
/* we should flush all the data to keep data consistency */
- sync_inodes_sb(sbi->sb);
+ do {
+ sync_inodes_sb(sbi->sb);
+ cond_resched();
+ congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT);
+ } while (get_pages(sbi, F2FS_DIRTY_DATA) && retry--);
+
+ if (unlikely(retry < 0))
+ f2fs_warn(sbi, "checkpoint=enable has some unwritten data.");
down_write(&sbi->gc_lock);
f2fs_dirty_to_prefree(sbi);
bool need_restart_gc = false, need_stop_gc = false;
bool need_restart_ckpt = false, need_stop_ckpt = false;
bool need_restart_flush = false, need_stop_flush = false;
+ bool need_restart_discard = false, need_stop_discard = false;
bool no_extent_cache = !test_opt(sbi, EXTENT_CACHE);
- bool disable_checkpoint = test_opt(sbi, DISABLE_CHECKPOINT);
+ bool enable_checkpoint = !test_opt(sbi, DISABLE_CHECKPOINT);
bool no_io_align = !F2FS_IO_ALIGNED(sbi);
bool no_atgc = !test_opt(sbi, ATGC);
+ bool no_discard = !test_opt(sbi, DISCARD);
bool no_compress_cache = !test_opt(sbi, COMPRESS_CACHE);
- bool checkpoint_changed;
+ bool block_unit_discard = f2fs_block_unit_discard(sbi);
+ struct discard_cmd_control *dcc;
#ifdef CONFIG_QUOTA
int i, j;
#endif
err = parse_options(sb, data, true);
if (err)
goto restore_opts;
- checkpoint_changed =
- disable_checkpoint != test_opt(sbi, DISABLE_CHECKPOINT);
/*
* Previous and new state of filesystem is RO,
goto restore_opts;
}
+ if (block_unit_discard != f2fs_block_unit_discard(sbi)) {
+ err = -EINVAL;
+ f2fs_warn(sbi, "switch discard_unit option is not allowed");
+ goto restore_opts;
+ }
+
if ((*flags & SB_RDONLY) && test_opt(sbi, DISABLE_CHECKPOINT)) {
err = -EINVAL;
f2fs_warn(sbi, "disabling checkpoint not compatible with read-only");
need_stop_flush = true;
}
- if (checkpoint_changed) {
+ if (no_discard == !!test_opt(sbi, DISCARD)) {
+ if (test_opt(sbi, DISCARD)) {
+ err = f2fs_start_discard_thread(sbi);
+ if (err)
+ goto restore_flush;
+ need_stop_discard = true;
+ } else {
+ dcc = SM_I(sbi)->dcc_info;
+ f2fs_stop_discard_thread(sbi);
+ if (atomic_read(&dcc->discard_cmd_cnt))
+ f2fs_issue_discard_timeout(sbi);
+ need_restart_discard = true;
+ }
+ }
+
+ if (enable_checkpoint == !!test_opt(sbi, DISABLE_CHECKPOINT)) {
if (test_opt(sbi, DISABLE_CHECKPOINT)) {
err = f2fs_disable_checkpoint(sbi);
if (err)
- goto restore_flush;
+ goto restore_discard;
} else {
f2fs_enable_checkpoint(sbi);
}
adjust_unusable_cap_perc(sbi);
*flags = (*flags & ~SB_LAZYTIME) | (sb->s_flags & SB_LAZYTIME);
return 0;
+ restore_discard:
+ if (need_restart_discard) {
+ if (f2fs_start_discard_thread(sbi))
+ f2fs_warn(sbi, "discard has been stopped");
+ } else if (need_stop_discard) {
+ f2fs_stop_discard_thread(sbi);
+ }
restore_flush:
if (need_restart_flush) {
if (f2fs_create_flush_cmd_control(sbi))
return 0;
}
+ static int f2fs_quota_sync_file(struct f2fs_sb_info *sbi, int type)
+ {
+ struct quota_info *dqopt = sb_dqopt(sbi->sb);
+ struct address_space *mapping = dqopt->files[type]->i_mapping;
+ int ret = 0;
+
+ ret = dquot_writeback_dquots(sbi->sb, type);
+ if (ret)
+ goto out;
+
+ ret = filemap_fdatawrite(mapping);
+ if (ret)
+ goto out;
+
+ /* if we are using journalled quota */
+ if (is_journalled_quota(sbi))
+ goto out;
+
+ ret = filemap_fdatawait(mapping);
+
+ truncate_inode_pages(&dqopt->files[type]->i_data, 0);
+ out:
+ if (ret)
+ set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
+ return ret;
+ }
+
int f2fs_quota_sync(struct super_block *sb, int type)
{
struct f2fs_sb_info *sbi = F2FS_SB(sb);
int cnt;
int ret;
- /*
- * do_quotactl
- * f2fs_quota_sync
- * down_read(quota_sem)
- * dquot_writeback_dquots()
- * f2fs_dquot_commit
- * block_operation
- * down_read(quota_sem)
- */
- f2fs_lock_op(sbi);
-
- down_read(&sbi->quota_sem);
- ret = dquot_writeback_dquots(sb, type);
- if (ret)
- goto out;
-
/*
* Now when everything is written we can discard the pagecache so
* that userspace sees the changes.
*/
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- struct address_space *mapping;
if (type != -1 && cnt != type)
continue;
- if (!sb_has_quota_active(sb, cnt))
- continue;
- mapping = dqopt->files[cnt]->i_mapping;
+ if (!sb_has_quota_active(sb, type))
+ return 0;
- ret = filemap_fdatawrite(mapping);
- if (ret)
- goto out;
+ inode_lock(dqopt->files[cnt]);
- /* if we are using journalled quota */
- if (is_journalled_quota(sbi))
- continue;
+ /*
+ * do_quotactl
+ * f2fs_quota_sync
+ * down_read(quota_sem)
+ * dquot_writeback_dquots()
+ * f2fs_dquot_commit
+ * block_operation
+ * down_read(quota_sem)
+ */
+ f2fs_lock_op(sbi);
+ down_read(&sbi->quota_sem);
- ret = filemap_fdatawait(mapping);
- if (ret)
- set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
+ ret = f2fs_quota_sync_file(sbi, cnt);
+
+ up_read(&sbi->quota_sem);
+ f2fs_unlock_op(sbi);
- inode_lock(dqopt->files[cnt]);
- truncate_inode_pages(&dqopt->files[cnt]->i_data, 0);
inode_unlock(dqopt->files[cnt]);
+
+ if (ret)
+ break;
}
- out:
- if (ret)
- set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
- up_read(&sbi->quota_sem);
- f2fs_unlock_op(sbi);
return ret;
}
return -EFSCORRUPTED;
}
- if (le32_to_cpu(raw_super->cp_payload) >
- (blocks_per_seg - F2FS_CP_PACKS)) {
- f2fs_info(sbi, "Insane cp_payload (%u > %u)",
+ if (le32_to_cpu(raw_super->cp_payload) >=
+ (blocks_per_seg - F2FS_CP_PACKS -
+ NR_CURSEG_PERSIST_TYPE)) {
+ f2fs_info(sbi, "Insane cp_payload (%u >= %u)",
le32_to_cpu(raw_super->cp_payload),
- blocks_per_seg - F2FS_CP_PACKS);
+ blocks_per_seg - F2FS_CP_PACKS -
+ NR_CURSEG_PERSIST_TYPE);
return -EFSCORRUPTED;
}
unsigned int cp_pack_start_sum, cp_payload;
block_t user_block_count, valid_user_blocks;
block_t avail_node_count, valid_node_count;
+ unsigned int nat_blocks, nat_bits_bytes, nat_bits_blocks;
int i, j;
total = le32_to_cpu(raw_super->segment_count);
return 1;
}
+ nat_blocks = nat_segs << log_blocks_per_seg;
+ nat_bits_bytes = nat_blocks / BITS_PER_BYTE;
+ nat_bits_blocks = F2FS_BLK_ALIGN((nat_bits_bytes << 1) + 8);
+ if (__is_set_ckpt_flags(ckpt, CP_NAT_BITS_FLAG) &&
+ (cp_payload + F2FS_CP_PACKS +
+ NR_CURSEG_PERSIST_TYPE + nat_bits_blocks >= blocks_per_seg)) {
+ f2fs_warn(sbi, "Insane cp_payload: %u, nat_bits_blocks: %u)",
+ cp_payload, nat_bits_blocks);
+ return -EFSCORRUPTED;
+ }
+
if (unlikely(f2fs_cp_error(sbi))) {
f2fs_err(sbi, "A bug case: need to run fsck");
return 1;
sbi->next_victim_seg[FG_GC] = NULL_SEGNO;
sbi->max_victim_search = DEF_MAX_VICTIM_SEARCH;
sbi->migration_granularity = sbi->segs_per_sec;
+ sbi->seq_file_ra_mul = MIN_RA_MUL;
sbi->dir_level = DEF_DIR_LEVEL;
sbi->interval_time[CP_TIME] = DEF_CP_INTERVAL;
/* adjust parameters according to the volume size */
if (sm_i->main_segments <= SMALL_VOLUME_SEGMENTS) {
F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_REUSE;
- sm_i->dcc_info->discard_granularity = 1;
+ if (f2fs_block_unit_discard(sbi))
+ sm_i->dcc_info->discard_granularity = 1;
sm_i->ipu_policy = 1 << F2FS_IPU_FORCE;
}
set_sbi_flag(sbi, SBI_POR_DOING);
spin_lock_init(&sbi->stat_lock);
- /* init iostat info */
- spin_lock_init(&sbi->iostat_lock);
- sbi->iostat_enable = false;
- sbi->iostat_period_ms = DEFAULT_IOSTAT_PERIOD_MS;
-
for (i = 0; i < NR_PAGE_TYPE; i++) {
int n = (i == META) ? 1 : NR_TEMP_TYPE;
int j;
init_waitqueue_head(&sbi->cp_wait);
init_sb_info(sbi);
- err = init_percpu_info(sbi);
+ err = f2fs_init_iostat(sbi);
if (err)
goto free_bio_info;
+ err = init_percpu_info(sbi);
+ if (err)
+ goto free_iostat;
+
if (F2FS_IO_ALIGNED(sbi)) {
sbi->write_io_dummy =
mempool_create_page_pool(2 * (F2FS_IO_SIZE(sbi) - 1), 0);
mempool_destroy(sbi->write_io_dummy);
free_percpu:
destroy_percpu_info(sbi);
+ free_iostat:
+ f2fs_destroy_iostat(sbi);
free_bio_info:
for (i = 0; i < NR_PAGE_TYPE; i++)
kvfree(sbi->write_io[i]);
err = f2fs_init_post_read_processing();
if (err)
goto free_root_stats;
- err = f2fs_init_bio_entry_cache();
+ err = f2fs_init_iostat_processing();
if (err)
goto free_post_read;
+ err = f2fs_init_bio_entry_cache();
+ if (err)
+ goto free_iostat;
err = f2fs_init_bioset();
if (err)
goto free_bio_enrty_cache;
f2fs_destroy_bioset();
free_bio_enrty_cache:
f2fs_destroy_bio_entry_cache();
+ free_iostat:
+ f2fs_destroy_iostat_processing();
free_post_read:
f2fs_destroy_post_read_processing();
free_root_stats:
f2fs_destroy_compress_mempool();
f2fs_destroy_bioset();
f2fs_destroy_bio_entry_cache();
+ f2fs_destroy_iostat_processing();
f2fs_destroy_post_read_processing();
f2fs_destroy_root_stats();
unregister_filesystem(&f2fs_fs_type);
#include "f2fs.h"
#include "segment.h"
#include "gc.h"
+ #include "iostat.h"
#include <trace/events/f2fs.h>
static struct proc_dir_entry *f2fs_proc_root;
return sysfs_emit(buf, "%u\n", sbi->compr_new_inode);
#endif
+ if (!strcmp(a->attr.name, "gc_segment_mode"))
+ return sysfs_emit(buf, "%u\n", sbi->gc_segment_mode);
+
+ if (!strcmp(a->attr.name, "gc_reclaimed_segments")) {
+ return sysfs_emit(buf, "%u\n",
+ sbi->gc_reclaimed_segs[sbi->gc_segment_mode]);
+ }
+
ui = (unsigned int *)(ptr + a->offset);
return sprintf(buf, "%u\n", *ui);
set = false;
}
- if (strlen(name) >= F2FS_EXTENSION_LEN)
+ if (!strlen(name) || strlen(name) >= F2FS_EXTENSION_LEN)
return -EINVAL;
down_write(&sbi->sb_lock);
ret = kstrtol(name, 10, &data);
if (ret)
return ret;
- if (data >= IOPRIO_BE_NR || data < 0)
+ if (data >= IOPRIO_NR_LEVELS || data < 0)
return -EINVAL;
cprc->ckpt_thread_ioprio = IOPRIO_PRIO_VALUE(class, data);
if (!strcmp(a->attr.name, "discard_granularity")) {
if (t == 0 || t > MAX_PLIST_NUM)
return -EINVAL;
+ if (!f2fs_block_unit_discard(sbi))
+ return -EINVAL;
if (t == *ui)
return count;
*ui = t;
return count;
}
+ #ifdef CONFIG_F2FS_IOSTAT
if (!strcmp(a->attr.name, "iostat_enable")) {
sbi->iostat_enable = !!t;
if (!sbi->iostat_enable)
spin_unlock(&sbi->iostat_lock);
return count;
}
+ #endif
#ifdef CONFIG_F2FS_FS_COMPRESSION
if (!strcmp(a->attr.name, "compr_written_block") ||
return count;
}
+ if (!strcmp(a->attr.name, "gc_segment_mode")) {
+ if (t < MAX_GC_MODE)
+ sbi->gc_segment_mode = t;
+ else
+ return -EINVAL;
+ return count;
+ }
+
+ if (!strcmp(a->attr.name, "gc_reclaimed_segments")) {
+ if (t != 0)
+ return -EINVAL;
+ sbi->gc_reclaimed_segs[sbi->gc_segment_mode] = 0;
+ return count;
+ }
+
+ if (!strcmp(a->attr.name, "seq_file_ra_mul")) {
+ if (t >= MIN_RA_MUL && t <= MAX_RA_MUL)
+ sbi->seq_file_ra_mul = t;
+ else
+ return -EINVAL;
+ return count;
+ }
+
*ui = (unsigned int)t;
return count;
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_idle_interval, interval_time[GC_TIME]);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info,
umount_discard_timeout, interval_time[UMOUNT_DISCARD_TIMEOUT]);
+ #ifdef CONFIG_F2FS_IOSTAT
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_enable, iostat_enable);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_period_ms, iostat_period_ms);
+ #endif
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, readdir_ra, readdir_ra);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_io_bytes, max_io_bytes);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_pin_file_thresh, gc_pin_file_threshold);
F2FS_RW_ATTR(ATGC_INFO, atgc_management, atgc_age_weight, age_weight);
F2FS_RW_ATTR(ATGC_INFO, atgc_management, atgc_age_threshold, age_threshold);
+ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, seq_file_ra_mul, seq_file_ra_mul);
+ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_segment_mode, gc_segment_mode);
+ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_reclaimed_segments, gc_reclaimed_segs);
+
#define ATTR_LIST(name) (&f2fs_attr_##name.attr)
static struct attribute *f2fs_attrs[] = {
ATTR_LIST(gc_urgent_sleep_time),
ATTR_LIST(discard_idle_interval),
ATTR_LIST(gc_idle_interval),
ATTR_LIST(umount_discard_timeout),
+ #ifdef CONFIG_F2FS_IOSTAT
ATTR_LIST(iostat_enable),
ATTR_LIST(iostat_period_ms),
+ #endif
ATTR_LIST(readdir_ra),
ATTR_LIST(max_io_bytes),
ATTR_LIST(gc_pin_file_thresh),
ATTR_LIST(atgc_candidate_count),
ATTR_LIST(atgc_age_weight),
ATTR_LIST(atgc_age_threshold),
+ ATTR_LIST(seq_file_ra_mul),
+ ATTR_LIST(gc_segment_mode),
+ ATTR_LIST(gc_reclaimed_segments),
NULL,
};
ATTRIBUTE_GROUPS(f2fs);
return 0;
}
- void f2fs_record_iostat(struct f2fs_sb_info *sbi)
- {
- unsigned long long iostat_diff[NR_IO_TYPE];
- int i;
-
- if (time_is_after_jiffies(sbi->iostat_next_period))
- return;
-
- /* Need double check under the lock */
- spin_lock(&sbi->iostat_lock);
- if (time_is_after_jiffies(sbi->iostat_next_period)) {
- spin_unlock(&sbi->iostat_lock);
- return;
- }
- sbi->iostat_next_period = jiffies +
- msecs_to_jiffies(sbi->iostat_period_ms);
-
- for (i = 0; i < NR_IO_TYPE; i++) {
- iostat_diff[i] = sbi->rw_iostat[i] -
- sbi->prev_rw_iostat[i];
- sbi->prev_rw_iostat[i] = sbi->rw_iostat[i];
- }
- spin_unlock(&sbi->iostat_lock);
-
- trace_f2fs_iostat(sbi, iostat_diff);
- }
-
- static int __maybe_unused iostat_info_seq_show(struct seq_file *seq,
- void *offset)
- {
- struct super_block *sb = seq->private;
- struct f2fs_sb_info *sbi = F2FS_SB(sb);
- time64_t now = ktime_get_real_seconds();
-
- if (!sbi->iostat_enable)
- return 0;
-
- seq_printf(seq, "time: %-16llu\n", now);
-
- /* print app write IOs */
- seq_puts(seq, "[WRITE]\n");
- seq_printf(seq, "app buffered: %-16llu\n",
- sbi->rw_iostat[APP_BUFFERED_IO]);
- seq_printf(seq, "app direct: %-16llu\n",
- sbi->rw_iostat[APP_DIRECT_IO]);
- seq_printf(seq, "app mapped: %-16llu\n",
- sbi->rw_iostat[APP_MAPPED_IO]);
-
- /* print fs write IOs */
- seq_printf(seq, "fs data: %-16llu\n",
- sbi->rw_iostat[FS_DATA_IO]);
- seq_printf(seq, "fs node: %-16llu\n",
- sbi->rw_iostat[FS_NODE_IO]);
- seq_printf(seq, "fs meta: %-16llu\n",
- sbi->rw_iostat[FS_META_IO]);
- seq_printf(seq, "fs gc data: %-16llu\n",
- sbi->rw_iostat[FS_GC_DATA_IO]);
- seq_printf(seq, "fs gc node: %-16llu\n",
- sbi->rw_iostat[FS_GC_NODE_IO]);
- seq_printf(seq, "fs cp data: %-16llu\n",
- sbi->rw_iostat[FS_CP_DATA_IO]);
- seq_printf(seq, "fs cp node: %-16llu\n",
- sbi->rw_iostat[FS_CP_NODE_IO]);
- seq_printf(seq, "fs cp meta: %-16llu\n",
- sbi->rw_iostat[FS_CP_META_IO]);
-
- /* print app read IOs */
- seq_puts(seq, "[READ]\n");
- seq_printf(seq, "app buffered: %-16llu\n",
- sbi->rw_iostat[APP_BUFFERED_READ_IO]);
- seq_printf(seq, "app direct: %-16llu\n",
- sbi->rw_iostat[APP_DIRECT_READ_IO]);
- seq_printf(seq, "app mapped: %-16llu\n",
- sbi->rw_iostat[APP_MAPPED_READ_IO]);
-
- /* print fs read IOs */
- seq_printf(seq, "fs data: %-16llu\n",
- sbi->rw_iostat[FS_DATA_READ_IO]);
- seq_printf(seq, "fs gc data: %-16llu\n",
- sbi->rw_iostat[FS_GDATA_READ_IO]);
- seq_printf(seq, "fs compr_data: %-16llu\n",
- sbi->rw_iostat[FS_CDATA_READ_IO]);
- seq_printf(seq, "fs node: %-16llu\n",
- sbi->rw_iostat[FS_NODE_READ_IO]);
- seq_printf(seq, "fs meta: %-16llu\n",
- sbi->rw_iostat[FS_META_READ_IO]);
-
- /* print other IOs */
- seq_puts(seq, "[OTHER]\n");
- seq_printf(seq, "fs discard: %-16llu\n",
- sbi->rw_iostat[FS_DISCARD]);
-
- return 0;
- }
-
static int __maybe_unused victim_bits_seq_show(struct seq_file *seq,
void *offset)
{
sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root);
if (sbi->s_proc) {
- proc_create_single_data("segment_info", S_IRUGO, sbi->s_proc,
+ proc_create_single_data("segment_info", 0444, sbi->s_proc,
segment_info_seq_show, sb);
- proc_create_single_data("segment_bits", S_IRUGO, sbi->s_proc,
+ proc_create_single_data("segment_bits", 0444, sbi->s_proc,
segment_bits_seq_show, sb);
- proc_create_single_data("iostat_info", S_IRUGO, sbi->s_proc,
+ #ifdef CONFIG_F2FS_IOSTAT
+ proc_create_single_data("iostat_info", 0444, sbi->s_proc,
iostat_info_seq_show, sb);
- proc_create_single_data("victim_bits", S_IRUGO, sbi->s_proc,
+ #endif
+ proc_create_single_data("victim_bits", 0444, sbi->s_proc,
victim_bits_seq_show, sb);
}
return 0;
void f2fs_unregister_sysfs(struct f2fs_sb_info *sbi)
{
if (sbi->s_proc) {
+ #ifdef CONFIG_F2FS_IOSTAT
remove_proc_entry("iostat_info", sbi->s_proc);
+ #endif
remove_proc_entry("segment_info", sbi->s_proc);
remove_proc_entry("segment_bits", sbi->s_proc);
remove_proc_entry("victim_bits", sbi->s_proc);