#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/buffer_head.h>
+#include <linux/sched/mm.h>
#include <linux/mpage.h>
#include <linux/writeback.h>
-#include <linux/backing-dev.h>
#include <linux/pagevec.h>
#include <linux/blkdev.h>
#include <linux/bio.h>
#include <linux/cleancache.h>
#include <linux/sched/signal.h>
#include <linux/fiemap.h>
+ #include <linux/iomap.h>
#include "f2fs.h"
#include "node.h"
if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
return -EPERM;
- err = f2fs_get_node_info(sbi, dn->nid, &ni);
+ err = f2fs_get_node_info(sbi, dn->nid, &ni, false);
if (err)
return err;
f2fs_invalidate_compress_page(sbi, old_blkaddr);
}
f2fs_update_data_blkaddr(dn, dn->data_blkaddr);
-
- /*
- * i_size will be updated by direct_IO. Otherwise, we'll get stale
- * data from unwritten block via dio_read.
- */
return 0;
}
- int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
- {
- struct inode *inode = file_inode(iocb->ki_filp);
- struct f2fs_map_blocks map;
- int flag;
- int err = 0;
- bool direct_io = iocb->ki_flags & IOCB_DIRECT;
-
- map.m_lblk = F2FS_BLK_ALIGN(iocb->ki_pos);
- map.m_len = F2FS_BYTES_TO_BLK(iocb->ki_pos + iov_iter_count(from));
- if (map.m_len > map.m_lblk)
- map.m_len -= map.m_lblk;
- else
- map.m_len = 0;
-
- map.m_next_pgofs = NULL;
- map.m_next_extent = NULL;
- map.m_seg_type = NO_CHECK_TYPE;
- map.m_may_create = true;
-
- if (direct_io) {
- map.m_seg_type = f2fs_rw_hint_to_seg_type(iocb->ki_hint);
- flag = f2fs_force_buffered_io(inode, iocb, from) ?
- F2FS_GET_BLOCK_PRE_AIO :
- F2FS_GET_BLOCK_PRE_DIO;
- goto map_blocks;
- }
- if (iocb->ki_pos + iov_iter_count(from) > MAX_INLINE_DATA(inode)) {
- err = f2fs_convert_inline_inode(inode);
- if (err)
- return err;
- }
- if (f2fs_has_inline_data(inode))
- return err;
-
- flag = F2FS_GET_BLOCK_PRE_AIO;
-
- map_blocks:
- err = f2fs_map_blocks(inode, &map, 1, flag);
- if (map.m_len > 0 && err == -ENOSPC) {
- if (!direct_io)
- set_inode_flag(inode, FI_NO_PREALLOC);
- err = 0;
- }
- return err;
- }
-
void f2fs_do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
{
if (flag == F2FS_GET_BLOCK_PRE_AIO) {
flag != F2FS_GET_BLOCK_DIO);
err = __allocate_data_block(&dn,
map->m_seg_type);
- if (!err)
+ if (!err) {
+ if (flag == F2FS_GET_BLOCK_PRE_DIO)
+ file_need_truncate(inode);
set_inode_flag(inode, FI_APPEND_WRITE);
+ }
}
if (err)
goto sync_out;
return (blks << inode->i_blkbits);
}
- static int __get_data_block(struct inode *inode, sector_t iblock,
- struct buffer_head *bh, int create, int flag,
- pgoff_t *next_pgofs, int seg_type, bool may_write)
- {
- struct f2fs_map_blocks map;
- int err;
-
- map.m_lblk = iblock;
- map.m_len = bytes_to_blks(inode, bh->b_size);
- map.m_next_pgofs = next_pgofs;
- map.m_next_extent = NULL;
- map.m_seg_type = seg_type;
- map.m_may_create = may_write;
-
- err = f2fs_map_blocks(inode, &map, create, flag);
- if (!err) {
- map_bh(bh, inode->i_sb, map.m_pblk);
- bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags;
- bh->b_size = blks_to_bytes(inode, map.m_len);
-
- if (map.m_multidev_dio)
- bh->b_bdev = map.m_bdev;
- }
- return err;
- }
-
- static int get_data_block_dio_write(struct inode *inode, sector_t iblock,
- struct buffer_head *bh_result, int create)
- {
- return __get_data_block(inode, iblock, bh_result, create,
- F2FS_GET_BLOCK_DIO, NULL,
- f2fs_rw_hint_to_seg_type(inode->i_write_hint),
- true);
- }
-
- static int get_data_block_dio(struct inode *inode, sector_t iblock,
- struct buffer_head *bh_result, int create)
- {
- return __get_data_block(inode, iblock, bh_result, create,
- F2FS_GET_BLOCK_DIO, NULL,
- f2fs_rw_hint_to_seg_type(inode->i_write_hint),
- false);
- }
-
static int f2fs_xattr_fiemap(struct inode *inode,
struct fiemap_extent_info *fieinfo)
{
if (!page)
return -ENOMEM;
- err = f2fs_get_node_info(sbi, inode->i_ino, &ni);
+ err = f2fs_get_node_info(sbi, inode->i_ino, &ni, false);
if (err) {
f2fs_put_page(page, 1);
return err;
if (!page)
return -ENOMEM;
- err = f2fs_get_node_info(sbi, xnid, &ni);
+ err = f2fs_get_node_info(sbi, xnid, &ni, false);
if (err) {
f2fs_put_page(page, 1);
return err;
/* flush pending IOs and wait for a while in the ENOMEM case */
if (PTR_ERR(fio->encrypted_page) == -ENOMEM) {
f2fs_flush_merged_writes(fio->sbi);
- congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT);
+ memalloc_retry_wait(GFP_NOFS);
gfp_flags |= __GFP_NOFAIL;
goto retry_encrypt;
}
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ /* The below cases were checked when setting it. */
+ if (f2fs_is_pinned_file(inode))
+ return false;
+ if (fio && is_sbi_flag_set(sbi, SBI_NEED_FSCK))
+ return true;
if (f2fs_lfs_mode(sbi))
return true;
if (S_ISDIR(inode->i_mode))
return true;
if (f2fs_is_atomic_file(inode))
return true;
- if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
- return true;
/* swap file is migrating in aligned write mode */
if (is_inode_flag_set(inode, FI_ALIGNED_WRITE))
fio->need_lock = LOCK_REQ;
}
- err = f2fs_get_node_info(fio->sbi, dn.nid, &ni);
+ err = f2fs_get_node_info(fio->sbi, dn.nid, &ni, false);
if (err)
goto out_writepage;
.rpages = NULL,
.nr_rpages = 0,
.cpages = NULL,
+ .valid_nr_cpages = 0,
.rbuf = NULL,
.cbuf = NULL,
.rlen = PAGE_SIZE * F2FS_I(inode)->i_cluster_size,
FS_CP_DATA_IO : FS_DATA_IO);
}
- static void f2fs_write_failed(struct inode *inode, loff_t to)
+ void f2fs_write_failed(struct inode *inode, loff_t to)
{
loff_t i_size = i_size_read(inode);
int flag;
/*
- * we already allocated all the blocks, so we don't need to get
- * the block addresses when there is no need to fill the page.
+ * If a whole page is being written and we already preallocated all the
+ * blocks, then there is no need to get a block address now.
*/
- if (!f2fs_has_inline_data(inode) && len == PAGE_SIZE &&
- !is_inode_flag_set(inode, FI_NO_PREALLOC) &&
- !f2fs_verity_in_progress(inode))
+ if (len == PAGE_SIZE && is_inode_flag_set(inode, FI_PREALLOCATED_ALL))
return 0;
/* f2fs_lock_op avoids race between write CP and convert_inline_page */
return copied;
}
- static int check_direct_IO(struct inode *inode, struct iov_iter *iter,
- loff_t offset)
- {
- unsigned i_blkbits = READ_ONCE(inode->i_blkbits);
- unsigned blkbits = i_blkbits;
- unsigned blocksize_mask = (1 << blkbits) - 1;
- unsigned long align = offset | iov_iter_alignment(iter);
- struct block_device *bdev = inode->i_sb->s_bdev;
-
- if (iov_iter_rw(iter) == READ && offset >= i_size_read(inode))
- return 1;
-
- if (align & blocksize_mask) {
- if (bdev)
- blkbits = blksize_bits(bdev_logical_block_size(bdev));
- blocksize_mask = (1 << blkbits) - 1;
- if (align & blocksize_mask)
- return -EINVAL;
- return 1;
- }
- return 0;
- }
-
- static void f2fs_dio_end_io(struct bio *bio)
- {
- struct f2fs_private_dio *dio = bio->bi_private;
-
- dec_page_count(F2FS_I_SB(dio->inode),
- dio->write ? F2FS_DIO_WRITE : F2FS_DIO_READ);
-
- bio->bi_private = dio->orig_private;
- bio->bi_end_io = dio->orig_end_io;
-
- kfree(dio);
-
- bio_endio(bio);
- }
-
- static void f2fs_dio_submit_bio(struct bio *bio, struct inode *inode,
- loff_t file_offset)
- {
- struct f2fs_private_dio *dio;
- bool write = (bio_op(bio) == REQ_OP_WRITE);
-
- dio = f2fs_kzalloc(F2FS_I_SB(inode),
- sizeof(struct f2fs_private_dio), GFP_NOFS);
- if (!dio)
- goto out;
-
- dio->inode = inode;
- dio->orig_end_io = bio->bi_end_io;
- dio->orig_private = bio->bi_private;
- dio->write = write;
-
- bio->bi_end_io = f2fs_dio_end_io;
- bio->bi_private = dio;
-
- inc_page_count(F2FS_I_SB(inode),
- write ? F2FS_DIO_WRITE : F2FS_DIO_READ);
-
- submit_bio(bio);
- return;
- out:
- bio->bi_status = BLK_STS_IOERR;
- bio_endio(bio);
- }
-
- static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
- {
- struct address_space *mapping = iocb->ki_filp->f_mapping;
- struct inode *inode = mapping->host;
- struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
- struct f2fs_inode_info *fi = F2FS_I(inode);
- size_t count = iov_iter_count(iter);
- loff_t offset = iocb->ki_pos;
- int rw = iov_iter_rw(iter);
- int err;
- enum rw_hint hint = iocb->ki_hint;
- int whint_mode = F2FS_OPTION(sbi).whint_mode;
- bool do_opu;
-
- err = check_direct_IO(inode, iter, offset);
- if (err)
- return err < 0 ? err : 0;
-
- if (f2fs_force_buffered_io(inode, iocb, iter))
- return 0;
-
- do_opu = rw == WRITE && f2fs_lfs_mode(sbi);
-
- trace_f2fs_direct_IO_enter(inode, offset, count, rw);
-
- if (rw == WRITE && whint_mode == WHINT_MODE_OFF)
- iocb->ki_hint = WRITE_LIFE_NOT_SET;
-
- if (iocb->ki_flags & IOCB_NOWAIT) {
- if (!down_read_trylock(&fi->i_gc_rwsem[rw])) {
- iocb->ki_hint = hint;
- err = -EAGAIN;
- goto out;
- }
- if (do_opu && !down_read_trylock(&fi->i_gc_rwsem[READ])) {
- up_read(&fi->i_gc_rwsem[rw]);
- iocb->ki_hint = hint;
- err = -EAGAIN;
- goto out;
- }
- } else {
- down_read(&fi->i_gc_rwsem[rw]);
- if (do_opu)
- down_read(&fi->i_gc_rwsem[READ]);
- }
-
- err = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev,
- iter, rw == WRITE ? get_data_block_dio_write :
- get_data_block_dio, NULL, f2fs_dio_submit_bio,
- rw == WRITE ? DIO_LOCKING | DIO_SKIP_HOLES :
- DIO_SKIP_HOLES);
-
- if (do_opu)
- up_read(&fi->i_gc_rwsem[READ]);
-
- up_read(&fi->i_gc_rwsem[rw]);
-
- if (rw == WRITE) {
- if (whint_mode == WHINT_MODE_OFF)
- iocb->ki_hint = hint;
- if (err > 0) {
- f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_IO,
- err);
- if (!do_opu)
- set_inode_flag(inode, FI_UPDATE_WRITE);
- } else if (err == -EIOCBQUEUED) {
- f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_IO,
- count - iov_iter_count(iter));
- } else if (err < 0) {
- f2fs_write_failed(inode, offset + count);
- }
- } else {
- if (err > 0)
- f2fs_update_iostat(sbi, APP_DIRECT_READ_IO, err);
- else if (err == -EIOCBQUEUED)
- f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_READ_IO,
- count - iov_iter_count(iter));
- }
-
- out:
- trace_f2fs_direct_IO_exit(inode, offset, count, rw, err);
-
- return err;
- }
-
void f2fs_invalidate_page(struct page *page, unsigned int offset,
unsigned int length)
{
clear_page_private_gcing(page);
- if (test_opt(sbi, COMPRESS_CACHE)) {
- if (f2fs_compressed_file(inode))
- f2fs_invalidate_compress_pages(sbi, inode->i_ino);
- if (inode->i_ino == F2FS_COMPRESS_INO(sbi))
- clear_page_private_data(page);
- }
+ if (test_opt(sbi, COMPRESS_CACHE) &&
+ inode->i_ino == F2FS_COMPRESS_INO(sbi))
+ clear_page_private_data(page);
if (page_private_atomic(page))
return f2fs_drop_inmem_page(inode, page);
return 0;
if (test_opt(F2FS_P_SB(page), COMPRESS_CACHE)) {
- struct f2fs_sb_info *sbi = F2FS_P_SB(page);
struct inode *inode = page->mapping->host;
- if (f2fs_compressed_file(inode))
- f2fs_invalidate_compress_pages(sbi, inode->i_ino);
- if (inode->i_ino == F2FS_COMPRESS_INO(sbi))
+ if (inode->i_ino == F2FS_COMPRESS_INO(F2FS_I_SB(inode)))
clear_page_private_data(page);
}
.set_page_dirty = f2fs_set_data_page_dirty,
.invalidatepage = f2fs_invalidate_page,
.releasepage = f2fs_release_page,
- .direct_IO = f2fs_direct_IO,
+ .direct_IO = noop_direct_IO,
.bmap = f2fs_bmap,
.swap_activate = f2fs_swap_activate,
.swap_deactivate = f2fs_swap_deactivate,
{
kmem_cache_destroy(bio_entry_slab);
}
+
+ /*
+  * iomap_begin hook for f2fs.
+  *
+  * Queries f2fs_map_blocks() (F2FS_GET_BLOCK_DIO) for the blocks covering
+  * [offset, offset + length) of @inode and describes the answer in @iomap:
+  * either a mapped/unwritten extent with its device and byte address, or a
+  * hole that extends to the next page offset reported by the lookup.
+  * Block creation is requested only when IOMAP_WRITE is set in @flags.
+  * @srcmap is not used.
+  *
+  * Returns 0 on success, the error from f2fs_map_blocks(), or -EINVAL if
+  * the lookup reports a block address that fails __is_valid_data_blkaddr().
+  */
+ static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
+ 			    unsigned int flags, struct iomap *iomap,
+ 			    struct iomap *srcmap)
+ {
+ 	pgoff_t hole_end = 0;
+ 	struct f2fs_map_blocks map = {};
+ 	int ret;
+
+ 	map.m_seg_type = f2fs_rw_hint_to_seg_type(inode->i_write_hint);
+ 	map.m_next_pgofs = &hole_end;
+ 	map.m_lblk = bytes_to_blks(inode, offset);
+ 	/* number of blocks touched by the byte range, last byte inclusive */
+ 	map.m_len = bytes_to_blks(inode, offset + length - 1) - map.m_lblk + 1;
+ 	if (flags & IOMAP_WRITE)
+ 		map.m_may_create = true;
+
+ 	ret = f2fs_map_blocks(inode, &map, flags & IOMAP_WRITE,
+ 			      F2FS_GET_BLOCK_DIO);
+ 	if (ret)
+ 		return ret;
+
+ 	iomap->offset = blks_to_bytes(inode, map.m_lblk);
+
+ 	if (!(map.m_flags & (F2FS_MAP_MAPPED | F2FS_MAP_UNWRITTEN))) {
+ 		/* neither mapped nor unwritten: report a hole */
+ 		iomap->length = blks_to_bytes(inode, hole_end) -
+ 				iomap->offset;
+ 		iomap->type = IOMAP_HOLE;
+ 		iomap->addr = IOMAP_NULL_ADDR;
+ 	} else {
+ 		iomap->length = blks_to_bytes(inode, map.m_len);
+ 		if (!(map.m_flags & F2FS_MAP_MAPPED)) {
+ 			iomap->type = IOMAP_UNWRITTEN;
+ 		} else {
+ 			iomap->type = IOMAP_MAPPED;
+ 			iomap->flags |= IOMAP_F_MERGED;
+ 		}
+ 		/* type/length are already filled in when this bails out */
+ 		if (WARN_ON_ONCE(!__is_valid_data_blkaddr(map.m_pblk)))
+ 			return -EINVAL;
+
+ 		iomap->bdev = map.m_bdev;
+ 		iomap->addr = blks_to_bytes(inode, map.m_pblk);
+ 	}
+
+ 	if (map.m_flags & F2FS_MAP_NEW)
+ 		iomap->flags |= IOMAP_F_NEW;
+ 	/* dirty when a datasync is pending or the range ends beyond i_size */
+ 	if ((inode->i_state & I_DIRTY_DATASYNC) ||
+ 	    offset + length > i_size_read(inode))
+ 		iomap->flags |= IOMAP_F_DIRTY;
+
+ 	return 0;
+ }
+
+ const struct iomap_ops f2fs_iomap_ops = { /* f2fs iomap callbacks; only the begin hook is provided */
+ .iomap_begin = f2fs_iomap_begin,
+ };
#include <linux/fscrypt.h>
#include <linux/fsverity.h>
+struct pagevec;
+
#ifdef CONFIG_F2FS_CHECK_FS
#define f2fs_bug_on(sbi, condition) BUG_ON(condition)
#else
FAULT_WRITE_IO,
FAULT_SLAB_ALLOC,
FAULT_DQUOT_INIT,
+ FAULT_LOCK_OP,
FAULT_MAX,
};
#define FADVISE_KEEP_SIZE_BIT 0x10
#define FADVISE_HOT_BIT 0x20
#define FADVISE_VERITY_BIT 0x40
+ #define FADVISE_TRUNC_BIT 0x80
#define FADVISE_MODIFIABLE_BITS (FADVISE_COLD_BIT | FADVISE_HOT_BIT)
#define file_is_verity(inode) is_file(inode, FADVISE_VERITY_BIT)
#define file_set_verity(inode) set_file(inode, FADVISE_VERITY_BIT)
+ #define file_should_truncate(inode) is_file(inode, FADVISE_TRUNC_BIT)
+ #define file_need_truncate(inode) set_file(inode, FADVISE_TRUNC_BIT)
+ #define file_dont_truncate(inode) clear_file(inode, FADVISE_TRUNC_BIT)
+
#define DEF_DIR_LEVEL 0
enum {
FI_INLINE_DOTS, /* indicate inline dot dentries */
FI_DO_DEFRAG, /* indicate defragment is running */
FI_DIRTY_FILE, /* indicate regular/symlink has dirty pages */
- FI_NO_PREALLOC, /* indicate skipped preallocated blocks */
+ FI_PREALLOCATED_ALL, /* all blocks for write were preallocated */
FI_HOT_DATA, /* indicate file is hot */
FI_EXTRA_ATTR, /* indicate file has extra attribute */
FI_PROJ_INHERIT, /* indicate file inherits projectid */
unsigned int segment_count; /* total # of segments */
unsigned int main_segments; /* # of segments in main area */
unsigned int reserved_segments; /* # of reserved segments */
+ unsigned int additional_reserved_segments;/* reserved segs for IO align feature */
unsigned int ovp_segments; /* # of overprovision segments */
/* a threshold to reclaim prefree segments */
unsigned int nr_rpages; /* total page number in rpages */
struct page **cpages; /* pages store compressed data in cluster */
unsigned int nr_cpages; /* total page number in cpages */
+ unsigned int valid_nr_cpages; /* valid page number in cpages */
void *rbuf; /* virtual mapped address on rpages */
struct compress_data *cbuf; /* virtual mapped address on cpages */
size_t rlen; /* valid data length in rbuf */
unsigned int cur_victim_sec; /* current victim section num */
unsigned int gc_mode; /* current GC state */
unsigned int next_victim_seg[2]; /* next segment in victim section */
+ spinlock_t gc_urgent_high_lock;
+ bool gc_urgent_high_limited; /* indicates having limited trial count */
+ unsigned int gc_urgent_high_remaining; /* remaining trial count for GC_URGENT_HIGH */
/* for skip statistic */
unsigned int atomic_files; /* # of opened atomic file */
#endif
};
- struct f2fs_private_dio {
- struct inode *inode;
- void *orig_private;
- bio_end_io_t *orig_end_io;
- bool write;
- };
-
#ifdef CONFIG_F2FS_FAULT_INJECTION
#define f2fs_show_injection_info(sbi, type) \
printk_ratelimited("%sF2FS-fs (%s) : inject %s in %s of %pS\n", \
static inline int f2fs_trylock_op(struct f2fs_sb_info *sbi) /* non-blocking read-lock attempt on sbi->cp_rwsem */
{
+ if (time_to_inject(sbi, FAULT_LOCK_OP)) { /* fault injection: report the lock as not taken */
+ f2fs_show_injection_info(sbi, FAULT_LOCK_OP);
+ return 0; /* cp_rwsem is not touched on this path */
+ }
return down_read_trylock(&sbi->cp_rwsem);
}
if (!__allow_reserved_blocks(sbi, inode, true))
avail_user_block_count -= F2FS_OPTION(sbi).root_reserved_blocks;
+
+ if (F2FS_IO_ALIGNED(sbi))
+ avail_user_block_count -= sbi->blocks_per_seg *
+ SM_I(sbi)->additional_reserved_segments;
+
if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
if (avail_user_block_count > sbi->unusable_block_count)
avail_user_block_count -= sbi->unusable_block_count;
if (!__allow_reserved_blocks(sbi, inode, false))
valid_block_count += F2FS_OPTION(sbi).root_reserved_blocks;
+
+ if (F2FS_IO_ALIGNED(sbi))
+ valid_block_count += sbi->blocks_per_seg *
+ SM_I(sbi)->additional_reserved_segments;
+
user_block_count = sbi->user_block_count;
if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
user_block_count -= sbi->unusable_block_count;
static inline void set_file(struct inode *inode, int type) /* OR the advise bit(s) @type into i_advise and dirty the inode */
{
+ if (is_file(inode, type)) /* is_file() already reports @type: skip the store and the dirtying below */
+ return;
F2FS_I(inode)->i_advise |= type;
f2fs_mark_inode_dirty_sync(inode, true);
}
static inline void clear_file(struct inode *inode, int type) /* mask the advise bit(s) @type out of i_advise and dirty the inode */
{
+ if (!is_file(inode, type)) /* is_file() does not report @type: nothing to clear, inode stays clean */
+ return;
F2FS_I(inode)->i_advise &= ~type;
f2fs_mark_inode_dirty_sync(inode, true);
}
bool f2fs_is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid);
bool f2fs_need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino);
int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
- struct node_info *ni);
+ struct node_info *ni, bool checkpoint_context);
pgoff_t f2fs_get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs);
int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode);
int f2fs_truncate_inode_blocks(struct inode *inode, pgoff_t from);
int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count);
int f2fs_reserve_new_block(struct dnode_of_data *dn);
int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index);
- int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from);
int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index);
struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index,
int op_flags, bool for_write);
struct writeback_control *wbc,
enum iostat_type io_type,
int compr_blocks, bool allow_balance);
+ void f2fs_write_failed(struct inode *inode, loff_t to);
void f2fs_invalidate_page(struct page *page, unsigned int offset,
unsigned int length);
int f2fs_release_page(struct page *page, gfp_t wait);
void f2fs_destroy_post_read_processing(void);
int f2fs_init_post_read_wq(struct f2fs_sb_info *sbi);
void f2fs_destroy_post_read_wq(struct f2fs_sb_info *sbi);
+ extern const struct iomap_ops f2fs_iomap_ops;
/*
* gc.c
*/
#include <linux/fs.h>
#include <linux/module.h>
-#include <linux/backing-dev.h>
#include <linux/init.h>
#include <linux/f2fs_fs.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
#include <linux/sched/signal.h>
#include <linux/random.h>
+#include <linux/sched/mm.h>
#include "f2fs.h"
#include "node.h"
* So, I'd like to wait some time to collect dirty segments.
*/
if (sbi->gc_mode == GC_URGENT_HIGH) {
+ spin_lock(&sbi->gc_urgent_high_lock);
+ if (sbi->gc_urgent_high_limited) {
+ if (!sbi->gc_urgent_high_remaining) {
+ sbi->gc_urgent_high_limited = false;
+ spin_unlock(&sbi->gc_urgent_high_lock);
+ sbi->gc_mode = GC_NORMAL;
+ continue;
+ }
+ sbi->gc_urgent_high_remaining--;
+ }
+ spin_unlock(&sbi->gc_urgent_high_lock);
+
wait_ms = gc_th->urgent_sleep_time;
down_write(&sbi->gc_lock);
goto do_gc;
continue;
}
- if (f2fs_get_node_info(sbi, nid, &ni)) {
+ if (f2fs_get_node_info(sbi, nid, &ni, false)) {
f2fs_put_page(node_page, 1);
continue;
}
if (IS_ERR(node_page))
return false;
- if (f2fs_get_node_info(sbi, nid, dni)) {
+ if (f2fs_get_node_info(sbi, nid, dni, false)) {
f2fs_put_page(node_page, 1);
return false;
}
set_sbi_flag(sbi, SBI_NEED_FSCK);
}
+ if (f2fs_check_nid_range(sbi, dni->ino))
+ return false;
+
*nofs = ofs_of_node(node_page);
source_blkaddr = data_blkaddr(NULL, node_page, ofs_in_node);
f2fs_put_page(node_page, 1);
if (!test_and_set_bit(segno, SIT_I(sbi)->invalid_segmap)) {
f2fs_err(sbi, "mismatched blkaddr %u (source_blkaddr %u) in seg %u",
blkaddr, source_blkaddr, segno);
- f2fs_bug_on(sbi, 1);
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
}
}
#endif
f2fs_wait_on_block_writeback(inode, dn.data_blkaddr);
- err = f2fs_get_node_info(fio.sbi, dn.nid, &ni);
+ err = f2fs_get_node_info(fio.sbi, dn.nid, &ni, false);
if (err)
goto put_out;
if (err) {
clear_page_private_gcing(page);
if (err == -ENOMEM) {
- congestion_wait(BLK_RW_ASYNC,
- DEFAULT_IO_TIMEOUT);
+ memalloc_retry_wait(GFP_NOFS);
goto retry;
}
if (is_dirty)
if (phase == 3) {
inode = f2fs_iget(sb, dni.ino);
- if (IS_ERR(inode) || is_bad_inode(inode))
+ if (IS_ERR(inode) || is_bad_inode(inode) ||
+ special_file(inode->i_mode))
continue;
if (!down_write_trylock(
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/buffer_head.h>
-#include <linux/backing-dev.h>
#include <linux/writeback.h>
+#include <linux/sched/mm.h>
#include "f2fs.h"
#include "node.h"
} else if (ino == F2FS_COMPRESS_INO(sbi)) {
#ifdef CONFIG_F2FS_FS_COMPRESSION
inode->i_mapping->a_ops = &f2fs_compress_aops;
+ /*
+ * generic_error_remove_page only truncates pages of regular
+ * inode
+ */
+ inode->i_mode |= S_IFREG;
#endif
mapping_set_gfp_mask(inode->i_mapping,
GFP_NOFS | __GFP_HIGHMEM | __GFP_MOVABLE);
goto bad_inode;
}
f2fs_set_inode_flags(inode);
+
+ if (file_should_truncate(inode)) {
+ ret = f2fs_truncate(inode);
+ if (ret)
+ goto bad_inode;
+ file_dont_truncate(inode);
+ }
+
unlock_new_inode(inode);
trace_f2fs_iget(inode);
return inode;
inode = f2fs_iget(sb, ino);
if (IS_ERR(inode)) {
if (PTR_ERR(inode) == -ENOMEM) {
- congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT);
+ memalloc_retry_wait(GFP_NOFS);
goto retry;
}
}
trace_f2fs_evict_inode(inode);
truncate_inode_pages_final(&inode->i_data);
- if (test_opt(sbi, COMPRESS_CACHE) && f2fs_compressed_file(inode))
+ if ((inode->i_nlink || is_bad_inode(inode)) &&
+ test_opt(sbi, COMPRESS_CACHE) && f2fs_compressed_file(inode))
f2fs_invalidate_compress_pages(sbi, inode->i_ino);
if (inode->i_ino == F2FS_NODE_INO(sbi) ||
* so we can prevent losing this orphan when encountering a checkpoint
* followed by a sudden power-off.
*/
- err = f2fs_get_node_info(sbi, inode->i_ino, &ni);
+ err = f2fs_get_node_info(sbi, inode->i_ino, &ni, false);
if (err) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_warn(sbi, "May loss orphan inode, run fsck to fix.");
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/mpage.h>
-#include <linux/backing-dev.h>
+#include <linux/sched/mm.h>
#include <linux/blkdev.h>
#include <linux/pagevec.h>
#include <linux/swap.h>
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct nat_entry *new, *e;
+ /* Let's mitigate lock contention of nat_tree_lock during checkpoint */
+ if (rwsem_is_locked(&sbi->cp_global_sem))
+ return;
+
new = __alloc_nat_entry(sbi, nid, false);
if (!new)
return;
}
int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
- struct node_info *ni)
+ struct node_info *ni, bool checkpoint_context)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
* nat_tree_lock. Therefore, we should retry, if we failed to grab here
* while not bothering checkpoint.
*/
- if (!rwsem_is_locked(&sbi->cp_global_sem)) {
+ if (!rwsem_is_locked(&sbi->cp_global_sem) || checkpoint_context) {
down_read(&curseg->journal_rwsem);
- } else if (!down_read_trylock(&curseg->journal_rwsem)) {
+ } else if (rwsem_is_contended(&nm_i->nat_tree_lock) ||
+ !down_read_trylock(&curseg->journal_rwsem)) {
up_read(&nm_i->nat_tree_lock);
goto retry;
}
int err;
pgoff_t index;
- err = f2fs_get_node_info(sbi, dn->nid, &ni);
+ err = f2fs_get_node_info(sbi, dn->nid, &ni, false);
if (err)
return err;
goto fail;
#ifdef CONFIG_F2FS_CHECK_FS
- err = f2fs_get_node_info(sbi, dn->nid, &new_ni);
+ err = f2fs_get_node_info(sbi, dn->nid, &new_ni, false);
if (err) {
dec_valid_node_count(sbi, dn->inode, !ofs);
goto fail;
return LOCKED_PAGE;
}
- err = f2fs_get_node_info(sbi, page->index, &ni);
+ err = f2fs_get_node_info(sbi, page->index, &ni, false);
if (err)
return err;
nid = nid_of_node(page);
f2fs_bug_on(sbi, page->index != nid);
- if (f2fs_get_node_info(sbi, nid, &ni))
+ if (f2fs_get_node_info(sbi, nid, &ni, !do_balance))
goto redirty_out;
if (wbc->for_reclaim) {
goto recover_xnid;
/* 1: invalidate the previous xattr nid */
- err = f2fs_get_node_info(sbi, prev_xnid, &ni);
+ err = f2fs_get_node_info(sbi, prev_xnid, &ni, false);
if (err)
return err;
struct page *ipage;
int err;
- err = f2fs_get_node_info(sbi, ino, &old_ni);
+ err = f2fs_get_node_info(sbi, ino, &old_ni, false);
if (err)
return err;
retry:
ipage = f2fs_grab_cache_page(NODE_MAPPING(sbi), ino, false);
if (!ipage) {
- congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT);
+ memalloc_retry_wait(GFP_NOFS);
goto retry;
}
#include <asm/unaligned.h>
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
+#include <linux/sched/mm.h>
#include "f2fs.h"
#include "node.h"
#include "segment.h"
err = f2fs_get_dnode_of_data(&dn, start, ALLOC_NODE);
if (err) {
if (err == -ENOMEM) {
- congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT);
+ memalloc_retry_wait(GFP_NOFS);
goto retry_dn;
}
goto out;
f2fs_wait_on_page_writeback(dn.node_page, NODE, true, true);
- err = f2fs_get_node_info(sbi, dn.nid, &ni);
+ err = f2fs_get_node_info(sbi, dn.nid, &ni, false);
if (err)
goto err;
err = check_index_in_prev_nodes(sbi, dest, &dn);
if (err) {
if (err == -ENOMEM) {
- congestion_wait(BLK_RW_ASYNC,
- DEFAULT_IO_TIMEOUT);
+ memalloc_retry_wait(GFP_NOFS);
goto retry_prev;
}
goto err;
#include <linux/f2fs_fs.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
+#include <linux/sched/mm.h>
#include <linux/prefetch.h>
#include <linux/kthread.h>
#include <linux/swap.h>
LOOKUP_NODE);
if (err) {
if (err == -ENOMEM) {
- congestion_wait(BLK_RW_ASYNC,
- DEFAULT_IO_TIMEOUT);
- cond_resched();
+ memalloc_retry_wait(GFP_NOFS);
goto retry;
}
err = -EAGAIN;
goto next;
}
- err = f2fs_get_node_info(sbi, dn.nid, &ni);
+ err = f2fs_get_node_info(sbi, dn.nid, &ni, false);
if (err) {
f2fs_put_dnode(&dn);
return err;
err = f2fs_do_write_data_page(&fio);
if (err) {
if (err == -ENOMEM) {
- congestion_wait(BLK_RW_ASYNC,
- DEFAULT_IO_TIMEOUT);
- cond_resched();
+ memalloc_retry_wait(GFP_NOFS);
goto retry;
}
unlock_page(page);
#include <linux/module.h>
#include <linux/init.h>
#include <linux/fs.h>
+#include <linux/sched/mm.h>
#include <linux/statfs.h>
#include <linux/buffer_head.h>
-#include <linux/backing-dev.h>
#include <linux/kthread.h>
#include <linux/parser.h>
#include <linux/mount.h>
[FAULT_WRITE_IO] = "write IO error",
[FAULT_SLAB_ALLOC] = "slab alloc",
[FAULT_DQUOT_INIT] = "dquot initialize",
+ [FAULT_LOCK_OP] = "lock_op",
};
void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate,
static const struct f2fs_sb_encodings {
__u16 magic;
char *name;
- char *version;
+ unsigned int version; /* numeric version built with UNICODE_AGE(), see the table entry below */
} f2fs_sb_encoding_map[] = {
- {F2FS_ENC_UTF8_12_1, "utf8", "12.1.0"},
+ {F2FS_ENC_UTF8_12_1, "utf8", UNICODE_AGE(12, 1, 0)},
};
-static int f2fs_sb_read_encoding(const struct f2fs_super_block *sb,
- const struct f2fs_sb_encodings **encoding,
- __u16 *flags)
+static const struct f2fs_sb_encodings *
+f2fs_sb_read_encoding(const struct f2fs_super_block *sb) /* find the f2fs_sb_encoding_map entry matching sb->s_encoding */
{
__u16 magic = le16_to_cpu(sb->s_encoding);
int i;
for (i = 0; i < ARRAY_SIZE(f2fs_sb_encoding_map); i++)
if (magic == f2fs_sb_encoding_map[i].magic)
- break;
-
- if (i >= ARRAY_SIZE(f2fs_sb_encoding_map))
- return -EINVAL;
-
- *encoding = &f2fs_sb_encoding_map[i];
- *flags = le16_to_cpu(sb->s_encoding_flags);
+ return &f2fs_sb_encoding_map[i]; /* first table entry whose magic matches */
- return 0;
+ return NULL; /* no entry matched; s_encoding_flags is no longer read here */
}
struct kmem_cache *f2fs_cf_name_slab;
F2FS_OPTION(sbi).s_resgid));
}
+ /*
+  * Compute how many segments the IO-align feature wants reserved on top of
+  * reserved_segments(sbi) and record the result in
+  * SM_I(sbi)->additional_reserved_segments.
+  *
+  * Does nothing and returns 0 when F2FS_IO_ALIGNED(sbi) is false.
+  * Returns -ENOSPC when the extra reservation does not fit into the user
+  * block count left after current_reserved_blocks and root_reserved_blocks,
+  * 0 otherwise.
+  */
+ static inline int adjust_reserved_segment(struct f2fs_sb_info *sbi)
+ {
+ 	unsigned int sec_blks = sbi->blocks_per_seg * sbi->segs_per_sec;
+ 	unsigned int avg_vblocks;
+ 	unsigned int ratio;
+ 	unsigned int wanted_reserved_segments;
+ 	block_t avail_user_block_count;
+
+ 	if (!F2FS_IO_ALIGNED(sbi))
+ 		return 0;
+
+ 	/* average valid block count in section in worst case */
+ 	avg_vblocks = sec_blks / F2FS_IO_SIZE(sbi);
+ 	/* a section smaller than one IO unit would otherwise divide by zero */
+ 	if (!avg_vblocks)
+ 		avg_vblocks = 1;
+
+ 	/*
+ 	 * we need enough free space when migrating one section in worst case
+ 	 *
+ 	 * The extra amount is (ratio - 1) * reserved_segments().  When the IO
+ 	 * size is not larger than avg_vblocks the ratio is 0 or 1 and nothing
+ 	 * extra is needed; computing "0 * r - r" in unsigned arithmetic would
+ 	 * wrap to a huge value instead.
+ 	 */
+ 	ratio = F2FS_IO_SIZE(sbi) / avg_vblocks;
+ 	if (ratio > 1)
+ 		wanted_reserved_segments = (ratio - 1) * reserved_segments(sbi);
+ 	else
+ 		wanted_reserved_segments = 0;
+
+ 	avail_user_block_count = sbi->user_block_count -
+ 				sbi->current_reserved_blocks -
+ 				F2FS_OPTION(sbi).root_reserved_blocks;
+
+ 	/* widen before multiplying so the block count cannot wrap at 32 bits */
+ 	if ((unsigned long long)wanted_reserved_segments * sbi->blocks_per_seg >
+ 			avail_user_block_count) {
+ 		f2fs_err(sbi, "IO align feature can't grab additional reserved segment: %u, available segments: %u",
+ 			wanted_reserved_segments,
+ 			avail_user_block_count >> sbi->log_blocks_per_seg);
+ 		return -ENOSPC;
+ 	}
+
+ 	SM_I(sbi)->additional_reserved_segments = wanted_reserved_segments;
+
+ 	f2fs_info(sbi, "IO align feature needs additional reserved segment: %u",
+ 		 wanted_reserved_segments);
+
+ 	return 0;
+ }
+
static inline void adjust_unusable_cap_perc(struct f2fs_sb_info *sbi)
{
if (!F2FS_OPTION(sbi).unusable_cap_perc)
page = read_cache_page_gfp(mapping, blkidx, GFP_NOFS);
if (IS_ERR(page)) {
if (PTR_ERR(page) == -ENOMEM) {
- congestion_wait(BLK_RW_ASYNC,
- DEFAULT_IO_TIMEOUT);
+ memalloc_retry_wait(GFP_NOFS);
goto repeat;
}
set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
sbi->seq_file_ra_mul = MIN_RA_MUL;
sbi->max_fragment_chunk = DEF_FRAGMENT_SIZE;
sbi->max_fragment_hole = DEF_FRAGMENT_SIZE;
+ spin_lock_init(&sbi->gc_urgent_high_lock);
sbi->dir_level = DEF_DIR_LEVEL;
sbi->interval_time[CP_TIME] = DEF_CP_INTERVAL;
struct unicode_map *encoding;
__u16 encoding_flags;
- if (f2fs_sb_read_encoding(sbi->raw_super, &encoding_info,
- &encoding_flags)) {
+ encoding_info = f2fs_sb_read_encoding(sbi->raw_super);
+ if (!encoding_info) {
f2fs_err(sbi,
"Encoding requested by superblock is unknown");
return -EINVAL;
}
+ encoding_flags = le16_to_cpu(sbi->raw_super->s_encoding_flags);
encoding = utf8_load(encoding_info->version);
if (IS_ERR(encoding)) {
f2fs_err(sbi,
- "can't mount with superblock charset: %s-%s "
+ "can't mount with superblock charset: %s-%u.%u.%u "
"not supported by the kernel. flags: 0x%x.",
- encoding_info->name, encoding_info->version,
+ encoding_info->name,
+ unicode_major(encoding_info->version),
+ unicode_minor(encoding_info->version),
+ unicode_rev(encoding_info->version),
encoding_flags);
return PTR_ERR(encoding);
}
f2fs_info(sbi, "Using encoding defined by superblock: "
- "%s-%s with flags 0x%hx", encoding_info->name,
- encoding_info->version?:"\b", encoding_flags);
+ "%s-%u.%u.%u with flags 0x%hx", encoding_info->name,
+ unicode_major(encoding_info->version),
+ unicode_minor(encoding_info->version),
+ unicode_rev(encoding_info->version),
+ encoding_flags);
sbi->sb->s_encoding = encoding;
sbi->sb->s_encoding_flags = encoding_flags;
goto free_nm;
}
+ err = adjust_reserved_segment(sbi);
+ if (err)
+ goto free_nm;
+
/* For write statistics */
sbi->sectors_written_start = f2fs_get_sectors_written(sbi);
return sprintf(buf, "%lx\n", sbi->s_flag);
}
+ /*
+  * sysfs show handler for the "pending_discard" attribute: prints
+  * dcc_info->discard_cmd_cnt followed by a newline.
+  *
+  * Returns the number of bytes written to @buf, or -EINVAL when
+  * SM_I(sbi)->dcc_info is NULL.  @a is unused.
+  */
+ static ssize_t pending_discard_show(struct f2fs_attr *a,
+ 		struct f2fs_sb_info *sbi, char *buf)
+ {
+ 	if (!SM_I(sbi)->dcc_info)
+ 		return -EINVAL;
+ 	/* sysfs_emit() bounds the write to the sysfs page, unlike sprintf() */
+ 	return sysfs_emit(buf, "%llu\n", (unsigned long long)atomic_read(
+ 				&SM_I(sbi)->dcc_info->discard_cmd_cnt));
+ }
+
static ssize_t features_show(struct f2fs_attr *a,
struct f2fs_sb_info *sbi, char *buf)
{
struct super_block *sb = sbi->sb;
if (f2fs_sb_has_casefold(sbi))
- return sysfs_emit(buf, "%s (%d.%d.%d)\n",
- sb->s_encoding->charset,
+ return sysfs_emit(buf, "UTF-8 (%d.%d.%d)\n",
(sb->s_encoding->version >> 16) & 0xff,
(sb->s_encoding->version >> 8) & 0xff,
sb->s_encoding->version & 0xff);
if (a->struct_type == RESERVED_BLOCKS) {
spin_lock(&sbi->stat_lock);
if (t > (unsigned long)(sbi->user_block_count -
- F2FS_OPTION(sbi).root_reserved_blocks)) {
+ F2FS_OPTION(sbi).root_reserved_blocks -
+ sbi->blocks_per_seg *
+ SM_I(sbi)->additional_reserved_segments)) {
spin_unlock(&sbi->stat_lock);
return -EINVAL;
}
return count;
}
+ if (!strcmp(a->attr.name, "gc_urgent_high_remaining")) {
+ spin_lock(&sbi->gc_urgent_high_lock);
+ sbi->gc_urgent_high_limited = t != 0;
+ sbi->gc_urgent_high_remaining = t;
+ spin_unlock(&sbi->gc_urgent_high_lock);
+
+ return count;
+ }
+
#ifdef CONFIG_F2FS_IOSTAT
if (!strcmp(a->attr.name, "iostat_enable")) {
sbi->iostat_enable = !!t;
#endif
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, data_io_flag, data_io_flag);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, node_io_flag, node_io_flag);
+ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_urgent_high_remaining, gc_urgent_high_remaining);
F2FS_RW_ATTR(CPRC_INFO, ckpt_req_control, ckpt_thread_ioprio, ckpt_thread_ioprio);
F2FS_GENERAL_RO_ATTR(dirty_segments);
F2FS_GENERAL_RO_ATTR(free_segments);
F2FS_GENERAL_RO_ATTR(encoding);
F2FS_GENERAL_RO_ATTR(mounted_time_sec);
F2FS_GENERAL_RO_ATTR(main_blkaddr);
+ F2FS_GENERAL_RO_ATTR(pending_discard);
#ifdef CONFIG_F2FS_STAT_FS
F2FS_STAT_ATTR(STAT_INFO, f2fs_stat_info, cp_foreground_calls, cp_count);
F2FS_STAT_ATTR(STAT_INFO, f2fs_stat_info, cp_background_calls, bg_cp_count);
ATTR_LIST(main_blkaddr),
ATTR_LIST(max_small_discards),
ATTR_LIST(discard_granularity),
+ ATTR_LIST(pending_discard),
ATTR_LIST(batched_trim_sections),
ATTR_LIST(ipu_policy),
ATTR_LIST(min_ipu_util),
#endif
ATTR_LIST(data_io_flag),
ATTR_LIST(node_io_flag),
+ ATTR_LIST(gc_urgent_high_remaining),
ATTR_LIST(ckpt_thread_ioprio),
ATTR_LIST(dirty_segments),
ATTR_LIST(free_segments),