diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 9f0ba90..9ac2625 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
 #include "trace.h"
 #include <trace/events/f2fs.h>
 
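+/*
+ * A page is "cp guaranteed" when checkpoint covers its writeback: meta
+ * and node pages, directory data and cold data.  Such pages are counted
+ * as F2FS_WB_CP_DATA (see WB_DATA_TYPE) so checkpoint can wait on them.
+ */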
+static bool __is_cp_guaranteed(struct page *page)
+{
+       struct address_space *mapping = page->mapping;
+       struct inode *inode;
+       struct f2fs_sb_info *sbi;
+
+       if (!mapping)
+               return false;
+
+       inode = mapping->host;
+       sbi = F2FS_I_SB(inode);
+
+       if (inode->i_ino == F2FS_META_INO(sbi) ||
+                       inode->i_ino == F2FS_NODE_INO(sbi) ||
+                       S_ISDIR(inode->i_mode) ||
+                       is_cold_data(page))
+               return true;
+       return false;
+}
+
 static void f2fs_read_end_io(struct bio *bio)
 {
        struct bio_vec *bvec;
@@ -71,6 +91,7 @@ static void f2fs_write_end_io(struct bio *bio)
 
        bio_for_each_segment_all(bvec, bio, i) {
                struct page *page = bvec->bv_page;
+               enum count_type type = WB_DATA_TYPE(page);
 
                fscrypt_pullback_bio_page(&page, true);
 
@@ -78,15 +99,57 @@ static void f2fs_write_end_io(struct bio *bio)
                        mapping_set_error(page->mapping, -EIO);
                        f2fs_stop_checkpoint(sbi, true);
                }
+               dec_page_count(sbi, type);
+               clear_cold_data(page);
                end_page_writeback(page);
        }
-       if (atomic_dec_and_test(&sbi->nr_wb_bios) &&
+       if (!get_pages(sbi, F2FS_WB_CP_DATA) &&
                                wq_has_sleeper(&sbi->cp_wait))
                wake_up(&sbi->cp_wait);
 
        bio_put(bio);
 }
 
+/*
+ * Map @blk_addr to the device that holds it.  If @bio is given, also
+ * point the bio at that device and rebase its sector to the device start.
+ */
+struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi,
+                               block_t blk_addr, struct bio *bio)
+{
+       struct block_device *bdev = sbi->sb->s_bdev;
+       int i;
+
+       for (i = 0; i < sbi->s_ndevs; i++) {
+               if (FDEV(i).start_blk <= blk_addr &&
+                                       FDEV(i).end_blk >= blk_addr) {
+                       blk_addr -= FDEV(i).start_blk;
+                       bdev = FDEV(i).bdev;
+                       break;
+               }
+       }
+       if (bio) {
+               bio->bi_bdev = bdev;
+               bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blk_addr);
+       }
+       return bdev;
+}
+
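+/* Return the index of the device holding @blkaddr, falling back to 0. */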
+int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr)
+{
+       int i;
+
+       for (i = 0; i < sbi->s_ndevs; i++)
+               if (FDEV(i).start_blk <= blkaddr && FDEV(i).end_blk >= blkaddr)
+                       return i;
+       return 0;
+}
+
+/*
+ * Return true if the bio's bdev matches the target device of @blk_addr.
+ */
+static bool __same_bdev(struct f2fs_sb_info *sbi,
+                               block_t blk_addr, struct bio *bio)
+{
+       return f2fs_target_device(sbi, blk_addr, NULL) == bio->bi_bdev;
+}
+
 /*
  * Low-level block read/write IO operations.
  */
@@ -97,8 +160,7 @@ static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,
 
        bio = f2fs_bio_alloc(npages);
 
-       bio->bi_bdev = sbi->sb->s_bdev;
-       bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blk_addr);
+       f2fs_target_device(sbi, blk_addr, bio);
        bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io;
        bio->bi_private = is_read ? NULL : sbi;
 
@@ -109,8 +171,7 @@ static inline void __submit_bio(struct f2fs_sb_info *sbi,
                                struct bio *bio, enum page_type type)
 {
        if (!is_read_io(bio_op(bio))) {
-               atomic_inc(&sbi->nr_wb_bios);
-               if (f2fs_sb_mounted_hmsmr(sbi->sb) &&
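+               /*
+                * Zoned block devices require sequential writes: finish
+                * the current plug so queued DATA/NODE bios cannot be
+                * reordered behind it.
+                */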
+               if (f2fs_sb_mounted_blkzoned(sbi->sb) &&
                        current->plug && (type == DATA || type == NODE))
                        blk_finish_plug(current->plug);
        }
@@ -198,11 +259,9 @@ static void __f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
        if (type >= META_FLUSH) {
                io->fio.type = META_FLUSH;
                io->fio.op = REQ_OP_WRITE;
-               if (test_opt(sbi, NOBARRIER))
-                       io->fio.op_flags = WRITE_FLUSH | REQ_META | REQ_PRIO;
-               else
-                       io->fio.op_flags = WRITE_FLUSH_FUA | REQ_META |
-                                                               REQ_PRIO;
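+               /* a checkpoint write always issues a preflush; FUA is
+                * added unless mounted with nobarrier */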
+               io->fio.op_flags = REQ_PREFLUSH | REQ_META | REQ_PRIO;
+               if (!test_opt(sbi, NOBARRIER))
+                       io->fio.op_flags |= REQ_FUA;
        }
        __submit_merged_bio(io);
 out:
@@ -270,22 +329,24 @@ void f2fs_submit_page_mbio(struct f2fs_io_info *fio)
                verify_block_addr(sbi, fio->old_blkaddr);
        verify_block_addr(sbi, fio->new_blkaddr);
 
+       bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page;
+
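+       /* count writeback before the bio is submitted, since
+        * f2fs_write_end_io() drops one WB_DATA_TYPE count per page */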
+       if (!is_read)
+               inc_page_count(sbi, WB_DATA_TYPE(bio_page));
+
        down_write(&io->io_rwsem);
 
        if (io->bio && (io->last_block_in_bio != fio->new_blkaddr - 1 ||
-           (io->fio.op != fio->op || io->fio.op_flags != fio->op_flags)))
+           (io->fio.op != fio->op || io->fio.op_flags != fio->op_flags) ||
+                       !__same_bdev(sbi, fio->new_blkaddr, io->bio)))
                __submit_merged_bio(io);
 alloc_new:
        if (io->bio == NULL) {
-               int bio_blocks = MAX_BIO_BLOCKS(sbi);
-
                io->bio = __bio_alloc(sbi, fio->new_blkaddr,
-                                               bio_blocks, is_read);
+                                               BIO_MAX_PAGES, is_read);
                io->fio = *fio;
        }
 
-       bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page;
-
        if (bio_add_page(io->bio, bio_page, PAGE_SIZE, 0) <
                                                        PAGE_SIZE) {
                __submit_merged_bio(io);
@@ -483,7 +544,7 @@ struct page *find_data_page(struct inode *inode, pgoff_t index)
                return page;
        f2fs_put_page(page, 0);
 
-       page = get_read_data_page(inode, index, READ_SYNC, false);
+       page = get_read_data_page(inode, index, 0, false);
        if (IS_ERR(page))
                return page;
 
@@ -509,7 +570,7 @@ struct page *get_lock_data_page(struct inode *inode, pgoff_t index,
        struct address_space *mapping = inode->i_mapping;
        struct page *page;
 repeat:
-       page = get_read_data_page(inode, index, READ_SYNC, for_write);
+       page = get_read_data_page(inode, index, 0, for_write);
        if (IS_ERR(page))
                return page;
 
@@ -590,7 +651,6 @@ static int __allocate_data_block(struct dnode_of_data *dn)
        struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
        struct f2fs_summary sum;
        struct node_info ni;
-       int seg = CURSEG_WARM_DATA;
        pgoff_t fofs;
        blkcnt_t count = 1;
 
@@ -608,11 +668,8 @@ alloc:
        get_node_info(sbi, dn->nid, &ni);
        set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
 
-       if (dn->ofs_in_node == 0 && dn->inode_page == dn->node_page)
-               seg = CURSEG_DIRECT_IO;
-
        allocate_data_block(sbi, NULL, dn->data_blkaddr, &dn->data_blkaddr,
-                                                               &sum, seg);
+                                               &sum, CURSEG_WARM_DATA);
        set_data_blkaddr(dn);
 
        /* update i_size */
@@ -624,11 +681,18 @@ alloc:
        return 0;
 }
 
-ssize_t f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
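+/*
+ * Cases that cannot take the direct I/O path: encrypted regular files,
+ * writes in LFS mode (data is written out-of-place), and multi-device
+ * volumes, where one bio must not span devices.
+ */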
+static inline bool __force_buffered_io(struct inode *inode, int rw)
+{
+       return ((f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) ||
+                       (rw == WRITE && test_opt(F2FS_I_SB(inode), LFS)) ||
+                       F2FS_I_SB(inode)->s_ndevs);
+}
+
+int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
 {
        struct inode *inode = file_inode(iocb->ki_filp);
        struct f2fs_map_blocks map;
-       ssize_t ret = 0;
+       int err = 0;
 
        map.m_lblk = F2FS_BLK_ALIGN(iocb->ki_pos);
        map.m_len = F2FS_BYTES_TO_BLK(iocb->ki_pos + iov_iter_count(from));
@@ -640,19 +704,22 @@ ssize_t f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
        map.m_next_pgofs = NULL;
 
        if (iocb->ki_flags & IOCB_DIRECT) {
-               ret = f2fs_convert_inline_inode(inode);
-               if (ret)
-                       return ret;
-               return f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_DIO);
+               err = f2fs_convert_inline_inode(inode);
+               if (err)
+                       return err;
+               return f2fs_map_blocks(inode, &map, 1,
+                       __force_buffered_io(inode, WRITE) ?
+                               F2FS_GET_BLOCK_PRE_AIO :
+                               F2FS_GET_BLOCK_PRE_DIO);
        }
        if (iocb->ki_pos + iov_iter_count(from) > MAX_INLINE_DATA) {
-               ret = f2fs_convert_inline_inode(inode);
-               if (ret)
-                       return ret;
+               err = f2fs_convert_inline_inode(inode);
+               if (err)
+                       return err;
        }
        if (!f2fs_has_inline_data(inode))
                return f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_AIO);
-       return ret;
+       return err;
 }
 
 /*
@@ -676,7 +743,6 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
        unsigned int ofs_in_node, last_ofs_in_node;
        blkcnt_t prealloc;
        struct extent_info ei;
-       bool allocated = false;
        block_t blkaddr;
 
        if (!maxblocks)
@@ -716,7 +782,7 @@ next_dnode:
        }
 
        prealloc = 0;
-       ofs_in_node = dn.ofs_in_node;
+       last_ofs_in_node = ofs_in_node = dn.ofs_in_node;
        end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
 
 next_block:
@@ -735,10 +801,8 @@ next_block:
                                }
                        } else {
                                err = __allocate_data_block(&dn);
-                               if (!err) {
+                               if (!err)
                                        set_inode_flag(inode, FI_APPEND_WRITE);
-                                       allocated = true;
-                               }
                        }
                        if (err)
                                goto sync_out;
@@ -793,7 +857,6 @@ skip:
                err = reserve_new_blocks(&dn, prealloc);
                if (err)
                        goto sync_out;
-               allocated = dn.node_changed;
 
                map->m_len += dn.ofs_in_node - ofs_in_node;
                if (prealloc && dn.ofs_in_node != last_ofs_in_node + 1) {
@@ -812,9 +875,8 @@ skip:
 
        if (create) {
                f2fs_unlock_op(sbi);
-               f2fs_balance_fs(sbi, allocated);
+               f2fs_balance_fs(sbi, dn.node_changed);
        }
-       allocated = false;
        goto next_dnode;
 
 sync_out:
@@ -822,7 +884,7 @@ sync_out:
 unlock_out:
        if (create) {
                f2fs_unlock_op(sbi);
-               f2fs_balance_fs(sbi, allocated);
+               f2fs_balance_fs(sbi, dn.node_changed);
        }
 out:
        trace_f2fs_map_blocks(inode, map, err);
@@ -834,19 +896,19 @@ static int __get_data_block(struct inode *inode, sector_t iblock,
                        pgoff_t *next_pgofs)
 {
        struct f2fs_map_blocks map;
-       int ret;
+       int err;
 
        map.m_lblk = iblock;
        map.m_len = bh->b_size >> inode->i_blkbits;
        map.m_next_pgofs = next_pgofs;
 
-       ret = f2fs_map_blocks(inode, &map, create, flag);
-       if (!ret) {
+       err = f2fs_map_blocks(inode, &map, create, flag);
+       if (!err) {
                map_bh(bh, inode->i_sb, map.m_pblk);
                bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags;
                bh->b_size = map.m_len << inode->i_blkbits;
        }
-       return ret;
+       return err;
 }
 
 static int get_data_block(struct inode *inode, sector_t iblock,
@@ -891,7 +953,6 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
        struct buffer_head map_bh;
        sector_t start_blk, last_blk;
        pgoff_t next_pgofs;
-       loff_t isize;
        u64 logical = 0, phys = 0, size = 0;
        u32 flags = 0;
        int ret = 0;
@@ -908,13 +969,6 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 
        inode_lock(inode);
 
-       isize = i_size_read(inode);
-       if (start >= isize)
-               goto out;
-
-       if (start + len > isize)
-               len = isize - start;
-
        if (logical_to_blk(inode, len) == 0)
                len = blk_to_logical(inode, 1);
 
@@ -933,13 +987,11 @@ next:
        /* HOLE */
        if (!buffer_mapped(&map_bh)) {
                start_blk = next_pgofs;
-               /* Go through holes util pass the EOF */
-               if (blk_to_logical(inode, start_blk) < isize)
+
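+               /* keep walking holes up to the maximum possible file
+                * size; beyond that, this extent must be the last one */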
+               if (blk_to_logical(inode, start_blk) < blk_to_logical(inode,
+                                       F2FS_I_SB(inode)->max_file_blocks))
                        goto prep_next;
-               /* Found a hole beyond isize means no more extents.
-                * Note that the premise is that filesystems don't
-                * punch holes beyond isize and keep size unchanged.
-                */
+
                flags |= FIEMAP_EXTENT_LAST;
        }
 
@@ -982,7 +1034,6 @@ static struct bio *f2fs_grab_bio(struct inode *inode, block_t blkaddr,
 {
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        struct fscrypt_ctx *ctx = NULL;
-       struct block_device *bdev = sbi->sb->s_bdev;
        struct bio *bio;
 
        if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) {
@@ -1000,8 +1051,7 @@ static struct bio *f2fs_grab_bio(struct inode *inode, block_t blkaddr,
                        fscrypt_release_ctx(ctx);
                return ERR_PTR(-ENOMEM);
        }
-       bio->bi_bdev = bdev;
-       bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blkaddr);
+       f2fs_target_device(sbi, blkaddr, bio);
        bio->bi_end_io = f2fs_read_end_io;
        bio->bi_private = ctx;
 
@@ -1096,7 +1146,8 @@ got_it:
                 * This page will go to BIO.  Do we need to send this
                 * BIO off first?
                 */
-               if (bio && (last_block_in_bio != block_nr - 1)) {
+               if (bio && (last_block_in_bio != block_nr - 1 ||
+                       !__same_bdev(F2FS_I_SB(inode), block_nr, bio))) {
 submit_and_realloc:
                        __submit_bio(F2FS_I_SB(inode), bio, DATA);
                        bio = NULL;
@@ -1253,7 +1304,7 @@ static int f2fs_write_data_page(struct page *page,
                .sbi = sbi,
                .type = DATA,
                .op = REQ_OP_WRITE,
-               .op_flags = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : 0,
+               .op_flags = wbc_to_write_flags(wbc),
                .page = page,
                .encrypted_page = NULL,
        };
@@ -1313,7 +1364,6 @@ done:
        if (err && err != -ENOENT)
                goto redirty_out;
 
-       clear_cold_data(page);
 out:
        inode_dec_dirty_pages(inode);
        if (err)
@@ -1334,6 +1384,8 @@ out:
 
 redirty_out:
        redirty_page_for_writepage(wbc, page);
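+       /*
+        * No real failure here: return AOP_WRITEPAGE_ACTIVATE with the
+        * page still locked so the caller keeps and skips this page.
+        */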
+       if (!err)
+               return AOP_WRITEPAGE_ACTIVATE;
        unlock_page(page);
        return err;
 }
@@ -1429,6 +1481,15 @@ continue_unlock:
 
                        ret = mapping->a_ops->writepage(page, wbc);
                        if (unlikely(ret)) {
+                               /*
+                                * Keep nr_to_write intact, since the VFS
+                                * uses it to count written pages.
+                                */
+                               if (ret == AOP_WRITEPAGE_ACTIVATE) {
+                                       unlock_page(page);
+                                       ret = 0;
+                                       continue;
+                               }
                                done_index = page->index + 1;
                                done = 1;
                                break;
@@ -1665,7 +1726,7 @@ repeat:
                        err = PTR_ERR(bio);
                        goto fail;
                }
-               bio_set_op_attrs(bio, REQ_OP_READ, READ_SYNC);
+               bio->bi_opf = REQ_OP_READ;
                if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
                        bio_put(bio);
                        err = -EFAULT;
@@ -1716,7 +1777,6 @@ static int f2fs_write_end(struct file *file,
                goto unlock_out;
 
        set_page_dirty(page);
-       clear_cold_data(page);
 
        if (pos + copied > i_size_read(inode))
                f2fs_i_size_write(inode, pos + copied);
@@ -1753,9 +1813,7 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
        if (err)
                return err;
 
-       if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
-               return 0;
-       if (test_opt(F2FS_I_SB(inode), LFS))
+       if (__force_buffered_io(inode, rw))
                return 0;
 
        trace_f2fs_direct_IO_enter(inode, offset, count, rw);
@@ -1787,12 +1845,14 @@ void f2fs_invalidate_page(struct page *page, unsigned int offset,
                return;
 
        if (PageDirty(page)) {
-               if (inode->i_ino == F2FS_META_INO(sbi))
+               if (inode->i_ino == F2FS_META_INO(sbi)) {
                        dec_page_count(sbi, F2FS_DIRTY_META);
-               else if (inode->i_ino == F2FS_NODE_INO(sbi))
+               } else if (inode->i_ino == F2FS_NODE_INO(sbi)) {
                        dec_page_count(sbi, F2FS_DIRTY_NODES);
-               else
+               } else {
                        inode_dec_dirty_pages(inode);
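+                       /* also let the dirty-inode list drop this inode
+                        * once no dirty data pages remain */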
+                       remove_dirty_inode(inode);
+               }
        }
 
        /* This is atomic written page, keep Private */