Merge tag 'for-5.13-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave...
[linux-2.6-microblaze.git] / fs / btrfs / inode.c
index 35bfa05..eb6fddf 100644 (file)
@@ -102,6 +102,7 @@ static void __endio_write_update_ordered(struct btrfs_inode *inode,
  * BTRFS_ILOCK_SHARED - acquire a shared lock on the inode
  * BTRFS_ILOCK_TRY - try to acquire the lock, if fails on first attempt
  *                  return -EAGAIN
+ * BTRFS_ILOCK_MMAP - acquire a write lock on the i_mmap_lock
  */
 int btrfs_inode_lock(struct inode *inode, unsigned int ilock_flags)
 {
@@ -122,6 +123,8 @@ int btrfs_inode_lock(struct inode *inode, unsigned int ilock_flags)
                }
                inode_lock(inode);
        }
+       if (ilock_flags & BTRFS_ILOCK_MMAP)
+               down_write(&BTRFS_I(inode)->i_mmap_lock);
        return 0;
 }
 
@@ -133,6 +136,8 @@ int btrfs_inode_lock(struct inode *inode, unsigned int ilock_flags)
  */
 void btrfs_inode_unlock(struct inode *inode, unsigned int ilock_flags)
 {
+       if (ilock_flags & BTRFS_ILOCK_MMAP)
+               up_write(&BTRFS_I(inode)->i_mmap_lock);
        if (ilock_flags & BTRFS_ILOCK_SHARED)
                inode_unlock_shared(inode);
        else
@@ -641,17 +646,12 @@ again:
                if (!ret) {
                        unsigned long offset = offset_in_page(total_compressed);
                        struct page *page = pages[nr_pages - 1];
-                       char *kaddr;
 
                        /* zero the tail end of the last page, we might be
                         * sending it down to disk
                         */
-                       if (offset) {
-                               kaddr = kmap_atomic(page);
-                               memset(kaddr + offset, 0,
-                                      PAGE_SIZE - offset);
-                               kunmap_atomic(kaddr);
-                       }
+                       if (offset)
+                               memzero_page(page, offset, PAGE_SIZE - offset);
                        will_compress = 1;
                }
        }
@@ -1516,7 +1516,7 @@ static int fallback_to_cow(struct btrfs_inode *inode, struct page *locked_page,
 static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
                                       struct page *locked_page,
                                       const u64 start, const u64 end,
-                                      int *page_started, int force,
+                                      int *page_started,
                                       unsigned long *nr_written)
 {
        struct btrfs_fs_info *fs_info = inode->root->fs_info;
@@ -1530,6 +1530,7 @@ static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
        u64 ino = btrfs_ino(inode);
        bool nocow = false;
        u64 disk_bytenr = 0;
+       const bool force = inode->flags & BTRFS_INODE_NODATACOW;
 
        path = btrfs_alloc_path();
        if (!path) {
@@ -1863,23 +1864,16 @@ error:
        return ret;
 }
 
-static inline int need_force_cow(struct btrfs_inode *inode, u64 start, u64 end)
+/*
+ * Return true when the delalloc range [start, end] may take the NOCOW path:
+ * the inode must be flagged NODATACOW or PREALLOC and the range must not be
+ * marked EXTENT_DEFRAG.
+ *
+ * @defrag_bytes is a hint value (no spinlock held here); if it is non-zero
+ * the file is defragging, and any extent that needs defrag must be COWed.
+ */
+static bool should_nocow(struct btrfs_inode *inode, u64 start, u64 end)
 {
-
-       if (!(inode->flags & BTRFS_INODE_NODATACOW) &&
-           !(inode->flags & BTRFS_INODE_PREALLOC))
-               return 0;
-
-       /*
-        * @defrag_bytes is a hint value, no spinlock held here,
-        * if is not zero, it means the file is defragging.
-        * Force cow if given extent needs to be defragged.
-        */
-       if (inode->defrag_bytes &&
-           test_range_bit(&inode->io_tree, start, end, EXTENT_DEFRAG, 0, NULL))
-               return 1;
-
-       return 0;
+       if (inode->flags & (BTRFS_INODE_NODATACOW | BTRFS_INODE_PREALLOC)) {
+               if (inode->defrag_bytes &&
+                   test_range_bit(&inode->io_tree, start, end, EXTENT_DEFRAG,
+                                  0, NULL))
+                       return false;
+               return true;
+       }
+       return false;
 }
 
 /*
@@ -1891,17 +1885,12 @@ int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page
                struct writeback_control *wbc)
 {
        int ret;
-       int force_cow = need_force_cow(inode, start, end);
        const bool zoned = btrfs_is_zoned(inode->root->fs_info);
 
-       if (inode->flags & BTRFS_INODE_NODATACOW && !force_cow) {
-               ASSERT(!zoned);
-               ret = run_delalloc_nocow(inode, locked_page, start, end,
-                                        page_started, 1, nr_written);
-       } else if (inode->flags & BTRFS_INODE_PREALLOC && !force_cow) {
+       if (should_nocow(inode, start, end)) {
                ASSERT(!zoned);
                ret = run_delalloc_nocow(inode, locked_page, start, end,
-                                        page_started, 0, nr_written);
+                                        page_started, nr_written);
        } else if (!inode_can_compress(inode) ||
                   !inode_need_compress(inode, start, end)) {
                if (zoned)
@@ -3099,11 +3088,13 @@ void btrfs_writepage_endio_finish_ordered(struct page *page, u64 start,
  * @bio_offset:        offset to the beginning of the bio (in bytes)
  * @page:      page where is the data to be verified
  * @pgoff:     offset inside the page
+ * @start:     logical offset in the file
  *
  * The length of such check is always one sector size.
  */
 static int check_data_csum(struct inode *inode, struct btrfs_io_bio *io_bio,
-                          u32 bio_offset, struct page *page, u32 pgoff)
+                          u32 bio_offset, struct page *page, u32 pgoff,
+                          u64 start)
 {
        struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
        SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
@@ -3130,8 +3121,8 @@ static int check_data_csum(struct inode *inode, struct btrfs_io_bio *io_bio,
        kunmap_atomic(kaddr);
        return 0;
 zeroit:
-       btrfs_print_data_csum_error(BTRFS_I(inode), page_offset(page) + pgoff,
-                                   csum, csum_expected, io_bio->mirror_num);
+       btrfs_print_data_csum_error(BTRFS_I(inode), start, csum, csum_expected,
+                                   io_bio->mirror_num);
        if (io_bio->device)
                btrfs_dev_stat_inc_and_print(io_bio->device,
                                             BTRFS_DEV_STAT_CORRUPTION_ERRS);
@@ -3149,10 +3140,9 @@ zeroit:
  * @bio_offset:        offset to the beginning of the bio (in bytes)
  * @start:     file offset of the range start
  * @end:       file offset of the range end (inclusive)
- * @mirror:    mirror number
  */
 int btrfs_verify_data_csum(struct btrfs_io_bio *io_bio, u32 bio_offset,
-                          struct page *page, u64 start, u64 end, int mirror)
+                          struct page *page, u64 start, u64 end)
 {
        struct inode *inode = page->mapping->host;
        struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
@@ -3184,7 +3174,8 @@ int btrfs_verify_data_csum(struct btrfs_io_bio *io_bio, u32 bio_offset,
             pg_off += sectorsize, bio_offset += sectorsize) {
                int ret;
 
-               ret = check_data_csum(inode, io_bio, bio_offset, page, pg_off);
+               ret = check_data_csum(inode, io_bio, bio_offset, page, pg_off,
+                                     page_offset(page) + pg_off);
                if (ret < 0)
                        return -EIO;
        }
@@ -3390,15 +3381,19 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
                        int is_dead_root = 0;
 
                        /*
-                        * this is an orphan in the tree root. Currently these
+                        * This is an orphan in the tree root. Currently these
                         * could come from 2 sources:
-                        *  a) a snapshot deletion in progress
+                        *  a) a root (snapshot/subvolume) deletion in progress
                         *  b) a free space cache inode
-                        * We need to distinguish those two, as the snapshot
-                        * orphan must not get deleted.
-                        * find_dead_roots already ran before us, so if this
-                        * is a snapshot deletion, we should find the root
-                        * in the fs_roots radix tree.
+                        * We need to distinguish those two, as the orphan item
+                        * for a root must not get deleted before the deletion
+                        * of the snapshot/subvolume's tree completes.
+                        *
+                        * btrfs_find_orphan_roots() ran before us, which has
+                        * found all deleted roots and loaded them into
+                        * fs_info->fs_roots_radix. So here we can find if an
+                        * orphan item corresponds to a deleted root by looking
+                        * up the root from that radix tree.
                         */
 
                        spin_lock(&fs_info->fs_roots_radix_lock);
@@ -4329,7 +4324,11 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry)
                goto out_end_trans;
        }
 
-       btrfs_record_root_in_trans(trans, dest);
+       ret = btrfs_record_root_in_trans(trans, dest);
+       if (ret) {
+               btrfs_abort_transaction(trans, ret);
+               goto out_end_trans;
+       }
 
        memset(&dest->root_item.drop_progress, 0,
                sizeof(dest->root_item.drop_progress));
@@ -4829,7 +4828,6 @@ int btrfs_truncate_block(struct btrfs_inode *inode, loff_t from, loff_t len,
        struct btrfs_ordered_extent *ordered;
        struct extent_state *cached_state = NULL;
        struct extent_changeset *data_reserved = NULL;
-       char *kaddr;
        bool only_release_metadata = false;
        u32 blocksize = fs_info->sectorsize;
        pgoff_t index = from >> PAGE_SHIFT;
@@ -4921,15 +4919,13 @@ again:
        if (offset != blocksize) {
                if (!len)
                        len = blocksize - offset;
-               kaddr = kmap(page);
                if (front)
-                       memset(kaddr + (block_start - page_offset(page)),
-                               0, offset);
+                       memzero_page(page, (block_start - page_offset(page)),
+                                    offset);
                else
-                       memset(kaddr + (block_start - page_offset(page)) +  offset,
-                               0, len);
+                       memzero_page(page, (block_start - page_offset(page)) + offset,
+                                    len);
                flush_dcache_page(page);
-               kunmap(page);
        }
        ClearPageChecked(page);
        set_page_dirty(page);
@@ -6828,11 +6824,9 @@ static noinline int uncompress_inline(struct btrfs_path *path,
         * cover that region here.
         */
 
-       if (max_size + pg_offset < PAGE_SIZE) {
-               char *map = kmap(page);
-               memset(map + pg_offset + max_size, 0, PAGE_SIZE - max_size - pg_offset);
-               kunmap(page);
-       }
+       if (max_size + pg_offset < PAGE_SIZE)
+               memzero_page(page,  pg_offset + max_size,
+                            PAGE_SIZE - max_size - pg_offset);
        kfree(tmp);
        return ret;
 }
@@ -7023,7 +7017,7 @@ next:
                                if (ret)
                                        goto out;
                        } else {
-                               map = kmap(page);
+                               map = kmap_local_page(page);
                                read_extent_buffer(leaf, map + pg_offset, ptr,
                                                   copy_size);
                                if (pg_offset + copy_size < PAGE_SIZE) {
@@ -7031,7 +7025,7 @@ next:
                                               PAGE_SIZE - pg_offset -
                                               copy_size);
                                }
-                               kunmap(page);
+                               kunmap_local(map);
                        }
                        flush_dcache_page(page);
                }
@@ -7259,6 +7253,19 @@ static struct extent_map *btrfs_new_extent_direct(struct btrfs_inode *inode,
        return em;
 }
 
+/*
+ * Report whether the block group containing @bytenr is read-only.
+ *
+ * A bytenr with no block group is also reported as read-only — presumably
+ * so that callers (outside this hunk) treat the unknown case conservatively
+ * and fall back to the COW path; confirm against the call sites.
+ */
+static bool btrfs_extent_readonly(struct btrfs_fs_info *fs_info, u64 bytenr)
+{
+       struct btrfs_block_group *block_group;
+       bool readonly = false;
+
+       block_group = btrfs_lookup_block_group(fs_info, bytenr);
+       if (!block_group || block_group->ro)
+               readonly = true;
+       if (block_group)
+               btrfs_put_block_group(block_group);
+       return readonly;
+}
+
 /*
  * Check if we can do nocow write into the range [@offset, @offset + @len)
  *
@@ -7910,7 +7917,8 @@ static blk_status_t btrfs_check_read_dio_bio(struct inode *inode,
                        ASSERT(pgoff < PAGE_SIZE);
                        if (uptodate &&
                            (!csum || !check_data_csum(inode, io_bio,
-                                       bio_offset, bvec.bv_page, pgoff))) {
+                                                      bio_offset, bvec.bv_page,
+                                                      pgoff, start))) {
                                clean_io_failure(fs_info, failure_tree, io_tree,
                                                 start, bvec.bv_page,
                                                 btrfs_ino(BTRFS_I(inode)),
@@ -8169,10 +8177,6 @@ static blk_qc_t btrfs_submit_direct(struct inode *inode, struct iomap *iomap,
                bio->bi_end_io = btrfs_end_dio_bio;
                btrfs_io_bio(bio)->logical = file_offset;
 
-               WARN_ON_ONCE(write && btrfs_is_zoned(fs_info) &&
-                            fs_info->max_zone_append_size &&
-                            bio_op(bio) != REQ_OP_ZONE_APPEND);
-
                if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
                        status = extract_ordered_extent(BTRFS_I(inode), bio,
                                                        file_offset);
@@ -8403,17 +8407,11 @@ again:
                 * for the finish_ordered_io
                 */
                if (TestClearPagePrivate2(page)) {
-                       struct btrfs_ordered_inode_tree *tree;
-                       u64 new_len;
-
-                       tree = &inode->ordered_tree;
-
-                       spin_lock_irq(&tree->lock);
+                       spin_lock_irq(&inode->ordered_tree.lock);
                        set_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags);
-                       new_len = start - ordered->file_offset;
-                       if (new_len < ordered->truncated_len)
-                               ordered->truncated_len = new_len;
-                       spin_unlock_irq(&tree->lock);
+                       ordered->truncated_len = min(ordered->truncated_len,
+                                                    start - ordered->file_offset);
+                       spin_unlock_irq(&inode->ordered_tree.lock);
 
                        if (btrfs_dec_test_ordered_pending(inode, &ordered,
                                                           start,
@@ -8498,7 +8496,6 @@ vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf)
        struct btrfs_ordered_extent *ordered;
        struct extent_state *cached_state = NULL;
        struct extent_changeset *data_reserved = NULL;
-       char *kaddr;
        unsigned long zero_start;
        loff_t size;
        vm_fault_t ret;
@@ -8539,6 +8536,7 @@ vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf)
 
        ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */
 again:
+       down_read(&BTRFS_I(inode)->i_mmap_lock);
        lock_page(page);
        size = i_size_read(inode);
 
@@ -8567,6 +8565,7 @@ again:
                unlock_extent_cached(io_tree, page_start, page_end,
                                     &cached_state);
                unlock_page(page);
+               up_read(&BTRFS_I(inode)->i_mmap_lock);
                btrfs_start_ordered_extent(ordered, 1);
                btrfs_put_ordered_extent(ordered);
                goto again;
@@ -8610,20 +8609,17 @@ again:
                zero_start = PAGE_SIZE;
 
        if (zero_start != PAGE_SIZE) {
-               kaddr = kmap(page);
-               memset(kaddr + zero_start, 0, PAGE_SIZE - zero_start);
+               memzero_page(page, zero_start, PAGE_SIZE - zero_start);
                flush_dcache_page(page);
-               kunmap(page);
        }
        ClearPageChecked(page);
        set_page_dirty(page);
        SetPageUptodate(page);
 
-       BTRFS_I(inode)->last_trans = fs_info->generation;
-       BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid;
-       BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->root->last_log_commit;
+       btrfs_set_inode_last_sub_trans(BTRFS_I(inode));
 
        unlock_extent_cached(io_tree, page_start, page_end, &cached_state);
+       up_read(&BTRFS_I(inode)->i_mmap_lock);
 
        btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
        sb_end_pagefault(inode->i_sb);
@@ -8632,6 +8628,7 @@ again:
 
 out_unlock:
        unlock_page(page);
+       up_read(&BTRFS_I(inode)->i_mmap_lock);
 out:
        btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
        btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved, page_start,
@@ -8883,6 +8880,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
        INIT_LIST_HEAD(&ei->delalloc_inodes);
        INIT_LIST_HEAD(&ei->delayed_iput);
        RB_CLEAR_NODE(&ei->rb_node);
+       init_rwsem(&ei->i_mmap_lock);
 
        return inode;
 }
@@ -9008,7 +9006,7 @@ int __init btrfs_init_cachep(void)
 
        btrfs_free_space_bitmap_cachep = kmem_cache_create("btrfs_free_space_bitmap",
                                                        PAGE_SIZE, PAGE_SIZE,
-                                                       SLAB_RED_ZONE, NULL);
+                                                       SLAB_MEM_SPREAD, NULL);
        if (!btrfs_free_space_bitmap_cachep)
                goto fail;
 
@@ -9101,8 +9099,11 @@ static int btrfs_rename_exchange(struct inode *old_dir,
                goto out_notrans;
        }
 
-       if (dest != root)
-               btrfs_record_root_in_trans(trans, dest);
+       if (dest != root) {
+               ret = btrfs_record_root_in_trans(trans, dest);
+               if (ret)
+                       goto out_fail;
+       }
 
        /*
         * We need to find a free sequence number both in the source and
@@ -9406,8 +9407,11 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
                goto out_notrans;
        }
 
-       if (dest != root)
-               btrfs_record_root_in_trans(trans, dest);
+       if (dest != root) {
+               ret = btrfs_record_root_in_trans(trans, dest);
+               if (ret)
+                       goto out_fail;
+       }
 
        ret = btrfs_set_inode_index(BTRFS_I(new_dir), &index);
        if (ret)
@@ -9674,7 +9678,7 @@ out:
        return ret;
 }
 
-int btrfs_start_delalloc_snapshot(struct btrfs_root *root)
+int btrfs_start_delalloc_snapshot(struct btrfs_root *root, bool in_reclaim_context)
 {
        struct writeback_control wbc = {
                .nr_to_write = LONG_MAX,
@@ -9687,7 +9691,7 @@ int btrfs_start_delalloc_snapshot(struct btrfs_root *root)
        if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
                return -EROFS;
 
-       return start_delalloc_inodes(root, &wbc, true, false);
+       return start_delalloc_inodes(root, &wbc, true, in_reclaim_context);
 }
 
 int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, long nr,
@@ -9877,6 +9881,7 @@ static struct btrfs_trans_handle *insert_prealloc_file_extent(
        struct btrfs_path *path;
        u64 start = ins->objectid;
        u64 len = ins->offset;
+       int qgroup_released;
        int ret;
 
        memset(&stack_fi, 0, sizeof(stack_fi));
@@ -9889,16 +9894,16 @@ static struct btrfs_trans_handle *insert_prealloc_file_extent(
        btrfs_set_stack_file_extent_compression(&stack_fi, BTRFS_COMPRESS_NONE);
        /* Encryption and other encoding is reserved and all 0 */
 
-       ret = btrfs_qgroup_release_data(inode, file_offset, len);
-       if (ret < 0)
-               return ERR_PTR(ret);
+       qgroup_released = btrfs_qgroup_release_data(inode, file_offset, len);
+       if (qgroup_released < 0)
+               return ERR_PTR(qgroup_released);
 
        if (trans) {
                ret = insert_reserved_file_extent(trans, inode,
                                                  file_offset, &stack_fi,
-                                                 true, ret);
+                                                 true, qgroup_released);
                if (ret)
-                       return ERR_PTR(ret);
+                       goto free_qgroup;
                return trans;
        }
 
@@ -9909,21 +9914,35 @@ static struct btrfs_trans_handle *insert_prealloc_file_extent(
        extent_info.file_offset = file_offset;
        extent_info.extent_buf = (char *)&stack_fi;
        extent_info.is_new_extent = true;
-       extent_info.qgroup_reserved = ret;
+       extent_info.qgroup_reserved = qgroup_released;
        extent_info.insertions = 0;
 
        path = btrfs_alloc_path();
-       if (!path)
-               return ERR_PTR(-ENOMEM);
+       if (!path) {
+               ret = -ENOMEM;
+               goto free_qgroup;
+       }
 
-       ret = btrfs_replace_file_extents(&inode->vfs_inode, path, file_offset,
+       ret = btrfs_replace_file_extents(inode, path, file_offset,
                                     file_offset + len - 1, &extent_info,
                                     &trans);
        btrfs_free_path(path);
        if (ret)
-               return ERR_PTR(ret);
-
+               goto free_qgroup;
        return trans;
+
+free_qgroup:
+       /*
+        * We have released qgroup data range at the beginning of the function,
+        * and normally qgroup_released bytes will be freed when committing
+        * transaction.
+        * But if we error out early, we have to free what we have released
+        * or we leak qgroup data reservation.
+        */
+       btrfs_qgroup_free_refroot(inode->root->fs_info,
+                       inode->root->root_key.objectid, qgroup_released,
+                       BTRFS_QGROUP_RSV_DATA);
+       return ERR_PTR(ret);
 }
 
 static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
@@ -10588,6 +10607,8 @@ static const struct inode_operations btrfs_dir_inode_operations = {
        .set_acl        = btrfs_set_acl,
        .update_time    = btrfs_update_time,
        .tmpfile        = btrfs_tmpfile,
+       .fileattr_get   = btrfs_fileattr_get,
+       .fileattr_set   = btrfs_fileattr_set,
 };
 
 static const struct file_operations btrfs_dir_file_operations = {
@@ -10641,6 +10662,8 @@ static const struct inode_operations btrfs_file_inode_operations = {
        .get_acl        = btrfs_get_acl,
        .set_acl        = btrfs_set_acl,
        .update_time    = btrfs_update_time,
+       .fileattr_get   = btrfs_fileattr_get,
+       .fileattr_set   = btrfs_fileattr_set,
 };
 static const struct inode_operations btrfs_special_inode_operations = {
        .getattr        = btrfs_getattr,