ceph: Fix race between hole punch and page fault
authorJan Kara <jack@suse.cz>
Thu, 22 Apr 2021 14:38:26 +0000 (16:38 +0200)
committerJan Kara <jack@suse.cz>
Tue, 13 Jul 2021 12:29:01 +0000 (14:29 +0200)
Ceph has a following race between hole punching and page fault:

CPU1                                  CPU2
ceph_fallocate()
  ...
  ceph_zero_pagecache_range()
                                      ceph_filemap_fault()
                                        faults in page in the range being
                                        punched
  ceph_zero_objects()

And now we have a page in punched range with invalid data. Fix the
problem by using mapping->invalidate_lock similarly to other
filesystems. Note that using invalidate_lock also fixes a similar race
wrt ->readpage().

CC: Jeff Layton <jlayton@kernel.org>
CC: ceph-devel@vger.kernel.org
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Jan Kara <jack@suse.cz>
fs/ceph/addr.c
fs/ceph/file.c

index a1e2813..7e7a897 100644 (file)
@@ -1395,9 +1395,11 @@ static vm_fault_t ceph_filemap_fault(struct vm_fault *vmf)
                ret = VM_FAULT_SIGBUS;
        } else {
                struct address_space *mapping = inode->i_mapping;
-               struct page *page = find_or_create_page(mapping, 0,
-                                               mapping_gfp_constraint(mapping,
-                                               ~__GFP_FS));
+               struct page *page;
+
+               filemap_invalidate_lock_shared(mapping);
+               page = find_or_create_page(mapping, 0,
+                               mapping_gfp_constraint(mapping, ~__GFP_FS));
                if (!page) {
                        ret = VM_FAULT_OOM;
                        goto out_inline;
@@ -1418,6 +1420,7 @@ static vm_fault_t ceph_filemap_fault(struct vm_fault *vmf)
                vmf->page = page;
                ret = VM_FAULT_MAJOR | VM_FAULT_LOCKED;
 out_inline:
+               filemap_invalidate_unlock_shared(mapping);
                dout("filemap_fault %p %llu read inline data ret %x\n",
                     inode, off, ret);
        }
index d1755ac..e1d605a 100644 (file)
@@ -2088,6 +2088,7 @@ static long ceph_fallocate(struct file *file, int mode,
        if (ret < 0)
                goto unlock;
 
+       filemap_invalidate_lock(inode->i_mapping);
        ceph_zero_pagecache_range(inode, offset, length);
        ret = ceph_zero_objects(inode, offset, length);
 
@@ -2100,6 +2101,7 @@ static long ceph_fallocate(struct file *file, int mode,
                if (dirty)
                        __mark_inode_dirty(inode, dirty);
        }
+       filemap_invalidate_unlock(inode->i_mapping);
 
        ceph_put_cap_refs(ci, got);
 unlock: