Merge tag 'ceph-for-5.3-rc1' of git://github.com/ceph/ceph-client
author Linus Torvalds <torvalds@linux-foundation.org>
Thu, 18 Jul 2019 18:05:25 +0000 (11:05 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 18 Jul 2019 18:05:25 +0000 (11:05 -0700)
Pull ceph updates from Ilya Dryomov:
 "Lots of exciting things this time!

   - support for rbd object-map and fast-diff features (myself). This
     will speed up reads, discards and things like snap diffs on sparse
     images.

   - ceph.snap.btime vxattr to expose snapshot creation time (David
     Disseldorp). This will be used to integrate with "Restore Previous
     Versions" feature added in Windows 7 for folks who reexport ceph
     through SMB.

   - security xattrs for ceph (Zheng Yan). Only selinux is supported for
     now due to the limitations of ->dentry_init_security().

   - support for MSG_ADDR2, FS_BTIME and FS_CHANGE_ATTR features (Jeff
     Layton). This is actually a single feature bit which was missing
     because of the filesystem pieces. With this in, the kernel client
     will finally be reported as "luminous" by "ceph features" -- it is
     still being reported as "jewel" even though all required Luminous
     features were implemented in 4.13.

   - stop NULL-terminating ceph vxattrs (Jeff Layton). The convention
     with xattrs is to not terminate and this was causing
     inconsistencies with ceph-fuse.

   - change filesystem time granularity from 1 us to 1 ns, again fixing
     an inconsistency with ceph-fuse (Luis Henriques).

  On top of this there are some additional dentry name handling and cap
  flushing fixes from Zheng. Finally, Jeff is formally taking over for
  Zheng as the filesystem maintainer"

* tag 'ceph-for-5.3-rc1' of git://github.com/ceph/ceph-client: (71 commits)
  ceph: fix end offset in truncate_inode_pages_range call
  ceph: use generic_delete_inode() for ->drop_inode
  ceph: use ceph_evict_inode to cleanup inode's resource
  ceph: initialize superblock s_time_gran to 1
  MAINTAINERS: take over for Zheng as CephFS kernel client maintainer
  rbd: setallochint only if object doesn't exist
  rbd: support for object-map and fast-diff
  rbd: call rbd_dev_mapping_set() from rbd_dev_image_probe()
  libceph: export osd_req_op_data() macro
  libceph: change ceph_osdc_call() to take page vector for response
  libceph: bump CEPH_MSG_MAX_DATA_LEN (again)
  rbd: new exclusive lock wait/wake code
  rbd: quiescing lock should wait for image requests
  rbd: lock should be quiesced on reacquire
  rbd: introduce copyup state machine
  rbd: rename rbd_obj_setup_*() to rbd_obj_init_*()
  rbd: move OSD request allocation into object request state machines
  rbd: factor out __rbd_osd_setup_discard_ops()
  rbd: factor out rbd_osd_setup_copyup()
  rbd: introduce obj_req->osd_reqs list
  ...

1  2 
MAINTAINERS
drivers/block/rbd.c
fs/ceph/debugfs.c
fs/ceph/file.c
fs/ceph/super.c
fs/ceph/super.h
net/ceph/messenger.c

diff --cc MAINTAINERS
Simple merge
@@@ -3009,15 -3942,34 +3942,34 @@@ e_inval
        goto out;
  }
  
- static void wake_requests(struct rbd_device *rbd_dev, bool wake_all)
+ /*
+  * Either image request state machine(s) or rbd_add_acquire_lock()
+  * (i.e. "rbd map").
+  */
+ static void wake_lock_waiters(struct rbd_device *rbd_dev, int result)
  {
-       dout("%s rbd_dev %p wake_all %d\n", __func__, rbd_dev, wake_all);
+       struct rbd_img_request *img_req;
+       dout("%s rbd_dev %p result %d\n", __func__, rbd_dev, result);
 -      lockdep_assert_held_exclusive(&rbd_dev->lock_rwsem);
++      lockdep_assert_held_write(&rbd_dev->lock_rwsem);
  
        cancel_delayed_work(&rbd_dev->lock_dwork);
-       if (wake_all)
-               wake_up_all(&rbd_dev->lock_waitq);
-       else
-               wake_up(&rbd_dev->lock_waitq);
+       if (!completion_done(&rbd_dev->acquire_wait)) {
+               rbd_assert(list_empty(&rbd_dev->acquiring_list) &&
+                          list_empty(&rbd_dev->running_list));
+               rbd_dev->acquire_err = result;
+               complete_all(&rbd_dev->acquire_wait);
+               return;
+       }
+       list_for_each_entry(img_req, &rbd_dev->acquiring_list, lock_item) {
+               mutex_lock(&img_req->state_mutex);
+               rbd_assert(img_req->state == RBD_IMG_EXCLUSIVE_LOCK);
+               rbd_img_schedule(img_req, result);
+               mutex_unlock(&img_req->state_mutex);
+       }
+       list_splice_tail_init(&rbd_dev->acquiring_list, &rbd_dev->running_list);
  }
  
  static int get_lock_owner_info(struct rbd_device *rbd_dev,
@@@ -3246,13 -4225,13 +4225,13 @@@ again
        }
  }
  
- /*
-  * lock_rwsem must be held for write
-  */
- static bool rbd_release_lock(struct rbd_device *rbd_dev)
+ static bool rbd_quiesce_lock(struct rbd_device *rbd_dev)
  {
-       dout("%s rbd_dev %p read lock_state %d\n", __func__, rbd_dev,
-            rbd_dev->lock_state);
+       bool need_wait;
+       dout("%s rbd_dev %p\n", __func__, rbd_dev);
 -      lockdep_assert_held_exclusive(&rbd_dev->lock_rwsem);
++      lockdep_assert_held_write(&rbd_dev->lock_rwsem);
        if (rbd_dev->lock_state != RBD_LOCK_STATE_LOCKED)
                return false;
  
Simple merge
diff --cc fs/ceph/file.c
Simple merge
diff --cc fs/ceph/super.c
Simple merge
diff --cc fs/ceph/super.h
Simple merge
Simple merge