ceph: more precise CEPH_CLIENT_CAPS_PENDING_CAPSNAP
author Yan, Zheng <zyan@redhat.com>
Thu, 20 Jun 2019 04:09:08 +0000 (12:09 +0800)
committer Ilya Dryomov <idryomov@gmail.com>
Mon, 8 Jul 2019 12:01:44 +0000 (14:01 +0200)
The client uses this flag to tell the MDS whether there are more cap
snaps that still need to be flushed. It is mainly for the case where the
client needs to re-send cap/snap flushes after an MDS failover, but the
CEPH_CAP_ANY_FILE_WR caps on the corresponding inodes were all released
before the MDS failover.

Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
fs/ceph/caps.c
include/linux/ceph/ceph_fs.h

index f9055cd..d98dcd9 100644 (file)
@@ -1295,7 +1295,7 @@ void __ceph_remove_caps(struct ceph_inode_info *ci)
  * caller should hold snap_rwsem (read), s_mutex.
  */
 static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
-                     int op, bool sync, int used, int want, int retain,
+                     int op, int flags, int used, int want, int retain,
                      int flushing, u64 flush_tid, u64 oldest_flush_tid)
        __releases(cap->ci->i_ceph_lock)
 {
@@ -1393,12 +1393,19 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
        arg.mode = inode->i_mode;
 
        arg.inline_data = ci->i_inline_version != CEPH_INLINE_NONE;
-       if (list_empty(&ci->i_cap_snaps))
-               arg.flags = CEPH_CLIENT_CAPS_NO_CAPSNAP;
-       else
-               arg.flags = CEPH_CLIENT_CAPS_PENDING_CAPSNAP;
-       if (sync)
-               arg.flags |= CEPH_CLIENT_CAPS_SYNC;
+       if (!(flags & CEPH_CLIENT_CAPS_PENDING_CAPSNAP) &&
+           !list_empty(&ci->i_cap_snaps)) {
+               struct ceph_cap_snap *capsnap;
+               list_for_each_entry_reverse(capsnap, &ci->i_cap_snaps, ci_item) {
+                       if (capsnap->cap_flush.tid)
+                               break;
+                       if (capsnap->need_flush) {
+                               flags |= CEPH_CLIENT_CAPS_PENDING_CAPSNAP;
+                               break;
+                       }
+               }
+       }
+       arg.flags = flags;
 
        spin_unlock(&ci->i_ceph_lock);
 
@@ -2085,7 +2092,7 @@ ack:
                sent++;
 
                /* __send_cap drops i_ceph_lock */
-               delayed += __send_cap(mdsc, cap, CEPH_CAP_OP_UPDATE, false,
+               delayed += __send_cap(mdsc, cap, CEPH_CAP_OP_UPDATE, 0,
                                cap_used, want, retain, flushing,
                                flush_tid, oldest_flush_tid);
                goto retry; /* retake i_ceph_lock and restart our cap scan. */
@@ -2155,7 +2162,8 @@ retry_locked:
                                                &flush_tid, &oldest_flush_tid);
 
                /* __send_cap drops i_ceph_lock */
-               delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH, true,
+               delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH,
+                                    CEPH_CLIENT_CAPS_SYNC,
                                     __ceph_caps_used(ci),
                                     __ceph_caps_wanted(ci),
                                     (cap->issued | cap->implemented),
@@ -2328,9 +2336,17 @@ static void __kick_flushing_caps(struct ceph_mds_client *mdsc,
        struct ceph_cap_flush *cf;
        int ret;
        u64 first_tid = 0;
+       u64 last_snap_flush = 0;
 
        ci->i_ceph_flags &= ~CEPH_I_KICK_FLUSH;
 
+       list_for_each_entry_reverse(cf, &ci->i_cap_flush_list, i_list) {
+               if (!cf->caps) {
+                       last_snap_flush = cf->tid;
+                       break;
+               }
+       }
+
        list_for_each_entry(cf, &ci->i_cap_flush_list, i_list) {
                if (cf->tid < first_tid)
                        continue;
@@ -2348,10 +2364,13 @@ static void __kick_flushing_caps(struct ceph_mds_client *mdsc,
                        dout("kick_flushing_caps %p cap %p tid %llu %s\n",
                             inode, cap, cf->tid, ceph_cap_string(cf->caps));
                        ci->i_ceph_flags |= CEPH_I_NODELAY;
+
                        ret = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH,
-                                         false, __ceph_caps_used(ci),
+                                        (cf->tid < last_snap_flush ?
+                                         CEPH_CLIENT_CAPS_PENDING_CAPSNAP : 0),
+                                         __ceph_caps_used(ci),
                                          __ceph_caps_wanted(ci),
-                                         cap->issued | cap->implemented,
+                                         (cap->issued | cap->implemented),
                                          cf->caps, cf->tid, oldest_flush_tid);
                        if (ret) {
                                pr_err("kick_flushing_caps: error sending "
index 3ac0fea..cb21c5c 100644 (file)
@@ -682,7 +682,7 @@ extern const char *ceph_cap_op_name(int op);
 /* flags field in client cap messages (version >= 10) */
 #define CEPH_CLIENT_CAPS_SYNC                  (1<<0)
 #define CEPH_CLIENT_CAPS_NO_CAPSNAP            (1<<1)
-#define CEPH_CLIENT_CAPS_PENDING_CAPSNAP       (1<<2);
+#define CEPH_CLIENT_CAPS_PENDING_CAPSNAP       (1<<2)
 
 /*
  * caps message, used for capability callbacks, acks, requests, etc.