Merge tag 'ceph-for-6.8-rc1' of https://github.com/ceph/ceph-client

author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 19 Jan 2024 17:58:55 +0000 (09:58 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 19 Jan 2024 17:58:55 +0000 (09:58 -0800)
diff --git a/fs/ceph/Kconfig b/fs/ceph/Kconfig

index 94df854..7249d70 100644 (file)
--- a/fs/ceph/Kconfig
+++ b/fs/ceph/Kconfig
@@ -7,6 +7,7 @@ config CEPH_FS
         select CRYPTO_AES
         select CRYPTO
         select NETFS_SUPPORT
+       select FS_ENCRYPTION_ALGS if FS_ENCRYPTION
         default n
         help
           Choose Y or M here to include support for mounting the
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c

index 500a87b..1340d77 100644 (file)
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -337,6 +337,7 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
         u64 len = subreq->len;
         bool sparse = IS_ENCRYPTED(inode) || ceph_test_mount_opt(fsc, SPARSEREAD);
         u64 off = subreq->start;
+       int extent_cnt;
  
         if (ceph_inode_is_shutdown(inode)) {
                 err = -EIO;
@@ -350,8 +351,8 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
  
         req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, vino,
                         off, &len, 0, 1, sparse ? CEPH_OSD_OP_SPARSE_READ : CEPH_OSD_OP_READ,
-                       CEPH_OSD_FLAG_READ | fsc->client->osdc.client->options->read_from_replica,
-                       NULL, ci->i_truncate_seq, ci->i_truncate_size, false);
+                       CEPH_OSD_FLAG_READ, NULL, ci->i_truncate_seq,
+                       ci->i_truncate_size, false);
         if (IS_ERR(req)) {
                 err = PTR_ERR(req);
                 req = NULL;
@@ -359,7 +360,8 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
         }
  
         if (sparse) {
-               err = ceph_alloc_sparse_ext_map(&req->r_ops[0]);
+               extent_cnt = __ceph_sparse_read_ext_count(inode, len);
+               err = ceph_alloc_sparse_ext_map(&req->r_ops[0], extent_cnt);
                 if (err)
                         goto out;
         }
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c

index 2c0b8dc..9c02f32 100644 (file)
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -4887,13 +4887,15 @@ int ceph_encode_dentry_release(void **p, struct dentry *dentry,
                                struct inode *dir,
                                int mds, int drop, int unless)
  {
-       struct dentry *parent = NULL;
         struct ceph_mds_request_release *rel = *p;
         struct ceph_dentry_info *di = ceph_dentry(dentry);
         struct ceph_client *cl;
         int force = 0;
         int ret;
  
+       /* This shouldn't happen */
+       BUG_ON(!dir);
+
         /*
          * force an record for the directory caps if we have a dentry lease.
          * this is racy (can't take i_ceph_lock and d_lock together), but it
@@ -4903,14 +4905,9 @@ int ceph_encode_dentry_release(void **p, struct dentry *dentry,
         spin_lock(&dentry->d_lock);
         if (di->lease_session && di->lease_session->s_mds == mds)
                 force = 1;
-       if (!dir) {
-               parent = dget(dentry->d_parent);
-               dir = d_inode(parent);
-       }
         spin_unlock(&dentry->d_lock);
  
         ret = ceph_encode_inode_release(p, dir, mds, drop, unless, force);
-       dput(parent);
  
         cl = ceph_inode_to_client(dir);
         spin_lock(&dentry->d_lock);
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c

index 6785966..0e9f56e 100644 (file)
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -1593,10 +1593,12 @@ struct ceph_lease_walk_control {
         unsigned long dir_lease_ttl;
  };
  
+static int __dir_lease_check(const struct dentry *, struct ceph_lease_walk_control *);
+static int __dentry_lease_check(const struct dentry *);
+
  static unsigned long
  __dentry_leases_walk(struct ceph_mds_client *mdsc,
-                    struct ceph_lease_walk_control *lwc,
-                    int (*check)(struct dentry*, void*))
+                    struct ceph_lease_walk_control *lwc)
  {
         struct ceph_dentry_info *di, *tmp;
         struct dentry *dentry, *last = NULL;
@@ -1624,7 +1626,10 @@ __dentry_leases_walk(struct ceph_mds_client *mdsc,
                         goto next;
                 }
  
-               ret = check(dentry, lwc);
+               if (lwc->dir_lease)
+                       ret = __dir_lease_check(dentry, lwc);
+               else
+                       ret = __dentry_lease_check(dentry);
                 if (ret & TOUCH) {
                         /* move it into tail of dir lease list */
                         __dentry_dir_lease_touch(mdsc, di);
@@ -1681,7 +1686,7 @@ next:
         return freed;
  }
  
-static int __dentry_lease_check(struct dentry *dentry, void *arg)
+static int __dentry_lease_check(const struct dentry *dentry)
  {
         struct ceph_dentry_info *di = ceph_dentry(dentry);
         int ret;
@@ -1696,9 +1701,9 @@ static int __dentry_lease_check(struct dentry *dentry, void *arg)
         return DELETE;
  }
  
-static int __dir_lease_check(struct dentry *dentry, void *arg)
+static int __dir_lease_check(const struct dentry *dentry,
+                            struct ceph_lease_walk_control *lwc)
  {
-       struct ceph_lease_walk_control *lwc = arg;
         struct ceph_dentry_info *di = ceph_dentry(dentry);
  
         int ret = __dir_lease_try_check(dentry);
@@ -1737,7 +1742,7 @@ int ceph_trim_dentries(struct ceph_mds_client *mdsc)
  
         lwc.dir_lease = false;
         lwc.nr_to_scan  = CEPH_CAPS_PER_RELEASE * 2;
-       freed = __dentry_leases_walk(mdsc, &lwc, __dentry_lease_check);
+       freed = __dentry_leases_walk(mdsc, &lwc);
         if (!lwc.nr_to_scan) /* more invalid leases */
                 return -EAGAIN;
  
@@ -1747,7 +1752,7 @@ int ceph_trim_dentries(struct ceph_mds_client *mdsc)
         lwc.dir_lease = true;
         lwc.expire_dir_lease = freed < count;
         lwc.dir_lease_ttl = mdsc->fsc->mount_options->caps_wanted_delay_max * HZ;
-       freed +=__dentry_leases_walk(mdsc, &lwc, __dir_lease_check);
+       freed +=__dentry_leases_walk(mdsc, &lwc);
         if (!lwc.nr_to_scan) /* more to check */
                 return -EAGAIN;
  
diff --git a/fs/ceph/export.c b/fs/ceph/export.c

index 726af69..a79f163 100644 (file)
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -286,8 +286,6 @@ static struct dentry *__snapfh_to_dentry(struct super_block *sb,
                 doutc(cl, "%llx.%llx parent %llx hash %x err=%d", vino.ino,
                       vino.snap, sfh->parent_ino, sfh->hash, err);
         }
-       if (IS_ERR(inode))
-               return ERR_CAST(inode);
         /* see comments in ceph_get_parent() */
         return unlinked ? d_obtain_root(inode) : d_obtain_alias(inode);
  }
diff --git a/fs/ceph/file.c b/fs/ceph/file.c

index d380d9d..abe8028 100644 (file)
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -1029,6 +1029,7 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
                 struct ceph_osd_req_op *op;
                 u64 read_off = off;
                 u64 read_len = len;
+               int extent_cnt;
  
                 /* determine new offset/length if encrypted */
                 ceph_fscrypt_adjust_off_and_len(inode, &read_off, &read_len);
@@ -1068,7 +1069,8 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
  
                 op = &req->r_ops[0];
                 if (sparse) {
-                       ret = ceph_alloc_sparse_ext_map(op);
+                       extent_cnt = __ceph_sparse_read_ext_count(inode, read_len);
+                       ret = ceph_alloc_sparse_ext_map(op, extent_cnt);
                         if (ret) {
                                 ceph_osdc_put_request(req);
                                 break;
@@ -1465,6 +1467,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
                 ssize_t len;
                 struct ceph_osd_req_op *op;
                 int readop = sparse ? CEPH_OSD_OP_SPARSE_READ : CEPH_OSD_OP_READ;
+               int extent_cnt;
  
                 if (write)
                         size = min_t(u64, size, fsc->mount_options->wsize);
@@ -1528,7 +1531,8 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
                 osd_req_op_extent_osd_data_bvecs(req, 0, bvecs, num_pages, len);
                 op = &req->r_ops[0];
                 if (sparse) {
-                       ret = ceph_alloc_sparse_ext_map(op);
+                       extent_cnt = __ceph_sparse_read_ext_count(inode, size);
+                       ret = ceph_alloc_sparse_ext_map(op, extent_cnt);
                         if (ret) {
                                 ceph_osdc_put_request(req);
                                 break;
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c

index 02ebfab..548d1de 100644 (file)
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1534,7 +1534,8 @@ static int encode_metric_spec(void **p, void *end)
   * session message, specialization for CEPH_SESSION_REQUEST_OPEN
   * to include additional client metadata fields.
   */
-static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u64 seq)
+static struct ceph_msg *
+create_session_full_msg(struct ceph_mds_client *mdsc, int op, u64 seq)
  {
         struct ceph_msg *msg;
         struct ceph_mds_session_head *h;
@@ -1578,6 +1579,9 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
                 size = METRIC_BYTES(count);
         extra_bytes += 2 + 4 + 4 + size;
  
+       /* flags, mds auth caps and oldest_client_tid */
+       extra_bytes += 4 + 4 + 8;
+
         /* Allocate the message */
         msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h) + extra_bytes,
                            GFP_NOFS, false);
@@ -1589,16 +1593,16 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
         end = p + msg->front.iov_len;
  
         h = p;
-       h->op = cpu_to_le32(CEPH_SESSION_REQUEST_OPEN);
+       h->op = cpu_to_le32(op);
         h->seq = cpu_to_le64(seq);
  
         /*
          * Serialize client metadata into waiting buffer space, using
          * the format that userspace expects for map<string, string>
          *
-        * ClientSession messages with metadata are v4
+        * ClientSession messages with metadata are v7
          */
-       msg->hdr.version = cpu_to_le16(4);
+       msg->hdr.version = cpu_to_le16(7);
         msg->hdr.compat_version = cpu_to_le16(1);
  
         /* The write pointer, following the session_head structure */
@@ -1634,6 +1638,15 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
                 return ERR_PTR(ret);
         }
  
+       /* version == 5, flags */
+       ceph_encode_32(&p, 0);
+
+       /* version == 6, mds auth caps */
+       ceph_encode_32(&p, 0);
+
+       /* version == 7, oldest_client_tid */
+       ceph_encode_64(&p, mdsc->oldest_tid);
+
         msg->front.iov_len = p - msg->front.iov_base;
         msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
  
@@ -1663,7 +1676,8 @@ static int __open_session(struct ceph_mds_client *mdsc,
         session->s_renew_requested = jiffies;
  
         /* send connect message */
-       msg = create_session_open_msg(mdsc, session->s_seq);
+       msg = create_session_full_msg(mdsc, CEPH_SESSION_REQUEST_OPEN,
+                                     session->s_seq);
         if (IS_ERR(msg))
                 return PTR_ERR(msg);
         ceph_con_send(&session->s_con, msg);
@@ -2028,10 +2042,10 @@ static int send_renew_caps(struct ceph_mds_client *mdsc,
  
         doutc(cl, "to mds%d (%s)\n", session->s_mds,
               ceph_mds_state_name(state));
-       msg = ceph_create_session_msg(CEPH_SESSION_REQUEST_RENEWCAPS,
+       msg = create_session_full_msg(mdsc, CEPH_SESSION_REQUEST_RENEWCAPS,
                                       ++session->s_renew_seq);
-       if (!msg)
-               return -ENOMEM;
+       if (IS_ERR(msg))
+               return PTR_ERR(msg);
         ceph_con_send(&session->s_con, msg);
         return 0;
  }
@@ -4128,12 +4142,12 @@ static void handle_session(struct ceph_mds_session *session,
                         pr_info_client(cl, "mds%d reconnect success\n",
                                        session->s_mds);
  
+               session->s_features = features;
                 if (session->s_state == CEPH_MDS_SESSION_OPEN) {
                         pr_notice_client(cl, "mds%d is already opened\n",
                                          session->s_mds);
                 } else {
                         session->s_state = CEPH_MDS_SESSION_OPEN;
-                       session->s_features = features;
                         renewed_caps(mdsc, session, 0);
                         if (test_bit(CEPHFS_FEATURE_METRIC_COLLECT,
                                      &session->s_features))
@@ -5870,7 +5884,8 @@ static void mds_peer_reset(struct ceph_connection *con)
  
         pr_warn_client(mdsc->fsc->client, "mds%d closed our session\n",
                        s->s_mds);
-       if (READ_ONCE(mdsc->fsc->mount_state) != CEPH_MOUNT_FENCE_IO)
+       if (READ_ONCE(mdsc->fsc->mount_state) != CEPH_MOUNT_FENCE_IO &&
+           ceph_mdsmap_get_state(mdsc->mdsmap, s->s_mds) >= CEPH_MDS_STATE_RECONNECT)
                 send_mds_reconnect(mdsc, s);
  }
  
diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c

index 9d36c35..06ee397 100644 (file)
--- a/fs/ceph/quota.c
+++ b/fs/ceph/quota.c
@@ -197,10 +197,10 @@ void ceph_cleanup_quotarealms_inodes(struct ceph_mds_client *mdsc)
  }
  
  /*
- * This function walks through the snaprealm for an inode and returns the
- * ceph_snap_realm for the first snaprealm that has quotas set (max_files,
+ * This function walks through the snaprealm for an inode and set the
+ * realmp with the first snaprealm that has quotas set (max_files,
   * max_bytes, or any, depending on the 'which_quota' argument).  If the root is
- * reached, return the root ceph_snap_realm instead.
+ * reached, set the realmp with the root ceph_snap_realm instead.
   *
   * Note that the caller is responsible for calling ceph_put_snap_realm() on the
   * returned realm.
@@ -211,10 +211,9 @@ void ceph_cleanup_quotarealms_inodes(struct ceph_mds_client *mdsc)
   * this function will return -EAGAIN; otherwise, the snaprealms walk-through
   * will be restarted.
   */
-static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc,
-                                              struct inode *inode,
-                                              enum quota_get_realm which_quota,
-                                              bool retry)
+static int get_quota_realm(struct ceph_mds_client *mdsc, struct inode *inode,
+                          enum quota_get_realm which_quota,
+                          struct ceph_snap_realm **realmp, bool retry)
  {
         struct ceph_client *cl = mdsc->fsc->client;
         struct ceph_inode_info *ci = NULL;
@@ -222,8 +221,10 @@ static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc,
         struct inode *in;
         bool has_quota;
  
+       if (realmp)
+               *realmp = NULL;
         if (ceph_snap(inode) != CEPH_NOSNAP)
-               return NULL;
+               return 0;
  
  restart:
         realm = ceph_inode(inode)->i_snap_realm;
@@ -250,7 +251,7 @@ restart:
                                 break;
                         ceph_put_snap_realm(mdsc, realm);
                         if (!retry)
-                               return ERR_PTR(-EAGAIN);
+                               return -EAGAIN;
                         goto restart;
                 }
  
@@ -259,8 +260,11 @@ restart:
                 iput(in);
  
                 next = realm->parent;
-               if (has_quota || !next)
-                      return realm;
+               if (has_quota || !next) {
+                       if (realmp)
+                               *realmp = realm;
+                       return 0;
+               }
  
                 ceph_get_snap_realm(mdsc, next);
                 ceph_put_snap_realm(mdsc, realm);
@@ -269,7 +273,7 @@ restart:
         if (realm)
                 ceph_put_snap_realm(mdsc, realm);
  
-       return NULL;
+       return 0;
  }
  
  bool ceph_quota_is_same_realm(struct inode *old, struct inode *new)
@@ -277,6 +281,7 @@ bool ceph_quota_is_same_realm(struct inode *old, struct inode *new)
         struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(old->i_sb);
         struct ceph_snap_realm *old_realm, *new_realm;
         bool is_same;
+       int ret;
  
  restart:
         /*
@@ -286,9 +291,9 @@ restart:
          * dropped and we can then restart the whole operation.
          */
         down_read(&mdsc->snap_rwsem);
-       old_realm = get_quota_realm(mdsc, old, QUOTA_GET_ANY, true);
-       new_realm = get_quota_realm(mdsc, new, QUOTA_GET_ANY, false);
-       if (PTR_ERR(new_realm) == -EAGAIN) {
+       get_quota_realm(mdsc, old, QUOTA_GET_ANY, &old_realm, true);
+       ret = get_quota_realm(mdsc, new, QUOTA_GET_ANY, &new_realm, false);
+       if (ret == -EAGAIN) {
                 up_read(&mdsc->snap_rwsem);
                 if (old_realm)
                         ceph_put_snap_realm(mdsc, old_realm);
@@ -492,8 +497,8 @@ bool ceph_quota_update_statfs(struct ceph_fs_client *fsc, struct kstatfs *buf)
         bool is_updated = false;
  
         down_read(&mdsc->snap_rwsem);
-       realm = get_quota_realm(mdsc, d_inode(fsc->sb->s_root),
-                               QUOTA_GET_MAX_BYTES, true);
+       get_quota_realm(mdsc, d_inode(fsc->sb->s_root), QUOTA_GET_MAX_BYTES,
+                       &realm, true);
         up_read(&mdsc->snap_rwsem);
         if (!realm)
                 return false;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h

index fe0f64a..b06e2bc 100644 (file)
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -3,6 +3,7 @@
  #define _FS_CEPH_SUPER_H
  
  #include <linux/ceph/ceph_debug.h>
+#include <linux/ceph/osd_client.h>
  
  #include <asm/unaligned.h>
  #include <linux/backing-dev.h>
@@ -1407,6 +1408,19 @@ static inline void __ceph_update_quota(struct ceph_inode_info *ci,
                 ceph_adjust_quota_realms_count(&ci->netfs.inode, has_quota);
  }
  
+static inline int __ceph_sparse_read_ext_count(struct inode *inode, u64 len)
+{
+       int cnt = 0;
+
+       if (IS_ENCRYPTED(inode)) {
+               cnt = len >> CEPH_FSCRYPT_BLOCK_SHIFT;
+               if (cnt > CEPH_SPARSE_EXT_ARRAY_INITIAL)
+                       cnt = 0;
+       }
+
+       return cnt;
+}
+
  extern void ceph_handle_quota(struct ceph_mds_client *mdsc,
                               struct ceph_mds_session *session,
                               struct ceph_msg *msg);
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h

index b8610e9..fa018d5 100644 (file)
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -572,9 +572,12 @@ int __ceph_alloc_sparse_ext_map(struct ceph_osd_req_op *op, int cnt);
   */
  #define CEPH_SPARSE_EXT_ARRAY_INITIAL  16
  
-static inline int ceph_alloc_sparse_ext_map(struct ceph_osd_req_op *op)
+static inline int ceph_alloc_sparse_ext_map(struct ceph_osd_req_op *op, int cnt)
  {
-       return __ceph_alloc_sparse_ext_map(op, CEPH_SPARSE_EXT_ARRAY_INITIAL);
+       if (!cnt)
+               cnt = CEPH_SPARSE_EXT_ARRAY_INITIAL;
+
+       return __ceph_alloc_sparse_ext_map(op, cnt);
  }
  
  extern void ceph_osdc_get_request(struct ceph_osd_request *req);
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c

index d3a759e..6256220 100644 (file)
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -5850,8 +5850,6 @@ static inline void convert_extent_map(struct ceph_sparse_read *sr)
  }
  #endif
  
-#define MAX_EXTENTS 4096
-
  static int osd_sparse_read(struct ceph_connection *con,
                            struct ceph_msg_data_cursor *cursor,
                            char **pbuf)
@@ -5882,23 +5880,16 @@ next_op:
  
                 if (count > 0) {
                         if (!sr->sr_extent || count > sr->sr_ext_len) {
-                               /*
-                                * Apply a hard cap to the number of extents.
-                                * If we have more, assume something is wrong.
-                                */
-                               if (count > MAX_EXTENTS) {
-                                       dout("%s: OSD returned 0x%x extents in a single reply!\n",
-                                            __func__, count);
-                                       return -EREMOTEIO;
-                               }
-
                                 /* no extent array provided, or too short */
                                 kfree(sr->sr_extent);
                                 sr->sr_extent = kmalloc_array(count,
                                                               sizeof(*sr->sr_extent),
                                                               GFP_NOIO);
-                               if (!sr->sr_extent)
+                               if (!sr->sr_extent) {
+                                       pr_err("%s: failed to allocate %u extents\n",
+                                              __func__, count);
                                         return -ENOMEM;
+                               }
                                 sr->sr_ext_len = count;
                         }
                         ret = count * sizeof(*sr->sr_extent);
author	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 19 Jan 2024 17:58:55 +0000 (09:58 -0800)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 19 Jan 2024 17:58:55 +0000 (09:58 -0800)
fs/ceph/Kconfig		patch | blob | history
fs/ceph/addr.c		patch | blob | history
fs/ceph/caps.c		patch | blob | history
fs/ceph/dir.c		patch | blob | history
fs/ceph/export.c		patch | blob | history
fs/ceph/file.c		patch | blob | history
fs/ceph/mds_client.c		patch | blob | history
fs/ceph/quota.c		patch | blob | history
fs/ceph/super.h		patch | blob | history
include/linux/ceph/osd_client.h		patch | blob | history
net/ceph/osd_client.c		patch | blob | history