select CRYPTO_AES
select CRYPTO
select NETFS_SUPPORT
+ select FS_ENCRYPTION_ALGS if FS_ENCRYPTION
default n
help
Choose Y or M here to include support for mounting the
u64 len = subreq->len;
bool sparse = IS_ENCRYPTED(inode) || ceph_test_mount_opt(fsc, SPARSEREAD);
u64 off = subreq->start;
+ int extent_cnt;
if (ceph_inode_is_shutdown(inode)) {
err = -EIO;
req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, vino,
off, &len, 0, 1, sparse ? CEPH_OSD_OP_SPARSE_READ : CEPH_OSD_OP_READ,
- CEPH_OSD_FLAG_READ | fsc->client->osdc.client->options->read_from_replica,
- NULL, ci->i_truncate_seq, ci->i_truncate_size, false);
+ CEPH_OSD_FLAG_READ, NULL, ci->i_truncate_seq,
+ ci->i_truncate_size, false);
if (IS_ERR(req)) {
err = PTR_ERR(req);
req = NULL;
}
if (sparse) {
- err = ceph_alloc_sparse_ext_map(&req->r_ops[0]);
+ extent_cnt = __ceph_sparse_read_ext_count(inode, len);
+ err = ceph_alloc_sparse_ext_map(&req->r_ops[0], extent_cnt);
if (err)
goto out;
}
struct inode *dir,
int mds, int drop, int unless)
{
- struct dentry *parent = NULL;
struct ceph_mds_request_release *rel = *p;
struct ceph_dentry_info *di = ceph_dentry(dentry);
struct ceph_client *cl;
int force = 0;
int ret;
+ /* This shouldn't happen */
+ BUG_ON(!dir);
+
/*
* force an record for the directory caps if we have a dentry lease.
* this is racy (can't take i_ceph_lock and d_lock together), but it
spin_lock(&dentry->d_lock);
if (di->lease_session && di->lease_session->s_mds == mds)
force = 1;
- if (!dir) {
- parent = dget(dentry->d_parent);
- dir = d_inode(parent);
- }
spin_unlock(&dentry->d_lock);
ret = ceph_encode_inode_release(p, dir, mds, drop, unless, force);
- dput(parent);
cl = ceph_inode_to_client(dir);
spin_lock(&dentry->d_lock);
unsigned long dir_lease_ttl;
};
+static int __dir_lease_check(const struct dentry *, struct ceph_lease_walk_control *);
+static int __dentry_lease_check(const struct dentry *);
+
static unsigned long
__dentry_leases_walk(struct ceph_mds_client *mdsc,
- struct ceph_lease_walk_control *lwc,
- int (*check)(struct dentry*, void*))
+ struct ceph_lease_walk_control *lwc)
{
struct ceph_dentry_info *di, *tmp;
struct dentry *dentry, *last = NULL;
goto next;
}
- ret = check(dentry, lwc);
+ if (lwc->dir_lease)
+ ret = __dir_lease_check(dentry, lwc);
+ else
+ ret = __dentry_lease_check(dentry);
if (ret & TOUCH) {
/* move it into tail of dir lease list */
__dentry_dir_lease_touch(mdsc, di);
return freed;
}
-static int __dentry_lease_check(struct dentry *dentry, void *arg)
+static int __dentry_lease_check(const struct dentry *dentry)
{
struct ceph_dentry_info *di = ceph_dentry(dentry);
int ret;
return DELETE;
}
-static int __dir_lease_check(struct dentry *dentry, void *arg)
+static int __dir_lease_check(const struct dentry *dentry,
+ struct ceph_lease_walk_control *lwc)
{
- struct ceph_lease_walk_control *lwc = arg;
struct ceph_dentry_info *di = ceph_dentry(dentry);
int ret = __dir_lease_try_check(dentry);
lwc.dir_lease = false;
lwc.nr_to_scan = CEPH_CAPS_PER_RELEASE * 2;
- freed = __dentry_leases_walk(mdsc, &lwc, __dentry_lease_check);
+ freed = __dentry_leases_walk(mdsc, &lwc);
if (!lwc.nr_to_scan) /* more invalid leases */
return -EAGAIN;
lwc.dir_lease = true;
lwc.expire_dir_lease = freed < count;
lwc.dir_lease_ttl = mdsc->fsc->mount_options->caps_wanted_delay_max * HZ;
- freed +=__dentry_leases_walk(mdsc, &lwc, __dir_lease_check);
+ freed +=__dentry_leases_walk(mdsc, &lwc);
if (!lwc.nr_to_scan) /* more to check */
return -EAGAIN;
doutc(cl, "%llx.%llx parent %llx hash %x err=%d", vino.ino,
vino.snap, sfh->parent_ino, sfh->hash, err);
}
- if (IS_ERR(inode))
- return ERR_CAST(inode);
/* see comments in ceph_get_parent() */
return unlinked ? d_obtain_root(inode) : d_obtain_alias(inode);
}
struct ceph_osd_req_op *op;
u64 read_off = off;
u64 read_len = len;
+ int extent_cnt;
/* determine new offset/length if encrypted */
ceph_fscrypt_adjust_off_and_len(inode, &read_off, &read_len);
op = &req->r_ops[0];
if (sparse) {
- ret = ceph_alloc_sparse_ext_map(op);
+ extent_cnt = __ceph_sparse_read_ext_count(inode, read_len);
+ ret = ceph_alloc_sparse_ext_map(op, extent_cnt);
if (ret) {
ceph_osdc_put_request(req);
break;
ssize_t len;
struct ceph_osd_req_op *op;
int readop = sparse ? CEPH_OSD_OP_SPARSE_READ : CEPH_OSD_OP_READ;
+ int extent_cnt;
if (write)
size = min_t(u64, size, fsc->mount_options->wsize);
osd_req_op_extent_osd_data_bvecs(req, 0, bvecs, num_pages, len);
op = &req->r_ops[0];
if (sparse) {
- ret = ceph_alloc_sparse_ext_map(op);
+ extent_cnt = __ceph_sparse_read_ext_count(inode, size);
+ ret = ceph_alloc_sparse_ext_map(op, extent_cnt);
if (ret) {
ceph_osdc_put_request(req);
break;
* session message, specialization for CEPH_SESSION_REQUEST_OPEN
* to include additional client metadata fields.
*/
-static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u64 seq)
+static struct ceph_msg *
+create_session_full_msg(struct ceph_mds_client *mdsc, int op, u64 seq)
{
struct ceph_msg *msg;
struct ceph_mds_session_head *h;
size = METRIC_BYTES(count);
extra_bytes += 2 + 4 + 4 + size;
+ /* flags, mds auth caps and oldest_client_tid */
+ extra_bytes += 4 + 4 + 8;
+
/* Allocate the message */
msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h) + extra_bytes,
GFP_NOFS, false);
end = p + msg->front.iov_len;
h = p;
- h->op = cpu_to_le32(CEPH_SESSION_REQUEST_OPEN);
+ h->op = cpu_to_le32(op);
h->seq = cpu_to_le64(seq);
/*
* Serialize client metadata into waiting buffer space, using
* the format that userspace expects for map<string, string>
*
- * ClientSession messages with metadata are v4
+ * ClientSession messages with metadata are v7
*/
- msg->hdr.version = cpu_to_le16(4);
+ msg->hdr.version = cpu_to_le16(7);
msg->hdr.compat_version = cpu_to_le16(1);
/* The write pointer, following the session_head structure */
return ERR_PTR(ret);
}
+ /* version == 5, flags */
+ ceph_encode_32(&p, 0);
+
+ /* version == 6, mds auth caps */
+ ceph_encode_32(&p, 0);
+
+ /* version == 7, oldest_client_tid */
+ ceph_encode_64(&p, mdsc->oldest_tid);
+
msg->front.iov_len = p - msg->front.iov_base;
msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
session->s_renew_requested = jiffies;
/* send connect message */
- msg = create_session_open_msg(mdsc, session->s_seq);
+ msg = create_session_full_msg(mdsc, CEPH_SESSION_REQUEST_OPEN,
+ session->s_seq);
if (IS_ERR(msg))
return PTR_ERR(msg);
ceph_con_send(&session->s_con, msg);
doutc(cl, "to mds%d (%s)\n", session->s_mds,
ceph_mds_state_name(state));
- msg = ceph_create_session_msg(CEPH_SESSION_REQUEST_RENEWCAPS,
+ msg = create_session_full_msg(mdsc, CEPH_SESSION_REQUEST_RENEWCAPS,
++session->s_renew_seq);
- if (!msg)
- return -ENOMEM;
+ if (IS_ERR(msg))
+ return PTR_ERR(msg);
ceph_con_send(&session->s_con, msg);
return 0;
}
pr_info_client(cl, "mds%d reconnect success\n",
session->s_mds);
+ session->s_features = features;
if (session->s_state == CEPH_MDS_SESSION_OPEN) {
pr_notice_client(cl, "mds%d is already opened\n",
session->s_mds);
} else {
session->s_state = CEPH_MDS_SESSION_OPEN;
- session->s_features = features;
renewed_caps(mdsc, session, 0);
if (test_bit(CEPHFS_FEATURE_METRIC_COLLECT,
&session->s_features))
pr_warn_client(mdsc->fsc->client, "mds%d closed our session\n",
s->s_mds);
- if (READ_ONCE(mdsc->fsc->mount_state) != CEPH_MOUNT_FENCE_IO)
+ if (READ_ONCE(mdsc->fsc->mount_state) != CEPH_MOUNT_FENCE_IO &&
+ ceph_mdsmap_get_state(mdsc->mdsmap, s->s_mds) >= CEPH_MDS_STATE_RECONNECT)
send_mds_reconnect(mdsc, s);
}
}
/*
- * This function walks through the snaprealm for an inode and returns the
- * ceph_snap_realm for the first snaprealm that has quotas set (max_files,
+ * This function walks through the snaprealm for an inode and sets
+ * *realmp to the first snaprealm that has quotas set (max_files,
* max_bytes, or any, depending on the 'which_quota' argument). If the root is
- * reached, return the root ceph_snap_realm instead.
+ * reached, *realmp is set to the root ceph_snap_realm instead.
*
* Note that the caller is responsible for calling ceph_put_snap_realm() on the
* returned realm.
* this function will return -EAGAIN; otherwise, the snaprealms walk-through
* will be restarted.
*/
-static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc,
- struct inode *inode,
- enum quota_get_realm which_quota,
- bool retry)
+static int get_quota_realm(struct ceph_mds_client *mdsc, struct inode *inode,
+ enum quota_get_realm which_quota,
+ struct ceph_snap_realm **realmp, bool retry)
{
struct ceph_client *cl = mdsc->fsc->client;
struct ceph_inode_info *ci = NULL;
struct inode *in;
bool has_quota;
+ if (realmp)
+ *realmp = NULL;
if (ceph_snap(inode) != CEPH_NOSNAP)
- return NULL;
+ return 0;
restart:
realm = ceph_inode(inode)->i_snap_realm;
break;
ceph_put_snap_realm(mdsc, realm);
if (!retry)
- return ERR_PTR(-EAGAIN);
+ return -EAGAIN;
goto restart;
}
iput(in);
next = realm->parent;
- if (has_quota || !next)
- return realm;
+ if (has_quota || !next) {
+ if (realmp)
+ *realmp = realm;
+ return 0;
+ }
ceph_get_snap_realm(mdsc, next);
ceph_put_snap_realm(mdsc, realm);
if (realm)
ceph_put_snap_realm(mdsc, realm);
- return NULL;
+ return 0;
}
bool ceph_quota_is_same_realm(struct inode *old, struct inode *new)
struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(old->i_sb);
struct ceph_snap_realm *old_realm, *new_realm;
bool is_same;
+ int ret;
restart:
/*
* dropped and we can then restart the whole operation.
*/
down_read(&mdsc->snap_rwsem);
- old_realm = get_quota_realm(mdsc, old, QUOTA_GET_ANY, true);
- new_realm = get_quota_realm(mdsc, new, QUOTA_GET_ANY, false);
- if (PTR_ERR(new_realm) == -EAGAIN) {
+ get_quota_realm(mdsc, old, QUOTA_GET_ANY, &old_realm, true);
+ ret = get_quota_realm(mdsc, new, QUOTA_GET_ANY, &new_realm, false);
+ if (ret == -EAGAIN) {
up_read(&mdsc->snap_rwsem);
if (old_realm)
ceph_put_snap_realm(mdsc, old_realm);
bool is_updated = false;
down_read(&mdsc->snap_rwsem);
- realm = get_quota_realm(mdsc, d_inode(fsc->sb->s_root),
- QUOTA_GET_MAX_BYTES, true);
+ get_quota_realm(mdsc, d_inode(fsc->sb->s_root), QUOTA_GET_MAX_BYTES,
+ &realm, true);
up_read(&mdsc->snap_rwsem);
if (!realm)
return false;
#define _FS_CEPH_SUPER_H
#include <linux/ceph/ceph_debug.h>
+#include <linux/ceph/osd_client.h>
#include <asm/unaligned.h>
#include <linux/backing-dev.h>
ceph_adjust_quota_realms_count(&ci->netfs.inode, has_quota);
}
+/*
+ * Pick the number of sparse-read extents to preallocate for a read of
+ * @len bytes on @inode.
+ *
+ * For an encrypted inode this is the number of whole blocks of
+ * (1 << CEPH_FSCRYPT_BLOCK_SHIFT) bytes in @len, provided that number does
+ * not exceed CEPH_SPARSE_EXT_ARRAY_INITIAL.  In every other case 0 is
+ * returned, which ceph_alloc_sparse_ext_map() replaces with
+ * CEPH_SPARSE_EXT_ARRAY_INITIAL.
+ */
+static inline int __ceph_sparse_read_ext_count(struct inode *inode, u64 len)
+{
+	u64 blocks;
+
+	if (!IS_ENCRYPTED(inode))
+		return 0;
+
+	/*
+	 * Do the range check in 64 bits: narrowing the shifted length to int
+	 * first could wrap to a negative or bogus small count for a very
+	 * large @len and slip past the comparison below.
+	 */
+	blocks = len >> CEPH_FSCRYPT_BLOCK_SHIFT;
+	if (blocks > CEPH_SPARSE_EXT_ARRAY_INITIAL)
+		return 0;
+
+	/* blocks <= CEPH_SPARSE_EXT_ARRAY_INITIAL here, so this cannot truncate */
+	return (int)blocks;
+}
+
extern void ceph_handle_quota(struct ceph_mds_client *mdsc,
struct ceph_mds_session *session,
struct ceph_msg *msg);
*/
#define CEPH_SPARSE_EXT_ARRAY_INITIAL 16
-static inline int ceph_alloc_sparse_ext_map(struct ceph_osd_req_op *op)
+static inline int ceph_alloc_sparse_ext_map(struct ceph_osd_req_op *op, int cnt)
{
- return __ceph_alloc_sparse_ext_map(op, CEPH_SPARSE_EXT_ARRAY_INITIAL);
+ if (!cnt) /* a count of 0 selects the default, CEPH_SPARSE_EXT_ARRAY_INITIAL */
+ cnt = CEPH_SPARSE_EXT_ARRAY_INITIAL;
+
+ return __ceph_alloc_sparse_ext_map(op, cnt); /* helper's result is returned unchanged */
}
extern void ceph_osdc_get_request(struct ceph_osd_request *req);
}
#endif
-#define MAX_EXTENTS 4096
-
static int osd_sparse_read(struct ceph_connection *con,
struct ceph_msg_data_cursor *cursor,
char **pbuf)
if (count > 0) {
if (!sr->sr_extent || count > sr->sr_ext_len) {
- /*
- * Apply a hard cap to the number of extents.
- * If we have more, assume something is wrong.
- */
- if (count > MAX_EXTENTS) {
- dout("%s: OSD returned 0x%x extents in a single reply!\n",
- __func__, count);
- return -EREMOTEIO;
- }
-
/* no extent array provided, or too short */
kfree(sr->sr_extent);
sr->sr_extent = kmalloc_array(count,
sizeof(*sr->sr_extent),
GFP_NOIO);
- if (!sr->sr_extent)
+ if (!sr->sr_extent) {
+ pr_err("%s: failed to allocate %u extents\n",
+ __func__, count);
return -ENOMEM;
+ }
sr->sr_ext_len = count;
}
ret = count * sizeof(*sr->sr_extent);