libceph: read_from_replica option
author: Ilya Dryomov <idryomov@gmail.com>
Sat, 23 May 2020 09:47:33 +0000 (11:47 +0200)
committer: Ilya Dryomov <idryomov@gmail.com>
Mon, 1 Jun 2020 11:22:53 +0000 (13:22 +0200)
Expose replica reads through read_from_replica=balance and
read_from_replica=localize.  The default is to read from primary
(read_from_replica=no).

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
include/linux/ceph/libceph.h
net/ceph/ceph_common.c
net/ceph/osd_client.c

index 4733959..2247e71 100644 (file)
@@ -53,6 +53,8 @@ struct ceph_options {
        unsigned long osd_keepalive_timeout;    /* jiffies */
        unsigned long osd_request_timeout;      /* jiffies */
 
+       u32 osd_req_flags;  /* CEPH_OSD_FLAG_*, applied to each OSD request */
+
        /*
         * any type that can't be simply compared or doesn't need
         * to be compared should go beyond this point,
index 44770b6..9bab3e9 100644 (file)
@@ -265,6 +265,7 @@ enum {
        Opt_key,
        Opt_ip,
        Opt_crush_location,
+       Opt_read_from_replica,
        /* string args above */
        Opt_share,
        Opt_crc,
@@ -274,6 +275,19 @@ enum {
        Opt_abort_on_full,
 };
 
+enum { /* values of the read_from_replica= option, handled in ceph_parse_param() */
+       Opt_read_from_replica_no,       /* clears both BALANCE_READS and LOCALIZE_READS */
+       Opt_read_from_replica_balance,  /* sets BALANCE_READS, clears LOCALIZE_READS */
+       Opt_read_from_replica_localize, /* sets LOCALIZE_READS, clears BALANCE_READS */
+};
+
+static const struct constant_table ceph_param_read_from_replica[] = { /* option string -> enum value */
+       {"no",          Opt_read_from_replica_no},
+       {"balance",     Opt_read_from_replica_balance},
+       {"localize",    Opt_read_from_replica_localize},
+       {}      /* empty last entry; presumably the table terminator - confirm against constant_table users */
+};
+
 static const struct fs_parameter_spec ceph_parameters[] = {
        fsparam_flag    ("abort_on_full",               Opt_abort_on_full),
        fsparam_flag_no ("cephx_require_signatures",    Opt_cephx_require_signatures),
@@ -290,6 +304,8 @@ static const struct fs_parameter_spec ceph_parameters[] = {
        fsparam_u32     ("osdkeepalive",                Opt_osdkeepalivetimeout),
        __fsparam       (fs_param_is_s32, "osdtimeout", Opt_osdtimeout,
                         fs_param_deprecated, NULL),
+       fsparam_enum    ("read_from_replica",           Opt_read_from_replica,
+                        ceph_param_read_from_replica),
        fsparam_string  ("secret",                      Opt_secret),
        fsparam_flag_no ("share",                       Opt_share),
        fsparam_flag_no ("tcp_nodelay",                 Opt_tcp_nodelay),
@@ -472,6 +488,24 @@ int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt,
                        return err;
                }
                break;
+       case Opt_read_from_replica:
+               switch (result.uint_32) {
+               case Opt_read_from_replica_no:
+                       opt->osd_req_flags &= ~(CEPH_OSD_FLAG_BALANCE_READS |
+                                               CEPH_OSD_FLAG_LOCALIZE_READS);
+                       break;
+               case Opt_read_from_replica_balance:
+                       opt->osd_req_flags |= CEPH_OSD_FLAG_BALANCE_READS;
+                       opt->osd_req_flags &= ~CEPH_OSD_FLAG_LOCALIZE_READS;
+                       break;
+               case Opt_read_from_replica_localize:
+                       opt->osd_req_flags |= CEPH_OSD_FLAG_LOCALIZE_READS;
+                       opt->osd_req_flags &= ~CEPH_OSD_FLAG_BALANCE_READS;
+                       break;
+               default:
+                       BUG();
+               }
+               break;
 
        case Opt_osdtimeout:
                warn_plog(&log, "Ignoring osdtimeout");
@@ -580,6 +614,11 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client,
                }
                seq_putc(m, ',');
        }
+       if (opt->osd_req_flags & CEPH_OSD_FLAG_BALANCE_READS) {
+               seq_puts(m, "read_from_replica=balance,");
+       } else if (opt->osd_req_flags & CEPH_OSD_FLAG_LOCALIZE_READS) {
+               seq_puts(m, "read_from_replica=localize,");
+       }
 
        if (opt->flags & CEPH_OPT_FSID)
                seq_printf(m, "fsid=%pU,", &opt->fsid);
index 4ce6cdc..22733e8 100644 (file)
@@ -2425,11 +2425,14 @@ promote:
 
 static void account_request(struct ceph_osd_request *req)
 {
+       struct ceph_osd_client *osdc = req->r_osdc;
+
        WARN_ON(req->r_flags & (CEPH_OSD_FLAG_ACK | CEPH_OSD_FLAG_ONDISK));
        WARN_ON(!(req->r_flags & (CEPH_OSD_FLAG_READ | CEPH_OSD_FLAG_WRITE)));
 
        req->r_flags |= CEPH_OSD_FLAG_ONDISK;
-       atomic_inc(&req->r_osdc->num_requests);
+       req->r_flags |= osdc->client->options->osd_req_flags;
+       atomic_inc(&osdc->num_requests);
 
        req->r_start_stamp = jiffies;
        req->r_start_latency = ktime_get();