net/rds: Keep track of and wait for FRWR segments in use upon shutdown
author Gerd Rausch <gerd.rausch@oracle.com>
Tue, 16 Jul 2019 22:29:17 +0000 (15:29 -0700)
committer David S. Miller <davem@davemloft.net>
Wed, 17 Jul 2019 19:06:52 +0000 (12:06 -0700)
Since "rds_ib_free_frmr" and "rds_ib_free_frmr_list" simply put
the FRMR memory segments on the "drop_list" or "free_list",
and it is the job of "rds_ib_flush_mr_pool" to reap those entries
by ultimately issuing an "IB_WR_LOCAL_INV" work-request,
we need to trigger and then wait for all those memory segments
attached to a particular connection to be fully released before
we can move on to release the QP, CQ, etc.

So we make "rds_ib_conn_path_shutdown" wait on one more
atomic_t, "i_fastreg_inuse_count", which keeps track of how
many FRWR memory segments are currently marked "FRMR_IS_INUSE"
(and we also wake up "rds_ib_ring_empty_wait" as they go away).

Signed-off-by: Gerd Rausch <gerd.rausch@oracle.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
net/rds/ib.h
net/rds/ib_cm.c
net/rds/ib_frmr.c

index 66c03c7..303c6ee 100644 (file)
@@ -156,6 +156,7 @@ struct rds_ib_connection {
 
        /* To control the number of wrs from fastreg */
        atomic_t                i_fastreg_wrs;
+       atomic_t                i_fastreg_inuse_count;
 
        /* interrupt handling */
        struct tasklet_struct   i_send_tasklet;
index 8891822..1b6fd6c 100644 (file)
@@ -40,6 +40,7 @@
 #include "rds_single_path.h"
 #include "rds.h"
 #include "ib.h"
+#include "ib_mr.h"
 
 /*
  * Set the selected protocol version
@@ -993,6 +994,11 @@ void rds_ib_conn_path_shutdown(struct rds_conn_path *cp)
                                ic->i_cm_id, err);
                }
 
+               /* kick off "flush_worker" for all pools in order to reap
+                * all FRMR registrations that are still marked "FRMR_IS_INUSE"
+                */
+               rds_ib_flush_mrs();
+
                /*
                 * We want to wait for tx and rx completion to finish
                 * before we tear down the connection, but we have to be
@@ -1005,6 +1011,7 @@ void rds_ib_conn_path_shutdown(struct rds_conn_path *cp)
                wait_event(rds_ib_ring_empty_wait,
                           rds_ib_ring_empty(&ic->i_recv_ring) &&
                           (atomic_read(&ic->i_signaled_sends) == 0) &&
+                          (atomic_read(&ic->i_fastreg_inuse_count) == 0) &&
                           (atomic_read(&ic->i_fastreg_wrs) == RDS_IB_DEFAULT_FR_WR));
                tasklet_kill(&ic->i_send_tasklet);
                tasklet_kill(&ic->i_recv_tasklet);
index adaa8e9..06ecf9d 100644 (file)
 
 #include "ib_mr.h"
 
+/* Atomically move the FRWR state of @ibmr from @old_state to @new_state.
+ *
+ * The cmpxchg() only performs the transition if the state still equals
+ * @old_state.  When the transition succeeds and the state being left is
+ * FRMR_IS_INUSE, the owning connection's i_fastreg_inuse_count is
+ * decremented and sleepers on rds_ib_ring_empty_wait are woken up so
+ * that they can re-evaluate their wait condition.
+ */
+static inline void
+rds_transition_frwr_state(struct rds_ib_mr *ibmr,
+                         enum rds_ib_fr_state old_state,
+                         enum rds_ib_fr_state new_state)
+{
+       if (cmpxchg(&ibmr->u.frmr.fr_state,
+                   old_state, new_state) == old_state &&
+           old_state == FRMR_IS_INUSE) {
+               /* enforce order of ibmr->u.frmr.fr_state update
+                * before decrementing i_fastreg_inuse_count
+                */
+               smp_mb__before_atomic();
+               atomic_dec(&ibmr->ic->i_fastreg_inuse_count);
+               /* waitqueue_active() is a lockless check: without a full
+                * barrier between the counter update and the wait-queue
+                * read, a waiter could enqueue itself, still observe the
+                * old count and go to sleep while we see an empty queue
+                * and skip the wake_up (missed wake-up).
+                */
+               smp_mb__after_atomic();
+               if (waitqueue_active(&rds_ib_ring_empty_wait))
+                       wake_up(&rds_ib_ring_empty_wait);
+       }
+}
+
 static struct rds_ib_mr *rds_ib_alloc_frmr(struct rds_ib_device *rds_ibdev,
                                           int npages)
 {
@@ -118,13 +136,18 @@ static int rds_ib_post_reg_frmr(struct rds_ib_mr *ibmr)
        if (unlikely(ret != ibmr->sg_len))
                return ret < 0 ? ret : -EINVAL;
 
+       if (cmpxchg(&frmr->fr_state,
+                   FRMR_IS_FREE, FRMR_IS_INUSE) != FRMR_IS_FREE)
+               return -EBUSY;
+
+       atomic_inc(&ibmr->ic->i_fastreg_inuse_count);
+
        /* Perform a WR for the fast_reg_mr. Each individual page
         * in the sg list is added to the fast reg page list and placed
         * inside the fast_reg_mr WR.  The key used is a rolling 8bit
         * counter, which should guarantee uniqueness.
         */
        ib_update_fast_reg_key(frmr->mr, ibmr->remap_count++);
-       frmr->fr_state = FRMR_IS_INUSE;
        frmr->fr_reg = true;
 
        memset(&reg_wr, 0, sizeof(reg_wr));
@@ -141,7 +164,8 @@ static int rds_ib_post_reg_frmr(struct rds_ib_mr *ibmr)
        ret = ib_post_send(ibmr->ic->i_cm_id->qp, &reg_wr.wr, NULL);
        if (unlikely(ret)) {
                /* Failure here can be because of -ENOMEM as well */
-               frmr->fr_state = FRMR_IS_STALE;
+               rds_transition_frwr_state(ibmr, FRMR_IS_INUSE, FRMR_IS_STALE);
+
                atomic_inc(&ibmr->ic->i_fastreg_wrs);
                if (printk_ratelimit())
                        pr_warn("RDS/IB: %s returned error(%d)\n",
@@ -268,8 +292,12 @@ static int rds_ib_post_inv(struct rds_ib_mr *ibmr)
 
        ret = ib_post_send(i_cm_id->qp, s_wr, NULL);
        if (unlikely(ret)) {
-               frmr->fr_state = FRMR_IS_STALE;
+               rds_transition_frwr_state(ibmr, FRMR_IS_INUSE, FRMR_IS_STALE);
                frmr->fr_inv = false;
+               /* enforce order of frmr->fr_inv update
+                * before incrementing i_fastreg_wrs
+                */
+               smp_mb__before_atomic();
                atomic_inc(&ibmr->ic->i_fastreg_wrs);
                pr_err("RDS/IB: %s returned error(%d)\n", __func__, ret);
                goto out;
@@ -297,7 +325,7 @@ void rds_ib_mr_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc)
        struct rds_ib_frmr *frmr = &ibmr->u.frmr;
 
        if (wc->status != IB_WC_SUCCESS) {
-               frmr->fr_state = FRMR_IS_STALE;
+               rds_transition_frwr_state(ibmr, FRMR_IS_INUSE, FRMR_IS_STALE);
                if (rds_conn_up(ic->conn))
                        rds_ib_conn_error(ic->conn,
                                          "frmr completion <%pI4,%pI4> status %u(%s), vendor_err 0x%x, disconnecting and reconnecting\n",
@@ -309,8 +337,7 @@ void rds_ib_mr_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc)
        }
 
        if (frmr->fr_inv) {
-               if (frmr->fr_state == FRMR_IS_INUSE)
-                       frmr->fr_state = FRMR_IS_FREE;
+               rds_transition_frwr_state(ibmr, FRMR_IS_INUSE, FRMR_IS_FREE);
                frmr->fr_inv = false;
                wake_up(&frmr->fr_inv_done);
        }
@@ -320,6 +347,10 @@ void rds_ib_mr_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc)
                wake_up(&frmr->fr_reg_done);
        }
 
+       /* enforce order of frmr->{fr_reg,fr_inv} update
+        * before incrementing i_fastreg_wrs
+        */
+       smp_mb__before_atomic();
        atomic_inc(&ic->i_fastreg_wrs);
 }