net/smc: add termination reason and handle LLC protocol violation
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 824c521..b6f93b4 100644
@@ -44,10 +44,20 @@ static struct smc_lgr_list smc_lgr_list = { /* established link groups */
 static atomic_t lgr_cnt = ATOMIC_INIT(0); /* number of existing link groups */
 static DECLARE_WAIT_QUEUE_HEAD(lgrs_deleted);
 
+struct smc_ib_up_work {
+       struct work_struct      work;
+       struct smc_link_group   *lgr;
+       struct smc_ib_device    *smcibdev;
+       u8                      ibport;
+};
+
 static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
                         struct smc_buf_desc *buf_desc);
 static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft);
 
+static void smc_link_up_work(struct work_struct *work);
+static void smc_link_down_work(struct work_struct *work);
+
 /* return head of link group list and its lock for a given link group */
 static inline struct list_head *smc_lgr_list_head(struct smc_link_group *lgr,
                                                  spinlock_t **lgr_lock)
@@ -111,16 +121,60 @@ static void smc_lgr_add_alert_token(struct smc_connection *conn)
        rb_insert_color(&conn->alert_node, &conn->lgr->conns_all);
 }
 
+/* assign an SMC-R link to the connection */
+static int smcr_lgr_conn_assign_link(struct smc_connection *conn, bool first)
+{
+       enum smc_link_state expected = first ? SMC_LNK_ACTIVATING :
+                                      SMC_LNK_ACTIVE;
+       int i, j;
+
+       /* do link balancing */
+       for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+               struct smc_link *lnk = &conn->lgr->lnk[i];
+
+               if (lnk->state != expected || lnk->link_is_asym)
+                       continue;
+               if (conn->lgr->role == SMC_CLNT) {
+                       conn->lnk = lnk; /* temporary, SMC server assigns link */
+                       break;
+               }
+               if (conn->lgr->conns_num % 2) {
+                       for (j = i + 1; j < SMC_LINKS_PER_LGR_MAX; j++) {
+                               struct smc_link *lnk2;
+
+                               lnk2 = &conn->lgr->lnk[j];
+                               if (lnk2->state == expected &&
+                                   !lnk2->link_is_asym) {
+                                       conn->lnk = lnk2;
+                                       break;
+                               }
+                       }
+               }
+               if (!conn->lnk)
+                       conn->lnk = lnk;
+               break;
+       }
+       if (!conn->lnk)
+               return SMC_CLC_DECL_NOACTLINK;
+       return 0;
+}
+
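
The balancing rule above reduces to parity round-robin: an even connection count takes the first usable, non-asymmetric link, an odd count prefers the next usable link after it and falls back to the first. A user-space sketch of the same selection (struct link, pick_link() and LINKS_MAX are illustrative names, not kernel definitions):

    #include <stdbool.h>
    #include <stdio.h>

    #define LINKS_MAX 2   /* two-link case, just for the demo */

    struct link { bool active; bool asym; };

    /* pick a link index the way smcr_lgr_conn_assign_link() balances */
    static int pick_link(const struct link lnk[], int conns_num)
    {
            int first = -1;

            for (int i = 0; i < LINKS_MAX; i++) {
                    if (!lnk[i].active || lnk[i].asym)
                            continue;
                    if (first < 0)
                            first = i;        /* first usable link */
                    else if (conns_num % 2)
                            return i;         /* odd count: next usable */
            }
            return first;                     /* even count, or fallback */
    }

    int main(void)
    {
            struct link lnk[LINKS_MAX] = { { true, false }, { true, false } };

            for (int n = 0; n < 4; n++)       /* alternates: 0 1 0 1 */
                    printf("conn %d -> link %d\n", n, pick_link(lnk, n));
            return 0;
    }
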
 /* Register connection in link group by assigning an alert token
  * registered in a search tree.
  * Requires @conns_lock
  * Note that '0' is a reserved value and not assigned.
  */
-static void smc_lgr_register_conn(struct smc_connection *conn)
+static int smc_lgr_register_conn(struct smc_connection *conn, bool first)
 {
        struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
        static atomic_t nexttoken = ATOMIC_INIT(0);
+       int rc;
 
+       if (!conn->lgr->is_smcd) {
+               rc = smcr_lgr_conn_assign_link(conn, first);
+               if (rc)
+                       return rc;
+       }
        /* find a new alert_token_local value not yet used by some connection
         * in this link group
         */
@@ -132,6 +186,7 @@ static void smc_lgr_register_conn(struct smc_connection *conn)
        }
        smc_lgr_add_alert_token(conn);
        conn->lgr->conns_num++;
+       return 0;
 }
 
 /* Unregister connection and reset the alert token of the given connection
@@ -166,29 +221,22 @@ static void smc_lgr_unregister_conn(struct smc_connection *conn)
 void smc_lgr_cleanup_early(struct smc_connection *conn)
 {
        struct smc_link_group *lgr = conn->lgr;
+       struct list_head *lgr_list;
+       spinlock_t *lgr_lock;
 
        if (!lgr)
                return;
 
        smc_conn_free(conn);
-       smc_lgr_forget(lgr);
+       lgr_list = smc_lgr_list_head(lgr, &lgr_lock);
+       spin_lock_bh(lgr_lock);
+       /* do not use this link group for new connections */
+       if (!list_empty(lgr_list))
+               list_del_init(lgr_list);
+       spin_unlock_bh(lgr_lock);
        smc_lgr_schedule_free_work_fast(lgr);
 }
 
-/* Send delete link, either as client to request the initiation
- * of the DELETE LINK sequence from server; or as server to
- * initiate the delete processing. See smc_llc_rx_delete_link().
- */
-static int smc_link_send_delete(struct smc_link *lnk, bool orderly)
-{
-       if (lnk->state == SMC_LNK_ACTIVE &&
-           !smc_llc_send_delete_link(lnk, SMC_LLC_REQ, orderly)) {
-               smc_llc_link_deleting(lnk);
-               return 0;
-       }
-       return -ENOTCONN;
-}
-
 static void smc_lgr_free(struct smc_link_group *lgr);
 
 static void smc_lgr_free_work(struct work_struct *work)
@@ -197,8 +245,8 @@ static void smc_lgr_free_work(struct work_struct *work)
                                                  struct smc_link_group,
                                                  free_work);
        spinlock_t *lgr_lock;
-       struct smc_link *lnk;
        bool conns;
+       int i;
 
        smc_lgr_list_head(lgr, &lgr_lock);
        spin_lock_bh(lgr_lock);
@@ -214,26 +262,24 @@ static void smc_lgr_free_work(struct work_struct *work)
                return;
        }
        list_del_init(&lgr->list); /* remove from smc_lgr_list */
-
-       lnk = &lgr->lnk[SMC_SINGLE_LINK];
-       if (!lgr->is_smcd && !lgr->terminating) {
-               /* try to send del link msg, on error free lgr immediately */
-               if (lnk->state == SMC_LNK_ACTIVE &&
-                   !smc_link_send_delete(lnk, true)) {
-                       /* reschedule in case we never receive a response */
-                       smc_lgr_schedule_free_work(lgr);
-                       spin_unlock_bh(lgr_lock);
-                       return;
-               }
-       }
        lgr->freeing = 1; /* this instance does the freeing, no new schedule */
        spin_unlock_bh(lgr_lock);
        cancel_delayed_work(&lgr->free_work);
 
-       if (!lgr->is_smcd && lnk->state != SMC_LNK_INACTIVE)
-               smc_llc_link_inactive(lnk);
+       if (!lgr->is_smcd && !lgr->terminating)
+               smc_llc_send_link_delete_all(lgr, true,
+                                            SMC_LLC_DEL_PROG_INIT_TERM);
        if (lgr->is_smcd && !lgr->terminating)
                smc_ism_signal_shutdown(lgr);
+       if (!lgr->is_smcd) {
+               for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+                       struct smc_link *lnk = &lgr->lnk[i];
+
+                       if (smc_link_usable(lnk))
+                               lnk->state = SMC_LNK_INACTIVE;
+               }
+               wake_up_interruptible_all(&lgr->llc_waiter);
+       }
        smc_lgr_free(lgr);
 }
 
@@ -245,6 +291,88 @@ static void smc_lgr_terminate_work(struct work_struct *work)
        __smc_lgr_terminate(lgr, true);
 }
 
+/* return next unique link id for the lgr */
+static u8 smcr_next_link_id(struct smc_link_group *lgr)
+{
+       u8 link_id;
+       int i;
+
+       while (1) {
+               link_id = ++lgr->next_link_id;
+               if (!link_id)   /* skip zero as link_id */
+                       link_id = ++lgr->next_link_id;
+               for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
+                       if (smc_link_usable(&lgr->lnk[i]) &&
+                           lgr->lnk[i].link_id == link_id)
+                               break;  /* link_id already in use */
+               if (i == SMC_LINKS_PER_LGR_MAX)
+                       break;          /* found an unused link_id */
+       }
+       return link_id;
+}
+
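
next_link_id is a u8, so it wraps after 255 allocations; zero is skipped because link_id 0 is reserved, and an id still held by a usable link is retried, which only matters after wrap-around. A stand-alone model of the allocator, with a toy in_use() predicate standing in for the usable-link scan:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    static uint8_t next_link_id;

    /* demo predicate: pretend id 7 is already taken by a usable link */
    static bool in_use(uint8_t id)
    {
            return id == 7;
    }

    static uint8_t alloc_link_id(void)
    {
            uint8_t id;

    again:
            id = ++next_link_id;
            if (!id)            /* u8 wrapped 255 -> 0: skip reserved 0 */
                    id = ++next_link_id;
            if (in_use(id))     /* collision, possible after wrap: retry */
                    goto again;
            return id;
    }

    int main(void)
    {
            for (int i = 0; i < 8; i++)
                    printf("%u ", alloc_link_id());
            printf("\n");       /* prints: 1 2 3 4 5 6 8 9 (7 skipped) */
            return 0;
    }
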
+int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
+                  u8 link_idx, struct smc_init_info *ini)
+{
+       u8 rndvec[3];
+       int rc;
+
+       get_device(&ini->ib_dev->ibdev->dev);
+       atomic_inc(&ini->ib_dev->lnk_cnt);
+       lnk->state = SMC_LNK_ACTIVATING;
+       lnk->link_id = smcr_next_link_id(lgr);
+       lnk->lgr = lgr;
+       lnk->link_idx = link_idx;
+       lnk->smcibdev = ini->ib_dev;
+       lnk->ibport = ini->ib_port;
+       lnk->path_mtu = ini->ib_dev->pattr[ini->ib_port - 1].active_mtu;
+       INIT_WORK(&lnk->link_down_wrk, smc_link_down_work);
+       if (!ini->ib_dev->initialized) {
+               rc = smc_ib_setup_per_ibdev(ini->ib_dev);
+               if (rc)
+                       goto out;
+       }
+       get_random_bytes(rndvec, sizeof(rndvec));
+       lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) +
+               (rndvec[2] << 16);
+       rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport,
+                                 ini->vlan_id, lnk->gid, &lnk->sgid_index);
+       if (rc)
+               goto out;
+       rc = smc_llc_link_init(lnk);
+       if (rc)
+               goto out;
+       rc = smc_wr_alloc_link_mem(lnk);
+       if (rc)
+               goto clear_llc_lnk;
+       rc = smc_ib_create_protection_domain(lnk);
+       if (rc)
+               goto free_link_mem;
+       rc = smc_ib_create_queue_pair(lnk);
+       if (rc)
+               goto dealloc_pd;
+       rc = smc_wr_create_link(lnk);
+       if (rc)
+               goto destroy_qp;
+       return 0;
+
+destroy_qp:
+       smc_ib_destroy_queue_pair(lnk);
+dealloc_pd:
+       smc_ib_dealloc_protection_domain(lnk);
+free_link_mem:
+       smc_wr_free_link_mem(lnk);
+clear_llc_lnk:
+       smc_llc_link_clear(lnk);
+out:
+       put_device(&ini->ib_dev->ibdev->dev);
+       memset(lnk, 0, sizeof(struct smc_link));
+       lnk->state = SMC_LNK_UNUSED;
+       if (!atomic_dec_return(&ini->ib_dev->lnk_cnt))
+               wake_up(&ini->ib_dev->lnks_deleted);
+       return rc;
+}
+
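
smcr_link_init() uses the standard goto-unwind ladder: every setup step that can fail jumps to a label that undoes exactly the steps already completed, in reverse order of construction. The shape in isolation (alloc_*/undo_* are placeholder helpers, not kernel functions; alloc_c fails to exercise the unwind path):

    #include <stdio.h>

    struct obj { int a, b, c; };

    static int alloc_a(struct obj *o) { o->a = 1; return 0; }
    static void undo_a(struct obj *o) { o->a = 0; }
    static int alloc_b(struct obj *o) { o->b = 1; return 0; }
    static void undo_b(struct obj *o) { o->b = 0; }
    static int alloc_c(struct obj *o) { (void)o; return -1; }

    static int init_object(struct obj *o)
    {
            int rc;

            rc = alloc_a(o);
            if (rc)
                    goto out;
            rc = alloc_b(o);
            if (rc)
                    goto cleanup_a;
            rc = alloc_c(o);
            if (rc)
                    goto cleanup_b;     /* unwind steps 2 and 1 */
            return 0;                   /* fully constructed */

    cleanup_b:
            undo_b(o);                  /* reverse order of construction */
    cleanup_a:
            undo_a(o);
    out:
            return rc;
    }

    int main(void)
    {
            struct obj o = { 0 };
            int rc = init_object(&o);

            printf("rc=%d a=%d b=%d\n", rc, o.a, o.b); /* rc=-1 a=0 b=0 */
            return 0;
    }
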
 /* create a new SMC link group */
 static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
 {
@@ -252,7 +380,7 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
        struct list_head *lgr_list;
        struct smc_link *lnk;
        spinlock_t *lgr_lock;
-       u8 rndvec[3];
+       u8 link_idx;
        int rc = 0;
        int i;
 
@@ -274,13 +402,14 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
        lgr->freefast = 0;
        lgr->freeing = 0;
        lgr->vlan_id = ini->vlan_id;
-       rwlock_init(&lgr->sndbufs_lock);
-       rwlock_init(&lgr->rmbs_lock);
+       mutex_init(&lgr->sndbufs_lock);
+       mutex_init(&lgr->rmbs_lock);
        rwlock_init(&lgr->conns_lock);
        for (i = 0; i < SMC_RMBE_SIZES; i++) {
                INIT_LIST_HEAD(&lgr->sndbufs[i]);
                INIT_LIST_HEAD(&lgr->rmbs[i]);
        }
+       lgr->next_link_id = 0;
        smc_lgr_list.num += SMC_LGR_NUM_INCR;
        memcpy(&lgr->id, (u8 *)&smc_lgr_list.num, SMC_LGR_ID_SIZE);
        INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work);
@@ -297,48 +426,21 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
                atomic_inc(&ini->ism_dev->lgr_cnt);
        } else {
                /* SMC-R specific settings */
-               get_device(&ini->ib_dev->ibdev->dev);
                lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
                memcpy(lgr->peer_systemid, ini->ib_lcl->id_for_peer,
                       SMC_SYSTEMID_LEN);
+               memcpy(lgr->pnet_id, ini->ib_dev->pnetid[ini->ib_port - 1],
+                      SMC_MAX_PNETID_LEN);
+               smc_llc_lgr_init(lgr, smc);
 
-               lnk = &lgr->lnk[SMC_SINGLE_LINK];
-               /* initialize link */
-               lnk->state = SMC_LNK_ACTIVATING;
-               lnk->link_id = SMC_SINGLE_LINK;
-               lnk->smcibdev = ini->ib_dev;
-               lnk->ibport = ini->ib_port;
-               lgr_list = &smc_lgr_list.list;
-               lgr_lock = &smc_lgr_list.lock;
-               lnk->path_mtu =
-                       ini->ib_dev->pattr[ini->ib_port - 1].active_mtu;
-               if (!ini->ib_dev->initialized)
-                       smc_ib_setup_per_ibdev(ini->ib_dev);
-               get_random_bytes(rndvec, sizeof(rndvec));
-               lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) +
-                       (rndvec[2] << 16);
-               rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport,
-                                         ini->vlan_id, lnk->gid,
-                                         &lnk->sgid_index);
-               if (rc)
-                       goto free_lgr;
-               rc = smc_llc_link_init(lnk);
+               link_idx = SMC_SINGLE_LINK;
+               lnk = &lgr->lnk[link_idx];
+               rc = smcr_link_init(lgr, lnk, link_idx, ini);
                if (rc)
                        goto free_lgr;
-               rc = smc_wr_alloc_link_mem(lnk);
-               if (rc)
-                       goto clear_llc_lnk;
-               rc = smc_ib_create_protection_domain(lnk);
-               if (rc)
-                       goto free_link_mem;
-               rc = smc_ib_create_queue_pair(lnk);
-               if (rc)
-                       goto dealloc_pd;
-               rc = smc_wr_create_link(lnk);
-               if (rc)
-                       goto destroy_qp;
+               lgr_list = &smc_lgr_list.list;
+               lgr_lock = &smc_lgr_list.lock;
                atomic_inc(&lgr_cnt);
-               atomic_inc(&ini->ib_dev->lnk_cnt);
        }
        smc->conn.lgr = lgr;
        spin_lock_bh(lgr_lock);
@@ -346,14 +448,6 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
        spin_unlock_bh(lgr_lock);
        return 0;
 
-destroy_qp:
-       smc_ib_destroy_queue_pair(lnk);
-dealloc_pd:
-       smc_ib_dealloc_protection_domain(lnk);
-free_link_mem:
-       smc_wr_free_link_mem(lnk);
-clear_llc_lnk:
-       smc_llc_link_clear(lnk);
 free_lgr:
        kfree(lgr);
 ism_put_vlan:
@@ -369,29 +463,174 @@ out:
        return rc;
 }
 
+static int smc_write_space(struct smc_connection *conn)
+{
+       int buffer_len = conn->peer_rmbe_size;
+       union smc_host_cursor prod;
+       union smc_host_cursor cons;
+       int space;
+
+       smc_curs_copy(&prod, &conn->local_tx_ctrl.prod, conn);
+       smc_curs_copy(&cons, &conn->local_rx_ctrl.cons, conn);
+       /* determine rx_buf space */
+       space = buffer_len - smc_curs_diff(buffer_len, &cons, &prod);
+       return space;
+}
+
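
smc_write_space() is ordinary ring-buffer accounting: smc_curs_diff() yields the bytes in flight between cons and prod, and the writable space is the buffer length minus that. For a 16 KB peer RMB with prod at offset 12288 and cons at 4096, 8 KB are in flight and 8 KB are writable. The arithmetic in isolation, with the cursor wrap counters reduced to plain offsets:

    #include <stdio.h>

    /* bytes from cons to prod in a ring of size len (single-wrap case) */
    static int curs_diff(int len, int cons, int prod)
    {
            return prod >= cons ? prod - cons : len - (cons - prod);
    }

    int main(void)
    {
            int len = 16384, cons = 4096, prod = 12288;
            int in_flight = curs_diff(len, cons, prod);

            /* prints: in flight 8192, writable 8192 */
            printf("in flight %d, writable %d\n", in_flight, len - in_flight);
            return 0;
    }
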
+static int smc_switch_cursor(struct smc_sock *smc)
+{
+       struct smc_connection *conn = &smc->conn;
+       union smc_host_cursor cons, fin;
+       int rc = 0;
+       int diff;
+
+       smc_curs_copy(&conn->tx_curs_sent, &conn->tx_curs_fin, conn);
+       smc_curs_copy(&fin, &conn->local_tx_ctrl_fin, conn);
+       /* set prod cursor to old state, enforce tx_rdma_writes() */
+       smc_curs_copy(&conn->local_tx_ctrl.prod, &fin, conn);
+       smc_curs_copy(&cons, &conn->local_rx_ctrl.cons, conn);
+
+       if (smc_curs_comp(conn->peer_rmbe_size, &cons, &fin) < 0) {
+               /* cons cursor advanced more than fin, and prod was set
+                * to fin above, so now prod is smaller than cons. Fix that.
+                */
+               diff = smc_curs_diff(conn->peer_rmbe_size, &fin, &cons);
+               smc_curs_add(conn->sndbuf_desc->len,
+                            &conn->tx_curs_sent, diff);
+               smc_curs_add(conn->sndbuf_desc->len,
+                            &conn->tx_curs_fin, diff);
+
+               smp_mb__before_atomic();
+               atomic_add(diff, &conn->sndbuf_space);
+               smp_mb__after_atomic();
+
+               smc_curs_add(conn->peer_rmbe_size,
+                            &conn->local_tx_ctrl.prod, diff);
+               smc_curs_add(conn->peer_rmbe_size,
+                            &conn->local_tx_ctrl_fin, diff);
+       }
+       /* recalculate, value is used by tx_rdma_writes() */
+       atomic_set(&smc->conn.peer_rmbe_space, smc_write_space(conn));
+
+       if (smc->sk.sk_state != SMC_INIT &&
+           smc->sk.sk_state != SMC_CLOSED) {
+               rc = smcr_cdc_msg_send_validation(conn);
+               if (!rc) {
+                       schedule_delayed_work(&conn->tx_work, 0);
+                       smc->sk.sk_data_ready(&smc->sk);
+               }
+       }
+       return rc;
+}
+
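
The fixup branch above covers the case where the peer's cons cursor has overtaken the local fin cursor at switch time: prod was just rewound to fin, so without correction prod would trail cons and tx_rdma_writes() would resend bytes the peer already confirmed. Advancing every tx cursor by diff = cons - fin and returning diff bytes to sndbuf_space re-aligns the state. A worked example with plain integers standing in for the cursor pairs (wrap handling omitted):

    #include <stdio.h>

    int main(void)
    {
            int fin = 1000, prod = 1000;  /* prod rewound to fin */
            int cons = 1400;              /* peer confirmed further */
            int sndbuf_space = 2048;

            if (cons > fin) {             /* the fixup case */
                    int diff = cons - fin;

                    fin += diff;          /* local_tx_ctrl_fin */
                    prod += diff;         /* local_tx_ctrl.prod */
                    sndbuf_space += diff; /* acked bytes reusable now */
            }
            /* prints: fin 1400 prod 1400 space 2448 */
            printf("fin %d prod %d space %d\n", fin, prod, sndbuf_space);
            return 0;
    }
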
+struct smc_link *smc_switch_conns(struct smc_link_group *lgr,
+                                 struct smc_link *from_lnk, bool is_dev_err)
+{
+       struct smc_link *to_lnk = NULL;
+       struct smc_connection *conn;
+       struct smc_sock *smc;
+       struct rb_node *node;
+       int i, rc = 0;
+
+       /* link is inactive, wake up tx waiters */
+       smc_wr_wakeup_tx_wait(from_lnk);
+
+       for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+               if (lgr->lnk[i].state != SMC_LNK_ACTIVE ||
+                   i == from_lnk->link_idx)
+                       continue;
+               if (is_dev_err && from_lnk->smcibdev == lgr->lnk[i].smcibdev &&
+                   from_lnk->ibport == lgr->lnk[i].ibport) {
+                       continue;
+               }
+               to_lnk = &lgr->lnk[i];
+               break;
+       }
+       if (!to_lnk) {
+               smc_lgr_terminate_sched(lgr);
+               return NULL;
+       }
+again:
+       read_lock_bh(&lgr->conns_lock);
+       for (node = rb_first(&lgr->conns_all); node; node = rb_next(node)) {
+               conn = rb_entry(node, struct smc_connection, alert_node);
+               if (conn->lnk != from_lnk)
+                       continue;
+               smc = container_of(conn, struct smc_sock, conn);
+               /* conn->lnk not yet set in SMC_INIT state */
+               if (smc->sk.sk_state == SMC_INIT)
+                       continue;
+               if (smc->sk.sk_state == SMC_CLOSED ||
+                   smc->sk.sk_state == SMC_PEERCLOSEWAIT1 ||
+                   smc->sk.sk_state == SMC_PEERCLOSEWAIT2 ||
+                   smc->sk.sk_state == SMC_APPFINCLOSEWAIT ||
+                   smc->sk.sk_state == SMC_APPCLOSEWAIT1 ||
+                   smc->sk.sk_state == SMC_APPCLOSEWAIT2 ||
+                   smc->sk.sk_state == SMC_PEERFINCLOSEWAIT ||
+                   smc->sk.sk_state == SMC_PEERABORTWAIT ||
+                   smc->sk.sk_state == SMC_PROCESSABORT) {
+                       spin_lock_bh(&conn->send_lock);
+                       conn->lnk = to_lnk;
+                       spin_unlock_bh(&conn->send_lock);
+                       continue;
+               }
+               sock_hold(&smc->sk);
+               read_unlock_bh(&lgr->conns_lock);
+               /* avoid race with smcr_tx_sndbuf_nonempty() */
+               spin_lock_bh(&conn->send_lock);
+               conn->lnk = to_lnk;
+               rc = smc_switch_cursor(smc);
+               spin_unlock_bh(&conn->send_lock);
+               sock_put(&smc->sk);
+               if (rc) {
+                       smcr_link_down_cond_sched(to_lnk);
+                       return NULL;
+               }
+               goto again;
+       }
+       read_unlock_bh(&lgr->conns_lock);
+       return to_lnk;
+}
+
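
Note the iteration pattern in smc_switch_conns(): conns_lock is dropped before smc_switch_cursor() runs (it takes send_lock and may send a validation message), and the walk then restarts from rb_first() via goto again. The rescan terminates because every connection already moved now fails the conn->lnk != from_lnk test. The same restart-scan idiom over a plain array, with the locking elided:

    #include <stdio.h>

    /* move every element matching 'from' to 'to', restarting the scan
     * after each move, as smc_switch_conns() does over the rbtree
     */
    static void switch_all(int v[], int n, int from, int to)
    {
    again:
            for (int i = 0; i < n; i++) {
                    if (v[i] != from)
                            continue;     /* already moved or unrelated */
                    v[i] = to;            /* "drop lock, do work, rescan" */
                    goto again;
            }
    }

    int main(void)
    {
            int conns[] = { 1, 2, 1, 1, 2 };

            switch_all(conns, 5, 1, 2);
            for (int i = 0; i < 5; i++)
                    printf("%d ", conns[i]);  /* prints: 2 2 2 2 2 */
            printf("\n");
            return 0;
    }
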
+static void smcr_buf_unuse(struct smc_buf_desc *rmb_desc,
+                          struct smc_link_group *lgr)
+{
+       int rc;
+
+       if (rmb_desc->is_conf_rkey && !list_empty(&lgr->list)) {
+               /* unregister rmb with peer */
+               rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY);
+               if (!rc) {
+                       /* protect against smc_llc_cli_rkey_exchange() */
+                       mutex_lock(&lgr->llc_conf_mutex);
+                       smc_llc_do_delete_rkey(lgr, rmb_desc);
+                       rmb_desc->is_conf_rkey = false;
+                       mutex_unlock(&lgr->llc_conf_mutex);
+                       smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
+               }
+       }
+
+       if (rmb_desc->is_reg_err) {
+               /* buf registration failed, reuse not possible */
+               mutex_lock(&lgr->rmbs_lock);
+               list_del(&rmb_desc->list);
+               mutex_unlock(&lgr->rmbs_lock);
+
+               smc_buf_free(lgr, true, rmb_desc);
+       } else {
+               rmb_desc->used = 0;
+       }
+}
+
 static void smc_buf_unuse(struct smc_connection *conn,
                          struct smc_link_group *lgr)
 {
        if (conn->sndbuf_desc)
                conn->sndbuf_desc->used = 0;
-       if (conn->rmb_desc) {
-               if (!conn->rmb_desc->regerr) {
-                       if (!lgr->is_smcd && !list_empty(&lgr->list)) {
-                               /* unregister rmb with peer */
-                               smc_llc_do_delete_rkey(
-                                               &lgr->lnk[SMC_SINGLE_LINK],
-                                               conn->rmb_desc);
-                       }
-                       conn->rmb_desc->used = 0;
-               } else {
-                       /* buf registration failed, reuse not possible */
-                       write_lock_bh(&lgr->rmbs_lock);
-                       list_del(&conn->rmb_desc->list);
-                       write_unlock_bh(&lgr->rmbs_lock);
-
-                       smc_buf_free(lgr, true, conn->rmb_desc);
-               }
-       }
+       if (conn->rmb_desc && lgr->is_smcd)
+               conn->rmb_desc->used = 0;
+       else if (conn->rmb_desc)
+               smcr_buf_unuse(conn->rmb_desc, lgr);
 }
 
 /* remove a finished connection from its link group */
@@ -407,6 +646,8 @@ void smc_conn_free(struct smc_connection *conn)
                tasklet_kill(&conn->rx_tsklet);
        } else {
                smc_cdc_tx_dismiss_slots(conn);
+               if (current_work() != &conn->abort_work)
+                       cancel_work_sync(&conn->abort_work);
        }
        if (!list_empty(&lgr->list)) {
                smc_lgr_unregister_conn(conn);
@@ -417,35 +658,91 @@ void smc_conn_free(struct smc_connection *conn)
                smc_lgr_schedule_free_work(lgr);
 }
 
-static void smc_link_clear(struct smc_link *lnk)
+/* unregister a link from a buf_desc */
+static void smcr_buf_unmap_link(struct smc_buf_desc *buf_desc, bool is_rmb,
+                               struct smc_link *lnk)
+{
+       if (is_rmb)
+               buf_desc->is_reg_mr[lnk->link_idx] = false;
+       if (!buf_desc->is_map_ib[lnk->link_idx])
+               return;
+       if (is_rmb) {
+               if (buf_desc->mr_rx[lnk->link_idx]) {
+                       smc_ib_put_memory_region(
+                                       buf_desc->mr_rx[lnk->link_idx]);
+                       buf_desc->mr_rx[lnk->link_idx] = NULL;
+               }
+               smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_FROM_DEVICE);
+       } else {
+               smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_TO_DEVICE);
+       }
+       sg_free_table(&buf_desc->sgt[lnk->link_idx]);
+       buf_desc->is_map_ib[lnk->link_idx] = false;
+}
+
+/* unmap all buffers of lgr for a deleted link */
+static void smcr_buf_unmap_lgr(struct smc_link *lnk)
+{
+       struct smc_link_group *lgr = lnk->lgr;
+       struct smc_buf_desc *buf_desc, *bf;
+       int i;
+
+       for (i = 0; i < SMC_RMBE_SIZES; i++) {
+               mutex_lock(&lgr->rmbs_lock);
+               list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list)
+                       smcr_buf_unmap_link(buf_desc, true, lnk);
+               mutex_unlock(&lgr->rmbs_lock);
+               mutex_lock(&lgr->sndbufs_lock);
+               list_for_each_entry_safe(buf_desc, bf, &lgr->sndbufs[i],
+                                        list)
+                       smcr_buf_unmap_link(buf_desc, false, lnk);
+               mutex_unlock(&lgr->sndbufs_lock);
+       }
+}
+
+static void smcr_rtoken_clear_link(struct smc_link *lnk)
+{
+       struct smc_link_group *lgr = lnk->lgr;
+       int i;
+
+       for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
+               lgr->rtokens[i][lnk->link_idx].rkey = 0;
+               lgr->rtokens[i][lnk->link_idx].dma_addr = 0;
+       }
+}
+
+/* must be called under lgr->llc_conf_mutex lock */
+void smcr_link_clear(struct smc_link *lnk)
 {
+       struct smc_ib_device *smcibdev;
+
+       if (!lnk->lgr || lnk->state == SMC_LNK_UNUSED)
+               return;
        lnk->peer_qpn = 0;
        smc_llc_link_clear(lnk);
+       smcr_buf_unmap_lgr(lnk);
+       smcr_rtoken_clear_link(lnk);
        smc_ib_modify_qp_reset(lnk);
        smc_wr_free_link(lnk);
        smc_ib_destroy_queue_pair(lnk);
        smc_ib_dealloc_protection_domain(lnk);
        smc_wr_free_link_mem(lnk);
-       if (!atomic_dec_return(&lnk->smcibdev->lnk_cnt))
-               wake_up(&lnk->smcibdev->lnks_deleted);
+       put_device(&lnk->smcibdev->ibdev->dev);
+       smcibdev = lnk->smcibdev;
+       memset(lnk, 0, sizeof(struct smc_link));
+       lnk->state = SMC_LNK_UNUSED;
+       if (!atomic_dec_return(&smcibdev->lnk_cnt))
+               wake_up(&smcibdev->lnks_deleted);
 }
 
 static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb,
                          struct smc_buf_desc *buf_desc)
 {
-       struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
+       int i;
+
+       for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
+               smcr_buf_unmap_link(buf_desc, is_rmb, &lgr->lnk[i]);
 
-       if (is_rmb) {
-               if (buf_desc->mr_rx[SMC_SINGLE_LINK])
-                       smc_ib_put_memory_region(
-                                       buf_desc->mr_rx[SMC_SINGLE_LINK]);
-               smc_ib_buf_unmap_sg(lnk->smcibdev, buf_desc,
-                                   DMA_FROM_DEVICE);
-       } else {
-               smc_ib_buf_unmap_sg(lnk->smcibdev, buf_desc,
-                                   DMA_TO_DEVICE);
-       }
-       sg_free_table(&buf_desc->sgt[SMC_SINGLE_LINK]);
        if (buf_desc->pages)
                __free_pages(buf_desc->pages, buf_desc->order);
        kfree(buf_desc);
@@ -503,6 +800,8 @@ static void smc_lgr_free_bufs(struct smc_link_group *lgr)
 /* remove a link group */
 static void smc_lgr_free(struct smc_link_group *lgr)
 {
+       int i;
+
        smc_lgr_free_bufs(lgr);
        if (lgr->is_smcd) {
                if (!lgr->terminating) {
@@ -512,27 +811,17 @@ static void smc_lgr_free(struct smc_link_group *lgr)
                if (!atomic_dec_return(&lgr->smcd->lgr_cnt))
                        wake_up(&lgr->smcd->lgrs_deleted);
        } else {
-               smc_link_clear(&lgr->lnk[SMC_SINGLE_LINK]);
-               put_device(&lgr->lnk[SMC_SINGLE_LINK].smcibdev->ibdev->dev);
+               for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+                       if (lgr->lnk[i].state != SMC_LNK_UNUSED)
+                               smcr_link_clear(&lgr->lnk[i]);
+               }
+               smc_llc_lgr_clear(lgr);
                if (!atomic_dec_return(&lgr_cnt))
                        wake_up(&lgrs_deleted);
        }
        kfree(lgr);
 }
 
-void smc_lgr_forget(struct smc_link_group *lgr)
-{
-       struct list_head *lgr_list;
-       spinlock_t *lgr_lock;
-
-       lgr_list = smc_lgr_list_head(lgr, &lgr_lock);
-       spin_lock_bh(lgr_lock);
-       /* do not use this link group for new connections */
-       if (!list_empty(lgr_list))
-               list_del_init(lgr_list);
-       spin_unlock_bh(lgr_lock);
-}
-
 static void smcd_unregister_all_dmbs(struct smc_link_group *lgr)
 {
        int i;
@@ -581,16 +870,26 @@ static void smc_conn_kill(struct smc_connection *conn, bool soft)
 
 static void smc_lgr_cleanup(struct smc_link_group *lgr)
 {
+       int i;
+
        if (lgr->is_smcd) {
                smc_ism_signal_shutdown(lgr);
                smcd_unregister_all_dmbs(lgr);
                smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
                put_device(&lgr->smcd->dev);
        } else {
-               struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
+               u32 rsn = lgr->llc_termination_rsn;
+
+               if (!rsn)
+                       rsn = SMC_LLC_DEL_PROG_INIT_TERM;
+               smc_llc_send_link_delete_all(lgr, false, rsn);
+               for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+                       struct smc_link *lnk = &lgr->lnk[i];
 
-               if (lnk->state != SMC_LNK_INACTIVE)
-                       smc_llc_link_inactive(lnk);
+                       if (smc_link_usable(lnk))
+                               lnk->state = SMC_LNK_INACTIVE;
+               }
+               wake_up_interruptible_all(&lgr->llc_waiter);
        }
 }
 
@@ -609,8 +908,6 @@ static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft)
        if (!soft)
                cancel_delayed_work_sync(&lgr->free_work);
        lgr->terminating = 1;
-       if (!lgr->is_smcd)
-               smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]);
 
        /* kill remaining link group connections */
        read_lock_bh(&lgr->conns_lock);
@@ -651,29 +948,6 @@ void smc_lgr_terminate_sched(struct smc_link_group *lgr)
        schedule_work(&lgr->terminate_work);
 }
 
-/* Called when IB port is terminated */
-void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport)
-{
-       struct smc_link_group *lgr, *l;
-       LIST_HEAD(lgr_free_list);
-
-       spin_lock_bh(&smc_lgr_list.lock);
-       list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) {
-               if (!lgr->is_smcd &&
-                   lgr->lnk[SMC_SINGLE_LINK].smcibdev == smcibdev &&
-                   lgr->lnk[SMC_SINGLE_LINK].ibport == ibport) {
-                       list_move(&lgr->list, &lgr_free_list);
-                       lgr->freeing = 1;
-               }
-       }
-       spin_unlock_bh(&smc_lgr_list.lock);
-
-       list_for_each_entry_safe(lgr, l, &lgr_free_list, list) {
-               list_del_init(&lgr->list);
-               __smc_lgr_terminate(lgr, false);
-       }
-}
-
 /* Called when peer lgr shutdown (regularly or abnormally) is received */
 void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan)
 {
@@ -728,6 +1002,7 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
 {
        struct smc_link_group *lgr, *lg;
        LIST_HEAD(lgr_free_list);
+       int i;
 
        spin_lock_bh(&smc_lgr_list.lock);
        if (!smcibdev) {
@@ -736,9 +1011,9 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
                        lgr->freeing = 1;
        } else {
                list_for_each_entry_safe(lgr, lg, &smc_lgr_list.list, list) {
-                       if (lgr->lnk[SMC_SINGLE_LINK].smcibdev == smcibdev) {
-                               list_move(&lgr->list, &lgr_free_list);
-                               lgr->freeing = 1;
+                       for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+                               if (lgr->lnk[i].smcibdev == smcibdev)
+                                       smcr_link_down_cond_sched(&lgr->lnk[i]);
                        }
                }
        }
@@ -746,6 +1021,7 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
 
        list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) {
                list_del_init(&lgr->list);
+               smc_llc_set_termination_rsn(lgr, SMC_LLC_DEL_OP_INIT_TERM);
                __smc_lgr_terminate(lgr, false);
        }
 
@@ -759,6 +1035,201 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
        }
 }
 
+/* set new lgr type and clear all asymmetric link tagging */
+void smcr_lgr_set_type(struct smc_link_group *lgr, enum smc_lgr_type new_type)
+{
+       int i;
+
+       for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
+               if (smc_link_usable(&lgr->lnk[i]))
+                       lgr->lnk[i].link_is_asym = false;
+       lgr->type = new_type;
+}
+
+/* set new lgr type and tag a link as asymmetric */
+void smcr_lgr_set_type_asym(struct smc_link_group *lgr,
+                           enum smc_lgr_type new_type, int asym_lnk_idx)
+{
+       smcr_lgr_set_type(lgr, new_type);
+       lgr->lnk[asym_lnk_idx].link_is_asym = true;
+}
+
+/* abort connection, abort_work scheduled from tasklet context */
+static void smc_conn_abort_work(struct work_struct *work)
+{
+       struct smc_connection *conn = container_of(work,
+                                                  struct smc_connection,
+                                                  abort_work);
+       struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
+
+       smc_conn_kill(conn, true);
+       sock_put(&smc->sk); /* sock_hold done by schedulers of abort_work */
+}
+
+/* link is up - establish alternate link if applicable */
+static void smcr_link_up(struct smc_link_group *lgr,
+                        struct smc_ib_device *smcibdev, u8 ibport)
+{
+       struct smc_link *link = NULL;
+
+       if (list_empty(&lgr->list) ||
+           lgr->type == SMC_LGR_SYMMETRIC ||
+           lgr->type == SMC_LGR_ASYMMETRIC_PEER)
+               return;
+
+       if (lgr->role == SMC_SERV) {
+               /* trigger local add link processing */
+               link = smc_llc_usable_link(lgr);
+               if (!link)
+                       return;
+               smc_llc_srv_add_link_local(link);
+       } else {
+               /* invite server to start add link processing */
+               u8 gid[SMC_GID_SIZE];
+
+               if (smc_ib_determine_gid(smcibdev, ibport, lgr->vlan_id, gid,
+                                        NULL))
+                       return;
+               if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) {
+                       /* some other llc task is ongoing */
+                       wait_event_interruptible_timeout(lgr->llc_waiter,
+                               (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE),
+                               SMC_LLC_WAIT_TIME);
+               }
+               if (list_empty(&lgr->list) ||
+                   !smc_ib_port_active(smcibdev, ibport))
+                       return; /* lgr or device no longer active */
+               link = smc_llc_usable_link(lgr);
+               if (!link)
+                       return;
+               smc_llc_send_add_link(link, smcibdev->mac[ibport - 1], gid,
+                                     NULL, SMC_LLC_REQ);
+       }
+}
+
+void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport)
+{
+       struct smc_ib_up_work *ib_work;
+       struct smc_link_group *lgr, *n;
+
+       list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) {
+               if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id,
+                           SMC_MAX_PNETID_LEN) ||
+                   lgr->type == SMC_LGR_SYMMETRIC ||
+                   lgr->type == SMC_LGR_ASYMMETRIC_PEER)
+                       continue;
+               ib_work = kmalloc(sizeof(*ib_work), GFP_KERNEL);
+               if (!ib_work)
+                       continue;
+               INIT_WORK(&ib_work->work, smc_link_up_work);
+               ib_work->lgr = lgr;
+               ib_work->smcibdev = smcibdev;
+               ib_work->ibport = ibport;
+               schedule_work(&ib_work->work);
+       }
+}
+
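
smcr_port_add() defers the LLC add-link processing to a workqueue: one heap-allocated smc_ib_up_work per matching link group carries the (lgr, smcibdev, ibport) triple into process context, and smc_link_up_work() frees it when done. A generic kernel-style sketch of that one-work-item-per-event pattern (ev_work, ev_work_fn() and on_event() are illustrative names, not part of this patch):

    #include <linux/kernel.h>
    #include <linux/slab.h>
    #include <linux/workqueue.h>

    /* one heap-allocated work item per event, freed by its handler */
    struct ev_work {
            struct work_struct work;  /* embedded for container_of() */
            void *ctx;                /* event context carried along */
    };

    static void ev_work_fn(struct work_struct *work)
    {
            struct ev_work *ew = container_of(work, struct ev_work, work);

            /* ... use ew->ctx in sleepable process context ... */
            kfree(ew);                /* the handler owns the item */
    }

    static void on_event(void *ctx)
    {
            struct ev_work *ew = kmalloc(sizeof(*ew), GFP_KERNEL);

            if (!ew)
                    return;           /* drop the event, as the patch does */
            ew->ctx = ctx;
            INIT_WORK(&ew->work, ev_work_fn);
            schedule_work(&ew->work);
    }
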
+/* link is down - switch connections to alternate link,
+ * must be called under lgr->llc_conf_mutex lock
+ */
+static void smcr_link_down(struct smc_link *lnk)
+{
+       struct smc_link_group *lgr = lnk->lgr;
+       struct smc_link *to_lnk;
+       int del_link_id;
+
+       if (!lgr || lnk->state == SMC_LNK_UNUSED || list_empty(&lgr->list))
+               return;
+
+       smc_ib_modify_qp_reset(lnk);
+       to_lnk = smc_switch_conns(lgr, lnk, true);
+       if (!to_lnk) { /* no backup link available */
+               smcr_link_clear(lnk);
+               return;
+       }
+       smcr_lgr_set_type(lgr, SMC_LGR_SINGLE);
+       del_link_id = lnk->link_id;
+
+       if (lgr->role == SMC_SERV) {
+               /* trigger local delete link processing */
+               smc_llc_srv_delete_link_local(to_lnk, del_link_id);
+       } else {
+               if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) {
+                       /* another llc task is ongoing */
+                       mutex_unlock(&lgr->llc_conf_mutex);
+                       wait_event_interruptible_timeout(lgr->llc_waiter,
+                               (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE),
+                               SMC_LLC_WAIT_TIME);
+                       mutex_lock(&lgr->llc_conf_mutex);
+               }
+               smc_llc_send_delete_link(to_lnk, del_link_id, SMC_LLC_REQ, true,
+                                        SMC_LLC_DEL_LOST_PATH);
+       }
+}
+
+/* must be called under lgr->llc_conf_mutex lock */
+void smcr_link_down_cond(struct smc_link *lnk)
+{
+       if (smc_link_downing(&lnk->state))
+               smcr_link_down(lnk);
+}
+
+/* will get the lgr->llc_conf_mutex lock */
+void smcr_link_down_cond_sched(struct smc_link *lnk)
+{
+       if (smc_link_downing(&lnk->state))
+               schedule_work(&lnk->link_down_wrk);
+}
+
+void smcr_port_err(struct smc_ib_device *smcibdev, u8 ibport)
+{
+       struct smc_link_group *lgr, *n;
+       int i;
+
+       list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) {
+               if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id,
+                           SMC_MAX_PNETID_LEN))
+                       continue; /* lgr is not affected */
+               if (list_empty(&lgr->list))
+                       continue;
+               for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+                       struct smc_link *lnk = &lgr->lnk[i];
+
+                       if (smc_link_usable(lnk) &&
+                           lnk->smcibdev == smcibdev && lnk->ibport == ibport)
+                               smcr_link_down_cond_sched(lnk);
+               }
+       }
+}
+
+static void smc_link_up_work(struct work_struct *work)
+{
+       struct smc_ib_up_work *ib_work = container_of(work,
+                                                     struct smc_ib_up_work,
+                                                     work);
+       struct smc_link_group *lgr = ib_work->lgr;
+
+       if (list_empty(&lgr->list))
+               goto out;
+       smcr_link_up(lgr, ib_work->smcibdev, ib_work->ibport);
+out:
+       kfree(ib_work);
+}
+
+static void smc_link_down_work(struct work_struct *work)
+{
+       struct smc_link *link = container_of(work, struct smc_link,
+                                            link_down_wrk);
+       struct smc_link_group *lgr = link->lgr;
+
+       if (list_empty(&lgr->list))
+               return;
+       wake_up_interruptible_all(&lgr->llc_waiter);
+       mutex_lock(&lgr->llc_conf_mutex);
+       smcr_link_down(link);
+       mutex_unlock(&lgr->llc_conf_mutex);
+}
+
 /* Determine vlan of internal TCP socket.
  * @vlan_id: address to store the determined vlan id into
  */
@@ -810,15 +1281,21 @@ static bool smcr_lgr_match(struct smc_link_group *lgr,
                           struct smc_clc_msg_local *lcl,
                           enum smc_lgr_role role, u32 clcqpn)
 {
-       return !memcmp(lgr->peer_systemid, lcl->id_for_peer,
-                      SMC_SYSTEMID_LEN) &&
-               !memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_gid, &lcl->gid,
-                       SMC_GID_SIZE) &&
-               !memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_mac, lcl->mac,
-                       sizeof(lcl->mac)) &&
-               lgr->role == role &&
-               (lgr->role == SMC_SERV ||
-                lgr->lnk[SMC_SINGLE_LINK].peer_qpn == clcqpn);
+       int i;
+
+       if (memcmp(lgr->peer_systemid, lcl->id_for_peer, SMC_SYSTEMID_LEN) ||
+           lgr->role != role)
+               return false;
+
+       for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+               if (lgr->lnk[i].state != SMC_LNK_ACTIVE)
+                       continue;
+               if ((lgr->role == SMC_SERV || lgr->lnk[i].peer_qpn == clcqpn) &&
+                   !memcmp(lgr->lnk[i].peer_gid, &lcl->gid, SMC_GID_SIZE) &&
+                   !memcmp(lgr->lnk[i].peer_mac, lcl->mac, sizeof(lcl->mac)))
+                       return true;
+       }
+       return false;
 }
 
 static bool smcd_lgr_match(struct smc_link_group *lgr,
@@ -859,15 +1336,17 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
                        /* link group found */
                        ini->cln_first_contact = SMC_REUSE_CONTACT;
                        conn->lgr = lgr;
-                       smc_lgr_register_conn(conn); /* add smc conn to lgr */
-                       if (delayed_work_pending(&lgr->free_work))
-                               cancel_delayed_work(&lgr->free_work);
+                       rc = smc_lgr_register_conn(conn, false);
                        write_unlock_bh(&lgr->conns_lock);
+                       if (!rc && delayed_work_pending(&lgr->free_work))
+                               cancel_delayed_work(&lgr->free_work);
                        break;
                }
                write_unlock_bh(&lgr->conns_lock);
        }
        spin_unlock_bh(lgr_lock);
+       if (rc)
+               return rc;
 
        if (role == SMC_CLNT && !ini->srv_first_contact &&
            ini->cln_first_contact == SMC_FIRST_CONTACT) {
@@ -885,12 +1364,15 @@ create:
                        goto out;
                lgr = conn->lgr;
                write_lock_bh(&lgr->conns_lock);
-               smc_lgr_register_conn(conn); /* add smc conn to lgr */
+               rc = smc_lgr_register_conn(conn, true);
                write_unlock_bh(&lgr->conns_lock);
+               if (rc)
+                       goto out;
        }
        conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
        conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
        conn->urg_state = SMC_URG_READ;
+       INIT_WORK(&smc->conn.abort_work, smc_conn_abort_work);
        if (ini->is_smcd) {
                conn->rx_off = sizeof(struct smcd_cdc_msg);
                smcd_cdc_rx_init(conn); /* init tasklet for this conn */
@@ -934,19 +1416,19 @@ int smc_uncompress_bufsize(u8 compressed)
  * buffer size; if not available, return NULL
  */
 static struct smc_buf_desc *smc_buf_get_slot(int compressed_bufsize,
-                                            rwlock_t *lock,
+                                            struct mutex *lock,
                                             struct list_head *buf_list)
 {
        struct smc_buf_desc *buf_slot;
 
-       read_lock_bh(lock);
+       mutex_lock(lock);
        list_for_each_entry(buf_slot, buf_list, list) {
                if (cmpxchg(&buf_slot->used, 0, 1) == 0) {
-                       read_unlock_bh(lock);
+                       mutex_unlock(lock);
                        return buf_slot;
                }
        }
-       read_unlock_bh(lock);
+       mutex_unlock(lock);
        return NULL;
 }
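
The slot claim itself does not rely on the list lock: cmpxchg(&buf_slot->used, 0, 1) atomically flips a free descriptor to in-use, so two scanners can never both win the same slot; the lock (now a mutex, since buffer mapping for new links elsewhere in this patch may sleep while holding it) only protects the list walk. The claim step in isolation, modeled with C11 atomics rather than the kernel macro:

    #include <stdatomic.h>
    #include <stdbool.h>

    struct slot { atomic_int used; };

    /* atomically claim a free slot (0 -> 1); only one caller can win */
    bool claim(struct slot *s)
    {
            int expected = 0;

            return atomic_compare_exchange_strong(&s->used, &expected, 1);
    }
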
 
@@ -959,12 +1441,135 @@ static inline int smc_rmb_wnd_update_limit(int rmbe_size)
        return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
 }
 
+/* map an rmb buf to a link */
+static int smcr_buf_map_link(struct smc_buf_desc *buf_desc, bool is_rmb,
+                            struct smc_link *lnk)
+{
+       int rc;
+
+       if (buf_desc->is_map_ib[lnk->link_idx])
+               return 0;
+
+       rc = sg_alloc_table(&buf_desc->sgt[lnk->link_idx], 1, GFP_KERNEL);
+       if (rc)
+               return rc;
+       sg_set_buf(buf_desc->sgt[lnk->link_idx].sgl,
+                  buf_desc->cpu_addr, buf_desc->len);
+
+       /* map sg table to DMA address */
+       rc = smc_ib_buf_map_sg(lnk, buf_desc,
+                              is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
+       /* SMC protocol depends on mapping to one DMA address only */
+       if (rc != 1) {
+               rc = -EAGAIN;
+               goto free_table;
+       }
+
+       /* create a new memory region for the RMB */
+       if (is_rmb) {
+               rc = smc_ib_get_memory_region(lnk->roce_pd,
+                                             IB_ACCESS_REMOTE_WRITE |
+                                             IB_ACCESS_LOCAL_WRITE,
+                                             buf_desc, lnk->link_idx);
+               if (rc)
+                       goto buf_unmap;
+               smc_ib_sync_sg_for_device(lnk, buf_desc, DMA_FROM_DEVICE);
+       }
+       buf_desc->is_map_ib[lnk->link_idx] = true;
+       return 0;
+
+buf_unmap:
+       smc_ib_buf_unmap_sg(lnk, buf_desc,
+                           is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
+free_table:
+       sg_free_table(&buf_desc->sgt[lnk->link_idx]);
+       return rc;
+}
+
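
smcr_buf_map_link() is deliberately idempotent per (buffer, link) pair: the is_map_ib[link_idx] flag turns repeated calls into no-ops, so smcr_buf_map_lgr() (mapping all old buffers on a new link) and smcr_buf_map_usable_links() (mapping a new buffer on all links) can overlap without double-mapping. A reduced model of that bookkeeping (struct buf and buf_map_link() are illustrative):

    #include <stdbool.h>

    #define LINKS_MAX 2   /* illustrative, not the kernel constant */

    struct buf { bool mapped[LINKS_MAX]; };

    /* idempotent per-link map: overlapping callers cannot double-map */
    int buf_map_link(struct buf *b, int idx)
    {
            if (b->mapped[idx])
                    return 0;   /* already mapped for this link */
            /* ... perform the real mapping exactly once ... */
            b->mapped[idx] = true;
            return 0;
    }
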
+/* register a new rmb on IB device,
+ * must be called under lgr->llc_conf_mutex lock
+ */
+int smcr_link_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc)
+{
+       if (list_empty(&link->lgr->list))
+               return -ENOLINK;
+       if (!rmb_desc->is_reg_mr[link->link_idx]) {
+               /* register memory region for new rmb */
+               if (smc_wr_reg_send(link, rmb_desc->mr_rx[link->link_idx])) {
+                       rmb_desc->is_reg_err = true;
+                       return -EFAULT;
+               }
+               rmb_desc->is_reg_mr[link->link_idx] = true;
+       }
+       return 0;
+}
+
+static int _smcr_buf_map_lgr(struct smc_link *lnk, struct mutex *lock,
+                            struct list_head *lst, bool is_rmb)
+{
+       struct smc_buf_desc *buf_desc, *bf;
+       int rc = 0;
+
+       mutex_lock(lock);
+       list_for_each_entry_safe(buf_desc, bf, lst, list) {
+               if (!buf_desc->used)
+                       continue;
+               rc = smcr_buf_map_link(buf_desc, is_rmb, lnk);
+               if (rc)
+                       goto out;
+       }
+out:
+       mutex_unlock(lock);
+       return rc;
+}
+
+/* map all used buffers of lgr for a new link */
+int smcr_buf_map_lgr(struct smc_link *lnk)
+{
+       struct smc_link_group *lgr = lnk->lgr;
+       int i, rc = 0;
+
+       for (i = 0; i < SMC_RMBE_SIZES; i++) {
+               rc = _smcr_buf_map_lgr(lnk, &lgr->rmbs_lock,
+                                      &lgr->rmbs[i], true);
+               if (rc)
+                       return rc;
+               rc = _smcr_buf_map_lgr(lnk, &lgr->sndbufs_lock,
+                                      &lgr->sndbufs[i], false);
+               if (rc)
+                       return rc;
+       }
+       return 0;
+}
+
+/* register all used buffers of lgr for a new link,
+ * must be called under lgr->llc_conf_mutex lock
+ */
+int smcr_buf_reg_lgr(struct smc_link *lnk)
+{
+       struct smc_link_group *lgr = lnk->lgr;
+       struct smc_buf_desc *buf_desc, *bf;
+       int i, rc = 0;
+
+       mutex_lock(&lgr->rmbs_lock);
+       for (i = 0; i < SMC_RMBE_SIZES; i++) {
+               list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list) {
+                       if (!buf_desc->used)
+                               continue;
+                       rc = smcr_link_reg_rmb(lnk, buf_desc);
+                       if (rc)
+                               goto out;
+               }
+       }
+out:
+       mutex_unlock(&lgr->rmbs_lock);
+       return rc;
+}
+
 static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr,
                                                bool is_rmb, int bufsize)
 {
        struct smc_buf_desc *buf_desc;
-       struct smc_link *lnk;
-       int rc;
 
        /* try to alloc a new buffer */
        buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
@@ -981,41 +1586,33 @@ static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr,
                return ERR_PTR(-EAGAIN);
        }
        buf_desc->cpu_addr = (void *)page_address(buf_desc->pages);
+       buf_desc->len = bufsize;
+       return buf_desc;
+}
 
-       /* build the sg table from the pages */
-       lnk = &lgr->lnk[SMC_SINGLE_LINK];
-       rc = sg_alloc_table(&buf_desc->sgt[SMC_SINGLE_LINK], 1,
-                           GFP_KERNEL);
-       if (rc) {
-               smc_buf_free(lgr, is_rmb, buf_desc);
-               return ERR_PTR(rc);
-       }
-       sg_set_buf(buf_desc->sgt[SMC_SINGLE_LINK].sgl,
-                  buf_desc->cpu_addr, bufsize);
+/* map buf_desc on all usable links,
+ * unused buffers stay mapped as long as the link is up
+ */
+static int smcr_buf_map_usable_links(struct smc_link_group *lgr,
+                                    struct smc_buf_desc *buf_desc, bool is_rmb)
+{
+       int i, rc = 0;
 
-       /* map sg table to DMA address */
-       rc = smc_ib_buf_map_sg(lnk->smcibdev, buf_desc,
-                              is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
-       /* SMC protocol depends on mapping to one DMA address only */
-       if (rc != 1)  {
-               smc_buf_free(lgr, is_rmb, buf_desc);
-               return ERR_PTR(-EAGAIN);
-       }
+       /* protect against parallel link reconfiguration */
+       mutex_lock(&lgr->llc_conf_mutex);
+       for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+               struct smc_link *lnk = &lgr->lnk[i];
 
-       /* create a new memory region for the RMB */
-       if (is_rmb) {
-               rc = smc_ib_get_memory_region(lnk->roce_pd,
-                                             IB_ACCESS_REMOTE_WRITE |
-                                             IB_ACCESS_LOCAL_WRITE,
-                                             buf_desc);
-               if (rc) {
-                       smc_buf_free(lgr, is_rmb, buf_desc);
-                       return ERR_PTR(rc);
+               if (!smc_link_usable(lnk))
+                       continue;
+               if (smcr_buf_map_link(buf_desc, is_rmb, lnk)) {
+                       rc = -ENOMEM;
+                       goto out;
                }
        }
-
-       buf_desc->len = bufsize;
-       return buf_desc;
+out:
+       mutex_unlock(&lgr->llc_conf_mutex);
+       return rc;
 }
 
 #define SMCD_DMBE_SIZES                7 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */
@@ -1062,8 +1659,8 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
        struct smc_link_group *lgr = conn->lgr;
        struct list_head *buf_list;
        int bufsize, bufsize_short;
+       struct mutex *lock;     /* lock buffer list */
        int sk_buf_size;
-       rwlock_t *lock;
 
        if (is_rmb)
                /* use socket recv buffer size (w/o overhead) as start value */
@@ -1104,15 +1701,22 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
                        continue;
 
                buf_desc->used = 1;
-               write_lock_bh(lock);
+               mutex_lock(lock);
                list_add(&buf_desc->list, buf_list);
-               write_unlock_bh(lock);
+               mutex_unlock(lock);
                break; /* found */
        }
 
        if (IS_ERR(buf_desc))
                return -ENOMEM;
 
+       if (!is_smcd) {
+               if (smcr_buf_map_usable_links(lgr, buf_desc, is_rmb)) {
+                       smcr_buf_unuse(buf_desc, lgr);
+                       return -ENOMEM;
+               }
+       }
+
        if (is_rmb) {
                conn->rmb_desc = buf_desc;
                conn->rmbe_size_short = bufsize_short;
@@ -1132,42 +1736,44 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
 
 void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn)
 {
-       struct smc_link_group *lgr = conn->lgr;
-
-       if (!conn->lgr || conn->lgr->is_smcd)
+       if (!conn->lgr || conn->lgr->is_smcd || !smc_link_usable(conn->lnk))
                return;
-       smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
-                              conn->sndbuf_desc, DMA_TO_DEVICE);
+       smc_ib_sync_sg_for_cpu(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE);
 }
 
 void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn)
 {
-       struct smc_link_group *lgr = conn->lgr;
-
-       if (!conn->lgr || conn->lgr->is_smcd)
+       if (!conn->lgr || conn->lgr->is_smcd || !smc_link_usable(conn->lnk))
                return;
-       smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
-                                 conn->sndbuf_desc, DMA_TO_DEVICE);
+       smc_ib_sync_sg_for_device(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE);
 }
 
 void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn)
 {
-       struct smc_link_group *lgr = conn->lgr;
+       int i;
 
        if (!conn->lgr || conn->lgr->is_smcd)
                return;
-       smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
-                              conn->rmb_desc, DMA_FROM_DEVICE);
+       for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+               if (!smc_link_usable(&conn->lgr->lnk[i]))
+                       continue;
+               smc_ib_sync_sg_for_cpu(&conn->lgr->lnk[i], conn->rmb_desc,
+                                      DMA_FROM_DEVICE);
+       }
 }
 
 void smc_rmb_sync_sg_for_device(struct smc_connection *conn)
 {
-       struct smc_link_group *lgr = conn->lgr;
+       int i;
 
        if (!conn->lgr || conn->lgr->is_smcd)
                return;
-       smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
-                                 conn->rmb_desc, DMA_FROM_DEVICE);
+       for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+               if (!smc_link_usable(&conn->lgr->lnk[i]))
+                       continue;
+               smc_ib_sync_sg_for_device(&conn->lgr->lnk[i], conn->rmb_desc,
+                                         DMA_FROM_DEVICE);
+       }
 }
 
 /* create the send and receive buffer for an SMC socket;
@@ -1202,16 +1808,64 @@ static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
        return -ENOSPC;
 }
 
+static int smc_rtoken_find_by_link(struct smc_link_group *lgr, int lnk_idx,
+                                  u32 rkey)
+{
+       int i;
+
+       for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
+               if (test_bit(i, lgr->rtokens_used_mask) &&
+                   lgr->rtokens[i][lnk_idx].rkey == rkey)
+                       return i;
+       }
+       return -ENOENT;
+}
+
+/* set rtoken for a new link to an existing rmb */
+void smc_rtoken_set(struct smc_link_group *lgr, int link_idx, int link_idx_new,
+                   __be32 nw_rkey_known, __be64 nw_vaddr, __be32 nw_rkey)
+{
+       int rtok_idx;
+
+       rtok_idx = smc_rtoken_find_by_link(lgr, link_idx, ntohl(nw_rkey_known));
+       if (rtok_idx == -ENOENT)
+               return;
+       lgr->rtokens[rtok_idx][link_idx_new].rkey = ntohl(nw_rkey);
+       lgr->rtokens[rtok_idx][link_idx_new].dma_addr = be64_to_cpu(nw_vaddr);
+}
+
+/* set rtoken for a new link whose link_id is given */
+void smc_rtoken_set2(struct smc_link_group *lgr, int rtok_idx, int link_id,
+                    __be64 nw_vaddr, __be32 nw_rkey)
+{
+       u64 dma_addr = be64_to_cpu(nw_vaddr);
+       u32 rkey = ntohl(nw_rkey);
+       bool found = false;
+       int link_idx;
+
+       for (link_idx = 0; link_idx < SMC_LINKS_PER_LGR_MAX; link_idx++) {
+               if (lgr->lnk[link_idx].link_id == link_id) {
+                       found = true;
+                       break;
+               }
+       }
+       if (!found)
+               return;
+       lgr->rtokens[rtok_idx][link_idx].rkey = rkey;
+       lgr->rtokens[rtok_idx][link_idx].dma_addr = dma_addr;
+}
+
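
With more than one link, the rtoken store is effectively a table indexed by [rmb][link]: each row is one peer RMB, each column holds the (rkey, dma_addr) pair valid on one link, and rtokens_used_mask marks live rows. smc_rtoken_set() copies a column entry to a new link given a known rkey; smc_rtoken_set2() first resolves a link_id to an array index. A reduced model of the find-and-copy step (sizes and names are illustrative, not the kernel constants):

    #include <stdint.h>

    #define RMBS_MAX  8   /* illustrative, not SMC_RMBS_PER_LGR_MAX */
    #define LINKS_MAX 2   /* illustrative, not SMC_LINKS_PER_LGR_MAX */

    struct rtoken { uint32_t rkey; uint64_t dma_addr; };

    struct rtoken rtokens[RMBS_MAX][LINKS_MAX];

    /* find the row whose entry on link 'idx' carries 'rkey' */
    int rtoken_find(int idx, uint32_t rkey)
    {
            for (int i = 0; i < RMBS_MAX; i++)
                    if (rtokens[i][idx].rkey == rkey)
                            return i;
            return -1;
    }

    /* copy a peer token for an existing RMB onto a newly added link */
    void rtoken_copy(int old_idx, int new_idx, uint32_t known_rkey,
                     uint32_t new_rkey, uint64_t new_vaddr)
    {
            int row = rtoken_find(old_idx, known_rkey);

            if (row < 0)
                    return;
            rtokens[row][new_idx].rkey = new_rkey;
            rtokens[row][new_idx].dma_addr = new_vaddr;
    }
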
 /* add a new rtoken from peer */
-int smc_rtoken_add(struct smc_link_group *lgr, __be64 nw_vaddr, __be32 nw_rkey)
+int smc_rtoken_add(struct smc_link *lnk, __be64 nw_vaddr, __be32 nw_rkey)
 {
+       struct smc_link_group *lgr = smc_get_lgr(lnk);
        u64 dma_addr = be64_to_cpu(nw_vaddr);
        u32 rkey = ntohl(nw_rkey);
        int i;
 
        for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
-               if ((lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey) &&
-                   (lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr == dma_addr) &&
+               if (lgr->rtokens[i][lnk->link_idx].rkey == rkey &&
+                   lgr->rtokens[i][lnk->link_idx].dma_addr == dma_addr &&
                    test_bit(i, lgr->rtokens_used_mask)) {
                        /* already in list */
                        return i;
@@ -1220,23 +1874,25 @@ int smc_rtoken_add(struct smc_link_group *lgr, __be64 nw_vaddr, __be32 nw_rkey)
        i = smc_rmb_reserve_rtoken_idx(lgr);
        if (i < 0)
                return i;
-       lgr->rtokens[i][SMC_SINGLE_LINK].rkey = rkey;
-       lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = dma_addr;
+       lgr->rtokens[i][lnk->link_idx].rkey = rkey;
+       lgr->rtokens[i][lnk->link_idx].dma_addr = dma_addr;
        return i;
 }
 
-/* delete an rtoken */
-int smc_rtoken_delete(struct smc_link_group *lgr, __be32 nw_rkey)
+/* delete an rtoken from all links */
+int smc_rtoken_delete(struct smc_link *lnk, __be32 nw_rkey)
 {
+       struct smc_link_group *lgr = smc_get_lgr(lnk);
        u32 rkey = ntohl(nw_rkey);
-       int i;
+       int i, j;
 
        for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
-               if (lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey &&
+               if (lgr->rtokens[i][lnk->link_idx].rkey == rkey &&
                    test_bit(i, lgr->rtokens_used_mask)) {
-                       lgr->rtokens[i][SMC_SINGLE_LINK].rkey = 0;
-                       lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = 0;
-
+                       for (j = 0; j < SMC_LINKS_PER_LGR_MAX; j++) {
+                               lgr->rtokens[i][j].rkey = 0;
+                               lgr->rtokens[i][j].dma_addr = 0;
+                       }
                        clear_bit(i, lgr->rtokens_used_mask);
                        return 0;
                }
@@ -1246,9 +1902,10 @@ int smc_rtoken_delete(struct smc_link_group *lgr, __be32 nw_rkey)
 
 /* save rkey and dma_addr received from peer during clc handshake */
 int smc_rmb_rtoken_handling(struct smc_connection *conn,
+                           struct smc_link *lnk,
                            struct smc_clc_msg_accept_confirm *clc)
 {
-       conn->rtoken_idx = smc_rtoken_add(conn->lgr, clc->rmb_dma_addr,
+       conn->rtoken_idx = smc_rtoken_add(lnk, clc->rmb_dma_addr,
                                          clc->rmb_rkey);
        if (conn->rtoken_idx < 0)
                return conn->rtoken_idx;