Merge tag 'ext4_for_linus_stable' of git://git.kernel.org/pub/scm/linux/kernel/git...
[linux-2.6-microblaze.git] / net / smc / af_smc.c
index 973b447..2d8a1e1 100644 (file)
@@ -23,6 +23,7 @@
 #include <linux/workqueue.h>
 #include <linux/in.h>
 #include <linux/sched/signal.h>
+#include <linux/if_vlan.h>
 
 #include <net/sock.h>
 #include <net/tcp.h>
@@ -35,6 +36,7 @@
 #include "smc_cdc.h"
 #include "smc_core.h"
 #include "smc_ib.h"
+#include "smc_ism.h"
 #include "smc_pnet.h"
 #include "smc_tx.h"
 #include "smc_rx.h"
@@ -45,6 +47,7 @@ static DEFINE_MUTEX(smc_create_lgr_pending);  /* serialize link group
                                                 */
 
 static void smc_tcp_listen_work(struct work_struct *);
+static void smc_connect_work(struct work_struct *);
 
 static void smc_set_keepalive(struct sock *sk, int val)
 {
@@ -122,6 +125,12 @@ static int smc_release(struct socket *sock)
                goto out;
 
        smc = smc_sk(sk);
+
+       /* cleanup for a dangling non-blocking connect */
+       flush_work(&smc->connect_work);
+       kfree(smc->connect_info);
+       smc->connect_info = NULL;
+
        if (sk->sk_state == SMC_LISTEN)
                /* smc_close_non_accepted() is called and acquires
                 * sock lock for child sockets again
@@ -140,7 +149,8 @@ static int smc_release(struct socket *sock)
                smc->clcsock = NULL;
        }
        if (smc->use_fallback) {
-               sock_put(sk); /* passive closing */
+               if (sk->sk_state != SMC_LISTEN && sk->sk_state != SMC_INIT)
+                       sock_put(sk); /* passive closing */
                sk->sk_state = SMC_CLOSED;
                sk->sk_state_change(sk);
        }
@@ -186,6 +196,7 @@ static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
        sk->sk_protocol = protocol;
        smc = smc_sk(sk);
        INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
+       INIT_WORK(&smc->connect_work, smc_connect_work);
        INIT_DELAYED_WORK(&smc->conn.tx_work, smc_tx_work);
        INIT_LIST_HEAD(&smc->accept_q);
        spin_lock_init(&smc->accept_q_lock);
@@ -333,20 +344,17 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc)
 
        rc = smc_ib_modify_qp_rts(link);
        if (rc)
-               return SMC_CLC_DECL_INTERR;
+               return SMC_CLC_DECL_ERR_RDYLNK;
 
        smc_wr_remember_qp_attr(link);
 
        if (smc_reg_rmb(link, smc->conn.rmb_desc, false))
-               return SMC_CLC_DECL_INTERR;
+               return SMC_CLC_DECL_ERR_REGRMB;
 
        /* send CONFIRM LINK response over RoCE fabric */
-       rc = smc_llc_send_confirm_link(link,
-                                      link->smcibdev->mac[link->ibport - 1],
-                                      &link->smcibdev->gid[link->ibport - 1],
-                                      SMC_LLC_RESP);
+       rc = smc_llc_send_confirm_link(link, SMC_LLC_RESP);
        if (rc < 0)
-               return SMC_CLC_DECL_TCL;
+               return SMC_CLC_DECL_TIMEOUT_CL;
 
        /* receive ADD LINK request from server over RoCE fabric */
        rest = wait_for_completion_interruptible_timeout(&link->llc_add,
@@ -362,18 +370,17 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc)
        /* send add link reject message, only one link supported for now */
        rc = smc_llc_send_add_link(link,
                                   link->smcibdev->mac[link->ibport - 1],
-                                  &link->smcibdev->gid[link->ibport - 1],
-                                  SMC_LLC_RESP);
+                                  link->gid, SMC_LLC_RESP);
        if (rc < 0)
-               return SMC_CLC_DECL_TCL;
+               return SMC_CLC_DECL_TIMEOUT_AL;
 
        smc_llc_link_active(link, net->ipv4.sysctl_tcp_keepalive_time);
 
        return 0;
 }
 
-static void smc_conn_save_peer_info(struct smc_sock *smc,
-                                   struct smc_clc_msg_accept_confirm *clc)
+static void smcr_conn_save_peer_info(struct smc_sock *smc,
+                                    struct smc_clc_msg_accept_confirm *clc)
 {
        int bufsize = smc_uncompress_bufsize(clc->rmbe_size);
 
@@ -384,6 +391,28 @@ static void smc_conn_save_peer_info(struct smc_sock *smc,
        smc->conn.tx_off = bufsize * (smc->conn.peer_rmbe_idx - 1);
 }
 
+static void smcd_conn_save_peer_info(struct smc_sock *smc,
+                                    struct smc_clc_msg_accept_confirm *clc)
+{      /* SMC-D variant: store peer buffer info from CLC message clc in smc->conn */
+       int bufsize = smc_uncompress_bufsize(clc->dmbe_size);
+
+       smc->conn.peer_rmbe_idx = clc->dmbe_idx;
+       smc->conn.peer_token = clc->token;
+       /* msg header (struct smcd_cdc_msg) takes up space in the buffer */
+       smc->conn.peer_rmbe_size = bufsize - sizeof(struct smcd_cdc_msg);
+       atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size); /* space counter starts at the full usable size */
+       smc->conn.tx_off = bufsize * smc->conn.peer_rmbe_idx; /* index used as-is; the SMC-R variant uses (idx - 1) */
+}
+
+static void smc_conn_save_peer_info(struct smc_sock *smc,
+                                   struct smc_clc_msg_accept_confirm *clc)
+{      /* dispatch on the link group type; smc->conn.lgr is dereferenced unchecked */
+       if (smc->conn.lgr->is_smcd)
+               smcd_conn_save_peer_info(smc, clc); /* reads the dmbe_* and token fields */
+       else
+               smcr_conn_save_peer_info(smc, clc); /* reads rmbe_size among others */
+}
+
 static void smc_link_save_peer_info(struct smc_link *link,
                                    struct smc_clc_msg_accept_confirm *clc)
 {
@@ -395,9 +424,10 @@ static void smc_link_save_peer_info(struct smc_link *link,
 }
 
 /* fall back during connect */
-static int smc_connect_fallback(struct smc_sock *smc)
+static int smc_connect_fallback(struct smc_sock *smc, int reason_code)
 {
        smc->use_fallback = true;
+       smc->fallback_rsn = reason_code;
        smc_copy_sock_settings_to_clc(smc);
        if (smc->sk.sk_state == SMC_INIT)
                smc->sk.sk_state = SMC_ACTIVE;
@@ -409,14 +439,20 @@ static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code)
 {
        int rc;
 
-       if (reason_code < 0) /* error, fallback is not possible */
+       if (reason_code < 0) { /* error, fallback is not possible */
+               if (smc->sk.sk_state == SMC_INIT)
+                       sock_put(&smc->sk); /* passive closing */
                return reason_code;
-       if (reason_code != SMC_CLC_DECL_REPLY) {
+       }
+       if (reason_code != SMC_CLC_DECL_PEERDECL) {
                rc = smc_clc_send_decline(smc, reason_code);
-               if (rc < 0)
+               if (rc < 0) {
+                       if (smc->sk.sk_state == SMC_INIT)
+                               sock_put(&smc->sk); /* passive closing */
                        return rc;
+               }
        }
-       return smc_connect_fallback(smc);
+       return smc_connect_fallback(smc, reason_code);
 }
 
 /* abort connecting */
@@ -427,15 +463,13 @@ static int smc_connect_abort(struct smc_sock *smc, int reason_code,
                smc_lgr_forget(smc->conn.lgr);
        mutex_unlock(&smc_create_lgr_pending);
        smc_conn_free(&smc->conn);
-       if (reason_code < 0 && smc->sk.sk_state == SMC_INIT)
-               sock_put(&smc->sk); /* passive closing */
        return reason_code;
 }
 
 /* check if there is a rdma device available for this connection. */
 /* called for connect and listen */
 static int smc_check_rdma(struct smc_sock *smc, struct smc_ib_device **ibdev,
-                         u8 *ibport)
+                         u8 *ibport, unsigned short vlan_id, u8 gid[])
 {
        int reason_code = 0;
 
@@ -443,22 +477,59 @@ static int smc_check_rdma(struct smc_sock *smc, struct smc_ib_device **ibdev,
         * within same PNETID that also contains the ethernet device
         * used for the internal TCP socket
         */
-       smc_pnet_find_roce_resource(smc->clcsock->sk, ibdev, ibport);
+       smc_pnet_find_roce_resource(smc->clcsock->sk, ibdev, ibport, vlan_id,
+                                   gid);
        if (!(*ibdev))
                reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
 
        return reason_code;
 }
 
+/* check if there is an ISM device available for this connection. */
+/* called for connect and listen; returns 0 or SMC_CLC_DECL_CNFERR, result in *ismdev */
+static int smc_check_ism(struct smc_sock *smc, struct smcd_dev **ismdev)
+{
+       /* Find ISM device with same PNETID as connecting interface */
+       smc_pnet_find_ism_resource(smc->clcsock->sk, ismdev);
+       if (!(*ismdev)) /* the lookup produced no device */
+               return SMC_CLC_DECL_CNFERR; /* configuration error */
+       return 0;
+}
+
+/* Check for VLAN ID and register it on ISM device just for CLC handshake */
+static int smc_connect_ism_vlan_setup(struct smc_sock *smc,
+                                     struct smcd_dev *ismdev,
+                                     unsigned short vlan_id)
+{      /* returns 0 or SMC_CLC_DECL_CNFERR; the smc argument is not used here */
+       if (vlan_id && smc_ism_get_vlan(ismdev, vlan_id)) /* vlan_id 0 skips the call */
+               return SMC_CLC_DECL_CNFERR;
+       return 0;
+}
+
+/* cleanup temporary VLAN ID registration used for CLC handshake. If ISM is
+ * used, the VLAN ID will be registered again during the connection setup.
+ */
+static int smc_connect_ism_vlan_cleanup(struct smc_sock *smc, bool is_smcd,
+                                       struct smcd_dev *ismdev,
+                                       unsigned short vlan_id)
+{      /* returns 0 or SMC_CLC_DECL_CNFERR; the smc argument is not used here */
+       if (!is_smcd) /* caller had no ISM device in play: nothing to undo */
+               return 0;
+       if (vlan_id && smc_ism_put_vlan(ismdev, vlan_id)) /* vlan_id 0 skips the call */
+               return SMC_CLC_DECL_CNFERR;
+       return 0;
+}
+
 /* CLC handshake during connect */
-static int smc_connect_clc(struct smc_sock *smc,
+static int smc_connect_clc(struct smc_sock *smc, int smc_type,
                           struct smc_clc_msg_accept_confirm *aclc,
-                          struct smc_ib_device *ibdev, u8 ibport)
+                          struct smc_ib_device *ibdev, u8 ibport,
+                          u8 gid[], struct smcd_dev *ismdev)
 {
        int rc = 0;
 
        /* do inband token exchange */
-       rc = smc_clc_send_proposal(smc, ibdev, ibport);
+       rc = smc_clc_send_proposal(smc, smc_type, ibdev, ibport, gid, ismdev);
        if (rc)
                return rc;
        /* receive SMC Accept CLC message */
@@ -475,8 +546,8 @@ static int smc_connect_rdma(struct smc_sock *smc,
        int reason_code = 0;
 
        mutex_lock(&smc_create_lgr_pending);
-       local_contact = smc_conn_create(smc, ibdev, ibport, &aclc->lcl,
-                                       aclc->hdr.flag);
+       local_contact = smc_conn_create(smc, false, aclc->hdr.flag, ibdev,
+                                       ibport, &aclc->lcl, NULL, 0);
        if (local_contact < 0) {
                if (local_contact == -ENOMEM)
                        reason_code = SMC_CLC_DECL_MEM;/* insufficient memory*/
@@ -491,14 +562,14 @@ static int smc_connect_rdma(struct smc_sock *smc,
        smc_conn_save_peer_info(smc, aclc);
 
        /* create send buffer and rmb */
-       if (smc_buf_create(smc))
+       if (smc_buf_create(smc, false))
                return smc_connect_abort(smc, SMC_CLC_DECL_MEM, local_contact);
 
        if (local_contact == SMC_FIRST_CONTACT)
                smc_link_save_peer_info(link, aclc);
 
        if (smc_rmb_rtoken_handling(&smc->conn, aclc))
-               return smc_connect_abort(smc, SMC_CLC_DECL_INTERR,
+               return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RTOK,
                                         local_contact);
 
        smc_close_init(smc);
@@ -506,12 +577,12 @@ static int smc_connect_rdma(struct smc_sock *smc,
 
        if (local_contact == SMC_FIRST_CONTACT) {
                if (smc_ib_ready_link(link))
-                       return smc_connect_abort(smc, SMC_CLC_DECL_INTERR,
+                       return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RDYLNK,
                                                 local_contact);
        } else {
                if (!smc->conn.rmb_desc->reused &&
                    smc_reg_rmb(link, smc->conn.rmb_desc, true))
-                       return smc_connect_abort(smc, SMC_CLC_DECL_INTERR,
+                       return smc_connect_abort(smc, SMC_CLC_DECL_ERR_REGRMB,
                                                 local_contact);
        }
        smc_rmb_sync_sg_for_device(&smc->conn);
@@ -538,44 +609,145 @@ static int smc_connect_rdma(struct smc_sock *smc,
        return 0;
 }
 
+/* setup for ISM connection of client; returns 0 or a reason code / error from smc_connect_abort() */
+static int smc_connect_ism(struct smc_sock *smc,
+                          struct smc_clc_msg_accept_confirm *aclc,
+                          struct smcd_dev *ismdev)
+{
+       int local_contact = SMC_FIRST_CONTACT; /* overwritten by smc_conn_create() below */
+       int rc = 0;
+
+       mutex_lock(&smc_create_lgr_pending); /* unlocked below on success, or inside smc_connect_abort() */
+       local_contact = smc_conn_create(smc, true, aclc->hdr.flag, NULL, 0,
+                                       NULL, ismdev, aclc->gid);
+       if (local_contact < 0) /* every creation error is reported as SMC_CLC_DECL_MEM */
+               return smc_connect_abort(smc, SMC_CLC_DECL_MEM, 0);
+
+       /* Create send and receive buffers */
+       if (smc_buf_create(smc, true))
+               return smc_connect_abort(smc, SMC_CLC_DECL_MEM, local_contact);
+
+       smc_conn_save_peer_info(smc, aclc);
+       smc_close_init(smc);
+       smc_rx_init(smc);
+       smc_tx_init(smc);
+
+       rc = smc_clc_send_confirm(smc);
+       if (rc) /* rc is handed through unchanged as the abort reason */
+               return smc_connect_abort(smc, rc, local_contact);
+       mutex_unlock(&smc_create_lgr_pending);
+
+       smc_copy_sock_settings_to_clc(smc);
+       if (smc->sk.sk_state == SMC_INIT) /* only a socket still in SMC_INIT is moved to SMC_ACTIVE */
+               smc->sk.sk_state = SMC_ACTIVE;
+
+       return 0;
+}
+
 /* perform steps before actually connecting */
 static int __smc_connect(struct smc_sock *smc)
 {
+       bool ism_supported = false, rdma_supported = false;
        struct smc_clc_msg_accept_confirm aclc;
        struct smc_ib_device *ibdev;
+       struct smcd_dev *ismdev;
+       u8 gid[SMC_GID_SIZE];
+       unsigned short vlan;
+       int smc_type; /* assigned whenever ism_supported or rdma_supported becomes true */
        int rc = 0;
        u8 ibport;
 
        sock_hold(&smc->sk); /* sock put in passive closing */
 
        if (smc->use_fallback)
-               return smc_connect_fallback(smc);
+               return smc_connect_fallback(smc, smc->fallback_rsn); /* fallback already chosen: keep the stored reason */
 
        /* if peer has not signalled SMC-capability, fall back */
        if (!tcp_sk(smc->clcsock->sk)->syn_smc)
-               return smc_connect_fallback(smc);
+               return smc_connect_fallback(smc, SMC_CLC_DECL_PEERNOSMC);
 
        /* IPSec connections opt out of SMC-R optimizations */
        if (using_ipsec(smc))
                return smc_connect_decline_fallback(smc, SMC_CLC_DECL_IPSEC);
 
-       /* check if a RDMA device is available; if not, fall back */
-       if (smc_check_rdma(smc, &ibdev, &ibport))
+       /* check for VLAN ID */
+       if (smc_vlan_by_tcpsk(smc->clcsock, &vlan))
                return smc_connect_decline_fallback(smc, SMC_CLC_DECL_CNFERR);
 
+       /* check if there is an ism device available */
+       if (!smc_check_ism(smc, &ismdev) &&
+           !smc_connect_ism_vlan_setup(smc, ismdev, vlan)) {
+               /* ISM is supported for this connection */
+               ism_supported = true;
+               smc_type = SMC_TYPE_D;
+       }
+
+       /* check if there is a rdma device available */
+       if (!smc_check_rdma(smc, &ibdev, &ibport, vlan, gid)) {
+               /* RDMA is supported for this connection */
+               rdma_supported = true;
+               if (ism_supported)
+                       smc_type = SMC_TYPE_B; /* both */
+               else
+                       smc_type = SMC_TYPE_R; /* only RDMA */
+       }
+
+       /* if neither ISM nor RDMA is supported, fall back */
+       if (!rdma_supported && !ism_supported)
+               return smc_connect_decline_fallback(smc, SMC_CLC_DECL_NOSMCDEV);
+
        /* perform CLC handshake */
-       rc = smc_connect_clc(smc, &aclc, ibdev, ibport);
-       if (rc)
+       rc = smc_connect_clc(smc, smc_type, &aclc, ibdev, ibport, gid, ismdev);
+       if (rc) {
+               smc_connect_ism_vlan_cleanup(smc, ism_supported, ismdev, vlan); /* returns 0 at once if !ism_supported */
                return smc_connect_decline_fallback(smc, rc);
+       }
 
-       /* connect using rdma */
-       rc = smc_connect_rdma(smc, &aclc, ibdev, ibport);
-       if (rc)
+       /* depending on previous steps, connect using rdma or ism */
+       if (rdma_supported && aclc.hdr.path == SMC_TYPE_R)
+               rc = smc_connect_rdma(smc, &aclc, ibdev, ibport);
+       else if (ism_supported && aclc.hdr.path == SMC_TYPE_D)
+               rc = smc_connect_ism(smc, &aclc, ismdev);
+       else
+               rc = SMC_CLC_DECL_MODEUNSUPP; /* aclc.hdr.path matches no mode supported here */
+       if (rc) {
+               smc_connect_ism_vlan_cleanup(smc, ism_supported, ismdev, vlan);
                return smc_connect_decline_fallback(smc, rc);
+       }
 
+       smc_connect_ism_vlan_cleanup(smc, ism_supported, ismdev, vlan); /* success path: return value is ignored */
        return 0;
 }
 
+static void smc_connect_work(struct work_struct *work)
+{      /* work item that smc_connect() schedules for an O_NONBLOCK connect */
+       struct smc_sock *smc = container_of(work, struct smc_sock,
+                                           connect_work);
+       int rc;
+
+       lock_sock(&smc->sk);
+       rc = kernel_connect(smc->clcsock, &smc->connect_info->addr,
+                           smc->connect_info->alen, smc->connect_info->flags); /* smc_connect() stored flags with O_NONBLOCK cleared */
+       if (smc->clcsock->sk->sk_err) { /* an error on clcsock is checked before rc */
+               smc->sk.sk_err = smc->clcsock->sk->sk_err;
+               goto out;
+       }
+       if (rc < 0) {
+               smc->sk.sk_err = -rc; /* sk_err stores the positive value */
+               goto out;
+       }
+
+       rc = __smc_connect(smc);
+       if (rc < 0) /* rc >= 0 leaves sk_err untouched */
+               smc->sk.sk_err = -rc;
+
+out:
+       smc->sk.sk_state_change(&smc->sk); /* called on success and on every error path */
+       kfree(smc->connect_info); /* allocated in smc_connect() */
+       smc->connect_info = NULL; /* smc_connect() returns -EALREADY while this is non-NULL */
+       release_sock(&smc->sk);
+}
+
 static int smc_connect(struct socket *sock, struct sockaddr *addr,
                       int alen, int flags)
 {
@@ -605,15 +777,32 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr,
 
        smc_copy_sock_settings_to_clc(smc);
        tcp_sk(smc->clcsock->sk)->syn_smc = 1;
-       rc = kernel_connect(smc->clcsock, addr, alen, flags);
-       if (rc)
-               goto out;
+       if (flags & O_NONBLOCK) {
+               if (smc->connect_info) {
+                       rc = -EALREADY;
+                       goto out;
+               }
+               smc->connect_info = kzalloc(alen + 2 * sizeof(int), GFP_KERNEL);
+               if (!smc->connect_info) {
+                       rc = -ENOMEM;
+                       goto out;
+               }
+               smc->connect_info->alen = alen;
+               smc->connect_info->flags = flags ^ O_NONBLOCK;
+               memcpy(&smc->connect_info->addr, addr, alen);
+               schedule_work(&smc->connect_work);
+               rc = -EINPROGRESS;
+       } else {
+               rc = kernel_connect(smc->clcsock, addr, alen, flags);
+               if (rc)
+                       goto out;
 
-       rc = __smc_connect(smc);
-       if (rc < 0)
-               goto out;
-       else
-               rc = 0; /* success cases including fallback */
+               rc = __smc_connect(smc);
+               if (rc < 0)
+                       goto out;
+               else
+                       rc = 0; /* success cases including fallback */
+       }
 
 out:
        release_sock(sk);
@@ -758,15 +947,12 @@ static int smc_serv_conf_first_link(struct smc_sock *smc)
        link = &lgr->lnk[SMC_SINGLE_LINK];
 
        if (smc_reg_rmb(link, smc->conn.rmb_desc, false))
-               return SMC_CLC_DECL_INTERR;
+               return SMC_CLC_DECL_ERR_REGRMB;
 
        /* send CONFIRM LINK request to client over the RoCE fabric */
-       rc = smc_llc_send_confirm_link(link,
-                                      link->smcibdev->mac[link->ibport - 1],
-                                      &link->smcibdev->gid[link->ibport - 1],
-                                      SMC_LLC_REQ);
+       rc = smc_llc_send_confirm_link(link, SMC_LLC_REQ);
        if (rc < 0)
-               return SMC_CLC_DECL_TCL;
+               return SMC_CLC_DECL_TIMEOUT_CL;
 
        /* receive CONFIRM LINK response from client over the RoCE fabric */
        rest = wait_for_completion_interruptible_timeout(
@@ -786,10 +972,9 @@ static int smc_serv_conf_first_link(struct smc_sock *smc)
        /* send ADD LINK request to client over the RoCE fabric */
        rc = smc_llc_send_add_link(link,
                                   link->smcibdev->mac[link->ibport - 1],
-                                  &link->smcibdev->gid[link->ibport - 1],
-                                  SMC_LLC_REQ);
+                                  link->gid, SMC_LLC_REQ);
        if (rc < 0)
-               return SMC_CLC_DECL_TCL;
+               return SMC_CLC_DECL_TIMEOUT_AL;
 
        /* receive ADD LINK response from client over the RoCE fabric */
        rest = wait_for_completion_interruptible_timeout(&link->llc_add_resp,
@@ -864,7 +1049,8 @@ static void smc_listen_decline(struct smc_sock *new_smc, int reason_code,
        }
        smc_conn_free(&new_smc->conn);
        new_smc->use_fallback = true;
-       if (reason_code && reason_code != SMC_CLC_DECL_REPLY) {
+       new_smc->fallback_rsn = reason_code;
+       if (reason_code && reason_code != SMC_CLC_DECL_PEERDECL) {
                if (smc_clc_send_decline(new_smc, reason_code) < 0) {
                        smc_listen_out_err(new_smc);
                        return;
@@ -894,7 +1080,8 @@ static int smc_listen_rdma_init(struct smc_sock *new_smc,
                                int *local_contact)
 {
        /* allocate connection / link group */
-       *local_contact = smc_conn_create(new_smc, ibdev, ibport, &pclc->lcl, 0);
+       *local_contact = smc_conn_create(new_smc, false, 0, ibdev, ibport,
+                                        &pclc->lcl, NULL, 0);
        if (*local_contact < 0) {
                if (*local_contact == -ENOMEM)
                        return SMC_CLC_DECL_MEM;/* insufficient memory*/
@@ -902,12 +1089,50 @@ static int smc_listen_rdma_init(struct smc_sock *new_smc,
        }
 
        /* create send buffer and rmb */
-       if (smc_buf_create(new_smc))
+       if (smc_buf_create(new_smc, false))
                return SMC_CLC_DECL_MEM;
 
        return 0;
 }
 
+/* listen worker: initialize connection and buffers for SMC-D */
+static int smc_listen_ism_init(struct smc_sock *new_smc,
+                              struct smc_clc_msg_proposal *pclc,
+                              struct smcd_dev *ismdev,
+                              int *local_contact)
+{      /* returns 0 or an SMC_CLC_DECL_* reason code; *local_contact gets smc_conn_create()'s result */
+       struct smc_clc_msg_smcd *pclc_smcd;
+
+       pclc_smcd = smc_get_clc_msg_smcd(pclc);
+       *local_contact = smc_conn_create(new_smc, true, 0, NULL, 0, NULL,
+                                        ismdev, pclc_smcd->gid);
+       if (*local_contact < 0) {
+               if (*local_contact == -ENOMEM)
+                       return SMC_CLC_DECL_MEM;/* insufficient memory*/
+               return SMC_CLC_DECL_INTERR; /* other error */
+       }
+
+       /* Check if peer can be reached via ISM device */
+       if (smc_ism_cantalk(new_smc->conn.lgr->peer_gid,
+                           new_smc->conn.lgr->vlan_id,
+                           new_smc->conn.lgr->smcd)) { /* a non-zero result is treated as failure */
+               if (*local_contact == SMC_FIRST_CONTACT) /* forget the link group only on first contact */
+                       smc_lgr_forget(new_smc->conn.lgr);
+               smc_conn_free(&new_smc->conn);
+               return SMC_CLC_DECL_CNFERR;
+       }
+
+       /* Create send and receive buffers */
+       if (smc_buf_create(new_smc, true)) { /* same cleanup as above, different reason code */
+               if (*local_contact == SMC_FIRST_CONTACT)
+                       smc_lgr_forget(new_smc->conn.lgr);
+               smc_conn_free(&new_smc->conn);
+               return SMC_CLC_DECL_MEM;
+       }
+
+       return 0;
+}
+
 /* listen worker: register buffers */
 static int smc_listen_rdma_reg(struct smc_sock *new_smc, int local_contact)
 {
@@ -916,7 +1141,7 @@ static int smc_listen_rdma_reg(struct smc_sock *new_smc, int local_contact)
        if (local_contact != SMC_FIRST_CONTACT) {
                if (!new_smc->conn.rmb_desc->reused) {
                        if (smc_reg_rmb(link, new_smc->conn.rmb_desc, true))
-                               return SMC_CLC_DECL_INTERR;
+                               return SMC_CLC_DECL_ERR_REGRMB;
                }
        }
        smc_rmb_sync_sg_for_device(&new_smc->conn);
@@ -936,13 +1161,13 @@ static void smc_listen_rdma_finish(struct smc_sock *new_smc,
                smc_link_save_peer_info(link, cclc);
 
        if (smc_rmb_rtoken_handling(&new_smc->conn, cclc)) {
-               reason_code = SMC_CLC_DECL_INTERR;
+               reason_code = SMC_CLC_DECL_ERR_RTOK;
                goto decline;
        }
 
        if (local_contact == SMC_FIRST_CONTACT) {
                if (smc_ib_ready_link(link)) {
-                       reason_code = SMC_CLC_DECL_INTERR;
+                       reason_code = SMC_CLC_DECL_ERR_RDYLNK;
                        goto decline;
                }
                /* QP confirmation over RoCE fabric */
@@ -966,8 +1191,11 @@ static void smc_listen_work(struct work_struct *work)
        struct smc_clc_msg_accept_confirm cclc;
        struct smc_clc_msg_proposal *pclc;
        struct smc_ib_device *ibdev;
+       bool ism_supported = false;
+       struct smcd_dev *ismdev;
        u8 buf[SMC_CLC_MAX_LEN];
        int local_contact = 0;
+       unsigned short vlan;
        int reason_code = 0;
        int rc = 0;
        u8 ibport;
@@ -980,6 +1208,7 @@ static void smc_listen_work(struct work_struct *work)
        /* check if peer is smc capable */
        if (!tcp_sk(newclcsock->sk)->syn_smc) {
                new_smc->use_fallback = true;
+               new_smc->fallback_rsn = SMC_CLC_DECL_PEERNOSMC;
                smc_listen_out_connected(new_smc);
                return;
        }
@@ -1006,15 +1235,26 @@ static void smc_listen_work(struct work_struct *work)
        smc_rx_init(new_smc);
        smc_tx_init(new_smc);
 
+       /* check if ISM is available */
+       if ((pclc->hdr.path == SMC_TYPE_D || pclc->hdr.path == SMC_TYPE_B) &&
+           !smc_check_ism(new_smc, &ismdev) &&
+           !smc_listen_ism_init(new_smc, pclc, ismdev, &local_contact)) {
+               ism_supported = true;
+       }
+
        /* check if RDMA is available */
-       if (smc_check_rdma(new_smc, &ibdev, &ibport) ||
-           smc_listen_rdma_check(new_smc, pclc) ||
-           smc_listen_rdma_init(new_smc, pclc, ibdev, ibport,
-                                &local_contact) ||
-           smc_listen_rdma_reg(new_smc, local_contact)) {
+       if (!ism_supported &&
+           ((pclc->hdr.path != SMC_TYPE_R && pclc->hdr.path != SMC_TYPE_B) ||
+            smc_vlan_by_tcpsk(new_smc->clcsock, &vlan) ||
+            smc_check_rdma(new_smc, &ibdev, &ibport, vlan, NULL) ||
+            smc_listen_rdma_check(new_smc, pclc) ||
+            smc_listen_rdma_init(new_smc, pclc, ibdev, ibport,
+                                 &local_contact) ||
+            smc_listen_rdma_reg(new_smc, local_contact))) {
                /* SMC not supported, decline */
                mutex_unlock(&smc_create_lgr_pending);
-               smc_listen_decline(new_smc, SMC_CLC_DECL_CNFERR, local_contact);
+               smc_listen_decline(new_smc, SMC_CLC_DECL_MODEUNSUPP,
+                                  local_contact);
                return;
        }
 
@@ -1036,7 +1276,8 @@ static void smc_listen_work(struct work_struct *work)
        }
 
        /* finish worker */
-       smc_listen_rdma_finish(new_smc, &cclc, local_contact);
+       if (!ism_supported)
+               smc_listen_rdma_finish(new_smc, &cclc, local_contact);
        smc_conn_save_peer_info(new_smc, &cclc);
        mutex_unlock(&smc_create_lgr_pending);
        smc_listen_out_connected(new_smc);
@@ -1060,9 +1301,12 @@ static void smc_tcp_listen_work(struct work_struct *work)
 
                new_smc->listen_smc = lsmc;
                new_smc->use_fallback = lsmc->use_fallback;
+               new_smc->fallback_rsn = lsmc->fallback_rsn;
                sock_hold(lsk); /* sock_put in smc_listen_work */
                INIT_WORK(&new_smc->smc_listen_work, smc_listen_work);
                smc_copy_sock_settings_to_smc(new_smc);
+               new_smc->sk.sk_sndbuf = lsmc->sk.sk_sndbuf;
+               new_smc->sk.sk_rcvbuf = lsmc->sk.sk_rcvbuf;
                sock_hold(&new_smc->sk); /* sock_put in passive closing */
                if (!schedule_work(&new_smc->smc_listen_work))
                        sock_put(&new_smc->sk);
@@ -1214,6 +1458,7 @@ static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
        if (msg->msg_flags & MSG_FASTOPEN) {
                if (sk->sk_state == SMC_INIT) {
                        smc->use_fallback = true;
+                       smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP;
                } else {
                        rc = -EINVAL;
                        goto out;
@@ -1279,40 +1524,20 @@ static __poll_t smc_poll(struct file *file, struct socket *sock,
        struct sock *sk = sock->sk;
        __poll_t mask = 0;
        struct smc_sock *smc;
-       int rc;
 
        if (!sk)
                return EPOLLNVAL;
 
        smc = smc_sk(sock->sk);
-       sock_hold(sk);
-       lock_sock(sk);
        if ((sk->sk_state == SMC_INIT) || smc->use_fallback) {
                /* delegate to CLC child sock */
-               release_sock(sk);
                mask = smc->clcsock->ops->poll(file, smc->clcsock, wait);
-               lock_sock(sk);
                sk->sk_err = smc->clcsock->sk->sk_err;
-               if (sk->sk_err) {
+               if (sk->sk_err)
                        mask |= EPOLLERR;
-               } else {
-                       /* if non-blocking connect finished ... */
-                       if (sk->sk_state == SMC_INIT &&
-                           mask & EPOLLOUT &&
-                           smc->clcsock->sk->sk_state != TCP_CLOSE) {
-                               rc = __smc_connect(smc);
-                               if (rc < 0)
-                                       mask |= EPOLLERR;
-                               /* success cases including fallback */
-                               mask |= EPOLLOUT | EPOLLWRNORM;
-                       }
-               }
        } else {
-               if (sk->sk_state != SMC_CLOSED) {
-                       release_sock(sk);
-                       sock_poll_wait(file, sk_sleep(sk), wait);
-                       lock_sock(sk);
-               }
+               if (sk->sk_state != SMC_CLOSED)
+                       sock_poll_wait(file, wait);
                if (sk->sk_err)
                        mask |= EPOLLERR;
                if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
@@ -1338,10 +1563,7 @@ static __poll_t smc_poll(struct file *file, struct socket *sock,
                }
                if (smc->conn.urg_state == SMC_URG_VALID)
                        mask |= EPOLLPRI;
-
        }
-       release_sock(sk);
-       sock_put(sk);
 
        return mask;
 }
@@ -1361,8 +1583,7 @@ static int smc_shutdown(struct socket *sock, int how)
        lock_sock(sk);
 
        rc = -ENOTCONN;
-       if ((sk->sk_state != SMC_LISTEN) &&
-           (sk->sk_state != SMC_ACTIVE) &&
+       if ((sk->sk_state != SMC_ACTIVE) &&
            (sk->sk_state != SMC_PEERCLOSEWAIT1) &&
            (sk->sk_state != SMC_PEERCLOSEWAIT2) &&
            (sk->sk_state != SMC_APPCLOSEWAIT1) &&
@@ -1421,7 +1642,8 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
 
        if (optlen < sizeof(int))
                return -EINVAL;
-       get_user(val, (int __user *)optval);
+       if (get_user(val, (int __user *)optval))
+               return -EFAULT;
 
        lock_sock(sk);
        switch (optname) {
@@ -1433,6 +1655,7 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
                /* option not supported by SMC */
                if (sk->sk_state == SMC_INIT) {
                        smc->use_fallback = true;
+                       smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP;
                } else {
                        if (!smc->use_fallback)
                                rc = -EINVAL;
@@ -1484,15 +1707,22 @@ static int smc_ioctl(struct socket *sock, unsigned int cmd,
 
        smc = smc_sk(sock->sk);
        conn = &smc->conn;
+       lock_sock(&smc->sk);
        if (smc->use_fallback) {
-               if (!smc->clcsock)
+               if (!smc->clcsock) {
+                       release_sock(&smc->sk);
                        return -EBADF;
-               return smc->clcsock->ops->ioctl(smc->clcsock, cmd, arg);
+               }
+               answ = smc->clcsock->ops->ioctl(smc->clcsock, cmd, arg);
+               release_sock(&smc->sk);
+               return answ;
        }
        switch (cmd) {
        case SIOCINQ: /* same as FIONREAD */
-               if (smc->sk.sk_state == SMC_LISTEN)
+               if (smc->sk.sk_state == SMC_LISTEN) {
+                       release_sock(&smc->sk);
                        return -EINVAL;
+               }
                if (smc->sk.sk_state == SMC_INIT ||
                    smc->sk.sk_state == SMC_CLOSED)
                        answ = 0;
@@ -1501,8 +1731,10 @@ static int smc_ioctl(struct socket *sock, unsigned int cmd,
                break;
        case SIOCOUTQ:
                /* output queue size (not send + not acked) */
-               if (smc->sk.sk_state == SMC_LISTEN)
+               if (smc->sk.sk_state == SMC_LISTEN) {
+                       release_sock(&smc->sk);
                        return -EINVAL;
+               }
                if (smc->sk.sk_state == SMC_INIT ||
                    smc->sk.sk_state == SMC_CLOSED)
                        answ = 0;
@@ -1512,8 +1744,10 @@ static int smc_ioctl(struct socket *sock, unsigned int cmd,
                break;
        case SIOCOUTQNSD:
                /* output queue size (not send only) */
-               if (smc->sk.sk_state == SMC_LISTEN)
+               if (smc->sk.sk_state == SMC_LISTEN) {
+                       release_sock(&smc->sk);
                        return -EINVAL;
+               }
                if (smc->sk.sk_state == SMC_INIT ||
                    smc->sk.sk_state == SMC_CLOSED)
                        answ = 0;
@@ -1521,25 +1755,25 @@ static int smc_ioctl(struct socket *sock, unsigned int cmd,
                        answ = smc_tx_prepared_sends(&smc->conn);
                break;
        case SIOCATMARK:
-               if (smc->sk.sk_state == SMC_LISTEN)
+               if (smc->sk.sk_state == SMC_LISTEN) {
+                       release_sock(&smc->sk);
                        return -EINVAL;
+               }
                if (smc->sk.sk_state == SMC_INIT ||
                    smc->sk.sk_state == SMC_CLOSED) {
                        answ = 0;
                } else {
-                       smc_curs_write(&cons,
-                              smc_curs_read(&conn->local_tx_ctrl.cons, conn),
-                                      conn);
-                       smc_curs_write(&urg,
-                                      smc_curs_read(&conn->urg_curs, conn),
-                                      conn);
+                       smc_curs_copy(&cons, &conn->local_tx_ctrl.cons, conn);
+                       smc_curs_copy(&urg, &conn->urg_curs, conn);
                        answ = smc_curs_diff(conn->rmb_desc->len,
                                             &cons, &urg) == 1;
                }
                break;
        default:
+               release_sock(&smc->sk);
                return -ENOIOCTLCMD;
        }
+       release_sock(&smc->sk);
 
        return put_user(answ, (int __user *)arg);
 }
@@ -1663,6 +1897,7 @@ static int smc_create(struct net *net, struct socket *sock, int protocol,
        /* create internal TCP socket for CLC handshake and fallback */
        smc = smc_sk(sk);
        smc->use_fallback = false; /* assume rdma capability first */
+       smc->fallback_rsn = 0;
        rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP,
                              &smc->clcsock);
        if (rc) {