Merge tag 'fs.setgid.v6.0' of git://git.kernel.org/pub/scm/linux/kernel/git/brauner...
[linux-2.6-microblaze.git] / net / smc / af_smc.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  *  Shared Memory Communications over RDMA (SMC-R) and RoCE
4  *
5  *  AF_SMC protocol family socket handler keeping the AF_INET sock address type
6  *  applies to SOCK_STREAM sockets only
7  *  offers an alternative communication option for TCP-protocol sockets
8  *  applicable with RoCE-cards only
9  *
10  *  Initial restrictions:
11  *    - support for alternate links postponed
12  *
13  *  Copyright IBM Corp. 2016, 2018
14  *
15  *  Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
16  *              based on prototype from Frank Blaschka
17  */
18
19 #define KMSG_COMPONENT "smc"
20 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
21
22 #include <linux/module.h>
23 #include <linux/socket.h>
24 #include <linux/workqueue.h>
25 #include <linux/in.h>
26 #include <linux/sched/signal.h>
27 #include <linux/if_vlan.h>
28 #include <linux/rcupdate_wait.h>
29 #include <linux/ctype.h>
30
31 #include <net/sock.h>
32 #include <net/tcp.h>
33 #include <net/smc.h>
34 #include <asm/ioctls.h>
35
36 #include <net/net_namespace.h>
37 #include <net/netns/generic.h>
38 #include "smc_netns.h"
39
40 #include "smc.h"
41 #include "smc_clc.h"
42 #include "smc_llc.h"
43 #include "smc_cdc.h"
44 #include "smc_core.h"
45 #include "smc_ib.h"
46 #include "smc_ism.h"
47 #include "smc_pnet.h"
48 #include "smc_netlink.h"
49 #include "smc_tx.h"
50 #include "smc_rx.h"
51 #include "smc_close.h"
52 #include "smc_stats.h"
53 #include "smc_tracepoint.h"
54 #include "smc_sysctl.h"
55
/* serialize link group creation on server */
static DEFINE_MUTEX(smc_server_lgr_pending);
/* serialize link group creation on client */
static DEFINE_MUTEX(smc_client_lgr_pending);

/* workqueue for tcp listen work */
static struct workqueue_struct	*smc_tcp_ls_wq;
/* workqueue for handshake work */
struct workqueue_struct	*smc_hs_wq;
/* workqueue for close work */
struct workqueue_struct	*smc_close_wq;

/* work handlers installed on each socket by smc_sock_alloc() */
static void smc_tcp_listen_work(struct work_struct *);
static void smc_connect_work(struct work_struct *);
69
70 int smc_nl_dump_hs_limitation(struct sk_buff *skb, struct netlink_callback *cb)
71 {
72         struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
73         void *hdr;
74
75         if (cb_ctx->pos[0])
76                 goto out;
77
78         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
79                           &smc_gen_nl_family, NLM_F_MULTI,
80                           SMC_NETLINK_DUMP_HS_LIMITATION);
81         if (!hdr)
82                 return -ENOMEM;
83
84         if (nla_put_u8(skb, SMC_NLA_HS_LIMITATION_ENABLED,
85                        sock_net(skb->sk)->smc.limit_smc_hs))
86                 goto err;
87
88         genlmsg_end(skb, hdr);
89         cb_ctx->pos[0] = 1;
90 out:
91         return skb->len;
92 err:
93         genlmsg_cancel(skb, hdr);
94         return -EMSGSIZE;
95 }
96
97 int smc_nl_enable_hs_limitation(struct sk_buff *skb, struct genl_info *info)
98 {
99         sock_net(skb->sk)->smc.limit_smc_hs = true;
100         return 0;
101 }
102
103 int smc_nl_disable_hs_limitation(struct sk_buff *skb, struct genl_info *info)
104 {
105         sock_net(skb->sk)->smc.limit_smc_hs = false;
106         return 0;
107 }
108
109 static void smc_set_keepalive(struct sock *sk, int val)
110 {
111         struct smc_sock *smc = smc_sk(sk);
112
113         smc->clcsock->sk->sk_prot->keepalive(smc->clcsock->sk, val);
114 }
115
116 static struct sock *smc_tcp_syn_recv_sock(const struct sock *sk,
117                                           struct sk_buff *skb,
118                                           struct request_sock *req,
119                                           struct dst_entry *dst,
120                                           struct request_sock *req_unhash,
121                                           bool *own_req)
122 {
123         struct smc_sock *smc;
124         struct sock *child;
125
126         smc = smc_clcsock_user_data(sk);
127
128         if (READ_ONCE(sk->sk_ack_backlog) + atomic_read(&smc->queued_smc_hs) >
129                                 sk->sk_max_ack_backlog)
130                 goto drop;
131
132         if (sk_acceptq_is_full(&smc->sk)) {
133                 NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
134                 goto drop;
135         }
136
137         /* passthrough to original syn recv sock fct */
138         child = smc->ori_af_ops->syn_recv_sock(sk, skb, req, dst, req_unhash,
139                                                own_req);
140         /* child must not inherit smc or its ops */
141         if (child) {
142                 rcu_assign_sk_user_data(child, NULL);
143
144                 /* v4-mapped sockets don't inherit parent ops. Don't restore. */
145                 if (inet_csk(child)->icsk_af_ops == inet_csk(sk)->icsk_af_ops)
146                         inet_csk(child)->icsk_af_ops = smc->ori_af_ops;
147         }
148         return child;
149
150 drop:
151         dst_release(dst);
152         tcp_listendrop(sk);
153         return NULL;
154 }
155
156 static bool smc_hs_congested(const struct sock *sk)
157 {
158         const struct smc_sock *smc;
159
160         smc = smc_clcsock_user_data(sk);
161
162         if (!smc)
163                 return true;
164
165         if (workqueue_congested(WORK_CPU_UNBOUND, smc_hs_wq))
166                 return true;
167
168         return false;
169 }
170
171 static struct smc_hashinfo smc_v4_hashinfo = {
172         .lock = __RW_LOCK_UNLOCKED(smc_v4_hashinfo.lock),
173 };
174
175 static struct smc_hashinfo smc_v6_hashinfo = {
176         .lock = __RW_LOCK_UNLOCKED(smc_v6_hashinfo.lock),
177 };
178
179 int smc_hash_sk(struct sock *sk)
180 {
181         struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;
182         struct hlist_head *head;
183
184         head = &h->ht;
185
186         write_lock_bh(&h->lock);
187         sk_add_node(sk, head);
188         write_unlock_bh(&h->lock);
189         sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
190
191         return 0;
192 }
193 EXPORT_SYMBOL_GPL(smc_hash_sk);
194
195 void smc_unhash_sk(struct sock *sk)
196 {
197         struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;
198
199         write_lock_bh(&h->lock);
200         if (sk_del_node_init(sk))
201                 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
202         write_unlock_bh(&h->lock);
203 }
204 EXPORT_SYMBOL_GPL(smc_unhash_sk);
205
206 /* This will be called before user really release sock_lock. So do the
207  * work which we didn't do because of user hold the sock_lock in the
208  * BH context
209  */
210 static void smc_release_cb(struct sock *sk)
211 {
212         struct smc_sock *smc = smc_sk(sk);
213
214         if (smc->conn.tx_in_release_sock) {
215                 smc_tx_pending(&smc->conn);
216                 smc->conn.tx_in_release_sock = false;
217         }
218 }
219
220 struct proto smc_proto = {
221         .name           = "SMC",
222         .owner          = THIS_MODULE,
223         .keepalive      = smc_set_keepalive,
224         .hash           = smc_hash_sk,
225         .unhash         = smc_unhash_sk,
226         .release_cb     = smc_release_cb,
227         .obj_size       = sizeof(struct smc_sock),
228         .h.smc_hash     = &smc_v4_hashinfo,
229         .slab_flags     = SLAB_TYPESAFE_BY_RCU,
230 };
231 EXPORT_SYMBOL_GPL(smc_proto);
232
233 struct proto smc_proto6 = {
234         .name           = "SMC6",
235         .owner          = THIS_MODULE,
236         .keepalive      = smc_set_keepalive,
237         .hash           = smc_hash_sk,
238         .unhash         = smc_unhash_sk,
239         .release_cb     = smc_release_cb,
240         .obj_size       = sizeof(struct smc_sock),
241         .h.smc_hash     = &smc_v6_hashinfo,
242         .slab_flags     = SLAB_TYPESAFE_BY_RCU,
243 };
244 EXPORT_SYMBOL_GPL(smc_proto6);
245
246 static void smc_fback_restore_callbacks(struct smc_sock *smc)
247 {
248         struct sock *clcsk = smc->clcsock->sk;
249
250         write_lock_bh(&clcsk->sk_callback_lock);
251         clcsk->sk_user_data = NULL;
252
253         smc_clcsock_restore_cb(&clcsk->sk_state_change, &smc->clcsk_state_change);
254         smc_clcsock_restore_cb(&clcsk->sk_data_ready, &smc->clcsk_data_ready);
255         smc_clcsock_restore_cb(&clcsk->sk_write_space, &smc->clcsk_write_space);
256         smc_clcsock_restore_cb(&clcsk->sk_error_report, &smc->clcsk_error_report);
257
258         write_unlock_bh(&clcsk->sk_callback_lock);
259 }
260
261 static void smc_restore_fallback_changes(struct smc_sock *smc)
262 {
263         if (smc->clcsock->file) { /* non-accepted sockets have no file yet */
264                 smc->clcsock->file->private_data = smc->sk.sk_socket;
265                 smc->clcsock->file = NULL;
266                 smc_fback_restore_callbacks(smc);
267         }
268 }
269
270 static int __smc_release(struct smc_sock *smc)
271 {
272         struct sock *sk = &smc->sk;
273         int rc = 0;
274
275         if (!smc->use_fallback) {
276                 rc = smc_close_active(smc);
277                 sock_set_flag(sk, SOCK_DEAD);
278                 sk->sk_shutdown |= SHUTDOWN_MASK;
279         } else {
280                 if (sk->sk_state != SMC_CLOSED) {
281                         if (sk->sk_state != SMC_LISTEN &&
282                             sk->sk_state != SMC_INIT)
283                                 sock_put(sk); /* passive closing */
284                         if (sk->sk_state == SMC_LISTEN) {
285                                 /* wake up clcsock accept */
286                                 rc = kernel_sock_shutdown(smc->clcsock,
287                                                           SHUT_RDWR);
288                         }
289                         sk->sk_state = SMC_CLOSED;
290                         sk->sk_state_change(sk);
291                 }
292                 smc_restore_fallback_changes(smc);
293         }
294
295         sk->sk_prot->unhash(sk);
296
297         if (sk->sk_state == SMC_CLOSED) {
298                 if (smc->clcsock) {
299                         release_sock(sk);
300                         smc_clcsock_release(smc);
301                         lock_sock(sk);
302                 }
303                 if (!smc->use_fallback)
304                         smc_conn_free(&smc->conn);
305         }
306
307         return rc;
308 }
309
310 static int smc_release(struct socket *sock)
311 {
312         struct sock *sk = sock->sk;
313         struct smc_sock *smc;
314         int old_state, rc = 0;
315
316         if (!sk)
317                 goto out;
318
319         sock_hold(sk); /* sock_put below */
320         smc = smc_sk(sk);
321
322         old_state = sk->sk_state;
323
324         /* cleanup for a dangling non-blocking connect */
325         if (smc->connect_nonblock && old_state == SMC_INIT)
326                 tcp_abort(smc->clcsock->sk, ECONNABORTED);
327
328         if (cancel_work_sync(&smc->connect_work))
329                 sock_put(&smc->sk); /* sock_hold in smc_connect for passive closing */
330
331         if (sk->sk_state == SMC_LISTEN)
332                 /* smc_close_non_accepted() is called and acquires
333                  * sock lock for child sockets again
334                  */
335                 lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
336         else
337                 lock_sock(sk);
338
339         if (old_state == SMC_INIT && sk->sk_state == SMC_ACTIVE &&
340             !smc->use_fallback)
341                 smc_close_active_abort(smc);
342
343         rc = __smc_release(smc);
344
345         /* detach socket */
346         sock_orphan(sk);
347         sock->sk = NULL;
348         release_sock(sk);
349
350         sock_put(sk); /* sock_hold above */
351         sock_put(sk); /* final sock_put */
352 out:
353         return rc;
354 }
355
356 static void smc_destruct(struct sock *sk)
357 {
358         if (sk->sk_state != SMC_CLOSED)
359                 return;
360         if (!sock_flag(sk, SOCK_DEAD))
361                 return;
362
363         sk_refcnt_debug_dec(sk);
364 }
365
366 static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
367                                    int protocol)
368 {
369         struct smc_sock *smc;
370         struct proto *prot;
371         struct sock *sk;
372
373         prot = (protocol == SMCPROTO_SMC6) ? &smc_proto6 : &smc_proto;
374         sk = sk_alloc(net, PF_SMC, GFP_KERNEL, prot, 0);
375         if (!sk)
376                 return NULL;
377
378         sock_init_data(sock, sk); /* sets sk_refcnt to 1 */
379         sk->sk_state = SMC_INIT;
380         sk->sk_destruct = smc_destruct;
381         sk->sk_protocol = protocol;
382         smc = smc_sk(sk);
383         INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
384         INIT_WORK(&smc->connect_work, smc_connect_work);
385         INIT_DELAYED_WORK(&smc->conn.tx_work, smc_tx_work);
386         INIT_LIST_HEAD(&smc->accept_q);
387         spin_lock_init(&smc->accept_q_lock);
388         spin_lock_init(&smc->conn.send_lock);
389         sk->sk_prot->hash(sk);
390         sk_refcnt_debug_inc(sk);
391         mutex_init(&smc->clcsock_release_lock);
392         smc_init_saved_callbacks(smc);
393
394         return sk;
395 }
396
397 static int smc_bind(struct socket *sock, struct sockaddr *uaddr,
398                     int addr_len)
399 {
400         struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
401         struct sock *sk = sock->sk;
402         struct smc_sock *smc;
403         int rc;
404
405         smc = smc_sk(sk);
406
407         /* replicate tests from inet_bind(), to be safe wrt. future changes */
408         rc = -EINVAL;
409         if (addr_len < sizeof(struct sockaddr_in))
410                 goto out;
411
412         rc = -EAFNOSUPPORT;
413         if (addr->sin_family != AF_INET &&
414             addr->sin_family != AF_INET6 &&
415             addr->sin_family != AF_UNSPEC)
416                 goto out;
417         /* accept AF_UNSPEC (mapped to AF_INET) only if s_addr is INADDR_ANY */
418         if (addr->sin_family == AF_UNSPEC &&
419             addr->sin_addr.s_addr != htonl(INADDR_ANY))
420                 goto out;
421
422         lock_sock(sk);
423
424         /* Check if socket is already active */
425         rc = -EINVAL;
426         if (sk->sk_state != SMC_INIT || smc->connect_nonblock)
427                 goto out_rel;
428
429         smc->clcsock->sk->sk_reuse = sk->sk_reuse;
430         rc = kernel_bind(smc->clcsock, uaddr, addr_len);
431
432 out_rel:
433         release_sock(sk);
434 out:
435         return rc;
436 }
437
438 static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
439                                    unsigned long mask)
440 {
441         /* options we don't get control via setsockopt for */
442         nsk->sk_type = osk->sk_type;
443         nsk->sk_sndbuf = osk->sk_sndbuf;
444         nsk->sk_rcvbuf = osk->sk_rcvbuf;
445         nsk->sk_sndtimeo = osk->sk_sndtimeo;
446         nsk->sk_rcvtimeo = osk->sk_rcvtimeo;
447         nsk->sk_mark = osk->sk_mark;
448         nsk->sk_priority = osk->sk_priority;
449         nsk->sk_rcvlowat = osk->sk_rcvlowat;
450         nsk->sk_bound_dev_if = osk->sk_bound_dev_if;
451         nsk->sk_err = osk->sk_err;
452
453         nsk->sk_flags &= ~mask;
454         nsk->sk_flags |= osk->sk_flags & mask;
455 }
456
457 #define SK_FLAGS_SMC_TO_CLC ((1UL << SOCK_URGINLINE) | \
458                              (1UL << SOCK_KEEPOPEN) | \
459                              (1UL << SOCK_LINGER) | \
460                              (1UL << SOCK_BROADCAST) | \
461                              (1UL << SOCK_TIMESTAMP) | \
462                              (1UL << SOCK_DBG) | \
463                              (1UL << SOCK_RCVTSTAMP) | \
464                              (1UL << SOCK_RCVTSTAMPNS) | \
465                              (1UL << SOCK_LOCALROUTE) | \
466                              (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE) | \
467                              (1UL << SOCK_RXQ_OVFL) | \
468                              (1UL << SOCK_WIFI_STATUS) | \
469                              (1UL << SOCK_NOFCS) | \
470                              (1UL << SOCK_FILTER_LOCKED) | \
471                              (1UL << SOCK_TSTAMP_NEW))
472 /* copy only relevant settings and flags of SOL_SOCKET level from smc to
473  * clc socket (since smc is not called for these options from net/core)
474  */
475 static void smc_copy_sock_settings_to_clc(struct smc_sock *smc)
476 {
477         smc_copy_sock_settings(smc->clcsock->sk, &smc->sk, SK_FLAGS_SMC_TO_CLC);
478 }
479
480 #define SK_FLAGS_CLC_TO_SMC ((1UL << SOCK_URGINLINE) | \
481                              (1UL << SOCK_KEEPOPEN) | \
482                              (1UL << SOCK_LINGER) | \
483                              (1UL << SOCK_DBG))
484 /* copy only settings and flags relevant for smc from clc to smc socket */
485 static void smc_copy_sock_settings_to_smc(struct smc_sock *smc)
486 {
487         smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC);
488 }
489
490 /* register the new vzalloced sndbuf on all links */
491 static int smcr_lgr_reg_sndbufs(struct smc_link *link,
492                                 struct smc_buf_desc *snd_desc)
493 {
494         struct smc_link_group *lgr = link->lgr;
495         int i, rc = 0;
496
497         if (!snd_desc->is_vm)
498                 return -EINVAL;
499
500         /* protect against parallel smcr_link_reg_buf() */
501         mutex_lock(&lgr->llc_conf_mutex);
502         for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
503                 if (!smc_link_active(&lgr->lnk[i]))
504                         continue;
505                 rc = smcr_link_reg_buf(&lgr->lnk[i], snd_desc);
506                 if (rc)
507                         break;
508         }
509         mutex_unlock(&lgr->llc_conf_mutex);
510         return rc;
511 }
512
513 /* register the new rmb on all links */
514 static int smcr_lgr_reg_rmbs(struct smc_link *link,
515                              struct smc_buf_desc *rmb_desc)
516 {
517         struct smc_link_group *lgr = link->lgr;
518         int i, rc = 0;
519
520         rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY);
521         if (rc)
522                 return rc;
523         /* protect against parallel smc_llc_cli_rkey_exchange() and
524          * parallel smcr_link_reg_buf()
525          */
526         mutex_lock(&lgr->llc_conf_mutex);
527         for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
528                 if (!smc_link_active(&lgr->lnk[i]))
529                         continue;
530                 rc = smcr_link_reg_buf(&lgr->lnk[i], rmb_desc);
531                 if (rc)
532                         goto out;
533         }
534
535         /* exchange confirm_rkey msg with peer */
536         rc = smc_llc_do_confirm_rkey(link, rmb_desc);
537         if (rc) {
538                 rc = -EFAULT;
539                 goto out;
540         }
541         rmb_desc->is_conf_rkey = true;
542 out:
543         mutex_unlock(&lgr->llc_conf_mutex);
544         smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
545         return rc;
546 }
547
548 static int smcr_clnt_conf_first_link(struct smc_sock *smc)
549 {
550         struct smc_link *link = smc->conn.lnk;
551         struct smc_llc_qentry *qentry;
552         int rc;
553
554         /* receive CONFIRM LINK request from server over RoCE fabric */
555         qentry = smc_llc_wait(link->lgr, NULL, SMC_LLC_WAIT_TIME,
556                               SMC_LLC_CONFIRM_LINK);
557         if (!qentry) {
558                 struct smc_clc_msg_decline dclc;
559
560                 rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
561                                       SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
562                 return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_CL : rc;
563         }
564         smc_llc_save_peer_uid(qentry);
565         rc = smc_llc_eval_conf_link(qentry, SMC_LLC_REQ);
566         smc_llc_flow_qentry_del(&link->lgr->llc_flow_lcl);
567         if (rc)
568                 return SMC_CLC_DECL_RMBE_EC;
569
570         rc = smc_ib_modify_qp_rts(link);
571         if (rc)
572                 return SMC_CLC_DECL_ERR_RDYLNK;
573
574         smc_wr_remember_qp_attr(link);
575
576         /* reg the sndbuf if it was vzalloced */
577         if (smc->conn.sndbuf_desc->is_vm) {
578                 if (smcr_link_reg_buf(link, smc->conn.sndbuf_desc))
579                         return SMC_CLC_DECL_ERR_REGBUF;
580         }
581
582         /* reg the rmb */
583         if (smcr_link_reg_buf(link, smc->conn.rmb_desc))
584                 return SMC_CLC_DECL_ERR_REGBUF;
585
586         /* confirm_rkey is implicit on 1st contact */
587         smc->conn.rmb_desc->is_conf_rkey = true;
588
589         /* send CONFIRM LINK response over RoCE fabric */
590         rc = smc_llc_send_confirm_link(link, SMC_LLC_RESP);
591         if (rc < 0)
592                 return SMC_CLC_DECL_TIMEOUT_CL;
593
594         smc_llc_link_active(link);
595         smcr_lgr_set_type(link->lgr, SMC_LGR_SINGLE);
596
597         /* optional 2nd link, receive ADD LINK request from server */
598         qentry = smc_llc_wait(link->lgr, NULL, SMC_LLC_WAIT_TIME,
599                               SMC_LLC_ADD_LINK);
600         if (!qentry) {
601                 struct smc_clc_msg_decline dclc;
602
603                 rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
604                                       SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
605                 if (rc == -EAGAIN)
606                         rc = 0; /* no DECLINE received, go with one link */
607                 return rc;
608         }
609         smc_llc_flow_qentry_clr(&link->lgr->llc_flow_lcl);
610         smc_llc_cli_add_link(link, qentry);
611         return 0;
612 }
613
614 static bool smc_isascii(char *hostname)
615 {
616         int i;
617
618         for (i = 0; i < SMC_MAX_HOSTNAME_LEN; i++)
619                 if (!isascii(hostname[i]))
620                         return false;
621         return true;
622 }
623
624 static void smc_conn_save_peer_info_fce(struct smc_sock *smc,
625                                         struct smc_clc_msg_accept_confirm *clc)
626 {
627         struct smc_clc_msg_accept_confirm_v2 *clc_v2 =
628                 (struct smc_clc_msg_accept_confirm_v2 *)clc;
629         struct smc_clc_first_contact_ext *fce;
630         int clc_v2_len;
631
632         if (clc->hdr.version == SMC_V1 ||
633             !(clc->hdr.typev2 & SMC_FIRST_CONTACT_MASK))
634                 return;
635
636         if (smc->conn.lgr->is_smcd) {
637                 memcpy(smc->conn.lgr->negotiated_eid, clc_v2->d1.eid,
638                        SMC_MAX_EID_LEN);
639                 clc_v2_len = offsetofend(struct smc_clc_msg_accept_confirm_v2,
640                                          d1);
641         } else {
642                 memcpy(smc->conn.lgr->negotiated_eid, clc_v2->r1.eid,
643                        SMC_MAX_EID_LEN);
644                 clc_v2_len = offsetofend(struct smc_clc_msg_accept_confirm_v2,
645                                          r1);
646         }
647         fce = (struct smc_clc_first_contact_ext *)(((u8 *)clc_v2) + clc_v2_len);
648         smc->conn.lgr->peer_os = fce->os_type;
649         smc->conn.lgr->peer_smc_release = fce->release;
650         if (smc_isascii(fce->hostname))
651                 memcpy(smc->conn.lgr->peer_hostname, fce->hostname,
652                        SMC_MAX_HOSTNAME_LEN);
653 }
654
655 static void smcr_conn_save_peer_info(struct smc_sock *smc,
656                                      struct smc_clc_msg_accept_confirm *clc)
657 {
658         int bufsize = smc_uncompress_bufsize(clc->r0.rmbe_size);
659
660         smc->conn.peer_rmbe_idx = clc->r0.rmbe_idx;
661         smc->conn.local_tx_ctrl.token = ntohl(clc->r0.rmbe_alert_token);
662         smc->conn.peer_rmbe_size = bufsize;
663         atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
664         smc->conn.tx_off = bufsize * (smc->conn.peer_rmbe_idx - 1);
665 }
666
667 static void smcd_conn_save_peer_info(struct smc_sock *smc,
668                                      struct smc_clc_msg_accept_confirm *clc)
669 {
670         int bufsize = smc_uncompress_bufsize(clc->d0.dmbe_size);
671
672         smc->conn.peer_rmbe_idx = clc->d0.dmbe_idx;
673         smc->conn.peer_token = clc->d0.token;
674         /* msg header takes up space in the buffer */
675         smc->conn.peer_rmbe_size = bufsize - sizeof(struct smcd_cdc_msg);
676         atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
677         smc->conn.tx_off = bufsize * smc->conn.peer_rmbe_idx;
678 }
679
680 static void smc_conn_save_peer_info(struct smc_sock *smc,
681                                     struct smc_clc_msg_accept_confirm *clc)
682 {
683         if (smc->conn.lgr->is_smcd)
684                 smcd_conn_save_peer_info(smc, clc);
685         else
686                 smcr_conn_save_peer_info(smc, clc);
687         smc_conn_save_peer_info_fce(smc, clc);
688 }
689
690 static void smc_link_save_peer_info(struct smc_link *link,
691                                     struct smc_clc_msg_accept_confirm *clc,
692                                     struct smc_init_info *ini)
693 {
694         link->peer_qpn = ntoh24(clc->r0.qpn);
695         memcpy(link->peer_gid, ini->peer_gid, SMC_GID_SIZE);
696         memcpy(link->peer_mac, ini->peer_mac, sizeof(link->peer_mac));
697         link->peer_psn = ntoh24(clc->r0.psn);
698         link->peer_mtu = clc->r0.qp_mtu;
699 }
700
701 static void smc_stat_inc_fback_rsn_cnt(struct smc_sock *smc,
702                                        struct smc_stats_fback *fback_arr)
703 {
704         int cnt;
705
706         for (cnt = 0; cnt < SMC_MAX_FBACK_RSN_CNT; cnt++) {
707                 if (fback_arr[cnt].fback_code == smc->fallback_rsn) {
708                         fback_arr[cnt].count++;
709                         break;
710                 }
711                 if (!fback_arr[cnt].fback_code) {
712                         fback_arr[cnt].fback_code = smc->fallback_rsn;
713                         fback_arr[cnt].count++;
714                         break;
715                 }
716         }
717 }
718
719 static void smc_stat_fallback(struct smc_sock *smc)
720 {
721         struct net *net = sock_net(&smc->sk);
722
723         mutex_lock(&net->smc.mutex_fback_rsn);
724         if (smc->listen_smc) {
725                 smc_stat_inc_fback_rsn_cnt(smc, net->smc.fback_rsn->srv);
726                 net->smc.fback_rsn->srv_fback_cnt++;
727         } else {
728                 smc_stat_inc_fback_rsn_cnt(smc, net->smc.fback_rsn->clnt);
729                 net->smc.fback_rsn->clnt_fback_cnt++;
730         }
731         mutex_unlock(&net->smc.mutex_fback_rsn);
732 }
733
734 /* must be called under rcu read lock */
735 static void smc_fback_wakeup_waitqueue(struct smc_sock *smc, void *key)
736 {
737         struct socket_wq *wq;
738         __poll_t flags;
739
740         wq = rcu_dereference(smc->sk.sk_wq);
741         if (!skwq_has_sleeper(wq))
742                 return;
743
744         /* wake up smc sk->sk_wq */
745         if (!key) {
746                 /* sk_state_change */
747                 wake_up_interruptible_all(&wq->wait);
748         } else {
749                 flags = key_to_poll(key);
750                 if (flags & (EPOLLIN | EPOLLOUT))
751                         /* sk_data_ready or sk_write_space */
752                         wake_up_interruptible_sync_poll(&wq->wait, flags);
753                 else if (flags & EPOLLERR)
754                         /* sk_error_report */
755                         wake_up_interruptible_poll(&wq->wait, flags);
756         }
757 }
758
759 static int smc_fback_mark_woken(wait_queue_entry_t *wait,
760                                 unsigned int mode, int sync, void *key)
761 {
762         struct smc_mark_woken *mark =
763                 container_of(wait, struct smc_mark_woken, wait_entry);
764
765         mark->woken = true;
766         mark->key = key;
767         return 0;
768 }
769
770 static void smc_fback_forward_wakeup(struct smc_sock *smc, struct sock *clcsk,
771                                      void (*clcsock_callback)(struct sock *sk))
772 {
773         struct smc_mark_woken mark = { .woken = false };
774         struct socket_wq *wq;
775
776         init_waitqueue_func_entry(&mark.wait_entry,
777                                   smc_fback_mark_woken);
778         rcu_read_lock();
779         wq = rcu_dereference(clcsk->sk_wq);
780         if (!wq)
781                 goto out;
782         add_wait_queue(sk_sleep(clcsk), &mark.wait_entry);
783         clcsock_callback(clcsk);
784         remove_wait_queue(sk_sleep(clcsk), &mark.wait_entry);
785
786         if (mark.woken)
787                 smc_fback_wakeup_waitqueue(smc, mark.key);
788 out:
789         rcu_read_unlock();
790 }
791
792 static void smc_fback_state_change(struct sock *clcsk)
793 {
794         struct smc_sock *smc;
795
796         read_lock_bh(&clcsk->sk_callback_lock);
797         smc = smc_clcsock_user_data(clcsk);
798         if (smc)
799                 smc_fback_forward_wakeup(smc, clcsk,
800                                          smc->clcsk_state_change);
801         read_unlock_bh(&clcsk->sk_callback_lock);
802 }
803
804 static void smc_fback_data_ready(struct sock *clcsk)
805 {
806         struct smc_sock *smc;
807
808         read_lock_bh(&clcsk->sk_callback_lock);
809         smc = smc_clcsock_user_data(clcsk);
810         if (smc)
811                 smc_fback_forward_wakeup(smc, clcsk,
812                                          smc->clcsk_data_ready);
813         read_unlock_bh(&clcsk->sk_callback_lock);
814 }
815
816 static void smc_fback_write_space(struct sock *clcsk)
817 {
818         struct smc_sock *smc;
819
820         read_lock_bh(&clcsk->sk_callback_lock);
821         smc = smc_clcsock_user_data(clcsk);
822         if (smc)
823                 smc_fback_forward_wakeup(smc, clcsk,
824                                          smc->clcsk_write_space);
825         read_unlock_bh(&clcsk->sk_callback_lock);
826 }
827
828 static void smc_fback_error_report(struct sock *clcsk)
829 {
830         struct smc_sock *smc;
831
832         read_lock_bh(&clcsk->sk_callback_lock);
833         smc = smc_clcsock_user_data(clcsk);
834         if (smc)
835                 smc_fback_forward_wakeup(smc, clcsk,
836                                          smc->clcsk_error_report);
837         read_unlock_bh(&clcsk->sk_callback_lock);
838 }
839
840 static void smc_fback_replace_callbacks(struct smc_sock *smc)
841 {
842         struct sock *clcsk = smc->clcsock->sk;
843
844         write_lock_bh(&clcsk->sk_callback_lock);
845         clcsk->sk_user_data = (void *)((uintptr_t)smc | SK_USER_DATA_NOCOPY);
846
847         smc_clcsock_replace_cb(&clcsk->sk_state_change, smc_fback_state_change,
848                                &smc->clcsk_state_change);
849         smc_clcsock_replace_cb(&clcsk->sk_data_ready, smc_fback_data_ready,
850                                &smc->clcsk_data_ready);
851         smc_clcsock_replace_cb(&clcsk->sk_write_space, smc_fback_write_space,
852                                &smc->clcsk_write_space);
853         smc_clcsock_replace_cb(&clcsk->sk_error_report, smc_fback_error_report,
854                                &smc->clcsk_error_report);
855
856         write_unlock_bh(&clcsk->sk_callback_lock);
857 }
858
859 static int smc_switch_to_fallback(struct smc_sock *smc, int reason_code)
860 {
861         int rc = 0;
862
863         mutex_lock(&smc->clcsock_release_lock);
864         if (!smc->clcsock) {
865                 rc = -EBADF;
866                 goto out;
867         }
868
869         smc->use_fallback = true;
870         smc->fallback_rsn = reason_code;
871         smc_stat_fallback(smc);
872         trace_smc_switch_to_fallback(smc, reason_code);
873         if (smc->sk.sk_socket && smc->sk.sk_socket->file) {
874                 smc->clcsock->file = smc->sk.sk_socket->file;
875                 smc->clcsock->file->private_data = smc->clcsock;
876                 smc->clcsock->wq.fasync_list =
877                         smc->sk.sk_socket->wq.fasync_list;
878
879                 /* There might be some wait entries remaining
880                  * in smc sk->sk_wq and they should be woken up
881                  * as clcsock's wait queue is woken up.
882                  */
883                 smc_fback_replace_callbacks(smc);
884         }
885 out:
886         mutex_unlock(&smc->clcsock_release_lock);
887         return rc;
888 }
889
890 /* fall back during connect */
891 static int smc_connect_fallback(struct smc_sock *smc, int reason_code)
892 {
893         struct net *net = sock_net(&smc->sk);
894         int rc = 0;
895
896         rc = smc_switch_to_fallback(smc, reason_code);
897         if (rc) { /* fallback fails */
898                 this_cpu_inc(net->smc.smc_stats->clnt_hshake_err_cnt);
899                 if (smc->sk.sk_state == SMC_INIT)
900                         sock_put(&smc->sk); /* passive closing */
901                 return rc;
902         }
903         smc_copy_sock_settings_to_clc(smc);
904         smc->connect_nonblock = 0;
905         if (smc->sk.sk_state == SMC_INIT)
906                 smc->sk.sk_state = SMC_ACTIVE;
907         return 0;
908 }
909
910 /* decline and fall back during connect */
911 static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code,
912                                         u8 version)
913 {
914         struct net *net = sock_net(&smc->sk);
915         int rc;
916
917         if (reason_code < 0) { /* error, fallback is not possible */
918                 this_cpu_inc(net->smc.smc_stats->clnt_hshake_err_cnt);
919                 if (smc->sk.sk_state == SMC_INIT)
920                         sock_put(&smc->sk); /* passive closing */
921                 return reason_code;
922         }
923         if (reason_code != SMC_CLC_DECL_PEERDECL) {
924                 rc = smc_clc_send_decline(smc, reason_code, version);
925                 if (rc < 0) {
926                         this_cpu_inc(net->smc.smc_stats->clnt_hshake_err_cnt);
927                         if (smc->sk.sk_state == SMC_INIT)
928                                 sock_put(&smc->sk); /* passive closing */
929                         return rc;
930                 }
931         }
932         return smc_connect_fallback(smc, reason_code);
933 }
934
935 static void smc_conn_abort(struct smc_sock *smc, int local_first)
936 {
937         struct smc_connection *conn = &smc->conn;
938         struct smc_link_group *lgr = conn->lgr;
939         bool lgr_valid = false;
940
941         if (smc_conn_lgr_valid(conn))
942                 lgr_valid = true;
943
944         smc_conn_free(conn);
945         if (local_first && lgr_valid)
946                 smc_lgr_cleanup_early(lgr);
947 }
948
949 /* check if there is a rdma device available for this connection. */
950 /* called for connect and listen */
951 static int smc_find_rdma_device(struct smc_sock *smc, struct smc_init_info *ini)
952 {
953         /* PNET table look up: search active ib_device and port
954          * within same PNETID that also contains the ethernet device
955          * used for the internal TCP socket
956          */
957         smc_pnet_find_roce_resource(smc->clcsock->sk, ini);
958         if (!ini->check_smcrv2 && !ini->ib_dev)
959                 return SMC_CLC_DECL_NOSMCRDEV;
960         if (ini->check_smcrv2 && !ini->smcrv2.ib_dev_v2)
961                 return SMC_CLC_DECL_NOSMCRDEV;
962         return 0;
963 }
964
965 /* check if there is an ISM device available for this connection. */
966 /* called for connect and listen */
967 static int smc_find_ism_device(struct smc_sock *smc, struct smc_init_info *ini)
968 {
969         /* Find ISM device with same PNETID as connecting interface  */
970         smc_pnet_find_ism_resource(smc->clcsock->sk, ini);
971         if (!ini->ism_dev[0])
972                 return SMC_CLC_DECL_NOSMCDDEV;
973         else
974                 ini->ism_chid[0] = smc_ism_get_chid(ini->ism_dev[0]);
975         return 0;
976 }
977
978 /* is chid unique for the ism devices that are already determined? */
979 static bool smc_find_ism_v2_is_unique_chid(u16 chid, struct smc_init_info *ini,
980                                            int cnt)
981 {
982         int i = (!ini->ism_dev[0]) ? 1 : 0;
983
984         for (; i < cnt; i++)
985                 if (ini->ism_chid[i] == chid)
986                         return false;
987         return true;
988 }
989
990 /* determine possible V2 ISM devices (either without PNETID or with PNETID plus
991  * PNETID matching net_device)
992  */
993 static int smc_find_ism_v2_device_clnt(struct smc_sock *smc,
994                                        struct smc_init_info *ini)
995 {
996         int rc = SMC_CLC_DECL_NOSMCDDEV;
997         struct smcd_dev *smcd;
998         int i = 1;
999         u16 chid;
1000
1001         if (smcd_indicated(ini->smc_type_v1))
1002                 rc = 0;         /* already initialized for V1 */
1003         mutex_lock(&smcd_dev_list.mutex);
1004         list_for_each_entry(smcd, &smcd_dev_list.list, list) {
1005                 if (smcd->going_away || smcd == ini->ism_dev[0])
1006                         continue;
1007                 chid = smc_ism_get_chid(smcd);
1008                 if (!smc_find_ism_v2_is_unique_chid(chid, ini, i))
1009                         continue;
1010                 if (!smc_pnet_is_pnetid_set(smcd->pnetid) ||
1011                     smc_pnet_is_ndev_pnetid(sock_net(&smc->sk), smcd->pnetid)) {
1012                         ini->ism_dev[i] = smcd;
1013                         ini->ism_chid[i] = chid;
1014                         ini->is_smcd = true;
1015                         rc = 0;
1016                         i++;
1017                         if (i > SMC_MAX_ISM_DEVS)
1018                                 break;
1019                 }
1020         }
1021         mutex_unlock(&smcd_dev_list.mutex);
1022         ini->ism_offered_cnt = i - 1;
1023         if (!ini->ism_dev[0] && !ini->ism_dev[1])
1024                 ini->smcd_version = 0;
1025
1026         return rc;
1027 }
1028
1029 /* Check for VLAN ID and register it on ISM device just for CLC handshake */
1030 static int smc_connect_ism_vlan_setup(struct smc_sock *smc,
1031                                       struct smc_init_info *ini)
1032 {
1033         if (ini->vlan_id && smc_ism_get_vlan(ini->ism_dev[0], ini->vlan_id))
1034                 return SMC_CLC_DECL_ISMVLANERR;
1035         return 0;
1036 }
1037
1038 static int smc_find_proposal_devices(struct smc_sock *smc,
1039                                      struct smc_init_info *ini)
1040 {
1041         int rc = 0;
1042
1043         /* check if there is an ism device available */
1044         if (!(ini->smcd_version & SMC_V1) ||
1045             smc_find_ism_device(smc, ini) ||
1046             smc_connect_ism_vlan_setup(smc, ini))
1047                 ini->smcd_version &= ~SMC_V1;
1048         /* else ISM V1 is supported for this connection */
1049
1050         /* check if there is an rdma device available */
1051         if (!(ini->smcr_version & SMC_V1) ||
1052             smc_find_rdma_device(smc, ini))
1053                 ini->smcr_version &= ~SMC_V1;
1054         /* else RDMA is supported for this connection */
1055
1056         ini->smc_type_v1 = smc_indicated_type(ini->smcd_version & SMC_V1,
1057                                               ini->smcr_version & SMC_V1);
1058
1059         /* check if there is an ism v2 device available */
1060         if (!(ini->smcd_version & SMC_V2) ||
1061             !smc_ism_is_v2_capable() ||
1062             smc_find_ism_v2_device_clnt(smc, ini))
1063                 ini->smcd_version &= ~SMC_V2;
1064
1065         /* check if there is an rdma v2 device available */
1066         ini->check_smcrv2 = true;
1067         ini->smcrv2.saddr = smc->clcsock->sk->sk_rcv_saddr;
1068         if (!(ini->smcr_version & SMC_V2) ||
1069             smc->clcsock->sk->sk_family != AF_INET ||
1070             !smc_clc_ueid_count() ||
1071             smc_find_rdma_device(smc, ini))
1072                 ini->smcr_version &= ~SMC_V2;
1073         ini->check_smcrv2 = false;
1074
1075         ini->smc_type_v2 = smc_indicated_type(ini->smcd_version & SMC_V2,
1076                                               ini->smcr_version & SMC_V2);
1077
1078         /* if neither ISM nor RDMA are supported, fallback */
1079         if (ini->smc_type_v1 == SMC_TYPE_N && ini->smc_type_v2 == SMC_TYPE_N)
1080                 rc = SMC_CLC_DECL_NOSMCDEV;
1081
1082         return rc;
1083 }
1084
1085 /* cleanup temporary VLAN ID registration used for CLC handshake. If ISM is
1086  * used, the VLAN ID will be registered again during the connection setup.
1087  */
1088 static int smc_connect_ism_vlan_cleanup(struct smc_sock *smc,
1089                                         struct smc_init_info *ini)
1090 {
1091         if (!smcd_indicated(ini->smc_type_v1))
1092                 return 0;
1093         if (ini->vlan_id && smc_ism_put_vlan(ini->ism_dev[0], ini->vlan_id))
1094                 return SMC_CLC_DECL_CNFERR;
1095         return 0;
1096 }
1097
1098 #define SMC_CLC_MAX_ACCEPT_LEN \
1099         (sizeof(struct smc_clc_msg_accept_confirm_v2) + \
1100          sizeof(struct smc_clc_first_contact_ext) + \
1101          sizeof(struct smc_clc_msg_trail))
1102
1103 /* CLC handshake during connect */
1104 static int smc_connect_clc(struct smc_sock *smc,
1105                            struct smc_clc_msg_accept_confirm_v2 *aclc2,
1106                            struct smc_init_info *ini)
1107 {
1108         int rc = 0;
1109
1110         /* do inband token exchange */
1111         rc = smc_clc_send_proposal(smc, ini);
1112         if (rc)
1113                 return rc;
1114         /* receive SMC Accept CLC message */
1115         return smc_clc_wait_msg(smc, aclc2, SMC_CLC_MAX_ACCEPT_LEN,
1116                                 SMC_CLC_ACCEPT, CLC_WAIT_TIME);
1117 }
1118
1119 void smc_fill_gid_list(struct smc_link_group *lgr,
1120                        struct smc_gidlist *gidlist,
1121                        struct smc_ib_device *known_dev, u8 *known_gid)
1122 {
1123         struct smc_init_info *alt_ini = NULL;
1124
1125         memset(gidlist, 0, sizeof(*gidlist));
1126         memcpy(gidlist->list[gidlist->len++], known_gid, SMC_GID_SIZE);
1127
1128         alt_ini = kzalloc(sizeof(*alt_ini), GFP_KERNEL);
1129         if (!alt_ini)
1130                 goto out;
1131
1132         alt_ini->vlan_id = lgr->vlan_id;
1133         alt_ini->check_smcrv2 = true;
1134         alt_ini->smcrv2.saddr = lgr->saddr;
1135         smc_pnet_find_alt_roce(lgr, alt_ini, known_dev);
1136
1137         if (!alt_ini->smcrv2.ib_dev_v2)
1138                 goto out;
1139
1140         memcpy(gidlist->list[gidlist->len++], alt_ini->smcrv2.ib_gid_v2,
1141                SMC_GID_SIZE);
1142
1143 out:
1144         kfree(alt_ini);
1145 }
1146
1147 static int smc_connect_rdma_v2_prepare(struct smc_sock *smc,
1148                                        struct smc_clc_msg_accept_confirm *aclc,
1149                                        struct smc_init_info *ini)
1150 {
1151         struct smc_clc_msg_accept_confirm_v2 *clc_v2 =
1152                 (struct smc_clc_msg_accept_confirm_v2 *)aclc;
1153         struct smc_clc_first_contact_ext *fce =
1154                 (struct smc_clc_first_contact_ext *)
1155                         (((u8 *)clc_v2) + sizeof(*clc_v2));
1156
1157         if (!ini->first_contact_peer || aclc->hdr.version == SMC_V1)
1158                 return 0;
1159
1160         if (fce->v2_direct) {
1161                 memcpy(ini->smcrv2.nexthop_mac, &aclc->r0.lcl.mac, ETH_ALEN);
1162                 ini->smcrv2.uses_gateway = false;
1163         } else {
1164                 if (smc_ib_find_route(smc->clcsock->sk->sk_rcv_saddr,
1165                                       smc_ib_gid_to_ipv4(aclc->r0.lcl.gid),
1166                                       ini->smcrv2.nexthop_mac,
1167                                       &ini->smcrv2.uses_gateway))
1168                         return SMC_CLC_DECL_NOROUTE;
1169                 if (!ini->smcrv2.uses_gateway) {
1170                         /* mismatch: peer claims indirect, but its direct */
1171                         return SMC_CLC_DECL_NOINDIRECT;
1172                 }
1173         }
1174         return 0;
1175 }
1176
1177 /* setup for RDMA connection of client */
1178 static int smc_connect_rdma(struct smc_sock *smc,
1179                             struct smc_clc_msg_accept_confirm *aclc,
1180                             struct smc_init_info *ini)
1181 {
1182         int i, reason_code = 0;
1183         struct smc_link *link;
1184         u8 *eid = NULL;
1185
1186         ini->is_smcd = false;
1187         ini->ib_clcqpn = ntoh24(aclc->r0.qpn);
1188         ini->first_contact_peer = aclc->hdr.typev2 & SMC_FIRST_CONTACT_MASK;
1189         memcpy(ini->peer_systemid, aclc->r0.lcl.id_for_peer, SMC_SYSTEMID_LEN);
1190         memcpy(ini->peer_gid, aclc->r0.lcl.gid, SMC_GID_SIZE);
1191         memcpy(ini->peer_mac, aclc->r0.lcl.mac, ETH_ALEN);
1192
1193         reason_code = smc_connect_rdma_v2_prepare(smc, aclc, ini);
1194         if (reason_code)
1195                 return reason_code;
1196
1197         mutex_lock(&smc_client_lgr_pending);
1198         reason_code = smc_conn_create(smc, ini);
1199         if (reason_code) {
1200                 mutex_unlock(&smc_client_lgr_pending);
1201                 return reason_code;
1202         }
1203
1204         smc_conn_save_peer_info(smc, aclc);
1205
1206         if (ini->first_contact_local) {
1207                 link = smc->conn.lnk;
1208         } else {
1209                 /* set link that was assigned by server */
1210                 link = NULL;
1211                 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
1212                         struct smc_link *l = &smc->conn.lgr->lnk[i];
1213
1214                         if (l->peer_qpn == ntoh24(aclc->r0.qpn) &&
1215                             !memcmp(l->peer_gid, &aclc->r0.lcl.gid,
1216                                     SMC_GID_SIZE) &&
1217                             (aclc->hdr.version > SMC_V1 ||
1218                              !memcmp(l->peer_mac, &aclc->r0.lcl.mac,
1219                                      sizeof(l->peer_mac)))) {
1220                                 link = l;
1221                                 break;
1222                         }
1223                 }
1224                 if (!link) {
1225                         reason_code = SMC_CLC_DECL_NOSRVLINK;
1226                         goto connect_abort;
1227                 }
1228                 smc_switch_link_and_count(&smc->conn, link);
1229         }
1230
1231         /* create send buffer and rmb */
1232         if (smc_buf_create(smc, false)) {
1233                 reason_code = SMC_CLC_DECL_MEM;
1234                 goto connect_abort;
1235         }
1236
1237         if (ini->first_contact_local)
1238                 smc_link_save_peer_info(link, aclc, ini);
1239
1240         if (smc_rmb_rtoken_handling(&smc->conn, link, aclc)) {
1241                 reason_code = SMC_CLC_DECL_ERR_RTOK;
1242                 goto connect_abort;
1243         }
1244
1245         smc_close_init(smc);
1246         smc_rx_init(smc);
1247
1248         if (ini->first_contact_local) {
1249                 if (smc_ib_ready_link(link)) {
1250                         reason_code = SMC_CLC_DECL_ERR_RDYLNK;
1251                         goto connect_abort;
1252                 }
1253         } else {
1254                 /* reg sendbufs if they were vzalloced */
1255                 if (smc->conn.sndbuf_desc->is_vm) {
1256                         if (smcr_lgr_reg_sndbufs(link, smc->conn.sndbuf_desc)) {
1257                                 reason_code = SMC_CLC_DECL_ERR_REGBUF;
1258                                 goto connect_abort;
1259                         }
1260                 }
1261                 if (smcr_lgr_reg_rmbs(link, smc->conn.rmb_desc)) {
1262                         reason_code = SMC_CLC_DECL_ERR_REGBUF;
1263                         goto connect_abort;
1264                 }
1265         }
1266
1267         if (aclc->hdr.version > SMC_V1) {
1268                 struct smc_clc_msg_accept_confirm_v2 *clc_v2 =
1269                         (struct smc_clc_msg_accept_confirm_v2 *)aclc;
1270
1271                 eid = clc_v2->r1.eid;
1272                 if (ini->first_contact_local)
1273                         smc_fill_gid_list(link->lgr, &ini->smcrv2.gidlist,
1274                                           link->smcibdev, link->gid);
1275         }
1276
1277         reason_code = smc_clc_send_confirm(smc, ini->first_contact_local,
1278                                            aclc->hdr.version, eid, ini);
1279         if (reason_code)
1280                 goto connect_abort;
1281
1282         smc_tx_init(smc);
1283
1284         if (ini->first_contact_local) {
1285                 /* QP confirmation over RoCE fabric */
1286                 smc_llc_flow_initiate(link->lgr, SMC_LLC_FLOW_ADD_LINK);
1287                 reason_code = smcr_clnt_conf_first_link(smc);
1288                 smc_llc_flow_stop(link->lgr, &link->lgr->llc_flow_lcl);
1289                 if (reason_code)
1290                         goto connect_abort;
1291         }
1292         mutex_unlock(&smc_client_lgr_pending);
1293
1294         smc_copy_sock_settings_to_clc(smc);
1295         smc->connect_nonblock = 0;
1296         if (smc->sk.sk_state == SMC_INIT)
1297                 smc->sk.sk_state = SMC_ACTIVE;
1298
1299         return 0;
1300 connect_abort:
1301         smc_conn_abort(smc, ini->first_contact_local);
1302         mutex_unlock(&smc_client_lgr_pending);
1303         smc->connect_nonblock = 0;
1304
1305         return reason_code;
1306 }
1307
1308 /* The server has chosen one of the proposed ISM devices for the communication.
1309  * Determine from the CHID of the received CLC ACCEPT the ISM device chosen.
1310  */
1311 static int
1312 smc_v2_determine_accepted_chid(struct smc_clc_msg_accept_confirm_v2 *aclc,
1313                                struct smc_init_info *ini)
1314 {
1315         int i;
1316
1317         for (i = 0; i < ini->ism_offered_cnt + 1; i++) {
1318                 if (ini->ism_chid[i] == ntohs(aclc->d1.chid)) {
1319                         ini->ism_selected = i;
1320                         return 0;
1321                 }
1322         }
1323
1324         return -EPROTO;
1325 }
1326
1327 /* setup for ISM connection of client */
1328 static int smc_connect_ism(struct smc_sock *smc,
1329                            struct smc_clc_msg_accept_confirm *aclc,
1330                            struct smc_init_info *ini)
1331 {
1332         u8 *eid = NULL;
1333         int rc = 0;
1334
1335         ini->is_smcd = true;
1336         ini->first_contact_peer = aclc->hdr.typev2 & SMC_FIRST_CONTACT_MASK;
1337
1338         if (aclc->hdr.version == SMC_V2) {
1339                 struct smc_clc_msg_accept_confirm_v2 *aclc_v2 =
1340                         (struct smc_clc_msg_accept_confirm_v2 *)aclc;
1341
1342                 rc = smc_v2_determine_accepted_chid(aclc_v2, ini);
1343                 if (rc)
1344                         return rc;
1345         }
1346         ini->ism_peer_gid[ini->ism_selected] = aclc->d0.gid;
1347
1348         /* there is only one lgr role for SMC-D; use server lock */
1349         mutex_lock(&smc_server_lgr_pending);
1350         rc = smc_conn_create(smc, ini);
1351         if (rc) {
1352                 mutex_unlock(&smc_server_lgr_pending);
1353                 return rc;
1354         }
1355
1356         /* Create send and receive buffers */
1357         rc = smc_buf_create(smc, true);
1358         if (rc) {
1359                 rc = (rc == -ENOSPC) ? SMC_CLC_DECL_MAX_DMB : SMC_CLC_DECL_MEM;
1360                 goto connect_abort;
1361         }
1362
1363         smc_conn_save_peer_info(smc, aclc);
1364         smc_close_init(smc);
1365         smc_rx_init(smc);
1366         smc_tx_init(smc);
1367
1368         if (aclc->hdr.version > SMC_V1) {
1369                 struct smc_clc_msg_accept_confirm_v2 *clc_v2 =
1370                         (struct smc_clc_msg_accept_confirm_v2 *)aclc;
1371
1372                 eid = clc_v2->d1.eid;
1373         }
1374
1375         rc = smc_clc_send_confirm(smc, ini->first_contact_local,
1376                                   aclc->hdr.version, eid, NULL);
1377         if (rc)
1378                 goto connect_abort;
1379         mutex_unlock(&smc_server_lgr_pending);
1380
1381         smc_copy_sock_settings_to_clc(smc);
1382         smc->connect_nonblock = 0;
1383         if (smc->sk.sk_state == SMC_INIT)
1384                 smc->sk.sk_state = SMC_ACTIVE;
1385
1386         return 0;
1387 connect_abort:
1388         smc_conn_abort(smc, ini->first_contact_local);
1389         mutex_unlock(&smc_server_lgr_pending);
1390         smc->connect_nonblock = 0;
1391
1392         return rc;
1393 }
1394
1395 /* check if received accept type and version matches a proposed one */
1396 static int smc_connect_check_aclc(struct smc_init_info *ini,
1397                                   struct smc_clc_msg_accept_confirm *aclc)
1398 {
1399         if (aclc->hdr.typev1 != SMC_TYPE_R &&
1400             aclc->hdr.typev1 != SMC_TYPE_D)
1401                 return SMC_CLC_DECL_MODEUNSUPP;
1402
1403         if (aclc->hdr.version >= SMC_V2) {
1404                 if ((aclc->hdr.typev1 == SMC_TYPE_R &&
1405                      !smcr_indicated(ini->smc_type_v2)) ||
1406                     (aclc->hdr.typev1 == SMC_TYPE_D &&
1407                      !smcd_indicated(ini->smc_type_v2)))
1408                         return SMC_CLC_DECL_MODEUNSUPP;
1409         } else {
1410                 if ((aclc->hdr.typev1 == SMC_TYPE_R &&
1411                      !smcr_indicated(ini->smc_type_v1)) ||
1412                     (aclc->hdr.typev1 == SMC_TYPE_D &&
1413                      !smcd_indicated(ini->smc_type_v1)))
1414                         return SMC_CLC_DECL_MODEUNSUPP;
1415         }
1416
1417         return 0;
1418 }
1419
1420 /* perform steps before actually connecting */
1421 static int __smc_connect(struct smc_sock *smc)
1422 {
1423         u8 version = smc_ism_is_v2_capable() ? SMC_V2 : SMC_V1;
1424         struct smc_clc_msg_accept_confirm_v2 *aclc2;
1425         struct smc_clc_msg_accept_confirm *aclc;
1426         struct smc_init_info *ini = NULL;
1427         u8 *buf = NULL;
1428         int rc = 0;
1429
1430         if (smc->use_fallback)
1431                 return smc_connect_fallback(smc, smc->fallback_rsn);
1432
1433         /* if peer has not signalled SMC-capability, fall back */
1434         if (!tcp_sk(smc->clcsock->sk)->syn_smc)
1435                 return smc_connect_fallback(smc, SMC_CLC_DECL_PEERNOSMC);
1436
1437         /* IPSec connections opt out of SMC optimizations */
1438         if (using_ipsec(smc))
1439                 return smc_connect_decline_fallback(smc, SMC_CLC_DECL_IPSEC,
1440                                                     version);
1441
1442         ini = kzalloc(sizeof(*ini), GFP_KERNEL);
1443         if (!ini)
1444                 return smc_connect_decline_fallback(smc, SMC_CLC_DECL_MEM,
1445                                                     version);
1446
1447         ini->smcd_version = SMC_V1 | SMC_V2;
1448         ini->smcr_version = SMC_V1 | SMC_V2;
1449         ini->smc_type_v1 = SMC_TYPE_B;
1450         ini->smc_type_v2 = SMC_TYPE_B;
1451
1452         /* get vlan id from IP device */
1453         if (smc_vlan_by_tcpsk(smc->clcsock, ini)) {
1454                 ini->smcd_version &= ~SMC_V1;
1455                 ini->smcr_version = 0;
1456                 ini->smc_type_v1 = SMC_TYPE_N;
1457                 if (!ini->smcd_version) {
1458                         rc = SMC_CLC_DECL_GETVLANERR;
1459                         goto fallback;
1460                 }
1461         }
1462
1463         rc = smc_find_proposal_devices(smc, ini);
1464         if (rc)
1465                 goto fallback;
1466
1467         buf = kzalloc(SMC_CLC_MAX_ACCEPT_LEN, GFP_KERNEL);
1468         if (!buf) {
1469                 rc = SMC_CLC_DECL_MEM;
1470                 goto fallback;
1471         }
1472         aclc2 = (struct smc_clc_msg_accept_confirm_v2 *)buf;
1473         aclc = (struct smc_clc_msg_accept_confirm *)aclc2;
1474
1475         /* perform CLC handshake */
1476         rc = smc_connect_clc(smc, aclc2, ini);
1477         if (rc) {
1478                 /* -EAGAIN on timeout, see tcp_recvmsg() */
1479                 if (rc == -EAGAIN) {
1480                         rc = -ETIMEDOUT;
1481                         smc->sk.sk_err = ETIMEDOUT;
1482                 }
1483                 goto vlan_cleanup;
1484         }
1485
1486         /* check if smc modes and versions of CLC proposal and accept match */
1487         rc = smc_connect_check_aclc(ini, aclc);
1488         version = aclc->hdr.version == SMC_V1 ? SMC_V1 : SMC_V2;
1489         if (rc)
1490                 goto vlan_cleanup;
1491
1492         /* depending on previous steps, connect using rdma or ism */
1493         if (aclc->hdr.typev1 == SMC_TYPE_R) {
1494                 ini->smcr_version = version;
1495                 rc = smc_connect_rdma(smc, aclc, ini);
1496         } else if (aclc->hdr.typev1 == SMC_TYPE_D) {
1497                 ini->smcd_version = version;
1498                 rc = smc_connect_ism(smc, aclc, ini);
1499         }
1500         if (rc)
1501                 goto vlan_cleanup;
1502
1503         SMC_STAT_CLNT_SUCC_INC(sock_net(smc->clcsock->sk), aclc);
1504         smc_connect_ism_vlan_cleanup(smc, ini);
1505         kfree(buf);
1506         kfree(ini);
1507         return 0;
1508
1509 vlan_cleanup:
1510         smc_connect_ism_vlan_cleanup(smc, ini);
1511         kfree(buf);
1512 fallback:
1513         kfree(ini);
1514         return smc_connect_decline_fallback(smc, rc, version);
1515 }
1516
/* Work item behind a nonblocking connect (queued by smc_connect()).
 *
 * Waits, bounded by the send timeout of the smc socket, until the clcsock
 * has left the SYN_SENT/SYN_RECV states, then runs __smc_connect(). If the
 * wait fails or an error is pending, the smc socket is closed and the
 * reference is dropped. Unless the socket is already dead, waiters are
 * notified at the end: via sk_state_change when an error is set, via the
 * write_space callbacks of clcsock and smc socket otherwise.
 */
static void smc_connect_work(struct work_struct *work)
{
	struct smc_sock *smc = container_of(work, struct smc_sock,
					    connect_work);
	struct sock *sk = &smc->sk;
	long timeo = sk->sk_sndtimeo;
	int rc = 0;

	if (!timeo)
		timeo = MAX_SCHEDULE_TIMEOUT;

	lock_sock(smc->clcsock->sk);
	if (smc->clcsock->sk->sk_err) {
		sk->sk_err = smc->clcsock->sk->sk_err;
	} else if ((1 << smc->clcsock->sk->sk_state) &
					(TCPF_SYN_SENT | TCPF_SYN_RECV)) {
		rc = sk_stream_wait_connect(smc->clcsock->sk, &timeo);
		/* -EPIPE is not a failure if the connection got established */
		if (rc == -EPIPE &&
		    ((1 << smc->clcsock->sk->sk_state) &
					(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)))
			rc = 0;
	}
	release_sock(smc->clcsock->sk);

	lock_sock(sk);
	if (rc != 0 || sk->sk_err) {
		sk->sk_state = SMC_CLOSED;
		switch (rc) {
		case -EPIPE:
		case -EAGAIN:
			sk->sk_err = EPIPE;
			break;
		case -ECONNREFUSED:
			sk->sk_err = ECONNREFUSED;
			break;
		default:
			if (signal_pending(current))
				sk->sk_err = -sock_intr_errno(timeo);
			break;
		}
		sock_put(sk); /* passive closing */
	} else {
		rc = __smc_connect(smc);
		if (rc < 0)
			sk->sk_err = -rc;
	}

	if (!sock_flag(sk, SOCK_DEAD)) {
		if (sk->sk_err) {
			sk->sk_state_change(sk);
		} else { /* allow polling before and after fallback decision */
			smc->clcsock->sk->sk_write_space(smc->clcsock->sk);
			sk->sk_write_space(sk);
		}
	}
	release_sock(sk);
}
1566
1567 static int smc_connect(struct socket *sock, struct sockaddr *addr,
1568                        int alen, int flags)
1569 {
1570         struct sock *sk = sock->sk;
1571         struct smc_sock *smc;
1572         int rc = -EINVAL;
1573
1574         smc = smc_sk(sk);
1575
1576         /* separate smc parameter checking to be safe */
1577         if (alen < sizeof(addr->sa_family))
1578                 goto out_err;
1579         if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6)
1580                 goto out_err;
1581
1582         lock_sock(sk);
1583         switch (sock->state) {
1584         default:
1585                 rc = -EINVAL;
1586                 goto out;
1587         case SS_CONNECTED:
1588                 rc = sk->sk_state == SMC_ACTIVE ? -EISCONN : -EINVAL;
1589                 goto out;
1590         case SS_CONNECTING:
1591                 if (sk->sk_state == SMC_ACTIVE)
1592                         goto connected;
1593                 break;
1594         case SS_UNCONNECTED:
1595                 sock->state = SS_CONNECTING;
1596                 break;
1597         }
1598
1599         switch (sk->sk_state) {
1600         default:
1601                 goto out;
1602         case SMC_CLOSED:
1603                 rc = sock_error(sk) ? : -ECONNABORTED;
1604                 sock->state = SS_UNCONNECTED;
1605                 goto out;
1606         case SMC_ACTIVE:
1607                 rc = -EISCONN;
1608                 goto out;
1609         case SMC_INIT:
1610                 break;
1611         }
1612
1613         smc_copy_sock_settings_to_clc(smc);
1614         tcp_sk(smc->clcsock->sk)->syn_smc = 1;
1615         if (smc->connect_nonblock) {
1616                 rc = -EALREADY;
1617                 goto out;
1618         }
1619         rc = kernel_connect(smc->clcsock, addr, alen, flags);
1620         if (rc && rc != -EINPROGRESS)
1621                 goto out;
1622
1623         if (smc->use_fallback) {
1624                 sock->state = rc ? SS_CONNECTING : SS_CONNECTED;
1625                 goto out;
1626         }
1627         sock_hold(&smc->sk); /* sock put in passive closing */
1628         if (flags & O_NONBLOCK) {
1629                 if (queue_work(smc_hs_wq, &smc->connect_work))
1630                         smc->connect_nonblock = 1;
1631                 rc = -EINPROGRESS;
1632                 goto out;
1633         } else {
1634                 rc = __smc_connect(smc);
1635                 if (rc < 0)
1636                         goto out;
1637         }
1638
1639 connected:
1640         rc = 0;
1641         sock->state = SS_CONNECTED;
1642 out:
1643         release_sock(sk);
1644 out_err:
1645         return rc;
1646 }
1647
1648 static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc)
1649 {
1650         struct socket *new_clcsock = NULL;
1651         struct sock *lsk = &lsmc->sk;
1652         struct sock *new_sk;
1653         int rc = -EINVAL;
1654
1655         release_sock(lsk);
1656         new_sk = smc_sock_alloc(sock_net(lsk), NULL, lsk->sk_protocol);
1657         if (!new_sk) {
1658                 rc = -ENOMEM;
1659                 lsk->sk_err = ENOMEM;
1660                 *new_smc = NULL;
1661                 lock_sock(lsk);
1662                 goto out;
1663         }
1664         *new_smc = smc_sk(new_sk);
1665
1666         mutex_lock(&lsmc->clcsock_release_lock);
1667         if (lsmc->clcsock)
1668                 rc = kernel_accept(lsmc->clcsock, &new_clcsock, SOCK_NONBLOCK);
1669         mutex_unlock(&lsmc->clcsock_release_lock);
1670         lock_sock(lsk);
1671         if  (rc < 0 && rc != -EAGAIN)
1672                 lsk->sk_err = -rc;
1673         if (rc < 0 || lsk->sk_state == SMC_CLOSED) {
1674                 new_sk->sk_prot->unhash(new_sk);
1675                 if (new_clcsock)
1676                         sock_release(new_clcsock);
1677                 new_sk->sk_state = SMC_CLOSED;
1678                 sock_set_flag(new_sk, SOCK_DEAD);
1679                 sock_put(new_sk); /* final */
1680                 *new_smc = NULL;
1681                 goto out;
1682         }
1683
1684         /* new clcsock has inherited the smc listen-specific sk_data_ready
1685          * function; switch it back to the original sk_data_ready function
1686          */
1687         new_clcsock->sk->sk_data_ready = lsmc->clcsk_data_ready;
1688
1689         /* if new clcsock has also inherited the fallback-specific callback
1690          * functions, switch them back to the original ones.
1691          */
1692         if (lsmc->use_fallback) {
1693                 if (lsmc->clcsk_state_change)
1694                         new_clcsock->sk->sk_state_change = lsmc->clcsk_state_change;
1695                 if (lsmc->clcsk_write_space)
1696                         new_clcsock->sk->sk_write_space = lsmc->clcsk_write_space;
1697                 if (lsmc->clcsk_error_report)
1698                         new_clcsock->sk->sk_error_report = lsmc->clcsk_error_report;
1699         }
1700
1701         (*new_smc)->clcsock = new_clcsock;
1702 out:
1703         return rc;
1704 }
1705
1706 /* add a just created sock to the accept queue of the listen sock as
1707  * candidate for a following socket accept call from user space
1708  */
1709 static void smc_accept_enqueue(struct sock *parent, struct sock *sk)
1710 {
1711         struct smc_sock *par = smc_sk(parent);
1712
1713         sock_hold(sk); /* sock_put in smc_accept_unlink () */
1714         spin_lock(&par->accept_q_lock);
1715         list_add_tail(&smc_sk(sk)->accept_q, &par->accept_q);
1716         spin_unlock(&par->accept_q_lock);
1717         sk_acceptq_added(parent);
1718 }
1719
1720 /* remove a socket from the accept queue of its parental listening socket */
1721 static void smc_accept_unlink(struct sock *sk)
1722 {
1723         struct smc_sock *par = smc_sk(sk)->listen_smc;
1724
1725         spin_lock(&par->accept_q_lock);
1726         list_del_init(&smc_sk(sk)->accept_q);
1727         spin_unlock(&par->accept_q_lock);
1728         sk_acceptq_removed(&smc_sk(sk)->listen_smc->sk);
1729         sock_put(sk); /* sock_hold in smc_accept_enqueue */
1730 }
1731
1732 /* remove a sock from the accept queue to bind it to a new socket created
1733  * for a socket accept call from user space
1734  */
1735 struct sock *smc_accept_dequeue(struct sock *parent,
1736                                 struct socket *new_sock)
1737 {
1738         struct smc_sock *isk, *n;
1739         struct sock *new_sk;
1740
1741         list_for_each_entry_safe(isk, n, &smc_sk(parent)->accept_q, accept_q) {
1742                 new_sk = (struct sock *)isk;
1743
1744                 smc_accept_unlink(new_sk);
1745                 if (new_sk->sk_state == SMC_CLOSED) {
1746                         new_sk->sk_prot->unhash(new_sk);
1747                         if (isk->clcsock) {
1748                                 sock_release(isk->clcsock);
1749                                 isk->clcsock = NULL;
1750                         }
1751                         sock_put(new_sk); /* final */
1752                         continue;
1753                 }
1754                 if (new_sock) {
1755                         sock_graft(new_sk, new_sock);
1756                         new_sock->state = SS_CONNECTED;
1757                         if (isk->use_fallback) {
1758                                 smc_sk(new_sk)->clcsock->file = new_sock->file;
1759                                 isk->clcsock->file->private_data = isk->clcsock;
1760                         }
1761                 }
1762                 return new_sk;
1763         }
1764         return NULL;
1765 }
1766
1767 /* clean up for a created but never accepted sock */
1768 void smc_close_non_accepted(struct sock *sk)
1769 {
1770         struct smc_sock *smc = smc_sk(sk);
1771
1772         sock_hold(sk); /* sock_put below */
1773         lock_sock(sk);
1774         if (!sk->sk_lingertime)
1775                 /* wait for peer closing */
1776                 sk->sk_lingertime = SMC_MAX_STREAM_WAIT_TIMEOUT;
1777         __smc_release(smc);
1778         release_sock(sk);
1779         sock_put(sk); /* sock_hold above */
1780         sock_put(sk); /* final sock_put */
1781 }
1782
1783 static int smcr_serv_conf_first_link(struct smc_sock *smc)
1784 {
1785         struct smc_link *link = smc->conn.lnk;
1786         struct smc_llc_qentry *qentry;
1787         int rc;
1788
1789         /* reg the sndbuf if it was vzalloced*/
1790         if (smc->conn.sndbuf_desc->is_vm) {
1791                 if (smcr_link_reg_buf(link, smc->conn.sndbuf_desc))
1792                         return SMC_CLC_DECL_ERR_REGBUF;
1793         }
1794
1795         /* reg the rmb */
1796         if (smcr_link_reg_buf(link, smc->conn.rmb_desc))
1797                 return SMC_CLC_DECL_ERR_REGBUF;
1798
1799         /* send CONFIRM LINK request to client over the RoCE fabric */
1800         rc = smc_llc_send_confirm_link(link, SMC_LLC_REQ);
1801         if (rc < 0)
1802                 return SMC_CLC_DECL_TIMEOUT_CL;
1803
1804         /* receive CONFIRM LINK response from client over the RoCE fabric */
1805         qentry = smc_llc_wait(link->lgr, link, SMC_LLC_WAIT_TIME,
1806                               SMC_LLC_CONFIRM_LINK);
1807         if (!qentry) {
1808                 struct smc_clc_msg_decline dclc;
1809
1810                 rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
1811                                       SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
1812                 return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_CL : rc;
1813         }
1814         smc_llc_save_peer_uid(qentry);
1815         rc = smc_llc_eval_conf_link(qentry, SMC_LLC_RESP);
1816         smc_llc_flow_qentry_del(&link->lgr->llc_flow_lcl);
1817         if (rc)
1818                 return SMC_CLC_DECL_RMBE_EC;
1819
1820         /* confirm_rkey is implicit on 1st contact */
1821         smc->conn.rmb_desc->is_conf_rkey = true;
1822
1823         smc_llc_link_active(link);
1824         smcr_lgr_set_type(link->lgr, SMC_LGR_SINGLE);
1825
1826         /* initial contact - try to establish second link */
1827         smc_llc_srv_add_link(link, NULL);
1828         return 0;
1829 }
1830
1831 /* listen worker: finish */
1832 static void smc_listen_out(struct smc_sock *new_smc)
1833 {
1834         struct smc_sock *lsmc = new_smc->listen_smc;
1835         struct sock *newsmcsk = &new_smc->sk;
1836
1837         if (tcp_sk(new_smc->clcsock->sk)->syn_smc)
1838                 atomic_dec(&lsmc->queued_smc_hs);
1839
1840         if (lsmc->sk.sk_state == SMC_LISTEN) {
1841                 lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING);
1842                 smc_accept_enqueue(&lsmc->sk, newsmcsk);
1843                 release_sock(&lsmc->sk);
1844         } else { /* no longer listening */
1845                 smc_close_non_accepted(newsmcsk);
1846         }
1847
1848         /* Wake up accept */
1849         lsmc->sk.sk_data_ready(&lsmc->sk);
1850         sock_put(&lsmc->sk); /* sock_hold in smc_tcp_listen_work */
1851 }
1852
1853 /* listen worker: finish in state connected */
1854 static void smc_listen_out_connected(struct smc_sock *new_smc)
1855 {
1856         struct sock *newsmcsk = &new_smc->sk;
1857
1858         sk_refcnt_debug_inc(newsmcsk);
1859         if (newsmcsk->sk_state == SMC_INIT)
1860                 newsmcsk->sk_state = SMC_ACTIVE;
1861
1862         smc_listen_out(new_smc);
1863 }
1864
1865 /* listen worker: finish in error state */
1866 static void smc_listen_out_err(struct smc_sock *new_smc)
1867 {
1868         struct sock *newsmcsk = &new_smc->sk;
1869         struct net *net = sock_net(newsmcsk);
1870
1871         this_cpu_inc(net->smc.smc_stats->srv_hshake_err_cnt);
1872         if (newsmcsk->sk_state == SMC_INIT)
1873                 sock_put(&new_smc->sk); /* passive closing */
1874         newsmcsk->sk_state = SMC_CLOSED;
1875
1876         smc_listen_out(new_smc);
1877 }
1878
1879 /* listen worker: decline and fall back if possible */
1880 static void smc_listen_decline(struct smc_sock *new_smc, int reason_code,
1881                                int local_first, u8 version)
1882 {
1883         /* RDMA setup failed, switch back to TCP */
1884         smc_conn_abort(new_smc, local_first);
1885         if (reason_code < 0 ||
1886             smc_switch_to_fallback(new_smc, reason_code)) {
1887                 /* error, no fallback possible */
1888                 smc_listen_out_err(new_smc);
1889                 return;
1890         }
1891         if (reason_code && reason_code != SMC_CLC_DECL_PEERDECL) {
1892                 if (smc_clc_send_decline(new_smc, reason_code, version) < 0) {
1893                         smc_listen_out_err(new_smc);
1894                         return;
1895                 }
1896         }
1897         smc_listen_out_connected(new_smc);
1898 }
1899
1900 /* listen worker: version checking */
1901 static int smc_listen_v2_check(struct smc_sock *new_smc,
1902                                struct smc_clc_msg_proposal *pclc,
1903                                struct smc_init_info *ini)
1904 {
1905         struct smc_clc_smcd_v2_extension *pclc_smcd_v2_ext;
1906         struct smc_clc_v2_extension *pclc_v2_ext;
1907         int rc = SMC_CLC_DECL_PEERNOSMC;
1908
1909         ini->smc_type_v1 = pclc->hdr.typev1;
1910         ini->smc_type_v2 = pclc->hdr.typev2;
1911         ini->smcd_version = smcd_indicated(ini->smc_type_v1) ? SMC_V1 : 0;
1912         ini->smcr_version = smcr_indicated(ini->smc_type_v1) ? SMC_V1 : 0;
1913         if (pclc->hdr.version > SMC_V1) {
1914                 if (smcd_indicated(ini->smc_type_v2))
1915                         ini->smcd_version |= SMC_V2;
1916                 if (smcr_indicated(ini->smc_type_v2))
1917                         ini->smcr_version |= SMC_V2;
1918         }
1919         if (!(ini->smcd_version & SMC_V2) && !(ini->smcr_version & SMC_V2)) {
1920                 rc = SMC_CLC_DECL_PEERNOSMC;
1921                 goto out;
1922         }
1923         pclc_v2_ext = smc_get_clc_v2_ext(pclc);
1924         if (!pclc_v2_ext) {
1925                 ini->smcd_version &= ~SMC_V2;
1926                 ini->smcr_version &= ~SMC_V2;
1927                 rc = SMC_CLC_DECL_NOV2EXT;
1928                 goto out;
1929         }
1930         pclc_smcd_v2_ext = smc_get_clc_smcd_v2_ext(pclc_v2_ext);
1931         if (ini->smcd_version & SMC_V2) {
1932                 if (!smc_ism_is_v2_capable()) {
1933                         ini->smcd_version &= ~SMC_V2;
1934                         rc = SMC_CLC_DECL_NOISM2SUPP;
1935                 } else if (!pclc_smcd_v2_ext) {
1936                         ini->smcd_version &= ~SMC_V2;
1937                         rc = SMC_CLC_DECL_NOV2DEXT;
1938                 } else if (!pclc_v2_ext->hdr.eid_cnt &&
1939                            !pclc_v2_ext->hdr.flag.seid) {
1940                         ini->smcd_version &= ~SMC_V2;
1941                         rc = SMC_CLC_DECL_NOUEID;
1942                 }
1943         }
1944         if (ini->smcr_version & SMC_V2) {
1945                 if (!pclc_v2_ext->hdr.eid_cnt) {
1946                         ini->smcr_version &= ~SMC_V2;
1947                         rc = SMC_CLC_DECL_NOUEID;
1948                 }
1949         }
1950
1951 out:
1952         if (!ini->smcd_version && !ini->smcr_version)
1953                 return rc;
1954
1955         return 0;
1956 }
1957
1958 /* listen worker: check prefixes */
1959 static int smc_listen_prfx_check(struct smc_sock *new_smc,
1960                                  struct smc_clc_msg_proposal *pclc)
1961 {
1962         struct smc_clc_msg_proposal_prefix *pclc_prfx;
1963         struct socket *newclcsock = new_smc->clcsock;
1964
1965         if (pclc->hdr.typev1 == SMC_TYPE_N)
1966                 return 0;
1967         pclc_prfx = smc_clc_proposal_get_prefix(pclc);
1968         if (smc_clc_prfx_match(newclcsock, pclc_prfx))
1969                 return SMC_CLC_DECL_DIFFPREFIX;
1970
1971         return 0;
1972 }
1973
1974 /* listen worker: initialize connection and buffers */
1975 static int smc_listen_rdma_init(struct smc_sock *new_smc,
1976                                 struct smc_init_info *ini)
1977 {
1978         int rc;
1979
1980         /* allocate connection / link group */
1981         rc = smc_conn_create(new_smc, ini);
1982         if (rc)
1983                 return rc;
1984
1985         /* create send buffer and rmb */
1986         if (smc_buf_create(new_smc, false))
1987                 return SMC_CLC_DECL_MEM;
1988
1989         return 0;
1990 }
1991
1992 /* listen worker: initialize connection and buffers for SMC-D */
1993 static int smc_listen_ism_init(struct smc_sock *new_smc,
1994                                struct smc_init_info *ini)
1995 {
1996         int rc;
1997
1998         rc = smc_conn_create(new_smc, ini);
1999         if (rc)
2000                 return rc;
2001
2002         /* Create send and receive buffers */
2003         rc = smc_buf_create(new_smc, true);
2004         if (rc) {
2005                 smc_conn_abort(new_smc, ini->first_contact_local);
2006                 return (rc == -ENOSPC) ? SMC_CLC_DECL_MAX_DMB :
2007                                          SMC_CLC_DECL_MEM;
2008         }
2009
2010         return 0;
2011 }
2012
2013 static bool smc_is_already_selected(struct smcd_dev *smcd,
2014                                     struct smc_init_info *ini,
2015                                     int matches)
2016 {
2017         int i;
2018
2019         for (i = 0; i < matches; i++)
2020                 if (smcd == ini->ism_dev[i])
2021                         return true;
2022
2023         return false;
2024 }
2025
2026 /* check for ISM devices matching proposed ISM devices */
2027 static void smc_check_ism_v2_match(struct smc_init_info *ini,
2028                                    u16 proposed_chid, u64 proposed_gid,
2029                                    unsigned int *matches)
2030 {
2031         struct smcd_dev *smcd;
2032
2033         list_for_each_entry(smcd, &smcd_dev_list.list, list) {
2034                 if (smcd->going_away)
2035                         continue;
2036                 if (smc_is_already_selected(smcd, ini, *matches))
2037                         continue;
2038                 if (smc_ism_get_chid(smcd) == proposed_chid &&
2039                     !smc_ism_cantalk(proposed_gid, ISM_RESERVED_VLANID, smcd)) {
2040                         ini->ism_peer_gid[*matches] = proposed_gid;
2041                         ini->ism_dev[*matches] = smcd;
2042                         (*matches)++;
2043                         break;
2044                 }
2045         }
2046 }
2047
2048 static void smc_find_ism_store_rc(u32 rc, struct smc_init_info *ini)
2049 {
2050         if (!ini->rc)
2051                 ini->rc = rc;
2052 }
2053
2054 static void smc_find_ism_v2_device_serv(struct smc_sock *new_smc,
2055                                         struct smc_clc_msg_proposal *pclc,
2056                                         struct smc_init_info *ini)
2057 {
2058         struct smc_clc_smcd_v2_extension *smcd_v2_ext;
2059         struct smc_clc_v2_extension *smc_v2_ext;
2060         struct smc_clc_msg_smcd *pclc_smcd;
2061         unsigned int matches = 0;
2062         u8 smcd_version;
2063         u8 *eid = NULL;
2064         int i, rc;
2065
2066         if (!(ini->smcd_version & SMC_V2) || !smcd_indicated(ini->smc_type_v2))
2067                 goto not_found;
2068
2069         pclc_smcd = smc_get_clc_msg_smcd(pclc);
2070         smc_v2_ext = smc_get_clc_v2_ext(pclc);
2071         smcd_v2_ext = smc_get_clc_smcd_v2_ext(smc_v2_ext);
2072
2073         mutex_lock(&smcd_dev_list.mutex);
2074         if (pclc_smcd->ism.chid)
2075                 /* check for ISM device matching proposed native ISM device */
2076                 smc_check_ism_v2_match(ini, ntohs(pclc_smcd->ism.chid),
2077                                        ntohll(pclc_smcd->ism.gid), &matches);
2078         for (i = 1; i <= smc_v2_ext->hdr.ism_gid_cnt; i++) {
2079                 /* check for ISM devices matching proposed non-native ISM
2080                  * devices
2081                  */
2082                 smc_check_ism_v2_match(ini,
2083                                        ntohs(smcd_v2_ext->gidchid[i - 1].chid),
2084                                        ntohll(smcd_v2_ext->gidchid[i - 1].gid),
2085                                        &matches);
2086         }
2087         mutex_unlock(&smcd_dev_list.mutex);
2088
2089         if (!ini->ism_dev[0]) {
2090                 smc_find_ism_store_rc(SMC_CLC_DECL_NOSMCD2DEV, ini);
2091                 goto not_found;
2092         }
2093
2094         smc_ism_get_system_eid(&eid);
2095         if (!smc_clc_match_eid(ini->negotiated_eid, smc_v2_ext,
2096                                smcd_v2_ext->system_eid, eid))
2097                 goto not_found;
2098
2099         /* separate - outside the smcd_dev_list.lock */
2100         smcd_version = ini->smcd_version;
2101         for (i = 0; i < matches; i++) {
2102                 ini->smcd_version = SMC_V2;
2103                 ini->is_smcd = true;
2104                 ini->ism_selected = i;
2105                 rc = smc_listen_ism_init(new_smc, ini);
2106                 if (rc) {
2107                         smc_find_ism_store_rc(rc, ini);
2108                         /* try next active ISM device */
2109                         continue;
2110                 }
2111                 return; /* matching and usable V2 ISM device found */
2112         }
2113         /* no V2 ISM device could be initialized */
2114         ini->smcd_version = smcd_version;       /* restore original value */
2115         ini->negotiated_eid[0] = 0;
2116
2117 not_found:
2118         ini->smcd_version &= ~SMC_V2;
2119         ini->ism_dev[0] = NULL;
2120         ini->is_smcd = false;
2121 }
2122
2123 static void smc_find_ism_v1_device_serv(struct smc_sock *new_smc,
2124                                         struct smc_clc_msg_proposal *pclc,
2125                                         struct smc_init_info *ini)
2126 {
2127         struct smc_clc_msg_smcd *pclc_smcd = smc_get_clc_msg_smcd(pclc);
2128         int rc = 0;
2129
2130         /* check if ISM V1 is available */
2131         if (!(ini->smcd_version & SMC_V1) || !smcd_indicated(ini->smc_type_v1))
2132                 goto not_found;
2133         ini->is_smcd = true; /* prepare ISM check */
2134         ini->ism_peer_gid[0] = ntohll(pclc_smcd->ism.gid);
2135         rc = smc_find_ism_device(new_smc, ini);
2136         if (rc)
2137                 goto not_found;
2138         ini->ism_selected = 0;
2139         rc = smc_listen_ism_init(new_smc, ini);
2140         if (!rc)
2141                 return;         /* V1 ISM device found */
2142
2143 not_found:
2144         smc_find_ism_store_rc(rc, ini);
2145         ini->smcd_version &= ~SMC_V1;
2146         ini->ism_dev[0] = NULL;
2147         ini->is_smcd = false;
2148 }
2149
2150 /* listen worker: register buffers */
2151 static int smc_listen_rdma_reg(struct smc_sock *new_smc, bool local_first)
2152 {
2153         struct smc_connection *conn = &new_smc->conn;
2154
2155         if (!local_first) {
2156                 /* reg sendbufs if they were vzalloced */
2157                 if (conn->sndbuf_desc->is_vm) {
2158                         if (smcr_lgr_reg_sndbufs(conn->lnk,
2159                                                  conn->sndbuf_desc))
2160                                 return SMC_CLC_DECL_ERR_REGBUF;
2161                 }
2162                 if (smcr_lgr_reg_rmbs(conn->lnk, conn->rmb_desc))
2163                         return SMC_CLC_DECL_ERR_REGBUF;
2164         }
2165
2166         return 0;
2167 }
2168
2169 static void smc_find_rdma_v2_device_serv(struct smc_sock *new_smc,
2170                                          struct smc_clc_msg_proposal *pclc,
2171                                          struct smc_init_info *ini)
2172 {
2173         struct smc_clc_v2_extension *smc_v2_ext;
2174         u8 smcr_version;
2175         int rc;
2176
2177         if (!(ini->smcr_version & SMC_V2) || !smcr_indicated(ini->smc_type_v2))
2178                 goto not_found;
2179
2180         smc_v2_ext = smc_get_clc_v2_ext(pclc);
2181         if (!smc_clc_match_eid(ini->negotiated_eid, smc_v2_ext, NULL, NULL))
2182                 goto not_found;
2183
2184         /* prepare RDMA check */
2185         memcpy(ini->peer_systemid, pclc->lcl.id_for_peer, SMC_SYSTEMID_LEN);
2186         memcpy(ini->peer_gid, smc_v2_ext->roce, SMC_GID_SIZE);
2187         memcpy(ini->peer_mac, pclc->lcl.mac, ETH_ALEN);
2188         ini->check_smcrv2 = true;
2189         ini->smcrv2.clc_sk = new_smc->clcsock->sk;
2190         ini->smcrv2.saddr = new_smc->clcsock->sk->sk_rcv_saddr;
2191         ini->smcrv2.daddr = smc_ib_gid_to_ipv4(smc_v2_ext->roce);
2192         rc = smc_find_rdma_device(new_smc, ini);
2193         if (rc) {
2194                 smc_find_ism_store_rc(rc, ini);
2195                 goto not_found;
2196         }
2197         if (!ini->smcrv2.uses_gateway)
2198                 memcpy(ini->smcrv2.nexthop_mac, pclc->lcl.mac, ETH_ALEN);
2199
2200         smcr_version = ini->smcr_version;
2201         ini->smcr_version = SMC_V2;
2202         rc = smc_listen_rdma_init(new_smc, ini);
2203         if (!rc)
2204                 rc = smc_listen_rdma_reg(new_smc, ini->first_contact_local);
2205         if (!rc)
2206                 return;
2207         ini->smcr_version = smcr_version;
2208         smc_find_ism_store_rc(rc, ini);
2209
2210 not_found:
2211         ini->smcr_version &= ~SMC_V2;
2212         ini->smcrv2.ib_dev_v2 = NULL;
2213         ini->check_smcrv2 = false;
2214 }
2215
2216 static int smc_find_rdma_v1_device_serv(struct smc_sock *new_smc,
2217                                         struct smc_clc_msg_proposal *pclc,
2218                                         struct smc_init_info *ini)
2219 {
2220         int rc;
2221
2222         if (!(ini->smcr_version & SMC_V1) || !smcr_indicated(ini->smc_type_v1))
2223                 return SMC_CLC_DECL_NOSMCDEV;
2224
2225         /* prepare RDMA check */
2226         memcpy(ini->peer_systemid, pclc->lcl.id_for_peer, SMC_SYSTEMID_LEN);
2227         memcpy(ini->peer_gid, pclc->lcl.gid, SMC_GID_SIZE);
2228         memcpy(ini->peer_mac, pclc->lcl.mac, ETH_ALEN);
2229         rc = smc_find_rdma_device(new_smc, ini);
2230         if (rc) {
2231                 /* no RDMA device found */
2232                 return SMC_CLC_DECL_NOSMCDEV;
2233         }
2234         rc = smc_listen_rdma_init(new_smc, ini);
2235         if (rc)
2236                 return rc;
2237         return smc_listen_rdma_reg(new_smc, ini->first_contact_local);
2238 }
2239
2240 /* determine the local device matching to proposal */
2241 static int smc_listen_find_device(struct smc_sock *new_smc,
2242                                   struct smc_clc_msg_proposal *pclc,
2243                                   struct smc_init_info *ini)
2244 {
2245         int prfx_rc;
2246
2247         /* check for ISM device matching V2 proposed device */
2248         smc_find_ism_v2_device_serv(new_smc, pclc, ini);
2249         if (ini->ism_dev[0])
2250                 return 0;
2251
2252         /* check for matching IP prefix and subnet length (V1) */
2253         prfx_rc = smc_listen_prfx_check(new_smc, pclc);
2254         if (prfx_rc)
2255                 smc_find_ism_store_rc(prfx_rc, ini);
2256
2257         /* get vlan id from IP device */
2258         if (smc_vlan_by_tcpsk(new_smc->clcsock, ini))
2259                 return ini->rc ?: SMC_CLC_DECL_GETVLANERR;
2260
2261         /* check for ISM device matching V1 proposed device */
2262         if (!prfx_rc)
2263                 smc_find_ism_v1_device_serv(new_smc, pclc, ini);
2264         if (ini->ism_dev[0])
2265                 return 0;
2266
2267         if (!smcr_indicated(pclc->hdr.typev1) &&
2268             !smcr_indicated(pclc->hdr.typev2))
2269                 /* skip RDMA and decline */
2270                 return ini->rc ?: SMC_CLC_DECL_NOSMCDDEV;
2271
2272         /* check if RDMA V2 is available */
2273         smc_find_rdma_v2_device_serv(new_smc, pclc, ini);
2274         if (ini->smcrv2.ib_dev_v2)
2275                 return 0;
2276
2277         /* check if RDMA V1 is available */
2278         if (!prfx_rc) {
2279                 int rc;
2280
2281                 rc = smc_find_rdma_v1_device_serv(new_smc, pclc, ini);
2282                 smc_find_ism_store_rc(rc, ini);
2283                 return (!rc) ? 0 : ini->rc;
2284         }
2285         return SMC_CLC_DECL_NOSMCDEV;
2286 }
2287
2288 /* listen worker: finish RDMA setup */
2289 static int smc_listen_rdma_finish(struct smc_sock *new_smc,
2290                                   struct smc_clc_msg_accept_confirm *cclc,
2291                                   bool local_first,
2292                                   struct smc_init_info *ini)
2293 {
2294         struct smc_link *link = new_smc->conn.lnk;
2295         int reason_code = 0;
2296
2297         if (local_first)
2298                 smc_link_save_peer_info(link, cclc, ini);
2299
2300         if (smc_rmb_rtoken_handling(&new_smc->conn, link, cclc))
2301                 return SMC_CLC_DECL_ERR_RTOK;
2302
2303         if (local_first) {
2304                 if (smc_ib_ready_link(link))
2305                         return SMC_CLC_DECL_ERR_RDYLNK;
2306                 /* QP confirmation over RoCE fabric */
2307                 smc_llc_flow_initiate(link->lgr, SMC_LLC_FLOW_ADD_LINK);
2308                 reason_code = smcr_serv_conf_first_link(new_smc);
2309                 smc_llc_flow_stop(link->lgr, &link->lgr->llc_flow_lcl);
2310         }
2311         return reason_code;
2312 }
2313
2314 /* setup for connection of server */
2315 static void smc_listen_work(struct work_struct *work)
2316 {
2317         struct smc_sock *new_smc = container_of(work, struct smc_sock,
2318                                                 smc_listen_work);
2319         struct socket *newclcsock = new_smc->clcsock;
2320         struct smc_clc_msg_accept_confirm *cclc;
2321         struct smc_clc_msg_proposal_area *buf;
2322         struct smc_clc_msg_proposal *pclc;
2323         struct smc_init_info *ini = NULL;
2324         u8 proposal_version = SMC_V1;
2325         u8 accept_version;
2326         int rc = 0;
2327
2328         if (new_smc->listen_smc->sk.sk_state != SMC_LISTEN)
2329                 return smc_listen_out_err(new_smc);
2330
2331         if (new_smc->use_fallback) {
2332                 smc_listen_out_connected(new_smc);
2333                 return;
2334         }
2335
2336         /* check if peer is smc capable */
2337         if (!tcp_sk(newclcsock->sk)->syn_smc) {
2338                 rc = smc_switch_to_fallback(new_smc, SMC_CLC_DECL_PEERNOSMC);
2339                 if (rc)
2340                         smc_listen_out_err(new_smc);
2341                 else
2342                         smc_listen_out_connected(new_smc);
2343                 return;
2344         }
2345
2346         /* do inband token exchange -
2347          * wait for and receive SMC Proposal CLC message
2348          */
2349         buf = kzalloc(sizeof(*buf), GFP_KERNEL);
2350         if (!buf) {
2351                 rc = SMC_CLC_DECL_MEM;
2352                 goto out_decl;
2353         }
2354         pclc = (struct smc_clc_msg_proposal *)buf;
2355         rc = smc_clc_wait_msg(new_smc, pclc, sizeof(*buf),
2356                               SMC_CLC_PROPOSAL, CLC_WAIT_TIME);
2357         if (rc)
2358                 goto out_decl;
2359
2360         if (pclc->hdr.version > SMC_V1)
2361                 proposal_version = SMC_V2;
2362
2363         /* IPSec connections opt out of SMC optimizations */
2364         if (using_ipsec(new_smc)) {
2365                 rc = SMC_CLC_DECL_IPSEC;
2366                 goto out_decl;
2367         }
2368
2369         ini = kzalloc(sizeof(*ini), GFP_KERNEL);
2370         if (!ini) {
2371                 rc = SMC_CLC_DECL_MEM;
2372                 goto out_decl;
2373         }
2374
2375         /* initial version checking */
2376         rc = smc_listen_v2_check(new_smc, pclc, ini);
2377         if (rc)
2378                 goto out_decl;
2379
2380         mutex_lock(&smc_server_lgr_pending);
2381         smc_close_init(new_smc);
2382         smc_rx_init(new_smc);
2383         smc_tx_init(new_smc);
2384
2385         /* determine ISM or RoCE device used for connection */
2386         rc = smc_listen_find_device(new_smc, pclc, ini);
2387         if (rc)
2388                 goto out_unlock;
2389
2390         /* send SMC Accept CLC message */
2391         accept_version = ini->is_smcd ? ini->smcd_version : ini->smcr_version;
2392         rc = smc_clc_send_accept(new_smc, ini->first_contact_local,
2393                                  accept_version, ini->negotiated_eid);
2394         if (rc)
2395                 goto out_unlock;
2396
2397         /* SMC-D does not need this lock any more */
2398         if (ini->is_smcd)
2399                 mutex_unlock(&smc_server_lgr_pending);
2400
2401         /* receive SMC Confirm CLC message */
2402         memset(buf, 0, sizeof(*buf));
2403         cclc = (struct smc_clc_msg_accept_confirm *)buf;
2404         rc = smc_clc_wait_msg(new_smc, cclc, sizeof(*buf),
2405                               SMC_CLC_CONFIRM, CLC_WAIT_TIME);
2406         if (rc) {
2407                 if (!ini->is_smcd)
2408                         goto out_unlock;
2409                 goto out_decl;
2410         }
2411
2412         /* finish worker */
2413         if (!ini->is_smcd) {
2414                 rc = smc_listen_rdma_finish(new_smc, cclc,
2415                                             ini->first_contact_local, ini);
2416                 if (rc)
2417                         goto out_unlock;
2418                 mutex_unlock(&smc_server_lgr_pending);
2419         }
2420         smc_conn_save_peer_info(new_smc, cclc);
2421         smc_listen_out_connected(new_smc);
2422         SMC_STAT_SERV_SUCC_INC(sock_net(newclcsock->sk), ini);
2423         goto out_free;
2424
2425 out_unlock:
2426         mutex_unlock(&smc_server_lgr_pending);
2427 out_decl:
2428         smc_listen_decline(new_smc, rc, ini ? ini->first_contact_local : 0,
2429                            proposal_version);
2430 out_free:
2431         kfree(ini);
2432         kfree(buf);
2433 }
2434
2435 static void smc_tcp_listen_work(struct work_struct *work)
2436 {
2437         struct smc_sock *lsmc = container_of(work, struct smc_sock,
2438                                              tcp_listen_work);
2439         struct sock *lsk = &lsmc->sk;
2440         struct smc_sock *new_smc;
2441         int rc = 0;
2442
2443         lock_sock(lsk);
2444         while (lsk->sk_state == SMC_LISTEN) {
2445                 rc = smc_clcsock_accept(lsmc, &new_smc);
2446                 if (rc) /* clcsock accept queue empty or error */
2447                         goto out;
2448                 if (!new_smc)
2449                         continue;
2450
2451                 if (tcp_sk(new_smc->clcsock->sk)->syn_smc)
2452                         atomic_inc(&lsmc->queued_smc_hs);
2453
2454                 new_smc->listen_smc = lsmc;
2455                 new_smc->use_fallback = lsmc->use_fallback;
2456                 new_smc->fallback_rsn = lsmc->fallback_rsn;
2457                 sock_hold(lsk); /* sock_put in smc_listen_work */
2458                 INIT_WORK(&new_smc->smc_listen_work, smc_listen_work);
2459                 smc_copy_sock_settings_to_smc(new_smc);
2460                 new_smc->sk.sk_sndbuf = lsmc->sk.sk_sndbuf;
2461                 new_smc->sk.sk_rcvbuf = lsmc->sk.sk_rcvbuf;
2462                 sock_hold(&new_smc->sk); /* sock_put in passive closing */
2463                 if (!queue_work(smc_hs_wq, &new_smc->smc_listen_work))
2464                         sock_put(&new_smc->sk);
2465         }
2466
2467 out:
2468         release_sock(lsk);
2469         sock_put(&lsmc->sk); /* sock_hold in smc_clcsock_data_ready() */
2470 }
2471
2472 static void smc_clcsock_data_ready(struct sock *listen_clcsock)
2473 {
2474         struct smc_sock *lsmc;
2475
2476         read_lock_bh(&listen_clcsock->sk_callback_lock);
2477         lsmc = smc_clcsock_user_data(listen_clcsock);
2478         if (!lsmc)
2479                 goto out;
2480         lsmc->clcsk_data_ready(listen_clcsock);
2481         if (lsmc->sk.sk_state == SMC_LISTEN) {
2482                 sock_hold(&lsmc->sk); /* sock_put in smc_tcp_listen_work() */
2483                 if (!queue_work(smc_tcp_ls_wq, &lsmc->tcp_listen_work))
2484                         sock_put(&lsmc->sk);
2485         }
2486 out:
2487         read_unlock_bh(&listen_clcsock->sk_callback_lock);
2488 }
2489
2490 static int smc_listen(struct socket *sock, int backlog)
2491 {
2492         struct sock *sk = sock->sk;
2493         struct smc_sock *smc;
2494         int rc;
2495
2496         smc = smc_sk(sk);
2497         lock_sock(sk);
2498
2499         rc = -EINVAL;
2500         if ((sk->sk_state != SMC_INIT && sk->sk_state != SMC_LISTEN) ||
2501             smc->connect_nonblock || sock->state != SS_UNCONNECTED)
2502                 goto out;
2503
2504         rc = 0;
2505         if (sk->sk_state == SMC_LISTEN) {
2506                 sk->sk_max_ack_backlog = backlog;
2507                 goto out;
2508         }
2509         /* some socket options are handled in core, so we could not apply
2510          * them to the clc socket -- copy smc socket options to clc socket
2511          */
2512         smc_copy_sock_settings_to_clc(smc);
2513         if (!smc->use_fallback)
2514                 tcp_sk(smc->clcsock->sk)->syn_smc = 1;
2515
2516         /* save original sk_data_ready function and establish
2517          * smc-specific sk_data_ready function
2518          */
2519         write_lock_bh(&smc->clcsock->sk->sk_callback_lock);
2520         smc->clcsock->sk->sk_user_data =
2521                 (void *)((uintptr_t)smc | SK_USER_DATA_NOCOPY);
2522         smc_clcsock_replace_cb(&smc->clcsock->sk->sk_data_ready,
2523                                smc_clcsock_data_ready, &smc->clcsk_data_ready);
2524         write_unlock_bh(&smc->clcsock->sk->sk_callback_lock);
2525
2526         /* save original ops */
2527         smc->ori_af_ops = inet_csk(smc->clcsock->sk)->icsk_af_ops;
2528
2529         smc->af_ops = *smc->ori_af_ops;
2530         smc->af_ops.syn_recv_sock = smc_tcp_syn_recv_sock;
2531
2532         inet_csk(smc->clcsock->sk)->icsk_af_ops = &smc->af_ops;
2533
2534         if (smc->limit_smc_hs)
2535                 tcp_sk(smc->clcsock->sk)->smc_hs_congested = smc_hs_congested;
2536
2537         rc = kernel_listen(smc->clcsock, backlog);
2538         if (rc) {
2539                 write_lock_bh(&smc->clcsock->sk->sk_callback_lock);
2540                 smc_clcsock_restore_cb(&smc->clcsock->sk->sk_data_ready,
2541                                        &smc->clcsk_data_ready);
2542                 smc->clcsock->sk->sk_user_data = NULL;
2543                 write_unlock_bh(&smc->clcsock->sk->sk_callback_lock);
2544                 goto out;
2545         }
2546         sk->sk_max_ack_backlog = backlog;
2547         sk->sk_ack_backlog = 0;
2548         sk->sk_state = SMC_LISTEN;
2549
2550 out:
2551         release_sock(sk);
2552         return rc;
2553 }
2554
2555 static int smc_accept(struct socket *sock, struct socket *new_sock,
2556                       int flags, bool kern)
2557 {
2558         struct sock *sk = sock->sk, *nsk;
2559         DECLARE_WAITQUEUE(wait, current);
2560         struct smc_sock *lsmc;
2561         long timeo;
2562         int rc = 0;
2563
2564         lsmc = smc_sk(sk);
2565         sock_hold(sk); /* sock_put below */
2566         lock_sock(sk);
2567
2568         if (lsmc->sk.sk_state != SMC_LISTEN) {
2569                 rc = -EINVAL;
2570                 release_sock(sk);
2571                 goto out;
2572         }
2573
2574         /* Wait for an incoming connection */
2575         timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
2576         add_wait_queue_exclusive(sk_sleep(sk), &wait);
2577         while (!(nsk = smc_accept_dequeue(sk, new_sock))) {
2578                 set_current_state(TASK_INTERRUPTIBLE);
2579                 if (!timeo) {
2580                         rc = -EAGAIN;
2581                         break;
2582                 }
2583                 release_sock(sk);
2584                 timeo = schedule_timeout(timeo);
2585                 /* wakeup by sk_data_ready in smc_listen_work() */
2586                 sched_annotate_sleep();
2587                 lock_sock(sk);
2588                 if (signal_pending(current)) {
2589                         rc = sock_intr_errno(timeo);
2590                         break;
2591                 }
2592         }
2593         set_current_state(TASK_RUNNING);
2594         remove_wait_queue(sk_sleep(sk), &wait);
2595
2596         if (!rc)
2597                 rc = sock_error(nsk);
2598         release_sock(sk);
2599         if (rc)
2600                 goto out;
2601
2602         if (lsmc->sockopt_defer_accept && !(flags & O_NONBLOCK)) {
2603                 /* wait till data arrives on the socket */
2604                 timeo = msecs_to_jiffies(lsmc->sockopt_defer_accept *
2605                                                                 MSEC_PER_SEC);
2606                 if (smc_sk(nsk)->use_fallback) {
2607                         struct sock *clcsk = smc_sk(nsk)->clcsock->sk;
2608
2609                         lock_sock(clcsk);
2610                         if (skb_queue_empty(&clcsk->sk_receive_queue))
2611                                 sk_wait_data(clcsk, &timeo, NULL);
2612                         release_sock(clcsk);
2613                 } else if (!atomic_read(&smc_sk(nsk)->conn.bytes_to_rcv)) {
2614                         lock_sock(nsk);
2615                         smc_rx_wait(smc_sk(nsk), &timeo, smc_rx_data_available);
2616                         release_sock(nsk);
2617                 }
2618         }
2619
2620 out:
2621         sock_put(sk); /* sock_hold above */
2622         return rc;
2623 }
2624
2625 static int smc_getname(struct socket *sock, struct sockaddr *addr,
2626                        int peer)
2627 {
2628         struct smc_sock *smc;
2629
2630         if (peer && (sock->sk->sk_state != SMC_ACTIVE) &&
2631             (sock->sk->sk_state != SMC_APPCLOSEWAIT1))
2632                 return -ENOTCONN;
2633
2634         smc = smc_sk(sock->sk);
2635
2636         return smc->clcsock->ops->getname(smc->clcsock, addr, peer);
2637 }
2638
2639 static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
2640 {
2641         struct sock *sk = sock->sk;
2642         struct smc_sock *smc;
2643         int rc = -EPIPE;
2644
2645         smc = smc_sk(sk);
2646         lock_sock(sk);
2647         if ((sk->sk_state != SMC_ACTIVE) &&
2648             (sk->sk_state != SMC_APPCLOSEWAIT1) &&
2649             (sk->sk_state != SMC_INIT))
2650                 goto out;
2651
2652         if (msg->msg_flags & MSG_FASTOPEN) {
2653                 if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) {
2654                         rc = smc_switch_to_fallback(smc, SMC_CLC_DECL_OPTUNSUPP);
2655                         if (rc)
2656                                 goto out;
2657                 } else {
2658                         rc = -EINVAL;
2659                         goto out;
2660                 }
2661         }
2662
2663         if (smc->use_fallback) {
2664                 rc = smc->clcsock->ops->sendmsg(smc->clcsock, msg, len);
2665         } else {
2666                 rc = smc_tx_sendmsg(smc, msg, len);
2667                 SMC_STAT_TX_PAYLOAD(smc, len, rc);
2668         }
2669 out:
2670         release_sock(sk);
2671         return rc;
2672 }
2673
2674 static int smc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
2675                        int flags)
2676 {
2677         struct sock *sk = sock->sk;
2678         struct smc_sock *smc;
2679         int rc = -ENOTCONN;
2680
2681         smc = smc_sk(sk);
2682         lock_sock(sk);
2683         if (sk->sk_state == SMC_CLOSED && (sk->sk_shutdown & RCV_SHUTDOWN)) {
2684                 /* socket was connected before, no more data to read */
2685                 rc = 0;
2686                 goto out;
2687         }
2688         if ((sk->sk_state == SMC_INIT) ||
2689             (sk->sk_state == SMC_LISTEN) ||
2690             (sk->sk_state == SMC_CLOSED))
2691                 goto out;
2692
2693         if (sk->sk_state == SMC_PEERFINCLOSEWAIT) {
2694                 rc = 0;
2695                 goto out;
2696         }
2697
2698         if (smc->use_fallback) {
2699                 rc = smc->clcsock->ops->recvmsg(smc->clcsock, msg, len, flags);
2700         } else {
2701                 msg->msg_namelen = 0;
2702                 rc = smc_rx_recvmsg(smc, msg, NULL, len, flags);
2703                 SMC_STAT_RX_PAYLOAD(smc, rc, rc);
2704         }
2705
2706 out:
2707         release_sock(sk);
2708         return rc;
2709 }
2710
2711 static __poll_t smc_accept_poll(struct sock *parent)
2712 {
2713         struct smc_sock *isk = smc_sk(parent);
2714         __poll_t mask = 0;
2715
2716         spin_lock(&isk->accept_q_lock);
2717         if (!list_empty(&isk->accept_q))
2718                 mask = EPOLLIN | EPOLLRDNORM;
2719         spin_unlock(&isk->accept_q_lock);
2720
2721         return mask;
2722 }
2723
2724 static __poll_t smc_poll(struct file *file, struct socket *sock,
2725                              poll_table *wait)
2726 {
2727         struct sock *sk = sock->sk;
2728         struct smc_sock *smc;
2729         __poll_t mask = 0;
2730
2731         if (!sk)
2732                 return EPOLLNVAL;
2733
2734         smc = smc_sk(sock->sk);
2735         if (smc->use_fallback) {
2736                 /* delegate to CLC child sock */
2737                 mask = smc->clcsock->ops->poll(file, smc->clcsock, wait);
2738                 sk->sk_err = smc->clcsock->sk->sk_err;
2739         } else {
2740                 if (sk->sk_state != SMC_CLOSED)
2741                         sock_poll_wait(file, sock, wait);
2742                 if (sk->sk_err)
2743                         mask |= EPOLLERR;
2744                 if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
2745                     (sk->sk_state == SMC_CLOSED))
2746                         mask |= EPOLLHUP;
2747                 if (sk->sk_state == SMC_LISTEN) {
2748                         /* woken up by sk_data_ready in smc_listen_work() */
2749                         mask |= smc_accept_poll(sk);
2750                 } else if (smc->use_fallback) { /* as result of connect_work()*/
2751                         mask |= smc->clcsock->ops->poll(file, smc->clcsock,
2752                                                            wait);
2753                         sk->sk_err = smc->clcsock->sk->sk_err;
2754                 } else {
2755                         if ((sk->sk_state != SMC_INIT &&
2756                              atomic_read(&smc->conn.sndbuf_space)) ||
2757                             sk->sk_shutdown & SEND_SHUTDOWN) {
2758                                 mask |= EPOLLOUT | EPOLLWRNORM;
2759                         } else {
2760                                 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
2761                                 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
2762                         }
2763                         if (atomic_read(&smc->conn.bytes_to_rcv))
2764                                 mask |= EPOLLIN | EPOLLRDNORM;
2765                         if (sk->sk_shutdown & RCV_SHUTDOWN)
2766                                 mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
2767                         if (sk->sk_state == SMC_APPCLOSEWAIT1)
2768                                 mask |= EPOLLIN;
2769                         if (smc->conn.urg_state == SMC_URG_VALID)
2770                                 mask |= EPOLLPRI;
2771                 }
2772         }
2773
2774         return mask;
2775 }
2776
2777 static int smc_shutdown(struct socket *sock, int how)
2778 {
2779         struct sock *sk = sock->sk;
2780         bool do_shutdown = true;
2781         struct smc_sock *smc;
2782         int rc = -EINVAL;
2783         int old_state;
2784         int rc1 = 0;
2785
2786         smc = smc_sk(sk);
2787
2788         if ((how < SHUT_RD) || (how > SHUT_RDWR))
2789                 return rc;
2790
2791         lock_sock(sk);
2792
2793         if (sock->state == SS_CONNECTING) {
2794                 if (sk->sk_state == SMC_ACTIVE)
2795                         sock->state = SS_CONNECTED;
2796                 else if (sk->sk_state == SMC_PEERCLOSEWAIT1 ||
2797                          sk->sk_state == SMC_PEERCLOSEWAIT2 ||
2798                          sk->sk_state == SMC_APPCLOSEWAIT1 ||
2799                          sk->sk_state == SMC_APPCLOSEWAIT2 ||
2800                          sk->sk_state == SMC_APPFINCLOSEWAIT)
2801                         sock->state = SS_DISCONNECTING;
2802         }
2803
2804         rc = -ENOTCONN;
2805         if ((sk->sk_state != SMC_ACTIVE) &&
2806             (sk->sk_state != SMC_PEERCLOSEWAIT1) &&
2807             (sk->sk_state != SMC_PEERCLOSEWAIT2) &&
2808             (sk->sk_state != SMC_APPCLOSEWAIT1) &&
2809             (sk->sk_state != SMC_APPCLOSEWAIT2) &&
2810             (sk->sk_state != SMC_APPFINCLOSEWAIT))
2811                 goto out;
2812         if (smc->use_fallback) {
2813                 rc = kernel_sock_shutdown(smc->clcsock, how);
2814                 sk->sk_shutdown = smc->clcsock->sk->sk_shutdown;
2815                 if (sk->sk_shutdown == SHUTDOWN_MASK) {
2816                         sk->sk_state = SMC_CLOSED;
2817                         sk->sk_socket->state = SS_UNCONNECTED;
2818                         sock_put(sk);
2819                 }
2820                 goto out;
2821         }
2822         switch (how) {
2823         case SHUT_RDWR:         /* shutdown in both directions */
2824                 old_state = sk->sk_state;
2825                 rc = smc_close_active(smc);
2826                 if (old_state == SMC_ACTIVE &&
2827                     sk->sk_state == SMC_PEERCLOSEWAIT1)
2828                         do_shutdown = false;
2829                 break;
2830         case SHUT_WR:
2831                 rc = smc_close_shutdown_write(smc);
2832                 break;
2833         case SHUT_RD:
2834                 rc = 0;
2835                 /* nothing more to do because peer is not involved */
2836                 break;
2837         }
2838         if (do_shutdown && smc->clcsock)
2839                 rc1 = kernel_sock_shutdown(smc->clcsock, how);
2840         /* map sock_shutdown_cmd constants to sk_shutdown value range */
2841         sk->sk_shutdown |= how + 1;
2842
2843         if (sk->sk_state == SMC_CLOSED)
2844                 sock->state = SS_UNCONNECTED;
2845         else
2846                 sock->state = SS_DISCONNECTING;
2847 out:
2848         release_sock(sk);
2849         return rc ? rc : rc1;
2850 }
2851
2852 static int __smc_getsockopt(struct socket *sock, int level, int optname,
2853                             char __user *optval, int __user *optlen)
2854 {
2855         struct smc_sock *smc;
2856         int val, len;
2857
2858         smc = smc_sk(sock->sk);
2859
2860         if (get_user(len, optlen))
2861                 return -EFAULT;
2862
2863         len = min_t(int, len, sizeof(int));
2864
2865         if (len < 0)
2866                 return -EINVAL;
2867
2868         switch (optname) {
2869         case SMC_LIMIT_HS:
2870                 val = smc->limit_smc_hs;
2871                 break;
2872         default:
2873                 return -EOPNOTSUPP;
2874         }
2875
2876         if (put_user(len, optlen))
2877                 return -EFAULT;
2878         if (copy_to_user(optval, &val, len))
2879                 return -EFAULT;
2880
2881         return 0;
2882 }
2883
2884 static int __smc_setsockopt(struct socket *sock, int level, int optname,
2885                             sockptr_t optval, unsigned int optlen)
2886 {
2887         struct sock *sk = sock->sk;
2888         struct smc_sock *smc;
2889         int val, rc;
2890
2891         smc = smc_sk(sk);
2892
2893         lock_sock(sk);
2894         switch (optname) {
2895         case SMC_LIMIT_HS:
2896                 if (optlen < sizeof(int)) {
2897                         rc = -EINVAL;
2898                         break;
2899                 }
2900                 if (copy_from_sockptr(&val, optval, sizeof(int))) {
2901                         rc = -EFAULT;
2902                         break;
2903                 }
2904
2905                 smc->limit_smc_hs = !!val;
2906                 rc = 0;
2907                 break;
2908         default:
2909                 rc = -EOPNOTSUPP;
2910                 break;
2911         }
2912         release_sock(sk);
2913
2914         return rc;
2915 }
2916
2917 static int smc_setsockopt(struct socket *sock, int level, int optname,
2918                           sockptr_t optval, unsigned int optlen)
2919 {
2920         struct sock *sk = sock->sk;
2921         struct smc_sock *smc;
2922         int val, rc;
2923
2924         if (level == SOL_TCP && optname == TCP_ULP)
2925                 return -EOPNOTSUPP;
2926         else if (level == SOL_SMC)
2927                 return __smc_setsockopt(sock, level, optname, optval, optlen);
2928
2929         smc = smc_sk(sk);
2930
2931         /* generic setsockopts reaching us here always apply to the
2932          * CLC socket
2933          */
2934         mutex_lock(&smc->clcsock_release_lock);
2935         if (!smc->clcsock) {
2936                 mutex_unlock(&smc->clcsock_release_lock);
2937                 return -EBADF;
2938         }
2939         if (unlikely(!smc->clcsock->ops->setsockopt))
2940                 rc = -EOPNOTSUPP;
2941         else
2942                 rc = smc->clcsock->ops->setsockopt(smc->clcsock, level, optname,
2943                                                    optval, optlen);
2944         if (smc->clcsock->sk->sk_err) {
2945                 sk->sk_err = smc->clcsock->sk->sk_err;
2946                 sk_error_report(sk);
2947         }
2948         mutex_unlock(&smc->clcsock_release_lock);
2949
2950         if (optlen < sizeof(int))
2951                 return -EINVAL;
2952         if (copy_from_sockptr(&val, optval, sizeof(int)))
2953                 return -EFAULT;
2954
2955         lock_sock(sk);
2956         if (rc || smc->use_fallback)
2957                 goto out;
2958         switch (optname) {
2959         case TCP_FASTOPEN:
2960         case TCP_FASTOPEN_CONNECT:
2961         case TCP_FASTOPEN_KEY:
2962         case TCP_FASTOPEN_NO_COOKIE:
2963                 /* option not supported by SMC */
2964                 if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) {
2965                         rc = smc_switch_to_fallback(smc, SMC_CLC_DECL_OPTUNSUPP);
2966                 } else {
2967                         rc = -EINVAL;
2968                 }
2969                 break;
2970         case TCP_NODELAY:
2971                 if (sk->sk_state != SMC_INIT &&
2972                     sk->sk_state != SMC_LISTEN &&
2973                     sk->sk_state != SMC_CLOSED) {
2974                         if (val) {
2975                                 SMC_STAT_INC(smc, ndly_cnt);
2976                                 smc_tx_pending(&smc->conn);
2977                                 cancel_delayed_work(&smc->conn.tx_work);
2978                         }
2979                 }
2980                 break;
2981         case TCP_CORK:
2982                 if (sk->sk_state != SMC_INIT &&
2983                     sk->sk_state != SMC_LISTEN &&
2984                     sk->sk_state != SMC_CLOSED) {
2985                         if (!val) {
2986                                 SMC_STAT_INC(smc, cork_cnt);
2987                                 smc_tx_pending(&smc->conn);
2988                                 cancel_delayed_work(&smc->conn.tx_work);
2989                         }
2990                 }
2991                 break;
2992         case TCP_DEFER_ACCEPT:
2993                 smc->sockopt_defer_accept = val;
2994                 break;
2995         default:
2996                 break;
2997         }
2998 out:
2999         release_sock(sk);
3000
3001         return rc;
3002 }
3003
3004 static int smc_getsockopt(struct socket *sock, int level, int optname,
3005                           char __user *optval, int __user *optlen)
3006 {
3007         struct smc_sock *smc;
3008         int rc;
3009
3010         if (level == SOL_SMC)
3011                 return __smc_getsockopt(sock, level, optname, optval, optlen);
3012
3013         smc = smc_sk(sock->sk);
3014         mutex_lock(&smc->clcsock_release_lock);
3015         if (!smc->clcsock) {
3016                 mutex_unlock(&smc->clcsock_release_lock);
3017                 return -EBADF;
3018         }
3019         /* socket options apply to the CLC socket */
3020         if (unlikely(!smc->clcsock->ops->getsockopt)) {
3021                 mutex_unlock(&smc->clcsock_release_lock);
3022                 return -EOPNOTSUPP;
3023         }
3024         rc = smc->clcsock->ops->getsockopt(smc->clcsock, level, optname,
3025                                            optval, optlen);
3026         mutex_unlock(&smc->clcsock_release_lock);
3027         return rc;
3028 }
3029
3030 static int smc_ioctl(struct socket *sock, unsigned int cmd,
3031                      unsigned long arg)
3032 {
3033         union smc_host_cursor cons, urg;
3034         struct smc_connection *conn;
3035         struct smc_sock *smc;
3036         int answ;
3037
3038         smc = smc_sk(sock->sk);
3039         conn = &smc->conn;
3040         lock_sock(&smc->sk);
3041         if (smc->use_fallback) {
3042                 if (!smc->clcsock) {
3043                         release_sock(&smc->sk);
3044                         return -EBADF;
3045                 }
3046                 answ = smc->clcsock->ops->ioctl(smc->clcsock, cmd, arg);
3047                 release_sock(&smc->sk);
3048                 return answ;
3049         }
3050         switch (cmd) {
3051         case SIOCINQ: /* same as FIONREAD */
3052                 if (smc->sk.sk_state == SMC_LISTEN) {
3053                         release_sock(&smc->sk);
3054                         return -EINVAL;
3055                 }
3056                 if (smc->sk.sk_state == SMC_INIT ||
3057                     smc->sk.sk_state == SMC_CLOSED)
3058                         answ = 0;
3059                 else
3060                         answ = atomic_read(&smc->conn.bytes_to_rcv);
3061                 break;
3062         case SIOCOUTQ:
3063                 /* output queue size (not send + not acked) */
3064                 if (smc->sk.sk_state == SMC_LISTEN) {
3065                         release_sock(&smc->sk);
3066                         return -EINVAL;
3067                 }
3068                 if (smc->sk.sk_state == SMC_INIT ||
3069                     smc->sk.sk_state == SMC_CLOSED)
3070                         answ = 0;
3071                 else
3072                         answ = smc->conn.sndbuf_desc->len -
3073                                         atomic_read(&smc->conn.sndbuf_space);
3074                 break;
3075         case SIOCOUTQNSD:
3076                 /* output queue size (not send only) */
3077                 if (smc->sk.sk_state == SMC_LISTEN) {
3078                         release_sock(&smc->sk);
3079                         return -EINVAL;
3080                 }
3081                 if (smc->sk.sk_state == SMC_INIT ||
3082                     smc->sk.sk_state == SMC_CLOSED)
3083                         answ = 0;
3084                 else
3085                         answ = smc_tx_prepared_sends(&smc->conn);
3086                 break;
3087         case SIOCATMARK:
3088                 if (smc->sk.sk_state == SMC_LISTEN) {
3089                         release_sock(&smc->sk);
3090                         return -EINVAL;
3091                 }
3092                 if (smc->sk.sk_state == SMC_INIT ||
3093                     smc->sk.sk_state == SMC_CLOSED) {
3094                         answ = 0;
3095                 } else {
3096                         smc_curs_copy(&cons, &conn->local_tx_ctrl.cons, conn);
3097                         smc_curs_copy(&urg, &conn->urg_curs, conn);
3098                         answ = smc_curs_diff(conn->rmb_desc->len,
3099                                              &cons, &urg) == 1;
3100                 }
3101                 break;
3102         default:
3103                 release_sock(&smc->sk);
3104                 return -ENOIOCTLCMD;
3105         }
3106         release_sock(&smc->sk);
3107
3108         return put_user(answ, (int __user *)arg);
3109 }
3110
3111 static ssize_t smc_sendpage(struct socket *sock, struct page *page,
3112                             int offset, size_t size, int flags)
3113 {
3114         struct sock *sk = sock->sk;
3115         struct smc_sock *smc;
3116         int rc = -EPIPE;
3117
3118         smc = smc_sk(sk);
3119         lock_sock(sk);
3120         if (sk->sk_state != SMC_ACTIVE) {
3121                 release_sock(sk);
3122                 goto out;
3123         }
3124         release_sock(sk);
3125         if (smc->use_fallback) {
3126                 rc = kernel_sendpage(smc->clcsock, page, offset,
3127                                      size, flags);
3128         } else {
3129                 lock_sock(sk);
3130                 rc = smc_tx_sendpage(smc, page, offset, size, flags);
3131                 release_sock(sk);
3132                 SMC_STAT_INC(smc, sendpage_cnt);
3133         }
3134
3135 out:
3136         return rc;
3137 }
3138
3139 /* Map the affected portions of the rmbe into an spd, note the number of bytes
3140  * to splice in conn->splice_pending, and press 'go'. Delays consumer cursor
3141  * updates till whenever a respective page has been fully processed.
3142  * Note that subsequent recv() calls have to wait till all splice() processing
3143  * completed.
3144  */
3145 static ssize_t smc_splice_read(struct socket *sock, loff_t *ppos,
3146                                struct pipe_inode_info *pipe, size_t len,
3147                                unsigned int flags)
3148 {
3149         struct sock *sk = sock->sk;
3150         struct smc_sock *smc;
3151         int rc = -ENOTCONN;
3152
3153         smc = smc_sk(sk);
3154         lock_sock(sk);
3155         if (sk->sk_state == SMC_CLOSED && (sk->sk_shutdown & RCV_SHUTDOWN)) {
3156                 /* socket was connected before, no more data to read */
3157                 rc = 0;
3158                 goto out;
3159         }
3160         if (sk->sk_state == SMC_INIT ||
3161             sk->sk_state == SMC_LISTEN ||
3162             sk->sk_state == SMC_CLOSED)
3163                 goto out;
3164
3165         if (sk->sk_state == SMC_PEERFINCLOSEWAIT) {
3166                 rc = 0;
3167                 goto out;
3168         }
3169
3170         if (smc->use_fallback) {
3171                 rc = smc->clcsock->ops->splice_read(smc->clcsock, ppos,
3172                                                     pipe, len, flags);
3173         } else {
3174                 if (*ppos) {
3175                         rc = -ESPIPE;
3176                         goto out;
3177                 }
3178                 if (flags & SPLICE_F_NONBLOCK)
3179                         flags = MSG_DONTWAIT;
3180                 else
3181                         flags = 0;
3182                 SMC_STAT_INC(smc, splice_cnt);
3183                 rc = smc_rx_recvmsg(smc, NULL, pipe, len, flags);
3184         }
3185 out:
3186         release_sock(sk);
3187
3188         return rc;
3189 }
3190
3191 /* must look like tcp */
3192 static const struct proto_ops smc_sock_ops = {
3193         .family         = PF_SMC,
3194         .owner          = THIS_MODULE,
3195         .release        = smc_release,
3196         .bind           = smc_bind,
3197         .connect        = smc_connect,
3198         .socketpair     = sock_no_socketpair,
3199         .accept         = smc_accept,
3200         .getname        = smc_getname,
3201         .poll           = smc_poll,
3202         .ioctl          = smc_ioctl,
3203         .listen         = smc_listen,
3204         .shutdown       = smc_shutdown,
3205         .setsockopt     = smc_setsockopt,
3206         .getsockopt     = smc_getsockopt,
3207         .sendmsg        = smc_sendmsg,
3208         .recvmsg        = smc_recvmsg,
3209         .mmap           = sock_no_mmap,
3210         .sendpage       = smc_sendpage,
3211         .splice_read    = smc_splice_read,
3212 };
3213
3214 static int __smc_create(struct net *net, struct socket *sock, int protocol,
3215                         int kern, struct socket *clcsock)
3216 {
3217         int family = (protocol == SMCPROTO_SMC6) ? PF_INET6 : PF_INET;
3218         struct smc_sock *smc;
3219         struct sock *sk;
3220         int rc;
3221
3222         rc = -ESOCKTNOSUPPORT;
3223         if (sock->type != SOCK_STREAM)
3224                 goto out;
3225
3226         rc = -EPROTONOSUPPORT;
3227         if (protocol != SMCPROTO_SMC && protocol != SMCPROTO_SMC6)
3228                 goto out;
3229
3230         rc = -ENOBUFS;
3231         sock->ops = &smc_sock_ops;
3232         sock->state = SS_UNCONNECTED;
3233         sk = smc_sock_alloc(net, sock, protocol);
3234         if (!sk)
3235                 goto out;
3236
3237         /* create internal TCP socket for CLC handshake and fallback */
3238         smc = smc_sk(sk);
3239         smc->use_fallback = false; /* assume rdma capability first */
3240         smc->fallback_rsn = 0;
3241
3242         /* default behavior from limit_smc_hs in every net namespace */
3243         smc->limit_smc_hs = net->smc.limit_smc_hs;
3244
3245         rc = 0;
3246         if (!clcsock) {
3247                 rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP,
3248                                       &smc->clcsock);
3249                 if (rc) {
3250                         sk_common_release(sk);
3251                         goto out;
3252                 }
3253         } else {
3254                 smc->clcsock = clcsock;
3255         }
3256
3257         smc->sk.sk_sndbuf = max(smc->clcsock->sk->sk_sndbuf, SMC_BUF_MIN_SIZE);
3258         smc->sk.sk_rcvbuf = max(smc->clcsock->sk->sk_rcvbuf, SMC_BUF_MIN_SIZE);
3259
3260 out:
3261         return rc;
3262 }
3263
3264 static int smc_create(struct net *net, struct socket *sock, int protocol,
3265                       int kern)
3266 {
3267         return __smc_create(net, sock, protocol, kern, NULL);
3268 }
3269
3270 static const struct net_proto_family smc_sock_family_ops = {
3271         .family = PF_SMC,
3272         .owner  = THIS_MODULE,
3273         .create = smc_create,
3274 };
3275
3276 static int smc_ulp_init(struct sock *sk)
3277 {
3278         struct socket *tcp = sk->sk_socket;
3279         struct net *net = sock_net(sk);
3280         struct socket *smcsock;
3281         int protocol, ret;
3282
3283         /* only TCP can be replaced */
3284         if (tcp->type != SOCK_STREAM || sk->sk_protocol != IPPROTO_TCP ||
3285             (sk->sk_family != AF_INET && sk->sk_family != AF_INET6))
3286                 return -ESOCKTNOSUPPORT;
3287         /* don't handle wq now */
3288         if (tcp->state != SS_UNCONNECTED || !tcp->file || tcp->wq.fasync_list)
3289                 return -ENOTCONN;
3290
3291         if (sk->sk_family == AF_INET)
3292                 protocol = SMCPROTO_SMC;
3293         else
3294                 protocol = SMCPROTO_SMC6;
3295
3296         smcsock = sock_alloc();
3297         if (!smcsock)
3298                 return -ENFILE;
3299
3300         smcsock->type = SOCK_STREAM;
3301         __module_get(THIS_MODULE); /* tried in __tcp_ulp_find_autoload */
3302         ret = __smc_create(net, smcsock, protocol, 1, tcp);
3303         if (ret) {
3304                 sock_release(smcsock); /* module_put() which ops won't be NULL */
3305                 return ret;
3306         }
3307
3308         /* replace tcp socket to smc */
3309         smcsock->file = tcp->file;
3310         smcsock->file->private_data = smcsock;
3311         smcsock->file->f_inode = SOCK_INODE(smcsock); /* replace inode when sock_close */
3312         smcsock->file->f_path.dentry->d_inode = SOCK_INODE(smcsock); /* dput() in __fput */
3313         tcp->file = NULL;
3314
3315         return ret;
3316 }
3317
3318 static void smc_ulp_clone(const struct request_sock *req, struct sock *newsk,
3319                           const gfp_t priority)
3320 {
3321         struct inet_connection_sock *icsk = inet_csk(newsk);
3322
3323         /* don't inherit ulp ops to child when listen */
3324         icsk->icsk_ulp_ops = NULL;
3325 }
3326
3327 static struct tcp_ulp_ops smc_ulp_ops __read_mostly = {
3328         .name           = "smc",
3329         .owner          = THIS_MODULE,
3330         .init           = smc_ulp_init,
3331         .clone          = smc_ulp_clone,
3332 };
3333
/* pernet id of SMC; smc_net_ops.id points at this variable */
unsigned int smc_net_id;
3335
3336 static __net_init int smc_net_init(struct net *net)
3337 {
3338         int rc;
3339
3340         rc = smc_sysctl_net_init(net);
3341         if (rc)
3342                 return rc;
3343         return smc_pnet_net_init(net);
3344 }
3345
3346 static void __net_exit smc_net_exit(struct net *net)
3347 {
3348         smc_sysctl_net_exit(net);
3349         smc_pnet_net_exit(net);
3350 }
3351
3352 static __net_init int smc_net_stat_init(struct net *net)
3353 {
3354         return smc_stats_init(net);
3355 }
3356
3357 static void __net_exit smc_net_stat_exit(struct net *net)
3358 {
3359         smc_stats_exit(net);
3360 }
3361
3362 static struct pernet_operations smc_net_ops = {
3363         .init = smc_net_init,
3364         .exit = smc_net_exit,
3365         .id   = &smc_net_id,
3366         .size = sizeof(struct smc_net),
3367 };
3368
3369 static struct pernet_operations smc_net_stat_ops = {
3370         .init = smc_net_stat_init,
3371         .exit = smc_net_stat_exit,
3372 };
3373
3374 static int __init smc_init(void)
3375 {
3376         int rc;
3377
3378         rc = register_pernet_subsys(&smc_net_ops);
3379         if (rc)
3380                 return rc;
3381
3382         rc = register_pernet_subsys(&smc_net_stat_ops);
3383         if (rc)
3384                 return rc;
3385
3386         smc_ism_init();
3387         smc_clc_init();
3388
3389         rc = smc_nl_init();
3390         if (rc)
3391                 goto out_pernet_subsys;
3392
3393         rc = smc_pnet_init();
3394         if (rc)
3395                 goto out_nl;
3396
3397         rc = -ENOMEM;
3398
3399         smc_tcp_ls_wq = alloc_workqueue("smc_tcp_ls_wq", 0, 0);
3400         if (!smc_tcp_ls_wq)
3401                 goto out_pnet;
3402
3403         smc_hs_wq = alloc_workqueue("smc_hs_wq", 0, 0);
3404         if (!smc_hs_wq)
3405                 goto out_alloc_tcp_ls_wq;
3406
3407         smc_close_wq = alloc_workqueue("smc_close_wq", 0, 0);
3408         if (!smc_close_wq)
3409                 goto out_alloc_hs_wq;
3410
3411         rc = smc_core_init();
3412         if (rc) {
3413                 pr_err("%s: smc_core_init fails with %d\n", __func__, rc);
3414                 goto out_alloc_wqs;
3415         }
3416
3417         rc = smc_llc_init();
3418         if (rc) {
3419                 pr_err("%s: smc_llc_init fails with %d\n", __func__, rc);
3420                 goto out_core;
3421         }
3422
3423         rc = smc_cdc_init();
3424         if (rc) {
3425                 pr_err("%s: smc_cdc_init fails with %d\n", __func__, rc);
3426                 goto out_core;
3427         }
3428
3429         rc = proto_register(&smc_proto, 1);
3430         if (rc) {
3431                 pr_err("%s: proto_register(v4) fails with %d\n", __func__, rc);
3432                 goto out_core;
3433         }
3434
3435         rc = proto_register(&smc_proto6, 1);
3436         if (rc) {
3437                 pr_err("%s: proto_register(v6) fails with %d\n", __func__, rc);
3438                 goto out_proto;
3439         }
3440
3441         rc = sock_register(&smc_sock_family_ops);
3442         if (rc) {
3443                 pr_err("%s: sock_register fails with %d\n", __func__, rc);
3444                 goto out_proto6;
3445         }
3446         INIT_HLIST_HEAD(&smc_v4_hashinfo.ht);
3447         INIT_HLIST_HEAD(&smc_v6_hashinfo.ht);
3448
3449         rc = smc_ib_register_client();
3450         if (rc) {
3451                 pr_err("%s: ib_register fails with %d\n", __func__, rc);
3452                 goto out_sock;
3453         }
3454
3455         rc = tcp_register_ulp(&smc_ulp_ops);
3456         if (rc) {
3457                 pr_err("%s: tcp_ulp_register fails with %d\n", __func__, rc);
3458                 goto out_ib;
3459         }
3460
3461         static_branch_enable(&tcp_have_smc);
3462         return 0;
3463
3464 out_ib:
3465         smc_ib_unregister_client();
3466 out_sock:
3467         sock_unregister(PF_SMC);
3468 out_proto6:
3469         proto_unregister(&smc_proto6);
3470 out_proto:
3471         proto_unregister(&smc_proto);
3472 out_core:
3473         smc_core_exit();
3474 out_alloc_wqs:
3475         destroy_workqueue(smc_close_wq);
3476 out_alloc_hs_wq:
3477         destroy_workqueue(smc_hs_wq);
3478 out_alloc_tcp_ls_wq:
3479         destroy_workqueue(smc_tcp_ls_wq);
3480 out_pnet:
3481         smc_pnet_exit();
3482 out_nl:
3483         smc_nl_exit();
3484 out_pernet_subsys:
3485         unregister_pernet_subsys(&smc_net_ops);
3486
3487         return rc;
3488 }
3489
/* Module exit: undo what smc_init() set up.
 *
 * The ULP and the PF_SMC family are unregistered first, then the core, the
 * IB client, the workqueues, the protos, pnet, netlink and CLC state are
 * torn down, and the two pernet subsystems are unregistered last.
 */
static void __exit smc_exit(void)
{
	static_branch_disable(&tcp_have_smc);
	tcp_unregister_ulp(&smc_ulp_ops);
	sock_unregister(PF_SMC);
	smc_core_exit();
	smc_ib_unregister_client();
	destroy_workqueue(smc_close_wq);
	destroy_workqueue(smc_tcp_ls_wq);
	destroy_workqueue(smc_hs_wq);
	proto_unregister(&smc_proto6);
	proto_unregister(&smc_proto);
	smc_pnet_exit();
	smc_nl_exit();
	smc_clc_exit();
	/* unregistered in reverse order of their registration in smc_init() */
	unregister_pernet_subsys(&smc_net_stat_ops);
	unregister_pernet_subsys(&smc_net_ops);
	/* NOTE(review): presumably waits for pending RCU callbacks before the
	 * module goes away - confirm which call_rcu() users this covers
	 */
	rcu_barrier();
}
3509
/* module entry and exit points */
module_init(smc_init);
module_exit(smc_exit);

/* module metadata and aliases (PF_SMC family, "smc" TCP ULP and the SMC
 * generic netlink family)
 */
MODULE_AUTHOR("Ursula Braun <ubraun@linux.vnet.ibm.com>");
MODULE_DESCRIPTION("smc socket address family");
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_SMC);
MODULE_ALIAS_TCP_ULP("smc");
MODULE_ALIAS_GENL_FAMILY(SMC_GENL_FAMILY_NAME);