net/smc: common routine for CLC accept and confirm
[linux-2.6-microblaze.git] / net / smc / af_smc.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  *  Shared Memory Communications over RDMA (SMC-R) and RoCE
4  *
5  *  AF_SMC protocol family socket handler keeping the AF_INET sock address type
6  *  applies to SOCK_STREAM sockets only
7  *  offers an alternative communication option for TCP-protocol sockets
8  *  applicable with RoCE-cards only
9  *
10  *  Initial restrictions:
11  *    - support for alternate links postponed
12  *
13  *  Copyright IBM Corp. 2016, 2018
14  *
15  *  Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
16  *              based on prototype from Frank Blaschka
17  */
18
19 #define KMSG_COMPONENT "smc"
20 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
21
22 #include <linux/module.h>
23 #include <linux/socket.h>
24 #include <linux/workqueue.h>
25 #include <linux/in.h>
26 #include <linux/sched/signal.h>
27 #include <linux/if_vlan.h>
28 #include <linux/rcupdate_wait.h>
29
30 #include <net/sock.h>
31 #include <net/tcp.h>
32 #include <net/smc.h>
33 #include <asm/ioctls.h>
34
35 #include <net/net_namespace.h>
36 #include <net/netns/generic.h>
37 #include "smc_netns.h"
38
39 #include "smc.h"
40 #include "smc_clc.h"
41 #include "smc_llc.h"
42 #include "smc_cdc.h"
43 #include "smc_core.h"
44 #include "smc_ib.h"
45 #include "smc_ism.h"
46 #include "smc_pnet.h"
47 #include "smc_tx.h"
48 #include "smc_rx.h"
49 #include "smc_close.h"
50
/* A CLC handshake that may create a new link group must not race with
 * another one; there is one serialization lock per role.
 */
static DEFINE_MUTEX(smc_server_lgr_pending);	/* serialize link group
						 * creation on server
						 */
static DEFINE_MUTEX(smc_client_lgr_pending);	/* serialize link group
						 * creation on client
						 */

/* forward declarations for the work handlers wired up in smc_sock_alloc() */
static void smc_tcp_listen_work(struct work_struct *);
static void smc_connect_work(struct work_struct *);
60
61 static void smc_set_keepalive(struct sock *sk, int val)
62 {
63         struct smc_sock *smc = smc_sk(sk);
64
65         smc->clcsock->sk->sk_prot->keepalive(smc->clcsock->sk, val);
66 }
67
/* global sock hash tables, one per supported address family */
static struct smc_hashinfo smc_v4_hashinfo = {
	.lock = __RW_LOCK_UNLOCKED(smc_v4_hashinfo.lock),
};

static struct smc_hashinfo smc_v6_hashinfo = {
	.lock = __RW_LOCK_UNLOCKED(smc_v6_hashinfo.lock),
};
75
76 int smc_hash_sk(struct sock *sk)
77 {
78         struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;
79         struct hlist_head *head;
80
81         head = &h->ht;
82
83         write_lock_bh(&h->lock);
84         sk_add_node(sk, head);
85         sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
86         write_unlock_bh(&h->lock);
87
88         return 0;
89 }
90 EXPORT_SYMBOL_GPL(smc_hash_sk);
91
92 void smc_unhash_sk(struct sock *sk)
93 {
94         struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;
95
96         write_lock_bh(&h->lock);
97         if (sk_del_node_init(sk))
98                 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
99         write_unlock_bh(&h->lock);
100 }
101 EXPORT_SYMBOL_GPL(smc_unhash_sk);
102
/* SMC protocol descriptor for AF_INET sockets */
struct proto smc_proto = {
	.name		= "SMC",
	.owner		= THIS_MODULE,
	.keepalive	= smc_set_keepalive,
	.hash		= smc_hash_sk,
	.unhash		= smc_unhash_sk,
	.obj_size	= sizeof(struct smc_sock),
	.h.smc_hash	= &smc_v4_hashinfo,
	.slab_flags	= SLAB_TYPESAFE_BY_RCU,
};
EXPORT_SYMBOL_GPL(smc_proto);

/* SMC protocol descriptor for AF_INET6 sockets; differs from smc_proto
 * only in name and hash table
 */
struct proto smc_proto6 = {
	.name		= "SMC6",
	.owner		= THIS_MODULE,
	.keepalive	= smc_set_keepalive,
	.hash		= smc_hash_sk,
	.unhash		= smc_unhash_sk,
	.obj_size	= sizeof(struct smc_sock),
	.h.smc_hash	= &smc_v6_hashinfo,
	.slab_flags	= SLAB_TYPESAFE_BY_RCU,
};
EXPORT_SYMBOL_GPL(smc_proto6);
126
127 static void smc_restore_fallback_changes(struct smc_sock *smc)
128 {
129         if (smc->clcsock->file) { /* non-accepted sockets have no file yet */
130                 smc->clcsock->file->private_data = smc->sk.sk_socket;
131                 smc->clcsock->file = NULL;
132         }
133 }
134
/* Core of socket release. Called with the sock lock held; temporarily
 * drops it while releasing the internal clc socket.
 * Returns the result of the active close (native case) or of shutting
 * down the listen clcsock (fallback case), 0 otherwise.
 */
static int __smc_release(struct smc_sock *smc)
{
	struct sock *sk = &smc->sk;
	int rc = 0;

	if (!smc->use_fallback) {
		rc = smc_close_active(smc);
		sock_set_flag(sk, SOCK_DEAD);
		sk->sk_shutdown |= SHUTDOWN_MASK;
	} else {
		if (sk->sk_state != SMC_LISTEN && sk->sk_state != SMC_INIT)
			sock_put(sk); /* passive closing */
		if (sk->sk_state == SMC_LISTEN) {
			/* wake up clcsock accept */
			rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR);
		}
		sk->sk_state = SMC_CLOSED;
		sk->sk_state_change(sk);
		smc_restore_fallback_changes(smc);
	}

	sk->sk_prot->unhash(sk);

	if (sk->sk_state == SMC_CLOSED) {
		if (smc->clcsock) {
			/* NOTE(review): sock lock is dropped here, presumably
			 * because smc_clcsock_release() takes its own locks —
			 * confirm against smc_close.c before relying on it
			 */
			release_sock(sk);
			smc_clcsock_release(smc);
			lock_sock(sk);
		}
		if (!smc->use_fallback)
			smc_conn_free(&smc->conn);
	}

	return rc;
}
170
/* proto_ops ->release handler: close the SMC connection, detach the sock
 * from the socket and drop the final references
 */
static int smc_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = 0;

	if (!sk)
		goto out;

	sock_hold(sk); /* sock_put below */
	smc = smc_sk(sk);

	/* cleanup for a dangling non-blocking connect */
	if (smc->connect_nonblock && sk->sk_state == SMC_INIT)
		tcp_abort(smc->clcsock->sk, ECONNABORTED);
	/* wait until a pending smc_connect_work() has finished */
	flush_work(&smc->connect_work);

	if (sk->sk_state == SMC_LISTEN)
		/* smc_close_non_accepted() is called and acquires
		 * sock lock for child sockets again
		 */
		lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
	else
		lock_sock(sk);

	rc = __smc_release(smc);

	/* detach socket */
	sock_orphan(sk);
	sock->sk = NULL;
	release_sock(sk);

	sock_put(sk); /* sock_hold above */
	sock_put(sk); /* final sock_put */
out:
	return rc;
}
208
209 static void smc_destruct(struct sock *sk)
210 {
211         if (sk->sk_state != SMC_CLOSED)
212                 return;
213         if (!sock_flag(sk, SOCK_DEAD))
214                 return;
215
216         sk_refcnt_debug_dec(sk);
217 }
218
/* allocate and initialize an SMC sock bound to @sock; the proto is
 * chosen per @protocol (SMCPROTO_SMC6 -> IPv6, otherwise IPv4).
 * Returns NULL if sk_alloc() fails.
 */
static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
				   int protocol)
{
	struct smc_sock *smc;
	struct proto *prot;
	struct sock *sk;

	prot = (protocol == SMCPROTO_SMC6) ? &smc_proto6 : &smc_proto;
	sk = sk_alloc(net, PF_SMC, GFP_KERNEL, prot, 0);
	if (!sk)
		return NULL;

	sock_init_data(sock, sk); /* sets sk_refcnt to 1 */
	sk->sk_state = SMC_INIT;
	sk->sk_destruct = smc_destruct;
	sk->sk_protocol = protocol;
	smc = smc_sk(sk);
	/* wire up the deferred-work handlers and per-sock locks */
	INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
	INIT_WORK(&smc->connect_work, smc_connect_work);
	INIT_DELAYED_WORK(&smc->conn.tx_work, smc_tx_work);
	INIT_LIST_HEAD(&smc->accept_q);
	spin_lock_init(&smc->accept_q_lock);
	spin_lock_init(&smc->conn.send_lock);
	sk->sk_prot->hash(sk);
	sk_refcnt_debug_inc(sk);
	mutex_init(&smc->clcsock_release_lock);

	return sk;
}
248
/* proto_ops ->bind handler: validate the address like inet_bind() would,
 * then delegate the actual bind to the internal clc (TCP) socket
 */
static int smc_bind(struct socket *sock, struct sockaddr *uaddr,
		    int addr_len)
{
	struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc;

	smc = smc_sk(sk);

	/* replicate tests from inet_bind(), to be safe wrt. future changes */
	rc = -EINVAL;
	if (addr_len < sizeof(struct sockaddr_in))
		goto out;

	rc = -EAFNOSUPPORT;
	if (addr->sin_family != AF_INET &&
	    addr->sin_family != AF_INET6 &&
	    addr->sin_family != AF_UNSPEC)
		goto out;
	/* accept AF_UNSPEC (mapped to AF_INET) only if s_addr is INADDR_ANY */
	if (addr->sin_family == AF_UNSPEC &&
	    addr->sin_addr.s_addr != htonl(INADDR_ANY))
		goto out;

	lock_sock(sk);

	/* Check if socket is already active */
	rc = -EINVAL;
	if (sk->sk_state != SMC_INIT || smc->connect_nonblock)
		goto out_rel;

	/* bind happens on the TCP socket; mirror SO_REUSEADDR first */
	smc->clcsock->sk->sk_reuse = sk->sk_reuse;
	rc = kernel_bind(smc->clcsock, uaddr, addr_len);

out_rel:
	release_sock(sk);
out:
	return rc;
}
289
290 static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
291                                    unsigned long mask)
292 {
293         /* options we don't get control via setsockopt for */
294         nsk->sk_type = osk->sk_type;
295         nsk->sk_sndbuf = osk->sk_sndbuf;
296         nsk->sk_rcvbuf = osk->sk_rcvbuf;
297         nsk->sk_sndtimeo = osk->sk_sndtimeo;
298         nsk->sk_rcvtimeo = osk->sk_rcvtimeo;
299         nsk->sk_mark = osk->sk_mark;
300         nsk->sk_priority = osk->sk_priority;
301         nsk->sk_rcvlowat = osk->sk_rcvlowat;
302         nsk->sk_bound_dev_if = osk->sk_bound_dev_if;
303         nsk->sk_err = osk->sk_err;
304
305         nsk->sk_flags &= ~mask;
306         nsk->sk_flags |= osk->sk_flags & mask;
307 }
308
/* SOL_SOCKET flag bits mirrored from the SMC socket down to the internal
 * clc (TCP) socket
 */
#define SK_FLAGS_SMC_TO_CLC ((1UL << SOCK_URGINLINE) | \
			     (1UL << SOCK_KEEPOPEN) | \
			     (1UL << SOCK_LINGER) | \
			     (1UL << SOCK_BROADCAST) | \
			     (1UL << SOCK_TIMESTAMP) | \
			     (1UL << SOCK_DBG) | \
			     (1UL << SOCK_RCVTSTAMP) | \
			     (1UL << SOCK_RCVTSTAMPNS) | \
			     (1UL << SOCK_LOCALROUTE) | \
			     (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE) | \
			     (1UL << SOCK_RXQ_OVFL) | \
			     (1UL << SOCK_WIFI_STATUS) | \
			     (1UL << SOCK_NOFCS) | \
			     (1UL << SOCK_FILTER_LOCKED) | \
			     (1UL << SOCK_TSTAMP_NEW))
/* copy only relevant settings and flags of SOL_SOCKET level from smc to
 * clc socket (since smc is not called for these options from net/core)
 */
static void smc_copy_sock_settings_to_clc(struct smc_sock *smc)
{
	smc_copy_sock_settings(smc->clcsock->sk, &smc->sk, SK_FLAGS_SMC_TO_CLC);
}
331
/* SOL_SOCKET flag bits mirrored back from the clc (TCP) socket to the
 * SMC socket
 */
#define SK_FLAGS_CLC_TO_SMC ((1UL << SOCK_URGINLINE) | \
			     (1UL << SOCK_KEEPOPEN) | \
			     (1UL << SOCK_LINGER) | \
			     (1UL << SOCK_DBG))
/* copy only settings and flags relevant for smc from clc to smc socket */
static void smc_copy_sock_settings_to_smc(struct smc_sock *smc)
{
	smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC);
}
341
/* register the new rmb on all links of the link group and exchange a
 * confirm_rkey message with the peer; runs under the RKEY LLC flow.
 * Returns 0 on success or an SMC decline/negative error code.
 */
static int smcr_lgr_reg_rmbs(struct smc_link *link,
			     struct smc_buf_desc *rmb_desc)
{
	struct smc_link_group *lgr = link->lgr;
	int i, rc = 0;

	rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY);
	if (rc)
		return rc;
	/* protect against parallel smc_llc_cli_rkey_exchange() and
	 * parallel smcr_link_reg_rmb()
	 */
	mutex_lock(&lgr->llc_conf_mutex);
	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		/* skip unused/failed link slots */
		if (!smc_link_active(&lgr->lnk[i]))
			continue;
		rc = smcr_link_reg_rmb(&lgr->lnk[i], rmb_desc);
		if (rc)
			goto out;
	}

	/* exchange confirm_rkey msg with peer */
	rc = smc_llc_do_confirm_rkey(link, rmb_desc);
	if (rc) {
		/* map any confirm_rkey failure to a generic error */
		rc = -EFAULT;
		goto out;
	}
	rmb_desc->is_conf_rkey = true;
out:
	mutex_unlock(&lgr->llc_conf_mutex);
	smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
	return rc;
}
376
/* client side of the LLC handshake on first contact: wait for the
 * server's CONFIRM LINK, bring the QP to RTS, register the rmb, answer
 * with a CONFIRM LINK response and optionally handle an ADD LINK.
 * Returns 0 or an SMC_CLC_DECL_* reason code / negative error.
 */
static int smcr_clnt_conf_first_link(struct smc_sock *smc)
{
	struct smc_link *link = smc->conn.lnk;
	struct smc_llc_qentry *qentry;
	int rc;

	/* receive CONFIRM LINK request from server over RoCE fabric */
	qentry = smc_llc_wait(link->lgr, NULL, SMC_LLC_WAIT_TIME,
			      SMC_LLC_CONFIRM_LINK);
	if (!qentry) {
		struct smc_clc_msg_decline dclc;

		/* no CONFIRM LINK: check whether the peer sent a decline */
		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
				      SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
		return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_CL : rc;
	}
	smc_llc_save_peer_uid(qentry);
	rc = smc_llc_eval_conf_link(qentry, SMC_LLC_REQ);
	smc_llc_flow_qentry_del(&link->lgr->llc_flow_lcl);
	if (rc)
		return SMC_CLC_DECL_RMBE_EC;

	rc = smc_ib_modify_qp_rts(link);
	if (rc)
		return SMC_CLC_DECL_ERR_RDYLNK;

	smc_wr_remember_qp_attr(link);

	if (smcr_link_reg_rmb(link, smc->conn.rmb_desc))
		return SMC_CLC_DECL_ERR_REGRMB;

	/* confirm_rkey is implicit on 1st contact */
	smc->conn.rmb_desc->is_conf_rkey = true;

	/* send CONFIRM LINK response over RoCE fabric */
	rc = smc_llc_send_confirm_link(link, SMC_LLC_RESP);
	if (rc < 0)
		return SMC_CLC_DECL_TIMEOUT_CL;

	smc_llc_link_active(link);
	smcr_lgr_set_type(link->lgr, SMC_LGR_SINGLE);

	/* optional 2nd link, receive ADD LINK request from server */
	qentry = smc_llc_wait(link->lgr, NULL, SMC_LLC_WAIT_TIME,
			      SMC_LLC_ADD_LINK);
	if (!qentry) {
		struct smc_clc_msg_decline dclc;

		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
				      SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
		if (rc == -EAGAIN)
			rc = 0; /* no DECLINE received, go with one link */
		return rc;
	}
	smc_llc_flow_qentry_clr(&link->lgr->llc_flow_lcl);
	smc_llc_cli_add_link(link, qentry);
	return 0;
}
435
436 static void smcr_conn_save_peer_info(struct smc_sock *smc,
437                                      struct smc_clc_msg_accept_confirm *clc)
438 {
439         int bufsize = smc_uncompress_bufsize(clc->r0.rmbe_size);
440
441         smc->conn.peer_rmbe_idx = clc->r0.rmbe_idx;
442         smc->conn.local_tx_ctrl.token = ntohl(clc->r0.rmbe_alert_token);
443         smc->conn.peer_rmbe_size = bufsize;
444         atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
445         smc->conn.tx_off = bufsize * (smc->conn.peer_rmbe_idx - 1);
446 }
447
448 static void smcd_conn_save_peer_info(struct smc_sock *smc,
449                                      struct smc_clc_msg_accept_confirm *clc)
450 {
451         int bufsize = smc_uncompress_bufsize(clc->d0.dmbe_size);
452
453         smc->conn.peer_rmbe_idx = clc->d0.dmbe_idx;
454         smc->conn.peer_token = clc->d0.token;
455         /* msg header takes up space in the buffer */
456         smc->conn.peer_rmbe_size = bufsize - sizeof(struct smcd_cdc_msg);
457         atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
458         smc->conn.tx_off = bufsize * smc->conn.peer_rmbe_idx;
459 }
460
461 static void smc_conn_save_peer_info(struct smc_sock *smc,
462                                     struct smc_clc_msg_accept_confirm *clc)
463 {
464         if (smc->conn.lgr->is_smcd)
465                 smcd_conn_save_peer_info(smc, clc);
466         else
467                 smcr_conn_save_peer_info(smc, clc);
468 }
469
470 static void smc_link_save_peer_info(struct smc_link *link,
471                                     struct smc_clc_msg_accept_confirm *clc)
472 {
473         link->peer_qpn = ntoh24(clc->r0.qpn);
474         memcpy(link->peer_gid, clc->r0.lcl.gid, SMC_GID_SIZE);
475         memcpy(link->peer_mac, clc->r0.lcl.mac, sizeof(link->peer_mac));
476         link->peer_psn = ntoh24(clc->r0.psn);
477         link->peer_mtu = clc->r0.qp_mtu;
478 }
479
/* switch this socket to TCP fallback: hand the struct file over to the
 * clcsock and mirror the fasync list, so file operations reach the TCP
 * socket from now on
 */
static void smc_switch_to_fallback(struct smc_sock *smc)
{
	smc->use_fallback = true;
	if (smc->sk.sk_socket && smc->sk.sk_socket->file) {
		smc->clcsock->file = smc->sk.sk_socket->file;
		smc->clcsock->file->private_data = smc->clcsock;
		smc->clcsock->wq.fasync_list =
			smc->sk.sk_socket->wq.fasync_list;
	}
}
490
491 /* fall back during connect */
492 static int smc_connect_fallback(struct smc_sock *smc, int reason_code)
493 {
494         smc_switch_to_fallback(smc);
495         smc->fallback_rsn = reason_code;
496         smc_copy_sock_settings_to_clc(smc);
497         smc->connect_nonblock = 0;
498         if (smc->sk.sk_state == SMC_INIT)
499                 smc->sk.sk_state = SMC_ACTIVE;
500         return 0;
501 }
502
/* decline and fall back during connect: send a CLC decline to the peer
 * (unless the peer itself already declined) and switch to TCP fallback.
 * A negative reason_code is a hard error, so no fallback is possible.
 */
static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code)
{
	int rc;

	if (reason_code < 0) { /* error, fallback is not possible */
		if (smc->sk.sk_state == SMC_INIT)
			sock_put(&smc->sk); /* passive closing */
		return reason_code;
	}
	if (reason_code != SMC_CLC_DECL_PEERDECL) {
		rc = smc_clc_send_decline(smc, reason_code);
		if (rc < 0) {
			if (smc->sk.sk_state == SMC_INIT)
				sock_put(&smc->sk); /* passive closing */
			return rc;
		}
	}
	return smc_connect_fallback(smc, reason_code);
}
523
/* abort connecting: clean up the (partially created) connection and
 * release the lgr pending lock the caller is holding; passes
 * @reason_code through unchanged so callers can return it directly
 */
static int smc_connect_abort(struct smc_sock *smc, int reason_code,
			     int local_first)
{
	bool is_smcd = smc->conn.lgr->is_smcd;

	if (local_first)
		smc_lgr_cleanup_early(&smc->conn);
	else
		smc_conn_free(&smc->conn);
	if (is_smcd)
		/* there is only one lgr role for SMC-D; use server lock */
		mutex_unlock(&smc_server_lgr_pending);
	else
		mutex_unlock(&smc_client_lgr_pending);

	smc->connect_nonblock = 0;
	return reason_code;
}
543
544 /* check if there is a rdma device available for this connection. */
545 /* called for connect and listen */
546 static int smc_find_rdma_device(struct smc_sock *smc, struct smc_init_info *ini)
547 {
548         /* PNET table look up: search active ib_device and port
549          * within same PNETID that also contains the ethernet device
550          * used for the internal TCP socket
551          */
552         smc_pnet_find_roce_resource(smc->clcsock->sk, ini);
553         if (!ini->ib_dev)
554                 return SMC_CLC_DECL_NOSMCRDEV;
555         return 0;
556 }
557
558 /* check if there is an ISM device available for this connection. */
559 /* called for connect and listen */
560 static int smc_find_ism_device(struct smc_sock *smc, struct smc_init_info *ini)
561 {
562         /* Find ISM device with same PNETID as connecting interface  */
563         smc_pnet_find_ism_resource(smc->clcsock->sk, ini);
564         if (!ini->ism_dev)
565                 return SMC_CLC_DECL_NOSMCDDEV;
566         return 0;
567 }
568
569 /* Check for VLAN ID and register it on ISM device just for CLC handshake */
570 static int smc_connect_ism_vlan_setup(struct smc_sock *smc,
571                                       struct smc_init_info *ini)
572 {
573         if (ini->vlan_id && smc_ism_get_vlan(ini->ism_dev, ini->vlan_id))
574                 return SMC_CLC_DECL_ISMVLANERR;
575         return 0;
576 }
577
578 /* cleanup temporary VLAN ID registration used for CLC handshake. If ISM is
579  * used, the VLAN ID will be registered again during the connection setup.
580  */
581 static int smc_connect_ism_vlan_cleanup(struct smc_sock *smc, bool is_smcd,
582                                         struct smc_init_info *ini)
583 {
584         if (!is_smcd)
585                 return 0;
586         if (ini->vlan_id && smc_ism_put_vlan(ini->ism_dev, ini->vlan_id))
587                 return SMC_CLC_DECL_CNFERR;
588         return 0;
589 }
590
591 /* CLC handshake during connect */
592 static int smc_connect_clc(struct smc_sock *smc, int smc_type,
593                            struct smc_clc_msg_accept_confirm *aclc,
594                            struct smc_init_info *ini)
595 {
596         int rc = 0;
597
598         /* do inband token exchange */
599         rc = smc_clc_send_proposal(smc, smc_type, ini);
600         if (rc)
601                 return rc;
602         /* receive SMC Accept CLC message */
603         return smc_clc_wait_msg(smc, aclc, sizeof(*aclc), SMC_CLC_ACCEPT,
604                                 CLC_WAIT_TIME);
605 }
606
/* setup for RDMA connection of client: create or join a link group,
 * set up buffers and rtokens, confirm to the peer and — on first
 * contact — run the LLC CONFIRM LINK handshake.
 * Note: the error paths return via smc_connect_abort(), which drops
 * smc_client_lgr_pending for us.
 */
static int smc_connect_rdma(struct smc_sock *smc,
			    struct smc_clc_msg_accept_confirm *aclc,
			    struct smc_init_info *ini)
{
	int i, reason_code = 0;
	struct smc_link *link;

	ini->is_smcd = false;
	ini->ib_lcl = &aclc->r0.lcl;
	ini->ib_clcqpn = ntoh24(aclc->r0.qpn);
	ini->first_contact_peer = aclc->hdr.flag;

	/* serialize link group creation against other clients */
	mutex_lock(&smc_client_lgr_pending);
	reason_code = smc_conn_create(smc, ini);
	if (reason_code) {
		mutex_unlock(&smc_client_lgr_pending);
		return reason_code;
	}

	smc_conn_save_peer_info(smc, aclc);

	if (ini->first_contact_local) {
		link = smc->conn.lnk;
	} else {
		/* set link that was assigned by server */
		link = NULL;
		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
			struct smc_link *l = &smc->conn.lgr->lnk[i];

			/* match by QP number, GID and MAC of the peer */
			if (l->peer_qpn == ntoh24(aclc->r0.qpn) &&
			    !memcmp(l->peer_gid, &aclc->r0.lcl.gid,
				    SMC_GID_SIZE) &&
			    !memcmp(l->peer_mac, &aclc->r0.lcl.mac,
				    sizeof(l->peer_mac))) {
				link = l;
				break;
			}
		}
		if (!link)
			return smc_connect_abort(smc, SMC_CLC_DECL_NOSRVLINK,
						 ini->first_contact_local);
		smc->conn.lnk = link;
	}

	/* create send buffer and rmb */
	if (smc_buf_create(smc, false))
		return smc_connect_abort(smc, SMC_CLC_DECL_MEM,
					 ini->first_contact_local);

	if (ini->first_contact_local)
		smc_link_save_peer_info(link, aclc);

	if (smc_rmb_rtoken_handling(&smc->conn, link, aclc))
		return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RTOK,
					 ini->first_contact_local);

	smc_close_init(smc);
	smc_rx_init(smc);

	if (ini->first_contact_local) {
		if (smc_ib_ready_link(link))
			return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RDYLNK,
						 ini->first_contact_local);
	} else {
		/* reusing an existing link group: register the rmb on all
		 * links and exchange confirm_rkey with the peer
		 */
		if (smcr_lgr_reg_rmbs(link, smc->conn.rmb_desc))
			return smc_connect_abort(smc, SMC_CLC_DECL_ERR_REGRMB,
						 ini->first_contact_local);
	}
	smc_rmb_sync_sg_for_device(&smc->conn);

	reason_code = smc_clc_send_confirm(smc);
	if (reason_code)
		return smc_connect_abort(smc, reason_code,
					 ini->first_contact_local);

	smc_tx_init(smc);

	if (ini->first_contact_local) {
		/* QP confirmation over RoCE fabric */
		smc_llc_flow_initiate(link->lgr, SMC_LLC_FLOW_ADD_LINK);
		reason_code = smcr_clnt_conf_first_link(smc);
		smc_llc_flow_stop(link->lgr, &link->lgr->llc_flow_lcl);
		if (reason_code)
			return smc_connect_abort(smc, reason_code,
						 ini->first_contact_local);
	}
	mutex_unlock(&smc_client_lgr_pending);

	smc_copy_sock_settings_to_clc(smc);
	smc->connect_nonblock = 0;
	if (smc->sk.sk_state == SMC_INIT)
		smc->sk.sk_state = SMC_ACTIVE;

	return 0;
}
703
/* setup for ISM connection of client: create or join a link group,
 * create buffers and confirm the connection to the peer.
 * Note: the error paths return via smc_connect_abort(), which drops
 * smc_server_lgr_pending for us.
 */
static int smc_connect_ism(struct smc_sock *smc,
			   struct smc_clc_msg_accept_confirm *aclc,
			   struct smc_init_info *ini)
{
	int rc = 0;

	ini->is_smcd = true;
	ini->ism_peer_gid = aclc->d0.gid;
	ini->first_contact_peer = aclc->hdr.flag;

	/* there is only one lgr role for SMC-D; use server lock */
	mutex_lock(&smc_server_lgr_pending);
	rc = smc_conn_create(smc, ini);
	if (rc) {
		mutex_unlock(&smc_server_lgr_pending);
		return rc;
	}

	/* Create send and receive buffers */
	rc = smc_buf_create(smc, true);
	if (rc)
		/* -ENOSPC means no free DMB slot is left on the device */
		return smc_connect_abort(smc, (rc == -ENOSPC) ?
					      SMC_CLC_DECL_MAX_DMB :
					      SMC_CLC_DECL_MEM,
					 ini->first_contact_local);

	smc_conn_save_peer_info(smc, aclc);
	smc_close_init(smc);
	smc_rx_init(smc);
	smc_tx_init(smc);

	rc = smc_clc_send_confirm(smc);
	if (rc)
		return smc_connect_abort(smc, rc, ini->first_contact_local);
	mutex_unlock(&smc_server_lgr_pending);

	smc_copy_sock_settings_to_clc(smc);
	smc->connect_nonblock = 0;
	if (smc->sk.sk_state == SMC_INIT)
		smc->sk.sk_state = SMC_ACTIVE;

	return 0;
}
748
/* perform steps before actually connecting: determine the available
 * transports (ISM and/or RDMA), run the CLC handshake and hand off to
 * the RDMA or ISM connect path; falls back to plain TCP (with or
 * without a decline message) when SMC is not possible
 */
static int __smc_connect(struct smc_sock *smc)
{
	bool ism_supported = false, rdma_supported = false;
	struct smc_clc_msg_accept_confirm aclc;
	struct smc_init_info ini = {0};
	int smc_type;
	int rc = 0;

	if (smc->use_fallback)
		return smc_connect_fallback(smc, smc->fallback_rsn);

	/* if peer has not signalled SMC-capability, fall back */
	if (!tcp_sk(smc->clcsock->sk)->syn_smc)
		return smc_connect_fallback(smc, SMC_CLC_DECL_PEERNOSMC);

	/* IPSec connections opt out of SMC-R optimizations */
	if (using_ipsec(smc))
		return smc_connect_decline_fallback(smc, SMC_CLC_DECL_IPSEC);

	/* get vlan id from IP device */
	if (smc_vlan_by_tcpsk(smc->clcsock, &ini))
		return smc_connect_decline_fallback(smc,
						    SMC_CLC_DECL_GETVLANERR);

	/* check if there is an ism device available */
	if (!smc_find_ism_device(smc, &ini) &&
	    !smc_connect_ism_vlan_setup(smc, &ini)) {
		/* ISM is supported for this connection */
		ism_supported = true;
		smc_type = SMC_TYPE_D;
	}

	/* check if there is a rdma device available */
	if (!smc_find_rdma_device(smc, &ini)) {
		/* RDMA is supported for this connection */
		rdma_supported = true;
		if (ism_supported)
			smc_type = SMC_TYPE_B; /* both */
		else
			smc_type = SMC_TYPE_R; /* only RDMA */
	}

	/* if neither ISM nor RDMA are supported, fallback */
	if (!rdma_supported && !ism_supported)
		return smc_connect_decline_fallback(smc, SMC_CLC_DECL_NOSMCDEV);

	/* perform CLC handshake */
	rc = smc_connect_clc(smc, smc_type, &aclc, &ini);
	if (rc) {
		smc_connect_ism_vlan_cleanup(smc, ism_supported, &ini);
		return smc_connect_decline_fallback(smc, rc);
	}

	/* depending on previous steps, connect using rdma or ism */
	if (rdma_supported && aclc.hdr.path == SMC_TYPE_R)
		rc = smc_connect_rdma(smc, &aclc, &ini);
	else if (ism_supported && aclc.hdr.path == SMC_TYPE_D)
		rc = smc_connect_ism(smc, &aclc, &ini);
	else
		rc = SMC_CLC_DECL_MODEUNSUPP;
	if (rc) {
		smc_connect_ism_vlan_cleanup(smc, ism_supported, &ini);
		return smc_connect_decline_fallback(smc, rc);
	}

	/* the temporary CLC-handshake VLAN registration is no longer needed */
	smc_connect_ism_vlan_cleanup(smc, ism_supported, &ini);
	return 0;
}
818
819 static void smc_connect_work(struct work_struct *work)
820 {
821         struct smc_sock *smc = container_of(work, struct smc_sock,
822                                             connect_work);
823         long timeo = smc->sk.sk_sndtimeo;
824         int rc = 0;
825
826         if (!timeo)
827                 timeo = MAX_SCHEDULE_TIMEOUT;
828         lock_sock(smc->clcsock->sk);
829         if (smc->clcsock->sk->sk_err) {
830                 smc->sk.sk_err = smc->clcsock->sk->sk_err;
831         } else if ((1 << smc->clcsock->sk->sk_state) &
832                                         (TCPF_SYN_SENT | TCP_SYN_RECV)) {
833                 rc = sk_stream_wait_connect(smc->clcsock->sk, &timeo);
834                 if ((rc == -EPIPE) &&
835                     ((1 << smc->clcsock->sk->sk_state) &
836                                         (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)))
837                         rc = 0;
838         }
839         release_sock(smc->clcsock->sk);
840         lock_sock(&smc->sk);
841         if (rc != 0 || smc->sk.sk_err) {
842                 smc->sk.sk_state = SMC_CLOSED;
843                 if (rc == -EPIPE || rc == -EAGAIN)
844                         smc->sk.sk_err = EPIPE;
845                 else if (signal_pending(current))
846                         smc->sk.sk_err = -sock_intr_errno(timeo);
847                 sock_put(&smc->sk); /* passive closing */
848                 goto out;
849         }
850
851         rc = __smc_connect(smc);
852         if (rc < 0)
853                 smc->sk.sk_err = -rc;
854
855 out:
856         if (!sock_flag(&smc->sk, SOCK_DEAD)) {
857                 if (smc->sk.sk_err) {
858                         smc->sk.sk_state_change(&smc->sk);
859                 } else { /* allow polling before and after fallback decision */
860                         smc->clcsock->sk->sk_write_space(smc->clcsock->sk);
861                         smc->sk.sk_write_space(&smc->sk);
862                 }
863         }
864         release_sock(&smc->sk);
865 }
866
/* connect the SMC socket: connect the internal TCP (clc) socket first,
 * then either run the CLC handshake synchronously, or - for O_NONBLOCK -
 * hand it off to smc->connect_work
 */
static int smc_connect(struct socket *sock, struct sockaddr *addr,
                       int alen, int flags)
{
        struct sock *sk = sock->sk;
        struct smc_sock *smc;
        int rc = -EINVAL;

        smc = smc_sk(sk);

        /* separate smc parameter checking to be safe */
        if (alen < sizeof(addr->sa_family))
                goto out_err;
        if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6)
                goto out_err;

        lock_sock(sk);
        switch (sk->sk_state) {
        default:
                goto out;
        case SMC_ACTIVE:
                rc = -EISCONN;
                goto out;
        case SMC_INIT:
                rc = 0;
                break;
        }

        smc_copy_sock_settings_to_clc(smc);
        /* request SMC capability in the TCP SYN */
        tcp_sk(smc->clcsock->sk)->syn_smc = 1;
        if (smc->connect_nonblock) {
                /* a previous nonblocking connect is still in progress */
                rc = -EALREADY;
                goto out;
        }
        rc = kernel_connect(smc->clcsock, addr, alen, flags);
        if (rc && rc != -EINPROGRESS)
                goto out;

        sock_hold(&smc->sk); /* sock put in passive closing */
        if (smc->use_fallback)
                goto out;
        if (flags & O_NONBLOCK) {
                /* defer the CLC handshake to the connect worker */
                if (schedule_work(&smc->connect_work))
                        smc->connect_nonblock = 1;
                rc = -EINPROGRESS;
        } else {
                rc = __smc_connect(smc);
                if (rc < 0)
                        goto out;
                else
                        rc = 0; /* success cases including fallback */
        }

out:
        release_sock(sk);
out_err:
        return rc;
}
924
/* accept one connection from the internal TCP (clc) listen socket and
 * allocate a matching new smc sock for it; called with the listen sock
 * lock held - the lock is dropped while sleeping in alloc/accept and
 * reacquired before returning
 * Returns 0 on success, -EAGAIN if the accept queue is empty, or a
 * negative error; *new_smc is NULL on failure.
 */
static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc)
{
        struct socket *new_clcsock = NULL;
        struct sock *lsk = &lsmc->sk;
        struct sock *new_sk;
        int rc = -EINVAL;

        release_sock(lsk);
        new_sk = smc_sock_alloc(sock_net(lsk), NULL, lsk->sk_protocol);
        if (!new_sk) {
                rc = -ENOMEM;
                lsk->sk_err = ENOMEM;
                *new_smc = NULL;
                lock_sock(lsk);
                goto out;
        }
        *new_smc = smc_sk(new_sk);

        /* clcsock_release_lock protects against a concurrent release of
         * the clc socket
         */
        mutex_lock(&lsmc->clcsock_release_lock);
        if (lsmc->clcsock)
                rc = kernel_accept(lsmc->clcsock, &new_clcsock, SOCK_NONBLOCK);
        mutex_unlock(&lsmc->clcsock_release_lock);
        lock_sock(lsk);
        if  (rc < 0 && rc != -EAGAIN)
                lsk->sk_err = -rc;
        if (rc < 0 || lsk->sk_state == SMC_CLOSED) {
                /* accept failed or listen sock was closed meanwhile:
                 * dispose of the preallocated new sock
                 */
                new_sk->sk_prot->unhash(new_sk);
                if (new_clcsock)
                        sock_release(new_clcsock);
                new_sk->sk_state = SMC_CLOSED;
                sock_set_flag(new_sk, SOCK_DEAD);
                sock_put(new_sk); /* final */
                *new_smc = NULL;
                goto out;
        }

        /* new clcsock has inherited the smc listen-specific sk_data_ready
         * function; switch it back to the original sk_data_ready function
         */
        new_clcsock->sk->sk_data_ready = lsmc->clcsk_data_ready;
        (*new_smc)->clcsock = new_clcsock;
out:
        return rc;
}
969
970 /* add a just created sock to the accept queue of the listen sock as
971  * candidate for a following socket accept call from user space
972  */
973 static void smc_accept_enqueue(struct sock *parent, struct sock *sk)
974 {
975         struct smc_sock *par = smc_sk(parent);
976
977         sock_hold(sk); /* sock_put in smc_accept_unlink () */
978         spin_lock(&par->accept_q_lock);
979         list_add_tail(&smc_sk(sk)->accept_q, &par->accept_q);
980         spin_unlock(&par->accept_q_lock);
981         sk_acceptq_added(parent);
982 }
983
984 /* remove a socket from the accept queue of its parental listening socket */
985 static void smc_accept_unlink(struct sock *sk)
986 {
987         struct smc_sock *par = smc_sk(sk)->listen_smc;
988
989         spin_lock(&par->accept_q_lock);
990         list_del_init(&smc_sk(sk)->accept_q);
991         spin_unlock(&par->accept_q_lock);
992         sk_acceptq_removed(&smc_sk(sk)->listen_smc->sk);
993         sock_put(sk); /* sock_hold in smc_accept_enqueue */
994 }
995
/* remove a sock from the accept queue to bind it to a new socket created
 * for a socket accept call from user space
 * Returns the dequeued sock, or NULL if the queue holds no usable entry;
 * entries that went to SMC_CLOSED meanwhile are released on the way.
 */
struct sock *smc_accept_dequeue(struct sock *parent,
                                struct socket *new_sock)
{
        struct smc_sock *isk, *n;
        struct sock *new_sk;

        list_for_each_entry_safe(isk, n, &smc_sk(parent)->accept_q, accept_q) {
                new_sk = (struct sock *)isk;

                smc_accept_unlink(new_sk);
                if (new_sk->sk_state == SMC_CLOSED) {
                        /* child closed before being accepted; dispose of it
                         * and keep scanning the queue
                         */
                        new_sk->sk_prot->unhash(new_sk);
                        if (isk->clcsock) {
                                sock_release(isk->clcsock);
                                isk->clcsock = NULL;
                        }
                        sock_put(new_sk); /* final */
                        continue;
                }
                if (new_sock) {
                        sock_graft(new_sk, new_sock);
                        if (isk->use_fallback) {
                                /* fallback connections are served via the
                                 * clc socket's file
                                 */
                                smc_sk(new_sk)->clcsock->file = new_sock->file;
                                isk->clcsock->file->private_data = isk->clcsock;
                        }
                }
                return new_sk;
        }
        return NULL;
}
1029
/* clean up for a created but never accepted sock; closes the sock and
 * drops its final reference
 */
void smc_close_non_accepted(struct sock *sk)
{
        struct smc_sock *smc = smc_sk(sk);

        sock_hold(sk); /* sock_put below */
        lock_sock(sk);
        if (!sk->sk_lingertime)
                /* wait for peer closing */
                sk->sk_lingertime = SMC_MAX_STREAM_WAIT_TIMEOUT;
        __smc_release(smc);
        release_sock(sk);
        sock_put(sk); /* sock_hold above */
        sock_put(sk); /* final sock_put */
}
1045
/* server side: complete the CONFIRM LINK handshake on the first link of
 * a new link group and try to add a second link
 * Returns 0 or a positive SMC_CLC_DECL_* reason code / negative error.
 */
static int smcr_serv_conf_first_link(struct smc_sock *smc)
{
        struct smc_link *link = smc->conn.lnk;
        struct smc_llc_qentry *qentry;
        int rc;

        /* the RMB must be registered with the new link before use */
        if (smcr_link_reg_rmb(link, smc->conn.rmb_desc))
                return SMC_CLC_DECL_ERR_REGRMB;

        /* send CONFIRM LINK request to client over the RoCE fabric */
        rc = smc_llc_send_confirm_link(link, SMC_LLC_REQ);
        if (rc < 0)
                return SMC_CLC_DECL_TIMEOUT_CL;

        /* receive CONFIRM LINK response from client over the RoCE fabric */
        qentry = smc_llc_wait(link->lgr, link, SMC_LLC_WAIT_TIME,
                              SMC_LLC_CONFIRM_LINK);
        if (!qentry) {
                struct smc_clc_msg_decline dclc;

                /* no LLC response; check whether the client declined */
                rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
                                      SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
                return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_CL : rc;
        }
        smc_llc_save_peer_uid(qentry);
        rc = smc_llc_eval_conf_link(qentry, SMC_LLC_RESP);
        smc_llc_flow_qentry_del(&link->lgr->llc_flow_lcl);
        if (rc)
                return SMC_CLC_DECL_RMBE_EC;

        /* confirm_rkey is implicit on 1st contact */
        smc->conn.rmb_desc->is_conf_rkey = true;

        smc_llc_link_active(link);
        smcr_lgr_set_type(link->lgr, SMC_LGR_SINGLE);

        /* initial contact - try to establish second link */
        smc_llc_srv_add_link(link);
        return 0;
}
1086
1087 /* listen worker: finish */
1088 static void smc_listen_out(struct smc_sock *new_smc)
1089 {
1090         struct smc_sock *lsmc = new_smc->listen_smc;
1091         struct sock *newsmcsk = &new_smc->sk;
1092
1093         if (lsmc->sk.sk_state == SMC_LISTEN) {
1094                 lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING);
1095                 smc_accept_enqueue(&lsmc->sk, newsmcsk);
1096                 release_sock(&lsmc->sk);
1097         } else { /* no longer listening */
1098                 smc_close_non_accepted(newsmcsk);
1099         }
1100
1101         /* Wake up accept */
1102         lsmc->sk.sk_data_ready(&lsmc->sk);
1103         sock_put(&lsmc->sk); /* sock_hold in smc_tcp_listen_work */
1104 }
1105
1106 /* listen worker: finish in state connected */
1107 static void smc_listen_out_connected(struct smc_sock *new_smc)
1108 {
1109         struct sock *newsmcsk = &new_smc->sk;
1110
1111         sk_refcnt_debug_inc(newsmcsk);
1112         if (newsmcsk->sk_state == SMC_INIT)
1113                 newsmcsk->sk_state = SMC_ACTIVE;
1114
1115         smc_listen_out(new_smc);
1116 }
1117
1118 /* listen worker: finish in error state */
1119 static void smc_listen_out_err(struct smc_sock *new_smc)
1120 {
1121         struct sock *newsmcsk = &new_smc->sk;
1122
1123         if (newsmcsk->sk_state == SMC_INIT)
1124                 sock_put(&new_smc->sk); /* passive closing */
1125         newsmcsk->sk_state = SMC_CLOSED;
1126
1127         smc_listen_out(new_smc);
1128 }
1129
1130 /* listen worker: decline and fall back if possible */
1131 static void smc_listen_decline(struct smc_sock *new_smc, int reason_code,
1132                                bool local_first)
1133 {
1134         /* RDMA setup failed, switch back to TCP */
1135         if (local_first)
1136                 smc_lgr_cleanup_early(&new_smc->conn);
1137         else
1138                 smc_conn_free(&new_smc->conn);
1139         if (reason_code < 0) { /* error, no fallback possible */
1140                 smc_listen_out_err(new_smc);
1141                 return;
1142         }
1143         smc_switch_to_fallback(new_smc);
1144         new_smc->fallback_rsn = reason_code;
1145         if (reason_code && reason_code != SMC_CLC_DECL_PEERDECL) {
1146                 if (smc_clc_send_decline(new_smc, reason_code) < 0) {
1147                         smc_listen_out_err(new_smc);
1148                         return;
1149                 }
1150         }
1151         smc_listen_out_connected(new_smc);
1152 }
1153
1154 /* listen worker: check prefixes */
1155 static int smc_listen_prfx_check(struct smc_sock *new_smc,
1156                                  struct smc_clc_msg_proposal *pclc)
1157 {
1158         struct smc_clc_msg_proposal_prefix *pclc_prfx;
1159         struct socket *newclcsock = new_smc->clcsock;
1160
1161         pclc_prfx = smc_clc_proposal_get_prefix(pclc);
1162         if (smc_clc_prfx_match(newclcsock, pclc_prfx))
1163                 return SMC_CLC_DECL_DIFFPREFIX;
1164
1165         return 0;
1166 }
1167
1168 /* listen worker: initialize connection and buffers */
1169 static int smc_listen_rdma_init(struct smc_sock *new_smc,
1170                                 struct smc_init_info *ini)
1171 {
1172         int rc;
1173
1174         /* allocate connection / link group */
1175         rc = smc_conn_create(new_smc, ini);
1176         if (rc)
1177                 return rc;
1178
1179         /* create send buffer and rmb */
1180         if (smc_buf_create(new_smc, false))
1181                 return SMC_CLC_DECL_MEM;
1182
1183         return 0;
1184 }
1185
1186 /* listen worker: initialize connection and buffers for SMC-D */
1187 static int smc_listen_ism_init(struct smc_sock *new_smc,
1188                                struct smc_clc_msg_proposal *pclc,
1189                                struct smc_init_info *ini)
1190 {
1191         struct smc_clc_msg_smcd *pclc_smcd;
1192         int rc;
1193
1194         pclc_smcd = smc_get_clc_msg_smcd(pclc);
1195         ini->ism_peer_gid = pclc_smcd->gid;
1196         rc = smc_conn_create(new_smc, ini);
1197         if (rc)
1198                 return rc;
1199
1200         /* Check if peer can be reached via ISM device */
1201         if (smc_ism_cantalk(new_smc->conn.lgr->peer_gid,
1202                             new_smc->conn.lgr->vlan_id,
1203                             new_smc->conn.lgr->smcd)) {
1204                 if (ini->first_contact_local)
1205                         smc_lgr_cleanup_early(&new_smc->conn);
1206                 else
1207                         smc_conn_free(&new_smc->conn);
1208                 return SMC_CLC_DECL_SMCDNOTALK;
1209         }
1210
1211         /* Create send and receive buffers */
1212         rc = smc_buf_create(new_smc, true);
1213         if (rc) {
1214                 if (ini->first_contact_local)
1215                         smc_lgr_cleanup_early(&new_smc->conn);
1216                 else
1217                         smc_conn_free(&new_smc->conn);
1218                 return (rc == -ENOSPC) ? SMC_CLC_DECL_MAX_DMB :
1219                                          SMC_CLC_DECL_MEM;
1220         }
1221
1222         return 0;
1223 }
1224
1225 /* listen worker: register buffers */
1226 static int smc_listen_rdma_reg(struct smc_sock *new_smc, bool local_first)
1227 {
1228         struct smc_connection *conn = &new_smc->conn;
1229
1230         if (!local_first) {
1231                 if (smcr_lgr_reg_rmbs(conn->lnk, conn->rmb_desc))
1232                         return SMC_CLC_DECL_ERR_REGRMB;
1233         }
1234         smc_rmb_sync_sg_for_device(&new_smc->conn);
1235
1236         return 0;
1237 }
1238
/* listen worker: finish RDMA setup
 * Returns 0 on success; otherwise declines the connection and returns
 * the reason code.
 */
static int smc_listen_rdma_finish(struct smc_sock *new_smc,
                                  struct smc_clc_msg_accept_confirm *cclc,
                                  bool local_first)
{
        struct smc_link *link = new_smc->conn.lnk;
        int reason_code = 0;

        if (local_first)
                smc_link_save_peer_info(link, cclc);

        /* store the peer's rtoken for later RDMA writes */
        if (smc_rmb_rtoken_handling(&new_smc->conn, link, cclc)) {
                reason_code = SMC_CLC_DECL_ERR_RTOK;
                goto decline;
        }

        if (local_first) {
                /* first contact: bring the QP into ready-to-send state */
                if (smc_ib_ready_link(link)) {
                        reason_code = SMC_CLC_DECL_ERR_RDYLNK;
                        goto decline;
                }
                /* QP confirmation over RoCE fabric */
                smc_llc_flow_initiate(link->lgr, SMC_LLC_FLOW_ADD_LINK);
                reason_code = smcr_serv_conf_first_link(new_smc);
                smc_llc_flow_stop(link->lgr, &link->lgr->llc_flow_lcl);
                if (reason_code)
                        goto decline;
        }
        return 0;

decline:
        smc_listen_decline(new_smc, reason_code, local_first);
        return reason_code;
}
1273
/* listen worker: perform the server side of the CLC handshake for one
 * incoming connection - determine ISM/RDMA capability, set up the
 * connection, and exchange Accept/Confirm messages; finishes via one of
 * the smc_listen_out*() helpers
 */
static void smc_listen_work(struct work_struct *work)
{
        struct smc_sock *new_smc = container_of(work, struct smc_sock,
                                                smc_listen_work);
        struct socket *newclcsock = new_smc->clcsock;
        struct smc_clc_msg_accept_confirm cclc;
        struct smc_clc_msg_proposal_area *buf;
        struct smc_clc_msg_proposal *pclc;
        struct smc_init_info ini = {0};
        bool ism_supported = false;
        int rc = 0;

        if (new_smc->listen_smc->sk.sk_state != SMC_LISTEN)
                return smc_listen_out_err(new_smc);

        if (new_smc->use_fallback) {
                smc_listen_out_connected(new_smc);
                return;
        }

        /* check if peer is smc capable */
        if (!tcp_sk(newclcsock->sk)->syn_smc) {
                smc_switch_to_fallback(new_smc);
                new_smc->fallback_rsn = SMC_CLC_DECL_PEERNOSMC;
                smc_listen_out_connected(new_smc);
                return;
        }

        /* do inband token exchange -
         * wait for and receive SMC Proposal CLC message
         */
        buf = kzalloc(sizeof(*buf), GFP_KERNEL);
        if (!buf) {
                rc = SMC_CLC_DECL_MEM;
                goto out_decl;
        }
        pclc = (struct smc_clc_msg_proposal *)buf;
        rc = smc_clc_wait_msg(new_smc, pclc, sizeof(*buf),
                              SMC_CLC_PROPOSAL, CLC_WAIT_TIME);
        if (rc)
                goto out_decl;

        /* IPSec connections opt out of SMC-R optimizations */
        if (using_ipsec(new_smc)) {
                rc = SMC_CLC_DECL_IPSEC;
                goto out_decl;
        }

        /* check for matching IP prefix and subnet length */
        rc = smc_listen_prfx_check(new_smc, pclc);
        if (rc)
                goto out_decl;

        /* get vlan id from IP device */
        if (smc_vlan_by_tcpsk(new_smc->clcsock, &ini)) {
                rc = SMC_CLC_DECL_GETVLANERR;
                goto out_decl;
        }

        /* serialize link group creation among server-side workers */
        mutex_lock(&smc_server_lgr_pending);
        smc_close_init(new_smc);
        smc_rx_init(new_smc);
        smc_tx_init(new_smc);

        /* check if ISM is available */
        if (pclc->hdr.path == SMC_TYPE_D || pclc->hdr.path == SMC_TYPE_B) {
                ini.is_smcd = true; /* prepare ISM check */
                rc = smc_find_ism_device(new_smc, &ini);
                if (!rc)
                        rc = smc_listen_ism_init(new_smc, pclc, &ini);
                if (!rc)
                        ism_supported = true;
                else if (pclc->hdr.path == SMC_TYPE_D)
                        goto out_unlock; /* skip RDMA and decline */
        }

        /* check if RDMA is available */
        if (!ism_supported) { /* SMC_TYPE_R or SMC_TYPE_B */
                /* prepare RDMA check */
                ini.is_smcd = false;
                ini.ism_dev = NULL;
                ini.ib_lcl = &pclc->lcl;
                rc = smc_find_rdma_device(new_smc, &ini);
                if (rc) {
                        /* no RDMA device found */
                        if (pclc->hdr.path == SMC_TYPE_B)
                                /* neither ISM nor RDMA device found */
                                rc = SMC_CLC_DECL_NOSMCDEV;
                        goto out_unlock;
                }
                rc = smc_listen_rdma_init(new_smc, &ini);
                if (rc)
                        goto out_unlock;
                rc = smc_listen_rdma_reg(new_smc, ini.first_contact_local);
                if (rc)
                        goto out_unlock;
        }

        /* send SMC Accept CLC message */
        rc = smc_clc_send_accept(new_smc, ini.first_contact_local);
        if (rc)
                goto out_unlock;

        /* SMC-D does not need this lock any more */
        if (ism_supported)
                mutex_unlock(&smc_server_lgr_pending);

        /* receive SMC Confirm CLC message */
        rc = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc),
                              SMC_CLC_CONFIRM, CLC_WAIT_TIME);
        if (rc) {
                if (!ism_supported)
                        goto out_unlock;
                goto out_decl;
        }

        /* finish worker */
        kfree(buf);
        if (!ism_supported) {
                rc = smc_listen_rdma_finish(new_smc, &cclc,
                                            ini.first_contact_local);
                /* lock is still held on the SMC-R path at this point */
                mutex_unlock(&smc_server_lgr_pending);
                if (rc)
                        return;
        }
        smc_conn_save_peer_info(new_smc, &cclc);
        smc_listen_out_connected(new_smc);
        return;

out_unlock:
        mutex_unlock(&smc_server_lgr_pending);
out_decl:
        smc_listen_decline(new_smc, rc, ini.first_contact_local);
        kfree(buf);
}
1410
/* worker draining the internal TCP listen socket: accept each pending
 * clc connection, create a child smc sock for it, and schedule
 * smc_listen_work() on that child
 */
static void smc_tcp_listen_work(struct work_struct *work)
{
        struct smc_sock *lsmc = container_of(work, struct smc_sock,
                                             tcp_listen_work);
        struct sock *lsk = &lsmc->sk;
        struct smc_sock *new_smc;
        int rc = 0;

        lock_sock(lsk);
        while (lsk->sk_state == SMC_LISTEN) {
                rc = smc_clcsock_accept(lsmc, &new_smc);
                if (rc) /* clcsock accept queue empty or error */
                        goto out;
                if (!new_smc)
                        continue;

                new_smc->listen_smc = lsmc;
                new_smc->use_fallback = lsmc->use_fallback;
                new_smc->fallback_rsn = lsmc->fallback_rsn;
                sock_hold(lsk); /* sock_put in smc_listen_work */
                INIT_WORK(&new_smc->smc_listen_work, smc_listen_work);
                smc_copy_sock_settings_to_smc(new_smc);
                new_smc->sk.sk_sndbuf = lsmc->sk.sk_sndbuf;
                new_smc->sk.sk_rcvbuf = lsmc->sk.sk_rcvbuf;
                sock_hold(&new_smc->sk); /* sock_put in passive closing */
                if (!schedule_work(&new_smc->smc_listen_work))
                        sock_put(&new_smc->sk);
        }

out:
        release_sock(lsk);
        sock_put(&lsmc->sk); /* sock_hold in smc_clcsock_data_ready() */
}
1444
1445 static void smc_clcsock_data_ready(struct sock *listen_clcsock)
1446 {
1447         struct smc_sock *lsmc;
1448
1449         lsmc = (struct smc_sock *)
1450                ((uintptr_t)listen_clcsock->sk_user_data & ~SK_USER_DATA_NOCOPY);
1451         if (!lsmc)
1452                 return;
1453         lsmc->clcsk_data_ready(listen_clcsock);
1454         if (lsmc->sk.sk_state == SMC_LISTEN) {
1455                 sock_hold(&lsmc->sk); /* sock_put in smc_tcp_listen_work() */
1456                 if (!schedule_work(&lsmc->tcp_listen_work))
1457                         sock_put(&lsmc->sk);
1458         }
1459 }
1460
/* put the SMC socket into listen state: listen on the internal TCP (clc)
 * socket and install the smc-specific data_ready callback on it
 */
static int smc_listen(struct socket *sock, int backlog)
{
        struct sock *sk = sock->sk;
        struct smc_sock *smc;
        int rc;

        smc = smc_sk(sk);
        lock_sock(sk);

        rc = -EINVAL;
        /* refuse while a nonblocking connect is still in flight */
        if ((sk->sk_state != SMC_INIT && sk->sk_state != SMC_LISTEN) ||
            smc->connect_nonblock)
                goto out;

        rc = 0;
        if (sk->sk_state == SMC_LISTEN) {
                /* already listening; just update the backlog */
                sk->sk_max_ack_backlog = backlog;
                goto out;
        }
        /* some socket options are handled in core, so we could not apply
         * them to the clc socket -- copy smc socket options to clc socket
         */
        smc_copy_sock_settings_to_clc(smc);
        if (!smc->use_fallback)
                tcp_sk(smc->clcsock->sk)->syn_smc = 1;

        /* save original sk_data_ready function and establish
         * smc-specific sk_data_ready function
         */
        smc->clcsk_data_ready = smc->clcsock->sk->sk_data_ready;
        smc->clcsock->sk->sk_data_ready = smc_clcsock_data_ready;
        smc->clcsock->sk->sk_user_data =
                (void *)((uintptr_t)smc | SK_USER_DATA_NOCOPY);
        rc = kernel_listen(smc->clcsock, backlog);
        if (rc)
                goto out;
        sk->sk_max_ack_backlog = backlog;
        sk->sk_ack_backlog = 0;
        sk->sk_state = SMC_LISTEN;

out:
        release_sock(sk);
        return rc;
}
1505
/* accept a connection on an SMC listen socket; waits (unless O_NONBLOCK)
 * until a child sock is available in the accept queue and optionally
 * honours TCP_DEFER_ACCEPT by waiting for first data
 */
static int smc_accept(struct socket *sock, struct socket *new_sock,
                      int flags, bool kern)
{
        struct sock *sk = sock->sk, *nsk;
        DECLARE_WAITQUEUE(wait, current);
        struct smc_sock *lsmc;
        long timeo;
        int rc = 0;

        lsmc = smc_sk(sk);
        sock_hold(sk); /* sock_put below */
        lock_sock(sk);

        if (lsmc->sk.sk_state != SMC_LISTEN) {
                rc = -EINVAL;
                release_sock(sk);
                goto out;
        }

        /* Wait for an incoming connection */
        timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
        add_wait_queue_exclusive(sk_sleep(sk), &wait);
        while (!(nsk = smc_accept_dequeue(sk, new_sock))) {
                set_current_state(TASK_INTERRUPTIBLE);
                if (!timeo) {
                        rc = -EAGAIN;
                        break;
                }
                /* drop the sock lock while sleeping */
                release_sock(sk);
                timeo = schedule_timeout(timeo);
                /* wakeup by sk_data_ready in smc_listen_work() */
                sched_annotate_sleep();
                lock_sock(sk);
                if (signal_pending(current)) {
                        rc = sock_intr_errno(timeo);
                        break;
                }
        }
        set_current_state(TASK_RUNNING);
        remove_wait_queue(sk_sleep(sk), &wait);

        if (!rc)
                rc = sock_error(nsk);
        release_sock(sk);
        if (rc)
                goto out;

        if (lsmc->sockopt_defer_accept && !(flags & O_NONBLOCK)) {
                /* wait till data arrives on the socket */
                timeo = msecs_to_jiffies(lsmc->sockopt_defer_accept *
                                                                MSEC_PER_SEC);
                if (smc_sk(nsk)->use_fallback) {
                        struct sock *clcsk = smc_sk(nsk)->clcsock->sk;

                        lock_sock(clcsk);
                        if (skb_queue_empty(&clcsk->sk_receive_queue))
                                sk_wait_data(clcsk, &timeo, NULL);
                        release_sock(clcsk);
                } else if (!atomic_read(&smc_sk(nsk)->conn.bytes_to_rcv)) {
                        lock_sock(nsk);
                        smc_rx_wait(smc_sk(nsk), &timeo, smc_rx_data_available);
                        release_sock(nsk);
                }
        }

out:
        sock_put(sk); /* sock_hold above */
        return rc;
}
1575
1576 static int smc_getname(struct socket *sock, struct sockaddr *addr,
1577                        int peer)
1578 {
1579         struct smc_sock *smc;
1580
1581         if (peer && (sock->sk->sk_state != SMC_ACTIVE) &&
1582             (sock->sk->sk_state != SMC_APPCLOSEWAIT1))
1583                 return -ENOTCONN;
1584
1585         smc = smc_sk(sock->sk);
1586
1587         return smc->clcsock->ops->getname(smc->clcsock, addr, peer);
1588 }
1589
1590 static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
1591 {
1592         struct sock *sk = sock->sk;
1593         struct smc_sock *smc;
1594         int rc = -EPIPE;
1595
1596         smc = smc_sk(sk);
1597         lock_sock(sk);
1598         if ((sk->sk_state != SMC_ACTIVE) &&
1599             (sk->sk_state != SMC_APPCLOSEWAIT1) &&
1600             (sk->sk_state != SMC_INIT))
1601                 goto out;
1602
1603         if (msg->msg_flags & MSG_FASTOPEN) {
1604                 if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) {
1605                         smc_switch_to_fallback(smc);
1606                         smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP;
1607                 } else {
1608                         rc = -EINVAL;
1609                         goto out;
1610                 }
1611         }
1612
1613         if (smc->use_fallback)
1614                 rc = smc->clcsock->ops->sendmsg(smc->clcsock, msg, len);
1615         else
1616                 rc = smc_tx_sendmsg(smc, msg, len);
1617 out:
1618         release_sock(sk);
1619         return rc;
1620 }
1621
1622 static int smc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
1623                        int flags)
1624 {
1625         struct sock *sk = sock->sk;
1626         struct smc_sock *smc;
1627         int rc = -ENOTCONN;
1628
1629         smc = smc_sk(sk);
1630         lock_sock(sk);
1631         if (sk->sk_state == SMC_CLOSED && (sk->sk_shutdown & RCV_SHUTDOWN)) {
1632                 /* socket was connected before, no more data to read */
1633                 rc = 0;
1634                 goto out;
1635         }
1636         if ((sk->sk_state == SMC_INIT) ||
1637             (sk->sk_state == SMC_LISTEN) ||
1638             (sk->sk_state == SMC_CLOSED))
1639                 goto out;
1640
1641         if (sk->sk_state == SMC_PEERFINCLOSEWAIT) {
1642                 rc = 0;
1643                 goto out;
1644         }
1645
1646         if (smc->use_fallback) {
1647                 rc = smc->clcsock->ops->recvmsg(smc->clcsock, msg, len, flags);
1648         } else {
1649                 msg->msg_namelen = 0;
1650                 rc = smc_rx_recvmsg(smc, msg, NULL, len, flags);
1651         }
1652
1653 out:
1654         release_sock(sk);
1655         return rc;
1656 }
1657
1658 static __poll_t smc_accept_poll(struct sock *parent)
1659 {
1660         struct smc_sock *isk = smc_sk(parent);
1661         __poll_t mask = 0;
1662
1663         spin_lock(&isk->accept_q_lock);
1664         if (!list_empty(&isk->accept_q))
1665                 mask = EPOLLIN | EPOLLRDNORM;
1666         spin_unlock(&isk->accept_q_lock);
1667
1668         return mask;
1669 }
1670
/* Poll entry point for AF_SMC sockets.
 * For a fallback socket all event state comes from the internal CLC (TCP)
 * socket; otherwise events are derived from the SMC state machine, the
 * send/receive buffer counters and the urgent-data state.
 */
static __poll_t smc_poll(struct file *file, struct socket *sock,
			     poll_table *wait)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	__poll_t mask = 0;

	if (!sk)
		return EPOLLNVAL;

	smc = smc_sk(sock->sk);
	if (smc->use_fallback) {
		/* delegate to CLC child sock */
		mask = smc->clcsock->ops->poll(file, smc->clcsock, wait);
		sk->sk_err = smc->clcsock->sk->sk_err;
	} else {
		if (sk->sk_state != SMC_CLOSED)
			sock_poll_wait(file, sock, wait);
		if (sk->sk_err)
			mask |= EPOLLERR;
		if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
		    (sk->sk_state == SMC_CLOSED))
			mask |= EPOLLHUP;
		if (sk->sk_state == SMC_LISTEN) {
			/* woken up by sk_data_ready in smc_listen_work() */
			mask |= smc_accept_poll(sk);
		/* use_fallback is deliberately rechecked here: the
		 * nonblocking connect worker may have switched this socket
		 * to fallback after the check at the top of this function.
		 */
		} else if (smc->use_fallback) { /* as result of connect_work()*/
			mask |= smc->clcsock->ops->poll(file, smc->clcsock,
							   wait);
			sk->sk_err = smc->clcsock->sk->sk_err;
		} else {
			/* writable when send buffer space is available or
			 * the send side was shut down (write then fails fast)
			 */
			if ((sk->sk_state != SMC_INIT &&
			     atomic_read(&smc->conn.sndbuf_space)) ||
			    sk->sk_shutdown & SEND_SHUTDOWN) {
				mask |= EPOLLOUT | EPOLLWRNORM;
			} else {
				/* remember that a writer is waiting for space */
				sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
			}
			if (atomic_read(&smc->conn.bytes_to_rcv))
				mask |= EPOLLIN | EPOLLRDNORM;
			if (sk->sk_shutdown & RCV_SHUTDOWN)
				mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
			if (sk->sk_state == SMC_APPCLOSEWAIT1)
				mask |= EPOLLIN;
			if (smc->conn.urg_state == SMC_URG_VALID)
				mask |= EPOLLPRI;
		}
	}

	return mask;
}
1723
1724 static int smc_shutdown(struct socket *sock, int how)
1725 {
1726         struct sock *sk = sock->sk;
1727         struct smc_sock *smc;
1728         int rc = -EINVAL;
1729         int rc1 = 0;
1730
1731         smc = smc_sk(sk);
1732
1733         if ((how < SHUT_RD) || (how > SHUT_RDWR))
1734                 return rc;
1735
1736         lock_sock(sk);
1737
1738         rc = -ENOTCONN;
1739         if ((sk->sk_state != SMC_ACTIVE) &&
1740             (sk->sk_state != SMC_PEERCLOSEWAIT1) &&
1741             (sk->sk_state != SMC_PEERCLOSEWAIT2) &&
1742             (sk->sk_state != SMC_APPCLOSEWAIT1) &&
1743             (sk->sk_state != SMC_APPCLOSEWAIT2) &&
1744             (sk->sk_state != SMC_APPFINCLOSEWAIT))
1745                 goto out;
1746         if (smc->use_fallback) {
1747                 rc = kernel_sock_shutdown(smc->clcsock, how);
1748                 sk->sk_shutdown = smc->clcsock->sk->sk_shutdown;
1749                 if (sk->sk_shutdown == SHUTDOWN_MASK)
1750                         sk->sk_state = SMC_CLOSED;
1751                 goto out;
1752         }
1753         switch (how) {
1754         case SHUT_RDWR:         /* shutdown in both directions */
1755                 rc = smc_close_active(smc);
1756                 break;
1757         case SHUT_WR:
1758                 rc = smc_close_shutdown_write(smc);
1759                 break;
1760         case SHUT_RD:
1761                 rc = 0;
1762                 /* nothing more to do because peer is not involved */
1763                 break;
1764         }
1765         if (smc->clcsock)
1766                 rc1 = kernel_sock_shutdown(smc->clcsock, how);
1767         /* map sock_shutdown_cmd constants to sk_shutdown value range */
1768         sk->sk_shutdown |= how + 1;
1769
1770 out:
1771         release_sock(sk);
1772         return rc ? rc : rc1;
1773 }
1774
/* setsockopt handler for AF_SMC sockets.
 * Generic options are always forwarded to the internal CLC (TCP) socket
 * first; a subset of TCP-level options is then interpreted for the SMC
 * connection itself (unless the socket already fell back to TCP).
 */
static int smc_setsockopt(struct socket *sock, int level, int optname,
			  sockptr_t optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int val, rc;

	smc = smc_sk(sk);

	/* generic setsockopts reaching us here always apply to the
	 * CLC socket
	 */
	if (unlikely(!smc->clcsock->ops->setsockopt))
		rc = -EOPNOTSUPP;
	else
		rc = smc->clcsock->ops->setsockopt(smc->clcsock, level, optname,
						   optval, optlen);
	/* propagate an error raised on the CLC socket to this socket */
	if (smc->clcsock->sk->sk_err) {
		sk->sk_err = smc->clcsock->sk->sk_err;
		sk->sk_error_report(sk);
	}

	/* NOTE(review): optlen/optval validation happens only after the
	 * option was already forwarded to the CLC socket above, so a short
	 * optlen returns -EINVAL while the TCP-level effect may have been
	 * applied — verify against upstream history whether this ordering
	 * is intentional.
	 */
	if (optlen < sizeof(int))
		return -EINVAL;
	if (copy_from_sockptr(&val, optval, sizeof(int)))
		return -EFAULT;

	lock_sock(sk);
	if (rc || smc->use_fallback)
		goto out;
	switch (optname) {
	case TCP_ULP:
	case TCP_FASTOPEN:
	case TCP_FASTOPEN_CONNECT:
	case TCP_FASTOPEN_KEY:
	case TCP_FASTOPEN_NO_COOKIE:
		/* option not supported by SMC */
		if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) {
			smc_switch_to_fallback(smc);
			smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP;
		} else {
			rc = -EINVAL;
		}
		break;
	case TCP_NODELAY:
		if (sk->sk_state != SMC_INIT &&
		    sk->sk_state != SMC_LISTEN &&
		    sk->sk_state != SMC_CLOSED) {
			/* kick the transmit worker without delay */
			if (val)
				mod_delayed_work(system_wq, &smc->conn.tx_work,
						 0);
		}
		break;
	case TCP_CORK:
		if (sk->sk_state != SMC_INIT &&
		    sk->sk_state != SMC_LISTEN &&
		    sk->sk_state != SMC_CLOSED) {
			/* uncorking: flush pending data immediately */
			if (!val)
				mod_delayed_work(system_wq, &smc->conn.tx_work,
						 0);
		}
		break;
	case TCP_DEFER_ACCEPT:
		smc->sockopt_defer_accept = val;
		break;
	default:
		break;
	}
out:
	release_sock(sk);

	return rc;
}
1848
1849 static int smc_getsockopt(struct socket *sock, int level, int optname,
1850                           char __user *optval, int __user *optlen)
1851 {
1852         struct smc_sock *smc;
1853
1854         smc = smc_sk(sock->sk);
1855         /* socket options apply to the CLC socket */
1856         if (unlikely(!smc->clcsock->ops->getsockopt))
1857                 return -EOPNOTSUPP;
1858         return smc->clcsock->ops->getsockopt(smc->clcsock, level, optname,
1859                                              optval, optlen);
1860 }
1861
1862 static int smc_ioctl(struct socket *sock, unsigned int cmd,
1863                      unsigned long arg)
1864 {
1865         union smc_host_cursor cons, urg;
1866         struct smc_connection *conn;
1867         struct smc_sock *smc;
1868         int answ;
1869
1870         smc = smc_sk(sock->sk);
1871         conn = &smc->conn;
1872         lock_sock(&smc->sk);
1873         if (smc->use_fallback) {
1874                 if (!smc->clcsock) {
1875                         release_sock(&smc->sk);
1876                         return -EBADF;
1877                 }
1878                 answ = smc->clcsock->ops->ioctl(smc->clcsock, cmd, arg);
1879                 release_sock(&smc->sk);
1880                 return answ;
1881         }
1882         switch (cmd) {
1883         case SIOCINQ: /* same as FIONREAD */
1884                 if (smc->sk.sk_state == SMC_LISTEN) {
1885                         release_sock(&smc->sk);
1886                         return -EINVAL;
1887                 }
1888                 if (smc->sk.sk_state == SMC_INIT ||
1889                     smc->sk.sk_state == SMC_CLOSED)
1890                         answ = 0;
1891                 else
1892                         answ = atomic_read(&smc->conn.bytes_to_rcv);
1893                 break;
1894         case SIOCOUTQ:
1895                 /* output queue size (not send + not acked) */
1896                 if (smc->sk.sk_state == SMC_LISTEN) {
1897                         release_sock(&smc->sk);
1898                         return -EINVAL;
1899                 }
1900                 if (smc->sk.sk_state == SMC_INIT ||
1901                     smc->sk.sk_state == SMC_CLOSED)
1902                         answ = 0;
1903                 else
1904                         answ = smc->conn.sndbuf_desc->len -
1905                                         atomic_read(&smc->conn.sndbuf_space);
1906                 break;
1907         case SIOCOUTQNSD:
1908                 /* output queue size (not send only) */
1909                 if (smc->sk.sk_state == SMC_LISTEN) {
1910                         release_sock(&smc->sk);
1911                         return -EINVAL;
1912                 }
1913                 if (smc->sk.sk_state == SMC_INIT ||
1914                     smc->sk.sk_state == SMC_CLOSED)
1915                         answ = 0;
1916                 else
1917                         answ = smc_tx_prepared_sends(&smc->conn);
1918                 break;
1919         case SIOCATMARK:
1920                 if (smc->sk.sk_state == SMC_LISTEN) {
1921                         release_sock(&smc->sk);
1922                         return -EINVAL;
1923                 }
1924                 if (smc->sk.sk_state == SMC_INIT ||
1925                     smc->sk.sk_state == SMC_CLOSED) {
1926                         answ = 0;
1927                 } else {
1928                         smc_curs_copy(&cons, &conn->local_tx_ctrl.cons, conn);
1929                         smc_curs_copy(&urg, &conn->urg_curs, conn);
1930                         answ = smc_curs_diff(conn->rmb_desc->len,
1931                                              &cons, &urg) == 1;
1932                 }
1933                 break;
1934         default:
1935                 release_sock(&smc->sk);
1936                 return -ENOIOCTLCMD;
1937         }
1938         release_sock(&smc->sk);
1939
1940         return put_user(answ, (int __user *)arg);
1941 }
1942
1943 static ssize_t smc_sendpage(struct socket *sock, struct page *page,
1944                             int offset, size_t size, int flags)
1945 {
1946         struct sock *sk = sock->sk;
1947         struct smc_sock *smc;
1948         int rc = -EPIPE;
1949
1950         smc = smc_sk(sk);
1951         lock_sock(sk);
1952         if (sk->sk_state != SMC_ACTIVE) {
1953                 release_sock(sk);
1954                 goto out;
1955         }
1956         release_sock(sk);
1957         if (smc->use_fallback)
1958                 rc = kernel_sendpage(smc->clcsock, page, offset,
1959                                      size, flags);
1960         else
1961                 rc = sock_no_sendpage(sock, page, offset, size, flags);
1962
1963 out:
1964         return rc;
1965 }
1966
1967 /* Map the affected portions of the rmbe into an spd, note the number of bytes
1968  * to splice in conn->splice_pending, and press 'go'. Delays consumer cursor
1969  * updates till whenever a respective page has been fully processed.
1970  * Note that subsequent recv() calls have to wait till all splice() processing
1971  * completed.
1972  */
1973 static ssize_t smc_splice_read(struct socket *sock, loff_t *ppos,
1974                                struct pipe_inode_info *pipe, size_t len,
1975                                unsigned int flags)
1976 {
1977         struct sock *sk = sock->sk;
1978         struct smc_sock *smc;
1979         int rc = -ENOTCONN;
1980
1981         smc = smc_sk(sk);
1982         lock_sock(sk);
1983         if (sk->sk_state == SMC_CLOSED && (sk->sk_shutdown & RCV_SHUTDOWN)) {
1984                 /* socket was connected before, no more data to read */
1985                 rc = 0;
1986                 goto out;
1987         }
1988         if (sk->sk_state == SMC_INIT ||
1989             sk->sk_state == SMC_LISTEN ||
1990             sk->sk_state == SMC_CLOSED)
1991                 goto out;
1992
1993         if (sk->sk_state == SMC_PEERFINCLOSEWAIT) {
1994                 rc = 0;
1995                 goto out;
1996         }
1997
1998         if (smc->use_fallback) {
1999                 rc = smc->clcsock->ops->splice_read(smc->clcsock, ppos,
2000                                                     pipe, len, flags);
2001         } else {
2002                 if (*ppos) {
2003                         rc = -ESPIPE;
2004                         goto out;
2005                 }
2006                 if (flags & SPLICE_F_NONBLOCK)
2007                         flags = MSG_DONTWAIT;
2008                 else
2009                         flags = 0;
2010                 rc = smc_rx_recvmsg(smc, NULL, pipe, len, flags);
2011         }
2012 out:
2013         release_sock(sk);
2014
2015         return rc;
2016 }
2017
/* proto_ops for AF_SMC sockets -- must look like tcp; operations without an
 * SMC-specific implementation map to the generic sock_no_* stubs.
 */
static const struct proto_ops smc_sock_ops = {
	.family		= PF_SMC,
	.owner		= THIS_MODULE,
	.release	= smc_release,
	.bind		= smc_bind,
	.connect	= smc_connect,
	.socketpair	= sock_no_socketpair,
	.accept		= smc_accept,
	.getname	= smc_getname,
	.poll		= smc_poll,
	.ioctl		= smc_ioctl,
	.listen		= smc_listen,
	.shutdown	= smc_shutdown,
	.setsockopt	= smc_setsockopt,
	.getsockopt	= smc_getsockopt,
	.sendmsg	= smc_sendmsg,
	.recvmsg	= smc_recvmsg,
	.mmap		= sock_no_mmap,
	.sendpage	= smc_sendpage,
	.splice_read	= smc_splice_read,
};
2040
2041 static int smc_create(struct net *net, struct socket *sock, int protocol,
2042                       int kern)
2043 {
2044         int family = (protocol == SMCPROTO_SMC6) ? PF_INET6 : PF_INET;
2045         struct smc_sock *smc;
2046         struct sock *sk;
2047         int rc;
2048
2049         rc = -ESOCKTNOSUPPORT;
2050         if (sock->type != SOCK_STREAM)
2051                 goto out;
2052
2053         rc = -EPROTONOSUPPORT;
2054         if (protocol != SMCPROTO_SMC && protocol != SMCPROTO_SMC6)
2055                 goto out;
2056
2057         rc = -ENOBUFS;
2058         sock->ops = &smc_sock_ops;
2059         sk = smc_sock_alloc(net, sock, protocol);
2060         if (!sk)
2061                 goto out;
2062
2063         /* create internal TCP socket for CLC handshake and fallback */
2064         smc = smc_sk(sk);
2065         smc->use_fallback = false; /* assume rdma capability first */
2066         smc->fallback_rsn = 0;
2067         rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP,
2068                               &smc->clcsock);
2069         if (rc) {
2070                 sk_common_release(sk);
2071                 goto out;
2072         }
2073         smc->sk.sk_sndbuf = max(smc->clcsock->sk->sk_sndbuf, SMC_BUF_MIN_SIZE);
2074         smc->sk.sk_rcvbuf = max(smc->clcsock->sk->sk_rcvbuf, SMC_BUF_MIN_SIZE);
2075
2076 out:
2077         return rc;
2078 }
2079
/* address-family registration handle, passed to sock_register() in smc_init() */
static const struct net_proto_family smc_sock_family_ops = {
	.family	= PF_SMC,
	.owner	= THIS_MODULE,
	.create	= smc_create,
};
2085
/* pernet id; indexes the per-namespace struct smc_net (see smc_net_ops) */
unsigned int smc_net_id;
2087
/* per-namespace init: set up this namespace's pnet table */
static __net_init int smc_net_init(struct net *net)
{
	return smc_pnet_net_init(net);
}
2092
/* per-namespace teardown: release this namespace's pnet table */
static void __net_exit smc_net_exit(struct net *net)
{
	smc_pnet_net_exit(net);
}
2097
/* per-network-namespace hooks; .size bytes of struct smc_net are allocated
 * per namespace and reachable through smc_net_id
 */
static struct pernet_operations smc_net_ops = {
	.init = smc_net_init,
	.exit = smc_net_exit,
	.id   = &smc_net_id,
	.size = sizeof(struct smc_net),
};
2104
/* Module initialization: register pernet state, pnet tables, SMC core, LLC
 * and CDC handlers, both protos (SMC over IPv4 and IPv6), the AF_SMC socket
 * family and the IB client, then enable the tcp_have_smc static branch.
 * Error labels unwind the successful steps in reverse order.
 */
static int __init smc_init(void)
{
	int rc;

	rc = register_pernet_subsys(&smc_net_ops);
	if (rc)
		return rc;

	rc = smc_pnet_init();
	if (rc)
		goto out_pernet_subsys;

	rc = smc_core_init();
	if (rc) {
		pr_err("%s: smc_core_init fails with %d\n", __func__, rc);
		goto out_pnet;
	}

	rc = smc_llc_init();
	if (rc) {
		pr_err("%s: smc_llc_init fails with %d\n", __func__, rc);
		goto out_core;
	}

	/* NOTE(review): llc/cdc failures both unwind via out_core only --
	 * presumably these init functions have no dedicated exit; confirm.
	 */
	rc = smc_cdc_init();
	if (rc) {
		pr_err("%s: smc_cdc_init fails with %d\n", __func__, rc);
		goto out_core;
	}

	rc = proto_register(&smc_proto, 1);
	if (rc) {
		pr_err("%s: proto_register(v4) fails with %d\n", __func__, rc);
		goto out_core;
	}

	rc = proto_register(&smc_proto6, 1);
	if (rc) {
		pr_err("%s: proto_register(v6) fails with %d\n", __func__, rc);
		goto out_proto;
	}

	rc = sock_register(&smc_sock_family_ops);
	if (rc) {
		pr_err("%s: sock_register fails with %d\n", __func__, rc);
		goto out_proto6;
	}
	INIT_HLIST_HEAD(&smc_v4_hashinfo.ht);
	INIT_HLIST_HEAD(&smc_v6_hashinfo.ht);

	rc = smc_ib_register_client();
	if (rc) {
		pr_err("%s: ib_register fails with %d\n", __func__, rc);
		goto out_sock;
	}

	static_branch_enable(&tcp_have_smc);
	return 0;

out_sock:
	sock_unregister(PF_SMC);
out_proto6:
	proto_unregister(&smc_proto6);
out_proto:
	proto_unregister(&smc_proto);
out_core:
	smc_core_exit();
out_pnet:
	smc_pnet_exit();
out_pernet_subsys:
	unregister_pernet_subsys(&smc_net_ops);

	return rc;
}
2179
/* Module teardown: disable the TCP SMC static branch first, then undo the
 * registrations performed in smc_init().
 */
static void __exit smc_exit(void)
{
	static_branch_disable(&tcp_have_smc);
	sock_unregister(PF_SMC);
	smc_core_exit();
	smc_ib_unregister_client();
	proto_unregister(&smc_proto6);
	proto_unregister(&smc_proto);
	smc_pnet_exit();
	unregister_pernet_subsys(&smc_net_ops);
	/* wait for pending RCU callbacks before module text is freed */
	rcu_barrier();
}
2192
/* module entry/exit points and metadata */
module_init(smc_init);
module_exit(smc_exit);

MODULE_AUTHOR("Ursula Braun <ubraun@linux.vnet.ibm.com>");
MODULE_DESCRIPTION("smc socket address family");
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_SMC);