net/smc/smc_core.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  *  Shared Memory Communications over RDMA (SMC-R) and RoCE
4  *
5  *  Basic Transport Functions exploiting Infiniband API
6  *
7  *  Copyright IBM Corp. 2016
8  *
9  *  Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
10  */
11
12 #include <linux/socket.h>
13 #include <linux/if_vlan.h>
14 #include <linux/random.h>
15 #include <linux/workqueue.h>
16 #include <linux/wait.h>
17 #include <linux/reboot.h>
18 #include <net/tcp.h>
19 #include <net/sock.h>
20 #include <rdma/ib_verbs.h>
21 #include <rdma/ib_cache.h>
22
23 #include "smc.h"
24 #include "smc_clc.h"
25 #include "smc_core.h"
26 #include "smc_ib.h"
27 #include "smc_wr.h"
28 #include "smc_llc.h"
29 #include "smc_cdc.h"
30 #include "smc_close.h"
31 #include "smc_ism.h"
32
33 #define SMC_LGR_NUM_INCR                256
34 #define SMC_LGR_FREE_DELAY_SERV         (600 * HZ)
35 #define SMC_LGR_FREE_DELAY_CLNT         (SMC_LGR_FREE_DELAY_SERV + 10 * HZ)
36 #define SMC_LGR_FREE_DELAY_FAST         (8 * HZ)
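/* delays (in jiffies) before an unused link group is freed: the server side
 * waits 10 minutes, the client side 10 seconds longer so that client and
 * server link groups are not freed out of sync (see
 * smc_lgr_schedule_free_work()); the FAST delay is used when a link group
 * is cleaned up early or terminated
 */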
37
38 static struct smc_lgr_list smc_lgr_list = {     /* established link groups */
39         .lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock),
40         .list = LIST_HEAD_INIT(smc_lgr_list.list),
41         .num = 0,
42 };
43
44 static atomic_t lgr_cnt = ATOMIC_INIT(0); /* number of existing link groups */
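/* wait queue woken up once the last SMC-R link group has been freed; used by
 * smc_smcr_terminate_all() to wait until all link groups are gone
 */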
45 static DECLARE_WAIT_QUEUE_HEAD(lgrs_deleted);
46
47 static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
48                          struct smc_buf_desc *buf_desc);
49 static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft);
50
51 /* return head of link group list and its lock for a given link group */
52 static inline struct list_head *smc_lgr_list_head(struct smc_link_group *lgr,
53                                                   spinlock_t **lgr_lock)
54 {
55         if (lgr->is_smcd) {
56                 *lgr_lock = &lgr->smcd->lgr_lock;
57                 return &lgr->smcd->lgr_list;
58         }
59
60         *lgr_lock = &smc_lgr_list.lock;
61         return &smc_lgr_list.list;
62 }
63
64 static void smc_lgr_schedule_free_work(struct smc_link_group *lgr)
65 {
66         /* client link group creation always follows the server link group
67          * creation. For client use a somewhat higher removal delay time,
68          * otherwise there is a risk of out-of-sync link groups.
69          */
70         if (!lgr->freeing && !lgr->freefast) {
71                 mod_delayed_work(system_wq, &lgr->free_work,
72                                  (!lgr->is_smcd && lgr->role == SMC_CLNT) ?
73                                                 SMC_LGR_FREE_DELAY_CLNT :
74                                                 SMC_LGR_FREE_DELAY_SERV);
75         }
76 }
77
78 void smc_lgr_schedule_free_work_fast(struct smc_link_group *lgr)
79 {
80         if (!lgr->freeing && !lgr->freefast) {
81                 lgr->freefast = 1;
82                 mod_delayed_work(system_wq, &lgr->free_work,
83                                  SMC_LGR_FREE_DELAY_FAST);
84         }
85 }
86
87 /* Register connection's alert token in our lookup structure.
88  * To use rbtrees we have to implement our own insert core.
89  * Requires @conns_lock
90  * @conn        connection to register
92  */
93 static void smc_lgr_add_alert_token(struct smc_connection *conn)
94 {
95         struct rb_node **link, *parent = NULL;
96         u32 token = conn->alert_token_local;
97
98         link = &conn->lgr->conns_all.rb_node;
99         while (*link) {
100                 struct smc_connection *cur = rb_entry(*link,
101                                         struct smc_connection, alert_node);
102
103                 parent = *link;
104                 if (cur->alert_token_local > token)
105                         link = &parent->rb_left;
106                 else
107                         link = &parent->rb_right;
108         }
109         /* Put the new node there */
110         rb_link_node(&conn->alert_node, parent, link);
111         rb_insert_color(&conn->alert_node, &conn->lgr->conns_all);
112 }
113
114 /* Register connection in link group by assigning an alert token
115  * registered in a search tree.
116  * Requires @conns_lock
117  * Note that '0' is a reserved value and not assigned.
118  */
119 static int smc_lgr_register_conn(struct smc_connection *conn)
120 {
121         struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
122         static atomic_t nexttoken = ATOMIC_INIT(0);
123
124         /* find a new alert_token_local value not yet used by some connection
125          * in this link group
126          */
127         sock_hold(&smc->sk); /* sock_put in smc_lgr_unregister_conn() */
128         while (!conn->alert_token_local) {
129                 conn->alert_token_local = atomic_inc_return(&nexttoken);
130                 if (smc_lgr_find_conn(conn->alert_token_local, conn->lgr))
131                         conn->alert_token_local = 0;
132         }
133         smc_lgr_add_alert_token(conn);
134
135         /* assign the new connection to a link */
136         if (!conn->lgr->is_smcd) {
137                 struct smc_link *lnk;
138                 int i;
139
140                 /* tbd - link balancing */
141                 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
142                         lnk = &conn->lgr->lnk[i];
143                         if (lnk->state == SMC_LNK_ACTIVATING ||
144                             lnk->state == SMC_LNK_ACTIVE)
145                                 conn->lnk = lnk;
146                 }
147                 if (!conn->lnk)
148                         return SMC_CLC_DECL_NOACTLINK;
149         }
150         conn->lgr->conns_num++;
151         return 0;
152 }
153
154 /* Unregister connection and reset the alert token of the given connection
155  */
156 static void __smc_lgr_unregister_conn(struct smc_connection *conn)
157 {
158         struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
159         struct smc_link_group *lgr = conn->lgr;
160
161         rb_erase(&conn->alert_node, &lgr->conns_all);
162         lgr->conns_num--;
163         conn->alert_token_local = 0;
164         sock_put(&smc->sk); /* sock_hold in smc_lgr_register_conn() */
165 }
166
167 /* Unregister connection from lgr
168  */
169 static void smc_lgr_unregister_conn(struct smc_connection *conn)
170 {
171         struct smc_link_group *lgr = conn->lgr;
172
173         if (!lgr)
174                 return;
175         write_lock_bh(&lgr->conns_lock);
176         if (conn->alert_token_local)
177                 __smc_lgr_unregister_conn(conn);
179         write_unlock_bh(&lgr->conns_lock);
180         conn->lgr = NULL;
181 }
182
183 void smc_lgr_cleanup_early(struct smc_connection *conn)
184 {
185         struct smc_link_group *lgr = conn->lgr;
186
187         if (!lgr)
188                 return;
189
190         smc_conn_free(conn);
191         smc_lgr_forget(lgr);
192         smc_lgr_schedule_free_work_fast(lgr);
193 }
194
195 /* Send delete link, either as client to request the initiation
196  * of the DELETE LINK sequence from server; or as server to
197  * initiate the delete processing. See smc_llc_rx_delete_link().
198  */
199 static int smcr_link_send_delete(struct smc_link *lnk, bool orderly)
200 {
201         if (lnk->state == SMC_LNK_ACTIVE &&
202             !smc_llc_send_delete_link(lnk, SMC_LLC_REQ, orderly)) {
203                 smc_llc_link_deleting(lnk);
204                 return 0;
205         }
206         return -ENOTCONN;
207 }
208
209 static void smc_lgr_free(struct smc_link_group *lgr);
210
211 static void smc_lgr_free_work(struct work_struct *work)
212 {
213         struct smc_link_group *lgr = container_of(to_delayed_work(work),
214                                                   struct smc_link_group,
215                                                   free_work);
216         spinlock_t *lgr_lock;
217         bool conns;
218         int i;
219
220         smc_lgr_list_head(lgr, &lgr_lock);
221         spin_lock_bh(lgr_lock);
222         if (lgr->freeing) {
223                 spin_unlock_bh(lgr_lock);
224                 return;
225         }
226         read_lock_bh(&lgr->conns_lock);
227         conns = RB_EMPTY_ROOT(&lgr->conns_all);
228         read_unlock_bh(&lgr->conns_lock);
229         if (!conns) { /* number of lgr connections is no longer zero */
230                 spin_unlock_bh(lgr_lock);
231                 return;
232         }
233         list_del_init(&lgr->list); /* remove from smc_lgr_list */
234
235         if (!lgr->is_smcd && !lgr->terminating) {
236                 bool do_wait = false;
237
238                 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
239                         struct smc_link *lnk = &lgr->lnk[i];
240                         /* try to send del link msg, on err free immediately */
241                         if (lnk->state == SMC_LNK_ACTIVE &&
242                             !smcr_link_send_delete(lnk, true)) {
243                                 /* reschedule in case we never receive a resp */
244                                 smc_lgr_schedule_free_work(lgr);
245                                 do_wait = true;
246                         }
247                 }
248                 if (do_wait) {
249                         spin_unlock_bh(lgr_lock);
250                         return; /* wait for resp, see smc_llc_rx_delete_link */
251                 }
252         }
253         lgr->freeing = 1; /* this instance does the freeing, no new schedule */
254         spin_unlock_bh(lgr_lock);
255         cancel_delayed_work(&lgr->free_work);
256
257         if (lgr->is_smcd && !lgr->terminating)
258                 smc_ism_signal_shutdown(lgr);
259         if (!lgr->is_smcd) {
260                 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
261                         struct smc_link *lnk = &lgr->lnk[i];
262
263                         if (smc_link_usable(lnk))
264                                 lnk->state = SMC_LNK_INACTIVE;
265                 }
266         }
267         smc_lgr_free(lgr);
268 }
269
270 static void smc_lgr_terminate_work(struct work_struct *work)
271 {
272         struct smc_link_group *lgr = container_of(work, struct smc_link_group,
273                                                   terminate_work);
274
275         __smc_lgr_terminate(lgr, true);
276 }
277
278 /* return next unique link id for the lgr */
279 static u8 smcr_next_link_id(struct smc_link_group *lgr)
280 {
281         u8 link_id;
282         int i;
283
284         while (1) {
285                 link_id = ++lgr->next_link_id;
286                 if (!link_id)   /* skip zero as link_id */
287                         link_id = ++lgr->next_link_id;
288                 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
289                         if (smc_link_usable(&lgr->lnk[i]) &&
290                             lgr->lnk[i].link_id == link_id)
291                                 break;  /* link_id already in use, pick another */
292                 if (i == SMC_LINKS_PER_LGR_MAX)
293                         break;  /* link_id is unique, use it */
294         }
295         return link_id;
296 }
297
298 static int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
299                           u8 link_idx, struct smc_init_info *ini)
300 {
301         u8 rndvec[3];
302         int rc;
303
304         get_device(&ini->ib_dev->ibdev->dev);
305         atomic_inc(&ini->ib_dev->lnk_cnt);
306         lnk->state = SMC_LNK_ACTIVATING;
307         lnk->link_id = smcr_next_link_id(lgr);
308         lnk->lgr = lgr;
309         lnk->link_idx = link_idx;
310         lnk->smcibdev = ini->ib_dev;
311         lnk->ibport = ini->ib_port;
312         lnk->path_mtu = ini->ib_dev->pattr[ini->ib_port - 1].active_mtu;
313         if (!ini->ib_dev->initialized) {
314                 rc = (int)smc_ib_setup_per_ibdev(ini->ib_dev);
315                 if (rc)
316                         goto out;
317         }
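        /* the initial packet sequence number (PSN) is a random 24-bit value */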
318         get_random_bytes(rndvec, sizeof(rndvec));
319         lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) +
320                 (rndvec[2] << 16);
321         rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport,
322                                   ini->vlan_id, lnk->gid, &lnk->sgid_index);
323         if (rc)
324                 goto out;
325         rc = smc_llc_link_init(lnk);
326         if (rc)
327                 goto out;
328         rc = smc_wr_alloc_link_mem(lnk);
329         if (rc)
330                 goto clear_llc_lnk;
331         rc = smc_ib_create_protection_domain(lnk);
332         if (rc)
333                 goto free_link_mem;
334         rc = smc_ib_create_queue_pair(lnk);
335         if (rc)
336                 goto dealloc_pd;
337         rc = smc_wr_create_link(lnk);
338         if (rc)
339                 goto destroy_qp;
340         return 0;
341
342 destroy_qp:
343         smc_ib_destroy_queue_pair(lnk);
344 dealloc_pd:
345         smc_ib_dealloc_protection_domain(lnk);
346 free_link_mem:
347         smc_wr_free_link_mem(lnk);
348 clear_llc_lnk:
349         smc_llc_link_clear(lnk);
350 out:
351         put_device(&ini->ib_dev->ibdev->dev);
352         memset(lnk, 0, sizeof(struct smc_link));
353         lnk->state = SMC_LNK_UNUSED;
354         if (!atomic_dec_return(&ini->ib_dev->lnk_cnt))
355                 wake_up(&ini->ib_dev->lnks_deleted);
356         return rc;
357 }
358
359 /* create a new SMC link group */
360 static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
361 {
362         struct smc_link_group *lgr;
363         struct list_head *lgr_list;
364         struct smc_link *lnk;
365         spinlock_t *lgr_lock;
366         u8 link_idx;
367         int rc = 0;
368         int i;
369
370         if (ini->is_smcd && ini->vlan_id) {
371                 if (smc_ism_get_vlan(ini->ism_dev, ini->vlan_id)) {
372                         rc = SMC_CLC_DECL_ISMVLANERR;
373                         goto out;
374                 }
375         }
376
377         lgr = kzalloc(sizeof(*lgr), GFP_KERNEL);
378         if (!lgr) {
379                 rc = SMC_CLC_DECL_MEM;
380                 goto ism_put_vlan;
381         }
382         lgr->is_smcd = ini->is_smcd;
383         lgr->sync_err = 0;
384         lgr->terminating = 0;
385         lgr->freefast = 0;
386         lgr->freeing = 0;
387         lgr->vlan_id = ini->vlan_id;
388         mutex_init(&lgr->sndbufs_lock);
389         mutex_init(&lgr->rmbs_lock);
390         rwlock_init(&lgr->conns_lock);
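        /* one free-buffer list per supported (compressed) buffer size */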
391         for (i = 0; i < SMC_RMBE_SIZES; i++) {
392                 INIT_LIST_HEAD(&lgr->sndbufs[i]);
393                 INIT_LIST_HEAD(&lgr->rmbs[i]);
394         }
395         lgr->next_link_id = 0;
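        /* derive the link group id from a global counter bumped by
         * SMC_LGR_NUM_INCR for every new link group
         */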
396         smc_lgr_list.num += SMC_LGR_NUM_INCR;
397         memcpy(&lgr->id, (u8 *)&smc_lgr_list.num, SMC_LGR_ID_SIZE);
398         INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work);
399         INIT_WORK(&lgr->terminate_work, smc_lgr_terminate_work);
400         lgr->conns_all = RB_ROOT;
401         if (ini->is_smcd) {
402                 /* SMC-D specific settings */
403                 get_device(&ini->ism_dev->dev);
404                 lgr->peer_gid = ini->ism_gid;
405                 lgr->smcd = ini->ism_dev;
406                 lgr_list = &ini->ism_dev->lgr_list;
407                 lgr_lock = &lgr->smcd->lgr_lock;
408                 lgr->peer_shutdown = 0;
409                 atomic_inc(&ini->ism_dev->lgr_cnt);
410         } else {
411                 /* SMC-R specific settings */
412                 lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
413                 memcpy(lgr->peer_systemid, ini->ib_lcl->id_for_peer,
414                        SMC_SYSTEMID_LEN);
415                 smc_llc_lgr_init(lgr, smc);
416
417                 link_idx = SMC_SINGLE_LINK;
418                 lnk = &lgr->lnk[link_idx];
419                 rc = smcr_link_init(lgr, lnk, link_idx, ini);
420                 if (rc)
421                         goto free_lgr;
422                 lgr_list = &smc_lgr_list.list;
423                 lgr_lock = &smc_lgr_list.lock;
424                 atomic_inc(&lgr_cnt);
425         }
426         smc->conn.lgr = lgr;
427         spin_lock_bh(lgr_lock);
428         list_add(&lgr->list, lgr_list);
429         spin_unlock_bh(lgr_lock);
430         return 0;
431
432 free_lgr:
433         kfree(lgr);
434 ism_put_vlan:
435         if (ini->is_smcd && ini->vlan_id)
436                 smc_ism_put_vlan(ini->ism_dev, ini->vlan_id);
437 out:
438         if (rc < 0) {
439                 if (rc == -ENOMEM)
440                         rc = SMC_CLC_DECL_MEM;
441                 else
442                         rc = SMC_CLC_DECL_INTERR;
443         }
444         return rc;
445 }
446
447 static void smcr_buf_unuse(struct smc_buf_desc *rmb_desc,
448                            struct smc_link *lnk)
449 {
450         struct smc_link_group *lgr = lnk->lgr;
451
452         if (rmb_desc->is_conf_rkey && !list_empty(&lgr->list)) {
453                 /* unregister rmb with peer */
454                 smc_llc_do_delete_rkey(lnk, rmb_desc);
455                 rmb_desc->is_conf_rkey = false;
456         }
457         if (rmb_desc->is_reg_err) {
458                 /* buf registration failed, reuse not possible */
459                 mutex_lock(&lgr->rmbs_lock);
460                 list_del(&rmb_desc->list);
461                 mutex_unlock(&lgr->rmbs_lock);
462
463                 smc_buf_free(lgr, true, rmb_desc);
464         } else {
465                 rmb_desc->used = 0;
466         }
467 }
468
469 static void smc_buf_unuse(struct smc_connection *conn,
470                           struct smc_link_group *lgr)
471 {
472         if (conn->sndbuf_desc)
473                 conn->sndbuf_desc->used = 0;
474         if (conn->rmb_desc && lgr->is_smcd)
475                 conn->rmb_desc->used = 0;
476         else if (conn->rmb_desc)
477                 smcr_buf_unuse(conn->rmb_desc, conn->lnk);
478 }
479
480 /* remove a finished connection from its link group */
481 void smc_conn_free(struct smc_connection *conn)
482 {
483         struct smc_link_group *lgr = conn->lgr;
484
485         if (!lgr)
486                 return;
487         if (lgr->is_smcd) {
488                 if (!list_empty(&lgr->list))
489                         smc_ism_unset_conn(conn);
490                 tasklet_kill(&conn->rx_tsklet);
491         } else {
492                 smc_cdc_tx_dismiss_slots(conn);
493         }
494         if (!list_empty(&lgr->list)) {
495                 smc_lgr_unregister_conn(conn);
496                 smc_buf_unuse(conn, lgr); /* allow buffer reuse */
497         }
498
499         if (!lgr->conns_num)
500                 smc_lgr_schedule_free_work(lgr);
501 }
502
503 static void smcr_link_clear(struct smc_link *lnk)
504 {
505         struct smc_ib_device *smcibdev;
506
507         if (lnk->peer_qpn == 0)
508                 return;
509         lnk->peer_qpn = 0;
510         smc_llc_link_clear(lnk);
511         smc_ib_modify_qp_reset(lnk);
512         smc_wr_free_link(lnk);
513         smc_ib_destroy_queue_pair(lnk);
514         smc_ib_dealloc_protection_domain(lnk);
515         smc_wr_free_link_mem(lnk);
516         put_device(&lnk->smcibdev->ibdev->dev);
517         smcibdev = lnk->smcibdev;
518         memset(lnk, 0, sizeof(struct smc_link));
519         lnk->state = SMC_LNK_UNUSED;
520         if (!atomic_dec_return(&smcibdev->lnk_cnt))
521                 wake_up(&smcibdev->lnks_deleted);
522 }
523
524 static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb,
525                           struct smc_buf_desc *buf_desc)
526 {
527         struct smc_link *lnk;
528         int i;
529
530         for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
531                 lnk = &lgr->lnk[i];
532                 if (!buf_desc->is_map_ib[lnk->link_idx])
533                         continue;
534                 if (is_rmb) {
535                         if (buf_desc->mr_rx[lnk->link_idx])
536                                 smc_ib_put_memory_region(
537                                                 buf_desc->mr_rx[lnk->link_idx]);
538                         smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_FROM_DEVICE);
539                 } else {
540                         smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_TO_DEVICE);
541                 }
542                 sg_free_table(&buf_desc->sgt[lnk->link_idx]);
543         }
544
545         if (buf_desc->pages)
546                 __free_pages(buf_desc->pages, buf_desc->order);
547         kfree(buf_desc);
548 }
549
550 static void smcd_buf_free(struct smc_link_group *lgr, bool is_dmb,
551                           struct smc_buf_desc *buf_desc)
552 {
553         if (is_dmb) {
554                 /* restore original buf len */
555                 buf_desc->len += sizeof(struct smcd_cdc_msg);
556                 smc_ism_unregister_dmb(lgr->smcd, buf_desc);
557         } else {
558                 kfree(buf_desc->cpu_addr);
559         }
560         kfree(buf_desc);
561 }
562
563 static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
564                          struct smc_buf_desc *buf_desc)
565 {
566         if (lgr->is_smcd)
567                 smcd_buf_free(lgr, is_rmb, buf_desc);
568         else
569                 smcr_buf_free(lgr, is_rmb, buf_desc);
570 }
571
572 static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb)
573 {
574         struct smc_buf_desc *buf_desc, *bf_desc;
575         struct list_head *buf_list;
576         int i;
577
578         for (i = 0; i < SMC_RMBE_SIZES; i++) {
579                 if (is_rmb)
580                         buf_list = &lgr->rmbs[i];
581                 else
582                         buf_list = &lgr->sndbufs[i];
583                 list_for_each_entry_safe(buf_desc, bf_desc, buf_list,
584                                          list) {
585                         list_del(&buf_desc->list);
586                         smc_buf_free(lgr, is_rmb, buf_desc);
587                 }
588         }
589 }
590
591 static void smc_lgr_free_bufs(struct smc_link_group *lgr)
592 {
593         /* free send buffers */
594         __smc_lgr_free_bufs(lgr, false);
595         /* free rmbs */
596         __smc_lgr_free_bufs(lgr, true);
597 }
598
599 /* remove a link group */
600 static void smc_lgr_free(struct smc_link_group *lgr)
601 {
602         int i;
603
604         smc_lgr_free_bufs(lgr);
605         if (lgr->is_smcd) {
606                 if (!lgr->terminating) {
607                         smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
608                         put_device(&lgr->smcd->dev);
609                 }
610                 if (!atomic_dec_return(&lgr->smcd->lgr_cnt))
611                         wake_up(&lgr->smcd->lgrs_deleted);
612         } else {
613                 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
614                         if (lgr->lnk[i].state != SMC_LNK_UNUSED)
615                                 smcr_link_clear(&lgr->lnk[i]);
616                 }
617                 smc_llc_lgr_clear(lgr);
618                 if (!atomic_dec_return(&lgr_cnt))
619                         wake_up(&lgrs_deleted);
620         }
621         kfree(lgr);
622 }
623
624 void smc_lgr_forget(struct smc_link_group *lgr)
625 {
627         spinlock_t *lgr_lock;
628 
629         smc_lgr_list_head(lgr, &lgr_lock);
630         spin_lock_bh(lgr_lock);
631         /* do not use this link group for new connections */
632         if (!list_empty(&lgr->list))
633                 list_del_init(&lgr->list);
634         spin_unlock_bh(lgr_lock);
635 }
636
637 static void smcd_unregister_all_dmbs(struct smc_link_group *lgr)
638 {
639         int i;
640
641         for (i = 0; i < SMC_RMBE_SIZES; i++) {
642                 struct smc_buf_desc *buf_desc;
643
644                 list_for_each_entry(buf_desc, &lgr->rmbs[i], list) {
645                         buf_desc->len += sizeof(struct smcd_cdc_msg);
646                         smc_ism_unregister_dmb(lgr->smcd, buf_desc);
647                 }
648         }
649 }
650
651 static void smc_sk_wake_ups(struct smc_sock *smc)
652 {
653         smc->sk.sk_write_space(&smc->sk);
654         smc->sk.sk_data_ready(&smc->sk);
655         smc->sk.sk_state_change(&smc->sk);
656 }
657
658 /* kill a connection */
659 static void smc_conn_kill(struct smc_connection *conn, bool soft)
660 {
661         struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
662
663         if (conn->lgr->is_smcd && conn->lgr->peer_shutdown)
664                 conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
665         else
666                 smc_close_abort(conn);
667         conn->killed = 1;
668         smc->sk.sk_err = ECONNABORTED;
669         smc_sk_wake_ups(smc);
670         if (conn->lgr->is_smcd) {
671                 smc_ism_unset_conn(conn);
672                 if (soft)
673                         tasklet_kill(&conn->rx_tsklet);
674                 else
675                         tasklet_unlock_wait(&conn->rx_tsklet);
676         } else {
677                 smc_cdc_tx_dismiss_slots(conn);
678         }
679         smc_lgr_unregister_conn(conn);
680         smc_close_active_abort(smc);
681 }
682
683 static void smc_lgr_cleanup(struct smc_link_group *lgr)
684 {
685         int i;
686
687         if (lgr->is_smcd) {
688                 smc_ism_signal_shutdown(lgr);
689                 smcd_unregister_all_dmbs(lgr);
690                 smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
691                 put_device(&lgr->smcd->dev);
692         } else {
693                 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
694                         struct smc_link *lnk = &lgr->lnk[i];
695
696                         if (smc_link_usable(lnk))
697                                 lnk->state = SMC_LNK_INACTIVE;
698                 }
699         }
700 }
701
702 /* terminate link group
703  * @soft: true if link group shutdown can take its time
704  *        false if immediate link group shutdown is required
705  */
706 static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft)
707 {
708         struct smc_connection *conn;
709         struct smc_sock *smc;
710         struct rb_node *node;
711
712         if (lgr->terminating)
713                 return; /* lgr already terminating */
714         if (!soft)
715                 cancel_delayed_work_sync(&lgr->free_work);
716         lgr->terminating = 1;
717
718         /* kill remaining link group connections */
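        /* smc_conn_kill() unregisters the connection and may sleep; drop the
         * conns_lock while a connection is being killed and restart from the
         * first rbtree node afterwards
         */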
719         read_lock_bh(&lgr->conns_lock);
720         node = rb_first(&lgr->conns_all);
721         while (node) {
722                 read_unlock_bh(&lgr->conns_lock);
723                 conn = rb_entry(node, struct smc_connection, alert_node);
724                 smc = container_of(conn, struct smc_sock, conn);
725                 sock_hold(&smc->sk); /* sock_put below */
726                 lock_sock(&smc->sk);
727                 smc_conn_kill(conn, soft);
728                 release_sock(&smc->sk);
729                 sock_put(&smc->sk); /* sock_hold above */
730                 read_lock_bh(&lgr->conns_lock);
731                 node = rb_first(&lgr->conns_all);
732         }
733         read_unlock_bh(&lgr->conns_lock);
734         smc_lgr_cleanup(lgr);
735         if (soft)
736                 smc_lgr_schedule_free_work_fast(lgr);
737         else
738                 smc_lgr_free(lgr);
739 }
740
741 /* unlink link group and schedule termination */
742 void smc_lgr_terminate_sched(struct smc_link_group *lgr)
743 {
744         spinlock_t *lgr_lock;
745
746         smc_lgr_list_head(lgr, &lgr_lock);
747         spin_lock_bh(lgr_lock);
748         if (list_empty(&lgr->list) || lgr->terminating || lgr->freeing) {
749                 spin_unlock_bh(lgr_lock);
750                 return; /* lgr already terminating */
751         }
752         list_del_init(&lgr->list);
753         spin_unlock_bh(lgr_lock);
754         schedule_work(&lgr->terminate_work);
755 }
756
757 /* Called when IB port is terminated */
758 void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport)
759 {
760         struct smc_link_group *lgr, *l;
761         LIST_HEAD(lgr_free_list);
762         int i;
763
764         spin_lock_bh(&smc_lgr_list.lock);
765         list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) {
766                 if (lgr->is_smcd)
767                         continue;
768                 /* tbd - terminate only when no more links are active */
769                 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
770                         if (!smc_link_usable(&lgr->lnk[i]) ||
771                             lgr->lnk[i].state == SMC_LNK_DELETING)
772                                 continue;
773                         if (lgr->lnk[i].smcibdev == smcibdev &&
774                             lgr->lnk[i].ibport == ibport) {
775                                 list_move(&lgr->list, &lgr_free_list);
776                                 lgr->freeing = 1;
777                         }
778                 }
779         }
780         spin_unlock_bh(&smc_lgr_list.lock);
781
782         list_for_each_entry_safe(lgr, l, &lgr_free_list, list) {
783                 list_del_init(&lgr->list);
784                 __smc_lgr_terminate(lgr, false);
785         }
786 }
787
788 /* Called when a peer lgr shutdown (regular or abnormal) is received */
789 void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan)
790 {
791         struct smc_link_group *lgr, *l;
792         LIST_HEAD(lgr_free_list);
793
794         /* run common cleanup function and build free list */
795         spin_lock_bh(&dev->lgr_lock);
796         list_for_each_entry_safe(lgr, l, &dev->lgr_list, list) {
797                 if ((!peer_gid || lgr->peer_gid == peer_gid) &&
798                     (vlan == VLAN_VID_MASK || lgr->vlan_id == vlan)) {
799                         if (peer_gid) /* peer triggered termination */
800                                 lgr->peer_shutdown = 1;
801                         list_move(&lgr->list, &lgr_free_list);
802                 }
803         }
804         spin_unlock_bh(&dev->lgr_lock);
805
806         /* cancel the regular free workers and actually free lgrs */
807         list_for_each_entry_safe(lgr, l, &lgr_free_list, list) {
808                 list_del_init(&lgr->list);
809                 schedule_work(&lgr->terminate_work);
810         }
811 }
812
813 /* Called when an SMCD device is removed or the smc module is unloaded */
814 void smc_smcd_terminate_all(struct smcd_dev *smcd)
815 {
816         struct smc_link_group *lgr, *lg;
817         LIST_HEAD(lgr_free_list);
818
819         spin_lock_bh(&smcd->lgr_lock);
820         list_splice_init(&smcd->lgr_list, &lgr_free_list);
821         list_for_each_entry(lgr, &lgr_free_list, list)
822                 lgr->freeing = 1;
823         spin_unlock_bh(&smcd->lgr_lock);
824
825         list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) {
826                 list_del_init(&lgr->list);
827                 __smc_lgr_terminate(lgr, false);
828         }
829
830         if (atomic_read(&smcd->lgr_cnt))
831                 wait_event(smcd->lgrs_deleted, !atomic_read(&smcd->lgr_cnt));
832 }
833
834 /* Called when an SMCR device is removed or the smc module is unloaded.
835  * If smcibdev is given, all SMCR link groups using this device are terminated.
836  * If smcibdev is NULL, all SMCR link groups are terminated.
837  */
838 void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
839 {
840         struct smc_link_group *lgr, *lg;
841         LIST_HEAD(lgr_free_list);
842         int i;
843
844         spin_lock_bh(&smc_lgr_list.lock);
845         if (!smcibdev) {
846                 list_splice_init(&smc_lgr_list.list, &lgr_free_list);
847                 list_for_each_entry(lgr, &lgr_free_list, list)
848                         lgr->freeing = 1;
849         } else {
850                 list_for_each_entry_safe(lgr, lg, &smc_lgr_list.list, list) {
851                         for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
852                                 if (lgr->lnk[i].smcibdev == smcibdev) {
853                                         list_move(&lgr->list, &lgr_free_list);
854                                         lgr->freeing = 1;
855                                         break;
856                                 }
857                         }
858                 }
859         }
860         spin_unlock_bh(&smc_lgr_list.lock);
861
862         list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) {
863                 list_del_init(&lgr->list);
864                 __smc_lgr_terminate(lgr, false);
865         }
866
867         if (smcibdev) {
868                 if (atomic_read(&smcibdev->lnk_cnt))
869                         wait_event(smcibdev->lnks_deleted,
870                                    !atomic_read(&smcibdev->lnk_cnt));
871         } else {
872                 if (atomic_read(&lgr_cnt))
873                         wait_event(lgrs_deleted, !atomic_read(&lgr_cnt));
874         }
875 }
876
877 /* Determine vlan of internal TCP socket.
878  * @ini: the determined vlan id is stored into ini->vlan_id
879  */
880 int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini)
881 {
882         struct dst_entry *dst = sk_dst_get(clcsock->sk);
883         struct net_device *ndev;
884         int i, nest_lvl, rc = 0;
885
886         ini->vlan_id = 0;
887         if (!dst) {
888                 rc = -ENOTCONN;
889                 goto out;
890         }
891         if (!dst->dev) {
892                 rc = -ENODEV;
893                 goto out_rel;
894         }
895
896         ndev = dst->dev;
897         if (is_vlan_dev(ndev)) {
898                 ini->vlan_id = vlan_dev_vlan_id(ndev);
899                 goto out_rel;
900         }
901
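        /* no VLAN device on top: walk the chain of lower devices looking for
         * a VLAN device (e.g. when the route points to a device stacked on
         * top of a VLAN device) and pick up the first VLAN id found
         */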
902         rtnl_lock();
903         nest_lvl = ndev->lower_level;
904         for (i = 0; i < nest_lvl; i++) {
905                 struct list_head *lower = &ndev->adj_list.lower;
906
907                 if (list_empty(lower))
908                         break;
909                 lower = lower->next;
910                 ndev = (struct net_device *)netdev_lower_get_next(ndev, &lower);
911                 if (is_vlan_dev(ndev)) {
912                         ini->vlan_id = vlan_dev_vlan_id(ndev);
913                         break;
914                 }
915         }
916         rtnl_unlock();
917
918 out_rel:
919         dst_release(dst);
920 out:
921         return rc;
922 }
923
924 static bool smcr_lgr_match(struct smc_link_group *lgr,
925                            struct smc_clc_msg_local *lcl,
926                            enum smc_lgr_role role, u32 clcqpn)
927 {
928         int i;
929
930         if (memcmp(lgr->peer_systemid, lcl->id_for_peer, SMC_SYSTEMID_LEN) ||
931             lgr->role != role)
932                 return false;
933
934         for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
935                 if (lgr->lnk[i].state != SMC_LNK_ACTIVE)
936                         continue;
937                 if ((lgr->role == SMC_SERV || lgr->lnk[i].peer_qpn == clcqpn) &&
938                     !memcmp(lgr->lnk[i].peer_gid, &lcl->gid, SMC_GID_SIZE) &&
939                     !memcmp(lgr->lnk[i].peer_mac, lcl->mac, sizeof(lcl->mac)))
940                         return true;
941         }
942         return false;
943 }
944
945 static bool smcd_lgr_match(struct smc_link_group *lgr,
946                            struct smcd_dev *smcismdev, u64 peer_gid)
947 {
948         return lgr->peer_gid == peer_gid && lgr->smcd == smcismdev;
949 }
950
951 /* create a new SMC connection (and a new link group if necessary) */
952 int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
953 {
954         struct smc_connection *conn = &smc->conn;
955         struct list_head *lgr_list;
956         struct smc_link_group *lgr;
957         enum smc_lgr_role role;
958         spinlock_t *lgr_lock;
959         int rc = 0;
960
961         lgr_list = ini->is_smcd ? &ini->ism_dev->lgr_list : &smc_lgr_list.list;
962         lgr_lock = ini->is_smcd ? &ini->ism_dev->lgr_lock : &smc_lgr_list.lock;
963         ini->cln_first_contact = SMC_FIRST_CONTACT;
964         role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
965         if (role == SMC_CLNT && ini->srv_first_contact)
966                 /* create new link group as well */
967                 goto create;
968
969         /* determine if an existing link group can be reused */
970         spin_lock_bh(lgr_lock);
971         list_for_each_entry(lgr, lgr_list, list) {
972                 write_lock_bh(&lgr->conns_lock);
973                 if ((ini->is_smcd ?
974                      smcd_lgr_match(lgr, ini->ism_dev, ini->ism_gid) :
975                      smcr_lgr_match(lgr, ini->ib_lcl, role, ini->ib_clcqpn)) &&
976                     !lgr->sync_err &&
977                     lgr->vlan_id == ini->vlan_id &&
978                     (role == SMC_CLNT ||
979                      lgr->conns_num < SMC_RMBS_PER_LGR_MAX)) {
980                         /* link group found */
981                         ini->cln_first_contact = SMC_REUSE_CONTACT;
982                         conn->lgr = lgr;
983                         rc = smc_lgr_register_conn(conn); /* add conn to lgr */
984                         write_unlock_bh(&lgr->conns_lock);
985                         if (!rc && delayed_work_pending(&lgr->free_work))
986                                 cancel_delayed_work(&lgr->free_work);
987                         break;
988                 }
989                 write_unlock_bh(&lgr->conns_lock);
990         }
991         spin_unlock_bh(lgr_lock);
992         if (rc)
993                 return rc;
994
995         if (role == SMC_CLNT && !ini->srv_first_contact &&
996             ini->cln_first_contact == SMC_FIRST_CONTACT) {
997                 /* Server reuses a link group, but Client wants to start
998                  * a new one; send an out_of_sync decline,
999                  * reason: synchronization error
1000                  */
1001                 return SMC_CLC_DECL_SYNCERR;
1002         }
1003
1004 create:
1005         if (ini->cln_first_contact == SMC_FIRST_CONTACT) {
1006                 rc = smc_lgr_create(smc, ini);
1007                 if (rc)
1008                         goto out;
1009                 lgr = conn->lgr;
1010                 write_lock_bh(&lgr->conns_lock);
1011                 rc = smc_lgr_register_conn(conn); /* add smc conn to lgr */
1012                 write_unlock_bh(&lgr->conns_lock);
1013                 if (rc)
1014                         goto out;
1015         }
1016         conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
1017         conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
1018         conn->urg_state = SMC_URG_READ;
1019         if (ini->is_smcd) {
1020                 conn->rx_off = sizeof(struct smcd_cdc_msg);
1021                 smcd_cdc_rx_init(conn); /* init tasklet for this conn */
1022         }
1023 #ifndef KERNEL_HAS_ATOMIC64
1024         spin_lock_init(&conn->acurs_lock);
1025 #endif
1026
1027 out:
1028         return rc;
1029 }
1030
1031 /* convert the RMB size into the compressed notation - minimum 16K.
1032  * In contrast to plain ilog2, this rounds towards the next power of 2,
1033  * so the socket application gets at least its desired sndbuf / rcvbuf size.
1034  */
1035 static u8 smc_compress_bufsize(int size)
1036 {
1037         u8 compressed;
1038
1039         if (size <= SMC_BUF_MIN_SIZE)
1040                 return 0;
1041
1042         size = (size - 1) >> 14;
1043         compressed = ilog2(size) + 1;
1044         if (compressed >= SMC_RMBE_SIZES)
1045                 compressed = SMC_RMBE_SIZES - 1;
1046         return compressed;
1047 }
1048
1049 /* convert the RMB size from compressed notation into integer */
1050 int smc_uncompress_bufsize(u8 compressed)
1051 {
1052         u32 size;
1053
1054         size = 0x00000001 << (((int)compressed) + 14);
1055         return (int)size;
1056 }
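/* Example (illustrative only): a requested size of 100000 bytes is compressed
 * by smc_compress_bufsize() to (99999 >> 14) = 6, ilog2(6) + 1 = 3, and
 * smc_uncompress_bufsize(3) = 1 << (3 + 14) = 131072, i.e. the smallest
 * supported buffer size that still satisfies the request.
 */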
1057
1058 /* try to reuse a sndbuf or rmb description slot for a certain
1059  * buffer size; if not available, return NULL
1060  */
1061 static struct smc_buf_desc *smc_buf_get_slot(int compressed_bufsize,
1062                                              struct mutex *lock,
1063                                              struct list_head *buf_list)
1064 {
1065         struct smc_buf_desc *buf_slot;
1066
1067         mutex_lock(lock);
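        /* take the first slot whose used flag can be atomically flipped 0 -> 1 */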
1068         list_for_each_entry(buf_slot, buf_list, list) {
1069                 if (cmpxchg(&buf_slot->used, 0, 1) == 0) {
1070                         mutex_unlock(lock);
1071                         return buf_slot;
1072                 }
1073         }
1074         mutex_unlock(lock);
1075         return NULL;
1076 }
1077
1078 /* one of the conditions for announcing a receiver's current window size is
1079  * that it "results in a minimum increase in the window size of 10% of the
1080  * receive buffer space" [RFC7609]
1081  */
1082 static inline int smc_rmb_wnd_update_limit(int rmbe_size)
1083 {
1084         return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
1085 }
1086
1087 /* map an rmb buf to a link */
1088 static int smcr_buf_map_link(struct smc_buf_desc *buf_desc, bool is_rmb,
1089                              struct smc_link *lnk)
1090 {
1091         int rc;
1092
1093         if (buf_desc->is_map_ib[lnk->link_idx])
1094                 return 0;
1095
1096         rc = sg_alloc_table(&buf_desc->sgt[lnk->link_idx], 1, GFP_KERNEL);
1097         if (rc)
1098                 return rc;
1099         sg_set_buf(buf_desc->sgt[lnk->link_idx].sgl,
1100                    buf_desc->cpu_addr, buf_desc->len);
1101
1102         /* map sg table to DMA address */
1103         rc = smc_ib_buf_map_sg(lnk, buf_desc,
1104                                is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
1105         /* SMC protocol depends on mapping to one DMA address only */
1106         if (rc != 1) {
1107                 rc = -EAGAIN;
1108                 goto free_table;
1109         }
1110
1111         /* create a new memory region for the RMB */
1112         if (is_rmb) {
1113                 rc = smc_ib_get_memory_region(lnk->roce_pd,
1114                                               IB_ACCESS_REMOTE_WRITE |
1115                                               IB_ACCESS_LOCAL_WRITE,
1116                                               buf_desc, lnk->link_idx);
1117                 if (rc)
1118                         goto buf_unmap;
1119                 smc_ib_sync_sg_for_device(lnk, buf_desc, DMA_FROM_DEVICE);
1120         }
1121         buf_desc->is_map_ib[lnk->link_idx] = true;
1122         return 0;
1123
1124 buf_unmap:
1125         smc_ib_buf_unmap_sg(lnk, buf_desc,
1126                             is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
1127 free_table:
1128         sg_free_table(&buf_desc->sgt[lnk->link_idx]);
1129         return rc;
1130 }
1131
1132 static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr,
1133                                                 bool is_rmb, int bufsize)
1134 {
1135         struct smc_buf_desc *buf_desc;
1136
1137         /* try to alloc a new buffer */
1138         buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
1139         if (!buf_desc)
1140                 return ERR_PTR(-ENOMEM);
1141
1142         buf_desc->order = get_order(bufsize);
1143         buf_desc->pages = alloc_pages(GFP_KERNEL | __GFP_NOWARN |
1144                                       __GFP_NOMEMALLOC | __GFP_COMP |
1145                                       __GFP_NORETRY | __GFP_ZERO,
1146                                       buf_desc->order);
1147         if (!buf_desc->pages) {
1148                 kfree(buf_desc);
1149                 return ERR_PTR(-EAGAIN);
1150         }
1151         buf_desc->cpu_addr = (void *)page_address(buf_desc->pages);
1152         buf_desc->len = bufsize;
1153         return buf_desc;
1154 }
1155
1156 /* map buf_desc on all usable links,
1157  * unused buffers stay mapped as long as the link is up
1158  */
1159 static int smcr_buf_map_usable_links(struct smc_link_group *lgr,
1160                                      struct smc_buf_desc *buf_desc, bool is_rmb)
1161 {
1162         int i, rc = 0;
1163
1164         for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
1165                 struct smc_link *lnk = &lgr->lnk[i];
1166
1167                 if (!smc_link_usable(lnk))
1168                         continue;
1169                 if (smcr_buf_map_link(buf_desc, is_rmb, lnk)) {
1170                         smcr_buf_unuse(buf_desc, lnk);
1171                         rc = -ENOMEM;
1172                         goto out;
1173                 }
1174         }
1175 out:
1176         return rc;
1177 }
1178
1179 #define SMCD_DMBE_SIZES         7 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */
1180
1181 static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr,
1182                                                 bool is_dmb, int bufsize)
1183 {
1184         struct smc_buf_desc *buf_desc;
1185         int rc;
1186
1187         if (smc_compress_bufsize(bufsize) > SMCD_DMBE_SIZES)
1188                 return ERR_PTR(-EAGAIN);
1189
1190         /* try to alloc a new DMB */
1191         buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
1192         if (!buf_desc)
1193                 return ERR_PTR(-ENOMEM);
1194         if (is_dmb) {
1195                 rc = smc_ism_register_dmb(lgr, bufsize, buf_desc);
1196                 if (rc) {
1197                         kfree(buf_desc);
1198                         return ERR_PTR(-EAGAIN);
1199                 }
1200                 buf_desc->pages = virt_to_page(buf_desc->cpu_addr);
1201                 /* CDC header stored in buf. So, pretend it was smaller */
1202                 buf_desc->len = bufsize - sizeof(struct smcd_cdc_msg);
1203         } else {
1204                 buf_desc->cpu_addr = kzalloc(bufsize, GFP_KERNEL |
1205                                              __GFP_NOWARN | __GFP_NORETRY |
1206                                              __GFP_NOMEMALLOC);
1207                 if (!buf_desc->cpu_addr) {
1208                         kfree(buf_desc);
1209                         return ERR_PTR(-EAGAIN);
1210                 }
1211                 buf_desc->len = bufsize;
1212         }
1213         return buf_desc;
1214 }
1215
1216 static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
1217 {
1218         struct smc_buf_desc *buf_desc = ERR_PTR(-ENOMEM);
1219         struct smc_connection *conn = &smc->conn;
1220         struct smc_link_group *lgr = conn->lgr;
1221         struct list_head *buf_list;
1222         int bufsize, bufsize_short;
1223         struct mutex *lock;     /* lock buffer list */
1224         int sk_buf_size;
1225
1226         if (is_rmb)
1227                 /* use socket recv buffer size (w/o overhead) as start value */
1228                 sk_buf_size = smc->sk.sk_rcvbuf / 2;
1229         else
1230                 /* use socket send buffer size (w/o overhead) as start value */
1231                 sk_buf_size = smc->sk.sk_sndbuf / 2;
1232
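        /* start with the size class derived from the socket buffer size and
         * fall back to smaller size classes if allocation fails
         */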
1233         for (bufsize_short = smc_compress_bufsize(sk_buf_size);
1234              bufsize_short >= 0; bufsize_short--) {
1235
1236                 if (is_rmb) {
1237                         lock = &lgr->rmbs_lock;
1238                         buf_list = &lgr->rmbs[bufsize_short];
1239                 } else {
1240                         lock = &lgr->sndbufs_lock;
1241                         buf_list = &lgr->sndbufs[bufsize_short];
1242                 }
1243                 bufsize = smc_uncompress_bufsize(bufsize_short);
1244                 if ((1 << get_order(bufsize)) > SG_MAX_SINGLE_ALLOC)
1245                         continue;
1246
1247                 /* check for reusable slot in the link group */
1248                 buf_desc = smc_buf_get_slot(bufsize_short, lock, buf_list);
1249                 if (buf_desc) {
1250                         memset(buf_desc->cpu_addr, 0, bufsize);
1251                         break; /* found reusable slot */
1252                 }
1253
1254                 if (is_smcd)
1255                         buf_desc = smcd_new_buf_create(lgr, is_rmb, bufsize);
1256                 else
1257                         buf_desc = smcr_new_buf_create(lgr, is_rmb, bufsize);
1258
1259                 if (PTR_ERR(buf_desc) == -ENOMEM)
1260                         break;
1261                 if (IS_ERR(buf_desc))
1262                         continue;
1263
1264                 buf_desc->used = 1;
1265                 mutex_lock(lock);
1266                 list_add(&buf_desc->list, buf_list);
1267                 mutex_unlock(lock);
1268                 break; /* found */
1269         }
1270
1271         if (IS_ERR(buf_desc))
1272                 return -ENOMEM;
1273
1274         if (!is_smcd) {
1275                 if (smcr_buf_map_usable_links(lgr, buf_desc, is_rmb))
1276                         return -ENOMEM;
1278         }
1279
1280         if (is_rmb) {
1281                 conn->rmb_desc = buf_desc;
1282                 conn->rmbe_size_short = bufsize_short;
1283                 smc->sk.sk_rcvbuf = bufsize * 2;
1284                 atomic_set(&conn->bytes_to_rcv, 0);
1285                 conn->rmbe_update_limit =
1286                         smc_rmb_wnd_update_limit(buf_desc->len);
1287                 if (is_smcd)
1288                         smc_ism_set_conn(conn); /* map RMB/smcd_dev to conn */
1289         } else {
1290                 conn->sndbuf_desc = buf_desc;
1291                 smc->sk.sk_sndbuf = bufsize * 2;
1292                 atomic_set(&conn->sndbuf_space, bufsize);
1293         }
1294         return 0;
1295 }
1296
1297 void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn)
1298 {
1299         if (!conn->lgr || conn->lgr->is_smcd || !smc_link_usable(conn->lnk))
1300                 return;
1301         smc_ib_sync_sg_for_cpu(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE);
1302 }
1303
1304 void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn)
1305 {
1306         if (!conn->lgr || conn->lgr->is_smcd || !smc_link_usable(conn->lnk))
1307                 return;
1308         smc_ib_sync_sg_for_device(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE);
1309 }
1310
1311 void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn)
1312 {
1313         int i;
1314
1315         if (!conn->lgr || conn->lgr->is_smcd)
1316                 return;
1317         for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
1318                 if (!smc_link_usable(&conn->lgr->lnk[i]))
1319                         continue;
1320                 smc_ib_sync_sg_for_cpu(&conn->lgr->lnk[i], conn->rmb_desc,
1321                                        DMA_FROM_DEVICE);
1322         }
1323 }
1324
1325 void smc_rmb_sync_sg_for_device(struct smc_connection *conn)
1326 {
1327         int i;
1328
1329         if (!conn->lgr || conn->lgr->is_smcd)
1330                 return;
1331         for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
1332                 if (!smc_link_usable(&conn->lgr->lnk[i]))
1333                         continue;
1334                 smc_ib_sync_sg_for_device(&conn->lgr->lnk[i], conn->rmb_desc,
1335                                           DMA_FROM_DEVICE);
1336         }
1337 }
1338
1339 /* create the send and receive buffer for an SMC socket;
1340  * receive buffers are called RMBs;
1341  * (even though the SMC protocol allows more than one RMB-element per RMB,
1342  * the Linux implementation uses just one RMB-element per RMB, i.e. uses an
1343  * extra RMB for every connection in a link group)
1344  */
1345 int smc_buf_create(struct smc_sock *smc, bool is_smcd)
1346 {
1347         int rc;
1348
1349         /* create send buffer */
1350         rc = __smc_buf_create(smc, is_smcd, false);
1351         if (rc)
1352                 return rc;
1353         /* create rmb */
1354         rc = __smc_buf_create(smc, is_smcd, true);
1355         if (rc)
1356                 smc_buf_free(smc->conn.lgr, false, smc->conn.sndbuf_desc);
1357         return rc;
1358 }
1359
1360 static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
1361 {
1362         int i;
1363
1364         for_each_clear_bit(i, lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX) {
1365                 if (!test_and_set_bit(i, lgr->rtokens_used_mask))
1366                         return i;
1367         }
1368         return -ENOSPC;
1369 }
1370
1371 /* add a new rtoken from peer */
1372 int smc_rtoken_add(struct smc_link *lnk, __be64 nw_vaddr, __be32 nw_rkey)
1373 {
1374         struct smc_link_group *lgr = smc_get_lgr(lnk);
1375         u64 dma_addr = be64_to_cpu(nw_vaddr);
1376         u32 rkey = ntohl(nw_rkey);
1377         int i;
1378
1379         for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
1380                 if (lgr->rtokens[i][lnk->link_idx].rkey == rkey &&
1381                     lgr->rtokens[i][lnk->link_idx].dma_addr == dma_addr &&
1382                     test_bit(i, lgr->rtokens_used_mask)) {
1383                         /* already in list */
1384                         return i;
1385                 }
1386         }
1387         i = smc_rmb_reserve_rtoken_idx(lgr);
1388         if (i < 0)
1389                 return i;
1390         lgr->rtokens[i][lnk->link_idx].rkey = rkey;
1391         lgr->rtokens[i][lnk->link_idx].dma_addr = dma_addr;
1392         return i;
1393 }
1394
1395 /* delete an rtoken from all links */
1396 int smc_rtoken_delete(struct smc_link *lnk, __be32 nw_rkey)
1397 {
1398         struct smc_link_group *lgr = smc_get_lgr(lnk);
1399         u32 rkey = ntohl(nw_rkey);
1400         int i, j;
1401
1402         for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
1403                 if (lgr->rtokens[i][lnk->link_idx].rkey == rkey &&
1404                     test_bit(i, lgr->rtokens_used_mask)) {
1405                         for (j = 0; j < SMC_LINKS_PER_LGR_MAX; j++) {
1406                                 lgr->rtokens[i][j].rkey = 0;
1407                                 lgr->rtokens[i][j].dma_addr = 0;
1408                         }
1409                         clear_bit(i, lgr->rtokens_used_mask);
1410                         return 0;
1411                 }
1412         }
1413         return -ENOENT;
1414 }
1415
1416 /* save rkey and dma_addr received from peer during clc handshake */
1417 int smc_rmb_rtoken_handling(struct smc_connection *conn,
1418                             struct smc_link *lnk,
1419                             struct smc_clc_msg_accept_confirm *clc)
1420 {
1421         conn->rtoken_idx = smc_rtoken_add(lnk, clc->rmb_dma_addr,
1422                                           clc->rmb_rkey);
1423         if (conn->rtoken_idx < 0)
1424                 return conn->rtoken_idx;
1425         return 0;
1426 }
1427
1428 static void smc_core_going_away(void)
1429 {
1430         struct smc_ib_device *smcibdev;
1431         struct smcd_dev *smcd;
1432
1433         spin_lock(&smc_ib_devices.lock);
1434         list_for_each_entry(smcibdev, &smc_ib_devices.list, list) {
1435                 int i;
1436
1437                 for (i = 0; i < SMC_MAX_PORTS; i++)
1438                         set_bit(i, smcibdev->ports_going_away);
1439         }
1440         spin_unlock(&smc_ib_devices.lock);
1441
1442         spin_lock(&smcd_dev_list.lock);
1443         list_for_each_entry(smcd, &smcd_dev_list.list, list) {
1444                 smcd->going_away = 1;
1445         }
1446         spin_unlock(&smcd_dev_list.lock);
1447 }
1448
1449 /* Clean up all SMC link groups */
1450 static void smc_lgrs_shutdown(void)
1451 {
1452         struct smcd_dev *smcd;
1453
1454         smc_core_going_away();
1455
1456         smc_smcr_terminate_all(NULL);
1457
1458         spin_lock(&smcd_dev_list.lock);
1459         list_for_each_entry(smcd, &smcd_dev_list.list, list)
1460                 smc_smcd_terminate_all(smcd);
1461         spin_unlock(&smcd_dev_list.lock);
1462 }
1463
1464 static int smc_core_reboot_event(struct notifier_block *this,
1465                                  unsigned long event, void *ptr)
1466 {
1467         smc_lgrs_shutdown();
1468         smc_ib_unregister_client();
1469         return 0;
1470 }
1471
1472 static struct notifier_block smc_reboot_notifier = {
1473         .notifier_call = smc_core_reboot_event,
1474 };
1475
1476 int __init smc_core_init(void)
1477 {
1478         return register_reboot_notifier(&smc_reboot_notifier);
1479 }
1480
1481 /* Called (from smc_exit) when module is removed */
1482 void smc_core_exit(void)
1483 {
1484         unregister_reboot_notifier(&smc_reboot_notifier);
1485         smc_lgrs_shutdown();
1486 }