net/smc: introduce link_idx for link group array
net/smc/smc_core.c
// SPDX-License-Identifier: GPL-2.0
/*
 *  Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 *  Basic Transport Functions exploiting Infiniband API
 *
 *  Copyright IBM Corp. 2016
 *
 *  Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
 */

#include <linux/socket.h>
#include <linux/if_vlan.h>
#include <linux/random.h>
#include <linux/workqueue.h>
#include <linux/wait.h>
#include <linux/reboot.h>
#include <net/tcp.h>
#include <net/sock.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_cache.h>

#include "smc.h"
#include "smc_clc.h"
#include "smc_core.h"
#include "smc_ib.h"
#include "smc_wr.h"
#include "smc_llc.h"
#include "smc_cdc.h"
#include "smc_close.h"
#include "smc_ism.h"

#define SMC_LGR_NUM_INCR                256
#define SMC_LGR_FREE_DELAY_SERV         (600 * HZ)
#define SMC_LGR_FREE_DELAY_CLNT         (SMC_LGR_FREE_DELAY_SERV + 10 * HZ)
#define SMC_LGR_FREE_DELAY_FAST         (8 * HZ)

static struct smc_lgr_list smc_lgr_list = {     /* established link groups */
        .lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock),
        .list = LIST_HEAD_INIT(smc_lgr_list.list),
        .num = 0,
};

static atomic_t lgr_cnt = ATOMIC_INIT(0); /* number of existing link groups */
static DECLARE_WAIT_QUEUE_HEAD(lgrs_deleted);

static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
                         struct smc_buf_desc *buf_desc);
static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft);

/* return head of link group list and its lock for a given link group */
static inline struct list_head *smc_lgr_list_head(struct smc_link_group *lgr,
                                                  spinlock_t **lgr_lock)
{
        if (lgr->is_smcd) {
                *lgr_lock = &lgr->smcd->lgr_lock;
                return &lgr->smcd->lgr_list;
        }

        *lgr_lock = &smc_lgr_list.lock;
        return &smc_lgr_list.list;
}

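/* (re)schedule the delayed freeing of a link group that has become unused */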
static void smc_lgr_schedule_free_work(struct smc_link_group *lgr)
{
        /* client link group creation always follows the server link group
         * creation. For client use a somewhat higher removal delay time,
         * otherwise there is a risk of out-of-sync link groups.
         */
        if (!lgr->freeing && !lgr->freefast) {
                mod_delayed_work(system_wq, &lgr->free_work,
                                 (!lgr->is_smcd && lgr->role == SMC_CLNT) ?
                                                SMC_LGR_FREE_DELAY_CLNT :
                                                SMC_LGR_FREE_DELAY_SERV);
        }
}

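/* schedule the freeing of an unused link group with a short delay */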
void smc_lgr_schedule_free_work_fast(struct smc_link_group *lgr)
{
        if (!lgr->freeing && !lgr->freefast) {
                lgr->freefast = 1;
                mod_delayed_work(system_wq, &lgr->free_work,
                                 SMC_LGR_FREE_DELAY_FAST);
        }
}

/* Register connection's alert token in our lookup structure.
 * To use rbtrees we have to implement our own insert core.
 * Requires @conns_lock
 * @conn        connection to register
 */
static void smc_lgr_add_alert_token(struct smc_connection *conn)
{
        struct rb_node **link, *parent = NULL;
        u32 token = conn->alert_token_local;

        link = &conn->lgr->conns_all.rb_node;
        while (*link) {
                struct smc_connection *cur = rb_entry(*link,
                                        struct smc_connection, alert_node);

                parent = *link;
                if (cur->alert_token_local > token)
                        link = &parent->rb_left;
                else
                        link = &parent->rb_right;
        }
        /* Put the new node there */
        rb_link_node(&conn->alert_node, parent, link);
        rb_insert_color(&conn->alert_node, &conn->lgr->conns_all);
}

/* Register connection in link group by assigning an alert token
 * registered in a search tree.
 * Requires @conns_lock
 * Note that '0' is a reserved value and not assigned.
 */
static void smc_lgr_register_conn(struct smc_connection *conn)
{
        struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
        static atomic_t nexttoken = ATOMIC_INIT(0);

        /* find a new alert_token_local value not yet used by some connection
         * in this link group
         */
        sock_hold(&smc->sk); /* sock_put in smc_lgr_unregister_conn() */
        while (!conn->alert_token_local) {
                conn->alert_token_local = atomic_inc_return(&nexttoken);
                if (smc_lgr_find_conn(conn->alert_token_local, conn->lgr))
                        conn->alert_token_local = 0;
        }
        smc_lgr_add_alert_token(conn);
        conn->lgr->conns_num++;
}

/* Unregister connection and reset the alert token of the given connection
 */
static void __smc_lgr_unregister_conn(struct smc_connection *conn)
{
        struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
        struct smc_link_group *lgr = conn->lgr;

        rb_erase(&conn->alert_node, &lgr->conns_all);
        lgr->conns_num--;
        conn->alert_token_local = 0;
        sock_put(&smc->sk); /* sock_hold in smc_lgr_register_conn() */
}

/* Unregister connection from lgr
 */
static void smc_lgr_unregister_conn(struct smc_connection *conn)
{
        struct smc_link_group *lgr = conn->lgr;

        if (!lgr)
                return;
        write_lock_bh(&lgr->conns_lock);
        if (conn->alert_token_local) {
                __smc_lgr_unregister_conn(conn);
        }
        write_unlock_bh(&lgr->conns_lock);
        conn->lgr = NULL;
}

void smc_lgr_cleanup_early(struct smc_connection *conn)
{
        struct smc_link_group *lgr = conn->lgr;

        if (!lgr)
                return;

        smc_conn_free(conn);
        smc_lgr_forget(lgr);
        smc_lgr_schedule_free_work_fast(lgr);
}

/* Send delete link, either as client to request the initiation
 * of the DELETE LINK sequence from server; or as server to
 * initiate the delete processing. See smc_llc_rx_delete_link().
 */
static int smcr_link_send_delete(struct smc_link *lnk, bool orderly)
{
        if (lnk->state == SMC_LNK_ACTIVE &&
            !smc_llc_send_delete_link(lnk, SMC_LLC_REQ, orderly)) {
                smc_llc_link_deleting(lnk);
                return 0;
        }
        return -ENOTCONN;
}

static void smc_lgr_free(struct smc_link_group *lgr);

static void smc_lgr_free_work(struct work_struct *work)
{
        struct smc_link_group *lgr = container_of(to_delayed_work(work),
                                                  struct smc_link_group,
                                                  free_work);
        spinlock_t *lgr_lock;
        struct smc_link *lnk;
        bool conns;

        smc_lgr_list_head(lgr, &lgr_lock);
        spin_lock_bh(lgr_lock);
        if (lgr->freeing) {
                spin_unlock_bh(lgr_lock);
                return;
        }
        read_lock_bh(&lgr->conns_lock);
        conns = RB_EMPTY_ROOT(&lgr->conns_all);
        read_unlock_bh(&lgr->conns_lock);
        if (!conns) { /* number of lgr connections is no longer zero */
                spin_unlock_bh(lgr_lock);
                return;
        }
        list_del_init(&lgr->list); /* remove from smc_lgr_list */

        lnk = &lgr->lnk[SMC_SINGLE_LINK];
        if (!lgr->is_smcd && !lgr->terminating) {
                /* try to send del link msg, on error free lgr immediately */
                if (lnk->state == SMC_LNK_ACTIVE &&
                    !smcr_link_send_delete(lnk, true)) {
                        /* reschedule in case we never receive a response */
                        smc_lgr_schedule_free_work(lgr);
                        spin_unlock_bh(lgr_lock);
                        return;
                }
        }
        lgr->freeing = 1; /* this instance does the freeing, no new schedule */
        spin_unlock_bh(lgr_lock);
        cancel_delayed_work(&lgr->free_work);

        if (!lgr->is_smcd && lnk->state != SMC_LNK_INACTIVE)
                smc_llc_link_inactive(lnk);
        if (lgr->is_smcd && !lgr->terminating)
                smc_ism_signal_shutdown(lgr);
        smc_lgr_free(lgr);
}

static void smc_lgr_terminate_work(struct work_struct *work)
{
        struct smc_link_group *lgr = container_of(work, struct smc_link_group,
                                                  terminate_work);

        __smc_lgr_terminate(lgr, true);
}

/* return next unique link id for the lgr */
static u8 smcr_next_link_id(struct smc_link_group *lgr)
{
        u8 link_id;
        int i;

again:
        link_id = ++lgr->next_link_id;
        if (!link_id)   /* skip zero as link_id */
                link_id = ++lgr->next_link_id;
        for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
                if (lgr->lnk[i].state != SMC_LNK_INACTIVE &&
                    lgr->lnk[i].link_id == link_id)
                        goto again; /* link_id already in use, try next one */
        }
        return link_id;
}

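/* initialize a new SMC-R link in slot link_idx of the lgr's link array:
 * assign a unique link_id, set up LLC state, work request buffers,
 * protection domain and queue pair; on error, undo in reverse order
 */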
static int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
                          u8 link_idx, struct smc_init_info *ini)
{
        u8 rndvec[3];
        int rc;

        get_device(&ini->ib_dev->ibdev->dev);
        atomic_inc(&ini->ib_dev->lnk_cnt);
        lnk->state = SMC_LNK_ACTIVATING;
        lnk->link_id = smcr_next_link_id(lgr);
        lnk->link_idx = link_idx;
        lnk->smcibdev = ini->ib_dev;
        lnk->ibport = ini->ib_port;
        lnk->path_mtu = ini->ib_dev->pattr[ini->ib_port - 1].active_mtu;
        if (!ini->ib_dev->initialized) {
                rc = smc_ib_setup_per_ibdev(ini->ib_dev);
                if (rc)
                        goto out;
        }
        get_random_bytes(rndvec, sizeof(rndvec));
        lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) +
                (rndvec[2] << 16);
        rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport,
                                  ini->vlan_id, lnk->gid, &lnk->sgid_index);
        if (rc)
                goto out;
        rc = smc_llc_link_init(lnk);
        if (rc)
                goto out;
        rc = smc_wr_alloc_link_mem(lnk);
        if (rc)
                goto clear_llc_lnk;
        rc = smc_ib_create_protection_domain(lnk);
        if (rc)
                goto free_link_mem;
        rc = smc_ib_create_queue_pair(lnk);
        if (rc)
                goto dealloc_pd;
        rc = smc_wr_create_link(lnk);
        if (rc)
                goto destroy_qp;
        return 0;

destroy_qp:
        smc_ib_destroy_queue_pair(lnk);
dealloc_pd:
        smc_ib_dealloc_protection_domain(lnk);
free_link_mem:
        smc_wr_free_link_mem(lnk);
clear_llc_lnk:
        smc_llc_link_clear(lnk);
out:
        put_device(&ini->ib_dev->ibdev->dev);
        memset(lnk, 0, sizeof(struct smc_link));
        if (!atomic_dec_return(&ini->ib_dev->lnk_cnt))
                wake_up(&ini->ib_dev->lnks_deleted);
        return rc;
}

/* create a new SMC link group */
static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
{
        struct smc_link_group *lgr;
        struct list_head *lgr_list;
        struct smc_link *lnk;
        spinlock_t *lgr_lock;
        u8 link_idx;
        int rc = 0;
        int i;

        if (ini->is_smcd && ini->vlan_id) {
                if (smc_ism_get_vlan(ini->ism_dev, ini->vlan_id)) {
                        rc = SMC_CLC_DECL_ISMVLANERR;
                        goto out;
                }
        }

        lgr = kzalloc(sizeof(*lgr), GFP_KERNEL);
        if (!lgr) {
                rc = SMC_CLC_DECL_MEM;
                goto ism_put_vlan;
        }
        lgr->is_smcd = ini->is_smcd;
        lgr->sync_err = 0;
        lgr->terminating = 0;
        lgr->freefast = 0;
        lgr->freeing = 0;
        lgr->vlan_id = ini->vlan_id;
        rwlock_init(&lgr->sndbufs_lock);
        rwlock_init(&lgr->rmbs_lock);
        rwlock_init(&lgr->conns_lock);
        for (i = 0; i < SMC_RMBE_SIZES; i++) {
                INIT_LIST_HEAD(&lgr->sndbufs[i]);
                INIT_LIST_HEAD(&lgr->rmbs[i]);
        }
        lgr->next_link_id = 0;
        smc_lgr_list.num += SMC_LGR_NUM_INCR;
        memcpy(&lgr->id, (u8 *)&smc_lgr_list.num, SMC_LGR_ID_SIZE);
        INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work);
        INIT_WORK(&lgr->terminate_work, smc_lgr_terminate_work);
        lgr->conns_all = RB_ROOT;
        if (ini->is_smcd) {
                /* SMC-D specific settings */
                get_device(&ini->ism_dev->dev);
                lgr->peer_gid = ini->ism_gid;
                lgr->smcd = ini->ism_dev;
                lgr_list = &ini->ism_dev->lgr_list;
                lgr_lock = &lgr->smcd->lgr_lock;
                lgr->peer_shutdown = 0;
                atomic_inc(&ini->ism_dev->lgr_cnt);
        } else {
                /* SMC-R specific settings */
                lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
                memcpy(lgr->peer_systemid, ini->ib_lcl->id_for_peer,
                       SMC_SYSTEMID_LEN);

                link_idx = SMC_SINGLE_LINK;
                lnk = &lgr->lnk[link_idx];
                rc = smcr_link_init(lgr, lnk, link_idx, ini);
                if (rc)
                        goto free_lgr;
                lgr_list = &smc_lgr_list.list;
                lgr_lock = &smc_lgr_list.lock;
                atomic_inc(&lgr_cnt);
        }
        smc->conn.lgr = lgr;
        spin_lock_bh(lgr_lock);
        list_add(&lgr->list, lgr_list);
        spin_unlock_bh(lgr_lock);
        return 0;

free_lgr:
        kfree(lgr);
ism_put_vlan:
        if (ini->is_smcd && ini->vlan_id)
                smc_ism_put_vlan(ini->ism_dev, ini->vlan_id);
out:
        if (rc < 0) {
                if (rc == -ENOMEM)
                        rc = SMC_CLC_DECL_MEM;
                else
                        rc = SMC_CLC_DECL_INTERR;
        }
        return rc;
}

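/* mark a connection's buffers as unused so the link group can hand them
 * to a later connection; an RMB whose registration with the peer failed
 * cannot be reused and is freed instead
 */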
static void smc_buf_unuse(struct smc_connection *conn,
                          struct smc_link_group *lgr)
{
        if (conn->sndbuf_desc)
                conn->sndbuf_desc->used = 0;
        if (conn->rmb_desc) {
                if (!conn->rmb_desc->regerr) {
                        if (!lgr->is_smcd && !list_empty(&lgr->list)) {
                                /* unregister rmb with peer */
                                smc_llc_do_delete_rkey(
                                                &lgr->lnk[SMC_SINGLE_LINK],
                                                conn->rmb_desc);
                        }
                        conn->rmb_desc->used = 0;
                } else {
                        /* buf registration failed, reuse not possible */
                        write_lock_bh(&lgr->rmbs_lock);
                        list_del(&conn->rmb_desc->list);
                        write_unlock_bh(&lgr->rmbs_lock);

                        smc_buf_free(lgr, true, conn->rmb_desc);
                }
        }
}

/* remove a finished connection from its link group */
void smc_conn_free(struct smc_connection *conn)
{
        struct smc_link_group *lgr = conn->lgr;

        if (!lgr)
                return;
        if (lgr->is_smcd) {
                if (!list_empty(&lgr->list))
                        smc_ism_unset_conn(conn);
                tasklet_kill(&conn->rx_tsklet);
        } else {
                smc_cdc_tx_dismiss_slots(conn);
        }
        if (!list_empty(&lgr->list)) {
                smc_lgr_unregister_conn(conn);
                smc_buf_unuse(conn, lgr); /* allow buffer reuse */
        }

        if (!lgr->conns_num)
                smc_lgr_schedule_free_work(lgr);
}

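/* release all resources of an SMC-R link: reset and destroy the QP, free
 * work request buffers and the protection domain, and drop the device
 * reference taken in smcr_link_init()
 */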
static void smcr_link_clear(struct smc_link *lnk)
{
        lnk->peer_qpn = 0;
        smc_llc_link_clear(lnk);
        smc_ib_modify_qp_reset(lnk);
        smc_wr_free_link(lnk);
        smc_ib_destroy_queue_pair(lnk);
        smc_ib_dealloc_protection_domain(lnk);
        smc_wr_free_link_mem(lnk);
        put_device(&lnk->smcibdev->ibdev->dev);
        if (!atomic_dec_return(&lnk->smcibdev->lnk_cnt))
                wake_up(&lnk->smcibdev->lnks_deleted);
}

static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb,
                          struct smc_buf_desc *buf_desc)
{
        struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];

        if (is_rmb) {
                if (buf_desc->mr_rx[SMC_SINGLE_LINK])
                        smc_ib_put_memory_region(
                                        buf_desc->mr_rx[SMC_SINGLE_LINK]);
                smc_ib_buf_unmap_sg(lnk->smcibdev, buf_desc,
                                    DMA_FROM_DEVICE);
        } else {
                smc_ib_buf_unmap_sg(lnk->smcibdev, buf_desc,
                                    DMA_TO_DEVICE);
        }
        sg_free_table(&buf_desc->sgt[SMC_SINGLE_LINK]);
        if (buf_desc->pages)
                __free_pages(buf_desc->pages, buf_desc->order);
        kfree(buf_desc);
}

static void smcd_buf_free(struct smc_link_group *lgr, bool is_dmb,
                          struct smc_buf_desc *buf_desc)
{
        if (is_dmb) {
                /* restore original buf len */
                buf_desc->len += sizeof(struct smcd_cdc_msg);
                smc_ism_unregister_dmb(lgr->smcd, buf_desc);
        } else {
                kfree(buf_desc->cpu_addr);
        }
        kfree(buf_desc);
}

static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
                         struct smc_buf_desc *buf_desc)
{
        if (lgr->is_smcd)
                smcd_buf_free(lgr, is_rmb, buf_desc);
        else
                smcr_buf_free(lgr, is_rmb, buf_desc);
}

static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb)
{
        struct smc_buf_desc *buf_desc, *bf_desc;
        struct list_head *buf_list;
        int i;

        for (i = 0; i < SMC_RMBE_SIZES; i++) {
                if (is_rmb)
                        buf_list = &lgr->rmbs[i];
                else
                        buf_list = &lgr->sndbufs[i];
                list_for_each_entry_safe(buf_desc, bf_desc, buf_list,
                                         list) {
                        list_del(&buf_desc->list);
                        smc_buf_free(lgr, is_rmb, buf_desc);
                }
        }
}

static void smc_lgr_free_bufs(struct smc_link_group *lgr)
{
        /* free send buffers */
        __smc_lgr_free_bufs(lgr, false);
        /* free rmbs */
        __smc_lgr_free_bufs(lgr, true);
}

/* remove a link group */
static void smc_lgr_free(struct smc_link_group *lgr)
{
        smc_lgr_free_bufs(lgr);
        if (lgr->is_smcd) {
                if (!lgr->terminating) {
                        smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
                        put_device(&lgr->smcd->dev);
                }
                if (!atomic_dec_return(&lgr->smcd->lgr_cnt))
                        wake_up(&lgr->smcd->lgrs_deleted);
        } else {
                smcr_link_clear(&lgr->lnk[SMC_SINGLE_LINK]);
                if (!atomic_dec_return(&lgr_cnt))
                        wake_up(&lgrs_deleted);
        }
        kfree(lgr);
}

void smc_lgr_forget(struct smc_link_group *lgr)
{
        struct list_head *lgr_list;
        spinlock_t *lgr_lock;

        lgr_list = smc_lgr_list_head(lgr, &lgr_lock);
        spin_lock_bh(lgr_lock);
        /* do not use this link group for new connections */
        if (!list_empty(lgr_list))
                list_del_init(lgr_list);
        spin_unlock_bh(lgr_lock);
}

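/* unregister all DMBs of a terminating SMC-D link group with the ISM device */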
static void smcd_unregister_all_dmbs(struct smc_link_group *lgr)
{
        int i;

        for (i = 0; i < SMC_RMBE_SIZES; i++) {
                struct smc_buf_desc *buf_desc;

                list_for_each_entry(buf_desc, &lgr->rmbs[i], list) {
                        buf_desc->len += sizeof(struct smcd_cdc_msg);
                        smc_ism_unregister_dmb(lgr->smcd, buf_desc);
                }
        }
}

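/* wake up socket waiters so they notice the changed socket state */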
static void smc_sk_wake_ups(struct smc_sock *smc)
{
        smc->sk.sk_write_space(&smc->sk);
        smc->sk.sk_data_ready(&smc->sk);
        smc->sk.sk_state_change(&smc->sk);
}

/* kill a connection */
static void smc_conn_kill(struct smc_connection *conn, bool soft)
{
        struct smc_sock *smc = container_of(conn, struct smc_sock, conn);

        if (conn->lgr->is_smcd && conn->lgr->peer_shutdown)
                conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
        else
                smc_close_abort(conn);
        conn->killed = 1;
        smc->sk.sk_err = ECONNABORTED;
        smc_sk_wake_ups(smc);
        if (conn->lgr->is_smcd) {
                smc_ism_unset_conn(conn);
                if (soft)
                        tasklet_kill(&conn->rx_tsklet);
                else
                        tasklet_unlock_wait(&conn->rx_tsklet);
        } else {
                smc_cdc_tx_dismiss_slots(conn);
        }
        smc_lgr_unregister_conn(conn);
        smc_close_active_abort(smc);
}

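/* release device and buffer resources of a terminating link group */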
static void smc_lgr_cleanup(struct smc_link_group *lgr)
{
        if (lgr->is_smcd) {
                smc_ism_signal_shutdown(lgr);
                smcd_unregister_all_dmbs(lgr);
                smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
                put_device(&lgr->smcd->dev);
        } else {
                struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];

                if (lnk->state != SMC_LNK_INACTIVE)
                        smc_llc_link_inactive(lnk);
        }
}

/* terminate link group
 * @soft: true if link group shutdown can take its time
 *        false if immediate link group shutdown is required
 */
static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft)
{
        struct smc_connection *conn;
        struct smc_sock *smc;
        struct rb_node *node;

        if (lgr->terminating)
                return; /* lgr already terminating */
        if (!soft)
                cancel_delayed_work_sync(&lgr->free_work);
        lgr->terminating = 1;
        if (!lgr->is_smcd)
                smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]);

        /* kill remaining link group connections */
        read_lock_bh(&lgr->conns_lock);
        node = rb_first(&lgr->conns_all);
        while (node) {
                read_unlock_bh(&lgr->conns_lock);
                conn = rb_entry(node, struct smc_connection, alert_node);
                smc = container_of(conn, struct smc_sock, conn);
                sock_hold(&smc->sk); /* sock_put below */
                lock_sock(&smc->sk);
                smc_conn_kill(conn, soft);
                release_sock(&smc->sk);
                sock_put(&smc->sk); /* sock_hold above */
                read_lock_bh(&lgr->conns_lock);
                node = rb_first(&lgr->conns_all);
        }
        read_unlock_bh(&lgr->conns_lock);
        smc_lgr_cleanup(lgr);
        if (soft)
                smc_lgr_schedule_free_work_fast(lgr);
        else
                smc_lgr_free(lgr);
}

/* unlink link group and schedule termination */
void smc_lgr_terminate_sched(struct smc_link_group *lgr)
{
        spinlock_t *lgr_lock;

        smc_lgr_list_head(lgr, &lgr_lock);
        spin_lock_bh(lgr_lock);
        if (list_empty(&lgr->list) || lgr->terminating || lgr->freeing) {
                spin_unlock_bh(lgr_lock);
                return; /* lgr already terminating */
        }
        list_del_init(&lgr->list);
        spin_unlock_bh(lgr_lock);
        schedule_work(&lgr->terminate_work);
}

/* Called when IB port is terminated */
void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport)
{
        struct smc_link_group *lgr, *l;
        LIST_HEAD(lgr_free_list);

        spin_lock_bh(&smc_lgr_list.lock);
        list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) {
                if (!lgr->is_smcd &&
                    lgr->lnk[SMC_SINGLE_LINK].smcibdev == smcibdev &&
                    lgr->lnk[SMC_SINGLE_LINK].ibport == ibport) {
                        list_move(&lgr->list, &lgr_free_list);
                        lgr->freeing = 1;
                }
        }
        spin_unlock_bh(&smc_lgr_list.lock);

        list_for_each_entry_safe(lgr, l, &lgr_free_list, list) {
                list_del_init(&lgr->list);
                __smc_lgr_terminate(lgr, false);
        }
}

/* Called when peer lgr shutdown (regularly or abnormally) is received */
void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan)
{
        struct smc_link_group *lgr, *l;
        LIST_HEAD(lgr_free_list);

        /* run common cleanup function and build free list */
        spin_lock_bh(&dev->lgr_lock);
        list_for_each_entry_safe(lgr, l, &dev->lgr_list, list) {
                if ((!peer_gid || lgr->peer_gid == peer_gid) &&
                    (vlan == VLAN_VID_MASK || lgr->vlan_id == vlan)) {
                        if (peer_gid) /* peer triggered termination */
                                lgr->peer_shutdown = 1;
                        list_move(&lgr->list, &lgr_free_list);
                }
        }
        spin_unlock_bh(&dev->lgr_lock);

        /* cancel the regular free workers and actually free lgrs */
        list_for_each_entry_safe(lgr, l, &lgr_free_list, list) {
                list_del_init(&lgr->list);
                schedule_work(&lgr->terminate_work);
        }
}

/* Called when an SMCD device is removed or the smc module is unloaded */
void smc_smcd_terminate_all(struct smcd_dev *smcd)
{
        struct smc_link_group *lgr, *lg;
        LIST_HEAD(lgr_free_list);

        spin_lock_bh(&smcd->lgr_lock);
        list_splice_init(&smcd->lgr_list, &lgr_free_list);
        list_for_each_entry(lgr, &lgr_free_list, list)
                lgr->freeing = 1;
        spin_unlock_bh(&smcd->lgr_lock);

        list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) {
                list_del_init(&lgr->list);
                __smc_lgr_terminate(lgr, false);
        }

        if (atomic_read(&smcd->lgr_cnt))
                wait_event(smcd->lgrs_deleted, !atomic_read(&smcd->lgr_cnt));
}

/* Called when an SMCR device is removed or the smc module is unloaded.
 * If smcibdev is given, all SMCR link groups using this device are terminated.
 * If smcibdev is NULL, all SMCR link groups are terminated.
 */
void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
{
        struct smc_link_group *lgr, *lg;
        LIST_HEAD(lgr_free_list);

        spin_lock_bh(&smc_lgr_list.lock);
        if (!smcibdev) {
                list_splice_init(&smc_lgr_list.list, &lgr_free_list);
                list_for_each_entry(lgr, &lgr_free_list, list)
                        lgr->freeing = 1;
        } else {
                list_for_each_entry_safe(lgr, lg, &smc_lgr_list.list, list) {
                        if (lgr->lnk[SMC_SINGLE_LINK].smcibdev == smcibdev) {
                                list_move(&lgr->list, &lgr_free_list);
                                lgr->freeing = 1;
                        }
                }
        }
        spin_unlock_bh(&smc_lgr_list.lock);

        list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) {
                list_del_init(&lgr->list);
                __smc_lgr_terminate(lgr, false);
        }

        if (smcibdev) {
                if (atomic_read(&smcibdev->lnk_cnt))
                        wait_event(smcibdev->lnks_deleted,
                                   !atomic_read(&smcibdev->lnk_cnt));
        } else {
                if (atomic_read(&lgr_cnt))
                        wait_event(lgrs_deleted, !atomic_read(&lgr_cnt));
        }
}

/* Determine vlan of internal TCP socket.
 * @ini: the determined vlan id is stored in ini->vlan_id
 */
int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini)
{
        struct dst_entry *dst = sk_dst_get(clcsock->sk);
        struct net_device *ndev;
        int i, nest_lvl, rc = 0;

        ini->vlan_id = 0;
        if (!dst) {
                rc = -ENOTCONN;
                goto out;
        }
        if (!dst->dev) {
                rc = -ENODEV;
                goto out_rel;
        }

        ndev = dst->dev;
        if (is_vlan_dev(ndev)) {
                ini->vlan_id = vlan_dev_vlan_id(ndev);
                goto out_rel;
        }

        rtnl_lock();
        nest_lvl = ndev->lower_level;
        for (i = 0; i < nest_lvl; i++) {
                struct list_head *lower = &ndev->adj_list.lower;

                if (list_empty(lower))
                        break;
                lower = lower->next;
                ndev = (struct net_device *)netdev_lower_get_next(ndev, &lower);
                if (is_vlan_dev(ndev)) {
                        ini->vlan_id = vlan_dev_vlan_id(ndev);
                        break;
                }
        }
        rtnl_unlock();

out_rel:
        dst_release(dst);
out:
        return rc;
}

static bool smcr_lgr_match(struct smc_link_group *lgr,
                           struct smc_clc_msg_local *lcl,
                           enum smc_lgr_role role, u32 clcqpn)
{
        return !memcmp(lgr->peer_systemid, lcl->id_for_peer,
                       SMC_SYSTEMID_LEN) &&
                !memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_gid, &lcl->gid,
                        SMC_GID_SIZE) &&
                !memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_mac, lcl->mac,
                        sizeof(lcl->mac)) &&
                lgr->role == role &&
                (lgr->role == SMC_SERV ||
                 lgr->lnk[SMC_SINGLE_LINK].peer_qpn == clcqpn);
}

static bool smcd_lgr_match(struct smc_link_group *lgr,
                           struct smcd_dev *smcismdev, u64 peer_gid)
{
        return lgr->peer_gid == peer_gid && lgr->smcd == smcismdev;
}

/* create a new SMC connection (and a new link group if necessary) */
int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
{
        struct smc_connection *conn = &smc->conn;
        struct list_head *lgr_list;
        struct smc_link_group *lgr;
        enum smc_lgr_role role;
        spinlock_t *lgr_lock;
        int rc = 0;

        lgr_list = ini->is_smcd ? &ini->ism_dev->lgr_list : &smc_lgr_list.list;
        lgr_lock = ini->is_smcd ? &ini->ism_dev->lgr_lock : &smc_lgr_list.lock;
        ini->cln_first_contact = SMC_FIRST_CONTACT;
        role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
        if (role == SMC_CLNT && ini->srv_first_contact)
                /* create new link group as well */
                goto create;

        /* determine if an existing link group can be reused */
        spin_lock_bh(lgr_lock);
        list_for_each_entry(lgr, lgr_list, list) {
                write_lock_bh(&lgr->conns_lock);
                if ((ini->is_smcd ?
                     smcd_lgr_match(lgr, ini->ism_dev, ini->ism_gid) :
                     smcr_lgr_match(lgr, ini->ib_lcl, role, ini->ib_clcqpn)) &&
                    !lgr->sync_err &&
                    lgr->vlan_id == ini->vlan_id &&
                    (role == SMC_CLNT ||
                     lgr->conns_num < SMC_RMBS_PER_LGR_MAX)) {
                        /* link group found */
                        ini->cln_first_contact = SMC_REUSE_CONTACT;
                        conn->lgr = lgr;
                        smc_lgr_register_conn(conn); /* add smc conn to lgr */
                        if (delayed_work_pending(&lgr->free_work))
                                cancel_delayed_work(&lgr->free_work);
                        write_unlock_bh(&lgr->conns_lock);
                        break;
                }
                write_unlock_bh(&lgr->conns_lock);
        }
        spin_unlock_bh(lgr_lock);

        if (role == SMC_CLNT && !ini->srv_first_contact &&
            ini->cln_first_contact == SMC_FIRST_CONTACT) {
                /* Server reuses a link group, but Client wants to start
                 * a new one
                 * send out_of_sync decline, reason synchr. error
                 */
                return SMC_CLC_DECL_SYNCERR;
        }

create:
        if (ini->cln_first_contact == SMC_FIRST_CONTACT) {
                rc = smc_lgr_create(smc, ini);
                if (rc)
                        goto out;
                lgr = conn->lgr;
                write_lock_bh(&lgr->conns_lock);
                smc_lgr_register_conn(conn); /* add smc conn to lgr */
                write_unlock_bh(&lgr->conns_lock);
        }
        conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
        conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
        conn->urg_state = SMC_URG_READ;
        if (ini->is_smcd) {
                conn->rx_off = sizeof(struct smcd_cdc_msg);
                smcd_cdc_rx_init(conn); /* init tasklet for this conn */
        }
#ifndef KERNEL_HAS_ATOMIC64
        spin_lock_init(&conn->acurs_lock);
#endif

out:
        return rc;
}

/* convert the RMB size into the compressed notation - minimum 16K.
 * In contrast to plain ilog2, this rounds towards the next power of 2,
 * so the socket application gets at least its desired sndbuf / rcvbuf size.
 */
static u8 smc_compress_bufsize(int size)
{
        u8 compressed;

        if (size <= SMC_BUF_MIN_SIZE)
                return 0;

        size = (size - 1) >> 14;
        compressed = ilog2(size) + 1;
        if (compressed >= SMC_RMBE_SIZES)
                compressed = SMC_RMBE_SIZES - 1;
        return compressed;
}

/* convert the RMB size from compressed notation into integer */
int smc_uncompress_bufsize(u8 compressed)
{
        u32 size;

        size = 0x00000001 << (((int)compressed) + 14);
        return (int)size;
}

/* try to reuse a sndbuf or rmb description slot for a certain
 * buffer size; if not available, return NULL
 */
static struct smc_buf_desc *smc_buf_get_slot(int compressed_bufsize,
                                             rwlock_t *lock,
                                             struct list_head *buf_list)
{
        struct smc_buf_desc *buf_slot;

        read_lock_bh(lock);
        list_for_each_entry(buf_slot, buf_list, list) {
                if (cmpxchg(&buf_slot->used, 0, 1) == 0) {
                        read_unlock_bh(lock);
                        return buf_slot;
                }
        }
        read_unlock_bh(lock);
        return NULL;
}

/* one of the conditions for announcing a receiver's current window size is
 * that it "results in a minimum increase in the window size of 10% of the
 * receive buffer space" [RFC7609]
 */
static inline int smc_rmb_wnd_update_limit(int rmbe_size)
{
        return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
}

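/* allocate and map an SMC-R buffer: physically contiguous pages that must
 * map to a single DMA address; an RMB additionally gets a memory region
 * allowing remote RDMA writes by the peer
 */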
static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr,
                                                bool is_rmb, int bufsize)
{
        struct smc_buf_desc *buf_desc;
        struct smc_link *lnk;
        int rc;

        /* try to alloc a new buffer */
        buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
        if (!buf_desc)
                return ERR_PTR(-ENOMEM);

        buf_desc->order = get_order(bufsize);
        buf_desc->pages = alloc_pages(GFP_KERNEL | __GFP_NOWARN |
                                      __GFP_NOMEMALLOC | __GFP_COMP |
                                      __GFP_NORETRY | __GFP_ZERO,
                                      buf_desc->order);
        if (!buf_desc->pages) {
                kfree(buf_desc);
                return ERR_PTR(-EAGAIN);
        }
        buf_desc->cpu_addr = (void *)page_address(buf_desc->pages);

        /* build the sg table from the pages */
        lnk = &lgr->lnk[SMC_SINGLE_LINK];
        rc = sg_alloc_table(&buf_desc->sgt[SMC_SINGLE_LINK], 1,
                            GFP_KERNEL);
        if (rc) {
                smc_buf_free(lgr, is_rmb, buf_desc);
                return ERR_PTR(rc);
        }
        sg_set_buf(buf_desc->sgt[SMC_SINGLE_LINK].sgl,
                   buf_desc->cpu_addr, bufsize);

        /* map sg table to DMA address */
        rc = smc_ib_buf_map_sg(lnk->smcibdev, buf_desc,
                               is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
        /* SMC protocol depends on mapping to one DMA address only */
        if (rc != 1) {
                smc_buf_free(lgr, is_rmb, buf_desc);
                return ERR_PTR(-EAGAIN);
        }

        /* create a new memory region for the RMB */
        if (is_rmb) {
                rc = smc_ib_get_memory_region(lnk->roce_pd,
                                              IB_ACCESS_REMOTE_WRITE |
                                              IB_ACCESS_LOCAL_WRITE,
                                              buf_desc);
                if (rc) {
                        smc_buf_free(lgr, is_rmb, buf_desc);
                        return ERR_PTR(rc);
                }
        }

        buf_desc->len = bufsize;
        return buf_desc;
}

#define SMCD_DMBE_SIZES         7 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */

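/* allocate an SMC-D buffer: a DMB is registered with the ISM device,
 * a send buffer is plain kernel memory
 */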
static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr,
                                                bool is_dmb, int bufsize)
{
        struct smc_buf_desc *buf_desc;
        int rc;

        if (smc_compress_bufsize(bufsize) > SMCD_DMBE_SIZES)
                return ERR_PTR(-EAGAIN);

        /* try to alloc a new DMB */
        buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
        if (!buf_desc)
                return ERR_PTR(-ENOMEM);
        if (is_dmb) {
                rc = smc_ism_register_dmb(lgr, bufsize, buf_desc);
                if (rc) {
                        kfree(buf_desc);
                        return ERR_PTR(-EAGAIN);
                }
                buf_desc->pages = virt_to_page(buf_desc->cpu_addr);
                /* CDC header stored in buf. So, pretend it was smaller */
                buf_desc->len = bufsize - sizeof(struct smcd_cdc_msg);
        } else {
                buf_desc->cpu_addr = kzalloc(bufsize, GFP_KERNEL |
                                             __GFP_NOWARN | __GFP_NORETRY |
                                             __GFP_NOMEMALLOC);
                if (!buf_desc->cpu_addr) {
                        kfree(buf_desc);
                        return ERR_PTR(-EAGAIN);
                }
                buf_desc->len = bufsize;
        }
        return buf_desc;
}

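/* get a send buffer or RMB for a connection: start with the compressed
 * size derived from the socket buffer size, try to reuse a slot from the
 * link group, and fall back to smaller sizes until an allocation succeeds
 */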
static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
{
        struct smc_buf_desc *buf_desc = ERR_PTR(-ENOMEM);
        struct smc_connection *conn = &smc->conn;
        struct smc_link_group *lgr = conn->lgr;
        struct list_head *buf_list;
        int bufsize, bufsize_short;
        int sk_buf_size;
        rwlock_t *lock;

        if (is_rmb)
                /* use socket recv buffer size (w/o overhead) as start value */
                sk_buf_size = smc->sk.sk_rcvbuf / 2;
        else
                /* use socket send buffer size (w/o overhead) as start value */
                sk_buf_size = smc->sk.sk_sndbuf / 2;

        for (bufsize_short = smc_compress_bufsize(sk_buf_size);
             bufsize_short >= 0; bufsize_short--) {

                if (is_rmb) {
                        lock = &lgr->rmbs_lock;
                        buf_list = &lgr->rmbs[bufsize_short];
                } else {
                        lock = &lgr->sndbufs_lock;
                        buf_list = &lgr->sndbufs[bufsize_short];
                }
                bufsize = smc_uncompress_bufsize(bufsize_short);
                if ((1 << get_order(bufsize)) > SG_MAX_SINGLE_ALLOC)
                        continue;

                /* check for reusable slot in the link group */
                buf_desc = smc_buf_get_slot(bufsize_short, lock, buf_list);
                if (buf_desc) {
                        memset(buf_desc->cpu_addr, 0, bufsize);
                        break; /* found reusable slot */
                }

                if (is_smcd)
                        buf_desc = smcd_new_buf_create(lgr, is_rmb, bufsize);
                else
                        buf_desc = smcr_new_buf_create(lgr, is_rmb, bufsize);

                if (PTR_ERR(buf_desc) == -ENOMEM)
                        break;
                if (IS_ERR(buf_desc))
                        continue;

                buf_desc->used = 1;
                write_lock_bh(lock);
                list_add(&buf_desc->list, buf_list);
                write_unlock_bh(lock);
                break; /* found */
        }

        if (IS_ERR(buf_desc))
                return -ENOMEM;

        if (is_rmb) {
                conn->rmb_desc = buf_desc;
                conn->rmbe_size_short = bufsize_short;
                smc->sk.sk_rcvbuf = bufsize * 2;
                atomic_set(&conn->bytes_to_rcv, 0);
                conn->rmbe_update_limit =
                        smc_rmb_wnd_update_limit(buf_desc->len);
                if (is_smcd)
                        smc_ism_set_conn(conn); /* map RMB/smcd_dev to conn */
        } else {
                conn->sndbuf_desc = buf_desc;
                smc->sk.sk_sndbuf = bufsize * 2;
                atomic_set(&conn->sndbuf_space, bufsize);
        }
        return 0;
}

void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn)
{
        struct smc_link_group *lgr = conn->lgr;

        if (!conn->lgr || conn->lgr->is_smcd)
                return;
        smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
                               conn->sndbuf_desc, DMA_TO_DEVICE);
}

void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn)
{
        struct smc_link_group *lgr = conn->lgr;

        if (!conn->lgr || conn->lgr->is_smcd)
                return;
        smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
                                  conn->sndbuf_desc, DMA_TO_DEVICE);
}

void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn)
{
        struct smc_link_group *lgr = conn->lgr;

        if (!conn->lgr || conn->lgr->is_smcd)
                return;
        smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
                               conn->rmb_desc, DMA_FROM_DEVICE);
}

void smc_rmb_sync_sg_for_device(struct smc_connection *conn)
{
        struct smc_link_group *lgr = conn->lgr;

        if (!conn->lgr || conn->lgr->is_smcd)
                return;
        smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
                                  conn->rmb_desc, DMA_FROM_DEVICE);
}

/* create the send and receive buffer for an SMC socket;
 * receive buffers are called RMBs;
 * (even though the SMC protocol allows more than one RMB-element per RMB,
 * the Linux implementation uses just one RMB-element per RMB, i.e. uses an
 * extra RMB for every connection in a link group)
 */
int smc_buf_create(struct smc_sock *smc, bool is_smcd)
{
        int rc;

        /* create send buffer */
        rc = __smc_buf_create(smc, is_smcd, false);
        if (rc)
                return rc;
        /* create rmb */
        rc = __smc_buf_create(smc, is_smcd, true);
        if (rc)
                smc_buf_free(smc->conn.lgr, false, smc->conn.sndbuf_desc);
        return rc;
}

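/* reserve a free slot in the lgr's rtoken array; returns the index or
 * -ENOSPC if all slots are in use
 */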
static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
{
        int i;

        for_each_clear_bit(i, lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX) {
                if (!test_and_set_bit(i, lgr->rtokens_used_mask))
                        return i;
        }
        return -ENOSPC;
}

/* add a new rtoken from peer */
int smc_rtoken_add(struct smc_link_group *lgr, __be64 nw_vaddr, __be32 nw_rkey)
{
        u64 dma_addr = be64_to_cpu(nw_vaddr);
        u32 rkey = ntohl(nw_rkey);
        int i;

        for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
                if ((lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey) &&
                    (lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr == dma_addr) &&
                    test_bit(i, lgr->rtokens_used_mask)) {
                        /* already in list */
                        return i;
                }
        }
        i = smc_rmb_reserve_rtoken_idx(lgr);
        if (i < 0)
                return i;
        lgr->rtokens[i][SMC_SINGLE_LINK].rkey = rkey;
        lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = dma_addr;
        return i;
}

/* delete an rtoken */
int smc_rtoken_delete(struct smc_link_group *lgr, __be32 nw_rkey)
{
        u32 rkey = ntohl(nw_rkey);
        int i;

        for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
                if (lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey &&
                    test_bit(i, lgr->rtokens_used_mask)) {
                        lgr->rtokens[i][SMC_SINGLE_LINK].rkey = 0;
                        lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = 0;

                        clear_bit(i, lgr->rtokens_used_mask);
                        return 0;
                }
        }
        return -ENOENT;
}

/* save rkey and dma_addr received from peer during clc handshake */
int smc_rmb_rtoken_handling(struct smc_connection *conn,
                            struct smc_clc_msg_accept_confirm *clc)
{
        conn->rtoken_idx = smc_rtoken_add(conn->lgr, clc->rmb_dma_addr,
                                          clc->rmb_rkey);
        if (conn->rtoken_idx < 0)
                return conn->rtoken_idx;
        return 0;
}

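/* mark all IB ports and ISM devices as going away so that no new link
 * groups are created while the module or system shuts down
 */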
static void smc_core_going_away(void)
{
        struct smc_ib_device *smcibdev;
        struct smcd_dev *smcd;

        spin_lock(&smc_ib_devices.lock);
        list_for_each_entry(smcibdev, &smc_ib_devices.list, list) {
                int i;

                for (i = 0; i < SMC_MAX_PORTS; i++)
                        set_bit(i, smcibdev->ports_going_away);
        }
        spin_unlock(&smc_ib_devices.lock);

        spin_lock(&smcd_dev_list.lock);
        list_for_each_entry(smcd, &smcd_dev_list.list, list) {
                smcd->going_away = 1;
        }
        spin_unlock(&smcd_dev_list.lock);
}

/* Clean up all SMC link groups */
static void smc_lgrs_shutdown(void)
{
        struct smcd_dev *smcd;

        smc_core_going_away();

        smc_smcr_terminate_all(NULL);

        spin_lock(&smcd_dev_list.lock);
        list_for_each_entry(smcd, &smcd_dev_list.list, list)
                smc_smcd_terminate_all(smcd);
        spin_unlock(&smcd_dev_list.lock);
}

static int smc_core_reboot_event(struct notifier_block *this,
                                 unsigned long event, void *ptr)
{
        smc_lgrs_shutdown();
        smc_ib_unregister_client();
        return 0;
}

static struct notifier_block smc_reboot_notifier = {
        .notifier_call = smc_core_reboot_event,
};

int __init smc_core_init(void)
{
        return register_reboot_notifier(&smc_reboot_notifier);
}

/* Called (from smc_exit) when module is removed */
void smc_core_exit(void)
{
        unregister_reboot_notifier(&smc_reboot_notifier);
        smc_lgrs_shutdown();
}
1358 }