net/smc: convert static link ID instances to support multiple links
[linux-2.6-microblaze.git] net/smc/smc_core.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  *  Shared Memory Communications over RDMA (SMC-R) and RoCE
4  *
5  *  Basic Transport Functions exploiting Infiniband API
6  *
7  *  Copyright IBM Corp. 2016
8  *
9  *  Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
10  */
11
12 #include <linux/socket.h>
13 #include <linux/if_vlan.h>
14 #include <linux/random.h>
15 #include <linux/workqueue.h>
16 #include <linux/wait.h>
17 #include <linux/reboot.h>
18 #include <net/tcp.h>
19 #include <net/sock.h>
20 #include <rdma/ib_verbs.h>
21 #include <rdma/ib_cache.h>
22
23 #include "smc.h"
24 #include "smc_clc.h"
25 #include "smc_core.h"
26 #include "smc_ib.h"
27 #include "smc_wr.h"
28 #include "smc_llc.h"
29 #include "smc_cdc.h"
30 #include "smc_close.h"
31 #include "smc_ism.h"
32
33 #define SMC_LGR_NUM_INCR                256
34 #define SMC_LGR_FREE_DELAY_SERV         (600 * HZ)
35 #define SMC_LGR_FREE_DELAY_CLNT         (SMC_LGR_FREE_DELAY_SERV + 10 * HZ)
36 #define SMC_LGR_FREE_DELAY_FAST         (8 * HZ)
37
38 static struct smc_lgr_list smc_lgr_list = {     /* established link groups */
39         .lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock),
40         .list = LIST_HEAD_INIT(smc_lgr_list.list),
41         .num = 0,
42 };
43
44 static atomic_t lgr_cnt = ATOMIC_INIT(0); /* number of existing link groups */
45 static DECLARE_WAIT_QUEUE_HEAD(lgrs_deleted);
46
47 static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
48                          struct smc_buf_desc *buf_desc);
49 static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft);
50
51 /* return head of link group list and its lock for a given link group */
52 static inline struct list_head *smc_lgr_list_head(struct smc_link_group *lgr,
53                                                   spinlock_t **lgr_lock)
54 {
55         if (lgr->is_smcd) {
56                 *lgr_lock = &lgr->smcd->lgr_lock;
57                 return &lgr->smcd->lgr_list;
58         }
59
60         *lgr_lock = &smc_lgr_list.lock;
61         return &smc_lgr_list.list;
62 }
63
64 static void smc_lgr_schedule_free_work(struct smc_link_group *lgr)
65 {
66         /* client link group creation always follows the server link group
67          * creation. For client use a somewhat higher removal delay time,
68          * otherwise there is a risk of out-of-sync link groups.
69          */
70         if (!lgr->freeing && !lgr->freefast) {
71                 mod_delayed_work(system_wq, &lgr->free_work,
72                                  (!lgr->is_smcd && lgr->role == SMC_CLNT) ?
73                                                 SMC_LGR_FREE_DELAY_CLNT :
74                                                 SMC_LGR_FREE_DELAY_SERV);
75         }
76 }
77
78 void smc_lgr_schedule_free_work_fast(struct smc_link_group *lgr)
79 {
80         if (!lgr->freeing && !lgr->freefast) {
81                 lgr->freefast = 1;
82                 mod_delayed_work(system_wq, &lgr->free_work,
83                                  SMC_LGR_FREE_DELAY_FAST);
84         }
85 }
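/* Effective removal delays derived from the macros above: a server link group
 * lingers for 600 * HZ jiffies (~10 minutes) after its last connection went
 * away, a client link group 10 seconds longer so that both sides tear down in
 * the same order, and the fast variant (used e.g. by smc_lgr_cleanup_early())
 * reschedules the free work to just 8 seconds.  Once lgr->freeing or
 * lgr->freefast is set, no further rescheduling takes place.
 */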
86
87 /* Register connection's alert token in our lookup structure.
88  * To use rbtrees we have to implement our own insert core.
89  * Requires @conns_lock
90  * @conn        connection to register
92  */
93 static void smc_lgr_add_alert_token(struct smc_connection *conn)
94 {
95         struct rb_node **link, *parent = NULL;
96         u32 token = conn->alert_token_local;
97
98         link = &conn->lgr->conns_all.rb_node;
99         while (*link) {
100                 struct smc_connection *cur = rb_entry(*link,
101                                         struct smc_connection, alert_node);
102
103                 parent = *link;
104                 if (cur->alert_token_local > token)
105                         link = &parent->rb_left;
106                 else
107                         link = &parent->rb_right;
108         }
109         /* Put the new node there */
110         rb_link_node(&conn->alert_node, parent, link);
111         rb_insert_color(&conn->alert_node, &conn->lgr->conns_all);
112 }
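/* The tree is keyed by conn->alert_token_local, so an alert token carried in
 * protocol messages can be resolved back to its connection.  The lookup
 * counterpart used in smc_lgr_register_conn(), smc_lgr_find_conn(), walks the
 * same tree roughly like this (simplified sketch):
 *
 *	node = lgr->conns_all.rb_node;
 *	while (node) {
 *		cur = rb_entry(node, struct smc_connection, alert_node);
 *		if (cur->alert_token_local > token)
 *			node = node->rb_left;
 *		else if (cur->alert_token_local < token)
 *			node = node->rb_right;
 *		else
 *			return cur;
 *	}
 *	return NULL;
 */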
113
114 /* Register connection in link group by assigning an alert token
115  * registered in a search tree.
116  * Requires @conns_lock
117  * Note that '0' is a reserved value and not assigned.
118  */
119 static int smc_lgr_register_conn(struct smc_connection *conn)
120 {
121         struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
122         static atomic_t nexttoken = ATOMIC_INIT(0);
123
124         /* find a new alert_token_local value not yet used by some connection
125          * in this link group
126          */
127         sock_hold(&smc->sk); /* sock_put in smc_lgr_unregister_conn() */
128         while (!conn->alert_token_local) {
129                 conn->alert_token_local = atomic_inc_return(&nexttoken);
130                 if (smc_lgr_find_conn(conn->alert_token_local, conn->lgr))
131                         conn->alert_token_local = 0;
132         }
133         smc_lgr_add_alert_token(conn);
134
135         /* assign the new connection to a link */
136         if (!conn->lgr->is_smcd) {
137                 struct smc_link *lnk;
138                 int i;
139
140                 /* tbd - link balancing */
141                 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
142                         lnk = &conn->lgr->lnk[i];
143                         if (lnk->state == SMC_LNK_ACTIVATING ||
144                             lnk->state == SMC_LNK_ACTIVE)
145                                 conn->lnk = lnk;
146                 }
147                 if (!conn->lnk)
148                         return SMC_CLC_DECL_NOACTLINK;
149         }
150         conn->lgr->conns_num++;
151         return 0;
152 }
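/* A connection is only assigned to a link in ACTIVATING or ACTIVE state; with
 * link balancing still marked "tbd" above, the loop simply keeps the last
 * usable link it finds, and the connection is declined with
 * SMC_CLC_DECL_NOACTLINK if the link group has no usable link at all.
 */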
153
154 /* Unregister connection and reset the alert token of the given connection
155  */
156 static void __smc_lgr_unregister_conn(struct smc_connection *conn)
157 {
158         struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
159         struct smc_link_group *lgr = conn->lgr;
160
161         rb_erase(&conn->alert_node, &lgr->conns_all);
162         lgr->conns_num--;
163         conn->alert_token_local = 0;
164         sock_put(&smc->sk); /* sock_hold in smc_lgr_register_conn() */
165 }
166
167 /* Unregister connection from lgr
168  */
169 static void smc_lgr_unregister_conn(struct smc_connection *conn)
170 {
171         struct smc_link_group *lgr = conn->lgr;
172
173         if (!lgr)
174                 return;
175         write_lock_bh(&lgr->conns_lock);
176         if (conn->alert_token_local) {
177                 __smc_lgr_unregister_conn(conn);
178         }
179         write_unlock_bh(&lgr->conns_lock);
180         conn->lgr = NULL;
181 }
182
183 void smc_lgr_cleanup_early(struct smc_connection *conn)
184 {
185         struct smc_link_group *lgr = conn->lgr;
186
187         if (!lgr)
188                 return;
189
190         smc_conn_free(conn);
191         smc_lgr_forget(lgr);
192         smc_lgr_schedule_free_work_fast(lgr);
193 }
194
195 /* Send delete link, either as client to request the initiation
196  * of the DELETE LINK sequence from server; or as server to
197  * initiate the delete processing. See smc_llc_rx_delete_link().
198  */
199 static int smcr_link_send_delete(struct smc_link *lnk, bool orderly)
200 {
201         if (lnk->state == SMC_LNK_ACTIVE &&
202             !smc_llc_send_delete_link(lnk, SMC_LLC_REQ, orderly)) {
203                 smc_llc_link_deleting(lnk);
204                 return 0;
205         }
206         return -ENOTCONN;
207 }
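/* A DELETE LINK request can only go out over a link that is still active.  On
 * success the link is flagged as deleting via smc_llc_link_deleting(), so it
 * is no longer handed out to new connections, and the caller waits for the
 * peer's response before freeing the link group (see smc_lgr_free_work()).
 */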
208
209 static void smc_lgr_free(struct smc_link_group *lgr);
210
211 static void smc_lgr_free_work(struct work_struct *work)
212 {
213         struct smc_link_group *lgr = container_of(to_delayed_work(work),
214                                                   struct smc_link_group,
215                                                   free_work);
216         spinlock_t *lgr_lock;
217         bool conns;
218         int i;
219
220         smc_lgr_list_head(lgr, &lgr_lock);
221         spin_lock_bh(lgr_lock);
222         if (lgr->freeing) {
223                 spin_unlock_bh(lgr_lock);
224                 return;
225         }
226         read_lock_bh(&lgr->conns_lock);
227         conns = RB_EMPTY_ROOT(&lgr->conns_all);
228         read_unlock_bh(&lgr->conns_lock);
229         if (!conns) { /* number of lgr connections is no longer zero */
230                 spin_unlock_bh(lgr_lock);
231                 return;
232         }
233         list_del_init(&lgr->list); /* remove from smc_lgr_list */
234
235         if (!lgr->is_smcd && !lgr->terminating) {
236                 bool do_wait = false;
237
238                 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
239                         struct smc_link *lnk = &lgr->lnk[i];
240                         /* try to send del link msg, on err free immediately */
241                         if (lnk->state == SMC_LNK_ACTIVE &&
242                             !smcr_link_send_delete(lnk, true)) {
243                                 /* reschedule in case we never receive a resp */
244                                 smc_lgr_schedule_free_work(lgr);
245                                 do_wait = true;
246                         }
247                 }
248                 if (do_wait) {
249                         spin_unlock_bh(lgr_lock);
250                         return; /* wait for resp, see smc_llc_rx_delete_link */
251                 }
252         }
253         lgr->freeing = 1; /* this instance does the freeing, no new schedule */
254         spin_unlock_bh(lgr_lock);
255         cancel_delayed_work(&lgr->free_work);
256
257         if (lgr->is_smcd && !lgr->terminating)
258                 smc_ism_signal_shutdown(lgr);
259         if (!lgr->is_smcd) {
260                 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
261                         struct smc_link *lnk = &lgr->lnk[i];
262
263                         if (lnk->state != SMC_LNK_INACTIVE)
264                                 smc_llc_link_inactive(lnk);
265                 }
266         }
267         smc_lgr_free(lgr);
268 }
269
270 static void smc_lgr_terminate_work(struct work_struct *work)
271 {
272         struct smc_link_group *lgr = container_of(work, struct smc_link_group,
273                                                   terminate_work);
274
275         __smc_lgr_terminate(lgr, true);
276 }
277
278 /* return next unique link id for the lgr */
279 static u8 smcr_next_link_id(struct smc_link_group *lgr)
280 {
281         u8 link_id;
282         int i;
283
284         while (1) {
285                 link_id = ++lgr->next_link_id;
286                 if (!link_id)   /* skip zero as link_id */
287                         link_id = ++lgr->next_link_id;
288                 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
289                         if (lgr->lnk[i].state != SMC_LNK_INACTIVE &&
290                             lgr->lnk[i].link_id == link_id)
291                                 break;  /* link_id already in use, retry */
292                 if (i == SMC_LINKS_PER_LGR_MAX)
293                         break;          /* unused link_id found */
294         }
295         return link_id;
296 }
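/* link_id is an 8-bit value: zero is reserved to mean "no link", an id still
 * used by a non-INACTIVE link of this link group is skipped, and the counter
 * simply wraps around after 255.
 */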
297
298 static int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
299                           u8 link_idx, struct smc_init_info *ini)
300 {
301         u8 rndvec[3];
302         int rc;
303
304         get_device(&ini->ib_dev->ibdev->dev);
305         atomic_inc(&ini->ib_dev->lnk_cnt);
306         lnk->state = SMC_LNK_ACTIVATING;
307         lnk->link_id = smcr_next_link_id(lgr);
308         lnk->lgr = lgr;
309         lnk->link_idx = link_idx;
310         lnk->smcibdev = ini->ib_dev;
311         lnk->ibport = ini->ib_port;
312         lnk->path_mtu = ini->ib_dev->pattr[ini->ib_port - 1].active_mtu;
313         if (!ini->ib_dev->initialized) {
314                 rc = (int)smc_ib_setup_per_ibdev(ini->ib_dev);
315                 if (rc)
316                         goto out;
317         }
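        /* assemble a random 24-bit initial packet sequence number (PSN) for
         * the queue pair from three random bytes:
         *   psn_initial = rndvec[0] | rndvec[1] << 8 | rndvec[2] << 16
         */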
318         get_random_bytes(rndvec, sizeof(rndvec));
319         lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) +
320                 (rndvec[2] << 16);
321         rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport,
322                                   ini->vlan_id, lnk->gid, &lnk->sgid_index);
323         if (rc)
324                 goto out;
325         rc = smc_llc_link_init(lnk);
326         if (rc)
327                 goto out;
328         rc = smc_wr_alloc_link_mem(lnk);
329         if (rc)
330                 goto clear_llc_lnk;
331         rc = smc_ib_create_protection_domain(lnk);
332         if (rc)
333                 goto free_link_mem;
334         rc = smc_ib_create_queue_pair(lnk);
335         if (rc)
336                 goto dealloc_pd;
337         rc = smc_wr_create_link(lnk);
338         if (rc)
339                 goto destroy_qp;
340         return 0;
341
342 destroy_qp:
343         smc_ib_destroy_queue_pair(lnk);
344 dealloc_pd:
345         smc_ib_dealloc_protection_domain(lnk);
346 free_link_mem:
347         smc_wr_free_link_mem(lnk);
348 clear_llc_lnk:
349         smc_llc_link_clear(lnk);
350 out:
351         put_device(&ini->ib_dev->ibdev->dev);
352         memset(lnk, 0, sizeof(struct smc_link));
353         if (!atomic_dec_return(&ini->ib_dev->lnk_cnt))
354                 wake_up(&ini->ib_dev->lnks_deleted);
355         return rc;
356 }
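/* smcr_link_init() acquires its resources in the order below; the error
 * labels above undo a partial setup in reverse order, and smcr_link_clear()
 * releases the same resources again for a fully established link:
 *
 *	smc_llc_link_init()               - smc_llc_link_clear()
 *	smc_wr_alloc_link_mem()           - smc_wr_free_link_mem()
 *	smc_ib_create_protection_domain() - smc_ib_dealloc_protection_domain()
 *	smc_ib_create_queue_pair()        - smc_ib_destroy_queue_pair()
 *	smc_wr_create_link()              - smc_wr_free_link()
 */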
357
358 /* create a new SMC link group */
359 static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
360 {
361         struct smc_link_group *lgr;
362         struct list_head *lgr_list;
363         struct smc_link *lnk;
364         spinlock_t *lgr_lock;
365         u8 link_idx;
366         int rc = 0;
367         int i;
368
369         if (ini->is_smcd && ini->vlan_id) {
370                 if (smc_ism_get_vlan(ini->ism_dev, ini->vlan_id)) {
371                         rc = SMC_CLC_DECL_ISMVLANERR;
372                         goto out;
373                 }
374         }
375
376         lgr = kzalloc(sizeof(*lgr), GFP_KERNEL);
377         if (!lgr) {
378                 rc = SMC_CLC_DECL_MEM;
379                 goto ism_put_vlan;
380         }
381         lgr->is_smcd = ini->is_smcd;
382         lgr->sync_err = 0;
383         lgr->terminating = 0;
384         lgr->freefast = 0;
385         lgr->freeing = 0;
386         lgr->vlan_id = ini->vlan_id;
387         rwlock_init(&lgr->sndbufs_lock);
388         rwlock_init(&lgr->rmbs_lock);
389         rwlock_init(&lgr->conns_lock);
390         for (i = 0; i < SMC_RMBE_SIZES; i++) {
391                 INIT_LIST_HEAD(&lgr->sndbufs[i]);
392                 INIT_LIST_HEAD(&lgr->rmbs[i]);
393         }
394         lgr->next_link_id = 0;
395         smc_lgr_list.num += SMC_LGR_NUM_INCR;
396         memcpy(&lgr->id, (u8 *)&smc_lgr_list.num, SMC_LGR_ID_SIZE);
397         INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work);
398         INIT_WORK(&lgr->terminate_work, smc_lgr_terminate_work);
399         lgr->conns_all = RB_ROOT;
400         if (ini->is_smcd) {
401                 /* SMC-D specific settings */
402                 get_device(&ini->ism_dev->dev);
403                 lgr->peer_gid = ini->ism_gid;
404                 lgr->smcd = ini->ism_dev;
405                 lgr_list = &ini->ism_dev->lgr_list;
406                 lgr_lock = &lgr->smcd->lgr_lock;
407                 lgr->peer_shutdown = 0;
408                 atomic_inc(&ini->ism_dev->lgr_cnt);
409         } else {
410                 /* SMC-R specific settings */
411                 lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
412                 memcpy(lgr->peer_systemid, ini->ib_lcl->id_for_peer,
413                        SMC_SYSTEMID_LEN);
414
415                 link_idx = SMC_SINGLE_LINK;
416                 lnk = &lgr->lnk[link_idx];
417                 rc = smcr_link_init(lgr, lnk, link_idx, ini);
418                 if (rc)
419                         goto free_lgr;
420                 lgr_list = &smc_lgr_list.list;
421                 lgr_lock = &smc_lgr_list.lock;
422                 atomic_inc(&lgr_cnt);
423         }
424         smc->conn.lgr = lgr;
425         spin_lock_bh(lgr_lock);
426         list_add(&lgr->list, lgr_list);
427         spin_unlock_bh(lgr_lock);
428         return 0;
429
430 free_lgr:
431         kfree(lgr);
432 ism_put_vlan:
433         if (ini->is_smcd && ini->vlan_id)
434                 smc_ism_put_vlan(ini->ism_dev, ini->vlan_id);
435 out:
436         if (rc < 0) {
437                 if (rc == -ENOMEM)
438                         rc = SMC_CLC_DECL_MEM;
439                 else
440                         rc = SMC_CLC_DECL_INTERR;
441         }
442         return rc;
443 }
444
445 static void smcr_buf_unuse(struct smc_buf_desc *rmb_desc,
446                            struct smc_link *lnk)
447 {
448         struct smc_link_group *lgr = lnk->lgr;
449
450         if (rmb_desc->is_conf_rkey && !list_empty(&lgr->list)) {
451                 /* unregister rmb with peer */
452                 smc_llc_do_delete_rkey(lnk, rmb_desc);
453                 rmb_desc->is_conf_rkey = false;
454         }
455         if (rmb_desc->is_reg_err) {
456                 /* buf registration failed, reuse not possible */
457                 write_lock_bh(&lgr->rmbs_lock);
458                 list_del(&rmb_desc->list);
459                 write_unlock_bh(&lgr->rmbs_lock);
460
461                 smc_buf_free(lgr, true, rmb_desc);
462         } else {
463                 rmb_desc->used = 0;
464         }
465 }
466
467 static void smc_buf_unuse(struct smc_connection *conn,
468                           struct smc_link_group *lgr)
469 {
470         if (conn->sndbuf_desc)
471                 conn->sndbuf_desc->used = 0;
472         if (conn->rmb_desc && lgr->is_smcd)
473                 conn->rmb_desc->used = 0;
474         else if (conn->rmb_desc)
475                 smcr_buf_unuse(conn->rmb_desc, conn->lnk);
476 }
477
478 /* remove a finished connection from its link group */
479 void smc_conn_free(struct smc_connection *conn)
480 {
481         struct smc_link_group *lgr = conn->lgr;
482
483         if (!lgr)
484                 return;
485         if (lgr->is_smcd) {
486                 if (!list_empty(&lgr->list))
487                         smc_ism_unset_conn(conn);
488                 tasklet_kill(&conn->rx_tsklet);
489         } else {
490                 smc_cdc_tx_dismiss_slots(conn);
491         }
492         if (!list_empty(&lgr->list)) {
493                 smc_lgr_unregister_conn(conn);
494                 smc_buf_unuse(conn, lgr); /* allow buffer reuse */
495         }
496
497         if (!lgr->conns_num)
498                 smc_lgr_schedule_free_work(lgr);
499 }
500
501 static void smcr_link_clear(struct smc_link *lnk)
502 {
503         if (lnk->peer_qpn == 0)
504                 return;
505         lnk->peer_qpn = 0;
506         smc_llc_link_clear(lnk);
507         smc_ib_modify_qp_reset(lnk);
508         smc_wr_free_link(lnk);
509         smc_ib_destroy_queue_pair(lnk);
510         smc_ib_dealloc_protection_domain(lnk);
511         smc_wr_free_link_mem(lnk);
512         put_device(&lnk->smcibdev->ibdev->dev);
513         if (!atomic_dec_return(&lnk->smcibdev->lnk_cnt))
514                 wake_up(&lnk->smcibdev->lnks_deleted);
515 }
516
517 static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb,
518                           struct smc_buf_desc *buf_desc)
519 {
520         struct smc_link *lnk;
521         int i;
522
523         for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
524                 lnk = &lgr->lnk[i];
525                 if (!buf_desc->is_map_ib[lnk->link_idx])
526                         continue;
527                 if (is_rmb) {
528                         if (buf_desc->mr_rx[lnk->link_idx])
529                                 smc_ib_put_memory_region(
530                                                 buf_desc->mr_rx[lnk->link_idx]);
531                         smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_FROM_DEVICE);
532                 } else {
533                         smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_TO_DEVICE);
534                 }
535                 sg_free_table(&buf_desc->sgt[lnk->link_idx]);
536         }
537
538         if (buf_desc->pages)
539                 __free_pages(buf_desc->pages, buf_desc->order);
540         kfree(buf_desc);
541 }
542
543 static void smcd_buf_free(struct smc_link_group *lgr, bool is_dmb,
544                           struct smc_buf_desc *buf_desc)
545 {
546         if (is_dmb) {
547                 /* restore original buf len */
548                 buf_desc->len += sizeof(struct smcd_cdc_msg);
549                 smc_ism_unregister_dmb(lgr->smcd, buf_desc);
550         } else {
551                 kfree(buf_desc->cpu_addr);
552         }
553         kfree(buf_desc);
554 }
555
556 static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
557                          struct smc_buf_desc *buf_desc)
558 {
559         if (lgr->is_smcd)
560                 smcd_buf_free(lgr, is_rmb, buf_desc);
561         else
562                 smcr_buf_free(lgr, is_rmb, buf_desc);
563 }
564
565 static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb)
566 {
567         struct smc_buf_desc *buf_desc, *bf_desc;
568         struct list_head *buf_list;
569         int i;
570
571         for (i = 0; i < SMC_RMBE_SIZES; i++) {
572                 if (is_rmb)
573                         buf_list = &lgr->rmbs[i];
574                 else
575                         buf_list = &lgr->sndbufs[i];
576                 list_for_each_entry_safe(buf_desc, bf_desc, buf_list,
577                                          list) {
578                         list_del(&buf_desc->list);
579                         smc_buf_free(lgr, is_rmb, buf_desc);
580                 }
581         }
582 }
583
584 static void smc_lgr_free_bufs(struct smc_link_group *lgr)
585 {
586         /* free send buffers */
587         __smc_lgr_free_bufs(lgr, false);
588         /* free rmbs */
589         __smc_lgr_free_bufs(lgr, true);
590 }
591
592 /* remove a link group */
593 static void smc_lgr_free(struct smc_link_group *lgr)
594 {
595         int i;
596
597         smc_lgr_free_bufs(lgr);
598         if (lgr->is_smcd) {
599                 if (!lgr->terminating) {
600                         smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
601                         put_device(&lgr->smcd->dev);
602                 }
603                 if (!atomic_dec_return(&lgr->smcd->lgr_cnt))
604                         wake_up(&lgr->smcd->lgrs_deleted);
605         } else {
606                 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
607                         if (lgr->lnk[i].state == SMC_LNK_INACTIVE)
608                                 continue;
609                         smcr_link_clear(&lgr->lnk[i]);
610                 }
611                 if (!atomic_dec_return(&lgr_cnt))
612                         wake_up(&lgrs_deleted);
613         }
614         kfree(lgr);
615 }
616
617 void smc_lgr_forget(struct smc_link_group *lgr)
618 {
619         struct list_head *lgr_list;
620         spinlock_t *lgr_lock;
621
622         lgr_list = smc_lgr_list_head(lgr, &lgr_lock);
623         spin_lock_bh(lgr_lock);
624         /* do not use this link group for new connections */
625         if (!list_empty(lgr_list))
626                 list_del_init(lgr_list);
627         spin_unlock_bh(lgr_lock);
628 }
629
630 static void smcd_unregister_all_dmbs(struct smc_link_group *lgr)
631 {
632         int i;
633
634         for (i = 0; i < SMC_RMBE_SIZES; i++) {
635                 struct smc_buf_desc *buf_desc;
636
637                 list_for_each_entry(buf_desc, &lgr->rmbs[i], list) {
638                         buf_desc->len += sizeof(struct smcd_cdc_msg);
639                         smc_ism_unregister_dmb(lgr->smcd, buf_desc);
640                 }
641         }
642 }
643
644 static void smc_sk_wake_ups(struct smc_sock *smc)
645 {
646         smc->sk.sk_write_space(&smc->sk);
647         smc->sk.sk_data_ready(&smc->sk);
648         smc->sk.sk_state_change(&smc->sk);
649 }
650
651 /* kill a connection */
652 static void smc_conn_kill(struct smc_connection *conn, bool soft)
653 {
654         struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
655
656         if (conn->lgr->is_smcd && conn->lgr->peer_shutdown)
657                 conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
658         else
659                 smc_close_abort(conn);
660         conn->killed = 1;
661         smc->sk.sk_err = ECONNABORTED;
662         smc_sk_wake_ups(smc);
663         if (conn->lgr->is_smcd) {
664                 smc_ism_unset_conn(conn);
665                 if (soft)
666                         tasklet_kill(&conn->rx_tsklet);
667                 else
668                         tasklet_unlock_wait(&conn->rx_tsklet);
669         } else {
670                 smc_cdc_tx_dismiss_slots(conn);
671         }
672         smc_lgr_unregister_conn(conn);
673         smc_close_active_abort(smc);
674 }
675
676 static void smc_lgr_cleanup(struct smc_link_group *lgr)
677 {
678         int i;
679
680         if (lgr->is_smcd) {
681                 smc_ism_signal_shutdown(lgr);
682                 smcd_unregister_all_dmbs(lgr);
683                 smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
684                 put_device(&lgr->smcd->dev);
685         } else {
686                 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
687                         struct smc_link *lnk = &lgr->lnk[i];
688
689                         if (lnk->state != SMC_LNK_INACTIVE)
690                                 smc_llc_link_inactive(lnk);
691                 }
692         }
693 }
694
695 /* terminate link group
696  * @soft: true if link group shutdown can take its time
697  *        false if immediate link group shutdown is required
698  */
699 static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft)
700 {
701         struct smc_connection *conn;
702         struct smc_sock *smc;
703         struct rb_node *node;
704         int i;
705
706         if (lgr->terminating)
707                 return; /* lgr already terminating */
708         if (!soft)
709                 cancel_delayed_work_sync(&lgr->free_work);
710         lgr->terminating = 1;
711         if (!lgr->is_smcd)
712                 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
713                         smc_llc_link_inactive(&lgr->lnk[i]);
714
715         /* kill remaining link group connections */
716         read_lock_bh(&lgr->conns_lock);
717         node = rb_first(&lgr->conns_all);
718         while (node) {
719                 read_unlock_bh(&lgr->conns_lock);
720                 conn = rb_entry(node, struct smc_connection, alert_node);
721                 smc = container_of(conn, struct smc_sock, conn);
722                 sock_hold(&smc->sk); /* sock_put below */
723                 lock_sock(&smc->sk);
724                 smc_conn_kill(conn, soft);
725                 release_sock(&smc->sk);
726                 sock_put(&smc->sk); /* sock_hold above */
727                 read_lock_bh(&lgr->conns_lock);
728                 node = rb_first(&lgr->conns_all);
729         }
730         read_unlock_bh(&lgr->conns_lock);
731         smc_lgr_cleanup(lgr);
732         if (soft)
733                 smc_lgr_schedule_free_work_fast(lgr);
734         else
735                 smc_lgr_free(lgr);
736 }
737
738 /* unlink link group and schedule termination */
739 void smc_lgr_terminate_sched(struct smc_link_group *lgr)
740 {
741         spinlock_t *lgr_lock;
742
743         smc_lgr_list_head(lgr, &lgr_lock);
744         spin_lock_bh(lgr_lock);
745         if (list_empty(&lgr->list) || lgr->terminating || lgr->freeing) {
746                 spin_unlock_bh(lgr_lock);
747                 return; /* lgr already terminating */
748         }
749         list_del_init(&lgr->list);
750         spin_unlock_bh(lgr_lock);
751         schedule_work(&lgr->terminate_work);
752 }
753
754 /* Called when IB port is terminated */
755 void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport)
756 {
757         struct smc_link_group *lgr, *l;
758         LIST_HEAD(lgr_free_list);
759         int i;
760
761         spin_lock_bh(&smc_lgr_list.lock);
762         list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) {
763                 if (lgr->is_smcd)
764                         continue;
765                 /* tbd - terminate only when no more links are active */
766                 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
767                         if (lgr->lnk[i].state == SMC_LNK_INACTIVE ||
768                             lgr->lnk[i].state == SMC_LNK_DELETING)
769                                 continue;
770                         if (lgr->lnk[i].smcibdev == smcibdev &&
771                             lgr->lnk[i].ibport == ibport) {
772                                 list_move(&lgr->list, &lgr_free_list);
773                                 lgr->freeing = 1;
774                         }
775                 }
776         }
777         spin_unlock_bh(&smc_lgr_list.lock);
778
779         list_for_each_entry_safe(lgr, l, &lgr_free_list, list) {
780                 list_del_init(&lgr->list);
781                 __smc_lgr_terminate(lgr, false);
782         }
783 }
784
785 /* Called when peer lgr shutdown (regularly or abnormally) is received */
786 void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan)
787 {
788         struct smc_link_group *lgr, *l;
789         LIST_HEAD(lgr_free_list);
790
791         /* run common cleanup function and build free list */
792         spin_lock_bh(&dev->lgr_lock);
793         list_for_each_entry_safe(lgr, l, &dev->lgr_list, list) {
794                 if ((!peer_gid || lgr->peer_gid == peer_gid) &&
795                     (vlan == VLAN_VID_MASK || lgr->vlan_id == vlan)) {
796                         if (peer_gid) /* peer triggered termination */
797                                 lgr->peer_shutdown = 1;
798                         list_move(&lgr->list, &lgr_free_list);
799                 }
800         }
801         spin_unlock_bh(&dev->lgr_lock);
802
803         /* cancel the regular free workers and actually free lgrs */
804         list_for_each_entry_safe(lgr, l, &lgr_free_list, list) {
805                 list_del_init(&lgr->list);
806                 schedule_work(&lgr->terminate_work);
807         }
808 }
809
810 /* Called when an SMCD device is removed or the smc module is unloaded */
811 void smc_smcd_terminate_all(struct smcd_dev *smcd)
812 {
813         struct smc_link_group *lgr, *lg;
814         LIST_HEAD(lgr_free_list);
815
816         spin_lock_bh(&smcd->lgr_lock);
817         list_splice_init(&smcd->lgr_list, &lgr_free_list);
818         list_for_each_entry(lgr, &lgr_free_list, list)
819                 lgr->freeing = 1;
820         spin_unlock_bh(&smcd->lgr_lock);
821
822         list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) {
823                 list_del_init(&lgr->list);
824                 __smc_lgr_terminate(lgr, false);
825         }
826
827         if (atomic_read(&smcd->lgr_cnt))
828                 wait_event(smcd->lgrs_deleted, !atomic_read(&smcd->lgr_cnt));
829 }
830
831 /* Called when an SMCR device is removed or the smc module is unloaded.
832  * If smcibdev is given, all SMCR link groups using this device are terminated.
833  * If smcibdev is NULL, all SMCR link groups are terminated.
834  */
835 void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
836 {
837         struct smc_link_group *lgr, *lg;
838         LIST_HEAD(lgr_free_list);
839         int i;
840
841         spin_lock_bh(&smc_lgr_list.lock);
842         if (!smcibdev) {
843                 list_splice_init(&smc_lgr_list.list, &lgr_free_list);
844                 list_for_each_entry(lgr, &lgr_free_list, list)
845                         lgr->freeing = 1;
846         } else {
847                 list_for_each_entry_safe(lgr, lg, &smc_lgr_list.list, list) {
848                         for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
849                                 if (lgr->lnk[i].smcibdev == smcibdev) {
850                                         list_move(&lgr->list, &lgr_free_list);
851                                         lgr->freeing = 1;
852                                         break;
853                                 }
854                         }
855                 }
856         }
857         spin_unlock_bh(&smc_lgr_list.lock);
858
859         list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) {
860                 list_del_init(&lgr->list);
861                 __smc_lgr_terminate(lgr, false);
862         }
863
864         if (smcibdev) {
865                 if (atomic_read(&smcibdev->lnk_cnt))
866                         wait_event(smcibdev->lnks_deleted,
867                                    !atomic_read(&smcibdev->lnk_cnt));
868         } else {
869                 if (atomic_read(&lgr_cnt))
870                         wait_event(lgrs_deleted, !atomic_read(&lgr_cnt));
871         }
872 }
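/* Both terminate_all variants block until the last link or link group using
 * the device is actually gone: smcr_link_clear() drops the ibdev's lnk_cnt
 * and smc_lgr_free() drops lgr_cnt (or the smcd device's lgr_cnt), waking the
 * corresponding wait queue once the count reaches zero.
 */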
873
874 /* Determine vlan of internal TCP socket.
875  * @ini: the determined vlan id is stored into ini->vlan_id
876  */
877 int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini)
878 {
879         struct dst_entry *dst = sk_dst_get(clcsock->sk);
880         struct net_device *ndev;
881         int i, nest_lvl, rc = 0;
882
883         ini->vlan_id = 0;
884         if (!dst) {
885                 rc = -ENOTCONN;
886                 goto out;
887         }
888         if (!dst->dev) {
889                 rc = -ENODEV;
890                 goto out_rel;
891         }
892
893         ndev = dst->dev;
894         if (is_vlan_dev(ndev)) {
895                 ini->vlan_id = vlan_dev_vlan_id(ndev);
896                 goto out_rel;
897         }
898
899         rtnl_lock();
900         nest_lvl = ndev->lower_level;
901         for (i = 0; i < nest_lvl; i++) {
902                 struct list_head *lower = &ndev->adj_list.lower;
903
904                 if (list_empty(lower))
905                         break;
906                 lower = lower->next;
907                 ndev = (struct net_device *)netdev_lower_get_next(ndev, &lower);
908                 if (is_vlan_dev(ndev)) {
909                         ini->vlan_id = vlan_dev_vlan_id(ndev);
910                         break;
911                 }
912         }
913         rtnl_unlock();
914
915 out_rel:
916         dst_release(dst);
917 out:
918         return rc;
919 }
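/* The loop above descends at most ndev->lower_level devices in the stacking
 * hierarchy below the TCP socket's device and stops at the first VLAN device
 * it encounters on the way down, so that VLAN's id is the one reported in
 * ini->vlan_id.
 */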
920
921 static bool smcr_lgr_match(struct smc_link_group *lgr,
922                            struct smc_clc_msg_local *lcl,
923                            enum smc_lgr_role role, u32 clcqpn)
924 {
925         int i;
926
927         if (memcmp(lgr->peer_systemid, lcl->id_for_peer, SMC_SYSTEMID_LEN) ||
928             lgr->role != role)
929                 return false;
930
931         for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
932                 if (lgr->lnk[i].state != SMC_LNK_ACTIVE)
933                         continue;
934                 if ((lgr->role == SMC_SERV || lgr->lnk[i].peer_qpn == clcqpn) &&
935                     !memcmp(lgr->lnk[i].peer_gid, &lcl->gid, SMC_GID_SIZE) &&
936                     !memcmp(lgr->lnk[i].peer_mac, lcl->mac, sizeof(lcl->mac)))
937                         return true;
938         }
939         return false;
940 }
941
942 static bool smcd_lgr_match(struct smc_link_group *lgr,
943                            struct smcd_dev *smcismdev, u64 peer_gid)
944 {
945         return lgr->peer_gid == peer_gid && lgr->smcd == smcismdev;
946 }
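/* Criteria for reusing a link group for a new connection: an SMC-R link group
 * must match peer system id and role and have at least one active link whose
 * peer GID and MAC match the CLC proposal (a client additionally requires the
 * peer QP number to match); an SMC-D link group only needs the same ISM
 * device and peer GID.
 */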
947
948 /* create a new SMC connection (and a new link group if necessary) */
949 int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
950 {
951         struct smc_connection *conn = &smc->conn;
952         struct list_head *lgr_list;
953         struct smc_link_group *lgr;
954         enum smc_lgr_role role;
955         spinlock_t *lgr_lock;
956         int rc = 0;
957
958         lgr_list = ini->is_smcd ? &ini->ism_dev->lgr_list : &smc_lgr_list.list;
959         lgr_lock = ini->is_smcd ? &ini->ism_dev->lgr_lock : &smc_lgr_list.lock;
960         ini->cln_first_contact = SMC_FIRST_CONTACT;
961         role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
962         if (role == SMC_CLNT && ini->srv_first_contact)
963                 /* create new link group as well */
964                 goto create;
965
966         /* determine if an existing link group can be reused */
967         spin_lock_bh(lgr_lock);
968         list_for_each_entry(lgr, lgr_list, list) {
969                 write_lock_bh(&lgr->conns_lock);
970                 if ((ini->is_smcd ?
971                      smcd_lgr_match(lgr, ini->ism_dev, ini->ism_gid) :
972                      smcr_lgr_match(lgr, ini->ib_lcl, role, ini->ib_clcqpn)) &&
973                     !lgr->sync_err &&
974                     lgr->vlan_id == ini->vlan_id &&
975                     (role == SMC_CLNT ||
976                      lgr->conns_num < SMC_RMBS_PER_LGR_MAX)) {
977                         /* link group found */
978                         ini->cln_first_contact = SMC_REUSE_CONTACT;
979                         conn->lgr = lgr;
980                         rc = smc_lgr_register_conn(conn); /* add conn to lgr */
981                         write_unlock_bh(&lgr->conns_lock);
982                         if (!rc && delayed_work_pending(&lgr->free_work))
983                                 cancel_delayed_work(&lgr->free_work);
984                         break;
985                 }
986                 write_unlock_bh(&lgr->conns_lock);
987         }
988         spin_unlock_bh(lgr_lock);
989         if (rc)
990                 return rc;
991
992         if (role == SMC_CLNT && !ini->srv_first_contact &&
993             ini->cln_first_contact == SMC_FIRST_CONTACT) {
994                 /* Server reuses a link group, but Client wants to start
995                  * a new one
996                  * send out_of_sync decline, reason synchr. error
997                  */
998                 return SMC_CLC_DECL_SYNCERR;
999         }
1000
1001 create:
1002         if (ini->cln_first_contact == SMC_FIRST_CONTACT) {
1003                 rc = smc_lgr_create(smc, ini);
1004                 if (rc)
1005                         goto out;
1006                 lgr = conn->lgr;
1007                 write_lock_bh(&lgr->conns_lock);
1008                 rc = smc_lgr_register_conn(conn); /* add smc conn to lgr */
1009                 write_unlock_bh(&lgr->conns_lock);
1010                 if (rc)
1011                         goto out;
1012         }
1013         conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
1014         conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
1015         conn->urg_state = SMC_URG_READ;
1016         if (ini->is_smcd) {
1017                 conn->rx_off = sizeof(struct smcd_cdc_msg);
1018                 smcd_cdc_rx_init(conn); /* init tasklet for this conn */
1019         }
1020 #ifndef KERNEL_HAS_ATOMIC64
1021         spin_lock_init(&conn->acurs_lock);
1022 #endif
1023
1024 out:
1025         return rc;
1026 }
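/* The outcome is reported in ini->cln_first_contact: SMC_FIRST_CONTACT means
 * no matching link group was found (or the server asked for a new one) and a
 * fresh lgr was created; SMC_REUSE_CONTACT means an existing link group was
 * joined.  A client that needs a new lgr while the server wants to reuse one
 * fails the handshake with SMC_CLC_DECL_SYNCERR.
 */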
1027
1028 /* convert the RMB size into the compressed notation - minimum 16K.
1029  * In contrast to plain ilog2, this rounds towards the next power of 2,
1030  * so the socket application gets at least its desired sndbuf / rcvbuf size.
1031  */
1032 static u8 smc_compress_bufsize(int size)
1033 {
1034         u8 compressed;
1035
1036         if (size <= SMC_BUF_MIN_SIZE)
1037                 return 0;
1038
1039         size = (size - 1) >> 14;
1040         compressed = ilog2(size) + 1;
1041         if (compressed >= SMC_RMBE_SIZES)
1042                 compressed = SMC_RMBE_SIZES - 1;
1043         return compressed;
1044 }
1045
1046 /* convert the RMB size from compressed notation into integer */
1047 int smc_uncompress_bufsize(u8 compressed)
1048 {
1049         u32 size;
1050
1051         size = 0x00000001 << (((int)compressed) + 14);
1052         return (int)size;
1053 }
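/* A few round trips through the compressed notation (minimum 16KB, rounding
 * up so the application never gets less than it asked for):
 *
 *	smc_compress_bufsize(16384) == 0,  smc_uncompress_bufsize(0) == 16384
 *	smc_compress_bufsize(65536) == 2,  smc_uncompress_bufsize(2) == 65536
 *	smc_compress_bufsize(65537) == 3,  smc_uncompress_bufsize(3) == 131072
 *
 * The compressed value is capped at SMC_RMBE_SIZES - 1, i.e. the largest
 * representable buffer is 1 << (SMC_RMBE_SIZES + 13) bytes.
 */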
1054
1055 /* try to reuse a sndbuf or rmb description slot for a certain
1056  * buffer size; if not available, return NULL
1057  */
1058 static struct smc_buf_desc *smc_buf_get_slot(int compressed_bufsize,
1059                                              rwlock_t *lock,
1060                                              struct list_head *buf_list)
1061 {
1062         struct smc_buf_desc *buf_slot;
1063
1064         read_lock_bh(lock);
1065         list_for_each_entry(buf_slot, buf_list, list) {
1066                 if (cmpxchg(&buf_slot->used, 0, 1) == 0) {
1067                         read_unlock_bh(lock);
1068                         return buf_slot;
1069                 }
1070         }
1071         read_unlock_bh(lock);
1072         return NULL;
1073 }
1074
1075 /* one of the conditions for announcing a receiver's current window size is
1076  * that it "results in a minimum increase in the window size of 10% of the
1077  * receive buffer space" [RFC7609]
1078  */
1079 static inline int smc_rmb_wnd_update_limit(int rmbe_size)
1080 {
1081         return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
1082 }
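/* Example: for a 65536 byte RMB, 10% is 6553 bytes, which gets clamped to
 * SOCK_MIN_SNDBUF / 2 (a few KiB on a typical build); for a 16384 byte RMB
 * the 10% value of 1638 bytes usually lies below that clamp and is used
 * as-is.  This limit is one of the conditions for announcing a consumer
 * cursor update to the peer, keeping tiny window updates off the wire.
 */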
1083
1084 /* map an rmb buf to a link */
1085 static int smcr_buf_map_link(struct smc_buf_desc *buf_desc, bool is_rmb,
1086                              struct smc_link *lnk)
1087 {
1088         int rc;
1089
1090         if (buf_desc->is_map_ib[lnk->link_idx])
1091                 return 0;
1092
1093         rc = sg_alloc_table(&buf_desc->sgt[lnk->link_idx], 1, GFP_KERNEL);
1094         if (rc)
1095                 return rc;
1096         sg_set_buf(buf_desc->sgt[lnk->link_idx].sgl,
1097                    buf_desc->cpu_addr, buf_desc->len);
1098
1099         /* map sg table to DMA address */
1100         rc = smc_ib_buf_map_sg(lnk, buf_desc,
1101                                is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
1102         /* SMC protocol depends on mapping to one DMA address only */
1103         if (rc != 1) {
1104                 rc = -EAGAIN;
1105                 goto free_table;
1106         }
1107
1108         /* create a new memory region for the RMB */
1109         if (is_rmb) {
1110                 rc = smc_ib_get_memory_region(lnk->roce_pd,
1111                                               IB_ACCESS_REMOTE_WRITE |
1112                                               IB_ACCESS_LOCAL_WRITE,
1113                                               buf_desc, lnk->link_idx);
1114                 if (rc)
1115                         goto buf_unmap;
1116                 smc_ib_sync_sg_for_device(lnk, buf_desc, DMA_FROM_DEVICE);
1117         }
1118         buf_desc->is_map_ib[lnk->link_idx] = true;
1119         return 0;
1120
1121 buf_unmap:
1122         smc_ib_buf_unmap_sg(lnk, buf_desc,
1123                             is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
1124 free_table:
1125         sg_free_table(&buf_desc->sgt[lnk->link_idx]);
1126         return rc;
1127 }
1128
1129 static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr,
1130                                                 bool is_rmb, int bufsize)
1131 {
1132         struct smc_buf_desc *buf_desc;
1133
1134         /* try to alloc a new buffer */
1135         buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
1136         if (!buf_desc)
1137                 return ERR_PTR(-ENOMEM);
1138
1139         buf_desc->order = get_order(bufsize);
1140         buf_desc->pages = alloc_pages(GFP_KERNEL | __GFP_NOWARN |
1141                                       __GFP_NOMEMALLOC | __GFP_COMP |
1142                                       __GFP_NORETRY | __GFP_ZERO,
1143                                       buf_desc->order);
1144         if (!buf_desc->pages) {
1145                 kfree(buf_desc);
1146                 return ERR_PTR(-EAGAIN);
1147         }
1148         buf_desc->cpu_addr = (void *)page_address(buf_desc->pages);
1149         buf_desc->len = bufsize;
1150         return buf_desc;
1151 }
1152
1153 /* map buf_desc on all usable links,
1154  * unused buffers stay mapped as long as the link is up
1155  */
1156 static int smcr_buf_map_usable_links(struct smc_link_group *lgr,
1157                                      struct smc_buf_desc *buf_desc, bool is_rmb)
1158 {
1159         int i, rc = 0;
1160
1161         for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
1162                 struct smc_link *lnk = &lgr->lnk[i];
1163
1164                 if (lnk->state != SMC_LNK_ACTIVE &&
1165                     lnk->state != SMC_LNK_ACTIVATING)
1166                         continue;
1167                 if (smcr_buf_map_link(buf_desc, is_rmb, lnk)) {
1168                         smcr_buf_unuse(buf_desc, lnk);
1169                         rc = -ENOMEM;
1170                         goto out;
1171                 }
1172         }
1173 out:
1174         return rc;
1175 }
1176
1177 #define SMCD_DMBE_SIZES         7 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */
1178
1179 static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr,
1180                                                 bool is_dmb, int bufsize)
1181 {
1182         struct smc_buf_desc *buf_desc;
1183         int rc;
1184
1185         if (smc_compress_bufsize(bufsize) > SMCD_DMBE_SIZES)
1186                 return ERR_PTR(-EAGAIN);
1187
1188         /* try to alloc a new DMB */
1189         buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
1190         if (!buf_desc)
1191                 return ERR_PTR(-ENOMEM);
1192         if (is_dmb) {
1193                 rc = smc_ism_register_dmb(lgr, bufsize, buf_desc);
1194                 if (rc) {
1195                         kfree(buf_desc);
1196                         return ERR_PTR(-EAGAIN);
1197                 }
1198                 buf_desc->pages = virt_to_page(buf_desc->cpu_addr);
1199                 /* CDC header stored in buf. So, pretend it was smaller */
1200                 buf_desc->len = bufsize - sizeof(struct smcd_cdc_msg);
1201         } else {
1202                 buf_desc->cpu_addr = kzalloc(bufsize, GFP_KERNEL |
1203                                              __GFP_NOWARN | __GFP_NORETRY |
1204                                              __GFP_NOMEMALLOC);
1205                 if (!buf_desc->cpu_addr) {
1206                         kfree(buf_desc);
1207                         return ERR_PTR(-EAGAIN);
1208                 }
1209                 buf_desc->len = bufsize;
1210         }
1211         return buf_desc;
1212 }
1213
1214 static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
1215 {
1216         struct smc_buf_desc *buf_desc = ERR_PTR(-ENOMEM);
1217         struct smc_connection *conn = &smc->conn;
1218         struct smc_link_group *lgr = conn->lgr;
1219         struct list_head *buf_list;
1220         int bufsize, bufsize_short;
1221         int sk_buf_size;
1222         rwlock_t *lock;
1223
1224         if (is_rmb)
1225                 /* use socket recv buffer size (w/o overhead) as start value */
1226                 sk_buf_size = smc->sk.sk_rcvbuf / 2;
1227         else
1228                 /* use socket send buffer size (w/o overhead) as start value */
1229                 sk_buf_size = smc->sk.sk_sndbuf / 2;
1230
1231         for (bufsize_short = smc_compress_bufsize(sk_buf_size);
1232              bufsize_short >= 0; bufsize_short--) {
1233
1234                 if (is_rmb) {
1235                         lock = &lgr->rmbs_lock;
1236                         buf_list = &lgr->rmbs[bufsize_short];
1237                 } else {
1238                         lock = &lgr->sndbufs_lock;
1239                         buf_list = &lgr->sndbufs[bufsize_short];
1240                 }
1241                 bufsize = smc_uncompress_bufsize(bufsize_short);
1242                 if ((1 << get_order(bufsize)) > SG_MAX_SINGLE_ALLOC)
1243                         continue;
1244
1245                 /* check for reusable slot in the link group */
1246                 buf_desc = smc_buf_get_slot(bufsize_short, lock, buf_list);
1247                 if (buf_desc) {
1248                         memset(buf_desc->cpu_addr, 0, bufsize);
1249                         break; /* found reusable slot */
1250                 }
1251
1252                 if (is_smcd)
1253                         buf_desc = smcd_new_buf_create(lgr, is_rmb, bufsize);
1254                 else
1255                         buf_desc = smcr_new_buf_create(lgr, is_rmb, bufsize);
1256
1257                 if (PTR_ERR(buf_desc) == -ENOMEM)
1258                         break;
1259                 if (IS_ERR(buf_desc))
1260                         continue;
1261
1262                 buf_desc->used = 1;
1263                 write_lock_bh(lock);
1264                 list_add(&buf_desc->list, buf_list);
1265                 write_unlock_bh(lock);
1266                 break; /* found */
1267         }
1268
1269         if (IS_ERR(buf_desc))
1270                 return -ENOMEM;
1271
1272         if (!is_smcd) {
1273                 if (smcr_buf_map_usable_links(lgr, buf_desc, is_rmb)) {
1274                         return -ENOMEM;
1275                 }
1276         }
1277
1278         if (is_rmb) {
1279                 conn->rmb_desc = buf_desc;
1280                 conn->rmbe_size_short = bufsize_short;
1281                 smc->sk.sk_rcvbuf = bufsize * 2;
1282                 atomic_set(&conn->bytes_to_rcv, 0);
1283                 conn->rmbe_update_limit =
1284                         smc_rmb_wnd_update_limit(buf_desc->len);
1285                 if (is_smcd)
1286                         smc_ism_set_conn(conn); /* map RMB/smcd_dev to conn */
1287         } else {
1288                 conn->sndbuf_desc = buf_desc;
1289                 smc->sk.sk_sndbuf = bufsize * 2;
1290                 atomic_set(&conn->sndbuf_space, bufsize);
1291         }
1292         return 0;
1293 }
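/* Sizing sketch: __smc_buf_create() starts from half the socket's sk_rcvbuf
 * or sk_sndbuf, converts that to the compressed index and walks the indices
 * downwards, first looking for a reusable slot and then allocating a new
 * buffer.  With sk_sndbuf == 262144 the first attempt is a 131072 byte
 * buffer; if the page allocation fails (-EAGAIN) it falls back to 65536,
 * 32768 and finally 16384 bytes, and only then (or on a descriptor
 * allocation failure, -ENOMEM) gives up with -ENOMEM.
 */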
1294
1295 void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn)
1296 {
1297         if (!conn->lgr || conn->lgr->is_smcd)
1298                 return;
1299         smc_ib_sync_sg_for_cpu(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE);
1300 }
1301
1302 void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn)
1303 {
1304         if (!conn->lgr || conn->lgr->is_smcd)
1305                 return;
1306         smc_ib_sync_sg_for_device(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE);
1307 }
1308
1309 void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn)
1310 {
1311         int i;
1312
1313         if (!conn->lgr || conn->lgr->is_smcd)
1314                 return;
1315         for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
1316                 if (conn->lgr->lnk[i].state != SMC_LNK_ACTIVE &&
1317                     conn->lgr->lnk[i].state != SMC_LNK_ACTIVATING)
1318                         continue;
1319                 smc_ib_sync_sg_for_cpu(&conn->lgr->lnk[i], conn->rmb_desc,
1320                                        DMA_FROM_DEVICE);
1321         }
1322 }
1323
1324 void smc_rmb_sync_sg_for_device(struct smc_connection *conn)
1325 {
1326         int i;
1327
1328         if (!conn->lgr || conn->lgr->is_smcd)
1329                 return;
1330         for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
1331                 if (conn->lgr->lnk[i].state != SMC_LNK_ACTIVE &&
1332                     conn->lgr->lnk[i].state != SMC_LNK_ACTIVATING)
1333                         continue;
1334                 smc_ib_sync_sg_for_device(&conn->lgr->lnk[i], conn->rmb_desc,
1335                                           DMA_FROM_DEVICE);
1336         }
1337 }
1338
1339 /* create the send and receive buffer for an SMC socket;
1340  * receive buffers are called RMBs;
1341  * (even though the SMC protocol allows more than one RMB-element per RMB,
1342  * the Linux implementation uses just one RMB-element per RMB, i.e. uses an
1343  * extra RMB for every connection in a link group
1344  */
1345 int smc_buf_create(struct smc_sock *smc, bool is_smcd)
1346 {
1347         int rc;
1348
1349         /* create send buffer */
1350         rc = __smc_buf_create(smc, is_smcd, false);
1351         if (rc)
1352                 return rc;
1353         /* create rmb */
1354         rc = __smc_buf_create(smc, is_smcd, true);
1355         if (rc)
1356                 smc_buf_free(smc->conn.lgr, false, smc->conn.sndbuf_desc);
1357         return rc;
1358 }
1359
1360 static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
1361 {
1362         int i;
1363
1364         for_each_clear_bit(i, lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX) {
1365                 if (!test_and_set_bit(i, lgr->rtokens_used_mask))
1366                         return i;
1367         }
1368         return -ENOSPC;
1369 }
1370
1371 /* add a new rtoken from peer */
1372 int smc_rtoken_add(struct smc_link *lnk, __be64 nw_vaddr, __be32 nw_rkey)
1373 {
1374         struct smc_link_group *lgr = smc_get_lgr(lnk);
1375         u64 dma_addr = be64_to_cpu(nw_vaddr);
1376         u32 rkey = ntohl(nw_rkey);
1377         int i;
1378
1379         for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
1380                 if (lgr->rtokens[i][lnk->link_idx].rkey == rkey &&
1381                     lgr->rtokens[i][lnk->link_idx].dma_addr == dma_addr &&
1382                     test_bit(i, lgr->rtokens_used_mask)) {
1383                         /* already in list */
1384                         return i;
1385                 }
1386         }
1387         i = smc_rmb_reserve_rtoken_idx(lgr);
1388         if (i < 0)
1389                 return i;
1390         lgr->rtokens[i][lnk->link_idx].rkey = rkey;
1391         lgr->rtokens[i][lnk->link_idx].dma_addr = dma_addr;
1392         return i;
1393 }
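/* rtokens are kept per link group in a two-dimensional array: the first index
 * is the rtoken slot shared across all links, the second is the link the
 * rkey / RMB address pair belongs to.  Adding an (rkey, dma_addr) pair that
 * is already known for a link is idempotent and returns the existing slot.
 */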
1394
1395 /* delete an rtoken */
1396 int smc_rtoken_delete(struct smc_link *lnk, __be32 nw_rkey)
1397 {
1398         struct smc_link_group *lgr = smc_get_lgr(lnk);
1399         u32 rkey = ntohl(nw_rkey);
1400         int i;
1401
1402         for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
1403                 if (lgr->rtokens[i][lnk->link_idx].rkey == rkey &&
1404                     test_bit(i, lgr->rtokens_used_mask)) {
1405                         lgr->rtokens[i][lnk->link_idx].rkey = 0;
1406                         lgr->rtokens[i][lnk->link_idx].dma_addr = 0;
1407
1408                         clear_bit(i, lgr->rtokens_used_mask);
1409                         return 0;
1410                 }
1411         }
1412         return -ENOENT;
1413 }
1414
1415 /* save rkey and dma_addr received from peer during clc handshake */
1416 int smc_rmb_rtoken_handling(struct smc_connection *conn,
1417                             struct smc_clc_msg_accept_confirm *clc)
1418 {
1419         conn->rtoken_idx = smc_rtoken_add(conn->lnk, clc->rmb_dma_addr,
1420                                           clc->rmb_rkey);
1421         if (conn->rtoken_idx < 0)
1422                 return conn->rtoken_idx;
1423         return 0;
1424 }
1425
1426 static void smc_core_going_away(void)
1427 {
1428         struct smc_ib_device *smcibdev;
1429         struct smcd_dev *smcd;
1430
1431         spin_lock(&smc_ib_devices.lock);
1432         list_for_each_entry(smcibdev, &smc_ib_devices.list, list) {
1433                 int i;
1434
1435                 for (i = 0; i < SMC_MAX_PORTS; i++)
1436                         set_bit(i, smcibdev->ports_going_away);
1437         }
1438         spin_unlock(&smc_ib_devices.lock);
1439
1440         spin_lock(&smcd_dev_list.lock);
1441         list_for_each_entry(smcd, &smcd_dev_list.list, list) {
1442                 smcd->going_away = 1;
1443         }
1444         spin_unlock(&smcd_dev_list.lock);
1445 }
1446
1447 /* Clean up all SMC link groups */
1448 static void smc_lgrs_shutdown(void)
1449 {
1450         struct smcd_dev *smcd;
1451
1452         smc_core_going_away();
1453
1454         smc_smcr_terminate_all(NULL);
1455
1456         spin_lock(&smcd_dev_list.lock);
1457         list_for_each_entry(smcd, &smcd_dev_list.list, list)
1458                 smc_smcd_terminate_all(smcd);
1459         spin_unlock(&smcd_dev_list.lock);
1460 }
1461
1462 static int smc_core_reboot_event(struct notifier_block *this,
1463                                  unsigned long event, void *ptr)
1464 {
1465         smc_lgrs_shutdown();
1466         smc_ib_unregister_client();
1467         return 0;
1468 }
1469
1470 static struct notifier_block smc_reboot_notifier = {
1471         .notifier_call = smc_core_reboot_event,
1472 };
1473
1474 int __init smc_core_init(void)
1475 {
1476         return register_reboot_notifier(&smc_reboot_notifier);
1477 }
1478
1479 /* Called (from smc_exit) when module is removed */
1480 void smc_core_exit(void)
1481 {
1482         unregister_reboot_notifier(&smc_reboot_notifier);
1483         smc_lgrs_shutdown();
1484 }