net/smc: move add link processing for new device into llc layer
net/smc/smc_core.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  *  Shared Memory Communications over RDMA (SMC-R) and RoCE
4  *
5  *  Basic Transport Functions exploiting Infiniband API
6  *
7  *  Copyright IBM Corp. 2016
8  *
9  *  Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
10  */
11
12 #include <linux/socket.h>
13 #include <linux/if_vlan.h>
14 #include <linux/random.h>
15 #include <linux/workqueue.h>
16 #include <linux/wait.h>
17 #include <linux/reboot.h>
18 #include <linux/mutex.h>
19 #include <net/tcp.h>
20 #include <net/sock.h>
21 #include <rdma/ib_verbs.h>
22 #include <rdma/ib_cache.h>
23
24 #include "smc.h"
25 #include "smc_clc.h"
26 #include "smc_core.h"
27 #include "smc_ib.h"
28 #include "smc_wr.h"
29 #include "smc_llc.h"
30 #include "smc_cdc.h"
31 #include "smc_close.h"
32 #include "smc_ism.h"
33
34 #define SMC_LGR_NUM_INCR                256
35 #define SMC_LGR_FREE_DELAY_SERV         (600 * HZ)
36 #define SMC_LGR_FREE_DELAY_CLNT         (SMC_LGR_FREE_DELAY_SERV + 10 * HZ)
37 #define SMC_LGR_FREE_DELAY_FAST         (8 * HZ)
38
39 static struct smc_lgr_list smc_lgr_list = {     /* established link groups */
40         .lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock),
41         .list = LIST_HEAD_INIT(smc_lgr_list.list),
42         .num = 0,
43 };
44
45 static atomic_t lgr_cnt = ATOMIC_INIT(0); /* number of existing link groups */
46 static DECLARE_WAIT_QUEUE_HEAD(lgrs_deleted);
47
48 static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
49                          struct smc_buf_desc *buf_desc);
50 static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft);
51
52 static void smc_link_down_work(struct work_struct *work);
53
54 /* return head of link group list and its lock for a given link group */
55 static inline struct list_head *smc_lgr_list_head(struct smc_link_group *lgr,
56                                                   spinlock_t **lgr_lock)
57 {
58         if (lgr->is_smcd) {
59                 *lgr_lock = &lgr->smcd->lgr_lock;
60                 return &lgr->smcd->lgr_list;
61         }
62
63         *lgr_lock = &smc_lgr_list.lock;
64         return &smc_lgr_list.list;
65 }
66
67 static void smc_lgr_schedule_free_work(struct smc_link_group *lgr)
68 {
69         /* client link group creation always follows the server link group
70          * creation. For client use a somewhat higher removal delay time,
71          * otherwise there is a risk of out-of-sync link groups.
72          */
73         if (!lgr->freeing && !lgr->freefast) {
74                 mod_delayed_work(system_wq, &lgr->free_work,
75                                  (!lgr->is_smcd && lgr->role == SMC_CLNT) ?
76                                                 SMC_LGR_FREE_DELAY_CLNT :
77                                                 SMC_LGR_FREE_DELAY_SERV);
78         }
79 }
80
81 void smc_lgr_schedule_free_work_fast(struct smc_link_group *lgr)
82 {
83         if (!lgr->freeing && !lgr->freefast) {
84                 lgr->freefast = 1;
85                 mod_delayed_work(system_wq, &lgr->free_work,
86                                  SMC_LGR_FREE_DELAY_FAST);
87         }
88 }
89
90 /* Register connection's alert token in our lookup structure.
91  * To use rbtrees we have to implement our own insert core.
92  * Requires @conns_lock
93  * @conn        connection to register
94  * No return value; rbtree insertion cannot fail.
95  */
96 static void smc_lgr_add_alert_token(struct smc_connection *conn)
97 {
98         struct rb_node **link, *parent = NULL;
99         u32 token = conn->alert_token_local;
100
101         link = &conn->lgr->conns_all.rb_node;
102         while (*link) {
103                 struct smc_connection *cur = rb_entry(*link,
104                                         struct smc_connection, alert_node);
105
106                 parent = *link;
107                 if (cur->alert_token_local > token)
108                         link = &parent->rb_left;
109                 else
110                         link = &parent->rb_right;
111         }
112         /* Put the new node there */
113         rb_link_node(&conn->alert_node, parent, link);
114         rb_insert_color(&conn->alert_node, &conn->lgr->conns_all);
115 }
116
117 /* assign an SMC-R link to the connection */
118 static int smcr_lgr_conn_assign_link(struct smc_connection *conn, bool first)
119 {
120         enum smc_link_state expected = first ? SMC_LNK_ACTIVATING :
121                                        SMC_LNK_ACTIVE;
122         int i, j;
123
124         /* do link balancing */
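        /* connections alternate between the first matching link and the
         * next matching one (depending on whether conns_num is even or
         * odd), so load is spread evenly across the active links
         */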
125         for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
126                 struct smc_link *lnk = &conn->lgr->lnk[i];
127
128                 if (lnk->state != expected || lnk->link_is_asym)
129                         continue;
130                 if (conn->lgr->role == SMC_CLNT) {
131                         conn->lnk = lnk; /* temporary, SMC server assigns link */
132                         break;
133                 }
134                 if (conn->lgr->conns_num % 2) {
135                         for (j = i + 1; j < SMC_LINKS_PER_LGR_MAX; j++) {
136                                 struct smc_link *lnk2;
137
138                                 lnk2 = &conn->lgr->lnk[j];
139                                 if (lnk2->state == expected &&
140                                     !lnk2->link_is_asym) {
141                                         conn->lnk = lnk2;
142                                         break;
143                                 }
144                         }
145                 }
146                 if (!conn->lnk)
147                         conn->lnk = lnk;
148                 break;
149         }
150         if (!conn->lnk)
151                 return SMC_CLC_DECL_NOACTLINK;
152         return 0;
153 }
154
155 /* Register connection in link group by assigning an alert token
156  * registered in a search tree.
157  * Requires @conns_lock
158  * Note that '0' is a reserved value and not assigned.
159  */
160 static int smc_lgr_register_conn(struct smc_connection *conn, bool first)
161 {
162         struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
163         static atomic_t nexttoken = ATOMIC_INIT(0);
164         int rc;
165
166         if (!conn->lgr->is_smcd) {
167                 rc = smcr_lgr_conn_assign_link(conn, first);
168                 if (rc)
169                         return rc;
170         }
171         /* find a new alert_token_local value not yet used by some connection
172          * in this link group
173          */
174         sock_hold(&smc->sk); /* sock_put in smc_lgr_unregister_conn() */
175         while (!conn->alert_token_local) {
176                 conn->alert_token_local = atomic_inc_return(&nexttoken);
177                 if (smc_lgr_find_conn(conn->alert_token_local, conn->lgr))
178                         conn->alert_token_local = 0;
179         }
180         smc_lgr_add_alert_token(conn);
181         conn->lgr->conns_num++;
182         return 0;
183 }
184
185 /* Unregister connection and reset the alert token of the given connection
186  */
187 static void __smc_lgr_unregister_conn(struct smc_connection *conn)
188 {
189         struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
190         struct smc_link_group *lgr = conn->lgr;
191
192         rb_erase(&conn->alert_node, &lgr->conns_all);
193         lgr->conns_num--;
194         conn->alert_token_local = 0;
195         sock_put(&smc->sk); /* sock_hold in smc_lgr_register_conn() */
196 }
197
198 /* Unregister connection from lgr
199  */
200 static void smc_lgr_unregister_conn(struct smc_connection *conn)
201 {
202         struct smc_link_group *lgr = conn->lgr;
203
204         if (!lgr)
205                 return;
206         write_lock_bh(&lgr->conns_lock);
207         if (conn->alert_token_local) {
208                 __smc_lgr_unregister_conn(conn);
209         }
210         write_unlock_bh(&lgr->conns_lock);
211         conn->lgr = NULL;
212 }
213
214 void smc_lgr_cleanup_early(struct smc_connection *conn)
215 {
216         struct smc_link_group *lgr = conn->lgr;
217         spinlock_t *lgr_lock;
219
220         if (!lgr)
221                 return;
222
223         smc_conn_free(conn);
224         smc_lgr_list_head(lgr, &lgr_lock);
225         spin_lock_bh(lgr_lock);
226         /* do not use this link group for new connections */
227         if (!list_empty(&lgr->list))
228                 list_del_init(&lgr->list);
229         spin_unlock_bh(lgr_lock);
230         smc_lgr_schedule_free_work_fast(lgr);
231 }
232
233 static void smcr_lgr_link_deactivate_all(struct smc_link_group *lgr)
234 {
235         int i;
236
237         for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
238                 struct smc_link *lnk = &lgr->lnk[i];
239
240                 if (smc_link_usable(lnk))
241                         lnk->state = SMC_LNK_INACTIVE;
242         }
243         wake_up_all(&lgr->llc_msg_waiter);
244         wake_up_all(&lgr->llc_flow_waiter);
245 }
246
247 static void smc_lgr_free(struct smc_link_group *lgr);
248
249 static void smc_lgr_free_work(struct work_struct *work)
250 {
251         struct smc_link_group *lgr = container_of(to_delayed_work(work),
252                                                   struct smc_link_group,
253                                                   free_work);
254         spinlock_t *lgr_lock;
255         bool conns;
256
257         smc_lgr_list_head(lgr, &lgr_lock);
258         spin_lock_bh(lgr_lock);
259         if (lgr->freeing) {
260                 spin_unlock_bh(lgr_lock);
261                 return;
262         }
263         read_lock_bh(&lgr->conns_lock);
264         conns = RB_EMPTY_ROOT(&lgr->conns_all);
265         read_unlock_bh(&lgr->conns_lock);
266         if (!conns) { /* number of lgr connections is no longer zero */
267                 spin_unlock_bh(lgr_lock);
268                 return;
269         }
270         list_del_init(&lgr->list); /* remove from smc_lgr_list */
271         lgr->freeing = 1; /* this instance does the freeing, no new schedule */
272         spin_unlock_bh(lgr_lock);
273         cancel_delayed_work(&lgr->free_work);
274
275         if (!lgr->is_smcd && !lgr->terminating)
276                 smc_llc_send_link_delete_all(lgr, true,
277                                              SMC_LLC_DEL_PROG_INIT_TERM);
278         if (lgr->is_smcd && !lgr->terminating)
279                 smc_ism_signal_shutdown(lgr);
280         if (!lgr->is_smcd)
281                 smcr_lgr_link_deactivate_all(lgr);
282         smc_lgr_free(lgr);
283 }
284
285 static void smc_lgr_terminate_work(struct work_struct *work)
286 {
287         struct smc_link_group *lgr = container_of(work, struct smc_link_group,
288                                                   terminate_work);
289
290         __smc_lgr_terminate(lgr, true);
291 }
292
293 /* return next unique link id for the lgr */
294 static u8 smcr_next_link_id(struct smc_link_group *lgr)
295 {
296         u8 link_id;
297         int i;
298
299         while (1) {
300                 link_id = ++lgr->next_link_id;
301                 if (!link_id)   /* skip zero as link_id */
302                         link_id = ++lgr->next_link_id;
303                 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
304                         if (smc_link_usable(&lgr->lnk[i]) &&
305                             lgr->lnk[i].link_id == link_id)
306                                 break;  /* link_id is already in use */
307                 if (i == SMC_LINKS_PER_LGR_MAX)
308                         break;          /* link_id not used yet */
309         }
310         return link_id;
311 }
312
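/* initialize a new SMC-R link: allocate its work request buffers, protection
 * domain and queue pair on the IB device given in @ini
 */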
313 int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
314                    u8 link_idx, struct smc_init_info *ini)
315 {
316         u8 rndvec[3];
317         int rc;
318
319         get_device(&ini->ib_dev->ibdev->dev);
320         atomic_inc(&ini->ib_dev->lnk_cnt);
321         lnk->state = SMC_LNK_ACTIVATING;
322         lnk->link_id = smcr_next_link_id(lgr);
323         lnk->lgr = lgr;
324         lnk->link_idx = link_idx;
325         lnk->smcibdev = ini->ib_dev;
326         lnk->ibport = ini->ib_port;
327         lnk->path_mtu = ini->ib_dev->pattr[ini->ib_port - 1].active_mtu;
328         smc_llc_link_set_uid(lnk);
329         INIT_WORK(&lnk->link_down_wrk, smc_link_down_work);
330         if (!ini->ib_dev->initialized) {
331                 rc = (int)smc_ib_setup_per_ibdev(ini->ib_dev);
332                 if (rc)
333                         goto out;
334         }
335         get_random_bytes(rndvec, sizeof(rndvec));
336         lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) +
337                 (rndvec[2] << 16);
338         rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport,
339                                   ini->vlan_id, lnk->gid, &lnk->sgid_index);
340         if (rc)
341                 goto out;
342         rc = smc_llc_link_init(lnk);
343         if (rc)
344                 goto out;
345         rc = smc_wr_alloc_link_mem(lnk);
346         if (rc)
347                 goto clear_llc_lnk;
348         rc = smc_ib_create_protection_domain(lnk);
349         if (rc)
350                 goto free_link_mem;
351         rc = smc_ib_create_queue_pair(lnk);
352         if (rc)
353                 goto dealloc_pd;
354         rc = smc_wr_create_link(lnk);
355         if (rc)
356                 goto destroy_qp;
357         return 0;
358
359 destroy_qp:
360         smc_ib_destroy_queue_pair(lnk);
361 dealloc_pd:
362         smc_ib_dealloc_protection_domain(lnk);
363 free_link_mem:
364         smc_wr_free_link_mem(lnk);
365 clear_llc_lnk:
366         smc_llc_link_clear(lnk, false);
367 out:
368         put_device(&ini->ib_dev->ibdev->dev);
369         memset(lnk, 0, sizeof(struct smc_link));
370         lnk->state = SMC_LNK_UNUSED;
371         if (!atomic_dec_return(&ini->ib_dev->lnk_cnt))
372                 wake_up(&ini->ib_dev->lnks_deleted);
373         return rc;
374 }
375
376 /* create a new SMC link group */
377 static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
378 {
379         struct smc_link_group *lgr;
380         struct list_head *lgr_list;
381         struct smc_link *lnk;
382         spinlock_t *lgr_lock;
383         u8 link_idx;
384         int rc = 0;
385         int i;
386
387         if (ini->is_smcd && ini->vlan_id) {
388                 if (smc_ism_get_vlan(ini->ism_dev, ini->vlan_id)) {
389                         rc = SMC_CLC_DECL_ISMVLANERR;
390                         goto out;
391                 }
392         }
393
394         lgr = kzalloc(sizeof(*lgr), GFP_KERNEL);
395         if (!lgr) {
396                 rc = SMC_CLC_DECL_MEM;
397                 goto ism_put_vlan;
398         }
399         lgr->is_smcd = ini->is_smcd;
400         lgr->sync_err = 0;
401         lgr->terminating = 0;
402         lgr->freefast = 0;
403         lgr->freeing = 0;
404         lgr->vlan_id = ini->vlan_id;
405         mutex_init(&lgr->sndbufs_lock);
406         mutex_init(&lgr->rmbs_lock);
407         rwlock_init(&lgr->conns_lock);
408         for (i = 0; i < SMC_RMBE_SIZES; i++) {
409                 INIT_LIST_HEAD(&lgr->sndbufs[i]);
410                 INIT_LIST_HEAD(&lgr->rmbs[i]);
411         }
412         lgr->next_link_id = 0;
413         smc_lgr_list.num += SMC_LGR_NUM_INCR;
414         memcpy(&lgr->id, (u8 *)&smc_lgr_list.num, SMC_LGR_ID_SIZE);
415         INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work);
416         INIT_WORK(&lgr->terminate_work, smc_lgr_terminate_work);
417         lgr->conns_all = RB_ROOT;
418         if (ini->is_smcd) {
419                 /* SMC-D specific settings */
420                 get_device(&ini->ism_dev->dev);
421                 lgr->peer_gid = ini->ism_gid;
422                 lgr->smcd = ini->ism_dev;
423                 lgr_list = &ini->ism_dev->lgr_list;
424                 lgr_lock = &lgr->smcd->lgr_lock;
425                 lgr->peer_shutdown = 0;
426                 atomic_inc(&ini->ism_dev->lgr_cnt);
427         } else {
428                 /* SMC-R specific settings */
429                 lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
430                 memcpy(lgr->peer_systemid, ini->ib_lcl->id_for_peer,
431                        SMC_SYSTEMID_LEN);
432                 memcpy(lgr->pnet_id, ini->ib_dev->pnetid[ini->ib_port - 1],
433                        SMC_MAX_PNETID_LEN);
434                 smc_llc_lgr_init(lgr, smc);
435
436                 link_idx = SMC_SINGLE_LINK;
437                 lnk = &lgr->lnk[link_idx];
438                 rc = smcr_link_init(lgr, lnk, link_idx, ini);
439                 if (rc)
440                         goto free_lgr;
441                 lgr_list = &smc_lgr_list.list;
442                 lgr_lock = &smc_lgr_list.lock;
443                 atomic_inc(&lgr_cnt);
444         }
445         smc->conn.lgr = lgr;
446         spin_lock_bh(lgr_lock);
447         list_add(&lgr->list, lgr_list);
448         spin_unlock_bh(lgr_lock);
449         return 0;
450
451 free_lgr:
452         kfree(lgr);
453 ism_put_vlan:
454         if (ini->is_smcd && ini->vlan_id)
455                 smc_ism_put_vlan(ini->ism_dev, ini->vlan_id);
456 out:
457         if (rc < 0) {
458                 if (rc == -ENOMEM)
459                         rc = SMC_CLC_DECL_MEM;
460                 else
461                         rc = SMC_CLC_DECL_INTERR;
462         }
463         return rc;
464 }
465
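/* return the number of bytes that can still be written into the peer's RMB,
 * based on the local producer cursor and the consumer cursor last received
 * from the peer
 */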
466 static int smc_write_space(struct smc_connection *conn)
467 {
468         int buffer_len = conn->peer_rmbe_size;
469         union smc_host_cursor prod;
470         union smc_host_cursor cons;
471         int space;
472
473         smc_curs_copy(&prod, &conn->local_tx_ctrl.prod, conn);
474         smc_curs_copy(&cons, &conn->local_rx_ctrl.cons, conn);
475         /* determine rx_buf space */
476         space = buffer_len - smc_curs_diff(buffer_len, &cons, &prod);
477         return space;
478 }
479
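/* rewind the tx cursors of a connection that has been moved to another link
 * back to the last state confirmed by the peer, so that data not yet
 * confirmed is transferred again over the new link
 */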
480 static int smc_switch_cursor(struct smc_sock *smc, struct smc_cdc_tx_pend *pend,
481                              struct smc_wr_buf *wr_buf)
482 {
483         struct smc_connection *conn = &smc->conn;
484         union smc_host_cursor cons, fin;
485         int rc = 0;
486         int diff;
487
488         smc_curs_copy(&conn->tx_curs_sent, &conn->tx_curs_fin, conn);
489         smc_curs_copy(&fin, &conn->local_tx_ctrl_fin, conn);
490         /* set prod cursor to old state, enforce tx_rdma_writes() */
491         smc_curs_copy(&conn->local_tx_ctrl.prod, &fin, conn);
492         smc_curs_copy(&cons, &conn->local_rx_ctrl.cons, conn);
493
494         if (smc_curs_comp(conn->peer_rmbe_size, &cons, &fin) < 0) {
495                 /* cons cursor advanced more than fin, and prod was set
496                  * to fin above, so now prod is smaller than cons. Fix that.
497                  */
498                 diff = smc_curs_diff(conn->peer_rmbe_size, &fin, &cons);
499                 smc_curs_add(conn->sndbuf_desc->len,
500                              &conn->tx_curs_sent, diff);
501                 smc_curs_add(conn->sndbuf_desc->len,
502                              &conn->tx_curs_fin, diff);
503
504                 smp_mb__before_atomic();
505                 atomic_add(diff, &conn->sndbuf_space);
506                 smp_mb__after_atomic();
507
508                 smc_curs_add(conn->peer_rmbe_size,
509                              &conn->local_tx_ctrl.prod, diff);
510                 smc_curs_add(conn->peer_rmbe_size,
511                              &conn->local_tx_ctrl_fin, diff);
512         }
513         /* recalculate, value is used by tx_rdma_writes() */
514         atomic_set(&smc->conn.peer_rmbe_space, smc_write_space(conn));
515
516         if (smc->sk.sk_state != SMC_INIT &&
517             smc->sk.sk_state != SMC_CLOSED) {
518                 rc = smcr_cdc_msg_send_validation(conn, pend, wr_buf);
519                 if (!rc) {
520                         schedule_delayed_work(&conn->tx_work, 0);
521                         smc->sk.sk_data_ready(&smc->sk);
522                 }
523         } else {
524                 smc_wr_tx_put_slot(conn->lnk,
525                                    (struct smc_wr_tx_pend_priv *)pend);
526         }
527         return rc;
528 }
529
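/* switch all connections of a link group from @from_lnk to another active
 * link; returns the new link, or NULL if no usable link is available
 */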
530 struct smc_link *smc_switch_conns(struct smc_link_group *lgr,
531                                   struct smc_link *from_lnk, bool is_dev_err)
532 {
533         struct smc_link *to_lnk = NULL;
534         struct smc_cdc_tx_pend *pend;
535         struct smc_connection *conn;
536         struct smc_wr_buf *wr_buf;
537         struct smc_sock *smc;
538         struct rb_node *node;
539         int i, rc = 0;
540
541         /* link is inactive, wake up tx waiters */
542         smc_wr_wakeup_tx_wait(from_lnk);
543
544         for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
545                 if (lgr->lnk[i].state != SMC_LNK_ACTIVE ||
546                     i == from_lnk->link_idx)
547                         continue;
548                 if (is_dev_err && from_lnk->smcibdev == lgr->lnk[i].smcibdev &&
549                     from_lnk->ibport == lgr->lnk[i].ibport) {
550                         continue;
551                 }
552                 to_lnk = &lgr->lnk[i];
553                 break;
554         }
555         if (!to_lnk) {
556                 smc_lgr_terminate_sched(lgr);
557                 return NULL;
558         }
559 again:
560         read_lock_bh(&lgr->conns_lock);
561         for (node = rb_first(&lgr->conns_all); node; node = rb_next(node)) {
562                 conn = rb_entry(node, struct smc_connection, alert_node);
563                 if (conn->lnk != from_lnk)
564                         continue;
565                 smc = container_of(conn, struct smc_sock, conn);
566                 /* conn->lnk not yet set in SMC_INIT state */
567                 if (smc->sk.sk_state == SMC_INIT)
568                         continue;
569                 if (smc->sk.sk_state == SMC_CLOSED ||
570                     smc->sk.sk_state == SMC_PEERCLOSEWAIT1 ||
571                     smc->sk.sk_state == SMC_PEERCLOSEWAIT2 ||
572                     smc->sk.sk_state == SMC_APPFINCLOSEWAIT ||
573                     smc->sk.sk_state == SMC_APPCLOSEWAIT1 ||
574                     smc->sk.sk_state == SMC_APPCLOSEWAIT2 ||
575                     smc->sk.sk_state == SMC_PEERFINCLOSEWAIT ||
576                     smc->sk.sk_state == SMC_PEERABORTWAIT ||
577                     smc->sk.sk_state == SMC_PROCESSABORT) {
578                         spin_lock_bh(&conn->send_lock);
579                         conn->lnk = to_lnk;
580                         spin_unlock_bh(&conn->send_lock);
581                         continue;
582                 }
583                 sock_hold(&smc->sk);
584                 read_unlock_bh(&lgr->conns_lock);
585                 /* pre-fetch buffer outside of send_lock, might sleep */
586                 rc = smc_cdc_get_free_slot(conn, to_lnk, &wr_buf, NULL, &pend);
587                 if (rc) {
588                         smcr_link_down_cond_sched(to_lnk);
589                         return NULL;
590                 }
591                 /* avoid race with smcr_tx_sndbuf_nonempty() */
592                 spin_lock_bh(&conn->send_lock);
593                 conn->lnk = to_lnk;
594                 rc = smc_switch_cursor(smc, pend, wr_buf);
595                 spin_unlock_bh(&conn->send_lock);
596                 sock_put(&smc->sk);
597                 if (rc) {
598                         smcr_link_down_cond_sched(to_lnk);
599                         return NULL;
600                 }
601                 goto again;
602         }
603         read_unlock_bh(&lgr->conns_lock);
604         return to_lnk;
605 }
606
607 static void smcr_buf_unuse(struct smc_buf_desc *rmb_desc,
608                            struct smc_link_group *lgr)
609 {
610         int rc;
611
612         if (rmb_desc->is_conf_rkey && !list_empty(&lgr->list)) {
613                 /* unregister rmb with peer */
614                 rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY);
615                 if (!rc) {
616                         /* protect against smc_llc_cli_rkey_exchange() */
617                         mutex_lock(&lgr->llc_conf_mutex);
618                         smc_llc_do_delete_rkey(lgr, rmb_desc);
619                         rmb_desc->is_conf_rkey = false;
620                         mutex_unlock(&lgr->llc_conf_mutex);
621                         smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
622                 }
623         }
624
625         if (rmb_desc->is_reg_err) {
626                 /* buf registration failed, reuse not possible */
627                 mutex_lock(&lgr->rmbs_lock);
628                 list_del(&rmb_desc->list);
629                 mutex_unlock(&lgr->rmbs_lock);
630
631                 smc_buf_free(lgr, true, rmb_desc);
632         } else {
633                 rmb_desc->used = 0;
634         }
635 }
636
637 static void smc_buf_unuse(struct smc_connection *conn,
638                           struct smc_link_group *lgr)
639 {
640         if (conn->sndbuf_desc)
641                 conn->sndbuf_desc->used = 0;
642         if (conn->rmb_desc && lgr->is_smcd)
643                 conn->rmb_desc->used = 0;
644         else if (conn->rmb_desc)
645                 smcr_buf_unuse(conn->rmb_desc, lgr);
646 }
647
648 /* remove a finished connection from its link group */
649 void smc_conn_free(struct smc_connection *conn)
650 {
651         struct smc_link_group *lgr = conn->lgr;
652
653         if (!lgr)
654                 return;
655         if (lgr->is_smcd) {
656                 if (!list_empty(&lgr->list))
657                         smc_ism_unset_conn(conn);
658                 tasklet_kill(&conn->rx_tsklet);
659         } else {
660                 smc_cdc_tx_dismiss_slots(conn);
661                 if (current_work() != &conn->abort_work)
662                         cancel_work_sync(&conn->abort_work);
663         }
664         if (!list_empty(&lgr->list)) {
665                 smc_lgr_unregister_conn(conn);
666                 smc_buf_unuse(conn, lgr); /* allow buffer reuse */
667         }
668
669         if (!lgr->conns_num)
670                 smc_lgr_schedule_free_work(lgr);
671 }
672
673 /* unregister a link from a buf_desc */
674 static void smcr_buf_unmap_link(struct smc_buf_desc *buf_desc, bool is_rmb,
675                                 struct smc_link *lnk)
676 {
677         if (is_rmb)
678                 buf_desc->is_reg_mr[lnk->link_idx] = false;
679         if (!buf_desc->is_map_ib[lnk->link_idx])
680                 return;
681         if (is_rmb) {
682                 if (buf_desc->mr_rx[lnk->link_idx]) {
683                         smc_ib_put_memory_region(
684                                         buf_desc->mr_rx[lnk->link_idx]);
685                         buf_desc->mr_rx[lnk->link_idx] = NULL;
686                 }
687                 smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_FROM_DEVICE);
688         } else {
689                 smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_TO_DEVICE);
690         }
691         sg_free_table(&buf_desc->sgt[lnk->link_idx]);
692         buf_desc->is_map_ib[lnk->link_idx] = false;
693 }
694
695 /* unmap all buffers of lgr for a deleted link */
696 static void smcr_buf_unmap_lgr(struct smc_link *lnk)
697 {
698         struct smc_link_group *lgr = lnk->lgr;
699         struct smc_buf_desc *buf_desc, *bf;
700         int i;
701
702         for (i = 0; i < SMC_RMBE_SIZES; i++) {
703                 mutex_lock(&lgr->rmbs_lock);
704                 list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list)
705                         smcr_buf_unmap_link(buf_desc, true, lnk);
706                 mutex_unlock(&lgr->rmbs_lock);
707                 mutex_lock(&lgr->sndbufs_lock);
708                 list_for_each_entry_safe(buf_desc, bf, &lgr->sndbufs[i],
709                                          list)
710                         smcr_buf_unmap_link(buf_desc, false, lnk);
711                 mutex_unlock(&lgr->sndbufs_lock);
712         }
713 }
714
715 static void smcr_rtoken_clear_link(struct smc_link *lnk)
716 {
717         struct smc_link_group *lgr = lnk->lgr;
718         int i;
719
720         for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
721                 lgr->rtokens[i][lnk->link_idx].rkey = 0;
722                 lgr->rtokens[i][lnk->link_idx].dma_addr = 0;
723         }
724 }
725
726 /* must be called under lgr->llc_conf_mutex lock */
727 void smcr_link_clear(struct smc_link *lnk, bool log)
728 {
729         struct smc_ib_device *smcibdev;
730
731         if (!lnk->lgr || lnk->state == SMC_LNK_UNUSED)
732                 return;
733         lnk->peer_qpn = 0;
734         smc_llc_link_clear(lnk, log);
735         smcr_buf_unmap_lgr(lnk);
736         smcr_rtoken_clear_link(lnk);
737         smc_ib_modify_qp_reset(lnk);
738         smc_wr_free_link(lnk);
739         smc_ib_destroy_queue_pair(lnk);
740         smc_ib_dealloc_protection_domain(lnk);
741         smc_wr_free_link_mem(lnk);
742         put_device(&lnk->smcibdev->ibdev->dev);
743         smcibdev = lnk->smcibdev;
744         memset(lnk, 0, sizeof(struct smc_link));
745         lnk->state = SMC_LNK_UNUSED;
746         if (!atomic_dec_return(&smcibdev->lnk_cnt))
747                 wake_up(&smcibdev->lnks_deleted);
748 }
749
750 static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb,
751                           struct smc_buf_desc *buf_desc)
752 {
753         int i;
754
755         for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
756                 smcr_buf_unmap_link(buf_desc, is_rmb, &lgr->lnk[i]);
757
758         if (buf_desc->pages)
759                 __free_pages(buf_desc->pages, buf_desc->order);
760         kfree(buf_desc);
761 }
762
763 static void smcd_buf_free(struct smc_link_group *lgr, bool is_dmb,
764                           struct smc_buf_desc *buf_desc)
765 {
766         if (is_dmb) {
767                 /* restore original buf len */
768                 buf_desc->len += sizeof(struct smcd_cdc_msg);
769                 smc_ism_unregister_dmb(lgr->smcd, buf_desc);
770         } else {
771                 kfree(buf_desc->cpu_addr);
772         }
773         kfree(buf_desc);
774 }
775
776 static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
777                          struct smc_buf_desc *buf_desc)
778 {
779         if (lgr->is_smcd)
780                 smcd_buf_free(lgr, is_rmb, buf_desc);
781         else
782                 smcr_buf_free(lgr, is_rmb, buf_desc);
783 }
784
785 static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb)
786 {
787         struct smc_buf_desc *buf_desc, *bf_desc;
788         struct list_head *buf_list;
789         int i;
790
791         for (i = 0; i < SMC_RMBE_SIZES; i++) {
792                 if (is_rmb)
793                         buf_list = &lgr->rmbs[i];
794                 else
795                         buf_list = &lgr->sndbufs[i];
796                 list_for_each_entry_safe(buf_desc, bf_desc, buf_list,
797                                          list) {
798                         list_del(&buf_desc->list);
799                         smc_buf_free(lgr, is_rmb, buf_desc);
800                 }
801         }
802 }
803
804 static void smc_lgr_free_bufs(struct smc_link_group *lgr)
805 {
806         /* free send buffers */
807         __smc_lgr_free_bufs(lgr, false);
808         /* free rmbs */
809         __smc_lgr_free_bufs(lgr, true);
810 }
811
812 /* remove a link group */
813 static void smc_lgr_free(struct smc_link_group *lgr)
814 {
815         int i;
816
817         if (!lgr->is_smcd) {
818                 mutex_lock(&lgr->llc_conf_mutex);
819                 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
820                         if (lgr->lnk[i].state != SMC_LNK_UNUSED)
821                                 smcr_link_clear(&lgr->lnk[i], false);
822                 }
823                 mutex_unlock(&lgr->llc_conf_mutex);
824                 smc_llc_lgr_clear(lgr);
825         }
826
827         smc_lgr_free_bufs(lgr);
828         if (lgr->is_smcd) {
829                 if (!lgr->terminating) {
830                         smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
831                         put_device(&lgr->smcd->dev);
832                 }
833                 if (!atomic_dec_return(&lgr->smcd->lgr_cnt))
834                         wake_up(&lgr->smcd->lgrs_deleted);
835         } else {
836                 if (!atomic_dec_return(&lgr_cnt))
837                         wake_up(&lgrs_deleted);
838         }
839         kfree(lgr);
840 }
841
842 static void smcd_unregister_all_dmbs(struct smc_link_group *lgr)
843 {
844         int i;
845
846         for (i = 0; i < SMC_RMBE_SIZES; i++) {
847                 struct smc_buf_desc *buf_desc;
848
849                 list_for_each_entry(buf_desc, &lgr->rmbs[i], list) {
850                         buf_desc->len += sizeof(struct smcd_cdc_msg);
851                         smc_ism_unregister_dmb(lgr->smcd, buf_desc);
852                 }
853         }
854 }
855
856 static void smc_sk_wake_ups(struct smc_sock *smc)
857 {
858         smc->sk.sk_write_space(&smc->sk);
859         smc->sk.sk_data_ready(&smc->sk);
860         smc->sk.sk_state_change(&smc->sk);
861 }
862
863 /* kill a connection */
864 static void smc_conn_kill(struct smc_connection *conn, bool soft)
865 {
866         struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
867
868         if (conn->lgr->is_smcd && conn->lgr->peer_shutdown)
869                 conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
870         else
871                 smc_close_abort(conn);
872         conn->killed = 1;
873         smc->sk.sk_err = ECONNABORTED;
874         smc_sk_wake_ups(smc);
875         if (conn->lgr->is_smcd) {
876                 smc_ism_unset_conn(conn);
877                 if (soft)
878                         tasklet_kill(&conn->rx_tsklet);
879                 else
880                         tasklet_unlock_wait(&conn->rx_tsklet);
881         } else {
882                 smc_cdc_tx_dismiss_slots(conn);
883         }
884         smc_lgr_unregister_conn(conn);
885         smc_close_active_abort(smc);
886 }
887
888 static void smc_lgr_cleanup(struct smc_link_group *lgr)
889 {
890         if (lgr->is_smcd) {
891                 smc_ism_signal_shutdown(lgr);
892                 smcd_unregister_all_dmbs(lgr);
893                 smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
894                 put_device(&lgr->smcd->dev);
895         } else {
896                 u32 rsn = lgr->llc_termination_rsn;
897
898                 if (!rsn)
899                         rsn = SMC_LLC_DEL_PROG_INIT_TERM;
900                 smc_llc_send_link_delete_all(lgr, false, rsn);
901                 smcr_lgr_link_deactivate_all(lgr);
902         }
903 }
904
905 /* terminate link group
906  * @soft: true if link group shutdown can take its time
907  *        false if immediate link group shutdown is required
908  */
909 static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft)
910 {
911         struct smc_connection *conn;
912         struct smc_sock *smc;
913         struct rb_node *node;
914
915         if (lgr->terminating)
916                 return; /* lgr already terminating */
917         /* cancel free_work sync, will terminate when lgr->freeing is set */
918         cancel_delayed_work_sync(&lgr->free_work);
919         lgr->terminating = 1;
920
921         /* kill remaining link group connections */
922         read_lock_bh(&lgr->conns_lock);
923         node = rb_first(&lgr->conns_all);
924         while (node) {
925                 read_unlock_bh(&lgr->conns_lock);
926                 conn = rb_entry(node, struct smc_connection, alert_node);
927                 smc = container_of(conn, struct smc_sock, conn);
928                 sock_hold(&smc->sk); /* sock_put below */
929                 lock_sock(&smc->sk);
930                 smc_conn_kill(conn, soft);
931                 release_sock(&smc->sk);
932                 sock_put(&smc->sk); /* sock_hold above */
933                 read_lock_bh(&lgr->conns_lock);
934                 node = rb_first(&lgr->conns_all);
935         }
936         read_unlock_bh(&lgr->conns_lock);
937         smc_lgr_cleanup(lgr);
938         smc_lgr_free(lgr);
939 }
940
941 /* unlink link group and schedule termination */
942 void smc_lgr_terminate_sched(struct smc_link_group *lgr)
943 {
944         spinlock_t *lgr_lock;
945
946         smc_lgr_list_head(lgr, &lgr_lock);
947         spin_lock_bh(lgr_lock);
948         if (list_empty(&lgr->list) || lgr->terminating || lgr->freeing) {
949                 spin_unlock_bh(lgr_lock);
950                 return; /* lgr already terminating */
951         }
952         list_del_init(&lgr->list);
953         lgr->freeing = 1;
954         spin_unlock_bh(lgr_lock);
955         schedule_work(&lgr->terminate_work);
956 }
957
958 /* Called when peer lgr shutdown (regularly or abnormally) is received */
959 void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan)
960 {
961         struct smc_link_group *lgr, *l;
962         LIST_HEAD(lgr_free_list);
963
964         /* collect affected link groups and build the free list */
965         spin_lock_bh(&dev->lgr_lock);
966         list_for_each_entry_safe(lgr, l, &dev->lgr_list, list) {
967                 if ((!peer_gid || lgr->peer_gid == peer_gid) &&
968                     (vlan == VLAN_VID_MASK || lgr->vlan_id == vlan)) {
969                         if (peer_gid) /* peer triggered termination */
970                                 lgr->peer_shutdown = 1;
971                         list_move(&lgr->list, &lgr_free_list);
972                         lgr->freeing = 1;
973                 }
974         }
975         spin_unlock_bh(&dev->lgr_lock);
976
977         /* unlink the collected link groups and schedule their termination */
978         list_for_each_entry_safe(lgr, l, &lgr_free_list, list) {
979                 list_del_init(&lgr->list);
980                 schedule_work(&lgr->terminate_work);
981         }
982 }
983
984 /* Called when an SMCD device is removed or the smc module is unloaded */
985 void smc_smcd_terminate_all(struct smcd_dev *smcd)
986 {
987         struct smc_link_group *lgr, *lg;
988         LIST_HEAD(lgr_free_list);
989
990         spin_lock_bh(&smcd->lgr_lock);
991         list_splice_init(&smcd->lgr_list, &lgr_free_list);
992         list_for_each_entry(lgr, &lgr_free_list, list)
993                 lgr->freeing = 1;
994         spin_unlock_bh(&smcd->lgr_lock);
995
996         list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) {
997                 list_del_init(&lgr->list);
998                 __smc_lgr_terminate(lgr, false);
999         }
1000
1001         if (atomic_read(&smcd->lgr_cnt))
1002                 wait_event(smcd->lgrs_deleted, !atomic_read(&smcd->lgr_cnt));
1003 }
1004
1005 /* Called when an SMCR device is removed or the smc module is unloaded.
1006  * If smcibdev is given, all SMCR link groups using this device are terminated.
1007  * If smcibdev is NULL, all SMCR link groups are terminated.
1008  */
1009 void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
1010 {
1011         struct smc_link_group *lgr, *lg;
1012         LIST_HEAD(lgr_free_list);
1013         int i;
1014
1015         spin_lock_bh(&smc_lgr_list.lock);
1016         if (!smcibdev) {
1017                 list_splice_init(&smc_lgr_list.list, &lgr_free_list);
1018                 list_for_each_entry(lgr, &lgr_free_list, list)
1019                         lgr->freeing = 1;
1020         } else {
1021                 list_for_each_entry_safe(lgr, lg, &smc_lgr_list.list, list) {
1022                         for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
1023                                 if (lgr->lnk[i].smcibdev == smcibdev)
1024                                         smcr_link_down_cond_sched(&lgr->lnk[i]);
1025                         }
1026                 }
1027         }
1028         spin_unlock_bh(&smc_lgr_list.lock);
1029
1030         list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) {
1031                 list_del_init(&lgr->list);
1032                 smc_llc_set_termination_rsn(lgr, SMC_LLC_DEL_OP_INIT_TERM);
1033                 __smc_lgr_terminate(lgr, false);
1034         }
1035
1036         if (smcibdev) {
1037                 if (atomic_read(&smcibdev->lnk_cnt))
1038                         wait_event(smcibdev->lnks_deleted,
1039                                    !atomic_read(&smcibdev->lnk_cnt));
1040         } else {
1041                 if (atomic_read(&lgr_cnt))
1042                         wait_event(lgrs_deleted, !atomic_read(&lgr_cnt));
1043         }
1044 }
1045
1046 /* set new lgr type and clear all asymmetric link tagging */
1047 void smcr_lgr_set_type(struct smc_link_group *lgr, enum smc_lgr_type new_type)
1048 {
1049         char *lgr_type = "";
1050         int i;
1051
1052         for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
1053                 if (smc_link_usable(&lgr->lnk[i]))
1054                         lgr->lnk[i].link_is_asym = false;
1055         if (lgr->type == new_type)
1056                 return;
1057         lgr->type = new_type;
1058
1059         switch (lgr->type) {
1060         case SMC_LGR_NONE:
1061                 lgr_type = "NONE";
1062                 break;
1063         case SMC_LGR_SINGLE:
1064                 lgr_type = "SINGLE";
1065                 break;
1066         case SMC_LGR_SYMMETRIC:
1067                 lgr_type = "SYMMETRIC";
1068                 break;
1069         case SMC_LGR_ASYMMETRIC_PEER:
1070                 lgr_type = "ASYMMETRIC_PEER";
1071                 break;
1072         case SMC_LGR_ASYMMETRIC_LOCAL:
1073                 lgr_type = "ASYMMETRIC_LOCAL";
1074                 break;
1075         }
1076         pr_warn_ratelimited("smc: SMC-R lg %*phN state changed: "
1077                             "%s, pnetid %.16s\n", SMC_LGR_ID_SIZE, &lgr->id,
1078                             lgr_type, lgr->pnet_id);
1079 }
1080
1081 /* set new lgr type and tag a link as asymmetric */
1082 void smcr_lgr_set_type_asym(struct smc_link_group *lgr,
1083                             enum smc_lgr_type new_type, int asym_lnk_idx)
1084 {
1085         smcr_lgr_set_type(lgr, new_type);
1086         lgr->lnk[asym_lnk_idx].link_is_asym = true;
1087 }
1088
1089 /* abort connection, abort_work scheduled from tasklet context */
1090 static void smc_conn_abort_work(struct work_struct *work)
1091 {
1092         struct smc_connection *conn = container_of(work,
1093                                                    struct smc_connection,
1094                                                    abort_work);
1095         struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
1096
1097         smc_conn_kill(conn, true);
1098         sock_put(&smc->sk); /* sock_hold done by schedulers of abort_work */
1099 }
1100
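/* an IB port became active - trigger local add link processing for every
 * link group with a matching pnetid that could still make use of another link
 */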
1101 void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport)
1102 {
1103         struct smc_link_group *lgr, *n;
1104
1105         list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) {
1106                 struct smc_link *link;
1107
1108                 if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id,
1109                             SMC_MAX_PNETID_LEN) ||
1110                     lgr->type == SMC_LGR_SYMMETRIC ||
1111                     lgr->type == SMC_LGR_ASYMMETRIC_PEER)
1112                         continue;
1113
1114                 /* trigger local add link processing */
1115                 link = smc_llc_usable_link(lgr);
1116                 if (link)
1117                         smc_llc_add_link_local(link);
1118         }
1119 }
1120
1121 /* link is down - switch connections to alternate link,
1122  * must be called under lgr->llc_conf_mutex lock
1123  */
1124 static void smcr_link_down(struct smc_link *lnk)
1125 {
1126         struct smc_link_group *lgr = lnk->lgr;
1127         struct smc_link *to_lnk;
1128         int del_link_id;
1129
1130         if (!lgr || lnk->state == SMC_LNK_UNUSED || list_empty(&lgr->list))
1131                 return;
1132
1133         smc_ib_modify_qp_reset(lnk);
1134         to_lnk = smc_switch_conns(lgr, lnk, true);
1135         if (!to_lnk) { /* no backup link available */
1136                 smcr_link_clear(lnk, true);
1137                 return;
1138         }
1139         smcr_lgr_set_type(lgr, SMC_LGR_SINGLE);
1140         del_link_id = lnk->link_id;
1141
1142         if (lgr->role == SMC_SERV) {
1143                 /* trigger local delete link processing */
1144                 smc_llc_srv_delete_link_local(to_lnk, del_link_id);
1145         } else {
1146                 if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) {
1147                         /* another llc task is ongoing */
1148                         mutex_unlock(&lgr->llc_conf_mutex);
1149                         wait_event_timeout(lgr->llc_flow_waiter,
1150                                 (list_empty(&lgr->list) ||
1151                                  lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE),
1152                                 SMC_LLC_WAIT_TIME);
1153                         mutex_lock(&lgr->llc_conf_mutex);
1154                 }
1155                 if (!list_empty(&lgr->list)) {
1156                         smc_llc_send_delete_link(to_lnk, del_link_id,
1157                                                  SMC_LLC_REQ, true,
1158                                                  SMC_LLC_DEL_LOST_PATH);
1159                         smcr_link_clear(lnk, true);
1160                 }
1161                 wake_up(&lgr->llc_flow_waiter); /* wake up next waiter */
1162         }
1163 }
1164
1165 /* must be called under lgr->llc_conf_mutex lock */
1166 void smcr_link_down_cond(struct smc_link *lnk)
1167 {
1168         if (smc_link_downing(&lnk->state))
1169                 smcr_link_down(lnk);
1170 }
1171
1172 /* will get the lgr->llc_conf_mutex lock */
1173 void smcr_link_down_cond_sched(struct smc_link *lnk)
1174 {
1175         if (smc_link_downing(&lnk->state))
1176                 schedule_work(&lnk->link_down_wrk);
1177 }
1178
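/* an IB port failed - schedule link down processing for all links that use
 * this port
 */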
1179 void smcr_port_err(struct smc_ib_device *smcibdev, u8 ibport)
1180 {
1181         struct smc_link_group *lgr, *n;
1182         int i;
1183
1184         list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) {
1185                 if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id,
1186                             SMC_MAX_PNETID_LEN))
1187                         continue; /* lgr is not affected */
1188                 if (list_empty(&lgr->list))
1189                         continue;
1190                 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
1191                         struct smc_link *lnk = &lgr->lnk[i];
1192
1193                         if (smc_link_usable(lnk) &&
1194                             lnk->smcibdev == smcibdev && lnk->ibport == ibport)
1195                                 smcr_link_down_cond_sched(lnk);
1196                 }
1197         }
1198 }
1199
1200 static void smc_link_down_work(struct work_struct *work)
1201 {
1202         struct smc_link *link = container_of(work, struct smc_link,
1203                                              link_down_wrk);
1204         struct smc_link_group *lgr = link->lgr;
1205
1206         if (list_empty(&lgr->list))
1207                 return;
1208         wake_up_all(&lgr->llc_msg_waiter);
1209         mutex_lock(&lgr->llc_conf_mutex);
1210         smcr_link_down(link);
1211         mutex_unlock(&lgr->llc_conf_mutex);
1212 }
1213
1214 /* Determine vlan of internal TCP socket.
1215  * @ini: init info; the determined vlan id is stored in ini->vlan_id
1216  */
1217 int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini)
1218 {
1219         struct dst_entry *dst = sk_dst_get(clcsock->sk);
1220         struct net_device *ndev;
1221         int i, nest_lvl, rc = 0;
1222
1223         ini->vlan_id = 0;
1224         if (!dst) {
1225                 rc = -ENOTCONN;
1226                 goto out;
1227         }
1228         if (!dst->dev) {
1229                 rc = -ENODEV;
1230                 goto out_rel;
1231         }
1232
1233         ndev = dst->dev;
1234         if (is_vlan_dev(ndev)) {
1235                 ini->vlan_id = vlan_dev_vlan_id(ndev);
1236                 goto out_rel;
1237         }
1238
1239         rtnl_lock();
1240         nest_lvl = ndev->lower_level;
1241         for (i = 0; i < nest_lvl; i++) {
1242                 struct list_head *lower = &ndev->adj_list.lower;
1243
1244                 if (list_empty(lower))
1245                         break;
1246                 lower = lower->next;
1247                 ndev = (struct net_device *)netdev_lower_get_next(ndev, &lower);
1248                 if (is_vlan_dev(ndev)) {
1249                         ini->vlan_id = vlan_dev_vlan_id(ndev);
1250                         break;
1251                 }
1252         }
1253         rtnl_unlock();
1254
1255 out_rel:
1256         dst_release(dst);
1257 out:
1258         return rc;
1259 }
1260
1261 static bool smcr_lgr_match(struct smc_link_group *lgr,
1262                            struct smc_clc_msg_local *lcl,
1263                            enum smc_lgr_role role, u32 clcqpn)
1264 {
1265         int i;
1266
1267         if (memcmp(lgr->peer_systemid, lcl->id_for_peer, SMC_SYSTEMID_LEN) ||
1268             lgr->role != role)
1269                 return false;
1270
1271         for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
1272                 if (lgr->lnk[i].state != SMC_LNK_ACTIVE)
1273                         continue;
1274                 if ((lgr->role == SMC_SERV || lgr->lnk[i].peer_qpn == clcqpn) &&
1275                     !memcmp(lgr->lnk[i].peer_gid, &lcl->gid, SMC_GID_SIZE) &&
1276                     !memcmp(lgr->lnk[i].peer_mac, lcl->mac, sizeof(lcl->mac)))
1277                         return true;
1278         }
1279         return false;
1280 }
1281
1282 static bool smcd_lgr_match(struct smc_link_group *lgr,
1283                            struct smcd_dev *smcismdev, u64 peer_gid)
1284 {
1285         return lgr->peer_gid == peer_gid && lgr->smcd == smcismdev;
1286 }
1287
1288 /* create a new SMC connection (and a new link group if necessary) */
1289 int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
1290 {
1291         struct smc_connection *conn = &smc->conn;
1292         struct list_head *lgr_list;
1293         struct smc_link_group *lgr;
1294         enum smc_lgr_role role;
1295         spinlock_t *lgr_lock;
1296         int rc = 0;
1297
1298         lgr_list = ini->is_smcd ? &ini->ism_dev->lgr_list : &smc_lgr_list.list;
1299         lgr_lock = ini->is_smcd ? &ini->ism_dev->lgr_lock : &smc_lgr_list.lock;
1300         ini->cln_first_contact = SMC_FIRST_CONTACT;
1301         role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
1302         if (role == SMC_CLNT && ini->srv_first_contact)
1303                 /* create new link group as well */
1304                 goto create;
1305
1306         /* determine if an existing link group can be reused */
1307         spin_lock_bh(lgr_lock);
1308         list_for_each_entry(lgr, lgr_list, list) {
1309                 write_lock_bh(&lgr->conns_lock);
1310                 if ((ini->is_smcd ?
1311                      smcd_lgr_match(lgr, ini->ism_dev, ini->ism_gid) :
1312                      smcr_lgr_match(lgr, ini->ib_lcl, role, ini->ib_clcqpn)) &&
1313                     !lgr->sync_err &&
1314                     lgr->vlan_id == ini->vlan_id &&
1315                     (role == SMC_CLNT ||
1316                      lgr->conns_num < SMC_RMBS_PER_LGR_MAX)) {
1317                         /* link group found */
1318                         ini->cln_first_contact = SMC_REUSE_CONTACT;
1319                         conn->lgr = lgr;
1320                         rc = smc_lgr_register_conn(conn, false);
1321                         write_unlock_bh(&lgr->conns_lock);
1322                         if (!rc && delayed_work_pending(&lgr->free_work))
1323                                 cancel_delayed_work(&lgr->free_work);
1324                         break;
1325                 }
1326                 write_unlock_bh(&lgr->conns_lock);
1327         }
1328         spin_unlock_bh(lgr_lock);
1329         if (rc)
1330                 return rc;
1331
1332         if (role == SMC_CLNT && !ini->srv_first_contact &&
1333             ini->cln_first_contact == SMC_FIRST_CONTACT) {
1334                 /* Server reuses a link group, but Client wants to start
1335                  * a new one
1336                  * send out_of_sync decline, reason synchr. error
1337                  */
1338                 return SMC_CLC_DECL_SYNCERR;
1339         }
1340
1341 create:
1342         if (ini->cln_first_contact == SMC_FIRST_CONTACT) {
1343                 rc = smc_lgr_create(smc, ini);
1344                 if (rc)
1345                         goto out;
1346                 lgr = conn->lgr;
1347                 write_lock_bh(&lgr->conns_lock);
1348                 rc = smc_lgr_register_conn(conn, true);
1349                 write_unlock_bh(&lgr->conns_lock);
1350                 if (rc)
1351                         goto out;
1352         }
1353         conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
1354         conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
1355         conn->urg_state = SMC_URG_READ;
1356         INIT_WORK(&smc->conn.abort_work, smc_conn_abort_work);
1357         if (ini->is_smcd) {
1358                 conn->rx_off = sizeof(struct smcd_cdc_msg);
1359                 smcd_cdc_rx_init(conn); /* init tasklet for this conn */
1360         }
1361 #ifndef KERNEL_HAS_ATOMIC64
1362         spin_lock_init(&conn->acurs_lock);
1363 #endif
1364
1365 out:
1366         return rc;
1367 }
1368
1369 /* convert the RMB size into the compressed notation - minimum 16K.
1370  * In contrast to plain ilog2, this rounds towards the next power of 2,
1371  * so the socket application gets at least its desired sndbuf / rcvbuf size.
1372  */
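/* e.g. a request of 16385 bytes compresses to 1 (32K buffer) and 70000 bytes
 * to 3 (128K buffer); smc_uncompress_bufsize() maps the value back
 */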
1373 static u8 smc_compress_bufsize(int size)
1374 {
1375         u8 compressed;
1376
1377         if (size <= SMC_BUF_MIN_SIZE)
1378                 return 0;
1379
1380         size = (size - 1) >> 14;
1381         compressed = ilog2(size) + 1;
1382         if (compressed >= SMC_RMBE_SIZES)
1383                 compressed = SMC_RMBE_SIZES - 1;
1384         return compressed;
1385 }
1386
1387 /* convert the RMB size from compressed notation into integer */
1388 int smc_uncompress_bufsize(u8 compressed)
1389 {
1390         u32 size;
1391
1392         size = 0x00000001 << (((int)compressed) + 14);
1393         return (int)size;
1394 }
1395
1396 /* try to reuse a sndbuf or rmb description slot for a certain
1397  * buffer size; if not available, return NULL
1398  */
1399 static struct smc_buf_desc *smc_buf_get_slot(int compressed_bufsize,
1400                                              struct mutex *lock,
1401                                              struct list_head *buf_list)
1402 {
1403         struct smc_buf_desc *buf_slot;
1404
1405         mutex_lock(lock);
1406         list_for_each_entry(buf_slot, buf_list, list) {
1407                 if (cmpxchg(&buf_slot->used, 0, 1) == 0) {
1408                         mutex_unlock(lock);
1409                         return buf_slot;
1410                 }
1411         }
1412         mutex_unlock(lock);
1413         return NULL;
1414 }
1415
1416 /* one of the conditions for announcing a receiver's current window size is
1417  * that it "results in a minimum increase in the window size of 10% of the
1418  * receive buffer space" [RFC7609]
1419  */
1420 static inline int smc_rmb_wnd_update_limit(int rmbe_size)
1421 {
1422         return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
1423 }
1424
1425 /* map an rmb buf to a link */
1426 static int smcr_buf_map_link(struct smc_buf_desc *buf_desc, bool is_rmb,
1427                              struct smc_link *lnk)
1428 {
1429         int rc;
1430
1431         if (buf_desc->is_map_ib[lnk->link_idx])
1432                 return 0;
1433
1434         rc = sg_alloc_table(&buf_desc->sgt[lnk->link_idx], 1, GFP_KERNEL);
1435         if (rc)
1436                 return rc;
1437         sg_set_buf(buf_desc->sgt[lnk->link_idx].sgl,
1438                    buf_desc->cpu_addr, buf_desc->len);
1439
1440         /* map sg table to DMA address */
1441         rc = smc_ib_buf_map_sg(lnk, buf_desc,
1442                                is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
1443         /* SMC protocol depends on mapping to one DMA address only */
1444         if (rc != 1) {
1445                 rc = -EAGAIN;
1446                 goto free_table;
1447         }
1448
1449         /* create a new memory region for the RMB */
1450         if (is_rmb) {
1451                 rc = smc_ib_get_memory_region(lnk->roce_pd,
1452                                               IB_ACCESS_REMOTE_WRITE |
1453                                               IB_ACCESS_LOCAL_WRITE,
1454                                               buf_desc, lnk->link_idx);
1455                 if (rc)
1456                         goto buf_unmap;
1457                 smc_ib_sync_sg_for_device(lnk, buf_desc, DMA_FROM_DEVICE);
1458         }
1459         buf_desc->is_map_ib[lnk->link_idx] = true;
1460         return 0;
1461
1462 buf_unmap:
1463         smc_ib_buf_unmap_sg(lnk, buf_desc,
1464                             is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
1465 free_table:
1466         sg_free_table(&buf_desc->sgt[lnk->link_idx]);
1467         return rc;
1468 }
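
/* Each buf_desc keeps its mapping state per link (sgt[], is_map_ib[],
 * mr_rx[] and is_reg_mr[] are indexed by link_idx), so the same buffer
 * can be used over every link of the link group.
 */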
1469
1470 /* register a new rmb on IB device,
1471  * must be called under lgr->llc_conf_mutex lock
1472  */
1473 int smcr_link_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc)
1474 {
1475         if (list_empty(&link->lgr->list))
1476                 return -ENOLINK;
1477         if (!rmb_desc->is_reg_mr[link->link_idx]) {
1478                 /* register memory region for new rmb */
1479                 if (smc_wr_reg_send(link, rmb_desc->mr_rx[link->link_idx])) {
1480                         rmb_desc->is_reg_err = true;
1481                         return -EFAULT;
1482                 }
1483                 rmb_desc->is_reg_mr[link->link_idx] = true;
1484         }
1485         return 0;
1486 }
1487
1488 static int _smcr_buf_map_lgr(struct smc_link *lnk, struct mutex *lock,
1489                              struct list_head *lst, bool is_rmb)
1490 {
1491         struct smc_buf_desc *buf_desc, *bf;
1492         int rc = 0;
1493
1494         mutex_lock(lock);
1495         list_for_each_entry_safe(buf_desc, bf, lst, list) {
1496                 if (!buf_desc->used)
1497                         continue;
1498                 rc = smcr_buf_map_link(buf_desc, is_rmb, lnk);
1499                 if (rc)
1500                         goto out;
1501         }
1502 out:
1503         mutex_unlock(lock);
1504         return rc;
1505 }
1506
1507 /* map all used buffers of lgr for a new link */
1508 int smcr_buf_map_lgr(struct smc_link *lnk)
1509 {
1510         struct smc_link_group *lgr = lnk->lgr;
1511         int i, rc = 0;
1512
1513         for (i = 0; i < SMC_RMBE_SIZES; i++) {
1514                 rc = _smcr_buf_map_lgr(lnk, &lgr->rmbs_lock,
1515                                        &lgr->rmbs[i], true);
1516                 if (rc)
1517                         return rc;
1518                 rc = _smcr_buf_map_lgr(lnk, &lgr->sndbufs_lock,
1519                                        &lgr->sndbufs[i], false);
1520                 if (rc)
1521                         return rc;
1522         }
1523         return 0;
1524 }
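
/* smcr_buf_map_lgr() above and smcr_buf_reg_lgr() below are meant to be
 * called from the LLC add-link processing: when a new link comes up,
 * every buffer already used by a connection of the link group has to be
 * mapped and registered on that link as well.
 */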
1525
1526 /* register all used buffers of lgr for a new link,
1527  * must be called under lgr->llc_conf_mutex lock
1528  */
1529 int smcr_buf_reg_lgr(struct smc_link *lnk)
1530 {
1531         struct smc_link_group *lgr = lnk->lgr;
1532         struct smc_buf_desc *buf_desc, *bf;
1533         int i, rc = 0;
1534
1535         mutex_lock(&lgr->rmbs_lock);
1536         for (i = 0; i < SMC_RMBE_SIZES; i++) {
1537                 list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list) {
1538                         if (!buf_desc->used)
1539                                 continue;
1540                         rc = smcr_link_reg_rmb(lnk, buf_desc);
1541                         if (rc)
1542                                 goto out;
1543                 }
1544         }
1545 out:
1546         mutex_unlock(&lgr->rmbs_lock);
1547         return rc;
1548 }
1549
1550 static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr,
1551                                                 bool is_rmb, int bufsize)
1552 {
1553         struct smc_buf_desc *buf_desc;
1554
1555         /* try to alloc a new buffer */
1556         buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
1557         if (!buf_desc)
1558                 return ERR_PTR(-ENOMEM);
1559
1560         buf_desc->order = get_order(bufsize);
1561         buf_desc->pages = alloc_pages(GFP_KERNEL | __GFP_NOWARN |
1562                                       __GFP_NOMEMALLOC | __GFP_COMP |
1563                                       __GFP_NORETRY | __GFP_ZERO,
1564                                       buf_desc->order);
1565         if (!buf_desc->pages) {
1566                 kfree(buf_desc);
1567                 return ERR_PTR(-EAGAIN);
1568         }
1569         buf_desc->cpu_addr = (void *)page_address(buf_desc->pages);
1570         buf_desc->len = bufsize;
1571         return buf_desc;
1572 }
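
/* The allocation flags above (__GFP_NOWARN | __GFP_NORETRY |
 * __GFP_NOMEMALLOC) keep a failing high-order allocation cheap and
 * silent; returning -EAGAIN lets __smc_buf_create() retry with the next
 * smaller buffer size instead of pressuring the memory subsystem.
 */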
1573
1574 /* map buf_desc on all usable links,
1575  * unused buffers stay mapped as long as the link is up
1576  */
1577 static int smcr_buf_map_usable_links(struct smc_link_group *lgr,
1578                                      struct smc_buf_desc *buf_desc, bool is_rmb)
1579 {
1580         int i, rc = 0;
1581
1582         /* protect against parallel link reconfiguration */
1583         mutex_lock(&lgr->llc_conf_mutex);
1584         for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
1585                 struct smc_link *lnk = &lgr->lnk[i];
1586
1587                 if (!smc_link_usable(lnk))
1588                         continue;
1589                 if (smcr_buf_map_link(buf_desc, is_rmb, lnk)) {
1590                         rc = -ENOMEM;
1591                         goto out;
1592                 }
1593         }
1594 out:
1595         mutex_unlock(&lgr->llc_conf_mutex);
1596         return rc;
1597 }
1598
1599 #define SMCD_DMBE_SIZES         7 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */
1600
1601 static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr,
1602                                                 bool is_dmb, int bufsize)
1603 {
1604         struct smc_buf_desc *buf_desc;
1605         int rc;
1606
1607         if (smc_compress_bufsize(bufsize) > SMCD_DMBE_SIZES)
1608                 return ERR_PTR(-EAGAIN);
1609
1610         /* try to alloc a new DMB */
1611         buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
1612         if (!buf_desc)
1613                 return ERR_PTR(-ENOMEM);
1614         if (is_dmb) {
1615                 rc = smc_ism_register_dmb(lgr, bufsize, buf_desc);
1616                 if (rc) {
1617                         kfree(buf_desc);
1618                         return ERR_PTR(-EAGAIN);
1619                 }
1620                 buf_desc->pages = virt_to_page(buf_desc->cpu_addr);
1621                 /* CDC header stored in buf. So, pretend it was smaller */
1622                 buf_desc->len = bufsize - sizeof(struct smcd_cdc_msg);
1623         } else {
1624                 buf_desc->cpu_addr = kzalloc(bufsize, GFP_KERNEL |
1625                                              __GFP_NOWARN | __GFP_NORETRY |
1626                                              __GFP_NOMEMALLOC);
1627                 if (!buf_desc->cpu_addr) {
1628                         kfree(buf_desc);
1629                         return ERR_PTR(-EAGAIN);
1630                 }
1631                 buf_desc->len = bufsize;
1632         }
1633         return buf_desc;
1634 }
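
/* For SMC-D the CDC header lives at the start of the DMB itself, so
 * buf_desc->len is reduced by sizeof(struct smcd_cdc_msg); this matches
 * conn->rx_off as set during connection creation above.
 */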
1635
1636 static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
1637 {
1638         struct smc_buf_desc *buf_desc = ERR_PTR(-ENOMEM);
1639         struct smc_connection *conn = &smc->conn;
1640         struct smc_link_group *lgr = conn->lgr;
1641         struct list_head *buf_list;
1642         int bufsize, bufsize_short;
1643         struct mutex *lock;     /* lock buffer list */
1644         int sk_buf_size;
1645
1646         if (is_rmb)
1647                 /* use socket recv buffer size (w/o overhead) as start value */
1648                 sk_buf_size = smc->sk.sk_rcvbuf / 2;
1649         else
1650                 /* use socket send buffer size (w/o overhead) as start value */
1651                 sk_buf_size = smc->sk.sk_sndbuf / 2;
1652
1653         for (bufsize_short = smc_compress_bufsize(sk_buf_size);
1654              bufsize_short >= 0; bufsize_short--) {
1655
1656                 if (is_rmb) {
1657                         lock = &lgr->rmbs_lock;
1658                         buf_list = &lgr->rmbs[bufsize_short];
1659                 } else {
1660                         lock = &lgr->sndbufs_lock;
1661                         buf_list = &lgr->sndbufs[bufsize_short];
1662                 }
1663                 bufsize = smc_uncompress_bufsize(bufsize_short);
1664                 if ((1 << get_order(bufsize)) > SG_MAX_SINGLE_ALLOC)
1665                         continue;
1666
1667                 /* check for reusable slot in the link group */
1668                 buf_desc = smc_buf_get_slot(bufsize_short, lock, buf_list);
1669                 if (buf_desc) {
1670                         memset(buf_desc->cpu_addr, 0, bufsize);
1671                         break; /* found reusable slot */
1672                 }
1673
1674                 if (is_smcd)
1675                         buf_desc = smcd_new_buf_create(lgr, is_rmb, bufsize);
1676                 else
1677                         buf_desc = smcr_new_buf_create(lgr, is_rmb, bufsize);
1678
1679                 if (PTR_ERR(buf_desc) == -ENOMEM)
1680                         break;
1681                 if (IS_ERR(buf_desc))
1682                         continue;
1683
1684                 buf_desc->used = 1;
1685                 mutex_lock(lock);
1686                 list_add(&buf_desc->list, buf_list);
1687                 mutex_unlock(lock);
1688                 break; /* found */
1689         }
1690
1691         if (IS_ERR(buf_desc))
1692                 return -ENOMEM;
1693
1694         if (!is_smcd) {
1695                 if (smcr_buf_map_usable_links(lgr, buf_desc, is_rmb)) {
1696                         smcr_buf_unuse(buf_desc, lgr);
1697                         return -ENOMEM;
1698                 }
1699         }
1700
1701         if (is_rmb) {
1702                 conn->rmb_desc = buf_desc;
1703                 conn->rmbe_size_short = bufsize_short;
1704                 smc->sk.sk_rcvbuf = bufsize * 2;
1705                 atomic_set(&conn->bytes_to_rcv, 0);
1706                 conn->rmbe_update_limit =
1707                         smc_rmb_wnd_update_limit(buf_desc->len);
1708                 if (is_smcd)
1709                         smc_ism_set_conn(conn); /* map RMB/smcd_dev to conn */
1710         } else {
1711                 conn->sndbuf_desc = buf_desc;
1712                 smc->sk.sk_sndbuf = bufsize * 2;
1713                 atomic_set(&conn->sndbuf_space, bufsize);
1714         }
1715         return 0;
1716 }
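
/* Example of the fallback loop in __smc_buf_create(): with
 * sk_rcvbuf = 256KB the start value for the RMB is 128KB; if a 128KB
 * buffer can neither be reused nor allocated (any error except -ENOMEM),
 * the loop falls back to 64KB, 32KB and finally 16KB before giving up.
 */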
1717
1718 void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn)
1719 {
1720         if (!conn->lgr || conn->lgr->is_smcd || !smc_link_usable(conn->lnk))
1721                 return;
1722         smc_ib_sync_sg_for_cpu(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE);
1723 }
1724
1725 void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn)
1726 {
1727         if (!conn->lgr || conn->lgr->is_smcd || !smc_link_usable(conn->lnk))
1728                 return;
1729         smc_ib_sync_sg_for_device(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE);
1730 }
1731
1732 void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn)
1733 {
1734         int i;
1735
1736         if (!conn->lgr || conn->lgr->is_smcd)
1737                 return;
1738         for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
1739                 if (!smc_link_usable(&conn->lgr->lnk[i]))
1740                         continue;
1741                 smc_ib_sync_sg_for_cpu(&conn->lgr->lnk[i], conn->rmb_desc,
1742                                        DMA_FROM_DEVICE);
1743         }
1744 }
1745
1746 void smc_rmb_sync_sg_for_device(struct smc_connection *conn)
1747 {
1748         int i;
1749
1750         if (!conn->lgr || conn->lgr->is_smcd)
1751                 return;
1752         for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
1753                 if (!smc_link_usable(&conn->lgr->lnk[i]))
1754                         continue;
1755                 smc_ib_sync_sg_for_device(&conn->lgr->lnk[i], conn->rmb_desc,
1756                                           DMA_FROM_DEVICE);
1757         }
1758 }
1759
1760 /* create the send and receive buffer for an SMC socket;
1761  * receive buffers are called RMBs;
1762  * (even though the SMC protocol allows more than one RMB-element per RMB,
1763  * the Linux implementation uses just one RMB-element per RMB, i.e. it uses
1764  * an extra RMB for every connection in a link group)
1765  */
1766 int smc_buf_create(struct smc_sock *smc, bool is_smcd)
1767 {
1768         int rc;
1769
1770         /* create send buffer */
1771         rc = __smc_buf_create(smc, is_smcd, false);
1772         if (rc)
1773                 return rc;
1774         /* create rmb */
1775         rc = __smc_buf_create(smc, is_smcd, true);
1776         if (rc)
1777                 smc_buf_free(smc->conn.lgr, false, smc->conn.sndbuf_desc);
1778         return rc;
1779 }
1780
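/* The remote tokens of a link group are kept in lgr->rtokens: row i
 * describes one remote RMB and holds, per link, the peer's rkey and the
 * peer-advertised buffer address; lgr->rtokens_used_mask marks which
 * rows are in use.
 */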
1781 static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
1782 {
1783         int i;
1784
1785         for_each_clear_bit(i, lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX) {
1786                 if (!test_and_set_bit(i, lgr->rtokens_used_mask))
1787                         return i;
1788         }
1789         return -ENOSPC;
1790 }
1791
1792 static int smc_rtoken_find_by_link(struct smc_link_group *lgr, int lnk_idx,
1793                                    u32 rkey)
1794 {
1795         int i;
1796
1797         for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
1798                 if (test_bit(i, lgr->rtokens_used_mask) &&
1799                     lgr->rtokens[i][lnk_idx].rkey == rkey)
1800                         return i;
1801         }
1802         return -ENOENT;
1803 }
1804
1805 /* set rtoken for a new link to an existing rmb */
1806 void smc_rtoken_set(struct smc_link_group *lgr, int link_idx, int link_idx_new,
1807                     __be32 nw_rkey_known, __be64 nw_vaddr, __be32 nw_rkey)
1808 {
1809         int rtok_idx;
1810
1811         rtok_idx = smc_rtoken_find_by_link(lgr, link_idx, ntohl(nw_rkey_known));
1812         if (rtok_idx == -ENOENT)
1813                 return;
1814         lgr->rtokens[rtok_idx][link_idx_new].rkey = ntohl(nw_rkey);
1815         lgr->rtokens[rtok_idx][link_idx_new].dma_addr = be64_to_cpu(nw_vaddr);
1816 }
1817
1818 /* set rtoken for a new link whose link_id is given */
1819 void smc_rtoken_set2(struct smc_link_group *lgr, int rtok_idx, int link_id,
1820                      __be64 nw_vaddr, __be32 nw_rkey)
1821 {
1822         u64 dma_addr = be64_to_cpu(nw_vaddr);
1823         u32 rkey = ntohl(nw_rkey);
1824         bool found = false;
1825         int link_idx;
1826
1827         for (link_idx = 0; link_idx < SMC_LINKS_PER_LGR_MAX; link_idx++) {
1828                 if (lgr->lnk[link_idx].link_id == link_id) {
1829                         found = true;
1830                         break;
1831                 }
1832         }
1833         if (!found)
1834                 return;
1835         lgr->rtokens[rtok_idx][link_idx].rkey = rkey;
1836         lgr->rtokens[rtok_idx][link_idx].dma_addr = dma_addr;
1837 }
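
/* Both helpers above are expected to be driven from the LLC layer while
 * the rkeys for a newly added link are exchanged; they copy the peer's
 * tokens into the column of the new link.
 */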
1838
1839 /* add a new rtoken from peer */
1840 int smc_rtoken_add(struct smc_link *lnk, __be64 nw_vaddr, __be32 nw_rkey)
1841 {
1842         struct smc_link_group *lgr = smc_get_lgr(lnk);
1843         u64 dma_addr = be64_to_cpu(nw_vaddr);
1844         u32 rkey = ntohl(nw_rkey);
1845         int i;
1846
1847         for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
1848                 if (lgr->rtokens[i][lnk->link_idx].rkey == rkey &&
1849                     lgr->rtokens[i][lnk->link_idx].dma_addr == dma_addr &&
1850                     test_bit(i, lgr->rtokens_used_mask)) {
1851                         /* already in list */
1852                         return i;
1853                 }
1854         }
1855         i = smc_rmb_reserve_rtoken_idx(lgr);
1856         if (i < 0)
1857                 return i;
1858         lgr->rtokens[i][lnk->link_idx].rkey = rkey;
1859         lgr->rtokens[i][lnk->link_idx].dma_addr = dma_addr;
1860         return i;
1861 }
1862
1863 /* delete an rtoken from all links */
1864 int smc_rtoken_delete(struct smc_link *lnk, __be32 nw_rkey)
1865 {
1866         struct smc_link_group *lgr = smc_get_lgr(lnk);
1867         u32 rkey = ntohl(nw_rkey);
1868         int i, j;
1869
1870         for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
1871                 if (lgr->rtokens[i][lnk->link_idx].rkey == rkey &&
1872                     test_bit(i, lgr->rtokens_used_mask)) {
1873                         for (j = 0; j < SMC_LINKS_PER_LGR_MAX; j++) {
1874                                 lgr->rtokens[i][j].rkey = 0;
1875                                 lgr->rtokens[i][j].dma_addr = 0;
1876                         }
1877                         clear_bit(i, lgr->rtokens_used_mask);
1878                         return 0;
1879                 }
1880         }
1881         return -ENOENT;
1882 }
1883
1884 /* save rkey and dma_addr received from peer during clc handshake */
1885 int smc_rmb_rtoken_handling(struct smc_connection *conn,
1886                             struct smc_link *lnk,
1887                             struct smc_clc_msg_accept_confirm *clc)
1888 {
1889         conn->rtoken_idx = smc_rtoken_add(lnk, clc->rmb_dma_addr,
1890                                           clc->rmb_rkey);
1891         if (conn->rtoken_idx < 0)
1892                 return conn->rtoken_idx;
1893         return 0;
1894 }
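
/* conn->rtoken_idx selects the row in lgr->rtokens that is expected to be
 * used later to address the peer's RMB when RDMA writes are issued for
 * this connection.
 */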
1895
1896 static void smc_core_going_away(void)
1897 {
1898         struct smc_ib_device *smcibdev;
1899         struct smcd_dev *smcd;
1900
1901         mutex_lock(&smc_ib_devices.mutex);
1902         list_for_each_entry(smcibdev, &smc_ib_devices.list, list) {
1903                 int i;
1904
1905                 for (i = 0; i < SMC_MAX_PORTS; i++)
1906                         set_bit(i, smcibdev->ports_going_away);
1907         }
1908         mutex_unlock(&smc_ib_devices.mutex);
1909
1910         mutex_lock(&smcd_dev_list.mutex);
1911         list_for_each_entry(smcd, &smcd_dev_list.list, list) {
1912                 smcd->going_away = 1;
1913         }
1914         mutex_unlock(&smcd_dev_list.mutex);
1915 }
1916
1917 /* Clean up all SMC link groups */
1918 static void smc_lgrs_shutdown(void)
1919 {
1920         struct smcd_dev *smcd;
1921
1922         smc_core_going_away();
1923
1924         smc_smcr_terminate_all(NULL);
1925
1926         mutex_lock(&smcd_dev_list.mutex);
1927         list_for_each_entry(smcd, &smcd_dev_list.list, list)
1928                 smc_smcd_terminate_all(smcd);
1929         mutex_unlock(&smcd_dev_list.mutex);
1930 }
1931
1932 static int smc_core_reboot_event(struct notifier_block *this,
1933                                  unsigned long event, void *ptr)
1934 {
1935         smc_lgrs_shutdown();
1936         smc_ib_unregister_client();
1937         return 0;
1938 }
1939
1940 static struct notifier_block smc_reboot_notifier = {
1941         .notifier_call = smc_core_reboot_event,
1942 };
1943
1944 int __init smc_core_init(void)
1945 {
1946         return register_reboot_notifier(&smc_reboot_notifier);
1947 }
1948
1949 /* Called (from smc_exit) when module is removed */
1950 void smc_core_exit(void)
1951 {
1952         unregister_reboot_notifier(&smc_reboot_notifier);
1953         smc_lgrs_shutdown();
1954 }