1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  *  Shared Memory Communications over RDMA (SMC-R) and RoCE
4  *
5  *  Basic Transport Functions exploiting Infiniband API
6  *
7  *  Copyright IBM Corp. 2016
8  *
9  *  Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
10  */
11
12 #include <linux/socket.h>
13 #include <linux/if_vlan.h>
14 #include <linux/random.h>
15 #include <linux/workqueue.h>
16 #include <linux/wait.h>
17 #include <linux/reboot.h>
18 #include <linux/mutex.h>
19 #include <net/tcp.h>
20 #include <net/sock.h>
21 #include <rdma/ib_verbs.h>
22 #include <rdma/ib_cache.h>
23
24 #include "smc.h"
25 #include "smc_clc.h"
26 #include "smc_core.h"
27 #include "smc_ib.h"
28 #include "smc_wr.h"
29 #include "smc_llc.h"
30 #include "smc_cdc.h"
31 #include "smc_close.h"
32 #include "smc_ism.h"
33
34 #define SMC_LGR_NUM_INCR                256
35 #define SMC_LGR_FREE_DELAY_SERV         (600 * HZ)
36 #define SMC_LGR_FREE_DELAY_CLNT         (SMC_LGR_FREE_DELAY_SERV + 10 * HZ)
37 #define SMC_LGR_FREE_DELAY_FAST         (8 * HZ)
38
39 static struct smc_lgr_list smc_lgr_list = {     /* established link groups */
40         .lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock),
41         .list = LIST_HEAD_INIT(smc_lgr_list.list),
42         .num = 0,
43 };
44
45 static atomic_t lgr_cnt = ATOMIC_INIT(0); /* number of existing link groups */
46 static DECLARE_WAIT_QUEUE_HEAD(lgrs_deleted);
47
48 static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
49                          struct smc_buf_desc *buf_desc);
50 static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft);
51
52 static void smc_link_down_work(struct work_struct *work);
53
54 /* return head of link group list and its lock for a given link group */
55 static inline struct list_head *smc_lgr_list_head(struct smc_link_group *lgr,
56                                                   spinlock_t **lgr_lock)
57 {
58         if (lgr->is_smcd) {
59                 *lgr_lock = &lgr->smcd->lgr_lock;
60                 return &lgr->smcd->lgr_list;
61         }
62
63         *lgr_lock = &smc_lgr_list.lock;
64         return &smc_lgr_list.list;
65 }
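
/* Usage sketch (illustrative only, mirrors the callers in this file):
 *
 *	spinlock_t *lgr_lock;
 *	struct list_head *lgr_list = smc_lgr_list_head(lgr, &lgr_lock);
 *
 *	spin_lock_bh(lgr_lock);
 *	... inspect or modify lgr_list / lgr->list ...
 *	spin_unlock_bh(lgr_lock);
 *
 * Some callers, e.g. smc_lgr_terminate_sched(), only need the lock and
 * ignore the returned list head.
 */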
66
67 static void smc_lgr_schedule_free_work(struct smc_link_group *lgr)
68 {
69         /* client link group creation always follows the server link group
70          * creation. For client use a somewhat higher removal delay time,
71          * otherwise there is a risk of out-of-sync link groups.
72          */
73         if (!lgr->freeing && !lgr->freefast) {
74                 mod_delayed_work(system_wq, &lgr->free_work,
75                                  (!lgr->is_smcd && lgr->role == SMC_CLNT) ?
76                                                 SMC_LGR_FREE_DELAY_CLNT :
77                                                 SMC_LGR_FREE_DELAY_SERV);
78         }
79 }
80
81 void smc_lgr_schedule_free_work_fast(struct smc_link_group *lgr)
82 {
83         if (!lgr->freeing && !lgr->freefast) {
84                 lgr->freefast = 1;
85                 mod_delayed_work(system_wq, &lgr->free_work,
86                                  SMC_LGR_FREE_DELAY_FAST);
87         }
88 }
89
90 /* Register connection's alert token in our lookup structure.
91  * To use rbtrees we have to implement our own insert core.
92  * Requires @conns_lock
93  * @conn        connection to register
94  * Cannot fail; token uniqueness is ensured by the caller.
95  */
96 static void smc_lgr_add_alert_token(struct smc_connection *conn)
97 {
98         struct rb_node **link, *parent = NULL;
99         u32 token = conn->alert_token_local;
100
101         link = &conn->lgr->conns_all.rb_node;
102         while (*link) {
103                 struct smc_connection *cur = rb_entry(*link,
104                                         struct smc_connection, alert_node);
105
106                 parent = *link;
107                 if (cur->alert_token_local > token)
108                         link = &parent->rb_left;
109                 else
110                         link = &parent->rb_right;
111         }
112         /* Put the new node there */
113         rb_link_node(&conn->alert_node, parent, link);
114         rb_insert_color(&conn->alert_node, &conn->lgr->conns_all);
115 }
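
/* Lookup counterpart (illustrative sketch, not a verbatim copy of the
 * kernel helper): finding a connection by its alert token walks the
 * same rbtree with the same comparison as the insert above:
 *
 *	struct rb_node *node = lgr->conns_all.rb_node;
 *
 *	while (node) {
 *		struct smc_connection *cur = rb_entry(node,
 *				struct smc_connection, alert_node);
 *
 *		if (cur->alert_token_local > token)
 *			node = node->rb_left;
 *		else if (cur->alert_token_local < token)
 *			node = node->rb_right;
 *		else
 *			return cur;
 *	}
 *	return NULL;
 *
 * smc_lgr_find_conn() provides this lookup; smc_lgr_register_conn()
 * below uses it to keep alert tokens unique.
 */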
116
117 /* assign an SMC-R link to the connection */
118 static int smcr_lgr_conn_assign_link(struct smc_connection *conn, bool first)
119 {
120         enum smc_link_state expected = first ? SMC_LNK_ACTIVATING :
121                                        SMC_LNK_ACTIVE;
122         int i, j;
123
124         /* do link balancing */
125         for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
126                 struct smc_link *lnk = &conn->lgr->lnk[i];
127
128                 if (lnk->state != expected || lnk->link_is_asym)
129                         continue;
130                 if (conn->lgr->role == SMC_CLNT) {
131                         conn->lnk = lnk; /* temporary, SMC server assigns link */
132                         break;
133                 }
134                 if (conn->lgr->conns_num % 2) {
135                         for (j = i + 1; j < SMC_LINKS_PER_LGR_MAX; j++) {
136                                 struct smc_link *lnk2;
137
138                                 lnk2 = &conn->lgr->lnk[j];
139                                 if (lnk2->state == expected &&
140                                     !lnk2->link_is_asym) {
141                                         conn->lnk = lnk2;
142                                         break;
143                                 }
144                         }
145                 }
146                 if (!conn->lnk)
147                         conn->lnk = lnk;
148                 break;
149         }
150         if (!conn->lnk)
151                 return SMC_CLC_DECL_NOACTLINK;
152         return 0;
153 }
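
/* Balancing sketch (illustrative): with two active, symmetric links the
 * conns_num % 2 test above alternates new server-side connections:
 *
 *	conns_num 0, 2, 4, ... -> lgr->lnk[0]
 *	conns_num 1, 3, 5, ... -> lgr->lnk[1]
 *
 * A client only records a provisional link here; the binding choice is
 * made by the server and communicated via LLC.
 */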
154
155 /* Register connection in link group by assigning an alert token
156  * registered in a search tree.
157  * Requires @conns_lock
158  * Note that '0' is a reserved value and not assigned.
159  */
160 static int smc_lgr_register_conn(struct smc_connection *conn, bool first)
161 {
162         struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
163         static atomic_t nexttoken = ATOMIC_INIT(0);
164         int rc;
165
166         if (!conn->lgr->is_smcd) {
167                 rc = smcr_lgr_conn_assign_link(conn, first);
168                 if (rc)
169                         return rc;
170         }
171         /* find a new alert_token_local value not yet used by some connection
172          * in this link group
173          */
174         sock_hold(&smc->sk); /* sock_put in smc_lgr_unregister_conn() */
175         while (!conn->alert_token_local) {
176                 conn->alert_token_local = atomic_inc_return(&nexttoken);
177                 if (smc_lgr_find_conn(conn->alert_token_local, conn->lgr))
178                         conn->alert_token_local = 0;
179         }
180         smc_lgr_add_alert_token(conn);
181         conn->lgr->conns_num++;
182         return 0;
183 }
184
185 /* Unregister connection and reset the alert token of the given connection
186  */
187 static void __smc_lgr_unregister_conn(struct smc_connection *conn)
188 {
189         struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
190         struct smc_link_group *lgr = conn->lgr;
191
192         rb_erase(&conn->alert_node, &lgr->conns_all);
193         lgr->conns_num--;
194         conn->alert_token_local = 0;
195         sock_put(&smc->sk); /* sock_hold in smc_lgr_register_conn() */
196 }
197
198 /* Unregister connection from lgr
199  */
200 static void smc_lgr_unregister_conn(struct smc_connection *conn)
201 {
202         struct smc_link_group *lgr = conn->lgr;
203
204         if (!lgr)
205                 return;
206         write_lock_bh(&lgr->conns_lock);
207         if (conn->alert_token_local) {
208                 __smc_lgr_unregister_conn(conn);
209         }
210         write_unlock_bh(&lgr->conns_lock);
211         conn->lgr = NULL;
212 }
213
214 void smc_lgr_cleanup_early(struct smc_connection *conn)
215 {
216         struct smc_link_group *lgr = conn->lgr;
217         struct list_head *lgr_list;
218         spinlock_t *lgr_lock;
219
220         if (!lgr)
221                 return;
222
223         smc_conn_free(conn);
224         lgr_list = smc_lgr_list_head(lgr, &lgr_lock);
225         spin_lock_bh(lgr_lock);
226         /* do not use this link group for new connections */
227         if (!list_empty(lgr_list))
228                 list_del_init(lgr_list);
229         spin_unlock_bh(lgr_lock);
230         smc_lgr_schedule_free_work_fast(lgr);
231 }
232
233 static void smcr_lgr_link_deactivate_all(struct smc_link_group *lgr)
234 {
235         int i;
236
237         for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
238                 struct smc_link *lnk = &lgr->lnk[i];
239
240                 if (smc_link_usable(lnk))
241                         lnk->state = SMC_LNK_INACTIVE;
242         }
243         wake_up_all(&lgr->llc_msg_waiter);
244         wake_up_all(&lgr->llc_flow_waiter);
245 }
246
247 static void smc_lgr_free(struct smc_link_group *lgr);
248
249 static void smc_lgr_free_work(struct work_struct *work)
250 {
251         struct smc_link_group *lgr = container_of(to_delayed_work(work),
252                                                   struct smc_link_group,
253                                                   free_work);
254         spinlock_t *lgr_lock;
255         bool lgr_empty;
256
257         smc_lgr_list_head(lgr, &lgr_lock);
258         spin_lock_bh(lgr_lock);
259         if (lgr->freeing) {
260                 spin_unlock_bh(lgr_lock);
261                 return;
262         }
263         read_lock_bh(&lgr->conns_lock);
264         lgr_empty = RB_EMPTY_ROOT(&lgr->conns_all);
265         read_unlock_bh(&lgr->conns_lock);
266         if (!lgr_empty) { /* lgr still has connections; do not free */
267                 spin_unlock_bh(lgr_lock);
268                 return;
269         }
270         list_del_init(&lgr->list); /* remove from smc_lgr_list */
271         lgr->freeing = 1; /* this instance does the freeing, no new schedule */
272         spin_unlock_bh(lgr_lock);
273         cancel_delayed_work(&lgr->free_work);
274
275         if (!lgr->is_smcd && !lgr->terminating)
276                 smc_llc_send_link_delete_all(lgr, true,
277                                              SMC_LLC_DEL_PROG_INIT_TERM);
278         if (lgr->is_smcd && !lgr->terminating)
279                 smc_ism_signal_shutdown(lgr);
280         if (!lgr->is_smcd)
281                 smcr_lgr_link_deactivate_all(lgr);
282         smc_lgr_free(lgr);
283 }
284
285 static void smc_lgr_terminate_work(struct work_struct *work)
286 {
287         struct smc_link_group *lgr = container_of(work, struct smc_link_group,
288                                                   terminate_work);
289
290         __smc_lgr_terminate(lgr, true);
291 }
292
293 /* return next unique link id for the lgr */
294 static u8 smcr_next_link_id(struct smc_link_group *lgr)
295 {
296         u8 link_id;
297         int i;
298
299         while (1) {
300                 link_id = ++lgr->next_link_id;
301                 if (!link_id)   /* skip zero as link_id */
302                         link_id = ++lgr->next_link_id;
303                 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
304                         if (smc_link_usable(&lgr->lnk[i]) &&
305                             lgr->lnk[i].link_id == link_id)
306                                 break;  /* link_id in use, try next one */
307                 if (i == SMC_LINKS_PER_LGR_MAX)
308                         break;          /* unique link_id found */
309         }
310         return link_id;
311 }
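
/* Note: link_id is a u8 and wraps around; zero is reserved and skipped,
 * so after 255 the counter restarts at 1 and the scan above keeps
 * incrementing until the id is not used by any current link of the lgr.
 */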
312
313 int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
314                    u8 link_idx, struct smc_init_info *ini)
315 {
316         u8 rndvec[3];
317         int rc;
318
319         get_device(&ini->ib_dev->ibdev->dev);
320         atomic_inc(&ini->ib_dev->lnk_cnt);
321         lnk->link_id = smcr_next_link_id(lgr);
322         lnk->lgr = lgr;
323         lnk->link_idx = link_idx;
324         lnk->smcibdev = ini->ib_dev;
325         lnk->ibport = ini->ib_port;
326         lnk->path_mtu = ini->ib_dev->pattr[ini->ib_port - 1].active_mtu;
327         smc_llc_link_set_uid(lnk);
328         INIT_WORK(&lnk->link_down_wrk, smc_link_down_work);
329         if (!ini->ib_dev->initialized) {
330                 rc = smc_ib_setup_per_ibdev(ini->ib_dev);
331                 if (rc)
332                         goto out;
333         }
334         get_random_bytes(rndvec, sizeof(rndvec));
335         lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) +
336                 (rndvec[2] << 16);
337         rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport,
338                                   ini->vlan_id, lnk->gid, &lnk->sgid_index);
339         if (rc)
340                 goto out;
341         rc = smc_llc_link_init(lnk);
342         if (rc)
343                 goto out;
344         rc = smc_wr_alloc_link_mem(lnk);
345         if (rc)
346                 goto clear_llc_lnk;
347         rc = smc_ib_create_protection_domain(lnk);
348         if (rc)
349                 goto free_link_mem;
350         rc = smc_ib_create_queue_pair(lnk);
351         if (rc)
352                 goto dealloc_pd;
353         rc = smc_wr_create_link(lnk);
354         if (rc)
355                 goto destroy_qp;
356         lnk->state = SMC_LNK_ACTIVATING;
357         return 0;
358
359 destroy_qp:
360         smc_ib_destroy_queue_pair(lnk);
361 dealloc_pd:
362         smc_ib_dealloc_protection_domain(lnk);
363 free_link_mem:
364         smc_wr_free_link_mem(lnk);
365 clear_llc_lnk:
366         smc_llc_link_clear(lnk, false);
367 out:
368         put_device(&ini->ib_dev->ibdev->dev);
369         memset(lnk, 0, sizeof(struct smc_link));
370         lnk->state = SMC_LNK_UNUSED;
371         if (!atomic_dec_return(&ini->ib_dev->lnk_cnt))
372                 wake_up(&ini->ib_dev->lnks_deleted);
373         return rc;
374 }
375
376 /* create a new SMC link group */
377 static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
378 {
379         struct smc_link_group *lgr;
380         struct list_head *lgr_list;
381         struct smc_link *lnk;
382         spinlock_t *lgr_lock;
383         u8 link_idx;
384         int rc = 0;
385         int i;
386
387         if (ini->is_smcd && ini->vlan_id) {
388                 if (smc_ism_get_vlan(ini->ism_dev, ini->vlan_id)) {
389                         rc = SMC_CLC_DECL_ISMVLANERR;
390                         goto out;
391                 }
392         }
393
394         lgr = kzalloc(sizeof(*lgr), GFP_KERNEL);
395         if (!lgr) {
396                 rc = SMC_CLC_DECL_MEM;
397                 goto ism_put_vlan;
398         }
399         lgr->is_smcd = ini->is_smcd;
400         lgr->sync_err = 0;
401         lgr->terminating = 0;
402         lgr->freefast = 0;
403         lgr->freeing = 0;
404         lgr->vlan_id = ini->vlan_id;
405         mutex_init(&lgr->sndbufs_lock);
406         mutex_init(&lgr->rmbs_lock);
407         rwlock_init(&lgr->conns_lock);
408         for (i = 0; i < SMC_RMBE_SIZES; i++) {
409                 INIT_LIST_HEAD(&lgr->sndbufs[i]);
410                 INIT_LIST_HEAD(&lgr->rmbs[i]);
411         }
412         lgr->next_link_id = 0;
413         smc_lgr_list.num += SMC_LGR_NUM_INCR;
414         memcpy(&lgr->id, (u8 *)&smc_lgr_list.num, SMC_LGR_ID_SIZE);
415         INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work);
416         INIT_WORK(&lgr->terminate_work, smc_lgr_terminate_work);
417         lgr->conns_all = RB_ROOT;
418         if (ini->is_smcd) {
419                 /* SMC-D specific settings */
420                 get_device(&ini->ism_dev->dev);
421                 lgr->peer_gid = ini->ism_gid;
422                 lgr->smcd = ini->ism_dev;
423                 lgr_list = &ini->ism_dev->lgr_list;
424                 lgr_lock = &lgr->smcd->lgr_lock;
425                 lgr->peer_shutdown = 0;
426                 atomic_inc(&ini->ism_dev->lgr_cnt);
427         } else {
428                 /* SMC-R specific settings */
429                 lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
430                 memcpy(lgr->peer_systemid, ini->ib_lcl->id_for_peer,
431                        SMC_SYSTEMID_LEN);
432                 memcpy(lgr->pnet_id, ini->ib_dev->pnetid[ini->ib_port - 1],
433                        SMC_MAX_PNETID_LEN);
434                 smc_llc_lgr_init(lgr, smc);
435
436                 link_idx = SMC_SINGLE_LINK;
437                 lnk = &lgr->lnk[link_idx];
438                 rc = smcr_link_init(lgr, lnk, link_idx, ini);
439                 if (rc)
440                         goto free_lgr;
441                 lgr_list = &smc_lgr_list.list;
442                 lgr_lock = &smc_lgr_list.lock;
443                 atomic_inc(&lgr_cnt);
444         }
445         smc->conn.lgr = lgr;
446         spin_lock_bh(lgr_lock);
447         list_add_tail(&lgr->list, lgr_list);
448         spin_unlock_bh(lgr_lock);
449         return 0;
450
451 free_lgr:
452         kfree(lgr);
453 ism_put_vlan:
454         if (ini->is_smcd && ini->vlan_id)
455                 smc_ism_put_vlan(ini->ism_dev, ini->vlan_id);
456 out:
457         if (rc < 0) {
458                 if (rc == -ENOMEM)
459                         rc = SMC_CLC_DECL_MEM;
460                 else
461                         rc = SMC_CLC_DECL_INTERR;
462         }
463         return rc;
464 }
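
/* lgr id sketch (illustrative): smc_lgr_list.num is bumped by
 * SMC_LGR_NUM_INCR (256) for every link group created above, and its
 * first SMC_LGR_ID_SIZE bytes are copied into lgr->id, so consecutive
 * link groups get distinct 4-byte ids until the counter wraps.
 */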
465
466 static int smc_write_space(struct smc_connection *conn)
467 {
468         int buffer_len = conn->peer_rmbe_size;
469         union smc_host_cursor prod;
470         union smc_host_cursor cons;
471         int space;
472
473         smc_curs_copy(&prod, &conn->local_tx_ctrl.prod, conn);
474         smc_curs_copy(&cons, &conn->local_rx_ctrl.cons, conn);
475         /* determine rx_buf space */
476         space = buffer_len - smc_curs_diff(buffer_len, &cons, &prod);
477         return space;
478 }
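
/* Worked example (illustrative): with peer_rmbe_size == 65536 and the
 * prod cursor 12000 bytes ahead of the peer's cons cursor,
 * smc_write_space() returns 65536 - 12000 == 53536 writable bytes.
 */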
479
480 static int smc_switch_cursor(struct smc_sock *smc, struct smc_cdc_tx_pend *pend,
481                              struct smc_wr_buf *wr_buf)
482 {
483         struct smc_connection *conn = &smc->conn;
484         union smc_host_cursor cons, fin;
485         int rc = 0;
486         int diff;
487
488         smc_curs_copy(&conn->tx_curs_sent, &conn->tx_curs_fin, conn);
489         smc_curs_copy(&fin, &conn->local_tx_ctrl_fin, conn);
490         /* set prod cursor to old state, enforce tx_rdma_writes() */
491         smc_curs_copy(&conn->local_tx_ctrl.prod, &fin, conn);
492         smc_curs_copy(&cons, &conn->local_rx_ctrl.cons, conn);
493
494         if (smc_curs_comp(conn->peer_rmbe_size, &cons, &fin) < 0) {
495                 /* cons cursor advanced more than fin, and prod was set
496                  * to fin above, so now prod is smaller than cons. Fix that.
497                  */
498                 diff = smc_curs_diff(conn->peer_rmbe_size, &fin, &cons);
499                 smc_curs_add(conn->sndbuf_desc->len,
500                              &conn->tx_curs_sent, diff);
501                 smc_curs_add(conn->sndbuf_desc->len,
502                              &conn->tx_curs_fin, diff);
503
504                 smp_mb__before_atomic();
505                 atomic_add(diff, &conn->sndbuf_space);
506                 smp_mb__after_atomic();
507
508                 smc_curs_add(conn->peer_rmbe_size,
509                              &conn->local_tx_ctrl.prod, diff);
510                 smc_curs_add(conn->peer_rmbe_size,
511                              &conn->local_tx_ctrl_fin, diff);
512         }
513         /* recalculate, value is used by tx_rdma_writes() */
514         atomic_set(&smc->conn.peer_rmbe_space, smc_write_space(conn));
515
516         if (smc->sk.sk_state != SMC_INIT &&
517             smc->sk.sk_state != SMC_CLOSED) {
518                 rc = smcr_cdc_msg_send_validation(conn, pend, wr_buf);
519                 if (!rc) {
520                         schedule_delayed_work(&conn->tx_work, 0);
521                         smc->sk.sk_data_ready(&smc->sk);
522                 }
523         } else {
524                 smc_wr_tx_put_slot(conn->lnk,
525                                    (struct smc_wr_tx_pend_priv *)pend);
526         }
527         return rc;
528 }
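
/* Why the cursor rewind above: data between tx_curs_fin and
 * tx_curs_sent may never have reached the peer on the failed link, so
 * prod is set back to the last CDC-confirmed state and tx_rdma_writes()
 * retransmits from there on the new link. If the peer's cons cursor
 * already advanced past fin, the diff correction keeps the cursors and
 * sndbuf_space consistent.
 */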
529
530 struct smc_link *smc_switch_conns(struct smc_link_group *lgr,
531                                   struct smc_link *from_lnk, bool is_dev_err)
532 {
533         struct smc_link *to_lnk = NULL;
534         struct smc_cdc_tx_pend *pend;
535         struct smc_connection *conn;
536         struct smc_wr_buf *wr_buf;
537         struct smc_sock *smc;
538         struct rb_node *node;
539         int i, rc = 0;
540
541         /* link is inactive, wake up tx waiters */
542         smc_wr_wakeup_tx_wait(from_lnk);
543
544         for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
545                 if (!smc_link_active(&lgr->lnk[i]) || i == from_lnk->link_idx)
546                         continue;
547                 if (is_dev_err && from_lnk->smcibdev == lgr->lnk[i].smcibdev &&
548                     from_lnk->ibport == lgr->lnk[i].ibport) {
549                         continue;
550                 }
551                 to_lnk = &lgr->lnk[i];
552                 break;
553         }
554         if (!to_lnk) {
555                 smc_lgr_terminate_sched(lgr);
556                 return NULL;
557         }
558 again:
559         read_lock_bh(&lgr->conns_lock);
560         for (node = rb_first(&lgr->conns_all); node; node = rb_next(node)) {
561                 conn = rb_entry(node, struct smc_connection, alert_node);
562                 if (conn->lnk != from_lnk)
563                         continue;
564                 smc = container_of(conn, struct smc_sock, conn);
565                 /* conn->lnk not yet set in SMC_INIT state */
566                 if (smc->sk.sk_state == SMC_INIT)
567                         continue;
568                 if (smc->sk.sk_state == SMC_CLOSED ||
569                     smc->sk.sk_state == SMC_PEERCLOSEWAIT1 ||
570                     smc->sk.sk_state == SMC_PEERCLOSEWAIT2 ||
571                     smc->sk.sk_state == SMC_APPFINCLOSEWAIT ||
572                     smc->sk.sk_state == SMC_APPCLOSEWAIT1 ||
573                     smc->sk.sk_state == SMC_APPCLOSEWAIT2 ||
574                     smc->sk.sk_state == SMC_PEERFINCLOSEWAIT ||
575                     smc->sk.sk_state == SMC_PEERABORTWAIT ||
576                     smc->sk.sk_state == SMC_PROCESSABORT) {
577                         spin_lock_bh(&conn->send_lock);
578                         conn->lnk = to_lnk;
579                         spin_unlock_bh(&conn->send_lock);
580                         continue;
581                 }
582                 sock_hold(&smc->sk);
583                 read_unlock_bh(&lgr->conns_lock);
584                 /* pre-fetch buffer outside of send_lock, might sleep */
585                 rc = smc_cdc_get_free_slot(conn, to_lnk, &wr_buf, NULL, &pend);
586                 if (rc) {
587                         smcr_link_down_cond_sched(to_lnk);
588                         return NULL;
589                 }
590                 /* avoid race with smcr_tx_sndbuf_nonempty() */
591                 spin_lock_bh(&conn->send_lock);
592                 conn->lnk = to_lnk;
593                 rc = smc_switch_cursor(smc, pend, wr_buf);
594                 spin_unlock_bh(&conn->send_lock);
595                 sock_put(&smc->sk);
596                 if (rc) {
597                         smcr_link_down_cond_sched(to_lnk);
598                         return NULL;
599                 }
600                 goto again;
601         }
602         read_unlock_bh(&lgr->conns_lock);
603         return to_lnk;
604 }
605
606 static void smcr_buf_unuse(struct smc_buf_desc *rmb_desc,
607                            struct smc_link_group *lgr)
608 {
609         int rc;
610
611         if (rmb_desc->is_conf_rkey && !list_empty(&lgr->list)) {
612                 /* unregister rmb with peer */
613                 rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY);
614                 if (!rc) {
615                         /* protect against smc_llc_cli_rkey_exchange() */
616                         mutex_lock(&lgr->llc_conf_mutex);
617                         smc_llc_do_delete_rkey(lgr, rmb_desc);
618                         rmb_desc->is_conf_rkey = false;
619                         mutex_unlock(&lgr->llc_conf_mutex);
620                         smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
621                 }
622         }
623
624         if (rmb_desc->is_reg_err) {
625                 /* buf registration failed, reuse not possible */
626                 mutex_lock(&lgr->rmbs_lock);
627                 list_del(&rmb_desc->list);
628                 mutex_unlock(&lgr->rmbs_lock);
629
630                 smc_buf_free(lgr, true, rmb_desc);
631         } else {
632                 rmb_desc->used = 0;
633         }
634 }
635
636 static void smc_buf_unuse(struct smc_connection *conn,
637                           struct smc_link_group *lgr)
638 {
639         if (conn->sndbuf_desc)
640                 conn->sndbuf_desc->used = 0;
641         if (conn->rmb_desc && lgr->is_smcd)
642                 conn->rmb_desc->used = 0;
643         else if (conn->rmb_desc)
644                 smcr_buf_unuse(conn->rmb_desc, lgr);
645 }
646
647 /* remove a finished connection from its link group */
648 void smc_conn_free(struct smc_connection *conn)
649 {
650         struct smc_link_group *lgr = conn->lgr;
651
652         if (!lgr)
653                 return;
654         if (lgr->is_smcd) {
655                 if (!list_empty(&lgr->list))
656                         smc_ism_unset_conn(conn);
657                 tasklet_kill(&conn->rx_tsklet);
658         } else {
659                 smc_cdc_tx_dismiss_slots(conn);
660                 if (current_work() != &conn->abort_work)
661                         cancel_work_sync(&conn->abort_work);
662         }
663         if (!list_empty(&lgr->list)) {
664                 smc_lgr_unregister_conn(conn);
665                 smc_buf_unuse(conn, lgr); /* allow buffer reuse */
666         }
667
668         if (!lgr->conns_num)
669                 smc_lgr_schedule_free_work(lgr);
670 }
671
672 /* unregister a link from a buf_desc */
673 static void smcr_buf_unmap_link(struct smc_buf_desc *buf_desc, bool is_rmb,
674                                 struct smc_link *lnk)
675 {
676         if (is_rmb)
677                 buf_desc->is_reg_mr[lnk->link_idx] = false;
678         if (!buf_desc->is_map_ib[lnk->link_idx])
679                 return;
680         if (is_rmb) {
681                 if (buf_desc->mr_rx[lnk->link_idx]) {
682                         smc_ib_put_memory_region(
683                                         buf_desc->mr_rx[lnk->link_idx]);
684                         buf_desc->mr_rx[lnk->link_idx] = NULL;
685                 }
686                 smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_FROM_DEVICE);
687         } else {
688                 smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_TO_DEVICE);
689         }
690         sg_free_table(&buf_desc->sgt[lnk->link_idx]);
691         buf_desc->is_map_ib[lnk->link_idx] = false;
692 }
693
694 /* unmap all buffers of lgr for a deleted link */
695 static void smcr_buf_unmap_lgr(struct smc_link *lnk)
696 {
697         struct smc_link_group *lgr = lnk->lgr;
698         struct smc_buf_desc *buf_desc, *bf;
699         int i;
700
701         for (i = 0; i < SMC_RMBE_SIZES; i++) {
702                 mutex_lock(&lgr->rmbs_lock);
703                 list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list)
704                         smcr_buf_unmap_link(buf_desc, true, lnk);
705                 mutex_unlock(&lgr->rmbs_lock);
706                 mutex_lock(&lgr->sndbufs_lock);
707                 list_for_each_entry_safe(buf_desc, bf, &lgr->sndbufs[i],
708                                          list)
709                         smcr_buf_unmap_link(buf_desc, false, lnk);
710                 mutex_unlock(&lgr->sndbufs_lock);
711         }
712 }
713
714 static void smcr_rtoken_clear_link(struct smc_link *lnk)
715 {
716         struct smc_link_group *lgr = lnk->lgr;
717         int i;
718
719         for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
720                 lgr->rtokens[i][lnk->link_idx].rkey = 0;
721                 lgr->rtokens[i][lnk->link_idx].dma_addr = 0;
722         }
723 }
724
725 /* must be called under lgr->llc_conf_mutex lock */
726 void smcr_link_clear(struct smc_link *lnk, bool log)
727 {
728         struct smc_ib_device *smcibdev;
729
730         if (!lnk->lgr || lnk->state == SMC_LNK_UNUSED)
731                 return;
732         lnk->peer_qpn = 0;
733         smc_llc_link_clear(lnk, log);
734         smcr_buf_unmap_lgr(lnk);
735         smcr_rtoken_clear_link(lnk);
736         smc_ib_modify_qp_reset(lnk);
737         smc_wr_free_link(lnk);
738         smc_ib_destroy_queue_pair(lnk);
739         smc_ib_dealloc_protection_domain(lnk);
740         smc_wr_free_link_mem(lnk);
741         put_device(&lnk->smcibdev->ibdev->dev);
742         smcibdev = lnk->smcibdev;
743         memset(lnk, 0, sizeof(struct smc_link));
744         lnk->state = SMC_LNK_UNUSED;
745         if (!atomic_dec_return(&smcibdev->lnk_cnt))
746                 wake_up(&smcibdev->lnks_deleted);
747 }
748
749 static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb,
750                           struct smc_buf_desc *buf_desc)
751 {
752         int i;
753
754         for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
755                 smcr_buf_unmap_link(buf_desc, is_rmb, &lgr->lnk[i]);
756
757         if (buf_desc->pages)
758                 __free_pages(buf_desc->pages, buf_desc->order);
759         kfree(buf_desc);
760 }
761
762 static void smcd_buf_free(struct smc_link_group *lgr, bool is_dmb,
763                           struct smc_buf_desc *buf_desc)
764 {
765         if (is_dmb) {
766                 /* restore original buf len */
767                 buf_desc->len += sizeof(struct smcd_cdc_msg);
768                 smc_ism_unregister_dmb(lgr->smcd, buf_desc);
769         } else {
770                 kfree(buf_desc->cpu_addr);
771         }
772         kfree(buf_desc);
773 }
774
775 static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
776                          struct smc_buf_desc *buf_desc)
777 {
778         if (lgr->is_smcd)
779                 smcd_buf_free(lgr, is_rmb, buf_desc);
780         else
781                 smcr_buf_free(lgr, is_rmb, buf_desc);
782 }
783
784 static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb)
785 {
786         struct smc_buf_desc *buf_desc, *bf_desc;
787         struct list_head *buf_list;
788         int i;
789
790         for (i = 0; i < SMC_RMBE_SIZES; i++) {
791                 if (is_rmb)
792                         buf_list = &lgr->rmbs[i];
793                 else
794                         buf_list = &lgr->sndbufs[i];
795                 list_for_each_entry_safe(buf_desc, bf_desc, buf_list,
796                                          list) {
797                         list_del(&buf_desc->list);
798                         smc_buf_free(lgr, is_rmb, buf_desc);
799                 }
800         }
801 }
802
803 static void smc_lgr_free_bufs(struct smc_link_group *lgr)
804 {
805         /* free send buffers */
806         __smc_lgr_free_bufs(lgr, false);
807         /* free rmbs */
808         __smc_lgr_free_bufs(lgr, true);
809 }
810
811 /* remove a link group */
812 static void smc_lgr_free(struct smc_link_group *lgr)
813 {
814         int i;
815
816         if (!lgr->is_smcd) {
817                 mutex_lock(&lgr->llc_conf_mutex);
818                 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
819                         if (lgr->lnk[i].state != SMC_LNK_UNUSED)
820                                 smcr_link_clear(&lgr->lnk[i], false);
821                 }
822                 mutex_unlock(&lgr->llc_conf_mutex);
823                 smc_llc_lgr_clear(lgr);
824         }
825
826         smc_lgr_free_bufs(lgr);
827         if (lgr->is_smcd) {
828                 if (!lgr->terminating) {
829                         smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
830                         put_device(&lgr->smcd->dev);
831                 }
832                 if (!atomic_dec_return(&lgr->smcd->lgr_cnt))
833                         wake_up(&lgr->smcd->lgrs_deleted);
834         } else {
835                 if (!atomic_dec_return(&lgr_cnt))
836                         wake_up(&lgrs_deleted);
837         }
838         kfree(lgr);
839 }
840
841 static void smcd_unregister_all_dmbs(struct smc_link_group *lgr)
842 {
843         int i;
844
845         for (i = 0; i < SMC_RMBE_SIZES; i++) {
846                 struct smc_buf_desc *buf_desc;
847
848                 list_for_each_entry(buf_desc, &lgr->rmbs[i], list) {
849                         buf_desc->len += sizeof(struct smcd_cdc_msg);
850                         smc_ism_unregister_dmb(lgr->smcd, buf_desc);
851                 }
852         }
853 }
854
855 static void smc_sk_wake_ups(struct smc_sock *smc)
856 {
857         smc->sk.sk_write_space(&smc->sk);
858         smc->sk.sk_data_ready(&smc->sk);
859         smc->sk.sk_state_change(&smc->sk);
860 }
861
862 /* kill a connection */
863 static void smc_conn_kill(struct smc_connection *conn, bool soft)
864 {
865         struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
866
867         if (conn->lgr->is_smcd && conn->lgr->peer_shutdown)
868                 conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
869         else
870                 smc_close_abort(conn);
871         conn->killed = 1;
872         smc->sk.sk_err = ECONNABORTED;
873         smc_sk_wake_ups(smc);
874         if (conn->lgr->is_smcd) {
875                 smc_ism_unset_conn(conn);
876                 if (soft)
877                         tasklet_kill(&conn->rx_tsklet);
878                 else
879                         tasklet_unlock_wait(&conn->rx_tsklet);
880         } else {
881                 smc_cdc_tx_dismiss_slots(conn);
882         }
883         smc_lgr_unregister_conn(conn);
884         smc_close_active_abort(smc);
885 }
886
887 static void smc_lgr_cleanup(struct smc_link_group *lgr)
888 {
889         if (lgr->is_smcd) {
890                 smc_ism_signal_shutdown(lgr);
891                 smcd_unregister_all_dmbs(lgr);
892                 smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
893                 put_device(&lgr->smcd->dev);
894         } else {
895                 u32 rsn = lgr->llc_termination_rsn;
896
897                 if (!rsn)
898                         rsn = SMC_LLC_DEL_PROG_INIT_TERM;
899                 smc_llc_send_link_delete_all(lgr, false, rsn);
900                 smcr_lgr_link_deactivate_all(lgr);
901         }
902 }
903
904 /* terminate link group
905  * @soft: true if link group shutdown can take its time
906  *        false if immediate link group shutdown is required
907  */
908 static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft)
909 {
910         struct smc_connection *conn;
911         struct smc_sock *smc;
912         struct rb_node *node;
913
914         if (lgr->terminating)
915                 return; /* lgr already terminating */
916         /* cancel free_work sync, will terminate when lgr->freeing is set */
917         cancel_delayed_work_sync(&lgr->free_work);
918         lgr->terminating = 1;
919
920         /* kill remaining link group connections */
921         read_lock_bh(&lgr->conns_lock);
922         node = rb_first(&lgr->conns_all);
923         while (node) {
924                 read_unlock_bh(&lgr->conns_lock);
925                 conn = rb_entry(node, struct smc_connection, alert_node);
926                 smc = container_of(conn, struct smc_sock, conn);
927                 sock_hold(&smc->sk); /* sock_put below */
928                 lock_sock(&smc->sk);
929                 smc_conn_kill(conn, soft);
930                 release_sock(&smc->sk);
931                 sock_put(&smc->sk); /* sock_hold above */
932                 read_lock_bh(&lgr->conns_lock);
933                 node = rb_first(&lgr->conns_all);
934         }
935         read_unlock_bh(&lgr->conns_lock);
936         smc_lgr_cleanup(lgr);
937         smc_lgr_free(lgr);
938 }
939
940 /* unlink link group and schedule termination */
941 void smc_lgr_terminate_sched(struct smc_link_group *lgr)
942 {
943         spinlock_t *lgr_lock;
944
945         smc_lgr_list_head(lgr, &lgr_lock);
946         spin_lock_bh(lgr_lock);
947         if (list_empty(&lgr->list) || lgr->terminating || lgr->freeing) {
948                 spin_unlock_bh(lgr_lock);
949                 return; /* lgr already terminating */
950         }
951         list_del_init(&lgr->list);
952         lgr->freeing = 1;
953         spin_unlock_bh(lgr_lock);
954         schedule_work(&lgr->terminate_work);
955 }
956
957 /* Called when peer lgr shutdown (regularly or abnormally) is received */
958 void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan)
959 {
960         struct smc_link_group *lgr, *l;
961         LIST_HEAD(lgr_free_list);
962
963         /* run common cleanup function and build free list */
964         spin_lock_bh(&dev->lgr_lock);
965         list_for_each_entry_safe(lgr, l, &dev->lgr_list, list) {
966                 if ((!peer_gid || lgr->peer_gid == peer_gid) &&
967                     (vlan == VLAN_VID_MASK || lgr->vlan_id == vlan)) {
968                         if (peer_gid) /* peer triggered termination */
969                                 lgr->peer_shutdown = 1;
970                         list_move(&lgr->list, &lgr_free_list);
971                         lgr->freeing = 1;
972                 }
973         }
974         spin_unlock_bh(&dev->lgr_lock);
975
976         /* cancel the regular free workers and actually free lgrs */
977         list_for_each_entry_safe(lgr, l, &lgr_free_list, list) {
978                 list_del_init(&lgr->list);
979                 schedule_work(&lgr->terminate_work);
980         }
981 }
982
983 /* Called when an SMCD device is removed or the smc module is unloaded */
984 void smc_smcd_terminate_all(struct smcd_dev *smcd)
985 {
986         struct smc_link_group *lgr, *lg;
987         LIST_HEAD(lgr_free_list);
988
989         spin_lock_bh(&smcd->lgr_lock);
990         list_splice_init(&smcd->lgr_list, &lgr_free_list);
991         list_for_each_entry(lgr, &lgr_free_list, list)
992                 lgr->freeing = 1;
993         spin_unlock_bh(&smcd->lgr_lock);
994
995         list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) {
996                 list_del_init(&lgr->list);
997                 __smc_lgr_terminate(lgr, false);
998         }
999
1000         if (atomic_read(&smcd->lgr_cnt))
1001                 wait_event(smcd->lgrs_deleted, !atomic_read(&smcd->lgr_cnt));
1002 }
1003
1004 /* Called when an SMCR device is removed or the smc module is unloaded.
1005  * If smcibdev is given, all SMCR link groups using this device are terminated.
1006  * If smcibdev is NULL, all SMCR link groups are terminated.
1007  */
1008 void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
1009 {
1010         struct smc_link_group *lgr, *lg;
1011         LIST_HEAD(lgr_free_list);
1012         int i;
1013
1014         spin_lock_bh(&smc_lgr_list.lock);
1015         if (!smcibdev) {
1016                 list_splice_init(&smc_lgr_list.list, &lgr_free_list);
1017                 list_for_each_entry(lgr, &lgr_free_list, list)
1018                         lgr->freeing = 1;
1019         } else {
1020                 list_for_each_entry_safe(lgr, lg, &smc_lgr_list.list, list) {
1021                         for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
1022                                 if (lgr->lnk[i].smcibdev == smcibdev)
1023                                         smcr_link_down_cond_sched(&lgr->lnk[i]);
1024                         }
1025                 }
1026         }
1027         spin_unlock_bh(&smc_lgr_list.lock);
1028
1029         list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) {
1030                 list_del_init(&lgr->list);
1031                 smc_llc_set_termination_rsn(lgr, SMC_LLC_DEL_OP_INIT_TERM);
1032                 __smc_lgr_terminate(lgr, false);
1033         }
1034
1035         if (smcibdev) {
1036                 if (atomic_read(&smcibdev->lnk_cnt))
1037                         wait_event(smcibdev->lnks_deleted,
1038                                    !atomic_read(&smcibdev->lnk_cnt));
1039         } else {
1040                 if (atomic_read(&lgr_cnt))
1041                         wait_event(lgrs_deleted, !atomic_read(&lgr_cnt));
1042         }
1043 }
1044
1045 /* set new lgr type and clear all asymmetric link tagging */
1046 void smcr_lgr_set_type(struct smc_link_group *lgr, enum smc_lgr_type new_type)
1047 {
1048         char *lgr_type = "";
1049         int i;
1050
1051         for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
1052                 if (smc_link_usable(&lgr->lnk[i]))
1053                         lgr->lnk[i].link_is_asym = false;
1054         if (lgr->type == new_type)
1055                 return;
1056         lgr->type = new_type;
1057
1058         switch (lgr->type) {
1059         case SMC_LGR_NONE:
1060                 lgr_type = "NONE";
1061                 break;
1062         case SMC_LGR_SINGLE:
1063                 lgr_type = "SINGLE";
1064                 break;
1065         case SMC_LGR_SYMMETRIC:
1066                 lgr_type = "SYMMETRIC";
1067                 break;
1068         case SMC_LGR_ASYMMETRIC_PEER:
1069                 lgr_type = "ASYMMETRIC_PEER";
1070                 break;
1071         case SMC_LGR_ASYMMETRIC_LOCAL:
1072                 lgr_type = "ASYMMETRIC_LOCAL";
1073                 break;
1074         }
1075         pr_warn_ratelimited("smc: SMC-R lg %*phN state changed: "
1076                             "%s, pnetid %.16s\n", SMC_LGR_ID_SIZE, &lgr->id,
1077                             lgr_type, lgr->pnet_id);
1078 }
1079
1080 /* set new lgr type and tag a link as asymmetric */
1081 void smcr_lgr_set_type_asym(struct smc_link_group *lgr,
1082                             enum smc_lgr_type new_type, int asym_lnk_idx)
1083 {
1084         smcr_lgr_set_type(lgr, new_type);
1085         lgr->lnk[asym_lnk_idx].link_is_asym = true;
1086 }
1087
1088 /* abort connection, abort_work scheduled from tasklet context */
1089 static void smc_conn_abort_work(struct work_struct *work)
1090 {
1091         struct smc_connection *conn = container_of(work,
1092                                                    struct smc_connection,
1093                                                    abort_work);
1094         struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
1095
1096         smc_conn_kill(conn, true);
1097         sock_put(&smc->sk); /* sock_hold done by schedulers of abort_work */
1098 }
1099
1100 void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport)
1101 {
1102         struct smc_link_group *lgr, *n;
1103
1104         list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) {
1105                 struct smc_link *link;
1106
1107                 if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id,
1108                             SMC_MAX_PNETID_LEN) ||
1109                     lgr->type == SMC_LGR_SYMMETRIC ||
1110                     lgr->type == SMC_LGR_ASYMMETRIC_PEER)
1111                         continue;
1112
1113                 /* trigger local add link processing */
1114                 link = smc_llc_usable_link(lgr);
1115                 if (link)
1116                         smc_llc_add_link_local(link);
1117         }
1118 }
1119
1120 /* link is down - switch connections to alternate link,
1121  * must be called under lgr->llc_conf_mutex lock
1122  */
1123 static void smcr_link_down(struct smc_link *lnk)
1124 {
1125         struct smc_link_group *lgr = lnk->lgr;
1126         struct smc_link *to_lnk;
1127         int del_link_id;
1128
1129         if (!lgr || lnk->state == SMC_LNK_UNUSED || list_empty(&lgr->list))
1130                 return;
1131
1132         smc_ib_modify_qp_reset(lnk);
1133         to_lnk = smc_switch_conns(lgr, lnk, true);
1134         if (!to_lnk) { /* no backup link available */
1135                 smcr_link_clear(lnk, true);
1136                 return;
1137         }
1138         smcr_lgr_set_type(lgr, SMC_LGR_SINGLE);
1139         del_link_id = lnk->link_id;
1140
1141         if (lgr->role == SMC_SERV) {
1142                 /* trigger local delete link processing */
1143                 smc_llc_srv_delete_link_local(to_lnk, del_link_id);
1144         } else {
1145                 if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) {
1146                         /* another llc task is ongoing */
1147                         mutex_unlock(&lgr->llc_conf_mutex);
1148                         wait_event_timeout(lgr->llc_flow_waiter,
1149                                 (list_empty(&lgr->list) ||
1150                                  lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE),
1151                                 SMC_LLC_WAIT_TIME);
1152                         mutex_lock(&lgr->llc_conf_mutex);
1153                 }
1154                 if (!list_empty(&lgr->list)) {
1155                         smc_llc_send_delete_link(to_lnk, del_link_id,
1156                                                  SMC_LLC_REQ, true,
1157                                                  SMC_LLC_DEL_LOST_PATH);
1158                         smcr_link_clear(lnk, true);
1159                 }
1160                 wake_up(&lgr->llc_flow_waiter); /* wake up next waiter */
1161         }
1162 }
1163
1164 /* must be called under lgr->llc_conf_mutex lock */
1165 void smcr_link_down_cond(struct smc_link *lnk)
1166 {
1167         if (smc_link_downing(&lnk->state))
1168                 smcr_link_down(lnk);
1169 }
1170
1171 /* will get the lgr->llc_conf_mutex lock */
1172 void smcr_link_down_cond_sched(struct smc_link *lnk)
1173 {
1174         if (smc_link_downing(&lnk->state))
1175                 schedule_work(&lnk->link_down_wrk);
1176 }
1177
1178 void smcr_port_err(struct smc_ib_device *smcibdev, u8 ibport)
1179 {
1180         struct smc_link_group *lgr, *n;
1181         int i;
1182
1183         list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) {
1184                 if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id,
1185                             SMC_MAX_PNETID_LEN))
1186                         continue; /* lgr is not affected */
1187                 if (list_empty(&lgr->list))
1188                         continue;
1189                 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
1190                         struct smc_link *lnk = &lgr->lnk[i];
1191
1192                         if (smc_link_usable(lnk) &&
1193                             lnk->smcibdev == smcibdev && lnk->ibport == ibport)
1194                                 smcr_link_down_cond_sched(lnk);
1195                 }
1196         }
1197 }
1198
1199 static void smc_link_down_work(struct work_struct *work)
1200 {
1201         struct smc_link *link = container_of(work, struct smc_link,
1202                                              link_down_wrk);
1203         struct smc_link_group *lgr = link->lgr;
1204
1205         if (list_empty(&lgr->list))
1206                 return;
1207         wake_up_all(&lgr->llc_msg_waiter);
1208         mutex_lock(&lgr->llc_conf_mutex);
1209         smcr_link_down(link);
1210         mutex_unlock(&lgr->llc_conf_mutex);
1211 }
1212
1213 /* Determine vlan of internal TCP socket.
1214  * The determined vlan id is stored in @ini->vlan_id.
1215  */
1216 int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini)
1217 {
1218         struct dst_entry *dst = sk_dst_get(clcsock->sk);
1219         struct net_device *ndev;
1220         int i, nest_lvl, rc = 0;
1221
1222         ini->vlan_id = 0;
1223         if (!dst) {
1224                 rc = -ENOTCONN;
1225                 goto out;
1226         }
1227         if (!dst->dev) {
1228                 rc = -ENODEV;
1229                 goto out_rel;
1230         }
1231
1232         ndev = dst->dev;
1233         if (is_vlan_dev(ndev)) {
1234                 ini->vlan_id = vlan_dev_vlan_id(ndev);
1235                 goto out_rel;
1236         }
1237
1238         rtnl_lock();
1239         nest_lvl = ndev->lower_level;
1240         for (i = 0; i < nest_lvl; i++) {
1241                 struct list_head *lower = &ndev->adj_list.lower;
1242
1243                 if (list_empty(lower))
1244                         break;
1245                 lower = lower->next;
1246                 ndev = (struct net_device *)netdev_lower_get_next(ndev, &lower);
1247                 if (is_vlan_dev(ndev)) {
1248                         ini->vlan_id = vlan_dev_vlan_id(ndev);
1249                         break;
1250                 }
1251         }
1252         rtnl_unlock();
1253
1254 out_rel:
1255         dst_release(dst);
1256 out:
1257         return rc;
1258 }
1259
1260 static bool smcr_lgr_match(struct smc_link_group *lgr,
1261                            struct smc_clc_msg_local *lcl,
1262                            enum smc_lgr_role role, u32 clcqpn)
1263 {
1264         int i;
1265
1266         if (memcmp(lgr->peer_systemid, lcl->id_for_peer, SMC_SYSTEMID_LEN) ||
1267             lgr->role != role)
1268                 return false;
1269
1270         for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
1271                 if (!smc_link_active(&lgr->lnk[i]))
1272                         continue;
1273                 if ((lgr->role == SMC_SERV || lgr->lnk[i].peer_qpn == clcqpn) &&
1274                     !memcmp(lgr->lnk[i].peer_gid, &lcl->gid, SMC_GID_SIZE) &&
1275                     !memcmp(lgr->lnk[i].peer_mac, lcl->mac, sizeof(lcl->mac)))
1276                         return true;
1277         }
1278         return false;
1279 }
1280
1281 static bool smcd_lgr_match(struct smc_link_group *lgr,
1282                            struct smcd_dev *smcismdev, u64 peer_gid)
1283 {
1284         return lgr->peer_gid == peer_gid && lgr->smcd == smcismdev;
1285 }
1286
1287 /* create a new SMC connection (and a new link group if necessary) */
1288 int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
1289 {
1290         struct smc_connection *conn = &smc->conn;
1291         struct list_head *lgr_list;
1292         struct smc_link_group *lgr;
1293         enum smc_lgr_role role;
1294         spinlock_t *lgr_lock;
1295         int rc = 0;
1296
1297         lgr_list = ini->is_smcd ? &ini->ism_dev->lgr_list : &smc_lgr_list.list;
1298         lgr_lock = ini->is_smcd ? &ini->ism_dev->lgr_lock : &smc_lgr_list.lock;
1299         ini->cln_first_contact = SMC_FIRST_CONTACT;
1300         role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
1301         if (role == SMC_CLNT && ini->srv_first_contact)
1302                 /* create new link group as well */
1303                 goto create;
1304
1305         /* determine if an existing link group can be reused */
1306         spin_lock_bh(lgr_lock);
1307         list_for_each_entry(lgr, lgr_list, list) {
1308                 write_lock_bh(&lgr->conns_lock);
1309                 if ((ini->is_smcd ?
1310                      smcd_lgr_match(lgr, ini->ism_dev, ini->ism_gid) :
1311                      smcr_lgr_match(lgr, ini->ib_lcl, role, ini->ib_clcqpn)) &&
1312                     !lgr->sync_err &&
1313                     lgr->vlan_id == ini->vlan_id &&
1314                     (role == SMC_CLNT || ini->is_smcd ||
1315                      lgr->conns_num < SMC_RMBS_PER_LGR_MAX)) {
1316                         /* link group found */
1317                         ini->cln_first_contact = SMC_REUSE_CONTACT;
1318                         conn->lgr = lgr;
1319                         rc = smc_lgr_register_conn(conn, false);
1320                         write_unlock_bh(&lgr->conns_lock);
1321                         if (!rc && delayed_work_pending(&lgr->free_work))
1322                                 cancel_delayed_work(&lgr->free_work);
1323                         break;
1324                 }
1325                 write_unlock_bh(&lgr->conns_lock);
1326         }
1327         spin_unlock_bh(lgr_lock);
1328         if (rc)
1329                 return rc;
1330
1331         if (role == SMC_CLNT && !ini->srv_first_contact &&
1332             ini->cln_first_contact == SMC_FIRST_CONTACT) {
1333                 /* Server reuses a link group, but Client wants to start
1334                  * a new one
1335                  * send out_of_sync decline, reason synchr. error
1336                  */
1337                 return SMC_CLC_DECL_SYNCERR;
1338         }
1339
1340 create:
1341         if (ini->cln_first_contact == SMC_FIRST_CONTACT) {
1342                 rc = smc_lgr_create(smc, ini);
1343                 if (rc)
1344                         goto out;
1345                 lgr = conn->lgr;
1346                 write_lock_bh(&lgr->conns_lock);
1347                 rc = smc_lgr_register_conn(conn, true);
1348                 write_unlock_bh(&lgr->conns_lock);
1349                 if (rc)
1350                         goto out;
1351         }
1352         conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
1353         conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
1354         conn->urg_state = SMC_URG_READ;
1355         INIT_WORK(&smc->conn.abort_work, smc_conn_abort_work);
1356         if (ini->is_smcd) {
1357                 conn->rx_off = sizeof(struct smcd_cdc_msg);
1358                 smcd_cdc_rx_init(conn); /* init tasklet for this conn */
1359         } else {
1360                 conn->rx_off = 0;
1361         }
1362 #ifndef KERNEL_HAS_ATOMIC64
1363         spin_lock_init(&conn->acurs_lock);
1364 #endif
1365
1366 out:
1367         return rc;
1368 }
1369
1370 /* convert the RMB size into the compressed notation - minimum 16K.
1371  * In contrast to plain ilog2, this rounds up to the next power of 2,
1372  * so the socket application gets at least its desired sndbuf / rcvbuf size.
1373  */
1374 static u8 smc_compress_bufsize(int size)
1375 {
1376         u8 compressed;
1377
1378         if (size <= SMC_BUF_MIN_SIZE)
1379                 return 0;
1380
1381         size = (size - 1) >> 14;
1382         compressed = ilog2(size) + 1;
1383         if (compressed >= SMC_RMBE_SIZES)
1384                 compressed = SMC_RMBE_SIZES - 1;
1385         return compressed;
1386 }
1387
1388 /* convert the RMB size from compressed notation into integer */
1389 int smc_uncompress_bufsize(u8 compressed)
1390 {
1391         u32 size;
1392
1393         size = 0x00000001 << (((int)compressed) + 14);
1394         return (int)size;
1395 }
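
/* Worked example (illustrative) for the compressed notation:
 *
 *	smc_compress_bufsize(100000):
 *		(100000 - 1) >> 14 == 6, ilog2(6) + 1 == 3
 *	smc_uncompress_bufsize(3):
 *		1 << (3 + 14) == 131072
 *
 * A requested 100000 bytes is thus rounded up to 128K; compressed value
 * 0 denotes the 16K minimum (1 << 14).
 */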
1396
1397 /* try to reuse a sndbuf or rmb description slot for a certain
1398  * buffer size; if not available, return NULL
1399  */
1400 static struct smc_buf_desc *smc_buf_get_slot(int compressed_bufsize,
1401                                              struct mutex *lock,
1402                                              struct list_head *buf_list)
1403 {
1404         struct smc_buf_desc *buf_slot;
1405
1406         mutex_lock(lock);
1407         list_for_each_entry(buf_slot, buf_list, list) {
1408                 if (cmpxchg(&buf_slot->used, 0, 1) == 0) {
1409                         mutex_unlock(lock);
1410                         return buf_slot;
1411                 }
1412         }
1413         mutex_unlock(lock);
1414         return NULL;
1415 }
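
/* The cmpxchg(&buf_slot->used, 0, 1) above makes slot reuse race-free:
 * only one caller can observe the 0 -> 1 transition, so a buffer
 * released concurrently (used = 0 in smc_buf_unuse()) is handed out at
 * most once. Usage sketch (illustrative, names hypothetical):
 *
 *	buf_desc = smc_buf_get_slot(bufsize_short, lock, buf_list);
 *	if (!buf_desc)
 *		buf_desc = ... allocate and map a fresh buffer ...
 */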
1416
1417 /* one of the conditions for announcing a receiver's current window size is
1418  * that it "results in a minimum increase in the window size of 10% of the
1419  * receive buffer space" [RFC7609]
1420  */
1421 static inline int smc_rmb_wnd_update_limit(int rmbe_size)
1422 {
1423         return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
1424 }
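
/* Worked example (illustrative): for a 64K RMB the 10% rule gives 6553
 * bytes but is capped at SOCK_MIN_SNDBUF / 2; for a 16K RMB the 10%
 * term (1638 bytes) is typically the smaller one. Either way, window
 * updates smaller than the returned limit need not be announced.
 */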
1425
1426 /* map an rmb buf to a link */
1427 static int smcr_buf_map_link(struct smc_buf_desc *buf_desc, bool is_rmb,
1428                              struct smc_link *lnk)
1429 {
1430         int rc;
1431
1432         if (buf_desc->is_map_ib[lnk->link_idx])
1433                 return 0;
1434
1435         rc = sg_alloc_table(&buf_desc->sgt[lnk->link_idx], 1, GFP_KERNEL);
1436         if (rc)
1437                 return rc;
1438         sg_set_buf(buf_desc->sgt[lnk->link_idx].sgl,
1439                    buf_desc->cpu_addr, buf_desc->len);
1440
1441         /* map sg table to DMA address */
1442         rc = smc_ib_buf_map_sg(lnk, buf_desc,
1443                                is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
1444         /* SMC protocol depends on mapping to one DMA address only */
1445         if (rc != 1) {
1446                 rc = -EAGAIN;
1447                 goto free_table;
1448         }
1449
1450         /* create a new memory region for the RMB */
1451         if (is_rmb) {
1452                 rc = smc_ib_get_memory_region(lnk->roce_pd,
1453                                               IB_ACCESS_REMOTE_WRITE |
1454                                               IB_ACCESS_LOCAL_WRITE,
1455                                               buf_desc, lnk->link_idx);
1456                 if (rc)
1457                         goto buf_unmap;
1458                 smc_ib_sync_sg_for_device(lnk, buf_desc, DMA_FROM_DEVICE);
1459         }
1460         buf_desc->is_map_ib[lnk->link_idx] = true;
1461         return 0;
1462
1463 buf_unmap:
1464         smc_ib_buf_unmap_sg(lnk, buf_desc,
1465                             is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
1466 free_table:
1467         sg_free_table(&buf_desc->sgt[lnk->link_idx]);
1468         return rc;
1469 }
1470
1471 /* register a new rmb on IB device,
1472  * must be called under lgr->llc_conf_mutex lock
1473  */
1474 int smcr_link_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc)
1475 {
1476         if (list_empty(&link->lgr->list))
1477                 return -ENOLINK;
1478         if (!rmb_desc->is_reg_mr[link->link_idx]) {
1479                 /* register memory region for new rmb */
1480                 if (smc_wr_reg_send(link, rmb_desc->mr_rx[link->link_idx])) {
1481                         rmb_desc->is_reg_err = true;
1482                         return -EFAULT;
1483                 }
1484                 rmb_desc->is_reg_mr[link->link_idx] = true;
1485         }
1486         return 0;
1487 }
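/* Note (editor): smc_wr_reg_send() posts the memory-registration work
 * request on the link and waits for its completion; on failure the rmb
 * is marked via is_reg_err so cleanup can deal with the half-done state.
 */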
1488
1489 static int _smcr_buf_map_lgr(struct smc_link *lnk, struct mutex *lock,
1490                              struct list_head *lst, bool is_rmb)
1491 {
1492         struct smc_buf_desc *buf_desc, *bf;
1493         int rc = 0;
1494
1495         mutex_lock(lock);
1496         list_for_each_entry_safe(buf_desc, bf, lst, list) {
1497                 if (!buf_desc->used)
1498                         continue;
1499                 rc = smcr_buf_map_link(buf_desc, is_rmb, lnk);
1500                 if (rc)
1501                         goto out;
1502         }
1503 out:
1504         mutex_unlock(lock);
1505         return rc;
1506 }
1507
1508 /* map all used buffers of lgr for a new link */
1509 int smcr_buf_map_lgr(struct smc_link *lnk)
1510 {
1511         struct smc_link_group *lgr = lnk->lgr;
1512         int i, rc = 0;
1513
1514         for (i = 0; i < SMC_RMBE_SIZES; i++) {
1515                 rc = _smcr_buf_map_lgr(lnk, &lgr->rmbs_lock,
1516                                        &lgr->rmbs[i], true);
1517                 if (rc)
1518                         return rc;
1519                 rc = _smcr_buf_map_lgr(lnk, &lgr->sndbufs_lock,
1520                                        &lgr->sndbufs[i], false);
1521                 if (rc)
1522                         return rc;
1523         }
1524         return 0;
1525 }
1526
1527 /* register all used buffers of lgr for a new link,
1528  * must be called under lgr->llc_conf_mutex lock
1529  */
1530 int smcr_buf_reg_lgr(struct smc_link *lnk)
1531 {
1532         struct smc_link_group *lgr = lnk->lgr;
1533         struct smc_buf_desc *buf_desc, *bf;
1534         int i, rc = 0;
1535
1536         mutex_lock(&lgr->rmbs_lock);
1537         for (i = 0; i < SMC_RMBE_SIZES; i++) {
1538                 list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list) {
1539                         if (!buf_desc->used)
1540                                 continue;
1541                         rc = smcr_link_reg_rmb(lnk, buf_desc);
1542                         if (rc)
1543                                 goto out;
1544                 }
1545         }
1546 out:
1547         mutex_unlock(&lgr->rmbs_lock);
1548         return rc;
1549 }
1550
1551 static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr,
1552                                                 bool is_rmb, int bufsize)
1553 {
1554         struct smc_buf_desc *buf_desc;
1555
1556         /* try to alloc a new buffer */
1557         buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
1558         if (!buf_desc)
1559                 return ERR_PTR(-ENOMEM);
1560
1561         buf_desc->order = get_order(bufsize);
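        /* high-order allocation with __GFP_NORETRY / __GFP_NOWARN: if it
         * fails, the caller falls back to the next smaller buffer size
         * rather than pressing the reclaim path (editor's note)
         */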
1562         buf_desc->pages = alloc_pages(GFP_KERNEL | __GFP_NOWARN |
1563                                       __GFP_NOMEMALLOC | __GFP_COMP |
1564                                       __GFP_NORETRY | __GFP_ZERO,
1565                                       buf_desc->order);
1566         if (!buf_desc->pages) {
1567                 kfree(buf_desc);
1568                 return ERR_PTR(-EAGAIN);
1569         }
1570         buf_desc->cpu_addr = (void *)page_address(buf_desc->pages);
1571         buf_desc->len = bufsize;
1572         return buf_desc;
1573 }
1574
1575 /* map buf_desc on all usable links,
1576  * unused buffers stay mapped as long as the link is up
1577  */
1578 static int smcr_buf_map_usable_links(struct smc_link_group *lgr,
1579                                      struct smc_buf_desc *buf_desc, bool is_rmb)
1580 {
1581         int i, rc = 0;
1582
1583         /* protect against parallel link reconfiguration */
1584         mutex_lock(&lgr->llc_conf_mutex);
1585         for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
1586                 struct smc_link *lnk = &lgr->lnk[i];
1587
1588                 if (!smc_link_usable(lnk))
1589                         continue;
1590                 if (smcr_buf_map_link(buf_desc, is_rmb, lnk)) {
1591                         rc = -ENOMEM;
1592                         goto out;
1593                 }
1594         }
1595 out:
1596         mutex_unlock(&lgr->llc_conf_mutex);
1597         return rc;
1598 }
1599
1600 #define SMCD_DMBE_SIZES         6 /* max compressed value: 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */
1601
1602 static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr,
1603                                                 bool is_dmb, int bufsize)
1604 {
1605         struct smc_buf_desc *buf_desc;
1606         int rc;
1607
1608         if (smc_compress_bufsize(bufsize) > SMCD_DMBE_SIZES)
1609                 return ERR_PTR(-EAGAIN);
1610
1611         /* try to alloc a new DMB */
1612         buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
1613         if (!buf_desc)
1614                 return ERR_PTR(-ENOMEM);
1615         if (is_dmb) {
1616                 rc = smc_ism_register_dmb(lgr, bufsize, buf_desc);
1617                 if (rc) {
1618                         kfree(buf_desc);
1619                         return (rc == -ENOMEM) ? ERR_PTR(-EAGAIN) : ERR_PTR(rc);
1620                 }
1621                 buf_desc->pages = virt_to_page(buf_desc->cpu_addr);
1622                 /* the CDC header is stored in the buffer, so report a smaller usable length */
1623                 buf_desc->len = bufsize - sizeof(struct smcd_cdc_msg);
1624         } else {
1625                 buf_desc->cpu_addr = kzalloc(bufsize, GFP_KERNEL |
1626                                              __GFP_NOWARN | __GFP_NORETRY |
1627                                              __GFP_NOMEMALLOC);
1628                 if (!buf_desc->cpu_addr) {
1629                         kfree(buf_desc);
1630                         return ERR_PTR(-EAGAIN);
1631                 }
1632                 buf_desc->len = bufsize;
1633         }
1634         return buf_desc;
1635 }
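/* Note (editor): only the receive side (DMB) is registered with the ISM
 * device; the SMC-D send buffer is plain kernel memory, because an ISM
 * send copies data into the peer's DMB instead of doing an RDMA write.
 */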
1636
1637 static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
1638 {
1639         struct smc_buf_desc *buf_desc = ERR_PTR(-ENOMEM);
1640         struct smc_connection *conn = &smc->conn;
1641         struct smc_link_group *lgr = conn->lgr;
1642         struct list_head *buf_list;
1643         int bufsize, bufsize_short;
1644         struct mutex *lock;     /* lock buffer list */
1645         int sk_buf_size;
1646
1647         if (is_rmb)
1648                 /* use socket recv buffer size (w/o overhead) as start value */
1649                 sk_buf_size = smc->sk.sk_rcvbuf / 2;
1650         else
1651                 /* use socket send buffer size (w/o overhead) as start value */
1652                 sk_buf_size = smc->sk.sk_sndbuf / 2;
1653
1654         for (bufsize_short = smc_compress_bufsize(sk_buf_size);
1655              bufsize_short >= 0; bufsize_short--) {
1656
1657                 if (is_rmb) {
1658                         lock = &lgr->rmbs_lock;
1659                         buf_list = &lgr->rmbs[bufsize_short];
1660                 } else {
1661                         lock = &lgr->sndbufs_lock;
1662                         buf_list = &lgr->sndbufs[bufsize_short];
1663                 }
1664                 bufsize = smc_uncompress_bufsize(bufsize_short);
1665                 if ((1 << get_order(bufsize)) > SG_MAX_SINGLE_ALLOC)
1666                         continue;
1667
1668                 /* check for reusable slot in the link group */
1669                 buf_desc = smc_buf_get_slot(bufsize_short, lock, buf_list);
1670                 if (buf_desc) {
1671                         memset(buf_desc->cpu_addr, 0, bufsize);
1672                         break; /* found reusable slot */
1673                 }
1674
1675                 if (is_smcd)
1676                         buf_desc = smcd_new_buf_create(lgr, is_rmb, bufsize);
1677                 else
1678                         buf_desc = smcr_new_buf_create(lgr, is_rmb, bufsize);
1679
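                /* -ENOMEM is fatal; any other error, e.g. -EAGAIN when a
                 * high-order page or a DMB of this size is unavailable,
                 * retries the loop with the next smaller buffer size
                 */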
1680                 if (PTR_ERR(buf_desc) == -ENOMEM)
1681                         break;
1682                 if (IS_ERR(buf_desc))
1683                         continue;
1684
1685                 buf_desc->used = 1;
1686                 mutex_lock(lock);
1687                 list_add(&buf_desc->list, buf_list);
1688                 mutex_unlock(lock);
1689                 break; /* found */
1690         }
1691
1692         if (IS_ERR(buf_desc))
1693                 return PTR_ERR(buf_desc);
1694
1695         if (!is_smcd) {
1696                 if (smcr_buf_map_usable_links(lgr, buf_desc, is_rmb)) {
1697                         smcr_buf_unuse(buf_desc, lgr);
1698                         return -ENOMEM;
1699                 }
1700         }
1701
1702         if (is_rmb) {
1703                 conn->rmb_desc = buf_desc;
1704                 conn->rmbe_size_short = bufsize_short;
1705                 smc->sk.sk_rcvbuf = bufsize * 2;
1706                 atomic_set(&conn->bytes_to_rcv, 0);
1707                 conn->rmbe_update_limit =
1708                         smc_rmb_wnd_update_limit(buf_desc->len);
1709                 if (is_smcd)
1710                         smc_ism_set_conn(conn); /* map RMB/smcd_dev to conn */
1711         } else {
1712                 conn->sndbuf_desc = buf_desc;
1713                 smc->sk.sk_sndbuf = bufsize * 2;
1714                 atomic_set(&conn->sndbuf_space, bufsize);
1715         }
1716         return 0;
1717 }
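/* Note (editor): the socket buffer limits are set to twice the effective
 * buffer size above, mirroring the halving of sk_rcvbuf / sk_sndbuf at
 * the top of __smc_buf_create(), since the limits conventionally include
 * bookkeeping overhead.
 */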
1718
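/* DMA sync helpers (editor's note): on platforms with non-coherent DMA
 * these calls transfer buffer ownership between CPU and device around
 * each access; they return early for SMC-D link groups, which copy data
 * via ISM instead of using RDMA, and for links that are not active.
 */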
1719 void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn)
1720 {
1721         if (!conn->lgr || conn->lgr->is_smcd || !smc_link_active(conn->lnk))
1722                 return;
1723         smc_ib_sync_sg_for_cpu(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE);
1724 }
1725
1726 void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn)
1727 {
1728         if (!conn->lgr || conn->lgr->is_smcd || !smc_link_active(conn->lnk))
1729                 return;
1730         smc_ib_sync_sg_for_device(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE);
1731 }
1732
1733 void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn)
1734 {
1735         int i;
1736
1737         if (!conn->lgr || conn->lgr->is_smcd)
1738                 return;
1739         for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
1740                 if (!smc_link_active(&conn->lgr->lnk[i]))
1741                         continue;
1742                 smc_ib_sync_sg_for_cpu(&conn->lgr->lnk[i], conn->rmb_desc,
1743                                        DMA_FROM_DEVICE);
1744         }
1745 }
1746
1747 void smc_rmb_sync_sg_for_device(struct smc_connection *conn)
1748 {
1749         int i;
1750
1751         if (!conn->lgr || conn->lgr->is_smcd)
1752                 return;
1753         for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
1754                 if (!smc_link_active(&conn->lgr->lnk[i]))
1755                         continue;
1756                 smc_ib_sync_sg_for_device(&conn->lgr->lnk[i], conn->rmb_desc,
1757                                           DMA_FROM_DEVICE);
1758         }
1759 }
1760
1761 /* create the send and receive buffer for an SMC socket;
1762  * receive buffers are called RMBs;
1763  * (even though the SMC protocol allows more than one RMB-element per RMB,
1764  * the Linux implementation uses just one RMB-element per RMB, i.e. uses an
1765  * extra RMB for every connection in a link group)
1766  */
1767 int smc_buf_create(struct smc_sock *smc, bool is_smcd)
1768 {
1769         int rc;
1770
1771         /* create send buffer */
1772         rc = __smc_buf_create(smc, is_smcd, false);
1773         if (rc)
1774                 return rc;
1775         /* create rmb */
1776         rc = __smc_buf_create(smc, is_smcd, true);
1777         if (rc) {
1778                 mutex_lock(&smc->conn.lgr->sndbufs_lock);
1779                 list_del(&smc->conn.sndbuf_desc->list);
1780                 mutex_unlock(&smc->conn.lgr->sndbufs_lock);
1781                 smc_buf_free(smc->conn.lgr, false, smc->conn.sndbuf_desc);
1782                 smc->conn.sndbuf_desc = NULL;
1783         }
1784         return rc;
1785 }
1786
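/* reserve a free rtoken slot: scan for a clear bit and claim it with
 * test_and_set_bit(), which re-checks atomically - a concurrent winner
 * just makes the loop move on to the next clear bit (editor's note)
 */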
1787 static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
1788 {
1789         int i;
1790
1791         for_each_clear_bit(i, lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX) {
1792                 if (!test_and_set_bit(i, lgr->rtokens_used_mask))
1793                         return i;
1794         }
1795         return -ENOSPC;
1796 }
1797
1798 static int smc_rtoken_find_by_link(struct smc_link_group *lgr, int lnk_idx,
1799                                    u32 rkey)
1800 {
1801         int i;
1802
1803         for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
1804                 if (test_bit(i, lgr->rtokens_used_mask) &&
1805                     lgr->rtokens[i][lnk_idx].rkey == rkey)
1806                         return i;
1807         }
1808         return -ENOENT;
1809 }
1810
1811 /* set rtoken for a new link to an existing rmb */
1812 void smc_rtoken_set(struct smc_link_group *lgr, int link_idx, int link_idx_new,
1813                     __be32 nw_rkey_known, __be64 nw_vaddr, __be32 nw_rkey)
1814 {
1815         int rtok_idx;
1816
1817         rtok_idx = smc_rtoken_find_by_link(lgr, link_idx, ntohl(nw_rkey_known));
1818         if (rtok_idx == -ENOENT)
1819                 return;
1820         lgr->rtokens[rtok_idx][link_idx_new].rkey = ntohl(nw_rkey);
1821         lgr->rtokens[rtok_idx][link_idx_new].dma_addr = be64_to_cpu(nw_vaddr);
1822 }
1823
1824 /* set rtoken for a new link whose link_id is given */
1825 void smc_rtoken_set2(struct smc_link_group *lgr, int rtok_idx, int link_id,
1826                      __be64 nw_vaddr, __be32 nw_rkey)
1827 {
1828         u64 dma_addr = be64_to_cpu(nw_vaddr);
1829         u32 rkey = ntohl(nw_rkey);
1830         bool found = false;
1831         int link_idx;
1832
1833         for (link_idx = 0; link_idx < SMC_LINKS_PER_LGR_MAX; link_idx++) {
1834                 if (lgr->lnk[link_idx].link_id == link_id) {
1835                         found = true;
1836                         break;
1837                 }
1838         }
1839         if (!found)
1840                 return;
1841         lgr->rtokens[rtok_idx][link_idx].rkey = rkey;
1842         lgr->rtokens[rtok_idx][link_idx].dma_addr = dma_addr;
1843 }
1844
1845 /* add a new rtoken from peer */
1846 int smc_rtoken_add(struct smc_link *lnk, __be64 nw_vaddr, __be32 nw_rkey)
1847 {
1848         struct smc_link_group *lgr = smc_get_lgr(lnk);
1849         u64 dma_addr = be64_to_cpu(nw_vaddr);
1850         u32 rkey = ntohl(nw_rkey);
1851         int i;
1852
1853         for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
1854                 if (lgr->rtokens[i][lnk->link_idx].rkey == rkey &&
1855                     lgr->rtokens[i][lnk->link_idx].dma_addr == dma_addr &&
1856                     test_bit(i, lgr->rtokens_used_mask)) {
1857                         /* already in list */
1858                         return i;
1859                 }
1860         }
1861         i = smc_rmb_reserve_rtoken_idx(lgr);
1862         if (i < 0)
1863                 return i;
1864         lgr->rtokens[i][lnk->link_idx].rkey = rkey;
1865         lgr->rtokens[i][lnk->link_idx].dma_addr = dma_addr;
1866         return i;
1867 }
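/* Note (editor): lgr->rtokens is indexed [rmb slot][link index] - one
 * rkey/dma_addr pair per link - so a peer RMB registered once remains
 * addressable when traffic moves to another link of the group.
 */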
1868
1869 /* delete an rtoken from all links */
1870 int smc_rtoken_delete(struct smc_link *lnk, __be32 nw_rkey)
1871 {
1872         struct smc_link_group *lgr = smc_get_lgr(lnk);
1873         u32 rkey = ntohl(nw_rkey);
1874         int i, j;
1875
1876         for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
1877                 if (lgr->rtokens[i][lnk->link_idx].rkey == rkey &&
1878                     test_bit(i, lgr->rtokens_used_mask)) {
1879                         for (j = 0; j < SMC_LINKS_PER_LGR_MAX; j++) {
1880                                 lgr->rtokens[i][j].rkey = 0;
1881                                 lgr->rtokens[i][j].dma_addr = 0;
1882                         }
1883                         clear_bit(i, lgr->rtokens_used_mask);
1884                         return 0;
1885                 }
1886         }
1887         return -ENOENT;
1888 }
1889
1890 /* save rkey and dma_addr received from peer during clc handshake */
1891 int smc_rmb_rtoken_handling(struct smc_connection *conn,
1892                             struct smc_link *lnk,
1893                             struct smc_clc_msg_accept_confirm *clc)
1894 {
1895         conn->rtoken_idx = smc_rtoken_add(lnk, clc->rmb_dma_addr,
1896                                           clc->rmb_rkey);
1897         if (conn->rtoken_idx < 0)
1898                 return conn->rtoken_idx;
1899         return 0;
1900 }
1901
1902 static void smc_core_going_away(void)
1903 {
1904         struct smc_ib_device *smcibdev;
1905         struct smcd_dev *smcd;
1906
1907         mutex_lock(&smc_ib_devices.mutex);
1908         list_for_each_entry(smcibdev, &smc_ib_devices.list, list) {
1909                 int i;
1910
1911                 for (i = 0; i < SMC_MAX_PORTS; i++)
1912                         set_bit(i, smcibdev->ports_going_away);
1913         }
1914         mutex_unlock(&smc_ib_devices.mutex);
1915
1916         mutex_lock(&smcd_dev_list.mutex);
1917         list_for_each_entry(smcd, &smcd_dev_list.list, list) {
1918                 smcd->going_away = 1;
1919         }
1920         mutex_unlock(&smcd_dev_list.mutex);
1921 }
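/* Note (editor): the going-away flags are set before termination so that
 * device selection for new connections skips these devices while the
 * existing link groups are torn down.
 */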
1922
1923 /* Clean up all SMC link groups */
1924 static void smc_lgrs_shutdown(void)
1925 {
1926         struct smcd_dev *smcd;
1927
1928         smc_core_going_away();
1929
1930         smc_smcr_terminate_all(NULL);
1931
1932         mutex_lock(&smcd_dev_list.mutex);
1933         list_for_each_entry(smcd, &smcd_dev_list.list, list)
1934                 smc_smcd_terminate_all(smcd);
1935         mutex_unlock(&smcd_dev_list.mutex);
1936 }
1937
1938 static int smc_core_reboot_event(struct notifier_block *this,
1939                                  unsigned long event, void *ptr)
1940 {
1941         smc_lgrs_shutdown();
1942         smc_ib_unregister_client();
1943         return 0;
1944 }
1945
1946 static struct notifier_block smc_reboot_notifier = {
1947         .notifier_call = smc_core_reboot_event,
1948 };
1949
1950 int __init smc_core_init(void)
1951 {
1952         return register_reboot_notifier(&smc_reboot_notifier);
1953 }
1954
1955 /* Called (from smc_exit) when module is removed */
1956 void smc_core_exit(void)
1957 {
1958         unregister_reboot_notifier(&smc_reboot_notifier);
1959         smc_lgrs_shutdown();
1960 }