net/smc: log important pnetid and state change events
net/smc/smc_core.c
// SPDX-License-Identifier: GPL-2.0
/*
 *  Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 *  Basic Transport Functions exploiting Infiniband API
 *
 *  Copyright IBM Corp. 2016
 *
 *  Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
 */

#include <linux/socket.h>
#include <linux/if_vlan.h>
#include <linux/random.h>
#include <linux/workqueue.h>
#include <linux/wait.h>
#include <linux/reboot.h>
#include <net/tcp.h>
#include <net/sock.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_cache.h>

#include "smc.h"
#include "smc_clc.h"
#include "smc_core.h"
#include "smc_ib.h"
#include "smc_wr.h"
#include "smc_llc.h"
#include "smc_cdc.h"
#include "smc_close.h"
#include "smc_ism.h"

#define SMC_LGR_NUM_INCR                256
#define SMC_LGR_FREE_DELAY_SERV         (600 * HZ)
#define SMC_LGR_FREE_DELAY_CLNT         (SMC_LGR_FREE_DELAY_SERV + 10 * HZ)
#define SMC_LGR_FREE_DELAY_FAST         (8 * HZ)

static struct smc_lgr_list smc_lgr_list = {     /* established link groups */
        .lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock),
        .list = LIST_HEAD_INIT(smc_lgr_list.list),
        .num = 0,
};

static atomic_t lgr_cnt = ATOMIC_INIT(0); /* number of existing link groups */
static DECLARE_WAIT_QUEUE_HEAD(lgrs_deleted);

struct smc_ib_up_work {
        struct work_struct      work;
        struct smc_link_group   *lgr;
        struct smc_ib_device    *smcibdev;
        u8                      ibport;
};

static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
                         struct smc_buf_desc *buf_desc);
static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft);

static void smc_link_up_work(struct work_struct *work);
static void smc_link_down_work(struct work_struct *work);

/* return head of link group list and its lock for a given link group */
static inline struct list_head *smc_lgr_list_head(struct smc_link_group *lgr,
                                                  spinlock_t **lgr_lock)
{
        if (lgr->is_smcd) {
                *lgr_lock = &lgr->smcd->lgr_lock;
                return &lgr->smcd->lgr_list;
        }

        *lgr_lock = &smc_lgr_list.lock;
        return &smc_lgr_list.list;
}

static void smc_lgr_schedule_free_work(struct smc_link_group *lgr)
{
        /* client link group creation always follows the server link group
         * creation. For client use a somewhat higher removal delay time,
         * otherwise there is a risk of out-of-sync link groups.
         */
        if (!lgr->freeing && !lgr->freefast) {
                mod_delayed_work(system_wq, &lgr->free_work,
                                 (!lgr->is_smcd && lgr->role == SMC_CLNT) ?
                                                SMC_LGR_FREE_DELAY_CLNT :
                                                SMC_LGR_FREE_DELAY_SERV);
        }
}

void smc_lgr_schedule_free_work_fast(struct smc_link_group *lgr)
{
        if (!lgr->freeing && !lgr->freefast) {
                lgr->freefast = 1;
                mod_delayed_work(system_wq, &lgr->free_work,
                                 SMC_LGR_FREE_DELAY_FAST);
        }
}
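
/* Timing sketch (illustrative, derived from the constants above): a server
 * link group lingers for 600s after its last connection is gone, a client
 * link group for 610s (the extra 10s keeps client teardown behind the
 * server's, avoiding out-of-sync link groups), and a link group queued via
 * smc_lgr_schedule_free_work_fast() for only 8s.
 */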

/* Register connection's alert token in our lookup structure.
 * To use rbtrees we have to implement our own insert core.
 * Requires @conns_lock
 * @conn        connection whose alert token is registered
 */
static void smc_lgr_add_alert_token(struct smc_connection *conn)
{
        struct rb_node **link, *parent = NULL;
        u32 token = conn->alert_token_local;

        link = &conn->lgr->conns_all.rb_node;
        while (*link) {
                struct smc_connection *cur = rb_entry(*link,
                                        struct smc_connection, alert_node);

                parent = *link;
                if (cur->alert_token_local > token)
                        link = &parent->rb_left;
                else
                        link = &parent->rb_right;
        }
        /* Put the new node there */
        rb_link_node(&conn->alert_node, parent, link);
        rb_insert_color(&conn->alert_node, &conn->lgr->conns_all);
}
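
/* Insertion sketch (hypothetical tokens): inserting 5, then 12, then 9
 * with the walk above makes 12 the right child of 5 and 9 the left child
 * of 12, i.e. an rbtree ordered by alert_token_local. An incoming CDC
 * message can then be resolved to its connection in O(log n) via
 * smc_lgr_find_conn() using the same comparison.
 */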

/* assign an SMC-R link to the connection */
static int smcr_lgr_conn_assign_link(struct smc_connection *conn, bool first)
{
        enum smc_link_state expected = first ? SMC_LNK_ACTIVATING :
                                       SMC_LNK_ACTIVE;
        int i, j;

        /* do link balancing */
        for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
                struct smc_link *lnk = &conn->lgr->lnk[i];

                if (lnk->state != expected || lnk->link_is_asym)
                        continue;
                if (conn->lgr->role == SMC_CLNT) {
                        conn->lnk = lnk; /* temporary, SMC server assigns link */
                        break;
                }
                if (conn->lgr->conns_num % 2) {
                        for (j = i + 1; j < SMC_LINKS_PER_LGR_MAX; j++) {
                                struct smc_link *lnk2;

                                lnk2 = &conn->lgr->lnk[j];
                                if (lnk2->state == expected &&
                                    !lnk2->link_is_asym) {
                                        conn->lnk = lnk2;
                                        break;
                                }
                        }
                }
                if (!conn->lnk)
                        conn->lnk = lnk;
                break;
        }
        if (!conn->lnk)
                return SMC_CLC_DECL_NOACTLINK;
        return 0;
}
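
/* Balancing sketch (illustrative): on the server side with two active
 * links, an even conns_num picks the first usable link and an odd
 * conns_num prefers the following one (lnk2), so successive connections
 * alternate between both links. A client only records a provisional
 * choice here; the link assigned by the server is authoritative.
 */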

/* Register connection in link group by assigning an alert token
 * registered in a search tree.
 * Requires @conns_lock
 * Note that '0' is a reserved value and not assigned.
 */
static int smc_lgr_register_conn(struct smc_connection *conn, bool first)
{
        struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
        static atomic_t nexttoken = ATOMIC_INIT(0);
        int rc;

        if (!conn->lgr->is_smcd) {
                rc = smcr_lgr_conn_assign_link(conn, first);
                if (rc)
                        return rc;
        }
        /* find a new alert_token_local value not yet used by some connection
         * in this link group
         */
        sock_hold(&smc->sk); /* sock_put in smc_lgr_unregister_conn() */
        while (!conn->alert_token_local) {
                conn->alert_token_local = atomic_inc_return(&nexttoken);
                if (smc_lgr_find_conn(conn->alert_token_local, conn->lgr))
                        conn->alert_token_local = 0;
        }
        smc_lgr_add_alert_token(conn);
        conn->lgr->conns_num++;
        return 0;
}

/* Unregister connection and reset the alert token of the given connection */
static void __smc_lgr_unregister_conn(struct smc_connection *conn)
{
        struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
        struct smc_link_group *lgr = conn->lgr;

        rb_erase(&conn->alert_node, &lgr->conns_all);
        lgr->conns_num--;
        conn->alert_token_local = 0;
        sock_put(&smc->sk); /* sock_hold in smc_lgr_register_conn() */
}

/* Unregister connection from lgr */
static void smc_lgr_unregister_conn(struct smc_connection *conn)
{
        struct smc_link_group *lgr = conn->lgr;

        if (!lgr)
                return;
        write_lock_bh(&lgr->conns_lock);
        if (conn->alert_token_local)
                __smc_lgr_unregister_conn(conn);
        write_unlock_bh(&lgr->conns_lock);
        conn->lgr = NULL;
}

void smc_lgr_cleanup_early(struct smc_connection *conn)
{
        struct smc_link_group *lgr = conn->lgr;
        spinlock_t *lgr_lock;

        if (!lgr)
                return;

        smc_conn_free(conn);
        smc_lgr_list_head(lgr, &lgr_lock);
        spin_lock_bh(lgr_lock);
        /* do not use this link group for new connections */
        if (!list_empty(&lgr->list))
                list_del_init(&lgr->list);
        spin_unlock_bh(lgr_lock);
        smc_lgr_schedule_free_work_fast(lgr);
}

static void smcr_lgr_link_deactivate_all(struct smc_link_group *lgr)
{
        int i;

        for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
                struct smc_link *lnk = &lgr->lnk[i];

                if (smc_link_usable(lnk))
                        lnk->state = SMC_LNK_INACTIVE;
        }
        wake_up_interruptible_all(&lgr->llc_waiter);
}

static void smc_lgr_free(struct smc_link_group *lgr);

static void smc_lgr_free_work(struct work_struct *work)
{
        struct smc_link_group *lgr = container_of(to_delayed_work(work),
                                                  struct smc_link_group,
                                                  free_work);
        spinlock_t *lgr_lock;
        bool conns;

        smc_lgr_list_head(lgr, &lgr_lock);
        spin_lock_bh(lgr_lock);
        if (lgr->freeing) {
                spin_unlock_bh(lgr_lock);
                return;
        }
        read_lock_bh(&lgr->conns_lock);
        conns = RB_EMPTY_ROOT(&lgr->conns_all);
        read_unlock_bh(&lgr->conns_lock);
        if (!conns) { /* number of lgr connections is no longer zero */
                spin_unlock_bh(lgr_lock);
                return;
        }
        list_del_init(&lgr->list); /* remove from smc_lgr_list */
        lgr->freeing = 1; /* this instance does the freeing, no new schedule */
        spin_unlock_bh(lgr_lock);
        cancel_delayed_work(&lgr->free_work);

        if (!lgr->is_smcd && !lgr->terminating)
                smc_llc_send_link_delete_all(lgr, true,
                                             SMC_LLC_DEL_PROG_INIT_TERM);
        if (lgr->is_smcd && !lgr->terminating)
                smc_ism_signal_shutdown(lgr);
        if (!lgr->is_smcd)
                smcr_lgr_link_deactivate_all(lgr);
        smc_lgr_free(lgr);
}

static void smc_lgr_terminate_work(struct work_struct *work)
{
        struct smc_link_group *lgr = container_of(work, struct smc_link_group,
                                                  terminate_work);

        __smc_lgr_terminate(lgr, true);
}

/* return next unique link id for the lgr */
static u8 smcr_next_link_id(struct smc_link_group *lgr)
{
        u8 link_id;
        int i;

        while (1) {
                link_id = ++lgr->next_link_id;
                if (!link_id)   /* skip zero as link_id */
                        link_id = ++lgr->next_link_id;
                for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
                        if (smc_link_usable(&lgr->lnk[i]) &&
                            lgr->lnk[i].link_id == link_id)
                                break;  /* id already in use, try next one */
                }
                if (i == SMC_LINKS_PER_LGR_MAX)
                        break;          /* unique link_id found */
        }
        return link_id;
}
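
/* Usage note (sketch): link_id is a u8, so ids wrap within 1..255 with 0
 * skipped; since at most SMC_LINKS_PER_LGR_MAX ids can be in use per link
 * group, the retry loop above always finds a free id and terminates.
 */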

int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
                   u8 link_idx, struct smc_init_info *ini)
{
        u8 rndvec[3];
        int rc;

        get_device(&ini->ib_dev->ibdev->dev);
        atomic_inc(&ini->ib_dev->lnk_cnt);
        lnk->state = SMC_LNK_ACTIVATING;
        lnk->link_id = smcr_next_link_id(lgr);
        lnk->lgr = lgr;
        lnk->link_idx = link_idx;
        lnk->smcibdev = ini->ib_dev;
        lnk->ibport = ini->ib_port;
        lnk->path_mtu = ini->ib_dev->pattr[ini->ib_port - 1].active_mtu;
        smc_llc_link_set_uid(lnk);
        INIT_WORK(&lnk->link_down_wrk, smc_link_down_work);
        if (!ini->ib_dev->initialized) {
                rc = smc_ib_setup_per_ibdev(ini->ib_dev);
                if (rc)
                        goto out;
        }
        get_random_bytes(rndvec, sizeof(rndvec));
        lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) +
                (rndvec[2] << 16);
        rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport,
                                  ini->vlan_id, lnk->gid, &lnk->sgid_index);
        if (rc)
                goto out;
        rc = smc_llc_link_init(lnk);
        if (rc)
                goto out;
        rc = smc_wr_alloc_link_mem(lnk);
        if (rc)
                goto clear_llc_lnk;
        rc = smc_ib_create_protection_domain(lnk);
        if (rc)
                goto free_link_mem;
        rc = smc_ib_create_queue_pair(lnk);
        if (rc)
                goto dealloc_pd;
        rc = smc_wr_create_link(lnk);
        if (rc)
                goto destroy_qp;
        return 0;

destroy_qp:
        smc_ib_destroy_queue_pair(lnk);
dealloc_pd:
        smc_ib_dealloc_protection_domain(lnk);
free_link_mem:
        smc_wr_free_link_mem(lnk);
clear_llc_lnk:
        smc_llc_link_clear(lnk, false);
out:
        put_device(&ini->ib_dev->ibdev->dev);
        memset(lnk, 0, sizeof(struct smc_link));
        lnk->state = SMC_LNK_UNUSED;
        if (!atomic_dec_return(&ini->ib_dev->lnk_cnt))
                wake_up(&ini->ib_dev->lnks_deleted);
        return rc;
}

/* create a new SMC link group */
static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
{
        struct smc_link_group *lgr;
        struct list_head *lgr_list;
        struct smc_link *lnk;
        spinlock_t *lgr_lock;
        u8 link_idx;
        int rc = 0;
        int i;

        if (ini->is_smcd && ini->vlan_id) {
                if (smc_ism_get_vlan(ini->ism_dev, ini->vlan_id)) {
                        rc = SMC_CLC_DECL_ISMVLANERR;
                        goto out;
                }
        }

        lgr = kzalloc(sizeof(*lgr), GFP_KERNEL);
        if (!lgr) {
                rc = SMC_CLC_DECL_MEM;
                goto ism_put_vlan;
        }
        lgr->is_smcd = ini->is_smcd;
        lgr->sync_err = 0;
        lgr->terminating = 0;
        lgr->freefast = 0;
        lgr->freeing = 0;
        lgr->vlan_id = ini->vlan_id;
        mutex_init(&lgr->sndbufs_lock);
        mutex_init(&lgr->rmbs_lock);
        rwlock_init(&lgr->conns_lock);
        for (i = 0; i < SMC_RMBE_SIZES; i++) {
                INIT_LIST_HEAD(&lgr->sndbufs[i]);
                INIT_LIST_HEAD(&lgr->rmbs[i]);
        }
        lgr->next_link_id = 0;
        smc_lgr_list.num += SMC_LGR_NUM_INCR;
        memcpy(&lgr->id, (u8 *)&smc_lgr_list.num, SMC_LGR_ID_SIZE);
        INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work);
        INIT_WORK(&lgr->terminate_work, smc_lgr_terminate_work);
        lgr->conns_all = RB_ROOT;
        if (ini->is_smcd) {
                /* SMC-D specific settings */
                get_device(&ini->ism_dev->dev);
                lgr->peer_gid = ini->ism_gid;
                lgr->smcd = ini->ism_dev;
                lgr_list = &ini->ism_dev->lgr_list;
                lgr_lock = &lgr->smcd->lgr_lock;
                lgr->peer_shutdown = 0;
                atomic_inc(&ini->ism_dev->lgr_cnt);
        } else {
                /* SMC-R specific settings */
                lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
                memcpy(lgr->peer_systemid, ini->ib_lcl->id_for_peer,
                       SMC_SYSTEMID_LEN);
                memcpy(lgr->pnet_id, ini->ib_dev->pnetid[ini->ib_port - 1],
                       SMC_MAX_PNETID_LEN);
                smc_llc_lgr_init(lgr, smc);

                link_idx = SMC_SINGLE_LINK;
                lnk = &lgr->lnk[link_idx];
                rc = smcr_link_init(lgr, lnk, link_idx, ini);
                if (rc)
                        goto free_lgr;
                lgr_list = &smc_lgr_list.list;
                lgr_lock = &smc_lgr_list.lock;
                atomic_inc(&lgr_cnt);
        }
        smc->conn.lgr = lgr;
        spin_lock_bh(lgr_lock);
        list_add(&lgr->list, lgr_list);
        spin_unlock_bh(lgr_lock);
        return 0;

free_lgr:
        kfree(lgr);
ism_put_vlan:
        if (ini->is_smcd && ini->vlan_id)
                smc_ism_put_vlan(ini->ism_dev, ini->vlan_id);
out:
        if (rc < 0) {
                if (rc == -ENOMEM)
                        rc = SMC_CLC_DECL_MEM;
                else
                        rc = SMC_CLC_DECL_INTERR;
        }
        return rc;
}

static int smc_write_space(struct smc_connection *conn)
{
        int buffer_len = conn->peer_rmbe_size;
        union smc_host_cursor prod;
        union smc_host_cursor cons;
        int space;

        smc_curs_copy(&prod, &conn->local_tx_ctrl.prod, conn);
        smc_curs_copy(&cons, &conn->local_rx_ctrl.cons, conn);
        /* determine rx_buf space */
        space = buffer_len - smc_curs_diff(buffer_len, &cons, &prod);
        return space;
}
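
/* Worked example (hypothetical cursor values): with peer_rmbe_size =
 * 65536, cons at offset 16384 and prod at offset 49152, smc_curs_diff()
 * reports 32768 bytes still in flight, so smc_write_space() returns
 * 65536 - 32768 = 32768 bytes of free peer RMBE space.
 */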

static int smc_switch_cursor(struct smc_sock *smc)
{
        struct smc_connection *conn = &smc->conn;
        union smc_host_cursor cons, fin;
        int rc = 0;
        int diff;

        smc_curs_copy(&conn->tx_curs_sent, &conn->tx_curs_fin, conn);
        smc_curs_copy(&fin, &conn->local_tx_ctrl_fin, conn);
        /* set prod cursor to old state, enforce tx_rdma_writes() */
        smc_curs_copy(&conn->local_tx_ctrl.prod, &fin, conn);
        smc_curs_copy(&cons, &conn->local_rx_ctrl.cons, conn);

        if (smc_curs_comp(conn->peer_rmbe_size, &cons, &fin) < 0) {
                /* cons cursor advanced more than fin, and prod was set to
                 * fin above, so now prod is smaller than cons. Fix that.
                 */
                diff = smc_curs_diff(conn->peer_rmbe_size, &fin, &cons);
                smc_curs_add(conn->sndbuf_desc->len,
                             &conn->tx_curs_sent, diff);
                smc_curs_add(conn->sndbuf_desc->len,
                             &conn->tx_curs_fin, diff);

                smp_mb__before_atomic();
                atomic_add(diff, &conn->sndbuf_space);
                smp_mb__after_atomic();

                smc_curs_add(conn->peer_rmbe_size,
                             &conn->local_tx_ctrl.prod, diff);
                smc_curs_add(conn->peer_rmbe_size,
                             &conn->local_tx_ctrl_fin, diff);
        }
        /* recalculate, value is used by tx_rdma_writes() */
        atomic_set(&smc->conn.peer_rmbe_space, smc_write_space(conn));

        if (smc->sk.sk_state != SMC_INIT &&
            smc->sk.sk_state != SMC_CLOSED) {
                rc = smcr_cdc_msg_send_validation(conn);
                if (!rc) {
                        schedule_delayed_work(&conn->tx_work, 0);
                        smc->sk.sk_data_ready(&smc->sk);
                }
        }
        return rc;
}
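
/* Fixup example (hypothetical cursor values): with peer_rmbe_size =
 * 65536, fin at offset 8192 and cons already at 12288, resetting prod to
 * fin would leave prod behind cons. The block above then adds
 * diff = 4096 to tx_curs_sent/tx_curs_fin, to sndbuf_space and to the
 * prod/fin control cursors, so prod >= cons holds again before
 * tx_rdma_writes() resends on the new link.
 */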

struct smc_link *smc_switch_conns(struct smc_link_group *lgr,
                                  struct smc_link *from_lnk, bool is_dev_err)
{
        struct smc_link *to_lnk = NULL;
        struct smc_connection *conn;
        struct smc_sock *smc;
        struct rb_node *node;
        int i, rc = 0;

        /* link is inactive, wake up tx waiters */
        smc_wr_wakeup_tx_wait(from_lnk);

        for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
                if (lgr->lnk[i].state != SMC_LNK_ACTIVE ||
                    i == from_lnk->link_idx)
                        continue;
                if (is_dev_err && from_lnk->smcibdev == lgr->lnk[i].smcibdev &&
                    from_lnk->ibport == lgr->lnk[i].ibport)
                        continue;
                to_lnk = &lgr->lnk[i];
                break;
        }
        if (!to_lnk) {
                smc_lgr_terminate_sched(lgr);
                return NULL;
        }
again:
        read_lock_bh(&lgr->conns_lock);
        for (node = rb_first(&lgr->conns_all); node; node = rb_next(node)) {
                conn = rb_entry(node, struct smc_connection, alert_node);
                if (conn->lnk != from_lnk)
                        continue;
                smc = container_of(conn, struct smc_sock, conn);
                /* conn->lnk not yet set in SMC_INIT state */
                if (smc->sk.sk_state == SMC_INIT)
                        continue;
                if (smc->sk.sk_state == SMC_CLOSED ||
                    smc->sk.sk_state == SMC_PEERCLOSEWAIT1 ||
                    smc->sk.sk_state == SMC_PEERCLOSEWAIT2 ||
                    smc->sk.sk_state == SMC_APPFINCLOSEWAIT ||
                    smc->sk.sk_state == SMC_APPCLOSEWAIT1 ||
                    smc->sk.sk_state == SMC_APPCLOSEWAIT2 ||
                    smc->sk.sk_state == SMC_PEERFINCLOSEWAIT ||
                    smc->sk.sk_state == SMC_PEERABORTWAIT ||
                    smc->sk.sk_state == SMC_PROCESSABORT) {
                        spin_lock_bh(&conn->send_lock);
                        conn->lnk = to_lnk;
                        spin_unlock_bh(&conn->send_lock);
                        continue;
                }
                sock_hold(&smc->sk);
                read_unlock_bh(&lgr->conns_lock);
                /* avoid race with smcr_tx_sndbuf_nonempty() */
                spin_lock_bh(&conn->send_lock);
                conn->lnk = to_lnk;
                rc = smc_switch_cursor(smc);
                spin_unlock_bh(&conn->send_lock);
                sock_put(&smc->sk);
                if (rc) {
                        smcr_link_down_cond_sched(to_lnk);
                        return NULL;
                }
                goto again;
        }
        read_unlock_bh(&lgr->conns_lock);
        return to_lnk;
}

static void smcr_buf_unuse(struct smc_buf_desc *rmb_desc,
                           struct smc_link_group *lgr)
{
        int rc;

        if (rmb_desc->is_conf_rkey && !list_empty(&lgr->list)) {
                /* unregister rmb with peer */
                rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY);
                if (!rc) {
                        /* protect against smc_llc_cli_rkey_exchange() */
                        mutex_lock(&lgr->llc_conf_mutex);
                        smc_llc_do_delete_rkey(lgr, rmb_desc);
                        rmb_desc->is_conf_rkey = false;
                        mutex_unlock(&lgr->llc_conf_mutex);
                        smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
                }
        }

        if (rmb_desc->is_reg_err) {
                /* buf registration failed, reuse not possible */
                mutex_lock(&lgr->rmbs_lock);
                list_del(&rmb_desc->list);
                mutex_unlock(&lgr->rmbs_lock);

                smc_buf_free(lgr, true, rmb_desc);
        } else {
                rmb_desc->used = 0;
        }
}

static void smc_buf_unuse(struct smc_connection *conn,
                          struct smc_link_group *lgr)
{
        if (conn->sndbuf_desc)
                conn->sndbuf_desc->used = 0;
        if (conn->rmb_desc && lgr->is_smcd)
                conn->rmb_desc->used = 0;
        else if (conn->rmb_desc)
                smcr_buf_unuse(conn->rmb_desc, lgr);
}

/* remove a finished connection from its link group */
void smc_conn_free(struct smc_connection *conn)
{
        struct smc_link_group *lgr = conn->lgr;

        if (!lgr)
                return;
        if (lgr->is_smcd) {
                if (!list_empty(&lgr->list))
                        smc_ism_unset_conn(conn);
                tasklet_kill(&conn->rx_tsklet);
        } else {
                smc_cdc_tx_dismiss_slots(conn);
                if (current_work() != &conn->abort_work)
                        cancel_work_sync(&conn->abort_work);
        }
        if (!list_empty(&lgr->list)) {
                smc_lgr_unregister_conn(conn);
                smc_buf_unuse(conn, lgr); /* allow buffer reuse */
        }

        if (!lgr->conns_num)
                smc_lgr_schedule_free_work(lgr);
}

/* unregister a link from a buf_desc */
static void smcr_buf_unmap_link(struct smc_buf_desc *buf_desc, bool is_rmb,
                                struct smc_link *lnk)
{
        if (is_rmb)
                buf_desc->is_reg_mr[lnk->link_idx] = false;
        if (!buf_desc->is_map_ib[lnk->link_idx])
                return;
        if (is_rmb) {
                if (buf_desc->mr_rx[lnk->link_idx]) {
                        smc_ib_put_memory_region(
                                        buf_desc->mr_rx[lnk->link_idx]);
                        buf_desc->mr_rx[lnk->link_idx] = NULL;
                }
                smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_FROM_DEVICE);
        } else {
                smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_TO_DEVICE);
        }
        sg_free_table(&buf_desc->sgt[lnk->link_idx]);
        buf_desc->is_map_ib[lnk->link_idx] = false;
}

/* unmap all buffers of lgr for a deleted link */
static void smcr_buf_unmap_lgr(struct smc_link *lnk)
{
        struct smc_link_group *lgr = lnk->lgr;
        struct smc_buf_desc *buf_desc, *bf;
        int i;

        for (i = 0; i < SMC_RMBE_SIZES; i++) {
                mutex_lock(&lgr->rmbs_lock);
                list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list)
                        smcr_buf_unmap_link(buf_desc, true, lnk);
                mutex_unlock(&lgr->rmbs_lock);
                mutex_lock(&lgr->sndbufs_lock);
                list_for_each_entry_safe(buf_desc, bf, &lgr->sndbufs[i],
                                         list)
                        smcr_buf_unmap_link(buf_desc, false, lnk);
                mutex_unlock(&lgr->sndbufs_lock);
        }
}

static void smcr_rtoken_clear_link(struct smc_link *lnk)
{
        struct smc_link_group *lgr = lnk->lgr;
        int i;

        for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
                lgr->rtokens[i][lnk->link_idx].rkey = 0;
                lgr->rtokens[i][lnk->link_idx].dma_addr = 0;
        }
}

/* must be called under lgr->llc_conf_mutex lock */
void smcr_link_clear(struct smc_link *lnk, bool log)
{
        struct smc_ib_device *smcibdev;

        if (!lnk->lgr || lnk->state == SMC_LNK_UNUSED)
                return;
        lnk->peer_qpn = 0;
        smc_llc_link_clear(lnk, log);
        smcr_buf_unmap_lgr(lnk);
        smcr_rtoken_clear_link(lnk);
        smc_ib_modify_qp_reset(lnk);
        smc_wr_free_link(lnk);
        smc_ib_destroy_queue_pair(lnk);
        smc_ib_dealloc_protection_domain(lnk);
        smc_wr_free_link_mem(lnk);
        put_device(&lnk->smcibdev->ibdev->dev);
        smcibdev = lnk->smcibdev;
        memset(lnk, 0, sizeof(struct smc_link));
        lnk->state = SMC_LNK_UNUSED;
        if (!atomic_dec_return(&smcibdev->lnk_cnt))
                wake_up(&smcibdev->lnks_deleted);
}

static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb,
                          struct smc_buf_desc *buf_desc)
{
        int i;

        for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
                smcr_buf_unmap_link(buf_desc, is_rmb, &lgr->lnk[i]);

        if (buf_desc->pages)
                __free_pages(buf_desc->pages, buf_desc->order);
        kfree(buf_desc);
}

static void smcd_buf_free(struct smc_link_group *lgr, bool is_dmb,
                          struct smc_buf_desc *buf_desc)
{
        if (is_dmb) {
                /* restore original buf len */
                buf_desc->len += sizeof(struct smcd_cdc_msg);
                smc_ism_unregister_dmb(lgr->smcd, buf_desc);
        } else {
                kfree(buf_desc->cpu_addr);
        }
        kfree(buf_desc);
}

static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
                         struct smc_buf_desc *buf_desc)
{
        if (lgr->is_smcd)
                smcd_buf_free(lgr, is_rmb, buf_desc);
        else
                smcr_buf_free(lgr, is_rmb, buf_desc);
}

static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb)
{
        struct smc_buf_desc *buf_desc, *bf_desc;
        struct list_head *buf_list;
        int i;

        for (i = 0; i < SMC_RMBE_SIZES; i++) {
                if (is_rmb)
                        buf_list = &lgr->rmbs[i];
                else
                        buf_list = &lgr->sndbufs[i];
                list_for_each_entry_safe(buf_desc, bf_desc, buf_list,
                                         list) {
                        list_del(&buf_desc->list);
                        smc_buf_free(lgr, is_rmb, buf_desc);
                }
        }
}

static void smc_lgr_free_bufs(struct smc_link_group *lgr)
{
        /* free send buffers */
        __smc_lgr_free_bufs(lgr, false);
        /* free rmbs */
        __smc_lgr_free_bufs(lgr, true);
}

/* remove a link group */
static void smc_lgr_free(struct smc_link_group *lgr)
{
        int i;

        if (!lgr->is_smcd) {
                mutex_lock(&lgr->llc_conf_mutex);
                for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
                        if (lgr->lnk[i].state != SMC_LNK_UNUSED)
                                smcr_link_clear(&lgr->lnk[i], false);
                }
                mutex_unlock(&lgr->llc_conf_mutex);
                smc_llc_lgr_clear(lgr);
        }

        smc_lgr_free_bufs(lgr);
        if (lgr->is_smcd) {
                if (!lgr->terminating) {
                        smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
                        put_device(&lgr->smcd->dev);
                }
                if (!atomic_dec_return(&lgr->smcd->lgr_cnt))
                        wake_up(&lgr->smcd->lgrs_deleted);
        } else {
                if (!atomic_dec_return(&lgr_cnt))
                        wake_up(&lgrs_deleted);
        }
        kfree(lgr);
}

static void smcd_unregister_all_dmbs(struct smc_link_group *lgr)
{
        int i;

        for (i = 0; i < SMC_RMBE_SIZES; i++) {
                struct smc_buf_desc *buf_desc;

                list_for_each_entry(buf_desc, &lgr->rmbs[i], list) {
                        buf_desc->len += sizeof(struct smcd_cdc_msg);
                        smc_ism_unregister_dmb(lgr->smcd, buf_desc);
                }
        }
}

static void smc_sk_wake_ups(struct smc_sock *smc)
{
        smc->sk.sk_write_space(&smc->sk);
        smc->sk.sk_data_ready(&smc->sk);
        smc->sk.sk_state_change(&smc->sk);
}

/* kill a connection */
static void smc_conn_kill(struct smc_connection *conn, bool soft)
{
        struct smc_sock *smc = container_of(conn, struct smc_sock, conn);

        if (conn->lgr->is_smcd && conn->lgr->peer_shutdown)
                conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
        else
                smc_close_abort(conn);
        conn->killed = 1;
        smc->sk.sk_err = ECONNABORTED;
        smc_sk_wake_ups(smc);
        if (conn->lgr->is_smcd) {
                smc_ism_unset_conn(conn);
                if (soft)
                        tasklet_kill(&conn->rx_tsklet);
                else
                        tasklet_unlock_wait(&conn->rx_tsklet);
        } else {
                smc_cdc_tx_dismiss_slots(conn);
        }
        smc_lgr_unregister_conn(conn);
        smc_close_active_abort(smc);
}

static void smc_lgr_cleanup(struct smc_link_group *lgr)
{
        if (lgr->is_smcd) {
                smc_ism_signal_shutdown(lgr);
                smcd_unregister_all_dmbs(lgr);
                smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
                put_device(&lgr->smcd->dev);
        } else {
                u32 rsn = lgr->llc_termination_rsn;

                if (!rsn)
                        rsn = SMC_LLC_DEL_PROG_INIT_TERM;
                smc_llc_send_link_delete_all(lgr, false, rsn);
                smcr_lgr_link_deactivate_all(lgr);
        }
}

/* terminate link group
 * @soft: true if link group shutdown can take its time
 *        false if immediate link group shutdown is required
 */
static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft)
{
        struct smc_connection *conn;
        struct smc_sock *smc;
        struct rb_node *node;

        if (lgr->terminating)
                return; /* lgr already terminating */
        /* cancel free_work sync, will terminate when lgr->freeing is set */
        cancel_delayed_work_sync(&lgr->free_work);
        lgr->terminating = 1;

        /* kill remaining link group connections */
        read_lock_bh(&lgr->conns_lock);
        node = rb_first(&lgr->conns_all);
        while (node) {
                read_unlock_bh(&lgr->conns_lock);
                conn = rb_entry(node, struct smc_connection, alert_node);
                smc = container_of(conn, struct smc_sock, conn);
                sock_hold(&smc->sk); /* sock_put below */
                lock_sock(&smc->sk);
                smc_conn_kill(conn, soft);
                release_sock(&smc->sk);
                sock_put(&smc->sk); /* sock_hold above */
                read_lock_bh(&lgr->conns_lock);
                node = rb_first(&lgr->conns_all);
        }
        read_unlock_bh(&lgr->conns_lock);
        smc_lgr_cleanup(lgr);
        smc_lgr_free(lgr);
}

/* unlink link group and schedule termination */
void smc_lgr_terminate_sched(struct smc_link_group *lgr)
{
        spinlock_t *lgr_lock;

        smc_lgr_list_head(lgr, &lgr_lock);
        spin_lock_bh(lgr_lock);
        if (list_empty(&lgr->list) || lgr->terminating || lgr->freeing) {
                spin_unlock_bh(lgr_lock);
                return; /* lgr already terminating */
        }
        list_del_init(&lgr->list);
        lgr->freeing = 1;
        spin_unlock_bh(lgr_lock);
        schedule_work(&lgr->terminate_work);
}

/* Called when peer lgr shutdown (regularly or abnormally) is received */
void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan)
{
        struct smc_link_group *lgr, *l;
        LIST_HEAD(lgr_free_list);

        /* run common cleanup function and build free list */
        spin_lock_bh(&dev->lgr_lock);
        list_for_each_entry_safe(lgr, l, &dev->lgr_list, list) {
                if ((!peer_gid || lgr->peer_gid == peer_gid) &&
                    (vlan == VLAN_VID_MASK || lgr->vlan_id == vlan)) {
                        if (peer_gid) /* peer triggered termination */
                                lgr->peer_shutdown = 1;
                        list_move(&lgr->list, &lgr_free_list);
                        lgr->freeing = 1;
                }
        }
        spin_unlock_bh(&dev->lgr_lock);

        /* cancel the regular free workers and actually free lgrs */
        list_for_each_entry_safe(lgr, l, &lgr_free_list, list) {
                list_del_init(&lgr->list);
                schedule_work(&lgr->terminate_work);
        }
}

/* Called when an SMCD device is removed or the smc module is unloaded */
void smc_smcd_terminate_all(struct smcd_dev *smcd)
{
        struct smc_link_group *lgr, *lg;
        LIST_HEAD(lgr_free_list);

        spin_lock_bh(&smcd->lgr_lock);
        list_splice_init(&smcd->lgr_list, &lgr_free_list);
        list_for_each_entry(lgr, &lgr_free_list, list)
                lgr->freeing = 1;
        spin_unlock_bh(&smcd->lgr_lock);

        list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) {
                list_del_init(&lgr->list);
                __smc_lgr_terminate(lgr, false);
        }

        if (atomic_read(&smcd->lgr_cnt))
                wait_event(smcd->lgrs_deleted, !atomic_read(&smcd->lgr_cnt));
}

/* Called when an SMCR device is removed or the smc module is unloaded.
 * If smcibdev is given, all SMCR link groups using this device are terminated.
 * If smcibdev is NULL, all SMCR link groups are terminated.
 */
void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
{
        struct smc_link_group *lgr, *lg;
        LIST_HEAD(lgr_free_list);
        int i;

        spin_lock_bh(&smc_lgr_list.lock);
        if (!smcibdev) {
                list_splice_init(&smc_lgr_list.list, &lgr_free_list);
                list_for_each_entry(lgr, &lgr_free_list, list)
                        lgr->freeing = 1;
        } else {
                list_for_each_entry_safe(lgr, lg, &smc_lgr_list.list, list) {
                        for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
                                if (lgr->lnk[i].smcibdev == smcibdev)
                                        smcr_link_down_cond_sched(&lgr->lnk[i]);
                        }
                }
        }
        spin_unlock_bh(&smc_lgr_list.lock);

        list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) {
                list_del_init(&lgr->list);
                smc_llc_set_termination_rsn(lgr, SMC_LLC_DEL_OP_INIT_TERM);
                __smc_lgr_terminate(lgr, false);
        }

        if (smcibdev) {
                if (atomic_read(&smcibdev->lnk_cnt))
                        wait_event(smcibdev->lnks_deleted,
                                   !atomic_read(&smcibdev->lnk_cnt));
        } else {
                if (atomic_read(&lgr_cnt))
                        wait_event(lgrs_deleted, !atomic_read(&lgr_cnt));
        }
}

/* set new lgr type and clear all asymmetric link tagging */
void smcr_lgr_set_type(struct smc_link_group *lgr, enum smc_lgr_type new_type)
{
        char *lgr_type = "";
        int i;

        for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
                if (smc_link_usable(&lgr->lnk[i]))
                        lgr->lnk[i].link_is_asym = false;
        if (lgr->type == new_type)
                return;
        lgr->type = new_type;

        switch (lgr->type) {
        case SMC_LGR_NONE:
                lgr_type = "NONE";
                break;
        case SMC_LGR_SINGLE:
                lgr_type = "SINGLE";
                break;
        case SMC_LGR_SYMMETRIC:
                lgr_type = "SYMMETRIC";
                break;
        case SMC_LGR_ASYMMETRIC_PEER:
                lgr_type = "ASYMMETRIC_PEER";
                break;
        case SMC_LGR_ASYMMETRIC_LOCAL:
                lgr_type = "ASYMMETRIC_LOCAL";
                break;
        }
        pr_warn_ratelimited("smc: SMC-R lg %*phN state changed: "
                            "%s, pnetid %.16s\n", SMC_LGR_ID_SIZE, &lgr->id,
                            lgr_type, lgr->pnet_id);
}
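
/* Sample of the log line emitted above (id and pnetid are made up):
 *   smc: SMC-R lg 00000200 state changed: SINGLE, pnetid NET25
 * %*phN dumps the SMC_LGR_ID_SIZE-byte link group id as hex, and %.16s
 * caps the pnetid at SMC_MAX_PNETID_LEN characters.
 */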

/* set new lgr type and tag a link as asymmetric */
void smcr_lgr_set_type_asym(struct smc_link_group *lgr,
                            enum smc_lgr_type new_type, int asym_lnk_idx)
{
        smcr_lgr_set_type(lgr, new_type);
        lgr->lnk[asym_lnk_idx].link_is_asym = true;
}

/* abort connection, abort_work scheduled from tasklet context */
static void smc_conn_abort_work(struct work_struct *work)
{
        struct smc_connection *conn = container_of(work,
                                                   struct smc_connection,
                                                   abort_work);
        struct smc_sock *smc = container_of(conn, struct smc_sock, conn);

        smc_conn_kill(conn, true);
        sock_put(&smc->sk); /* sock_hold done by schedulers of abort_work */
}

/* link is up - establish alternate link if applicable */
static void smcr_link_up(struct smc_link_group *lgr,
                         struct smc_ib_device *smcibdev, u8 ibport)
{
        struct smc_link *link = NULL;

        if (list_empty(&lgr->list) ||
            lgr->type == SMC_LGR_SYMMETRIC ||
            lgr->type == SMC_LGR_ASYMMETRIC_PEER)
                return;

        if (lgr->role == SMC_SERV) {
                /* trigger local add link processing */
                link = smc_llc_usable_link(lgr);
                if (!link)
                        return;
                smc_llc_srv_add_link_local(link);
        } else {
                /* invite server to start add link processing */
                u8 gid[SMC_GID_SIZE];

                if (smc_ib_determine_gid(smcibdev, ibport, lgr->vlan_id, gid,
                                         NULL))
                        return;
                if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) {
                        /* some other llc task is ongoing */
                        wait_event_interruptible_timeout(lgr->llc_waiter,
                                (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE),
                                SMC_LLC_WAIT_TIME);
                }
                if (list_empty(&lgr->list) ||
                    !smc_ib_port_active(smcibdev, ibport))
                        return; /* lgr or device no longer active */
                link = smc_llc_usable_link(lgr);
                if (!link)
                        return;
                smc_llc_send_add_link(link, smcibdev->mac[ibport - 1], gid,
                                      NULL, SMC_LLC_REQ);
        }
}

void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport)
{
        struct smc_ib_up_work *ib_work;
        struct smc_link_group *lgr, *n;

        list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) {
                if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id,
                            SMC_MAX_PNETID_LEN) ||
                    lgr->type == SMC_LGR_SYMMETRIC ||
                    lgr->type == SMC_LGR_ASYMMETRIC_PEER)
                        continue;
                ib_work = kmalloc(sizeof(*ib_work), GFP_KERNEL);
                if (!ib_work)
                        continue;
                INIT_WORK(&ib_work->work, smc_link_up_work);
                ib_work->lgr = lgr;
                ib_work->smcibdev = smcibdev;
                ib_work->ibport = ibport;
                schedule_work(&ib_work->work);
        }
}

/* link is down - switch connections to alternate link,
 * must be called under lgr->llc_conf_mutex lock
 */
static void smcr_link_down(struct smc_link *lnk)
{
        struct smc_link_group *lgr = lnk->lgr;
        struct smc_link *to_lnk;
        int del_link_id;

        if (!lgr || lnk->state == SMC_LNK_UNUSED || list_empty(&lgr->list))
                return;

        smc_ib_modify_qp_reset(lnk);
        to_lnk = smc_switch_conns(lgr, lnk, true);
        if (!to_lnk) { /* no backup link available */
                smcr_link_clear(lnk, true);
                return;
        }
        smcr_lgr_set_type(lgr, SMC_LGR_SINGLE);
        del_link_id = lnk->link_id;

        if (lgr->role == SMC_SERV) {
                /* trigger local delete link processing */
                smc_llc_srv_delete_link_local(to_lnk, del_link_id);
        } else {
                if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) {
                        /* another llc task is ongoing */
                        mutex_unlock(&lgr->llc_conf_mutex);
                        wait_event_interruptible_timeout(lgr->llc_waiter,
                                (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE),
                                SMC_LLC_WAIT_TIME);
                        mutex_lock(&lgr->llc_conf_mutex);
                }
                smc_llc_send_delete_link(to_lnk, del_link_id, SMC_LLC_REQ, true,
                                         SMC_LLC_DEL_LOST_PATH);
        }
}

/* must be called under lgr->llc_conf_mutex lock */
void smcr_link_down_cond(struct smc_link *lnk)
{
        if (smc_link_downing(&lnk->state))
                smcr_link_down(lnk);
}

/* will get the lgr->llc_conf_mutex lock */
void smcr_link_down_cond_sched(struct smc_link *lnk)
{
        if (smc_link_downing(&lnk->state))
                schedule_work(&lnk->link_down_wrk);
}

void smcr_port_err(struct smc_ib_device *smcibdev, u8 ibport)
{
        struct smc_link_group *lgr, *n;
        int i;

        list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) {
                if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id,
                            SMC_MAX_PNETID_LEN))
                        continue; /* lgr is not affected */
                if (list_empty(&lgr->list))
                        continue;
                for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
                        struct smc_link *lnk = &lgr->lnk[i];

                        if (smc_link_usable(lnk) &&
                            lnk->smcibdev == smcibdev && lnk->ibport == ibport)
                                smcr_link_down_cond_sched(lnk);
                }
        }
}

static void smc_link_up_work(struct work_struct *work)
{
        struct smc_ib_up_work *ib_work = container_of(work,
                                                      struct smc_ib_up_work,
                                                      work);
        struct smc_link_group *lgr = ib_work->lgr;

        if (list_empty(&lgr->list))
                goto out;
        smcr_link_up(lgr, ib_work->smcibdev, ib_work->ibport);
out:
        kfree(ib_work);
}

static void smc_link_down_work(struct work_struct *work)
{
        struct smc_link *link = container_of(work, struct smc_link,
                                             link_down_wrk);
        struct smc_link_group *lgr = link->lgr;

        if (list_empty(&lgr->list))
                return;
        wake_up_interruptible_all(&lgr->llc_waiter);
        mutex_lock(&lgr->llc_conf_mutex);
        smcr_link_down(link);
        mutex_unlock(&lgr->llc_conf_mutex);
}

/* Determine vlan of internal TCP socket.
 * @ini: init info; the determined vlan id is stored in ini->vlan_id
 */
int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini)
{
        struct dst_entry *dst = sk_dst_get(clcsock->sk);
        struct net_device *ndev;
        int i, nest_lvl, rc = 0;

        ini->vlan_id = 0;
        if (!dst) {
                rc = -ENOTCONN;
                goto out;
        }
        if (!dst->dev) {
                rc = -ENODEV;
                goto out_rel;
        }

        ndev = dst->dev;
        if (is_vlan_dev(ndev)) {
                ini->vlan_id = vlan_dev_vlan_id(ndev);
                goto out_rel;
        }

        rtnl_lock();
        nest_lvl = ndev->lower_level;
        for (i = 0; i < nest_lvl; i++) {
                struct list_head *lower = &ndev->adj_list.lower;

                if (list_empty(lower))
                        break;
                lower = lower->next;
                ndev = (struct net_device *)netdev_lower_get_next(ndev, &lower);
                if (is_vlan_dev(ndev)) {
                        ini->vlan_id = vlan_dev_vlan_id(ndev);
                        break;
                }
        }
        rtnl_unlock();

out_rel:
        dst_release(dst);
out:
        return rc;
}
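
/* Traversal example (hypothetical devices): for a CLC socket routed via
 * bond0 -> eth0.100, the loop above descends one lower level, finds the
 * VLAN device eth0.100 and stores 100 in ini->vlan_id. A route sitting
 * directly on a VLAN device never enters the loop; it is handled by the
 * is_vlan_dev() check right after sk_dst_get().
 */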

static bool smcr_lgr_match(struct smc_link_group *lgr,
                           struct smc_clc_msg_local *lcl,
                           enum smc_lgr_role role, u32 clcqpn)
{
        int i;

        if (memcmp(lgr->peer_systemid, lcl->id_for_peer, SMC_SYSTEMID_LEN) ||
            lgr->role != role)
                return false;

        for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
                if (lgr->lnk[i].state != SMC_LNK_ACTIVE)
                        continue;
                if ((lgr->role == SMC_SERV || lgr->lnk[i].peer_qpn == clcqpn) &&
                    !memcmp(lgr->lnk[i].peer_gid, &lcl->gid, SMC_GID_SIZE) &&
                    !memcmp(lgr->lnk[i].peer_mac, lcl->mac, sizeof(lcl->mac)))
                        return true;
        }
        return false;
}

static bool smcd_lgr_match(struct smc_link_group *lgr,
                           struct smcd_dev *smcismdev, u64 peer_gid)
{
        return lgr->peer_gid == peer_gid && lgr->smcd == smcismdev;
}

/* create a new SMC connection (and a new link group if necessary) */
int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
{
        struct smc_connection *conn = &smc->conn;
        struct list_head *lgr_list;
        struct smc_link_group *lgr;
        enum smc_lgr_role role;
        spinlock_t *lgr_lock;
        int rc = 0;

        lgr_list = ini->is_smcd ? &ini->ism_dev->lgr_list : &smc_lgr_list.list;
        lgr_lock = ini->is_smcd ? &ini->ism_dev->lgr_lock : &smc_lgr_list.lock;
        ini->cln_first_contact = SMC_FIRST_CONTACT;
        role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
        if (role == SMC_CLNT && ini->srv_first_contact)
                /* create new link group as well */
                goto create;

        /* determine if an existing link group can be reused */
        spin_lock_bh(lgr_lock);
        list_for_each_entry(lgr, lgr_list, list) {
                write_lock_bh(&lgr->conns_lock);
                if ((ini->is_smcd ?
                     smcd_lgr_match(lgr, ini->ism_dev, ini->ism_gid) :
                     smcr_lgr_match(lgr, ini->ib_lcl, role, ini->ib_clcqpn)) &&
                    !lgr->sync_err &&
                    lgr->vlan_id == ini->vlan_id &&
                    (role == SMC_CLNT ||
                     lgr->conns_num < SMC_RMBS_PER_LGR_MAX)) {
                        /* link group found */
                        ini->cln_first_contact = SMC_REUSE_CONTACT;
                        conn->lgr = lgr;
                        rc = smc_lgr_register_conn(conn, false);
                        write_unlock_bh(&lgr->conns_lock);
                        if (!rc && delayed_work_pending(&lgr->free_work))
                                cancel_delayed_work(&lgr->free_work);
                        break;
                }
                write_unlock_bh(&lgr->conns_lock);
        }
        spin_unlock_bh(lgr_lock);
        if (rc)
                return rc;

        if (role == SMC_CLNT && !ini->srv_first_contact &&
            ini->cln_first_contact == SMC_FIRST_CONTACT) {
                /* Server reuses a link group, but Client wants to start
                 * a new one
                 * send out_of_sync decline, reason synchr. error
                 */
                return SMC_CLC_DECL_SYNCERR;
        }

create:
        if (ini->cln_first_contact == SMC_FIRST_CONTACT) {
                rc = smc_lgr_create(smc, ini);
                if (rc)
                        goto out;
                lgr = conn->lgr;
                write_lock_bh(&lgr->conns_lock);
                rc = smc_lgr_register_conn(conn, true);
                write_unlock_bh(&lgr->conns_lock);
                if (rc)
                        goto out;
        }
        conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
        conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
        conn->urg_state = SMC_URG_READ;
        INIT_WORK(&smc->conn.abort_work, smc_conn_abort_work);
        if (ini->is_smcd) {
                conn->rx_off = sizeof(struct smcd_cdc_msg);
                smcd_cdc_rx_init(conn); /* init tasklet for this conn */
        }
#ifndef KERNEL_HAS_ATOMIC64
        spin_lock_init(&conn->acurs_lock);
#endif

out:
        return rc;
}

/* convert the RMB size into the compressed notation - minimum 16K.
 * In contrast to plain ilog2, this rounds towards the next power of 2,
 * so the socket application gets at least its desired sndbuf / rcvbuf size.
 */
static u8 smc_compress_bufsize(int size)
{
        u8 compressed;

        if (size <= SMC_BUF_MIN_SIZE)
                return 0;

        size = (size - 1) >> 14;
        compressed = ilog2(size) + 1;
        if (compressed >= SMC_RMBE_SIZES)
                compressed = SMC_RMBE_SIZES - 1;
        return compressed;
}

/* convert the RMB size from compressed notation into integer */
int smc_uncompress_bufsize(u8 compressed)
{
        u32 size;

        size = 0x00000001 << (((int)compressed) + 14);
        return (int)size;
}
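
/* Round-trip example: a requested size of 65536 gives
 * (65536 - 1) >> 14 = 3 and ilog2(3) + 1 = 2, so smc_compress_bufsize()
 * returns 2, and smc_uncompress_bufsize(2) yields 1 << 16 = 65536 again.
 * Anything at or below SMC_BUF_MIN_SIZE compresses to 0, i.e. 16K.
 */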
1440
1441 /* try to reuse a sndbuf or rmb description slot for a certain
1442  * buffer size; if not available, return NULL
1443  */
1444 static struct smc_buf_desc *smc_buf_get_slot(int compressed_bufsize,
1445                                              struct mutex *lock,
1446                                              struct list_head *buf_list)
1447 {
1448         struct smc_buf_desc *buf_slot;
1449
1450         mutex_lock(lock);
1451         list_for_each_entry(buf_slot, buf_list, list) {
1452                 if (cmpxchg(&buf_slot->used, 0, 1) == 0) {
1453                         mutex_unlock(lock);
1454                         return buf_slot;
1455                 }
1456         }
1457         mutex_unlock(lock);
1458         return NULL;
1459 }
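
/* Annotation (not part of the original source): the list is walked under
 * the buffer-list mutex, but a slot's "used" flag is also cleared when a
 * connection releases its buffer; the cmpxchg() therefore claims the slot
 * atomically, so a release racing with reuse can never produce two owners
 * of the same buf_desc.
 */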
1460
1461 /* one of the conditions for announcing a receiver's current window size is
1462  * that it "results in a minimum increase in the window size of 10% of the
1463  * receive buffer space" [RFC7609]
1464  */
1465 static inline int smc_rmb_wnd_update_limit(int rmbe_size)
1466 {
1467         return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
1468 }
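
/* Worked example (annotation, not part of the original source): for a
 * 64 KB RMB, rmbe_size / 10 == 6553, so min_t() caps the limit at
 * SOCK_MIN_SNDBUF / 2 (a little over 2 KB on common configurations);
 * the cap keeps large RMBs from withholding window updates until a full
 * 10% of the buffer has been consumed.
 */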
1469
1470 /* map an rmb buf to a link */
1471 static int smcr_buf_map_link(struct smc_buf_desc *buf_desc, bool is_rmb,
1472                              struct smc_link *lnk)
1473 {
1474         int rc;
1475
1476         if (buf_desc->is_map_ib[lnk->link_idx])
1477                 return 0;
1478
1479         rc = sg_alloc_table(&buf_desc->sgt[lnk->link_idx], 1, GFP_KERNEL);
1480         if (rc)
1481                 return rc;
1482         sg_set_buf(buf_desc->sgt[lnk->link_idx].sgl,
1483                    buf_desc->cpu_addr, buf_desc->len);
1484
1485         /* map sg table to DMA address */
1486         rc = smc_ib_buf_map_sg(lnk, buf_desc,
1487                                is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
1488         /* SMC protocol depends on mapping to one DMA address only */
1489         if (rc != 1) {
1490                 rc = -EAGAIN;
1491                 goto free_table;
1492         }
1493
1494         /* create a new memory region for the RMB */
1495         if (is_rmb) {
1496                 rc = smc_ib_get_memory_region(lnk->roce_pd,
1497                                               IB_ACCESS_REMOTE_WRITE |
1498                                               IB_ACCESS_LOCAL_WRITE,
1499                                               buf_desc, lnk->link_idx);
1500                 if (rc)
1501                         goto buf_unmap;
1502                 smc_ib_sync_sg_for_device(lnk, buf_desc, DMA_FROM_DEVICE);
1503         }
1504         buf_desc->is_map_ib[lnk->link_idx] = true;
1505         return 0;
1506
1507 buf_unmap:
1508         smc_ib_buf_unmap_sg(lnk, buf_desc,
1509                             is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
1510 free_table:
1511         sg_free_table(&buf_desc->sgt[lnk->link_idx]);
1512         return rc;
1513 }
1514
1515 /* register a new rmb on the IB device;
1516  * must be called under lgr->llc_conf_mutex lock
1517  */
1518 int smcr_link_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc)
1519 {
1520         if (list_empty(&link->lgr->list))
1521                 return -ENOLINK;
1522         if (!rmb_desc->is_reg_mr[link->link_idx]) {
1523                 /* register memory region for new rmb */
1524                 if (smc_wr_reg_send(link, rmb_desc->mr_rx[link->link_idx])) {
1525                         rmb_desc->is_reg_err = true;
1526                         return -EFAULT;
1527                 }
1528                 rmb_desc->is_reg_mr[link->link_idx] = true;
1529         }
1530         return 0;
1531 }
1532
1533 static int _smcr_buf_map_lgr(struct smc_link *lnk, struct mutex *lock,
1534                              struct list_head *lst, bool is_rmb)
1535 {
1536         struct smc_buf_desc *buf_desc, *bf;
1537         int rc = 0;
1538
1539         mutex_lock(lock);
1540         list_for_each_entry_safe(buf_desc, bf, lst, list) {
1541                 if (!buf_desc->used)
1542                         continue;
1543                 rc = smcr_buf_map_link(buf_desc, is_rmb, lnk);
1544                 if (rc)
1545                         goto out;
1546         }
1547 out:
1548         mutex_unlock(lock);
1549         return rc;
1550 }
1551
1552 /* map all used buffers of lgr for a new link */
1553 int smcr_buf_map_lgr(struct smc_link *lnk)
1554 {
1555         struct smc_link_group *lgr = lnk->lgr;
1556         int i, rc = 0;
1557
1558         for (i = 0; i < SMC_RMBE_SIZES; i++) {
1559                 rc = _smcr_buf_map_lgr(lnk, &lgr->rmbs_lock,
1560                                        &lgr->rmbs[i], true);
1561                 if (rc)
1562                         return rc;
1563                 rc = _smcr_buf_map_lgr(lnk, &lgr->sndbufs_lock,
1564                                        &lgr->sndbufs[i], false);
1565                 if (rc)
1566                         return rc;
1567         }
1568         return 0;
1569 }
1570
1571 /* register all used buffers of lgr for a new link;
1572  * must be called under lgr->llc_conf_mutex lock
1573  */
1574 int smcr_buf_reg_lgr(struct smc_link *lnk)
1575 {
1576         struct smc_link_group *lgr = lnk->lgr;
1577         struct smc_buf_desc *buf_desc, *bf;
1578         int i, rc = 0;
1579
1580         mutex_lock(&lgr->rmbs_lock);
1581         for (i = 0; i < SMC_RMBE_SIZES; i++) {
1582                 list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list) {
1583                         if (!buf_desc->used)
1584                                 continue;
1585                         rc = smcr_link_reg_rmb(lnk, buf_desc);
1586                         if (rc)
1587                                 goto out;
1588                 }
1589         }
1590 out:
1591         mutex_unlock(&lgr->rmbs_lock);
1592         return rc;
1593 }
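
/* Illustrative call pattern (a sketch, not part of the original source):
 * when a new link is brought up, all existing buffers are first DMA-mapped
 * and then registered on it, with the caller holding lgr->llc_conf_mutex:
 *
 *	rc = smcr_buf_map_lgr(lnk);
 *	if (!rc)
 *		rc = smcr_buf_reg_lgr(lnk);
 *
 * smcr_buf_map_link() is idempotent per link (guarded by is_map_ib[]), so
 * repeating the sequence after a partial failure is safe.
 */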
1594
1595 static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr,
1596                                                 bool is_rmb, int bufsize)
1597 {
1598         struct smc_buf_desc *buf_desc;
1599
1600         /* try to alloc a new buffer */
1601         buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
1602         if (!buf_desc)
1603                 return ERR_PTR(-ENOMEM);
1604
1605         buf_desc->order = get_order(bufsize);
1606         buf_desc->pages = alloc_pages(GFP_KERNEL | __GFP_NOWARN |
1607                                       __GFP_NOMEMALLOC | __GFP_COMP |
1608                                       __GFP_NORETRY | __GFP_ZERO,
1609                                       buf_desc->order);
1610         if (!buf_desc->pages) {
1611                 kfree(buf_desc);
1612                 return ERR_PTR(-EAGAIN);
1613         }
1614         buf_desc->cpu_addr = (void *)page_address(buf_desc->pages);
1615         buf_desc->len = bufsize;
1616         return buf_desc;
1617 }
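
/* Annotation (not part of the original source): the GFP flags make this
 * allocation fail fast instead of fighting for memory: __GFP_NORETRY and
 * __GFP_NOMEMALLOC avoid the OOM killer and the emergency reserves, and
 * __GFP_NOWARN suppresses the failure warning, because __smc_buf_create()
 * below treats the resulting -EAGAIN as a cue to retry with the next
 * smaller buffer size.
 */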
1618
1619 /* map buf_desc on all usable links;
1620  * unused buffers stay mapped as long as the link is up
1621  */
1622 static int smcr_buf_map_usable_links(struct smc_link_group *lgr,
1623                                      struct smc_buf_desc *buf_desc, bool is_rmb)
1624 {
1625         int i, rc = 0;
1626
1627         /* protect against parallel link reconfiguration */
1628         mutex_lock(&lgr->llc_conf_mutex);
1629         for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
1630                 struct smc_link *lnk = &lgr->lnk[i];
1631
1632                 if (!smc_link_usable(lnk))
1633                         continue;
1634                 if (smcr_buf_map_link(buf_desc, is_rmb, lnk)) {
1635                         rc = -ENOMEM;
1636                         goto out;
1637                 }
1638         }
1639 out:
1640         mutex_unlock(&lgr->llc_conf_mutex);
1641         return rc;
1642 }
1643
1644 #define SMCD_DMBE_SIZES         6 /* 0 -> 16KB, 1 -> 32KB, ... 6 -> 1MB */
1645
1646 static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr,
1647                                                 bool is_dmb, int bufsize)
1648 {
1649         struct smc_buf_desc *buf_desc;
1650         int rc;
1651
1652         if (smc_compress_bufsize(bufsize) > SMCD_DMBE_SIZES)
1653                 return ERR_PTR(-EAGAIN);
1654
1655         /* try to alloc a new DMB */
1656         buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
1657         if (!buf_desc)
1658                 return ERR_PTR(-ENOMEM);
1659         if (is_dmb) {
1660                 rc = smc_ism_register_dmb(lgr, bufsize, buf_desc);
1661                 if (rc) {
1662                         kfree(buf_desc);
1663                         return ERR_PTR(-EAGAIN);
1664                 }
1665                 buf_desc->pages = virt_to_page(buf_desc->cpu_addr);
1666                 /* the CDC header is stored in the buffer, so pretend it is smaller */
1667                 buf_desc->len = bufsize - sizeof(struct smcd_cdc_msg);
1668         } else {
1669                 buf_desc->cpu_addr = kzalloc(bufsize, GFP_KERNEL |
1670                                              __GFP_NOWARN | __GFP_NORETRY |
1671                                              __GFP_NOMEMALLOC);
1672                 if (!buf_desc->cpu_addr) {
1673                         kfree(buf_desc);
1674                         return ERR_PTR(-EAGAIN);
1675                 }
1676                 buf_desc->len = bufsize;
1677         }
1678         return buf_desc;
1679 }
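
/* Annotation (not part of the original source): for SMC-D the peer's CDC
 * control message lives at the start of the DMB itself (cf. conn->rx_off
 * in smc_conn_create() above), so only bufsize - sizeof(struct smcd_cdc_msg)
 * bytes are advertised to the socket layer as receive space.
 */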
1680
1681 static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
1682 {
1683         struct smc_buf_desc *buf_desc = ERR_PTR(-ENOMEM);
1684         struct smc_connection *conn = &smc->conn;
1685         struct smc_link_group *lgr = conn->lgr;
1686         struct list_head *buf_list;
1687         int bufsize, bufsize_short;
1688         struct mutex *lock;     /* lock buffer list */
1689         int sk_buf_size;
1690
1691         if (is_rmb)
1692                 /* use socket recv buffer size (w/o overhead) as start value */
1693                 sk_buf_size = smc->sk.sk_rcvbuf / 2;
1694         else
1695                 /* use socket send buffer size (w/o overhead) as start value */
1696                 sk_buf_size = smc->sk.sk_sndbuf / 2;
1697
1698         for (bufsize_short = smc_compress_bufsize(sk_buf_size);
1699              bufsize_short >= 0; bufsize_short--) {
1700
1701                 if (is_rmb) {
1702                         lock = &lgr->rmbs_lock;
1703                         buf_list = &lgr->rmbs[bufsize_short];
1704                 } else {
1705                         lock = &lgr->sndbufs_lock;
1706                         buf_list = &lgr->sndbufs[bufsize_short];
1707                 }
1708                 bufsize = smc_uncompress_bufsize(bufsize_short);
1709                 if ((1 << get_order(bufsize)) > SG_MAX_SINGLE_ALLOC)
1710                         continue;
1711
1712                 /* check for reusable slot in the link group */
1713                 buf_desc = smc_buf_get_slot(bufsize_short, lock, buf_list);
1714                 if (buf_desc) {
1715                         memset(buf_desc->cpu_addr, 0, bufsize);
1716                         break; /* found reusable slot */
1717                 }
1718
1719                 if (is_smcd)
1720                         buf_desc = smcd_new_buf_create(lgr, is_rmb, bufsize);
1721                 else
1722                         buf_desc = smcr_new_buf_create(lgr, is_rmb, bufsize);
1723
1724                 if (PTR_ERR(buf_desc) == -ENOMEM)
1725                         break;
1726                 if (IS_ERR(buf_desc))
1727                         continue;
1728
1729                 buf_desc->used = 1;
1730                 mutex_lock(lock);
1731                 list_add(&buf_desc->list, buf_list);
1732                 mutex_unlock(lock);
1733                 break; /* found */
1734         }
1735
1736         if (IS_ERR(buf_desc))
1737                 return -ENOMEM;
1738
1739         if (!is_smcd) {
1740                 if (smcr_buf_map_usable_links(lgr, buf_desc, is_rmb)) {
1741                         smcr_buf_unuse(buf_desc, lgr);
1742                         return -ENOMEM;
1743                 }
1744         }
1745
1746         if (is_rmb) {
1747                 conn->rmb_desc = buf_desc;
1748                 conn->rmbe_size_short = bufsize_short;
1749                 smc->sk.sk_rcvbuf = bufsize * 2;
1750                 atomic_set(&conn->bytes_to_rcv, 0);
1751                 conn->rmbe_update_limit =
1752                         smc_rmb_wnd_update_limit(buf_desc->len);
1753                 if (is_smcd)
1754                         smc_ism_set_conn(conn); /* map RMB/smcd_dev to conn */
1755         } else {
1756                 conn->sndbuf_desc = buf_desc;
1757                 smc->sk.sk_sndbuf = bufsize * 2;
1758                 atomic_set(&conn->sndbuf_space, bufsize);
1759         }
1760         return 0;
1761 }
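
/* Worked example (annotation, not part of the original source): with
 * sk_rcvbuf == 262144 the start value is 131072, which compresses to 3
 * (128 KB). The loop first looks for a reusable 128 KB slot, then tries
 * to allocate a fresh one; on -EAGAIN it retries at 64 KB, 32 KB and
 * finally 16 KB before giving up, while -ENOMEM aborts immediately. On
 * success sk_rcvbuf is rewritten to bufsize * 2 to compensate for the
 * halving at the top of the function.
 */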
1762
1763 void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn)
1764 {
1765         if (!conn->lgr || conn->lgr->is_smcd || !smc_link_usable(conn->lnk))
1766                 return;
1767         smc_ib_sync_sg_for_cpu(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE);
1768 }
1769
1770 void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn)
1771 {
1772         if (!conn->lgr || conn->lgr->is_smcd || !smc_link_usable(conn->lnk))
1773                 return;
1774         smc_ib_sync_sg_for_device(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE);
1775 }
1776
1777 void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn)
1778 {
1779         int i;
1780
1781         if (!conn->lgr || conn->lgr->is_smcd)
1782                 return;
1783         for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
1784                 if (!smc_link_usable(&conn->lgr->lnk[i]))
1785                         continue;
1786                 smc_ib_sync_sg_for_cpu(&conn->lgr->lnk[i], conn->rmb_desc,
1787                                        DMA_FROM_DEVICE);
1788         }
1789 }
1790
1791 void smc_rmb_sync_sg_for_device(struct smc_connection *conn)
1792 {
1793         int i;
1794
1795         if (!conn->lgr || conn->lgr->is_smcd)
1796                 return;
1797         for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
1798                 if (!smc_link_usable(&conn->lgr->lnk[i]))
1799                         continue;
1800                 smc_ib_sync_sg_for_device(&conn->lgr->lnk[i], conn->rmb_desc,
1801                                           DMA_FROM_DEVICE);
1802         }
1803 }
1804
1805 /* create the send and receive buffers for an SMC socket;
1806  * receive buffers are called RMBs.
1807  * (Even though the SMC protocol allows more than one RMB-element per RMB,
1808  * the Linux implementation uses just one RMB-element per RMB, i.e. it uses
1809  * an extra RMB for every connection in a link group.)
1810  */
1811 int smc_buf_create(struct smc_sock *smc, bool is_smcd)
1812 {
1813         int rc;
1814
1815         /* create send buffer */
1816         rc = __smc_buf_create(smc, is_smcd, false);
1817         if (rc)
1818                 return rc;
1819         /* create rmb */
1820         rc = __smc_buf_create(smc, is_smcd, true);
1821         if (rc)
1822                 smc_buf_free(smc->conn.lgr, false, smc->conn.sndbuf_desc);
1823         return rc;
1824 }
1825
1826 static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
1827 {
1828         int i;
1829
1830         for_each_clear_bit(i, lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX) {
1831                 if (!test_and_set_bit(i, lgr->rtokens_used_mask))
1832                         return i;
1833         }
1834         return -ENOSPC;
1835 }
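
/* Annotation (not part of the original source): for_each_clear_bit() is
 * not atomic with respect to concurrent reservations, so a bit that
 * looked clear may be taken by the time it is reached; test_and_set_bit()
 * re-checks atomically, and after a lost race the loop simply advances to
 * the next clear bit.
 */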
1836
1837 static int smc_rtoken_find_by_link(struct smc_link_group *lgr, int lnk_idx,
1838                                    u32 rkey)
1839 {
1840         int i;
1841
1842         for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
1843                 if (test_bit(i, lgr->rtokens_used_mask) &&
1844                     lgr->rtokens[i][lnk_idx].rkey == rkey)
1845                         return i;
1846         }
1847         return -ENOENT;
1848 }
1849
1850 /* set rtoken for a new link to an existing rmb */
1851 void smc_rtoken_set(struct smc_link_group *lgr, int link_idx, int link_idx_new,
1852                     __be32 nw_rkey_known, __be64 nw_vaddr, __be32 nw_rkey)
1853 {
1854         int rtok_idx;
1855
1856         rtok_idx = smc_rtoken_find_by_link(lgr, link_idx, ntohl(nw_rkey_known));
1857         if (rtok_idx == -ENOENT)
1858                 return;
1859         lgr->rtokens[rtok_idx][link_idx_new].rkey = ntohl(nw_rkey);
1860         lgr->rtokens[rtok_idx][link_idx_new].dma_addr = be64_to_cpu(nw_vaddr);
1861 }
1862
1863 /* set rtoken for a new link whose link_id is given */
1864 void smc_rtoken_set2(struct smc_link_group *lgr, int rtok_idx, int link_id,
1865                      __be64 nw_vaddr, __be32 nw_rkey)
1866 {
1867         u64 dma_addr = be64_to_cpu(nw_vaddr);
1868         u32 rkey = ntohl(nw_rkey);
1869         bool found = false;
1870         int link_idx;
1871
1872         for (link_idx = 0; link_idx < SMC_LINKS_PER_LGR_MAX; link_idx++) {
1873                 if (lgr->lnk[link_idx].link_id == link_id) {
1874                         found = true;
1875                         break;
1876                 }
1877         }
1878         if (!found)
1879                 return;
1880         lgr->rtokens[rtok_idx][link_idx].rkey = rkey;
1881         lgr->rtokens[rtok_idx][link_idx].dma_addr = dma_addr;
1882 }
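
/* Annotation (not part of the original source): smc_rtoken_set() addresses
 * links by array index (link_idx), while smc_rtoken_set2() receives the
 * protocol-visible link_id from an LLC message and must resolve it to an
 * index first; the rtokens[][] table itself is always indexed by rtoken
 * slot and link_idx.
 */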
1883
1884 /* add a new rtoken from peer */
1885 int smc_rtoken_add(struct smc_link *lnk, __be64 nw_vaddr, __be32 nw_rkey)
1886 {
1887         struct smc_link_group *lgr = smc_get_lgr(lnk);
1888         u64 dma_addr = be64_to_cpu(nw_vaddr);
1889         u32 rkey = ntohl(nw_rkey);
1890         int i;
1891
1892         for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
1893                 if (lgr->rtokens[i][lnk->link_idx].rkey == rkey &&
1894                     lgr->rtokens[i][lnk->link_idx].dma_addr == dma_addr &&
1895                     test_bit(i, lgr->rtokens_used_mask)) {
1896                         /* already in list */
1897                         return i;
1898                 }
1899         }
1900         i = smc_rmb_reserve_rtoken_idx(lgr);
1901         if (i < 0)
1902                 return i;
1903         lgr->rtokens[i][lnk->link_idx].rkey = rkey;
1904         lgr->rtokens[i][lnk->link_idx].dma_addr = dma_addr;
1905         return i;
1906 }
1907
1908 /* delete an rtoken from all links */
1909 int smc_rtoken_delete(struct smc_link *lnk, __be32 nw_rkey)
1910 {
1911         struct smc_link_group *lgr = smc_get_lgr(lnk);
1912         u32 rkey = ntohl(nw_rkey);
1913         int i, j;
1914
1915         for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
1916                 if (lgr->rtokens[i][lnk->link_idx].rkey == rkey &&
1917                     test_bit(i, lgr->rtokens_used_mask)) {
1918                         for (j = 0; j < SMC_LINKS_PER_LGR_MAX; j++) {
1919                                 lgr->rtokens[i][j].rkey = 0;
1920                                 lgr->rtokens[i][j].dma_addr = 0;
1921                         }
1922                         clear_bit(i, lgr->rtokens_used_mask);
1923                         return 0;
1924                 }
1925         }
1926         return -ENOENT;
1927 }
1928
1929 /* save the rkey and dma_addr received from the peer during the CLC handshake */
1930 int smc_rmb_rtoken_handling(struct smc_connection *conn,
1931                             struct smc_link *lnk,
1932                             struct smc_clc_msg_accept_confirm *clc)
1933 {
1934         conn->rtoken_idx = smc_rtoken_add(lnk, clc->rmb_dma_addr,
1935                                           clc->rmb_rkey);
1936         if (conn->rtoken_idx < 0)
1937                 return conn->rtoken_idx;
1938         return 0;
1939 }
1940
1941 static void smc_core_going_away(void)
1942 {
1943         struct smc_ib_device *smcibdev;
1944         struct smcd_dev *smcd;
1945
1946         spin_lock(&smc_ib_devices.lock);
1947         list_for_each_entry(smcibdev, &smc_ib_devices.list, list) {
1948                 int i;
1949
1950                 for (i = 0; i < SMC_MAX_PORTS; i++)
1951                         set_bit(i, smcibdev->ports_going_away);
1952         }
1953         spin_unlock(&smc_ib_devices.lock);
1954
1955         spin_lock(&smcd_dev_list.lock);
1956         list_for_each_entry(smcd, &smcd_dev_list.list, list) {
1957                 smcd->going_away = 1;
1958         }
1959         spin_unlock(&smcd_dev_list.lock);
1960 }
1961
1962 /* Clean up all SMC link groups */
1963 static void smc_lgrs_shutdown(void)
1964 {
1965         struct smcd_dev *smcd;
1966
1967         smc_core_going_away();
1968
1969         smc_smcr_terminate_all(NULL);
1970
1971         spin_lock(&smcd_dev_list.lock);
1972         list_for_each_entry(smcd, &smcd_dev_list.list, list)
1973                 smc_smcd_terminate_all(smcd);
1974         spin_unlock(&smcd_dev_list.lock);
1975 }
1976
1977 static int smc_core_reboot_event(struct notifier_block *this,
1978                                  unsigned long event, void *ptr)
1979 {
1980         smc_lgrs_shutdown();
1981         smc_ib_unregister_client();
1982         return 0;
1983 }
1984
1985 static struct notifier_block smc_reboot_notifier = {
1986         .notifier_call = smc_core_reboot_event,
1987 };
1988
1989 int __init smc_core_init(void)
1990 {
1991         return register_reboot_notifier(&smc_reboot_notifier);
1992 }
1993
1994 /* Called (from smc_exit) when the module is removed */
1995 void smc_core_exit(void)
1996 {
1997         unregister_reboot_notifier(&smc_reboot_notifier);
1998         smc_lgrs_shutdown();
1999 }