net/smc/smc_wr.c
/*
 * Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 * Work Requests exploiting InfiniBand API
 *
 * Work requests (WR) of type ib_post_send or ib_post_recv
 * are submitted to the RC SQ or RC RQ respectively
 * (reliably connected send/receive queue)
 * and become work queue entries (WQEs).
 * While an SQ WR/WQE is pending, we track it until transmission completion.
 * Through a send or receive completion queue (CQ) respectively,
 * we get completion queue entries (CQEs) [aka work completions (WCs)].
 * Since the CQ callback is called from IRQ context, we split work by using
 * bottom halves implemented by tasklets.
 *
 * SMC uses this to exchange LLC (link layer control)
 * and CDC (connection data control) messages.
 *
 * Copyright IBM Corp. 2016
 *
 * Author(s):  Steffen Maier <maier@linux.vnet.ibm.com>
 */

#include <linux/atomic.h>
#include <linux/hashtable.h>
#include <linux/wait.h>
#include <rdma/ib_verbs.h>
#include <asm/div64.h>

#include "smc.h"
#include "smc_wr.h"

#define SMC_WR_MAX_POLL_CQE 10	/* max. # of compl. queue elements in 1 poll */

#define SMC_WR_RX_HASH_BITS 4
static DEFINE_HASHTABLE(smc_wr_rx_hash, SMC_WR_RX_HASH_BITS);
static DEFINE_SPINLOCK(smc_wr_rx_hash_lock);

struct smc_wr_tx_pend {	/* control data for a pending send request */
	u64			wr_id;		/* work request id sent */
	smc_wr_tx_handler	handler;
	enum ib_wc_status	wc_status;	/* CQE status */
	struct smc_link		*link;
	u32			idx;
	struct smc_wr_tx_pend_priv priv;
};

/******************************** send queue *********************************/

/*------------------------------- completion --------------------------------*/

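/* Map a send completion's wr_id back to its pending-slot index; returns
 * link->wr_tx_cnt if no pending slot matches.
 */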
static inline int smc_wr_tx_find_pending_index(struct smc_link *link, u64 wr_id)
{
	u32 i;

	for (i = 0; i < link->wr_tx_cnt; i++) {
		if (link->wr_tx_pends[i].wr_id == wr_id)
			return i;
	}
	return link->wr_tx_cnt;
}

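/* Handle one send CQE in tasklet context: an IB_WC_REG_MR completion just
 * updates the memory-registration state and wakes the waiter; any other
 * completion releases its pending slot, and on error all pending slots are
 * cleared and the link group is terminated abnormally.
 */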
static inline void smc_wr_tx_process_cqe(struct ib_wc *wc)
{
	struct smc_wr_tx_pend pnd_snd;
	struct smc_link *link;
	u32 pnd_snd_idx;
	int i;

	link = wc->qp->qp_context;

	if (wc->opcode == IB_WC_REG_MR) {
		if (wc->status)
			link->wr_reg_state = FAILED;
		else
			link->wr_reg_state = CONFIRMED;
		wake_up(&link->wr_reg_wait);
		return;
	}

	pnd_snd_idx = smc_wr_tx_find_pending_index(link, wc->wr_id);
	if (pnd_snd_idx == link->wr_tx_cnt)
		return;
	link->wr_tx_pends[pnd_snd_idx].wc_status = wc->status;
	memcpy(&pnd_snd, &link->wr_tx_pends[pnd_snd_idx], sizeof(pnd_snd));
	/* clear the full struct smc_wr_tx_pend including .priv */
	memset(&link->wr_tx_pends[pnd_snd_idx], 0,
	       sizeof(link->wr_tx_pends[pnd_snd_idx]));
	memset(&link->wr_tx_bufs[pnd_snd_idx], 0,
	       sizeof(link->wr_tx_bufs[pnd_snd_idx]));
	if (!test_and_clear_bit(pnd_snd_idx, link->wr_tx_mask))
		return;
	if (wc->status) {
		struct smc_link_group *lgr;

		for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) {
			/* clear full struct smc_wr_tx_pend including .priv */
			memset(&link->wr_tx_pends[i], 0,
			       sizeof(link->wr_tx_pends[i]));
			memset(&link->wr_tx_bufs[i], 0,
			       sizeof(link->wr_tx_bufs[i]));
			clear_bit(i, link->wr_tx_mask);
		}
		/* terminate connections of this link group abnormally */
		lgr = container_of(link, struct smc_link_group,
				   lnk[SMC_SINGLE_LINK]);
		smc_lgr_terminate(lgr);
	}
	if (pnd_snd.handler)
		pnd_snd.handler(&pnd_snd.priv, link, wc->status);
	wake_up(&link->wr_tx_wait);
}

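/* Send-CQ bottom half: drain completions in batches of SMC_WR_MAX_POLL_CQE.
 * The CQ is re-armed during the first pass, and a second full pass closes
 * the race with completions that arrived before the re-arm took effect
 * (IB_CQ_REPORT_MISSED_EVENTS).
 */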
static void smc_wr_tx_tasklet_fn(unsigned long data)
{
	struct smc_ib_device *dev = (struct smc_ib_device *)data;
	struct ib_wc wc[SMC_WR_MAX_POLL_CQE];
	int i = 0, rc;
	int polled = 0;

again:
	polled++;
	do {
		rc = ib_poll_cq(dev->roce_cq_send, SMC_WR_MAX_POLL_CQE, wc);
		if (polled == 1) {
			ib_req_notify_cq(dev->roce_cq_send,
					 IB_CQ_NEXT_COMP |
					 IB_CQ_REPORT_MISSED_EVENTS);
		}
		if (!rc)
			break;
		for (i = 0; i < rc; i++)
			smc_wr_tx_process_cqe(&wc[i]);
	} while (rc > 0);
	if (polled == 1)
		goto again;
}

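/* Completion callback for the send CQ, called in IRQ context; defer the
 * actual CQE processing to the send tasklet.
 */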
void smc_wr_tx_cq_handler(struct ib_cq *ib_cq, void *cq_context)
{
	struct smc_ib_device *dev = (struct smc_ib_device *)cq_context;

	tasklet_schedule(&dev->send_tasklet);
}

/*---------------------------- request submission ---------------------------*/

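/* Claim a free send slot: test_and_set_bit makes the claim atomic against
 * concurrent callers; returns -EBUSY with *idx == link->wr_tx_cnt if all
 * slots are in use.
 */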
static inline int smc_wr_tx_get_free_slot_index(struct smc_link *link, u32 *idx)
{
	*idx = link->wr_tx_cnt;
	for_each_clear_bit(*idx, link->wr_tx_mask, link->wr_tx_cnt) {
		if (!test_and_set_bit(*idx, link->wr_tx_mask))
			return 0;
	}
	*idx = link->wr_tx_cnt;
	return -EBUSY;
}

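/* Illustrative caller sequence (a sketch, not taken verbatim from a caller;
 * my_tx_handler is a hypothetical smc_wr_tx_handler):
 *
 *	struct smc_wr_tx_pend_priv *pend;
 *	struct smc_wr_buf *wr_buf;
 *	int rc;
 *
 *	rc = smc_wr_tx_get_free_slot(link, my_tx_handler, &wr_buf, &pend);
 *	if (rc)
 *		return rc;
 *	// ... assemble the message in *wr_buf ...
 *	rc = smc_wr_tx_send(link, pend);  // or smc_wr_tx_put_slot() to abort
 */
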
/**
 * smc_wr_tx_get_free_slot() - returns buffer for message assembly,
 *			and sets info for pending transmit tracking
 * @link:		Pointer to smc_link used to later send the message.
 * @handler:		Send completion handler function pointer.
 * @wr_buf:		Out value returns pointer to message buffer.
 * @wr_pend_priv:	Out value returns pointer serving as handler context.
 *
 * Return: 0 on success, or -errno on error.
 */
int smc_wr_tx_get_free_slot(struct smc_link *link,
			    smc_wr_tx_handler handler,
			    struct smc_wr_buf **wr_buf,
			    struct smc_wr_tx_pend_priv **wr_pend_priv)
{
	struct smc_wr_tx_pend *wr_pend;
	struct ib_send_wr *wr_ib;
	u64 wr_id;
	u32 idx;
	int rc;

	*wr_buf = NULL;
	*wr_pend_priv = NULL;
	if (in_softirq()) {
		rc = smc_wr_tx_get_free_slot_index(link, &idx);
		if (rc)
			return rc;
	} else {
		rc = wait_event_interruptible_timeout(
			link->wr_tx_wait,
			(smc_wr_tx_get_free_slot_index(link, &idx) != -EBUSY),
			SMC_WR_TX_WAIT_FREE_SLOT_TIME);
		if (!rc) {
			/* timeout - terminate connections */
			struct smc_link_group *lgr;

			lgr = container_of(link, struct smc_link_group,
					   lnk[SMC_SINGLE_LINK]);
			smc_lgr_terminate(lgr);
			return -EPIPE;
		}
		if (rc == -ERESTARTSYS)
			return -EINTR;
		if (idx == link->wr_tx_cnt)
			return -EPIPE;
	}
	wr_id = smc_wr_tx_get_next_wr_id(link);
	wr_pend = &link->wr_tx_pends[idx];
	wr_pend->wr_id = wr_id;
	wr_pend->handler = handler;
	wr_pend->link = link;
	wr_pend->idx = idx;
	wr_ib = &link->wr_tx_ibs[idx];
	wr_ib->wr_id = wr_id;
	*wr_buf = &link->wr_tx_bufs[idx];
	*wr_pend_priv = &wr_pend->priv;
	return 0;
}

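/* Release a slot that was reserved but will not be sent; returns 1 if the
 * slot was released, 0 if it was not in use.
 */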
int smc_wr_tx_put_slot(struct smc_link *link,
		       struct smc_wr_tx_pend_priv *wr_pend_priv)
{
	struct smc_wr_tx_pend *pend;

	pend = container_of(wr_pend_priv, struct smc_wr_tx_pend, priv);
	if (pend->idx < link->wr_tx_cnt) {
		/* clear the full struct smc_wr_tx_pend including .priv */
		memset(&link->wr_tx_pends[pend->idx], 0,
		       sizeof(link->wr_tx_pends[pend->idx]));
		memset(&link->wr_tx_bufs[pend->idx], 0,
		       sizeof(link->wr_tx_bufs[pend->idx]));
		test_and_clear_bit(pend->idx, link->wr_tx_mask);
		return 1;
	}

	return 0;
}

/* Send prepared WR slot via ib_post_send.
 * @priv: pointer to smc_wr_tx_pend_priv identifying prepared message buffer
 */
int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv)
{
	struct ib_send_wr *failed_wr = NULL;
	struct smc_wr_tx_pend *pend;
	int rc;

	ib_req_notify_cq(link->smcibdev->roce_cq_send,
			 IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
	pend = container_of(priv, struct smc_wr_tx_pend, priv);
	rc = ib_post_send(link->roce_qp, &link->wr_tx_ibs[pend->idx],
			  &failed_wr);
	if (rc)
		smc_wr_tx_put_slot(link, priv);
	return rc;
}

/* Register a memory region and wait for result. */
int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr)
{
	struct ib_send_wr *failed_wr = NULL;
	int rc;

	ib_req_notify_cq(link->smcibdev->roce_cq_send,
			 IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
	link->wr_reg_state = POSTED;
	link->wr_reg.wr.wr_id = (u64)(uintptr_t)mr;
	link->wr_reg.mr = mr;
	link->wr_reg.key = mr->rkey;
	failed_wr = &link->wr_reg.wr;
	rc = ib_post_send(link->roce_qp, &link->wr_reg.wr, &failed_wr);
	WARN_ON(failed_wr != &link->wr_reg.wr);
	if (rc)
		return rc;

	rc = wait_event_interruptible_timeout(link->wr_reg_wait,
					      (link->wr_reg_state != POSTED),
					      SMC_WR_REG_MR_WAIT_TIME);
	if (!rc) {
		/* timeout - terminate connections */
		struct smc_link_group *lgr;

		lgr = container_of(link, struct smc_link_group,
				   lnk[SMC_SINGLE_LINK]);
		smc_lgr_terminate(lgr);
		return -EPIPE;
	}
	if (rc == -ERESTARTSYS)
		return -EINTR;
	switch (link->wr_reg_state) {
	case CONFIRMED:
		rc = 0;
		break;
	case FAILED:
		rc = -EIO;
		break;
	case POSTED:
		rc = -EPIPE;
		break;
	}
	return rc;
}

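/* Dismiss all pending send slots whose message matches wr_rx_hdr_type and
 * passes the filter, handing each match to the dismisser callback; the type
 * is read from the start of the tx buffer, since every SMC message begins
 * with the common smc_wr_rx_hdr type byte.
 */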
void smc_wr_tx_dismiss_slots(struct smc_link *link, u8 wr_rx_hdr_type,
			     smc_wr_tx_filter filter,
			     smc_wr_tx_dismisser dismisser,
			     unsigned long data)
{
	struct smc_wr_tx_pend_priv *tx_pend;
	struct smc_wr_rx_hdr *wr_rx;
	int i;

	for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) {
		wr_rx = (struct smc_wr_rx_hdr *)&link->wr_tx_bufs[i];
		if (wr_rx->type != wr_rx_hdr_type)
			continue;
		tx_pend = &link->wr_tx_pends[i].priv;
		if (filter(tx_pend, data))
			dismisser(tx_pend);
	}
}

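/* Return true if any pending send slot carries a message of the given type
 * and passes the filter.
 */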
bool smc_wr_tx_has_pending(struct smc_link *link, u8 wr_rx_hdr_type,
			   smc_wr_tx_filter filter, unsigned long data)
{
	struct smc_wr_tx_pend_priv *tx_pend;
	struct smc_wr_rx_hdr *wr_rx;
	int i;

	for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) {
		wr_rx = (struct smc_wr_rx_hdr *)&link->wr_tx_bufs[i];
		if (wr_rx->type != wr_rx_hdr_type)
			continue;
		tx_pend = &link->wr_tx_pends[i].priv;
		if (filter(tx_pend, data))
			return true;
	}
	return false;
}

/****************************** receive queue ********************************/

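/* Register a receive handler for one message type; a second handler for an
 * already registered type is rejected with -EEXIST.
 */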
int smc_wr_rx_register_handler(struct smc_wr_rx_handler *handler)
{
	struct smc_wr_rx_handler *h_iter;
	int rc = 0;

	spin_lock(&smc_wr_rx_hash_lock);
	hash_for_each_possible(smc_wr_rx_hash, h_iter, list, handler->type) {
		if (h_iter->type == handler->type) {
			rc = -EEXIST;
			goto out_unlock;
		}
	}
	hash_add(smc_wr_rx_hash, &handler->list, handler->type);
out_unlock:
	spin_unlock(&smc_wr_rx_hash_lock);
	return rc;
}

/* Demultiplex a received work request based on the message type to its handler.
 * Relies on smc_wr_rx_hash having been completely filled before any IB WRs,
 * and not being modified any more afterwards so we don't need to lock it.
 */
static inline void smc_wr_rx_demultiplex(struct ib_wc *wc)
{
	struct smc_link *link = (struct smc_link *)wc->qp->qp_context;
	struct smc_wr_rx_handler *handler;
	struct smc_wr_rx_hdr *wr_rx;
	u64 temp_wr_id;
	u32 index;

	if (wc->byte_len < sizeof(*wr_rx))
		return; /* short message */
	temp_wr_id = wc->wr_id;
	index = do_div(temp_wr_id, link->wr_rx_cnt);
	wr_rx = (struct smc_wr_rx_hdr *)&link->wr_rx_bufs[index];
	hash_for_each_possible(smc_wr_rx_hash, handler, list, wr_rx->type) {
		if (handler->type == wr_rx->type)
			handler->handler(wc, wr_rx);
	}
}

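/* Process a batch of receive CQEs: successful completions are demultiplexed
 * to their type handler and the receive WR is reposted; fatal errors (retry
 * exceeded, flush) terminate the link group, other errors just refill the
 * receive queue.
 */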
static inline void smc_wr_rx_process_cqes(struct ib_wc wc[], int num)
{
	struct smc_link *link;
	int i;

	for (i = 0; i < num; i++) {
		link = wc[i].qp->qp_context;
		if (wc[i].status == IB_WC_SUCCESS) {
			smc_wr_rx_demultiplex(&wc[i]);
			smc_wr_rx_post(link); /* refill WR RX */
		} else {
			struct smc_link_group *lgr;

			/* handle status errors */
			switch (wc[i].status) {
			case IB_WC_RETRY_EXC_ERR:
			case IB_WC_RNR_RETRY_EXC_ERR:
			case IB_WC_WR_FLUSH_ERR:
				/* terminate connections of this link group
				 * abnormally
				 */
				lgr = container_of(link, struct smc_link_group,
						   lnk[SMC_SINGLE_LINK]);
				smc_lgr_terminate(lgr);
				break;
			default:
				smc_wr_rx_post(link); /* refill WR RX */
				break;
			}
		}
	}
}

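/* Receive-CQ bottom half: drain and dispatch receive completions using the
 * same poll, re-arm, then re-poll pattern as the send tasklet.
 */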
static void smc_wr_rx_tasklet_fn(unsigned long data)
{
	struct smc_ib_device *dev = (struct smc_ib_device *)data;
	struct ib_wc wc[SMC_WR_MAX_POLL_CQE];
	int polled = 0;
	int rc;

again:
	polled++;
	do {
		memset(&wc, 0, sizeof(wc));
		rc = ib_poll_cq(dev->roce_cq_recv, SMC_WR_MAX_POLL_CQE, wc);
		if (polled == 1) {
			ib_req_notify_cq(dev->roce_cq_recv,
					 IB_CQ_SOLICITED_MASK
					 | IB_CQ_REPORT_MISSED_EVENTS);
		}
		if (!rc)
			break;
		smc_wr_rx_process_cqes(&wc[0], rc);
	} while (rc > 0);
	if (polled == 1)
		goto again;
}

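/* Completion callback for the receive CQ, called in IRQ context; defer the
 * actual CQE processing to the receive tasklet.
 */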
void smc_wr_rx_cq_handler(struct ib_cq *ib_cq, void *cq_context)
{
	struct smc_ib_device *dev = (struct smc_ib_device *)cq_context;

	tasklet_schedule(&dev->recv_tasklet);
}

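/* Post an initial receive work request for each of the wr_rx_cnt slots. */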
int smc_wr_rx_post_init(struct smc_link *link)
{
	u32 i;
	int rc = 0;

	for (i = 0; i < link->wr_rx_cnt; i++)
		rc = smc_wr_rx_post(link);
	return rc;
}

/***************************** init, exit, misc ******************************/

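/* Query and cache the QP attributes, and size the send/receive rings from
 * the QP's send/receive WR capacities (capped at SMC_WR_BUF_CNT and
 * 3 * SMC_WR_BUF_CNT respectively).
 */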
void smc_wr_remember_qp_attr(struct smc_link *lnk)
{
	struct ib_qp_attr *attr = &lnk->qp_attr;
	struct ib_qp_init_attr init_attr;

	memset(attr, 0, sizeof(*attr));
	memset(&init_attr, 0, sizeof(init_attr));
	ib_query_qp(lnk->roce_qp, attr,
		    IB_QP_STATE |
		    IB_QP_CUR_STATE |
		    IB_QP_PKEY_INDEX |
		    IB_QP_PORT |
		    IB_QP_QKEY |
		    IB_QP_AV |
		    IB_QP_PATH_MTU |
		    IB_QP_TIMEOUT |
		    IB_QP_RETRY_CNT |
		    IB_QP_RNR_RETRY |
		    IB_QP_RQ_PSN |
		    IB_QP_ALT_PATH |
		    IB_QP_MIN_RNR_TIMER |
		    IB_QP_SQ_PSN |
		    IB_QP_PATH_MIG_STATE |
		    IB_QP_CAP |
		    IB_QP_DEST_QPN,
		    &init_attr);

	lnk->wr_tx_cnt = min_t(size_t, SMC_WR_BUF_CNT,
			       lnk->qp_attr.cap.max_send_wr);
	lnk->wr_rx_cnt = min_t(size_t, SMC_WR_BUF_CNT * 3,
			       lnk->qp_attr.cap.max_recv_wr);
}

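/* Wire up the static parts of all send/receive work requests and their
 * scatter-gather entries: one SGE per WR, pointing into the DMA-mapped
 * buffer rings, plus the memory-registration WR.
 */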
static void smc_wr_init_sge(struct smc_link *lnk)
{
	u32 i;

	for (i = 0; i < lnk->wr_tx_cnt; i++) {
		lnk->wr_tx_sges[i].addr =
			lnk->wr_tx_dma_addr + i * SMC_WR_BUF_SIZE;
		lnk->wr_tx_sges[i].length = SMC_WR_TX_SIZE;
		lnk->wr_tx_sges[i].lkey = lnk->roce_pd->local_dma_lkey;
		lnk->wr_tx_ibs[i].next = NULL;
		lnk->wr_tx_ibs[i].sg_list = &lnk->wr_tx_sges[i];
		lnk->wr_tx_ibs[i].num_sge = 1;
		lnk->wr_tx_ibs[i].opcode = IB_WR_SEND;
		lnk->wr_tx_ibs[i].send_flags =
			IB_SEND_SIGNALED | IB_SEND_SOLICITED;
	}
	for (i = 0; i < lnk->wr_rx_cnt; i++) {
		lnk->wr_rx_sges[i].addr =
			lnk->wr_rx_dma_addr + i * SMC_WR_BUF_SIZE;
		lnk->wr_rx_sges[i].length = SMC_WR_BUF_SIZE;
		lnk->wr_rx_sges[i].lkey = lnk->roce_pd->local_dma_lkey;
		lnk->wr_rx_ibs[i].next = NULL;
		lnk->wr_rx_ibs[i].sg_list = &lnk->wr_rx_sges[i];
		lnk->wr_rx_ibs[i].num_sge = 1;
	}
	lnk->wr_reg.wr.next = NULL;
	lnk->wr_reg.wr.num_sge = 0;
	lnk->wr_reg.wr.send_flags = IB_SEND_SIGNALED;
	lnk->wr_reg.wr.opcode = IB_WR_REG_MR;
	lnk->wr_reg.access = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE;
}

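/* Release a link's DMA mappings and clear the in-use bitmask; the ring
 * memory itself is freed separately via smc_wr_free_link_mem().
 */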
void smc_wr_free_link(struct smc_link *lnk)
{
	struct ib_device *ibdev;

	memset(lnk->wr_tx_mask, 0,
	       BITS_TO_LONGS(SMC_WR_BUF_CNT) * sizeof(*lnk->wr_tx_mask));

	if (!lnk->smcibdev)
		return;
	ibdev = lnk->smcibdev->ibdev;

	if (lnk->wr_rx_dma_addr) {
		ib_dma_unmap_single(ibdev, lnk->wr_rx_dma_addr,
				    SMC_WR_BUF_SIZE * lnk->wr_rx_cnt,
				    DMA_FROM_DEVICE);
		lnk->wr_rx_dma_addr = 0;
	}
	if (lnk->wr_tx_dma_addr) {
		ib_dma_unmap_single(ibdev, lnk->wr_tx_dma_addr,
				    SMC_WR_BUF_SIZE * lnk->wr_tx_cnt,
				    DMA_TO_DEVICE);
		lnk->wr_tx_dma_addr = 0;
	}
}

void smc_wr_free_link_mem(struct smc_link *lnk)
{
	kfree(lnk->wr_tx_pends);
	lnk->wr_tx_pends = NULL;
	kfree(lnk->wr_tx_mask);
	lnk->wr_tx_mask = NULL;
	kfree(lnk->wr_tx_sges);
	lnk->wr_tx_sges = NULL;
	kfree(lnk->wr_rx_sges);
	lnk->wr_rx_sges = NULL;
	kfree(lnk->wr_rx_ibs);
	lnk->wr_rx_ibs = NULL;
	kfree(lnk->wr_tx_ibs);
	lnk->wr_tx_ibs = NULL;
	kfree(lnk->wr_tx_bufs);
	lnk->wr_tx_bufs = NULL;
	kfree(lnk->wr_rx_bufs);
	lnk->wr_rx_bufs = NULL;
}

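/* Allocate all per-link rings: send/receive buffers, work requests and SGEs
 * (the receive side is sized 3 * SMC_WR_BUF_CNT), the in-use bitmask and the
 * pending-send array; unwinds in reverse order on failure.
 */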
int smc_wr_alloc_link_mem(struct smc_link *link)
{
	/* allocate link related memory */
	link->wr_tx_bufs = kcalloc(SMC_WR_BUF_CNT, SMC_WR_BUF_SIZE, GFP_KERNEL);
	if (!link->wr_tx_bufs)
		goto no_mem;
	link->wr_rx_bufs = kcalloc(SMC_WR_BUF_CNT * 3, SMC_WR_BUF_SIZE,
				   GFP_KERNEL);
	if (!link->wr_rx_bufs)
		goto no_mem_wr_tx_bufs;
	link->wr_tx_ibs = kcalloc(SMC_WR_BUF_CNT, sizeof(link->wr_tx_ibs[0]),
				  GFP_KERNEL);
	if (!link->wr_tx_ibs)
		goto no_mem_wr_rx_bufs;
	link->wr_rx_ibs = kcalloc(SMC_WR_BUF_CNT * 3,
				  sizeof(link->wr_rx_ibs[0]),
				  GFP_KERNEL);
	if (!link->wr_rx_ibs)
		goto no_mem_wr_tx_ibs;
	link->wr_tx_sges = kcalloc(SMC_WR_BUF_CNT, sizeof(link->wr_tx_sges[0]),
				   GFP_KERNEL);
	if (!link->wr_tx_sges)
		goto no_mem_wr_rx_ibs;
	link->wr_rx_sges = kcalloc(SMC_WR_BUF_CNT * 3,
				   sizeof(link->wr_rx_sges[0]),
				   GFP_KERNEL);
	if (!link->wr_rx_sges)
		goto no_mem_wr_tx_sges;
	link->wr_tx_mask = kzalloc(
		BITS_TO_LONGS(SMC_WR_BUF_CNT) * sizeof(*link->wr_tx_mask),
		GFP_KERNEL);
	if (!link->wr_tx_mask)
		goto no_mem_wr_rx_sges;
	link->wr_tx_pends = kcalloc(SMC_WR_BUF_CNT,
				    sizeof(link->wr_tx_pends[0]),
				    GFP_KERNEL);
	if (!link->wr_tx_pends)
		goto no_mem_wr_tx_mask;
	return 0;

no_mem_wr_tx_mask:
	kfree(link->wr_tx_mask);
no_mem_wr_rx_sges:
	kfree(link->wr_rx_sges);
no_mem_wr_tx_sges:
	kfree(link->wr_tx_sges);
no_mem_wr_rx_ibs:
	kfree(link->wr_rx_ibs);
no_mem_wr_tx_ibs:
	kfree(link->wr_tx_ibs);
no_mem_wr_rx_bufs:
	kfree(link->wr_rx_bufs);
no_mem_wr_tx_bufs:
	kfree(link->wr_tx_bufs);
no_mem:
	return -ENOMEM;
}

void smc_wr_remove_dev(struct smc_ib_device *smcibdev)
{
	tasklet_kill(&smcibdev->recv_tasklet);
	tasklet_kill(&smcibdev->send_tasklet);
}

void smc_wr_add_dev(struct smc_ib_device *smcibdev)
{
	tasklet_init(&smcibdev->recv_tasklet, smc_wr_rx_tasklet_fn,
		     (unsigned long)smcibdev);
	tasklet_init(&smcibdev->send_tasklet, smc_wr_tx_tasklet_fn,
		     (unsigned long)smcibdev);
}

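/* DMA-map the previously allocated buffer rings and initialize the work
 * request templates and wait queues; on a send-side mapping failure the
 * receive mapping is undone again.
 */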
int smc_wr_create_link(struct smc_link *lnk)
{
	struct ib_device *ibdev = lnk->smcibdev->ibdev;
	int rc = 0;

	smc_wr_tx_set_wr_id(&lnk->wr_tx_id, 0);
	lnk->wr_rx_id = 0;
	lnk->wr_rx_dma_addr = ib_dma_map_single(
		ibdev, lnk->wr_rx_bufs, SMC_WR_BUF_SIZE * lnk->wr_rx_cnt,
		DMA_FROM_DEVICE);
	if (ib_dma_mapping_error(ibdev, lnk->wr_rx_dma_addr)) {
		lnk->wr_rx_dma_addr = 0;
		rc = -EIO;
		goto out;
	}
	lnk->wr_tx_dma_addr = ib_dma_map_single(
		ibdev, lnk->wr_tx_bufs, SMC_WR_BUF_SIZE * lnk->wr_tx_cnt,
		DMA_TO_DEVICE);
	if (ib_dma_mapping_error(ibdev, lnk->wr_tx_dma_addr)) {
		rc = -EIO;
		goto dma_unmap;
	}
	smc_wr_init_sge(lnk);
	memset(lnk->wr_tx_mask, 0,
	       BITS_TO_LONGS(SMC_WR_BUF_CNT) * sizeof(*lnk->wr_tx_mask));
	init_waitqueue_head(&lnk->wr_tx_wait);
	init_waitqueue_head(&lnk->wr_reg_wait);
	return rc;

dma_unmap:
	ib_dma_unmap_single(ibdev, lnk->wr_rx_dma_addr,
			    SMC_WR_BUF_SIZE * lnk->wr_rx_cnt,
			    DMA_FROM_DEVICE);
	lnk->wr_rx_dma_addr = 0;
out:
	return rc;
}