1 // SPDX-License-Identifier: GPL-2.0
3 * Shared Memory Communications over RDMA (SMC-R) and RoCE
5 * Link Layer Control (LLC)
7 * Copyright IBM Corp. 2016
9 * Author(s): Klaus Wacker <Klaus.Wacker@de.ibm.com>
10 * Ursula Braun <ubraun@linux.vnet.ibm.com>
14 #include <rdma/ib_verbs.h>
22 #define SMC_LLC_DATA_LEN 40
25 struct smc_wr_rx_hdr common;
27 #if defined(__BIG_ENDIAN_BITFIELD)
30 #elif defined(__LITTLE_ENDIAN_BITFIELD)
31 u8 add_link_rej_rsn:4,
37 #define SMC_LLC_FLAG_NO_RMBE_EYEC 0x03
39 struct smc_llc_msg_confirm_link { /* type 0x01 */
40 struct smc_llc_hdr hd;
41 u8 sender_mac[ETH_ALEN];
42 u8 sender_gid[SMC_GID_SIZE];
45 u8 link_uid[SMC_LGR_ID_SIZE];
50 #define SMC_LLC_FLAG_ADD_LNK_REJ 0x40
51 #define SMC_LLC_REJ_RSN_NO_ALT_PATH 1
53 #define SMC_LLC_ADD_LNK_MAX_LINKS 2
55 struct smc_llc_msg_add_link { /* type 0x02 */
56 struct smc_llc_hdr hd;
57 u8 sender_mac[ETH_ALEN];
59 u8 sender_gid[SMC_GID_SIZE];
62 #if defined(__BIG_ENDIAN_BITFIELD)
65 #elif defined(__LITTLE_ENDIAN_BITFIELD)
73 struct smc_llc_msg_add_link_cont_rt {
79 #define SMC_LLC_RKEYS_PER_CONT_MSG 2
81 struct smc_llc_msg_add_link_cont { /* type 0x03 */
82 struct smc_llc_hdr hd;
86 struct smc_llc_msg_add_link_cont_rt rt[SMC_LLC_RKEYS_PER_CONT_MSG];
88 } __packed; /* format defined in RFC7609 */
90 #define SMC_LLC_FLAG_DEL_LINK_ALL 0x40
91 #define SMC_LLC_FLAG_DEL_LINK_ORDERLY 0x20
93 struct smc_llc_msg_del_link { /* type 0x04 */
94 struct smc_llc_hdr hd;
98 } __packed; /* format defined in RFC7609 */
100 struct smc_llc_msg_test_link { /* type 0x07 */
101 struct smc_llc_hdr hd;
106 struct smc_rmb_rtoken {
108 u8 num_rkeys; /* first rtoken byte of CONFIRM LINK msg */
109 /* is actually the num of rtokens, first */
110 /* rtoken is always for the current link */
111 u8 link_id; /* link id of the rtoken */
115 } __packed; /* format defined in RFC7609 */
117 #define SMC_LLC_RKEYS_PER_MSG 3
119 struct smc_llc_msg_confirm_rkey { /* type 0x06 */
120 struct smc_llc_hdr hd;
121 struct smc_rmb_rtoken rtoken[SMC_LLC_RKEYS_PER_MSG];
125 #define SMC_LLC_DEL_RKEY_MAX 8
126 #define SMC_LLC_FLAG_RKEY_RETRY 0x10
127 #define SMC_LLC_FLAG_RKEY_NEG 0x20
129 struct smc_llc_msg_delete_rkey { /* type 0x09 */
130 struct smc_llc_hdr hd;
139 struct smc_llc_msg_confirm_link confirm_link;
140 struct smc_llc_msg_add_link add_link;
141 struct smc_llc_msg_add_link_cont add_link_cont;
142 struct smc_llc_msg_del_link delete_link;
144 struct smc_llc_msg_confirm_rkey confirm_rkey;
145 struct smc_llc_msg_delete_rkey delete_rkey;
147 struct smc_llc_msg_test_link test_link;
149 struct smc_llc_hdr hdr;
150 u8 data[SMC_LLC_DATA_LEN];
154 #define SMC_LLC_FLAG_RESP 0x80
156 struct smc_llc_qentry {
157 struct list_head list;
158 struct smc_link *link;
159 union smc_llc_msg msg;
162 struct smc_llc_qentry *smc_llc_flow_qentry_clr(struct smc_llc_flow *flow)
164 struct smc_llc_qentry *qentry = flow->qentry;
170 void smc_llc_flow_qentry_del(struct smc_llc_flow *flow)
172 struct smc_llc_qentry *qentry;
175 qentry = flow->qentry;
181 static inline void smc_llc_flow_qentry_set(struct smc_llc_flow *flow,
182 struct smc_llc_qentry *qentry)
184 flow->qentry = qentry;
187 /* try to start a new llc flow, initiated by an incoming llc msg */
188 static bool smc_llc_flow_start(struct smc_llc_flow *flow,
189 struct smc_llc_qentry *qentry)
191 struct smc_link_group *lgr = qentry->link->lgr;
193 spin_lock_bh(&lgr->llc_flow_lock);
195 /* a flow is already active */
196 if ((qentry->msg.raw.hdr.common.type == SMC_LLC_ADD_LINK ||
197 qentry->msg.raw.hdr.common.type == SMC_LLC_DELETE_LINK) &&
198 !lgr->delayed_event) {
199 lgr->delayed_event = qentry;
201 /* forget this llc request */
204 spin_unlock_bh(&lgr->llc_flow_lock);
207 switch (qentry->msg.raw.hdr.common.type) {
208 case SMC_LLC_ADD_LINK:
209 flow->type = SMC_LLC_FLOW_ADD_LINK;
211 case SMC_LLC_DELETE_LINK:
212 flow->type = SMC_LLC_FLOW_DEL_LINK;
214 case SMC_LLC_CONFIRM_RKEY:
215 case SMC_LLC_DELETE_RKEY:
216 flow->type = SMC_LLC_FLOW_RKEY;
219 flow->type = SMC_LLC_FLOW_NONE;
221 if (qentry == lgr->delayed_event)
222 lgr->delayed_event = NULL;
223 spin_unlock_bh(&lgr->llc_flow_lock);
224 smc_llc_flow_qentry_set(flow, qentry);
228 /* start a new local llc flow, wait till current flow finished */
229 int smc_llc_flow_initiate(struct smc_link_group *lgr,
230 enum smc_llc_flowtype type)
232 enum smc_llc_flowtype allowed_remote = SMC_LLC_FLOW_NONE;
235 /* all flows except confirm_rkey and delete_rkey are exclusive,
236 * confirm/delete rkey flows can run concurrently (local and remote)
238 if (type == SMC_LLC_FLOW_RKEY)
239 allowed_remote = SMC_LLC_FLOW_RKEY;
241 if (list_empty(&lgr->list))
243 spin_lock_bh(&lgr->llc_flow_lock);
244 if (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE &&
245 (lgr->llc_flow_rmt.type == SMC_LLC_FLOW_NONE ||
246 lgr->llc_flow_rmt.type == allowed_remote)) {
247 lgr->llc_flow_lcl.type = type;
248 spin_unlock_bh(&lgr->llc_flow_lock);
251 spin_unlock_bh(&lgr->llc_flow_lock);
252 rc = wait_event_interruptible_timeout(lgr->llc_waiter,
253 (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE &&
254 (lgr->llc_flow_rmt.type == SMC_LLC_FLOW_NONE ||
255 lgr->llc_flow_rmt.type == allowed_remote)),
262 /* finish the current llc flow */
263 void smc_llc_flow_stop(struct smc_link_group *lgr, struct smc_llc_flow *flow)
265 spin_lock_bh(&lgr->llc_flow_lock);
266 memset(flow, 0, sizeof(*flow));
267 flow->type = SMC_LLC_FLOW_NONE;
268 spin_unlock_bh(&lgr->llc_flow_lock);
269 if (!list_empty(&lgr->list) && lgr->delayed_event &&
270 flow == &lgr->llc_flow_lcl)
271 schedule_work(&lgr->llc_event_work);
273 wake_up_interruptible(&lgr->llc_waiter);
276 /* lnk is optional and used for early wakeup when link goes down, useful in
277 * cases where we wait for a response on the link after we sent a request
279 struct smc_llc_qentry *smc_llc_wait(struct smc_link_group *lgr,
280 struct smc_link *lnk,
281 int time_out, u8 exp_msg)
283 struct smc_llc_flow *flow = &lgr->llc_flow_lcl;
285 wait_event_interruptible_timeout(lgr->llc_waiter,
287 (lnk && !smc_link_usable(lnk)) ||
288 list_empty(&lgr->list)),
291 (lnk && !smc_link_usable(lnk)) || list_empty(&lgr->list)) {
292 smc_llc_flow_qentry_del(flow);
295 if (exp_msg && flow->qentry->msg.raw.hdr.common.type != exp_msg) {
296 if (exp_msg == SMC_LLC_ADD_LINK &&
297 flow->qentry->msg.raw.hdr.common.type ==
298 SMC_LLC_DELETE_LINK) {
299 /* flow_start will delay the unexpected msg */
300 smc_llc_flow_start(&lgr->llc_flow_lcl,
301 smc_llc_flow_qentry_clr(flow));
304 smc_llc_flow_qentry_del(flow);
/********************************** send *************************************/

/* no private tx context needed yet; size is checked against
 * SMC_WR_TX_PEND_PRIV_SIZE in smc_llc_add_pending_send()
 */
struct smc_llc_tx_pend {
};
315 /* handler for send/transmission completion of an LLC msg */
316 static void smc_llc_tx_handler(struct smc_wr_tx_pend_priv *pend,
317 struct smc_link *link,
318 enum ib_wc_status wc_status)
320 /* future work: handle wc_status error for recovery and failover */
324 * smc_llc_add_pending_send() - add LLC control message to pending WQE transmits
325 * @link: Pointer to SMC link used for sending LLC control message.
326 * @wr_buf: Out variable returning pointer to work request payload buffer.
327 * @pend: Out variable returning pointer to private pending WR tracking.
328 * It's the context the transmit complete handler will get.
330 * Reserves and pre-fills an entry for a pending work request send/tx.
331 * Used by mid-level smc_llc_send_msg() to prepare for later actual send/tx.
332 * Can sleep due to smc_get_ctrl_buf (if not in softirq context).
334 * Return: 0 on success, otherwise an error value.
336 static int smc_llc_add_pending_send(struct smc_link *link,
337 struct smc_wr_buf **wr_buf,
338 struct smc_wr_tx_pend_priv **pend)
342 rc = smc_wr_tx_get_free_slot(link, smc_llc_tx_handler, wr_buf, NULL,
347 sizeof(union smc_llc_msg) > SMC_WR_BUF_SIZE,
348 "must increase SMC_WR_BUF_SIZE to at least sizeof(struct smc_llc_msg)");
350 sizeof(union smc_llc_msg) != SMC_WR_TX_SIZE,
351 "must adapt SMC_WR_TX_SIZE to sizeof(struct smc_llc_msg); if not all smc_wr upper layer protocols use the same message size any more, must start to set link->wr_tx_sges[i].length on each individual smc_wr_tx_send()");
353 sizeof(struct smc_llc_tx_pend) > SMC_WR_TX_PEND_PRIV_SIZE,
354 "must increase SMC_WR_TX_PEND_PRIV_SIZE to at least sizeof(struct smc_llc_tx_pend)");
358 /* high-level API to send LLC confirm link */
359 int smc_llc_send_confirm_link(struct smc_link *link,
360 enum smc_llc_reqresp reqresp)
362 struct smc_link_group *lgr = smc_get_lgr(link);
363 struct smc_llc_msg_confirm_link *confllc;
364 struct smc_wr_tx_pend_priv *pend;
365 struct smc_wr_buf *wr_buf;
368 rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
371 confllc = (struct smc_llc_msg_confirm_link *)wr_buf;
372 memset(confllc, 0, sizeof(*confllc));
373 confllc->hd.common.type = SMC_LLC_CONFIRM_LINK;
374 confllc->hd.length = sizeof(struct smc_llc_msg_confirm_link);
375 confllc->hd.flags |= SMC_LLC_FLAG_NO_RMBE_EYEC;
376 if (reqresp == SMC_LLC_RESP)
377 confllc->hd.flags |= SMC_LLC_FLAG_RESP;
378 memcpy(confllc->sender_mac, link->smcibdev->mac[link->ibport - 1],
380 memcpy(confllc->sender_gid, link->gid, SMC_GID_SIZE);
381 hton24(confllc->sender_qp_num, link->roce_qp->qp_num);
382 confllc->link_num = link->link_id;
383 memcpy(confllc->link_uid, lgr->id, SMC_LGR_ID_SIZE);
384 confllc->max_links = SMC_LLC_ADD_LNK_MAX_LINKS; /* enforce peer resp. */
385 /* send llc message */
386 rc = smc_wr_tx_send(link, pend);
390 /* send LLC confirm rkey request */
391 static int smc_llc_send_confirm_rkey(struct smc_link *send_link,
392 struct smc_buf_desc *rmb_desc)
394 struct smc_llc_msg_confirm_rkey *rkeyllc;
395 struct smc_wr_tx_pend_priv *pend;
396 struct smc_wr_buf *wr_buf;
397 struct smc_link *link;
400 rc = smc_llc_add_pending_send(send_link, &wr_buf, &pend);
403 rkeyllc = (struct smc_llc_msg_confirm_rkey *)wr_buf;
404 memset(rkeyllc, 0, sizeof(*rkeyllc));
405 rkeyllc->hd.common.type = SMC_LLC_CONFIRM_RKEY;
406 rkeyllc->hd.length = sizeof(struct smc_llc_msg_confirm_rkey);
409 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
410 link = &send_link->lgr->lnk[i];
411 if (link->state == SMC_LNK_ACTIVE && link != send_link) {
412 rkeyllc->rtoken[rtok_ix].link_id = link->link_id;
413 rkeyllc->rtoken[rtok_ix].rmb_key =
414 htonl(rmb_desc->mr_rx[link->link_idx]->rkey);
415 rkeyllc->rtoken[rtok_ix].rmb_vaddr = cpu_to_be64(
417 rmb_desc->sgt[link->link_idx].sgl));
421 /* rkey of send_link is in rtoken[0] */
422 rkeyllc->rtoken[0].num_rkeys = rtok_ix - 1;
423 rkeyllc->rtoken[0].rmb_key =
424 htonl(rmb_desc->mr_rx[send_link->link_idx]->rkey);
425 rkeyllc->rtoken[0].rmb_vaddr = cpu_to_be64(
426 (u64)sg_dma_address(rmb_desc->sgt[send_link->link_idx].sgl));
427 /* send llc message */
428 rc = smc_wr_tx_send(send_link, pend);
432 /* send LLC delete rkey request */
433 static int smc_llc_send_delete_rkey(struct smc_link *link,
434 struct smc_buf_desc *rmb_desc)
436 struct smc_llc_msg_delete_rkey *rkeyllc;
437 struct smc_wr_tx_pend_priv *pend;
438 struct smc_wr_buf *wr_buf;
441 rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
444 rkeyllc = (struct smc_llc_msg_delete_rkey *)wr_buf;
445 memset(rkeyllc, 0, sizeof(*rkeyllc));
446 rkeyllc->hd.common.type = SMC_LLC_DELETE_RKEY;
447 rkeyllc->hd.length = sizeof(struct smc_llc_msg_delete_rkey);
448 rkeyllc->num_rkeys = 1;
449 rkeyllc->rkey[0] = htonl(rmb_desc->mr_rx[link->link_idx]->rkey);
450 /* send llc message */
451 rc = smc_wr_tx_send(link, pend);
455 /* send ADD LINK request or response */
456 int smc_llc_send_add_link(struct smc_link *link, u8 mac[], u8 gid[],
457 struct smc_link *link_new,
458 enum smc_llc_reqresp reqresp)
460 struct smc_llc_msg_add_link *addllc;
461 struct smc_wr_tx_pend_priv *pend;
462 struct smc_wr_buf *wr_buf;
465 rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
468 addllc = (struct smc_llc_msg_add_link *)wr_buf;
470 memset(addllc, 0, sizeof(*addllc));
471 addllc->hd.common.type = SMC_LLC_ADD_LINK;
472 addllc->hd.length = sizeof(struct smc_llc_msg_add_link);
473 if (reqresp == SMC_LLC_RESP)
474 addllc->hd.flags |= SMC_LLC_FLAG_RESP;
475 memcpy(addllc->sender_mac, mac, ETH_ALEN);
476 memcpy(addllc->sender_gid, gid, SMC_GID_SIZE);
478 addllc->link_num = link_new->link_id;
479 hton24(addllc->sender_qp_num, link_new->roce_qp->qp_num);
480 hton24(addllc->initial_psn, link_new->psn_initial);
481 if (reqresp == SMC_LLC_REQ)
482 addllc->qp_mtu = link_new->path_mtu;
484 addllc->qp_mtu = min(link_new->path_mtu,
487 /* send llc message */
488 rc = smc_wr_tx_send(link, pend);
492 /* send DELETE LINK request or response */
493 int smc_llc_send_delete_link(struct smc_link *link, u8 link_del_id,
494 enum smc_llc_reqresp reqresp, bool orderly,
497 struct smc_llc_msg_del_link *delllc;
498 struct smc_wr_tx_pend_priv *pend;
499 struct smc_wr_buf *wr_buf;
502 rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
505 delllc = (struct smc_llc_msg_del_link *)wr_buf;
507 memset(delllc, 0, sizeof(*delllc));
508 delllc->hd.common.type = SMC_LLC_DELETE_LINK;
509 delllc->hd.length = sizeof(struct smc_llc_msg_del_link);
510 if (reqresp == SMC_LLC_RESP)
511 delllc->hd.flags |= SMC_LLC_FLAG_RESP;
513 delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ORDERLY;
515 delllc->link_num = link_del_id;
517 delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ALL;
518 delllc->reason = htonl(reason);
519 /* send llc message */
520 rc = smc_wr_tx_send(link, pend);
524 /* send LLC test link request */
525 static int smc_llc_send_test_link(struct smc_link *link, u8 user_data[16])
527 struct smc_llc_msg_test_link *testllc;
528 struct smc_wr_tx_pend_priv *pend;
529 struct smc_wr_buf *wr_buf;
532 rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
535 testllc = (struct smc_llc_msg_test_link *)wr_buf;
536 memset(testllc, 0, sizeof(*testllc));
537 testllc->hd.common.type = SMC_LLC_TEST_LINK;
538 testllc->hd.length = sizeof(struct smc_llc_msg_test_link);
539 memcpy(testllc->user_data, user_data, sizeof(testllc->user_data));
540 /* send llc message */
541 rc = smc_wr_tx_send(link, pend);
545 /* schedule an llc send on link, may wait for buffers */
546 static int smc_llc_send_message(struct smc_link *link, void *llcbuf)
548 struct smc_wr_tx_pend_priv *pend;
549 struct smc_wr_buf *wr_buf;
552 if (!smc_link_usable(link))
554 rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
557 memcpy(wr_buf, llcbuf, sizeof(union smc_llc_msg));
558 return smc_wr_tx_send(link, pend);
561 /********************************* receive ***********************************/
563 static int smc_llc_alloc_alt_link(struct smc_link_group *lgr,
564 enum smc_lgr_type lgr_new_t)
568 if (lgr->type == SMC_LGR_SYMMETRIC ||
569 (lgr->type != SMC_LGR_SINGLE &&
570 (lgr_new_t == SMC_LGR_ASYMMETRIC_LOCAL ||
571 lgr_new_t == SMC_LGR_ASYMMETRIC_PEER)))
574 if (lgr_new_t == SMC_LGR_ASYMMETRIC_LOCAL ||
575 lgr_new_t == SMC_LGR_ASYMMETRIC_PEER) {
576 for (i = SMC_LINKS_PER_LGR_MAX - 1; i >= 0; i--)
577 if (lgr->lnk[i].state == SMC_LNK_UNUSED)
580 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
581 if (lgr->lnk[i].state == SMC_LNK_UNUSED)
587 /* return first buffer from any of the next buf lists */
588 static struct smc_buf_desc *_smc_llc_get_next_rmb(struct smc_link_group *lgr,
591 struct smc_buf_desc *buf_pos;
593 while (*buf_lst < SMC_RMBE_SIZES) {
594 buf_pos = list_first_entry_or_null(&lgr->rmbs[*buf_lst],
595 struct smc_buf_desc, list);
603 /* return next rmb from buffer lists */
604 static struct smc_buf_desc *smc_llc_get_next_rmb(struct smc_link_group *lgr,
606 struct smc_buf_desc *buf_pos)
608 struct smc_buf_desc *buf_next;
610 if (!buf_pos || list_is_last(&buf_pos->list, &lgr->rmbs[*buf_lst])) {
612 return _smc_llc_get_next_rmb(lgr, buf_lst);
614 buf_next = list_next_entry(buf_pos, list);
618 static struct smc_buf_desc *smc_llc_get_first_rmb(struct smc_link_group *lgr,
622 return smc_llc_get_next_rmb(lgr, buf_lst, NULL);
625 /* send one add_link_continue msg */
626 static int smc_llc_add_link_cont(struct smc_link *link,
627 struct smc_link *link_new, u8 *num_rkeys_todo,
628 int *buf_lst, struct smc_buf_desc **buf_pos)
630 struct smc_llc_msg_add_link_cont *addc_llc;
631 struct smc_link_group *lgr = link->lgr;
632 int prim_lnk_idx, lnk_idx, i, rc;
633 struct smc_wr_tx_pend_priv *pend;
634 struct smc_wr_buf *wr_buf;
635 struct smc_buf_desc *rmb;
638 rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
641 addc_llc = (struct smc_llc_msg_add_link_cont *)wr_buf;
642 memset(addc_llc, 0, sizeof(*addc_llc));
644 prim_lnk_idx = link->link_idx;
645 lnk_idx = link_new->link_idx;
646 addc_llc->link_num = link_new->link_id;
647 addc_llc->num_rkeys = *num_rkeys_todo;
649 for (i = 0; i < min_t(u8, n, SMC_LLC_RKEYS_PER_CONT_MSG); i++) {
651 addc_llc->num_rkeys = addc_llc->num_rkeys -
658 addc_llc->rt[i].rmb_key = htonl(rmb->mr_rx[prim_lnk_idx]->rkey);
659 addc_llc->rt[i].rmb_key_new = htonl(rmb->mr_rx[lnk_idx]->rkey);
660 addc_llc->rt[i].rmb_vaddr_new =
661 cpu_to_be64((u64)sg_dma_address(rmb->sgt[lnk_idx].sgl));
664 *buf_pos = smc_llc_get_next_rmb(lgr, buf_lst, *buf_pos);
665 while (*buf_pos && !(*buf_pos)->used)
666 *buf_pos = smc_llc_get_next_rmb(lgr, buf_lst, *buf_pos);
668 addc_llc->hd.common.type = SMC_LLC_ADD_LINK_CONT;
669 addc_llc->hd.length = sizeof(struct smc_llc_msg_add_link_cont);
670 if (lgr->role == SMC_CLNT)
671 addc_llc->hd.flags |= SMC_LLC_FLAG_RESP;
672 return smc_wr_tx_send(link, pend);
675 static int smc_llc_cli_rkey_exchange(struct smc_link *link,
676 struct smc_link *link_new)
678 struct smc_llc_msg_add_link_cont *addc_llc;
679 struct smc_link_group *lgr = link->lgr;
680 u8 max, num_rkeys_send, num_rkeys_recv;
681 struct smc_llc_qentry *qentry;
682 struct smc_buf_desc *buf_pos;
687 mutex_lock(&lgr->rmbs_lock);
688 num_rkeys_send = lgr->conns_num;
689 buf_pos = smc_llc_get_first_rmb(lgr, &buf_lst);
691 qentry = smc_llc_wait(lgr, NULL, SMC_LLC_WAIT_TIME,
692 SMC_LLC_ADD_LINK_CONT);
697 addc_llc = &qentry->msg.add_link_cont;
698 num_rkeys_recv = addc_llc->num_rkeys;
699 max = min_t(u8, num_rkeys_recv, SMC_LLC_RKEYS_PER_CONT_MSG);
700 for (i = 0; i < max; i++) {
701 smc_rtoken_set(lgr, link->link_idx, link_new->link_idx,
702 addc_llc->rt[i].rmb_key,
703 addc_llc->rt[i].rmb_vaddr_new,
704 addc_llc->rt[i].rmb_key_new);
707 smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
708 rc = smc_llc_add_link_cont(link, link_new, &num_rkeys_send,
712 } while (num_rkeys_send || num_rkeys_recv);
714 mutex_unlock(&lgr->rmbs_lock);
718 /* prepare and send an add link reject response */
719 static int smc_llc_cli_add_link_reject(struct smc_llc_qentry *qentry)
721 qentry->msg.raw.hdr.flags |= SMC_LLC_FLAG_RESP;
722 qentry->msg.raw.hdr.flags |= SMC_LLC_FLAG_ADD_LNK_REJ;
723 qentry->msg.raw.hdr.add_link_rej_rsn = SMC_LLC_REJ_RSN_NO_ALT_PATH;
724 return smc_llc_send_message(qentry->link, &qentry->msg);
727 static void smc_llc_save_add_link_info(struct smc_link *link,
728 struct smc_llc_msg_add_link *add_llc)
730 link->peer_qpn = ntoh24(add_llc->sender_qp_num);
731 memcpy(link->peer_gid, add_llc->sender_gid, SMC_GID_SIZE);
732 memcpy(link->peer_mac, add_llc->sender_mac, ETH_ALEN);
733 link->peer_psn = ntoh24(add_llc->initial_psn);
734 link->peer_mtu = add_llc->qp_mtu;
737 /* as an SMC client, process an add link request */
738 int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry)
740 struct smc_llc_msg_add_link *llc = &qentry->msg.add_link;
741 enum smc_lgr_type lgr_new_t = SMC_LGR_SYMMETRIC;
742 struct smc_link_group *lgr = smc_get_lgr(link);
743 struct smc_link *lnk_new = NULL;
744 struct smc_init_info ini;
747 ini.vlan_id = lgr->vlan_id;
748 smc_pnet_find_alt_roce(lgr, &ini, link->smcibdev);
749 if (!memcmp(llc->sender_gid, link->peer_gid, SMC_GID_SIZE) &&
750 !memcmp(llc->sender_mac, link->peer_mac, ETH_ALEN)) {
753 lgr_new_t = SMC_LGR_ASYMMETRIC_PEER;
756 lgr_new_t = SMC_LGR_ASYMMETRIC_LOCAL;
757 ini.ib_dev = link->smcibdev;
758 ini.ib_port = link->ibport;
760 lnk_idx = smc_llc_alloc_alt_link(lgr, lgr_new_t);
763 lnk_new = &lgr->lnk[lnk_idx];
764 rc = smcr_link_init(lgr, lnk_new, lnk_idx, &ini);
767 smc_llc_save_add_link_info(lnk_new, llc);
768 lnk_new->link_id = llc->link_num;
770 rc = smc_ib_ready_link(lnk_new);
774 rc = smcr_buf_map_lgr(lnk_new);
778 rc = smc_llc_send_add_link(link,
779 lnk_new->smcibdev->mac[ini.ib_port - 1],
780 lnk_new->gid, lnk_new, SMC_LLC_RESP);
783 rc = smc_llc_cli_rkey_exchange(link, lnk_new);
788 /* tbd: rc = smc_llc_cli_conf_link(link, &ini, lnk_new, lgr_new_t); */
792 smcr_link_clear(lnk_new);
794 smc_llc_cli_add_link_reject(qentry);
800 /* worker to process an add link message */
801 static void smc_llc_add_link_work(struct work_struct *work)
803 struct smc_link_group *lgr = container_of(work, struct smc_link_group,
806 if (list_empty(&lgr->list)) {
807 /* link group is terminating */
808 smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
812 /* tbd: call smc_llc_process_cli_add_link(lgr); */
813 /* tbd: call smc_llc_process_srv_add_link(lgr); */
815 smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
818 static void smc_llc_rx_delete_link(struct smc_link *link,
819 struct smc_llc_msg_del_link *llc)
821 struct smc_link_group *lgr = smc_get_lgr(link);
824 if (lgr->role == SMC_SERV) {
825 /* client asks to delete this link, send request */
826 smc_llc_send_delete_link(link, 0, SMC_LLC_REQ, true,
827 SMC_LLC_DEL_PROG_INIT_TERM);
829 /* server requests to delete this link, send response */
830 smc_llc_send_delete_link(link, 0, SMC_LLC_RESP, true,
831 SMC_LLC_DEL_PROG_INIT_TERM);
833 smcr_link_down_cond(link);
836 /* process a confirm_rkey request from peer, remote flow */
837 static void smc_llc_rmt_conf_rkey(struct smc_link_group *lgr)
839 struct smc_llc_msg_confirm_rkey *llc;
840 struct smc_llc_qentry *qentry;
841 struct smc_link *link;
846 qentry = lgr->llc_flow_rmt.qentry;
847 llc = &qentry->msg.confirm_rkey;
850 num_entries = llc->rtoken[0].num_rkeys;
851 /* first rkey entry is for receiving link */
852 rk_idx = smc_rtoken_add(link,
853 llc->rtoken[0].rmb_vaddr,
854 llc->rtoken[0].rmb_key);
858 for (i = 1; i <= min_t(u8, num_entries, SMC_LLC_RKEYS_PER_MSG - 1); i++)
859 smc_rtoken_set2(lgr, rk_idx, llc->rtoken[i].link_id,
860 llc->rtoken[i].rmb_vaddr,
861 llc->rtoken[i].rmb_key);
862 /* max links is 3 so there is no need to support conf_rkey_cont msgs */
865 llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG;
866 llc->hd.flags |= SMC_LLC_FLAG_RKEY_RETRY;
868 llc->hd.flags |= SMC_LLC_FLAG_RESP;
869 smc_llc_send_message(link, &qentry->msg);
870 smc_llc_flow_qentry_del(&lgr->llc_flow_rmt);
873 /* process a delete_rkey request from peer, remote flow */
874 static void smc_llc_rmt_delete_rkey(struct smc_link_group *lgr)
876 struct smc_llc_msg_delete_rkey *llc;
877 struct smc_llc_qentry *qentry;
878 struct smc_link *link;
882 qentry = lgr->llc_flow_rmt.qentry;
883 llc = &qentry->msg.delete_rkey;
886 max = min_t(u8, llc->num_rkeys, SMC_LLC_DEL_RKEY_MAX);
887 for (i = 0; i < max; i++) {
888 if (smc_rtoken_delete(link, llc->rkey[i]))
889 err_mask |= 1 << (SMC_LLC_DEL_RKEY_MAX - 1 - i);
892 llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG;
893 llc->err_mask = err_mask;
895 llc->hd.flags |= SMC_LLC_FLAG_RESP;
896 smc_llc_send_message(link, &qentry->msg);
897 smc_llc_flow_qentry_del(&lgr->llc_flow_rmt);
900 /* flush the llc event queue */
901 static void smc_llc_event_flush(struct smc_link_group *lgr)
903 struct smc_llc_qentry *qentry, *q;
905 spin_lock_bh(&lgr->llc_event_q_lock);
906 list_for_each_entry_safe(qentry, q, &lgr->llc_event_q, list) {
907 list_del_init(&qentry->list);
910 spin_unlock_bh(&lgr->llc_event_q_lock);
913 static void smc_llc_event_handler(struct smc_llc_qentry *qentry)
915 union smc_llc_msg *llc = &qentry->msg;
916 struct smc_link *link = qentry->link;
917 struct smc_link_group *lgr = link->lgr;
919 if (!smc_link_usable(link))
922 switch (llc->raw.hdr.common.type) {
923 case SMC_LLC_TEST_LINK:
924 llc->test_link.hd.flags |= SMC_LLC_FLAG_RESP;
925 smc_llc_send_message(link, llc);
927 case SMC_LLC_ADD_LINK:
928 if (list_empty(&lgr->list))
929 goto out; /* lgr is terminating */
930 if (lgr->role == SMC_CLNT) {
931 if (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_ADD_LINK) {
932 /* a flow is waiting for this message */
933 smc_llc_flow_qentry_set(&lgr->llc_flow_lcl,
935 wake_up_interruptible(&lgr->llc_waiter);
936 } else if (smc_llc_flow_start(&lgr->llc_flow_lcl,
938 schedule_work(&lgr->llc_add_link_work);
940 } else if (smc_llc_flow_start(&lgr->llc_flow_lcl, qentry)) {
941 /* as smc server, handle client suggestion */
942 schedule_work(&lgr->llc_add_link_work);
945 case SMC_LLC_CONFIRM_LINK:
946 case SMC_LLC_ADD_LINK_CONT:
947 if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) {
948 /* a flow is waiting for this message */
949 smc_llc_flow_qentry_set(&lgr->llc_flow_lcl, qentry);
950 wake_up_interruptible(&lgr->llc_waiter);
954 case SMC_LLC_DELETE_LINK:
955 smc_llc_rx_delete_link(link, &llc->delete_link);
957 case SMC_LLC_CONFIRM_RKEY:
958 /* new request from remote, assign to remote flow */
959 if (smc_llc_flow_start(&lgr->llc_flow_rmt, qentry)) {
960 /* process here, does not wait for more llc msgs */
961 smc_llc_rmt_conf_rkey(lgr);
962 smc_llc_flow_stop(lgr, &lgr->llc_flow_rmt);
965 case SMC_LLC_CONFIRM_RKEY_CONT:
966 /* not used because max links is 3, and 3 rkeys fit into
967 * one CONFIRM_RKEY message
970 case SMC_LLC_DELETE_RKEY:
971 /* new request from remote, assign to remote flow */
972 if (smc_llc_flow_start(&lgr->llc_flow_rmt, qentry)) {
973 /* process here, does not wait for more llc msgs */
974 smc_llc_rmt_delete_rkey(lgr);
975 smc_llc_flow_stop(lgr, &lgr->llc_flow_rmt);
983 /* worker to process llc messages on the event queue */
984 static void smc_llc_event_work(struct work_struct *work)
986 struct smc_link_group *lgr = container_of(work, struct smc_link_group,
988 struct smc_llc_qentry *qentry;
990 if (!lgr->llc_flow_lcl.type && lgr->delayed_event) {
991 if (smc_link_usable(lgr->delayed_event->link)) {
992 smc_llc_event_handler(lgr->delayed_event);
994 qentry = lgr->delayed_event;
995 lgr->delayed_event = NULL;
1001 spin_lock_bh(&lgr->llc_event_q_lock);
1002 if (!list_empty(&lgr->llc_event_q)) {
1003 qentry = list_first_entry(&lgr->llc_event_q,
1004 struct smc_llc_qentry, list);
1005 list_del_init(&qentry->list);
1006 spin_unlock_bh(&lgr->llc_event_q_lock);
1007 smc_llc_event_handler(qentry);
1010 spin_unlock_bh(&lgr->llc_event_q_lock);
1013 /* process llc responses in tasklet context */
1014 static void smc_llc_rx_response(struct smc_link *link,
1015 struct smc_llc_qentry *qentry)
1017 u8 llc_type = qentry->msg.raw.hdr.common.type;
1020 case SMC_LLC_TEST_LINK:
1021 if (link->state == SMC_LNK_ACTIVE)
1022 complete(&link->llc_testlink_resp);
1024 case SMC_LLC_ADD_LINK:
1025 case SMC_LLC_CONFIRM_LINK:
1026 case SMC_LLC_ADD_LINK_CONT:
1027 case SMC_LLC_CONFIRM_RKEY:
1028 case SMC_LLC_DELETE_RKEY:
1029 /* assign responses to the local flow, we requested them */
1030 smc_llc_flow_qentry_set(&link->lgr->llc_flow_lcl, qentry);
1031 wake_up_interruptible(&link->lgr->llc_waiter);
1033 case SMC_LLC_DELETE_LINK:
1034 if (link->lgr->role == SMC_SERV)
1035 smc_lgr_schedule_free_work_fast(link->lgr);
1037 case SMC_LLC_CONFIRM_RKEY_CONT:
1038 /* not used because max links is 3 */
1044 static void smc_llc_enqueue(struct smc_link *link, union smc_llc_msg *llc)
1046 struct smc_link_group *lgr = link->lgr;
1047 struct smc_llc_qentry *qentry;
1048 unsigned long flags;
1050 qentry = kmalloc(sizeof(*qentry), GFP_ATOMIC);
1053 qentry->link = link;
1054 INIT_LIST_HEAD(&qentry->list);
1055 memcpy(&qentry->msg, llc, sizeof(union smc_llc_msg));
1057 /* process responses immediately */
1058 if (llc->raw.hdr.flags & SMC_LLC_FLAG_RESP) {
1059 smc_llc_rx_response(link, qentry);
1063 /* add requests to event queue */
1064 spin_lock_irqsave(&lgr->llc_event_q_lock, flags);
1065 list_add_tail(&qentry->list, &lgr->llc_event_q);
1066 spin_unlock_irqrestore(&lgr->llc_event_q_lock, flags);
1067 schedule_work(&link->lgr->llc_event_work);
1070 /* copy received msg and add it to the event queue */
1071 static void smc_llc_rx_handler(struct ib_wc *wc, void *buf)
1073 struct smc_link *link = (struct smc_link *)wc->qp->qp_context;
1074 union smc_llc_msg *llc = buf;
1076 if (wc->byte_len < sizeof(*llc))
1077 return; /* short message */
1078 if (llc->raw.hdr.length != sizeof(*llc))
1079 return; /* invalid message */
1081 smc_llc_enqueue(link, llc);
1084 /***************************** worker, utils *********************************/
1086 static void smc_llc_testlink_work(struct work_struct *work)
1088 struct smc_link *link = container_of(to_delayed_work(work),
1089 struct smc_link, llc_testlink_wrk);
1090 unsigned long next_interval;
1091 unsigned long expire_time;
1092 u8 user_data[16] = { 0 };
1095 if (link->state != SMC_LNK_ACTIVE)
1096 return; /* don't reschedule worker */
1097 expire_time = link->wr_rx_tstamp + link->llc_testlink_time;
1098 if (time_is_after_jiffies(expire_time)) {
1099 next_interval = expire_time - jiffies;
1102 reinit_completion(&link->llc_testlink_resp);
1103 smc_llc_send_test_link(link, user_data);
1104 /* receive TEST LINK response over RoCE fabric */
1105 rc = wait_for_completion_interruptible_timeout(&link->llc_testlink_resp,
1107 if (link->state != SMC_LNK_ACTIVE)
1108 return; /* link state changed */
1110 smcr_link_down_cond_sched(link);
1113 next_interval = link->llc_testlink_time;
1115 schedule_delayed_work(&link->llc_testlink_wrk, next_interval);
1118 void smc_llc_lgr_init(struct smc_link_group *lgr, struct smc_sock *smc)
1120 struct net *net = sock_net(smc->clcsock->sk);
1122 INIT_WORK(&lgr->llc_event_work, smc_llc_event_work);
1123 INIT_WORK(&lgr->llc_add_link_work, smc_llc_add_link_work);
1124 INIT_LIST_HEAD(&lgr->llc_event_q);
1125 spin_lock_init(&lgr->llc_event_q_lock);
1126 spin_lock_init(&lgr->llc_flow_lock);
1127 init_waitqueue_head(&lgr->llc_waiter);
1128 mutex_init(&lgr->llc_conf_mutex);
1129 lgr->llc_testlink_time = net->ipv4.sysctl_tcp_keepalive_time;
1132 /* called after lgr was removed from lgr_list */
1133 void smc_llc_lgr_clear(struct smc_link_group *lgr)
1135 smc_llc_event_flush(lgr);
1136 wake_up_interruptible_all(&lgr->llc_waiter);
1137 cancel_work_sync(&lgr->llc_event_work);
1138 cancel_work_sync(&lgr->llc_add_link_work);
1139 if (lgr->delayed_event) {
1140 kfree(lgr->delayed_event);
1141 lgr->delayed_event = NULL;
1145 int smc_llc_link_init(struct smc_link *link)
1147 init_completion(&link->llc_testlink_resp);
1148 INIT_DELAYED_WORK(&link->llc_testlink_wrk, smc_llc_testlink_work);
1152 void smc_llc_link_active(struct smc_link *link)
1154 link->state = SMC_LNK_ACTIVE;
1155 if (link->lgr->llc_testlink_time) {
1156 link->llc_testlink_time = link->lgr->llc_testlink_time * HZ;
1157 schedule_delayed_work(&link->llc_testlink_wrk,
1158 link->llc_testlink_time);
1162 /* called in worker context */
1163 void smc_llc_link_clear(struct smc_link *link)
1165 complete(&link->llc_testlink_resp);
1166 cancel_delayed_work_sync(&link->llc_testlink_wrk);
1167 smc_wr_wakeup_reg_wait(link);
1168 smc_wr_wakeup_tx_wait(link);
1171 /* register a new rtoken at the remote peer (for all links) */
1172 int smc_llc_do_confirm_rkey(struct smc_link *send_link,
1173 struct smc_buf_desc *rmb_desc)
1175 struct smc_link_group *lgr = send_link->lgr;
1176 struct smc_llc_qentry *qentry = NULL;
1179 rc = smc_llc_send_confirm_rkey(send_link, rmb_desc);
1182 /* receive CONFIRM RKEY response from server over RoCE fabric */
1183 qentry = smc_llc_wait(lgr, send_link, SMC_LLC_WAIT_TIME,
1184 SMC_LLC_CONFIRM_RKEY);
1185 if (!qentry || (qentry->msg.raw.hdr.flags & SMC_LLC_FLAG_RKEY_NEG))
1189 smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
1193 /* unregister an rtoken at the remote peer */
1194 int smc_llc_do_delete_rkey(struct smc_link_group *lgr,
1195 struct smc_buf_desc *rmb_desc)
1197 struct smc_llc_qentry *qentry = NULL;
1198 struct smc_link *send_link;
1201 send_link = smc_llc_usable_link(lgr);
1205 /* protected by llc_flow control */
1206 rc = smc_llc_send_delete_rkey(send_link, rmb_desc);
1209 /* receive DELETE RKEY response from server over RoCE fabric */
1210 qentry = smc_llc_wait(lgr, send_link, SMC_LLC_WAIT_TIME,
1211 SMC_LLC_DELETE_RKEY);
1212 if (!qentry || (qentry->msg.raw.hdr.flags & SMC_LLC_FLAG_RKEY_NEG))
1216 smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
1220 /* evaluate confirm link request or response */
1221 int smc_llc_eval_conf_link(struct smc_llc_qentry *qentry,
1222 enum smc_llc_reqresp type)
1224 if (type == SMC_LLC_REQ) /* SMC server assigns link_id */
1225 qentry->link->link_id = qentry->msg.confirm_link.link_num;
1226 if (!(qentry->msg.raw.hdr.flags & SMC_LLC_FLAG_NO_RMBE_EYEC))
1231 /***************************** init, exit, misc ******************************/
1233 static struct smc_wr_rx_handler smc_llc_rx_handlers[] = {
1235 .handler = smc_llc_rx_handler,
1236 .type = SMC_LLC_CONFIRM_LINK
1239 .handler = smc_llc_rx_handler,
1240 .type = SMC_LLC_TEST_LINK
1243 .handler = smc_llc_rx_handler,
1244 .type = SMC_LLC_ADD_LINK
1247 .handler = smc_llc_rx_handler,
1248 .type = SMC_LLC_ADD_LINK_CONT
1251 .handler = smc_llc_rx_handler,
1252 .type = SMC_LLC_DELETE_LINK
1255 .handler = smc_llc_rx_handler,
1256 .type = SMC_LLC_CONFIRM_RKEY
1259 .handler = smc_llc_rx_handler,
1260 .type = SMC_LLC_CONFIRM_RKEY_CONT
1263 .handler = smc_llc_rx_handler,
1264 .type = SMC_LLC_DELETE_RKEY
1271 int __init smc_llc_init(void)
1273 struct smc_wr_rx_handler *handler;
1276 for (handler = smc_llc_rx_handlers; handler->handler; handler++) {
1277 INIT_HLIST_NODE(&handler->list);
1278 rc = smc_wr_rx_register_handler(handler);