drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2019 Mellanox Technologies. */
3
4 #include <linux/smp.h>
5 #include "dr_types.h"
6
7 #define QUEUE_SIZE 128
8 #define SIGNAL_PER_DIV_QUEUE 16
9 #define TH_NUMS_TO_DRAIN 2
10
11 enum { CQ_OK = 0, CQ_EMPTY = -1, CQ_POLL_ERR = -2 };
12
13 struct dr_data_seg {
14         u64 addr;
15         u32 length;
16         u32 lkey;
17         unsigned int send_flags;
18 };
19
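/* A single ICM update request: the 'write' segment is RDMA-written to
 * remote_addr/rkey and then read back over the same local buffer via the
 * 'read' segment (see dr_fill_data_segs() and dr_post_send() below).
 */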
20 struct postsend_info {
21         struct dr_data_seg write;
22         struct dr_data_seg read;
23         u64 remote_addr;
24         u32 rkey;
25 };
26
27 struct dr_qp_rtr_attr {
28         struct mlx5dr_cmd_gid_attr dgid_attr;
29         enum ib_mtu mtu;
30         u32 qp_num;
31         u16 port_num;
32         u8 min_rnr_timer;
33         u8 sgid_index;
34         u16 udp_src_port;
35 };
36
37 struct dr_qp_rts_attr {
38         u8 timeout;
39         u8 retry_cnt;
40         u8 rnr_retry;
41 };
42
43 struct dr_qp_init_attr {
44         u32 cqn;
45         u32 pdn;
46         u32 max_send_wr;
47         struct mlx5_uars_page *uar;
48 };
49
50 static int dr_parse_cqe(struct mlx5dr_cq *dr_cq, struct mlx5_cqe64 *cqe64)
51 {
52         unsigned int idx;
53         u8 opcode;
54
55         opcode = get_cqe_opcode(cqe64);
56         if (opcode == MLX5_CQE_REQ_ERR) {
57                 idx = be16_to_cpu(cqe64->wqe_counter) &
58                         (dr_cq->qp->sq.wqe_cnt - 1);
59                 dr_cq->qp->sq.cc = dr_cq->qp->sq.wqe_head[idx] + 1;
60         } else if (opcode == MLX5_CQE_RESP_ERR) {
61                 ++dr_cq->qp->sq.cc;
62         } else {
63                 idx = be16_to_cpu(cqe64->wqe_counter) &
64                         (dr_cq->qp->sq.wqe_cnt - 1);
65                 dr_cq->qp->sq.cc = dr_cq->qp->sq.wqe_head[idx] + 1;
66
67                 return CQ_OK;
68         }
69
70         return CQ_POLL_ERR;
71 }
72
73 static int dr_cq_poll_one(struct mlx5dr_cq *dr_cq)
74 {
75         struct mlx5_cqe64 *cqe64;
76         int err;
77
78         cqe64 = mlx5_cqwq_get_cqe(&dr_cq->wq);
79         if (!cqe64)
80                 return CQ_EMPTY;
81
82         mlx5_cqwq_pop(&dr_cq->wq);
83         err = dr_parse_cqe(dr_cq, cqe64);
84         mlx5_cqwq_update_db_record(&dr_cq->wq);
85
86         return err;
87 }
88
89 static int dr_poll_cq(struct mlx5dr_cq *dr_cq, int ne)
90 {
91         int npolled;
92         int err = 0;
93
94         for (npolled = 0; npolled < ne; ++npolled) {
95                 err = dr_cq_poll_one(dr_cq);
96                 if (err != CQ_OK)
97                         break;
98         }
99
100         return err == CQ_POLL_ERR ? err : npolled;
101 }
102
103 static void dr_qp_event(struct mlx5_core_qp *mqp, int event)
104 {
105         pr_info("DR QP event %u on QP #%u\n", event, mqp->qpn);
106 }
107
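/* Create the RC QP used for ICM writes. The SQ is sized to max_send_wr
 * (rounded up to a power of two); the RQ is kept minimal (4 WQEs) since
 * this QP only posts sends.
 */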
108 static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev,
109                                          struct dr_qp_init_attr *attr)
110 {
111         u32 temp_qpc[MLX5_ST_SZ_DW(qpc)] = {};
112         struct mlx5_wq_param wqp;
113         struct mlx5dr_qp *dr_qp;
114         int inlen;
115         void *qpc;
116         void *in;
117         int err;
118
119         dr_qp = kzalloc(sizeof(*dr_qp), GFP_KERNEL);
120         if (!dr_qp)
121                 return NULL;
122
123         wqp.buf_numa_node = mdev->priv.numa_node;
124         wqp.db_numa_node = mdev->priv.numa_node;
125
126         dr_qp->rq.pc = 0;
127         dr_qp->rq.cc = 0;
128         dr_qp->rq.wqe_cnt = 4;
129         dr_qp->sq.pc = 0;
130         dr_qp->sq.cc = 0;
131         dr_qp->sq.wqe_cnt = roundup_pow_of_two(attr->max_send_wr);
132
133         MLX5_SET(qpc, temp_qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4);
134         MLX5_SET(qpc, temp_qpc, log_rq_size, ilog2(dr_qp->rq.wqe_cnt));
135         MLX5_SET(qpc, temp_qpc, log_sq_size, ilog2(dr_qp->sq.wqe_cnt));
136         err = mlx5_wq_qp_create(mdev, &wqp, temp_qpc, &dr_qp->wq,
137                                 &dr_qp->wq_ctrl);
138         if (err) {
139                 mlx5_core_info(mdev, "Can't create QP WQ\n");
140                 goto err_wq;
141         }
142
143         dr_qp->sq.wqe_head = kcalloc(dr_qp->sq.wqe_cnt,
144                                      sizeof(dr_qp->sq.wqe_head[0]),
145                                      GFP_KERNEL);
146
147         if (!dr_qp->sq.wqe_head) {
148                 mlx5_core_warn(mdev, "Can't allocate wqe head\n");
149                 goto err_wqe_head;
150         }
151
152         inlen = MLX5_ST_SZ_BYTES(create_qp_in) +
153                 MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) *
154                 dr_qp->wq_ctrl.buf.npages;
155         in = kvzalloc(inlen, GFP_KERNEL);
156         if (!in) {
157                 err = -ENOMEM;
158                 goto err_in;
159         }
160
161         qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
162         MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
163         MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
164         MLX5_SET(qpc, qpc, pd, attr->pdn);
165         MLX5_SET(qpc, qpc, uar_page, attr->uar->index);
166         MLX5_SET(qpc, qpc, log_page_size,
167                  dr_qp->wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
168         MLX5_SET(qpc, qpc, fre, 1);
169         MLX5_SET(qpc, qpc, rlky, 1);
170         MLX5_SET(qpc, qpc, cqn_snd, attr->cqn);
171         MLX5_SET(qpc, qpc, cqn_rcv, attr->cqn);
172         MLX5_SET(qpc, qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4);
173         MLX5_SET(qpc, qpc, log_rq_size, ilog2(dr_qp->rq.wqe_cnt));
174         MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
175         MLX5_SET(qpc, qpc, log_sq_size, ilog2(dr_qp->sq.wqe_cnt));
176         MLX5_SET64(qpc, qpc, dbr_addr, dr_qp->wq_ctrl.db.dma);
177         if (MLX5_CAP_GEN(mdev, cqe_version) == 1)
178                 MLX5_SET(qpc, qpc, user_index, 0xFFFFFF);
179         mlx5_fill_page_frag_array(&dr_qp->wq_ctrl.buf,
180                                   (__be64 *)MLX5_ADDR_OF(create_qp_in,
181                                                          in, pas));
182
183         err = mlx5_core_create_qp(mdev, &dr_qp->mqp, in, inlen);
184         kvfree(in);
185
186         if (err) {
187                 mlx5_core_warn(mdev, "Can't create QP\n");
188                 goto err_in;
189         }
190         dr_qp->mqp.event = dr_qp_event;
191         dr_qp->uar = attr->uar;
192
193         return dr_qp;
194
195 err_in:
196         kfree(dr_qp->sq.wqe_head);
197 err_wqe_head:
198         mlx5_wq_destroy(&dr_qp->wq_ctrl);
199 err_wq:
200         kfree(dr_qp);
201         return NULL;
202 }
203
204 static void dr_destroy_qp(struct mlx5_core_dev *mdev,
205                           struct mlx5dr_qp *dr_qp)
206 {
207         mlx5_core_destroy_qp(mdev, &dr_qp->mqp);
208         kfree(dr_qp->sq.wqe_head);
209         mlx5_wq_destroy(&dr_qp->wq_ctrl);
210         kfree(dr_qp);
211 }
212
213 static void dr_cmd_notify_hw(struct mlx5dr_qp *dr_qp, void *ctrl)
214 {
215         dma_wmb();
216         *dr_qp->wq.sq.db = cpu_to_be32(dr_qp->sq.pc & 0xfffff);
217
218         /* After wmb() the hw is aware of the new work */
219         wmb();
220
221         mlx5_write64(ctrl, dr_qp->uar->map + MLX5_BF_OFFSET);
222 }
223
224 static void dr_rdma_segments(struct mlx5dr_qp *dr_qp, u64 remote_addr,
225                              u32 rkey, struct dr_data_seg *data_seg,
226                              u32 opcode, int nreq)
227 {
228         struct mlx5_wqe_raddr_seg *wq_raddr;
229         struct mlx5_wqe_ctrl_seg *wq_ctrl;
230         struct mlx5_wqe_data_seg *wq_dseg;
231         unsigned int size;
232         unsigned int idx;
233
234         size = sizeof(*wq_ctrl) / 16 + sizeof(*wq_dseg) / 16 +
235                 sizeof(*wq_raddr) / 16;
236
237         idx = dr_qp->sq.pc & (dr_qp->sq.wqe_cnt - 1);
238
239         wq_ctrl = mlx5_wq_cyc_get_wqe(&dr_qp->wq.sq, idx);
240         wq_ctrl->imm = 0;
241         wq_ctrl->fm_ce_se = (data_seg->send_flags) ?
242                 MLX5_WQE_CTRL_CQ_UPDATE : 0;
243         wq_ctrl->opmod_idx_opcode = cpu_to_be32(((dr_qp->sq.pc & 0xffff) << 8) |
244                                                 opcode);
245         wq_ctrl->qpn_ds = cpu_to_be32(size | dr_qp->mqp.qpn << 8);
246         wq_raddr = (void *)(wq_ctrl + 1);
247         wq_raddr->raddr = cpu_to_be64(remote_addr);
248         wq_raddr->rkey = cpu_to_be32(rkey);
249         wq_raddr->reserved = 0;
250
251         wq_dseg = (void *)(wq_raddr + 1);
252         wq_dseg->byte_count = cpu_to_be32(data_seg->length);
253         wq_dseg->lkey = cpu_to_be32(data_seg->lkey);
254         wq_dseg->addr = cpu_to_be64(data_seg->addr);
255
256         dr_qp->sq.wqe_head[idx] = dr_qp->sq.pc++;
257
258         if (nreq)
259                 dr_cmd_notify_hw(dr_qp, wq_ctrl);
260 }
261
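/* Post one ICM update: an RDMA WRITE carrying the data, immediately followed
 * by an RDMA READ of the same remote area. Only the READ passes nreq, so the
 * doorbell is rung once and both WQEs are submitted to the hw together.
 */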
262 static void dr_post_send(struct mlx5dr_qp *dr_qp, struct postsend_info *send_info)
263 {
264         dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey,
265                          &send_info->write, MLX5_OPCODE_RDMA_WRITE, 0);
266         dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey,
267                          &send_info->read, MLX5_OPCODE_RDMA_READ, 1);
268 }
269
270 /**
271  * mlx5dr_send_fill_and_append_ste_send_info: Queue data to be sent
272  * as part of a send_list.
273  *
274  *     @ste:       The STE this data belongs to
275  *     @size:      Number of bytes to write
276  *     @offset:    Offset of the data from the start of the hw_ste entry
277  *     @data:      The data to write
278  *     @ste_info:  STE info entry to be appended to send_list
279  *     @send_list: The list to append ste_info to
280  *     @copy_data: If true, copy the data into ste_info because it is not
281  *                 backed up anywhere else (e.g. during re-hash).
282  *                 If false, keep a pointer so the data may still be
283  *                 updated after it was added to the list.
284  */
285 void mlx5dr_send_fill_and_append_ste_send_info(struct mlx5dr_ste *ste, u16 size,
286                                                u16 offset, u8 *data,
287                                                struct mlx5dr_ste_send_info *ste_info,
288                                                struct list_head *send_list,
289                                                bool copy_data)
290 {
291         ste_info->size = size;
292         ste_info->ste = ste;
293         ste_info->offset = offset;
294
295         if (copy_data) {
296                 memcpy(ste_info->data_cont, data, size);
297                 ste_info->data = ste_info->data_cont;
298         } else {
299                 ste_info->data = data;
300         }
301
302         list_add_tail(&ste_info->send_list, send_list);
303 }
304
305 /* The function tries to consume one wc each time, unless the queue is full.
306  * A full queue means that the hw is a whole queue length behind the sw, in
307  * which case the function drains the cq until it is empty.
308  */
309 static int dr_handle_pending_wc(struct mlx5dr_domain *dmn,
310                                 struct mlx5dr_send_ring *send_ring)
311 {
312         bool is_drain = false;
313         int ne;
314
315         if (send_ring->pending_wqe < send_ring->signal_th)
316                 return 0;
317
318         /* Queue is full, start draining it */
319         if (send_ring->pending_wqe >=
320             dmn->send_ring->signal_th * TH_NUMS_TO_DRAIN)
321                 is_drain = true;
322
323         do {
324                 ne = dr_poll_cq(send_ring->cq, 1);
325                 if (ne < 0)
326                         return ne;
327                 else if (ne == 1)
328                         send_ring->pending_wqe -= send_ring->signal_th;
329         } while (is_drain && send_ring->pending_wqe);
330
331         return 0;
332 }
333
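/* Each ICM update consumes two WQEs (WRITE + READ). A signaled completion is
 * requested once every signal_th WQEs, so completions are polled in batches
 * by dr_handle_pending_wc().
 */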
334 static void dr_fill_data_segs(struct mlx5dr_send_ring *send_ring,
335                               struct postsend_info *send_info)
336 {
337         send_ring->pending_wqe++;
338
339         if (send_ring->pending_wqe % send_ring->signal_th == 0)
340                 send_info->write.send_flags |= IB_SEND_SIGNALED;
341
342         send_ring->pending_wqe++;
343         send_info->read.length = send_info->write.length;
344         /* Read into the same write area */
345         send_info->read.addr = (uintptr_t)send_info->write.addr;
346         send_info->read.lkey = send_ring->mr->mkey.key;
347
348         if (send_ring->pending_wqe % send_ring->signal_th == 0)
349                 send_info->read.send_flags = IB_SEND_SIGNALED;
350         else
351                 send_info->read.send_flags = 0;
352 }
353
354 static int dr_postsend_icm_data(struct mlx5dr_domain *dmn,
355                                 struct postsend_info *send_info)
356 {
357         struct mlx5dr_send_ring *send_ring = dmn->send_ring;
358         u32 buff_offset;
359         int ret;
360
361         ret = dr_handle_pending_wc(dmn, send_ring);
362         if (ret)
363                 return ret;
364
365         if (send_info->write.length > dmn->info.max_inline_size) {
366                 buff_offset = (send_ring->tx_head &
367                                (dmn->send_ring->signal_th - 1)) *
368                         send_ring->max_post_send_size;
369                 /* Copy to ring mr */
370                 memcpy(send_ring->buf + buff_offset,
371                        (void *)(uintptr_t)send_info->write.addr,
372                        send_info->write.length);
373                 send_info->write.addr = (uintptr_t)send_ring->mr->dma_addr + buff_offset;
374                 send_info->write.lkey = send_ring->mr->mkey.key;
375         }
376
377         send_ring->tx_head++;
378         dr_fill_data_segs(send_ring, send_info);
379         dr_post_send(send_ring->qp, send_info);
380
381         return 0;
382 }
383
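/* Decide how to split an htbl copy into sends: chunks larger than the max
 * post-send size are sent in multiple iterations of that size, smaller ones
 * in a single send. Also allocates the copy buffer used for the writes.
 */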
384 static int dr_get_tbl_copy_details(struct mlx5dr_domain *dmn,
385                                    struct mlx5dr_ste_htbl *htbl,
386                                    u8 **data,
387                                    u32 *byte_size,
388                                    int *iterations,
389                                    int *num_stes)
390 {
391         int alloc_size;
392
393         if (htbl->chunk->byte_size > dmn->send_ring->max_post_send_size) {
394                 *iterations = htbl->chunk->byte_size /
395                         dmn->send_ring->max_post_send_size;
396                 *byte_size = dmn->send_ring->max_post_send_size;
397                 alloc_size = *byte_size;
398                 *num_stes = *byte_size / DR_STE_SIZE;
399         } else {
400                 *iterations = 1;
401                 *num_stes = htbl->chunk->num_of_entries;
402                 alloc_size = *num_stes * DR_STE_SIZE;
403         }
404
405         *data = kzalloc(alloc_size, GFP_KERNEL);
406         if (!*data)
407                 return -ENOMEM;
408
409         return 0;
410 }
411
412 /**
413  * mlx5dr_send_postsend_ste: write size bytes at offset into the hw icm.
414  *
415  *     @dmn:    Domain
416  *     @ste:    The ste struct that contains the data (at
417  *              least part of it)
418  *     @data:   The actual data to send
419  *     @size:   Number of bytes to write
420  *     @offset: The offset from the icm mapped data at which
421  *              to start writing; allows updating only part
422  *              of the buffer.
423  *
424  * Return: 0 on success.
425  */
426 int mlx5dr_send_postsend_ste(struct mlx5dr_domain *dmn, struct mlx5dr_ste *ste,
427                              u8 *data, u16 size, u16 offset)
428 {
429         struct postsend_info send_info = {};
430
431         send_info.write.addr = (uintptr_t)data;
432         send_info.write.length = size;
433         send_info.write.lkey = 0;
434         send_info.remote_addr = mlx5dr_ste_get_mr_addr(ste) + offset;
435         send_info.rkey = ste->htbl->chunk->rkey;
436
437         return dr_postsend_icm_data(dmn, &send_info);
438 }
439
440 int mlx5dr_send_postsend_htbl(struct mlx5dr_domain *dmn,
441                               struct mlx5dr_ste_htbl *htbl,
442                               u8 *formatted_ste, u8 *mask)
443 {
444         u32 byte_size = htbl->chunk->byte_size;
445         int num_stes_per_iter;
446         int iterations;
447         u8 *data;
448         int ret;
449         int i;
450         int j;
451
452         ret = dr_get_tbl_copy_details(dmn, htbl, &data, &byte_size,
453                                       &iterations, &num_stes_per_iter);
454         if (ret)
455                 return ret;
456
457         /* Send the data in 'iterations' chunks */
458         for (i = 0; i < iterations; i++) {
459                 u32 ste_index = i * (byte_size / DR_STE_SIZE);
460                 struct postsend_info send_info = {};
461
462                 /* Copy all STEs into the data buffer;
463                  * the bit_mask needs to be added as well
464                  */
465                 for (j = 0; j < num_stes_per_iter; j++) {
466                         u8 *hw_ste = htbl->ste_arr[ste_index + j].hw_ste;
467                         u32 ste_off = j * DR_STE_SIZE;
468
469                         if (mlx5dr_ste_is_not_valid_entry(hw_ste)) {
470                                 memcpy(data + ste_off,
471                                        formatted_ste, DR_STE_SIZE);
472                         } else {
473                                 /* Copy data */
474                                 memcpy(data + ste_off,
475                                        htbl->ste_arr[ste_index + j].hw_ste,
476                                        DR_STE_SIZE_REDUCED);
477                                 /* Copy bit_mask */
478                                 memcpy(data + ste_off + DR_STE_SIZE_REDUCED,
479                                        mask, DR_STE_SIZE_MASK);
480                         }
481                 }
482
483                 send_info.write.addr = (uintptr_t)data;
484                 send_info.write.length = byte_size;
485                 send_info.write.lkey = 0;
486                 send_info.remote_addr =
487                         mlx5dr_ste_get_mr_addr(htbl->ste_arr + ste_index);
488                 send_info.rkey = htbl->chunk->rkey;
489
490                 ret = dr_postsend_icm_data(dmn, &send_info);
491                 if (ret)
492                         goto out_free;
493         }
494
495 out_free:
496         kfree(data);
497         return ret;
498 }
499
500 /* Initialize htbl with default STEs */
501 int mlx5dr_send_postsend_formatted_htbl(struct mlx5dr_domain *dmn,
502                                         struct mlx5dr_ste_htbl *htbl,
503                                         u8 *ste_init_data,
504                                         bool update_hw_ste)
505 {
506         u32 byte_size = htbl->chunk->byte_size;
507         int iterations;
508         int num_stes;
509         u8 *data;
510         int ret;
511         int i;
512
513         ret = dr_get_tbl_copy_details(dmn, htbl, &data, &byte_size,
514                                       &iterations, &num_stes);
515         if (ret)
516                 return ret;
517
518         for (i = 0; i < num_stes; i++) {
519                 u8 *copy_dst;
520
521                 /* Copy the same ste into the data buffer */
522                 copy_dst = data + i * DR_STE_SIZE;
523                 memcpy(copy_dst, ste_init_data, DR_STE_SIZE);
524
525                 if (update_hw_ste) {
526                         /* Copy the reduced ste to hash table ste_arr */
527                         copy_dst = htbl->hw_ste_arr + i * DR_STE_SIZE_REDUCED;
528                         memcpy(copy_dst, ste_init_data, DR_STE_SIZE_REDUCED);
529                 }
530         }
531
532         /* Send the data in 'iterations' chunks */
533         for (i = 0; i < iterations; i++) {
534                 u32 ste_index = i * (byte_size / DR_STE_SIZE);
535                 struct postsend_info send_info = {};
536
537                 send_info.write.addr = (uintptr_t)data;
538                 send_info.write.length = byte_size;
539                 send_info.write.lkey = 0;
540                 send_info.remote_addr =
541                         mlx5dr_ste_get_mr_addr(htbl->ste_arr + ste_index);
542                 send_info.rkey = htbl->chunk->rkey;
543
544                 ret = dr_postsend_icm_data(dmn, &send_info);
545                 if (ret)
546                         goto out_free;
547         }
548
549 out_free:
550         kfree(data);
551         return ret;
552 }
553
554 int mlx5dr_send_postsend_action(struct mlx5dr_domain *dmn,
555                                 struct mlx5dr_action *action)
556 {
557         struct postsend_info send_info = {};
558         int ret;
559
560         send_info.write.addr = (uintptr_t)action->rewrite.data;
561         send_info.write.length = action->rewrite.chunk->byte_size;
562         send_info.write.lkey = 0;
563         send_info.remote_addr = action->rewrite.chunk->mr_addr;
564         send_info.rkey = action->rewrite.chunk->rkey;
565
566         mutex_lock(&dmn->mutex);
567         ret = dr_postsend_icm_data(dmn, &send_info);
568         mutex_unlock(&dmn->mutex);
569
570         return ret;
571 }
572
573 static int dr_modify_qp_rst2init(struct mlx5_core_dev *mdev,
574                                  struct mlx5dr_qp *dr_qp,
575                                  int port)
576 {
577         u32 in[MLX5_ST_SZ_DW(rst2init_qp_in)] = {};
578         void *qpc;
579
580         qpc = MLX5_ADDR_OF(rst2init_qp_in, in, qpc);
581
582         MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, port);
583         MLX5_SET(qpc, qpc, pm_state, MLX5_QPC_PM_STATE_MIGRATED);
584         MLX5_SET(qpc, qpc, rre, 1);
585         MLX5_SET(qpc, qpc, rwe, 1);
586
587         return mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RST2INIT_QP, 0, qpc,
588                                    &dr_qp->mqp);
589 }
590
591 static int dr_cmd_modify_qp_rtr2rts(struct mlx5_core_dev *mdev,
592                                     struct mlx5dr_qp *dr_qp,
593                                     struct dr_qp_rts_attr *attr)
594 {
595         u32 in[MLX5_ST_SZ_DW(rtr2rts_qp_in)] = {};
596         void *qpc;
597
598         qpc  = MLX5_ADDR_OF(rtr2rts_qp_in, in, qpc);
599
600         MLX5_SET(rtr2rts_qp_in, in, qpn, dr_qp->mqp.qpn);
601
602         MLX5_SET(qpc, qpc, log_ack_req_freq, 0);
603         MLX5_SET(qpc, qpc, retry_count, attr->retry_cnt);
604         MLX5_SET(qpc, qpc, rnr_retry, attr->rnr_retry);
605
606         return mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RTR2RTS_QP, 0, qpc,
607                                    &dr_qp->mqp);
608 }
609
610 static int dr_cmd_modify_qp_init2rtr(struct mlx5_core_dev *mdev,
611                                      struct mlx5dr_qp *dr_qp,
612                                      struct dr_qp_rtr_attr *attr)
613 {
614         u32 in[MLX5_ST_SZ_DW(init2rtr_qp_in)] = {};
615         void *qpc;
616
617         qpc = MLX5_ADDR_OF(init2rtr_qp_in, in, qpc);
618
619         MLX5_SET(init2rtr_qp_in, in, qpn, dr_qp->mqp.qpn);
620
621         MLX5_SET(qpc, qpc, mtu, attr->mtu);
622         MLX5_SET(qpc, qpc, log_msg_max, DR_CHUNK_SIZE_MAX - 1);
623         MLX5_SET(qpc, qpc, remote_qpn, attr->qp_num);
624         memcpy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rmac_47_32),
625                attr->dgid_attr.mac, sizeof(attr->dgid_attr.mac));
626         memcpy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rgid_rip),
627                attr->dgid_attr.gid, sizeof(attr->dgid_attr.gid));
628         MLX5_SET(qpc, qpc, primary_address_path.src_addr_index,
629                  attr->sgid_index);
630
631         if (attr->dgid_attr.roce_ver == MLX5_ROCE_VERSION_2)
632                 MLX5_SET(qpc, qpc, primary_address_path.udp_sport,
633                          attr->udp_src_port);
634
635         MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, attr->port_num);
636         MLX5_SET(qpc, qpc, min_rnr_nak, 1);
637
638         return mlx5_core_qp_modify(mdev, MLX5_CMD_OP_INIT2RTR_QP, 0, qpc,
639                                    &dr_qp->mqp);
640 }
641
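/* Move the send QP through the standard RST -> INIT -> RTR -> RTS sequence.
 * The QP is connected to itself (remote_qpn is its own qpn), so all RDMA
 * traffic is looped back through the local device.
 */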
642 static int dr_prepare_qp_to_rts(struct mlx5dr_domain *dmn)
643 {
644         struct mlx5dr_qp *dr_qp = dmn->send_ring->qp;
645         struct dr_qp_rts_attr rts_attr = {};
646         struct dr_qp_rtr_attr rtr_attr = {};
647         enum ib_mtu mtu = IB_MTU_1024;
648         u16 gid_index = 0;
649         int port = 1;
650         int ret;
651
652         /* Init */
653         ret = dr_modify_qp_rst2init(dmn->mdev, dr_qp, port);
654         if (ret)
655                 return ret;
656
657         /* RTR */
658         ret = mlx5dr_cmd_query_gid(dmn->mdev, port, gid_index, &rtr_attr.dgid_attr);
659         if (ret)
660                 return ret;
661
662         rtr_attr.mtu            = mtu;
663         rtr_attr.qp_num         = dr_qp->mqp.qpn;
664         rtr_attr.min_rnr_timer  = 12;
665         rtr_attr.port_num       = port;
666         rtr_attr.sgid_index     = gid_index;
667         rtr_attr.udp_src_port   = dmn->info.caps.roce_min_src_udp;
668
669         ret = dr_cmd_modify_qp_init2rtr(dmn->mdev, dr_qp, &rtr_attr);
670         if (ret)
671                 return ret;
672
673         /* RTS */
674         rts_attr.timeout        = 14;
675         rts_attr.retry_cnt      = 7;
676         rts_attr.rnr_retry      = 7;
677
678         ret = dr_cmd_modify_qp_rtr2rts(dmn->mdev, dr_qp, &rts_attr);
679         if (ret)
680                 return ret;
681
682         return 0;
683 }
684
685 static void dr_cq_event(struct mlx5_core_cq *mcq,
686                         enum mlx5_event event)
687 {
688         pr_info("CQ event %u on CQ #%u\n", event, mcq->cqn);
689 }
690
691 static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev,
692                                       struct mlx5_uars_page *uar,
693                                       size_t ncqe)
694 {
695         u32 temp_cqc[MLX5_ST_SZ_DW(cqc)] = {};
696         u32 out[MLX5_ST_SZ_DW(create_cq_out)];
697         struct mlx5_wq_param wqp;
698         struct mlx5_cqe64 *cqe;
699         struct mlx5dr_cq *cq;
700         int inlen, err, eqn;
701         unsigned int irqn;
702         void *cqc, *in;
703         __be64 *pas;
704         int vector;
705         u32 i;
706
707         cq = kzalloc(sizeof(*cq), GFP_KERNEL);
708         if (!cq)
709                 return NULL;
710
711         ncqe = roundup_pow_of_two(ncqe);
712         MLX5_SET(cqc, temp_cqc, log_cq_size, ilog2(ncqe));
713
714         wqp.buf_numa_node = mdev->priv.numa_node;
715         wqp.db_numa_node = mdev->priv.numa_node;
716
717         err = mlx5_cqwq_create(mdev, &wqp, temp_cqc, &cq->wq,
718                                &cq->wq_ctrl);
719         if (err)
720                 goto out;
721
722         for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) {
723                 cqe = mlx5_cqwq_get_wqe(&cq->wq, i);
724                 cqe->op_own = MLX5_CQE_INVALID << 4 | MLX5_CQE_OWNER_MASK;
725         }
726
727         inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
728                 sizeof(u64) * cq->wq_ctrl.buf.npages;
729         in = kvzalloc(inlen, GFP_KERNEL);
730         if (!in)
731                 goto err_cqwq;
732
733         vector = raw_smp_processor_id() % mlx5_comp_vectors_count(mdev);
734         err = mlx5_vector2eqn(mdev, vector, &eqn, &irqn);
735         if (err) {
736                 kvfree(in);
737                 goto err_cqwq;
738         }
739
740         cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
741         MLX5_SET(cqc, cqc, log_cq_size, ilog2(ncqe));
742         MLX5_SET(cqc, cqc, c_eqn, eqn);
743         MLX5_SET(cqc, cqc, uar_page, uar->index);
744         MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
745                  MLX5_ADAPTER_PAGE_SHIFT);
746         MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma);
747
748         pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
749         mlx5_fill_page_frag_array(&cq->wq_ctrl.buf, pas);
750
751         cq->mcq.event = dr_cq_event;
752
753         err = mlx5_core_create_cq(mdev, &cq->mcq, in, inlen, out, sizeof(out));
754         kvfree(in);
755
756         if (err)
757                 goto err_cqwq;
758
759         cq->mcq.cqe_sz = 64;
760         cq->mcq.set_ci_db = cq->wq_ctrl.db.db;
761         cq->mcq.arm_db = cq->wq_ctrl.db.db + 1;
762         *cq->mcq.set_ci_db = 0;
763         *cq->mcq.arm_db = 0;
764         cq->mcq.vector = 0;
765         cq->mcq.irqn = irqn;
766         cq->mcq.uar = uar;
767
768         return cq;
769
770 err_cqwq:
771         mlx5_wq_destroy(&cq->wq_ctrl);
772 out:
773         kfree(cq);
774         return NULL;
775 }
776
777 static void dr_destroy_cq(struct mlx5_core_dev *mdev, struct mlx5dr_cq *cq)
778 {
779         mlx5_core_destroy_cq(mdev, &cq->mcq);
780         mlx5_wq_destroy(&cq->wq_ctrl);
781         kfree(cq);
782 }
783
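/* Create a physical-address (PA) mkey that spans the whole address space
 * (length64), so DMA addresses can be used directly as local buffer
 * addresses.
 */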
784 static int
785 dr_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, struct mlx5_core_mkey *mkey)
786 {
787         u32 in[MLX5_ST_SZ_DW(create_mkey_in)] = {};
788         void *mkc;
789
790         mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
791         MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);
792         MLX5_SET(mkc, mkc, a, 1);
793         MLX5_SET(mkc, mkc, rw, 1);
794         MLX5_SET(mkc, mkc, rr, 1);
795         MLX5_SET(mkc, mkc, lw, 1);
796         MLX5_SET(mkc, mkc, lr, 1);
797
798         MLX5_SET(mkc, mkc, pd, pdn);
799         MLX5_SET(mkc, mkc, length64, 1);
800         MLX5_SET(mkc, mkc, qpn, 0xffffff);
801
802         return mlx5_core_create_mkey(mdev, mkey, in, sizeof(in));
803 }
804
805 static struct mlx5dr_mr *dr_reg_mr(struct mlx5_core_dev *mdev,
806                                    u32 pdn, void *buf, size_t size)
807 {
808         struct mlx5dr_mr *mr = kzalloc(sizeof(*mr), GFP_KERNEL);
809         struct device *dma_device;
810         dma_addr_t dma_addr;
811         int err;
812
813         if (!mr)
814                 return NULL;
815
816         dma_device = &mdev->pdev->dev;
817         dma_addr = dma_map_single(dma_device, buf, size,
818                                   DMA_BIDIRECTIONAL);
819         err = dma_mapping_error(dma_device, dma_addr);
820         if (err) {
821                 mlx5_core_warn(mdev, "Can't dma buf\n");
822                 kfree(mr);
823                 return NULL;
824         }
825
826         err = dr_create_mkey(mdev, pdn, &mr->mkey);
827         if (err) {
828                 mlx5_core_warn(mdev, "Can't create mkey\n");
829                 dma_unmap_single(dma_device, dma_addr, size,
830                                  DMA_BIDIRECTIONAL);
831                 kfree(mr);
832                 return NULL;
833         }
834
835         mr->dma_addr = dma_addr;
836         mr->size = size;
837         mr->addr = buf;
838
839         return mr;
840 }
841
842 static void dr_dereg_mr(struct mlx5_core_dev *mdev, struct mlx5dr_mr *mr)
843 {
844         mlx5_core_destroy_mkey(mdev, &mr->mkey);
845         dma_unmap_single(&mdev->pdev->dev, mr->dma_addr, mr->size,
846                          DMA_BIDIRECTIONAL);
847         kfree(mr);
848 }
849
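/* Allocate the per-domain send ring: a CQ, an RC QP moved to RTS, a copy
 * buffer sized for signal_th maximum-size posts, and MRs for the copy buffer
 * and for the read-back sync buffer.
 */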
850 int mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn)
851 {
852         struct dr_qp_init_attr init_attr = {};
853         int cq_size;
854         int size;
855         int ret;
856
857         dmn->send_ring = kzalloc(sizeof(*dmn->send_ring), GFP_KERNEL);
858         if (!dmn->send_ring)
859                 return -ENOMEM;
860
861         cq_size = QUEUE_SIZE + 1;
862         dmn->send_ring->cq = dr_create_cq(dmn->mdev, dmn->uar, cq_size);
863         if (!dmn->send_ring->cq) {
864                 ret = -ENOMEM;
865                 goto free_send_ring;
866         }
867
868         init_attr.cqn = dmn->send_ring->cq->mcq.cqn;
869         init_attr.pdn = dmn->pdn;
870         init_attr.uar = dmn->uar;
871         init_attr.max_send_wr = QUEUE_SIZE;
872
873         dmn->send_ring->qp = dr_create_rc_qp(dmn->mdev, &init_attr);
874         if (!dmn->send_ring->qp)  {
875                 ret = -ENOMEM;
876                 goto clean_cq;
877         }
878
879         dmn->send_ring->cq->qp = dmn->send_ring->qp;
880
881         dmn->info.max_send_wr = QUEUE_SIZE;
882         dmn->info.max_inline_size = min(dmn->send_ring->qp->max_inline_data,
883                                         DR_STE_SIZE);
884
885         dmn->send_ring->signal_th = dmn->info.max_send_wr /
886                 SIGNAL_PER_DIV_QUEUE;
887
888         /* Prepare qp to be used */
889         ret = dr_prepare_qp_to_rts(dmn);
890         if (ret)
891                 goto clean_qp;
892
893         dmn->send_ring->max_post_send_size =
894                 mlx5dr_icm_pool_chunk_size_to_byte(DR_CHUNK_SIZE_1K,
895                                                    DR_ICM_TYPE_STE);
896
897         /* Allocate a write buffer large enough for signal_th max-size posts */
898         size = dmn->send_ring->signal_th * dmn->send_ring->max_post_send_size;
899         dmn->send_ring->buf = kzalloc(size, GFP_KERNEL);
900         if (!dmn->send_ring->buf) {
901                 ret = -ENOMEM;
902                 goto clean_qp;
903         }
904
905         dmn->send_ring->buf_size = size;
906
907         dmn->send_ring->mr = dr_reg_mr(dmn->mdev,
908                                        dmn->pdn, dmn->send_ring->buf, size);
909         if (!dmn->send_ring->mr) {
910                 ret = -ENOMEM;
911                 goto free_mem;
912         }
913
914         dmn->send_ring->sync_mr = dr_reg_mr(dmn->mdev,
915                                             dmn->pdn, dmn->send_ring->sync_buff,
916                                             MIN_READ_SYNC);
917         if (!dmn->send_ring->sync_mr) {
918                 ret = -ENOMEM;
919                 goto clean_mr;
920         }
921
922         return 0;
923
924 clean_mr:
925         dr_dereg_mr(dmn->mdev, dmn->send_ring->mr);
926 free_mem:
927         kfree(dmn->send_ring->buf);
928 clean_qp:
929         dr_destroy_qp(dmn->mdev, dmn->send_ring->qp);
930 clean_cq:
931         dr_destroy_cq(dmn->mdev, dmn->send_ring->cq);
932 free_send_ring:
933         kfree(dmn->send_ring);
934
935         return ret;
936 }
937
938 void mlx5dr_send_ring_free(struct mlx5dr_domain *dmn,
939                            struct mlx5dr_send_ring *send_ring)
940 {
941         dr_destroy_qp(dmn->mdev, send_ring->qp);
942         dr_destroy_cq(dmn->mdev, send_ring->cq);
943         dr_dereg_mr(dmn->mdev, send_ring->sync_mr);
944         dr_dereg_mr(dmn->mdev, send_ring->mr);
945         kfree(send_ring->buf);
946         kfree(send_ring);
947 }
948
949 int mlx5dr_send_ring_force_drain(struct mlx5dr_domain *dmn)
950 {
951         struct mlx5dr_send_ring *send_ring = dmn->send_ring;
952         struct postsend_info send_info = {};
953         u8 data[DR_STE_SIZE];
954         int num_of_sends_req;
955         int ret;
956         int i;
957
958         /* Sending this number of requests makes sure the CQ will get drained */
959         num_of_sends_req = send_ring->signal_th * TH_NUMS_TO_DRAIN / 2;
960
961         /* Send fake requests forcing the last to be signaled */
962         send_info.write.addr = (uintptr_t)data;
963         send_info.write.length = DR_STE_SIZE;
964         send_info.write.lkey = 0;
965         /* Using the sync_mr in order to write/read */
966         send_info.remote_addr = (uintptr_t)send_ring->sync_mr->addr;
967         send_info.rkey = send_ring->sync_mr->mkey.key;
968
969         for (i = 0; i < num_of_sends_req; i++) {
970                 ret = dr_postsend_icm_data(dmn, &send_info);
971                 if (ret)
972                         return ret;
973         }
974
975         ret = dr_handle_pending_wc(dmn, send_ring);
976
977         return ret;
978 }