mlx5, xsk: Migrate to new MEM_TYPE_XSK_BUFF_POOL
[linux-2.6-microblaze.git] drivers/net/ethernet/mellanox/mlx5/core/en_main.c
1 /*
2  * Copyright (c) 2015-2016, Mellanox Technologies. All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32
33 #include <net/tc_act/tc_gact.h>
34 #include <net/pkt_cls.h>
35 #include <linux/mlx5/fs.h>
36 #include <net/vxlan.h>
37 #include <net/geneve.h>
38 #include <linux/bpf.h>
39 #include <linux/if_bridge.h>
40 #include <net/page_pool.h>
41 #include <net/xdp_sock_drv.h>
42 #include "eswitch.h"
43 #include "en.h"
44 #include "en/txrx.h"
45 #include "en_tc.h"
46 #include "en_rep.h"
47 #include "en_accel/ipsec.h"
48 #include "en_accel/ipsec_rxtx.h"
49 #include "en_accel/en_accel.h"
50 #include "en_accel/tls.h"
51 #include "accel/ipsec.h"
52 #include "accel/tls.h"
53 #include "lib/vxlan.h"
54 #include "lib/clock.h"
55 #include "en/port.h"
56 #include "en/xdp.h"
57 #include "lib/eq.h"
58 #include "en/monitor_stats.h"
59 #include "en/health.h"
60 #include "en/params.h"
61 #include "en/xsk/umem.h"
62 #include "en/xsk/setup.h"
63 #include "en/xsk/rx.h"
64 #include "en/xsk/tx.h"
65 #include "en/hv_vhca_stats.h"
66 #include "en/devlink.h"
67 #include "lib/mlx5.h"
68
69
70 bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
71 {
72         bool striding_rq_umr = MLX5_CAP_GEN(mdev, striding_rq) &&
73                 MLX5_CAP_GEN(mdev, umr_ptr_rlky) &&
74                 MLX5_CAP_ETH(mdev, reg_umr_sq);
75         u16 max_wqe_sz_cap = MLX5_CAP_GEN(mdev, max_wqe_sz_sq);
76         bool inline_umr = MLX5E_UMR_WQE_INLINE_SZ <= max_wqe_sz_cap;
77
78         if (!striding_rq_umr)
79                 return false;
80         if (!inline_umr) {
81                 mlx5_core_warn(mdev, "Cannot support Striding RQ: UMR WQE size (%d) exceeds maximum supported (%d).\n",
82                                (int)MLX5E_UMR_WQE_INLINE_SZ, max_wqe_sz_cap);
83                 return false;
84         }
85         return true;
86 }
87
88 void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
89                                struct mlx5e_params *params)
90 {
91         params->log_rq_mtu_frames = is_kdump_kernel() ?
92                 MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE :
93                 MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE;
94
95         mlx5_core_info(mdev, "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n",
96                        params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ,
97                        params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ ?
98                        BIT(mlx5e_mpwqe_get_log_rq_size(params, NULL)) :
99                        BIT(params->log_rq_mtu_frames),
100                        BIT(mlx5e_mpwqe_get_log_stride_size(mdev, params, NULL)),
101                        MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS));
102 }
103
104 bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev,
105                                 struct mlx5e_params *params)
106 {
107         if (!mlx5e_check_fragmented_striding_rq_cap(mdev))
108                 return false;
109
110         if (MLX5_IPSEC_DEV(mdev))
111                 return false;
112
113         if (params->xdp_prog) {
114                 /* XSK params are not considered here. If striding RQ is in use,
115                  * and an XSK is being opened, mlx5e_rx_mpwqe_is_linear_skb will
116                  * be called with the known XSK params.
117                  */
118                 if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL))
119                         return false;
120         }
121
122         return true;
123 }
124
125 void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
126 {
127         params->rq_wq_type = mlx5e_striding_rq_possible(mdev, params) &&
128                 MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ) ?
129                 MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ :
130                 MLX5_WQ_TYPE_CYCLIC;
131 }
132
133 void mlx5e_update_carrier(struct mlx5e_priv *priv)
134 {
135         struct mlx5_core_dev *mdev = priv->mdev;
136         u8 port_state;
137
138         port_state = mlx5_query_vport_state(mdev,
139                                             MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT,
140                                             0);
141
142         if (port_state == VPORT_STATE_UP) {
143                 netdev_info(priv->netdev, "Link up\n");
144                 netif_carrier_on(priv->netdev);
145         } else {
146                 netdev_info(priv->netdev, "Link down\n");
147                 netif_carrier_off(priv->netdev);
148         }
149 }
150
151 static void mlx5e_update_carrier_work(struct work_struct *work)
152 {
153         struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
154                                                update_carrier_work);
155
156         mutex_lock(&priv->state_lock);
157         if (test_bit(MLX5E_STATE_OPENED, &priv->state))
158                 if (priv->profile->update_carrier)
159                         priv->profile->update_carrier(priv);
160         mutex_unlock(&priv->state_lock);
161 }
162
163 void mlx5e_update_ndo_stats(struct mlx5e_priv *priv)
164 {
165         int i;
166
167         for (i = mlx5e_nic_stats_grps_num(priv) - 1; i >= 0; i--)
168                 if (mlx5e_nic_stats_grps[i]->update_stats_mask &
169                     MLX5E_NDO_UPDATE_STATS)
170                         mlx5e_nic_stats_grps[i]->update_stats(priv);
171 }
172
173 static void mlx5e_update_stats_work(struct work_struct *work)
174 {
175         struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
176                                                update_stats_work);
177
178         mutex_lock(&priv->state_lock);
179         priv->profile->update_stats(priv);
180         mutex_unlock(&priv->state_lock);
181 }
182
183 void mlx5e_queue_update_stats(struct mlx5e_priv *priv)
184 {
185         if (!priv->profile->update_stats)
186                 return;
187
188         if (unlikely(test_bit(MLX5E_STATE_DESTROYING, &priv->state)))
189                 return;
190
191         queue_work(priv->wq, &priv->update_stats_work);
192 }
193
194 static int async_event(struct notifier_block *nb, unsigned long event, void *data)
195 {
196         struct mlx5e_priv *priv = container_of(nb, struct mlx5e_priv, events_nb);
197         struct mlx5_eqe   *eqe = data;
198
199         if (event != MLX5_EVENT_TYPE_PORT_CHANGE)
200                 return NOTIFY_DONE;
201
202         switch (eqe->sub_type) {
203         case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
204         case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
205                 queue_work(priv->wq, &priv->update_carrier_work);
206                 break;
207         default:
208                 return NOTIFY_DONE;
209         }
210
211         return NOTIFY_OK;
212 }
213
214 static void mlx5e_enable_async_events(struct mlx5e_priv *priv)
215 {
216         priv->events_nb.notifier_call = async_event;
217         mlx5_notifier_register(priv->mdev, &priv->events_nb);
218 }
219
220 static void mlx5e_disable_async_events(struct mlx5e_priv *priv)
221 {
222         mlx5_notifier_unregister(priv->mdev, &priv->events_nb);
223 }
224
225 static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq,
226                                        struct mlx5e_icosq *sq,
227                                        struct mlx5e_umr_wqe *wqe)
228 {
229         struct mlx5_wqe_ctrl_seg      *cseg = &wqe->ctrl;
230         struct mlx5_wqe_umr_ctrl_seg *ucseg = &wqe->uctrl;
231         u8 ds_cnt = DIV_ROUND_UP(MLX5E_UMR_WQE_INLINE_SZ, MLX5_SEND_WQE_DS);
232
233         cseg->qpn_ds    = cpu_to_be32((sq->sqn << MLX5_WQE_CTRL_QPN_SHIFT) |
234                                       ds_cnt);
235         cseg->fm_ce_se  = MLX5_WQE_CTRL_CQ_UPDATE;
236         cseg->umr_mkey  = rq->mkey_be;
237
238         ucseg->flags = MLX5_UMR_TRANSLATION_OFFSET_EN | MLX5_UMR_INLINE;
239         ucseg->xlt_octowords =
240                 cpu_to_be16(MLX5_MTT_OCTW(MLX5_MPWRQ_PAGES_PER_WQE));
241         ucseg->mkey_mask     = cpu_to_be64(MLX5_MKEY_MASK_FREE);
242 }
243
244 static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq,
245                                      struct mlx5e_channel *c)
246 {
247         int wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq);
248
249         rq->mpwqe.info = kvzalloc_node(array_size(wq_sz,
250                                                   sizeof(*rq->mpwqe.info)),
251                                        GFP_KERNEL, cpu_to_node(c->cpu));
252         if (!rq->mpwqe.info)
253                 return -ENOMEM;
254
255         mlx5e_build_umr_wqe(rq, &c->icosq, &rq->mpwqe.umr_wqe);
256
257         return 0;
258 }
259
260 static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev,
261                                  u64 npages, u8 page_shift,
262                                  struct mlx5_core_mkey *umr_mkey)
263 {
264         int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
265         void *mkc;
266         u32 *in;
267         int err;
268
269         in = kvzalloc(inlen, GFP_KERNEL);
270         if (!in)
271                 return -ENOMEM;
272
273         mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
274
275         MLX5_SET(mkc, mkc, free, 1);
276         MLX5_SET(mkc, mkc, umr_en, 1);
277         MLX5_SET(mkc, mkc, lw, 1);
278         MLX5_SET(mkc, mkc, lr, 1);
279         MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT);
280
281         MLX5_SET(mkc, mkc, qpn, 0xffffff);
282         MLX5_SET(mkc, mkc, pd, mdev->mlx5e_res.pdn);
283         MLX5_SET64(mkc, mkc, len, npages << page_shift);
284         MLX5_SET(mkc, mkc, translations_octword_size,
285                  MLX5_MTT_OCTW(npages));
286         MLX5_SET(mkc, mkc, log_page_size, page_shift);
287
288         err = mlx5_core_create_mkey(mdev, umr_mkey, in, inlen);
289
290         kvfree(in);
291         return err;
292 }
293
294 static int mlx5e_create_rq_umr_mkey(struct mlx5_core_dev *mdev, struct mlx5e_rq *rq)
295 {
296         u64 num_mtts = MLX5E_REQUIRED_MTTS(mlx5_wq_ll_get_size(&rq->mpwqe.wq));
297
298         return mlx5e_create_umr_mkey(mdev, num_mtts, PAGE_SHIFT, &rq->umr_mkey);
299 }
300
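/* Each MPWQE owns a page-aligned block of 2^MLX5E_LOG_ALIGNED_MPWQE_PPW pages,
 * so the DMA offset of WQE i is i * (1 << MLX5E_LOG_ALIGNED_MPWQE_PPW) * PAGE_SIZE.
 * Illustrative example (assuming 4 KB pages and 8 aligned pages per WQE):
 * WQE 0 -> 0x0, WQE 1 -> 0x8000, WQE 2 -> 0x10000.
 */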
301 static inline u64 mlx5e_get_mpwqe_offset(struct mlx5e_rq *rq, u16 wqe_ix)
302 {
303         return (wqe_ix << MLX5E_LOG_ALIGNED_MPWQE_PPW) << PAGE_SHIFT;
304 }
305
306 static void mlx5e_init_frags_partition(struct mlx5e_rq *rq)
307 {
308         struct mlx5e_wqe_frag_info next_frag = {};
309         struct mlx5e_wqe_frag_info *prev = NULL;
310         int i;
311
312         next_frag.di = &rq->wqe.di[0];
313
314         for (i = 0; i < mlx5_wq_cyc_get_size(&rq->wqe.wq); i++) {
315                 struct mlx5e_rq_frag_info *frag_info = &rq->wqe.info.arr[0];
316                 struct mlx5e_wqe_frag_info *frag =
317                         &rq->wqe.frags[i << rq->wqe.info.log_num_frags];
318                 int f;
319
320                 for (f = 0; f < rq->wqe.info.num_frags; f++, frag++) {
321                         if (next_frag.offset + frag_info[f].frag_stride > PAGE_SIZE) {
322                                 next_frag.di++;
323                                 next_frag.offset = 0;
324                                 if (prev)
325                                         prev->last_in_page = true;
326                         }
327                         *frag = next_frag;
328
329                         /* prepare next */
330                         next_frag.offset += frag_info[f].frag_stride;
331                         prev = frag;
332                 }
333         }
334
335         if (prev)
336                 prev->last_in_page = true;
337 }
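/* Illustrative example of the partitioning above (assumed sizes): with
 * num_frags = 2 and frag_stride = 2048 on a 4 KB page, WQE 0 maps to
 * {di[0], offset 0} and {di[0], offset 2048}, WQE 1 starts a new page at
 * {di[1], offset 0}, and the last fragment placed in each page gets
 * last_in_page set.
 */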
338
339 static int mlx5e_init_di_list(struct mlx5e_rq *rq,
340                               int wq_sz, int cpu)
341 {
342         int len = wq_sz << rq->wqe.info.log_num_frags;
343
344         rq->wqe.di = kvzalloc_node(array_size(len, sizeof(*rq->wqe.di)),
345                                    GFP_KERNEL, cpu_to_node(cpu));
346         if (!rq->wqe.di)
347                 return -ENOMEM;
348
349         mlx5e_init_frags_partition(rq);
350
351         return 0;
352 }
353
354 static void mlx5e_free_di_list(struct mlx5e_rq *rq)
355 {
356         kvfree(rq->wqe.di);
357 }
358
359 static void mlx5e_rq_err_cqe_work(struct work_struct *recover_work)
360 {
361         struct mlx5e_rq *rq = container_of(recover_work, struct mlx5e_rq, recover_work);
362
363         mlx5e_reporter_rq_cqe_err(rq);
364 }
365
366 static int mlx5e_alloc_rq(struct mlx5e_channel *c,
367                           struct mlx5e_params *params,
368                           struct mlx5e_xsk_param *xsk,
369                           struct xdp_umem *umem,
370                           struct mlx5e_rq_param *rqp,
371                           struct mlx5e_rq *rq)
372 {
373         struct page_pool_params pp_params = { 0 };
374         struct mlx5_core_dev *mdev = c->mdev;
375         void *rqc = rqp->rqc;
376         void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq);
377         u32 rq_xdp_ix;
378         u32 pool_size;
379         int wq_sz;
380         int err;
381         int i;
382
383         rqp->wq.db_numa_node = cpu_to_node(c->cpu);
384
385         rq->wq_type = params->rq_wq_type;
386         rq->pdev    = c->pdev;
387         rq->netdev  = c->netdev;
388         rq->tstamp  = c->tstamp;
389         rq->clock   = &mdev->clock;
390         rq->channel = c;
391         rq->ix      = c->ix;
392         rq->mdev    = mdev;
393         rq->hw_mtu  = MLX5E_SW2HW_MTU(params, params->sw_mtu);
394         rq->xdpsq   = &c->rq_xdpsq;
395         rq->umem    = umem;
396
397         if (rq->umem)
398                 rq->stats = &c->priv->channel_stats[c->ix].xskrq;
399         else
400                 rq->stats = &c->priv->channel_stats[c->ix].rq;
401         INIT_WORK(&rq->recover_work, mlx5e_rq_err_cqe_work);
402
403         if (params->xdp_prog)
404                 bpf_prog_inc(params->xdp_prog);
405         rq->xdp_prog = params->xdp_prog;
406
407         rq_xdp_ix = rq->ix;
408         if (xsk)
409                 rq_xdp_ix += params->num_channels * MLX5E_RQ_GROUP_XSK;
410         err = xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq_xdp_ix);
411         if (err < 0)
412                 goto err_rq_wq_destroy;
413
414         rq->buff.map_dir = rq->xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
415         rq->buff.headroom = mlx5e_get_rq_headroom(mdev, params, xsk);
416         pool_size = 1 << params->log_rq_mtu_frames;
417
418         switch (rq->wq_type) {
419         case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
420                 err = mlx5_wq_ll_create(mdev, &rqp->wq, rqc_wq, &rq->mpwqe.wq,
421                                         &rq->wq_ctrl);
422                 if (err)
423                         return err;
424
425                 rq->mpwqe.wq.db = &rq->mpwqe.wq.db[MLX5_RCV_DBR];
426
427                 wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq);
428
429                 pool_size = MLX5_MPWRQ_PAGES_PER_WQE <<
430                         mlx5e_mpwqe_get_log_rq_size(params, xsk);
431
432                 rq->post_wqes = mlx5e_post_rx_mpwqes;
433                 rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe;
434
435                 rq->handle_rx_cqe = c->priv->profile->rx_handlers.handle_rx_cqe_mpwqe;
436 #ifdef CONFIG_MLX5_EN_IPSEC
437                 if (MLX5_IPSEC_DEV(mdev)) {
438                         err = -EINVAL;
439                         netdev_err(c->netdev, "MPWQE RQ with IPSec offload not supported\n");
440                         goto err_rq_wq_destroy;
441                 }
442 #endif
443                 if (!rq->handle_rx_cqe) {
444                         err = -EINVAL;
445                         netdev_err(c->netdev, "RX handler of MPWQE RQ is not set, err %d\n", err);
446                         goto err_rq_wq_destroy;
447                 }
448
449                 rq->mpwqe.skb_from_cqe_mpwrq = xsk ?
450                         mlx5e_xsk_skb_from_cqe_mpwrq_linear :
451                         mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL) ?
452                                 mlx5e_skb_from_cqe_mpwrq_linear :
453                                 mlx5e_skb_from_cqe_mpwrq_nonlinear;
454
455                 rq->mpwqe.log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk);
456                 rq->mpwqe.num_strides =
457                         BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk));
458
459                 rq->buff.frame0_sz = (1 << rq->mpwqe.log_stride_sz);
460
461                 err = mlx5e_create_rq_umr_mkey(mdev, rq);
462                 if (err)
463                         goto err_rq_wq_destroy;
464                 rq->mkey_be = cpu_to_be32(rq->umr_mkey.key);
465
466                 err = mlx5e_rq_alloc_mpwqe_info(rq, c);
467                 if (err)
468                         goto err_free;
469                 break;
470         default: /* MLX5_WQ_TYPE_CYCLIC */
471                 err = mlx5_wq_cyc_create(mdev, &rqp->wq, rqc_wq, &rq->wqe.wq,
472                                          &rq->wq_ctrl);
473                 if (err)
474                         return err;
475
476                 rq->wqe.wq.db = &rq->wqe.wq.db[MLX5_RCV_DBR];
477
478                 wq_sz = mlx5_wq_cyc_get_size(&rq->wqe.wq);
479
480                 rq->wqe.info = rqp->frags_info;
481                 rq->buff.frame0_sz = rq->wqe.info.arr[0].frag_stride;
482
483                 rq->wqe.frags =
484                         kvzalloc_node(array_size(sizeof(*rq->wqe.frags),
485                                         (wq_sz << rq->wqe.info.log_num_frags)),
486                                       GFP_KERNEL, cpu_to_node(c->cpu));
487                 if (!rq->wqe.frags) {
488                         err = -ENOMEM;
489                         goto err_free;
490                 }
491
492                 err = mlx5e_init_di_list(rq, wq_sz, c->cpu);
493                 if (err)
494                         goto err_free;
495
496                 rq->post_wqes = mlx5e_post_rx_wqes;
497                 rq->dealloc_wqe = mlx5e_dealloc_rx_wqe;
498
499 #ifdef CONFIG_MLX5_EN_IPSEC
500                 if (c->priv->ipsec)
501                         rq->handle_rx_cqe = mlx5e_ipsec_handle_rx_cqe;
502                 else
503 #endif
504                         rq->handle_rx_cqe = c->priv->profile->rx_handlers.handle_rx_cqe;
505                 if (!rq->handle_rx_cqe) {
506                         err = -EINVAL;
507                         netdev_err(c->netdev, "RX handler of RQ is not set, err %d\n", err);
508                         goto err_free;
509                 }
510
511                 rq->wqe.skb_from_cqe = xsk ?
512                         mlx5e_xsk_skb_from_cqe_linear :
513                         mlx5e_rx_is_linear_skb(params, NULL) ?
514                                 mlx5e_skb_from_cqe_linear :
515                                 mlx5e_skb_from_cqe_nonlinear;
516                 rq->mkey_be = c->mkey_be;
517         }
518
519         if (xsk) {
520                 err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
521                                                  MEM_TYPE_XSK_BUFF_POOL, NULL);
522                 xsk_buff_set_rxq_info(rq->umem, &rq->xdp_rxq);
523         } else {
524                 /* Create a page_pool and register it with rxq */
525                 pp_params.order     = 0;
526                 pp_params.flags     = 0; /* No internal DMA mapping in page_pool */
527                 pp_params.pool_size = pool_size;
528                 pp_params.nid       = cpu_to_node(c->cpu);
529                 pp_params.dev       = c->pdev;
530                 pp_params.dma_dir   = rq->buff.map_dir;
531
532                 /* page_pool can be used even when there is no rq->xdp_prog:
533                  * since page_pool does not handle DMA mapping here, there is no
534                  * required state to clear, and page_pool gracefully handles
535                  * elevated refcounts.
536                  */
537                 rq->page_pool = page_pool_create(&pp_params);
538                 if (IS_ERR(rq->page_pool)) {
539                         err = PTR_ERR(rq->page_pool);
540                         rq->page_pool = NULL;
541                         goto err_free;
542                 }
543                 err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
544                                                  MEM_TYPE_PAGE_POOL, rq->page_pool);
545         }
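            /* At this point the RQ's xdp_rxq has its memory model registered:
             * MEM_TYPE_XSK_BUFF_POOL for AF_XDP (XSK) queues, or
             * MEM_TYPE_PAGE_POOL backed by rq->page_pool otherwise. err below
             * carries the result of either registration.
             */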
546         if (err)
547                 goto err_free;
548
549         for (i = 0; i < wq_sz; i++) {
550                 if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
551                         struct mlx5e_rx_wqe_ll *wqe =
552                                 mlx5_wq_ll_get_wqe(&rq->mpwqe.wq, i);
553                         u32 byte_count =
554                                 rq->mpwqe.num_strides << rq->mpwqe.log_stride_sz;
555                         u64 dma_offset = mlx5e_get_mpwqe_offset(rq, i);
556
557                         wqe->data[0].addr = cpu_to_be64(dma_offset + rq->buff.headroom);
558                         wqe->data[0].byte_count = cpu_to_be32(byte_count);
559                         wqe->data[0].lkey = rq->mkey_be;
560                 } else {
561                         struct mlx5e_rx_wqe_cyc *wqe =
562                                 mlx5_wq_cyc_get_wqe(&rq->wqe.wq, i);
563                         int f;
564
565                         for (f = 0; f < rq->wqe.info.num_frags; f++) {
566                                 u32 frag_size = rq->wqe.info.arr[f].frag_size |
567                                         MLX5_HW_START_PADDING;
568
569                                 wqe->data[f].byte_count = cpu_to_be32(frag_size);
570                                 wqe->data[f].lkey = rq->mkey_be;
571                         }
572                         /* If num_frags is not a power of two, terminate with an empty data segment */
573                         if (rq->wqe.info.num_frags < (1 << rq->wqe.info.log_num_frags)) {
574                                 wqe->data[f].byte_count = 0;
575                                 wqe->data[f].lkey = cpu_to_be32(MLX5_INVALID_LKEY);
576                                 wqe->data[f].addr = 0;
577                         }
578                 }
579         }
580
581         INIT_WORK(&rq->dim.work, mlx5e_rx_dim_work);
582
583         switch (params->rx_cq_moderation.cq_period_mode) {
584         case MLX5_CQ_PERIOD_MODE_START_FROM_CQE:
585                 rq->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_CQE;
586                 break;
587         case MLX5_CQ_PERIOD_MODE_START_FROM_EQE:
588         default:
589                 rq->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
590         }
591
592         rq->page_cache.head = 0;
593         rq->page_cache.tail = 0;
594
595         return 0;
596
597 err_free:
598         switch (rq->wq_type) {
599         case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
600                 kvfree(rq->mpwqe.info);
601                 mlx5_core_destroy_mkey(mdev, &rq->umr_mkey);
602                 break;
603         default: /* MLX5_WQ_TYPE_CYCLIC */
604                 kvfree(rq->wqe.frags);
605                 mlx5e_free_di_list(rq);
606         }
607
608 err_rq_wq_destroy:
609         if (rq->xdp_prog)
610                 bpf_prog_put(rq->xdp_prog);
611         xdp_rxq_info_unreg(&rq->xdp_rxq);
612         page_pool_destroy(rq->page_pool);
613         mlx5_wq_destroy(&rq->wq_ctrl);
614
615         return err;
616 }
617
618 static void mlx5e_free_rq(struct mlx5e_rq *rq)
619 {
620         int i;
621
622         if (rq->xdp_prog)
623                 bpf_prog_put(rq->xdp_prog);
624
625         switch (rq->wq_type) {
626         case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
627                 kvfree(rq->mpwqe.info);
628                 mlx5_core_destroy_mkey(rq->mdev, &rq->umr_mkey);
629                 break;
630         default: /* MLX5_WQ_TYPE_CYCLIC */
631                 kvfree(rq->wqe.frags);
632                 mlx5e_free_di_list(rq);
633         }
634
635         for (i = rq->page_cache.head; i != rq->page_cache.tail;
636              i = (i + 1) & (MLX5E_CACHE_SIZE - 1)) {
637                 struct mlx5e_dma_info *dma_info = &rq->page_cache.page_cache[i];
638
639                 /* With AF_XDP, page_cache is not used, so this loop is not
640                  * entered, and it's safe to call mlx5e_page_release_dynamic
641                  * directly.
642                  */
643                 mlx5e_page_release_dynamic(rq, dma_info, false);
644         }
645
646         xdp_rxq_info_unreg(&rq->xdp_rxq);
647         page_pool_destroy(rq->page_pool);
648         mlx5_wq_destroy(&rq->wq_ctrl);
649 }
650
651 static int mlx5e_create_rq(struct mlx5e_rq *rq,
652                            struct mlx5e_rq_param *param)
653 {
654         struct mlx5_core_dev *mdev = rq->mdev;
655
656         void *in;
657         void *rqc;
658         void *wq;
659         int inlen;
660         int err;
661
662         inlen = MLX5_ST_SZ_BYTES(create_rq_in) +
663                 sizeof(u64) * rq->wq_ctrl.buf.npages;
664         in = kvzalloc(inlen, GFP_KERNEL);
665         if (!in)
666                 return -ENOMEM;
667
668         rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
669         wq  = MLX5_ADDR_OF(rqc, rqc, wq);
670
671         memcpy(rqc, param->rqc, sizeof(param->rqc));
672
673         MLX5_SET(rqc,  rqc, cqn,                rq->cq.mcq.cqn);
674         MLX5_SET(rqc,  rqc, state,              MLX5_RQC_STATE_RST);
675         MLX5_SET(wq,   wq,  log_wq_pg_sz,       rq->wq_ctrl.buf.page_shift -
676                                                 MLX5_ADAPTER_PAGE_SHIFT);
677         MLX5_SET64(wq, wq,  dbr_addr,           rq->wq_ctrl.db.dma);
678
679         mlx5_fill_page_frag_array(&rq->wq_ctrl.buf,
680                                   (__be64 *)MLX5_ADDR_OF(wq, wq, pas));
681
682         err = mlx5_core_create_rq(mdev, in, inlen, &rq->rqn);
683
684         kvfree(in);
685
686         return err;
687 }
688
689 int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_state)
690 {
691         struct mlx5_core_dev *mdev = rq->mdev;
692
693         void *in;
694         void *rqc;
695         int inlen;
696         int err;
697
698         inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
699         in = kvzalloc(inlen, GFP_KERNEL);
700         if (!in)
701                 return -ENOMEM;
702
703         if (curr_state == MLX5_RQC_STATE_RST && next_state == MLX5_RQC_STATE_RDY)
704                 mlx5e_rqwq_reset(rq);
705
706         rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
707
708         MLX5_SET(modify_rq_in, in, rq_state, curr_state);
709         MLX5_SET(rqc, rqc, state, next_state);
710
711         err = mlx5_core_modify_rq(mdev, rq->rqn, in);
712
713         kvfree(in);
714
715         return err;
716 }
717
718 static int mlx5e_modify_rq_scatter_fcs(struct mlx5e_rq *rq, bool enable)
719 {
720         struct mlx5e_channel *c = rq->channel;
721         struct mlx5e_priv *priv = c->priv;
722         struct mlx5_core_dev *mdev = priv->mdev;
723
724         void *in;
725         void *rqc;
726         int inlen;
727         int err;
728
729         inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
730         in = kvzalloc(inlen, GFP_KERNEL);
731         if (!in)
732                 return -ENOMEM;
733
734         rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
735
736         MLX5_SET(modify_rq_in, in, rq_state, MLX5_RQC_STATE_RDY);
737         MLX5_SET64(modify_rq_in, in, modify_bitmask,
738                    MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_SCATTER_FCS);
739         MLX5_SET(rqc, rqc, scatter_fcs, enable);
740         MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RDY);
741
742         err = mlx5_core_modify_rq(mdev, rq->rqn, in);
743
744         kvfree(in);
745
746         return err;
747 }
748
749 static int mlx5e_modify_rq_vsd(struct mlx5e_rq *rq, bool vsd)
750 {
751         struct mlx5e_channel *c = rq->channel;
752         struct mlx5_core_dev *mdev = c->mdev;
753         void *in;
754         void *rqc;
755         int inlen;
756         int err;
757
758         inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
759         in = kvzalloc(inlen, GFP_KERNEL);
760         if (!in)
761                 return -ENOMEM;
762
763         rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
764
765         MLX5_SET(modify_rq_in, in, rq_state, MLX5_RQC_STATE_RDY);
766         MLX5_SET64(modify_rq_in, in, modify_bitmask,
767                    MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_VSD);
768         MLX5_SET(rqc, rqc, vsd, vsd);
769         MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RDY);
770
771         err = mlx5_core_modify_rq(mdev, rq->rqn, in);
772
773         kvfree(in);
774
775         return err;
776 }
777
778 static void mlx5e_destroy_rq(struct mlx5e_rq *rq)
779 {
780         mlx5_core_destroy_rq(rq->mdev, rq->rqn);
781 }
782
783 int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time)
784 {
785         unsigned long exp_time = jiffies + msecs_to_jiffies(wait_time);
786         struct mlx5e_channel *c = rq->channel;
787
788         u16 min_wqes = mlx5_min_rx_wqes(rq->wq_type, mlx5e_rqwq_get_size(rq));
789
790         do {
791                 if (mlx5e_rqwq_get_cur_sz(rq) >= min_wqes)
792                         return 0;
793
794                 msleep(20);
795         } while (time_before(jiffies, exp_time));
796
797         netdev_warn(c->netdev, "Failed to get min RX wqes on Channel[%d] RQN[0x%x] wq cur_sz(%d) min_rx_wqes(%d)\n",
798                     c->ix, rq->rqn, mlx5e_rqwq_get_cur_sz(rq), min_wqes);
799
800         mlx5e_reporter_rx_timeout(rq);
801         return -ETIMEDOUT;
802 }
803
804 void mlx5e_free_rx_in_progress_descs(struct mlx5e_rq *rq)
805 {
806         struct mlx5_wq_ll *wq;
807         u16 head;
808         int i;
809
810         if (rq->wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
811                 return;
812
813         wq = &rq->mpwqe.wq;
814         head = wq->head;
815
816         /* Outstanding UMR WQEs (in progress) start at wq->head */
817         for (i = 0; i < rq->mpwqe.umr_in_progress; i++) {
818                 rq->dealloc_wqe(rq, head);
819                 head = mlx5_wq_ll_get_wqe_next_ix(wq, head);
820         }
821
822         rq->mpwqe.actual_wq_head = wq->head;
823         rq->mpwqe.umr_in_progress = 0;
824         rq->mpwqe.umr_completed = 0;
825 }
826
827 void mlx5e_free_rx_descs(struct mlx5e_rq *rq)
828 {
829         __be16 wqe_ix_be;
830         u16 wqe_ix;
831
832         if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
833                 struct mlx5_wq_ll *wq = &rq->mpwqe.wq;
834
835                 mlx5e_free_rx_in_progress_descs(rq);
836
837                 while (!mlx5_wq_ll_is_empty(wq)) {
838                         struct mlx5e_rx_wqe_ll *wqe;
839
840                         wqe_ix_be = *wq->tail_next;
841                         wqe_ix    = be16_to_cpu(wqe_ix_be);
842                         wqe       = mlx5_wq_ll_get_wqe(wq, wqe_ix);
843                         rq->dealloc_wqe(rq, wqe_ix);
844                         mlx5_wq_ll_pop(wq, wqe_ix_be,
845                                        &wqe->next.next_wqe_index);
846                 }
847         } else {
848                 struct mlx5_wq_cyc *wq = &rq->wqe.wq;
849
850                 while (!mlx5_wq_cyc_is_empty(wq)) {
851                         wqe_ix = mlx5_wq_cyc_get_tail(wq);
852                         rq->dealloc_wqe(rq, wqe_ix);
853                         mlx5_wq_cyc_pop(wq);
854                 }
855         }
856
857 }
858
859 int mlx5e_open_rq(struct mlx5e_channel *c, struct mlx5e_params *params,
860                   struct mlx5e_rq_param *param, struct mlx5e_xsk_param *xsk,
861                   struct xdp_umem *umem, struct mlx5e_rq *rq)
862 {
863         int err;
864
865         err = mlx5e_alloc_rq(c, params, xsk, umem, param, rq);
866         if (err)
867                 return err;
868
869         err = mlx5e_create_rq(rq, param);
870         if (err)
871                 goto err_free_rq;
872
873         err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
874         if (err)
875                 goto err_destroy_rq;
876
877         if (MLX5_CAP_ETH(c->mdev, cqe_checksum_full))
878                 __set_bit(MLX5E_RQ_STATE_CSUM_FULL, &c->rq.state);
879
880         if (params->rx_dim_enabled)
881                 __set_bit(MLX5E_RQ_STATE_AM, &c->rq.state);
882
883         /* We disable csum_complete when XDP is enabled, since XDP programs
884          * might manipulate packets in a way that renders skb->checksum
885          * incorrect.
886          */
887         if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE) || c->xdp)
888                 __set_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &c->rq.state);
889
890         return 0;
891
892 err_destroy_rq:
893         mlx5e_destroy_rq(rq);
894 err_free_rq:
895         mlx5e_free_rq(rq);
896
897         return err;
898 }
899
900 void mlx5e_activate_rq(struct mlx5e_rq *rq)
901 {
902         set_bit(MLX5E_RQ_STATE_ENABLED, &rq->state);
903         mlx5e_trigger_irq(&rq->channel->icosq);
904 }
905
906 void mlx5e_deactivate_rq(struct mlx5e_rq *rq)
907 {
908         clear_bit(MLX5E_RQ_STATE_ENABLED, &rq->state);
909         napi_synchronize(&rq->channel->napi); /* prevent mlx5e_post_rx_wqes */
910 }
911
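/* Roughly the reverse of mlx5e_open_rq(): cancel deferred work, destroy the
 * HW RQ object, release any RX descriptors still posted on the WQ, then free
 * the SW RQ resources.
 */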
912 void mlx5e_close_rq(struct mlx5e_rq *rq)
913 {
914         cancel_work_sync(&rq->dim.work);
915         cancel_work_sync(&rq->channel->icosq.recover_work);
916         cancel_work_sync(&rq->recover_work);
917         mlx5e_destroy_rq(rq);
918         mlx5e_free_rx_descs(rq);
919         mlx5e_free_rq(rq);
920 }
921
922 static void mlx5e_free_xdpsq_db(struct mlx5e_xdpsq *sq)
923 {
924         kvfree(sq->db.xdpi_fifo.xi);
925         kvfree(sq->db.wqe_info);
926 }
927
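/* The XDP info FIFO holds one entry per data segment the SQ can carry
 * (wq_sz * MLX5_SEND_WQEBB_NUM_DS). Both factors are powers of two, so
 * dsegs_per_wq - 1 can serve as the wrap-around mask for the producer and
 * consumer counters xdpi_fifo_pc/xdpi_fifo_cc.
 */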
928 static int mlx5e_alloc_xdpsq_fifo(struct mlx5e_xdpsq *sq, int numa)
929 {
930         struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo;
931         int wq_sz        = mlx5_wq_cyc_get_size(&sq->wq);
932         int dsegs_per_wq = wq_sz * MLX5_SEND_WQEBB_NUM_DS;
933
934         xdpi_fifo->xi = kvzalloc_node(sizeof(*xdpi_fifo->xi) * dsegs_per_wq,
935                                       GFP_KERNEL, numa);
936         if (!xdpi_fifo->xi)
937                 return -ENOMEM;
938
939         xdpi_fifo->pc   = &sq->xdpi_fifo_pc;
940         xdpi_fifo->cc   = &sq->xdpi_fifo_cc;
941         xdpi_fifo->mask = dsegs_per_wq - 1;
942
943         return 0;
944 }
945
946 static int mlx5e_alloc_xdpsq_db(struct mlx5e_xdpsq *sq, int numa)
947 {
948         int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
949         int err;
950
951         sq->db.wqe_info = kvzalloc_node(sizeof(*sq->db.wqe_info) * wq_sz,
952                                         GFP_KERNEL, numa);
953         if (!sq->db.wqe_info)
954                 return -ENOMEM;
955
956         err = mlx5e_alloc_xdpsq_fifo(sq, numa);
957         if (err) {
958                 mlx5e_free_xdpsq_db(sq);
959                 return err;
960         }
961
962         return 0;
963 }
964
965 static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c,
966                              struct mlx5e_params *params,
967                              struct xdp_umem *umem,
968                              struct mlx5e_sq_param *param,
969                              struct mlx5e_xdpsq *sq,
970                              bool is_redirect)
971 {
972         void *sqc_wq               = MLX5_ADDR_OF(sqc, param->sqc, wq);
973         struct mlx5_core_dev *mdev = c->mdev;
974         struct mlx5_wq_cyc *wq = &sq->wq;
975         int err;
976
977         sq->pdev      = c->pdev;
978         sq->mkey_be   = c->mkey_be;
979         sq->channel   = c;
980         sq->uar_map   = mdev->mlx5e_res.bfreg.map;
981         sq->min_inline_mode = params->tx_min_inline_mode;
982         sq->hw_mtu    = MLX5E_SW2HW_MTU(params, params->sw_mtu);
983         sq->umem      = umem;
984
985         sq->stats = sq->umem ?
986                 &c->priv->channel_stats[c->ix].xsksq :
987                 is_redirect ?
988                         &c->priv->channel_stats[c->ix].xdpsq :
989                         &c->priv->channel_stats[c->ix].rq_xdpsq;
990
991         param->wq.db_numa_node = cpu_to_node(c->cpu);
992         err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, wq, &sq->wq_ctrl);
993         if (err)
994                 return err;
995         wq->db = &wq->db[MLX5_SND_DBR];
996
997         err = mlx5e_alloc_xdpsq_db(sq, cpu_to_node(c->cpu));
998         if (err)
999                 goto err_sq_wq_destroy;
1000
1001         return 0;
1002
1003 err_sq_wq_destroy:
1004         mlx5_wq_destroy(&sq->wq_ctrl);
1005
1006         return err;
1007 }
1008
1009 static void mlx5e_free_xdpsq(struct mlx5e_xdpsq *sq)
1010 {
1011         mlx5e_free_xdpsq_db(sq);
1012         mlx5_wq_destroy(&sq->wq_ctrl);
1013 }
1014
1015 static void mlx5e_free_icosq_db(struct mlx5e_icosq *sq)
1016 {
1017         kvfree(sq->db.wqe_info);
1018 }
1019
1020 static int mlx5e_alloc_icosq_db(struct mlx5e_icosq *sq, int numa)
1021 {
1022         int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
1023         size_t size;
1024
1025         size = array_size(wq_sz, sizeof(*sq->db.wqe_info));
1026         sq->db.wqe_info = kvzalloc_node(size, GFP_KERNEL, numa);
1027         if (!sq->db.wqe_info)
1028                 return -ENOMEM;
1029
1030         return 0;
1031 }
1032
1033 static void mlx5e_icosq_err_cqe_work(struct work_struct *recover_work)
1034 {
1035         struct mlx5e_icosq *sq = container_of(recover_work, struct mlx5e_icosq,
1036                                               recover_work);
1037
1038         mlx5e_reporter_icosq_cqe_err(sq);
1039 }
1040
1041 static int mlx5e_alloc_icosq(struct mlx5e_channel *c,
1042                              struct mlx5e_sq_param *param,
1043                              struct mlx5e_icosq *sq)
1044 {
1045         void *sqc_wq               = MLX5_ADDR_OF(sqc, param->sqc, wq);
1046         struct mlx5_core_dev *mdev = c->mdev;
1047         struct mlx5_wq_cyc *wq = &sq->wq;
1048         int err;
1049
1050         sq->channel   = c;
1051         sq->uar_map   = mdev->mlx5e_res.bfreg.map;
1052
1053         param->wq.db_numa_node = cpu_to_node(c->cpu);
1054         err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, wq, &sq->wq_ctrl);
1055         if (err)
1056                 return err;
1057         wq->db = &wq->db[MLX5_SND_DBR];
1058
1059         err = mlx5e_alloc_icosq_db(sq, cpu_to_node(c->cpu));
1060         if (err)
1061                 goto err_sq_wq_destroy;
1062
1063         INIT_WORK(&sq->recover_work, mlx5e_icosq_err_cqe_work);
1064
1065         return 0;
1066
1067 err_sq_wq_destroy:
1068         mlx5_wq_destroy(&sq->wq_ctrl);
1069
1070         return err;
1071 }
1072
1073 static void mlx5e_free_icosq(struct mlx5e_icosq *sq)
1074 {
1075         mlx5e_free_icosq_db(sq);
1076         mlx5_wq_destroy(&sq->wq_ctrl);
1077 }
1078
1079 static void mlx5e_free_txqsq_db(struct mlx5e_txqsq *sq)
1080 {
1081         kvfree(sq->db.wqe_info);
1082         kvfree(sq->db.dma_fifo);
1083 }
1084
1085 static int mlx5e_alloc_txqsq_db(struct mlx5e_txqsq *sq, int numa)
1086 {
1087         int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
1088         int df_sz = wq_sz * MLX5_SEND_WQEBB_NUM_DS;
1089
1090         sq->db.dma_fifo = kvzalloc_node(array_size(df_sz,
1091                                                    sizeof(*sq->db.dma_fifo)),
1092                                         GFP_KERNEL, numa);
1093         sq->db.wqe_info = kvzalloc_node(array_size(wq_sz,
1094                                                    sizeof(*sq->db.wqe_info)),
1095                                         GFP_KERNEL, numa);
1096         if (!sq->db.dma_fifo || !sq->db.wqe_info) {
1097                 mlx5e_free_txqsq_db(sq);
1098                 return -ENOMEM;
1099         }
1100
1101         sq->dma_fifo_mask = df_sz - 1;
1102
1103         return 0;
1104 }
1105
1106 static void mlx5e_tx_err_cqe_work(struct work_struct *recover_work);
1107 static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
1108                              int txq_ix,
1109                              struct mlx5e_params *params,
1110                              struct mlx5e_sq_param *param,
1111                              struct mlx5e_txqsq *sq,
1112                              int tc)
1113 {
1114         void *sqc_wq               = MLX5_ADDR_OF(sqc, param->sqc, wq);
1115         struct mlx5_core_dev *mdev = c->mdev;
1116         struct mlx5_wq_cyc *wq = &sq->wq;
1117         int err;
1118
1119         sq->pdev      = c->pdev;
1120         sq->tstamp    = c->tstamp;
1121         sq->clock     = &mdev->clock;
1122         sq->mkey_be   = c->mkey_be;
1123         sq->channel   = c;
1124         sq->ch_ix     = c->ix;
1125         sq->txq_ix    = txq_ix;
1126         sq->uar_map   = mdev->mlx5e_res.bfreg.map;
1127         sq->min_inline_mode = params->tx_min_inline_mode;
1128         sq->hw_mtu    = MLX5E_SW2HW_MTU(params, params->sw_mtu);
1129         sq->stats     = &c->priv->channel_stats[c->ix].sq[tc];
1130         sq->stop_room = MLX5E_SQ_STOP_ROOM;
1131         INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work);
1132         if (!MLX5_CAP_ETH(mdev, wqe_vlan_insert))
1133                 set_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state);
1134         if (MLX5_IPSEC_DEV(c->priv->mdev))
1135                 set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state);
1136 #ifdef CONFIG_MLX5_EN_TLS
1137         if (mlx5_accel_is_tls_device(c->priv->mdev)) {
1138                 set_bit(MLX5E_SQ_STATE_TLS, &sq->state);
1139                 sq->stop_room += MLX5E_SQ_TLS_ROOM +
1140                         mlx5e_ktls_dumps_num_wqebbs(sq, MAX_SKB_FRAGS,
1141                                                     TLS_MAX_PAYLOAD_SIZE);
1142         }
1143 #endif
1144
1145         param->wq.db_numa_node = cpu_to_node(c->cpu);
1146         err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, wq, &sq->wq_ctrl);
1147         if (err)
1148                 return err;
1149         wq->db    = &wq->db[MLX5_SND_DBR];
1150
1151         err = mlx5e_alloc_txqsq_db(sq, cpu_to_node(c->cpu));
1152         if (err)
1153                 goto err_sq_wq_destroy;
1154
1155         INIT_WORK(&sq->dim.work, mlx5e_tx_dim_work);
1156         sq->dim.mode = params->tx_cq_moderation.cq_period_mode;
1157
1158         return 0;
1159
1160 err_sq_wq_destroy:
1161         mlx5_wq_destroy(&sq->wq_ctrl);
1162
1163         return err;
1164 }
1165
1166 static void mlx5e_free_txqsq(struct mlx5e_txqsq *sq)
1167 {
1168         mlx5e_free_txqsq_db(sq);
1169         mlx5_wq_destroy(&sq->wq_ctrl);
1170 }
1171
1172 struct mlx5e_create_sq_param {
1173         struct mlx5_wq_ctrl        *wq_ctrl;
1174         u32                         cqn;
1175         u32                         tisn;
1176         u8                          tis_lst_sz;
1177         u8                          min_inline_mode;
1178 };
1179
1180 static int mlx5e_create_sq(struct mlx5_core_dev *mdev,
1181                            struct mlx5e_sq_param *param,
1182                            struct mlx5e_create_sq_param *csp,
1183                            u32 *sqn)
1184 {
1185         void *in;
1186         void *sqc;
1187         void *wq;
1188         int inlen;
1189         int err;
1190
1191         inlen = MLX5_ST_SZ_BYTES(create_sq_in) +
1192                 sizeof(u64) * csp->wq_ctrl->buf.npages;
1193         in = kvzalloc(inlen, GFP_KERNEL);
1194         if (!in)
1195                 return -ENOMEM;
1196
1197         sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
1198         wq = MLX5_ADDR_OF(sqc, sqc, wq);
1199
1200         memcpy(sqc, param->sqc, sizeof(param->sqc));
1201         MLX5_SET(sqc,  sqc, tis_lst_sz, csp->tis_lst_sz);
1202         MLX5_SET(sqc,  sqc, tis_num_0, csp->tisn);
1203         MLX5_SET(sqc,  sqc, cqn, csp->cqn);
1204
1205         if (MLX5_CAP_ETH(mdev, wqe_inline_mode) == MLX5_CAP_INLINE_MODE_VPORT_CONTEXT)
1206                 MLX5_SET(sqc,  sqc, min_wqe_inline_mode, csp->min_inline_mode);
1207
1208         MLX5_SET(sqc,  sqc, state, MLX5_SQC_STATE_RST);
1209         MLX5_SET(sqc,  sqc, flush_in_error_en, 1);
1210
1211         MLX5_SET(wq,   wq, wq_type,       MLX5_WQ_TYPE_CYCLIC);
1212         MLX5_SET(wq,   wq, uar_page,      mdev->mlx5e_res.bfreg.index);
1213         MLX5_SET(wq,   wq, log_wq_pg_sz,  csp->wq_ctrl->buf.page_shift -
1214                                           MLX5_ADAPTER_PAGE_SHIFT);
1215         MLX5_SET64(wq, wq, dbr_addr,      csp->wq_ctrl->db.dma);
1216
1217         mlx5_fill_page_frag_array(&csp->wq_ctrl->buf,
1218                                   (__be64 *)MLX5_ADDR_OF(wq, wq, pas));
1219
1220         err = mlx5_core_create_sq(mdev, in, inlen, sqn);
1221
1222         kvfree(in);
1223
1224         return err;
1225 }
1226
1227 int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn,
1228                     struct mlx5e_modify_sq_param *p)
1229 {
1230         void *in;
1231         void *sqc;
1232         int inlen;
1233         int err;
1234
1235         inlen = MLX5_ST_SZ_BYTES(modify_sq_in);
1236         in = kvzalloc(inlen, GFP_KERNEL);
1237         if (!in)
1238                 return -ENOMEM;
1239
1240         sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);
1241
1242         MLX5_SET(modify_sq_in, in, sq_state, p->curr_state);
1243         MLX5_SET(sqc, sqc, state, p->next_state);
1244         if (p->rl_update && p->next_state == MLX5_SQC_STATE_RDY) {
1245                 MLX5_SET64(modify_sq_in, in, modify_bitmask, 1);
1246                 MLX5_SET(sqc,  sqc, packet_pacing_rate_limit_index, p->rl_index);
1247         }
1248
1249         err = mlx5_core_modify_sq(mdev, sqn, in);
1250
1251         kvfree(in);
1252
1253         return err;
1254 }
1255
1256 static void mlx5e_destroy_sq(struct mlx5_core_dev *mdev, u32 sqn)
1257 {
1258         mlx5_core_destroy_sq(mdev, sqn);
1259 }
1260
1261 static int mlx5e_create_sq_rdy(struct mlx5_core_dev *mdev,
1262                                struct mlx5e_sq_param *param,
1263                                struct mlx5e_create_sq_param *csp,
1264                                u32 *sqn)
1265 {
1266         struct mlx5e_modify_sq_param msp = {0};
1267         int err;
1268
1269         err = mlx5e_create_sq(mdev, param, csp, sqn);
1270         if (err)
1271                 return err;
1272
1273         msp.curr_state = MLX5_SQC_STATE_RST;
1274         msp.next_state = MLX5_SQC_STATE_RDY;
1275         err = mlx5e_modify_sq(mdev, *sqn, &msp);
1276         if (err)
1277                 mlx5e_destroy_sq(mdev, *sqn);
1278
1279         return err;
1280 }
1281
1282 static int mlx5e_set_sq_maxrate(struct net_device *dev,
1283                                 struct mlx5e_txqsq *sq, u32 rate);
1284
1285 static int mlx5e_open_txqsq(struct mlx5e_channel *c,
1286                             u32 tisn,
1287                             int txq_ix,
1288                             struct mlx5e_params *params,
1289                             struct mlx5e_sq_param *param,
1290                             struct mlx5e_txqsq *sq,
1291                             int tc)
1292 {
1293         struct mlx5e_create_sq_param csp = {};
1294         u32 tx_rate;
1295         int err;
1296
1297         err = mlx5e_alloc_txqsq(c, txq_ix, params, param, sq, tc);
1298         if (err)
1299                 return err;
1300
1301         csp.tisn            = tisn;
1302         csp.tis_lst_sz      = 1;
1303         csp.cqn             = sq->cq.mcq.cqn;
1304         csp.wq_ctrl         = &sq->wq_ctrl;
1305         csp.min_inline_mode = sq->min_inline_mode;
1306         err = mlx5e_create_sq_rdy(c->mdev, param, &csp, &sq->sqn);
1307         if (err)
1308                 goto err_free_txqsq;
1309
1310         tx_rate = c->priv->tx_rates[sq->txq_ix];
1311         if (tx_rate)
1312                 mlx5e_set_sq_maxrate(c->netdev, sq, tx_rate);
1313
1314         if (params->tx_dim_enabled)
1315                 sq->state |= BIT(MLX5E_SQ_STATE_AM);
1316
1317         return 0;
1318
1319 err_free_txqsq:
1320         mlx5e_free_txqsq(sq);
1321
1322         return err;
1323 }
1324
1325 void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq)
1326 {
1327         sq->txq = netdev_get_tx_queue(sq->channel->netdev, sq->txq_ix);
1328         set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
1329         netdev_tx_reset_queue(sq->txq);
1330         netif_tx_start_queue(sq->txq);
1331 }
1332
1333 void mlx5e_tx_disable_queue(struct netdev_queue *txq)
1334 {
1335         __netif_tx_lock_bh(txq);
1336         netif_tx_stop_queue(txq);
1337         __netif_tx_unlock_bh(txq);
1338 }
1339
1340 static void mlx5e_deactivate_txqsq(struct mlx5e_txqsq *sq)
1341 {
1342         struct mlx5e_channel *c = sq->channel;
1343         struct mlx5_wq_cyc *wq = &sq->wq;
1344
1345         clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
1346         /* prevent netif_tx_wake_queue */
1347         napi_synchronize(&c->napi);
1348
1349         mlx5e_tx_disable_queue(sq->txq);
1350
1351         /* last doorbell out, godspeed .. */
1352         if (mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, 1)) {
1353                 u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
1354                 struct mlx5e_tx_wqe *nop;
1355
1356                 sq->db.wqe_info[pi] = (struct mlx5e_tx_wqe_info) {
1357                         .num_wqebbs = 1,
1358                 };
1359
1360                 nop = mlx5e_post_nop(wq, sq->sqn, &sq->pc);
1361                 mlx5e_notify_hw(wq, sq->pc, sq->uar_map, &nop->ctrl);
1362         }
1363 }
1364
1365 static void mlx5e_close_txqsq(struct mlx5e_txqsq *sq)
1366 {
1367         struct mlx5e_channel *c = sq->channel;
1368         struct mlx5_core_dev *mdev = c->mdev;
1369         struct mlx5_rate_limit rl = {0};
1370
1371         cancel_work_sync(&sq->dim.work);
1372         cancel_work_sync(&sq->recover_work);
1373         mlx5e_destroy_sq(mdev, sq->sqn);
1374         if (sq->rate_limit) {
1375                 rl.rate = sq->rate_limit;
1376                 mlx5_rl_remove_rate(mdev, &rl);
1377         }
1378         mlx5e_free_txqsq_descs(sq);
1379         mlx5e_free_txqsq(sq);
1380 }
1381
1382 static void mlx5e_tx_err_cqe_work(struct work_struct *recover_work)
1383 {
1384         struct mlx5e_txqsq *sq = container_of(recover_work, struct mlx5e_txqsq,
1385                                               recover_work);
1386
1387         mlx5e_reporter_tx_err_cqe(sq);
1388 }
1389
1390 int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params,
1391                      struct mlx5e_sq_param *param, struct mlx5e_icosq *sq)
1392 {
1393         struct mlx5e_create_sq_param csp = {};
1394         int err;
1395
1396         err = mlx5e_alloc_icosq(c, param, sq);
1397         if (err)
1398                 return err;
1399
1400         csp.cqn             = sq->cq.mcq.cqn;
1401         csp.wq_ctrl         = &sq->wq_ctrl;
1402         csp.min_inline_mode = params->tx_min_inline_mode;
1403         err = mlx5e_create_sq_rdy(c->mdev, param, &csp, &sq->sqn);
1404         if (err)
1405                 goto err_free_icosq;
1406
1407         return 0;
1408
1409 err_free_icosq:
1410         mlx5e_free_icosq(sq);
1411
1412         return err;
1413 }
1414
1415 void mlx5e_activate_icosq(struct mlx5e_icosq *icosq)
1416 {
1417         set_bit(MLX5E_SQ_STATE_ENABLED, &icosq->state);
1418 }
1419
1420 void mlx5e_deactivate_icosq(struct mlx5e_icosq *icosq)
1421 {
1422         struct mlx5e_channel *c = icosq->channel;
1423
1424         clear_bit(MLX5E_SQ_STATE_ENABLED, &icosq->state);
1425         napi_synchronize(&c->napi);
1426 }
1427
1428 void mlx5e_close_icosq(struct mlx5e_icosq *sq)
1429 {
1430         struct mlx5e_channel *c = sq->channel;
1431
1432         mlx5e_destroy_sq(c->mdev, sq->sqn);
1433         mlx5e_free_icosq(sq);
1434 }
1435
1436 int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params,
1437                      struct mlx5e_sq_param *param, struct xdp_umem *umem,
1438                      struct mlx5e_xdpsq *sq, bool is_redirect)
1439 {
1440         struct mlx5e_create_sq_param csp = {};
1441         int err;
1442
1443         err = mlx5e_alloc_xdpsq(c, params, umem, param, sq, is_redirect);
1444         if (err)
1445                 return err;
1446
1447         csp.tis_lst_sz      = 1;
1448         csp.tisn            = c->priv->tisn[c->lag_port][0]; /* tc = 0 */
1449         csp.cqn             = sq->cq.mcq.cqn;
1450         csp.wq_ctrl         = &sq->wq_ctrl;
1451         csp.min_inline_mode = sq->min_inline_mode;
1452         set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
1453         err = mlx5e_create_sq_rdy(c->mdev, param, &csp, &sq->sqn);
1454         if (err)
1455                 goto err_free_xdpsq;
1456
1457         mlx5e_set_xmit_fp(sq, param->is_mpw);
1458
1459         if (!param->is_mpw) {
1460                 unsigned int ds_cnt = MLX5E_XDP_TX_DS_COUNT;
1461                 unsigned int inline_hdr_sz = 0;
1462                 int i;
1463
1464                 if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) {
1465                         inline_hdr_sz = MLX5E_XDP_MIN_INLINE;
1466                         ds_cnt++;
1467                 }
1468
1469                 /* Pre-initialize fixed WQE fields */
1470                 for (i = 0; i < mlx5_wq_cyc_get_size(&sq->wq); i++) {
1471                         struct mlx5e_tx_wqe      *wqe  = mlx5_wq_cyc_get_wqe(&sq->wq, i);
1472                         struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
1473                         struct mlx5_wqe_eth_seg  *eseg = &wqe->eth;
1474                         struct mlx5_wqe_data_seg *dseg;
1475
1476                         sq->db.wqe_info[i] = (struct mlx5e_xdp_wqe_info) {
1477                                 .num_wqebbs = 1,
1478                                 .num_pkts   = 1,
1479                         };
1480
1481                         cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
1482                         eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz);
1483
1484                         dseg = (struct mlx5_wqe_data_seg *)cseg + (ds_cnt - 1);
1485                         dseg->lkey = sq->mkey_be;
1486                 }
1487         }
1488
1489         return 0;
1490
1491 err_free_xdpsq:
1492         clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
1493         mlx5e_free_xdpsq(sq);
1494
1495         return err;
1496 }
1497
1498 void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq)
1499 {
1500         struct mlx5e_channel *c = sq->channel;
1501
1502         clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
1503         napi_synchronize(&c->napi);
1504
1505         mlx5e_destroy_sq(c->mdev, sq->sqn);
1506         mlx5e_free_xdpsq_descs(sq);
1507         mlx5e_free_xdpsq(sq);
1508 }
1509
1510 static int mlx5e_alloc_cq_common(struct mlx5_core_dev *mdev,
1511                                  struct mlx5e_cq_param *param,
1512                                  struct mlx5e_cq *cq)
1513 {
1514         struct mlx5_core_cq *mcq = &cq->mcq;
1515         int eqn_not_used;
1516         unsigned int irqn;
1517         int err;
1518         u32 i;
1519
1520         err = mlx5_vector2eqn(mdev, param->eq_ix, &eqn_not_used, &irqn);
1521         if (err)
1522                 return err;
1523
1524         err = mlx5_cqwq_create(mdev, &param->wq, param->cqc, &cq->wq,
1525                                &cq->wq_ctrl);
1526         if (err)
1527                 return err;
1528
1529         mcq->cqe_sz     = 64;
1530         mcq->set_ci_db  = cq->wq_ctrl.db.db;
1531         mcq->arm_db     = cq->wq_ctrl.db.db + 1;
1532         *mcq->set_ci_db = 0;
1533         *mcq->arm_db    = 0;
1534         mcq->vector     = param->eq_ix;
1535         mcq->comp       = mlx5e_completion_event;
1536         mcq->event      = mlx5e_cq_error_event;
1537         mcq->irqn       = irqn;
1538
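        /* Prime every CQE's op_own with an invalid value so entries that HW has
         * not yet written are never mistaken for valid completions.
         */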
1539         for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) {
1540                 struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, i);
1541
1542                 cqe->op_own = 0xf1;
1543         }
1544
1545         cq->mdev = mdev;
1546
1547         return 0;
1548 }
1549
1550 static int mlx5e_alloc_cq(struct mlx5e_channel *c,
1551                           struct mlx5e_cq_param *param,
1552                           struct mlx5e_cq *cq)
1553 {
1554         struct mlx5_core_dev *mdev = c->priv->mdev;
1555         int err;
1556
1557         param->wq.buf_numa_node = cpu_to_node(c->cpu);
1558         param->wq.db_numa_node  = cpu_to_node(c->cpu);
1559         param->eq_ix   = c->ix;
1560
1561         err = mlx5e_alloc_cq_common(mdev, param, cq);
1562
1563         cq->napi    = &c->napi;
1564         cq->channel = c;
1565
1566         return err;
1567 }
1568
1569 static void mlx5e_free_cq(struct mlx5e_cq *cq)
1570 {
1571         mlx5_wq_destroy(&cq->wq_ctrl);
1572 }
1573
1574 static int mlx5e_create_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param)
1575 {
1576         u32 out[MLX5_ST_SZ_DW(create_cq_out)];
1577         struct mlx5_core_dev *mdev = cq->mdev;
1578         struct mlx5_core_cq *mcq = &cq->mcq;
1579
1580         void *in;
1581         void *cqc;
1582         int inlen;
1583         unsigned int irqn_not_used;
1584         int eqn;
1585         int err;
1586
1587         err = mlx5_vector2eqn(mdev, param->eq_ix, &eqn, &irqn_not_used);
1588         if (err)
1589                 return err;
1590
1591         inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
1592                 sizeof(u64) * cq->wq_ctrl.buf.npages;
1593         in = kvzalloc(inlen, GFP_KERNEL);
1594         if (!in)
1595                 return -ENOMEM;
1596
1597         cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
1598
1599         memcpy(cqc, param->cqc, sizeof(param->cqc));
1600
1601         mlx5_fill_page_frag_array(&cq->wq_ctrl.buf,
1602                                   (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas));
1603
1604         MLX5_SET(cqc,   cqc, cq_period_mode, param->cq_period_mode);
1605         MLX5_SET(cqc,   cqc, c_eqn,         eqn);
1606         MLX5_SET(cqc,   cqc, uar_page,      mdev->priv.uar->index);
1607         MLX5_SET(cqc,   cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
1608                                             MLX5_ADAPTER_PAGE_SHIFT);
1609         MLX5_SET64(cqc, cqc, dbr_addr,      cq->wq_ctrl.db.dma);
1610
1611         err = mlx5_core_create_cq(mdev, mcq, in, inlen, out, sizeof(out));
1612
1613         kvfree(in);
1614
1615         if (err)
1616                 return err;
1617
1618         mlx5e_cq_arm(cq);
1619
1620         return 0;
1621 }
1622
1623 static void mlx5e_destroy_cq(struct mlx5e_cq *cq)
1624 {
1625         mlx5_core_destroy_cq(cq->mdev, &cq->mcq);
1626 }
1627
1628 int mlx5e_open_cq(struct mlx5e_channel *c, struct dim_cq_moder moder,
1629                   struct mlx5e_cq_param *param, struct mlx5e_cq *cq)
1630 {
1631         struct mlx5_core_dev *mdev = c->mdev;
1632         int err;
1633
1634         err = mlx5e_alloc_cq(c, param, cq);
1635         if (err)
1636                 return err;
1637
1638         err = mlx5e_create_cq(cq, param);
1639         if (err)
1640                 goto err_free_cq;
1641
1642         if (MLX5_CAP_GEN(mdev, cq_moderation))
1643                 mlx5_core_modify_cq_moderation(mdev, &cq->mcq, moder.usec, moder.pkts);
1644         return 0;
1645
1646 err_free_cq:
1647         mlx5e_free_cq(cq);
1648
1649         return err;
1650 }
1651
1652 void mlx5e_close_cq(struct mlx5e_cq *cq)
1653 {
1654         mlx5e_destroy_cq(cq);
1655         mlx5e_free_cq(cq);
1656 }
1657
1658 static int mlx5e_open_tx_cqs(struct mlx5e_channel *c,
1659                              struct mlx5e_params *params,
1660                              struct mlx5e_channel_param *cparam)
1661 {
1662         int err;
1663         int tc;
1664
1665         for (tc = 0; tc < c->num_tc; tc++) {
1666                 err = mlx5e_open_cq(c, params->tx_cq_moderation,
1667                                     &cparam->tx_cq, &c->sq[tc].cq);
1668                 if (err)
1669                         goto err_close_tx_cqs;
1670         }
1671
1672         return 0;
1673
1674 err_close_tx_cqs:
1675         for (tc--; tc >= 0; tc--)
1676                 mlx5e_close_cq(&c->sq[tc].cq);
1677
1678         return err;
1679 }
1680
1681 static void mlx5e_close_tx_cqs(struct mlx5e_channel *c)
1682 {
1683         int tc;
1684
1685         for (tc = 0; tc < c->num_tc; tc++)
1686                 mlx5e_close_cq(&c->sq[tc].cq);
1687 }
1688
1689 static int mlx5e_open_sqs(struct mlx5e_channel *c,
1690                           struct mlx5e_params *params,
1691                           struct mlx5e_channel_param *cparam)
1692 {
1693         int err, tc;
1694
1695         for (tc = 0; tc < params->num_tc; tc++) {
1696                 int txq_ix = c->ix + tc * params->num_channels;
1697
1698                 err = mlx5e_open_txqsq(c, c->priv->tisn[c->lag_port][tc], txq_ix,
1699                                        params, &cparam->sq, &c->sq[tc], tc);
1700                 if (err)
1701                         goto err_close_sqs;
1702         }
1703
1704         return 0;
1705
1706 err_close_sqs:
1707         for (tc--; tc >= 0; tc--)
1708                 mlx5e_close_txqsq(&c->sq[tc]);
1709
1710         return err;
1711 }
1712
1713 static void mlx5e_close_sqs(struct mlx5e_channel *c)
1714 {
1715         int tc;
1716
1717         for (tc = 0; tc < c->num_tc; tc++)
1718                 mlx5e_close_txqsq(&c->sq[tc]);
1719 }
1720
1721 static int mlx5e_set_sq_maxrate(struct net_device *dev,
1722                                 struct mlx5e_txqsq *sq, u32 rate)
1723 {
1724         struct mlx5e_priv *priv = netdev_priv(dev);
1725         struct mlx5_core_dev *mdev = priv->mdev;
1726         struct mlx5e_modify_sq_param msp = {0};
1727         struct mlx5_rate_limit rl = {0};
1728         u16 rl_index = 0;
1729         int err;
1730
1731         if (rate == sq->rate_limit)
1732                 /* nothing to do */
1733                 return 0;
1734
1735         if (sq->rate_limit) {
1736                 rl.rate = sq->rate_limit;
1737                 /* remove the current rl index to free space for the next ones */
1738                 mlx5_rl_remove_rate(mdev, &rl);
1739         }
1740
1741         sq->rate_limit = 0;
1742
1743         if (rate) {
1744                 rl.rate = rate;
1745                 err = mlx5_rl_add_rate(mdev, &rl_index, &rl);
1746                 if (err) {
1747                         netdev_err(dev, "Failed configuring rate %u: %d\n",
1748                                    rate, err);
1749                         return err;
1750                 }
1751         }
1752
1753         msp.curr_state = MLX5_SQC_STATE_RDY;
1754         msp.next_state = MLX5_SQC_STATE_RDY;
1755         msp.rl_index   = rl_index;
1756         msp.rl_update  = true;
1757         err = mlx5e_modify_sq(mdev, sq->sqn, &msp);
1758         if (err) {
1759                 netdev_err(dev, "Failed configuring rate %u: %d\n",
1760                            rate, err);
1761                 /* remove the rate from the table */
1762                 if (rate)
1763                         mlx5_rl_remove_rate(mdev, &rl);
1764                 return err;
1765         }
1766
1767         sq->rate_limit = rate;
1768         return 0;
1769 }
1770
1771 static int mlx5e_set_tx_maxrate(struct net_device *dev, int index, u32 rate)
1772 {
1773         struct mlx5e_priv *priv = netdev_priv(dev);
1774         struct mlx5_core_dev *mdev = priv->mdev;
1775         struct mlx5e_txqsq *sq = priv->txq2sq[index];
1776         int err = 0;
1777
1778         if (!mlx5_rl_is_supported(mdev)) {
1779                 netdev_err(dev, "Rate limiting is not supported on this device\n");
1780                 return -EINVAL;
1781         }
1782
1783         /* rate is given in Mb/sec, HW config is in Kb/sec */
1784         rate = rate << 10;
1785
1786         /* Check whether the rate is in the valid range; 0 is always valid */
1787         if (rate && !mlx5_rl_is_in_range(mdev, rate)) {
1788                 netdev_err(dev, "TX rate %u is not in range\n", rate);
1789                 return -ERANGE;
1790         }
1791
1792         mutex_lock(&priv->state_lock);
1793         if (test_bit(MLX5E_STATE_OPENED, &priv->state))
1794                 err = mlx5e_set_sq_maxrate(dev, sq, rate);
1795         if (!err)
1796                 priv->tx_rates[index] = rate;
1797         mutex_unlock(&priv->state_lock);
1798
1799         return err;
1800 }
1801
1802 static int mlx5e_open_queues(struct mlx5e_channel *c,
1803                              struct mlx5e_params *params,
1804                              struct mlx5e_channel_param *cparam)
1805 {
1806         struct dim_cq_moder icocq_moder = {0, 0};
1807         int err;
1808
1809         err = mlx5e_open_cq(c, icocq_moder, &cparam->icosq_cq, &c->icosq.cq);
1810         if (err)
1811                 return err;
1812
1813         err = mlx5e_open_tx_cqs(c, params, cparam);
1814         if (err)
1815                 goto err_close_icosq_cq;
1816
1817         err = mlx5e_open_cq(c, params->tx_cq_moderation, &cparam->tx_cq, &c->xdpsq.cq);
1818         if (err)
1819                 goto err_close_tx_cqs;
1820
1821         err = mlx5e_open_cq(c, params->rx_cq_moderation, &cparam->rx_cq, &c->rq.cq);
1822         if (err)
1823                 goto err_close_xdp_tx_cqs;
1824
1825         /* XDP SQ CQ params are the same as normal TXQ SQ CQ params */
1826         err = c->xdp ? mlx5e_open_cq(c, params->tx_cq_moderation,
1827                                      &cparam->tx_cq, &c->rq_xdpsq.cq) : 0;
1828         if (err)
1829                 goto err_close_rx_cq;
1830
1831         napi_enable(&c->napi);
1832
1833         err = mlx5e_open_icosq(c, params, &cparam->icosq, &c->icosq);
1834         if (err)
1835                 goto err_disable_napi;
1836
1837         err = mlx5e_open_sqs(c, params, cparam);
1838         if (err)
1839                 goto err_close_icosq;
1840
1841         if (c->xdp) {
1842                 err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, NULL,
1843                                        &c->rq_xdpsq, false);
1844                 if (err)
1845                         goto err_close_sqs;
1846         }
1847
1848         err = mlx5e_open_rq(c, params, &cparam->rq, NULL, NULL, &c->rq);
1849         if (err)
1850                 goto err_close_xdp_sq;
1851
1852         err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, NULL, &c->xdpsq, true);
1853         if (err)
1854                 goto err_close_rq;
1855
1856         return 0;
1857
1858 err_close_rq:
1859         mlx5e_close_rq(&c->rq);
1860
1861 err_close_xdp_sq:
1862         if (c->xdp)
1863                 mlx5e_close_xdpsq(&c->rq_xdpsq);
1864
1865 err_close_sqs:
1866         mlx5e_close_sqs(c);
1867
1868 err_close_icosq:
1869         mlx5e_close_icosq(&c->icosq);
1870
1871 err_disable_napi:
1872         napi_disable(&c->napi);
1873
1874         if (c->xdp)
1875                 mlx5e_close_cq(&c->rq_xdpsq.cq);
1876
1877 err_close_rx_cq:
1878         mlx5e_close_cq(&c->rq.cq);
1879
1880 err_close_xdp_tx_cqs:
1881         mlx5e_close_cq(&c->xdpsq.cq);
1882
1883 err_close_tx_cqs:
1884         mlx5e_close_tx_cqs(c);
1885
1886 err_close_icosq_cq:
1887         mlx5e_close_cq(&c->icosq.cq);
1888
1889         return err;
1890 }
1891
1892 static void mlx5e_close_queues(struct mlx5e_channel *c)
1893 {
1894         mlx5e_close_xdpsq(&c->xdpsq);
1895         mlx5e_close_rq(&c->rq);
1896         if (c->xdp)
1897                 mlx5e_close_xdpsq(&c->rq_xdpsq);
1898         mlx5e_close_sqs(c);
1899         mlx5e_close_icosq(&c->icosq);
1900         napi_disable(&c->napi);
1901         if (c->xdp)
1902                 mlx5e_close_cq(&c->rq_xdpsq.cq);
1903         mlx5e_close_cq(&c->rq.cq);
1904         mlx5e_close_cq(&c->xdpsq.cq);
1905         mlx5e_close_tx_cqs(c);
1906         mlx5e_close_cq(&c->icosq.cq);
1907 }
1908
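/* Spread channel->port affinity across LAG ports: non-PF functions are offset
 * by their vhca_id, so channel 0 of different functions is less likely to map
 * to the same physical port.
 */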
1909 static u8 mlx5e_enumerate_lag_port(struct mlx5_core_dev *mdev, int ix)
1910 {
1911         u16 port_aff_bias = mlx5_core_is_pf(mdev) ? 0 : MLX5_CAP_GEN(mdev, vhca_id);
1912
1913         return (ix + port_aff_bias) % mlx5e_get_num_lag_ports(mdev);
1914 }
1915
1916 static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
1917                               struct mlx5e_params *params,
1918                               struct mlx5e_channel_param *cparam,
1919                               struct xdp_umem *umem,
1920                               struct mlx5e_channel **cp)
1921 {
1922         int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(priv->mdev, ix));
1923         struct net_device *netdev = priv->netdev;
1924         struct mlx5e_xsk_param xsk;
1925         struct mlx5e_channel *c;
1926         unsigned int irq;
1927         int err;
1928         int eqn;
1929
1930         err = mlx5_vector2eqn(priv->mdev, ix, &eqn, &irq);
1931         if (err)
1932                 return err;
1933
1934         c = kvzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu));
1935         if (!c)
1936                 return -ENOMEM;
1937
1938         c->priv     = priv;
1939         c->mdev     = priv->mdev;
1940         c->tstamp   = &priv->tstamp;
1941         c->ix       = ix;
1942         c->cpu      = cpu;
1943         c->pdev     = priv->mdev->device;
1944         c->netdev   = priv->netdev;
1945         c->mkey_be  = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key);
1946         c->num_tc   = params->num_tc;
1947         c->xdp      = !!params->xdp_prog;
1948         c->stats    = &priv->channel_stats[ix].ch;
1949         c->irq_desc = irq_to_desc(irq);
1950         c->lag_port = mlx5e_enumerate_lag_port(priv->mdev, ix);
1951
1952         netif_napi_add(netdev, &c->napi, mlx5e_napi_poll, 64);
1953
1954         err = mlx5e_open_queues(c, params, cparam);
1955         if (unlikely(err))
1956                 goto err_napi_del;
1957
1958         if (umem) {
1959                 mlx5e_build_xsk_param(umem, &xsk);
1960                 err = mlx5e_open_xsk(priv, params, &xsk, umem, c);
1961                 if (unlikely(err))
1962                         goto err_close_queues;
1963         }
1964
1965         *cp = c;
1966
1967         return 0;
1968
1969 err_close_queues:
1970         mlx5e_close_queues(c);
1971
1972 err_napi_del:
1973         netif_napi_del(&c->napi);
1974
1975         kvfree(c);
1976
1977         return err;
1978 }
1979
1980 static void mlx5e_activate_channel(struct mlx5e_channel *c)
1981 {
1982         int tc;
1983
1984         for (tc = 0; tc < c->num_tc; tc++)
1985                 mlx5e_activate_txqsq(&c->sq[tc]);
1986         mlx5e_activate_icosq(&c->icosq);
1987         mlx5e_activate_rq(&c->rq);
1988
1989         if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))
1990                 mlx5e_activate_xsk(c);
1991 }
1992
1993 static void mlx5e_deactivate_channel(struct mlx5e_channel *c)
1994 {
1995         int tc;
1996
1997         if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))
1998                 mlx5e_deactivate_xsk(c);
1999
2000         mlx5e_deactivate_rq(&c->rq);
2001         mlx5e_deactivate_icosq(&c->icosq);
2002         for (tc = 0; tc < c->num_tc; tc++)
2003                 mlx5e_deactivate_txqsq(&c->sq[tc]);
2004 }
2005
2006 static void mlx5e_close_channel(struct mlx5e_channel *c)
2007 {
2008         if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))
2009                 mlx5e_close_xsk(c);
2010         mlx5e_close_queues(c);
2011         netif_napi_del(&c->napi);
2012
2013         kvfree(c);
2014 }
2015
2016 #define DEFAULT_FRAG_SIZE (2048)
2017
2018 static void mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev,
2019                                       struct mlx5e_params *params,
2020                                       struct mlx5e_xsk_param *xsk,
2021                                       struct mlx5e_rq_frags_info *info)
2022 {
2023         u32 byte_count = MLX5E_SW2HW_MTU(params, params->sw_mtu);
2024         int frag_size_max = DEFAULT_FRAG_SIZE;
2025         u32 buf_size = 0;
2026         int i;
2027
2028 #ifdef CONFIG_MLX5_EN_IPSEC
2029         if (MLX5_IPSEC_DEV(mdev))
2030                 byte_count += MLX5E_METADATA_ETHER_LEN;
2031 #endif
2032
2033         if (mlx5e_rx_is_linear_skb(params, xsk)) {
2034                 int frag_stride;
2035
2036                 frag_stride = mlx5e_rx_get_linear_frag_sz(params, xsk);
2037                 frag_stride = roundup_pow_of_two(frag_stride);
2038
2039                 info->arr[0].frag_size = byte_count;
2040                 info->arr[0].frag_stride = frag_stride;
2041                 info->num_frags = 1;
2042                 info->wqe_bulk = PAGE_SIZE / frag_stride;
2043                 goto out;
2044         }
2045
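        /* If the packet doesn't fit into one page plus (MLX5E_MAX_RX_FRAGS - 1)
         * default-sized fragments, fall back to page-sized fragments.
         */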
2046         if (byte_count > PAGE_SIZE +
2047             (MLX5E_MAX_RX_FRAGS - 1) * frag_size_max)
2048                 frag_size_max = PAGE_SIZE;
2049
2050         i = 0;
2051         while (buf_size < byte_count) {
2052                 int frag_size = byte_count - buf_size;
2053
2054                 if (i < MLX5E_MAX_RX_FRAGS - 1)
2055                         frag_size = min(frag_size, frag_size_max);
2056
2057                 info->arr[i].frag_size = frag_size;
2058                 info->arr[i].frag_stride = roundup_pow_of_two(frag_size);
2059
2060                 buf_size += frag_size;
2061                 i++;
2062         }
2063         info->num_frags = i;
2064         /* number of different wqes sharing a page */
2065         info->wqe_bulk = 1 + (info->num_frags % 2);
2066
2067 out:
2068         info->wqe_bulk = max_t(u8, info->wqe_bulk, 8);
2069         info->log_num_frags = order_base_2(info->num_frags);
2070 }
2071
2072 static inline u8 mlx5e_get_rqwq_log_stride(u8 wq_type, int ndsegs)
2073 {
2074         int sz = sizeof(struct mlx5_wqe_data_seg) * ndsegs;
2075
2076         switch (wq_type) {
2077         case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
2078                 sz += sizeof(struct mlx5e_rx_wqe_ll);
2079                 break;
2080         default: /* MLX5_WQ_TYPE_CYCLIC */
2081                 sz += sizeof(struct mlx5e_rx_wqe_cyc);
2082         }
2083
2084         return order_base_2(sz);
2085 }
2086
2087 static u8 mlx5e_get_rq_log_wq_sz(void *rqc)
2088 {
2089         void *wq = MLX5_ADDR_OF(rqc, rqc, wq);
2090
2091         return MLX5_GET(wq, wq, log_wq_sz);
2092 }
2093
2094 void mlx5e_build_rq_param(struct mlx5e_priv *priv,
2095                           struct mlx5e_params *params,
2096                           struct mlx5e_xsk_param *xsk,
2097                           struct mlx5e_rq_param *param)
2098 {
2099         struct mlx5_core_dev *mdev = priv->mdev;
2100         void *rqc = param->rqc;
2101         void *wq = MLX5_ADDR_OF(rqc, rqc, wq);
2102         int ndsegs = 1;
2103
2104         switch (params->rq_wq_type) {
2105         case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
2106                 MLX5_SET(wq, wq, log_wqe_num_of_strides,
2107                          mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk) -
2108                          MLX5_MPWQE_LOG_NUM_STRIDES_BASE);
2109                 MLX5_SET(wq, wq, log_wqe_stride_size,
2110                          mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk) -
2111                          MLX5_MPWQE_LOG_STRIDE_SZ_BASE);
2112                 MLX5_SET(wq, wq, log_wq_sz, mlx5e_mpwqe_get_log_rq_size(params, xsk));
2113                 break;
2114         default: /* MLX5_WQ_TYPE_CYCLIC */
2115                 MLX5_SET(wq, wq, log_wq_sz, params->log_rq_mtu_frames);
2116                 mlx5e_build_rq_frags_info(mdev, params, xsk, &param->frags_info);
2117                 ndsegs = param->frags_info.num_frags;
2118         }
2119
2120         MLX5_SET(wq, wq, wq_type,          params->rq_wq_type);
2121         MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN);
2122         MLX5_SET(wq, wq, log_wq_stride,
2123                  mlx5e_get_rqwq_log_stride(params->rq_wq_type, ndsegs));
2124         MLX5_SET(wq, wq, pd,               mdev->mlx5e_res.pdn);
2125         MLX5_SET(rqc, rqc, counter_set_id, priv->q_counter);
2126         MLX5_SET(rqc, rqc, vsd,            params->vlan_strip_disable);
2127         MLX5_SET(rqc, rqc, scatter_fcs,    params->scatter_fcs_en);
2128
2129         param->wq.buf_numa_node = dev_to_node(mdev->device);
2130 }
2131
2132 static void mlx5e_build_drop_rq_param(struct mlx5e_priv *priv,
2133                                       struct mlx5e_rq_param *param)
2134 {
2135         struct mlx5_core_dev *mdev = priv->mdev;
2136         void *rqc = param->rqc;
2137         void *wq = MLX5_ADDR_OF(rqc, rqc, wq);
2138
2139         MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
2140         MLX5_SET(wq, wq, log_wq_stride,
2141                  mlx5e_get_rqwq_log_stride(MLX5_WQ_TYPE_CYCLIC, 1));
2142         MLX5_SET(rqc, rqc, counter_set_id, priv->drop_rq_q_counter);
2143
2144         param->wq.buf_numa_node = dev_to_node(mdev->device);
2145 }
2146
2147 void mlx5e_build_sq_param_common(struct mlx5e_priv *priv,
2148                                  struct mlx5e_sq_param *param)
2149 {
2150         void *sqc = param->sqc;
2151         void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
2152
2153         MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
2154         MLX5_SET(wq, wq, pd,            priv->mdev->mlx5e_res.pdn);
2155
2156         param->wq.buf_numa_node = dev_to_node(priv->mdev->device);
2157 }
2158
2159 static void mlx5e_build_sq_param(struct mlx5e_priv *priv,
2160                                  struct mlx5e_params *params,
2161                                  struct mlx5e_sq_param *param)
2162 {
2163         void *sqc = param->sqc;
2164         void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
2165         bool allow_swp;
2166
2167         allow_swp = mlx5_geneve_tx_allowed(priv->mdev) ||
2168                     !!MLX5_IPSEC_DEV(priv->mdev);
2169         mlx5e_build_sq_param_common(priv, param);
2170         MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size);
2171         MLX5_SET(sqc, sqc, allow_swp, allow_swp);
2172 }
2173
2174 static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv,
2175                                         struct mlx5e_cq_param *param)
2176 {
2177         void *cqc = param->cqc;
2178
2179         MLX5_SET(cqc, cqc, uar_page, priv->mdev->priv.uar->index);
2180         if (MLX5_CAP_GEN(priv->mdev, cqe_128_always) && cache_line_size() >= 128)
2181                 MLX5_SET(cqc, cqc, cqe_sz, CQE_STRIDE_128_PAD);
2182 }
2183
2184 void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
2185                              struct mlx5e_params *params,
2186                              struct mlx5e_xsk_param *xsk,
2187                              struct mlx5e_cq_param *param)
2188 {
2189         struct mlx5_core_dev *mdev = priv->mdev;
2190         void *cqc = param->cqc;
2191         u8 log_cq_size;
2192
2193         switch (params->rq_wq_type) {
2194         case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
2195                 log_cq_size = mlx5e_mpwqe_get_log_rq_size(params, xsk) +
2196                         mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk);
2197                 break;
2198         default: /* MLX5_WQ_TYPE_CYCLIC */
2199                 log_cq_size = params->log_rq_mtu_frames;
2200         }
2201
2202         MLX5_SET(cqc, cqc, log_cq_size, log_cq_size);
2203         if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) {
2204                 MLX5_SET(cqc, cqc, mini_cqe_res_format, MLX5_CQE_FORMAT_CSUM);
2205                 MLX5_SET(cqc, cqc, cqe_comp_en, 1);
2206         }
2207
2208         mlx5e_build_common_cq_param(priv, param);
2209         param->cq_period_mode = params->rx_cq_moderation.cq_period_mode;
2210 }
2211
2212 void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv,
2213                              struct mlx5e_params *params,
2214                              struct mlx5e_cq_param *param)
2215 {
2216         void *cqc = param->cqc;
2217
2218         MLX5_SET(cqc, cqc, log_cq_size, params->log_sq_size);
2219
2220         mlx5e_build_common_cq_param(priv, param);
2221         param->cq_period_mode = params->tx_cq_moderation.cq_period_mode;
2222 }
2223
2224 void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv,
2225                               u8 log_wq_size,
2226                               struct mlx5e_cq_param *param)
2227 {
2228         void *cqc = param->cqc;
2229
2230         MLX5_SET(cqc, cqc, log_cq_size, log_wq_size);
2231
2232         mlx5e_build_common_cq_param(priv, param);
2233
2234         param->cq_period_mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
2235 }
2236
2237 void mlx5e_build_icosq_param(struct mlx5e_priv *priv,
2238                              u8 log_wq_size,
2239                              struct mlx5e_sq_param *param)
2240 {
2241         void *sqc = param->sqc;
2242         void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
2243
2244         mlx5e_build_sq_param_common(priv, param);
2245
2246         MLX5_SET(wq, wq, log_wq_sz, log_wq_size);
2247         MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(priv->mdev, reg_umr_sq));
2248 }
2249
2250 void mlx5e_build_xdpsq_param(struct mlx5e_priv *priv,
2251                              struct mlx5e_params *params,
2252                              struct mlx5e_sq_param *param)
2253 {
2254         void *sqc = param->sqc;
2255         void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
2256
2257         mlx5e_build_sq_param_common(priv, param);
2258         MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size);
2259         param->is_mpw = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_XDP_TX_MPWQE);
2260 }
2261
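/* Size the ICOSQ so it can hold one UMR WQE per striding-RQ entry; for cyclic
 * RQs the minimum SQ size is enough.
 */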
2262 static u8 mlx5e_build_icosq_log_wq_sz(struct mlx5e_params *params,
2263                                       struct mlx5e_rq_param *rqp)
2264 {
2265         switch (params->rq_wq_type) {
2266         case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
2267                 return order_base_2(MLX5E_UMR_WQEBBS) +
2268                         mlx5e_get_rq_log_wq_sz(rqp->rqc);
2269         default: /* MLX5_WQ_TYPE_CYCLIC */
2270                 return MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE;
2271         }
2272 }
2273
2274 static void mlx5e_build_channel_param(struct mlx5e_priv *priv,
2275                                       struct mlx5e_params *params,
2276                                       struct mlx5e_channel_param *cparam)
2277 {
2278         u8 icosq_log_wq_sz;
2279
2280         mlx5e_build_rq_param(priv, params, NULL, &cparam->rq);
2281
2282         icosq_log_wq_sz = mlx5e_build_icosq_log_wq_sz(params, &cparam->rq);
2283
2284         mlx5e_build_sq_param(priv, params, &cparam->sq);
2285         mlx5e_build_xdpsq_param(priv, params, &cparam->xdp_sq);
2286         mlx5e_build_icosq_param(priv, icosq_log_wq_sz, &cparam->icosq);
2287         mlx5e_build_rx_cq_param(priv, params, NULL, &cparam->rx_cq);
2288         mlx5e_build_tx_cq_param(priv, params, &cparam->tx_cq);
2289         mlx5e_build_ico_cq_param(priv, icosq_log_wq_sz, &cparam->icosq_cq);
2290 }
2291
2292 int mlx5e_open_channels(struct mlx5e_priv *priv,
2293                         struct mlx5e_channels *chs)
2294 {
2295         struct mlx5e_channel_param *cparam;
2296         int err = -ENOMEM;
2297         int i;
2298
2299         chs->num = chs->params.num_channels;
2300
2301         chs->c = kcalloc(chs->num, sizeof(struct mlx5e_channel *), GFP_KERNEL);
2302         cparam = kvzalloc(sizeof(struct mlx5e_channel_param), GFP_KERNEL);
2303         if (!chs->c || !cparam)
2304                 goto err_free;
2305
2306         mlx5e_build_channel_param(priv, &chs->params, cparam);
2307         for (i = 0; i < chs->num; i++) {
2308                 struct xdp_umem *umem = NULL;
2309
2310                 if (chs->params.xdp_prog)
2311                         umem = mlx5e_xsk_get_umem(&chs->params, chs->params.xsk, i);
2312
2313                 err = mlx5e_open_channel(priv, i, &chs->params, cparam, umem, &chs->c[i]);
2314                 if (err)
2315                         goto err_close_channels;
2316         }
2317
2318         mlx5e_health_channels_update(priv);
2319         kvfree(cparam);
2320         return 0;
2321
2322 err_close_channels:
2323         for (i--; i >= 0; i--)
2324                 mlx5e_close_channel(chs->c[i]);
2325
2326 err_free:
2327         kfree(chs->c);
2328         kvfree(cparam);
2329         chs->num = 0;
2330         return err;
2331 }
2332
2333 static void mlx5e_activate_channels(struct mlx5e_channels *chs)
2334 {
2335         int i;
2336
2337         for (i = 0; i < chs->num; i++)
2338                 mlx5e_activate_channel(chs->c[i]);
2339 }
2340
2341 #define MLX5E_RQ_WQES_TIMEOUT 20000 /* msecs */
2342
2343 static int mlx5e_wait_channels_min_rx_wqes(struct mlx5e_channels *chs)
2344 {
2345         int err = 0;
2346         int i;
2347
2348         for (i = 0; i < chs->num; i++) {
2349                 int timeout = err ? 0 : MLX5E_RQ_WQES_TIMEOUT;
2350
2351                 err |= mlx5e_wait_for_min_rx_wqes(&chs->c[i]->rq, timeout);
2352
2353                 /* Don't wait on the XSK RQ, because the newer xdpsock sample
2354                  * doesn't provide any Fill Ring entries at the setup stage.
2355                  */
2356         }
2357
2358         return err ? -ETIMEDOUT : 0;
2359 }
2360
2361 static void mlx5e_deactivate_channels(struct mlx5e_channels *chs)
2362 {
2363         int i;
2364
2365         for (i = 0; i < chs->num; i++)
2366                 mlx5e_deactivate_channel(chs->c[i]);
2367 }
2368
2369 void mlx5e_close_channels(struct mlx5e_channels *chs)
2370 {
2371         int i;
2372
2373         for (i = 0; i < chs->num; i++)
2374                 mlx5e_close_channel(chs->c[i]);
2375
2376         kfree(chs->c);
2377         chs->num = 0;
2378 }
2379
2380 static int
2381 mlx5e_create_rqt(struct mlx5e_priv *priv, int sz, struct mlx5e_rqt *rqt)
2382 {
2383         struct mlx5_core_dev *mdev = priv->mdev;
2384         void *rqtc;
2385         int inlen;
2386         int err;
2387         u32 *in;
2388         int i;
2389
2390         inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz;
2391         in = kvzalloc(inlen, GFP_KERNEL);
2392         if (!in)
2393                 return -ENOMEM;
2394
2395         rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
2396
2397         MLX5_SET(rqtc, rqtc, rqt_actual_size, sz);
2398         MLX5_SET(rqtc, rqtc, rqt_max_size, sz);
2399
2400         for (i = 0; i < sz; i++)
2401                 MLX5_SET(rqtc, rqtc, rq_num[i], priv->drop_rq.rqn);
2402
2403         err = mlx5_core_create_rqt(mdev, in, inlen, &rqt->rqtn);
2404         if (!err)
2405                 rqt->enabled = true;
2406
2407         kvfree(in);
2408         return err;
2409 }
2410
2411 void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt)
2412 {
2413         rqt->enabled = false;
2414         mlx5_core_destroy_rqt(priv->mdev, rqt->rqtn);
2415 }
2416
2417 int mlx5e_create_indirect_rqt(struct mlx5e_priv *priv)
2418 {
2419         struct mlx5e_rqt *rqt = &priv->indir_rqt;
2420         int err;
2421
2422         err = mlx5e_create_rqt(priv, MLX5E_INDIR_RQT_SIZE, rqt);
2423         if (err)
2424                 mlx5_core_warn(priv->mdev, "create indirect rqts failed, %d\n", err);
2425         return err;
2426 }
2427
2428 int mlx5e_create_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs)
2429 {
2430         int err;
2431         int ix;
2432
2433         for (ix = 0; ix < priv->max_nch; ix++) {
2434                 err = mlx5e_create_rqt(priv, 1 /*size */, &tirs[ix].rqt);
2435                 if (unlikely(err))
2436                         goto err_destroy_rqts;
2437         }
2438
2439         return 0;
2440
2441 err_destroy_rqts:
2442         mlx5_core_warn(priv->mdev, "create rqts failed, %d\n", err);
2443         for (ix--; ix >= 0; ix--)
2444                 mlx5e_destroy_rqt(priv, &tirs[ix].rqt);
2445
2446         return err;
2447 }
2448
2449 void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs)
2450 {
2451         int i;
2452
2453         for (i = 0; i < priv->max_nch; i++)
2454                 mlx5e_destroy_rqt(priv, &tirs[i].rqt);
2455 }
2456
2457 static int mlx5e_rx_hash_fn(int hfunc)
2458 {
2459         return (hfunc == ETH_RSS_HASH_TOP) ?
2460                MLX5_RX_HASH_FN_TOEPLITZ :
2461                MLX5_RX_HASH_FN_INVERTED_XOR8;
2462 }
2463
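/* Reverse the low 'size' bits of 'a'. Used to spread indirection table entries
 * when the inverted-XOR8 RSS hash function is selected.
 */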
2464 int mlx5e_bits_invert(unsigned long a, int size)
2465 {
2466         int inv = 0;
2467         int i;
2468
2469         for (i = 0; i < size; i++)
2470                 inv |= (test_bit(size - i - 1, &a) ? 1 : 0) << i;
2471
2472         return inv;
2473 }
2474
2475 static void mlx5e_fill_rqt_rqns(struct mlx5e_priv *priv, int sz,
2476                                 struct mlx5e_redirect_rqt_param rrp, void *rqtc)
2477 {
2478         int i;
2479
2480         for (i = 0; i < sz; i++) {
2481                 u32 rqn;
2482
2483                 if (rrp.is_rss) {
2484                         int ix = i;
2485
2486                         if (rrp.rss.hfunc == ETH_RSS_HASH_XOR)
2487                                 ix = mlx5e_bits_invert(i, ilog2(sz));
2488
2489                         ix = priv->rss_params.indirection_rqt[ix];
2490                         rqn = rrp.rss.channels->c[ix]->rq.rqn;
2491                 } else {
2492                         rqn = rrp.rqn;
2493                 }
2494                 MLX5_SET(rqtc, rqtc, rq_num[i], rqn);
2495         }
2496 }
2497
2498 int mlx5e_redirect_rqt(struct mlx5e_priv *priv, u32 rqtn, int sz,
2499                        struct mlx5e_redirect_rqt_param rrp)
2500 {
2501         struct mlx5_core_dev *mdev = priv->mdev;
2502         void *rqtc;
2503         int inlen;
2504         u32 *in;
2505         int err;
2506
2507         inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + sizeof(u32) * sz;
2508         in = kvzalloc(inlen, GFP_KERNEL);
2509         if (!in)
2510                 return -ENOMEM;
2511
2512         rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx);
2513
2514         MLX5_SET(rqtc, rqtc, rqt_actual_size, sz);
2515         MLX5_SET(modify_rqt_in, in, bitmask.rqn_list, 1);
2516         mlx5e_fill_rqt_rqns(priv, sz, rrp, rqtc);
2517         err = mlx5_core_modify_rqt(mdev, rqtn, in, inlen);
2518
2519         kvfree(in);
2520         return err;
2521 }
2522
2523 static u32 mlx5e_get_direct_rqn(struct mlx5e_priv *priv, int ix,
2524                                 struct mlx5e_redirect_rqt_param rrp)
2525 {
2526         if (!rrp.is_rss)
2527                 return rrp.rqn;
2528
2529         if (ix >= rrp.rss.channels->num)
2530                 return priv->drop_rq.rqn;
2531
2532         return rrp.rss.channels->c[ix]->rq.rqn;
2533 }
2534
2535 static void mlx5e_redirect_rqts(struct mlx5e_priv *priv,
2536                                 struct mlx5e_redirect_rqt_param rrp)
2537 {
2538         u32 rqtn;
2539         int ix;
2540
2541         if (priv->indir_rqt.enabled) {
2542                 /* RSS RQ table */
2543                 rqtn = priv->indir_rqt.rqtn;
2544                 mlx5e_redirect_rqt(priv, rqtn, MLX5E_INDIR_RQT_SIZE, rrp);
2545         }
2546
2547         for (ix = 0; ix < priv->max_nch; ix++) {
2548                 struct mlx5e_redirect_rqt_param direct_rrp = {
2549                         .is_rss = false,
2550                         {
2551                                 .rqn    = mlx5e_get_direct_rqn(priv, ix, rrp)
2552                         },
2553                 };
2554
2555                 /* Direct RQ Tables */
2556                 if (!priv->direct_tir[ix].rqt.enabled)
2557                         continue;
2558
2559                 rqtn = priv->direct_tir[ix].rqt.rqtn;
2560                 mlx5e_redirect_rqt(priv, rqtn, 1, direct_rrp);
2561         }
2562 }
2563
2564 static void mlx5e_redirect_rqts_to_channels(struct mlx5e_priv *priv,
2565                                             struct mlx5e_channels *chs)
2566 {
2567         struct mlx5e_redirect_rqt_param rrp = {
2568                 .is_rss        = true,
2569                 {
2570                         .rss = {
2571                                 .channels  = chs,
2572                                 .hfunc     = priv->rss_params.hfunc,
2573                         }
2574                 },
2575         };
2576
2577         mlx5e_redirect_rqts(priv, rrp);
2578 }
2579
2580 static void mlx5e_redirect_rqts_to_drop(struct mlx5e_priv *priv)
2581 {
2582         struct mlx5e_redirect_rqt_param drop_rrp = {
2583                 .is_rss = false,
2584                 {
2585                         .rqn = priv->drop_rq.rqn,
2586                 },
2587         };
2588
2589         mlx5e_redirect_rqts(priv, drop_rrp);
2590 }
2591
2592 static const struct mlx5e_tirc_config tirc_default_config[MLX5E_NUM_INDIR_TIRS] = {
2593         [MLX5E_TT_IPV4_TCP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
2594                                 .l4_prot_type = MLX5_L4_PROT_TYPE_TCP,
2595                                 .rx_hash_fields = MLX5_HASH_IP_L4PORTS,
2596         },
2597         [MLX5E_TT_IPV6_TCP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
2598                                 .l4_prot_type = MLX5_L4_PROT_TYPE_TCP,
2599                                 .rx_hash_fields = MLX5_HASH_IP_L4PORTS,
2600         },
2601         [MLX5E_TT_IPV4_UDP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
2602                                 .l4_prot_type = MLX5_L4_PROT_TYPE_UDP,
2603                                 .rx_hash_fields = MLX5_HASH_IP_L4PORTS,
2604         },
2605         [MLX5E_TT_IPV6_UDP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
2606                                 .l4_prot_type = MLX5_L4_PROT_TYPE_UDP,
2607                                 .rx_hash_fields = MLX5_HASH_IP_L4PORTS,
2608         },
2609         [MLX5E_TT_IPV4_IPSEC_AH] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
2610                                      .l4_prot_type = 0,
2611                                      .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI,
2612         },
2613         [MLX5E_TT_IPV6_IPSEC_AH] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
2614                                      .l4_prot_type = 0,
2615                                      .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI,
2616         },
2617         [MLX5E_TT_IPV4_IPSEC_ESP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
2618                                       .l4_prot_type = 0,
2619                                       .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI,
2620         },
2621         [MLX5E_TT_IPV6_IPSEC_ESP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
2622                                       .l4_prot_type = 0,
2623                                       .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI,
2624         },
2625         [MLX5E_TT_IPV4] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
2626                             .l4_prot_type = 0,
2627                             .rx_hash_fields = MLX5_HASH_IP,
2628         },
2629         [MLX5E_TT_IPV6] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
2630                             .l4_prot_type = 0,
2631                             .rx_hash_fields = MLX5_HASH_IP,
2632         },
2633 };
2634
2635 struct mlx5e_tirc_config mlx5e_tirc_get_default_config(enum mlx5e_traffic_types tt)
2636 {
2637         return tirc_default_config[tt];
2638 }
2639
2640 static void mlx5e_build_tir_ctx_lro(struct mlx5e_params *params, void *tirc)
2641 {
2642         if (!params->lro_en)
2643                 return;
2644
2645 #define ROUGH_MAX_L2_L3_HDR_SZ 256
2646
2647         MLX5_SET(tirc, tirc, lro_enable_mask,
2648                  MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO |
2649                  MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO);
2650         MLX5_SET(tirc, tirc, lro_max_ip_payload_size,
2651                  (MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ - ROUGH_MAX_L2_L3_HDR_SZ) >> 8);
2652         MLX5_SET(tirc, tirc, lro_timeout_period_usecs, params->lro_timeout);
2653 }
2654
2655 void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_rss_params *rss_params,
2656                                     const struct mlx5e_tirc_config *ttconfig,
2657                                     void *tirc, bool inner)
2658 {
2659         void *hfso = inner ? MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_inner) :
2660                              MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
2661
2662         MLX5_SET(tirc, tirc, rx_hash_fn, mlx5e_rx_hash_fn(rss_params->hfunc));
2663         if (rss_params->hfunc == ETH_RSS_HASH_TOP) {
2664                 void *rss_key = MLX5_ADDR_OF(tirc, tirc,
2665                                              rx_hash_toeplitz_key);
2666                 size_t len = MLX5_FLD_SZ_BYTES(tirc,
2667                                                rx_hash_toeplitz_key);
2668
2669                 MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
2670                 memcpy(rss_key, rss_params->toeplitz_hash_key, len);
2671         }
2672         MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2673                  ttconfig->l3_prot_type);
2674         MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
2675                  ttconfig->l4_prot_type);
2676         MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2677                  ttconfig->rx_hash_fields);
2678 }
2679
2680 static void mlx5e_update_rx_hash_fields(struct mlx5e_tirc_config *ttconfig,
2681                                         enum mlx5e_traffic_types tt,
2682                                         u32 rx_hash_fields)
2683 {
2684         *ttconfig                = tirc_default_config[tt];
2685         ttconfig->rx_hash_fields = rx_hash_fields;
2686 }
2687
2688 void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in)
2689 {
2690         void *tirc = MLX5_ADDR_OF(modify_tir_in, in, ctx);
2691         struct mlx5e_rss_params *rss = &priv->rss_params;
2692         struct mlx5_core_dev *mdev = priv->mdev;
2693         int ctxlen = MLX5_ST_SZ_BYTES(tirc);
2694         struct mlx5e_tirc_config ttconfig;
2695         int tt;
2696
2697         MLX5_SET(modify_tir_in, in, bitmask.hash, 1);
2698
2699         for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
2700                 memset(tirc, 0, ctxlen);
2701                 mlx5e_update_rx_hash_fields(&ttconfig, tt,
2702                                             rss->rx_hash_fields[tt]);
2703                 mlx5e_build_indir_tir_ctx_hash(rss, &ttconfig, tirc, false);
2704                 mlx5_core_modify_tir(mdev, priv->indir_tir[tt].tirn, in);
2705         }
2706
2707         if (!mlx5e_tunnel_inner_ft_supported(priv->mdev))
2708                 return;
2709
2710         for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
2711                 memset(tirc, 0, ctxlen);
2712                 mlx5e_update_rx_hash_fields(&ttconfig, tt,
2713                                             rss->rx_hash_fields[tt]);
2714                 mlx5e_build_indir_tir_ctx_hash(rss, &ttconfig, tirc, true);
2715                 mlx5_core_modify_tir(mdev, priv->inner_indir_tir[tt].tirn, in);
2716         }
2717 }
2718
2719 static int mlx5e_modify_tirs_lro(struct mlx5e_priv *priv)
2720 {
2721         struct mlx5_core_dev *mdev = priv->mdev;
2722
2723         void *in;
2724         void *tirc;
2725         int inlen;
2726         int err;
2727         int tt;
2728         int ix;
2729
2730         inlen = MLX5_ST_SZ_BYTES(modify_tir_in);
2731         in = kvzalloc(inlen, GFP_KERNEL);
2732         if (!in)
2733                 return -ENOMEM;
2734
2735         MLX5_SET(modify_tir_in, in, bitmask.lro, 1);
2736         tirc = MLX5_ADDR_OF(modify_tir_in, in, ctx);
2737
2738         mlx5e_build_tir_ctx_lro(&priv->channels.params, tirc);
2739
2740         for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
2741                 err = mlx5_core_modify_tir(mdev, priv->indir_tir[tt].tirn, in);
2742                 if (err)
2743                         goto free_in;
2744         }
2745
2746         for (ix = 0; ix < priv->max_nch; ix++) {
2747                 err = mlx5_core_modify_tir(mdev, priv->direct_tir[ix].tirn, in);
2748                 if (err)
2749                         goto free_in;
2750         }
2751
2752 free_in:
2753         kvfree(in);
2754
2755         return err;
2756 }
2757
2758 static MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(mlx5e_modify_tirs_lro);
2759
2760 static int mlx5e_set_mtu(struct mlx5_core_dev *mdev,
2761                          struct mlx5e_params *params, u16 mtu)
2762 {
2763         u16 hw_mtu = MLX5E_SW2HW_MTU(params, mtu);
2764         int err;
2765
2766         err = mlx5_set_port_mtu(mdev, hw_mtu, 1);
2767         if (err)
2768                 return err;
2769
2770         /* Update vport context MTU */
2771         mlx5_modify_nic_vport_mtu(mdev, hw_mtu);
2772         return 0;
2773 }
2774
2775 static void mlx5e_query_mtu(struct mlx5_core_dev *mdev,
2776                             struct mlx5e_params *params, u16 *mtu)
2777 {
2778         u16 hw_mtu = 0;
2779         int err;
2780
2781         err = mlx5_query_nic_vport_mtu(mdev, &hw_mtu);
2782         if (err || !hw_mtu) /* fall back to the port oper MTU */
2783                 mlx5_query_port_oper_mtu(mdev, &hw_mtu, 1);
2784
2785         *mtu = MLX5E_HW2SW_MTU(params, hw_mtu);
2786 }
2787
2788 int mlx5e_set_dev_port_mtu(struct mlx5e_priv *priv)
2789 {
2790         struct mlx5e_params *params = &priv->channels.params;
2791         struct net_device *netdev = priv->netdev;
2792         struct mlx5_core_dev *mdev = priv->mdev;
2793         u16 mtu;
2794         int err;
2795
2796         err = mlx5e_set_mtu(mdev, params, params->sw_mtu);
2797         if (err)
2798                 return err;
2799
2800         mlx5e_query_mtu(mdev, params, &mtu);
2801         if (mtu != params->sw_mtu)
2802                 netdev_warn(netdev, "%s: VPort MTU %d is different from netdev mtu %d\n",
2803                             __func__, mtu, params->sw_mtu);
2804
2805         params->sw_mtu = mtu;
2806         return 0;
2807 }
2808
2809 MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(mlx5e_set_dev_port_mtu);
2810
2811 void mlx5e_set_netdev_mtu_boundaries(struct mlx5e_priv *priv)
2812 {
2813         struct mlx5e_params *params = &priv->channels.params;
2814         struct net_device *netdev   = priv->netdev;
2815         struct mlx5_core_dev *mdev  = priv->mdev;
2816         u16 max_mtu;
2817
2818         /* MTU range: 68 - hw-specific max */
2819         netdev->min_mtu = ETH_MIN_MTU;
2820
2821         mlx5_query_port_max_mtu(mdev, &max_mtu, 1);
2822         netdev->max_mtu = min_t(unsigned int, MLX5E_HW2SW_MTU(params, max_mtu),
2823                                 ETH_MAX_MTU);
2824 }
2825
2826 static void mlx5e_netdev_set_tcs(struct net_device *netdev, u16 nch, u8 ntc)
2827 {
2828         int tc;
2829
2830         netdev_reset_tc(netdev);
2831
2832         if (ntc == 1)
2833                 return;
2834
2835         netdev_set_num_tc(netdev, ntc);
2836
2837         /* Map netdev TCs to offset 0
2838          * We have our own UP-to-TXQ mapping for QoS
2839          */
2840         for (tc = 0; tc < ntc; tc++)
2841                 netdev_set_tc_queue(netdev, tc, nch, 0);
2842 }
2843
2844 static int mlx5e_update_netdev_queues(struct mlx5e_priv *priv)
2845 {
2846         struct net_device *netdev = priv->netdev;
2847         int num_txqs, num_rxqs, nch, ntc;
2848         int old_num_txqs, old_ntc;
2849         int err;
2850
2851         old_num_txqs = netdev->real_num_tx_queues;
2852         old_ntc = netdev->num_tc;
2853
2854         nch = priv->channels.params.num_channels;
2855         ntc = priv->channels.params.num_tc;
2856         num_txqs = nch * ntc;
2857         num_rxqs = nch * priv->profile->rq_groups;
2858
2859         mlx5e_netdev_set_tcs(netdev, nch, ntc);
2860
2861         err = netif_set_real_num_tx_queues(netdev, num_txqs);
2862         if (err) {
2863                 netdev_warn(netdev, "netif_set_real_num_tx_queues failed, %d\n", err);
2864                 goto err_tcs;
2865         }
2866         err = netif_set_real_num_rx_queues(netdev, num_rxqs);
2867         if (err) {
2868                 netdev_warn(netdev, "netif_set_real_num_rx_queues failed, %d\n", err);
2869                 goto err_txqs;
2870         }
2871
2872         return 0;
2873
2874 err_txqs:
2875         /* netif_set_real_num_rx_queues could fail only when nch increased. Only
2876          * one of nch and ntc is changed in this function. That means, the call
2877          * to netif_set_real_num_tx_queues below should not fail, because it
2878          * decreases the number of TX queues.
2879          */
2880         WARN_ON_ONCE(netif_set_real_num_tx_queues(netdev, old_num_txqs));
2881
2882 err_tcs:
2883         mlx5e_netdev_set_tcs(netdev, old_num_txqs / old_ntc, old_ntc);
2884         return err;
2885 }
2886
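/* For each channel, build an XPS cpumask from the CPUs of all completion
 * vectors that map to it (ix, ix + num_channels, ...).
 */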
2887 static void mlx5e_set_default_xps_cpumasks(struct mlx5e_priv *priv,
2888                                            struct mlx5e_params *params)
2889 {
2890         struct mlx5_core_dev *mdev = priv->mdev;
2891         int num_comp_vectors, ix, irq;
2892
2893         num_comp_vectors = mlx5_comp_vectors_count(mdev);
2894
2895         for (ix = 0; ix < params->num_channels; ix++) {
2896                 cpumask_clear(priv->scratchpad.cpumask);
2897
2898                 for (irq = ix; irq < num_comp_vectors; irq += params->num_channels) {
2899                         int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(mdev, irq));
2900
2901                         cpumask_set_cpu(cpu, priv->scratchpad.cpumask);
2902                 }
2903
2904                 netif_set_xps_queue(priv->netdev, priv->scratchpad.cpumask, ix);
2905         }
2906 }
2907
2908 int mlx5e_num_channels_changed(struct mlx5e_priv *priv)
2909 {
2910         u16 count = priv->channels.params.num_channels;
2911         int err;
2912
2913         err = mlx5e_update_netdev_queues(priv);
2914         if (err)
2915                 return err;
2916
2917         mlx5e_set_default_xps_cpumasks(priv, &priv->channels.params);
2918
2919         if (!netif_is_rxfh_configured(priv->netdev))
2920                 mlx5e_build_default_indir_rqt(priv->rss_params.indirection_rqt,
2921                                               MLX5E_INDIR_RQT_SIZE, count);
2922
2923         return 0;
2924 }
2925
2926 MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(mlx5e_num_channels_changed);
2927
2928 static void mlx5e_build_txq_maps(struct mlx5e_priv *priv)
2929 {
2930         int i, ch;
2931
2932         ch = priv->channels.num;
2933
2934         for (i = 0; i < ch; i++) {
2935                 int tc;
2936
2937                 for (tc = 0; tc < priv->channels.params.num_tc; tc++) {
2938                         struct mlx5e_channel *c = priv->channels.c[i];
2939                         struct mlx5e_txqsq *sq = &c->sq[tc];
2940
2941                         priv->txq2sq[sq->txq_ix] = sq;
2942                         priv->channel_tc2realtxq[i][tc] = i + tc * ch;
2943                 }
2944         }
2945 }
2946
2947 void mlx5e_activate_priv_channels(struct mlx5e_priv *priv)
2948 {
2949         mlx5e_build_txq_maps(priv);
2950         mlx5e_activate_channels(&priv->channels);
2951         mlx5e_xdp_tx_enable(priv);
2952         netif_tx_start_all_queues(priv->netdev);
2953
2954         if (mlx5e_is_vport_rep(priv))
2955                 mlx5e_add_sqs_fwd_rules(priv);
2956
2957         mlx5e_wait_channels_min_rx_wqes(&priv->channels);
2958         mlx5e_redirect_rqts_to_channels(priv, &priv->channels);
2959
2960         mlx5e_xsk_redirect_rqts_to_channels(priv, &priv->channels);
2961 }
2962
2963 void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv)
2964 {
2965         mlx5e_xsk_redirect_rqts_to_drop(priv, &priv->channels);
2966
2967         mlx5e_redirect_rqts_to_drop(priv);
2968
2969         if (mlx5e_is_vport_rep(priv))
2970                 mlx5e_remove_sqs_fwd_rules(priv);
2971
2972         /* FIXME: This is a W/A only for tx timeout watchdog false alarms when
2973          * polling inactive tx queues.
2974          */
2975         netif_tx_stop_all_queues(priv->netdev);
2976         netif_tx_disable(priv->netdev);
2977         mlx5e_xdp_tx_disable(priv);
2978         mlx5e_deactivate_channels(&priv->channels);
2979 }
2980
2981 static int mlx5e_switch_priv_channels(struct mlx5e_priv *priv,
2982                                       struct mlx5e_channels *new_chs,
2983                                       mlx5e_fp_preactivate preactivate,
2984                                       void *context)
2985 {
2986         struct net_device *netdev = priv->netdev;
2987         struct mlx5e_channels old_chs;
2988         int carrier_ok;
2989         int err = 0;
2990
2991         carrier_ok = netif_carrier_ok(netdev);
2992         netif_carrier_off(netdev);
2993
2994         mlx5e_deactivate_priv_channels(priv);
2995
2996         old_chs = priv->channels;
2997         priv->channels = *new_chs;
2998
2999         /* New channels are ready to roll; call the preactivate hook if needed
3000          * to modify HW settings or update kernel parameters.
3001          */
3002         if (preactivate) {
3003                 err = preactivate(priv, context);
3004                 if (err) {
3005                         priv->channels = old_chs;
3006                         goto out;
3007                 }
3008         }
3009
3010         mlx5e_close_channels(&old_chs);
3011         priv->profile->update_rx(priv);
3012
3013 out:
3014         mlx5e_activate_priv_channels(priv);
3015
3016         /* restore the carrier state if needed */
3017         if (carrier_ok)
3018                 netif_carrier_on(netdev);
3019
3020         return err;
3021 }
3022
3023 int mlx5e_safe_switch_channels(struct mlx5e_priv *priv,
3024                                struct mlx5e_channels *new_chs,
3025                                mlx5e_fp_preactivate preactivate,
3026                                void *context)
3027 {
3028         int err;
3029
3030         err = mlx5e_open_channels(priv, new_chs);
3031         if (err)
3032                 return err;
3033
3034         err = mlx5e_switch_priv_channels(priv, new_chs, preactivate, context);
3035         if (err)
3036                 goto err_close;
3037
3038         return 0;
3039
3040 err_close:
3041         mlx5e_close_channels(new_chs);
3042
3043         return err;
3044 }
3045
3046 int mlx5e_safe_reopen_channels(struct mlx5e_priv *priv)
3047 {
3048         struct mlx5e_channels new_channels = {};
3049
3050         new_channels.params = priv->channels.params;
3051         return mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL);
3052 }
3053
3054 void mlx5e_timestamp_init(struct mlx5e_priv *priv)
3055 {
3056         priv->tstamp.tx_type   = HWTSTAMP_TX_OFF;
3057         priv->tstamp.rx_filter = HWTSTAMP_FILTER_NONE;
3058 }
3059
3060 int mlx5e_open_locked(struct net_device *netdev)
3061 {
3062         struct mlx5e_priv *priv = netdev_priv(netdev);
3063         int err;
3064
3065         set_bit(MLX5E_STATE_OPENED, &priv->state);
3066
3067         err = mlx5e_open_channels(priv, &priv->channels);
3068         if (err)
3069                 goto err_clear_state_opened_flag;
3070
3071         priv->profile->update_rx(priv);
3072         mlx5e_activate_priv_channels(priv);
3073         if (priv->profile->update_carrier)
3074                 priv->profile->update_carrier(priv);
3075
3076         mlx5e_queue_update_stats(priv);
3077         return 0;
3078
3079 err_clear_state_opened_flag:
3080         clear_bit(MLX5E_STATE_OPENED, &priv->state);
3081         return err;
3082 }
3083
3084 int mlx5e_open(struct net_device *netdev)
3085 {
3086         struct mlx5e_priv *priv = netdev_priv(netdev);
3087         int err;
3088
3089         mutex_lock(&priv->state_lock);
3090         err = mlx5e_open_locked(netdev);
3091         if (!err)
3092                 mlx5_set_port_admin_status(priv->mdev, MLX5_PORT_UP);
3093         mutex_unlock(&priv->state_lock);
3094
3095         if (mlx5_vxlan_allowed(priv->mdev->vxlan))
3096                 udp_tunnel_get_rx_info(netdev);
3097
3098         return err;
3099 }
3100
3101 int mlx5e_close_locked(struct net_device *netdev)
3102 {
3103         struct mlx5e_priv *priv = netdev_priv(netdev);
3104
3105         /* May already be CLOSED in case a previous configuration operation
3106          * (e.g. RX/TX queue size change) that involves close & open failed.
3107          */
3108         if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
3109                 return 0;
3110
3111         clear_bit(MLX5E_STATE_OPENED, &priv->state);
3112
3113         netif_carrier_off(priv->netdev);
3114         mlx5e_deactivate_priv_channels(priv);
3115         mlx5e_close_channels(&priv->channels);
3116
3117         return 0;
3118 }
3119
3120 int mlx5e_close(struct net_device *netdev)
3121 {
3122         struct mlx5e_priv *priv = netdev_priv(netdev);
3123         int err;
3124
3125         if (!netif_device_present(netdev))
3126                 return -ENODEV;
3127
3128         mutex_lock(&priv->state_lock);
3129         mlx5_set_port_admin_status(priv->mdev, MLX5_PORT_DOWN);
3130         err = mlx5e_close_locked(netdev);
3131         mutex_unlock(&priv->state_lock);
3132
3133         return err;
3134 }
3135
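/* The "drop RQ" is a dummy RQ: it is created without a channel and no
 * receive buffers are ever posted to it, so packets steered to it are
 * simply dropped by the HW.
 */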
3136 static int mlx5e_alloc_drop_rq(struct mlx5_core_dev *mdev,
3137                                struct mlx5e_rq *rq,
3138                                struct mlx5e_rq_param *param)
3139 {
3140         void *rqc = param->rqc;
3141         void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq);
3142         int err;
3143
3144         param->wq.db_numa_node = param->wq.buf_numa_node;
3145
3146         err = mlx5_wq_cyc_create(mdev, &param->wq, rqc_wq, &rq->wqe.wq,
3147                                  &rq->wq_ctrl);
3148         if (err)
3149                 return err;
3150
3151         /* Mark as unused given "Drop-RQ" packets never reach XDP */
3152         xdp_rxq_info_unused(&rq->xdp_rxq);
3153
3154         rq->mdev = mdev;
3155
3156         return 0;
3157 }
3158
3159 static int mlx5e_alloc_drop_cq(struct mlx5_core_dev *mdev,
3160                                struct mlx5e_cq *cq,
3161                                struct mlx5e_cq_param *param)
3162 {
3163         param->wq.buf_numa_node = dev_to_node(mdev->device);
3164         param->wq.db_numa_node  = dev_to_node(mdev->device);
3165
3166         return mlx5e_alloc_cq_common(mdev, param, cq);
3167 }
3168
3169 int mlx5e_open_drop_rq(struct mlx5e_priv *priv,
3170                        struct mlx5e_rq *drop_rq)
3171 {
3172         struct mlx5_core_dev *mdev = priv->mdev;
3173         struct mlx5e_cq_param cq_param = {};
3174         struct mlx5e_rq_param rq_param = {};
3175         struct mlx5e_cq *cq = &drop_rq->cq;
3176         int err;
3177
3178         mlx5e_build_drop_rq_param(priv, &rq_param);
3179
3180         err = mlx5e_alloc_drop_cq(mdev, cq, &cq_param);
3181         if (err)
3182                 return err;
3183
3184         err = mlx5e_create_cq(cq, &cq_param);
3185         if (err)
3186                 goto err_free_cq;
3187
3188         err = mlx5e_alloc_drop_rq(mdev, drop_rq, &rq_param);
3189         if (err)
3190                 goto err_destroy_cq;
3191
3192         err = mlx5e_create_rq(drop_rq, &rq_param);
3193         if (err)
3194                 goto err_free_rq;
3195
3196         err = mlx5e_modify_rq_state(drop_rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
3197         if (err)
3198                 mlx5_core_warn(priv->mdev, "modify_rq_state failed, rx_if_down_packets won't be counted %d\n", err);
3199
3200         return 0;
3201
3202 err_free_rq:
3203         mlx5e_free_rq(drop_rq);
3204
3205 err_destroy_cq:
3206         mlx5e_destroy_cq(cq);
3207
3208 err_free_cq:
3209         mlx5e_free_cq(cq);
3210
3211         return err;
3212 }
3213
3214 void mlx5e_close_drop_rq(struct mlx5e_rq *drop_rq)
3215 {
3216         mlx5e_destroy_rq(drop_rq);
3217         mlx5e_free_rq(drop_rq);
3218         mlx5e_destroy_cq(&drop_rq->cq);
3219         mlx5e_free_cq(&drop_rq->cq);
3220 }
3221
3222 int mlx5e_create_tis(struct mlx5_core_dev *mdev, void *in, u32 *tisn)
3223 {
3224         void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
3225
3226         MLX5_SET(tisc, tisc, transport_domain, mdev->mlx5e_res.td.tdn);
3227
3228         if (MLX5_GET(tisc, tisc, tls_en))
3229                 MLX5_SET(tisc, tisc, pd, mdev->mlx5e_res.pdn);
3230
3231         if (mlx5_lag_is_lacp_owner(mdev))
3232                 MLX5_SET(tisc, tisc, strict_lag_tx_port_affinity, 1);
3233
3234         return mlx5_core_create_tis(mdev, in, tisn);
3235 }
3236
3237 void mlx5e_destroy_tis(struct mlx5_core_dev *mdev, u32 tisn)
3238 {
3239         mlx5_core_destroy_tis(mdev, tisn);
3240 }
3241
3242 void mlx5e_destroy_tises(struct mlx5e_priv *priv)
3243 {
3244         int tc, i;
3245
3246         for (i = 0; i < mlx5e_get_num_lag_ports(priv->mdev); i++)
3247                 for (tc = 0; tc < priv->profile->max_tc; tc++)
3248                         mlx5e_destroy_tis(priv->mdev, priv->tisn[i][tc]);
3249 }
3250
3251 static bool mlx5e_lag_should_assign_affinity(struct mlx5_core_dev *mdev)
3252 {
3253         return MLX5_CAP_GEN(mdev, lag_tx_port_affinity) && mlx5e_get_num_lag_ports(mdev) > 1;
3254 }
3255
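/* Create one TIS per (LAG port, TC) pair. The prio field encodes the TC,
 * and when LAG TX port affinity is supported each TIS is pinned to a
 * single physical port.
 */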
3256 int mlx5e_create_tises(struct mlx5e_priv *priv)
3257 {
3258         int tc, i;
3259         int err;
3260
3261         for (i = 0; i < mlx5e_get_num_lag_ports(priv->mdev); i++) {
3262                 for (tc = 0; tc < priv->profile->max_tc; tc++) {
3263                         u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
3264                         void *tisc;
3265
3266                         tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
3267
3268                         MLX5_SET(tisc, tisc, prio, tc << 1);
3269
3270                         if (mlx5e_lag_should_assign_affinity(priv->mdev))
3271                                 MLX5_SET(tisc, tisc, lag_tx_port_affinity, i + 1);
3272
3273                         err = mlx5e_create_tis(priv->mdev, in, &priv->tisn[i][tc]);
3274                         if (err)
3275                                 goto err_close_tises;
3276                 }
3277         }
3278
3279         return 0;
3280
3281 err_close_tises:
3282         for (; i >= 0; i--) {
3283                 for (tc--; tc >= 0; tc--)
3284                         mlx5e_destroy_tis(priv->mdev, priv->tisn[i][tc]);
3285                 tc = priv->profile->max_tc;
3286         }
3287
3288         return err;
3289 }
3290
3291 static void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv)
3292 {
3293         mlx5e_destroy_tises(priv);
3294 }
3295
3296 static void mlx5e_build_indir_tir_ctx_common(struct mlx5e_priv *priv,
3297                                              u32 rqtn, u32 *tirc)
3298 {
3299         MLX5_SET(tirc, tirc, transport_domain, priv->mdev->mlx5e_res.td.tdn);
3300         MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
3301         MLX5_SET(tirc, tirc, indirect_table, rqtn);
3302         MLX5_SET(tirc, tirc, tunneled_offload_en,
3303                  priv->channels.params.tunneled_offload_en);
3304
3305         mlx5e_build_tir_ctx_lro(&priv->channels.params, tirc);
3306 }
3307
3308 static void mlx5e_build_indir_tir_ctx(struct mlx5e_priv *priv,
3309                                       enum mlx5e_traffic_types tt,
3310                                       u32 *tirc)
3311 {
3312         mlx5e_build_indir_tir_ctx_common(priv, priv->indir_rqt.rqtn, tirc);
3313         mlx5e_build_indir_tir_ctx_hash(&priv->rss_params,
3314                                        &tirc_default_config[tt], tirc, false);
3315 }
3316
3317 static void mlx5e_build_direct_tir_ctx(struct mlx5e_priv *priv, u32 rqtn, u32 *tirc)
3318 {
3319         mlx5e_build_indir_tir_ctx_common(priv, rqtn, tirc);
3320         MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_INVERTED_XOR8);
3321 }
3322
3323 static void mlx5e_build_inner_indir_tir_ctx(struct mlx5e_priv *priv,
3324                                             enum mlx5e_traffic_types tt,
3325                                             u32 *tirc)
3326 {
3327         mlx5e_build_indir_tir_ctx_common(priv, priv->indir_rqt.rqtn, tirc);
3328         mlx5e_build_indir_tir_ctx_hash(&priv->rss_params,
3329                                        &tirc_default_config[tt], tirc, true);
3330 }
3331
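/* Create the indirect (RSS) TIRs, one per traffic type, and, when inner
 * flow tables are supported, the inner TIRs used to hash on the inner
 * headers of tunneled traffic.
 */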
3332 int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc)
3333 {
3334         struct mlx5e_tir *tir;
3335         void *tirc;
3336         int inlen;
3337         int i = 0;
3338         int err;
3339         u32 *in;
3340         int tt;
3341
3342         inlen = MLX5_ST_SZ_BYTES(create_tir_in);
3343         in = kvzalloc(inlen, GFP_KERNEL);
3344         if (!in)
3345                 return -ENOMEM;
3346
3347         for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
3348                 memset(in, 0, inlen);
3349                 tir = &priv->indir_tir[tt];
3350                 tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
3351                 mlx5e_build_indir_tir_ctx(priv, tt, tirc);
3352                 err = mlx5e_create_tir(priv->mdev, tir, in);
3353                 if (err) {
3354                         mlx5_core_warn(priv->mdev, "create indirect tirs failed, %d\n", err);
3355                         goto err_destroy_inner_tirs;
3356                 }
3357         }
3358
3359         if (!inner_ttc || !mlx5e_tunnel_inner_ft_supported(priv->mdev))
3360                 goto out;
3361
3362         for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++) {
3363                 memset(in, 0, inlen);
3364                 tir = &priv->inner_indir_tir[i];
3365                 tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
3366                 mlx5e_build_inner_indir_tir_ctx(priv, i, tirc);
3367                 err = mlx5e_create_tir(priv->mdev, tir, in);
3368                 if (err) {
3369                         mlx5_core_warn(priv->mdev, "create inner indirect tirs failed, %d\n", err);
3370                         goto err_destroy_inner_tirs;
3371                 }
3372         }
3373
3374 out:
3375         kvfree(in);
3376
3377         return 0;
3378
3379 err_destroy_inner_tirs:
3380         for (i--; i >= 0; i--)
3381                 mlx5e_destroy_tir(priv->mdev, &priv->inner_indir_tir[i]);
3382
3383         for (tt--; tt >= 0; tt--)
3384                 mlx5e_destroy_tir(priv->mdev, &priv->indir_tir[tt]);
3385
3386         kvfree(in);
3387
3388         return err;
3389 }
3390
3391 int mlx5e_create_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs)
3392 {
3393         struct mlx5e_tir *tir;
3394         void *tirc;
3395         int inlen;
3396         int err = 0;
3397         u32 *in;
3398         int ix;
3399
3400         inlen = MLX5_ST_SZ_BYTES(create_tir_in);
3401         in = kvzalloc(inlen, GFP_KERNEL);
3402         if (!in)
3403                 return -ENOMEM;
3404
3405         for (ix = 0; ix < priv->max_nch; ix++) {
3406                 memset(in, 0, inlen);
3407                 tir = &tirs[ix];
3408                 tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
3409                 mlx5e_build_direct_tir_ctx(priv, tir->rqt.rqtn, tirc);
3410                 err = mlx5e_create_tir(priv->mdev, tir, in);
3411                 if (unlikely(err))
3412                         goto err_destroy_ch_tirs;
3413         }
3414
3415         goto out;
3416
3417 err_destroy_ch_tirs:
3418         mlx5_core_warn(priv->mdev, "create tirs failed, %d\n", err);
3419         for (ix--; ix >= 0; ix--)
3420                 mlx5e_destroy_tir(priv->mdev, &tirs[ix]);
3421
3422 out:
3423         kvfree(in);
3424
3425         return err;
3426 }
3427
3428 void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc)
3429 {
3430         int i;
3431
3432         for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++)
3433                 mlx5e_destroy_tir(priv->mdev, &priv->indir_tir[i]);
3434
3435         if (!inner_ttc || !mlx5e_tunnel_inner_ft_supported(priv->mdev))
3436                 return;
3437
3438         for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++)
3439                 mlx5e_destroy_tir(priv->mdev, &priv->inner_indir_tir[i]);
3440 }
3441
3442 void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs)
3443 {
3444         int i;
3445
3446         for (i = 0; i < priv->max_nch; i++)
3447                 mlx5e_destroy_tir(priv->mdev, &tirs[i]);
3448 }
3449
3450 static int mlx5e_modify_channels_scatter_fcs(struct mlx5e_channels *chs, bool enable)
3451 {
3452         int err = 0;
3453         int i;
3454
3455         for (i = 0; i < chs->num; i++) {
3456                 err = mlx5e_modify_rq_scatter_fcs(&chs->c[i]->rq, enable);
3457                 if (err)
3458                         return err;
3459         }
3460
3461         return 0;
3462 }
3463
3464 static int mlx5e_modify_channels_vsd(struct mlx5e_channels *chs, bool vsd)
3465 {
3466         int err = 0;
3467         int i;
3468
3469         for (i = 0; i < chs->num; i++) {
3470                 err = mlx5e_modify_rq_vsd(&chs->c[i]->rq, vsd);
3471                 if (err)
3472                         return err;
3473         }
3474
3475         return 0;
3476 }
3477
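/* mqprio offload: the requested number of TCs must be either 0 (fall back
 * to a single TC) or exactly MLX5E_MAX_NUM_TC; anything in between is
 * rejected.
 */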
3478 static int mlx5e_setup_tc_mqprio(struct mlx5e_priv *priv,
3479                                  struct tc_mqprio_qopt *mqprio)
3480 {
3481         struct mlx5e_channels new_channels = {};
3482         u8 tc = mqprio->num_tc;
3483         int err = 0;
3484
3485         mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
3486
3487         if (tc && tc != MLX5E_MAX_NUM_TC)
3488                 return -EINVAL;
3489
3490         mutex_lock(&priv->state_lock);
3491
3492         new_channels.params = priv->channels.params;
3493         new_channels.params.num_tc = tc ? tc : 1;
3494
3495         if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
3496                 priv->channels.params = new_channels.params;
3497                 goto out;
3498         }
3499
3500         err = mlx5e_safe_switch_channels(priv, &new_channels,
3501                                          mlx5e_num_channels_changed_ctx, NULL);
3502         if (err)
3503                 goto out;
3504
3505         priv->max_opened_tc = max_t(u8, priv->max_opened_tc,
3506                                     new_channels.params.num_tc);
3507 out:
3508         mutex_unlock(&priv->state_lock);
3509         return err;
3510 }
3511
3512 #ifdef CONFIG_MLX5_ESWITCH
3513 static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv *priv,
3514                                      struct flow_cls_offload *cls_flower,
3515                                      unsigned long flags)
3516 {
3517         switch (cls_flower->command) {
3518         case FLOW_CLS_REPLACE:
3519                 return mlx5e_configure_flower(priv->netdev, priv, cls_flower,
3520                                               flags);
3521         case FLOW_CLS_DESTROY:
3522                 return mlx5e_delete_flower(priv->netdev, priv, cls_flower,
3523                                            flags);
3524         case FLOW_CLS_STATS:
3525                 return mlx5e_stats_flower(priv->netdev, priv, cls_flower,
3526                                           flags);
3527         default:
3528                 return -EOPNOTSUPP;
3529         }
3530 }
3531
3532 static int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
3533                                    void *cb_priv)
3534 {
3535         unsigned long flags = MLX5_TC_FLAG(INGRESS) | MLX5_TC_FLAG(NIC_OFFLOAD);
3536         struct mlx5e_priv *priv = cb_priv;
3537
3538         switch (type) {
3539         case TC_SETUP_CLSFLOWER:
3540                 return mlx5e_setup_tc_cls_flower(priv, type_data, flags);
3541         default:
3542                 return -EOPNOTSUPP;
3543         }
3544 }
3545 #endif
3546
3547 static LIST_HEAD(mlx5e_block_cb_list);
3548
3549 static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type,
3550                           void *type_data)
3551 {
3552         struct mlx5e_priv *priv = netdev_priv(dev);
3553
3554         switch (type) {
3555 #ifdef CONFIG_MLX5_ESWITCH
3556         case TC_SETUP_BLOCK: {
3557                 struct flow_block_offload *f = type_data;
3558
3559                 f->unlocked_driver_cb = true;
3560                 return flow_block_cb_setup_simple(type_data,
3561                                                   &mlx5e_block_cb_list,
3562                                                   mlx5e_setup_tc_block_cb,
3563                                                   priv, priv, true);
3564         }
3565 #endif
3566         case TC_SETUP_QDISC_MQPRIO:
3567                 return mlx5e_setup_tc_mqprio(priv, type_data);
3568         default:
3569                 return -EOPNOTSUPP;
3570         }
3571 }
3572
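/* Fold the software ring counters (regular and XSK RQs, and the per-TC SQs)
 * of all channels into the rtnl_link_stats64 structure.
 */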
3573 void mlx5e_fold_sw_stats64(struct mlx5e_priv *priv, struct rtnl_link_stats64 *s)
3574 {
3575         int i;
3576
3577         for (i = 0; i < priv->max_nch; i++) {
3578                 struct mlx5e_channel_stats *channel_stats = &priv->channel_stats[i];
3579                 struct mlx5e_rq_stats *xskrq_stats = &channel_stats->xskrq;
3580                 struct mlx5e_rq_stats *rq_stats = &channel_stats->rq;
3581                 int j;
3582
3583                 s->rx_packets   += rq_stats->packets + xskrq_stats->packets;
3584                 s->rx_bytes     += rq_stats->bytes + xskrq_stats->bytes;
3585
3586                 for (j = 0; j < priv->max_opened_tc; j++) {
3587                         struct mlx5e_sq_stats *sq_stats = &channel_stats->sq[j];
3588
3589                         s->tx_packets    += sq_stats->packets;
3590                         s->tx_bytes      += sq_stats->bytes;
3591                         s->tx_dropped    += sq_stats->dropped;
3592                 }
3593         }
3594 }
3595
3596 void
3597 mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
3598 {
3599         struct mlx5e_priv *priv = netdev_priv(dev);
3600         struct mlx5e_vport_stats *vstats = &priv->stats.vport;
3601         struct mlx5e_pport_stats *pstats = &priv->stats.pport;
3602
3603         /* In switchdev mode, the monitor counters don't track
3604          * the 802_3 rx/tx stats. The update-stats mechanism
3605          * should keep the 802_3 layout counters updated.
3606          */
3607         if (!mlx5e_monitor_counter_supported(priv) ||
3608             mlx5e_is_uplink_rep(priv)) {
3609                 /* update HW stats in background for next time */
3610                 mlx5e_queue_update_stats(priv);
3611         }
3612
3613         if (mlx5e_is_uplink_rep(priv)) {
3614                 stats->rx_packets = PPORT_802_3_GET(pstats, a_frames_received_ok);
3615                 stats->rx_bytes   = PPORT_802_3_GET(pstats, a_octets_received_ok);
3616                 stats->tx_packets = PPORT_802_3_GET(pstats, a_frames_transmitted_ok);
3617                 stats->tx_bytes   = PPORT_802_3_GET(pstats, a_octets_transmitted_ok);
3618         } else {
3619                 mlx5e_fold_sw_stats64(priv, stats);
3620         }
3621
3622         stats->rx_dropped = priv->stats.qcnt.rx_out_of_buffer;
3623
3624         stats->rx_length_errors =
3625                 PPORT_802_3_GET(pstats, a_in_range_length_errors) +
3626                 PPORT_802_3_GET(pstats, a_out_of_range_length_field) +
3627                 PPORT_802_3_GET(pstats, a_frame_too_long_errors);
3628         stats->rx_crc_errors =
3629                 PPORT_802_3_GET(pstats, a_frame_check_sequence_errors);
3630         stats->rx_frame_errors = PPORT_802_3_GET(pstats, a_alignment_errors);
3631         stats->tx_aborted_errors = PPORT_2863_GET(pstats, if_out_discards);
3632         stats->rx_errors = stats->rx_length_errors + stats->rx_crc_errors +
3633                            stats->rx_frame_errors;
3634         stats->tx_errors = stats->tx_aborted_errors + stats->tx_carrier_errors;
3635
3636         /* vport multicast also counts packets that are dropped due to steering
3637          * or rx out of buffer
3638          */
3639         stats->multicast =
3640                 VPORT_COUNTER_GET(vstats, received_eth_multicast.packets);
3641 }
3642
3643 static void mlx5e_set_rx_mode(struct net_device *dev)
3644 {
3645         struct mlx5e_priv *priv = netdev_priv(dev);
3646
3647         queue_work(priv->wq, &priv->set_rx_mode_work);
3648 }
3649
3650 static int mlx5e_set_mac(struct net_device *netdev, void *addr)
3651 {
3652         struct mlx5e_priv *priv = netdev_priv(netdev);
3653         struct sockaddr *saddr = addr;
3654
3655         if (!is_valid_ether_addr(saddr->sa_data))
3656                 return -EADDRNOTAVAIL;
3657
3658         netif_addr_lock_bh(netdev);
3659         ether_addr_copy(netdev->dev_addr, saddr->sa_data);
3660         netif_addr_unlock_bh(netdev);
3661
3662         queue_work(priv->wq, &priv->set_rx_mode_work);
3663
3664         return 0;
3665 }
3666
3667 #define MLX5E_SET_FEATURE(features, feature, enable)    \
3668         do {                                            \
3669                 if (enable)                             \
3670                         *features |= feature;           \
3671                 else                                    \
3672                         *features &= ~feature;          \
3673         } while (0)
3674
3675 typedef int (*mlx5e_feature_handler)(struct net_device *netdev, bool enable);
3676
3677 static int set_feature_lro(struct net_device *netdev, bool enable)
3678 {
3679         struct mlx5e_priv *priv = netdev_priv(netdev);
3680         struct mlx5_core_dev *mdev = priv->mdev;
3681         struct mlx5e_channels new_channels = {};
3682         struct mlx5e_params *old_params;
3683         int err = 0;
3684         bool reset;
3685
3686         mutex_lock(&priv->state_lock);
3687
3688         if (enable && priv->xsk.refcnt) {
3689                 netdev_warn(netdev, "LRO is incompatible with AF_XDP (%hu XSKs are active)\n",
3690                             priv->xsk.refcnt);
3691                 err = -EINVAL;
3692                 goto out;
3693         }
3694
3695         old_params = &priv->channels.params;
3696         if (enable && !MLX5E_GET_PFLAG(old_params, MLX5E_PFLAG_RX_STRIDING_RQ)) {
3697                 netdev_warn(netdev, "can't set LRO with legacy RQ\n");
3698                 err = -EINVAL;
3699                 goto out;
3700         }
3701
3702         reset = test_bit(MLX5E_STATE_OPENED, &priv->state);
3703
3704         new_channels.params = *old_params;
3705         new_channels.params.lro_en = enable;
3706
3707         if (old_params->rq_wq_type != MLX5_WQ_TYPE_CYCLIC) {
3708                 if (mlx5e_rx_mpwqe_is_linear_skb(mdev, old_params, NULL) ==
3709                     mlx5e_rx_mpwqe_is_linear_skb(mdev, &new_channels.params, NULL))
3710                         reset = false;
3711         }
3712
3713         if (!reset) {
3714                 *old_params = new_channels.params;
3715                 err = mlx5e_modify_tirs_lro(priv);
3716                 goto out;
3717         }
3718
3719         err = mlx5e_safe_switch_channels(priv, &new_channels,
3720                                          mlx5e_modify_tirs_lro_ctx, NULL);
3721 out:
3722         mutex_unlock(&priv->state_lock);
3723         return err;
3724 }
3725
3726 static int set_feature_cvlan_filter(struct net_device *netdev, bool enable)
3727 {
3728         struct mlx5e_priv *priv = netdev_priv(netdev);
3729
3730         if (enable)
3731                 mlx5e_enable_cvlan_filter(priv);
3732         else
3733                 mlx5e_disable_cvlan_filter(priv);
3734
3735         return 0;
3736 }
3737
3738 #ifdef CONFIG_MLX5_ESWITCH
3739 static int set_feature_tc_num_filters(struct net_device *netdev, bool enable)
3740 {
3741         struct mlx5e_priv *priv = netdev_priv(netdev);
3742
3743         if (!enable && mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD))) {
3744                 netdev_err(netdev,
3745                            "Active offloaded tc filters, can't turn hw_tc_offload off\n");
3746                 return -EINVAL;
3747         }
3748
3749         return 0;
3750 }
3751 #endif
3752
3753 static int set_feature_rx_all(struct net_device *netdev, bool enable)
3754 {
3755         struct mlx5e_priv *priv = netdev_priv(netdev);
3756         struct mlx5_core_dev *mdev = priv->mdev;
3757
3758         return mlx5_set_port_fcs(mdev, !enable);
3759 }
3760
3761 static int set_feature_rx_fcs(struct net_device *netdev, bool enable)
3762 {
3763         struct mlx5e_priv *priv = netdev_priv(netdev);
3764         int err;
3765
3766         mutex_lock(&priv->state_lock);
3767
3768         priv->channels.params.scatter_fcs_en = enable;
3769         err = mlx5e_modify_channels_scatter_fcs(&priv->channels, enable);
3770         if (err)
3771                 priv->channels.params.scatter_fcs_en = !enable;
3772
3773         mutex_unlock(&priv->state_lock);
3774
3775         return err;
3776 }
3777
3778 static int set_feature_rx_vlan(struct net_device *netdev, bool enable)
3779 {
3780         struct mlx5e_priv *priv = netdev_priv(netdev);
3781         int err = 0;
3782
3783         mutex_lock(&priv->state_lock);
3784
3785         priv->channels.params.vlan_strip_disable = !enable;
3786         if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
3787                 goto unlock;
3788
3789         err = mlx5e_modify_channels_vsd(&priv->channels, !enable);
3790         if (err)
3791                 priv->channels.params.vlan_strip_disable = enable;
3792
3793 unlock:
3794         mutex_unlock(&priv->state_lock);
3795
3796         return err;
3797 }
3798
3799 #ifdef CONFIG_MLX5_EN_ARFS
3800 static int set_feature_arfs(struct net_device *netdev, bool enable)
3801 {
3802         struct mlx5e_priv *priv = netdev_priv(netdev);
3803         int err;
3804
3805         if (enable)
3806                 err = mlx5e_arfs_enable(priv);
3807         else
3808                 err = mlx5e_arfs_disable(priv);
3809
3810         return err;
3811 }
3812 #endif
3813
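/* Run the handler only if the feature bit actually changes. On success the
 * bit is mirrored into *features, which lets mlx5e_set_features() roll
 * netdev->features back to the set that was really applied if a later
 * handler fails.
 */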
3814 static int mlx5e_handle_feature(struct net_device *netdev,
3815                                 netdev_features_t *features,
3816                                 netdev_features_t wanted_features,
3817                                 netdev_features_t feature,
3818                                 mlx5e_feature_handler feature_handler)
3819 {
3820         netdev_features_t changes = wanted_features ^ netdev->features;
3821         bool enable = !!(wanted_features & feature);
3822         int err;
3823
3824         if (!(changes & feature))
3825                 return 0;
3826
3827         err = feature_handler(netdev, enable);
3828         if (err) {
3829                 netdev_err(netdev, "%s feature %pNF failed, err %d\n",
3830                            enable ? "Enable" : "Disable", &feature, err);
3831                 return err;
3832         }
3833
3834         MLX5E_SET_FEATURE(features, feature, enable);
3835         return 0;
3836 }
3837
3838 int mlx5e_set_features(struct net_device *netdev, netdev_features_t features)
3839 {
3840         netdev_features_t oper_features = netdev->features;
3841         int err = 0;
3842
3843 #define MLX5E_HANDLE_FEATURE(feature, handler) \
3844         mlx5e_handle_feature(netdev, &oper_features, features, feature, handler)
3845
3846         err |= MLX5E_HANDLE_FEATURE(NETIF_F_LRO, set_feature_lro);
3847         err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_VLAN_CTAG_FILTER,
3848                                     set_feature_cvlan_filter);
3849 #ifdef CONFIG_MLX5_ESWITCH
3850         err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_TC, set_feature_tc_num_filters);
3851 #endif
3852         err |= MLX5E_HANDLE_FEATURE(NETIF_F_RXALL, set_feature_rx_all);
3853         err |= MLX5E_HANDLE_FEATURE(NETIF_F_RXFCS, set_feature_rx_fcs);
3854         err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_VLAN_CTAG_RX, set_feature_rx_vlan);
3855 #ifdef CONFIG_MLX5_EN_ARFS
3856         err |= MLX5E_HANDLE_FEATURE(NETIF_F_NTUPLE, set_feature_arfs);
3857 #endif
3858
3859         if (err) {
3860                 netdev->features = oper_features;
3861                 return -EINVAL;
3862         }
3863
3864         return 0;
3865 }
3866
3867 static netdev_features_t mlx5e_fix_features(struct net_device *netdev,
3868                                             netdev_features_t features)
3869 {
3870         struct mlx5e_priv *priv = netdev_priv(netdev);
3871         struct mlx5e_params *params;
3872
3873         mutex_lock(&priv->state_lock);
3874         params = &priv->channels.params;
3875         if (!bitmap_empty(priv->fs.vlan.active_svlans, VLAN_N_VID)) {
3876                 /* HW strips the outer C-tag header; this is a problem
3877                  * for S-tag traffic.
3878                  */
3879                 features &= ~NETIF_F_HW_VLAN_CTAG_RX;
3880                 if (!params->vlan_strip_disable)
3881                         netdev_warn(netdev, "Dropping C-tag vlan stripping offload due to S-tag vlan\n");
3882         }
3883         if (!MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ)) {
3884                 if (features & NETIF_F_LRO) {
3885                         netdev_warn(netdev, "Disabling LRO, not supported in legacy RQ\n");
3886                         features &= ~NETIF_F_LRO;
3887                 }
3888         }
3889
3890         if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) {
3891                 features &= ~NETIF_F_RXHASH;
3892                 if (netdev->features & NETIF_F_RXHASH)
3893                         netdev_warn(netdev, "Disabling rxhash, not supported when CQE compress is active\n");
3894         }
3895
3896         mutex_unlock(&priv->state_lock);
3897
3898         return features;
3899 }
3900
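/* Check the new MTU against every channel that has an active XSK UMEM:
 * the new MTU must still satisfy the XSK frame size and SKB size
 * constraints on all of them.
 */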
3901 static bool mlx5e_xsk_validate_mtu(struct net_device *netdev,
3902                                    struct mlx5e_channels *chs,
3903                                    struct mlx5e_params *new_params,
3904                                    struct mlx5_core_dev *mdev)
3905 {
3906         u16 ix;
3907
3908         for (ix = 0; ix < chs->params.num_channels; ix++) {
3909                 struct xdp_umem *umem = mlx5e_xsk_get_umem(&chs->params, chs->params.xsk, ix);
3910                 struct mlx5e_xsk_param xsk;
3911
3912                 if (!umem)
3913                         continue;
3914
3915                 mlx5e_build_xsk_param(umem, &xsk);
3916
3917                 if (!mlx5e_validate_xsk_param(new_params, &xsk, mdev)) {
3918                         u32 hr = mlx5e_get_linear_rq_headroom(new_params, &xsk);
3919                         int max_mtu_frame, max_mtu_page, max_mtu;
3920
3921                         /* Two criteria must be met:
3922                          * 1. HW MTU + all headrooms <= XSK frame size.
3923                          * 2. Size of SKBs allocated on XDP_PASS <= PAGE_SIZE.
3924                          */
3925                         max_mtu_frame = MLX5E_HW2SW_MTU(new_params, xsk.chunk_size - hr);
3926                         max_mtu_page = mlx5e_xdp_max_mtu(new_params, &xsk);
3927                         max_mtu = min(max_mtu_frame, max_mtu_page);
3928
3929                         netdev_err(netdev, "MTU %d is too big for an XSK running on channel %hu. Try MTU <= %d\n",
3930                                    new_params->sw_mtu, ix, max_mtu);
3931                         return false;
3932                 }
3933         }
3934
3935         return true;
3936 }
3937
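/* An MTU change needs a channel reset only when the device is open, LRO is
 * off and, for striding RQ, the RQ is linear (or the packets-per-WQE count
 * changes). Otherwise only the SW MTU is updated and the preactivate hook
 * is invoked directly.
 */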
3938 int mlx5e_change_mtu(struct net_device *netdev, int new_mtu,
3939                      mlx5e_fp_preactivate preactivate)
3940 {
3941         struct mlx5e_priv *priv = netdev_priv(netdev);
3942         struct mlx5e_channels new_channels = {};
3943         struct mlx5e_params *params;
3944         int err = 0;
3945         bool reset;
3946
3947         mutex_lock(&priv->state_lock);
3948
3949         params = &priv->channels.params;
3950
3951         reset = !params->lro_en;
3952         reset = reset && test_bit(MLX5E_STATE_OPENED, &priv->state);
3953
3954         new_channels.params = *params;
3955         new_channels.params.sw_mtu = new_mtu;
3956
3957         if (params->xdp_prog &&
3958             !mlx5e_rx_is_linear_skb(&new_channels.params, NULL)) {
3959                 netdev_err(netdev, "MTU(%d) > %d is not allowed while XDP enabled\n",
3960                            new_mtu, mlx5e_xdp_max_mtu(params, NULL));
3961                 err = -EINVAL;
3962                 goto out;
3963         }
3964
3965         if (priv->xsk.refcnt &&
3966             !mlx5e_xsk_validate_mtu(netdev, &priv->channels,
3967                                     &new_channels.params, priv->mdev)) {
3968                 err = -EINVAL;
3969                 goto out;
3970         }
3971
3972         if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
3973                 bool is_linear = mlx5e_rx_mpwqe_is_linear_skb(priv->mdev,
3974                                                               &new_channels.params,
3975                                                               NULL);
3976                 u8 ppw_old = mlx5e_mpwqe_log_pkts_per_wqe(params, NULL);
3977                 u8 ppw_new = mlx5e_mpwqe_log_pkts_per_wqe(&new_channels.params, NULL);
3978
3979                 /* If XSK is active, XSK RQs are linear. */
3980                 is_linear |= priv->xsk.refcnt;
3981
3982                 /* Always reset in linear mode - hw_mtu is used in data path. */
3983                 reset = reset && (is_linear || (ppw_old != ppw_new));
3984         }
3985
3986         if (!reset) {
3987                 params->sw_mtu = new_mtu;
3988                 if (preactivate)
3989                         preactivate(priv, NULL);
3990                 netdev->mtu = params->sw_mtu;
3991                 goto out;
3992         }
3993
3994         err = mlx5e_safe_switch_channels(priv, &new_channels, preactivate, NULL);
3995         if (err)
3996                 goto out;
3997
3998         netdev->mtu = new_channels.params.sw_mtu;
3999
4000 out:
4001         mutex_unlock(&priv->state_lock);
4002         return err;
4003 }
4004
4005 static int mlx5e_change_nic_mtu(struct net_device *netdev, int new_mtu)
4006 {
4007         return mlx5e_change_mtu(netdev, new_mtu, mlx5e_set_dev_port_mtu_ctx);
4008 }
4009
4010 int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr)
4011 {
4012         struct hwtstamp_config config;
4013         int err;
4014
4015         if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz) ||
4016             (mlx5_clock_get_ptp_index(priv->mdev) == -1))
4017                 return -EOPNOTSUPP;
4018
4019         if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
4020                 return -EFAULT;
4021
4022         /* TX HW timestamp */
4023         switch (config.tx_type) {
4024         case HWTSTAMP_TX_OFF:
4025         case HWTSTAMP_TX_ON:
4026                 break;
4027         default:
4028                 return -ERANGE;
4029         }
4030
4031         mutex_lock(&priv->state_lock);
4032         /* RX HW timestamp */
4033         switch (config.rx_filter) {
4034         case HWTSTAMP_FILTER_NONE:
4035                 /* Reset CQE compression to Admin default */
4036                 mlx5e_modify_rx_cqe_compression_locked(priv, priv->channels.params.rx_cqe_compress_def);
4037                 break;
4038         case HWTSTAMP_FILTER_ALL:
4039         case HWTSTAMP_FILTER_SOME:
4040         case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
4041         case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
4042         case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
4043         case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
4044         case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
4045         case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
4046         case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
4047         case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
4048         case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
4049         case HWTSTAMP_FILTER_PTP_V2_EVENT:
4050         case HWTSTAMP_FILTER_PTP_V2_SYNC:
4051         case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
4052         case HWTSTAMP_FILTER_NTP_ALL:
4053                 /* Disable CQE compression */
4054                 if (MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS))
4055                         netdev_warn(priv->netdev, "Disabling RX cqe compression\n");
4056                 err = mlx5e_modify_rx_cqe_compression_locked(priv, false);
4057                 if (err) {
4058                         netdev_err(priv->netdev, "Failed disabling cqe compression err=%d\n", err);
4059                         mutex_unlock(&priv->state_lock);
4060                         return err;
4061                 }
4062                 config.rx_filter = HWTSTAMP_FILTER_ALL;
4063                 break;
4064         default:
4065                 mutex_unlock(&priv->state_lock);
4066                 return -ERANGE;
4067         }
4068
4069         memcpy(&priv->tstamp, &config, sizeof(config));
4070         mutex_unlock(&priv->state_lock);
4071
4072         /* might need to fix some features */
4073         netdev_update_features(priv->netdev);
4074
4075         return copy_to_user(ifr->ifr_data, &config,
4076                             sizeof(config)) ? -EFAULT : 0;
4077 }
4078
4079 int mlx5e_hwstamp_get(struct mlx5e_priv *priv, struct ifreq *ifr)
4080 {
4081         struct hwtstamp_config *cfg = &priv->tstamp;
4082
4083         if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz))
4084                 return -EOPNOTSUPP;
4085
4086         return copy_to_user(ifr->ifr_data, cfg, sizeof(*cfg)) ? -EFAULT : 0;
4087 }
4088
4089 static int mlx5e_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
4090 {
4091         struct mlx5e_priv *priv = netdev_priv(dev);
4092
4093         switch (cmd) {
4094         case SIOCSHWTSTAMP:
4095                 return mlx5e_hwstamp_set(priv, ifr);
4096         case SIOCGHWTSTAMP:
4097                 return mlx5e_hwstamp_get(priv, ifr);
4098         default:
4099                 return -EOPNOTSUPP;
4100         }
4101 }
4102
4103 #ifdef CONFIG_MLX5_ESWITCH
4104 int mlx5e_set_vf_mac(struct net_device *dev, int vf, u8 *mac)
4105 {
4106         struct mlx5e_priv *priv = netdev_priv(dev);
4107         struct mlx5_core_dev *mdev = priv->mdev;
4108
4109         return mlx5_eswitch_set_vport_mac(mdev->priv.eswitch, vf + 1, mac);
4110 }
4111
4112 static int mlx5e_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos,
4113                              __be16 vlan_proto)
4114 {
4115         struct mlx5e_priv *priv = netdev_priv(dev);
4116         struct mlx5_core_dev *mdev = priv->mdev;
4117
4118         if (vlan_proto != htons(ETH_P_8021Q))
4119                 return -EPROTONOSUPPORT;
4120
4121         return mlx5_eswitch_set_vport_vlan(mdev->priv.eswitch, vf + 1,
4122                                            vlan, qos);
4123 }
4124
4125 static int mlx5e_set_vf_spoofchk(struct net_device *dev, int vf, bool setting)
4126 {
4127         struct mlx5e_priv *priv = netdev_priv(dev);
4128         struct mlx5_core_dev *mdev = priv->mdev;
4129
4130         return mlx5_eswitch_set_vport_spoofchk(mdev->priv.eswitch, vf + 1, setting);
4131 }
4132
4133 static int mlx5e_set_vf_trust(struct net_device *dev, int vf, bool setting)
4134 {
4135         struct mlx5e_priv *priv = netdev_priv(dev);
4136         struct mlx5_core_dev *mdev = priv->mdev;
4137
4138         return mlx5_eswitch_set_vport_trust(mdev->priv.eswitch, vf + 1, setting);
4139 }
4140
4141 int mlx5e_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate,
4142                       int max_tx_rate)
4143 {
4144         struct mlx5e_priv *priv = netdev_priv(dev);
4145         struct mlx5_core_dev *mdev = priv->mdev;
4146
4147         return mlx5_eswitch_set_vport_rate(mdev->priv.eswitch, vf + 1,
4148                                            max_tx_rate, min_tx_rate);
4149 }
4150
4151 static int mlx5_vport_link2ifla(u8 esw_link)
4152 {
4153         switch (esw_link) {
4154         case MLX5_VPORT_ADMIN_STATE_DOWN:
4155                 return IFLA_VF_LINK_STATE_DISABLE;
4156         case MLX5_VPORT_ADMIN_STATE_UP:
4157                 return IFLA_VF_LINK_STATE_ENABLE;
4158         }
4159         return IFLA_VF_LINK_STATE_AUTO;
4160 }
4161
4162 static int mlx5_ifla_link2vport(u8 ifla_link)
4163 {
4164         switch (ifla_link) {
4165         case IFLA_VF_LINK_STATE_DISABLE:
4166                 return MLX5_VPORT_ADMIN_STATE_DOWN;
4167         case IFLA_VF_LINK_STATE_ENABLE:
4168                 return MLX5_VPORT_ADMIN_STATE_UP;
4169         }
4170         return MLX5_VPORT_ADMIN_STATE_AUTO;
4171 }
4172
4173 static int mlx5e_set_vf_link_state(struct net_device *dev, int vf,
4174                                    int link_state)
4175 {
4176         struct mlx5e_priv *priv = netdev_priv(dev);
4177         struct mlx5_core_dev *mdev = priv->mdev;
4178
4179         return mlx5_eswitch_set_vport_state(mdev->priv.eswitch, vf + 1,
4180                                             mlx5_ifla_link2vport(link_state));
4181 }
4182
4183 int mlx5e_get_vf_config(struct net_device *dev,
4184                         int vf, struct ifla_vf_info *ivi)
4185 {
4186         struct mlx5e_priv *priv = netdev_priv(dev);
4187         struct mlx5_core_dev *mdev = priv->mdev;
4188         int err;
4189
4190         err = mlx5_eswitch_get_vport_config(mdev->priv.eswitch, vf + 1, ivi);
4191         if (err)
4192                 return err;
4193         ivi->linkstate = mlx5_vport_link2ifla(ivi->linkstate);
4194         return 0;
4195 }
4196
4197 int mlx5e_get_vf_stats(struct net_device *dev,
4198                        int vf, struct ifla_vf_stats *vf_stats)
4199 {
4200         struct mlx5e_priv *priv = netdev_priv(dev);
4201         struct mlx5_core_dev *mdev = priv->mdev;
4202
4203         return mlx5_eswitch_get_vport_stats(mdev->priv.eswitch, vf + 1,
4204                                             vf_stats);
4205 }
4206 #endif
4207
4208 struct mlx5e_vxlan_work {
4209         struct work_struct      work;
4210         struct mlx5e_priv       *priv;
4211         u16                     port;
4212 };
4213
4214 static void mlx5e_vxlan_add_work(struct work_struct *work)
4215 {
4216         struct mlx5e_vxlan_work *vxlan_work =
4217                 container_of(work, struct mlx5e_vxlan_work, work);
4218         struct mlx5e_priv *priv = vxlan_work->priv;
4219         u16 port = vxlan_work->port;
4220
4221         mutex_lock(&priv->state_lock);
4222         mlx5_vxlan_add_port(priv->mdev->vxlan, port);
4223         mutex_unlock(&priv->state_lock);
4224
4225         kfree(vxlan_work);
4226 }
4227
4228 static void mlx5e_vxlan_del_work(struct work_struct *work)
4229 {
4230         struct mlx5e_vxlan_work *vxlan_work =
4231                 container_of(work, struct mlx5e_vxlan_work, work);
4232         struct mlx5e_priv *priv         = vxlan_work->priv;
4233         u16 port = vxlan_work->port;
4234
4235         mutex_lock(&priv->state_lock);
4236         mlx5_vxlan_del_port(priv->mdev->vxlan, port);
4237         mutex_unlock(&priv->state_lock);
4238         kfree(vxlan_work);
4239 }
4240
4241 static void mlx5e_vxlan_queue_work(struct mlx5e_priv *priv, u16 port, int add)
4242 {
4243         struct mlx5e_vxlan_work *vxlan_work;
4244
4245         vxlan_work = kmalloc(sizeof(*vxlan_work), GFP_ATOMIC);
4246         if (!vxlan_work)
4247                 return;
4248
4249         if (add)
4250                 INIT_WORK(&vxlan_work->work, mlx5e_vxlan_add_work);
4251         else
4252                 INIT_WORK(&vxlan_work->work, mlx5e_vxlan_del_work);
4253
4254         vxlan_work->priv = priv;
4255         vxlan_work->port = port;
4256         queue_work(priv->wq, &vxlan_work->work);
4257 }
4258
4259 void mlx5e_add_vxlan_port(struct net_device *netdev, struct udp_tunnel_info *ti)
4260 {
4261         struct mlx5e_priv *priv = netdev_priv(netdev);
4262
4263         if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
4264                 return;
4265
4266         if (!mlx5_vxlan_allowed(priv->mdev->vxlan))
4267                 return;
4268
4269         mlx5e_vxlan_queue_work(priv, be16_to_cpu(ti->port), 1);
4270 }
4271
4272 void mlx5e_del_vxlan_port(struct net_device *netdev, struct udp_tunnel_info *ti)
4273 {
4274         struct mlx5e_priv *priv = netdev_priv(netdev);
4275
4276         if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
4277                 return;
4278
4279         if (!mlx5_vxlan_allowed(priv->mdev->vxlan))
4280                 return;
4281
4282         mlx5e_vxlan_queue_work(priv, be16_to_cpu(ti->port), 0);
4283 }
4284
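/* For an encapsulated skb, keep the checksum/GSO offloads only when the
 * tunnel protocol is one the HW can handle (GRE, IP-in-IP when supported,
 * or a UDP destination port known for VXLAN/Geneve); otherwise mask those
 * features out for this skb.
 */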
4285 static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv,
4286                                                      struct sk_buff *skb,
4287                                                      netdev_features_t features)
4288 {
4289         unsigned int offset = 0;
4290         struct udphdr *udph;
4291         u8 proto;
4292         u16 port;
4293
4294         switch (vlan_get_protocol(skb)) {
4295         case htons(ETH_P_IP):
4296                 proto = ip_hdr(skb)->protocol;
4297                 break;
4298         case htons(ETH_P_IPV6):
4299                 proto = ipv6_find_hdr(skb, &offset, -1, NULL, NULL);
4300                 break;
4301         default:
4302                 goto out;
4303         }
4304
4305         switch (proto) {
4306         case IPPROTO_GRE:
4307                 return features;
4308         case IPPROTO_IPIP:
4309         case IPPROTO_IPV6:
4310                 if (mlx5e_tunnel_proto_supported(priv->mdev, IPPROTO_IPIP))
4311                         return features;
4312                 break;
4313         case IPPROTO_UDP:
4314                 udph = udp_hdr(skb);
4315                 port = be16_to_cpu(udph->dest);
4316
4317                 /* Verify if UDP port is being offloaded by HW */
4318                 if (mlx5_vxlan_lookup_port(priv->mdev->vxlan, port))
4319                         return features;
4320
4321 #if IS_ENABLED(CONFIG_GENEVE)
4322                 /* Support Geneve offload for default UDP port */
4323                 if (port == GENEVE_UDP_PORT && mlx5_geneve_tx_allowed(priv->mdev))
4324                         return features;
4325 #endif
4326         }
4327
4328 out:
4329         /* Disable CSUM and GSO if the udp dport is not offloaded by HW */
4330         return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
4331 }
4332
4333 netdev_features_t mlx5e_features_check(struct sk_buff *skb,
4334                                        struct net_device *netdev,
4335                                        netdev_features_t features)
4336 {
4337         struct mlx5e_priv *priv = netdev_priv(netdev);
4338
4339         features = vlan_features_check(skb, features);
4340         features = vxlan_features_check(skb, features);
4341
4342 #ifdef CONFIG_MLX5_EN_IPSEC
4343         if (mlx5e_ipsec_feature_check(skb, netdev, features))
4344                 return features;
4345 #endif
4346
4347         /* Validate if the tunneled packet is being offloaded by HW */
4348         if (skb->encapsulation &&
4349             (features & NETIF_F_CSUM_MASK || features & NETIF_F_GSO_MASK))
4350                 return mlx5e_tunnel_features_check(priv, skb, features);
4351
4352         return features;
4353 }
4354
4355 static void mlx5e_tx_timeout_work(struct work_struct *work)
4356 {
4357         struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
4358                                                tx_timeout_work);
4359         bool report_failed = false;
4360         int err;
4361         int i;
4362
4363         rtnl_lock();
4364         mutex_lock(&priv->state_lock);
4365
4366         if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
4367                 goto unlock;
4368
4369         for (i = 0; i < priv->channels.num * priv->channels.params.num_tc; i++) {
4370                 struct netdev_queue *dev_queue =
4371                         netdev_get_tx_queue(priv->netdev, i);
4372                 struct mlx5e_txqsq *sq = priv->txq2sq[i];
4373
4374                 if (!netif_xmit_stopped(dev_queue))
4375                         continue;
4376
4377                 if (mlx5e_reporter_tx_timeout(sq))
4378                         report_failed = true;
4379         }
4380
4381         if (!report_failed)
4382                 goto unlock;
4383
4384         err = mlx5e_safe_reopen_channels(priv);
4385         if (err)
4386                 netdev_err(priv->netdev,
4387                            "mlx5e_safe_reopen_channels failed recovering from a tx_timeout, err(%d).\n",
4388                            err);
4389
4390 unlock:
4391         mutex_unlock(&priv->state_lock);
4392         rtnl_unlock();
4393 }
4394
4395 static void mlx5e_tx_timeout(struct net_device *dev, unsigned int txqueue)
4396 {
4397         struct mlx5e_priv *priv = netdev_priv(dev);
4398
4399         netdev_err(dev, "TX timeout detected\n");
4400         queue_work(priv->wq, &priv->tx_timeout_work);
4401 }
4402
4403 static int mlx5e_xdp_allowed(struct mlx5e_priv *priv, struct bpf_prog *prog)
4404 {
4405         struct net_device *netdev = priv->netdev;
4406         struct mlx5e_channels new_channels = {};
4407
4408         if (priv->channels.params.lro_en) {
4409                 netdev_warn(netdev, "can't set XDP while LRO is on, disable LRO first\n");
4410                 return -EINVAL;
4411         }
4412
4413         if (MLX5_IPSEC_DEV(priv->mdev)) {
4414                 netdev_warn(netdev, "can't set XDP with IPSec offload\n");
4415                 return -EINVAL;
4416         }
4417
4418         new_channels.params = priv->channels.params;
4419         new_channels.params.xdp_prog = prog;
4420
4421         /* No XSK params: AF_XDP can't be enabled yet at the point of setting
4422          * the XDP program.
4423          */
4424         if (!mlx5e_rx_is_linear_skb(&new_channels.params, NULL)) {
4425                 netdev_warn(netdev, "XDP is not allowed with MTU(%d) > %d\n",
4426                             new_channels.params.sw_mtu,
4427                             mlx5e_xdp_max_mtu(&new_channels.params, NULL));
4428                 return -EINVAL;
4429         }
4430
4431         return 0;
4432 }
4433
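/* Install or remove an XDP program. A full channel reset is needed only
 * when going from no program to a program or vice versa; when merely
 * swapping one program for another, the RQs are briefly disabled and the
 * programs are exchanged in place.
 */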
4434 static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
4435 {
4436         struct mlx5e_priv *priv = netdev_priv(netdev);
4437         struct bpf_prog *old_prog;
4438         bool reset, was_opened;
4439         int err = 0;
4440         int i;
4441
4442         mutex_lock(&priv->state_lock);
4443
4444         if (prog) {
4445                 err = mlx5e_xdp_allowed(priv, prog);
4446                 if (err)
4447                         goto unlock;
4448         }
4449
4450         was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
4451         /* no need for full reset when exchanging programs */
4452         reset = (!priv->channels.params.xdp_prog || !prog);
4453
4454         if (was_opened && !reset)
4455                 /* num_channels is invariant here, so we can take the
4456                  * batched reference right upfront.
4457                  */
4458                 bpf_prog_add(prog, priv->channels.num);
4459
4460         if (was_opened && reset) {
4461                 struct mlx5e_channels new_channels = {};
4462
4463                 new_channels.params = priv->channels.params;
4464                 new_channels.params.xdp_prog = prog;
4465                 mlx5e_set_rq_type(priv->mdev, &new_channels.params);
4466                 old_prog = priv->channels.params.xdp_prog;
4467
4468                 err = mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL);
4469                 if (err)
4470                         goto unlock;
4471         } else {
4472                 /* Exchange programs; keep the extra prog reference we got from
4473                  * the caller, as long as we don't fail from this point onwards.
4474                  */
4475                 old_prog = xchg(&priv->channels.params.xdp_prog, prog);
4476         }
4477
4478         if (old_prog)
4479                 bpf_prog_put(old_prog);
4480
4481         if (!was_opened && reset) /* change RQ type according to priv->xdp_prog */
4482                 mlx5e_set_rq_type(priv->mdev, &priv->channels.params);
4483
4484         if (!was_opened || reset)
4485                 goto unlock;
4486
4487         /* When exchanging programs without a reset, update the ref counts
4488          * on behalf of the channels' RQs here.
4489          */
4490         for (i = 0; i < priv->channels.num; i++) {
4491                 struct mlx5e_channel *c = priv->channels.c[i];
4492                 bool xsk_open = test_bit(MLX5E_CHANNEL_STATE_XSK, c->state);
4493
4494                 clear_bit(MLX5E_RQ_STATE_ENABLED, &c->rq.state);
4495                 if (xsk_open)
4496                         clear_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state);
4497                 napi_synchronize(&c->napi);
4498                 /* prevent mlx5e_poll_rx_cq from accessing rq->xdp_prog */
4499
4500                 old_prog = xchg(&c->rq.xdp_prog, prog);
4501                 if (old_prog)
4502                         bpf_prog_put(old_prog);
4503
4504                 if (xsk_open) {
4505                         old_prog = xchg(&c->xskrq.xdp_prog, prog);
4506                         if (old_prog)
4507                                 bpf_prog_put(old_prog);
4508                 }
4509
4510                 set_bit(MLX5E_RQ_STATE_ENABLED, &c->rq.state);
4511                 if (xsk_open)
4512                         set_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state);
4513                 /* napi_schedule in case we have missed anything */
4514                 napi_schedule(&c->napi);
4515         }
4516
4517 unlock:
4518         mutex_unlock(&priv->state_lock);
4519         return err;
4520 }
4521
4522 static u32 mlx5e_xdp_query(struct net_device *dev)
4523 {
4524         struct mlx5e_priv *priv = netdev_priv(dev);
4525         const struct bpf_prog *xdp_prog;
4526         u32 prog_id = 0;
4527
4528         mutex_lock(&priv->state_lock);
4529         xdp_prog = priv->channels.params.xdp_prog;
4530         if (xdp_prog)
4531                 prog_id = xdp_prog->aux->id;
4532         mutex_unlock(&priv->state_lock);
4533
4534         return prog_id;
4535 }
4536
4537 static int mlx5e_xdp(struct net_device *dev, struct netdev_bpf *xdp)
4538 {
4539         switch (xdp->command) {
4540         case XDP_SETUP_PROG:
4541                 return mlx5e_xdp_set(dev, xdp->prog);
4542         case XDP_QUERY_PROG:
4543                 xdp->prog_id = mlx5e_xdp_query(dev);
4544                 return 0;
4545         case XDP_SETUP_XSK_UMEM:
4546                 return mlx5e_xsk_setup_umem(dev, xdp->xsk.umem,
4547                                             xdp->xsk.queue_id);
4548         default:
4549                 return -EINVAL;
4550         }
4551 }
4552
4553 #ifdef CONFIG_MLX5_ESWITCH
4554 static int mlx5e_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
4555                                 struct net_device *dev, u32 filter_mask,
4556                                 int nlflags)
4557 {
4558         struct mlx5e_priv *priv = netdev_priv(dev);
4559         struct mlx5_core_dev *mdev = priv->mdev;
4560         u8 mode, setting;
4561         int err;
4562
4563         err = mlx5_eswitch_get_vepa(mdev->priv.eswitch, &setting);
4564         if (err)
4565                 return err;
4566         mode = setting ? BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB;
4567         return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
4568                                        mode,
4569                                        0, 0, nlflags, filter_mask, NULL);
4570 }
4571
4572 static int mlx5e_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
4573                                 u16 flags, struct netlink_ext_ack *extack)
4574 {
4575         struct mlx5e_priv *priv = netdev_priv(dev);
4576         struct mlx5_core_dev *mdev = priv->mdev;
4577         struct nlattr *attr, *br_spec;
4578         u16 mode = BRIDGE_MODE_UNDEF;
4579         u8 setting;
4580         int rem;
4581
4582         br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
4583         if (!br_spec)
4584                 return -EINVAL;
4585
4586         nla_for_each_nested(attr, br_spec, rem) {
4587                 if (nla_type(attr) != IFLA_BRIDGE_MODE)
4588                         continue;
4589
4590                 if (nla_len(attr) < sizeof(mode))
4591                         return -EINVAL;
4592
4593                 mode = nla_get_u16(attr);
4594                 if (mode > BRIDGE_MODE_VEPA)
4595                         return -EINVAL;
4596
4597                 break;
4598         }
4599
4600         if (mode == BRIDGE_MODE_UNDEF)
4601                 return -EINVAL;
4602
4603         setting = (mode == BRIDGE_MODE_VEPA) ?  1 : 0;
4604         return mlx5_eswitch_set_vepa(mdev->priv.eswitch, setting);
4605 }
4606 #endif
4607
4608 const struct net_device_ops mlx5e_netdev_ops = {
4609         .ndo_open                = mlx5e_open,
4610         .ndo_stop                = mlx5e_close,
4611         .ndo_start_xmit          = mlx5e_xmit,
4612         .ndo_setup_tc            = mlx5e_setup_tc,
4613         .ndo_select_queue        = mlx5e_select_queue,
4614         .ndo_get_stats64         = mlx5e_get_stats,
4615         .ndo_set_rx_mode         = mlx5e_set_rx_mode,
4616         .ndo_set_mac_address     = mlx5e_set_mac,
4617         .ndo_vlan_rx_add_vid     = mlx5e_vlan_rx_add_vid,
4618         .ndo_vlan_rx_kill_vid    = mlx5e_vlan_rx_kill_vid,
4619         .ndo_set_features        = mlx5e_set_features,
4620         .ndo_fix_features        = mlx5e_fix_features,
4621         .ndo_change_mtu          = mlx5e_change_nic_mtu,
4622         .ndo_do_ioctl            = mlx5e_ioctl,
4623         .ndo_set_tx_maxrate      = mlx5e_set_tx_maxrate,
4624         .ndo_udp_tunnel_add      = mlx5e_add_vxlan_port,
4625         .ndo_udp_tunnel_del      = mlx5e_del_vxlan_port,
4626         .ndo_features_check      = mlx5e_features_check,
4627         .ndo_tx_timeout          = mlx5e_tx_timeout,
4628         .ndo_bpf                 = mlx5e_xdp,
4629         .ndo_xdp_xmit            = mlx5e_xdp_xmit,
4630         .ndo_xsk_wakeup          = mlx5e_xsk_wakeup,
4631 #ifdef CONFIG_MLX5_EN_ARFS
4632         .ndo_rx_flow_steer       = mlx5e_rx_flow_steer,
4633 #endif
4634 #ifdef CONFIG_MLX5_ESWITCH
4635         .ndo_bridge_setlink      = mlx5e_bridge_setlink,
4636         .ndo_bridge_getlink      = mlx5e_bridge_getlink,
4637
4638         /* SRIOV E-Switch NDOs */
4639         .ndo_set_vf_mac          = mlx5e_set_vf_mac,
4640         .ndo_set_vf_vlan         = mlx5e_set_vf_vlan,
4641         .ndo_set_vf_spoofchk     = mlx5e_set_vf_spoofchk,
4642         .ndo_set_vf_trust        = mlx5e_set_vf_trust,
4643         .ndo_set_vf_rate         = mlx5e_set_vf_rate,
4644         .ndo_get_vf_config       = mlx5e_get_vf_config,
4645         .ndo_set_vf_link_state   = mlx5e_set_vf_link_state,
4646         .ndo_get_vf_stats        = mlx5e_get_vf_stats,
4647 #endif
4648         .ndo_get_devlink_port    = mlx5e_get_devlink_port,
4649 };
4650
4651 static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev)
4652 {
4653         if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
4654                 return -EOPNOTSUPP;
4655         if (!MLX5_CAP_GEN(mdev, eth_net_offloads) ||
4656             !MLX5_CAP_GEN(mdev, nic_flow_table) ||
4657             !MLX5_CAP_ETH(mdev, csum_cap) ||
4658             !MLX5_CAP_ETH(mdev, max_lso_cap) ||
4659             !MLX5_CAP_ETH(mdev, vlan_cap) ||
4660             !MLX5_CAP_ETH(mdev, rss_ind_tbl_cap) ||
4661             MLX5_CAP_FLOWTABLE(mdev,
4662                                flow_table_properties_nic_receive.max_ft_level)
4663                                < 3) {
4664                 mlx5_core_warn(mdev,
4665                                "Not creating net device, some required device capabilities are missing\n");
4666                 return -EOPNOTSUPP;
4667         }
4668         if (!MLX5_CAP_ETH(mdev, self_lb_en_modifiable))
4669                 mlx5_core_warn(mdev, "Self loop back prevention is not supported\n");
4670         if (!MLX5_CAP_GEN(mdev, cq_moderation))
4671                 mlx5_core_warn(mdev, "CQ moderation is not supported\n");
4672
4673         return 0;
4674 }
4675
4676 void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len,
4677                                    int num_channels)
4678 {
4679         int i;
4680
4681         for (i = 0; i < len; i++)
4682                 indirection_rqt[i] = i % num_channels;
4683 }
4684
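/* The PCI link is considered "slow" when the maximum port speed exceeds the
 * available PCI bandwidth by more than MLX5E_SLOW_PCI_RATIO. Used to tune
 * defaults such as striding RQ, CQE compression and LRO.
 */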
4685 static bool slow_pci_heuristic(struct mlx5_core_dev *mdev)
4686 {
4687         u32 link_speed = 0;
4688         u32 pci_bw = 0;
4689
4690         mlx5e_port_max_linkspeed(mdev, &link_speed);
4691         pci_bw = pcie_bandwidth_available(mdev->pdev, NULL, NULL, NULL);
4692         mlx5_core_dbg_once(mdev, "Max link speed = %d, PCI BW = %d\n",
4693                            link_speed, pci_bw);
4694
4695 #define MLX5E_SLOW_PCI_RATIO (2)
4696
4697         return link_speed && pci_bw &&
4698                 link_speed > MLX5E_SLOW_PCI_RATIO * pci_bw;
4699 }
4700
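/* Static (non-DIM) default CQ moderation profiles; the usec budget differs
 * between EQE-based and CQE-based period modes.
 */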
4701 static struct dim_cq_moder mlx5e_get_def_tx_moderation(u8 cq_period_mode)
4702 {
4703         struct dim_cq_moder moder;
4704
4705         moder.cq_period_mode = cq_period_mode;
4706         moder.pkts = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS;
4707         moder.usec = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC;
4708         if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE)
4709                 moder.usec = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC_FROM_CQE;
4710
4711         return moder;
4712 }
4713
4714 static struct dim_cq_moder mlx5e_get_def_rx_moderation(u8 cq_period_mode)
4715 {
4716         struct dim_cq_moder moder;
4717
4718         moder.cq_period_mode = cq_period_mode;
4719         moder.pkts = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS;
4720         moder.usec = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC;
4721         if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE)
4722                 moder.usec = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE;
4723
4724         return moder;
4725 }
4726
4727 static u8 mlx5_to_net_dim_cq_period_mode(u8 cq_period_mode)
4728 {
4729         return cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE ?
4730                 DIM_CQ_PERIOD_MODE_START_FROM_CQE :
4731                 DIM_CQ_PERIOD_MODE_START_FROM_EQE;
4732 }
4733
4734 void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode)
4735 {
4736         if (params->tx_dim_enabled) {
4737                 u8 dim_period_mode = mlx5_to_net_dim_cq_period_mode(cq_period_mode);
4738
4739                 params->tx_cq_moderation = net_dim_get_def_tx_moderation(dim_period_mode);
4740         } else {
4741                 params->tx_cq_moderation = mlx5e_get_def_tx_moderation(cq_period_mode);
4742         }
4743
4744         MLX5E_SET_PFLAG(params, MLX5E_PFLAG_TX_CQE_BASED_MODER,
4745                         params->tx_cq_moderation.cq_period_mode ==
4746                                 MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
4747 }
4748
4749 void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode)
4750 {
4751         if (params->rx_dim_enabled) {
4752                 u8 dim_period_mode = mlx5_to_net_dim_cq_period_mode(cq_period_mode);
4753
4754                 params->rx_cq_moderation = net_dim_get_def_rx_moderation(dim_period_mode);
4755         } else {
4756                 params->rx_cq_moderation = mlx5e_get_def_rx_moderation(cq_period_mode);
4757         }
4758
4759         MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_BASED_MODER,
4760                         params->rx_cq_moderation.cq_period_mode ==
4761                                 MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
4762 }
4763
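/* Pick the smallest supported LRO timer period that is at least
 * wanted_timeout; fall back to the largest supported period.
 */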
4764 static u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout)
4765 {
4766         int i;
4767
4768         /* The supported periods are organized in ascending order */
4769         for (i = 0; i < MLX5E_LRO_TIMEOUT_ARR_SIZE - 1; i++)
4770                 if (MLX5_CAP_ETH(mdev, lro_timer_supported_periods[i]) >= wanted_timeout)
4771                         break;
4772
4773         return MLX5_CAP_ETH(mdev, lro_timer_supported_periods[i]);
4774 }
4775
4776 void mlx5e_build_rq_params(struct mlx5_core_dev *mdev,
4777                            struct mlx5e_params *params)
4778 {
4779         /* Prefer Striding RQ, unless any of the following holds:
4780          * - Striding RQ configuration is not possible/supported.
4781          * - Slow PCI heuristic.
4782          * - Legacy RQ would use linear SKB while Striding RQ would use non-linear.
4783          *
4784          * No XSK params: checking the availability of striding RQ in general.
4785          */
4786         if (!slow_pci_heuristic(mdev) &&
4787             mlx5e_striding_rq_possible(mdev, params) &&
4788             (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL) ||
4789              !mlx5e_rx_is_linear_skb(params, NULL)))
4790                 MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ, true);
4791         mlx5e_set_rq_type(mdev, params);
4792         mlx5e_init_rq_type_params(mdev, params);
4793 }
4794
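/* RSS defaults: Toeplitz hash with a random key, an indirection table spread
 * evenly over the channels, and per-traffic-type hash field selection.
 */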
4795 void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params,
4796                             u16 num_channels)
4797 {
4798         enum mlx5e_traffic_types tt;
4799
4800         rss_params->hfunc = ETH_RSS_HASH_TOP;
4801         netdev_rss_key_fill(rss_params->toeplitz_hash_key,
4802                             sizeof(rss_params->toeplitz_hash_key));
4803         mlx5e_build_default_indir_rqt(rss_params->indirection_rqt,
4804                                       MLX5E_INDIR_RQT_SIZE, num_channels);
4805         for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
4806                 rss_params->rx_hash_fields[tt] =
4807                         tirc_default_config[tt].rx_hash_fields;
4808 }
4809
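/* Build the default NIC parameters from device capabilities: channel count,
 * SQ/RQ sizes and RQ type, CQE compression, LRO, CQ moderation (DIM),
 * TX inline mode, RSS and the AF_XDP context.
 */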
4810 void mlx5e_build_nic_params(struct mlx5e_priv *priv,
4811                             struct mlx5e_xsk *xsk,
4812                             struct mlx5e_rss_params *rss_params,
4813                             struct mlx5e_params *params,
4814                             u16 mtu)
4815 {
4816         struct mlx5_core_dev *mdev = priv->mdev;
4817         u8 rx_cq_period_mode;
4818
4819         params->sw_mtu = mtu;
4820         params->hard_mtu = MLX5E_ETH_HARD_MTU;
4821         params->num_channels = min_t(unsigned int, MLX5E_MAX_NUM_CHANNELS / 2,
4822                                      priv->max_nch);
4823         params->num_tc       = 1;
4824
4825         /* SQ */
4826         params->log_sq_size = is_kdump_kernel() ?
4827                 MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE :
4828                 MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
4829
4830         /* XDP SQ */
4831         MLX5E_SET_PFLAG(params, MLX5E_PFLAG_XDP_TX_MPWQE,
4832                         MLX5_CAP_ETH(mdev, enhanced_multi_pkt_send_wqe));
4833
4834         /* set CQE compression */
4835         params->rx_cqe_compress_def = false;
4836         if (MLX5_CAP_GEN(mdev, cqe_compression) &&
4837             MLX5_CAP_GEN(mdev, vport_group_manager))
4838                 params->rx_cqe_compress_def = slow_pci_heuristic(mdev);
4839
4840         MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS, params->rx_cqe_compress_def);
4841         MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE, false);
4842
4843         /* RQ */
4844         mlx5e_build_rq_params(mdev, params);
4845
4846         /* HW LRO */
4847         if (MLX5_CAP_ETH(mdev, lro_cap) &&
4848             params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
4849                 /* No XSK params: checking the availability of striding RQ in general. */
4850                 if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL))
4851                         params->lro_en = !slow_pci_heuristic(mdev);
4852         }
4853         params->lro_timeout = mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT);
4854
4855         /* CQ moderation params */
4856         rx_cq_period_mode = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ?
4857                         MLX5_CQ_PERIOD_MODE_START_FROM_CQE :
4858                         MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
4859         params->rx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation);
4860         params->tx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation);
4861         mlx5e_set_rx_cq_mode_params(params, rx_cq_period_mode);
4862         mlx5e_set_tx_cq_mode_params(params, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
4863
4864         /* TX inline */
4865         mlx5_query_min_inline(mdev, &params->tx_min_inline_mode);
4866
4867         /* RSS */
4868         mlx5e_build_rss_params(rss_params, params->num_channels);
4869         params->tunneled_offload_en =
4870                 mlx5e_tunnel_inner_ft_supported(mdev);
4871
4872         /* AF_XDP */
4873         params->xsk = xsk;
4874 }
4875
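/* Query the MAC address from firmware; if it is zero and this function is not
 * the vport group manager, fall back to a random MAC address.
 */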
4876 static void mlx5e_set_netdev_dev_addr(struct net_device *netdev)
4877 {
4878         struct mlx5e_priv *priv = netdev_priv(netdev);
4879
4880         mlx5_query_mac_address(priv->mdev, netdev->dev_addr);
4881         if (is_zero_ether_addr(netdev->dev_addr) &&
4882             !MLX5_CAP_GEN(priv->mdev, vport_group_manager)) {
4883                 eth_hw_addr_random(netdev);
4884                 mlx5_core_info(priv->mdev, "Assigned random MAC address %pM\n", netdev->dev_addr);
4885         }
4886 }
4887
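/* Set up netdev ops and feature flags according to device capabilities:
 * VLAN/MPLS offloads, LRO, tunnel GSO (VXLAN/GENEVE/GRE/IP-in-IP), FCS
 * handling, TC and aRFS offloads, and the initial MAC address.
 */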
4888 static void mlx5e_build_nic_netdev(struct net_device *netdev)
4889 {
4890         struct mlx5e_priv *priv = netdev_priv(netdev);
4891         struct mlx5_core_dev *mdev = priv->mdev;
4892         bool fcs_supported;
4893         bool fcs_enabled;
4894
4895         SET_NETDEV_DEV(netdev, mdev->device);
4896
4897         netdev->netdev_ops = &mlx5e_netdev_ops;
4898
4899 #ifdef CONFIG_MLX5_CORE_EN_DCB
4900         if (MLX5_CAP_GEN(mdev, vport_group_manager) && MLX5_CAP_GEN(mdev, qos))
4901                 netdev->dcbnl_ops = &mlx5e_dcbnl_ops;
4902 #endif
4903
4904         netdev->watchdog_timeo    = 15 * HZ;
4905
4906         netdev->ethtool_ops       = &mlx5e_ethtool_ops;
4907
4908         netdev->vlan_features    |= NETIF_F_SG;
4909         netdev->vlan_features    |= NETIF_F_HW_CSUM;
4910         netdev->vlan_features    |= NETIF_F_GRO;
4911         netdev->vlan_features    |= NETIF_F_TSO;
4912         netdev->vlan_features    |= NETIF_F_TSO6;
4913         netdev->vlan_features    |= NETIF_F_RXCSUM;
4914         netdev->vlan_features    |= NETIF_F_RXHASH;
4915
4916         netdev->mpls_features    |= NETIF_F_SG;
4917         netdev->mpls_features    |= NETIF_F_HW_CSUM;
4918         netdev->mpls_features    |= NETIF_F_TSO;
4919         netdev->mpls_features    |= NETIF_F_TSO6;
4920
4921         netdev->hw_enc_features  |= NETIF_F_HW_VLAN_CTAG_TX;
4922         netdev->hw_enc_features  |= NETIF_F_HW_VLAN_CTAG_RX;
4923
4924         if (!!MLX5_CAP_ETH(mdev, lro_cap) &&
4925             mlx5e_check_fragmented_striding_rq_cap(mdev))
4926                 netdev->vlan_features    |= NETIF_F_LRO;
4927
4928         netdev->hw_features       = netdev->vlan_features;
4929         netdev->hw_features      |= NETIF_F_HW_VLAN_CTAG_TX;
4930         netdev->hw_features      |= NETIF_F_HW_VLAN_CTAG_RX;
4931         netdev->hw_features      |= NETIF_F_HW_VLAN_CTAG_FILTER;
4932         netdev->hw_features      |= NETIF_F_HW_VLAN_STAG_TX;
4933
4934         if (mlx5_vxlan_allowed(mdev->vxlan) || mlx5_geneve_tx_allowed(mdev) ||
4935             mlx5e_any_tunnel_proto_supported(mdev)) {
4936                 netdev->hw_enc_features |= NETIF_F_HW_CSUM;
4937                 netdev->hw_enc_features |= NETIF_F_TSO;
4938                 netdev->hw_enc_features |= NETIF_F_TSO6;
4939                 netdev->hw_enc_features |= NETIF_F_GSO_PARTIAL;
4940         }
4941
4942         if (mlx5_vxlan_allowed(mdev->vxlan) || mlx5_geneve_tx_allowed(mdev)) {
4943                 netdev->hw_features     |= NETIF_F_GSO_UDP_TUNNEL |
4944                                            NETIF_F_GSO_UDP_TUNNEL_CSUM;
4945                 netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL |
4946                                            NETIF_F_GSO_UDP_TUNNEL_CSUM;
4947                 netdev->gso_partial_features = NETIF_F_GSO_UDP_TUNNEL_CSUM;
4948                 netdev->vlan_features |= NETIF_F_GSO_UDP_TUNNEL |
4949                                          NETIF_F_GSO_UDP_TUNNEL_CSUM;
4950         }
4951
4952         if (mlx5e_tunnel_proto_supported(mdev, IPPROTO_GRE)) {
4953                 netdev->hw_features     |= NETIF_F_GSO_GRE |
4954                                            NETIF_F_GSO_GRE_CSUM;
4955                 netdev->hw_enc_features |= NETIF_F_GSO_GRE |
4956                                            NETIF_F_GSO_GRE_CSUM;
4957                 netdev->gso_partial_features |= NETIF_F_GSO_GRE |
4958                                                 NETIF_F_GSO_GRE_CSUM;
4959         }
4960
4961         if (mlx5e_tunnel_proto_supported(mdev, IPPROTO_IPIP)) {
4962                 netdev->hw_features |= NETIF_F_GSO_IPXIP4 |
4963                                        NETIF_F_GSO_IPXIP6;
4964                 netdev->hw_enc_features |= NETIF_F_GSO_IPXIP4 |
4965                                            NETIF_F_GSO_IPXIP6;
4966                 netdev->gso_partial_features |= NETIF_F_GSO_IPXIP4 |
4967                                                 NETIF_F_GSO_IPXIP6;
4968         }
4969
4970         netdev->hw_features                      |= NETIF_F_GSO_PARTIAL;
4971         netdev->gso_partial_features             |= NETIF_F_GSO_UDP_L4;
4972         netdev->hw_features                      |= NETIF_F_GSO_UDP_L4;
4973         netdev->features                         |= NETIF_F_GSO_UDP_L4;
4974
4975         mlx5_query_port_fcs(mdev, &fcs_supported, &fcs_enabled);
4976
4977         if (fcs_supported)
4978                 netdev->hw_features |= NETIF_F_RXALL;
4979
4980         if (MLX5_CAP_ETH(mdev, scatter_fcs))
4981                 netdev->hw_features |= NETIF_F_RXFCS;
4982
4983         netdev->features          = netdev->hw_features;
4984         if (!priv->channels.params.lro_en)
4985                 netdev->features  &= ~NETIF_F_LRO;
4986
4987         if (fcs_enabled)
4988                 netdev->features  &= ~NETIF_F_RXALL;
4989
4990         if (!priv->channels.params.scatter_fcs_en)
4991                 netdev->features  &= ~NETIF_F_RXFCS;
4992
4993         /* prefer CQE compression over rxhash */
4994         if (MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS))
4995                 netdev->features &= ~NETIF_F_RXHASH;
4996
4997 #define FT_CAP(f) MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_receive.f)
4998         if (FT_CAP(flow_modify_en) &&
4999             FT_CAP(modify_root) &&
5000             FT_CAP(identified_miss_table_mode) &&
5001             FT_CAP(flow_table_modify)) {
5002 #ifdef CONFIG_MLX5_ESWITCH
5003                 netdev->hw_features      |= NETIF_F_HW_TC;
5004 #endif
5005 #ifdef CONFIG_MLX5_EN_ARFS
5006                 netdev->hw_features      |= NETIF_F_NTUPLE;
5007 #endif
5008         }
5009
5010         netdev->features         |= NETIF_F_HIGHDMA;
5011         netdev->features         |= NETIF_F_HW_VLAN_STAG_FILTER;
5012
5013         netdev->priv_flags       |= IFF_UNICAST_FLT;
5014
5015         mlx5e_set_netdev_dev_addr(netdev);
5016         mlx5e_ipsec_build_netdev(priv);
5017         mlx5e_tls_build_netdev(priv);
5018 }
5019
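/* Allocate two queue counters: one shared by the data RQs and one dedicated
 * to the drop RQ. Allocation failures are not fatal; the counter ids simply
 * remain zero.
 */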
5020 void mlx5e_create_q_counters(struct mlx5e_priv *priv)
5021 {
5022         u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {};
5023         u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {};
5024         struct mlx5_core_dev *mdev = priv->mdev;
5025         int err;
5026
5027         MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER);
5028         err = mlx5_cmd_exec_inout(mdev, alloc_q_counter, in, out);
5029         if (!err)
5030                 priv->q_counter =
5031                         MLX5_GET(alloc_q_counter_out, out, counter_set_id);
5032
5033         err = mlx5_cmd_exec_inout(mdev, alloc_q_counter, in, out);
5034         if (!err)
5035                 priv->drop_rq_q_counter =
5036                         MLX5_GET(alloc_q_counter_out, out, counter_set_id);
5037 }
5038
5039 void mlx5e_destroy_q_counters(struct mlx5e_priv *priv)
5040 {
5041         u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
5042
5043         MLX5_SET(dealloc_q_counter_in, in, opcode,
5044                  MLX5_CMD_OP_DEALLOC_Q_COUNTER);
5045         if (priv->q_counter) {
5046                 MLX5_SET(dealloc_q_counter_in, in, counter_set_id,
5047                          priv->q_counter);
5048                 mlx5_cmd_exec_in(priv->mdev, dealloc_q_counter, in);
5049         }
5050
5051         if (priv->drop_rq_q_counter) {
5052                 MLX5_SET(dealloc_q_counter_in, in, counter_set_id,
5053                          priv->drop_rq_q_counter);
5054                 mlx5_cmd_exec_in(priv->mdev, dealloc_q_counter, in);
5055         }
5056 }
5057
5058 static int mlx5e_nic_init(struct mlx5_core_dev *mdev,
5059                           struct net_device *netdev,
5060                           const struct mlx5e_profile *profile,
5061                           void *ppriv)
5062 {
5063         struct mlx5e_priv *priv = netdev_priv(netdev);
5064         struct mlx5e_rss_params *rss = &priv->rss_params;
5065         int err;
5066
5067         err = mlx5e_netdev_init(netdev, priv, mdev, profile, ppriv);
5068         if (err)
5069                 return err;
5070
5071         mlx5e_build_nic_params(priv, &priv->xsk, rss, &priv->channels.params,
5072                                netdev->mtu);
5073
5074         mlx5e_timestamp_init(priv);
5075
5076         err = mlx5e_ipsec_init(priv);
5077         if (err)
5078                 mlx5_core_err(mdev, "IPSec initialization failed, %d\n", err);
5079         err = mlx5e_tls_init(priv);
5080         if (err)
5081                 mlx5_core_err(mdev, "TLS initialization failed, %d\n", err);
5082         mlx5e_build_nic_netdev(netdev);
5083         mlx5e_health_create_reporters(priv);
5084
5085         return 0;
5086 }
5087
5088 static void mlx5e_nic_cleanup(struct mlx5e_priv *priv)
5089 {
5090         mlx5e_health_destroy_reporters(priv);
5091         mlx5e_tls_cleanup(priv);
5092         mlx5e_ipsec_cleanup(priv);
5093         mlx5e_netdev_cleanup(priv->netdev, priv);
5094 }
5095
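/* RX-side init for the NIC profile, in dependency order: q counters, drop RQ,
 * indirect RQT, direct RQTs, indirect/direct TIRs, XSK RQTs/TIRs, flow
 * steering and TC. The error path unwinds in reverse order.
 */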
5096 static int mlx5e_init_nic_rx(struct mlx5e_priv *priv)
5097 {
5098         struct mlx5_core_dev *mdev = priv->mdev;
5099         int err;
5100
5101         mlx5e_create_q_counters(priv);
5102
5103         err = mlx5e_open_drop_rq(priv, &priv->drop_rq);
5104         if (err) {
5105                 mlx5_core_err(mdev, "open drop rq failed, %d\n", err);
5106                 goto err_destroy_q_counters;
5107         }
5108
5109         err = mlx5e_create_indirect_rqt(priv);
5110         if (err)
5111                 goto err_close_drop_rq;
5112
5113         err = mlx5e_create_direct_rqts(priv, priv->direct_tir);
5114         if (err)
5115                 goto err_destroy_indirect_rqts;
5116
5117         err = mlx5e_create_indirect_tirs(priv, true);
5118         if (err)
5119                 goto err_destroy_direct_rqts;
5120
5121         err = mlx5e_create_direct_tirs(priv, priv->direct_tir);
5122         if (err)
5123                 goto err_destroy_indirect_tirs;
5124
5125         err = mlx5e_create_direct_rqts(priv, priv->xsk_tir);
5126         if (unlikely(err))
5127                 goto err_destroy_direct_tirs;
5128
5129         err = mlx5e_create_direct_tirs(priv, priv->xsk_tir);
5130         if (unlikely(err))
5131                 goto err_destroy_xsk_rqts;
5132
5133         err = mlx5e_create_flow_steering(priv);
5134         if (err) {
5135                 mlx5_core_warn(mdev, "create flow steering failed, %d\n", err);
5136                 goto err_destroy_xsk_tirs;
5137         }
5138
5139         err = mlx5e_tc_nic_init(priv);
5140         if (err)
5141                 goto err_destroy_flow_steering;
5142
5143         return 0;
5144
5145 err_destroy_flow_steering:
5146         mlx5e_destroy_flow_steering(priv);
5147 err_destroy_xsk_tirs:
5148         mlx5e_destroy_direct_tirs(priv, priv->xsk_tir);
5149 err_destroy_xsk_rqts:
5150         mlx5e_destroy_direct_rqts(priv, priv->xsk_tir);
5151 err_destroy_direct_tirs:
5152         mlx5e_destroy_direct_tirs(priv, priv->direct_tir);
5153 err_destroy_indirect_tirs:
5154         mlx5e_destroy_indirect_tirs(priv, true);
5155 err_destroy_direct_rqts:
5156         mlx5e_destroy_direct_rqts(priv, priv->direct_tir);
5157 err_destroy_indirect_rqts:
5158         mlx5e_destroy_rqt(priv, &priv->indir_rqt);
5159 err_close_drop_rq:
5160         mlx5e_close_drop_rq(&priv->drop_rq);
5161 err_destroy_q_counters:
5162         mlx5e_destroy_q_counters(priv);
5163         return err;
5164 }
5165
5166 static void mlx5e_cleanup_nic_rx(struct mlx5e_priv *priv)
5167 {
5168         mlx5e_tc_nic_cleanup(priv);
5169         mlx5e_destroy_flow_steering(priv);
5170         mlx5e_destroy_direct_tirs(priv, priv->xsk_tir);
5171         mlx5e_destroy_direct_rqts(priv, priv->xsk_tir);
5172         mlx5e_destroy_direct_tirs(priv, priv->direct_tir);
5173         mlx5e_destroy_indirect_tirs(priv, true);
5174         mlx5e_destroy_direct_rqts(priv, priv->direct_tir);
5175         mlx5e_destroy_rqt(priv, &priv->indir_rqt);
5176         mlx5e_close_drop_rq(&priv->drop_rq);
5177         mlx5e_destroy_q_counters(priv);
5178 }
5179
5180 static int mlx5e_init_nic_tx(struct mlx5e_priv *priv)
5181 {
5182         int err;
5183
5184         err = mlx5e_create_tises(priv);
5185         if (err) {
5186                 mlx5_core_warn(priv->mdev, "create tises failed, %d\n", err);
5187                 return err;
5188         }
5189
5190 #ifdef CONFIG_MLX5_CORE_EN_DCB
5191         mlx5e_dcbnl_initialize(priv);
5192 #endif
5193         return 0;
5194 }
5195
5196 static void mlx5e_nic_enable(struct mlx5e_priv *priv)
5197 {
5198         struct net_device *netdev = priv->netdev;
5199         struct mlx5_core_dev *mdev = priv->mdev;
5200
5201         mlx5e_init_l2_addr(priv);
5202
5203         /* Mark the link as currently not needed by the driver */
5204         if (!netif_running(netdev))
5205                 mlx5_set_port_admin_status(mdev, MLX5_PORT_DOWN);
5206
5207         mlx5e_set_netdev_mtu_boundaries(priv);
5208         mlx5e_set_dev_port_mtu(priv);
5209
5210         mlx5_lag_add(mdev, netdev);
5211
5212         mlx5e_enable_async_events(priv);
5213         if (mlx5e_monitor_counter_supported(priv))
5214                 mlx5e_monitor_counter_init(priv);
5215
5216         mlx5e_hv_vhca_stats_create(priv);
5217         if (netdev->reg_state != NETREG_REGISTERED)
5218                 return;
5219 #ifdef CONFIG_MLX5_CORE_EN_DCB
5220         mlx5e_dcbnl_init_app(priv);
5221 #endif
5222
5223         queue_work(priv->wq, &priv->set_rx_mode_work);
5224
5225         rtnl_lock();
5226         if (netif_running(netdev))
5227                 mlx5e_open(netdev);
5228         netif_device_attach(netdev);
5229         rtnl_unlock();
5230 }
5231
5232 static void mlx5e_nic_disable(struct mlx5e_priv *priv)
5233 {
5234         struct mlx5_core_dev *mdev = priv->mdev;
5235
5236 #ifdef CONFIG_MLX5_CORE_EN_DCB
5237         if (priv->netdev->reg_state == NETREG_REGISTERED)
5238                 mlx5e_dcbnl_delete_app(priv);
5239 #endif
5240
5241         rtnl_lock();
5242         if (netif_running(priv->netdev))
5243                 mlx5e_close(priv->netdev);
5244         netif_device_detach(priv->netdev);
5245         rtnl_unlock();
5246
5247         queue_work(priv->wq, &priv->set_rx_mode_work);
5248
5249         mlx5e_hv_vhca_stats_destroy(priv);
5250         if (mlx5e_monitor_counter_supported(priv))
5251                 mlx5e_monitor_counter_cleanup(priv);
5252
5253         mlx5e_disable_async_events(priv);
5254         mlx5_lag_remove(mdev);
5255 }
5256
5257 int mlx5e_update_nic_rx(struct mlx5e_priv *priv)
5258 {
5259         return mlx5e_refresh_tirs(priv, false);
5260 }
5261
5262 static const struct mlx5e_profile mlx5e_nic_profile = {
5263         .init              = mlx5e_nic_init,
5264         .cleanup           = mlx5e_nic_cleanup,
5265         .init_rx           = mlx5e_init_nic_rx,
5266         .cleanup_rx        = mlx5e_cleanup_nic_rx,
5267         .init_tx           = mlx5e_init_nic_tx,
5268         .cleanup_tx        = mlx5e_cleanup_nic_tx,
5269         .enable            = mlx5e_nic_enable,
5270         .disable           = mlx5e_nic_disable,
5271         .update_rx         = mlx5e_update_nic_rx,
5272         .update_stats      = mlx5e_update_ndo_stats,
5273         .update_carrier    = mlx5e_update_carrier,
5274         .rx_handlers.handle_rx_cqe       = mlx5e_handle_rx_cqe,
5275         .rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq,
5276         .max_tc            = MLX5E_MAX_NUM_TC,
5277         .rq_groups         = MLX5E_NUM_RQ_GROUPS(XSK),
5278         .stats_grps        = mlx5e_nic_stats_grps,
5279         .stats_grps_num    = mlx5e_nic_stats_grps_num,
5280 };
5281
5282 /* mlx5e generic netdev management API (move to en_common.c) */
5283
5284 /* mlx5e_netdev_init/cleanup must be called from profile->init/cleanup callbacks */
5285 int mlx5e_netdev_init(struct net_device *netdev,
5286                       struct mlx5e_priv *priv,
5287                       struct mlx5_core_dev *mdev,
5288                       const struct mlx5e_profile *profile,
5289                       void *ppriv)
5290 {
5291         /* priv init */
5292         priv->mdev        = mdev;
5293         priv->netdev      = netdev;
5294         priv->profile     = profile;
5295         priv->ppriv       = ppriv;
5296         priv->msglevel    = MLX5E_MSG_LEVEL;
5297         priv->max_nch     = netdev->num_rx_queues / max_t(u8, profile->rq_groups, 1);
5298         priv->max_opened_tc = 1;
5299
5300         if (!alloc_cpumask_var(&priv->scratchpad.cpumask, GFP_KERNEL))
5301                 return -ENOMEM;
5302
5303         mutex_init(&priv->state_lock);
5304         INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work);
5305         INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work);
5306         INIT_WORK(&priv->tx_timeout_work, mlx5e_tx_timeout_work);
5307         INIT_WORK(&priv->update_stats_work, mlx5e_update_stats_work);
5308
5309         priv->wq = create_singlethread_workqueue("mlx5e");
5310         if (!priv->wq)
5311                 goto err_free_cpumask;
5312
5313         /* netdev init */
5314         netif_carrier_off(netdev);
5315
5316 #ifdef CONFIG_MLX5_EN_ARFS
5317         netdev->rx_cpu_rmap = mlx5_eq_table_get_rmap(mdev);
5318 #endif
5319
5320         return 0;
5321
5322 err_free_cpumask:
5323         free_cpumask_var(priv->scratchpad.cpumask);
5324
5325         return -ENOMEM;
5326 }
5327
5328 void mlx5e_netdev_cleanup(struct net_device *netdev, struct mlx5e_priv *priv)
5329 {
5330         destroy_workqueue(priv->wq);
5331         free_cpumask_var(priv->scratchpad.cpumask);
5332 }
5333
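/* Allocate a net_device sized for the profile (nch * max_tc TX queues,
 * nch * rq_groups RX queues) and run the profile's init callback.
 */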
5334 struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev,
5335                                        const struct mlx5e_profile *profile,
5336                                        int nch,
5337                                        void *ppriv)
5338 {
5339         struct net_device *netdev;
5340         int err;
5341
5342         netdev = alloc_etherdev_mqs(sizeof(struct mlx5e_priv),
5343                                     nch * profile->max_tc,
5344                                     nch * profile->rq_groups);
5345         if (!netdev) {
5346                 mlx5_core_err(mdev, "alloc_etherdev_mqs() failed\n");
5347                 return NULL;
5348         }
5349
5350         err = profile->init(mdev, netdev, profile, ppriv);
5351         if (err) {
5352                 mlx5_core_err(mdev, "failed to init mlx5e profile %d\n", err);
5353                 goto err_free_netdev;
5354         }
5355
5356         return netdev;
5357
5358 err_free_netdev:
5359         free_netdev(netdev);
5360
5361         return NULL;
5362 }
5363
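/* Attach the netdev to its profile: clamp the channel count to the current
 * device maximum, update the real number of queues (under rtnl if the netdev
 * is already registered), then run the profile init_tx/init_rx/enable
 * callbacks.
 */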
5364 int mlx5e_attach_netdev(struct mlx5e_priv *priv)
5365 {
5366         const bool take_rtnl = priv->netdev->reg_state == NETREG_REGISTERED;
5367         const struct mlx5e_profile *profile;
5368         int max_nch;
5369         int err;
5370
5371         profile = priv->profile;
5372         clear_bit(MLX5E_STATE_DESTROYING, &priv->state);
5373
5374         /* max number of channels may have changed */
5375         max_nch = mlx5e_get_max_num_channels(priv->mdev);
5376         if (priv->channels.params.num_channels > max_nch) {
5377                 mlx5_core_warn(priv->mdev, "MLX5E: Reducing number of channels to %d\n", max_nch);
5378                 /* Reducing the number of channels - RXFH has to be reset, and
5379                  * mlx5e_num_channels_changed below will build the RQT.
5380                  */
5381                 priv->netdev->priv_flags &= ~IFF_RXFH_CONFIGURED;
5382                 priv->channels.params.num_channels = max_nch;
5383         }
5384         /* 1. Set the real number of queues in the kernel the first time.
5385          * 2. Set our default XPS cpumask.
5386          * 3. Build the RQT.
5387          *
5388          * rtnl_lock is required by netif_set_real_num_*_queues in case the
5389          * netdev has been registered by this point (if this function was called
5390          * in the reload or resume flow).
5391          */
5392         if (take_rtnl)
5393                 rtnl_lock();
5394         err = mlx5e_num_channels_changed(priv);
5395         if (take_rtnl)
5396                 rtnl_unlock();
5397         if (err)
5398                 goto out;
5399
5400         err = profile->init_tx(priv);
5401         if (err)
5402                 goto out;
5403
5404         err = profile->init_rx(priv);
5405         if (err)
5406                 goto err_cleanup_tx;
5407
5408         if (profile->enable)
5409                 profile->enable(priv);
5410
5411         return 0;
5412
5413 err_cleanup_tx:
5414         profile->cleanup_tx(priv);
5415
5416 out:
5417         return err;
5418 }
5419
5420 void mlx5e_detach_netdev(struct mlx5e_priv *priv)
5421 {
5422         const struct mlx5e_profile *profile = priv->profile;
5423
5424         set_bit(MLX5E_STATE_DESTROYING, &priv->state);
5425
5426         if (profile->disable)
5427                 profile->disable(priv);
5428         flush_workqueue(priv->wq);
5429
5430         profile->cleanup_rx(priv);
5431         profile->cleanup_tx(priv);
5432         cancel_work_sync(&priv->update_stats_work);
5433 }
5434
5435 void mlx5e_destroy_netdev(struct mlx5e_priv *priv)
5436 {
5437         const struct mlx5e_profile *profile = priv->profile;
5438         struct net_device *netdev = priv->netdev;
5439
5440         if (profile->cleanup)
5441                 profile->cleanup(priv);
5442         free_netdev(netdev);
5443 }
5444
5445 /* The scope of mlx5e_attach and mlx5e_detach should be limited to creating
5446  * and destroying hardware contexts and connecting them to the current netdev.
5447  */
5448 static int mlx5e_attach(struct mlx5_core_dev *mdev, void *vpriv)
5449 {
5450         struct mlx5e_priv *priv = vpriv;
5451         struct net_device *netdev = priv->netdev;
5452         int err;
5453
5454         if (netif_device_present(netdev))
5455                 return 0;
5456
5457         err = mlx5e_create_mdev_resources(mdev);
5458         if (err)
5459                 return err;
5460
5461         err = mlx5e_attach_netdev(priv);
5462         if (err) {
5463                 mlx5e_destroy_mdev_resources(mdev);
5464                 return err;
5465         }
5466
5467         return 0;
5468 }
5469
5470 static void mlx5e_detach(struct mlx5_core_dev *mdev, void *vpriv)
5471 {
5472         struct mlx5e_priv *priv = vpriv;
5473         struct net_device *netdev = priv->netdev;
5474
5475 #ifdef CONFIG_MLX5_ESWITCH
5476         if (MLX5_ESWITCH_MANAGER(mdev) && vpriv == mdev)
5477                 return;
5478 #endif
5479
5480         if (!netif_device_present(netdev))
5481                 return;
5482
5483         mlx5e_detach_netdev(priv);
5484         mlx5e_destroy_mdev_resources(mdev);
5485 }
5486
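/* mlx5_interface .add callback: in eswitch offloads mode only the vport
 * representors are registered; otherwise create the NIC netdev, attach it,
 * and register its devlink port and net_device.
 */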
5487 static void *mlx5e_add(struct mlx5_core_dev *mdev)
5488 {
5489         struct net_device *netdev;
5490         void *priv;
5491         int err;
5492         int nch;
5493
5494         err = mlx5e_check_required_hca_cap(mdev);
5495         if (err)
5496                 return NULL;
5497
5498 #ifdef CONFIG_MLX5_ESWITCH
5499         if (MLX5_ESWITCH_MANAGER(mdev) &&
5500             mlx5_eswitch_mode(mdev->priv.eswitch) == MLX5_ESWITCH_OFFLOADS) {
5501                 mlx5e_rep_register_vport_reps(mdev);
5502                 return mdev;
5503         }
5504 #endif
5505
5506         nch = mlx5e_get_max_num_channels(mdev);
5507         netdev = mlx5e_create_netdev(mdev, &mlx5e_nic_profile, nch, NULL);
5508         if (!netdev) {
5509                 mlx5_core_err(mdev, "mlx5e_create_netdev failed\n");
5510                 return NULL;
5511         }
5512
5513         dev_net_set(netdev, mlx5_core_net(mdev));
5514         priv = netdev_priv(netdev);
5515
5516         err = mlx5e_attach(mdev, priv);
5517         if (err) {
5518                 mlx5_core_err(mdev, "mlx5e_attach failed, %d\n", err);
5519                 goto err_destroy_netdev;
5520         }
5521
5522         err = mlx5e_devlink_port_register(priv);
5523         if (err) {
5524                 mlx5_core_err(mdev, "mlx5e_devlink_port_register failed, %d\n", err);
5525                 goto err_detach;
5526         }
5527
5528         err = register_netdev(netdev);
5529         if (err) {
5530                 mlx5_core_err(mdev, "register_netdev failed, %d\n", err);
5531                 goto err_devlink_port_unregister;
5532         }
5533
5534         mlx5e_devlink_port_type_eth_set(priv);
5535
5536 #ifdef CONFIG_MLX5_CORE_EN_DCB
5537         mlx5e_dcbnl_init_app(priv);
5538 #endif
5539         return priv;
5540
5541 err_devlink_port_unregister:
5542         mlx5e_devlink_port_unregister(priv);
5543 err_detach:
5544         mlx5e_detach(mdev, priv);
5545 err_destroy_netdev:
5546         mlx5e_destroy_netdev(priv);
5547         return NULL;
5548 }
5549
5550 static void mlx5e_remove(struct mlx5_core_dev *mdev, void *vpriv)
5551 {
5552         struct mlx5e_priv *priv;
5553
5554 #ifdef CONFIG_MLX5_ESWITCH
5555         if (MLX5_ESWITCH_MANAGER(mdev) && vpriv == mdev) {
5556                 mlx5e_rep_unregister_vport_reps(mdev);
5557                 return;
5558         }
5559 #endif
5560         priv = vpriv;
5561 #ifdef CONFIG_MLX5_CORE_EN_DCB
5562         mlx5e_dcbnl_delete_app(priv);
5563 #endif
5564         unregister_netdev(priv->netdev);
5565         mlx5e_devlink_port_unregister(priv);
5566         mlx5e_detach(mdev, vpriv);
5567         mlx5e_destroy_netdev(priv);
5568 }
5569
5570 static struct mlx5_interface mlx5e_interface = {
5571         .add       = mlx5e_add,
5572         .remove    = mlx5e_remove,
5573         .attach    = mlx5e_attach,
5574         .detach    = mlx5e_detach,
5575         .protocol  = MLX5_INTERFACE_PROTOCOL_ETH,
5576 };
5577
5578 void mlx5e_init(void)
5579 {
5580         mlx5e_ipsec_build_inverse_table();
5581         mlx5e_build_ptys2ethtool_map();
5582         mlx5_register_interface(&mlx5e_interface);
5583 }
5584
5585 void mlx5e_cleanup(void)
5586 {
5587         mlx5_unregister_interface(&mlx5e_interface);
5588 }