2 * Copyright (c) 2015-2016, Mellanox Technologies. All rights reserved.
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
35 #include <linux/if_vlan.h>
36 #include <linux/etherdevice.h>
37 #include <linux/timecounter.h>
38 #include <linux/net_tstamp.h>
39 #include <linux/crash_dump.h>
40 #include <linux/mlx5/driver.h>
41 #include <linux/mlx5/qp.h>
42 #include <linux/mlx5/cq.h>
43 #include <linux/mlx5/port.h>
44 #include <linux/mlx5/vport.h>
45 #include <linux/mlx5/transobj.h>
46 #include <linux/mlx5/fs.h>
47 #include <linux/rhashtable.h>
48 #include <net/udp_tunnel.h>
49 #include <net/switchdev.h>
51 #include <linux/dim.h>
52 #include <linux/bits.h>
54 #include "mlx5_core.h"
59 #include "lib/hv_vhca.h"
60 #include "lib/clock.h"
61 #include "en/rx_res.h"
63 extern const struct net_device_ops mlx5e_netdev_ops;
66 #define MLX5E_METADATA_ETHER_TYPE (0x8CE4)
67 #define MLX5E_METADATA_ETHER_LEN 8
69 #define MLX5E_ETH_HARD_MTU (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN)
/* Convert between the hardware MTU and the software MTU by subtracting or
 * adding params->hard_mtu.
 */
#define MLX5E_HW2SW_MTU(params, hwmtu) ((hwmtu) - (params)->hard_mtu)
#define MLX5E_SW2HW_MTU(params, swmtu) ((swmtu) + (params)->hard_mtu)
74 #define MLX5E_MAX_NUM_TC 8
75 #define MLX5E_MAX_NUM_MQPRIO_CH_TC TC_QOPT_MAX_QUEUE
77 #define MLX5_RX_HEADROOM NET_SKB_PAD
78 #define MLX5_SKB_FRAG_SZ(len) (SKB_DATA_ALIGN(len) + \
79 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
81 #define MLX5E_RX_MAX_HEAD (256)
82 #define MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE (9)
83 #define MLX5E_SHAMPO_WQ_HEADER_PER_PAGE (PAGE_SIZE >> MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE)
84 #define MLX5E_SHAMPO_WQ_BASE_HEAD_ENTRY_SIZE (64)
85 #define MLX5E_SHAMPO_WQ_RESRV_SIZE (64 * 1024)
86 #define MLX5E_SHAMPO_WQ_BASE_RESRV_SIZE (4096)
88 #define MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(mdev) \
89 (6 + MLX5_CAP_GEN(mdev, cache_line_128byte)) /* HW restriction */
90 #define MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, req) \
91 max_t(u32, MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(mdev), req)
92 #define MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev) \
93 MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, order_base_2(MLX5E_RX_MAX_HEAD))
95 #define MLX5_MPWRQ_LOG_WQE_SZ 18
96 #define MLX5_MPWRQ_WQE_PAGE_ORDER (MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT > 0 ? \
97 MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT : 0)
98 #define MLX5_MPWRQ_PAGES_PER_WQE BIT(MLX5_MPWRQ_WQE_PAGE_ORDER)
/* MTT accounting helpers:
 *  - MLX5_ALIGN_MTTS() rounds an MTT count up to a multiple of 8;
 *  - MLX5_ALIGNED_MTTS_OCTW() halves an (already aligned) MTT count;
 *  - MLX5_MTT_OCTW() does both, i.e. aligns first and then halves.
 */
#define MLX5_ALIGN_MTTS(mtts)		(ALIGN(mtts, 8))
#define MLX5_ALIGNED_MTTS_OCTW(mtts)	((mtts) / 2)
#define MLX5_MTT_OCTW(mtts)		(MLX5_ALIGNED_MTTS_OCTW(MLX5_ALIGN_MTTS(mtts)))
/* Add another page to MLX5E_REQUIRED_WQE_MTTS as a buffer between
 * WQEs. This page will absorb write overflow by the hardware when
 * receiving packets larger than the MTU. These oversized packets are
 * dropped by the driver at a later stage.
 */
#define MLX5E_REQUIRED_WQE_MTTS (MLX5_ALIGN_MTTS(MLX5_MPWRQ_PAGES_PER_WQE + 1))
/* MTT entries needed for @wqes WQEs. The argument is parenthesized so that
 * compound expressions such as (a + b) are multiplied as a whole.
 */
#define MLX5E_REQUIRED_MTTS(wqes) ((wqes) * MLX5E_REQUIRED_WQE_MTTS)
110 #define MLX5E_MAX_RQ_NUM_MTTS \
111 ((1 << 16) * 2) /* So that MLX5_MTT_OCTW(num_mtts) fits into u16 */
112 #define MLX5E_ORDER2_MAX_PACKET_MTU (order_base_2(10 * 1024))
113 #define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW \
114 (ilog2(MLX5E_MAX_RQ_NUM_MTTS / MLX5E_REQUIRED_WQE_MTTS))
115 #define MLX5E_LOG_MAX_RQ_NUM_PACKETS_MPW \
116 (MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW + \
117 (MLX5_MPWRQ_LOG_WQE_SZ - MLX5E_ORDER2_MAX_PACKET_MTU))
119 #define MLX5E_MIN_SKB_FRAG_SZ (MLX5_SKB_FRAG_SZ(MLX5_RX_HEADROOM))
120 #define MLX5E_LOG_MAX_RX_WQE_BULK \
121 (ilog2(PAGE_SIZE / roundup_pow_of_two(MLX5E_MIN_SKB_FRAG_SZ)))
123 #define MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE 0x6
124 #define MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE 0xa
125 #define MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE 0xd
127 #define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE (1 + MLX5E_LOG_MAX_RX_WQE_BULK)
128 #define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE 0xa
129 #define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE min_t(u8, 0xd, \
130 MLX5E_LOG_MAX_RQ_NUM_PACKETS_MPW)
132 #define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW 0x2
134 #define MLX5E_DEFAULT_LRO_TIMEOUT 32
135 #define MLX5E_LRO_TIMEOUT_ARR_SIZE 4
137 #define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC 0x10
138 #define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE 0x3
139 #define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS 0x20
140 #define MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC 0x10
141 #define MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC_FROM_CQE 0x10
142 #define MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS 0x20
143 #define MLX5E_PARAMS_DEFAULT_MIN_RX_WQES 0x80
144 #define MLX5E_PARAMS_DEFAULT_MIN_RX_WQES_MPW 0x2
146 #define MLX5E_MIN_NUM_CHANNELS 0x1
147 #define MLX5E_MAX_NUM_CHANNELS (MLX5E_INDIR_RQT_SIZE / 2)
148 #define MLX5E_TX_CQ_POLL_BUDGET 128
149 #define MLX5E_TX_XSK_POLL_BUDGET 64
150 #define MLX5E_SQ_RECOVER_MIN_INTERVAL 500 /* msecs */
152 #define MLX5E_UMR_WQE_INLINE_SZ \
153 (sizeof(struct mlx5e_umr_wqe) + \
154 ALIGN(MLX5_MPWRQ_PAGES_PER_WQE * sizeof(struct mlx5_mtt), \
155 MLX5_UMR_MTT_ALIGNMENT))
156 #define MLX5E_UMR_WQEBBS \
157 (DIV_ROUND_UP(MLX5E_UMR_WQE_INLINE_SZ, MLX5_SEND_WQE_BB))
/* Size in bytes of a UMR WQE followed by @sgl_len KLM entries. */
#define MLX5E_KLM_UMR_WQE_SZ(sgl_len) \
	(sizeof(struct mlx5e_umr_wqe) + \
	 (sizeof(struct mlx5_klm) * (sgl_len)))

/* The same size expressed in MLX5_SEND_WQE_BB units, rounded up. */
#define MLX5E_KLM_UMR_WQEBBS(klm_entries) \
	(DIV_ROUND_UP(MLX5E_KLM_UMR_WQE_SZ(klm_entries), MLX5_SEND_WQE_BB))

/* The same size expressed in MLX5_SEND_WQE_DS units, rounded up. */
#define MLX5E_KLM_UMR_DS_CNT(klm_entries) \
	(DIV_ROUND_UP(MLX5E_KLM_UMR_WQE_SZ(klm_entries), MLX5_SEND_WQE_DS))

/* Number of KLM entries that fit in @wqe_size bytes after the UMR WQE header. */
#define MLX5E_KLM_MAX_ENTRIES_PER_WQE(wqe_size) \
	(((wqe_size) - sizeof(struct mlx5e_umr_wqe)) / sizeof(struct mlx5_klm))

/* As above, rounded down to a multiple of MLX5_UMR_KLM_ALIGNMENT. */
#define MLX5E_KLM_ENTRIES_PER_WQE(wqe_size) \
	(ALIGN_DOWN(MLX5E_KLM_MAX_ENTRIES_PER_WQE(wqe_size), MLX5_UMR_KLM_ALIGNMENT))

#define MLX5E_MAX_KLM_PER_WQE \
	(MLX5E_KLM_ENTRIES_PER_WQE(MLX5E_TX_MPW_MAX_NUM_DS << MLX5_MKEY_BSF_OCTO_SIZE))
178 #define MLX5E_MSG_LEVEL NETIF_MSG_LINK
180 #define mlx5e_dbg(mlevel, priv, format, ...) \
182 if (NETIF_MSG_##mlevel & (priv)->msglevel) \
183 netdev_warn(priv->netdev, format, \
187 #define mlx5e_state_dereference(priv, p) \
188 rcu_dereference_protected((p), lockdep_is_held(&(priv)->state_lock))
190 enum mlx5e_rq_group {
191 MLX5E_RQ_GROUP_REGULAR,
/* Evaluates to the value of enumerator MLX5E_RQ_GROUP_<g> plus one. */
#define MLX5E_NUM_RQ_GROUPS(g) (MLX5E_RQ_GROUP_##g + 1)
196 static inline u8 mlx5e_get_num_lag_ports(struct mlx5_core_dev *mdev)
198 if (mlx5_lag_is_lacp_owner(mdev))
201 return clamp_t(u8, MLX5_CAP_GEN(mdev, num_lag_ports), 1, MLX5_MAX_PORTS);
204 static inline u16 mlx5_min_rx_wqes(int wq_type, u32 wq_size)
207 case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
208 return min_t(u16, MLX5E_PARAMS_DEFAULT_MIN_RX_WQES_MPW,
211 return min_t(u16, MLX5E_PARAMS_DEFAULT_MIN_RX_WQES,
216 /* Use this function to get max num channels (rxqs/txqs) only to create netdev */
217 static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev)
219 return is_kdump_kernel() ?
220 MLX5E_MIN_NUM_CHANNELS :
221 min_t(int, mlx5_comp_vectors_count(mdev), MLX5E_MAX_NUM_CHANNELS);
224 struct mlx5e_tx_wqe {
225 struct mlx5_wqe_ctrl_seg ctrl;
226 struct mlx5_wqe_eth_seg eth;
227 struct mlx5_wqe_data_seg data[];
230 struct mlx5e_rx_wqe_ll {
231 struct mlx5_wqe_srq_next_seg next;
232 struct mlx5_wqe_data_seg data[];
235 struct mlx5e_rx_wqe_cyc {
236 struct mlx5_wqe_data_seg data[0];
239 struct mlx5e_umr_wqe {
240 struct mlx5_wqe_ctrl_seg ctrl;
241 struct mlx5_wqe_umr_ctrl_seg uctrl;
242 struct mlx5_mkey_seg mkc;
244 DECLARE_FLEX_ARRAY(struct mlx5_mtt, inline_mtts);
245 DECLARE_FLEX_ARRAY(struct mlx5_klm, inline_klms);
/* Bit positions within the pflags mask manipulated by MLX5E_SET_PFLAG()
 * and MLX5E_GET_PFLAG().
 */
enum mlx5e_priv_flag {
	MLX5E_PFLAG_RX_CQE_BASED_MODER,
	MLX5E_PFLAG_TX_CQE_BASED_MODER,
	MLX5E_PFLAG_RX_CQE_COMPRESS,
	MLX5E_PFLAG_RX_STRIDING_RQ,
	MLX5E_PFLAG_RX_NO_CSUM_COMPLETE,
	MLX5E_PFLAG_XDP_TX_MPWQE,
	MLX5E_PFLAG_SKB_TX_MPWQE,
	MLX5E_PFLAG_TX_PORT_TS,
	MLX5E_NUM_PFLAGS, /* Keep last */
};
261 #define MLX5E_SET_PFLAG(params, pflag, enable) \
264 (params)->pflags |= BIT(pflag); \
266 (params)->pflags &= ~(BIT(pflag)); \
/* Evaluates to 1 if bit @pflag is set in (params)->pflags, 0 otherwise. */
#define MLX5E_GET_PFLAG(params, pflag) (((params)->pflags & BIT(pflag)) != 0)
272 MLX5E_PACKET_MERGE_NONE,
273 MLX5E_PACKET_MERGE_LRO,
274 MLX5E_PACKET_MERGE_SHAMPO,
277 struct mlx5e_packet_merge_param {
278 enum packet_merge type;
281 u8 match_criteria_type;
282 u8 alignment_granularity;
286 struct mlx5e_params {
289 u8 log_rq_mtu_frames;
294 struct netdev_tc_txq tc_to_txq[TC_MAX_QUEUE];
296 struct mlx5e_mqprio_rl *rl;
299 bool rx_cqe_compress_def;
300 bool tunneled_offload_en;
301 struct dim_cq_moder rx_cq_moderation;
302 struct dim_cq_moder tx_cq_moderation;
303 struct mlx5e_packet_merge_param packet_merge;
304 u8 tx_min_inline_mode;
305 bool vlan_strip_disable;
310 struct bpf_prog *xdp_prog;
311 struct mlx5e_xsk *xsk;
317 static inline u8 mlx5e_get_dcb_num_tc(struct mlx5e_params *params)
319 return params->mqprio.mode == TC_MQPRIO_MODE_DCB ?
320 params->mqprio.num_tc : 1;
324 MLX5E_RQ_STATE_ENABLED,
325 MLX5E_RQ_STATE_RECOVERING,
327 MLX5E_RQ_STATE_NO_CSUM_COMPLETE,
328 MLX5E_RQ_STATE_CSUM_FULL, /* cqe_csum_full hw bit is set */
329 MLX5E_RQ_STATE_FPGA_TLS, /* FPGA TLS enabled */
330 MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX, /* set when mini_cqe_resp_stride_index cap is used */
331 MLX5E_RQ_STATE_SHAMPO, /* set when SHAMPO cap is used */
335 /* data path - accessed per cqe */
338 /* data path - accessed per napi poll */
340 struct napi_struct *napi;
341 struct mlx5_core_cq mcq;
342 struct mlx5e_ch_stats *ch_stats;
345 struct net_device *netdev;
346 struct mlx5_core_dev *mdev;
347 struct mlx5e_priv *priv;
348 struct mlx5_wq_ctrl wq_ctrl;
349 } ____cacheline_aligned_in_smp;
351 struct mlx5e_cq_decomp {
352 /* cqe decompression */
353 struct mlx5_cqe64 title;
354 struct mlx5_mini_cqe8 mini_arr[MLX5_MINI_CQE_ARRAY_SIZE];
358 } ____cacheline_aligned_in_smp;
360 enum mlx5e_dma_map_type {
361 MLX5E_DMA_MAP_SINGLE,
365 struct mlx5e_sq_dma {
368 enum mlx5e_dma_map_type type;
372 MLX5E_SQ_STATE_ENABLED,
373 MLX5E_SQ_STATE_MPWQE,
374 MLX5E_SQ_STATE_RECOVERING,
375 MLX5E_SQ_STATE_IPSEC,
377 MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE,
378 MLX5E_SQ_STATE_PENDING_XSK_TX,
379 MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC,
382 struct mlx5e_tx_mpwqe {
383 /* Current MPWQE session */
384 struct mlx5e_tx_wqe *wqe;
391 struct mlx5e_skb_fifo {
392 struct sk_buff **fifo;
403 /* dirtied @completion */
407 struct dim dim; /* Adaptive Moderation */
410 u16 pc ____cacheline_aligned_in_smp;
413 struct mlx5e_tx_mpwqe mpwqe;
418 struct mlx5_wq_cyc wq;
420 struct mlx5e_sq_stats *stats;
422 struct mlx5e_sq_dma *dma_fifo;
423 struct mlx5e_skb_fifo skb_fifo;
424 struct mlx5e_tx_wqe_info *wqe_info;
426 void __iomem *uar_map;
427 struct netdev_queue *txq;
435 struct hwtstamp_config *tstamp;
436 struct mlx5_clock *clock;
437 struct net_device *netdev;
438 struct mlx5_core_dev *mdev;
439 struct mlx5e_priv *priv;
442 struct mlx5_wq_ctrl wq_ctrl;
446 struct work_struct recover_work;
447 struct mlx5e_ptpsq *ptpsq;
448 cqe_ts_to_ns ptp_cyc2time;
449 } ____cacheline_aligned_in_smp;
451 struct mlx5e_dma_info {
455 struct xdp_buff *xsk;
459 /* XDP packets can be transmitted in different ways. On completion, we need to
460 * distinguish between them to clean up things in a proper way.
462 enum mlx5e_xdp_xmit_mode {
463 /* An xdp_frame was transmitted due to either XDP_REDIRECT from another
464 * device or XDP_TX from an XSK RQ. The frame has to be unmapped and
467 MLX5E_XDP_XMIT_MODE_FRAME,
469 /* The xdp_frame was created in place as a result of XDP_TX from a
470 * regular RQ. No DMA remapping happened, and the page belongs to us.
472 MLX5E_XDP_XMIT_MODE_PAGE,
474 /* No xdp_frame was created at all, the transmit happened from a UMEM
475 * page. The UMEM Completion Ring producer pointer has to be increased.
477 MLX5E_XDP_XMIT_MODE_XSK,
480 struct mlx5e_xdp_info {
481 enum mlx5e_xdp_xmit_mode mode;
484 struct xdp_frame *xdpf;
489 struct mlx5e_dma_info di;
494 struct mlx5e_xmit_data {
500 struct mlx5e_xdp_info_fifo {
501 struct mlx5e_xdp_info *xi;
508 typedef int (*mlx5e_fp_xmit_xdp_frame_check)(struct mlx5e_xdpsq *);
509 typedef bool (*mlx5e_fp_xmit_xdp_frame)(struct mlx5e_xdpsq *,
510 struct mlx5e_xmit_data *,
511 struct mlx5e_xdp_info *,
517 /* dirtied @completion */
522 u32 xdpi_fifo_pc ____cacheline_aligned_in_smp;
524 struct mlx5_wqe_ctrl_seg *doorbell_cseg;
525 struct mlx5e_tx_mpwqe mpwqe;
530 struct xsk_buff_pool *xsk_pool;
531 struct mlx5_wq_cyc wq;
532 struct mlx5e_xdpsq_stats *stats;
533 mlx5e_fp_xmit_xdp_frame_check xmit_xdp_frame_check;
534 mlx5e_fp_xmit_xdp_frame xmit_xdp_frame;
536 struct mlx5e_xdp_wqe_info *wqe_info;
537 struct mlx5e_xdp_info_fifo xdpi_fifo;
539 void __iomem *uar_map;
548 struct mlx5_wq_ctrl wq_ctrl;
549 struct mlx5e_channel *channel;
550 } ____cacheline_aligned_in_smp;
552 struct mlx5e_ktls_resync_resp;
559 struct mlx5_wqe_ctrl_seg *doorbell_cseg;
562 /* write@xmit, read@completion */
564 struct mlx5e_icosq_wqe_info *wqe_info;
568 struct mlx5_wq_cyc wq;
569 void __iomem *uar_map;
573 struct mlx5e_ktls_resync_resp *ktls_resync;
576 struct mlx5_wq_ctrl wq_ctrl;
577 struct mlx5e_channel *channel;
579 struct work_struct recover_work;
580 } ____cacheline_aligned_in_smp;
582 struct mlx5e_wqe_frag_info {
583 struct mlx5e_dma_info *di;
588 struct mlx5e_umr_dma_info {
589 struct mlx5e_dma_info dma_info[MLX5_MPWRQ_PAGES_PER_WQE];
592 struct mlx5e_mpw_info {
593 struct mlx5e_umr_dma_info umr;
594 u16 consumed_strides;
595 DECLARE_BITMAP(xdp_xmit_bitmap, MLX5_MPWRQ_PAGES_PER_WQE);
598 #define MLX5E_MAX_RX_FRAGS 4
/* A single cache unit can serve one NAPI call (for a non-striding RQ)
 * or one MPWQE (for a striding RQ): it is the larger of
 * MLX5_MPWRQ_PAGES_PER_WQE and NAPI_POLL_WEIGHT.
 */
#define MLX5E_CACHE_UNIT (MLX5_MPWRQ_PAGES_PER_WQE > NAPI_POLL_WEIGHT ? \
			  MLX5_MPWRQ_PAGES_PER_WQE : NAPI_POLL_WEIGHT)
/* Four cache units, with the unit first rounded up to a power of two. */
#define MLX5E_CACHE_SIZE (4 * roundup_pow_of_two(MLX5E_CACHE_UNIT))
606 struct mlx5e_page_cache {
609 struct mlx5e_dma_info page_cache[MLX5E_CACHE_SIZE];
613 typedef void (*mlx5e_fp_handle_rx_cqe)(struct mlx5e_rq*, struct mlx5_cqe64*);
614 typedef struct sk_buff *
615 (*mlx5e_fp_skb_from_cqe_mpwrq)(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
616 u16 cqe_bcnt, u32 head_offset, u32 page_idx);
617 typedef struct sk_buff *
618 (*mlx5e_fp_skb_from_cqe)(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
619 struct mlx5e_wqe_frag_info *wi, u32 cqe_bcnt);
620 typedef bool (*mlx5e_fp_post_rx_wqes)(struct mlx5e_rq *rq);
621 typedef void (*mlx5e_fp_dealloc_wqe)(struct mlx5e_rq*, u16);
622 typedef void (*mlx5e_fp_shampo_dealloc_hd)(struct mlx5e_rq*, u16, u16, bool);
624 int mlx5e_rq_set_handlers(struct mlx5e_rq *rq, struct mlx5e_params *params, bool xsk);
625 void mlx5e_rq_set_trap_handlers(struct mlx5e_rq *rq, struct mlx5e_params *params);
628 MLX5E_RQ_FLAG_XDP_XMIT,
629 MLX5E_RQ_FLAG_XDP_REDIRECT,
632 struct mlx5e_rq_frag_info {
637 struct mlx5e_rq_frags_info {
638 struct mlx5e_rq_frag_info arr[MLX5E_MAX_RX_FRAGS];
644 struct mlx5e_shampo_hd {
646 struct mlx5e_dma_info *info;
647 struct page *last_page;
650 unsigned long *bitmap;
657 struct mlx5e_hw_gro_data {
667 struct mlx5_wq_cyc wq;
668 struct mlx5e_wqe_frag_info *frags;
669 struct mlx5e_dma_info *di;
670 struct mlx5e_rq_frags_info info;
671 mlx5e_fp_skb_from_cqe skb_from_cqe;
674 struct mlx5_wq_ll wq;
675 struct mlx5e_umr_wqe umr_wqe;
676 struct mlx5e_mpw_info *info;
677 mlx5e_fp_skb_from_cqe_mpwrq skb_from_cqe_mpwrq;
684 struct mlx5e_shampo_hd *shampo;
690 u8 map_dir; /* dma map direction */
694 struct net_device *netdev;
695 struct mlx5e_rq_stats *stats;
697 struct mlx5e_cq_decomp cqd;
698 struct mlx5e_page_cache page_cache;
699 struct hwtstamp_config *tstamp;
700 struct mlx5_clock *clock;
701 struct mlx5e_icosq *icosq;
702 struct mlx5e_priv *priv;
704 struct mlx5e_hw_gro_data *hw_gro_data;
706 mlx5e_fp_handle_rx_cqe handle_rx_cqe;
707 mlx5e_fp_post_rx_wqes post_wqes;
708 mlx5e_fp_dealloc_wqe dealloc_wqe;
714 struct dim dim; /* Dynamic Interrupt Moderation */
717 struct bpf_prog __rcu *xdp_prog;
718 struct mlx5e_xdpsq *xdpsq;
719 DECLARE_BITMAP(flags, 8);
720 struct page_pool *page_pool;
722 /* AF_XDP zero-copy */
723 struct xsk_buff_pool *xsk_pool;
725 struct work_struct recover_work;
728 struct mlx5_wq_ctrl wq_ctrl;
732 struct mlx5_core_dev *mdev;
734 struct mlx5e_dma_info wqe_overflow;
736 /* XDP read-mostly */
737 struct xdp_rxq_info xdp_rxq;
738 cqe_ts_to_ns ptp_cyc2time;
739 } ____cacheline_aligned_in_smp;
/* Channel state identifiers. MLX5E_CHANNEL_NUM_STATES must stay last: it
 * sizes the state bitmap declared in struct mlx5e_channel.
 */
enum mlx5e_channel_state {
	MLX5E_CHANNEL_STATE_XSK,
	MLX5E_CHANNEL_NUM_STATES
};
746 struct mlx5e_channel {
749 struct mlx5e_xdpsq rq_xdpsq;
750 struct mlx5e_txqsq sq[MLX5E_MAX_NUM_TC];
751 struct mlx5e_icosq icosq; /* internal control operations */
752 struct mlx5e_txqsq __rcu * __rcu *qos_sqs;
754 struct napi_struct napi;
756 struct net_device *netdev;
763 struct mlx5e_xdpsq xdpsq;
765 /* AF_XDP zero-copy */
766 struct mlx5e_rq xskrq;
767 struct mlx5e_xdpsq xsksq;
770 struct mlx5e_icosq async_icosq;
771 /* async_icosq can be accessed from any CPU - the spinlock protects it. */
772 spinlock_t async_icosq_lock;
774 /* data path - accessed per napi poll */
775 const struct cpumask *aff_mask;
776 struct mlx5e_ch_stats *stats;
779 struct mlx5e_priv *priv;
780 struct mlx5_core_dev *mdev;
781 struct hwtstamp_config *tstamp;
782 DECLARE_BITMAP(state, MLX5E_CHANNEL_NUM_STATES);
785 /* Sync between icosq recovery and XSK enable/disable. */
786 struct mutex icosq_recovery_lock;
791 struct mlx5e_channels {
792 struct mlx5e_channel **c;
793 struct mlx5e_ptp *ptp;
795 struct mlx5e_params params;
798 struct mlx5e_channel_stats {
799 struct mlx5e_ch_stats ch;
800 struct mlx5e_sq_stats sq[MLX5E_MAX_NUM_TC];
801 struct mlx5e_rq_stats rq;
802 struct mlx5e_rq_stats xskrq;
803 struct mlx5e_xdpsq_stats rq_xdpsq;
804 struct mlx5e_xdpsq_stats xdpsq;
805 struct mlx5e_xdpsq_stats xsksq;
806 } ____cacheline_aligned_in_smp;
808 struct mlx5e_ptp_stats {
809 struct mlx5e_ch_stats ch;
810 struct mlx5e_sq_stats sq[MLX5E_MAX_NUM_TC];
811 struct mlx5e_ptp_cq_stats cq[MLX5E_MAX_NUM_TC];
812 struct mlx5e_rq_stats rq;
813 } ____cacheline_aligned_in_smp;
817 MLX5E_STATE_DESTROYING,
818 MLX5E_STATE_XDP_TX_ENABLED,
819 MLX5E_STATE_XDP_ACTIVE,
827 struct mlx5e_modify_sq_param {
833 u16 qos_queue_group_id;
836 #if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE)
837 struct mlx5e_hv_vhca_stats_agent {
838 struct mlx5_hv_vhca_agent *agent;
839 struct delayed_work work;
846 /* XSK buffer pools are stored separately from channels,
847 * because we don't want to lose them when channels are
848 * recreated. The kernel also stores buffer pool, but it doesn't
849 * distinguish between zero-copy and non-zero-copy UMEMs, so
850 * rely on our mechanism.
852 struct xsk_buff_pool **pools;
857 /* Temporary storage for variables that are allocated when struct mlx5e_priv is
858 * initialized, and used where we can't allocate them because that functions
859 * must not fail. Use with care and make sure the same variable is not used
860 * simultaneously by multiple users.
862 struct mlx5e_scratchpad {
863 cpumask_var_t cpumask;
867 DECLARE_HASHTABLE(qos_tc2node, order_base_2(MLX5E_QOS_MAX_LEAF_NODES));
868 DECLARE_BITMAP(qos_used_qids, MLX5E_QOS_MAX_LEAF_NODES);
869 struct mlx5e_sq_stats **qos_sq_stats;
878 /* priv data path fields - start */
879 struct mlx5e_txqsq **txq2sq;
880 int **channel_tc2realtxq;
881 int port_ptp_tc2realtxq[MLX5E_MAX_NUM_TC];
882 #ifdef CONFIG_MLX5_CORE_EN_DCB
883 struct mlx5e_dcbx_dp dcbx_dp;
885 /* priv data path fields - end */
889 struct mutex state_lock; /* Protects Interface state */
890 struct mlx5e_rq drop_rq;
892 struct mlx5e_channels channels;
893 u32 tisn[MLX5_MAX_PORTS][MLX5E_MAX_NUM_TC];
894 struct mlx5e_rx_res *rx_res;
897 struct mlx5e_flow_steering fs;
899 struct workqueue_struct *wq;
900 struct work_struct update_carrier_work;
901 struct work_struct set_rx_mode_work;
902 struct work_struct tx_timeout_work;
903 struct work_struct update_stats_work;
904 struct work_struct monitor_counters_work;
905 struct mlx5_nb monitor_counters_nb;
907 struct mlx5_core_dev *mdev;
908 struct net_device *netdev;
909 struct mlx5e_trap *en_trap;
910 struct mlx5e_stats stats;
911 struct mlx5e_channel_stats **channel_stats;
912 struct mlx5e_channel_stats trap_stats;
913 struct mlx5e_ptp_stats ptp_stats;
919 struct hwtstamp_config tstamp;
921 u16 drop_rq_q_counter;
922 struct notifier_block events_nb;
923 struct notifier_block blocking_events_nb;
926 struct udp_tunnel_nic_info nic_info;
927 #ifdef CONFIG_MLX5_CORE_EN_DCB
928 struct mlx5e_dcbx dcbx;
931 const struct mlx5e_profile *profile;
933 #ifdef CONFIG_MLX5_EN_IPSEC
934 struct mlx5e_ipsec *ipsec;
936 #ifdef CONFIG_MLX5_EN_TLS
937 struct mlx5e_tls *tls;
939 struct devlink_health_reporter *tx_reporter;
940 struct devlink_health_reporter *rx_reporter;
941 struct mlx5e_xsk xsk;
942 #if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE)
943 struct mlx5e_hv_vhca_stats_agent stats_agent;
945 struct mlx5e_scratchpad scratchpad;
946 struct mlx5e_htb htb;
947 struct mlx5e_mqprio_rl *mqprio_rl;
950 struct mlx5e_rx_handlers {
951 mlx5e_fp_handle_rx_cqe handle_rx_cqe;
952 mlx5e_fp_handle_rx_cqe handle_rx_cqe_mpwqe;
953 mlx5e_fp_handle_rx_cqe handle_rx_cqe_mpwqe_shampo;
956 extern const struct mlx5e_rx_handlers mlx5e_rx_handlers_nic;
/* Bit positions tested against a profile's features mask by
 * mlx5e_profile_feature_cap().
 */
enum mlx5e_profile_feature {
	MLX5E_PROFILE_FEATURE_PTP_RX,
	MLX5E_PROFILE_FEATURE_PTP_TX,
	MLX5E_PROFILE_FEATURE_QOS_HTB,
};
964 struct mlx5e_profile {
965 int (*init)(struct mlx5_core_dev *mdev,
966 struct net_device *netdev);
967 void (*cleanup)(struct mlx5e_priv *priv);
968 int (*init_rx)(struct mlx5e_priv *priv);
969 void (*cleanup_rx)(struct mlx5e_priv *priv);
970 int (*init_tx)(struct mlx5e_priv *priv);
971 void (*cleanup_tx)(struct mlx5e_priv *priv);
972 void (*enable)(struct mlx5e_priv *priv);
973 void (*disable)(struct mlx5e_priv *priv);
974 int (*update_rx)(struct mlx5e_priv *priv);
975 void (*update_stats)(struct mlx5e_priv *priv);
976 void (*update_carrier)(struct mlx5e_priv *priv);
977 int (*max_nch_limit)(struct mlx5_core_dev *mdev);
978 unsigned int (*stats_grps_num)(struct mlx5e_priv *priv);
979 mlx5e_stats_grp_t *stats_grps;
980 const struct mlx5e_rx_handlers *rx_handlers;
/* Tests whether MLX5E_PROFILE_FEATURE_<feature> is set in (profile)->features.
 * The result is the masked bit itself (non-zero when set), not a value
 * normalised to 0/1.
 */
#define mlx5e_profile_feature_cap(profile, feature)	\
	((profile)->features & BIT(MLX5E_PROFILE_FEATURE_##feature))
989 void mlx5e_build_ptys2ethtool_map(void);
991 bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev);
993 void mlx5e_shampo_dealloc_hd(struct mlx5e_rq *rq, u16 len, u16 start, bool close);
994 void mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats);
995 void mlx5e_fold_sw_stats64(struct mlx5e_priv *priv, struct rtnl_link_stats64 *s);
997 void mlx5e_init_l2_addr(struct mlx5e_priv *priv);
998 int mlx5e_self_test_num(struct mlx5e_priv *priv);
999 int mlx5e_self_test_fill_strings(struct mlx5e_priv *priv, u8 *data);
1000 void mlx5e_self_test(struct net_device *ndev, struct ethtool_test *etest,
1002 void mlx5e_set_rx_mode_work(struct work_struct *work);
1004 int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr);
1005 int mlx5e_hwstamp_get(struct mlx5e_priv *priv, struct ifreq *ifr);
1006 int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool val, bool rx_filter);
1008 int mlx5e_vlan_rx_add_vid(struct net_device *dev, __always_unused __be16 proto,
1010 int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __always_unused __be16 proto,
1012 void mlx5e_timestamp_init(struct mlx5e_priv *priv);
1014 struct mlx5e_xsk_param;
1016 struct mlx5e_rq_param;
1017 int mlx5e_open_rq(struct mlx5e_params *params, struct mlx5e_rq_param *param,
1018 struct mlx5e_xsk_param *xsk, int node,
1019 struct mlx5e_rq *rq);
1020 int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time);
1021 void mlx5e_close_rq(struct mlx5e_rq *rq);
1022 int mlx5e_create_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param);
1023 void mlx5e_destroy_rq(struct mlx5e_rq *rq);
1025 struct mlx5e_sq_param;
1026 int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params,
1027 struct mlx5e_sq_param *param, struct xsk_buff_pool *xsk_pool,
1028 struct mlx5e_xdpsq *sq, bool is_redirect);
1029 void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq);
1031 struct mlx5e_create_cq_param {
1032 struct napi_struct *napi;
1033 struct mlx5e_ch_stats *ch_stats;
1038 struct mlx5e_cq_param;
1039 int mlx5e_open_cq(struct mlx5e_priv *priv, struct dim_cq_moder moder,
1040 struct mlx5e_cq_param *param, struct mlx5e_create_cq_param *ccp,
1041 struct mlx5e_cq *cq);
1042 void mlx5e_close_cq(struct mlx5e_cq *cq);
1044 int mlx5e_open_locked(struct net_device *netdev);
1045 int mlx5e_close_locked(struct net_device *netdev);
1047 int mlx5e_open_channels(struct mlx5e_priv *priv,
1048 struct mlx5e_channels *chs);
1049 void mlx5e_close_channels(struct mlx5e_channels *chs);
1051 /* Function pointer to be used to modify HW or kernel settings while
1052 * switching channels
1054 typedef int (*mlx5e_fp_preactivate)(struct mlx5e_priv *priv, void *context);
1055 #define MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(fn) \
1056 int fn##_ctx(struct mlx5e_priv *priv, void *context) \
1060 int mlx5e_safe_reopen_channels(struct mlx5e_priv *priv);
1061 int mlx5e_safe_switch_params(struct mlx5e_priv *priv,
1062 struct mlx5e_params *new_params,
1063 mlx5e_fp_preactivate preactivate,
1064 void *context, bool reset);
1065 int mlx5e_update_tx_netdev_queues(struct mlx5e_priv *priv);
1066 int mlx5e_num_channels_changed_ctx(struct mlx5e_priv *priv, void *context);
1067 void mlx5e_activate_priv_channels(struct mlx5e_priv *priv);
1068 void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv);
1069 int mlx5e_ptp_rx_manage_fs_ctx(struct mlx5e_priv *priv, void *ctx);
1071 int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_state);
1072 void mlx5e_activate_rq(struct mlx5e_rq *rq);
1073 void mlx5e_deactivate_rq(struct mlx5e_rq *rq);
1074 void mlx5e_activate_icosq(struct mlx5e_icosq *icosq);
1075 void mlx5e_deactivate_icosq(struct mlx5e_icosq *icosq);
1077 int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn,
1078 struct mlx5e_modify_sq_param *p);
1079 int mlx5e_open_txqsq(struct mlx5e_channel *c, u32 tisn, int txq_ix,
1080 struct mlx5e_params *params, struct mlx5e_sq_param *param,
1081 struct mlx5e_txqsq *sq, int tc, u16 qos_queue_group_id,
1082 struct mlx5e_sq_stats *sq_stats);
1083 void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq);
1084 void mlx5e_deactivate_txqsq(struct mlx5e_txqsq *sq);
1085 void mlx5e_free_txqsq(struct mlx5e_txqsq *sq);
1086 void mlx5e_tx_disable_queue(struct netdev_queue *txq);
1087 int mlx5e_alloc_txqsq_db(struct mlx5e_txqsq *sq, int numa);
1088 void mlx5e_free_txqsq_db(struct mlx5e_txqsq *sq);
1089 struct mlx5e_create_sq_param;
1090 int mlx5e_create_sq_rdy(struct mlx5_core_dev *mdev,
1091 struct mlx5e_sq_param *param,
1092 struct mlx5e_create_sq_param *csp,
1093 u16 qos_queue_group_id,
1095 void mlx5e_tx_err_cqe_work(struct work_struct *recover_work);
1096 void mlx5e_close_txqsq(struct mlx5e_txqsq *sq);
1098 static inline bool mlx5_tx_swp_supported(struct mlx5_core_dev *mdev)
1100 return MLX5_CAP_ETH(mdev, swp) &&
1101 MLX5_CAP_ETH(mdev, swp_csum) && MLX5_CAP_ETH(mdev, swp_lso);
1104 extern const struct ethtool_ops mlx5e_ethtool_ops;
1106 int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev);
1107 void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev);
1108 int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb,
1110 void mlx5e_mkey_set_relaxed_ordering(struct mlx5_core_dev *mdev, void *mkc);
1112 /* common netdev helpers */
1113 void mlx5e_create_q_counters(struct mlx5e_priv *priv);
1114 void mlx5e_destroy_q_counters(struct mlx5e_priv *priv);
1115 int mlx5e_open_drop_rq(struct mlx5e_priv *priv,
1116 struct mlx5e_rq *drop_rq);
1117 void mlx5e_close_drop_rq(struct mlx5e_rq *drop_rq);
1118 int mlx5e_init_di_list(struct mlx5e_rq *rq, int wq_sz, int node);
1119 void mlx5e_free_di_list(struct mlx5e_rq *rq);
1121 int mlx5e_create_tis(struct mlx5_core_dev *mdev, void *in, u32 *tisn);
1122 void mlx5e_destroy_tis(struct mlx5_core_dev *mdev, u32 tisn);
1124 int mlx5e_create_tises(struct mlx5e_priv *priv);
1125 void mlx5e_destroy_tises(struct mlx5e_priv *priv);
1126 int mlx5e_update_nic_rx(struct mlx5e_priv *priv);
1127 void mlx5e_update_carrier(struct mlx5e_priv *priv);
1128 int mlx5e_close(struct net_device *netdev);
1129 int mlx5e_open(struct net_device *netdev);
1131 void mlx5e_queue_update_stats(struct mlx5e_priv *priv);
1133 int mlx5e_set_dev_port_mtu(struct mlx5e_priv *priv);
1134 int mlx5e_set_dev_port_mtu_ctx(struct mlx5e_priv *priv, void *context);
1135 int mlx5e_change_mtu(struct net_device *netdev, int new_mtu,
1136 mlx5e_fp_preactivate preactivate);
1137 void mlx5e_vxlan_set_netdev_info(struct mlx5e_priv *priv);
1139 /* ethtool helpers */
1140 void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv,
1141 struct ethtool_drvinfo *drvinfo);
1142 void mlx5e_ethtool_get_strings(struct mlx5e_priv *priv,
1143 uint32_t stringset, uint8_t *data);
1144 int mlx5e_ethtool_get_sset_count(struct mlx5e_priv *priv, int sset);
1145 void mlx5e_ethtool_get_ethtool_stats(struct mlx5e_priv *priv,
1146 struct ethtool_stats *stats, u64 *data);
1147 void mlx5e_ethtool_get_ringparam(struct mlx5e_priv *priv,
1148 struct ethtool_ringparam *param);
1149 int mlx5e_ethtool_set_ringparam(struct mlx5e_priv *priv,
1150 struct ethtool_ringparam *param);
1151 void mlx5e_ethtool_get_channels(struct mlx5e_priv *priv,
1152 struct ethtool_channels *ch);
1153 int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv,
1154 struct ethtool_channels *ch);
1155 int mlx5e_ethtool_get_coalesce(struct mlx5e_priv *priv,
1156 struct ethtool_coalesce *coal,
1157 struct kernel_ethtool_coalesce *kernel_coal);
1158 int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv,
1159 struct ethtool_coalesce *coal,
1160 struct kernel_ethtool_coalesce *kernel_coal,
1161 struct netlink_ext_ack *extack);
1162 int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv,
1163 struct ethtool_link_ksettings *link_ksettings);
1164 int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv,
1165 const struct ethtool_link_ksettings *link_ksettings);
1166 int mlx5e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, u8 *hfunc);
1167 int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir, const u8 *key,
1169 int mlx5e_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info,
1171 int mlx5e_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd);
1172 u32 mlx5e_ethtool_get_rxfh_key_size(struct mlx5e_priv *priv);
1173 u32 mlx5e_ethtool_get_rxfh_indir_size(struct mlx5e_priv *priv);
1174 int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv,
1175 struct ethtool_ts_info *info);
1176 int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv,
1177 struct ethtool_flash *flash);
1178 void mlx5e_ethtool_get_pauseparam(struct mlx5e_priv *priv,
1179 struct ethtool_pauseparam *pauseparam);
1180 int mlx5e_ethtool_set_pauseparam(struct mlx5e_priv *priv,
1181 struct ethtool_pauseparam *pauseparam);
1183 /* mlx5e generic netdev management API */
1185 mlx5e_tx_mpwqe_supported(struct mlx5_core_dev *mdev)
1187 return !is_kdump_kernel() &&
1188 MLX5_CAP_ETH(mdev, enhanced_multi_pkt_send_wqe);
1191 int mlx5e_priv_init(struct mlx5e_priv *priv,
1192 const struct mlx5e_profile *profile,
1193 struct net_device *netdev,
1194 struct mlx5_core_dev *mdev);
1195 void mlx5e_priv_cleanup(struct mlx5e_priv *priv);
1197 mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *profile);
1198 int mlx5e_attach_netdev(struct mlx5e_priv *priv);
1199 void mlx5e_detach_netdev(struct mlx5e_priv *priv);
1200 void mlx5e_destroy_netdev(struct mlx5e_priv *priv);
1201 int mlx5e_netdev_change_profile(struct mlx5e_priv *priv,
1202 const struct mlx5e_profile *new_profile, void *new_ppriv);
1203 void mlx5e_netdev_attach_nic_profile(struct mlx5e_priv *priv);
1204 void mlx5e_set_netdev_mtu_boundaries(struct mlx5e_priv *priv);
1205 void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 mtu);
1206 void mlx5e_rx_dim_work(struct work_struct *work);
1207 void mlx5e_tx_dim_work(struct work_struct *work);
1209 netdev_features_t mlx5e_features_check(struct sk_buff *skb,
1210 struct net_device *netdev,
1211 netdev_features_t features);
1212 int mlx5e_set_features(struct net_device *netdev, netdev_features_t features);
1213 #ifdef CONFIG_MLX5_ESWITCH
1214 int mlx5e_set_vf_mac(struct net_device *dev, int vf, u8 *mac);
1215 int mlx5e_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate, int max_tx_rate);
1216 int mlx5e_get_vf_config(struct net_device *dev, int vf, struct ifla_vf_info *ivi);
1217 int mlx5e_get_vf_stats(struct net_device *dev, int vf, struct ifla_vf_stats *vf_stats);
1219 #endif /* __MLX5_EN_H__ */