/*
 * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <linux/bpf_trace.h>
#include "en/xdp.h"

int mlx5e_xdp_max_mtu(struct mlx5e_params *params)
{
	int hr = NET_IP_ALIGN + XDP_PACKET_HEADROOM;

	/* Let S := SKB_DATA_ALIGN(sizeof(struct skb_shared_info)).
	 * The condition checked in mlx5e_rx_is_linear_skb is:
	 *   SKB_DATA_ALIGN(sw_mtu + hard_mtu + hr) + S <= PAGE_SIZE      (1)
	 *   (Note that hw_mtu == sw_mtu + hard_mtu.)
	 * What is returned from this function is:
	 *   max_mtu = PAGE_SIZE - S - hr - hard_mtu                      (2)
	 * After assigning sw_mtu := max_mtu, the left side of (1) turns to
	 * SKB_DATA_ALIGN(PAGE_SIZE - S) + S, which is equal to PAGE_SIZE,
	 * because both PAGE_SIZE and S are already aligned. Any number greater
	 * than max_mtu would make the left side of (1) greater than PAGE_SIZE,
	 * so max_mtu is the maximum MTU allowed.
	 */
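	/* Worked example (illustrative numbers for one common config, not
	 * guaranteed everywhere): on x86_64 with 4K pages, PAGE_SIZE = 4096,
	 * hr = NET_IP_ALIGN + XDP_PACKET_HEADROOM = 0 + 256, S is the aligned
	 * size of struct skb_shared_info (about 320 bytes), and
	 * hard_mtu = ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN = 22. Then (2) gives
	 * max_mtu ~= 4096 - 320 - 256 - 22 = 3498, and plugging sw_mtu = 3498
	 * back into (1) fills the page exactly: 3498 + 22 + 256 = 3776,
	 * SKB_DATA_ALIGN(3776) + 320 = 4096.
	 */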

	return MLX5E_HW2SW_MTU(params, SKB_MAX_HEAD(hr));
}

static inline bool
mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_dma_info *di,
		    struct xdp_buff *xdp)
{
	struct mlx5e_xdp_info xdpi;

	xdpi.xdpf = convert_to_xdp_frame(xdp);
	if (unlikely(!xdpi.xdpf))
		return false;

	xdpi.dma_addr = di->addr + (xdpi.xdpf->data - (void *)xdpi.xdpf);
	dma_sync_single_for_device(sq->pdev, xdpi.dma_addr,
				   xdpi.xdpf->len, PCI_DMA_TODEVICE);
	xdpi.di = *di;

	return sq->xmit_xdp_frame(sq, &xdpi);
}

/* returns true if packet was consumed by xdp */
bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
		      void *va, u16 *rx_headroom, u32 *len)
{
	struct bpf_prog *prog = READ_ONCE(rq->xdp_prog);
	struct xdp_buff xdp;
	u32 act;
	int err;

	if (!prog)
		return false;

	xdp.data = va + *rx_headroom;
	xdp_set_data_meta_invalid(&xdp);
	xdp.data_end = xdp.data + *len;
	xdp.data_hard_start = va;
	xdp.rxq = &rq->xdp_rxq;

	act = bpf_prog_run_xdp(prog, &xdp);
	switch (act) {
	case XDP_PASS:
		*rx_headroom = xdp.data - xdp.data_hard_start;
		*len = xdp.data_end - xdp.data;
		return false;
	case XDP_TX:
		if (unlikely(!mlx5e_xmit_xdp_buff(&rq->xdpsq, di, &xdp)))
			goto xdp_abort;
		__set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); /* non-atomic */
		return true;
	case XDP_REDIRECT:
		/* When XDP enabled then page-refcnt==1 here */
		err = xdp_do_redirect(rq->netdev, &xdp, prog);
		if (unlikely(err))
			goto xdp_abort;
		__set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags);
		rq->xdpsq.redirect_flush = true;
		mlx5e_page_dma_unmap(rq, di);
		rq->stats->xdp_redirect++;
		return true;
	default:
		bpf_warn_invalid_xdp_action(act);
		/* fall through */
	case XDP_ABORTED:
xdp_abort:
		trace_xdp_exception(rq->netdev, prog, act);
		/* fall through */
	case XDP_DROP:
		rq->stats->xdp_drop++;
		return true;
	}
}

static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq)
{
	struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
	struct mlx5_wq_cyc *wq = &sq->wq;
	u16 wqebbs;
	u16 pi;

	mlx5e_xdpsq_fetch_wqe(sq, &session->wqe);

	prefetchw(session->wqe->data);
	session->ds_count = MLX5E_XDP_TX_EMPTY_DS_COUNT;

	pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);

/* The mult of MLX5_SEND_WQE_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS
 * (16 * 4 == 64) does not fit in the 6-bit DS field of Ctrl Segment.
 * We use a bound lower than MLX5_SEND_WQE_MAX_WQEBBS to let a
 * full-session WQE be cache-aligned.
 */
#if L1_CACHE_BYTES < 128
#define MLX5E_XDP_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 1)
#else
#define MLX5E_XDP_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 2)
#endif
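
/* Illustration (assuming the usual 64-byte WQEBB holding 4 x 16-byte data
 * segments): 16 WQEBBs would mean 64 data segments, overflowing the 6-bit
 * DS count (max 63). Capping at 15 WQEBBs (64B cachelines) keeps the DS
 * count at 60 and a full session at 960 bytes (a multiple of 64); capping
 * at 14 WQEBBs (128B cachelines) gives 56 DS and 896 bytes, a multiple
 * of 128.
 */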

	wqebbs = min_t(u16, mlx5_wq_cyc_get_contig_wqebbs(wq, pi),
		       MLX5E_XDP_MPW_MAX_WQEBBS);

	session->max_ds_count = MLX5_SEND_WQEBB_NUM_DS * wqebbs;
}

static void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq)
{
	struct mlx5_wq_cyc *wq = &sq->wq;
	struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
	struct mlx5_wqe_ctrl_seg *cseg = &session->wqe->ctrl;
	u16 ds_count = session->ds_count;
	u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
	struct mlx5e_xdp_wqe_info *wi = &sq->db.wqe_info[pi];

	cseg->opmod_idx_opcode =
		cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_ENHANCED_MPSW);
	cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_count);

	wi->num_wqebbs = DIV_ROUND_UP(ds_count, MLX5_SEND_WQEBB_NUM_DS);
	wi->num_ds = ds_count - MLX5E_XDP_TX_EMPTY_DS_COUNT;

	sq->pc += wi->num_wqebbs;

	sq->doorbell_cseg = cseg;

	session->wqe = NULL; /* Close session */
}

static bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq,
				       struct mlx5e_xdp_info *xdpi)
{
	struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
	struct mlx5e_xdpsq_stats *stats = sq->stats;

	dma_addr_t dma_addr = xdpi->dma_addr;
	struct xdp_frame *xdpf = xdpi->xdpf;
	unsigned int dma_len = xdpf->len;

	if (unlikely(sq->hw_mtu < dma_len)) {
		stats->err++;
		return false;
	}

	if (unlikely(!session->wqe)) {
		if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc,
						     MLX5_SEND_WQE_MAX_WQEBBS))) {
			/* SQ is full, ring doorbell */
			mlx5e_xmit_xdp_doorbell(sq);
			stats->full++;
			return false;
		}

		mlx5e_xdp_mpwqe_session_start(sq);
	}

	mlx5e_xdp_mpwqe_add_dseg(sq, dma_addr, dma_len);

	if (unlikely(session->ds_count == session->max_ds_count))
		mlx5e_xdp_mpwqe_complete(sq);

	mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi);
	stats->xmit++;
	return true;
}

static bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi)
{
	struct mlx5_wq_cyc *wq = &sq->wq;
	u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
	struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi);

	struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
	struct mlx5_wqe_eth_seg *eseg = &wqe->eth;
	struct mlx5_wqe_data_seg *dseg = wqe->data;

	struct xdp_frame *xdpf = xdpi->xdpf;
	dma_addr_t dma_addr = xdpi->dma_addr;
	unsigned int dma_len = xdpf->len;

	struct mlx5e_xdpsq_stats *stats = sq->stats;

	prefetchw(wqe);

	if (unlikely(dma_len < MLX5E_XDP_MIN_INLINE || sq->hw_mtu < dma_len)) {
		stats->err++;
		return false;
	}

	if (unlikely(!mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, 1))) {
		/* SQ is full, ring doorbell */
		mlx5e_xmit_xdp_doorbell(sq);
		stats->full++;
		return false;
	}

	cseg->fm_ce_se = 0;

	/* copy the inline part if required */
	if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) {
		memcpy(eseg->inline_hdr.start, xdpf->data, MLX5E_XDP_MIN_INLINE);
		eseg->inline_hdr.sz = cpu_to_be16(MLX5E_XDP_MIN_INLINE);
		dma_len -= MLX5E_XDP_MIN_INLINE;
		dma_addr += MLX5E_XDP_MIN_INLINE;
		dseg++;
	}

	/* write the dma part */
	dseg->addr = cpu_to_be64(dma_addr);
	dseg->byte_count = cpu_to_be32(dma_len);

	cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_SEND);

	sq->pc++;

	sq->doorbell_cseg = cseg;

	mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi);
	stats->xmit++;
	return true;
}

bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq)
{
	struct mlx5e_xdp_info_fifo *xdpi_fifo;
	struct mlx5e_xdpsq *sq;
	struct mlx5_cqe64 *cqe;
	bool is_redirect;
	u16 sqcc;
	int i;

	sq = container_of(cq, struct mlx5e_xdpsq, cq);

	if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)))
		return false;

	cqe = mlx5_cqwq_get_cqe(&cq->wq);
	if (!cqe)
		return false;

	is_redirect = !rq;
	xdpi_fifo = &sq->db.xdpi_fifo;

	/* sq->cc must be updated only after mlx5_cqwq_update_db_record(),
	 * otherwise a cq overrun may occur
	 */
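	/* Roughly: mlx5_cqwq_update_db_record() is what releases CQ entries
	 * back to HW, while sq->cc is what lets the xmit path post new WQEs;
	 * if cc advanced first, fresh completions could land in CQ slots that
	 * have not been released yet.
	 */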
	sqcc = sq->cc;

	i = 0;
	do {
		u16 wqe_counter;
		bool last_wqe;

		mlx5_cqwq_pop(&cq->wq);

		wqe_counter = be16_to_cpu(cqe->wqe_counter);

		if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ))
			netdev_WARN_ONCE(sq->channel->netdev,
					 "Bad OP in XDPSQ CQE: 0x%x\n",
					 get_cqe_opcode(cqe));

		do {
			struct mlx5e_xdp_wqe_info *wi;
			u16 ci;
			int j;

			last_wqe = (sqcc == wqe_counter);
			ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
			wi = &sq->db.wqe_info[ci];

			sqcc += wi->num_wqebbs;

			for (j = 0; j < wi->num_ds; j++) {
				struct mlx5e_xdp_info xdpi =
					mlx5e_xdpi_fifo_pop(xdpi_fifo);

				if (is_redirect) {
					dma_unmap_single(sq->pdev, xdpi.dma_addr,
							 xdpi.xdpf->len, DMA_TO_DEVICE);
					xdp_return_frame(xdpi.xdpf);
				} else {
					/* Recycle RX page */
					mlx5e_page_release(rq, &xdpi.di, true);
				}
			}
		} while (!last_wqe);
	} while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq)));

	sq->stats->cqes += i;

	mlx5_cqwq_update_db_record(&cq->wq);

	/* ensure cq space is freed before enabling more cqes */
	wmb();

	sq->cc = sqcc;

	return (i == MLX5E_TX_CQ_POLL_BUDGET);
}

void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq)
{
	struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo;
	bool is_redirect = !rq;

	while (sq->cc != sq->pc) {
		struct mlx5e_xdp_wqe_info *wi;
		u16 ci;
		int i;

		ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->cc);
		wi = &sq->db.wqe_info[ci];

		sq->cc += wi->num_wqebbs;

		for (i = 0; i < wi->num_ds; i++) {
			struct mlx5e_xdp_info xdpi =
				mlx5e_xdpi_fifo_pop(xdpi_fifo);

			if (is_redirect) {
				dma_unmap_single(sq->pdev, xdpi.dma_addr,
						 xdpi.xdpf->len, DMA_TO_DEVICE);
				xdp_return_frame(xdpi.xdpf);
			} else {
				/* Recycle RX page */
				mlx5e_page_release(rq, &xdpi.di, false);
			}
		}
	}
}

int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
		   u32 flags)
{
	struct mlx5e_priv *priv = netdev_priv(dev);
	struct mlx5e_xdpsq *sq;
	int drops = 0;
	int sq_num;
	int i;

	/* this flag is sufficient, no need to test internal sq state */
	if (unlikely(!mlx5e_xdp_tx_is_enabled(priv)))
		return -ENETDOWN;

	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
		return -EINVAL;

	sq_num = smp_processor_id();

	if (unlikely(sq_num >= priv->channels.num))
		return -ENXIO;

	sq = &priv->channels.c[sq_num]->xdpsq;

	for (i = 0; i < n; i++) {
		struct xdp_frame *xdpf = frames[i];
		struct mlx5e_xdp_info xdpi;

		xdpi.dma_addr = dma_map_single(sq->pdev, xdpf->data, xdpf->len,
					       DMA_TO_DEVICE);
		if (unlikely(dma_mapping_error(sq->pdev, xdpi.dma_addr))) {
			xdp_return_frame_rx_napi(xdpf);
			drops++;
			continue;
		}

		xdpi.xdpf = xdpf;

		if (unlikely(!sq->xmit_xdp_frame(sq, &xdpi))) {
			dma_unmap_single(sq->pdev, xdpi.dma_addr,
					 xdpf->len, DMA_TO_DEVICE);
			xdp_return_frame_rx_napi(xdpf);
			drops++;
		}
	}

	if (flags & XDP_XMIT_FLUSH) {
		if (sq->mpwqe.wqe)
			mlx5e_xdp_mpwqe_complete(sq);
		mlx5e_xmit_xdp_doorbell(sq);
	}

	return n - drops;
}

void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq)
{
	struct mlx5e_xdpsq *xdpsq = &rq->xdpsq;

	if (xdpsq->mpwqe.wqe)
		mlx5e_xdp_mpwqe_complete(xdpsq);

	mlx5e_xmit_xdp_doorbell(xdpsq);

	if (xdpsq->redirect_flush) {
		xdp_do_flush_map();
		xdpsq->redirect_flush = false;
	}
}

void mlx5e_set_xmit_fp(struct mlx5e_xdpsq *sq, bool is_mpw)
{
	sq->xmit_xdp_frame = is_mpw ?
		mlx5e_xmit_xdp_frame_mpwqe : mlx5e_xmit_xdp_frame;
}