drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
/*
 * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/bpf_trace.h>
#include "en/xdp.h"

int mlx5e_xdp_max_mtu(struct mlx5e_params *params)
{
	int hr = NET_IP_ALIGN + XDP_PACKET_HEADROOM;

	/* Let S := SKB_DATA_ALIGN(sizeof(struct skb_shared_info)).
	 * The condition checked in mlx5e_rx_is_linear_skb is:
	 *   SKB_DATA_ALIGN(sw_mtu + hard_mtu + hr) + S <= PAGE_SIZE         (1)
	 *   (Note that hw_mtu == sw_mtu + hard_mtu.)
	 * What is returned from this function is:
	 *   max_mtu = PAGE_SIZE - S - hr - hard_mtu                         (2)
	 * After assigning sw_mtu := max_mtu, the left side of (1) turns to
	 * SKB_DATA_ALIGN(PAGE_SIZE - S) + S, which is equal to PAGE_SIZE,
	 * because both PAGE_SIZE and S are already aligned. Any number greater
	 * than max_mtu would make the left side of (1) greater than PAGE_SIZE,
	 * so max_mtu is the maximum MTU allowed.
	 */
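	/* Illustrative numbers only (they depend on the architecture and
	 * kernel configuration): with PAGE_SIZE == 4096, NET_IP_ALIGN == 0
	 * and XDP_PACKET_HEADROOM == 256, hr == 256 and S is typically 320
	 * bytes, so SKB_MAX_HEAD(hr) == 4096 - 320 - 256 == 3520. Subtracting
	 * the Ethernet hard_mtu (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN == 22)
	 * via MLX5E_HW2SW_MTU then gives a max_mtu of about 3498 bytes.
	 */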

	return MLX5E_HW2SW_MTU(params, SKB_MAX_HEAD(hr));
}

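/* Convert an XDP_TX xdp_buff into an xdp_frame and hand it to the XDP SQ.
 * The packet still lives in the RX page described by @di, so only the DMA
 * address within that page needs to be computed and synced for the device.
 * Returns false if the frame could not be converted or enqueued.
 */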
static inline bool
mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_dma_info *di,
		    struct xdp_buff *xdp)
{
	struct mlx5e_xdp_info xdpi;

	xdpi.xdpf = convert_to_xdp_frame(xdp);
	if (unlikely(!xdpi.xdpf))
		return false;
	xdpi.dma_addr = di->addr + (xdpi.xdpf->data - (void *)xdpi.xdpf);
	dma_sync_single_for_device(sq->pdev, xdpi.dma_addr,
				   xdpi.xdpf->len, PCI_DMA_TODEVICE);
	xdpi.di = *di;

	return sq->xmit_xdp_frame(sq, &xdpi);
}

/* Returns true if the packet was consumed by XDP. */
bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
		      void *va, u16 *rx_headroom, u32 *len)
{
	struct bpf_prog *prog = READ_ONCE(rq->xdp_prog);
	struct xdp_buff xdp;
	u32 act;
	int err;

	if (!prog)
		return false;

	xdp.data = va + *rx_headroom;
	xdp_set_data_meta_invalid(&xdp);
	xdp.data_end = xdp.data + *len;
	xdp.data_hard_start = va;
	xdp.rxq = &rq->xdp_rxq;

	act = bpf_prog_run_xdp(prog, &xdp);
	switch (act) {
	case XDP_PASS:
		*rx_headroom = xdp.data - xdp.data_hard_start;
		*len = xdp.data_end - xdp.data;
		return false;
	case XDP_TX:
		if (unlikely(!mlx5e_xmit_xdp_buff(&rq->xdpsq, di, &xdp)))
			goto xdp_abort;
		__set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); /* non-atomic */
		return true;
	case XDP_REDIRECT:
		/* When XDP is enabled, the page refcount is 1 here. */
		err = xdp_do_redirect(rq->netdev, &xdp, prog);
		if (unlikely(err))
			goto xdp_abort;
		__set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags);
		rq->xdpsq.redirect_flush = true;
		mlx5e_page_dma_unmap(rq, di);
		rq->stats->xdp_redirect++;
		return true;
	default:
		bpf_warn_invalid_xdp_action(act);
		/* fall through */
	case XDP_ABORTED:
xdp_abort:
		trace_xdp_exception(rq->netdev, prog, act);
		/* fall through */
	case XDP_DROP:
		rq->stats->xdp_drop++;
		return true;
	}
}

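/* Open a new MPWQE (multi-packet work queue entry) session: fetch the next
 * WQE from the SQ ring and compute how many data segments it may hold. The
 * bound is the number of contiguous WQEBBs left in the ring, capped at
 * MLX5E_XDP_MPW_MAX_WQEBBS (see the comment below).
 */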
static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq)
{
	struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
	struct mlx5_wq_cyc *wq = &sq->wq;
	u8  wqebbs;
	u16 pi;

	mlx5e_xdpsq_fetch_wqe(sq, &session->wqe);

	prefetchw(session->wqe->data);
	session->ds_count = MLX5E_XDP_TX_EMPTY_DS_COUNT;

	pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);

/* The product MLX5_SEND_WQE_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS
 * (16 * 4 == 64) does not fit in the 6-bit DS field of the Ctrl Segment.
 * We use a bound lower than MLX5_SEND_WQE_MAX_WQEBBS to let a
 * full-session WQE be cache-aligned.
 */
#if L1_CACHE_BYTES < 128
#define MLX5E_XDP_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 1)
#else
#define MLX5E_XDP_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 2)
#endif

	wqebbs = min_t(u16, mlx5_wq_cyc_get_contig_wqebbs(wq, pi),
		       MLX5E_XDP_MPW_MAX_WQEBBS);

	session->max_ds_count = MLX5_SEND_WQEBB_NUM_DS * wqebbs;
}

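/* Close the currently open MPWQE session: fill in the control segment
 * (ENHANCED_MPSW opcode and total DS count), record the WQE size for
 * completion handling, advance the producer counter and remember the
 * control segment so the doorbell can be rung later.
 */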
static void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq)
{
	struct mlx5_wq_cyc       *wq    = &sq->wq;
	struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
	struct mlx5_wqe_ctrl_seg *cseg = &session->wqe->ctrl;
	u16 ds_count = session->ds_count;
	u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
	struct mlx5e_xdp_wqe_info *wi = &sq->db.wqe_info[pi];

	cseg->opmod_idx_opcode =
		cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_ENHANCED_MPSW);
	cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_count);

	wi->num_wqebbs = DIV_ROUND_UP(ds_count, MLX5_SEND_WQEBB_NUM_DS);
	wi->num_ds     = ds_count - MLX5E_XDP_TX_EMPTY_DS_COUNT;

	sq->pc += wi->num_wqebbs;

	sq->doorbell_cseg = cseg;

	session->wqe = NULL; /* Close session */
}

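/* MPWQE transmit path: append the frame as one data segment to the open
 * session, opening a new session if none is active (ringing the doorbell
 * first if the SQ has no room), and closing the session once it is full.
 */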
static bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq,
				       struct mlx5e_xdp_info *xdpi)
{
	struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
	struct mlx5e_xdpsq_stats *stats = sq->stats;

	dma_addr_t dma_addr    = xdpi->dma_addr;
	struct xdp_frame *xdpf = xdpi->xdpf;
	unsigned int dma_len   = xdpf->len;

	if (unlikely(sq->hw_mtu < dma_len)) {
		stats->err++;
		return false;
	}

	if (unlikely(!session->wqe)) {
		if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc,
						     MLX5_SEND_WQE_MAX_WQEBBS))) {
			/* SQ is full, ring doorbell */
			mlx5e_xmit_xdp_doorbell(sq);
			stats->full++;
			return false;
		}

		mlx5e_xdp_mpwqe_session_start(sq);
	}

	mlx5e_xdp_mpwqe_add_dseg(sq, dma_addr, dma_len);

	if (unlikely(session->ds_count == session->max_ds_count))
		mlx5e_xdp_mpwqe_complete(sq);

	mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi);
	stats->xmit++;
	return true;
}

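/* Legacy (non-MPWQE) transmit path: build one SEND WQE per frame. When the
 * SQ's inline mode requires it, the first MLX5E_XDP_MIN_INLINE bytes are
 * copied into the ETH segment and the data segment covers the remainder.
 */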
static bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi)
{
	struct mlx5_wq_cyc       *wq   = &sq->wq;
	u16                       pi   = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
	struct mlx5e_tx_wqe      *wqe  = mlx5_wq_cyc_get_wqe(wq, pi);

	struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
	struct mlx5_wqe_eth_seg  *eseg = &wqe->eth;
	struct mlx5_wqe_data_seg *dseg = wqe->data;

	struct xdp_frame *xdpf = xdpi->xdpf;
	dma_addr_t dma_addr  = xdpi->dma_addr;
	unsigned int dma_len = xdpf->len;

	struct mlx5e_xdpsq_stats *stats = sq->stats;

	prefetchw(wqe);

	if (unlikely(dma_len < MLX5E_XDP_MIN_INLINE || sq->hw_mtu < dma_len)) {
		stats->err++;
		return false;
	}

	if (unlikely(!mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, 1))) {
		/* SQ is full, ring doorbell */
		mlx5e_xmit_xdp_doorbell(sq);
		stats->full++;
		return false;
	}

	cseg->fm_ce_se = 0;

	/* copy the inline part if required */
	if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) {
		memcpy(eseg->inline_hdr.start, xdpf->data, MLX5E_XDP_MIN_INLINE);
		eseg->inline_hdr.sz = cpu_to_be16(MLX5E_XDP_MIN_INLINE);
		dma_len  -= MLX5E_XDP_MIN_INLINE;
		dma_addr += MLX5E_XDP_MIN_INLINE;
		dseg++;
	}

	/* write the dma part */
	dseg->addr       = cpu_to_be64(dma_addr);
	dseg->byte_count = cpu_to_be32(dma_len);

	cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_SEND);

	sq->pc++;

	sq->doorbell_cseg = cseg;

	mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi);
	stats->xmit++;
	return true;
}

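/* Poll the XDP SQ's completion queue, up to MLX5E_TX_CQ_POLL_BUDGET CQEs.
 * For each completed WQE the matching entries are popped from the xdpi FIFO:
 * redirected frames (rq == NULL) are DMA-unmapped and returned, while XDP_TX
 * frames have their RX page recycled. Returns true if the budget was
 * exhausted, i.e. there may be more completions to process.
 */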
bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq)
{
	struct mlx5e_xdp_info_fifo *xdpi_fifo;
	struct mlx5e_xdpsq *sq;
	struct mlx5_cqe64 *cqe;
	bool is_redirect;
	u16 sqcc;
	int i;

	sq = container_of(cq, struct mlx5e_xdpsq, cq);

	if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)))
		return false;

	cqe = mlx5_cqwq_get_cqe(&cq->wq);
	if (!cqe)
		return false;

	is_redirect = !rq;
	xdpi_fifo = &sq->db.xdpi_fifo;

	/* sq->cc must be updated only after mlx5_cqwq_update_db_record(),
	 * otherwise a cq overrun may occur
	 */
	sqcc = sq->cc;

	i = 0;
	do {
		u16 wqe_counter;
		bool last_wqe;

		mlx5_cqwq_pop(&cq->wq);

		wqe_counter = be16_to_cpu(cqe->wqe_counter);

		if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ))
			netdev_WARN_ONCE(sq->channel->netdev,
					 "Bad OP in XDPSQ CQE: 0x%x\n",
					 get_cqe_opcode(cqe));

		do {
			struct mlx5e_xdp_wqe_info *wi;
			u16 ci, j;

			last_wqe = (sqcc == wqe_counter);
			ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
			wi = &sq->db.wqe_info[ci];

			sqcc += wi->num_wqebbs;

			for (j = 0; j < wi->num_ds; j++) {
				struct mlx5e_xdp_info xdpi =
					mlx5e_xdpi_fifo_pop(xdpi_fifo);

				if (is_redirect) {
					dma_unmap_single(sq->pdev, xdpi.dma_addr,
							 xdpi.xdpf->len, DMA_TO_DEVICE);
					xdp_return_frame(xdpi.xdpf);
				} else {
					/* Recycle RX page */
					mlx5e_page_release(rq, &xdpi.di, true);
				}
			}
		} while (!last_wqe);
	} while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq)));

	sq->stats->cqes += i;

	mlx5_cqwq_update_db_record(&cq->wq);

	/* ensure cq space is freed before enabling more cqes */
	wmb();

	sq->cc = sqcc;
	return (i == MLX5E_TX_CQ_POLL_BUDGET);
}

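/* Drain descriptors still posted on the SQ: walk the ring from the consumer
 * to the producer counter and free each pending frame, unmapping and
 * returning redirected frames and releasing RX pages for XDP_TX ones.
 */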
void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq)
{
	struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo;
	bool is_redirect = !rq;

	while (sq->cc != sq->pc) {
		struct mlx5e_xdp_wqe_info *wi;
		u16 ci, i;

		ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->cc);
		wi = &sq->db.wqe_info[ci];

		sq->cc += wi->num_wqebbs;

		for (i = 0; i < wi->num_ds; i++) {
			struct mlx5e_xdp_info xdpi =
				mlx5e_xdpi_fifo_pop(xdpi_fifo);

			if (is_redirect) {
				dma_unmap_single(sq->pdev, xdpi.dma_addr,
						 xdpi.xdpf->len, DMA_TO_DEVICE);
				xdp_return_frame(xdpi.xdpf);
			} else {
				/* Recycle RX page */
				mlx5e_page_release(rq, &xdpi.di, false);
			}
		}
	}
}

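/* ndo_xdp_xmit handler: transmit a batch of redirected frames on the XDP SQ
 * of the channel matching the current CPU. Each frame is DMA-mapped before
 * being handed to xmit_xdp_frame; frames that cannot be mapped or enqueued
 * are returned to the XDP memory allocator and counted as drops. Returns the
 * number of frames actually queued.
 */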
int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
		   u32 flags)
{
	struct mlx5e_priv *priv = netdev_priv(dev);
	struct mlx5e_xdpsq *sq;
	int drops = 0;
	int sq_num;
	int i;

	/* this flag is sufficient, no need to test internal sq state */
	if (unlikely(!mlx5e_xdp_tx_is_enabled(priv)))
		return -ENETDOWN;

	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
		return -EINVAL;

	sq_num = smp_processor_id();

	if (unlikely(sq_num >= priv->channels.num))
		return -ENXIO;

	sq = &priv->channels.c[sq_num]->xdpsq;

	for (i = 0; i < n; i++) {
		struct xdp_frame *xdpf = frames[i];
		struct mlx5e_xdp_info xdpi;

		xdpi.dma_addr = dma_map_single(sq->pdev, xdpf->data, xdpf->len,
					       DMA_TO_DEVICE);
		if (unlikely(dma_mapping_error(sq->pdev, xdpi.dma_addr))) {
			xdp_return_frame_rx_napi(xdpf);
			drops++;
			continue;
		}

		xdpi.xdpf = xdpf;

		if (unlikely(!sq->xmit_xdp_frame(sq, &xdpi))) {
			dma_unmap_single(sq->pdev, xdpi.dma_addr,
					 xdpf->len, DMA_TO_DEVICE);
			xdp_return_frame_rx_napi(xdpf);
			drops++;
		}
	}

	if (flags & XDP_XMIT_FLUSH) {
		if (sq->mpwqe.wqe)
			mlx5e_xdp_mpwqe_complete(sq);
		mlx5e_xmit_xdp_doorbell(sq);
	}

	return n - drops;
}

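/* Finish XDP work for an RX poll cycle: close any open MPWQE session, ring
 * the doorbell for pending XDP_TX WQEs, and flush the redirect maps if any
 * packet was redirected during this poll.
 */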
void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq)
{
	struct mlx5e_xdpsq *xdpsq = &rq->xdpsq;

	if (xdpsq->mpwqe.wqe)
		mlx5e_xdp_mpwqe_complete(xdpsq);

	mlx5e_xmit_xdp_doorbell(xdpsq);

	if (xdpsq->redirect_flush) {
		xdp_do_flush_map();
		xdpsq->redirect_flush = false;
	}
}

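/* Select the transmit function for this SQ: the MPWQE path when multi-packet
 * WQEs are enabled, the plain per-frame SEND path otherwise.
 */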
void mlx5e_set_xmit_fp(struct mlx5e_xdpsq *sq, bool is_mpw)
{
	sq->xmit_xdp_frame = is_mpw ?
		mlx5e_xmit_xdp_frame_mpwqe : mlx5e_xmit_xdp_frame;
}