1 // SPDX-License-Identifier: GPL-2.0
2 /* XDP sockets
3  *
4  * AF_XDP sockets provide a channel between XDP programs and userspace
5  * applications.
6  * Copyright(c) 2018 Intel Corporation.
7  *
8  * Author(s): Björn Töpel <bjorn.topel@intel.com>
9  *            Magnus Karlsson <magnus.karlsson@intel.com>
10  */
11
12 #define pr_fmt(fmt) "AF_XDP: %s: " fmt, __func__
13
14 #include <linux/if_xdp.h>
15 #include <linux/init.h>
16 #include <linux/sched/mm.h>
17 #include <linux/sched/signal.h>
18 #include <linux/sched/task.h>
19 #include <linux/socket.h>
20 #include <linux/file.h>
21 #include <linux/uaccess.h>
22 #include <linux/net.h>
23 #include <linux/netdevice.h>
24 #include <linux/rculist.h>
25 #include <net/xdp_sock_drv.h>
26 #include <net/busy_poll.h>
27 #include <net/xdp.h>
28
29 #include "xsk_queue.h"
30 #include "xdp_umem.h"
31 #include "xsk.h"
32
33 #define TX_BATCH_SIZE 16
34
35 static DEFINE_PER_CPU(struct list_head, xskmap_flush_list);
36
37 void xsk_set_rx_need_wakeup(struct xsk_buff_pool *pool)
38 {
39         if (pool->cached_need_wakeup & XDP_WAKEUP_RX)
40                 return;
41
42         pool->fq->ring->flags |= XDP_RING_NEED_WAKEUP;
43         pool->cached_need_wakeup |= XDP_WAKEUP_RX;
44 }
45 EXPORT_SYMBOL(xsk_set_rx_need_wakeup);
46
47 void xsk_set_tx_need_wakeup(struct xsk_buff_pool *pool)
48 {
49         struct xdp_sock *xs;
50
51         if (pool->cached_need_wakeup & XDP_WAKEUP_TX)
52                 return;
53
54         rcu_read_lock();
55         list_for_each_entry_rcu(xs, &pool->xsk_tx_list, tx_list) {
56                 xs->tx->ring->flags |= XDP_RING_NEED_WAKEUP;
57         }
58         rcu_read_unlock();
59
60         pool->cached_need_wakeup |= XDP_WAKEUP_TX;
61 }
62 EXPORT_SYMBOL(xsk_set_tx_need_wakeup);
63
64 void xsk_clear_rx_need_wakeup(struct xsk_buff_pool *pool)
65 {
66         if (!(pool->cached_need_wakeup & XDP_WAKEUP_RX))
67                 return;
68
69         pool->fq->ring->flags &= ~XDP_RING_NEED_WAKEUP;
70         pool->cached_need_wakeup &= ~XDP_WAKEUP_RX;
71 }
72 EXPORT_SYMBOL(xsk_clear_rx_need_wakeup);
73
74 void xsk_clear_tx_need_wakeup(struct xsk_buff_pool *pool)
75 {
76         struct xdp_sock *xs;
77
78         if (!(pool->cached_need_wakeup & XDP_WAKEUP_TX))
79                 return;
80
81         rcu_read_lock();
82         list_for_each_entry_rcu(xs, &pool->xsk_tx_list, tx_list) {
83                 xs->tx->ring->flags &= ~XDP_RING_NEED_WAKEUP;
84         }
85         rcu_read_unlock();
86
87         pool->cached_need_wakeup &= ~XDP_WAKEUP_TX;
88 }
89 EXPORT_SYMBOL(xsk_clear_tx_need_wakeup);
90
91 bool xsk_uses_need_wakeup(struct xsk_buff_pool *pool)
92 {
93         return pool->uses_need_wakeup;
94 }
95 EXPORT_SYMBOL(xsk_uses_need_wakeup);
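
/*
 * Illustrative sketch, not kernel code: how user space is expected to
 * react to the NEED_WAKEUP flag that the helpers above set and clear.
 * The flag lives in the flags word of the mmap:ed fill and Tx rings;
 * when it is set the application has to kick the kernel with a syscall
 * (recvmsg()/poll() for Rx, sendto() for Tx), otherwise it may keep
 * producing descriptors without any system calls at all. The ring
 * pointers below are application-defined names:
 *
 *        if (fill_ring->flags & XDP_RING_NEED_WAKEUP)
 *                recvmsg(xsk_fd, &msg, MSG_DONTWAIT);
 *
 *        if (tx_ring->flags & XDP_RING_NEED_WAKEUP)
 *                sendto(xsk_fd, NULL, 0, MSG_DONTWAIT, NULL, 0);
 */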
96
97 struct xsk_buff_pool *xsk_get_pool_from_qid(struct net_device *dev,
98                                             u16 queue_id)
99 {
100         if (queue_id < dev->real_num_rx_queues)
101                 return dev->_rx[queue_id].pool;
102         if (queue_id < dev->real_num_tx_queues)
103                 return dev->_tx[queue_id].pool;
104
105         return NULL;
106 }
107 EXPORT_SYMBOL(xsk_get_pool_from_qid);
108
109 void xsk_clear_pool_at_qid(struct net_device *dev, u16 queue_id)
110 {
111         if (queue_id < dev->num_rx_queues)
112                 dev->_rx[queue_id].pool = NULL;
113         if (queue_id < dev->num_tx_queues)
114                 dev->_tx[queue_id].pool = NULL;
115 }
116
117 /* The buffer pool is stored both in the _rx struct and the _tx struct as we do
118  * not know if the device has more tx queues than rx, or the opposite.
119  * This might also change during run time.
120  */
121 int xsk_reg_pool_at_qid(struct net_device *dev, struct xsk_buff_pool *pool,
122                         u16 queue_id)
123 {
124         if (queue_id >= max_t(unsigned int,
125                               dev->real_num_rx_queues,
126                               dev->real_num_tx_queues))
127                 return -EINVAL;
128
129         if (queue_id < dev->real_num_rx_queues)
130                 dev->_rx[queue_id].pool = pool;
131         if (queue_id < dev->real_num_tx_queues)
132                 dev->_tx[queue_id].pool = pool;
133
134         return 0;
135 }
136
137 void xp_release(struct xdp_buff_xsk *xskb)
138 {
139         xskb->pool->free_heads[xskb->pool->free_heads_cnt++] = xskb;
140 }
141
142 static u64 xp_get_handle(struct xdp_buff_xsk *xskb)
143 {
144         u64 offset = xskb->xdp.data - xskb->xdp.data_hard_start;
145
146         offset += xskb->pool->headroom;
147         if (!xskb->pool->unaligned)
148                 return xskb->orig_addr + offset;
149         return xskb->orig_addr + (offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT);
150 }
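
/*
 * Sketch of how user space decodes the handle built above, assuming the
 * XSK_UNALIGNED_BUF_* constants from <linux/if_xdp.h>. In aligned mode
 * the handle is simply orig_addr + offset; in unaligned chunk mode the
 * offset is packed into the upper bits so that both parts can be
 * recovered separately:
 *
 *        __u64 base   = handle & XSK_UNALIGNED_BUF_ADDR_MASK;
 *        __u64 offset = handle >> XSK_UNALIGNED_BUF_OFFSET_SHIFT;
 *        void *data   = umem_area + base + offset;
 */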
151
152 static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
153 {
154         struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
155         u64 addr;
156         int err;
157
158         addr = xp_get_handle(xskb);
159         err = xskq_prod_reserve_desc(xs->rx, addr, len);
160         if (err) {
161                 xs->rx_queue_full++;
162                 return err;
163         }
164
165         xp_release(xskb);
166         return 0;
167 }
168
169 static void xsk_copy_xdp(struct xdp_buff *to, struct xdp_buff *from, u32 len)
170 {
171         void *from_buf, *to_buf;
172         u32 metalen;
173
174         if (unlikely(xdp_data_meta_unsupported(from))) {
175                 from_buf = from->data;
176                 to_buf = to->data;
177                 metalen = 0;
178         } else {
179                 from_buf = from->data_meta;
180                 metalen = from->data - from->data_meta;
181                 to_buf = to->data - metalen;
182         }
183
184         memcpy(to_buf, from_buf, len + metalen);
185 }
186
187 static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
188 {
189         struct xdp_buff *xsk_xdp;
190         int err;
191         u32 len;
192
193         len = xdp->data_end - xdp->data;
194         if (len > xsk_pool_get_rx_frame_size(xs->pool)) {
195                 xs->rx_dropped++;
196                 return -ENOSPC;
197         }
198
199         xsk_xdp = xsk_buff_alloc(xs->pool);
200         if (!xsk_xdp) {
201                 xs->rx_dropped++;
202                 return -ENOSPC;
203         }
204
205         xsk_copy_xdp(xsk_xdp, xdp, len);
206         err = __xsk_rcv_zc(xs, xsk_xdp, len);
207         if (err) {
208                 xsk_buff_free(xsk_xdp);
209                 return err;
210         }
211         return 0;
212 }
213
214 static bool xsk_tx_writeable(struct xdp_sock *xs)
215 {
216         if (xskq_cons_present_entries(xs->tx) > xs->tx->nentries / 2)
217                 return false;
218
219         return true;
220 }
221
222 static bool xsk_is_bound(struct xdp_sock *xs)
223 {
224         if (READ_ONCE(xs->state) == XSK_BOUND) {
225                 /* Matches smp_wmb() in bind(). */
226                 smp_rmb();
227                 return true;
228         }
229         return false;
230 }
231
232 static int xsk_rcv_check(struct xdp_sock *xs, struct xdp_buff *xdp)
233 {
234         if (!xsk_is_bound(xs))
235                 return -EINVAL;
236
237         if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index)
238                 return -EINVAL;
239
240         sk_mark_napi_id_once_xdp(&xs->sk, xdp);
241         return 0;
242 }
243
244 static void xsk_flush(struct xdp_sock *xs)
245 {
246         xskq_prod_submit(xs->rx);
247         __xskq_cons_release(xs->pool->fq);
248         sock_def_readable(&xs->sk);
249 }
250
251 int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
252 {
253         int err;
254
255         spin_lock_bh(&xs->rx_lock);
256         err = xsk_rcv_check(xs, xdp);
257         if (!err) {
258                 err = __xsk_rcv(xs, xdp);
259                 xsk_flush(xs);
260         }
261         spin_unlock_bh(&xs->rx_lock);
262         return err;
263 }
264
265 static int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
266 {
267         int err;
268         u32 len;
269
270         err = xsk_rcv_check(xs, xdp);
271         if (err)
272                 return err;
273
274         if (xdp->rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL) {
275                 len = xdp->data_end - xdp->data;
276                 return __xsk_rcv_zc(xs, xdp, len);
277         }
278
279         err = __xsk_rcv(xs, xdp);
280         if (!err)
281                 xdp_return_buff(xdp);
282         return err;
283 }
284
285 int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp)
286 {
287         struct list_head *flush_list = this_cpu_ptr(&xskmap_flush_list);
288         int err;
289
290         err = xsk_rcv(xs, xdp);
291         if (err)
292                 return err;
293
294         if (!xs->flush_node.prev)
295                 list_add(&xs->flush_node, flush_list);
296
297         return 0;
298 }
299
300 void __xsk_map_flush(void)
301 {
302         struct list_head *flush_list = this_cpu_ptr(&xskmap_flush_list);
303         struct xdp_sock *xs, *tmp;
304
305         list_for_each_entry_safe(xs, tmp, flush_list, flush_node) {
306                 xsk_flush(xs);
307                 __list_del_clearprev(&xs->flush_node);
308         }
309 }
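
/*
 * Sketch of the BPF program side that feeds __xsk_map_redirect() and
 * __xsk_map_flush() through bpf_redirect_map(); the map name and size
 * are application choices:
 *
 *        struct {
 *                __uint(type, BPF_MAP_TYPE_XSKMAP);
 *                __uint(max_entries, 64);
 *                __type(key, __u32);
 *                __type(value, __u32);
 *        } xsks_map SEC(".maps");
 *
 *        SEC("xdp")
 *        int xdp_sock_prog(struct xdp_md *ctx)
 *        {
 *                return bpf_redirect_map(&xsks_map, ctx->rx_queue_index,
 *                                        XDP_PASS);
 *        }
 */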
310
311 void xsk_tx_completed(struct xsk_buff_pool *pool, u32 nb_entries)
312 {
313         xskq_prod_submit_n(pool->cq, nb_entries);
314 }
315 EXPORT_SYMBOL(xsk_tx_completed);
316
317 void xsk_tx_release(struct xsk_buff_pool *pool)
318 {
319         struct xdp_sock *xs;
320
321         rcu_read_lock();
322         list_for_each_entry_rcu(xs, &pool->xsk_tx_list, tx_list) {
323                 __xskq_cons_release(xs->tx);
324                 if (xsk_tx_writeable(xs))
325                         xs->sk.sk_write_space(&xs->sk);
326         }
327         rcu_read_unlock();
328 }
329 EXPORT_SYMBOL(xsk_tx_release);
330
331 bool xsk_tx_peek_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc)
332 {
333         struct xdp_sock *xs;
334
335         rcu_read_lock();
336         list_for_each_entry_rcu(xs, &pool->xsk_tx_list, tx_list) {
337                 if (!xskq_cons_peek_desc(xs->tx, desc, pool)) {
338                         xs->tx->queue_empty_descs++;
339                         continue;
340                 }
341
342                 /* This is the backpressure mechanism for the Tx path.
343                  * Reserve space in the completion queue and only proceed
344                  * if there is space in it. This avoids having to implement
345                  * any buffering in the Tx path.
346                  */
347                 if (xskq_prod_reserve_addr(pool->cq, desc->addr))
348                         goto out;
349
350                 xskq_cons_release(xs->tx);
351                 rcu_read_unlock();
352                 return true;
353         }
354
355 out:
356         rcu_read_unlock();
357         return false;
358 }
359 EXPORT_SYMBOL(xsk_tx_peek_desc);
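
/*
 * Sketch of how a zero-copy driver typically consumes the helper above
 * from its Tx NAPI poll; the my_hw_*() calls are hypothetical
 * placeholders for hardware-specific code:
 *
 *        struct xdp_desc desc;
 *
 *        while (my_hw_tx_ring_has_room(ring) &&
 *               xsk_tx_peek_desc(pool, &desc)) {
 *                dma_addr_t dma = xsk_buff_raw_get_dma(pool, desc.addr);
 *
 *                xsk_buff_raw_dma_sync_for_device(pool, dma, desc.len);
 *                my_hw_fill_tx_desc(ring, dma, desc.len);
 *        }
 *        xsk_tx_release(pool);
 *        my_hw_kick_tx(ring);
 */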
360
361 static u32 xsk_tx_peek_release_fallback(struct xsk_buff_pool *pool, struct xdp_desc *descs,
362                                         u32 max_entries)
363 {
364         u32 nb_pkts = 0;
365
366         while (nb_pkts < max_entries && xsk_tx_peek_desc(pool, &descs[nb_pkts]))
367                 nb_pkts++;
368
369         xsk_tx_release(pool);
370         return nb_pkts;
371 }
372
373 u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, struct xdp_desc *descs,
374                                    u32 max_entries)
375 {
376         struct xdp_sock *xs;
377         u32 nb_pkts;
378
379         rcu_read_lock();
380         if (!list_is_singular(&pool->xsk_tx_list)) {
381                 /* Fallback to the non-batched version */
382                 rcu_read_unlock();
383                 return xsk_tx_peek_release_fallback(pool, descs, max_entries);
384         }
385
386         xs = list_first_or_null_rcu(&pool->xsk_tx_list, struct xdp_sock, tx_list);
387         if (!xs) {
388                 nb_pkts = 0;
389                 goto out;
390         }
391
392         nb_pkts = xskq_cons_peek_desc_batch(xs->tx, descs, pool, max_entries);
393         if (!nb_pkts) {
394                 xs->tx->queue_empty_descs++;
395                 goto out;
396         }
397
398         /* This is the backpressure mechanism for the Tx path. Try to
399          * reserve space in the completion queue for all packets, but
400          * if there are fewer slots available, just process that many
401          * packets. This avoids having to implement any buffering in
402          * the Tx path.
403          */
404         nb_pkts = xskq_prod_reserve_addr_batch(pool->cq, descs, nb_pkts);
405         if (!nb_pkts)
406                 goto out;
407
408         xskq_cons_release_n(xs->tx, nb_pkts);
409         __xskq_cons_release(xs->tx);
410         xs->sk.sk_write_space(&xs->sk);
411
412 out:
413         rcu_read_unlock();
414         return nb_pkts;
415 }
416 EXPORT_SYMBOL(xsk_tx_peek_release_desc_batch);
417
418 static int xsk_wakeup(struct xdp_sock *xs, u8 flags)
419 {
420         struct net_device *dev = xs->dev;
421         int err;
422
423         rcu_read_lock();
424         err = dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id, flags);
425         rcu_read_unlock();
426
427         return err;
428 }
429
430 static int xsk_zc_xmit(struct xdp_sock *xs)
431 {
432         return xsk_wakeup(xs, XDP_WAKEUP_TX);
433 }
434
435 static void xsk_destruct_skb(struct sk_buff *skb)
436 {
437         u64 addr = (u64)(long)skb_shinfo(skb)->destructor_arg;
438         struct xdp_sock *xs = xdp_sk(skb->sk);
439         unsigned long flags;
440
441         spin_lock_irqsave(&xs->pool->cq_lock, flags);
442         xskq_prod_submit_addr(xs->pool->cq, addr);
443         spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
444
445         sock_wfree(skb);
446 }
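
/*
 * User-space sketch (memory barriers and error handling omitted): once
 * the destructor above has published an address to the completion ring,
 * the application reclaims the frame roughly like this. cq_* name its
 * own view of the mmap:ed completion ring and recycle_frame() is a
 * hypothetical helper:
 *
 *        while (cq_cons != *cq_producer) {
 *                __u64 addr = cq_ring[cq_cons++ & (cq_size - 1)];
 *
 *                recycle_frame(umem_area, addr);
 *        }
 *        *cq_consumer = cq_cons;
 */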
447
448 static int xsk_generic_xmit(struct sock *sk)
449 {
450         struct xdp_sock *xs = xdp_sk(sk);
451         u32 max_batch = TX_BATCH_SIZE;
452         bool sent_frame = false;
453         struct xdp_desc desc;
454         struct sk_buff *skb;
455         unsigned long flags;
456         int err = 0;
457
458         mutex_lock(&xs->mutex);
459
460         if (xs->queue_id >= xs->dev->real_num_tx_queues)
461                 goto out;
462
463         while (xskq_cons_peek_desc(xs->tx, &desc, xs->pool)) {
464                 char *buffer;
465                 u64 addr;
466                 u32 len;
467
468                 if (max_batch-- == 0) {
469                         err = -EAGAIN;
470                         goto out;
471                 }
472
473                 len = desc.len;
474                 skb = sock_alloc_send_skb(sk, len, 1, &err);
475                 if (unlikely(!skb))
476                         goto out;
477
478                 skb_put(skb, len);
479                 addr = desc.addr;
480                 buffer = xsk_buff_raw_get_data(xs->pool, addr);
481                 err = skb_store_bits(skb, 0, buffer, len);
482                 /* This is the backpressure mechanism for the Tx path.
483                  * Reserve space in the completion queue and only proceed
484                  * if there is space in it. This avoids having to implement
485                  * any buffering in the Tx path.
486                  */
487                 spin_lock_irqsave(&xs->pool->cq_lock, flags);
488                 if (unlikely(err) || xskq_prod_reserve(xs->pool->cq)) {
489                         spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
490                         kfree_skb(skb);
491                         goto out;
492                 }
493                 spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
494
495                 skb->dev = xs->dev;
496                 skb->priority = sk->sk_priority;
497                 skb->mark = sk->sk_mark;
498                 skb_shinfo(skb)->destructor_arg = (void *)(long)desc.addr;
499                 skb->destructor = xsk_destruct_skb;
500
501                 err = __dev_direct_xmit(skb, xs->queue_id);
502                 if (err == NETDEV_TX_BUSY) {

503                         /* Tell user-space to retry the send */
504                         skb->destructor = sock_wfree;
505                         spin_lock_irqsave(&xs->pool->cq_lock, flags);
506                         xskq_prod_cancel(xs->pool->cq);
507                         spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
508                         /* Free skb without triggering the perf drop trace */
509                         consume_skb(skb);
510                         err = -EAGAIN;
511                         goto out;
512                 }
513
514                 xskq_cons_release(xs->tx);
515                 /* Ignore NET_XMIT_CN as packet might have been sent */
516                 if (err == NET_XMIT_DROP) {
517                         /* SKB completed but not sent */
518                         err = -EBUSY;
519                         goto out;
520                 }
521
522                 sent_frame = true;
523         }
524
525         xs->tx->queue_empty_descs++;
526
527 out:
528         if (sent_frame)
529                 if (xsk_tx_writeable(xs))
530                         sk->sk_write_space(sk);
531
532         mutex_unlock(&xs->mutex);
533         return err;
534 }
535
536 static int __xsk_sendmsg(struct sock *sk)
537 {
538         struct xdp_sock *xs = xdp_sk(sk);
539
540         if (unlikely(!(xs->dev->flags & IFF_UP)))
541                 return -ENETDOWN;
542         if (unlikely(!xs->tx))
543                 return -ENOBUFS;
544
545         return xs->zc ? xsk_zc_xmit(xs) : xsk_generic_xmit(sk);
546 }
547
548 static bool xsk_no_wakeup(struct sock *sk)
549 {
550 #ifdef CONFIG_NET_RX_BUSY_POLL
551         /* Prefer busy-polling, skip the wakeup. */
552         return READ_ONCE(sk->sk_prefer_busy_poll) && READ_ONCE(sk->sk_ll_usec) &&
553                 READ_ONCE(sk->sk_napi_id) >= MIN_NAPI_ID;
554 #else
555         return false;
556 #endif
557 }
558
559 static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
560 {
561         bool need_wait = !(m->msg_flags & MSG_DONTWAIT);
562         struct sock *sk = sock->sk;
563         struct xdp_sock *xs = xdp_sk(sk);
564         struct xsk_buff_pool *pool;
565
566         if (unlikely(!xsk_is_bound(xs)))
567                 return -ENXIO;
568         if (unlikely(need_wait))
569                 return -EOPNOTSUPP;
570
571         if (sk_can_busy_loop(sk))
572                 sk_busy_loop(sk, 1); /* only support non-blocking sockets */
573
574         if (xsk_no_wakeup(sk))
575                 return 0;
576
577         pool = xs->pool;
578         if (pool->cached_need_wakeup & XDP_WAKEUP_TX)
579                 return __xsk_sendmsg(sk);
580         return 0;
581 }
582
583 static int xsk_recvmsg(struct socket *sock, struct msghdr *m, size_t len, int flags)
584 {
585         bool need_wait = !(flags & MSG_DONTWAIT);
586         struct sock *sk = sock->sk;
587         struct xdp_sock *xs = xdp_sk(sk);
588
589         if (unlikely(!xsk_is_bound(xs)))
590                 return -ENXIO;
591         if (unlikely(!(xs->dev->flags & IFF_UP)))
592                 return -ENETDOWN;
593         if (unlikely(!xs->rx))
594                 return -ENOBUFS;
595         if (unlikely(need_wait))
596                 return -EOPNOTSUPP;
597
598         if (sk_can_busy_loop(sk))
599                 sk_busy_loop(sk, 1); /* only support non-blocking sockets */
600
601         if (xsk_no_wakeup(sk))
602                 return 0;
603
604         if (xs->pool->cached_need_wakeup & XDP_WAKEUP_RX && xs->zc)
605                 return xsk_wakeup(xs, XDP_WAKEUP_RX);
606         return 0;
607 }
608
609 static __poll_t xsk_poll(struct file *file, struct socket *sock,
610                              struct poll_table_struct *wait)
611 {
612         __poll_t mask = 0;
613         struct sock *sk = sock->sk;
614         struct xdp_sock *xs = xdp_sk(sk);
615         struct xsk_buff_pool *pool;
616
617         sock_poll_wait(file, sock, wait);
618
619         if (unlikely(!xsk_is_bound(xs)))
620                 return mask;
621
622         pool = xs->pool;
623
624         if (pool->cached_need_wakeup) {
625                 if (xs->zc)
626                         xsk_wakeup(xs, pool->cached_need_wakeup);
627                 else
628                         /* Poll needs to drive Tx also in copy mode */
629                         __xsk_sendmsg(sk);
630         }
631
632         if (xs->rx && !xskq_prod_is_empty(xs->rx))
633                 mask |= EPOLLIN | EPOLLRDNORM;
634         if (xs->tx && xsk_tx_writeable(xs))
635                 mask |= EPOLLOUT | EPOLLWRNORM;
636
637         return mask;
638 }
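
/*
 * User-space sketch of driving an AF_XDP socket through the poll
 * handler above; process_rx_ring() and refill_tx_ring() are
 * hypothetical application helpers:
 *
 *        struct pollfd pfd = { .fd = xsk_fd, .events = POLLIN | POLLOUT };
 *
 *        for (;;) {
 *                if (poll(&pfd, 1, -1) <= 0)
 *                        continue;
 *                if (pfd.revents & POLLIN)
 *                        process_rx_ring();
 *                if (pfd.revents & POLLOUT)
 *                        refill_tx_ring();
 *        }
 */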
639
640 static int xsk_init_queue(u32 entries, struct xsk_queue **queue,
641                           bool umem_queue)
642 {
643         struct xsk_queue *q;
644
645         if (entries == 0 || *queue || !is_power_of_2(entries))
646                 return -EINVAL;
647
648         q = xskq_create(entries, umem_queue);
649         if (!q)
650                 return -ENOMEM;
651
652         /* Make sure queue is ready before it can be seen by others */
653         smp_wmb();
654         WRITE_ONCE(*queue, q);
655         return 0;
656 }
657
658 static void xsk_unbind_dev(struct xdp_sock *xs)
659 {
660         struct net_device *dev = xs->dev;
661
662         if (xs->state != XSK_BOUND)
663                 return;
664         WRITE_ONCE(xs->state, XSK_UNBOUND);
665
666         /* Wait for driver to stop using the xdp socket. */
667         xp_del_xsk(xs->pool, xs);
668         xs->dev = NULL;
669         synchronize_net();
670         dev_put(dev);
671 }
672
673 static struct xsk_map *xsk_get_map_list_entry(struct xdp_sock *xs,
674                                               struct xdp_sock ***map_entry)
675 {
676         struct xsk_map *map = NULL;
677         struct xsk_map_node *node;
678
679         *map_entry = NULL;
680
681         spin_lock_bh(&xs->map_list_lock);
682         node = list_first_entry_or_null(&xs->map_list, struct xsk_map_node,
683                                         node);
684         if (node) {
685                 bpf_map_inc(&node->map->map);
686                 map = node->map;
687                 *map_entry = node->map_entry;
688         }
689         spin_unlock_bh(&xs->map_list_lock);
690         return map;
691 }
692
693 static void xsk_delete_from_maps(struct xdp_sock *xs)
694 {
695         /* This function removes the current XDP socket from all the
696          * maps it resides in. We need to take extra care here, due to
697          * the two locks involved. Each map has a lock synchronizing
698          * updates to the entries, and each socket has a lock that
699          * synchronizes access to the list of maps (map_list). For
700          * deadlock avoidance the locks need to be taken in the order
701          * "map lock"->"socket map list lock". We start off by
702          * accessing the socket map list, and take a reference to the
703          * map to guarantee existence between the
704          * xsk_get_map_list_entry() and xsk_map_try_sock_delete()
705          * calls. Then we ask the map to remove the socket, which
706          * tries to remove the socket from the map. Note that there
707          * might be updates to the map between
708          * xsk_get_map_list_entry() and xsk_map_try_sock_delete().
709          */
710         struct xdp_sock **map_entry = NULL;
711         struct xsk_map *map;
712
713         while ((map = xsk_get_map_list_entry(xs, &map_entry))) {
714                 xsk_map_try_sock_delete(map, xs, map_entry);
715                 bpf_map_put(&map->map);
716         }
717 }
718
719 static int xsk_release(struct socket *sock)
720 {
721         struct sock *sk = sock->sk;
722         struct xdp_sock *xs = xdp_sk(sk);
723         struct net *net;
724
725         if (!sk)
726                 return 0;
727
728         net = sock_net(sk);
729
730         mutex_lock(&net->xdp.lock);
731         sk_del_node_init_rcu(sk);
732         mutex_unlock(&net->xdp.lock);
733
734         local_bh_disable();
735         sock_prot_inuse_add(net, sk->sk_prot, -1);
736         local_bh_enable();
737
738         xsk_delete_from_maps(xs);
739         mutex_lock(&xs->mutex);
740         xsk_unbind_dev(xs);
741         mutex_unlock(&xs->mutex);
742
743         xskq_destroy(xs->rx);
744         xskq_destroy(xs->tx);
745         xskq_destroy(xs->fq_tmp);
746         xskq_destroy(xs->cq_tmp);
747
748         sock_orphan(sk);
749         sock->sk = NULL;
750
751         sk_refcnt_debug_release(sk);
752         sock_put(sk);
753
754         return 0;
755 }
756
757 static struct socket *xsk_lookup_xsk_from_fd(int fd)
758 {
759         struct socket *sock;
760         int err;
761
762         sock = sockfd_lookup(fd, &err);
763         if (!sock)
764                 return ERR_PTR(-ENOTSOCK);
765
766         if (sock->sk->sk_family != PF_XDP) {
767                 sockfd_put(sock);
768                 return ERR_PTR(-ENOPROTOOPT);
769         }
770
771         return sock;
772 }
773
774 static bool xsk_validate_queues(struct xdp_sock *xs)
775 {
776         return xs->fq_tmp && xs->cq_tmp;
777 }
778
779 static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
780 {
781         struct sockaddr_xdp *sxdp = (struct sockaddr_xdp *)addr;
782         struct sock *sk = sock->sk;
783         struct xdp_sock *xs = xdp_sk(sk);
784         struct net_device *dev;
785         u32 flags, qid;
786         int err = 0;
787
788         if (addr_len < sizeof(struct sockaddr_xdp))
789                 return -EINVAL;
790         if (sxdp->sxdp_family != AF_XDP)
791                 return -EINVAL;
792
793         flags = sxdp->sxdp_flags;
794         if (flags & ~(XDP_SHARED_UMEM | XDP_COPY | XDP_ZEROCOPY |
795                       XDP_USE_NEED_WAKEUP))
796                 return -EINVAL;
797
798         rtnl_lock();
799         mutex_lock(&xs->mutex);
800         if (xs->state != XSK_READY) {
801                 err = -EBUSY;
802                 goto out_release;
803         }
804
805         dev = dev_get_by_index(sock_net(sk), sxdp->sxdp_ifindex);
806         if (!dev) {
807                 err = -ENODEV;
808                 goto out_release;
809         }
810
811         if (!xs->rx && !xs->tx) {
812                 err = -EINVAL;
813                 goto out_unlock;
814         }
815
816         qid = sxdp->sxdp_queue_id;
817
818         if (flags & XDP_SHARED_UMEM) {
819                 struct xdp_sock *umem_xs;
820                 struct socket *sock;
821
822                 if ((flags & XDP_COPY) || (flags & XDP_ZEROCOPY) ||
823                     (flags & XDP_USE_NEED_WAKEUP)) {
824                         /* Cannot specify flags for shared sockets. */
825                         err = -EINVAL;
826                         goto out_unlock;
827                 }
828
829                 if (xs->umem) {
830                         /* We already have our own. */
831                         err = -EINVAL;
832                         goto out_unlock;
833                 }
834
835                 sock = xsk_lookup_xsk_from_fd(sxdp->sxdp_shared_umem_fd);
836                 if (IS_ERR(sock)) {
837                         err = PTR_ERR(sock);
838                         goto out_unlock;
839                 }
840
841                 umem_xs = xdp_sk(sock->sk);
842                 if (!xsk_is_bound(umem_xs)) {
843                         err = -EBADF;
844                         sockfd_put(sock);
845                         goto out_unlock;
846                 }
847
848                 if (umem_xs->queue_id != qid || umem_xs->dev != dev) {
849                         /* Share the umem with another socket on another qid
850                          * and/or device.
851                          */
852                         xs->pool = xp_create_and_assign_umem(xs,
853                                                              umem_xs->umem);
854                         if (!xs->pool) {
855                                 err = -ENOMEM;
856                                 sockfd_put(sock);
857                                 goto out_unlock;
858                         }
859
860                         err = xp_assign_dev_shared(xs->pool, umem_xs->umem,
861                                                    dev, qid);
862                         if (err) {
863                                 xp_destroy(xs->pool);
864                                 xs->pool = NULL;
865                                 sockfd_put(sock);
866                                 goto out_unlock;
867                         }
868                 } else {
869                         /* Share the buffer pool with the other socket. */
870                         if (xs->fq_tmp || xs->cq_tmp) {
871                                 /* Do not allow setting your own fq or cq. */
872                                 err = -EINVAL;
873                                 sockfd_put(sock);
874                                 goto out_unlock;
875                         }
876
877                         xp_get_pool(umem_xs->pool);
878                         xs->pool = umem_xs->pool;
879                 }
880
881                 xdp_get_umem(umem_xs->umem);
882                 WRITE_ONCE(xs->umem, umem_xs->umem);
883                 sockfd_put(sock);
884         } else if (!xs->umem || !xsk_validate_queues(xs)) {
885                 err = -EINVAL;
886                 goto out_unlock;
887         } else {
888                 /* This xsk has its own umem. */
889                 xs->pool = xp_create_and_assign_umem(xs, xs->umem);
890                 if (!xs->pool) {
891                         err = -ENOMEM;
892                         goto out_unlock;
893                 }
894
895                 err = xp_assign_dev(xs->pool, dev, qid, flags);
896                 if (err) {
897                         xp_destroy(xs->pool);
898                         xs->pool = NULL;
899                         goto out_unlock;
900                 }
901         }
902
903         /* FQ and CQ are now owned by the buffer pool and cleaned up with it. */
904         xs->fq_tmp = NULL;
905         xs->cq_tmp = NULL;
906
907         xs->dev = dev;
908         xs->zc = xs->umem->zc;
909         xs->queue_id = qid;
910         xp_add_xsk(xs->pool, xs);
911
912 out_unlock:
913         if (err) {
914                 dev_put(dev);
915         } else {
916                 /* Matches smp_rmb() in bind() for shared umem
917                  * sockets, and xsk_is_bound().
918                  */
919                 smp_wmb();
920                 WRITE_ONCE(xs->state, XSK_BOUND);
921         }
922 out_release:
923         mutex_unlock(&xs->mutex);
924         rtnl_unlock();
925         return err;
926 }
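
/*
 * User-space sketch of the bind() call handled above, using struct
 * sockaddr_xdp from <linux/if_xdp.h>; ifindex and queue_id are
 * application inputs:
 *
 *        struct sockaddr_xdp sxdp = {
 *                .sxdp_family = AF_XDP,
 *                .sxdp_ifindex = ifindex,
 *                .sxdp_queue_id = queue_id,
 *                .sxdp_flags = XDP_USE_NEED_WAKEUP,
 *        };
 *
 *        if (bind(xsk_fd, (struct sockaddr *)&sxdp, sizeof(sxdp)))
 *                err(1, "bind");
 *
 * For XDP_SHARED_UMEM, sxdp_flags may carry only that flag and
 * sxdp_shared_umem_fd holds the fd of the already-bound socket.
 */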
927
928 struct xdp_umem_reg_v1 {
929         __u64 addr; /* Start of packet data area */
930         __u64 len; /* Length of packet data area */
931         __u32 chunk_size;
932         __u32 headroom;
933 };
934
935 static int xsk_setsockopt(struct socket *sock, int level, int optname,
936                           sockptr_t optval, unsigned int optlen)
937 {
938         struct sock *sk = sock->sk;
939         struct xdp_sock *xs = xdp_sk(sk);
940         int err;
941
942         if (level != SOL_XDP)
943                 return -ENOPROTOOPT;
944
945         switch (optname) {
946         case XDP_RX_RING:
947         case XDP_TX_RING:
948         {
949                 struct xsk_queue **q;
950                 int entries;
951
952                 if (optlen < sizeof(entries))
953                         return -EINVAL;
954                 if (copy_from_sockptr(&entries, optval, sizeof(entries)))
955                         return -EFAULT;
956
957                 mutex_lock(&xs->mutex);
958                 if (xs->state != XSK_READY) {
959                         mutex_unlock(&xs->mutex);
960                         return -EBUSY;
961                 }
962                 q = (optname == XDP_TX_RING) ? &xs->tx : &xs->rx;
963                 err = xsk_init_queue(entries, q, false);
964                 if (!err && optname == XDP_TX_RING)
965                         /* Tx needs to be explicitly woken up the first time */
966                         xs->tx->ring->flags |= XDP_RING_NEED_WAKEUP;
967                 mutex_unlock(&xs->mutex);
968                 return err;
969         }
970         case XDP_UMEM_REG:
971         {
972                 size_t mr_size = sizeof(struct xdp_umem_reg);
973                 struct xdp_umem_reg mr = {};
974                 struct xdp_umem *umem;
975
976                 if (optlen < sizeof(struct xdp_umem_reg_v1))
977                         return -EINVAL;
978                 else if (optlen < sizeof(mr))
979                         mr_size = sizeof(struct xdp_umem_reg_v1);
980
981                 if (copy_from_sockptr(&mr, optval, mr_size))
982                         return -EFAULT;
983
984                 mutex_lock(&xs->mutex);
985                 if (xs->state != XSK_READY || xs->umem) {
986                         mutex_unlock(&xs->mutex);
987                         return -EBUSY;
988                 }
989
990                 umem = xdp_umem_create(&mr);
991                 if (IS_ERR(umem)) {
992                         mutex_unlock(&xs->mutex);
993                         return PTR_ERR(umem);
994                 }
995
996                 /* Make sure umem is ready before it can be seen by others */
997                 smp_wmb();
998                 WRITE_ONCE(xs->umem, umem);
999                 mutex_unlock(&xs->mutex);
1000                 return 0;
1001         }
1002         case XDP_UMEM_FILL_RING:
1003         case XDP_UMEM_COMPLETION_RING:
1004         {
1005                 struct xsk_queue **q;
1006                 int entries;
1007
1008                 if (copy_from_sockptr(&entries, optval, sizeof(entries)))
1009                         return -EFAULT;
1010
1011                 mutex_lock(&xs->mutex);
1012                 if (xs->state != XSK_READY) {
1013                         mutex_unlock(&xs->mutex);
1014                         return -EBUSY;
1015                 }
1016
1017                 q = (optname == XDP_UMEM_FILL_RING) ? &xs->fq_tmp :
1018                         &xs->cq_tmp;
1019                 err = xsk_init_queue(entries, q, true);
1020                 mutex_unlock(&xs->mutex);
1021                 return err;
1022         }
1023         default:
1024                 break;
1025         }
1026
1027         return -ENOPROTOOPT;
1028 }
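
/*
 * User-space sketch of the setsockopt() sequence handled above: first
 * register the UMEM, then size the four rings. The values are
 * illustrative; chunk_size must respect the limits enforced by
 * xdp_umem_create() and ring sizes must be powers of two:
 *
 *        struct xdp_umem_reg mr = {
 *                .addr = (__u64)(uintptr_t)umem_area,
 *                .len = umem_len,
 *                .chunk_size = 2048,
 *                .headroom = 0,
 *        };
 *        int entries = 2048;
 *
 *        setsockopt(fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr));
 *        setsockopt(fd, SOL_XDP, XDP_UMEM_FILL_RING, &entries, sizeof(entries));
 *        setsockopt(fd, SOL_XDP, XDP_UMEM_COMPLETION_RING, &entries, sizeof(entries));
 *        setsockopt(fd, SOL_XDP, XDP_RX_RING, &entries, sizeof(entries));
 *        setsockopt(fd, SOL_XDP, XDP_TX_RING, &entries, sizeof(entries));
 */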
1029
1030 static void xsk_enter_rxtx_offsets(struct xdp_ring_offset_v1 *ring)
1031 {
1032         ring->producer = offsetof(struct xdp_rxtx_ring, ptrs.producer);
1033         ring->consumer = offsetof(struct xdp_rxtx_ring, ptrs.consumer);
1034         ring->desc = offsetof(struct xdp_rxtx_ring, desc);
1035 }
1036
1037 static void xsk_enter_umem_offsets(struct xdp_ring_offset_v1 *ring)
1038 {
1039         ring->producer = offsetof(struct xdp_umem_ring, ptrs.producer);
1040         ring->consumer = offsetof(struct xdp_umem_ring, ptrs.consumer);
1041         ring->desc = offsetof(struct xdp_umem_ring, desc);
1042 }
1043
1044 struct xdp_statistics_v1 {
1045         __u64 rx_dropped;
1046         __u64 rx_invalid_descs;
1047         __u64 tx_invalid_descs;
1048 };
1049
1050 static int xsk_getsockopt(struct socket *sock, int level, int optname,
1051                           char __user *optval, int __user *optlen)
1052 {
1053         struct sock *sk = sock->sk;
1054         struct xdp_sock *xs = xdp_sk(sk);
1055         int len;
1056
1057         if (level != SOL_XDP)
1058                 return -ENOPROTOOPT;
1059
1060         if (get_user(len, optlen))
1061                 return -EFAULT;
1062         if (len < 0)
1063                 return -EINVAL;
1064
1065         switch (optname) {
1066         case XDP_STATISTICS:
1067         {
1068                 struct xdp_statistics stats = {};
1069                 bool extra_stats = true;
1070                 size_t stats_size;
1071
1072                 if (len < sizeof(struct xdp_statistics_v1)) {
1073                         return -EINVAL;
1074                 } else if (len < sizeof(stats)) {
1075                         extra_stats = false;
1076                         stats_size = sizeof(struct xdp_statistics_v1);
1077                 } else {
1078                         stats_size = sizeof(stats);
1079                 }
1080
1081                 mutex_lock(&xs->mutex);
1082                 stats.rx_dropped = xs->rx_dropped;
1083                 if (extra_stats) {
1084                         stats.rx_ring_full = xs->rx_queue_full;
1085                         stats.rx_fill_ring_empty_descs =
1086                                 xs->pool ? xskq_nb_queue_empty_descs(xs->pool->fq) : 0;
1087                         stats.tx_ring_empty_descs = xskq_nb_queue_empty_descs(xs->tx);
1088                 } else {
1089                         stats.rx_dropped += xs->rx_queue_full;
1090                 }
1091                 stats.rx_invalid_descs = xskq_nb_invalid_descs(xs->rx);
1092                 stats.tx_invalid_descs = xskq_nb_invalid_descs(xs->tx);
1093                 mutex_unlock(&xs->mutex);
1094
1095                 if (copy_to_user(optval, &stats, stats_size))
1096                         return -EFAULT;
1097                 if (put_user(stats_size, optlen))
1098                         return -EFAULT;
1099
1100                 return 0;
1101         }
1102         case XDP_MMAP_OFFSETS:
1103         {
1104                 struct xdp_mmap_offsets off;
1105                 struct xdp_mmap_offsets_v1 off_v1;
1106                 bool flags_supported = true;
1107                 void *to_copy;
1108
1109                 if (len < sizeof(off_v1))
1110                         return -EINVAL;
1111                 else if (len < sizeof(off))
1112                         flags_supported = false;
1113
1114                 if (flags_supported) {
1115                         /* xdp_ring_offset is identical to xdp_ring_offset_v1
1116                          * except for the flags field added to the end.
1117                          */
1118                         xsk_enter_rxtx_offsets((struct xdp_ring_offset_v1 *)
1119                                                &off.rx);
1120                         xsk_enter_rxtx_offsets((struct xdp_ring_offset_v1 *)
1121                                                &off.tx);
1122                         xsk_enter_umem_offsets((struct xdp_ring_offset_v1 *)
1123                                                &off.fr);
1124                         xsk_enter_umem_offsets((struct xdp_ring_offset_v1 *)
1125                                                &off.cr);
1126                         off.rx.flags = offsetof(struct xdp_rxtx_ring,
1127                                                 ptrs.flags);
1128                         off.tx.flags = offsetof(struct xdp_rxtx_ring,
1129                                                 ptrs.flags);
1130                         off.fr.flags = offsetof(struct xdp_umem_ring,
1131                                                 ptrs.flags);
1132                         off.cr.flags = offsetof(struct xdp_umem_ring,
1133                                                 ptrs.flags);
1134
1135                         len = sizeof(off);
1136                         to_copy = &off;
1137                 } else {
1138                         xsk_enter_rxtx_offsets(&off_v1.rx);
1139                         xsk_enter_rxtx_offsets(&off_v1.tx);
1140                         xsk_enter_umem_offsets(&off_v1.fr);
1141                         xsk_enter_umem_offsets(&off_v1.cr);
1142
1143                         len = sizeof(off_v1);
1144                         to_copy = &off_v1;
1145                 }
1146
1147                 if (copy_to_user(optval, to_copy, len))
1148                         return -EFAULT;
1149                 if (put_user(len, optlen))
1150                         return -EFAULT;
1151
1152                 return 0;
1153         }
1154         case XDP_OPTIONS:
1155         {
1156                 struct xdp_options opts = {};
1157
1158                 if (len < sizeof(opts))
1159                         return -EINVAL;
1160
1161                 mutex_lock(&xs->mutex);
1162                 if (xs->zc)
1163                         opts.flags |= XDP_OPTIONS_ZEROCOPY;
1164                 mutex_unlock(&xs->mutex);
1165
1166                 len = sizeof(opts);
1167                 if (copy_to_user(optval, &opts, len))
1168                         return -EFAULT;
1169                 if (put_user(len, optlen))
1170                         return -EFAULT;
1171
1172                 return 0;
1173         }
1174         default:
1175                 break;
1176         }
1177
1178         return -EOPNOTSUPP;
1179 }
1180
1181 static int xsk_mmap(struct file *file, struct socket *sock,
1182                     struct vm_area_struct *vma)
1183 {
1184         loff_t offset = (loff_t)vma->vm_pgoff << PAGE_SHIFT;
1185         unsigned long size = vma->vm_end - vma->vm_start;
1186         struct xdp_sock *xs = xdp_sk(sock->sk);
1187         struct xsk_queue *q = NULL;
1188         unsigned long pfn;
1189         struct page *qpg;
1190
1191         if (READ_ONCE(xs->state) != XSK_READY)
1192                 return -EBUSY;
1193
1194         if (offset == XDP_PGOFF_RX_RING) {
1195                 q = READ_ONCE(xs->rx);
1196         } else if (offset == XDP_PGOFF_TX_RING) {
1197                 q = READ_ONCE(xs->tx);
1198         } else {
1199                 /* Matches the smp_wmb() in XDP_UMEM_REG */
1200                 smp_rmb();
1201                 if (offset == XDP_UMEM_PGOFF_FILL_RING)
1202                         q = READ_ONCE(xs->fq_tmp);
1203                 else if (offset == XDP_UMEM_PGOFF_COMPLETION_RING)
1204                         q = READ_ONCE(xs->cq_tmp);
1205         }
1206
1207         if (!q)
1208                 return -EINVAL;
1209
1210         /* Matches the smp_wmb() in xsk_init_queue */
1211         smp_rmb();
1212         qpg = virt_to_head_page(q->ring);
1213         if (size > page_size(qpg))
1214                 return -EINVAL;
1215
1216         pfn = virt_to_phys(q->ring) >> PAGE_SHIFT;
1217         return remap_pfn_range(vma, vma->vm_start, pfn,
1218                                size, vma->vm_page_prot);
1219 }
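
/*
 * User-space sketch of mapping a ring through the handler above, using
 * the offsets returned by the XDP_MMAP_OFFSETS getsockopt (return value
 * checks omitted). Only the Rx ring is shown; the other rings differ in
 * page offset and descriptor type:
 *
 *        struct xdp_mmap_offsets off;
 *        socklen_t optlen = sizeof(off);
 *        void *map;
 *
 *        getsockopt(fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
 *        map = mmap(NULL, off.rx.desc + entries * sizeof(struct xdp_desc),
 *                   PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
 *                   fd, XDP_PGOFF_RX_RING);
 *        rx_producer = map + off.rx.producer;
 *        rx_consumer = map + off.rx.consumer;
 *        rx_descs    = map + off.rx.desc;
 */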
1220
1221 static int xsk_notifier(struct notifier_block *this,
1222                         unsigned long msg, void *ptr)
1223 {
1224         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1225         struct net *net = dev_net(dev);
1226         struct sock *sk;
1227
1228         switch (msg) {
1229         case NETDEV_UNREGISTER:
1230                 mutex_lock(&net->xdp.lock);
1231                 sk_for_each(sk, &net->xdp.list) {
1232                         struct xdp_sock *xs = xdp_sk(sk);
1233
1234                         mutex_lock(&xs->mutex);
1235                         if (xs->dev == dev) {
1236                                 sk->sk_err = ENETDOWN;
1237                                 if (!sock_flag(sk, SOCK_DEAD))
1238                                         sk->sk_error_report(sk);
1239
1240                                 xsk_unbind_dev(xs);
1241
1242                                 /* Clear device references. */
1243                                 xp_clear_dev(xs->pool);
1244                         }
1245                         mutex_unlock(&xs->mutex);
1246                 }
1247                 mutex_unlock(&net->xdp.lock);
1248                 break;
1249         }
1250         return NOTIFY_DONE;
1251 }
1252
1253 static struct proto xsk_proto = {
1254         .name =         "XDP",
1255         .owner =        THIS_MODULE,
1256         .obj_size =     sizeof(struct xdp_sock),
1257 };
1258
1259 static const struct proto_ops xsk_proto_ops = {
1260         .family         = PF_XDP,
1261         .owner          = THIS_MODULE,
1262         .release        = xsk_release,
1263         .bind           = xsk_bind,
1264         .connect        = sock_no_connect,
1265         .socketpair     = sock_no_socketpair,
1266         .accept         = sock_no_accept,
1267         .getname        = sock_no_getname,
1268         .poll           = xsk_poll,
1269         .ioctl          = sock_no_ioctl,
1270         .listen         = sock_no_listen,
1271         .shutdown       = sock_no_shutdown,
1272         .setsockopt     = xsk_setsockopt,
1273         .getsockopt     = xsk_getsockopt,
1274         .sendmsg        = xsk_sendmsg,
1275         .recvmsg        = xsk_recvmsg,
1276         .mmap           = xsk_mmap,
1277         .sendpage       = sock_no_sendpage,
1278 };
1279
1280 static void xsk_destruct(struct sock *sk)
1281 {
1282         struct xdp_sock *xs = xdp_sk(sk);
1283
1284         if (!sock_flag(sk, SOCK_DEAD))
1285                 return;
1286
1287         if (!xp_put_pool(xs->pool))
1288                 xdp_put_umem(xs->umem, !xs->pool);
1289
1290         sk_refcnt_debug_dec(sk);
1291 }
1292
1293 static int xsk_create(struct net *net, struct socket *sock, int protocol,
1294                       int kern)
1295 {
1296         struct xdp_sock *xs;
1297         struct sock *sk;
1298
1299         if (!ns_capable(net->user_ns, CAP_NET_RAW))
1300                 return -EPERM;
1301         if (sock->type != SOCK_RAW)
1302                 return -ESOCKTNOSUPPORT;
1303
1304         if (protocol)
1305                 return -EPROTONOSUPPORT;
1306
1307         sock->state = SS_UNCONNECTED;
1308
1309         sk = sk_alloc(net, PF_XDP, GFP_KERNEL, &xsk_proto, kern);
1310         if (!sk)
1311                 return -ENOBUFS;
1312
1313         sock->ops = &xsk_proto_ops;
1314
1315         sock_init_data(sock, sk);
1316
1317         sk->sk_family = PF_XDP;
1318
1319         sk->sk_destruct = xsk_destruct;
1320         sk_refcnt_debug_inc(sk);
1321
1322         sock_set_flag(sk, SOCK_RCU_FREE);
1323
1324         xs = xdp_sk(sk);
1325         xs->state = XSK_READY;
1326         mutex_init(&xs->mutex);
1327         spin_lock_init(&xs->rx_lock);
1328
1329         INIT_LIST_HEAD(&xs->map_list);
1330         spin_lock_init(&xs->map_list_lock);
1331
1332         mutex_lock(&net->xdp.lock);
1333         sk_add_node_rcu(sk, &net->xdp.list);
1334         mutex_unlock(&net->xdp.lock);
1335
1336         local_bh_disable();
1337         sock_prot_inuse_add(net, &xsk_proto, 1);
1338         local_bh_enable();
1339
1340         return 0;
1341 }
1342
1343 static const struct net_proto_family xsk_family_ops = {
1344         .family = PF_XDP,
1345         .create = xsk_create,
1346         .owner  = THIS_MODULE,
1347 };
1348
1349 static struct notifier_block xsk_netdev_notifier = {
1350         .notifier_call  = xsk_notifier,
1351 };
1352
1353 static int __net_init xsk_net_init(struct net *net)
1354 {
1355         mutex_init(&net->xdp.lock);
1356         INIT_HLIST_HEAD(&net->xdp.list);
1357         return 0;
1358 }
1359
1360 static void __net_exit xsk_net_exit(struct net *net)
1361 {
1362         WARN_ON_ONCE(!hlist_empty(&net->xdp.list));
1363 }
1364
1365 static struct pernet_operations xsk_net_ops = {
1366         .init = xsk_net_init,
1367         .exit = xsk_net_exit,
1368 };
1369
1370 static int __init xsk_init(void)
1371 {
1372         int err, cpu;
1373
1374         err = proto_register(&xsk_proto, 0 /* no slab */);
1375         if (err)
1376                 goto out;
1377
1378         err = sock_register(&xsk_family_ops);
1379         if (err)
1380                 goto out_proto;
1381
1382         err = register_pernet_subsys(&xsk_net_ops);
1383         if (err)
1384                 goto out_sk;
1385
1386         err = register_netdevice_notifier(&xsk_netdev_notifier);
1387         if (err)
1388                 goto out_pernet;
1389
1390         for_each_possible_cpu(cpu)
1391                 INIT_LIST_HEAD(&per_cpu(xskmap_flush_list, cpu));
1392         return 0;
1393
1394 out_pernet:
1395         unregister_pernet_subsys(&xsk_net_ops);
1396 out_sk:
1397         sock_unregister(PF_XDP);
1398 out_proto:
1399         proto_unregister(&xsk_proto);
1400 out:
1401         return err;
1402 }
1403
1404 fs_initcall(xsk_init);