Merge tag 'scsi-misc' of git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi
[linux-2.6-microblaze.git] / drivers / net / geneve.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * GENEVE: Generic Network Virtualization Encapsulation
4  *
5  * Copyright (c) 2015 Red Hat, Inc.
6  */
7
8 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
9
10 #include <linux/ethtool.h>
11 #include <linux/kernel.h>
12 #include <linux/module.h>
13 #include <linux/etherdevice.h>
14 #include <linux/hash.h>
15 #include <net/ipv6_stubs.h>
16 #include <net/dst_metadata.h>
17 #include <net/gro_cells.h>
18 #include <net/rtnetlink.h>
19 #include <net/geneve.h>
20 #include <net/gro.h>
21 #include <net/protocol.h>
22
23 #define GENEVE_NETDEV_VER       "0.6"
24
25 #define GENEVE_N_VID            (1u << 24)
26 #define GENEVE_VID_MASK         (GENEVE_N_VID - 1)
27
28 #define VNI_HASH_BITS           10
29 #define VNI_HASH_SIZE           (1<<VNI_HASH_BITS)
30
31 static bool log_ecn_error = true;
32 module_param(log_ecn_error, bool, 0644);
33 MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
34
35 #define GENEVE_VER 0
36 #define GENEVE_BASE_HLEN (sizeof(struct udphdr) + sizeof(struct genevehdr))
37 #define GENEVE_IPV4_HLEN (ETH_HLEN + sizeof(struct iphdr) + GENEVE_BASE_HLEN)
38 #define GENEVE_IPV6_HLEN (ETH_HLEN + sizeof(struct ipv6hdr) + GENEVE_BASE_HLEN)
39
40 /* per-network namespace private data for this module */
41 struct geneve_net {
42         struct list_head        geneve_list;
43         struct list_head        sock_list;
44 };
45
46 static unsigned int geneve_net_id;
47
48 struct geneve_dev_node {
49         struct hlist_node hlist;
50         struct geneve_dev *geneve;
51 };
52
53 struct geneve_config {
54         struct ip_tunnel_info   info;
55         bool                    collect_md;
56         bool                    use_udp6_rx_checksums;
57         bool                    ttl_inherit;
58         enum ifla_geneve_df     df;
59 };
60
61 /* Pseudo network device */
62 struct geneve_dev {
63         struct geneve_dev_node hlist4;  /* vni hash table for IPv4 socket */
64 #if IS_ENABLED(CONFIG_IPV6)
65         struct geneve_dev_node hlist6;  /* vni hash table for IPv6 socket */
66 #endif
67         struct net         *net;        /* netns for packet i/o */
68         struct net_device  *dev;        /* netdev for geneve tunnel */
69         struct geneve_sock __rcu *sock4;        /* IPv4 socket used for geneve tunnel */
70 #if IS_ENABLED(CONFIG_IPV6)
71         struct geneve_sock __rcu *sock6;        /* IPv6 socket used for geneve tunnel */
72 #endif
73         struct list_head   next;        /* geneve's per namespace list */
74         struct gro_cells   gro_cells;
75         struct geneve_config cfg;
76 };
77
78 struct geneve_sock {
79         bool                    collect_md;
80         struct list_head        list;
81         struct socket           *sock;
82         struct rcu_head         rcu;
83         int                     refcnt;
84         struct hlist_head       vni_list[VNI_HASH_SIZE];
85 };
86
87 static inline __u32 geneve_net_vni_hash(u8 vni[3])
88 {
89         __u32 vnid;
90
91         vnid = (vni[0] << 16) | (vni[1] << 8) | vni[2];
92         return hash_32(vnid, VNI_HASH_BITS);
93 }
94
95 static __be64 vni_to_tunnel_id(const __u8 *vni)
96 {
97 #ifdef __BIG_ENDIAN
98         return (vni[0] << 16) | (vni[1] << 8) | vni[2];
99 #else
100         return (__force __be64)(((__force u64)vni[0] << 40) |
101                                 ((__force u64)vni[1] << 48) |
102                                 ((__force u64)vni[2] << 56));
103 #endif
104 }
105
106 /* Convert 64 bit tunnel ID to 24 bit VNI. */
107 static void tunnel_id_to_vni(__be64 tun_id, __u8 *vni)
108 {
109 #ifdef __BIG_ENDIAN
110         vni[0] = (__force __u8)(tun_id >> 16);
111         vni[1] = (__force __u8)(tun_id >> 8);
112         vni[2] = (__force __u8)tun_id;
113 #else
114         vni[0] = (__force __u8)((__force u64)tun_id >> 40);
115         vni[1] = (__force __u8)((__force u64)tun_id >> 48);
116         vni[2] = (__force __u8)((__force u64)tun_id >> 56);
117 #endif
118 }
119
120 static bool eq_tun_id_and_vni(u8 *tun_id, u8 *vni)
121 {
122         return !memcmp(vni, &tun_id[5], 3);
123 }
124
125 static sa_family_t geneve_get_sk_family(struct geneve_sock *gs)
126 {
127         return gs->sock->sk->sk_family;
128 }
129
130 static struct geneve_dev *geneve_lookup(struct geneve_sock *gs,
131                                         __be32 addr, u8 vni[])
132 {
133         struct hlist_head *vni_list_head;
134         struct geneve_dev_node *node;
135         __u32 hash;
136
137         /* Find the device for this VNI */
138         hash = geneve_net_vni_hash(vni);
139         vni_list_head = &gs->vni_list[hash];
140         hlist_for_each_entry_rcu(node, vni_list_head, hlist) {
141                 if (eq_tun_id_and_vni((u8 *)&node->geneve->cfg.info.key.tun_id, vni) &&
142                     addr == node->geneve->cfg.info.key.u.ipv4.dst)
143                         return node->geneve;
144         }
145         return NULL;
146 }
147
148 #if IS_ENABLED(CONFIG_IPV6)
149 static struct geneve_dev *geneve6_lookup(struct geneve_sock *gs,
150                                          struct in6_addr addr6, u8 vni[])
151 {
152         struct hlist_head *vni_list_head;
153         struct geneve_dev_node *node;
154         __u32 hash;
155
156         /* Find the device for this VNI */
157         hash = geneve_net_vni_hash(vni);
158         vni_list_head = &gs->vni_list[hash];
159         hlist_for_each_entry_rcu(node, vni_list_head, hlist) {
160                 if (eq_tun_id_and_vni((u8 *)&node->geneve->cfg.info.key.tun_id, vni) &&
161                     ipv6_addr_equal(&addr6, &node->geneve->cfg.info.key.u.ipv6.dst))
162                         return node->geneve;
163         }
164         return NULL;
165 }
166 #endif
167
168 static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb)
169 {
170         return (struct genevehdr *)(udp_hdr(skb) + 1);
171 }
172
173 static struct geneve_dev *geneve_lookup_skb(struct geneve_sock *gs,
174                                             struct sk_buff *skb)
175 {
176         static u8 zero_vni[3];
177         u8 *vni;
178
179         if (geneve_get_sk_family(gs) == AF_INET) {
180                 struct iphdr *iph;
181                 __be32 addr;
182
183                 iph = ip_hdr(skb); /* outer IP header... */
184
185                 if (gs->collect_md) {
186                         vni = zero_vni;
187                         addr = 0;
188                 } else {
189                         vni = geneve_hdr(skb)->vni;
190                         addr = iph->saddr;
191                 }
192
193                 return geneve_lookup(gs, addr, vni);
194 #if IS_ENABLED(CONFIG_IPV6)
195         } else if (geneve_get_sk_family(gs) == AF_INET6) {
196                 static struct in6_addr zero_addr6;
197                 struct ipv6hdr *ip6h;
198                 struct in6_addr addr6;
199
200                 ip6h = ipv6_hdr(skb); /* outer IPv6 header... */
201
202                 if (gs->collect_md) {
203                         vni = zero_vni;
204                         addr6 = zero_addr6;
205                 } else {
206                         vni = geneve_hdr(skb)->vni;
207                         addr6 = ip6h->saddr;
208                 }
209
210                 return geneve6_lookup(gs, addr6, vni);
211 #endif
212         }
213         return NULL;
214 }
215
216 /* geneve receive/decap routine */
217 static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs,
218                       struct sk_buff *skb)
219 {
220         struct genevehdr *gnvh = geneve_hdr(skb);
221         struct metadata_dst *tun_dst = NULL;
222         unsigned int len;
223         int err = 0;
224         void *oiph;
225
226         if (ip_tunnel_collect_metadata() || gs->collect_md) {
227                 __be16 flags;
228
229                 flags = TUNNEL_KEY | (gnvh->oam ? TUNNEL_OAM : 0) |
230                         (gnvh->critical ? TUNNEL_CRIT_OPT : 0);
231
232                 tun_dst = udp_tun_rx_dst(skb, geneve_get_sk_family(gs), flags,
233                                          vni_to_tunnel_id(gnvh->vni),
234                                          gnvh->opt_len * 4);
235                 if (!tun_dst) {
236                         geneve->dev->stats.rx_dropped++;
237                         goto drop;
238                 }
239                 /* Update tunnel dst according to Geneve options. */
240                 ip_tunnel_info_opts_set(&tun_dst->u.tun_info,
241                                         gnvh->options, gnvh->opt_len * 4,
242                                         TUNNEL_GENEVE_OPT);
243         } else {
244                 /* Drop packets w/ critical options,
245                  * since we don't support any...
246                  */
247                 if (gnvh->critical) {
248                         geneve->dev->stats.rx_frame_errors++;
249                         geneve->dev->stats.rx_errors++;
250                         goto drop;
251                 }
252         }
253
254         skb_reset_mac_header(skb);
255         skb->protocol = eth_type_trans(skb, geneve->dev);
256         skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
257
258         if (tun_dst)
259                 skb_dst_set(skb, &tun_dst->dst);
260
261         /* Ignore packet loops (and multicast echo) */
262         if (ether_addr_equal(eth_hdr(skb)->h_source, geneve->dev->dev_addr)) {
263                 geneve->dev->stats.rx_errors++;
264                 goto drop;
265         }
266
267         oiph = skb_network_header(skb);
268         skb_reset_network_header(skb);
269
270         if (geneve_get_sk_family(gs) == AF_INET)
271                 err = IP_ECN_decapsulate(oiph, skb);
272 #if IS_ENABLED(CONFIG_IPV6)
273         else
274                 err = IP6_ECN_decapsulate(oiph, skb);
275 #endif
276
277         if (unlikely(err)) {
278                 if (log_ecn_error) {
279                         if (geneve_get_sk_family(gs) == AF_INET)
280                                 net_info_ratelimited("non-ECT from %pI4 "
281                                                      "with TOS=%#x\n",
282                                                      &((struct iphdr *)oiph)->saddr,
283                                                      ((struct iphdr *)oiph)->tos);
284 #if IS_ENABLED(CONFIG_IPV6)
285                         else
286                                 net_info_ratelimited("non-ECT from %pI6\n",
287                                                      &((struct ipv6hdr *)oiph)->saddr);
288 #endif
289                 }
290                 if (err > 1) {
291                         ++geneve->dev->stats.rx_frame_errors;
292                         ++geneve->dev->stats.rx_errors;
293                         goto drop;
294                 }
295         }
296
297         len = skb->len;
298         err = gro_cells_receive(&geneve->gro_cells, skb);
299         if (likely(err == NET_RX_SUCCESS))
300                 dev_sw_netstats_rx_add(geneve->dev, len);
301
302         return;
303 drop:
304         /* Consume bad packet */
305         kfree_skb(skb);
306 }
307
308 /* Setup stats when device is created */
309 static int geneve_init(struct net_device *dev)
310 {
311         struct geneve_dev *geneve = netdev_priv(dev);
312         int err;
313
314         dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
315         if (!dev->tstats)
316                 return -ENOMEM;
317
318         err = gro_cells_init(&geneve->gro_cells, dev);
319         if (err) {
320                 free_percpu(dev->tstats);
321                 return err;
322         }
323
324         err = dst_cache_init(&geneve->cfg.info.dst_cache, GFP_KERNEL);
325         if (err) {
326                 free_percpu(dev->tstats);
327                 gro_cells_destroy(&geneve->gro_cells);
328                 return err;
329         }
330         return 0;
331 }
332
333 static void geneve_uninit(struct net_device *dev)
334 {
335         struct geneve_dev *geneve = netdev_priv(dev);
336
337         dst_cache_destroy(&geneve->cfg.info.dst_cache);
338         gro_cells_destroy(&geneve->gro_cells);
339         free_percpu(dev->tstats);
340 }
341
342 /* Callback from net/ipv4/udp.c to receive packets */
343 static int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
344 {
345         struct genevehdr *geneveh;
346         struct geneve_dev *geneve;
347         struct geneve_sock *gs;
348         int opts_len;
349
350         /* Need UDP and Geneve header to be present */
351         if (unlikely(!pskb_may_pull(skb, GENEVE_BASE_HLEN)))
352                 goto drop;
353
354         /* Return packets with reserved bits set */
355         geneveh = geneve_hdr(skb);
356         if (unlikely(geneveh->ver != GENEVE_VER))
357                 goto drop;
358
359         if (unlikely(geneveh->proto_type != htons(ETH_P_TEB)))
360                 goto drop;
361
362         gs = rcu_dereference_sk_user_data(sk);
363         if (!gs)
364                 goto drop;
365
366         geneve = geneve_lookup_skb(gs, skb);
367         if (!geneve)
368                 goto drop;
369
370         opts_len = geneveh->opt_len * 4;
371         if (iptunnel_pull_header(skb, GENEVE_BASE_HLEN + opts_len,
372                                  htons(ETH_P_TEB),
373                                  !net_eq(geneve->net, dev_net(geneve->dev)))) {
374                 geneve->dev->stats.rx_dropped++;
375                 goto drop;
376         }
377
378         geneve_rx(geneve, gs, skb);
379         return 0;
380
381 drop:
382         /* Consume bad packet */
383         kfree_skb(skb);
384         return 0;
385 }
386
387 /* Callback from net/ipv{4,6}/udp.c to check that we have a tunnel for errors */
388 static int geneve_udp_encap_err_lookup(struct sock *sk, struct sk_buff *skb)
389 {
390         struct genevehdr *geneveh;
391         struct geneve_sock *gs;
392         u8 zero_vni[3] = { 0 };
393         u8 *vni = zero_vni;
394
395         if (!pskb_may_pull(skb, skb_transport_offset(skb) + GENEVE_BASE_HLEN))
396                 return -EINVAL;
397
398         geneveh = geneve_hdr(skb);
399         if (geneveh->ver != GENEVE_VER)
400                 return -EINVAL;
401
402         if (geneveh->proto_type != htons(ETH_P_TEB))
403                 return -EINVAL;
404
405         gs = rcu_dereference_sk_user_data(sk);
406         if (!gs)
407                 return -ENOENT;
408
409         if (geneve_get_sk_family(gs) == AF_INET) {
410                 struct iphdr *iph = ip_hdr(skb);
411                 __be32 addr4 = 0;
412
413                 if (!gs->collect_md) {
414                         vni = geneve_hdr(skb)->vni;
415                         addr4 = iph->daddr;
416                 }
417
418                 return geneve_lookup(gs, addr4, vni) ? 0 : -ENOENT;
419         }
420
421 #if IS_ENABLED(CONFIG_IPV6)
422         if (geneve_get_sk_family(gs) == AF_INET6) {
423                 struct ipv6hdr *ip6h = ipv6_hdr(skb);
424                 struct in6_addr addr6;
425
426                 memset(&addr6, 0, sizeof(struct in6_addr));
427
428                 if (!gs->collect_md) {
429                         vni = geneve_hdr(skb)->vni;
430                         addr6 = ip6h->daddr;
431                 }
432
433                 return geneve6_lookup(gs, addr6, vni) ? 0 : -ENOENT;
434         }
435 #endif
436
437         return -EPFNOSUPPORT;
438 }
439
440 static struct socket *geneve_create_sock(struct net *net, bool ipv6,
441                                          __be16 port, bool ipv6_rx_csum)
442 {
443         struct socket *sock;
444         struct udp_port_cfg udp_conf;
445         int err;
446
447         memset(&udp_conf, 0, sizeof(udp_conf));
448
449         if (ipv6) {
450                 udp_conf.family = AF_INET6;
451                 udp_conf.ipv6_v6only = 1;
452                 udp_conf.use_udp6_rx_checksums = ipv6_rx_csum;
453         } else {
454                 udp_conf.family = AF_INET;
455                 udp_conf.local_ip.s_addr = htonl(INADDR_ANY);
456         }
457
458         udp_conf.local_udp_port = port;
459
460         /* Open UDP socket */
461         err = udp_sock_create(net, &udp_conf, &sock);
462         if (err < 0)
463                 return ERR_PTR(err);
464
465         udp_allow_gso(sock->sk);
466         return sock;
467 }
468
469 static int geneve_hlen(struct genevehdr *gh)
470 {
471         return sizeof(*gh) + gh->opt_len * 4;
472 }
473
474 static struct sk_buff *geneve_gro_receive(struct sock *sk,
475                                           struct list_head *head,
476                                           struct sk_buff *skb)
477 {
478         struct sk_buff *pp = NULL;
479         struct sk_buff *p;
480         struct genevehdr *gh, *gh2;
481         unsigned int hlen, gh_len, off_gnv;
482         const struct packet_offload *ptype;
483         __be16 type;
484         int flush = 1;
485
486         off_gnv = skb_gro_offset(skb);
487         hlen = off_gnv + sizeof(*gh);
488         gh = skb_gro_header_fast(skb, off_gnv);
489         if (skb_gro_header_hard(skb, hlen)) {
490                 gh = skb_gro_header_slow(skb, hlen, off_gnv);
491                 if (unlikely(!gh))
492                         goto out;
493         }
494
495         if (gh->ver != GENEVE_VER || gh->oam)
496                 goto out;
497         gh_len = geneve_hlen(gh);
498
499         hlen = off_gnv + gh_len;
500         if (skb_gro_header_hard(skb, hlen)) {
501                 gh = skb_gro_header_slow(skb, hlen, off_gnv);
502                 if (unlikely(!gh))
503                         goto out;
504         }
505
506         list_for_each_entry(p, head, list) {
507                 if (!NAPI_GRO_CB(p)->same_flow)
508                         continue;
509
510                 gh2 = (struct genevehdr *)(p->data + off_gnv);
511                 if (gh->opt_len != gh2->opt_len ||
512                     memcmp(gh, gh2, gh_len)) {
513                         NAPI_GRO_CB(p)->same_flow = 0;
514                         continue;
515                 }
516         }
517
518         type = gh->proto_type;
519
520         ptype = gro_find_receive_by_type(type);
521         if (!ptype)
522                 goto out;
523
524         skb_gro_pull(skb, gh_len);
525         skb_gro_postpull_rcsum(skb, gh, gh_len);
526         pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb);
527         flush = 0;
528
529 out:
530         skb_gro_flush_final(skb, pp, flush);
531
532         return pp;
533 }
534
535 static int geneve_gro_complete(struct sock *sk, struct sk_buff *skb,
536                                int nhoff)
537 {
538         struct genevehdr *gh;
539         struct packet_offload *ptype;
540         __be16 type;
541         int gh_len;
542         int err = -ENOSYS;
543
544         gh = (struct genevehdr *)(skb->data + nhoff);
545         gh_len = geneve_hlen(gh);
546         type = gh->proto_type;
547
548         ptype = gro_find_complete_by_type(type);
549         if (ptype)
550                 err = ptype->callbacks.gro_complete(skb, nhoff + gh_len);
551
552         skb_set_inner_mac_header(skb, nhoff + gh_len);
553
554         return err;
555 }
556
557 /* Create new listen socket if needed */
558 static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port,
559                                                 bool ipv6, bool ipv6_rx_csum)
560 {
561         struct geneve_net *gn = net_generic(net, geneve_net_id);
562         struct geneve_sock *gs;
563         struct socket *sock;
564         struct udp_tunnel_sock_cfg tunnel_cfg;
565         int h;
566
567         gs = kzalloc(sizeof(*gs), GFP_KERNEL);
568         if (!gs)
569                 return ERR_PTR(-ENOMEM);
570
571         sock = geneve_create_sock(net, ipv6, port, ipv6_rx_csum);
572         if (IS_ERR(sock)) {
573                 kfree(gs);
574                 return ERR_CAST(sock);
575         }
576
577         gs->sock = sock;
578         gs->refcnt = 1;
579         for (h = 0; h < VNI_HASH_SIZE; ++h)
580                 INIT_HLIST_HEAD(&gs->vni_list[h]);
581
582         /* Initialize the geneve udp offloads structure */
583         udp_tunnel_notify_add_rx_port(gs->sock, UDP_TUNNEL_TYPE_GENEVE);
584
585         /* Mark socket as an encapsulation socket */
586         memset(&tunnel_cfg, 0, sizeof(tunnel_cfg));
587         tunnel_cfg.sk_user_data = gs;
588         tunnel_cfg.encap_type = 1;
589         tunnel_cfg.gro_receive = geneve_gro_receive;
590         tunnel_cfg.gro_complete = geneve_gro_complete;
591         tunnel_cfg.encap_rcv = geneve_udp_encap_recv;
592         tunnel_cfg.encap_err_lookup = geneve_udp_encap_err_lookup;
593         tunnel_cfg.encap_destroy = NULL;
594         setup_udp_tunnel_sock(net, sock, &tunnel_cfg);
595         list_add(&gs->list, &gn->sock_list);
596         return gs;
597 }
598
599 static void __geneve_sock_release(struct geneve_sock *gs)
600 {
601         if (!gs || --gs->refcnt)
602                 return;
603
604         list_del(&gs->list);
605         udp_tunnel_notify_del_rx_port(gs->sock, UDP_TUNNEL_TYPE_GENEVE);
606         udp_tunnel_sock_release(gs->sock);
607         kfree_rcu(gs, rcu);
608 }
609
610 static void geneve_sock_release(struct geneve_dev *geneve)
611 {
612         struct geneve_sock *gs4 = rtnl_dereference(geneve->sock4);
613 #if IS_ENABLED(CONFIG_IPV6)
614         struct geneve_sock *gs6 = rtnl_dereference(geneve->sock6);
615
616         rcu_assign_pointer(geneve->sock6, NULL);
617 #endif
618
619         rcu_assign_pointer(geneve->sock4, NULL);
620         synchronize_net();
621
622         __geneve_sock_release(gs4);
623 #if IS_ENABLED(CONFIG_IPV6)
624         __geneve_sock_release(gs6);
625 #endif
626 }
627
628 static struct geneve_sock *geneve_find_sock(struct geneve_net *gn,
629                                             sa_family_t family,
630                                             __be16 dst_port)
631 {
632         struct geneve_sock *gs;
633
634         list_for_each_entry(gs, &gn->sock_list, list) {
635                 if (inet_sk(gs->sock->sk)->inet_sport == dst_port &&
636                     geneve_get_sk_family(gs) == family) {
637                         return gs;
638                 }
639         }
640         return NULL;
641 }
642
643 static int geneve_sock_add(struct geneve_dev *geneve, bool ipv6)
644 {
645         struct net *net = geneve->net;
646         struct geneve_net *gn = net_generic(net, geneve_net_id);
647         struct geneve_dev_node *node;
648         struct geneve_sock *gs;
649         __u8 vni[3];
650         __u32 hash;
651
652         gs = geneve_find_sock(gn, ipv6 ? AF_INET6 : AF_INET, geneve->cfg.info.key.tp_dst);
653         if (gs) {
654                 gs->refcnt++;
655                 goto out;
656         }
657
658         gs = geneve_socket_create(net, geneve->cfg.info.key.tp_dst, ipv6,
659                                   geneve->cfg.use_udp6_rx_checksums);
660         if (IS_ERR(gs))
661                 return PTR_ERR(gs);
662
663 out:
664         gs->collect_md = geneve->cfg.collect_md;
665 #if IS_ENABLED(CONFIG_IPV6)
666         if (ipv6) {
667                 rcu_assign_pointer(geneve->sock6, gs);
668                 node = &geneve->hlist6;
669         } else
670 #endif
671         {
672                 rcu_assign_pointer(geneve->sock4, gs);
673                 node = &geneve->hlist4;
674         }
675         node->geneve = geneve;
676
677         tunnel_id_to_vni(geneve->cfg.info.key.tun_id, vni);
678         hash = geneve_net_vni_hash(vni);
679         hlist_add_head_rcu(&node->hlist, &gs->vni_list[hash]);
680         return 0;
681 }
682
683 static int geneve_open(struct net_device *dev)
684 {
685         struct geneve_dev *geneve = netdev_priv(dev);
686         bool metadata = geneve->cfg.collect_md;
687         bool ipv4, ipv6;
688         int ret = 0;
689
690         ipv6 = geneve->cfg.info.mode & IP_TUNNEL_INFO_IPV6 || metadata;
691         ipv4 = !ipv6 || metadata;
692 #if IS_ENABLED(CONFIG_IPV6)
693         if (ipv6) {
694                 ret = geneve_sock_add(geneve, true);
695                 if (ret < 0 && ret != -EAFNOSUPPORT)
696                         ipv4 = false;
697         }
698 #endif
699         if (ipv4)
700                 ret = geneve_sock_add(geneve, false);
701         if (ret < 0)
702                 geneve_sock_release(geneve);
703
704         return ret;
705 }
706
707 static int geneve_stop(struct net_device *dev)
708 {
709         struct geneve_dev *geneve = netdev_priv(dev);
710
711         hlist_del_init_rcu(&geneve->hlist4.hlist);
712 #if IS_ENABLED(CONFIG_IPV6)
713         hlist_del_init_rcu(&geneve->hlist6.hlist);
714 #endif
715         geneve_sock_release(geneve);
716         return 0;
717 }
718
719 static void geneve_build_header(struct genevehdr *geneveh,
720                                 const struct ip_tunnel_info *info)
721 {
722         geneveh->ver = GENEVE_VER;
723         geneveh->opt_len = info->options_len / 4;
724         geneveh->oam = !!(info->key.tun_flags & TUNNEL_OAM);
725         geneveh->critical = !!(info->key.tun_flags & TUNNEL_CRIT_OPT);
726         geneveh->rsvd1 = 0;
727         tunnel_id_to_vni(info->key.tun_id, geneveh->vni);
728         geneveh->proto_type = htons(ETH_P_TEB);
729         geneveh->rsvd2 = 0;
730
731         if (info->key.tun_flags & TUNNEL_GENEVE_OPT)
732                 ip_tunnel_info_opts_get(geneveh->options, info);
733 }
734
735 static int geneve_build_skb(struct dst_entry *dst, struct sk_buff *skb,
736                             const struct ip_tunnel_info *info,
737                             bool xnet, int ip_hdr_len)
738 {
739         bool udp_sum = !!(info->key.tun_flags & TUNNEL_CSUM);
740         struct genevehdr *gnvh;
741         int min_headroom;
742         int err;
743
744         skb_reset_mac_header(skb);
745         skb_scrub_packet(skb, xnet);
746
747         min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len +
748                        GENEVE_BASE_HLEN + info->options_len + ip_hdr_len;
749         err = skb_cow_head(skb, min_headroom);
750         if (unlikely(err))
751                 goto free_dst;
752
753         err = udp_tunnel_handle_offloads(skb, udp_sum);
754         if (err)
755                 goto free_dst;
756
757         gnvh = __skb_push(skb, sizeof(*gnvh) + info->options_len);
758         geneve_build_header(gnvh, info);
759         skb_set_inner_protocol(skb, htons(ETH_P_TEB));
760         return 0;
761
762 free_dst:
763         dst_release(dst);
764         return err;
765 }
766
767 static struct rtable *geneve_get_v4_rt(struct sk_buff *skb,
768                                        struct net_device *dev,
769                                        struct geneve_sock *gs4,
770                                        struct flowi4 *fl4,
771                                        const struct ip_tunnel_info *info,
772                                        __be16 dport, __be16 sport)
773 {
774         bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
775         struct geneve_dev *geneve = netdev_priv(dev);
776         struct dst_cache *dst_cache;
777         struct rtable *rt = NULL;
778         __u8 tos;
779
780         if (!gs4)
781                 return ERR_PTR(-EIO);
782
783         memset(fl4, 0, sizeof(*fl4));
784         fl4->flowi4_mark = skb->mark;
785         fl4->flowi4_proto = IPPROTO_UDP;
786         fl4->daddr = info->key.u.ipv4.dst;
787         fl4->saddr = info->key.u.ipv4.src;
788         fl4->fl4_dport = dport;
789         fl4->fl4_sport = sport;
790
791         tos = info->key.tos;
792         if ((tos == 1) && !geneve->cfg.collect_md) {
793                 tos = ip_tunnel_get_dsfield(ip_hdr(skb), skb);
794                 use_cache = false;
795         }
796         fl4->flowi4_tos = RT_TOS(tos);
797
798         dst_cache = (struct dst_cache *)&info->dst_cache;
799         if (use_cache) {
800                 rt = dst_cache_get_ip4(dst_cache, &fl4->saddr);
801                 if (rt)
802                         return rt;
803         }
804         rt = ip_route_output_key(geneve->net, fl4);
805         if (IS_ERR(rt)) {
806                 netdev_dbg(dev, "no route to %pI4\n", &fl4->daddr);
807                 return ERR_PTR(-ENETUNREACH);
808         }
809         if (rt->dst.dev == dev) { /* is this necessary? */
810                 netdev_dbg(dev, "circular route to %pI4\n", &fl4->daddr);
811                 ip_rt_put(rt);
812                 return ERR_PTR(-ELOOP);
813         }
814         if (use_cache)
815                 dst_cache_set_ip4(dst_cache, &rt->dst, fl4->saddr);
816         return rt;
817 }
818
819 #if IS_ENABLED(CONFIG_IPV6)
820 static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb,
821                                            struct net_device *dev,
822                                            struct geneve_sock *gs6,
823                                            struct flowi6 *fl6,
824                                            const struct ip_tunnel_info *info,
825                                            __be16 dport, __be16 sport)
826 {
827         bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
828         struct geneve_dev *geneve = netdev_priv(dev);
829         struct dst_entry *dst = NULL;
830         struct dst_cache *dst_cache;
831         __u8 prio;
832
833         if (!gs6)
834                 return ERR_PTR(-EIO);
835
836         memset(fl6, 0, sizeof(*fl6));
837         fl6->flowi6_mark = skb->mark;
838         fl6->flowi6_proto = IPPROTO_UDP;
839         fl6->daddr = info->key.u.ipv6.dst;
840         fl6->saddr = info->key.u.ipv6.src;
841         fl6->fl6_dport = dport;
842         fl6->fl6_sport = sport;
843
844         prio = info->key.tos;
845         if ((prio == 1) && !geneve->cfg.collect_md) {
846                 prio = ip_tunnel_get_dsfield(ip_hdr(skb), skb);
847                 use_cache = false;
848         }
849
850         fl6->flowlabel = ip6_make_flowinfo(RT_TOS(prio),
851                                            info->key.label);
852         dst_cache = (struct dst_cache *)&info->dst_cache;
853         if (use_cache) {
854                 dst = dst_cache_get_ip6(dst_cache, &fl6->saddr);
855                 if (dst)
856                         return dst;
857         }
858         dst = ipv6_stub->ipv6_dst_lookup_flow(geneve->net, gs6->sock->sk, fl6,
859                                               NULL);
860         if (IS_ERR(dst)) {
861                 netdev_dbg(dev, "no route to %pI6\n", &fl6->daddr);
862                 return ERR_PTR(-ENETUNREACH);
863         }
864         if (dst->dev == dev) { /* is this necessary? */
865                 netdev_dbg(dev, "circular route to %pI6\n", &fl6->daddr);
866                 dst_release(dst);
867                 return ERR_PTR(-ELOOP);
868         }
869
870         if (use_cache)
871                 dst_cache_set_ip6(dst_cache, dst, &fl6->saddr);
872         return dst;
873 }
874 #endif
875
876 static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
877                            struct geneve_dev *geneve,
878                            const struct ip_tunnel_info *info)
879 {
880         bool xnet = !net_eq(geneve->net, dev_net(geneve->dev));
881         struct geneve_sock *gs4 = rcu_dereference(geneve->sock4);
882         const struct ip_tunnel_key *key = &info->key;
883         struct rtable *rt;
884         struct flowi4 fl4;
885         __u8 tos, ttl;
886         __be16 df = 0;
887         __be16 sport;
888         int err;
889
890         if (!pskb_inet_may_pull(skb))
891                 return -EINVAL;
892
893         sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);
894         rt = geneve_get_v4_rt(skb, dev, gs4, &fl4, info,
895                               geneve->cfg.info.key.tp_dst, sport);
896         if (IS_ERR(rt))
897                 return PTR_ERR(rt);
898
899         err = skb_tunnel_check_pmtu(skb, &rt->dst,
900                                     GENEVE_IPV4_HLEN + info->options_len,
901                                     netif_is_any_bridge_port(dev));
902         if (err < 0) {
903                 dst_release(&rt->dst);
904                 return err;
905         } else if (err) {
906                 struct ip_tunnel_info *info;
907
908                 info = skb_tunnel_info(skb);
909                 if (info) {
910                         struct ip_tunnel_info *unclone;
911
912                         unclone = skb_tunnel_info_unclone(skb);
913                         if (unlikely(!unclone)) {
914                                 dst_release(&rt->dst);
915                                 return -ENOMEM;
916                         }
917
918                         unclone->key.u.ipv4.dst = fl4.saddr;
919                         unclone->key.u.ipv4.src = fl4.daddr;
920                 }
921
922                 if (!pskb_may_pull(skb, ETH_HLEN)) {
923                         dst_release(&rt->dst);
924                         return -EINVAL;
925                 }
926
927                 skb->protocol = eth_type_trans(skb, geneve->dev);
928                 netif_rx(skb);
929                 dst_release(&rt->dst);
930                 return -EMSGSIZE;
931         }
932
933         if (geneve->cfg.collect_md) {
934                 tos = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb);
935                 ttl = key->ttl;
936
937                 df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
938         } else {
939                 tos = ip_tunnel_ecn_encap(fl4.flowi4_tos, ip_hdr(skb), skb);
940                 if (geneve->cfg.ttl_inherit)
941                         ttl = ip_tunnel_get_ttl(ip_hdr(skb), skb);
942                 else
943                         ttl = key->ttl;
944                 ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
945
946                 if (geneve->cfg.df == GENEVE_DF_SET) {
947                         df = htons(IP_DF);
948                 } else if (geneve->cfg.df == GENEVE_DF_INHERIT) {
949                         struct ethhdr *eth = eth_hdr(skb);
950
951                         if (ntohs(eth->h_proto) == ETH_P_IPV6) {
952                                 df = htons(IP_DF);
953                         } else if (ntohs(eth->h_proto) == ETH_P_IP) {
954                                 struct iphdr *iph = ip_hdr(skb);
955
956                                 if (iph->frag_off & htons(IP_DF))
957                                         df = htons(IP_DF);
958                         }
959                 }
960         }
961
962         err = geneve_build_skb(&rt->dst, skb, info, xnet, sizeof(struct iphdr));
963         if (unlikely(err))
964                 return err;
965
966         udp_tunnel_xmit_skb(rt, gs4->sock->sk, skb, fl4.saddr, fl4.daddr,
967                             tos, ttl, df, sport, geneve->cfg.info.key.tp_dst,
968                             !net_eq(geneve->net, dev_net(geneve->dev)),
969                             !(info->key.tun_flags & TUNNEL_CSUM));
970         return 0;
971 }
972
973 #if IS_ENABLED(CONFIG_IPV6)
974 static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
975                             struct geneve_dev *geneve,
976                             const struct ip_tunnel_info *info)
977 {
978         bool xnet = !net_eq(geneve->net, dev_net(geneve->dev));
979         struct geneve_sock *gs6 = rcu_dereference(geneve->sock6);
980         const struct ip_tunnel_key *key = &info->key;
981         struct dst_entry *dst = NULL;
982         struct flowi6 fl6;
983         __u8 prio, ttl;
984         __be16 sport;
985         int err;
986
987         if (!pskb_inet_may_pull(skb))
988                 return -EINVAL;
989
990         sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);
991         dst = geneve_get_v6_dst(skb, dev, gs6, &fl6, info,
992                                 geneve->cfg.info.key.tp_dst, sport);
993         if (IS_ERR(dst))
994                 return PTR_ERR(dst);
995
996         err = skb_tunnel_check_pmtu(skb, dst,
997                                     GENEVE_IPV6_HLEN + info->options_len,
998                                     netif_is_any_bridge_port(dev));
999         if (err < 0) {
1000                 dst_release(dst);
1001                 return err;
1002         } else if (err) {
1003                 struct ip_tunnel_info *info = skb_tunnel_info(skb);
1004
1005                 if (info) {
1006                         struct ip_tunnel_info *unclone;
1007
1008                         unclone = skb_tunnel_info_unclone(skb);
1009                         if (unlikely(!unclone)) {
1010                                 dst_release(dst);
1011                                 return -ENOMEM;
1012                         }
1013
1014                         unclone->key.u.ipv6.dst = fl6.saddr;
1015                         unclone->key.u.ipv6.src = fl6.daddr;
1016                 }
1017
1018                 if (!pskb_may_pull(skb, ETH_HLEN)) {
1019                         dst_release(dst);
1020                         return -EINVAL;
1021                 }
1022
1023                 skb->protocol = eth_type_trans(skb, geneve->dev);
1024                 netif_rx(skb);
1025                 dst_release(dst);
1026                 return -EMSGSIZE;
1027         }
1028
1029         if (geneve->cfg.collect_md) {
1030                 prio = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb);
1031                 ttl = key->ttl;
1032         } else {
1033                 prio = ip_tunnel_ecn_encap(ip6_tclass(fl6.flowlabel),
1034                                            ip_hdr(skb), skb);
1035                 if (geneve->cfg.ttl_inherit)
1036                         ttl = ip_tunnel_get_ttl(ip_hdr(skb), skb);
1037                 else
1038                         ttl = key->ttl;
1039                 ttl = ttl ? : ip6_dst_hoplimit(dst);
1040         }
1041         err = geneve_build_skb(dst, skb, info, xnet, sizeof(struct ipv6hdr));
1042         if (unlikely(err))
1043                 return err;
1044
1045         udp_tunnel6_xmit_skb(dst, gs6->sock->sk, skb, dev,
1046                              &fl6.saddr, &fl6.daddr, prio, ttl,
1047                              info->key.label, sport, geneve->cfg.info.key.tp_dst,
1048                              !(info->key.tun_flags & TUNNEL_CSUM));
1049         return 0;
1050 }
1051 #endif
1052
1053 static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev)
1054 {
1055         struct geneve_dev *geneve = netdev_priv(dev);
1056         struct ip_tunnel_info *info = NULL;
1057         int err;
1058
1059         if (geneve->cfg.collect_md) {
1060                 info = skb_tunnel_info(skb);
1061                 if (unlikely(!info || !(info->mode & IP_TUNNEL_INFO_TX))) {
1062                         netdev_dbg(dev, "no tunnel metadata\n");
1063                         dev_kfree_skb(skb);
1064                         dev->stats.tx_dropped++;
1065                         return NETDEV_TX_OK;
1066                 }
1067         } else {
1068                 info = &geneve->cfg.info;
1069         }
1070
1071         rcu_read_lock();
1072 #if IS_ENABLED(CONFIG_IPV6)
1073         if (info->mode & IP_TUNNEL_INFO_IPV6)
1074                 err = geneve6_xmit_skb(skb, dev, geneve, info);
1075         else
1076 #endif
1077                 err = geneve_xmit_skb(skb, dev, geneve, info);
1078         rcu_read_unlock();
1079
1080         if (likely(!err))
1081                 return NETDEV_TX_OK;
1082
1083         if (err != -EMSGSIZE)
1084                 dev_kfree_skb(skb);
1085
1086         if (err == -ELOOP)
1087                 dev->stats.collisions++;
1088         else if (err == -ENETUNREACH)
1089                 dev->stats.tx_carrier_errors++;
1090
1091         dev->stats.tx_errors++;
1092         return NETDEV_TX_OK;
1093 }
1094
1095 static int geneve_change_mtu(struct net_device *dev, int new_mtu)
1096 {
1097         if (new_mtu > dev->max_mtu)
1098                 new_mtu = dev->max_mtu;
1099         else if (new_mtu < dev->min_mtu)
1100                 new_mtu = dev->min_mtu;
1101
1102         dev->mtu = new_mtu;
1103         return 0;
1104 }
1105
1106 static int geneve_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
1107 {
1108         struct ip_tunnel_info *info = skb_tunnel_info(skb);
1109         struct geneve_dev *geneve = netdev_priv(dev);
1110         __be16 sport;
1111
1112         if (ip_tunnel_info_af(info) == AF_INET) {
1113                 struct rtable *rt;
1114                 struct flowi4 fl4;
1115
1116                 struct geneve_sock *gs4 = rcu_dereference(geneve->sock4);
1117                 sport = udp_flow_src_port(geneve->net, skb,
1118                                           1, USHRT_MAX, true);
1119
1120                 rt = geneve_get_v4_rt(skb, dev, gs4, &fl4, info,
1121                                       geneve->cfg.info.key.tp_dst, sport);
1122                 if (IS_ERR(rt))
1123                         return PTR_ERR(rt);
1124
1125                 ip_rt_put(rt);
1126                 info->key.u.ipv4.src = fl4.saddr;
1127 #if IS_ENABLED(CONFIG_IPV6)
1128         } else if (ip_tunnel_info_af(info) == AF_INET6) {
1129                 struct dst_entry *dst;
1130                 struct flowi6 fl6;
1131
1132                 struct geneve_sock *gs6 = rcu_dereference(geneve->sock6);
1133                 sport = udp_flow_src_port(geneve->net, skb,
1134                                           1, USHRT_MAX, true);
1135
1136                 dst = geneve_get_v6_dst(skb, dev, gs6, &fl6, info,
1137                                         geneve->cfg.info.key.tp_dst, sport);
1138                 if (IS_ERR(dst))
1139                         return PTR_ERR(dst);
1140
1141                 dst_release(dst);
1142                 info->key.u.ipv6.src = fl6.saddr;
1143 #endif
1144         } else {
1145                 return -EINVAL;
1146         }
1147
1148         info->key.tp_src = sport;
1149         info->key.tp_dst = geneve->cfg.info.key.tp_dst;
1150         return 0;
1151 }
1152
1153 static const struct net_device_ops geneve_netdev_ops = {
1154         .ndo_init               = geneve_init,
1155         .ndo_uninit             = geneve_uninit,
1156         .ndo_open               = geneve_open,
1157         .ndo_stop               = geneve_stop,
1158         .ndo_start_xmit         = geneve_xmit,
1159         .ndo_get_stats64        = dev_get_tstats64,
1160         .ndo_change_mtu         = geneve_change_mtu,
1161         .ndo_validate_addr      = eth_validate_addr,
1162         .ndo_set_mac_address    = eth_mac_addr,
1163         .ndo_fill_metadata_dst  = geneve_fill_metadata_dst,
1164 };
1165
1166 static void geneve_get_drvinfo(struct net_device *dev,
1167                                struct ethtool_drvinfo *drvinfo)
1168 {
1169         strlcpy(drvinfo->version, GENEVE_NETDEV_VER, sizeof(drvinfo->version));
1170         strlcpy(drvinfo->driver, "geneve", sizeof(drvinfo->driver));
1171 }
1172
1173 static const struct ethtool_ops geneve_ethtool_ops = {
1174         .get_drvinfo    = geneve_get_drvinfo,
1175         .get_link       = ethtool_op_get_link,
1176 };
1177
1178 /* Info for udev, that this is a virtual tunnel endpoint */
1179 static struct device_type geneve_type = {
1180         .name = "geneve",
1181 };
1182
1183 /* Calls the ndo_udp_tunnel_add of the caller in order to
1184  * supply the listening GENEVE udp ports. Callers are expected
1185  * to implement the ndo_udp_tunnel_add.
1186  */
1187 static void geneve_offload_rx_ports(struct net_device *dev, bool push)
1188 {
1189         struct net *net = dev_net(dev);
1190         struct geneve_net *gn = net_generic(net, geneve_net_id);
1191         struct geneve_sock *gs;
1192
1193         rcu_read_lock();
1194         list_for_each_entry_rcu(gs, &gn->sock_list, list) {
1195                 if (push) {
1196                         udp_tunnel_push_rx_port(dev, gs->sock,
1197                                                 UDP_TUNNEL_TYPE_GENEVE);
1198                 } else {
1199                         udp_tunnel_drop_rx_port(dev, gs->sock,
1200                                                 UDP_TUNNEL_TYPE_GENEVE);
1201                 }
1202         }
1203         rcu_read_unlock();
1204 }
1205
1206 /* Initialize the device structure. */
1207 static void geneve_setup(struct net_device *dev)
1208 {
1209         ether_setup(dev);
1210
1211         dev->netdev_ops = &geneve_netdev_ops;
1212         dev->ethtool_ops = &geneve_ethtool_ops;
1213         dev->needs_free_netdev = true;
1214
1215         SET_NETDEV_DEVTYPE(dev, &geneve_type);
1216
1217         dev->features    |= NETIF_F_LLTX;
1218         dev->features    |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_FRAGLIST;
1219         dev->features    |= NETIF_F_RXCSUM;
1220         dev->features    |= NETIF_F_GSO_SOFTWARE;
1221
1222         dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_FRAGLIST;
1223         dev->hw_features |= NETIF_F_RXCSUM;
1224         dev->hw_features |= NETIF_F_GSO_SOFTWARE;
1225
1226         /* MTU range: 68 - (something less than 65535) */
1227         dev->min_mtu = ETH_MIN_MTU;
1228         /* The max_mtu calculation does not take account of GENEVE
1229          * options, to avoid excluding potentially valid
1230          * configurations. This will be further reduced by IPvX hdr size.
1231          */
1232         dev->max_mtu = IP_MAX_MTU - GENEVE_BASE_HLEN - dev->hard_header_len;
1233
1234         netif_keep_dst(dev);
1235         dev->priv_flags &= ~IFF_TX_SKB_SHARING;
1236         dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE;
1237         eth_hw_addr_random(dev);
1238 }
1239
1240 static const struct nla_policy geneve_policy[IFLA_GENEVE_MAX + 1] = {
1241         [IFLA_GENEVE_ID]                = { .type = NLA_U32 },
1242         [IFLA_GENEVE_REMOTE]            = { .len = sizeof_field(struct iphdr, daddr) },
1243         [IFLA_GENEVE_REMOTE6]           = { .len = sizeof(struct in6_addr) },
1244         [IFLA_GENEVE_TTL]               = { .type = NLA_U8 },
1245         [IFLA_GENEVE_TOS]               = { .type = NLA_U8 },
1246         [IFLA_GENEVE_LABEL]             = { .type = NLA_U32 },
1247         [IFLA_GENEVE_PORT]              = { .type = NLA_U16 },
1248         [IFLA_GENEVE_COLLECT_METADATA]  = { .type = NLA_FLAG },
1249         [IFLA_GENEVE_UDP_CSUM]          = { .type = NLA_U8 },
1250         [IFLA_GENEVE_UDP_ZERO_CSUM6_TX] = { .type = NLA_U8 },
1251         [IFLA_GENEVE_UDP_ZERO_CSUM6_RX] = { .type = NLA_U8 },
1252         [IFLA_GENEVE_TTL_INHERIT]       = { .type = NLA_U8 },
1253         [IFLA_GENEVE_DF]                = { .type = NLA_U8 },
1254 };
1255
1256 static int geneve_validate(struct nlattr *tb[], struct nlattr *data[],
1257                            struct netlink_ext_ack *extack)
1258 {
1259         if (tb[IFLA_ADDRESS]) {
1260                 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) {
1261                         NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_ADDRESS],
1262                                             "Provided link layer address is not Ethernet");
1263                         return -EINVAL;
1264                 }
1265
1266                 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) {
1267                         NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_ADDRESS],
1268                                             "Provided Ethernet address is not unicast");
1269                         return -EADDRNOTAVAIL;
1270                 }
1271         }
1272
1273         if (!data) {
1274                 NL_SET_ERR_MSG(extack,
1275                                "Not enough attributes provided to perform the operation");
1276                 return -EINVAL;
1277         }
1278
1279         if (data[IFLA_GENEVE_ID]) {
1280                 __u32 vni =  nla_get_u32(data[IFLA_GENEVE_ID]);
1281
1282                 if (vni >= GENEVE_N_VID) {
1283                         NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_ID],
1284                                             "Geneve ID must be lower than 16777216");
1285                         return -ERANGE;
1286                 }
1287         }
1288
1289         if (data[IFLA_GENEVE_DF]) {
1290                 enum ifla_geneve_df df = nla_get_u8(data[IFLA_GENEVE_DF]);
1291
1292                 if (df < 0 || df > GENEVE_DF_MAX) {
1293                         NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_DF],
1294                                             "Invalid DF attribute");
1295                         return -EINVAL;
1296                 }
1297         }
1298
1299         return 0;
1300 }
1301
1302 static struct geneve_dev *geneve_find_dev(struct geneve_net *gn,
1303                                           const struct ip_tunnel_info *info,
1304                                           bool *tun_on_same_port,
1305                                           bool *tun_collect_md)
1306 {
1307         struct geneve_dev *geneve, *t = NULL;
1308
1309         *tun_on_same_port = false;
1310         *tun_collect_md = false;
1311         list_for_each_entry(geneve, &gn->geneve_list, next) {
1312                 if (info->key.tp_dst == geneve->cfg.info.key.tp_dst) {
1313                         *tun_collect_md = geneve->cfg.collect_md;
1314                         *tun_on_same_port = true;
1315                 }
1316                 if (info->key.tun_id == geneve->cfg.info.key.tun_id &&
1317                     info->key.tp_dst == geneve->cfg.info.key.tp_dst &&
1318                     !memcmp(&info->key.u, &geneve->cfg.info.key.u, sizeof(info->key.u)))
1319                         t = geneve;
1320         }
1321         return t;
1322 }
1323
1324 static bool is_tnl_info_zero(const struct ip_tunnel_info *info)
1325 {
1326         return !(info->key.tun_id || info->key.tun_flags || info->key.tos ||
1327                  info->key.ttl || info->key.label || info->key.tp_src ||
1328                  memchr_inv(&info->key.u, 0, sizeof(info->key.u)));
1329 }
1330
1331 static bool geneve_dst_addr_equal(struct ip_tunnel_info *a,
1332                                   struct ip_tunnel_info *b)
1333 {
1334         if (ip_tunnel_info_af(a) == AF_INET)
1335                 return a->key.u.ipv4.dst == b->key.u.ipv4.dst;
1336         else
1337                 return ipv6_addr_equal(&a->key.u.ipv6.dst, &b->key.u.ipv6.dst);
1338 }
1339
1340 static int geneve_configure(struct net *net, struct net_device *dev,
1341                             struct netlink_ext_ack *extack,
1342                             const struct geneve_config *cfg)
1343 {
1344         struct geneve_net *gn = net_generic(net, geneve_net_id);
1345         struct geneve_dev *t, *geneve = netdev_priv(dev);
1346         const struct ip_tunnel_info *info = &cfg->info;
1347         bool tun_collect_md, tun_on_same_port;
1348         int err, encap_len;
1349
1350         if (cfg->collect_md && !is_tnl_info_zero(info)) {
1351                 NL_SET_ERR_MSG(extack,
1352                                "Device is externally controlled, so attributes (VNI, Port, and so on) must not be specified");
1353                 return -EINVAL;
1354         }
1355
1356         geneve->net = net;
1357         geneve->dev = dev;
1358
1359         t = geneve_find_dev(gn, info, &tun_on_same_port, &tun_collect_md);
1360         if (t)
1361                 return -EBUSY;
1362
1363         /* make enough headroom for basic scenario */
1364         encap_len = GENEVE_BASE_HLEN + ETH_HLEN;
1365         if (!cfg->collect_md && ip_tunnel_info_af(info) == AF_INET) {
1366                 encap_len += sizeof(struct iphdr);
1367                 dev->max_mtu -= sizeof(struct iphdr);
1368         } else {
1369                 encap_len += sizeof(struct ipv6hdr);
1370                 dev->max_mtu -= sizeof(struct ipv6hdr);
1371         }
1372         dev->needed_headroom = encap_len + ETH_HLEN;
1373
1374         if (cfg->collect_md) {
1375                 if (tun_on_same_port) {
1376                         NL_SET_ERR_MSG(extack,
1377                                        "There can be only one externally controlled device on a destination port");
1378                         return -EPERM;
1379                 }
1380         } else {
1381                 if (tun_collect_md) {
1382                         NL_SET_ERR_MSG(extack,
1383                                        "There already exists an externally controlled device on this destination port");
1384                         return -EPERM;
1385                 }
1386         }
1387
1388         dst_cache_reset(&geneve->cfg.info.dst_cache);
1389         memcpy(&geneve->cfg, cfg, sizeof(*cfg));
1390
1391         err = register_netdevice(dev);
1392         if (err)
1393                 return err;
1394
1395         list_add(&geneve->next, &gn->geneve_list);
1396         return 0;
1397 }
1398
1399 static void init_tnl_info(struct ip_tunnel_info *info, __u16 dst_port)
1400 {
1401         memset(info, 0, sizeof(*info));
1402         info->key.tp_dst = htons(dst_port);
1403 }
1404
1405 static int geneve_nl2info(struct nlattr *tb[], struct nlattr *data[],
1406                           struct netlink_ext_ack *extack,
1407                           struct geneve_config *cfg, bool changelink)
1408 {
1409         struct ip_tunnel_info *info = &cfg->info;
1410         int attrtype;
1411
1412         if (data[IFLA_GENEVE_REMOTE] && data[IFLA_GENEVE_REMOTE6]) {
1413                 NL_SET_ERR_MSG(extack,
1414                                "Cannot specify both IPv4 and IPv6 Remote addresses");
1415                 return -EINVAL;
1416         }
1417
1418         if (data[IFLA_GENEVE_REMOTE]) {
1419                 if (changelink && (ip_tunnel_info_af(info) == AF_INET6)) {
1420                         attrtype = IFLA_GENEVE_REMOTE;
1421                         goto change_notsup;
1422                 }
1423
1424                 info->key.u.ipv4.dst =
1425                         nla_get_in_addr(data[IFLA_GENEVE_REMOTE]);
1426
1427                 if (ipv4_is_multicast(info->key.u.ipv4.dst)) {
1428                         NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE],
1429                                             "Remote IPv4 address cannot be Multicast");
1430                         return -EINVAL;
1431                 }
1432         }
1433
1434         if (data[IFLA_GENEVE_REMOTE6]) {
1435 #if IS_ENABLED(CONFIG_IPV6)
1436                 if (changelink && (ip_tunnel_info_af(info) == AF_INET)) {
1437                         attrtype = IFLA_GENEVE_REMOTE6;
1438                         goto change_notsup;
1439                 }
1440
1441                 info->mode = IP_TUNNEL_INFO_IPV6;
1442                 info->key.u.ipv6.dst =
1443                         nla_get_in6_addr(data[IFLA_GENEVE_REMOTE6]);
1444
1445                 if (ipv6_addr_type(&info->key.u.ipv6.dst) &
1446                     IPV6_ADDR_LINKLOCAL) {
1447                         NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE6],
1448                                             "Remote IPv6 address cannot be link-local");
1449                         return -EINVAL;
1450                 }
1451                 if (ipv6_addr_is_multicast(&info->key.u.ipv6.dst)) {
1452                         NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE6],
1453                                             "Remote IPv6 address cannot be Multicast");
1454                         return -EINVAL;
1455                 }
1456                 info->key.tun_flags |= TUNNEL_CSUM;
1457                 cfg->use_udp6_rx_checksums = true;
1458 #else
1459                 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE6],
1460                                     "IPv6 support not enabled in the kernel");
1461                 return -EPFNOSUPPORT;
1462 #endif
1463         }
1464
1465         if (data[IFLA_GENEVE_ID]) {
1466                 __u32 vni;
1467                 __u8 tvni[3];
1468                 __be64 tunid;
1469
1470                 vni = nla_get_u32(data[IFLA_GENEVE_ID]);
1471                 tvni[0] = (vni & 0x00ff0000) >> 16;
1472                 tvni[1] = (vni & 0x0000ff00) >> 8;
1473                 tvni[2] =  vni & 0x000000ff;
1474
1475                 tunid = vni_to_tunnel_id(tvni);
1476                 if (changelink && (tunid != info->key.tun_id)) {
1477                         attrtype = IFLA_GENEVE_ID;
1478                         goto change_notsup;
1479                 }
1480                 info->key.tun_id = tunid;
1481         }
1482
1483         if (data[IFLA_GENEVE_TTL_INHERIT]) {
1484                 if (nla_get_u8(data[IFLA_GENEVE_TTL_INHERIT]))
1485                         cfg->ttl_inherit = true;
1486                 else
1487                         cfg->ttl_inherit = false;
1488         } else if (data[IFLA_GENEVE_TTL]) {
1489                 info->key.ttl = nla_get_u8(data[IFLA_GENEVE_TTL]);
1490                 cfg->ttl_inherit = false;
1491         }
1492
1493         if (data[IFLA_GENEVE_TOS])
1494                 info->key.tos = nla_get_u8(data[IFLA_GENEVE_TOS]);
1495
1496         if (data[IFLA_GENEVE_DF])
1497                 cfg->df = nla_get_u8(data[IFLA_GENEVE_DF]);
1498
1499         if (data[IFLA_GENEVE_LABEL]) {
1500                 info->key.label = nla_get_be32(data[IFLA_GENEVE_LABEL]) &
1501                                   IPV6_FLOWLABEL_MASK;
1502                 if (info->key.label && (!(info->mode & IP_TUNNEL_INFO_IPV6))) {
1503                         NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_LABEL],
1504                                             "Label attribute only applies for IPv6 Geneve devices");
1505                         return -EINVAL;
1506                 }
1507         }
1508
1509         if (data[IFLA_GENEVE_PORT]) {
1510                 if (changelink) {
1511                         attrtype = IFLA_GENEVE_PORT;
1512                         goto change_notsup;
1513                 }
1514                 info->key.tp_dst = nla_get_be16(data[IFLA_GENEVE_PORT]);
1515         }
1516
1517         if (data[IFLA_GENEVE_COLLECT_METADATA]) {
1518                 if (changelink) {
1519                         attrtype = IFLA_GENEVE_COLLECT_METADATA;
1520                         goto change_notsup;
1521                 }
1522                 cfg->collect_md = true;
1523         }
1524
1525         if (data[IFLA_GENEVE_UDP_CSUM]) {
1526                 if (changelink) {
1527                         attrtype = IFLA_GENEVE_UDP_CSUM;
1528                         goto change_notsup;
1529                 }
1530                 if (nla_get_u8(data[IFLA_GENEVE_UDP_CSUM]))
1531                         info->key.tun_flags |= TUNNEL_CSUM;
1532         }
1533
1534         if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX]) {
1535 #if IS_ENABLED(CONFIG_IPV6)
1536                 if (changelink) {
1537                         attrtype = IFLA_GENEVE_UDP_ZERO_CSUM6_TX;
1538                         goto change_notsup;
1539                 }
1540                 if (nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX]))
1541                         info->key.tun_flags &= ~TUNNEL_CSUM;
1542 #else
1543                 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX],
1544                                     "IPv6 support not enabled in the kernel");
1545                 return -EPFNOSUPPORT;
1546 #endif
1547         }
1548
1549         if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX]) {
1550 #if IS_ENABLED(CONFIG_IPV6)
1551                 if (changelink) {
1552                         attrtype = IFLA_GENEVE_UDP_ZERO_CSUM6_RX;
1553                         goto change_notsup;
1554                 }
1555                 if (nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX]))
1556                         cfg->use_udp6_rx_checksums = false;
1557 #else
1558                 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX],
1559                                     "IPv6 support not enabled in the kernel");
1560                 return -EPFNOSUPPORT;
1561 #endif
1562         }
1563
1564         return 0;
1565 change_notsup:
1566         NL_SET_ERR_MSG_ATTR(extack, data[attrtype],
1567                             "Changing VNI, Port, endpoint IP address family, external, and UDP checksum attributes are not supported");
1568         return -EOPNOTSUPP;
1569 }
1570
1571 static void geneve_link_config(struct net_device *dev,
1572                                struct ip_tunnel_info *info, struct nlattr *tb[])
1573 {
1574         struct geneve_dev *geneve = netdev_priv(dev);
1575         int ldev_mtu = 0;
1576
1577         if (tb[IFLA_MTU]) {
1578                 geneve_change_mtu(dev, nla_get_u32(tb[IFLA_MTU]));
1579                 return;
1580         }
1581
1582         switch (ip_tunnel_info_af(info)) {
1583         case AF_INET: {
1584                 struct flowi4 fl4 = { .daddr = info->key.u.ipv4.dst };
1585                 struct rtable *rt = ip_route_output_key(geneve->net, &fl4);
1586
1587                 if (!IS_ERR(rt) && rt->dst.dev) {
1588                         ldev_mtu = rt->dst.dev->mtu - GENEVE_IPV4_HLEN;
1589                         ip_rt_put(rt);
1590                 }
1591                 break;
1592         }
1593 #if IS_ENABLED(CONFIG_IPV6)
1594         case AF_INET6: {
1595                 struct rt6_info *rt;
1596
1597                 if (!__in6_dev_get(dev))
1598                         break;
1599
1600                 rt = rt6_lookup(geneve->net, &info->key.u.ipv6.dst, NULL, 0,
1601                                 NULL, 0);
1602
1603                 if (rt && rt->dst.dev)
1604                         ldev_mtu = rt->dst.dev->mtu - GENEVE_IPV6_HLEN;
1605                 ip6_rt_put(rt);
1606                 break;
1607         }
1608 #endif
1609         }
1610
1611         if (ldev_mtu <= 0)
1612                 return;
1613
1614         geneve_change_mtu(dev, ldev_mtu - info->options_len);
1615 }
1616
1617 static int geneve_newlink(struct net *net, struct net_device *dev,
1618                           struct nlattr *tb[], struct nlattr *data[],
1619                           struct netlink_ext_ack *extack)
1620 {
1621         struct geneve_config cfg = {
1622                 .df = GENEVE_DF_UNSET,
1623                 .use_udp6_rx_checksums = false,
1624                 .ttl_inherit = false,
1625                 .collect_md = false,
1626         };
1627         int err;
1628
1629         init_tnl_info(&cfg.info, GENEVE_UDP_PORT);
1630         err = geneve_nl2info(tb, data, extack, &cfg, false);
1631         if (err)
1632                 return err;
1633
1634         err = geneve_configure(net, dev, extack, &cfg);
1635         if (err)
1636                 return err;
1637
1638         geneve_link_config(dev, &cfg.info, tb);
1639
1640         return 0;
1641 }
1642
1643 /* Quiesces the geneve device data path for both TX and RX.
1644  *
1645  * On transmit geneve checks for non-NULL geneve_sock before it proceeds.
1646  * So, if we set that socket to NULL under RCU and wait for synchronize_net()
1647  * to complete for the existing set of in-flight packets to be transmitted,
1648  * then we would have quiesced the transmit data path. All the future packets
1649  * will get dropped until we unquiesce the data path.
1650  *
1651  * On receive geneve dereference the geneve_sock stashed in the socket. So,
1652  * if we set that to NULL under RCU and wait for synchronize_net() to
1653  * complete, then we would have quiesced the receive data path.
1654  */
1655 static void geneve_quiesce(struct geneve_dev *geneve, struct geneve_sock **gs4,
1656                            struct geneve_sock **gs6)
1657 {
1658         *gs4 = rtnl_dereference(geneve->sock4);
1659         rcu_assign_pointer(geneve->sock4, NULL);
1660         if (*gs4)
1661                 rcu_assign_sk_user_data((*gs4)->sock->sk, NULL);
1662 #if IS_ENABLED(CONFIG_IPV6)
1663         *gs6 = rtnl_dereference(geneve->sock6);
1664         rcu_assign_pointer(geneve->sock6, NULL);
1665         if (*gs6)
1666                 rcu_assign_sk_user_data((*gs6)->sock->sk, NULL);
1667 #else
1668         *gs6 = NULL;
1669 #endif
1670         synchronize_net();
1671 }
1672
1673 /* Resumes the geneve device data path for both TX and RX. */
1674 static void geneve_unquiesce(struct geneve_dev *geneve, struct geneve_sock *gs4,
1675                              struct geneve_sock __maybe_unused *gs6)
1676 {
1677         rcu_assign_pointer(geneve->sock4, gs4);
1678         if (gs4)
1679                 rcu_assign_sk_user_data(gs4->sock->sk, gs4);
1680 #if IS_ENABLED(CONFIG_IPV6)
1681         rcu_assign_pointer(geneve->sock6, gs6);
1682         if (gs6)
1683                 rcu_assign_sk_user_data(gs6->sock->sk, gs6);
1684 #endif
1685         synchronize_net();
1686 }
1687
1688 static int geneve_changelink(struct net_device *dev, struct nlattr *tb[],
1689                              struct nlattr *data[],
1690                              struct netlink_ext_ack *extack)
1691 {
1692         struct geneve_dev *geneve = netdev_priv(dev);
1693         struct geneve_sock *gs4, *gs6;
1694         struct geneve_config cfg;
1695         int err;
1696
1697         /* If the geneve device is configured for metadata (or externally
1698          * controlled, for example, OVS), then nothing can be changed.
1699          */
1700         if (geneve->cfg.collect_md)
1701                 return -EOPNOTSUPP;
1702
1703         /* Start with the existing info. */
1704         memcpy(&cfg, &geneve->cfg, sizeof(cfg));
1705         err = geneve_nl2info(tb, data, extack, &cfg, true);
1706         if (err)
1707                 return err;
1708
1709         if (!geneve_dst_addr_equal(&geneve->cfg.info, &cfg.info)) {
1710                 dst_cache_reset(&cfg.info.dst_cache);
1711                 geneve_link_config(dev, &cfg.info, tb);
1712         }
1713
1714         geneve_quiesce(geneve, &gs4, &gs6);
1715         memcpy(&geneve->cfg, &cfg, sizeof(cfg));
1716         geneve_unquiesce(geneve, gs4, gs6);
1717
1718         return 0;
1719 }
1720
1721 static void geneve_dellink(struct net_device *dev, struct list_head *head)
1722 {
1723         struct geneve_dev *geneve = netdev_priv(dev);
1724
1725         list_del(&geneve->next);
1726         unregister_netdevice_queue(dev, head);
1727 }
1728
1729 static size_t geneve_get_size(const struct net_device *dev)
1730 {
1731         return nla_total_size(sizeof(__u32)) +  /* IFLA_GENEVE_ID */
1732                 nla_total_size(sizeof(struct in6_addr)) + /* IFLA_GENEVE_REMOTE{6} */
1733                 nla_total_size(sizeof(__u8)) +  /* IFLA_GENEVE_TTL */
1734                 nla_total_size(sizeof(__u8)) +  /* IFLA_GENEVE_TOS */
1735                 nla_total_size(sizeof(__u8)) +  /* IFLA_GENEVE_DF */
1736                 nla_total_size(sizeof(__be32)) +  /* IFLA_GENEVE_LABEL */
1737                 nla_total_size(sizeof(__be16)) +  /* IFLA_GENEVE_PORT */
1738                 nla_total_size(0) +      /* IFLA_GENEVE_COLLECT_METADATA */
1739                 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_CSUM */
1740                 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_ZERO_CSUM6_TX */
1741                 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_ZERO_CSUM6_RX */
1742                 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TTL_INHERIT */
1743                 0;
1744 }
1745
1746 static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev)
1747 {
1748         struct geneve_dev *geneve = netdev_priv(dev);
1749         struct ip_tunnel_info *info = &geneve->cfg.info;
1750         bool ttl_inherit = geneve->cfg.ttl_inherit;
1751         bool metadata = geneve->cfg.collect_md;
1752         __u8 tmp_vni[3];
1753         __u32 vni;
1754
1755         tunnel_id_to_vni(info->key.tun_id, tmp_vni);
1756         vni = (tmp_vni[0] << 16) | (tmp_vni[1] << 8) | tmp_vni[2];
1757         if (nla_put_u32(skb, IFLA_GENEVE_ID, vni))
1758                 goto nla_put_failure;
1759
1760         if (!metadata && ip_tunnel_info_af(info) == AF_INET) {
1761                 if (nla_put_in_addr(skb, IFLA_GENEVE_REMOTE,
1762                                     info->key.u.ipv4.dst))
1763                         goto nla_put_failure;
1764                 if (nla_put_u8(skb, IFLA_GENEVE_UDP_CSUM,
1765                                !!(info->key.tun_flags & TUNNEL_CSUM)))
1766                         goto nla_put_failure;
1767
1768 #if IS_ENABLED(CONFIG_IPV6)
1769         } else if (!metadata) {
1770                 if (nla_put_in6_addr(skb, IFLA_GENEVE_REMOTE6,
1771                                      &info->key.u.ipv6.dst))
1772                         goto nla_put_failure;
1773                 if (nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_TX,
1774                                !(info->key.tun_flags & TUNNEL_CSUM)))
1775                         goto nla_put_failure;
1776 #endif
1777         }
1778
1779         if (nla_put_u8(skb, IFLA_GENEVE_TTL, info->key.ttl) ||
1780             nla_put_u8(skb, IFLA_GENEVE_TOS, info->key.tos) ||
1781             nla_put_be32(skb, IFLA_GENEVE_LABEL, info->key.label))
1782                 goto nla_put_failure;
1783
1784         if (nla_put_u8(skb, IFLA_GENEVE_DF, geneve->cfg.df))
1785                 goto nla_put_failure;
1786
1787         if (nla_put_be16(skb, IFLA_GENEVE_PORT, info->key.tp_dst))
1788                 goto nla_put_failure;
1789
1790         if (metadata && nla_put_flag(skb, IFLA_GENEVE_COLLECT_METADATA))
1791                 goto nla_put_failure;
1792
1793 #if IS_ENABLED(CONFIG_IPV6)
1794         if (nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_RX,
1795                        !geneve->cfg.use_udp6_rx_checksums))
1796                 goto nla_put_failure;
1797 #endif
1798
1799         if (nla_put_u8(skb, IFLA_GENEVE_TTL_INHERIT, ttl_inherit))
1800                 goto nla_put_failure;
1801
1802         return 0;
1803
1804 nla_put_failure:
1805         return -EMSGSIZE;
1806 }
1807
1808 static struct rtnl_link_ops geneve_link_ops __read_mostly = {
1809         .kind           = "geneve",
1810         .maxtype        = IFLA_GENEVE_MAX,
1811         .policy         = geneve_policy,
1812         .priv_size      = sizeof(struct geneve_dev),
1813         .setup          = geneve_setup,
1814         .validate       = geneve_validate,
1815         .newlink        = geneve_newlink,
1816         .changelink     = geneve_changelink,
1817         .dellink        = geneve_dellink,
1818         .get_size       = geneve_get_size,
1819         .fill_info      = geneve_fill_info,
1820 };
1821
1822 struct net_device *geneve_dev_create_fb(struct net *net, const char *name,
1823                                         u8 name_assign_type, u16 dst_port)
1824 {
1825         struct nlattr *tb[IFLA_MAX + 1];
1826         struct net_device *dev;
1827         LIST_HEAD(list_kill);
1828         int err;
1829         struct geneve_config cfg = {
1830                 .df = GENEVE_DF_UNSET,
1831                 .use_udp6_rx_checksums = true,
1832                 .ttl_inherit = false,
1833                 .collect_md = true,
1834         };
1835
1836         memset(tb, 0, sizeof(tb));
1837         dev = rtnl_create_link(net, name, name_assign_type,
1838                                &geneve_link_ops, tb, NULL);
1839         if (IS_ERR(dev))
1840                 return dev;
1841
1842         init_tnl_info(&cfg.info, dst_port);
1843         err = geneve_configure(net, dev, NULL, &cfg);
1844         if (err) {
1845                 free_netdev(dev);
1846                 return ERR_PTR(err);
1847         }
1848
1849         /* openvswitch users expect packet sizes to be unrestricted,
1850          * so set the largest MTU we can.
1851          */
1852         err = geneve_change_mtu(dev, IP_MAX_MTU);
1853         if (err)
1854                 goto err;
1855
1856         err = rtnl_configure_link(dev, NULL);
1857         if (err < 0)
1858                 goto err;
1859
1860         return dev;
1861 err:
1862         geneve_dellink(dev, &list_kill);
1863         unregister_netdevice_many(&list_kill);
1864         return ERR_PTR(err);
1865 }
1866 EXPORT_SYMBOL_GPL(geneve_dev_create_fb);
1867
1868 static int geneve_netdevice_event(struct notifier_block *unused,
1869                                   unsigned long event, void *ptr)
1870 {
1871         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1872
1873         if (event == NETDEV_UDP_TUNNEL_PUSH_INFO)
1874                 geneve_offload_rx_ports(dev, true);
1875         else if (event == NETDEV_UDP_TUNNEL_DROP_INFO)
1876                 geneve_offload_rx_ports(dev, false);
1877
1878         return NOTIFY_DONE;
1879 }
1880
1881 static struct notifier_block geneve_notifier_block __read_mostly = {
1882         .notifier_call = geneve_netdevice_event,
1883 };
1884
1885 static __net_init int geneve_init_net(struct net *net)
1886 {
1887         struct geneve_net *gn = net_generic(net, geneve_net_id);
1888
1889         INIT_LIST_HEAD(&gn->geneve_list);
1890         INIT_LIST_HEAD(&gn->sock_list);
1891         return 0;
1892 }
1893
1894 static void geneve_destroy_tunnels(struct net *net, struct list_head *head)
1895 {
1896         struct geneve_net *gn = net_generic(net, geneve_net_id);
1897         struct geneve_dev *geneve, *next;
1898         struct net_device *dev, *aux;
1899
1900         /* gather any geneve devices that were moved into this ns */
1901         for_each_netdev_safe(net, dev, aux)
1902                 if (dev->rtnl_link_ops == &geneve_link_ops)
1903                         unregister_netdevice_queue(dev, head);
1904
1905         /* now gather any other geneve devices that were created in this ns */
1906         list_for_each_entry_safe(geneve, next, &gn->geneve_list, next) {
1907                 /* If geneve->dev is in the same netns, it was already added
1908                  * to the list by the previous loop.
1909                  */
1910                 if (!net_eq(dev_net(geneve->dev), net))
1911                         unregister_netdevice_queue(geneve->dev, head);
1912         }
1913 }
1914
1915 static void __net_exit geneve_exit_batch_net(struct list_head *net_list)
1916 {
1917         struct net *net;
1918         LIST_HEAD(list);
1919
1920         rtnl_lock();
1921         list_for_each_entry(net, net_list, exit_list)
1922                 geneve_destroy_tunnels(net, &list);
1923
1924         /* unregister the devices gathered above */
1925         unregister_netdevice_many(&list);
1926         rtnl_unlock();
1927
1928         list_for_each_entry(net, net_list, exit_list) {
1929                 const struct geneve_net *gn = net_generic(net, geneve_net_id);
1930
1931                 WARN_ON_ONCE(!list_empty(&gn->sock_list));
1932         }
1933 }
1934
1935 static struct pernet_operations geneve_net_ops = {
1936         .init = geneve_init_net,
1937         .exit_batch = geneve_exit_batch_net,
1938         .id   = &geneve_net_id,
1939         .size = sizeof(struct geneve_net),
1940 };
1941
1942 static int __init geneve_init_module(void)
1943 {
1944         int rc;
1945
1946         rc = register_pernet_subsys(&geneve_net_ops);
1947         if (rc)
1948                 goto out1;
1949
1950         rc = register_netdevice_notifier(&geneve_notifier_block);
1951         if (rc)
1952                 goto out2;
1953
1954         rc = rtnl_link_register(&geneve_link_ops);
1955         if (rc)
1956                 goto out3;
1957
1958         return 0;
1959 out3:
1960         unregister_netdevice_notifier(&geneve_notifier_block);
1961 out2:
1962         unregister_pernet_subsys(&geneve_net_ops);
1963 out1:
1964         return rc;
1965 }
1966 late_initcall(geneve_init_module);
1967
1968 static void __exit geneve_cleanup_module(void)
1969 {
1970         rtnl_link_unregister(&geneve_link_ops);
1971         unregister_netdevice_notifier(&geneve_notifier_block);
1972         unregister_pernet_subsys(&geneve_net_ops);
1973 }
1974 module_exit(geneve_cleanup_module);
1975
1976 MODULE_LICENSE("GPL");
1977 MODULE_VERSION(GENEVE_NETDEV_VER);
1978 MODULE_AUTHOR("John W. Linville <linville@tuxdriver.com>");
1979 MODULE_DESCRIPTION("Interface driver for GENEVE encapsulated traffic");
1980 MODULE_ALIAS_RTNL_LINK("geneve");