vxlan: vni filtering support on collect metadata device
[linux-2.6-microblaze.git] / drivers / net / vxlan / vxlan_core.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * VXLAN: Virtual eXtensible Local Area Network
4  *
5  * Copyright (c) 2012-2013 Vyatta Inc.
6  */
7
8 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
9
10 #include <linux/kernel.h>
11 #include <linux/module.h>
12 #include <linux/errno.h>
13 #include <linux/slab.h>
14 #include <linux/udp.h>
15 #include <linux/igmp.h>
16 #include <linux/if_ether.h>
17 #include <linux/ethtool.h>
18 #include <net/arp.h>
19 #include <net/ndisc.h>
20 #include <net/gro.h>
21 #include <net/ipv6_stubs.h>
22 #include <net/ip.h>
23 #include <net/icmp.h>
24 #include <net/rtnetlink.h>
25 #include <net/inet_ecn.h>
26 #include <net/net_namespace.h>
27 #include <net/netns/generic.h>
28 #include <net/tun_proto.h>
29 #include <net/vxlan.h>
30 #include <net/nexthop.h>
31
32 #if IS_ENABLED(CONFIG_IPV6)
33 #include <net/ip6_tunnel.h>
34 #include <net/ip6_checksum.h>
35 #endif
36
37 #include "vxlan_private.h"
38
39 #define VXLAN_VERSION   "0.1"
40
41 #define FDB_AGE_DEFAULT 300 /* 5 min */
42 #define FDB_AGE_INTERVAL (10 * HZ)      /* rescan interval */
43
44 /* UDP port for VXLAN traffic.
45  * The IANA assigned port is 4789, but the Linux default is 8472
46  * for compatibility with early adopters.
47  */
48 static unsigned short vxlan_port __read_mostly = 8472;
49 module_param_named(udp_port, vxlan_port, ushort, 0444);
50 MODULE_PARM_DESC(udp_port, "Destination UDP port");
51
52 static bool log_ecn_error = true;
53 module_param(log_ecn_error, bool, 0644);
54 MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
55
56 unsigned int vxlan_net_id;
57
58 const u8 all_zeros_mac[ETH_ALEN + 2];
59 static struct rtnl_link_ops vxlan_link_ops;
60
61 static int vxlan_sock_add(struct vxlan_dev *vxlan);
62
63 static void vxlan_vs_del_dev(struct vxlan_dev *vxlan);
64
65 /* salt for hash table */
66 static u32 vxlan_salt __read_mostly;
67
68 static inline bool vxlan_collect_metadata(struct vxlan_sock *vs)
69 {
70         return vs->flags & VXLAN_F_COLLECT_METADATA ||
71                ip_tunnel_collect_metadata();
72 }
73
74 #if IS_ENABLED(CONFIG_IPV6)
75 static int vxlan_nla_get_addr(union vxlan_addr *ip, struct nlattr *nla)
76 {
77         if (nla_len(nla) >= sizeof(struct in6_addr)) {
78                 ip->sin6.sin6_addr = nla_get_in6_addr(nla);
79                 ip->sa.sa_family = AF_INET6;
80                 return 0;
81         } else if (nla_len(nla) >= sizeof(__be32)) {
82                 ip->sin.sin_addr.s_addr = nla_get_in_addr(nla);
83                 ip->sa.sa_family = AF_INET;
84                 return 0;
85         } else {
86                 return -EAFNOSUPPORT;
87         }
88 }
89
90 static int vxlan_nla_put_addr(struct sk_buff *skb, int attr,
91                               const union vxlan_addr *ip)
92 {
93         if (ip->sa.sa_family == AF_INET6)
94                 return nla_put_in6_addr(skb, attr, &ip->sin6.sin6_addr);
95         else
96                 return nla_put_in_addr(skb, attr, ip->sin.sin_addr.s_addr);
97 }
98
99 #else /* !CONFIG_IPV6 */
100
101 static int vxlan_nla_get_addr(union vxlan_addr *ip, struct nlattr *nla)
102 {
103         if (nla_len(nla) >= sizeof(struct in6_addr)) {
104                 return -EAFNOSUPPORT;
105         } else if (nla_len(nla) >= sizeof(__be32)) {
106                 ip->sin.sin_addr.s_addr = nla_get_in_addr(nla);
107                 ip->sa.sa_family = AF_INET;
108                 return 0;
109         } else {
110                 return -EAFNOSUPPORT;
111         }
112 }
113
/* IPv4-only build: @ip can only hold an AF_INET address here. */
static int vxlan_nla_put_addr(struct sk_buff *skb, int attr,
			      const union vxlan_addr *ip)
{
	return nla_put_in_addr(skb, attr, ip->sin.sin_addr.s_addr);
}
119 #endif
120
121 /* Find VXLAN socket based on network namespace, address family, UDP port,
122  * enabled unshareable flags and socket device binding (see l3mdev with
123  * non-default VRF).
124  */
125 static struct vxlan_sock *vxlan_find_sock(struct net *net, sa_family_t family,
126                                           __be16 port, u32 flags, int ifindex)
127 {
128         struct vxlan_sock *vs;
129
130         flags &= VXLAN_F_RCV_FLAGS;
131
132         hlist_for_each_entry_rcu(vs, vs_head(net, port), hlist) {
133                 if (inet_sk(vs->sock->sk)->inet_sport == port &&
134                     vxlan_get_sk_family(vs) == family &&
135                     vs->flags == flags &&
136                     vs->sock->sk->sk_bound_dev_if == ifindex)
137                         return vs;
138         }
139         return NULL;
140 }
141
142 static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, int ifindex,
143                                            __be32 vni)
144 {
145         struct vxlan_dev_node *node;
146
147         /* For flow based devices, map all packets to VNI 0 */
148         if (vs->flags & VXLAN_F_COLLECT_METADATA &&
149             !(vs->flags & VXLAN_F_VNIFILTER))
150                 vni = 0;
151
152         hlist_for_each_entry_rcu(node, vni_head(vs, vni), hlist) {
153                 if (!node->vxlan)
154                         continue;
155                 if (node->vxlan->cfg.flags & VXLAN_F_VNIFILTER) {
156                         if (!vxlan_vnifilter_lookup(node->vxlan, vni))
157                                 continue;
158                 } else if (node->vxlan->default_dst.remote_vni != vni) {
159                         continue;
160                 }
161
162                 if (IS_ENABLED(CONFIG_IPV6)) {
163                         const struct vxlan_config *cfg = &node->vxlan->cfg;
164
165                         if ((cfg->flags & VXLAN_F_IPV6_LINKLOCAL) &&
166                             cfg->remote_ifindex != ifindex)
167                                 continue;
168                 }
169
170                 return node->vxlan;
171         }
172
173         return NULL;
174 }
175
176 /* Look up VNI in a per net namespace table */
177 static struct vxlan_dev *vxlan_find_vni(struct net *net, int ifindex,
178                                         __be32 vni, sa_family_t family,
179                                         __be16 port, u32 flags)
180 {
181         struct vxlan_sock *vs;
182
183         vs = vxlan_find_sock(net, family, port, flags, ifindex);
184         if (!vs)
185                 return NULL;
186
187         return vxlan_vs_find_vni(vs, ifindex, vni);
188 }
189
/* Fill in neighbour message in skbuff.
 * Builds one RTM_{NEW,DEL,GET}NEIGH record for @fdb (and optionally one
 * remote destination @rdst) into @skb.  Returns 0 on success or -EMSGSIZE
 * when the skb (sized via vxlan_nlmsg_size()) has no room.
 */
static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan,
			  const struct vxlan_fdb *fdb,
			  u32 portid, u32 seq, int type, unsigned int flags,
			  const struct vxlan_rdst *rdst)
{
	unsigned long now = jiffies;
	struct nda_cacheinfo ci;
	bool send_ip, send_eth;
	struct nlmsghdr *nlh;
	struct nexthop *nh;
	struct ndmsg *ndm;
	int nh_family;
	u32 nh_id;

	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*ndm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndm = nlmsg_data(nlh);
	memset(ndm, 0, sizeof(*ndm));

	send_eth = send_ip = true;

	/* Snapshot nexthop id/family under RCU; after the unlock the
	 * pointer is only tested for NULL, never dereferenced again.
	 */
	rcu_read_lock();
	nh = rcu_dereference(fdb->nh);
	if (nh) {
		nh_family = nexthop_get_family(nh);
		nh_id = nh->id;
	}
	rcu_read_unlock();

	if (type == RTM_GETNEIGH) {
		if (rdst) {
			/* miss notifications suppress an all-zeros address */
			send_ip = !vxlan_addr_any(&rdst->remote_ip);
			ndm->ndm_family = send_ip ? rdst->remote_ip.sa.sa_family : AF_INET;
		} else if (nh) {
			ndm->ndm_family = nh_family;
		}
		send_eth = !is_zero_ether_addr(fdb->eth_addr);
	} else
		ndm->ndm_family = AF_BRIDGE;
	ndm->ndm_state = fdb->state;
	ndm->ndm_ifindex = vxlan->dev->ifindex;
	ndm->ndm_flags = fdb->flags;
	if (rdst && rdst->offloaded)
		ndm->ndm_flags |= NTF_OFFLOADED;
	ndm->ndm_type = RTN_UNICAST;

	/* Cross-netns device: report which netns the link lives in */
	if (!net_eq(dev_net(vxlan->dev), vxlan->net) &&
	    nla_put_s32(skb, NDA_LINK_NETNSID,
			peernet2id(dev_net(vxlan->dev), vxlan->net)))
		goto nla_put_failure;

	if (send_eth && nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->eth_addr))
		goto nla_put_failure;
	if (nh) {
		/* nexthop-backed entry: the group id replaces remote info */
		if (nla_put_u32(skb, NDA_NH_ID, nh_id))
			goto nla_put_failure;
	} else if (rdst) {
		if (send_ip && vxlan_nla_put_addr(skb, NDA_DST,
						  &rdst->remote_ip))
			goto nla_put_failure;

		/* port/vni/ifindex only when set and different from the
		 * device defaults
		 */
		if (rdst->remote_port &&
		    rdst->remote_port != vxlan->cfg.dst_port &&
		    nla_put_be16(skb, NDA_PORT, rdst->remote_port))
			goto nla_put_failure;
		if (rdst->remote_vni != vxlan->default_dst.remote_vni &&
		    nla_put_u32(skb, NDA_VNI, be32_to_cpu(rdst->remote_vni)))
			goto nla_put_failure;
		if (rdst->remote_ifindex &&
		    nla_put_u32(skb, NDA_IFINDEX, rdst->remote_ifindex))
			goto nla_put_failure;
	}

	/* metadata devices key fdb entries by (MAC, source VNI) */
	if ((vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA) && fdb->vni &&
	    nla_put_u32(skb, NDA_SRC_VNI,
			be32_to_cpu(fdb->vni)))
		goto nla_put_failure;

	ci.ndm_used      = jiffies_to_clock_t(now - fdb->used);
	ci.ndm_confirmed = 0;
	ci.ndm_updated   = jiffies_to_clock_t(now - fdb->updated);
	ci.ndm_refcnt    = 0;

	if (nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
286
287 static inline size_t vxlan_nlmsg_size(void)
288 {
289         return NLMSG_ALIGN(sizeof(struct ndmsg))
290                 + nla_total_size(ETH_ALEN) /* NDA_LLADDR */
291                 + nla_total_size(sizeof(struct in6_addr)) /* NDA_DST */
292                 + nla_total_size(sizeof(__be16)) /* NDA_PORT */
293                 + nla_total_size(sizeof(__be32)) /* NDA_VNI */
294                 + nla_total_size(sizeof(__u32)) /* NDA_IFINDEX */
295                 + nla_total_size(sizeof(__s32)) /* NDA_LINK_NETNSID */
296                 + nla_total_size(sizeof(struct nda_cacheinfo));
297 }
298
299 static void __vxlan_fdb_notify(struct vxlan_dev *vxlan, struct vxlan_fdb *fdb,
300                                struct vxlan_rdst *rd, int type)
301 {
302         struct net *net = dev_net(vxlan->dev);
303         struct sk_buff *skb;
304         int err = -ENOBUFS;
305
306         skb = nlmsg_new(vxlan_nlmsg_size(), GFP_ATOMIC);
307         if (skb == NULL)
308                 goto errout;
309
310         err = vxlan_fdb_info(skb, vxlan, fdb, 0, 0, type, 0, rd);
311         if (err < 0) {
312                 /* -EMSGSIZE implies BUG in vxlan_nlmsg_size() */
313                 WARN_ON(err == -EMSGSIZE);
314                 kfree_skb(skb);
315                 goto errout;
316         }
317
318         rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
319         return;
320 errout:
321         if (err < 0)
322                 rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
323 }
324
/* Populate a switchdev fdb notifier payload from @fdb and one of its
 * remotes @rd; @extack may be NULL when no error reporting is wanted.
 */
static void vxlan_fdb_switchdev_notifier_info(const struct vxlan_dev *vxlan,
			    const struct vxlan_fdb *fdb,
			    const struct vxlan_rdst *rd,
			    struct netlink_ext_ack *extack,
			    struct switchdev_notifier_vxlan_fdb_info *fdb_info)
{
	fdb_info->info.dev = vxlan->dev;
	fdb_info->info.extack = extack;
	fdb_info->remote_ip = rd->remote_ip;
	fdb_info->remote_port = rd->remote_port;
	fdb_info->remote_vni = rd->remote_vni;
	fdb_info->remote_ifindex = rd->remote_ifindex;
	memcpy(fdb_info->eth_addr, fdb->eth_addr, ETH_ALEN);
	fdb_info->vni = fdb->vni;
	fdb_info->offloaded = rd->offloaded;
	fdb_info->added_by_user = fdb->flags & NTF_VXLAN_ADDED_BY_USER;
}
342
343 static int vxlan_fdb_switchdev_call_notifiers(struct vxlan_dev *vxlan,
344                                               struct vxlan_fdb *fdb,
345                                               struct vxlan_rdst *rd,
346                                               bool adding,
347                                               struct netlink_ext_ack *extack)
348 {
349         struct switchdev_notifier_vxlan_fdb_info info;
350         enum switchdev_notifier_type notifier_type;
351         int ret;
352
353         if (WARN_ON(!rd))
354                 return 0;
355
356         notifier_type = adding ? SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE
357                                : SWITCHDEV_VXLAN_FDB_DEL_TO_DEVICE;
358         vxlan_fdb_switchdev_notifier_info(vxlan, fdb, rd, NULL, &info);
359         ret = call_switchdev_notifiers(notifier_type, vxlan->dev,
360                                        &info.info, extack);
361         return notifier_to_errno(ret);
362 }
363
364 static int vxlan_fdb_notify(struct vxlan_dev *vxlan, struct vxlan_fdb *fdb,
365                             struct vxlan_rdst *rd, int type, bool swdev_notify,
366                             struct netlink_ext_ack *extack)
367 {
368         int err;
369
370         if (swdev_notify && rd) {
371                 switch (type) {
372                 case RTM_NEWNEIGH:
373                         err = vxlan_fdb_switchdev_call_notifiers(vxlan, fdb, rd,
374                                                                  true, extack);
375                         if (err)
376                                 return err;
377                         break;
378                 case RTM_DELNEIGH:
379                         vxlan_fdb_switchdev_call_notifiers(vxlan, fdb, rd,
380                                                            false, extack);
381                         break;
382                 }
383         }
384
385         __vxlan_fdb_notify(vxlan, fdb, rd, type);
386         return 0;
387 }
388
389 static void vxlan_ip_miss(struct net_device *dev, union vxlan_addr *ipa)
390 {
391         struct vxlan_dev *vxlan = netdev_priv(dev);
392         struct vxlan_fdb f = {
393                 .state = NUD_STALE,
394         };
395         struct vxlan_rdst remote = {
396                 .remote_ip = *ipa, /* goes to NDA_DST */
397                 .remote_vni = cpu_to_be32(VXLAN_N_VID),
398         };
399
400         vxlan_fdb_notify(vxlan, &f, &remote, RTM_GETNEIGH, true, NULL);
401 }
402
403 static void vxlan_fdb_miss(struct vxlan_dev *vxlan, const u8 eth_addr[ETH_ALEN])
404 {
405         struct vxlan_fdb f = {
406                 .state = NUD_STALE,
407         };
408         struct vxlan_rdst remote = { };
409
410         memcpy(f.eth_addr, eth_addr, ETH_ALEN);
411
412         vxlan_fdb_notify(vxlan, &f, &remote, RTM_GETNEIGH, true, NULL);
413 }
414
/* Hash Ethernet address */
static u32 eth_hash(const unsigned char *addr)
{
	/* Loads 8 bytes from a 6-byte MAC; callers pad their buffers to
	 * ETH_ALEN + 2 (see all_zeros_mac, vxlan_fdb_find_uc) so the read
	 * stays in bounds.
	 */
	u64 value = get_unaligned((u64 *)addr);

	/* only want 6 bytes: shift out the 2 padding bytes, which end up
	 * in the low 16 bits on big-endian loads and the high 16 bits on
	 * little-endian loads
	 */
#ifdef __BIG_ENDIAN
	value >>= 16;
#else
	value <<= 16;
#endif
	return hash_64(value, FDB_HASH_BITS);
}
428
/* Hash (MAC, VNI) pair for collect-metadata devices, which key fdb
 * entries by both.
 */
u32 eth_vni_hash(const unsigned char *addr, __be32 vni)
{
	/* use 1 byte of OUI and 3 bytes of NIC */
	u32 key = get_unaligned((u32 *)(addr + 2));

	/* vni is mixed in as-is (network byte order); every lookup does
	 * the same, so byte order does not affect correctness
	 */
	return jhash_2words(key, vni, vxlan_salt) & (FDB_HASH_SIZE - 1);
}
436
437 u32 fdb_head_index(struct vxlan_dev *vxlan, const u8 *mac, __be32 vni)
438 {
439         if (vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA)
440                 return eth_vni_hash(mac, vni);
441         else
442                 return eth_hash(mac);
443 }
444
445 /* Hash chain to use given mac address */
446 static inline struct hlist_head *vxlan_fdb_head(struct vxlan_dev *vxlan,
447                                                 const u8 *mac, __be32 vni)
448 {
449         return &vxlan->fdb_head[fdb_head_index(vxlan, mac, vni)];
450 }
451
452 /* Look up Ethernet address in forwarding table */
453 static struct vxlan_fdb *__vxlan_find_mac(struct vxlan_dev *vxlan,
454                                           const u8 *mac, __be32 vni)
455 {
456         struct hlist_head *head = vxlan_fdb_head(vxlan, mac, vni);
457         struct vxlan_fdb *f;
458
459         hlist_for_each_entry_rcu(f, head, hlist) {
460                 if (ether_addr_equal(mac, f->eth_addr)) {
461                         if (vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA) {
462                                 if (vni == f->vni)
463                                         return f;
464                         } else {
465                                 return f;
466                         }
467                 }
468         }
469
470         return NULL;
471 }
472
473 static struct vxlan_fdb *vxlan_find_mac(struct vxlan_dev *vxlan,
474                                         const u8 *mac, __be32 vni)
475 {
476         struct vxlan_fdb *f;
477
478         f = __vxlan_find_mac(vxlan, mac, vni);
479         if (f && f->used != jiffies)
480                 f->used = jiffies;
481
482         return f;
483 }
484
485 /* caller should hold vxlan->hash_lock */
486 static struct vxlan_rdst *vxlan_fdb_find_rdst(struct vxlan_fdb *f,
487                                               union vxlan_addr *ip, __be16 port,
488                                               __be32 vni, __u32 ifindex)
489 {
490         struct vxlan_rdst *rd;
491
492         list_for_each_entry(rd, &f->remotes, list) {
493                 if (vxlan_addr_equal(&rd->remote_ip, ip) &&
494                     rd->remote_port == port &&
495                     rd->remote_vni == vni &&
496                     rd->remote_ifindex == ifindex)
497                         return rd;
498         }
499
500         return NULL;
501 }
502
/* Look up the unicast fdb entry for (@mac, @vni) on @dev and fill
 * @fdb_info from its first remote.  Exported for switchdev drivers.
 * Returns 0, -EINVAL for a non-unicast MAC, or -ENOENT.
 */
int vxlan_fdb_find_uc(struct net_device *dev, const u8 *mac, __be32 vni,
		      struct switchdev_notifier_vxlan_fdb_info *fdb_info)
{
	struct vxlan_dev *vxlan = netdev_priv(dev);
	u8 eth_addr[ETH_ALEN + 2] = { 0 };	/* +2: eth_hash() loads 8 bytes */
	struct vxlan_rdst *rdst;
	struct vxlan_fdb *f;
	int rc = 0;

	if (is_multicast_ether_addr(mac) ||
	    is_zero_ether_addr(mac))
		return -EINVAL;

	ether_addr_copy(eth_addr, mac);

	rcu_read_lock();

	f = __vxlan_find_mac(vxlan, eth_addr, vni);
	if (!f) {
		rc = -ENOENT;
		goto out;
	}

	/* NOTE(review): first_remote_rcu() looks like it could be NULL for a
	 * nexthop-backed entry (empty remotes list) — confirm callers cannot
	 * reach that case here.
	 */
	rdst = first_remote_rcu(f);
	vxlan_fdb_switchdev_notifier_info(vxlan, f, rdst, NULL, fdb_info);

out:
	rcu_read_unlock();
	return rc;
}
EXPORT_SYMBOL_GPL(vxlan_fdb_find_uc);
534
535 static int vxlan_fdb_notify_one(struct notifier_block *nb,
536                                 const struct vxlan_dev *vxlan,
537                                 const struct vxlan_fdb *f,
538                                 const struct vxlan_rdst *rdst,
539                                 struct netlink_ext_ack *extack)
540 {
541         struct switchdev_notifier_vxlan_fdb_info fdb_info;
542         int rc;
543
544         vxlan_fdb_switchdev_notifier_info(vxlan, f, rdst, extack, &fdb_info);
545         rc = nb->notifier_call(nb, SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE,
546                                &fdb_info);
547         return notifier_to_errno(rc);
548 }
549
/* Replay every fdb entry of @vni on @dev to notifier @nb, e.g. so a newly
 * registered switchdev driver learns the current state.  Stops at and
 * returns the first error reported by the notifier; 0 otherwise.
 */
int vxlan_fdb_replay(const struct net_device *dev, __be32 vni,
		     struct notifier_block *nb,
		     struct netlink_ext_ack *extack)
{
	struct vxlan_dev *vxlan;
	struct vxlan_rdst *rdst;
	struct vxlan_fdb *f;
	unsigned int h;
	int rc = 0;

	if (!netif_is_vxlan(dev))
		return -EINVAL;
	vxlan = netdev_priv(dev);

	for (h = 0; h < FDB_HASH_SIZE; ++h) {
		/* lock pins this chain while the notifier runs */
		spin_lock_bh(&vxlan->hash_lock[h]);
		hlist_for_each_entry(f, &vxlan->fdb_head[h], hlist) {
			if (f->vni == vni) {
				list_for_each_entry(rdst, &f->remotes, list) {
					rc = vxlan_fdb_notify_one(nb, vxlan,
								  f, rdst,
								  extack);
					if (rc)
						goto unlock;
				}
			}
		}
		spin_unlock_bh(&vxlan->hash_lock[h]);
	}
	return 0;

unlock:
	spin_unlock_bh(&vxlan->hash_lock[h]);
	return rc;
}
EXPORT_SYMBOL_GPL(vxlan_fdb_replay);
586
587 void vxlan_fdb_clear_offload(const struct net_device *dev, __be32 vni)
588 {
589         struct vxlan_dev *vxlan;
590         struct vxlan_rdst *rdst;
591         struct vxlan_fdb *f;
592         unsigned int h;
593
594         if (!netif_is_vxlan(dev))
595                 return;
596         vxlan = netdev_priv(dev);
597
598         for (h = 0; h < FDB_HASH_SIZE; ++h) {
599                 spin_lock_bh(&vxlan->hash_lock[h]);
600                 hlist_for_each_entry(f, &vxlan->fdb_head[h], hlist)
601                         if (f->vni == vni)
602                                 list_for_each_entry(rdst, &f->remotes, list)
603                                         rdst->offloaded = false;
604                 spin_unlock_bh(&vxlan->hash_lock[h]);
605         }
606
607 }
608 EXPORT_SYMBOL_GPL(vxlan_fdb_clear_offload);
609
610 /* Replace destination of unicast mac */
611 static int vxlan_fdb_replace(struct vxlan_fdb *f,
612                              union vxlan_addr *ip, __be16 port, __be32 vni,
613                              __u32 ifindex, struct vxlan_rdst *oldrd)
614 {
615         struct vxlan_rdst *rd;
616
617         rd = vxlan_fdb_find_rdst(f, ip, port, vni, ifindex);
618         if (rd)
619                 return 0;
620
621         rd = list_first_entry_or_null(&f->remotes, struct vxlan_rdst, list);
622         if (!rd)
623                 return 0;
624
625         *oldrd = *rd;
626         dst_cache_reset(&rd->dst_cache);
627         rd->remote_ip = *ip;
628         rd->remote_port = port;
629         rd->remote_vni = vni;
630         rd->remote_ifindex = ifindex;
631         rd->offloaded = false;
632         return 1;
633 }
634
635 /* Add/update destinations for multicast */
636 static int vxlan_fdb_append(struct vxlan_fdb *f,
637                             union vxlan_addr *ip, __be16 port, __be32 vni,
638                             __u32 ifindex, struct vxlan_rdst **rdp)
639 {
640         struct vxlan_rdst *rd;
641
642         rd = vxlan_fdb_find_rdst(f, ip, port, vni, ifindex);
643         if (rd)
644                 return 0;
645
646         rd = kmalloc(sizeof(*rd), GFP_ATOMIC);
647         if (rd == NULL)
648                 return -ENOBUFS;
649
650         if (dst_cache_init(&rd->dst_cache, GFP_ATOMIC)) {
651                 kfree(rd);
652                 return -ENOBUFS;
653         }
654
655         rd->remote_ip = *ip;
656         rd->remote_port = port;
657         rd->offloaded = false;
658         rd->remote_vni = vni;
659         rd->remote_ifindex = ifindex;
660
661         list_add_tail_rcu(&rd->list, &f->remotes);
662
663         *rdp = rd;
664         return 1;
665 }
666
/* Handle the remote-checksum-offload (RCO) extension during GRO.
 * Returns the (possibly relocated) vxlan header on success and marks the
 * skb so the work is not redone; returns NULL when the outer checksum has
 * not been validated, forcing the packet out of the GRO fast path.
 */
static struct vxlanhdr *vxlan_gro_remcsum(struct sk_buff *skb,
					  unsigned int off,
					  struct vxlanhdr *vh, size_t hdrlen,
					  __be32 vni_field,
					  struct gro_remcsum *grc,
					  bool nopartial)
{
	size_t start, offset;

	/* already processed for this skb */
	if (skb->remcsum_offload)
		return vh;

	if (!NAPI_GRO_CB(skb)->csum_valid)
		return NULL;

	/* RCO encodes the inner checksum start/offset in the VNI field */
	start = vxlan_rco_start(vni_field);
	offset = start + vxlan_rco_offset(vni_field);

	vh = skb_gro_remcsum_process(skb, (void *)vh, off, hdrlen,
				     start, offset, grc, nopartial);

	skb->remcsum_offload = 1;

	return vh;
}
692
/* GRO receive handler for a vxlan UDP socket: linearize and strip the
 * vxlan header (handling RCO if enabled), then let the ethernet layer
 * aggregate the inner frames.  Packets only merge when both vx_flags and
 * vx_vni match.
 */
static struct sk_buff *vxlan_gro_receive(struct sock *sk,
					 struct list_head *head,
					 struct sk_buff *skb)
{
	struct sk_buff *pp = NULL;
	struct sk_buff *p;
	struct vxlanhdr *vh, *vh2;
	unsigned int hlen, off_vx;
	int flush = 1;
	struct vxlan_sock *vs = rcu_dereference_sk_user_data(sk);
	__be32 flags;
	struct gro_remcsum grc;

	skb_gro_remcsum_init(&grc);

	/* make sure the whole vxlan header is directly accessible */
	off_vx = skb_gro_offset(skb);
	hlen = off_vx + sizeof(*vh);
	vh   = skb_gro_header_fast(skb, off_vx);
	if (skb_gro_header_hard(skb, hlen)) {
		vh = skb_gro_header_slow(skb, hlen, off_vx);
		if (unlikely(!vh))
			goto out;
	}

	skb_gro_postpull_rcsum(skb, vh, sizeof(struct vxlanhdr));

	flags = vh->vx_flags;

	if ((flags & VXLAN_HF_RCO) && (vs->flags & VXLAN_F_REMCSUM_RX)) {
		vh = vxlan_gro_remcsum(skb, off_vx, vh, sizeof(struct vxlanhdr),
				       vh->vx_vni, &grc,
				       !!(vs->flags &
					  VXLAN_F_REMCSUM_NOPARTIAL));

		/* outer checksum not yet verified: don't aggregate */
		if (!vh)
			goto out;
	}

	skb_gro_pull(skb, sizeof(struct vxlanhdr)); /* pull vxlan header */

	/* disqualify held packets whose vxlan header differs */
	list_for_each_entry(p, head, list) {
		if (!NAPI_GRO_CB(p)->same_flow)
			continue;

		vh2 = (struct vxlanhdr *)(p->data + off_vx);
		if (vh->vx_flags != vh2->vx_flags ||
		    vh->vx_vni != vh2->vx_vni) {
			NAPI_GRO_CB(p)->same_flow = 0;
			continue;
		}
	}

	pp = call_gro_receive(eth_gro_receive, head, skb);
	flush = 0;

out:
	skb_gro_flush_final_remcsum(skb, pp, flush, &grc);

	return pp;
}
753
754 static int vxlan_gro_complete(struct sock *sk, struct sk_buff *skb, int nhoff)
755 {
756         /* Sets 'skb->inner_mac_header' since we are always called with
757          * 'skb->encapsulation' set.
758          */
759         return eth_gro_complete(skb, nhoff + sizeof(struct vxlanhdr));
760 }
761
762 static struct vxlan_fdb *vxlan_fdb_alloc(struct vxlan_dev *vxlan, const u8 *mac,
763                                          __u16 state, __be32 src_vni,
764                                          __u16 ndm_flags)
765 {
766         struct vxlan_fdb *f;
767
768         f = kmalloc(sizeof(*f), GFP_ATOMIC);
769         if (!f)
770                 return NULL;
771         f->state = state;
772         f->flags = ndm_flags;
773         f->updated = f->used = jiffies;
774         f->vni = src_vni;
775         f->nh = NULL;
776         RCU_INIT_POINTER(f->vdev, vxlan);
777         INIT_LIST_HEAD(&f->nh_list);
778         INIT_LIST_HEAD(&f->remotes);
779         memcpy(f->eth_addr, mac, ETH_ALEN);
780
781         return f;
782 }
783
784 static void vxlan_fdb_insert(struct vxlan_dev *vxlan, const u8 *mac,
785                              __be32 src_vni, struct vxlan_fdb *f)
786 {
787         ++vxlan->addrcnt;
788         hlist_add_head_rcu(&f->hlist,
789                            vxlan_fdb_head(vxlan, mac, src_vni));
790 }
791
792 static int vxlan_fdb_nh_update(struct vxlan_dev *vxlan, struct vxlan_fdb *fdb,
793                                u32 nhid, struct netlink_ext_ack *extack)
794 {
795         struct nexthop *old_nh = rtnl_dereference(fdb->nh);
796         struct nexthop *nh;
797         int err = -EINVAL;
798
799         if (old_nh && old_nh->id == nhid)
800                 return 0;
801
802         nh = nexthop_find_by_id(vxlan->net, nhid);
803         if (!nh) {
804                 NL_SET_ERR_MSG(extack, "Nexthop id does not exist");
805                 goto err_inval;
806         }
807
808         if (nh) {
809                 if (!nexthop_get(nh)) {
810                         NL_SET_ERR_MSG(extack, "Nexthop has been deleted");
811                         nh = NULL;
812                         goto err_inval;
813                 }
814                 if (!nexthop_is_fdb(nh)) {
815                         NL_SET_ERR_MSG(extack, "Nexthop is not a fdb nexthop");
816                         goto err_inval;
817                 }
818
819                 if (!nexthop_is_multipath(nh)) {
820                         NL_SET_ERR_MSG(extack, "Nexthop is not a multipath group");
821                         goto err_inval;
822                 }
823
824                 /* check nexthop group family */
825                 switch (vxlan->default_dst.remote_ip.sa.sa_family) {
826                 case AF_INET:
827                         if (!nexthop_has_v4(nh)) {
828                                 err = -EAFNOSUPPORT;
829                                 NL_SET_ERR_MSG(extack, "Nexthop group family not supported");
830                                 goto err_inval;
831                         }
832                         break;
833                 case AF_INET6:
834                         if (nexthop_has_v4(nh)) {
835                                 err = -EAFNOSUPPORT;
836                                 NL_SET_ERR_MSG(extack, "Nexthop group family not supported");
837                                 goto err_inval;
838                         }
839                 }
840         }
841
842         if (old_nh) {
843                 list_del_rcu(&fdb->nh_list);
844                 nexthop_put(old_nh);
845         }
846         rcu_assign_pointer(fdb->nh, nh);
847         list_add_tail_rcu(&fdb->nh_list, &nh->fdb_list);
848         return 1;
849
850 err_inval:
851         if (nh)
852                 nexthop_put(nh);
853         return err;
854 }
855
/* Allocate a new fdb entry for (@mac, @src_vni) and attach either a
 * nexthop group (@nhid != 0) or a first remote destination.  The entry is
 * returned through @fdb and is NOT yet linked into the hash table — the
 * caller publishes it with vxlan_fdb_insert().
 * Returns 0, -ENOSPC when the device address limit is reached, -ENOMEM on
 * allocation failure, or the error from nexthop/remote setup.
 */
int vxlan_fdb_create(struct vxlan_dev *vxlan,
		     const u8 *mac, union vxlan_addr *ip,
		     __u16 state, __be16 port, __be32 src_vni,
		     __be32 vni, __u32 ifindex, __u16 ndm_flags,
		     u32 nhid, struct vxlan_fdb **fdb,
		     struct netlink_ext_ack *extack)
{
	struct vxlan_rdst *rd = NULL;
	struct vxlan_fdb *f;
	int rc;

	if (vxlan->cfg.addrmax &&
	    vxlan->addrcnt >= vxlan->cfg.addrmax)
		return -ENOSPC;

	netdev_dbg(vxlan->dev, "add %pM -> %pIS\n", mac, ip);
	f = vxlan_fdb_alloc(vxlan, mac, state, src_vni, ndm_flags);
	if (!f)
		return -ENOMEM;

	if (nhid)
		rc = vxlan_fdb_nh_update(vxlan, f, nhid, extack);
	else
		rc = vxlan_fdb_append(f, ip, port, vni, ifindex, &rd);
	if (rc < 0)
		goto errout;

	*fdb = f;

	return 0;

errout:
	kfree(f);
	return rc;
}
891
892 static void __vxlan_fdb_free(struct vxlan_fdb *f)
893 {
894         struct vxlan_rdst *rd, *nd;
895         struct nexthop *nh;
896
897         nh = rcu_dereference_raw(f->nh);
898         if (nh) {
899                 rcu_assign_pointer(f->nh, NULL);
900                 rcu_assign_pointer(f->vdev, NULL);
901                 nexthop_put(nh);
902         }
903
904         list_for_each_entry_safe(rd, nd, &f->remotes, list) {
905                 dst_cache_destroy(&rd->dst_cache);
906                 kfree(rd);
907         }
908         kfree(f);
909 }
910
911 static void vxlan_fdb_free(struct rcu_head *head)
912 {
913         struct vxlan_fdb *f = container_of(head, struct vxlan_fdb, rcu);
914
915         __vxlan_fdb_free(f);
916 }
917
/* Tear down an fdb entry: optionally emit RTM_DELNEIGH notifications,
 * unlink it from the hash chain and the nexthop fdb list, and free it
 * after an RCU grace period.
 *
 * @do_notify:	  when true, send RTM_DELNEIGH -- one message per remote
 *		  for regular entries, a single message for nexthop-backed
 *		  entries (which have no remotes list).
 * @swdev_notify: forwarded to vxlan_fdb_notify() to control switchdev
 *		  offload notifications.
 */
static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f,
			      bool do_notify, bool swdev_notify)
{
	struct vxlan_rdst *rd;

	netdev_dbg(vxlan->dev, "delete %pM\n", f->eth_addr);

	--vxlan->addrcnt;
	if (do_notify) {
		/* Nexthop-backed entries carry no remotes to iterate. */
		if (rcu_access_pointer(f->nh))
			vxlan_fdb_notify(vxlan, f, NULL, RTM_DELNEIGH,
					 swdev_notify, NULL);
		else
			list_for_each_entry(rd, &f->remotes, list)
				vxlan_fdb_notify(vxlan, f, rd, RTM_DELNEIGH,
						 swdev_notify, NULL);
	}

	/* Unlink first, free only after readers are done. */
	hlist_del_rcu(&f->hlist);
	list_del_rcu(&f->nh_list);
	call_rcu(&f->rcu, vxlan_fdb_free);
}
940
941 static void vxlan_dst_free(struct rcu_head *head)
942 {
943         struct vxlan_rdst *rd = container_of(head, struct vxlan_rdst, rcu);
944
945         dst_cache_destroy(&rd->dst_cache);
946         kfree(rd);
947 }
948
/* Update an already-existing fdb entry: apply state/flag changes, handle
 * NLM_F_REPLACE (swap the unicast remote or nexthop), NLM_F_APPEND (add
 * another remote to a multicast/all-zeros entry) and the NTF_USE refresh,
 * then notify listeners.  If notification fails, the remote change is
 * rolled back; a nexthop replacement is not rolled back.
 * Caller holds the hash-chain lock (see vxlan_fdb_update).
 * Returns 0 on success or a negative errno.
 */
static int vxlan_fdb_update_existing(struct vxlan_dev *vxlan,
				     union vxlan_addr *ip,
				     __u16 state, __u16 flags,
				     __be16 port, __be32 vni,
				     __u32 ifindex, __u16 ndm_flags,
				     struct vxlan_fdb *f, u32 nhid,
				     bool swdev_notify,
				     struct netlink_ext_ack *extack)
{
	/* NTF_USE only refreshes f->used below; it is not stored. */
	__u16 fdb_flags = (ndm_flags & ~NTF_USE);
	struct vxlan_rdst *rd = NULL;
	struct vxlan_rdst oldrd;
	int notify = 0;
	int rc = 0;
	int err;

	if (nhid && !rcu_access_pointer(f->nh)) {
		NL_SET_ERR_MSG(extack,
			       "Cannot replace an existing non nexthop fdb with a nexthop");
		return -EOPNOTSUPP;
	}

	if (nhid && (flags & NLM_F_APPEND)) {
		NL_SET_ERR_MSG(extack,
			       "Cannot append to a nexthop fdb");
		return -EOPNOTSUPP;
	}

	/* Do not allow an externally learned entry to take over an entry added
	 * by the user.
	 */
	if (!(fdb_flags & NTF_EXT_LEARNED) ||
	    !(f->flags & NTF_VXLAN_ADDED_BY_USER)) {
		if (f->state != state) {
			f->state = state;
			f->updated = jiffies;
			notify = 1;
		}
		if (f->flags != fdb_flags) {
			f->flags = fdb_flags;
			f->updated = jiffies;
			notify = 1;
		}
	}

	if ((flags & NLM_F_REPLACE)) {
		/* Only change unicasts */
		if (!(is_multicast_ether_addr(f->eth_addr) ||
		      is_zero_ether_addr(f->eth_addr))) {
			if (nhid) {
				rc = vxlan_fdb_nh_update(vxlan, f, nhid, extack);
				if (rc < 0)
					return rc;
			} else {
				/* oldrd saves the previous remote for the
				 * err_notify rollback below.
				 */
				rc = vxlan_fdb_replace(f, ip, port, vni,
						       ifindex, &oldrd);
			}
			notify |= rc;
		} else {
			NL_SET_ERR_MSG(extack, "Cannot replace non-unicast fdb entries");
			return -EOPNOTSUPP;
		}
	}
	if ((flags & NLM_F_APPEND) &&
	    (is_multicast_ether_addr(f->eth_addr) ||
	     is_zero_ether_addr(f->eth_addr))) {
		rc = vxlan_fdb_append(f, ip, port, vni, ifindex, &rd);

		if (rc < 0)
			return rc;
		notify |= rc;
	}

	if (ndm_flags & NTF_USE)
		f->used = jiffies;

	if (notify) {
		if (rd == NULL)
			rd = first_remote_rtnl(f);

		err = vxlan_fdb_notify(vxlan, f, rd, RTM_NEWNEIGH,
				       swdev_notify, extack);
		if (err)
			goto err_notify;
	}

	return 0;

err_notify:
	/* Nexthop replacement is deliberately left in place on failure. */
	if (nhid)
		return err;
	/* Undo the remote change so the table matches what was notified. */
	if ((flags & NLM_F_REPLACE) && rc)
		*rd = oldrd;
	else if ((flags & NLM_F_APPEND) && rc) {
		list_del_rcu(&rd->list);
		call_rcu(&rd->rcu, vxlan_dst_free);
	}
	return err;
}
1048
1049 static int vxlan_fdb_update_create(struct vxlan_dev *vxlan,
1050                                    const u8 *mac, union vxlan_addr *ip,
1051                                    __u16 state, __u16 flags,
1052                                    __be16 port, __be32 src_vni, __be32 vni,
1053                                    __u32 ifindex, __u16 ndm_flags, u32 nhid,
1054                                    bool swdev_notify,
1055                                    struct netlink_ext_ack *extack)
1056 {
1057         __u16 fdb_flags = (ndm_flags & ~NTF_USE);
1058         struct vxlan_fdb *f;
1059         int rc;
1060
1061         /* Disallow replace to add a multicast entry */
1062         if ((flags & NLM_F_REPLACE) &&
1063             (is_multicast_ether_addr(mac) || is_zero_ether_addr(mac)))
1064                 return -EOPNOTSUPP;
1065
1066         netdev_dbg(vxlan->dev, "add %pM -> %pIS\n", mac, ip);
1067         rc = vxlan_fdb_create(vxlan, mac, ip, state, port, src_vni,
1068                               vni, ifindex, fdb_flags, nhid, &f, extack);
1069         if (rc < 0)
1070                 return rc;
1071
1072         vxlan_fdb_insert(vxlan, mac, src_vni, f);
1073         rc = vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), RTM_NEWNEIGH,
1074                               swdev_notify, extack);
1075         if (rc)
1076                 goto err_notify;
1077
1078         return 0;
1079
1080 err_notify:
1081         vxlan_fdb_destroy(vxlan, f, false, false);
1082         return rc;
1083 }
1084
1085 /* Add new entry to forwarding table -- assumes lock held */
1086 int vxlan_fdb_update(struct vxlan_dev *vxlan,
1087                      const u8 *mac, union vxlan_addr *ip,
1088                      __u16 state, __u16 flags,
1089                      __be16 port, __be32 src_vni, __be32 vni,
1090                      __u32 ifindex, __u16 ndm_flags, u32 nhid,
1091                      bool swdev_notify,
1092                      struct netlink_ext_ack *extack)
1093 {
1094         struct vxlan_fdb *f;
1095
1096         f = __vxlan_find_mac(vxlan, mac, src_vni);
1097         if (f) {
1098                 if (flags & NLM_F_EXCL) {
1099                         netdev_dbg(vxlan->dev,
1100                                    "lost race to create %pM\n", mac);
1101                         return -EEXIST;
1102                 }
1103
1104                 return vxlan_fdb_update_existing(vxlan, ip, state, flags, port,
1105                                                  vni, ifindex, ndm_flags, f,
1106                                                  nhid, swdev_notify, extack);
1107         } else {
1108                 if (!(flags & NLM_F_CREATE))
1109                         return -ENOENT;
1110
1111                 return vxlan_fdb_update_create(vxlan, mac, ip, state, flags,
1112                                                port, src_vni, vni, ifindex,
1113                                                ndm_flags, nhid, swdev_notify,
1114                                                extack);
1115         }
1116 }
1117
/* Remove a single remote destination from an fdb entry: unlink it, send
 * RTM_DELNEIGH for just that remote, and free it after an RCU grace
 * period.  Used when the entry still has other remotes left.
 */
static void vxlan_fdb_dst_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f,
				  struct vxlan_rdst *rd, bool swdev_notify)
{
	list_del_rcu(&rd->list);
	vxlan_fdb_notify(vxlan, f, rd, RTM_DELNEIGH, swdev_notify, NULL);
	call_rcu(&rd->rcu, vxlan_dst_free);
}
1125
/* Decode the netlink attributes of an fdb request into concrete values,
 * substituting device defaults (dst port, remote VNI, wildcard address in
 * the default remote's family) for absent attributes.
 * Returns 0 on success, -EINVAL on malformed or conflicting attributes,
 * or -EADDRNOTAVAIL when the requested ifindex does not exist.
 */
static int vxlan_fdb_parse(struct nlattr *tb[], struct vxlan_dev *vxlan,
			   union vxlan_addr *ip, __be16 *port, __be32 *src_vni,
			   __be32 *vni, u32 *ifindex, u32 *nhid)
{
	struct net *net = dev_net(vxlan->dev);
	int err;

	/* A nexthop id is mutually exclusive with an explicit destination. */
	if (tb[NDA_NH_ID] && (tb[NDA_DST] || tb[NDA_VNI] || tb[NDA_IFINDEX] ||
	    tb[NDA_PORT]))
		return -EINVAL;

	if (tb[NDA_DST]) {
		err = vxlan_nla_get_addr(ip, tb[NDA_DST]);
		if (err)
			return err;
	} else {
		union vxlan_addr *remote = &vxlan->default_dst.remote_ip;

		/* No destination given: wildcard in the default family. */
		if (remote->sa.sa_family == AF_INET) {
			ip->sin.sin_addr.s_addr = htonl(INADDR_ANY);
			ip->sa.sa_family = AF_INET;
#if IS_ENABLED(CONFIG_IPV6)
		} else {
			ip->sin6.sin6_addr = in6addr_any;
			ip->sa.sa_family = AF_INET6;
#endif
		}
	}

	if (tb[NDA_PORT]) {
		if (nla_len(tb[NDA_PORT]) != sizeof(__be16))
			return -EINVAL;
		*port = nla_get_be16(tb[NDA_PORT]);
	} else {
		*port = vxlan->cfg.dst_port;
	}

	if (tb[NDA_VNI]) {
		if (nla_len(tb[NDA_VNI]) != sizeof(u32))
			return -EINVAL;
		*vni = cpu_to_be32(nla_get_u32(tb[NDA_VNI]));
	} else {
		*vni = vxlan->default_dst.remote_vni;
	}

	if (tb[NDA_SRC_VNI]) {
		if (nla_len(tb[NDA_SRC_VNI]) != sizeof(u32))
			return -EINVAL;
		*src_vni = cpu_to_be32(nla_get_u32(tb[NDA_SRC_VNI]));
	} else {
		*src_vni = vxlan->default_dst.remote_vni;
	}

	if (tb[NDA_IFINDEX]) {
		struct net_device *tdev;

		if (nla_len(tb[NDA_IFINDEX]) != sizeof(u32))
			return -EINVAL;
		*ifindex = nla_get_u32(tb[NDA_IFINDEX]);
		/* Existence check only; no reference is taken. */
		tdev = __dev_get_by_index(net, *ifindex);
		if (!tdev)
			return -EADDRNOTAVAIL;
	} else {
		*ifindex = 0;
	}

	if (tb[NDA_NH_ID])
		*nhid = nla_get_u32(tb[NDA_NH_ID]);
	else
		*nhid = 0;

	return 0;
}
1199
/* Add static entry (via netlink).  Validates the requested neighbour
 * state and attributes, then creates/updates the entry under the
 * per-chain hash lock, marking it as user-added.
 */
static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
			 struct net_device *dev,
			 const unsigned char *addr, u16 vid, u16 flags,
			 struct netlink_ext_ack *extack)
{
	struct vxlan_dev *vxlan = netdev_priv(dev);
	/* struct net *net = dev_net(vxlan->dev); */
	union vxlan_addr ip;
	__be16 port;
	__be32 src_vni, vni;
	u32 ifindex, nhid;
	u32 hash_index;
	int err;

	/* Static entries must be permanent or reachable. */
	if (!(ndm->ndm_state & (NUD_PERMANENT|NUD_REACHABLE))) {
		pr_info("RTM_NEWNEIGH with invalid state %#x\n",
			ndm->ndm_state);
		return -EINVAL;
	}

	/* Either an explicit destination or a nexthop id is required. */
	if (!tb || (!tb[NDA_DST] && !tb[NDA_NH_ID]))
		return -EINVAL;

	err = vxlan_fdb_parse(tb, vxlan, &ip, &port, &src_vni, &vni, &ifindex,
			      &nhid);
	if (err)
		return err;

	/* The remote must match the device's configured address family. */
	if (vxlan->default_dst.remote_ip.sa.sa_family != ip.sa.sa_family)
		return -EAFNOSUPPORT;

	hash_index = fdb_head_index(vxlan, addr, src_vni);
	spin_lock_bh(&vxlan->hash_lock[hash_index]);
	err = vxlan_fdb_update(vxlan, addr, &ip, ndm->ndm_state, flags,
			       port, src_vni, vni, ifindex,
			       ndm->ndm_flags | NTF_VXLAN_ADDED_BY_USER,
			       nhid, true, extack);
	spin_unlock_bh(&vxlan->hash_lock[hash_index]);

	return err;
}
1242
/* Delete an fdb entry, or just one of its remote destinations when @ip
 * names a specific remote and the entry has more than one.
 * Returns -ENOENT when the MAC is not in the table.  Otherwise returns 0
 * -- including when a specific remote was requested but not found
 * (NOTE(review): existing quirk, the goto out path reports success).
 * Caller holds the hash-chain lock.
 */
int __vxlan_fdb_delete(struct vxlan_dev *vxlan,
		       const unsigned char *addr, union vxlan_addr ip,
		       __be16 port, __be32 src_vni, __be32 vni,
		       u32 ifindex, bool swdev_notify)
{
	struct vxlan_rdst *rd = NULL;
	struct vxlan_fdb *f;
	int err = -ENOENT;

	f = vxlan_find_mac(vxlan, addr, src_vni);
	if (!f)
		return err;

	/* A non-wildcard address selects one specific remote. */
	if (!vxlan_addr_any(&ip)) {
		rd = vxlan_fdb_find_rdst(f, &ip, port, vni, ifindex);
		if (!rd)
			goto out;
	}

	/* remove a destination if it's not the only one on the list,
	 * otherwise destroy the fdb entry
	 */
	if (rd && !list_is_singular(&f->remotes)) {
		vxlan_fdb_dst_destroy(vxlan, f, rd, swdev_notify);
		goto out;
	}

	vxlan_fdb_destroy(vxlan, f, true, swdev_notify);

out:
	return 0;
}
1275
/* Delete entry (via netlink).  Parses the attributes into deletion keys
 * and removes the entry (or one remote) under the per-chain hash lock.
 */
static int vxlan_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
			    struct net_device *dev,
			    const unsigned char *addr, u16 vid)
{
	struct vxlan_dev *vxlan = netdev_priv(dev);
	union vxlan_addr ip;
	__be32 src_vni, vni;
	u32 ifindex, nhid;
	u32 hash_index;
	__be16 port;
	int err;

	err = vxlan_fdb_parse(tb, vxlan, &ip, &port, &src_vni, &vni, &ifindex,
			      &nhid);
	if (err)
		return err;

	/* Serialize with concurrent updates of the same hash chain. */
	hash_index = fdb_head_index(vxlan, addr, src_vni);
	spin_lock_bh(&vxlan->hash_lock[hash_index]);
	err = __vxlan_fdb_delete(vxlan, addr, ip, port, src_vni, vni, ifindex,
				 true);
	spin_unlock_bh(&vxlan->hash_lock[hash_index]);

	return err;
}
1302
/* Dump forwarding table.  Walks every hash chain under RCU and emits one
 * netlink message per remote (or one per nexthop-backed entry), skipping
 * up to cb->args[2] entries so interrupted dumps can resume; *idx counts
 * every entry whether skipped or emitted.
 */
static int vxlan_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
			  struct net_device *dev,
			  struct net_device *filter_dev, int *idx)
{
	struct vxlan_dev *vxlan = netdev_priv(dev);
	unsigned int h;
	int err = 0;

	for (h = 0; h < FDB_HASH_SIZE; ++h) {
		struct vxlan_fdb *f;

		rcu_read_lock();
		hlist_for_each_entry_rcu(f, &vxlan->fdb_head[h], hlist) {
			struct vxlan_rdst *rd;

			/* Nexthop-backed entries have no remotes list;
			 * report the entry itself exactly once.
			 */
			if (rcu_access_pointer(f->nh)) {
				if (*idx < cb->args[2])
					goto skip_nh;
				err = vxlan_fdb_info(skb, vxlan, f,
						     NETLINK_CB(cb->skb).portid,
						     cb->nlh->nlmsg_seq,
						     RTM_NEWNEIGH,
						     NLM_F_MULTI, NULL);
				if (err < 0) {
					rcu_read_unlock();
					goto out;
				}
skip_nh:
				*idx += 1;
				continue;
			}

			list_for_each_entry_rcu(rd, &f->remotes, list) {
				if (*idx < cb->args[2])
					goto skip;

				err = vxlan_fdb_info(skb, vxlan, f,
						     NETLINK_CB(cb->skb).portid,
						     cb->nlh->nlmsg_seq,
						     RTM_NEWNEIGH,
						     NLM_F_MULTI, rd);
				if (err < 0) {
					rcu_read_unlock();
					goto out;
				}
skip:
				*idx += 1;
			}
		}
		rcu_read_unlock();
	}
out:
	return err;
}
1358
1359 static int vxlan_fdb_get(struct sk_buff *skb,
1360                          struct nlattr *tb[],
1361                          struct net_device *dev,
1362                          const unsigned char *addr,
1363                          u16 vid, u32 portid, u32 seq,
1364                          struct netlink_ext_ack *extack)
1365 {
1366         struct vxlan_dev *vxlan = netdev_priv(dev);
1367         struct vxlan_fdb *f;
1368         __be32 vni;
1369         int err;
1370
1371         if (tb[NDA_VNI])
1372                 vni = cpu_to_be32(nla_get_u32(tb[NDA_VNI]));
1373         else
1374                 vni = vxlan->default_dst.remote_vni;
1375
1376         rcu_read_lock();
1377
1378         f = __vxlan_find_mac(vxlan, addr, vni);
1379         if (!f) {
1380                 NL_SET_ERR_MSG(extack, "Fdb entry not found");
1381                 err = -ENOENT;
1382                 goto errout;
1383         }
1384
1385         err = vxlan_fdb_info(skb, vxlan, f, portid, seq,
1386                              RTM_NEWNEIGH, 0, first_remote_rcu(f));
1387 errout:
1388         rcu_read_unlock();
1389         return err;
1390 }
1391
/* Watch incoming packets to learn mapping between Ethernet address
 * and Tunnel endpoint.
 * Return true if packet is bogus and should be dropped.
 */
static bool vxlan_snoop(struct net_device *dev,
			union vxlan_addr *src_ip, const u8 *src_mac,
			u32 src_ifindex, __be32 vni)
{
	struct vxlan_dev *vxlan = netdev_priv(dev);
	struct vxlan_fdb *f;
	u32 ifindex = 0;

#if IS_ENABLED(CONFIG_IPV6)
	/* Link-local IPv6 sources are only unambiguous together with the
	 * interface they arrived on, so record it.
	 */
	if (src_ip->sa.sa_family == AF_INET6 &&
	    (ipv6_addr_type(&src_ip->sin6.sin6_addr) & IPV6_ADDR_LINKLOCAL))
		ifindex = src_ifindex;
#endif

	f = vxlan_find_mac(vxlan, src_mac, vni);
	if (likely(f)) {
		struct vxlan_rdst *rdst = first_remote_rcu(f);

		/* Fast path: sender is where we already think it is. */
		if (likely(vxlan_addr_equal(&rdst->remote_ip, src_ip) &&
			   rdst->remote_ifindex == ifindex))
			return false;

		/* Don't migrate static entries, drop packets */
		if (f->state & (NUD_PERMANENT | NUD_NOARP))
			return true;

		/* Don't override an fdb with nexthop with a learnt entry */
		if (rcu_access_pointer(f->nh))
			return true;

		if (net_ratelimit())
			netdev_info(dev,
				    "%pM migrated from %pIS to %pIS\n",
				    src_mac, &rdst->remote_ip.sa, &src_ip->sa);

		/* Migrate the entry to the new endpoint and tell listeners. */
		rdst->remote_ip = *src_ip;
		f->updated = jiffies;
		vxlan_fdb_notify(vxlan, f, rdst, RTM_NEWNEIGH, true, NULL);
	} else {
		u32 hash_index = fdb_head_index(vxlan, src_mac, vni);

		/* learned new entry */
		spin_lock(&vxlan->hash_lock[hash_index]);

		/* close off race between vxlan_flush and incoming packets */
		if (netif_running(dev))
			vxlan_fdb_update(vxlan, src_mac, src_ip,
					 NUD_REACHABLE,
					 NLM_F_EXCL|NLM_F_CREATE,
					 vxlan->cfg.dst_port,
					 vni,
					 vxlan->default_dst.remote_vni,
					 ifindex, NTF_SELF, 0, true, NULL);
		spin_unlock(&vxlan->hash_lock[hash_index]);
	}

	return false;
}
1454
/* Drop one reference on @vs.  Returns false when @vs is NULL or other
 * users remain.  On the final put, unlink the socket from the per-netns
 * list and withdraw the UDP port from NIC tunnel-offload steering;
 * the caller then finishes releasing and freeing the socket.
 */
static bool __vxlan_sock_release_prep(struct vxlan_sock *vs)
{
	struct vxlan_net *vn;

	if (!vs)
		return false;
	if (!refcount_dec_and_test(&vs->refcnt))
		return false;

	vn = net_generic(sock_net(vs->sock->sk), vxlan_net_id);
	spin_lock(&vn->sock_lock);
	hlist_del_rcu(&vs->hlist);
	/* Tell the NIC this UDP port no longer carries VXLAN (or GPE). */
	udp_tunnel_notify_del_rx_port(vs->sock,
				      (vs->flags & VXLAN_F_GPE) ?
				      UDP_TUNNEL_TYPE_VXLAN_GPE :
				      UDP_TUNNEL_TYPE_VXLAN);
	spin_unlock(&vn->sock_lock);

	return true;
}
1475
/* Detach and release the device's UDP socket(s).  The RCU pointers are
 * cleared first and a grace period observed, so no packet in flight can
 * still reach this device once the sockets are torn down.
 */
static void vxlan_sock_release(struct vxlan_dev *vxlan)
{
	struct vxlan_sock *sock4 = rtnl_dereference(vxlan->vn4_sock);
#if IS_ENABLED(CONFIG_IPV6)
	struct vxlan_sock *sock6 = rtnl_dereference(vxlan->vn6_sock);

	RCU_INIT_POINTER(vxlan->vn6_sock, NULL);
#endif

	RCU_INIT_POINTER(vxlan->vn4_sock, NULL);
	synchronize_net();

	/* Remove the device from the socket's lookup structure: the VNI
	 * group when VNI filtering is on, the per-VNI hash otherwise.
	 */
	if (vxlan->cfg.flags & VXLAN_F_VNIFILTER)
		vxlan_vs_del_vnigrp(vxlan);
	else
		vxlan_vs_del_dev(vxlan);

	if (__vxlan_sock_release_prep(sock4)) {
		udp_tunnel_sock_release(sock4->sock);
		kfree(sock4);
	}

#if IS_ENABLED(CONFIG_IPV6)
	if (__vxlan_sock_release_prep(sock6)) {
		udp_tunnel_sock_release(sock6->sock);
		kfree(sock6);
	}
#endif
}
1505
/* Handle the remote checksum offload (RCO) extension on RX: when the RCO
 * flag is set and GRO hasn't already processed it, apply the checksum fix
 * at the start/offset encoded in the VNI field, then clear the consumed
 * flag and VNI bits so the caller's leftover-bits check passes.
 * Returns false only when the packet is too short for the encoded
 * checksum location (caller drops it).
 */
static bool vxlan_remcsum(struct vxlanhdr *unparsed,
			  struct sk_buff *skb, u32 vxflags)
{
	size_t start, offset;

	if (!(unparsed->vx_flags & VXLAN_HF_RCO) || skb->remcsum_offload)
		goto out;

	/* start/offset of the checksum are encoded in the VNI field. */
	start = vxlan_rco_start(unparsed->vx_vni);
	offset = start + vxlan_rco_offset(unparsed->vx_vni);

	if (!pskb_may_pull(skb, offset + sizeof(u16)))
		return false;

	skb_remcsum_process(skb, (void *)(vxlan_hdr(skb) + 1), start, offset,
			    !!(vxflags & VXLAN_F_REMCSUM_NOPARTIAL));
out:
	unparsed->vx_flags &= ~VXLAN_HF_RCO;
	unparsed->vx_vni &= VXLAN_VNI_MASK;
	return true;
}
1527
/* Extract the Group Based Policy (GBP) extension from the header into
 * @md.  On a collect-metadata socket the policy travels with the tunnel
 * metadata; otherwise it is reflected into skb->mark for local matching.
 * The consumed GBP bits are cleared from @unparsed either way.
 */
static void vxlan_parse_gbp_hdr(struct vxlanhdr *unparsed,
				struct sk_buff *skb, u32 vxflags,
				struct vxlan_metadata *md)
{
	struct vxlanhdr_gbp *gbp = (struct vxlanhdr_gbp *)unparsed;
	struct metadata_dst *tun_dst;

	if (!(unparsed->vx_flags & VXLAN_HF_GBP))
		goto out;

	md->gbp = ntohs(gbp->policy_id);

	/* Record that tunnel options are present on the metadata dst. */
	tun_dst = (struct metadata_dst *)skb_dst(skb);
	if (tun_dst) {
		tun_dst->u.tun_info.key.tun_flags |= TUNNEL_VXLAN_OPT;
		tun_dst->u.tun_info.options_len = sizeof(*md);
	}
	if (gbp->dont_learn)
		md->gbp |= VXLAN_GBP_DONT_LEARN;

	if (gbp->policy_applied)
		md->gbp |= VXLAN_GBP_POLICY_APPLIED;

	/* In flow-based mode, GBP is carried in dst_metadata */
	if (!(vxflags & VXLAN_F_COLLECT_METADATA))
		skb->mark = md->gbp;
out:
	unparsed->vx_flags &= ~VXLAN_GBP_USED_BITS;
}
1557
1558 static bool vxlan_parse_gpe_hdr(struct vxlanhdr *unparsed,
1559                                 __be16 *protocol,
1560                                 struct sk_buff *skb, u32 vxflags)
1561 {
1562         struct vxlanhdr_gpe *gpe = (struct vxlanhdr_gpe *)unparsed;
1563
1564         /* Need to have Next Protocol set for interfaces in GPE mode. */
1565         if (!gpe->np_applied)
1566                 return false;
1567         /* "The initial version is 0. If a receiver does not support the
1568          * version indicated it MUST drop the packet.
1569          */
1570         if (gpe->version != 0)
1571                 return false;
1572         /* "When the O bit is set to 1, the packet is an OAM packet and OAM
1573          * processing MUST occur." However, we don't implement OAM
1574          * processing, thus drop the packet.
1575          */
1576         if (gpe->oam_flag)
1577                 return false;
1578
1579         *protocol = tun_p_to_eth_p(gpe->next_protocol);
1580         if (!*protocol)
1581                 return false;
1582
1583         unparsed->vx_flags &= ~VXLAN_GPE_USED_BITS;
1584         return true;
1585 }
1586
/* Prepare the inner Ethernet frame of a decapsulated packet and feed the
 * (source MAC, outer source IP) pair to address learning.  Returns false
 * when the packet must be dropped: it carries our own source MAC (loop /
 * multicast echo), or snooping flagged it as bogus.
 */
static bool vxlan_set_mac(struct vxlan_dev *vxlan,
			  struct vxlan_sock *vs,
			  struct sk_buff *skb, __be32 vni)
{
	union vxlan_addr saddr;
	u32 ifindex = skb->dev->ifindex;

	skb_reset_mac_header(skb);
	skb->protocol = eth_type_trans(skb, vxlan->dev);
	skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);

	/* Ignore packet loops (and multicast echo) */
	if (ether_addr_equal(eth_hdr(skb)->h_source, vxlan->dev->dev_addr))
		return false;

	/* Get address from the outer IP header */
	if (vxlan_get_sk_family(vs) == AF_INET) {
		saddr.sin.sin_addr.s_addr = ip_hdr(skb)->saddr;
		saddr.sa.sa_family = AF_INET;
#if IS_ENABLED(CONFIG_IPV6)
	} else {
		saddr.sin6.sin6_addr = ipv6_hdr(skb)->saddr;
		saddr.sa.sa_family = AF_INET6;
#endif
	}

	/* Learn the sender's endpoint; drop if snooping says bogus. */
	if ((vxlan->cfg.flags & VXLAN_F_LEARN) &&
	    vxlan_snoop(skb->dev, &saddr, eth_hdr(skb)->h_source, ifindex, vni))
		return false;

	return true;
}
1619
1620 static bool vxlan_ecn_decapsulate(struct vxlan_sock *vs, void *oiph,
1621                                   struct sk_buff *skb)
1622 {
1623         int err = 0;
1624
1625         if (vxlan_get_sk_family(vs) == AF_INET)
1626                 err = IP_ECN_decapsulate(oiph, skb);
1627 #if IS_ENABLED(CONFIG_IPV6)
1628         else
1629                 err = IP6_ECN_decapsulate(oiph, skb);
1630 #endif
1631
1632         if (unlikely(err) && log_ecn_error) {
1633                 if (vxlan_get_sk_family(vs) == AF_INET)
1634                         net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
1635                                              &((struct iphdr *)oiph)->saddr,
1636                                              ((struct iphdr *)oiph)->tos);
1637                 else
1638                         net_info_ratelimited("non-ECT from %pI6\n",
1639                                              &((struct ipv6hdr *)oiph)->saddr);
1640         }
1641         return err <= 1;
1642 }
1643
/* Callback from net/ipv4/udp.c to receive packets.
 *
 * encap_rcv handler for a VXLAN UDP socket: validates the VXLAN header,
 * locates the receiving vxlan device by VNI (and incoming ifindex, for
 * VNI filtering on collect-metadata devices), strips the outer headers
 * and injects the inner packet into the stack via GRO cells.
 *
 * Always returns 0: the skb is either delivered or freed here.
 */
static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct vxlan_dev *vxlan;
	struct vxlan_sock *vs;
	struct vxlanhdr unparsed;
	struct vxlan_metadata _md;
	struct vxlan_metadata *md = &_md;
	__be16 protocol = htons(ETH_P_TEB);	/* default inner proto: Ethernet */
	bool raw_proto = false;			/* set when GPE carries a bare L3 payload */
	void *oiph;
	__be32 vni = 0;

	/* Need UDP and VXLAN header to be present */
	if (!pskb_may_pull(skb, VXLAN_HLEN))
		goto drop;

	/* Work on a local copy of the header; each extension parser clears
	 * the bits it consumed, so any bit left set at the end means an
	 * unknown/unhandled extension.
	 */
	unparsed = *vxlan_hdr(skb);
	/* VNI flag always required to be set */
	if (!(unparsed.vx_flags & VXLAN_HF_VNI)) {
		netdev_dbg(skb->dev, "invalid vxlan flags=%#x vni=%#x\n",
			   ntohl(vxlan_hdr(skb)->vx_flags),
			   ntohl(vxlan_hdr(skb)->vx_vni));
		/* Return non vxlan pkt */
		goto drop;
	}
	unparsed.vx_flags &= ~VXLAN_HF_VNI;
	unparsed.vx_vni &= ~VXLAN_VNI_MASK;

	vs = rcu_dereference_sk_user_data(sk);
	if (!vs)
		goto drop;

	vni = vxlan_vni(vxlan_hdr(skb)->vx_vni);

	/* The ifindex lets VNI-filtering metadata devices restrict which
	 * VNIs they accept on a given lower device.
	 */
	vxlan = vxlan_vs_find_vni(vs, skb->dev->ifindex, vni);
	if (!vxlan)
		goto drop;

	/* For backwards compatibility, only allow reserved fields to be
	 * used by VXLAN extensions if explicitly requested.
	 */
	if (vs->flags & VXLAN_F_GPE) {
		if (!vxlan_parse_gpe_hdr(&unparsed, &protocol, skb, vs->flags))
			goto drop;
		raw_proto = true;
	}

	/* Strip outer headers; for raw (GPE) payloads there is no inner
	 * Ethernet header to reset the MAC header to.
	 */
	if (__iptunnel_pull_header(skb, VXLAN_HLEN, protocol, raw_proto,
				   !net_eq(vxlan->net, dev_net(vxlan->dev))))
		goto drop;

	if (vs->flags & VXLAN_F_REMCSUM_RX)
		if (unlikely(!vxlan_remcsum(&unparsed, skb, vs->flags)))
			goto drop;

	if (vxlan_collect_metadata(vs)) {
		/* external (collect-metadata) mode: attach tunnel info as a
		 * metadata dst so upper layers (e.g. OVS, tc) can read it.
		 */
		struct metadata_dst *tun_dst;

		tun_dst = udp_tun_rx_dst(skb, vxlan_get_sk_family(vs), TUNNEL_KEY,
					 key32_to_tunnel_id(vni), sizeof(*md));

		if (!tun_dst)
			goto drop;

		md = ip_tunnel_info_opts(&tun_dst->u.tun_info);

		skb_dst_set(skb, (struct dst_entry *)tun_dst);
	} else {
		memset(md, 0, sizeof(*md));
	}

	if (vs->flags & VXLAN_F_GBP)
		vxlan_parse_gbp_hdr(&unparsed, skb, vs->flags, md);
	/* Note that GBP and GPE can never be active together. This is
	 * ensured in vxlan_dev_configure.
	 */

	if (unparsed.vx_flags || unparsed.vx_vni) {
		/* If there are any unprocessed flags remaining treat
		 * this as a malformed packet. This behavior diverges from
		 * VXLAN RFC (RFC7348) which stipulates that bits in reserved
		 * in reserved fields are to be ignored. The approach here
		 * maintains compatibility with previous stack code, and also
		 * is more robust and provides a little more security in
		 * adding extensions to VXLAN.
		 */
		goto drop;
	}

	if (!raw_proto) {
		/* Ethernet payload: validate inner MAC, possibly learn the
		 * source address (drops looped-back / filtered frames).
		 */
		if (!vxlan_set_mac(vxlan, vs, skb, vni))
			goto drop;
	} else {
		skb_reset_mac_header(skb);
		skb->dev = vxlan->dev;
		skb->pkt_type = PACKET_HOST;
	}

	/* Remember the outer IP header before re-pointing the network
	 * header at the inner packet; needed for ECN decapsulation.
	 */
	oiph = skb_network_header(skb);
	skb_reset_network_header(skb);

	if (!vxlan_ecn_decapsulate(vs, oiph, skb)) {
		++vxlan->dev->stats.rx_frame_errors;
		++vxlan->dev->stats.rx_errors;
		goto drop;
	}

	rcu_read_lock();

	/* Device may have been brought down since the lookup. */
	if (unlikely(!(vxlan->dev->flags & IFF_UP))) {
		rcu_read_unlock();
		atomic_long_inc(&vxlan->dev->rx_dropped);
		goto drop;
	}

	dev_sw_netstats_rx_add(vxlan->dev, skb->len);
	gro_cells_receive(&vxlan->gro_cells, skb);

	rcu_read_unlock();

	return 0;

drop:
	/* Consume bad packet */
	kfree_skb(skb);
	return 0;
}
1772
1773 /* Callback from net/ipv{4,6}/udp.c to check that we have a VNI for errors */
1774 static int vxlan_err_lookup(struct sock *sk, struct sk_buff *skb)
1775 {
1776         struct vxlan_dev *vxlan;
1777         struct vxlan_sock *vs;
1778         struct vxlanhdr *hdr;
1779         __be32 vni;
1780
1781         if (!pskb_may_pull(skb, skb_transport_offset(skb) + VXLAN_HLEN))
1782                 return -EINVAL;
1783
1784         hdr = vxlan_hdr(skb);
1785
1786         if (!(hdr->vx_flags & VXLAN_HF_VNI))
1787                 return -EINVAL;
1788
1789         vs = rcu_dereference_sk_user_data(sk);
1790         if (!vs)
1791                 return -ENOENT;
1792
1793         vni = vxlan_vni(hdr->vx_vni);
1794         vxlan = vxlan_vs_find_vni(vs, skb->dev->ifindex, vni);
1795         if (!vxlan)
1796                 return -ENOENT;
1797
1798         return 0;
1799 }
1800
/* ARP suppression: answer an ARP request locally from the neighbour
 * cache instead of forwarding it over the tunnel.
 *
 * Parses the ARP request carried in @skb on @dev, looks the target IP
 * up in arp_tbl and, for a connected non-bridge-local neighbour,
 * injects a synthesized ARP reply back into the stack.  On a cache
 * miss with VXLAN_F_L3MISS set, notifies userspace of the miss.
 * Consumes the skb and always returns NETDEV_TX_OK.
 */
static int arp_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni)
{
	struct vxlan_dev *vxlan = netdev_priv(dev);
	struct arphdr *parp;
	u8 *arpptr, *sha;
	__be32 sip, tip;
	struct neighbour *n;

	if (dev->flags & IFF_NOARP)
		goto out;

	if (!pskb_may_pull(skb, arp_hdr_len(dev))) {
		dev->stats.tx_dropped++;
		goto out;
	}
	parp = arp_hdr(skb);

	/* Only handle Ethernet-style ARP requests for IPv4. */
	if ((parp->ar_hrd != htons(ARPHRD_ETHER) &&
	     parp->ar_hrd != htons(ARPHRD_IEEE802)) ||
	    parp->ar_pro != htons(ETH_P_IP) ||
	    parp->ar_op != htons(ARPOP_REQUEST) ||
	    parp->ar_hln != dev->addr_len ||
	    parp->ar_pln != 4)
		goto out;
	/* Walk the variable-length ARP payload:
	 * sender hw addr, sender IP, target hw addr, target IP.
	 */
	arpptr = (u8 *)parp + sizeof(struct arphdr);
	sha = arpptr;
	arpptr += dev->addr_len;	/* sha */
	memcpy(&sip, arpptr, sizeof(sip));
	arpptr += sizeof(sip);
	arpptr += dev->addr_len;	/* tha */
	memcpy(&tip, arpptr, sizeof(tip));

	if (ipv4_is_loopback(tip) ||
	    ipv4_is_multicast(tip))
		goto out;

	n = neigh_lookup(&arp_tbl, &tip, dev);

	if (n) {
		struct vxlan_fdb *f;
		struct sk_buff	*reply;

		/* Only answer from fully resolved neighbour entries. */
		if (!(n->nud_state & NUD_CONNECTED)) {
			neigh_release(n);
			goto out;
		}

		f = vxlan_find_mac(vxlan, n->ha, vni);
		if (f && vxlan_addr_any(&(first_remote_rcu(f)->remote_ip))) {
			/* bridge-local neighbor */
			neigh_release(n);
			goto out;
		}

		reply = arp_create(ARPOP_REPLY, ETH_P_ARP, sip, dev, tip, sha,
				n->ha, sha);

		neigh_release(n);

		if (reply == NULL)
			goto out;

		/* Deliver the reply locally, as if it arrived on dev. */
		skb_reset_mac_header(reply);
		__skb_pull(reply, skb_network_offset(reply));
		reply->ip_summed = CHECKSUM_UNNECESSARY;
		reply->pkt_type = PACKET_HOST;

		if (netif_rx_ni(reply) == NET_RX_DROP)
			dev->stats.rx_dropped++;
	} else if (vxlan->cfg.flags & VXLAN_F_L3MISS) {
		union vxlan_addr ipa = {
			.sin.sin_addr.s_addr = tip,
			.sin.sin_family = AF_INET,
		};

		vxlan_ip_miss(dev, &ipa);
	}
out:
	consume_skb(skb);
	return NETDEV_TX_OK;
}
1882
1883 #if IS_ENABLED(CONFIG_IPV6)
/* Build an IPv6 Neighbour Advertisement replying to the Neighbour
 * Solicitation in @request, advertising neighbour @n's link-layer
 * address (with the router flag set per @isrouter).
 *
 * Returns a freshly allocated skb ready for local delivery, or NULL
 * on allocation failure / malformed NS options.
 */
static struct sk_buff *vxlan_na_create(struct sk_buff *request,
	struct neighbour *n, bool isrouter)
{
	struct net_device *dev = request->dev;
	struct sk_buff *reply;
	struct nd_msg *ns, *na;
	struct ipv6hdr *pip6;
	u8 *daddr;
	int na_olen = 8; /* opt hdr + ETH_ALEN for target */
	int ns_olen;
	int i, len;

	if (dev == NULL || !pskb_may_pull(request, request->len))
		return NULL;

	len = LL_RESERVED_SPACE(dev) + sizeof(struct ipv6hdr) +
		sizeof(*na) + na_olen + dev->needed_tailroom;
	reply = alloc_skb(len, GFP_ATOMIC);
	if (reply == NULL)
		return NULL;

	reply->protocol = htons(ETH_P_IPV6);
	reply->dev = dev;
	skb_reserve(reply, LL_RESERVED_SPACE(request->dev));
	skb_push(reply, sizeof(struct ethhdr));
	skb_reset_mac_header(reply);

	ns = (struct nd_msg *)(ipv6_hdr(request) + 1);

	/* Destination defaults to the requester's MAC; prefer the
	 * source link-layer address option from the NS if present.
	 */
	daddr = eth_hdr(request)->h_source;
	ns_olen = request->len - skb_network_offset(request) -
		sizeof(struct ipv6hdr) - sizeof(*ns);
	for (i = 0; i < ns_olen-1; i += (ns->opt[i+1]<<3)) {
		/* Zero-length option: malformed NS, bail out. */
		if (!ns->opt[i + 1]) {
			kfree_skb(reply);
			return NULL;
		}
		if (ns->opt[i] == ND_OPT_SOURCE_LL_ADDR) {
			daddr = ns->opt + i + sizeof(struct nd_opt_hdr);
			break;
		}
	}

	/* Ethernet header */
	ether_addr_copy(eth_hdr(reply)->h_dest, daddr);
	ether_addr_copy(eth_hdr(reply)->h_source, n->ha);
	eth_hdr(reply)->h_proto = htons(ETH_P_IPV6);
	reply->protocol = htons(ETH_P_IPV6);

	skb_pull(reply, sizeof(struct ethhdr));
	skb_reset_network_header(reply);
	skb_put(reply, sizeof(struct ipv6hdr));

	/* IPv6 header */

	pip6 = ipv6_hdr(reply);
	memset(pip6, 0, sizeof(struct ipv6hdr));
	pip6->version = 6;
	pip6->priority = ipv6_hdr(request)->priority;
	pip6->nexthdr = IPPROTO_ICMPV6;
	pip6->hop_limit = 255;
	pip6->daddr = ipv6_hdr(request)->saddr;
	pip6->saddr = *(struct in6_addr *)n->primary_key;

	skb_pull(reply, sizeof(struct ipv6hdr));
	skb_reset_transport_header(reply);

	/* Neighbor Advertisement */
	na = skb_put_zero(reply, sizeof(*na) + na_olen);
	na->icmph.icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT;
	na->icmph.icmp6_router = isrouter;
	na->icmph.icmp6_override = 1;
	na->icmph.icmp6_solicited = 1;
	na->target = ns->target;
	ether_addr_copy(&na->opt[2], n->ha);
	na->opt[0] = ND_OPT_TARGET_LL_ADDR;
	na->opt[1] = na_olen >> 3;	/* option length in units of 8 octets */

	na->icmph.icmp6_cksum = csum_ipv6_magic(&pip6->saddr,
		&pip6->daddr, sizeof(*na)+na_olen, IPPROTO_ICMPV6,
		csum_partial(na, sizeof(*na)+na_olen, 0));

	pip6->payload_len = htons(sizeof(*na)+na_olen);

	skb_push(reply, sizeof(struct ipv6hdr));

	reply->ip_summed = CHECKSUM_UNNECESSARY;

	return reply;
}
1974
/* IPv6 counterpart of arp_reduce(): answer a Neighbour Solicitation
 * locally from the neighbour cache instead of tunnelling it.
 *
 * Looks the NS target up in the IPv6 neighbour table; for a connected
 * non-bridge-local neighbour, injects a synthesized Neighbour
 * Advertisement.  On a miss with VXLAN_F_L3MISS set, notifies
 * userspace.  Consumes the skb and always returns NETDEV_TX_OK.
 */
static int neigh_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni)
{
	struct vxlan_dev *vxlan = netdev_priv(dev);
	const struct in6_addr *daddr;
	const struct ipv6hdr *iphdr;
	struct inet6_dev *in6_dev;
	struct neighbour *n;
	struct nd_msg *msg;

	rcu_read_lock();
	in6_dev = __in6_dev_get(dev);
	if (!in6_dev)
		goto out;

	iphdr = ipv6_hdr(skb);
	daddr = &iphdr->daddr;
	msg = (struct nd_msg *)(iphdr + 1);

	if (ipv6_addr_loopback(daddr) ||
	    ipv6_addr_is_multicast(&msg->target))
		goto out;

	n = neigh_lookup(ipv6_stub->nd_tbl, &msg->target, dev);

	if (n) {
		struct vxlan_fdb *f;
		struct sk_buff *reply;

		/* Only answer from fully resolved neighbour entries. */
		if (!(n->nud_state & NUD_CONNECTED)) {
			neigh_release(n);
			goto out;
		}

		f = vxlan_find_mac(vxlan, n->ha, vni);
		if (f && vxlan_addr_any(&(first_remote_rcu(f)->remote_ip))) {
			/* bridge-local neighbor */
			neigh_release(n);
			goto out;
		}

		/* Advertise the router flag when the FDB entry says so. */
		reply = vxlan_na_create(skb, n,
					!!(f ? f->flags & NTF_ROUTER : 0));

		neigh_release(n);

		if (reply == NULL)
			goto out;

		if (netif_rx_ni(reply) == NET_RX_DROP)
			dev->stats.rx_dropped++;

	} else if (vxlan->cfg.flags & VXLAN_F_L3MISS) {
		union vxlan_addr ipa = {
			.sin6.sin6_addr = msg->target,
			.sin6.sin6_family = AF_INET6,
		};

		vxlan_ip_miss(dev, &ipa);
	}

out:
	rcu_read_unlock();
	consume_skb(skb);
	return NETDEV_TX_OK;
}
2040 #endif
2041
/* Route short-circuiting: if the inner packet's destination IP resolves
 * in the local neighbour table, rewrite the Ethernet destination MAC to
 * the neighbour's address (and the source MAC to the old destination)
 * so the frame is bridged directly rather than routed.
 *
 * Returns true when the destination MAC was rewritten, false otherwise.
 * On a neighbour-cache miss with VXLAN_F_L3MISS set, notifies userspace
 * and returns false.
 */
static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb)
{
	struct vxlan_dev *vxlan = netdev_priv(dev);
	struct neighbour *n;

	if (is_multicast_ether_addr(eth_hdr(skb)->h_dest))
		return false;

	n = NULL;
	switch (ntohs(eth_hdr(skb)->h_proto)) {
	case ETH_P_IP:
	{
		struct iphdr *pip;

		if (!pskb_may_pull(skb, sizeof(struct iphdr)))
			return false;
		pip = ip_hdr(skb);
		n = neigh_lookup(&arp_tbl, &pip->daddr, dev);
		if (!n && (vxlan->cfg.flags & VXLAN_F_L3MISS)) {
			union vxlan_addr ipa = {
				.sin.sin_addr.s_addr = pip->daddr,
				.sin.sin_family = AF_INET,
			};

			vxlan_ip_miss(dev, &ipa);
			return false;
		}

		break;
	}
#if IS_ENABLED(CONFIG_IPV6)
	case ETH_P_IPV6:
	{
		struct ipv6hdr *pip6;

		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
			return false;
		pip6 = ipv6_hdr(skb);
		n = neigh_lookup(ipv6_stub->nd_tbl, &pip6->daddr, dev);
		if (!n && (vxlan->cfg.flags & VXLAN_F_L3MISS)) {
			union vxlan_addr ipa = {
				.sin6.sin6_addr = pip6->daddr,
				.sin6.sin6_family = AF_INET6,
			};

			vxlan_ip_miss(dev, &ipa);
			return false;
		}

		break;
	}
#endif
	default:
		/* Non-IP traffic cannot be short-circuited. */
		return false;
	}

	if (n) {
		bool diff;

		diff = !ether_addr_equal(eth_hdr(skb)->h_dest, n->ha);
		if (diff) {
			/* Old destination becomes the source; neighbour's
			 * hardware address becomes the new destination.
			 */
			memcpy(eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest,
				dev->addr_len);
			memcpy(eth_hdr(skb)->h_dest, n->ha, dev->addr_len);
		}
		neigh_release(n);
		return diff;
	}

	return false;
}
2113
2114 static void vxlan_build_gbp_hdr(struct vxlanhdr *vxh, u32 vxflags,
2115                                 struct vxlan_metadata *md)
2116 {
2117         struct vxlanhdr_gbp *gbp;
2118
2119         if (!md->gbp)
2120                 return;
2121
2122         gbp = (struct vxlanhdr_gbp *)vxh;
2123         vxh->vx_flags |= VXLAN_HF_GBP;
2124
2125         if (md->gbp & VXLAN_GBP_DONT_LEARN)
2126                 gbp->dont_learn = 1;
2127
2128         if (md->gbp & VXLAN_GBP_POLICY_APPLIED)
2129                 gbp->policy_applied = 1;
2130
2131         gbp->policy_id = htons(md->gbp & VXLAN_GBP_ID_MASK);
2132 }
2133
2134 static int vxlan_build_gpe_hdr(struct vxlanhdr *vxh, u32 vxflags,
2135                                __be16 protocol)
2136 {
2137         struct vxlanhdr_gpe *gpe = (struct vxlanhdr_gpe *)vxh;
2138
2139         gpe->np_applied = 1;
2140         gpe->next_protocol = tun_p_from_eth_p(protocol);
2141         if (!gpe->next_protocol)
2142                 return -EPFNOSUPPORT;
2143         return 0;
2144 }
2145
/* Prepend the VXLAN header to @skb and prepare it for UDP tunnel
 * transmission: ensure headroom, set up GSO/checksum offload state,
 * and encode the VNI plus any RCO/GBP/GPE extension fields per @vxflags.
 *
 * @iphdr_len: length of the outer IP header that will follow.
 * @udp_sum:   whether the outer UDP checksum will be filled in.
 * Returns 0 on success or a negative errno (skb not freed here).
 */
static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst,
			   int iphdr_len, __be32 vni,
			   struct vxlan_metadata *md, u32 vxflags,
			   bool udp_sum)
{
	struct vxlanhdr *vxh;
	int min_headroom;
	int err;
	int type = udp_sum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
	__be16 inner_protocol = htons(ETH_P_TEB);

	/* Remote checksum offload is only possible when the inner checksum
	 * start/offset fit the RCO encoding constraints.
	 */
	if ((vxflags & VXLAN_F_REMCSUM_TX) &&
	    skb->ip_summed == CHECKSUM_PARTIAL) {
		int csum_start = skb_checksum_start_offset(skb);

		if (csum_start <= VXLAN_MAX_REMCSUM_START &&
		    !(csum_start & VXLAN_RCO_SHIFT_MASK) &&
		    (skb->csum_offset == offsetof(struct udphdr, check) ||
		     skb->csum_offset == offsetof(struct tcphdr, check)))
			type |= SKB_GSO_TUNNEL_REMCSUM;
	}

	min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len
			+ VXLAN_HLEN + iphdr_len;

	/* Need space for new headers (invalidates iph ptr) */
	err = skb_cow_head(skb, min_headroom);
	if (unlikely(err))
		return err;

	err = iptunnel_handle_offloads(skb, type);
	if (err)
		return err;

	vxh = __skb_push(skb, sizeof(*vxh));
	vxh->vx_flags = VXLAN_HF_VNI;
	vxh->vx_vni = vxlan_vni_field(vni);

	if (type & SKB_GSO_TUNNEL_REMCSUM) {
		unsigned int start;

		/* Encode the checksum start/offset into the reserved VNI
		 * bits, relative to the start of the VXLAN header.
		 */
		start = skb_checksum_start_offset(skb) - sizeof(struct vxlanhdr);
		vxh->vx_vni |= vxlan_compute_rco(start, skb->csum_offset);
		vxh->vx_flags |= VXLAN_HF_RCO;

		/* Non-GSO packets have had their checksum folded already;
		 * drop the partial-checksum/encapsulation state.
		 */
		if (!skb_is_gso(skb)) {
			skb->ip_summed = CHECKSUM_NONE;
			skb->encapsulation = 0;
		}
	}

	if (vxflags & VXLAN_F_GBP)
		vxlan_build_gbp_hdr(vxh, vxflags, md);
	if (vxflags & VXLAN_F_GPE) {
		err = vxlan_build_gpe_hdr(vxh, vxflags, skb->protocol);
		if (err < 0)
			return err;
		/* GPE carries the raw inner protocol, not Ethernet. */
		inner_protocol = skb->protocol;
	}

	skb_set_inner_protocol(skb, inner_protocol);
	return 0;
}
2209
2210 static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan, struct net_device *dev,
2211                                       struct vxlan_sock *sock4,
2212                                       struct sk_buff *skb, int oif, u8 tos,
2213                                       __be32 daddr, __be32 *saddr, __be16 dport, __be16 sport,
2214                                       struct dst_cache *dst_cache,
2215                                       const struct ip_tunnel_info *info)
2216 {
2217         bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
2218         struct rtable *rt = NULL;
2219         struct flowi4 fl4;
2220
2221         if (!sock4)
2222                 return ERR_PTR(-EIO);
2223
2224         if (tos && !info)
2225                 use_cache = false;
2226         if (use_cache) {
2227                 rt = dst_cache_get_ip4(dst_cache, saddr);
2228                 if (rt)
2229                         return rt;
2230         }
2231
2232         memset(&fl4, 0, sizeof(fl4));
2233         fl4.flowi4_oif = oif;
2234         fl4.flowi4_tos = RT_TOS(tos);
2235         fl4.flowi4_mark = skb->mark;
2236         fl4.flowi4_proto = IPPROTO_UDP;
2237         fl4.daddr = daddr;
2238         fl4.saddr = *saddr;
2239         fl4.fl4_dport = dport;
2240         fl4.fl4_sport = sport;
2241
2242         rt = ip_route_output_key(vxlan->net, &fl4);
2243         if (!IS_ERR(rt)) {
2244                 if (rt->dst.dev == dev) {
2245                         netdev_dbg(dev, "circular route to %pI4\n", &daddr);
2246                         ip_rt_put(rt);
2247                         return ERR_PTR(-ELOOP);
2248                 }
2249
2250                 *saddr = fl4.saddr;
2251                 if (use_cache)
2252                         dst_cache_set_ip4(dst_cache, &rt->dst, fl4.saddr);
2253         } else {
2254                 netdev_dbg(dev, "no route to %pI4\n", &daddr);
2255                 return ERR_PTR(-ENETUNREACH);
2256         }
2257         return rt;
2258 }
2259
2260 #if IS_ENABLED(CONFIG_IPV6)
/* IPv6 counterpart of vxlan_get_route(): resolve the route for an
 * encapsulated packet, consulting the dst cache when usable.  On
 * success, writes the chosen source address back through @saddr and
 * returns the dst; otherwise returns an ERR_PTR (-EIO without a socket,
 * -ENETUNREACH when lookup fails, -ELOOP on a circular route).
 */
static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan,
					  struct net_device *dev,
					  struct vxlan_sock *sock6,
					  struct sk_buff *skb, int oif, u8 tos,
					  __be32 label,
					  const struct in6_addr *daddr,
					  struct in6_addr *saddr,
					  __be16 dport, __be16 sport,
					  struct dst_cache *dst_cache,
					  const struct ip_tunnel_info *info)
{
	bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
	struct dst_entry *ndst;
	struct flowi6 fl6;

	if (!sock6)
		return ERR_PTR(-EIO);

	/* An inherited TOS varies per inner packet, so a cached route
	 * cannot be trusted in that case.
	 */
	if (tos && !info)
		use_cache = false;
	if (use_cache) {
		ndst = dst_cache_get_ip6(dst_cache, saddr);
		if (ndst)
			return ndst;
	}

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_oif = oif;
	fl6.daddr = *daddr;
	fl6.saddr = *saddr;
	fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tos), label);
	fl6.flowi6_mark = skb->mark;
	fl6.flowi6_proto = IPPROTO_UDP;
	fl6.fl6_dport = dport;
	fl6.fl6_sport = sport;

	ndst = ipv6_stub->ipv6_dst_lookup_flow(vxlan->net, sock6->sock->sk,
					       &fl6, NULL);
	if (IS_ERR(ndst)) {
		netdev_dbg(dev, "no route to %pI6\n", daddr);
		return ERR_PTR(-ENETUNREACH);
	}

	if (unlikely(ndst->dev == dev)) {
		netdev_dbg(dev, "circular route to %pI6\n", daddr);
		dst_release(ndst);
		return ERR_PTR(-ELOOP);
	}

	*saddr = fl6.saddr;
	if (use_cache)
		dst_cache_set_ip6(dst_cache, ndst, saddr);
	return ndst;
}
2315 #endif
2316
/* Bypass encapsulation if the destination is local: hand the packet
 * from @src_vxlan directly to @dst_vxlan as a local receive, skipping
 * the UDP/IP tunnel entirely.  When @snoop is set and the destination
 * device learns, record the source MAC as reachable via loopback.
 * Updates both devices' sw stats; consumes the skb either way.
 */
static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan,
			       struct vxlan_dev *dst_vxlan, __be32 vni,
			       bool snoop)
{
	struct pcpu_sw_netstats *tx_stats, *rx_stats;
	union vxlan_addr loopback;
	union vxlan_addr *remote_ip = &dst_vxlan->default_dst.remote_ip;
	struct net_device *dev;
	int len = skb->len;

	tx_stats = this_cpu_ptr(src_vxlan->dev->tstats);
	rx_stats = this_cpu_ptr(dst_vxlan->dev->tstats);
	skb->pkt_type = PACKET_HOST;
	skb->encapsulation = 0;
	skb->dev = dst_vxlan->dev;
	__skb_pull(skb, skb_network_offset(skb));

	/* Pick the loopback address matching the remote's family, used
	 * as the "remote" when snooping the source MAC below.
	 */
	if (remote_ip->sa.sa_family == AF_INET) {
		loopback.sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
		loopback.sa.sa_family =  AF_INET;
#if IS_ENABLED(CONFIG_IPV6)
	} else {
		loopback.sin6.sin6_addr = in6addr_loopback;
		loopback.sa.sa_family =  AF_INET6;
#endif
	}

	rcu_read_lock();
	dev = skb->dev;
	if (unlikely(!(dev->flags & IFF_UP))) {
		kfree_skb(skb);
		goto drop;
	}

	if ((dst_vxlan->cfg.flags & VXLAN_F_LEARN) && snoop)
		vxlan_snoop(dev, &loopback, eth_hdr(skb)->h_source, 0, vni);

	u64_stats_update_begin(&tx_stats->syncp);
	tx_stats->tx_packets++;
	tx_stats->tx_bytes += len;
	u64_stats_update_end(&tx_stats->syncp);

	/* RX stats are only counted when the local delivery succeeded. */
	if (__netif_rx(skb) == NET_RX_SUCCESS) {
		u64_stats_update_begin(&rx_stats->syncp);
		rx_stats->rx_packets++;
		rx_stats->rx_bytes += len;
		u64_stats_update_end(&rx_stats->syncp);
	} else {
drop:
		dev->stats.rx_dropped++;
	}
	rcu_read_unlock();
}
2371
2372 static int encap_bypass_if_local(struct sk_buff *skb, struct net_device *dev,
2373                                  struct vxlan_dev *vxlan,
2374                                  union vxlan_addr *daddr,
2375                                  __be16 dst_port, int dst_ifindex, __be32 vni,
2376                                  struct dst_entry *dst,
2377                                  u32 rt_flags)
2378 {
2379 #if IS_ENABLED(CONFIG_IPV6)
2380         /* IPv6 rt-flags are checked against RTF_LOCAL, but the value of
2381          * RTF_LOCAL is equal to RTCF_LOCAL. So to keep code simple
2382          * we can use RTCF_LOCAL which works for ipv4 and ipv6 route entry.
2383          */
2384         BUILD_BUG_ON(RTCF_LOCAL != RTF_LOCAL);
2385 #endif
2386         /* Bypass encapsulation if the destination is local */
2387         if (rt_flags & RTCF_LOCAL &&
2388             !(rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))) {
2389                 struct vxlan_dev *dst_vxlan;
2390
2391                 dst_release(dst);
2392                 dst_vxlan = vxlan_find_vni(vxlan->net, dst_ifindex, vni,
2393                                            daddr->sa.sa_family, dst_port,
2394                                            vxlan->cfg.flags);
2395                 if (!dst_vxlan) {
2396                         dev->stats.tx_errors++;
2397                         kfree_skb(skb);
2398
2399                         return -ENOENT;
2400                 }
2401                 vxlan_encap_bypass(skb, vxlan, dst_vxlan, vni, true);
2402                 return 1;
2403         }
2404
2405         return 0;
2406 }
2407
2408 static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
2409                            __be32 default_vni, struct vxlan_rdst *rdst,
2410                            bool did_rsc)
2411 {
2412         struct dst_cache *dst_cache;
2413         struct ip_tunnel_info *info;
2414         struct vxlan_dev *vxlan = netdev_priv(dev);
2415         const struct iphdr *old_iph = ip_hdr(skb);
2416         union vxlan_addr *dst;
2417         union vxlan_addr remote_ip, local_ip;
2418         struct vxlan_metadata _md;
2419         struct vxlan_metadata *md = &_md;
2420         __be16 src_port = 0, dst_port;
2421         struct dst_entry *ndst = NULL;
2422         __u8 tos, ttl;
2423         int ifindex;
2424         int err;
2425         u32 flags = vxlan->cfg.flags;
2426         bool udp_sum = false;
2427         bool xnet = !net_eq(vxlan->net, dev_net(vxlan->dev));
2428         __be32 vni = 0;
2429 #if IS_ENABLED(CONFIG_IPV6)
2430         __be32 label;
2431 #endif
2432
2433         info = skb_tunnel_info(skb);
2434
2435         if (rdst) {
2436                 dst = &rdst->remote_ip;
2437                 if (vxlan_addr_any(dst)) {
2438                         if (did_rsc) {
2439                                 /* short-circuited back to local bridge */
2440                                 vxlan_encap_bypass(skb, vxlan, vxlan,
2441                                                    default_vni, true);
2442                                 return;
2443                         }
2444                         goto drop;
2445                 }
2446
2447                 dst_port = rdst->remote_port ? rdst->remote_port : vxlan->cfg.dst_port;
2448                 vni = (rdst->remote_vni) ? : default_vni;
2449                 ifindex = rdst->remote_ifindex;
2450                 local_ip = vxlan->cfg.saddr;
2451                 dst_cache = &rdst->dst_cache;
2452                 md->gbp = skb->mark;
2453                 if (flags & VXLAN_F_TTL_INHERIT) {
2454                         ttl = ip_tunnel_get_ttl(old_iph, skb);
2455                 } else {
2456                         ttl = vxlan->cfg.ttl;
2457                         if (!ttl && vxlan_addr_multicast(dst))
2458                                 ttl = 1;
2459                 }
2460
2461                 tos = vxlan->cfg.tos;
2462                 if (tos == 1)
2463                         tos = ip_tunnel_get_dsfield(old_iph, skb);
2464
2465                 if (dst->sa.sa_family == AF_INET)
2466                         udp_sum = !(flags & VXLAN_F_UDP_ZERO_CSUM_TX);
2467                 else
2468                         udp_sum = !(flags & VXLAN_F_UDP_ZERO_CSUM6_TX);
2469 #if IS_ENABLED(CONFIG_IPV6)
2470                 label = vxlan->cfg.label;
2471 #endif
2472         } else {
2473                 if (!info) {
2474                         WARN_ONCE(1, "%s: Missing encapsulation instructions\n",
2475                                   dev->name);
2476                         goto drop;
2477                 }
2478                 remote_ip.sa.sa_family = ip_tunnel_info_af(info);
2479                 if (remote_ip.sa.sa_family == AF_INET) {
2480                         remote_ip.sin.sin_addr.s_addr = info->key.u.ipv4.dst;
2481                         local_ip.sin.sin_addr.s_addr = info->key.u.ipv4.src;
2482                 } else {
2483                         remote_ip.sin6.sin6_addr = info->key.u.ipv6.dst;
2484                         local_ip.sin6.sin6_addr = info->key.u.ipv6.src;
2485                 }
2486                 dst = &remote_ip;
2487                 dst_port = info->key.tp_dst ? : vxlan->cfg.dst_port;
2488                 vni = tunnel_id_to_key32(info->key.tun_id);
2489                 ifindex = 0;
2490                 dst_cache = &info->dst_cache;
2491                 if (info->key.tun_flags & TUNNEL_VXLAN_OPT) {
2492                         if (info->options_len < sizeof(*md))
2493                                 goto drop;
2494                         md = ip_tunnel_info_opts(info);
2495                 }
2496                 ttl = info->key.ttl;
2497                 tos = info->key.tos;
2498 #if IS_ENABLED(CONFIG_IPV6)
2499                 label = info->key.label;
2500 #endif
2501                 udp_sum = !!(info->key.tun_flags & TUNNEL_CSUM);
2502         }
2503         src_port = udp_flow_src_port(dev_net(dev), skb, vxlan->cfg.port_min,
2504                                      vxlan->cfg.port_max, true);
2505
2506         rcu_read_lock();
2507         if (dst->sa.sa_family == AF_INET) {
2508                 struct vxlan_sock *sock4 = rcu_dereference(vxlan->vn4_sock);
2509                 struct rtable *rt;
2510                 __be16 df = 0;
2511
2512                 if (!ifindex)
2513                         ifindex = sock4->sock->sk->sk_bound_dev_if;
2514
2515                 rt = vxlan_get_route(vxlan, dev, sock4, skb, ifindex, tos,
2516                                      dst->sin.sin_addr.s_addr,
2517                                      &local_ip.sin.sin_addr.s_addr,
2518                                      dst_port, src_port,
2519                                      dst_cache, info);
2520                 if (IS_ERR(rt)) {
2521                         err = PTR_ERR(rt);
2522                         goto tx_error;
2523                 }
2524
2525                 if (!info) {
2526                         /* Bypass encapsulation if the destination is local */
2527                         err = encap_bypass_if_local(skb, dev, vxlan, dst,
2528                                                     dst_port, ifindex, vni,
2529                                                     &rt->dst, rt->rt_flags);
2530                         if (err)
2531                                 goto out_unlock;
2532
2533                         if (vxlan->cfg.df == VXLAN_DF_SET) {
2534                                 df = htons(IP_DF);
2535                         } else if (vxlan->cfg.df == VXLAN_DF_INHERIT) {
2536                                 struct ethhdr *eth = eth_hdr(skb);
2537
2538                                 if (ntohs(eth->h_proto) == ETH_P_IPV6 ||
2539                                     (ntohs(eth->h_proto) == ETH_P_IP &&
2540                                      old_iph->frag_off & htons(IP_DF)))
2541                                         df = htons(IP_DF);
2542                         }
2543                 } else if (info->key.tun_flags & TUNNEL_DONT_FRAGMENT) {
2544                         df = htons(IP_DF);
2545                 }
2546
2547                 ndst = &rt->dst;
2548                 err = skb_tunnel_check_pmtu(skb, ndst, VXLAN_HEADROOM,
2549                                             netif_is_any_bridge_port(dev));
2550                 if (err < 0) {
2551                         goto tx_error;
2552                 } else if (err) {
2553                         if (info) {
2554                                 struct ip_tunnel_info *unclone;
2555                                 struct in_addr src, dst;
2556
2557                                 unclone = skb_tunnel_info_unclone(skb);
2558                                 if (unlikely(!unclone))
2559                                         goto tx_error;
2560
2561                                 src = remote_ip.sin.sin_addr;
2562                                 dst = local_ip.sin.sin_addr;
2563                                 unclone->key.u.ipv4.src = src.s_addr;
2564                                 unclone->key.u.ipv4.dst = dst.s_addr;
2565                         }
2566                         vxlan_encap_bypass(skb, vxlan, vxlan, vni, false);
2567                         dst_release(ndst);
2568                         goto out_unlock;
2569                 }
2570
2571                 tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
2572                 ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
2573                 err = vxlan_build_skb(skb, ndst, sizeof(struct iphdr),
2574                                       vni, md, flags, udp_sum);
2575                 if (err < 0)
2576                         goto tx_error;
2577
2578                 udp_tunnel_xmit_skb(rt, sock4->sock->sk, skb, local_ip.sin.sin_addr.s_addr,
2579                                     dst->sin.sin_addr.s_addr, tos, ttl, df,
2580                                     src_port, dst_port, xnet, !udp_sum);
2581 #if IS_ENABLED(CONFIG_IPV6)
2582         } else {
2583                 struct vxlan_sock *sock6 = rcu_dereference(vxlan->vn6_sock);
2584
2585                 if (!ifindex)
2586                         ifindex = sock6->sock->sk->sk_bound_dev_if;
2587
2588                 ndst = vxlan6_get_route(vxlan, dev, sock6, skb, ifindex, tos,
2589                                         label, &dst->sin6.sin6_addr,
2590                                         &local_ip.sin6.sin6_addr,
2591                                         dst_port, src_port,
2592                                         dst_cache, info);
2593                 if (IS_ERR(ndst)) {
2594                         err = PTR_ERR(ndst);
2595                         ndst = NULL;
2596                         goto tx_error;
2597                 }
2598
2599                 if (!info) {
2600                         u32 rt6i_flags = ((struct rt6_info *)ndst)->rt6i_flags;
2601
2602                         err = encap_bypass_if_local(skb, dev, vxlan, dst,
2603                                                     dst_port, ifindex, vni,
2604                                                     ndst, rt6i_flags);
2605                         if (err)
2606                                 goto out_unlock;
2607                 }
2608
2609                 err = skb_tunnel_check_pmtu(skb, ndst, VXLAN6_HEADROOM,
2610                                             netif_is_any_bridge_port(dev));
2611                 if (err < 0) {
2612                         goto tx_error;
2613                 } else if (err) {
2614                         if (info) {
2615                                 struct ip_tunnel_info *unclone;
2616                                 struct in6_addr src, dst;
2617
2618                                 unclone = skb_tunnel_info_unclone(skb);
2619                                 if (unlikely(!unclone))
2620                                         goto tx_error;
2621
2622                                 src = remote_ip.sin6.sin6_addr;
2623                                 dst = local_ip.sin6.sin6_addr;
2624                                 unclone->key.u.ipv6.src = src;
2625                                 unclone->key.u.ipv6.dst = dst;
2626                         }
2627
2628                         vxlan_encap_bypass(skb, vxlan, vxlan, vni, false);
2629                         dst_release(ndst);
2630                         goto out_unlock;
2631                 }
2632
2633                 tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
2634                 ttl = ttl ? : ip6_dst_hoplimit(ndst);
2635                 skb_scrub_packet(skb, xnet);
2636                 err = vxlan_build_skb(skb, ndst, sizeof(struct ipv6hdr),
2637                                       vni, md, flags, udp_sum);
2638                 if (err < 0)
2639                         goto tx_error;
2640
2641                 udp_tunnel6_xmit_skb(ndst, sock6->sock->sk, skb, dev,
2642                                      &local_ip.sin6.sin6_addr,
2643                                      &dst->sin6.sin6_addr, tos, ttl,
2644                                      label, src_port, dst_port, !udp_sum);
2645 #endif
2646         }
2647 out_unlock:
2648         rcu_read_unlock();
2649         return;
2650
2651 drop:
2652         dev->stats.tx_dropped++;
2653         dev_kfree_skb(skb);
2654         return;
2655
2656 tx_error:
2657         rcu_read_unlock();
2658         if (err == -ELOOP)
2659                 dev->stats.collisions++;
2660         else if (err == -ENETUNREACH)
2661                 dev->stats.tx_carrier_errors++;
2662         dst_release(ndst);
2663         dev->stats.tx_errors++;
2664         kfree_skb(skb);
2665 }
2666
2667 static void vxlan_xmit_nh(struct sk_buff *skb, struct net_device *dev,
2668                           struct vxlan_fdb *f, __be32 vni, bool did_rsc)
2669 {
2670         struct vxlan_rdst nh_rdst;
2671         struct nexthop *nh;
2672         bool do_xmit;
2673         u32 hash;
2674
2675         memset(&nh_rdst, 0, sizeof(struct vxlan_rdst));
2676         hash = skb_get_hash(skb);
2677
2678         rcu_read_lock();
2679         nh = rcu_dereference(f->nh);
2680         if (!nh) {
2681                 rcu_read_unlock();
2682                 goto drop;
2683         }
2684         do_xmit = vxlan_fdb_nh_path_select(nh, hash, &nh_rdst);
2685         rcu_read_unlock();
2686
2687         if (likely(do_xmit))
2688                 vxlan_xmit_one(skb, dev, vni, &nh_rdst, did_rsc);
2689         else
2690                 goto drop;
2691
2692         return;
2693
2694 drop:
2695         dev->stats.tx_dropped++;
2696         dev_kfree_skb(skb);
2697 }
2698
/* Transmit local packets over Vxlan
 *
 * Outer IP header inherits ECN and DF from inner header.
 * Outer UDP destination is the VXLAN assigned port.
 *           source port is based on hash of flow
 */
static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct vxlan_dev *vxlan = netdev_priv(dev);
	struct vxlan_rdst *rdst, *fdst = NULL;
	const struct ip_tunnel_info *info;
	bool did_rsc = false;
	struct vxlan_fdb *f;
	struct ethhdr *eth;
	__be32 vni = 0;

	info = skb_tunnel_info(skb);

	skb_reset_mac_header(skb);

	if (vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA) {
		/* Bridged TX on a metadata device carries the VNI in the
		 * tunnel info; keep it so the FDB lookups below are per-VNI.
		 */
		if (info && info->mode & IP_TUNNEL_INFO_BRIDGE &&
		    info->mode & IP_TUNNEL_INFO_TX) {
			vni = tunnel_id_to_key32(info->key.tun_id);
		} else {
			/* Non-bridged metadata TX bypasses the FDB entirely;
			 * packets without TX tunnel metadata are dropped.
			 */
			if (info && info->mode & IP_TUNNEL_INFO_TX)
				vxlan_xmit_one(skb, dev, vni, NULL, false);
			else
				kfree_skb(skb);
			return NETDEV_TX_OK;
		}
	}

	if (vxlan->cfg.flags & VXLAN_F_PROXY) {
		/* Try to answer ARP / IPv6 neighbour solicitations locally
		 * instead of forwarding them over the tunnel.
		 */
		eth = eth_hdr(skb);
		if (ntohs(eth->h_proto) == ETH_P_ARP)
			return arp_reduce(dev, skb, vni);
#if IS_ENABLED(CONFIG_IPV6)
		else if (ntohs(eth->h_proto) == ETH_P_IPV6 &&
			 pskb_may_pull(skb, sizeof(struct ipv6hdr) +
					    sizeof(struct nd_msg)) &&
			 ipv6_hdr(skb)->nexthdr == IPPROTO_ICMPV6) {
			struct nd_msg *m = (struct nd_msg *)(ipv6_hdr(skb) + 1);

			if (m->icmph.icmp6_code == 0 &&
			    m->icmph.icmp6_type == NDISC_NEIGHBOUR_SOLICITATION)
				return neigh_reduce(dev, skb, vni);
		}
#endif
	}

	eth = eth_hdr(skb);
	f = vxlan_find_mac(vxlan, eth->h_dest, vni);
	did_rsc = false;

	/* Route short-circuit: if the matched entry is a router, rewrite the
	 * destination MAC and redo the lookup with the new address.
	 */
	if (f && (f->flags & NTF_ROUTER) && (vxlan->cfg.flags & VXLAN_F_RSC) &&
	    (ntohs(eth->h_proto) == ETH_P_IP ||
	     ntohs(eth->h_proto) == ETH_P_IPV6)) {
		did_rsc = route_shortcircuit(dev, skb);
		if (did_rsc)
			f = vxlan_find_mac(vxlan, eth->h_dest, vni);
	}

	if (f == NULL) {
		/* Fall back to the all-zeros default-destination entry;
		 * failing that, optionally notify userspace (L2MISS) and drop.
		 */
		f = vxlan_find_mac(vxlan, all_zeros_mac, vni);
		if (f == NULL) {
			if ((vxlan->cfg.flags & VXLAN_F_L2MISS) &&
			    !is_multicast_ether_addr(eth->h_dest))
				vxlan_fdb_miss(vxlan, eth->h_dest);

			dev->stats.tx_dropped++;
			kfree_skb(skb);
			return NETDEV_TX_OK;
		}
	}

	if (rcu_access_pointer(f->nh)) {
		vxlan_xmit_nh(skb, dev, f,
			      (vni ? : vxlan->default_dst.remote_vni), did_rsc);
	} else {
		/* Replicate to every remote: each additional rdst gets a
		 * clone, the first one consumes the original skb.
		 */
		list_for_each_entry_rcu(rdst, &f->remotes, list) {
			struct sk_buff *skb1;

			if (!fdst) {
				fdst = rdst;
				continue;
			}
			skb1 = skb_clone(skb, GFP_ATOMIC);
			if (skb1)
				vxlan_xmit_one(skb1, dev, vni, rdst, did_rsc);
		}
		if (fdst)
			vxlan_xmit_one(skb, dev, vni, fdst, did_rsc);
		else
			kfree_skb(skb);
	}

	return NETDEV_TX_OK;
}
2798
/* Walk the forwarding table and purge stale entries.
 *
 * Timer callback; rearms itself for the earliest pending expiry, at
 * most FDB_AGE_INTERVAL from now.
 */
static void vxlan_cleanup(struct timer_list *t)
{
	struct vxlan_dev *vxlan = from_timer(vxlan, t, age_timer);
	unsigned long next_timer = jiffies + FDB_AGE_INTERVAL;
	unsigned int h;

	if (!netif_running(vxlan->dev))
		return;

	for (h = 0; h < FDB_HASH_SIZE; ++h) {
		struct hlist_node *p, *n;

		spin_lock(&vxlan->hash_lock[h]);
		hlist_for_each_safe(p, n, &vxlan->fdb_head[h]) {
			struct vxlan_fdb *f
				= container_of(p, struct vxlan_fdb, hlist);
			unsigned long timeout;

			/* Static and externally learned entries never age. */
			if (f->state & (NUD_PERMANENT | NUD_NOARP))
				continue;

			if (f->flags & NTF_EXT_LEARNED)
				continue;

			timeout = f->used + vxlan->cfg.age_interval * HZ;
			if (time_before_eq(timeout, jiffies)) {
				netdev_dbg(vxlan->dev,
					   "garbage collect %pM\n",
					   f->eth_addr);
				f->state = NUD_STALE;
				vxlan_fdb_destroy(vxlan, f, true, true);
			} else if (time_before(timeout, next_timer))
				next_timer = timeout;
		}
		spin_unlock(&vxlan->hash_lock[h]);
	}

	mod_timer(&vxlan->age_timer, next_timer);
}
2839
/* Unhook the device from the per-socket VNI hash lists (both address
 * families) so received packets can no longer be steered to it.
 */
static void vxlan_vs_del_dev(struct vxlan_dev *vxlan)
{
	struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);

	spin_lock(&vn->sock_lock);
	hlist_del_init_rcu(&vxlan->hlist4.hlist);
#if IS_ENABLED(CONFIG_IPV6)
	hlist_del_init_rcu(&vxlan->hlist6.hlist);
#endif
	spin_unlock(&vn->sock_lock);
}
2851
/* Hash the device into a socket's VNI table under its default VNI so
 * the receive path can demultiplex incoming packets to this device.
 */
static void vxlan_vs_add_dev(struct vxlan_sock *vs, struct vxlan_dev *vxlan,
			     struct vxlan_dev_node *node)
{
	struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
	__be32 vni = vxlan->default_dst.remote_vni;

	node->vxlan = vxlan;
	spin_lock(&vn->sock_lock);
	hlist_add_head_rcu(&node->hlist, vni_head(vs, vni));
	spin_unlock(&vn->sock_lock);
}
2863
2864 /* Setup stats when device is created */
2865 static int vxlan_init(struct net_device *dev)
2866 {
2867         struct vxlan_dev *vxlan = netdev_priv(dev);
2868         int err;
2869
2870         if (vxlan->cfg.flags & VXLAN_F_VNIFILTER)
2871                 vxlan_vnigroup_init(vxlan);
2872
2873         dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
2874         if (!dev->tstats)
2875                 return -ENOMEM;
2876
2877         err = gro_cells_init(&vxlan->gro_cells, dev);
2878         if (err) {
2879                 free_percpu(dev->tstats);
2880                 return err;
2881         }
2882
2883         return 0;
2884 }
2885
/* Remove the all-zeros (default destination) FDB entry for @vni. */
static void vxlan_fdb_delete_default(struct vxlan_dev *vxlan, __be32 vni)
{
	struct vxlan_fdb *f;
	u32 hash_index = fdb_head_index(vxlan, all_zeros_mac, vni);

	spin_lock_bh(&vxlan->hash_lock[hash_index]);
	f = __vxlan_find_mac(vxlan, all_zeros_mac, vni);
	if (f)
		vxlan_fdb_destroy(vxlan, f, true, true);
	spin_unlock_bh(&vxlan->hash_lock[hash_index]);
}
2897
/* Undo vxlan_init(): release VNI filtering state, GRO cells, the
 * default FDB entry and the per-cpu stats.
 */
static void vxlan_uninit(struct net_device *dev)
{
	struct vxlan_dev *vxlan = netdev_priv(dev);

	if (vxlan->cfg.flags & VXLAN_F_VNIFILTER)
		vxlan_vnigroup_uninit(vxlan);

	gro_cells_destroy(&vxlan->gro_cells);

	vxlan_fdb_delete_default(vxlan, vxlan->cfg.vni);

	free_percpu(dev->tstats);
}
2911
2912 /* Start ageing timer and join group when device is brought up */
2913 static int vxlan_open(struct net_device *dev)
2914 {
2915         struct vxlan_dev *vxlan = netdev_priv(dev);
2916         int ret;
2917
2918         ret = vxlan_sock_add(vxlan);
2919         if (ret < 0)
2920                 return ret;
2921
2922         ret = vxlan_multicast_join(vxlan);
2923         if (ret) {
2924                 vxlan_sock_release(vxlan);
2925                 return ret;
2926         }
2927
2928         if (vxlan->cfg.age_interval)
2929                 mod_timer(&vxlan->age_timer, jiffies + FDB_AGE_INTERVAL);
2930
2931         return ret;
2932 }
2933
/* Purge the forwarding table.
 *
 * With @do_all false, permanent/noarp entries are kept; the device's
 * own all-zeros default entry is always kept here (it is deleted at
 * vxlan_uninit).
 */
static void vxlan_flush(struct vxlan_dev *vxlan, bool do_all)
{
	unsigned int h;

	for (h = 0; h < FDB_HASH_SIZE; ++h) {
		struct hlist_node *p, *n;

		spin_lock_bh(&vxlan->hash_lock[h]);
		hlist_for_each_safe(p, n, &vxlan->fdb_head[h]) {
			struct vxlan_fdb *f
				= container_of(p, struct vxlan_fdb, hlist);
			if (!do_all && (f->state & (NUD_PERMANENT | NUD_NOARP)))
				continue;
			/* the all_zeros_mac entry is deleted at vxlan_uninit */
			if (is_zero_ether_addr(f->eth_addr) &&
			    f->vni == vxlan->cfg.vni)
				continue;
			vxlan_fdb_destroy(vxlan, f, true, true);
		}
		spin_unlock_bh(&vxlan->hash_lock[h]);
	}
}
2957
2958 /* Cleanup timer and forwarding table on shutdown */
2959 static int vxlan_stop(struct net_device *dev)
2960 {
2961         struct vxlan_dev *vxlan = netdev_priv(dev);
2962         int ret = 0;
2963
2964         vxlan_multicast_leave(vxlan);
2965
2966         del_timer_sync(&vxlan->age_timer);
2967
2968         vxlan_flush(vxlan, false);
2969         vxlan_sock_release(vxlan);
2970
2971         return ret;
2972 }
2973
/* Stub, nothing needs to be done.
 * ndo_set_rx_mode must exist for the ether ops table, but a vxlan
 * device has no hardware filter to program.
 */
static void vxlan_set_multicast_list(struct net_device *dev)
{
}
2978
2979 static int vxlan_change_mtu(struct net_device *dev, int new_mtu)
2980 {
2981         struct vxlan_dev *vxlan = netdev_priv(dev);
2982         struct vxlan_rdst *dst = &vxlan->default_dst;
2983         struct net_device *lowerdev = __dev_get_by_index(vxlan->net,
2984                                                          dst->remote_ifindex);
2985         bool use_ipv6 = !!(vxlan->cfg.flags & VXLAN_F_IPV6);
2986
2987         /* This check is different than dev->max_mtu, because it looks at
2988          * the lowerdev->mtu, rather than the static dev->max_mtu
2989          */
2990         if (lowerdev) {
2991                 int max_mtu = lowerdev->mtu -
2992                               (use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM);
2993                 if (new_mtu > max_mtu)
2994                         return -EINVAL;
2995         }
2996
2997         dev->mtu = new_mtu;
2998         return 0;
2999 }
3000
/* ndo_fill_metadata_dst: resolve the route a metadata-mode skb would
 * take and record the UDP source/destination ports the transmit path
 * would use in the tunnel info.
 */
static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
{
	struct vxlan_dev *vxlan = netdev_priv(dev);
	struct ip_tunnel_info *info = skb_tunnel_info(skb);
	__be16 sport, dport;

	/* Same source-port hashing as the transmit path. */
	sport = udp_flow_src_port(dev_net(dev), skb, vxlan->cfg.port_min,
				  vxlan->cfg.port_max, true);
	dport = info->key.tp_dst ? : vxlan->cfg.dst_port;

	if (ip_tunnel_info_af(info) == AF_INET) {
		struct vxlan_sock *sock4 = rcu_dereference(vxlan->vn4_sock);
		struct rtable *rt;

		rt = vxlan_get_route(vxlan, dev, sock4, skb, 0, info->key.tos,
				     info->key.u.ipv4.dst,
				     &info->key.u.ipv4.src, dport, sport,
				     &info->dst_cache, info);
		if (IS_ERR(rt))
			return PTR_ERR(rt);
		/* Only the lookup side effects are wanted; drop the route. */
		ip_rt_put(rt);
	} else {
#if IS_ENABLED(CONFIG_IPV6)
		struct vxlan_sock *sock6 = rcu_dereference(vxlan->vn6_sock);
		struct dst_entry *ndst;

		ndst = vxlan6_get_route(vxlan, dev, sock6, skb, 0, info->key.tos,
					info->key.label, &info->key.u.ipv6.dst,
					&info->key.u.ipv6.src, dport, sport,
					&info->dst_cache, info);
		if (IS_ERR(ndst))
			return PTR_ERR(ndst);
		dst_release(ndst);
#else /* !CONFIG_IPV6 */
		return -EPFNOSUPPORT;
#endif
	}
	info->key.tp_src = sport;
	info->key.tp_dst = dport;
	return 0;
}
3042
/* netdev ops for Ethernet-mode (ARPHRD_ETHER) vxlan devices */
static const struct net_device_ops vxlan_netdev_ether_ops = {
	.ndo_init		= vxlan_init,
	.ndo_uninit		= vxlan_uninit,
	.ndo_open		= vxlan_open,
	.ndo_stop		= vxlan_stop,
	.ndo_start_xmit		= vxlan_xmit,
	.ndo_get_stats64	= dev_get_tstats64,
	.ndo_set_rx_mode	= vxlan_set_multicast_list,
	.ndo_change_mtu		= vxlan_change_mtu,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_set_mac_address	= eth_mac_addr,
	.ndo_fdb_add		= vxlan_fdb_add,
	.ndo_fdb_del		= vxlan_fdb_delete,
	.ndo_fdb_dump		= vxlan_fdb_dump,
	.ndo_fdb_get		= vxlan_fdb_get,
	.ndo_fill_metadata_dst	= vxlan_fill_metadata_dst,
};
3060
/* netdev ops for raw (ARPHRD_NONE) vxlan devices: no link-layer
 * addressing, so no MAC or FDB related callbacks.
 */
static const struct net_device_ops vxlan_netdev_raw_ops = {
	.ndo_init		= vxlan_init,
	.ndo_uninit		= vxlan_uninit,
	.ndo_open		= vxlan_open,
	.ndo_stop		= vxlan_stop,
	.ndo_start_xmit		= vxlan_xmit,
	.ndo_get_stats64	= dev_get_tstats64,
	.ndo_change_mtu		= vxlan_change_mtu,
	.ndo_fill_metadata_dst	= vxlan_fill_metadata_dst,
};
3071
/* Info for udev, that this is a virtual tunnel endpoint */
static struct device_type vxlan_type = {
	.name = "vxlan",
};
3076
3077 /* Calls the ndo_udp_tunnel_add of the caller in order to
3078  * supply the listening VXLAN udp ports. Callers are expected
3079  * to implement the ndo_udp_tunnel_add.
3080  */
3081 static void vxlan_offload_rx_ports(struct net_device *dev, bool push)
3082 {
3083         struct vxlan_sock *vs;
3084         struct net *net = dev_net(dev);
3085         struct vxlan_net *vn = net_generic(net, vxlan_net_id);
3086         unsigned int i;
3087
3088         spin_lock(&vn->sock_lock);
3089         for (i = 0; i < PORT_HASH_SIZE; ++i) {
3090                 hlist_for_each_entry_rcu(vs, &vn->sock_list[i], hlist) {
3091                         unsigned short type;
3092
3093                         if (vs->flags & VXLAN_F_GPE)
3094                                 type = UDP_TUNNEL_TYPE_VXLAN_GPE;
3095                         else
3096                                 type = UDP_TUNNEL_TYPE_VXLAN;
3097
3098                         if (push)
3099                                 udp_tunnel_push_rx_port(dev, vs->sock, type);
3100                         else
3101                                 udp_tunnel_drop_rx_port(dev, vs->sock, type);
3102                 }
3103         }
3104         spin_unlock(&vn->sock_lock);
3105 }
3106
/* Initialize the device structure.
 * Common setup shared by ether and raw mode; mode-specific tweaks are
 * applied afterwards by vxlan_ether_setup()/vxlan_raw_setup().
 */
static void vxlan_setup(struct net_device *dev)
{
	struct vxlan_dev *vxlan = netdev_priv(dev);
	unsigned int h;

	/* Random MAC until one is configured. */
	eth_hw_addr_random(dev);
	ether_setup(dev);

	dev->needs_free_netdev = true;
	SET_NETDEV_DEVTYPE(dev, &vxlan_type);

	dev->features	|= NETIF_F_LLTX;
	dev->features	|= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_FRAGLIST;
	dev->features	|= NETIF_F_RXCSUM;
	dev->features	|= NETIF_F_GSO_SOFTWARE;

	dev->vlan_features = dev->features;
	dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_FRAGLIST;
	dev->hw_features |= NETIF_F_RXCSUM;
	dev->hw_features |= NETIF_F_GSO_SOFTWARE;
	netif_keep_dst(dev);
	dev->priv_flags |= IFF_NO_QUEUE | IFF_CHANGE_PROTO_DOWN;

	/* MTU range: 68 - 65535 */
	dev->min_mtu = ETH_MIN_MTU;
	dev->max_mtu = ETH_MAX_MTU;

	INIT_LIST_HEAD(&vxlan->next);

	timer_setup(&vxlan->age_timer, vxlan_cleanup, TIMER_DEFERRABLE);

	vxlan->dev = dev;

	for (h = 0; h < FDB_HASH_SIZE; ++h) {
		spin_lock_init(&vxlan->hash_lock[h]);
		INIT_HLIST_HEAD(&vxlan->fdb_head[h]);
	}
}
3146
/* Configure @dev as an Ethernet-style vxlan device. */
static void vxlan_ether_setup(struct net_device *dev)
{
	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
	dev->netdev_ops = &vxlan_netdev_ether_ops;
}
3153
/* Configure @dev as a raw tunnel device with no link-layer header. */
static void vxlan_raw_setup(struct net_device *dev)
{
	dev->header_ops = NULL;
	dev->type = ARPHRD_NONE;
	dev->hard_header_len = 0;
	dev->addr_len = 0;
	dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST;
	dev->netdev_ops = &vxlan_netdev_raw_ops;
}
3163
/* Netlink attribute policy for IFLA_VXLAN_* device configuration */
static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
	[IFLA_VXLAN_ID]		= { .type = NLA_U32 },
	[IFLA_VXLAN_GROUP]	= { .len = sizeof_field(struct iphdr, daddr) },
	[IFLA_VXLAN_GROUP6]	= { .len = sizeof(struct in6_addr) },
	[IFLA_VXLAN_LINK]	= { .type = NLA_U32 },
	[IFLA_VXLAN_LOCAL]	= { .len = sizeof_field(struct iphdr, saddr) },
	[IFLA_VXLAN_LOCAL6]	= { .len = sizeof(struct in6_addr) },
	[IFLA_VXLAN_TOS]	= { .type = NLA_U8 },
	[IFLA_VXLAN_TTL]	= { .type = NLA_U8 },
	[IFLA_VXLAN_LABEL]	= { .type = NLA_U32 },
	[IFLA_VXLAN_LEARNING]	= { .type = NLA_U8 },
	[IFLA_VXLAN_AGEING]	= { .type = NLA_U32 },
	[IFLA_VXLAN_LIMIT]	= { .type = NLA_U32 },
	[IFLA_VXLAN_PORT_RANGE] = { .len  = sizeof(struct ifla_vxlan_port_range) },
	[IFLA_VXLAN_PROXY]	= { .type = NLA_U8 },
	[IFLA_VXLAN_RSC]	= { .type = NLA_U8 },
	[IFLA_VXLAN_L2MISS]	= { .type = NLA_U8 },
	[IFLA_VXLAN_L3MISS]	= { .type = NLA_U8 },
	[IFLA_VXLAN_COLLECT_METADATA]	= { .type = NLA_U8 },
	[IFLA_VXLAN_PORT]	= { .type = NLA_U16 },
	[IFLA_VXLAN_UDP_CSUM]	= { .type = NLA_U8 },
	[IFLA_VXLAN_UDP_ZERO_CSUM6_TX]	= { .type = NLA_U8 },
	[IFLA_VXLAN_UDP_ZERO_CSUM6_RX]	= { .type = NLA_U8 },
	[IFLA_VXLAN_REMCSUM_TX]	= { .type = NLA_U8 },
	[IFLA_VXLAN_REMCSUM_RX]	= { .type = NLA_U8 },
	[IFLA_VXLAN_GBP]	= { .type = NLA_FLAG, },
	[IFLA_VXLAN_GPE]	= { .type = NLA_FLAG, },
	[IFLA_VXLAN_REMCSUM_NOPARTIAL]	= { .type = NLA_FLAG },
	[IFLA_VXLAN_TTL_INHERIT]	= { .type = NLA_FLAG },
	[IFLA_VXLAN_DF]		= { .type = NLA_U8 },
	[IFLA_VXLAN_VNIFILTER]	= { .type = NLA_U8 },
};
3196
3197 static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[],
3198                           struct netlink_ext_ack *extack)
3199 {
3200         if (tb[IFLA_ADDRESS]) {
3201                 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) {
3202                         NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_ADDRESS],
3203                                             "Provided link layer address is not Ethernet");
3204                         return -EINVAL;
3205                 }
3206
3207                 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) {
3208                         NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_ADDRESS],
3209                                             "Provided Ethernet address is not unicast");
3210                         return -EADDRNOTAVAIL;
3211                 }
3212         }
3213
3214         if (tb[IFLA_MTU]) {
3215                 u32 mtu = nla_get_u32(tb[IFLA_MTU]);
3216
3217                 if (mtu < ETH_MIN_MTU || mtu > ETH_MAX_MTU) {
3218                         NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_MTU],
3219                                             "MTU must be between 68 and 65535");
3220                         return -EINVAL;
3221                 }
3222         }
3223
3224         if (!data) {
3225                 NL_SET_ERR_MSG(extack,
3226                                "Required attributes not provided to perform the operation");
3227                 return -EINVAL;
3228         }
3229
3230         if (data[IFLA_VXLAN_ID]) {
3231                 u32 id = nla_get_u32(data[IFLA_VXLAN_ID]);
3232
3233                 if (id >= VXLAN_N_VID) {
3234                         NL_SET_ERR_MSG_ATTR(extack, data[IFLA_VXLAN_ID],
3235                                             "VXLAN ID must be lower than 16777216");
3236                         return -ERANGE;
3237                 }
3238         }
3239
3240         if (data[IFLA_VXLAN_PORT_RANGE]) {
3241                 const struct ifla_vxlan_port_range *p
3242                         = nla_data(data[IFLA_VXLAN_PORT_RANGE]);
3243
3244                 if (ntohs(p->high) < ntohs(p->low)) {
3245                         NL_SET_ERR_MSG_ATTR(extack, data[IFLA_VXLAN_PORT_RANGE],
3246                                             "Invalid source port range");
3247                         return -EINVAL;
3248                 }
3249         }
3250
3251         if (data[IFLA_VXLAN_DF]) {
3252                 enum ifla_vxlan_df df = nla_get_u8(data[IFLA_VXLAN_DF]);
3253
3254                 if (df < 0 || df > VXLAN_DF_MAX) {
3255                         NL_SET_ERR_MSG_ATTR(extack, data[IFLA_VXLAN_DF],
3256                                             "Invalid DF attribute");
3257                         return -EINVAL;
3258                 }
3259         }
3260
3261         return 0;
3262 }
3263
3264 static void vxlan_get_drvinfo(struct net_device *netdev,
3265                               struct ethtool_drvinfo *drvinfo)
3266 {
3267         strlcpy(drvinfo->version, VXLAN_VERSION, sizeof(drvinfo->version));
3268         strlcpy(drvinfo->driver, "vxlan", sizeof(drvinfo->driver));
3269 }
3270
3271 static int vxlan_get_link_ksettings(struct net_device *dev,
3272                                     struct ethtool_link_ksettings *cmd)
3273 {
3274         struct vxlan_dev *vxlan = netdev_priv(dev);
3275         struct vxlan_rdst *dst = &vxlan->default_dst;
3276         struct net_device *lowerdev = __dev_get_by_index(vxlan->net,
3277                                                          dst->remote_ifindex);
3278
3279         if (!lowerdev) {
3280                 cmd->base.duplex = DUPLEX_UNKNOWN;
3281                 cmd->base.port = PORT_OTHER;
3282                 cmd->base.speed = SPEED_UNKNOWN;
3283
3284                 return 0;
3285         }
3286
3287         return __ethtool_get_link_ksettings(lowerdev, cmd);
3288 }
3289
/* ethtool operations: driver identity, carrier state and link settings
 * (the latter proxied to the lower device when one is configured).
 */
static const struct ethtool_ops vxlan_ethtool_ops = {
        .get_drvinfo            = vxlan_get_drvinfo,
        .get_link               = ethtool_op_get_link,
        .get_link_ksettings     = vxlan_get_link_ksettings,
};
3295
3296 static struct socket *vxlan_create_sock(struct net *net, bool ipv6,
3297                                         __be16 port, u32 flags, int ifindex)
3298 {
3299         struct socket *sock;
3300         struct udp_port_cfg udp_conf;
3301         int err;
3302
3303         memset(&udp_conf, 0, sizeof(udp_conf));
3304
3305         if (ipv6) {
3306                 udp_conf.family = AF_INET6;
3307                 udp_conf.use_udp6_rx_checksums =
3308                     !(flags & VXLAN_F_UDP_ZERO_CSUM6_RX);
3309                 udp_conf.ipv6_v6only = 1;
3310         } else {
3311                 udp_conf.family = AF_INET;
3312         }
3313
3314         udp_conf.local_udp_port = port;
3315         udp_conf.bind_ifindex = ifindex;
3316
3317         /* Open UDP socket */
3318         err = udp_sock_create(net, &udp_conf, &sock);
3319         if (err < 0)
3320                 return ERR_PTR(err);
3321
3322         udp_allow_gso(sock->sk);
3323         return sock;
3324 }
3325
/* Create new listen socket if needed.
 *
 * Allocates a vxlan_sock wrapper, opens the UDP socket, publishes it in
 * the per-netns socket hash (so other devices can share it) and installs
 * the tunnel receive/GRO callbacks.  Returns the new vxlan_sock with a
 * refcount of 1, or an ERR_PTR() on failure.
 */
static struct vxlan_sock *vxlan_socket_create(struct net *net, bool ipv6,
                                              __be16 port, u32 flags,
                                              int ifindex)
{
        struct vxlan_net *vn = net_generic(net, vxlan_net_id);
        struct vxlan_sock *vs;
        struct socket *sock;
        unsigned int h;
        struct udp_tunnel_sock_cfg tunnel_cfg;

        vs = kzalloc(sizeof(*vs), GFP_KERNEL);
        if (!vs)
                return ERR_PTR(-ENOMEM);

        /* Per-socket hash of attached devices, keyed by VNI */
        for (h = 0; h < VNI_HASH_SIZE; ++h)
                INIT_HLIST_HEAD(&vs->vni_list[h]);

        sock = vxlan_create_sock(net, ipv6, port, flags, ifindex);
        if (IS_ERR(sock)) {
                kfree(vs);
                return ERR_CAST(sock);
        }

        vs->sock = sock;
        refcount_set(&vs->refcnt, 1);
        /* Only flags affecting receive processing matter for sharing */
        vs->flags = (flags & VXLAN_F_RCV_FLAGS);

        /* Publish the socket so other devices can find and share it, and
         * notify offload-capable NICs of the new tunnel UDP port.
         */
        spin_lock(&vn->sock_lock);
        hlist_add_head_rcu(&vs->hlist, vs_head(net, port));
        udp_tunnel_notify_add_rx_port(sock,
                                      (vs->flags & VXLAN_F_GPE) ?
                                      UDP_TUNNEL_TYPE_VXLAN_GPE :
                                      UDP_TUNNEL_TYPE_VXLAN);
        spin_unlock(&vn->sock_lock);

        /* Mark socket as an encapsulation socket. */
        memset(&tunnel_cfg, 0, sizeof(tunnel_cfg));
        tunnel_cfg.sk_user_data = vs;
        tunnel_cfg.encap_type = 1;
        tunnel_cfg.encap_rcv = vxlan_rcv;
        tunnel_cfg.encap_err_lookup = vxlan_err_lookup;
        tunnel_cfg.encap_destroy = NULL;
        tunnel_cfg.gro_receive = vxlan_gro_receive;
        tunnel_cfg.gro_complete = vxlan_gro_complete;

        setup_udp_tunnel_sock(net, sock, &tunnel_cfg);

        return vs;
}
3376
/* Attach one address family's socket to @vxlan.
 *
 * Tries to share an existing socket for the same <family, port, flags,
 * l3mdev> tuple unless no_share is set; otherwise creates a new one.
 * The resulting socket is published via RCU and the device is linked
 * into either the per-socket VNI group (vnifilter + metadata mode) or
 * the plain per-VNI device hash.
 */
static int __vxlan_sock_add(struct vxlan_dev *vxlan, bool ipv6)
{
        struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
        bool metadata = vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA;
        struct vxlan_sock *vs = NULL;
        struct vxlan_dev_node *node;
        int l3mdev_index = 0;

        /* Sockets bound inside an L3 master domain are keyed by it */
        if (vxlan->cfg.remote_ifindex)
                l3mdev_index = l3mdev_master_upper_ifindex_by_index(
                        vxlan->net, vxlan->cfg.remote_ifindex);

        if (!vxlan->cfg.no_share) {
                spin_lock(&vn->sock_lock);
                vs = vxlan_find_sock(vxlan->net, ipv6 ? AF_INET6 : AF_INET,
                                     vxlan->cfg.dst_port, vxlan->cfg.flags,
                                     l3mdev_index);
                /* A socket whose refcount already hit zero is being torn
                 * down; we cannot take a new reference on it.
                 */
                if (vs && !refcount_inc_not_zero(&vs->refcnt)) {
                        spin_unlock(&vn->sock_lock);
                        return -EBUSY;
                }
                spin_unlock(&vn->sock_lock);
        }
        if (!vs)
                vs = vxlan_socket_create(vxlan->net, ipv6,
                                         vxlan->cfg.dst_port, vxlan->cfg.flags,
                                         l3mdev_index);
        if (IS_ERR(vs))
                return PTR_ERR(vs);
#if IS_ENABLED(CONFIG_IPV6)
        if (ipv6) {
                rcu_assign_pointer(vxlan->vn6_sock, vs);
                node = &vxlan->hlist6;
        } else
#endif
        {
                rcu_assign_pointer(vxlan->vn4_sock, vs);
                node = &vxlan->hlist4;
        }

        /* vni filtering on a metadata device hangs the whole VNI group off
         * the socket; otherwise the device is hashed by its single VNI.
         */
        if (metadata && (vxlan->cfg.flags & VXLAN_F_VNIFILTER))
                vxlan_vs_add_vnigrp(vxlan, vs, ipv6);
        else
                vxlan_vs_add_dev(vs, vxlan, node);

        return 0;
}
3424
/* Open the device's tunnel socket(s).
 *
 * A metadata (collect-metadata) device may carry either family, so it
 * opens both v6 and v4 sockets; a plain device opens only the family of
 * its configuration.  An IPv6 failure other than -EAFNOSUPPORT aborts
 * the IPv4 attempt as well; any final error releases whatever sockets
 * were successfully added.
 */
static int vxlan_sock_add(struct vxlan_dev *vxlan)
{
        bool metadata = vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA;
        bool ipv6 = vxlan->cfg.flags & VXLAN_F_IPV6 || metadata;
        bool ipv4 = !ipv6 || metadata;
        int ret = 0;

        RCU_INIT_POINTER(vxlan->vn4_sock, NULL);
#if IS_ENABLED(CONFIG_IPV6)
        RCU_INIT_POINTER(vxlan->vn6_sock, NULL);
        if (ipv6) {
                ret = __vxlan_sock_add(vxlan, true);
                /* -EAFNOSUPPORT just means "no IPv6 here": still try v4 */
                if (ret < 0 && ret != -EAFNOSUPPORT)
                        ipv4 = false;
        }
#endif
        if (ipv4)
                ret = __vxlan_sock_add(vxlan, false);
        if (ret < 0)
                vxlan_sock_release(vxlan);
        return ret;
}
3447
3448 int vxlan_vni_in_use(struct net *src_net, struct vxlan_dev *vxlan,
3449                      struct vxlan_config *conf, __be32 vni)
3450 {
3451         struct vxlan_net *vn = net_generic(src_net, vxlan_net_id);
3452         struct vxlan_dev *tmp;
3453
3454         list_for_each_entry(tmp, &vn->vxlan_list, next) {
3455                 if (tmp == vxlan)
3456                         continue;
3457                 if (tmp->cfg.flags & VXLAN_F_VNIFILTER) {
3458                         if (!vxlan_vnifilter_lookup(tmp, vni))
3459                                 continue;
3460                 } else if (tmp->cfg.vni != vni) {
3461                         continue;
3462                 }
3463                 if (tmp->cfg.dst_port != conf->dst_port)
3464                         continue;
3465                 if ((tmp->cfg.flags & (VXLAN_F_RCV_FLAGS | VXLAN_F_IPV6)) !=
3466                     (conf->flags & (VXLAN_F_RCV_FLAGS | VXLAN_F_IPV6)))
3467                         continue;
3468
3469                 if ((conf->flags & VXLAN_F_IPV6_LINKLOCAL) &&
3470                     tmp->cfg.remote_ifindex != conf->remote_ifindex)
3471                         continue;
3472
3473                 return -EEXIST;
3474         }
3475
3476         return 0;
3477 }
3478
/* Validate and canonicalize @conf before it is applied to a device.
 *
 * On success the address families, destination port and ageing interval
 * in @conf have been defaulted, and *lower is set to the requested lower
 * device (NULL when none was given).  Returns 0 or a negative errno with
 * an extack message describing the problem.
 */
static int vxlan_config_validate(struct net *src_net, struct vxlan_config *conf,
                                 struct net_device **lower,
                                 struct vxlan_dev *old,
                                 struct netlink_ext_ack *extack)
{
        bool use_ipv6 = false;

        if (conf->flags & VXLAN_F_GPE) {
                /* For now, allow GPE only together with
                 * COLLECT_METADATA. This can be relaxed later; in such
                 * case, the other side of the PtP link will have to be
                 * provided.
                 */
                if ((conf->flags & ~VXLAN_F_ALLOWED_GPE) ||
                    !(conf->flags & VXLAN_F_COLLECT_METADATA)) {
                        NL_SET_ERR_MSG(extack,
                                       "VXLAN GPE does not support this combination of attributes");
                        return -EINVAL;
                }
        }

        if (!conf->remote_ip.sa.sa_family && !conf->saddr.sa.sa_family) {
                /* Unless IPv6 is explicitly requested, assume IPv4 */
                conf->remote_ip.sa.sa_family = AF_INET;
                conf->saddr.sa.sa_family = AF_INET;
        } else if (!conf->remote_ip.sa.sa_family) {
                conf->remote_ip.sa.sa_family = conf->saddr.sa.sa_family;
        } else if (!conf->saddr.sa.sa_family) {
                conf->saddr.sa.sa_family = conf->remote_ip.sa.sa_family;
        }

        if (conf->saddr.sa.sa_family != conf->remote_ip.sa.sa_family) {
                NL_SET_ERR_MSG(extack,
                               "Local and remote address must be from the same family");
                return -EINVAL;
        }

        if (vxlan_addr_multicast(&conf->saddr)) {
                NL_SET_ERR_MSG(extack, "Local address cannot be multicast");
                return -EINVAL;
        }

        if (conf->saddr.sa.sa_family == AF_INET6) {
                if (!IS_ENABLED(CONFIG_IPV6)) {
                        NL_SET_ERR_MSG(extack,
                                       "IPv6 support not enabled in the kernel");
                        return -EPFNOSUPPORT;
                }
                use_ipv6 = true;
                conf->flags |= VXLAN_F_IPV6;

                /* Fixed (non-metadata) configs must not mix link-local and
                 * global scope between the local and remote addresses.
                 */
                if (!(conf->flags & VXLAN_F_COLLECT_METADATA)) {
                        int local_type =
                                ipv6_addr_type(&conf->saddr.sin6.sin6_addr);
                        int remote_type =
                                ipv6_addr_type(&conf->remote_ip.sin6.sin6_addr);

                        if (local_type & IPV6_ADDR_LINKLOCAL) {
                                if (!(remote_type & IPV6_ADDR_LINKLOCAL) &&
                                    (remote_type != IPV6_ADDR_ANY)) {
                                        NL_SET_ERR_MSG(extack,
                                                       "Invalid combination of local and remote address scopes");
                                        return -EINVAL;
                                }

                                conf->flags |= VXLAN_F_IPV6_LINKLOCAL;
                        } else {
                                if (remote_type ==
                                    (IPV6_ADDR_UNICAST | IPV6_ADDR_LINKLOCAL)) {
                                        NL_SET_ERR_MSG(extack,
                                                       "Invalid combination of local and remote address scopes");
                                        return -EINVAL;
                                }

                                conf->flags &= ~VXLAN_F_IPV6_LINKLOCAL;
                        }
                }
        }

        if (conf->label && !use_ipv6) {
                NL_SET_ERR_MSG(extack,
                               "Label attribute only applies to IPv6 VXLAN devices");
                return -EINVAL;
        }

        if (conf->remote_ifindex) {
                struct net_device *lowerdev;

                lowerdev = __dev_get_by_index(src_net, conf->remote_ifindex);
                if (!lowerdev) {
                        NL_SET_ERR_MSG(extack,
                                       "Invalid local interface, device not found");
                        return -ENODEV;
                }

#if IS_ENABLED(CONFIG_IPV6)
                if (use_ipv6) {
                        struct inet6_dev *idev = __in6_dev_get(lowerdev);

                        if (idev && idev->cnf.disable_ipv6) {
                                NL_SET_ERR_MSG(extack,
                                               "IPv6 support disabled by administrator");
                                return -EPERM;
                        }
                }
#endif

                *lower = lowerdev;
        } else {
                /* Multicast and link-local setups need a lower device to
                 * scope the traffic; reject them without one.
                 */
                if (vxlan_addr_multicast(&conf->remote_ip)) {
                        NL_SET_ERR_MSG(extack,
                                       "Local interface required for multicast remote destination");

                        return -EINVAL;
                }

#if IS_ENABLED(CONFIG_IPV6)
                if (conf->flags & VXLAN_F_IPV6_LINKLOCAL) {
                        NL_SET_ERR_MSG(extack,
                                       "Local interface required for link-local local/remote addresses");
                        return -EINVAL;
                }
#endif

                *lower = NULL;
        }

        /* Default destination port: the IANA GPE port for GPE, otherwise
         * the module parameter (historically 8472, not the IANA 4789).
         */
        if (!conf->dst_port) {
                if (conf->flags & VXLAN_F_GPE)
                        conf->dst_port = htons(IANA_VXLAN_GPE_UDP_PORT);
                else
                        conf->dst_port = htons(vxlan_port);
        }

        if (!conf->age_interval)
                conf->age_interval = FDB_AGE_DEFAULT;

        if (vxlan_vni_in_use(src_net, old, conf, conf->vni)) {
                NL_SET_ERR_MSG(extack,
                               "A VXLAN device with the specified VNI already exists");
                return -EEXIST;
        }

        return 0;
}
3624
/* Copy a validated @conf into the device and derive MTU bounds, needed
 * headroom and GSO limits from @lowerdev when one is present.  Used by
 * both newlink (changelink == false) and changelink paths.
 */
static void vxlan_config_apply(struct net_device *dev,
                               struct vxlan_config *conf,
                               struct net_device *lowerdev,
                               struct net *src_net,
                               bool changelink)
{
        struct vxlan_dev *vxlan = netdev_priv(dev);
        struct vxlan_rdst *dst = &vxlan->default_dst;
        unsigned short needed_headroom = ETH_HLEN;
        bool use_ipv6 = !!(conf->flags & VXLAN_F_IPV6);
        int max_mtu = ETH_MAX_MTU;

        if (!changelink) {
                /* GPE can carry bare IP packets, so it gets the raw
                 * (ARPHRD_NONE) setup instead of the Ethernet one.
                 */
                if (conf->flags & VXLAN_F_GPE)
                        vxlan_raw_setup(dev);
                else
                        vxlan_ether_setup(dev);

                if (conf->mtu)
                        dev->mtu = conf->mtu;

                vxlan->net = src_net;
        }

        dst->remote_vni = conf->vni;

        memcpy(&dst->remote_ip, &conf->remote_ip, sizeof(conf->remote_ip));

        if (lowerdev) {
                dst->remote_ifindex = conf->remote_ifindex;

                /* Inherit segmentation limits from the lower device */
                netif_set_gso_max_size(dev, lowerdev->gso_max_size);
                netif_set_gso_max_segs(dev, lowerdev->gso_max_segs);

                needed_headroom = lowerdev->hard_header_len;
                needed_headroom += lowerdev->needed_headroom;

                dev->needed_tailroom = lowerdev->needed_tailroom;

                /* Leave room for the encapsulation headers but never go
                 * below the minimum Ethernet MTU.
                 */
                max_mtu = lowerdev->mtu - (use_ipv6 ? VXLAN6_HEADROOM :
                                           VXLAN_HEADROOM);
                if (max_mtu < ETH_MIN_MTU)
                        max_mtu = ETH_MIN_MTU;

                if (!changelink && !conf->mtu)
                        dev->mtu = max_mtu;
        }

        if (dev->mtu > max_mtu)
                dev->mtu = max_mtu;

        /* Metadata mode may transmit either family, so reserve headroom
         * for the larger IPv6 encapsulation.
         */
        if (use_ipv6 || conf->flags & VXLAN_F_COLLECT_METADATA)
                needed_headroom += VXLAN6_HEADROOM;
        else
                needed_headroom += VXLAN_HEADROOM;
        dev->needed_headroom = needed_headroom;

        memcpy(&vxlan->cfg, conf, sizeof(*conf));
}
3684
3685 static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
3686                                struct vxlan_config *conf, bool changelink,
3687                                struct netlink_ext_ack *extack)
3688 {
3689         struct vxlan_dev *vxlan = netdev_priv(dev);
3690         struct net_device *lowerdev;
3691         int ret;
3692
3693         ret = vxlan_config_validate(src_net, conf, &lowerdev, vxlan, extack);
3694         if (ret)
3695                 return ret;
3696
3697         vxlan_config_apply(dev, conf, lowerdev, src_net, changelink);
3698
3699         return 0;
3700 }
3701
/* Core of newlink: configure, register and link a new vxlan device.
 *
 * Order matters here: the default all-zeros FDB entry is created but not
 * inserted/notified until after register_netdevice() and the upper-dev
 * link succeed, so the error paths below must unwind whatever subset of
 * that work has been done (see the goto labels).
 */
static int __vxlan_dev_create(struct net *net, struct net_device *dev,
                              struct vxlan_config *conf,
                              struct netlink_ext_ack *extack)
{
        struct vxlan_net *vn = net_generic(net, vxlan_net_id);
        struct vxlan_dev *vxlan = netdev_priv(dev);
        struct net_device *remote_dev = NULL;
        struct vxlan_fdb *f = NULL;
        bool unregister = false;
        struct vxlan_rdst *dst;
        int err;

        dst = &vxlan->default_dst;
        err = vxlan_dev_configure(net, dev, conf, false, extack);
        if (err)
                return err;

        dev->ethtool_ops = &vxlan_ethtool_ops;

        /* create an fdb entry for a valid default destination */
        if (!vxlan_addr_any(&dst->remote_ip)) {
                err = vxlan_fdb_create(vxlan, all_zeros_mac,
                                       &dst->remote_ip,
                                       NUD_REACHABLE | NUD_PERMANENT,
                                       vxlan->cfg.dst_port,
                                       dst->remote_vni,
                                       dst->remote_vni,
                                       dst->remote_ifindex,
                                       NTF_SELF, 0, &f, extack);
                if (err)
                        return err;
        }

        err = register_netdevice(dev);
        if (err)
                goto errout;
        unregister = true;

        /* Tie the device to its lower device in the netdev hierarchy */
        if (dst->remote_ifindex) {
                remote_dev = __dev_get_by_index(net, dst->remote_ifindex);
                if (!remote_dev) {
                        err = -ENODEV;
                        goto errout;
                }

                err = netdev_upper_dev_link(remote_dev, dev, extack);
                if (err)
                        goto errout;
        }

        err = rtnl_configure_link(dev, NULL);
        if (err < 0)
                goto unlink;

        if (f) {
                vxlan_fdb_insert(vxlan, all_zeros_mac, dst->remote_vni, f);

                /* notify default fdb entry */
                err = vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f),
                                       RTM_NEWNEIGH, true, extack);
                if (err) {
                        vxlan_fdb_destroy(vxlan, f, false, false);
                        if (remote_dev)
                                netdev_upper_dev_unlink(remote_dev, dev);
                        goto unregister;
                }
        }

        list_add(&vxlan->next, &vn->vxlan_list);
        if (remote_dev)
                dst->remote_dev = remote_dev;
        return 0;
unlink:
        if (remote_dev)
                netdev_upper_dev_unlink(remote_dev, dev);
errout:
        /* unregister_netdevice() destroys the default FDB entry with deletion
         * notification. But the addition notification was not sent yet, so
         * destroy the entry by hand here.
         */
        if (f)
                __vxlan_fdb_free(f);
unregister:
        if (unregister)
                unregister_netdevice(dev);
        return err;
}
3789
3790 /* Set/clear flags based on attribute */
3791 static int vxlan_nl2flag(struct vxlan_config *conf, struct nlattr *tb[],
3792                           int attrtype, unsigned long mask, bool changelink,
3793                           bool changelink_supported,
3794                           struct netlink_ext_ack *extack)
3795 {
3796         unsigned long flags;
3797
3798         if (!tb[attrtype])
3799                 return 0;
3800
3801         if (changelink && !changelink_supported) {
3802                 vxlan_flag_attr_error(attrtype, extack);
3803                 return -EOPNOTSUPP;
3804         }
3805
3806         if (vxlan_policy[attrtype].type == NLA_FLAG)
3807                 flags = conf->flags | mask;
3808         else if (nla_get_u8(tb[attrtype]))
3809                 flags = conf->flags | mask;
3810         else
3811                 flags = conf->flags & ~mask;
3812
3813         conf->flags = flags;
3814
3815         return 0;
3816 }
3817
3818 static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[],
3819                          struct net_device *dev, struct vxlan_config *conf,
3820                          bool changelink, struct netlink_ext_ack *extack)
3821 {
3822         struct vxlan_dev *vxlan = netdev_priv(dev);
3823         int err = 0;
3824
3825         memset(conf, 0, sizeof(*conf));
3826
3827         /* if changelink operation, start with old existing cfg */
3828         if (changelink)
3829                 memcpy(conf, &vxlan->cfg, sizeof(*conf));
3830
3831         if (data[IFLA_VXLAN_ID]) {
3832                 __be32 vni = cpu_to_be32(nla_get_u32(data[IFLA_VXLAN_ID]));
3833
3834                 if (changelink && (vni != conf->vni)) {
3835                         NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_ID], "Cannot change VNI");
3836                         return -EOPNOTSUPP;
3837                 }
3838                 conf->vni = cpu_to_be32(nla_get_u32(data[IFLA_VXLAN_ID]));
3839         }
3840
3841         if (data[IFLA_VXLAN_GROUP]) {
3842                 if (changelink && (conf->remote_ip.sa.sa_family != AF_INET)) {
3843                         NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_GROUP], "New group address family does not match old group");
3844                         return -EOPNOTSUPP;
3845                 }
3846
3847                 conf->remote_ip.sin.sin_addr.s_addr = nla_get_in_addr(data[IFLA_VXLAN_GROUP]);
3848                 conf->remote_ip.sa.sa_family = AF_INET;
3849         } else if (data[IFLA_VXLAN_GROUP6]) {
3850                 if (!IS_ENABLED(CONFIG_IPV6)) {
3851                         NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_GROUP6], "IPv6 support not enabled in the kernel");
3852                         return -EPFNOSUPPORT;
3853                 }
3854
3855                 if (changelink && (conf->remote_ip.sa.sa_family != AF_INET6)) {
3856                         NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_GROUP6], "New group address family does not match old group");
3857                         return -EOPNOTSUPP;
3858                 }
3859
3860                 conf->remote_ip.sin6.sin6_addr = nla_get_in6_addr(data[IFLA_VXLAN_GROUP6]);
3861                 conf->remote_ip.sa.sa_family = AF_INET6;
3862         }
3863
3864         if (data[IFLA_VXLAN_LOCAL]) {
3865                 if (changelink && (conf->saddr.sa.sa_family != AF_INET)) {
3866                         NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_LOCAL], "New local address family does not match old");
3867                         return -EOPNOTSUPP;
3868                 }
3869
3870                 conf->saddr.sin.sin_addr.s_addr = nla_get_in_addr(data[IFLA_VXLAN_LOCAL]);
3871                 conf->saddr.sa.sa_family = AF_INET;
3872         } else if (data[IFLA_VXLAN_LOCAL6]) {
3873                 if (!IS_ENABLED(CONFIG_IPV6)) {
3874                         NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_LOCAL6], "IPv6 support not enabled in the kernel");
3875                         return -EPFNOSUPPORT;
3876                 }
3877
3878                 if (changelink && (conf->saddr.sa.sa_family != AF_INET6)) {
3879                         NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_LOCAL6], "New local address family does not match old");
3880                         return -EOPNOTSUPP;
3881                 }
3882
3883                 /* TODO: respect scope id */
3884                 conf->saddr.sin6.sin6_addr = nla_get_in6_addr(data[IFLA_VXLAN_LOCAL6]);
3885                 conf->saddr.sa.sa_family = AF_INET6;
3886         }
3887
3888         if (data[IFLA_VXLAN_LINK])
3889                 conf->remote_ifindex = nla_get_u32(data[IFLA_VXLAN_LINK]);
3890
3891         if (data[IFLA_VXLAN_TOS])
3892                 conf->tos  = nla_get_u8(data[IFLA_VXLAN_TOS]);
3893
3894         if (data[IFLA_VXLAN_TTL])
3895                 conf->ttl = nla_get_u8(data[IFLA_VXLAN_TTL]);
3896
3897         if (data[IFLA_VXLAN_TTL_INHERIT]) {
3898                 err = vxlan_nl2flag(conf, data, IFLA_VXLAN_TTL_INHERIT,
3899                                     VXLAN_F_TTL_INHERIT, changelink, false,
3900                                     extack);
3901                 if (err)
3902                         return err;
3903
3904         }
3905
3906         if (data[IFLA_VXLAN_LABEL])
3907                 conf->label = nla_get_be32(data[IFLA_VXLAN_LABEL]) &
3908                              IPV6_FLOWLABEL_MASK;
3909
3910         if (data[IFLA_VXLAN_LEARNING]) {
3911                 err = vxlan_nl2flag(conf, data, IFLA_VXLAN_LEARNING,
3912                                     VXLAN_F_LEARN, changelink, true,
3913                                     extack);
3914                 if (err)
3915                         return err;
3916         } else if (!changelink) {
3917                 /* default to learn on a new device */
3918                 conf->flags |= VXLAN_F_LEARN;
3919         }
3920
3921         if (data[IFLA_VXLAN_AGEING])
3922                 conf->age_interval = nla_get_u32(data[IFLA_VXLAN_AGEING]);
3923
3924         if (data[IFLA_VXLAN_PROXY]) {
3925                 err = vxlan_nl2flag(conf, data, IFLA_VXLAN_PROXY,
3926                                     VXLAN_F_PROXY, changelink, false,
3927                                     extack);
3928                 if (err)
3929                         return err;
3930         }
3931
3932         if (data[IFLA_VXLAN_RSC]) {
3933                 err = vxlan_nl2flag(conf, data, IFLA_VXLAN_RSC,
3934                                     VXLAN_F_RSC, changelink, false,
3935                                     extack);
3936                 if (err)
3937                         return err;
3938         }
3939
3940         if (data[IFLA_VXLAN_L2MISS]) {
3941                 err = vxlan_nl2flag(conf, data, IFLA_VXLAN_L2MISS,
3942                                     VXLAN_F_L2MISS, changelink, false,
3943                                     extack);
3944                 if (err)
3945                         return err;
3946         }
3947
3948         if (data[IFLA_VXLAN_L3MISS]) {
3949                 err = vxlan_nl2flag(conf, data, IFLA_VXLAN_L3MISS,
3950                                     VXLAN_F_L3MISS, changelink, false,
3951                                     extack);
3952                 if (err)
3953                         return err;
3954         }
3955
3956         if (data[IFLA_VXLAN_LIMIT]) {
3957                 if (changelink) {
3958                         NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_LIMIT],
3959                                             "Cannot change limit");
3960                         return -EOPNOTSUPP;
3961                 }
3962                 conf->addrmax = nla_get_u32(data[IFLA_VXLAN_LIMIT]);
3963         }
3964
3965         if (data[IFLA_VXLAN_COLLECT_METADATA]) {
3966                 err = vxlan_nl2flag(conf, data, IFLA_VXLAN_COLLECT_METADATA,
3967                                     VXLAN_F_COLLECT_METADATA, changelink, false,
3968                                     extack);
3969                 if (err)
3970                         return err;
3971         }
3972
3973         if (data[IFLA_VXLAN_PORT_RANGE]) {
3974                 if (!changelink) {
3975                         const struct ifla_vxlan_port_range *p
3976                                 = nla_data(data[IFLA_VXLAN_PORT_RANGE]);
3977                         conf->port_min = ntohs(p->low);
3978                         conf->port_max = ntohs(p->high);
3979                 } else {
3980                         NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_PORT_RANGE],
3981                                             "Cannot change port range");
3982                         return -EOPNOTSUPP;
3983                 }
3984         }
3985
3986         if (data[IFLA_VXLAN_PORT]) {
3987                 if (changelink) {
3988                         NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_PORT],
3989                                             "Cannot change port");
3990                         return -EOPNOTSUPP;
3991                 }
3992                 conf->dst_port = nla_get_be16(data[IFLA_VXLAN_PORT]);
3993         }
3994
3995         if (data[IFLA_VXLAN_UDP_CSUM]) {
3996                 if (changelink) {
3997                         NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_UDP_CSUM],
3998                                             "Cannot change UDP_CSUM flag");
3999                         return -EOPNOTSUPP;
4000                 }
4001                 if (!nla_get_u8(data[IFLA_VXLAN_UDP_CSUM]))
4002                         conf->flags |= VXLAN_F_UDP_ZERO_CSUM_TX;
4003         }
4004
4005         if (data[IFLA_VXLAN_UDP_ZERO_CSUM6_TX]) {
4006                 err = vxlan_nl2flag(conf, data, IFLA_VXLAN_UDP_ZERO_CSUM6_TX,
4007                                     VXLAN_F_UDP_ZERO_CSUM6_TX, changelink,
4008                                     false, extack);
4009                 if (err)
4010                         return err;
4011         }
4012
4013         if (data[IFLA_VXLAN_UDP_ZERO_CSUM6_RX]) {
4014                 err = vxlan_nl2flag(conf, data, IFLA_VXLAN_UDP_ZERO_CSUM6_RX,
4015                                     VXLAN_F_UDP_ZERO_CSUM6_RX, changelink,
4016                                     false, extack);
4017                 if (err)
4018                         return err;
4019         }
4020
4021         if (data[IFLA_VXLAN_REMCSUM_TX]) {
4022                 err = vxlan_nl2flag(conf, data, IFLA_VXLAN_REMCSUM_TX,
4023                                     VXLAN_F_REMCSUM_TX, changelink, false,
4024                                     extack);
4025                 if (err)
4026                         return err;
4027         }
4028
4029         if (data[IFLA_VXLAN_REMCSUM_RX]) {
4030                 err = vxlan_nl2flag(conf, data, IFLA_VXLAN_REMCSUM_RX,
4031                                     VXLAN_F_REMCSUM_RX, changelink, false,
4032                                     extack);
4033                 if (err)
4034                         return err;
4035         }
4036
4037         if (data[IFLA_VXLAN_GBP]) {
4038                 err = vxlan_nl2flag(conf, data, IFLA_VXLAN_GBP,
4039                                     VXLAN_F_GBP, changelink, false, extack);
4040                 if (err)
4041                         return err;
4042         }
4043
4044         if (data[IFLA_VXLAN_GPE]) {
4045                 err = vxlan_nl2flag(conf, data, IFLA_VXLAN_GPE,
4046                                     VXLAN_F_GPE, changelink, false,
4047                                     extack);
4048                 if (err)
4049                         return err;
4050         }
4051
4052         if (data[IFLA_VXLAN_REMCSUM_NOPARTIAL]) {
4053                 err = vxlan_nl2flag(conf, data, IFLA_VXLAN_REMCSUM_NOPARTIAL,
4054                                     VXLAN_F_REMCSUM_NOPARTIAL, changelink,
4055                                     false, extack);
4056                 if (err)
4057                         return err;
4058         }
4059
4060         if (tb[IFLA_MTU]) {
4061                 if (changelink) {
4062                         NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_MTU],
4063                                             "Cannot change mtu");
4064                         return -EOPNOTSUPP;
4065                 }
4066                 conf->mtu = nla_get_u32(tb[IFLA_MTU]);
4067         }
4068
4069         if (data[IFLA_VXLAN_DF])
4070                 conf->df = nla_get_u8(data[IFLA_VXLAN_DF]);
4071
4072         if (data[IFLA_VXLAN_VNIFILTER]) {
4073                 err = vxlan_nl2flag(conf, data, IFLA_VXLAN_VNIFILTER,
4074                                     VXLAN_F_VNIFILTER, changelink, false,
4075                                     extack);
4076                 if (err)
4077                         return err;
4078
4079                 if ((conf->flags & VXLAN_F_VNIFILTER) &&
4080                     !(conf->flags & VXLAN_F_COLLECT_METADATA)) {
4081                         NL_SET_ERR_MSG_ATTR(extack, data[IFLA_VXLAN_VNIFILTER],
4082                                             "vxlan vnifilter only valid in collect metadata mode");
4083                         return -EINVAL;
4084                 }
4085         }
4086
4087         return 0;
4088 }
4089
4090 static int vxlan_newlink(struct net *src_net, struct net_device *dev,
4091                          struct nlattr *tb[], struct nlattr *data[],
4092                          struct netlink_ext_ack *extack)
4093 {
4094         struct vxlan_config conf;
4095         int err;
4096
4097         err = vxlan_nl2conf(tb, data, dev, &conf, false, extack);
4098         if (err)
4099                 return err;
4100
4101         return __vxlan_dev_create(src_net, dev, &conf, extack);
4102 }
4103
/* rtnl_link_ops .changelink handler: apply netlink attribute changes to an
 * existing vxlan device.  Attributes that cannot be changed after creation
 * are rejected inside vxlan_nl2conf(..., changelink=true).  Uses the
 * netdev_adjacent_change prepare/abort/commit pattern so a lower-device
 * change can be rolled back on any failure.
 */
static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[],
			    struct nlattr *data[],
			    struct netlink_ext_ack *extack)
{
	struct vxlan_dev *vxlan = netdev_priv(dev);
	struct net_device *lowerdev;
	struct vxlan_config conf;
	struct vxlan_rdst *dst;
	int err;

	dst = &vxlan->default_dst;
	err = vxlan_nl2conf(tb, data, dev, &conf, true, extack);
	if (err)
		return err;

	err = vxlan_config_validate(vxlan->net, &conf, &lowerdev,
				    vxlan, extack);
	if (err)
		return err;

	/* NULL lowerdev from here on means "lower device unchanged" */
	if (dst->remote_dev == lowerdev)
		lowerdev = NULL;

	err = netdev_adjacent_change_prepare(dst->remote_dev, lowerdev, dev,
					     extack);
	if (err)
		return err;

	/* handle default dst entry */
	if (!vxlan_addr_equal(&conf.remote_ip, &dst->remote_ip)) {
		u32 hash_index = fdb_head_index(vxlan, all_zeros_mac, conf.vni);

		/* Add the new all-zeros-MAC entry before deleting the old
		 * one, under the bucket lock, so there is no window without
		 * a default remote.
		 */
		spin_lock_bh(&vxlan->hash_lock[hash_index]);
		if (!vxlan_addr_any(&conf.remote_ip)) {
			err = vxlan_fdb_update(vxlan, all_zeros_mac,
					       &conf.remote_ip,
					       NUD_REACHABLE | NUD_PERMANENT,
					       NLM_F_APPEND | NLM_F_CREATE,
					       vxlan->cfg.dst_port,
					       conf.vni, conf.vni,
					       conf.remote_ifindex,
					       NTF_SELF, 0, true, extack);
			if (err) {
				spin_unlock_bh(&vxlan->hash_lock[hash_index]);
				netdev_adjacent_change_abort(dst->remote_dev,
							     lowerdev, dev);
				return err;
			}
		}
		if (!vxlan_addr_any(&dst->remote_ip))
			__vxlan_fdb_delete(vxlan, all_zeros_mac,
					   dst->remote_ip,
					   vxlan->cfg.dst_port,
					   dst->remote_vni,
					   dst->remote_vni,
					   dst->remote_ifindex,
					   true);
		spin_unlock_bh(&vxlan->hash_lock[hash_index]);

		/* If vni filtering device, also update fdb entries of
		 * all vnis that were using default remote ip
		 */
		if (vxlan->cfg.flags & VXLAN_F_VNIFILTER) {
			err = vxlan_vnilist_update_group(vxlan, &dst->remote_ip,
							 &conf.remote_ip, extack);
			if (err) {
				netdev_adjacent_change_abort(dst->remote_dev,
							     lowerdev, dev);
				return err;
			}
		}
	}

	/* Ageing interval changed: kick the timer to rescan right away */
	if (conf.age_interval != vxlan->cfg.age_interval)
		mod_timer(&vxlan->age_timer, jiffies);

	netdev_adjacent_change_commit(dst->remote_dev, lowerdev, dev);
	if (lowerdev && lowerdev != dst->remote_dev)
		dst->remote_dev = lowerdev;
	vxlan_config_apply(dev, &conf, lowerdev, vxlan->net, true);
	return 0;
}
4186
/* rtnl_link_ops .dellink handler: flush all FDB state, remove the device
 * from the per-netns vxlan list and queue it for unregistration on @head.
 * The upper/lower link to the remote device, if any, is torn down last.
 * Caller holds RTNL.
 */
static void vxlan_dellink(struct net_device *dev, struct list_head *head)
{
	struct vxlan_dev *vxlan = netdev_priv(dev);

	vxlan_flush(vxlan, true);

	list_del(&vxlan->next);
	unregister_netdevice_queue(dev, head);
	if (vxlan->default_dst.remote_dev)
		netdev_upper_dev_unlink(vxlan->default_dst.remote_dev, dev);
}
4198
4199 static size_t vxlan_get_size(const struct net_device *dev)
4200 {
4201
4202         return nla_total_size(sizeof(__u32)) +  /* IFLA_VXLAN_ID */
4203                 nla_total_size(sizeof(struct in6_addr)) + /* IFLA_VXLAN_GROUP{6} */
4204                 nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_LINK */
4205                 nla_total_size(sizeof(struct in6_addr)) + /* IFLA_VXLAN_LOCAL{6} */
4206                 nla_total_size(sizeof(__u8)) +  /* IFLA_VXLAN_TTL */
4207                 nla_total_size(sizeof(__u8)) +  /* IFLA_VXLAN_TTL_INHERIT */
4208                 nla_total_size(sizeof(__u8)) +  /* IFLA_VXLAN_TOS */
4209                 nla_total_size(sizeof(__u8)) +  /* IFLA_VXLAN_DF */
4210                 nla_total_size(sizeof(__be32)) + /* IFLA_VXLAN_LABEL */
4211                 nla_total_size(sizeof(__u8)) +  /* IFLA_VXLAN_LEARNING */
4212                 nla_total_size(sizeof(__u8)) +  /* IFLA_VXLAN_PROXY */
4213                 nla_total_size(sizeof(__u8)) +  /* IFLA_VXLAN_RSC */
4214                 nla_total_size(sizeof(__u8)) +  /* IFLA_VXLAN_L2MISS */
4215                 nla_total_size(sizeof(__u8)) +  /* IFLA_VXLAN_L3MISS */
4216                 nla_total_size(sizeof(__u8)) +  /* IFLA_VXLAN_COLLECT_METADATA */
4217                 nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_AGEING */
4218                 nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_LIMIT */
4219                 nla_total_size(sizeof(struct ifla_vxlan_port_range)) +
4220                 nla_total_size(sizeof(__be16)) + /* IFLA_VXLAN_PORT */
4221                 nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_UDP_CSUM */
4222                 nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_UDP_ZERO_CSUM6_TX */
4223                 nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_UDP_ZERO_CSUM6_RX */
4224                 nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_REMCSUM_TX */
4225                 nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_REMCSUM_RX */
4226                 0;
4227 }
4228
/* rtnl_link_ops .fill_info handler: dump the device configuration as
 * netlink attributes.  Space must have been reserved per vxlan_get_size().
 * Returns 0 on success or -EMSGSIZE if the skb ran out of room.
 */
static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
	const struct vxlan_dev *vxlan = netdev_priv(dev);
	const struct vxlan_rdst *dst = &vxlan->default_dst;
	struct ifla_vxlan_port_range ports = {
		.low =  htons(vxlan->cfg.port_min),
		.high = htons(vxlan->cfg.port_max),
	};

	if (nla_put_u32(skb, IFLA_VXLAN_ID, be32_to_cpu(dst->remote_vni)))
		goto nla_put_failure;

	/* Default remote: IPv4 vs IPv6 attribute depends on address family */
	if (!vxlan_addr_any(&dst->remote_ip)) {
		if (dst->remote_ip.sa.sa_family == AF_INET) {
			if (nla_put_in_addr(skb, IFLA_VXLAN_GROUP,
					    dst->remote_ip.sin.sin_addr.s_addr))
				goto nla_put_failure;
#if IS_ENABLED(CONFIG_IPV6)
		} else {
			if (nla_put_in6_addr(skb, IFLA_VXLAN_GROUP6,
					     &dst->remote_ip.sin6.sin6_addr))
				goto nla_put_failure;
#endif
		}
	}

	if (dst->remote_ifindex && nla_put_u32(skb, IFLA_VXLAN_LINK, dst->remote_ifindex))
		goto nla_put_failure;

	/* Local source address, again split by address family */
	if (!vxlan_addr_any(&vxlan->cfg.saddr)) {
		if (vxlan->cfg.saddr.sa.sa_family == AF_INET) {
			if (nla_put_in_addr(skb, IFLA_VXLAN_LOCAL,
					    vxlan->cfg.saddr.sin.sin_addr.s_addr))
				goto nla_put_failure;
#if IS_ENABLED(CONFIG_IPV6)
		} else {
			if (nla_put_in6_addr(skb, IFLA_VXLAN_LOCAL6,
					     &vxlan->cfg.saddr.sin6.sin6_addr))
				goto nla_put_failure;
#endif
		}
	}

	/* Scalar and boolean-flag attributes; note UDP_CSUM is reported
	 * inverted relative to the VXLAN_F_UDP_ZERO_CSUM_TX flag.
	 */
	if (nla_put_u8(skb, IFLA_VXLAN_TTL, vxlan->cfg.ttl) ||
	    nla_put_u8(skb, IFLA_VXLAN_TTL_INHERIT,
		       !!(vxlan->cfg.flags & VXLAN_F_TTL_INHERIT)) ||
	    nla_put_u8(skb, IFLA_VXLAN_TOS, vxlan->cfg.tos) ||
	    nla_put_u8(skb, IFLA_VXLAN_DF, vxlan->cfg.df) ||
	    nla_put_be32(skb, IFLA_VXLAN_LABEL, vxlan->cfg.label) ||
	    nla_put_u8(skb, IFLA_VXLAN_LEARNING,
		       !!(vxlan->cfg.flags & VXLAN_F_LEARN)) ||
	    nla_put_u8(skb, IFLA_VXLAN_PROXY,
		       !!(vxlan->cfg.flags & VXLAN_F_PROXY)) ||
	    nla_put_u8(skb, IFLA_VXLAN_RSC,
		       !!(vxlan->cfg.flags & VXLAN_F_RSC)) ||
	    nla_put_u8(skb, IFLA_VXLAN_L2MISS,
		       !!(vxlan->cfg.flags & VXLAN_F_L2MISS)) ||
	    nla_put_u8(skb, IFLA_VXLAN_L3MISS,
		       !!(vxlan->cfg.flags & VXLAN_F_L3MISS)) ||
	    nla_put_u8(skb, IFLA_VXLAN_COLLECT_METADATA,
		       !!(vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA)) ||
	    nla_put_u32(skb, IFLA_VXLAN_AGEING, vxlan->cfg.age_interval) ||
	    nla_put_u32(skb, IFLA_VXLAN_LIMIT, vxlan->cfg.addrmax) ||
	    nla_put_be16(skb, IFLA_VXLAN_PORT, vxlan->cfg.dst_port) ||
	    nla_put_u8(skb, IFLA_VXLAN_UDP_CSUM,
		       !(vxlan->cfg.flags & VXLAN_F_UDP_ZERO_CSUM_TX)) ||
	    nla_put_u8(skb, IFLA_VXLAN_UDP_ZERO_CSUM6_TX,
		       !!(vxlan->cfg.flags & VXLAN_F_UDP_ZERO_CSUM6_TX)) ||
	    nla_put_u8(skb, IFLA_VXLAN_UDP_ZERO_CSUM6_RX,
		       !!(vxlan->cfg.flags & VXLAN_F_UDP_ZERO_CSUM6_RX)) ||
	    nla_put_u8(skb, IFLA_VXLAN_REMCSUM_TX,
		       !!(vxlan->cfg.flags & VXLAN_F_REMCSUM_TX)) ||
	    nla_put_u8(skb, IFLA_VXLAN_REMCSUM_RX,
		       !!(vxlan->cfg.flags & VXLAN_F_REMCSUM_RX)))
		goto nla_put_failure;

	if (nla_put(skb, IFLA_VXLAN_PORT_RANGE, sizeof(ports), &ports))
		goto nla_put_failure;

	/* Payload-less flag attributes, emitted only when the flag is set */
	if (vxlan->cfg.flags & VXLAN_F_GBP &&
	    nla_put_flag(skb, IFLA_VXLAN_GBP))
		goto nla_put_failure;

	if (vxlan->cfg.flags & VXLAN_F_GPE &&
	    nla_put_flag(skb, IFLA_VXLAN_GPE))
		goto nla_put_failure;

	if (vxlan->cfg.flags & VXLAN_F_REMCSUM_NOPARTIAL &&
	    nla_put_flag(skb, IFLA_VXLAN_REMCSUM_NOPARTIAL))
		goto nla_put_failure;

	if (vxlan->cfg.flags & VXLAN_F_VNIFILTER &&
	    nla_put_u8(skb, IFLA_VXLAN_VNIFILTER,
		       !!(vxlan->cfg.flags & VXLAN_F_VNIFILTER)))
		goto nla_put_failure;

	return 0;

nla_put_failure:
	return -EMSGSIZE;
}
4330
4331 static struct net *vxlan_get_link_net(const struct net_device *dev)
4332 {
4333         struct vxlan_dev *vxlan = netdev_priv(dev);
4334
4335         return vxlan->net;
4336 }
4337
/* rtnetlink operations implementing the "vxlan" link type */
static struct rtnl_link_ops vxlan_link_ops __read_mostly = {
	.kind		= "vxlan",
	.maxtype	= IFLA_VXLAN_MAX,
	.policy		= vxlan_policy,
	.priv_size	= sizeof(struct vxlan_dev),
	.setup		= vxlan_setup,
	.validate	= vxlan_validate,
	.newlink	= vxlan_newlink,
	.changelink	= vxlan_changelink,
	.dellink	= vxlan_dellink,
	.get_size	= vxlan_get_size,
	.fill_info	= vxlan_fill_info,
	.get_link_net	= vxlan_get_link_net,
};
4352
/* In-kernel API for other subsystems to create a vxlan device from a
 * struct vxlan_config.  Returns the new device or an ERR_PTR; on failure
 * everything that was partially set up is torn down here.
 */
struct net_device *vxlan_dev_create(struct net *net, const char *name,
				    u8 name_assign_type,
				    struct vxlan_config *conf)
{
	/* Empty attribute table: configuration comes from @conf, not netlink */
	struct nlattr *tb[IFLA_MAX + 1];
	struct net_device *dev;
	int err;

	memset(&tb, 0, sizeof(tb));

	dev = rtnl_create_link(net, name, name_assign_type,
			       &vxlan_link_ops, tb, NULL);
	if (IS_ERR(dev))
		return dev;

	err = __vxlan_dev_create(net, dev, conf, NULL);
	if (err < 0) {
		free_netdev(dev);
		return ERR_PTR(err);
	}

	/* Finalize registration; on failure undo __vxlan_dev_create() via
	 * the normal dellink + unregister path.
	 */
	err = rtnl_configure_link(dev, NULL);
	if (err < 0) {
		LIST_HEAD(list_kill);

		vxlan_dellink(dev, &list_kill);
		unregister_netdevice_many(&list_kill);
		return ERR_PTR(err);
	}

	return dev;
}
EXPORT_SYMBOL_GPL(vxlan_dev_create);
4386
/* Remove every vxlan device in @vn whose default remote is bound to the
 * lower device @dev that is going away.  Called from the netdevice
 * notifier on NETDEV_UNREGISTER.
 */
static void vxlan_handle_lowerdev_unregister(struct vxlan_net *vn,
					     struct net_device *dev)
{
	struct vxlan_dev *vxlan, *next;
	LIST_HEAD(list_kill);

	list_for_each_entry_safe(vxlan, next, &vn->vxlan_list, next) {
		struct vxlan_rdst *dst = &vxlan->default_dst;

		/* In case we created vxlan device with carrier
		 * and we loose the carrier due to module unload
		 * we also need to remove vxlan device. In other
		 * cases, it's not necessary and remote_ifindex
		 * is 0 here, so no matches.
		 */
		if (dst->remote_ifindex == dev->ifindex)
			vxlan_dellink(vxlan->dev, &list_kill);
	}

	unregister_netdevice_many(&list_kill);
}
4408
4409 static int vxlan_netdevice_event(struct notifier_block *unused,
4410                                  unsigned long event, void *ptr)
4411 {
4412         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
4413         struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
4414
4415         if (event == NETDEV_UNREGISTER)
4416                 vxlan_handle_lowerdev_unregister(vn, dev);
4417         else if (event == NETDEV_UDP_TUNNEL_PUSH_INFO)
4418                 vxlan_offload_rx_ports(dev, true);
4419         else if (event == NETDEV_UDP_TUNNEL_DROP_INFO)
4420                 vxlan_offload_rx_ports(dev, false);
4421
4422         return NOTIFY_DONE;
4423 }
4424
/* Registered on the netdevice notifier chain in vxlan_init_module() */
static struct notifier_block vxlan_notifier_block __read_mostly = {
	.notifier_call = vxlan_netdevice_event,
};
4428
/* Mark the remote described by @fdb_info as (not) hardware-offloaded.
 * Silently does nothing if the FDB entry or its remote no longer exists.
 * Takes the per-bucket hash lock for the entry's MAC/VNI.
 */
static void
vxlan_fdb_offloaded_set(struct net_device *dev,
			struct switchdev_notifier_vxlan_fdb_info *fdb_info)
{
	struct vxlan_dev *vxlan = netdev_priv(dev);
	struct vxlan_rdst *rdst;
	struct vxlan_fdb *f;
	u32 hash_index;

	hash_index = fdb_head_index(vxlan, fdb_info->eth_addr, fdb_info->vni);

	spin_lock_bh(&vxlan->hash_lock[hash_index]);

	f = vxlan_find_mac(vxlan, fdb_info->eth_addr, fdb_info->vni);
	if (!f)
		goto out;

	rdst = vxlan_fdb_find_rdst(f, &fdb_info->remote_ip,
				   fdb_info->remote_port,
				   fdb_info->remote_vni,
				   fdb_info->remote_ifindex);
	if (!rdst)
		goto out;

	rdst->offloaded = fdb_info->offloaded;

out:
	spin_unlock_bh(&vxlan->hash_lock[hash_index]);
}
4458
/* Create or replace an FDB entry learned externally (e.g. by an
 * offloading bridge), flagged NTF_EXT_LEARNED.  Returns 0 or a negative
 * errno from vxlan_fdb_update().
 */
static int
vxlan_fdb_external_learn_add(struct net_device *dev,
			     struct switchdev_notifier_vxlan_fdb_info *fdb_info)
{
	struct vxlan_dev *vxlan = netdev_priv(dev);
	struct netlink_ext_ack *extack;
	u32 hash_index;
	int err;

	hash_index = fdb_head_index(vxlan, fdb_info->eth_addr, fdb_info->vni);
	extack = switchdev_notifier_info_to_extack(&fdb_info->info);

	spin_lock_bh(&vxlan->hash_lock[hash_index]);
	err = vxlan_fdb_update(vxlan, fdb_info->eth_addr, &fdb_info->remote_ip,
			       NUD_REACHABLE,
			       NLM_F_CREATE | NLM_F_REPLACE,
			       fdb_info->remote_port,
			       fdb_info->vni,
			       fdb_info->remote_vni,
			       fdb_info->remote_ifindex,
			       NTF_USE | NTF_SELF | NTF_EXT_LEARNED,
			       0, false, extack);
	spin_unlock_bh(&vxlan->hash_lock[hash_index]);

	return err;
}
4485
/* Delete an externally learned FDB entry.  Entries without the
 * NTF_EXT_LEARNED flag (added locally) are left untouched; a missing
 * entry yields -ENOENT.
 */
static int
vxlan_fdb_external_learn_del(struct net_device *dev,
			     struct switchdev_notifier_vxlan_fdb_info *fdb_info)
{
	struct vxlan_dev *vxlan = netdev_priv(dev);
	struct vxlan_fdb *f;
	u32 hash_index;
	int err = 0;

	hash_index = fdb_head_index(vxlan, fdb_info->eth_addr, fdb_info->vni);
	spin_lock_bh(&vxlan->hash_lock[hash_index]);

	f = vxlan_find_mac(vxlan, fdb_info->eth_addr, fdb_info->vni);
	if (!f)
		err = -ENOENT;
	else if (f->flags & NTF_EXT_LEARNED)
		err = __vxlan_fdb_delete(vxlan, fdb_info->eth_addr,
					 fdb_info->remote_ip,
					 fdb_info->remote_port,
					 fdb_info->vni,
					 fdb_info->remote_vni,
					 fdb_info->remote_ifindex,
					 false);

	spin_unlock_bh(&vxlan->hash_lock[hash_index]);

	return err;
}
4514
/* Switchdev notifier callback: handle FDB offload-state updates and
 * bridge add/del requests for externally learned entries.  On add/del
 * success the offloaded flag is propagated back into the FDB entry.
 */
static int vxlan_switchdev_event(struct notifier_block *unused,
				 unsigned long event, void *ptr)
{
	struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
	struct switchdev_notifier_vxlan_fdb_info *fdb_info;
	int err = 0;

	switch (event) {
	case SWITCHDEV_VXLAN_FDB_OFFLOADED:
		vxlan_fdb_offloaded_set(dev, ptr);
		break;
	case SWITCHDEV_VXLAN_FDB_ADD_TO_BRIDGE:
		fdb_info = ptr;
		err = vxlan_fdb_external_learn_add(dev, fdb_info);
		if (err) {
			err = notifier_from_errno(err);
			break;
		}
		fdb_info->offloaded = true;
		vxlan_fdb_offloaded_set(dev, fdb_info);
		break;
	case SWITCHDEV_VXLAN_FDB_DEL_TO_BRIDGE:
		fdb_info = ptr;
		err = vxlan_fdb_external_learn_del(dev, fdb_info);
		if (err) {
			err = notifier_from_errno(err);
			break;
		}
		fdb_info->offloaded = false;
		vxlan_fdb_offloaded_set(dev, fdb_info);
		break;
	}

	return err;
}
4550
/* Registered on the switchdev notifier chain in vxlan_init_module() */
static struct notifier_block vxlan_switchdev_notifier_block __read_mostly = {
	.notifier_call = vxlan_switchdev_event,
};
4554
/* Destroy every FDB entry that references the nexthop @nh.  Walks the
 * nexthop's fdb_list under RCU and takes the owning device's per-bucket
 * hash lock for each removal; the hlist_unhashed() check skips entries
 * already being torn down concurrently.
 */
static void vxlan_fdb_nh_flush(struct nexthop *nh)
{
	struct vxlan_fdb *fdb;
	struct vxlan_dev *vxlan;
	u32 hash_index;

	rcu_read_lock();
	list_for_each_entry_rcu(fdb, &nh->fdb_list, nh_list) {
		vxlan = rcu_dereference(fdb->vdev);
		WARN_ON(!vxlan);
		hash_index = fdb_head_index(vxlan, fdb->eth_addr,
					    vxlan->default_dst.remote_vni);
		spin_lock_bh(&vxlan->hash_lock[hash_index]);
		if (!hlist_unhashed(&fdb->hlist))
			vxlan_fdb_destroy(vxlan, fdb, false, false);
		spin_unlock_bh(&vxlan->hash_lock[hash_index]);
	}
	rcu_read_unlock();
}
4574
4575 static int vxlan_nexthop_event(struct notifier_block *nb,
4576                                unsigned long event, void *ptr)
4577 {
4578         struct nh_notifier_info *info = ptr;
4579         struct nexthop *nh;
4580
4581         if (event != NEXTHOP_EVENT_DEL)
4582                 return NOTIFY_DONE;
4583
4584         nh = nexthop_find_by_id(info->net, info->id);
4585         if (!nh)
4586                 return NOTIFY_DONE;
4587
4588         vxlan_fdb_nh_flush(nh);
4589
4590         return NOTIFY_DONE;
4591 }
4592
4593 static __net_init int vxlan_init_net(struct net *net)
4594 {
4595         struct vxlan_net *vn = net_generic(net, vxlan_net_id);
4596         unsigned int h;
4597
4598         INIT_LIST_HEAD(&vn->vxlan_list);
4599         spin_lock_init(&vn->sock_lock);
4600         vn->nexthop_notifier_block.notifier_call = vxlan_nexthop_event;
4601
4602         for (h = 0; h < PORT_HASH_SIZE; ++h)
4603                 INIT_HLIST_HEAD(&vn->sock_list[h]);
4604
4605         return register_nexthop_notifier(net, &vn->nexthop_notifier_block,
4606                                          NULL);
4607 }
4608
4609 static void vxlan_destroy_tunnels(struct net *net, struct list_head *head)
4610 {
4611         struct vxlan_net *vn = net_generic(net, vxlan_net_id);
4612         struct vxlan_dev *vxlan, *next;
4613         struct net_device *dev, *aux;
4614
4615         for_each_netdev_safe(net, dev, aux)
4616                 if (dev->rtnl_link_ops == &vxlan_link_ops)
4617                         unregister_netdevice_queue(dev, head);
4618
4619         list_for_each_entry_safe(vxlan, next, &vn->vxlan_list, next) {
4620                 /* If vxlan->dev is in the same netns, it has already been added
4621                  * to the list by the previous loop.
4622                  */
4623                 if (!net_eq(dev_net(vxlan->dev), net))
4624                         unregister_netdevice_queue(vxlan->dev, head);
4625         }
4626
4627 }
4628
/* Batched pernet exit: first unregister the nexthop notifiers (outside
 * RTNL), then destroy all tunnels for every exiting netns in one RTNL
 * section, and finally verify that no vxlan sockets survived.
 */
static void __net_exit vxlan_exit_batch_net(struct list_head *net_list)
{
	struct net *net;
	LIST_HEAD(list);
	unsigned int h;

	list_for_each_entry(net, net_list, exit_list) {
		struct vxlan_net *vn = net_generic(net, vxlan_net_id);

		unregister_nexthop_notifier(net, &vn->nexthop_notifier_block);
	}
	rtnl_lock();
	list_for_each_entry(net, net_list, exit_list)
		vxlan_destroy_tunnels(net, &list);

	unregister_netdevice_many(&list);
	rtnl_unlock();

	/* All devices are gone; every socket hash bucket must be empty */
	list_for_each_entry(net, net_list, exit_list) {
		struct vxlan_net *vn = net_generic(net, vxlan_net_id);

		for (h = 0; h < PORT_HASH_SIZE; ++h)
			WARN_ON_ONCE(!hlist_empty(&vn->sock_list[h]));
	}
}
4654
/* Per-network-namespace state lifecycle for the vxlan module */
static struct pernet_operations vxlan_net_ops = {
	.init = vxlan_init_net,
	.exit_batch = vxlan_exit_batch_net,
	.id   = &vxlan_net_id,
	.size = sizeof(struct vxlan_net),
};
4661
4662 static int __init vxlan_init_module(void)
4663 {
4664         int rc;
4665
4666         get_random_bytes(&vxlan_salt, sizeof(vxlan_salt));
4667
4668         rc = register_pernet_subsys(&vxlan_net_ops);
4669         if (rc)
4670                 goto out1;
4671
4672         rc = register_netdevice_notifier(&vxlan_notifier_block);
4673         if (rc)
4674                 goto out2;
4675
4676         rc = register_switchdev_notifier(&vxlan_switchdev_notifier_block);
4677         if (rc)
4678                 goto out3;
4679
4680         rc = rtnl_link_register(&vxlan_link_ops);
4681         if (rc)
4682                 goto out4;
4683
4684         vxlan_vnifilter_init();
4685
4686         return 0;
4687 out4:
4688         unregister_switchdev_notifier(&vxlan_switchdev_notifier_block);
4689 out3:
4690         unregister_netdevice_notifier(&vxlan_notifier_block);
4691 out2:
4692         unregister_pernet_subsys(&vxlan_net_ops);
4693 out1:
4694         return rc;
4695 }
4696 late_initcall(vxlan_init_module);
4697
/* Module exit: tear everything down in strict reverse order of
 * vxlan_init_module().
 */
static void __exit vxlan_cleanup_module(void)
{
	vxlan_vnifilter_uninit();
	rtnl_link_unregister(&vxlan_link_ops);
	unregister_switchdev_notifier(&vxlan_switchdev_notifier_block);
	unregister_netdevice_notifier(&vxlan_notifier_block);
	unregister_pernet_subsys(&vxlan_net_ops);
	/* rcu_barrier() is called by netns */
}
module_exit(vxlan_cleanup_module);
4708
4709 MODULE_LICENSE("GPL");
4710 MODULE_VERSION(VXLAN_VERSION);
4711 MODULE_AUTHOR("Stephen Hemminger <stephen@networkplumber.org>");
4712 MODULE_DESCRIPTION("Driver for VXLAN encapsulated traffic");
4713 MODULE_ALIAS_RTNL_LINK("vxlan");