struct vxlan_dev_node *node;
/* For flow based devices, map all packets to VNI 0 */
- if (vs->flags & VXLAN_F_COLLECT_METADATA)
+ if (vs->flags & VXLAN_F_COLLECT_METADATA &&
+ !(vs->flags & VXLAN_F_VNIFILTER))
vni = 0;
hlist_for_each_entry_rcu(node, vni_head(vs, vni), hlist) {
- if (node->vxlan->default_dst.remote_vni != vni)
+ if (!node->vxlan)
continue;
+ if (node->vxlan->cfg.flags & VXLAN_F_VNIFILTER) {
+ if (!vxlan_vnifilter_lookup(node->vxlan, vni))
+ continue;
+ } else if (node->vxlan->default_dst.remote_vni != vni) {
+ continue;
+ }
if (IS_ENABLED(CONFIG_IPV6)) {
const struct vxlan_config *cfg = &node->vxlan->cfg;
RCU_INIT_POINTER(vxlan->vn4_sock, NULL);
synchronize_net();
- vxlan_vs_del_dev(vxlan);
+ if (vxlan->cfg.flags & VXLAN_F_VNIFILTER)
+ vxlan_vs_del_vnigrp(vxlan);
+ else
+ vxlan_vs_del_dev(vxlan);
if (__vxlan_sock_release_prep(sock4)) {
udp_tunnel_sock_release(sock4->sock);
struct vxlan_dev *vxlan = netdev_priv(dev);
int err;
+ if (vxlan->cfg.flags & VXLAN_F_VNIFILTER)
+ vxlan_vnigroup_init(vxlan);
+
dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
if (!dev->tstats)
return -ENOMEM;
{
struct vxlan_dev *vxlan = netdev_priv(dev);
+ if (vxlan->cfg.flags & VXLAN_F_VNIFILTER)
+ vxlan_vnigroup_uninit(vxlan);
+
gro_cells_destroy(&vxlan->gro_cells);
vxlan_fdb_delete_default(vxlan, vxlan->cfg.vni);
if (ret < 0)
return ret;
- if (vxlan_addr_multicast(&vxlan->default_dst.remote_ip)) {
- ret = vxlan_igmp_join(vxlan, &vxlan->default_dst.remote_ip,
- vxlan->default_dst.remote_ifindex);
- if (ret == -EADDRINUSE)
- ret = 0;
- if (ret) {
- vxlan_sock_release(vxlan);
- return ret;
- }
+ ret = vxlan_multicast_join(vxlan);
+ if (ret) {
+ vxlan_sock_release(vxlan);
+ return ret;
}
if (vxlan->cfg.age_interval)
static int vxlan_stop(struct net_device *dev)
{
struct vxlan_dev *vxlan = netdev_priv(dev);
- struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
int ret = 0;
- if (vxlan_addr_multicast(&vxlan->default_dst.remote_ip) &&
- !vxlan_group_used(vn, vxlan, NULL, 0))
- ret = vxlan_igmp_leave(vxlan, &vxlan->default_dst.remote_ip,
- vxlan->default_dst.remote_ifindex);
+ vxlan_multicast_leave(vxlan);
del_timer_sync(&vxlan->age_timer);
[IFLA_VXLAN_REMCSUM_NOPARTIAL] = { .type = NLA_FLAG },
[IFLA_VXLAN_TTL_INHERIT] = { .type = NLA_FLAG },
[IFLA_VXLAN_DF] = { .type = NLA_U8 },
+ [IFLA_VXLAN_VNIFILTER] = { .type = NLA_U8 },
};
static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[],
static int __vxlan_sock_add(struct vxlan_dev *vxlan, bool ipv6)
{
struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
+ bool metadata = vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA;
struct vxlan_sock *vs = NULL;
struct vxlan_dev_node *node;
int l3mdev_index = 0;
rcu_assign_pointer(vxlan->vn4_sock, vs);
node = &vxlan->hlist4;
}
- vxlan_vs_add_dev(vs, vxlan, node);
+
+ if (metadata && (vxlan->cfg.flags & VXLAN_F_VNIFILTER))
+ vxlan_vs_add_vnigrp(vxlan, vs, ipv6);
+ else
+ vxlan_vs_add_dev(vs, vxlan, node);
+
return 0;
}
return ret;
}
-static int vxlan_vni_in_use(struct net *src_net, struct vxlan_dev *vxlan,
- struct vxlan_config *conf, __be32 vni)
+int vxlan_vni_in_use(struct net *src_net, struct vxlan_dev *vxlan,
+ struct vxlan_config *conf, __be32 vni)
{
struct vxlan_net *vn = net_generic(src_net, vxlan_net_id);
struct vxlan_dev *tmp;
list_for_each_entry(tmp, &vn->vxlan_list, next) {
if (tmp == vxlan)
continue;
- if (tmp->cfg.vni != vni)
+ if (tmp->cfg.flags & VXLAN_F_VNIFILTER) {
+ if (!vxlan_vnifilter_lookup(tmp, vni))
+ continue;
+ } else if (tmp->cfg.vni != vni) {
continue;
+ }
if (tmp->cfg.dst_port != conf->dst_port)
continue;
if ((tmp->cfg.flags & (VXLAN_F_RCV_FLAGS | VXLAN_F_IPV6)) !=
if (data[IFLA_VXLAN_DF])
conf->df = nla_get_u8(data[IFLA_VXLAN_DF]);
+ if (data[IFLA_VXLAN_VNIFILTER]) {
+ err = vxlan_nl2flag(conf, data, IFLA_VXLAN_VNIFILTER,
+ VXLAN_F_VNIFILTER, changelink, false,
+ extack);
+ if (err)
+ return err;
+
+ if ((conf->flags & VXLAN_F_VNIFILTER) &&
+ !(conf->flags & VXLAN_F_COLLECT_METADATA)) {
+ NL_SET_ERR_MSG_ATTR(extack, data[IFLA_VXLAN_VNIFILTER],
+ "vxlan vnifilter only valid in collect metadata mode");
+ return -EINVAL;
+ }
+ }
+
return 0;
}
dst->remote_ifindex,
true);
spin_unlock_bh(&vxlan->hash_lock[hash_index]);
+
+ /* If vni filtering device, also update fdb entries of
+ * all vnis that were using default remote ip
+ */
+ if (vxlan->cfg.flags & VXLAN_F_VNIFILTER) {
+ err = vxlan_vnilist_update_group(vxlan, &dst->remote_ip,
+ &conf.remote_ip, extack);
+ if (err) {
+ netdev_adjacent_change_abort(dst->remote_dev,
+ lowerdev, dev);
+ return err;
+ }
+ }
}
if (conf.age_interval != vxlan->cfg.age_interval)
nla_put_flag(skb, IFLA_VXLAN_REMCSUM_NOPARTIAL))
goto nla_put_failure;
+ if (vxlan->cfg.flags & VXLAN_F_VNIFILTER &&
+ nla_put_u8(skb, IFLA_VXLAN_VNIFILTER,
+ !!(vxlan->cfg.flags & VXLAN_F_VNIFILTER)))
+ goto nla_put_failure;
+
return 0;
nla_put_failure:
if (rc)
goto out4;
+ vxlan_vnifilter_init();
+
return 0;
out4:
unregister_switchdev_notifier(&vxlan_switchdev_notifier_block);
static void __exit vxlan_cleanup_module(void)
{
+ vxlan_vnifilter_uninit();
rtnl_link_unregister(&vxlan_link_ops);
unregister_switchdev_notifier(&vxlan_switchdev_notifier_block);
unregister_netdevice_notifier(&vxlan_notifier_block);
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Vxlan vni filter for collect metadata mode
+ *
+ * Authors: Roopa Prabhu <roopa@nvidia.com>
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/etherdevice.h>
+#include <linux/rhashtable.h>
+#include <net/rtnetlink.h>
+#include <net/net_namespace.h>
+#include <net/sock.h>
+#include <net/vxlan.h>
+
+#include "vxlan_private.h"
+
+static inline int vxlan_vni_cmp(struct rhashtable_compare_arg *arg,
+ const void *ptr)
+{
+ const struct vxlan_vni_node *vnode = ptr;
+ __be32 vni = *(__be32 *)arg->key;
+
+ return vnode->vni != vni;
+}
+
+const struct rhashtable_params vxlan_vni_rht_params = {
+ .head_offset = offsetof(struct vxlan_vni_node, vnode),
+ .key_offset = offsetof(struct vxlan_vni_node, vni),
+ .key_len = sizeof(__be32),
+ .nelem_hint = 3,
+ .max_size = VXLAN_N_VID,
+ .obj_cmpfn = vxlan_vni_cmp,
+ .automatic_shrinking = true,
+};
+
+static void vxlan_vs_add_del_vninode(struct vxlan_dev *vxlan,
+ struct vxlan_vni_node *v,
+ bool del)
+{
+ struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
+ struct vxlan_dev_node *node;
+ struct vxlan_sock *vs;
+
+ spin_lock(&vn->sock_lock);
+ if (del) {
+ if (!hlist_unhashed(&v->hlist4.hlist))
+ hlist_del_init_rcu(&v->hlist4.hlist);
+#if IS_ENABLED(CONFIG_IPV6)
+ if (!hlist_unhashed(&v->hlist6.hlist))
+ hlist_del_init_rcu(&v->hlist6.hlist);
+#endif
+ goto out;
+ }
+
+#if IS_ENABLED(CONFIG_IPV6)
+ vs = rtnl_dereference(vxlan->vn6_sock);
+ if (vs && v) {
+ node = &v->hlist6;
+ hlist_add_head_rcu(&node->hlist, vni_head(vs, v->vni));
+ }
+#endif
+ vs = rtnl_dereference(vxlan->vn4_sock);
+ if (vs && v) {
+ node = &v->hlist4;
+ hlist_add_head_rcu(&node->hlist, vni_head(vs, v->vni));
+ }
+out:
+ spin_unlock(&vn->sock_lock);
+}
+
+void vxlan_vs_add_vnigrp(struct vxlan_dev *vxlan,
+ struct vxlan_sock *vs,
+ bool ipv6)
+{
+ struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
+ struct vxlan_vni_group *vg = rtnl_dereference(vxlan->vnigrp);
+ struct vxlan_vni_node *v, *tmp;
+ struct vxlan_dev_node *node;
+
+ if (!vg)
+ return;
+
+ spin_lock(&vn->sock_lock);
+ list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) {
+#if IS_ENABLED(CONFIG_IPV6)
+ if (ipv6)
+ node = &v->hlist6;
+ else
+#endif
+ node = &v->hlist4;
+ node->vxlan = vxlan;
+ hlist_add_head_rcu(&node->hlist, vni_head(vs, v->vni));
+ }
+ spin_unlock(&vn->sock_lock);
+}
+
+void vxlan_vs_del_vnigrp(struct vxlan_dev *vxlan)
+{
+ struct vxlan_vni_group *vg = rtnl_dereference(vxlan->vnigrp);
+ struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
+ struct vxlan_vni_node *v, *tmp;
+
+ if (!vg)
+ return;
+
+ spin_lock(&vn->sock_lock);
+ list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) {
+ hlist_del_init_rcu(&v->hlist4.hlist);
+#if IS_ENABLED(CONFIG_IPV6)
+ hlist_del_init_rcu(&v->hlist6.hlist);
+#endif
+ }
+ spin_unlock(&vn->sock_lock);
+}
+
+static u32 vnirange(struct vxlan_vni_node *vbegin,
+ struct vxlan_vni_node *vend)
+{
+ return (be32_to_cpu(vend->vni) - be32_to_cpu(vbegin->vni));
+}
+
+static size_t vxlan_vnifilter_entry_nlmsg_size(void)
+{
+ return NLMSG_ALIGN(sizeof(struct tunnel_msg))
+ + nla_total_size(0) /* VXLAN_VNIFILTER_ENTRY */
+ + nla_total_size(sizeof(u32)) /* VXLAN_VNIFILTER_ENTRY_START */
+ + nla_total_size(sizeof(u32)) /* VXLAN_VNIFILTER_ENTRY_END */
+ + nla_total_size(sizeof(struct in6_addr));/* VXLAN_VNIFILTER_ENTRY_GROUP{6} */
+}
+
+static bool vxlan_fill_vni_filter_entry(struct sk_buff *skb,
+ struct vxlan_vni_node *vbegin,
+ struct vxlan_vni_node *vend)
+{
+ struct nlattr *ventry;
+ u32 vs = be32_to_cpu(vbegin->vni);
+ u32 ve = 0;
+
+ if (vbegin != vend)
+ ve = be32_to_cpu(vend->vni);
+
+ ventry = nla_nest_start(skb, VXLAN_VNIFILTER_ENTRY);
+ if (!ventry)
+ return false;
+
+ if (nla_put_u32(skb, VXLAN_VNIFILTER_ENTRY_START, vs))
+ goto out_err;
+
+ if (ve && nla_put_u32(skb, VXLAN_VNIFILTER_ENTRY_END, ve))
+ goto out_err;
+
+ if (!vxlan_addr_any(&vbegin->remote_ip)) {
+ if (vbegin->remote_ip.sa.sa_family == AF_INET) {
+ if (nla_put_in_addr(skb, VXLAN_VNIFILTER_ENTRY_GROUP,
+ vbegin->remote_ip.sin.sin_addr.s_addr))
+ goto out_err;
+#if IS_ENABLED(CONFIG_IPV6)
+ } else {
+ if (nla_put_in6_addr(skb, VXLAN_VNIFILTER_ENTRY_GROUP6,
+ &vbegin->remote_ip.sin6.sin6_addr))
+ goto out_err;
+#endif
+ }
+ }
+
+ nla_nest_end(skb, ventry);
+
+ return true;
+
+out_err:
+ nla_nest_cancel(skb, ventry);
+
+ return false;
+}
+
+static void vxlan_vnifilter_notify(const struct vxlan_dev *vxlan,
+ struct vxlan_vni_node *vninode, int cmd)
+{
+ struct tunnel_msg *tmsg;
+ struct sk_buff *skb;
+ struct nlmsghdr *nlh;
+ struct net *net = dev_net(vxlan->dev);
+ int err = -ENOBUFS;
+
+ skb = nlmsg_new(vxlan_vnifilter_entry_nlmsg_size(), GFP_KERNEL);
+ if (!skb)
+ goto out_err;
+
+ err = -EMSGSIZE;
+ nlh = nlmsg_put(skb, 0, 0, cmd, sizeof(*tmsg), 0);
+ if (!nlh)
+ goto out_err;
+ tmsg = nlmsg_data(nlh);
+ memset(tmsg, 0, sizeof(*tmsg));
+ tmsg->family = AF_BRIDGE;
+ tmsg->ifindex = vxlan->dev->ifindex;
+
+ if (!vxlan_fill_vni_filter_entry(skb, vninode, vninode))
+ goto out_err;
+
+ nlmsg_end(skb, nlh);
+ rtnl_notify(skb, net, 0, RTNLGRP_TUNNEL, NULL, GFP_KERNEL);
+
+ return;
+
+out_err:
+ rtnl_set_sk_err(net, RTNLGRP_TUNNEL, err);
+
+ kfree_skb(skb);
+}
+
+static int vxlan_vnifilter_dump_dev(const struct net_device *dev,
+ struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ struct vxlan_vni_node *tmp, *v, *vbegin = NULL, *vend = NULL;
+ struct vxlan_dev *vxlan = netdev_priv(dev);
+ struct tunnel_msg *new_tmsg;
+ int idx = 0, s_idx = cb->args[1];
+ struct vxlan_vni_group *vg;
+ struct nlmsghdr *nlh;
+ int err = 0;
+
+ if (!(vxlan->cfg.flags & VXLAN_F_VNIFILTER))
+ return -EINVAL;
+
+ /* RCU needed because of the vni locking rules (rcu || rtnl) */
+ vg = rcu_dereference(vxlan->vnigrp);
+ if (!vg || !vg->num_vnis)
+ return 0;
+
+ nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+ RTM_NEWTUNNEL, sizeof(*new_tmsg), NLM_F_MULTI);
+ if (!nlh)
+ return -EMSGSIZE;
+ new_tmsg = nlmsg_data(nlh);
+ memset(new_tmsg, 0, sizeof(*new_tmsg));
+ new_tmsg->family = PF_BRIDGE;
+ new_tmsg->ifindex = dev->ifindex;
+
+ list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) {
+ if (idx < s_idx) {
+ idx++;
+ continue;
+ }
+ if (!vbegin) {
+ vbegin = v;
+ vend = v;
+ continue;
+ }
+ if (vnirange(vend, v) == 1 &&
+ vxlan_addr_equal(&v->remote_ip, &vend->remote_ip)) {
+ goto update_end;
+ } else {
+ if (!vxlan_fill_vni_filter_entry(skb, vbegin, vend)) {
+ err = -EMSGSIZE;
+ break;
+ }
+ idx += vnirange(vbegin, vend) + 1;
+ vbegin = v;
+ }
+update_end:
+ vend = v;
+ }
+
+ if (!err && vbegin) {
+ if (!vxlan_fill_vni_filter_entry(skb, vbegin, vend))
+ err = -EMSGSIZE;
+ }
+
+ cb->args[1] = err ? idx : 0;
+
+ nlmsg_end(skb, nlh);
+
+ return err;
+}
+
+static int vxlan_vnifilter_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ int idx = 0, err = 0, s_idx = cb->args[0];
+ struct net *net = sock_net(skb->sk);
+ struct tunnel_msg *tmsg;
+ struct net_device *dev;
+
+ tmsg = nlmsg_data(cb->nlh);
+
+ rcu_read_lock();
+ if (tmsg->ifindex) {
+ dev = dev_get_by_index_rcu(net, tmsg->ifindex);
+ if (!dev) {
+ err = -ENODEV;
+ goto out_err;
+ }
+ err = vxlan_vnifilter_dump_dev(dev, skb, cb);
+ /* if the dump completed without an error we return 0 here */
+ if (err != -EMSGSIZE)
+ goto out_err;
+ } else {
+ for_each_netdev_rcu(net, dev) {
+ if (!netif_is_vxlan(dev))
+ continue;
+ if (idx < s_idx)
+ goto skip;
+ err = vxlan_vnifilter_dump_dev(dev, skb, cb);
+ if (err == -EMSGSIZE)
+ break;
+skip:
+ idx++;
+ }
+ }
+ cb->args[0] = idx;
+ rcu_read_unlock();
+
+ return skb->len;
+
+out_err:
+ rcu_read_unlock();
+
+ return err;
+}
+
+static const struct nla_policy vni_filter_entry_policy[VXLAN_VNIFILTER_ENTRY_MAX + 1] = {
+ [VXLAN_VNIFILTER_ENTRY_START] = { .type = NLA_U32 },
+ [VXLAN_VNIFILTER_ENTRY_END] = { .type = NLA_U32 },
+ [VXLAN_VNIFILTER_ENTRY_GROUP] = { .type = NLA_BINARY,
+ .len = sizeof_field(struct iphdr, daddr) },
+ [VXLAN_VNIFILTER_ENTRY_GROUP6] = { .type = NLA_BINARY,
+ .len = sizeof(struct in6_addr) },
+};
+
+static const struct nla_policy vni_filter_policy[VXLAN_VNIFILTER_MAX + 1] = {
+ [VXLAN_VNIFILTER_ENTRY] = { .type = NLA_NESTED },
+};
+
+static int vxlan_update_default_fdb_entry(struct vxlan_dev *vxlan, __be32 vni,
+ union vxlan_addr *old_remote_ip,
+ union vxlan_addr *remote_ip,
+ struct netlink_ext_ack *extack)
+{
+ struct vxlan_rdst *dst = &vxlan->default_dst;
+ u32 hash_index;
+ int err = 0;
+
+ hash_index = fdb_head_index(vxlan, all_zeros_mac, vni);
+ spin_lock_bh(&vxlan->hash_lock[hash_index]);
+ if (remote_ip && !vxlan_addr_any(remote_ip)) {
+ err = vxlan_fdb_update(vxlan, all_zeros_mac,
+ remote_ip,
+ NUD_REACHABLE | NUD_PERMANENT,
+ NLM_F_APPEND | NLM_F_CREATE,
+ vxlan->cfg.dst_port,
+ vni,
+ vni,
+ dst->remote_ifindex,
+ NTF_SELF, 0, true, extack);
+ if (err) {
+ spin_unlock_bh(&vxlan->hash_lock[hash_index]);
+ return err;
+ }
+ }
+
+ if (old_remote_ip && !vxlan_addr_any(old_remote_ip)) {
+ __vxlan_fdb_delete(vxlan, all_zeros_mac,
+ *old_remote_ip,
+ vxlan->cfg.dst_port,
+ vni, vni,
+ dst->remote_ifindex,
+ true);
+ }
+ spin_unlock_bh(&vxlan->hash_lock[hash_index]);
+
+ return err;
+}
+
+static int vxlan_vni_update_group(struct vxlan_dev *vxlan,
+ struct vxlan_vni_node *vninode,
+ union vxlan_addr *group,
+ bool create, bool *changed,
+ struct netlink_ext_ack *extack)
+{
+ struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
+ struct vxlan_rdst *dst = &vxlan->default_dst;
+ union vxlan_addr *newrip = NULL, *oldrip = NULL;
+ union vxlan_addr old_remote_ip;
+ int ret = 0;
+
+ memcpy(&old_remote_ip, &vninode->remote_ip, sizeof(old_remote_ip));
+
+ /* if per vni remote ip is not present use vxlan dev
+ * default dst remote ip for fdb entry
+ */
+ if (group && !vxlan_addr_any(group)) {
+ newrip = group;
+ } else {
+ if (!vxlan_addr_any(&dst->remote_ip))
+ newrip = &dst->remote_ip;
+ }
+
+ /* if old rip exists, and no newrip,
+ * explicitly delete old rip
+ */
+ if (!newrip && !vxlan_addr_any(&old_remote_ip))
+ oldrip = &old_remote_ip;
+
+ if (!newrip && !oldrip)
+ return 0;
+
+ if (!create && oldrip && newrip && vxlan_addr_equal(oldrip, newrip))
+ return 0;
+
+ ret = vxlan_update_default_fdb_entry(vxlan, vninode->vni,
+ oldrip, newrip,
+ extack);
+ if (ret)
+ goto out;
+
+ if (group)
+ memcpy(&vninode->remote_ip, group, sizeof(vninode->remote_ip));
+
+ if (vxlan->dev->flags & IFF_UP) {
+ if (vxlan_addr_multicast(&old_remote_ip) &&
+ !vxlan_group_used(vn, vxlan, vninode->vni,
+ &old_remote_ip,
+ vxlan->default_dst.remote_ifindex)) {
+ ret = vxlan_igmp_leave(vxlan, &old_remote_ip,
+ 0);
+ if (ret)
+ goto out;
+ }
+
+ if (vxlan_addr_multicast(&vninode->remote_ip)) {
+ ret = vxlan_igmp_join(vxlan, &vninode->remote_ip, 0);
+ if (ret == -EADDRINUSE)
+ ret = 0;
+ if (ret)
+ goto out;
+ }
+ }
+
+ *changed = true;
+
+ return 0;
+out:
+ return ret;
+}
+
+int vxlan_vnilist_update_group(struct vxlan_dev *vxlan,
+ union vxlan_addr *old_remote_ip,
+ union vxlan_addr *new_remote_ip,
+ struct netlink_ext_ack *extack)
+{
+ struct list_head *headp, *hpos;
+ struct vxlan_vni_group *vg;
+ struct vxlan_vni_node *vent;
+ int ret;
+
+ vg = rtnl_dereference(vxlan->vnigrp);
+
+ headp = &vg->vni_list;
+ list_for_each_prev(hpos, headp) {
+ vent = list_entry(hpos, struct vxlan_vni_node, vlist);
+ if (vxlan_addr_any(&vent->remote_ip)) {
+ ret = vxlan_update_default_fdb_entry(vxlan, vent->vni,
+ old_remote_ip,
+ new_remote_ip,
+ extack);
+ if (ret)
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static void vxlan_vni_delete_group(struct vxlan_dev *vxlan,
+ struct vxlan_vni_node *vninode)
+{
+ struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
+ struct vxlan_rdst *dst = &vxlan->default_dst;
+
+ /* if per vni remote_ip not present, delete the
+ * default dst remote_ip previously added for this vni
+ */
+ if (!vxlan_addr_any(&vninode->remote_ip) ||
+ !vxlan_addr_any(&dst->remote_ip))
+ __vxlan_fdb_delete(vxlan, all_zeros_mac,
+ (vxlan_addr_any(&vninode->remote_ip) ?
+ dst->remote_ip : vninode->remote_ip),
+ vxlan->cfg.dst_port,
+ vninode->vni, vninode->vni,
+ dst->remote_ifindex,
+ true);
+
+ if (vxlan->dev->flags & IFF_UP) {
+ if (vxlan_addr_multicast(&vninode->remote_ip) &&
+ !vxlan_group_used(vn, vxlan, vninode->vni,
+ &vninode->remote_ip,
+ dst->remote_ifindex)) {
+ vxlan_igmp_leave(vxlan, &vninode->remote_ip, 0);
+ }
+ }
+}
+
+static int vxlan_vni_update(struct vxlan_dev *vxlan,
+ struct vxlan_vni_group *vg,
+ __be32 vni, union vxlan_addr *group,
+ bool *changed,
+ struct netlink_ext_ack *extack)
+{
+ struct vxlan_vni_node *vninode;
+ int ret;
+
+ vninode = rhashtable_lookup_fast(&vg->vni_hash, &vni,
+ vxlan_vni_rht_params);
+ if (!vninode)
+ return 0;
+
+ ret = vxlan_vni_update_group(vxlan, vninode, group, false, changed,
+ extack);
+ if (ret)
+ return ret;
+
+ if (changed)
+ vxlan_vnifilter_notify(vxlan, vninode, RTM_NEWTUNNEL);
+
+ return 0;
+}
+
+static void __vxlan_vni_add_list(struct vxlan_vni_group *vg,
+ struct vxlan_vni_node *v)
+{
+ struct list_head *headp, *hpos;
+ struct vxlan_vni_node *vent;
+
+ headp = &vg->vni_list;
+ list_for_each_prev(hpos, headp) {
+ vent = list_entry(hpos, struct vxlan_vni_node, vlist);
+ if (be32_to_cpu(v->vni) < be32_to_cpu(vent->vni))
+ continue;
+ else
+ break;
+ }
+ list_add_rcu(&v->vlist, hpos);
+ vg->num_vnis++;
+}
+
+static void __vxlan_vni_del_list(struct vxlan_vni_group *vg,
+ struct vxlan_vni_node *v)
+{
+ list_del_rcu(&v->vlist);
+ vg->num_vnis--;
+}
+
+static struct vxlan_vni_node *vxlan_vni_alloc(struct vxlan_dev *vxlan,
+ __be32 vni)
+{
+ struct vxlan_vni_node *vninode;
+
+ vninode = kzalloc(sizeof(*vninode), GFP_ATOMIC);
+ if (!vninode)
+ return NULL;
+ vninode->vni = vni;
+ vninode->hlist4.vxlan = vxlan;
+#if IS_ENABLED(CONFIG_IPV6)
+ vninode->hlist6.vxlan = vxlan;
+#endif
+
+ return vninode;
+}
+
+static int vxlan_vni_add(struct vxlan_dev *vxlan,
+ struct vxlan_vni_group *vg,
+ u32 vni, union vxlan_addr *group,
+ struct netlink_ext_ack *extack)
+{
+ struct vxlan_vni_node *vninode;
+ __be32 v = cpu_to_be32(vni);
+ bool changed = false;
+ int err = 0;
+
+ if (vxlan_vnifilter_lookup(vxlan, v))
+ return vxlan_vni_update(vxlan, vg, v, group, &changed, extack);
+
+ err = vxlan_vni_in_use(vxlan->net, vxlan, &vxlan->cfg, v);
+ if (err) {
+ NL_SET_ERR_MSG(extack, "VNI in use");
+ return err;
+ }
+
+ vninode = vxlan_vni_alloc(vxlan, v);
+ if (!vninode)
+ return -ENOMEM;
+
+ err = rhashtable_lookup_insert_fast(&vg->vni_hash,
+ &vninode->vnode,
+ vxlan_vni_rht_params);
+ if (err) {
+ kfree(vninode);
+ return err;
+ }
+
+ __vxlan_vni_add_list(vg, vninode);
+
+ if (vxlan->dev->flags & IFF_UP)
+ vxlan_vs_add_del_vninode(vxlan, vninode, false);
+
+ err = vxlan_vni_update_group(vxlan, vninode, group, true, &changed,
+ extack);
+
+ if (changed)
+ vxlan_vnifilter_notify(vxlan, vninode, RTM_NEWTUNNEL);
+
+ return err;
+}
+
+static void vxlan_vni_node_rcu_free(struct rcu_head *rcu)
+{
+ struct vxlan_vni_node *v;
+
+ v = container_of(rcu, struct vxlan_vni_node, rcu);
+ kfree(v);
+}
+
+static int vxlan_vni_del(struct vxlan_dev *vxlan,
+ struct vxlan_vni_group *vg,
+ u32 vni, struct netlink_ext_ack *extack)
+{
+ struct vxlan_vni_node *vninode;
+ __be32 v = cpu_to_be32(vni);
+ int err = 0;
+
+ vg = rtnl_dereference(vxlan->vnigrp);
+
+ vninode = rhashtable_lookup_fast(&vg->vni_hash, &v,
+ vxlan_vni_rht_params);
+ if (!vninode) {
+ err = -ENOENT;
+ goto out;
+ }
+
+ vxlan_vni_delete_group(vxlan, vninode);
+
+ err = rhashtable_remove_fast(&vg->vni_hash,
+ &vninode->vnode,
+ vxlan_vni_rht_params);
+ if (err)
+ goto out;
+
+ __vxlan_vni_del_list(vg, vninode);
+
+ vxlan_vnifilter_notify(vxlan, vninode, RTM_DELTUNNEL);
+
+ if (vxlan->dev->flags & IFF_UP)
+ vxlan_vs_add_del_vninode(vxlan, vninode, true);
+
+ call_rcu(&vninode->rcu, vxlan_vni_node_rcu_free);
+
+ return 0;
+out:
+ return err;
+}
+
+static int vxlan_vni_add_del(struct vxlan_dev *vxlan, __u32 start_vni,
+ __u32 end_vni, union vxlan_addr *group,
+ int cmd, struct netlink_ext_ack *extack)
+{
+ struct vxlan_vni_group *vg;
+ int v, err = 0;
+
+ vg = rtnl_dereference(vxlan->vnigrp);
+
+ for (v = start_vni; v <= end_vni; v++) {
+ switch (cmd) {
+ case RTM_NEWTUNNEL:
+ err = vxlan_vni_add(vxlan, vg, v, group, extack);
+ break;
+ case RTM_DELTUNNEL:
+ err = vxlan_vni_del(vxlan, vg, v, extack);
+ break;
+ default:
+ err = -EOPNOTSUPP;
+ break;
+ }
+ if (err)
+ goto out;
+ }
+
+ return 0;
+out:
+ return err;
+}
+
+static int vxlan_process_vni_filter(struct vxlan_dev *vxlan,
+ struct nlattr *nlvnifilter,
+ int cmd, struct netlink_ext_ack *extack)
+{
+ struct nlattr *vattrs[VXLAN_VNIFILTER_ENTRY_MAX + 1];
+ u32 vni_start = 0, vni_end = 0;
+ union vxlan_addr group;
+ int err;
+
+ err = nla_parse_nested(vattrs,
+ VXLAN_VNIFILTER_ENTRY_MAX,
+ nlvnifilter, vni_filter_entry_policy,
+ extack);
+ if (err)
+ return err;
+
+ if (vattrs[VXLAN_VNIFILTER_ENTRY_START]) {
+ vni_start = nla_get_u32(vattrs[VXLAN_VNIFILTER_ENTRY_START]);
+ vni_end = vni_start;
+ }
+
+ if (vattrs[VXLAN_VNIFILTER_ENTRY_END])
+ vni_end = nla_get_u32(vattrs[VXLAN_VNIFILTER_ENTRY_END]);
+
+ if (!vni_start && !vni_end) {
+ NL_SET_ERR_MSG_ATTR(extack, nlvnifilter,
+ "vni start nor end found in vni entry");
+ return -EINVAL;
+ }
+
+ if (vattrs[VXLAN_VNIFILTER_ENTRY_GROUP]) {
+ group.sin.sin_addr.s_addr =
+ nla_get_in_addr(vattrs[VXLAN_VNIFILTER_ENTRY_GROUP]);
+ group.sa.sa_family = AF_INET;
+ } else if (vattrs[VXLAN_VNIFILTER_ENTRY_GROUP6]) {
+ group.sin6.sin6_addr =
+ nla_get_in6_addr(vattrs[VXLAN_VNIFILTER_ENTRY_GROUP6]);
+ group.sa.sa_family = AF_INET6;
+ } else {
+ memset(&group, 0, sizeof(group));
+ }
+
+ if (vxlan_addr_multicast(&group) && !vxlan->default_dst.remote_ifindex) {
+ NL_SET_ERR_MSG(extack,
+ "Local interface required for multicast remote group");
+
+ return -EINVAL;
+ }
+
+ err = vxlan_vni_add_del(vxlan, vni_start, vni_end, &group, cmd,
+ extack);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+void vxlan_vnigroup_uninit(struct vxlan_dev *vxlan)
+{
+ struct vxlan_vni_node *v, *tmp;
+ struct vxlan_vni_group *vg;
+
+ vg = rtnl_dereference(vxlan->vnigrp);
+ list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) {
+ rhashtable_remove_fast(&vg->vni_hash, &v->vnode,
+ vxlan_vni_rht_params);
+ hlist_del_init_rcu(&v->hlist4.hlist);
+#if IS_ENABLED(CONFIG_IPV6)
+ hlist_del_init_rcu(&v->hlist6.hlist);
+#endif
+ __vxlan_vni_del_list(vg, v);
+ vxlan_vnifilter_notify(vxlan, v, RTM_DELTUNNEL);
+ call_rcu(&v->rcu, vxlan_vni_node_rcu_free);
+ }
+ rhashtable_destroy(&vg->vni_hash);
+ kfree(vg);
+}
+
+int vxlan_vnigroup_init(struct vxlan_dev *vxlan)
+{
+ struct vxlan_vni_group *vg;
+ int ret;
+
+ vg = kzalloc(sizeof(*vg), GFP_KERNEL);
+ if (!vg)
+ return -ENOMEM;
+ ret = rhashtable_init(&vg->vni_hash, &vxlan_vni_rht_params);
+ if (ret) {
+ kfree(vg);
+ return ret;
+ }
+ INIT_LIST_HEAD(&vg->vni_list);
+ rcu_assign_pointer(vxlan->vnigrp, vg);
+
+ return 0;
+}
+
+static int vxlan_vnifilter_process(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct net *net = sock_net(skb->sk);
+ struct tunnel_msg *tmsg;
+ struct vxlan_dev *vxlan;
+ struct net_device *dev;
+ struct nlattr *attr;
+ int err, vnis = 0;
+ int rem;
+
+ /* this should validate the header and check for remaining bytes */
+ err = nlmsg_parse(nlh, sizeof(*tmsg), NULL, VXLAN_VNIFILTER_MAX,
+ vni_filter_policy, extack);
+ if (err < 0)
+ return err;
+
+ tmsg = nlmsg_data(nlh);
+ dev = __dev_get_by_index(net, tmsg->ifindex);
+ if (!dev)
+ return -ENODEV;
+
+ if (!netif_is_vxlan(dev)) {
+ NL_SET_ERR_MSG_MOD(extack, "The device is not a vxlan device");
+ return -EINVAL;
+ }
+
+ vxlan = netdev_priv(dev);
+
+ if (!(vxlan->cfg.flags & VXLAN_F_VNIFILTER))
+ return -EOPNOTSUPP;
+
+ nlmsg_for_each_attr(attr, nlh, sizeof(*tmsg), rem) {
+ switch (nla_type(attr)) {
+ case VXLAN_VNIFILTER_ENTRY:
+ err = vxlan_process_vni_filter(vxlan, attr,
+ nlh->nlmsg_type, extack);
+ break;
+ default:
+ continue;
+ }
+ vnis++;
+ if (err)
+ break;
+ }
+
+ if (!vnis) {
+ NL_SET_ERR_MSG_MOD(extack, "No vnis found to process");
+ err = -EINVAL;
+ }
+
+ return err;
+}
+
+void vxlan_vnifilter_init(void)
+{
+ rtnl_register_module(THIS_MODULE, PF_BRIDGE, RTM_GETTUNNEL, NULL,
+ vxlan_vnifilter_dump, 0);
+ rtnl_register_module(THIS_MODULE, PF_BRIDGE, RTM_NEWTUNNEL,
+ vxlan_vnifilter_process, NULL, 0);
+ rtnl_register_module(THIS_MODULE, PF_BRIDGE, RTM_DELTUNNEL,
+ vxlan_vnifilter_process, NULL, 0);
+}
+
+void vxlan_vnifilter_uninit(void)
+{
+ rtnl_unregister(PF_BRIDGE, RTM_GETTUNNEL);
+ rtnl_unregister(PF_BRIDGE, RTM_NEWTUNNEL);
+ rtnl_unregister(PF_BRIDGE, RTM_DELTUNNEL);
+}