return __bpf_redirect_no_mac(skb, dev, flags);
}
+#if IS_ENABLED(CONFIG_IPV6)
+static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb)
+{
+ struct dst_entry *dst = skb_dst(skb);
+ struct net_device *dev = dst->dev;
+ u32 hh_len = LL_RESERVED_SPACE(dev);
+ const struct in6_addr *nexthop;
+ struct neighbour *neigh;
+
+ if (dev_xmit_recursion()) {
+ net_crit_ratelimited("bpf: recursion limit reached on datapath, buggy bpf program?\n");
+ goto out_drop;
+ }
+
+ skb->dev = dev;
+ skb->tstamp = 0;
+
+ if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
+ struct sk_buff *skb2;
+
+ skb2 = skb_realloc_headroom(skb, hh_len);
+ if (unlikely(!skb2)) {
+ kfree_skb(skb);
+ return -ENOMEM;
+ }
+ if (skb->sk)
+ skb_set_owner_w(skb2, skb->sk);
+ consume_skb(skb);
+ skb = skb2;
+ }
+
+ rcu_read_lock_bh();
+ nexthop = rt6_nexthop(container_of(dst, struct rt6_info, dst),
+ &ipv6_hdr(skb)->daddr);
+ neigh = ip_neigh_gw6(dev, nexthop);
+ if (likely(!IS_ERR(neigh))) {
+ int ret;
+
+ sock_confirm_neigh(skb, neigh);
+ dev_xmit_recursion_inc();
+ ret = neigh_output(neigh, skb, false);
+ dev_xmit_recursion_dec();
+ rcu_read_unlock_bh();
+ return ret;
+ }
+ rcu_read_unlock_bh();
+ IP6_INC_STATS(dev_net(dst->dev),
+ ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
+out_drop:
+ kfree_skb(skb);
+ return -ENETDOWN;
+}
+
+static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev)
+{
+ const struct ipv6hdr *ip6h = ipv6_hdr(skb);
+ struct net *net = dev_net(dev);
+ int err, ret = NET_XMIT_DROP;
+ struct dst_entry *dst;
+ struct flowi6 fl6 = {
+ .flowi6_flags = FLOWI_FLAG_ANYSRC,
+ .flowi6_mark = skb->mark,
+ .flowlabel = ip6_flowinfo(ip6h),
+ .flowi6_oif = dev->ifindex,
+ .flowi6_proto = ip6h->nexthdr,
+ .daddr = ip6h->daddr,
+ .saddr = ip6h->saddr,
+ };
+
+ dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &fl6, NULL);
+ if (IS_ERR(dst))
+ goto out_drop;
+
+ skb_dst_set(skb, dst);
+
+ err = bpf_out_neigh_v6(net, skb);
+ if (unlikely(net_xmit_eval(err)))
+ dev->stats.tx_errors++;
+ else
+ ret = NET_XMIT_SUCCESS;
+ goto out_xmit;
+out_drop:
+ dev->stats.tx_errors++;
+ kfree_skb(skb);
+out_xmit:
+ return ret;
+}
+#else
+static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev)
+{
+ kfree_skb(skb);
+ return NET_XMIT_DROP;
+}
+#endif /* CONFIG_IPV6 */
+
+#if IS_ENABLED(CONFIG_INET)
+static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb)
+{
+ struct dst_entry *dst = skb_dst(skb);
+ struct rtable *rt = container_of(dst, struct rtable, dst);
+ struct net_device *dev = dst->dev;
+ u32 hh_len = LL_RESERVED_SPACE(dev);
+ struct neighbour *neigh;
+ bool is_v6gw = false;
+
+ if (dev_xmit_recursion()) {
+ net_crit_ratelimited("bpf: recursion limit reached on datapath, buggy bpf program?\n");
+ goto out_drop;
+ }
+
+ skb->dev = dev;
+ skb->tstamp = 0;
+
+ if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
+ struct sk_buff *skb2;
+
+ skb2 = skb_realloc_headroom(skb, hh_len);
+ if (unlikely(!skb2)) {
+ kfree_skb(skb);
+ return -ENOMEM;
+ }
+ if (skb->sk)
+ skb_set_owner_w(skb2, skb->sk);
+ consume_skb(skb);
+ skb = skb2;
+ }
+
+ rcu_read_lock_bh();
+ neigh = ip_neigh_for_gw(rt, skb, &is_v6gw);
+ if (likely(!IS_ERR(neigh))) {
+ int ret;
+
+ sock_confirm_neigh(skb, neigh);
+ dev_xmit_recursion_inc();
+ ret = neigh_output(neigh, skb, is_v6gw);
+ dev_xmit_recursion_dec();
+ rcu_read_unlock_bh();
+ return ret;
+ }
+ rcu_read_unlock_bh();
+out_drop:
+ kfree_skb(skb);
+ return -ENETDOWN;
+}
+
+static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev)
+{
+ const struct iphdr *ip4h = ip_hdr(skb);
+ struct net *net = dev_net(dev);
+ int err, ret = NET_XMIT_DROP;
+ struct rtable *rt;
+ struct flowi4 fl4 = {
+ .flowi4_flags = FLOWI_FLAG_ANYSRC,
+ .flowi4_mark = skb->mark,
+ .flowi4_tos = RT_TOS(ip4h->tos),
+ .flowi4_oif = dev->ifindex,
+ .flowi4_proto = ip4h->protocol,
+ .daddr = ip4h->daddr,
+ .saddr = ip4h->saddr,
+ };
+
+ rt = ip_route_output_flow(net, &fl4, NULL);
+ if (IS_ERR(rt))
+ goto out_drop;
+ if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) {
+ ip_rt_put(rt);
+ goto out_drop;
+ }
+
+ skb_dst_set(skb, &rt->dst);
+
+ err = bpf_out_neigh_v4(net, skb);
+ if (unlikely(net_xmit_eval(err)))
+ dev->stats.tx_errors++;
+ else
+ ret = NET_XMIT_SUCCESS;
+ goto out_xmit;
+out_drop:
+ dev->stats.tx_errors++;
+ kfree_skb(skb);
+out_xmit:
+ return ret;
+}
+#else
+static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev)
+{
+ kfree_skb(skb);
+ return NET_XMIT_DROP;
+}
+#endif /* CONFIG_INET */
+
+static int __bpf_redirect_neigh(struct sk_buff *skb, struct net_device *dev)
+{
+ struct ethhdr *ethh = eth_hdr(skb);
+
+ if (unlikely(skb->mac_header >= skb->network_header))
+ goto out;
+ bpf_push_mac_rcsum(skb);
+ if (is_multicast_ether_addr(ethh->h_dest))
+ goto out;
+
+ skb_pull(skb, sizeof(*ethh));
+ skb_unset_mac_header(skb);
+ skb_reset_network_header(skb);
+
+ if (skb->protocol == htons(ETH_P_IP))
+ return __bpf_redirect_neigh_v4(skb, dev);
+ else if (skb->protocol == htons(ETH_P_IPV6))
+ return __bpf_redirect_neigh_v6(skb, dev);
+out:
+ kfree_skb(skb);
+ return -ENOTSUPP;
+}
+
+/* Internal, non-exposed redirect flags. */
+enum {
+ BPF_F_NEIGH = (1ULL << 1),
+#define BPF_F_REDIRECT_INTERNAL (BPF_F_NEIGH)
+};
+
BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags)
{
struct net_device *dev;
struct sk_buff *clone;
int ret;
- if (unlikely(flags & ~(BPF_F_INGRESS)))
+ if (unlikely(flags & (~(BPF_F_INGRESS) | BPF_F_REDIRECT_INTERNAL)))
return -EINVAL;
dev = dev_get_by_index_rcu(dev_net(skb->dev), ifindex);
DEFINE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info);
EXPORT_PER_CPU_SYMBOL_GPL(bpf_redirect_info);
-BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags)
-{
- struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
-
- if (unlikely(flags & ~(BPF_F_INGRESS)))
- return TC_ACT_SHOT;
-
- ri->flags = flags;
- ri->tgt_index = ifindex;
-
- return TC_ACT_REDIRECT;
-}
-
int skb_do_redirect(struct sk_buff *skb)
{
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
struct net_device *dev;
+ u32 flags = ri->flags;
dev = dev_get_by_index_rcu(dev_net(skb->dev), ri->tgt_index);
ri->tgt_index = 0;
return -EINVAL;
}
- return __bpf_redirect(skb, dev, ri->flags);
+ return flags & BPF_F_NEIGH ?
+ __bpf_redirect_neigh(skb, dev) :
+ __bpf_redirect(skb, dev, flags);
+}
+
+BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags)
+{
+ struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+
+ if (unlikely(flags & (~(BPF_F_INGRESS) | BPF_F_REDIRECT_INTERNAL)))
+ return TC_ACT_SHOT;
+
+ ri->flags = flags;
+ ri->tgt_index = ifindex;
+
+ return TC_ACT_REDIRECT;
}
static const struct bpf_func_proto bpf_redirect_proto = {
.arg2_type = ARG_ANYTHING,
};
+BPF_CALL_2(bpf_redirect_neigh, u32, ifindex, u64, flags)
+{
+ struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+
+ if (unlikely(flags))
+ return TC_ACT_SHOT;
+
+ ri->flags = BPF_F_NEIGH;
+ ri->tgt_index = ifindex;
+
+ return TC_ACT_REDIRECT;
+}
+
+static const struct bpf_func_proto bpf_redirect_neigh_proto = {
+ .func = bpf_redirect_neigh,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_ANYTHING,
+ .arg2_type = ARG_ANYTHING,
+};
+
BPF_CALL_2(bpf_msg_apply_bytes, struct sk_msg *, msg, u32, bytes)
{
msg->apply_bytes = bytes;
return bpf_get_skb_set_tunnel_proto(func_id);
case BPF_FUNC_redirect:
return &bpf_redirect_proto;
+ case BPF_FUNC_redirect_neigh:
+ return &bpf_redirect_neigh_proto;
case BPF_FUNC_get_route_realm:
return &bpf_get_route_realm_proto;
case BPF_FUNC_get_hash_recalc: